From 8a57639da8e1e5614a2425a8f25bd8b6ba22d6e1 Mon Sep 17 00:00:00 2001 From: Haitao Pan Date: Sun, 21 Dec 2025 19:23:19 +0800 Subject: [PATCH] feat(playbooks): add comprehensive vhosts roles and ops scripts --- alicloud_dns_record.yml | 12 + alicloud_dns_sync.yml | 16 + apply-branch-protection.yml | 7 + common | 8 + deepflow/deepflow-agent-playbook/Readme.md | 2 + .../inventory/js2_hosts.ini | 8 + deepflow/deepflow-agent-playbook/playbook.yml | 10 + .../roles/deepflow_upgrade/tasks/main.yml | 16 + deploy-docker-harbor.yml | 5 + deploy-docker-keycloak.yml | 5 + deploy_OpenObserve_docker.yaml | 5 + deploy_Tempo_docker.yaml | 5 + deploy_VictoriaLogs_docker.yaml | 5 + deploy_VictoriaMetrics_docker.yaml | 5 + deploy_blackbox_exporters_vhosts.yml | 50 + deploy_deepflow_agent | 7 + deploy_exporters_vhosts.yml | 15 + deploy_grafana_docker.yaml | 11 + deploy_monitor_server.yml | 72 + deploy_neurapress_docker.yaml | 11 + deploy_nginx_vhosts.yml | 23 + deploy_nodejs_vhosts.yml | 37 + deploy_openresty_vhosts.yml | 48 + deploy_otel_docker.yaml | 5 + deploy_postgre_vhosts.yml | 162 + deploy_postgres_vhosts.yml | 8 + deploy_redis_vhosts.yml | 10 + deploy_tiny_monitor_server_vhost.yml | 21 + deploy_vhosts_otel-collector.yml | 55 + deploy_xcontrol_server._vhosts.yml | 8 + deploy_xcontrol_web.yml | 8 + deploy_zitadel_docker.yaml | 12 + docs/alicloud_dns_sync.md | 1 + gpu_k8s_init.yml | 15 + gpu_k8s_reset.yml | 13 + init-harbor-server | 17 + init_chaos_mesh | 17 + init_chartmuseum | 8 + init_deepflow | 16 + init_flagger-loadtester | 16 + init_gitlab | 23 + init_grafana_alloy | 8 + init_harbor_server | 8 + init_jenkins | 18 + init_k3s_cluster_agent | 8 + init_k3s_cluster_server | 8 + init_k3s_cluster_std | 27 + init_k3s_cluster_with_argo_server | 38 + init_observability-agent | 13 + init_observability-server | 29 + init_openldap | 18 + init_splunk-otel-collector | 13 + init_telegraf | 10 + init_vault | 8 + init_vpn_gateway.yml | 7 + inventory.ini | 32 + keycloak_server | 7 + 
pre_setup.sh | 48 + renew_nodes_ssl_certs | 8 + roles/README.md | 33 + roles/charts/app/meta/main.yml | 2 + roles/charts/app/tasks/main.yml | 16 + roles/charts/app/templates/.gitignore | 2 + roles/charts/app/templates/deploy-app.yaml | 18 + .../charts/argo-server/files/setup-argocd.sh | 100 + roles/charts/argo-server/meta/main.yml | 2 + roles/charts/argo-server/tasks/main.yml | 2 + roles/charts/chaos-mesh/files/setup.sh | 24 + roles/charts/chaos-mesh/howto.md | 124 + roles/charts/chaos-mesh/meta/main.yml | 2 + roles/charts/chaos-mesh/tasks/main.yml | 4 + roles/charts/chartmuseum/files/setup.sh | 37 + roles/charts/chartmuseum/meta/main.yml | 2 + roles/charts/chartmuseum/tasks/main.yml | 4 + roles/charts/chartmuseum/vars/main.yml | 8 + roles/charts/clickhouse/meta/main.yml | 2 + roles/charts/clickhouse/tasks/main.yml | 48 + roles/charts/clickhouse/templates/.gitignore | 2 + .../clickhouse-cluster/clickhouse-config.yaml | 94 + .../clickhouse-ingress.yaml | 18 + .../clickhouse-service.yaml | 23 + .../clickhouse-statefulset.yml | 103 + .../clickhouse-user-config.yaml | 19 + .../templates/otel-collector/configmap.yaml | 142 + .../templates/otel-collector/deployment.yaml | 42 + .../templates/otel-collector/ingress.yaml | 19 + .../templates/otel-collector/service.yaml | 48 + .../charts/clickhouse/templates/postsetup.sh | 27 + .../templates/qryn/qryn-deployment.yaml | 36 + .../templates/qryn/qryn-ingress.yaml | 24 + .../templates/qryn/qryn-service.yaml | 15 + roles/charts/deepflow/Readme.md | 12 + roles/charts/deepflow/files/post-setup.sh | 7 + roles/charts/deepflow/files/pre-setup.sh | 6 + roles/charts/deepflow/files/setup.sh | 29 + roles/charts/deepflow/meta/main.yml | 2 + roles/charts/deepflow/tasks/main.yml | 19 + roles/charts/embedding-service/README.md | 3 + roles/charts/embedding-service/tasks/main.yml | 5 + roles/charts/feast/README.md | 3 + roles/charts/feast/tasks/main.yml | 5 + .../charts/flagger-loadtester/files/setup.sh | 47 + 
roles/charts/flagger-loadtester/meta/main.yml | 2 + .../charts/flagger-loadtester/tasks/main.yml | 4 + roles/charts/flink-operator/README.md | 3 + roles/charts/flink-operator/tasks/main.yml | 5 + roles/charts/gitlab/files/post-setup.sh | 30 + roles/charts/gitlab/files/pre-setup.sh | 9 + roles/charts/gitlab/files/setup-with-oidc.sh | 106 + .../charts/gitlab/files/setup-with_aws-s3.sh | 154 + roles/charts/gitlab/files/setup.sh | 119 + roles/charts/gitlab/meta/main.yml | 5 + roles/charts/gitlab/tasks/main.yml | 58 + .../charts/gitlab/templates/gitlab-backup-cfg | 5 + roles/charts/gitlab/templates/provider.yaml | 18 + roles/charts/gpu-operator/files/setup.sh | 11 + roles/charts/gpu-operator/tasks/main.yml | 3 + roles/charts/grafana/README.md | 3 + roles/charts/grafana/tasks/main.yml | 5 + roles/charts/harbor/files/post-setup.sh | 14 + roles/charts/harbor/files/pre-setup.sh | 13 + .../harbor/files/setup-bitnami-harbor.sh | 85 + .../harbor/files/setup-office-harbor.sh | 91 + roles/charts/harbor/meta/main.yml | 4 + roles/charts/harbor/tasks/main.yml | 38 + .../harbor/templates/harbor-oidc-config.json | 11 + roles/charts/harbor/vars/main.yml | 9 + roles/charts/helm-repos/tasks/main.yml | 9 + roles/charts/iceberg-bucket/README.md | 3 + roles/charts/iceberg-bucket/tasks/main.yml | 5 + roles/charts/inference-gateway/README.md | 3 + roles/charts/inference-gateway/tasks/main.yml | 5 + roles/charts/jenkins/files/pre-setup.sh | 6 + roles/charts/jenkins/files/setup.sh | 86 + roles/charts/jenkins/howto.md | 124 + roles/charts/jenkins/meta/main.yml | 3 + roles/charts/jenkins/tasks/main.yml | 18 + roles/charts/kafka-cluster/README.md | 3 + roles/charts/kafka-cluster/tasks/main.yml | 5 + roles/charts/keycloak/files/pre-setup.sh | 5 + roles/charts/keycloak/files/setup-keycloak.sh | 39 + roles/charts/keycloak/meta/main.yml | 3 + roles/charts/keycloak/readme.md | 8 + roles/charts/keycloak/tasks/main.yml | 29 + .../templates/aws-gloabl-oidc-broker.yaml | 74 + 
roles/charts/keycloak/vars/main.yml | 16 + .../kubernetes-dashboard/files/setup.sh | 11 + .../kubernetes-dashboard/tasks/main.yml | 3 + roles/charts/loki/README.md | 3 + roles/charts/loki/tasks/main.yml | 5 + roles/charts/metrics-server/files/setup.sh | 5 + roles/charts/metrics-server/tasks/main.yml | 3 + roles/charts/minio/README.md | 3 + roles/charts/minio/tasks/main.yml | 5 + roles/charts/mlflow/README.md | 3 + roles/charts/mlflow/tasks/main.yml | 5 + roles/charts/mysql/files/setup-mysql.sh | 6 + roles/charts/mysql/tasks/main.yml | 3 + roles/charts/node-exporter/tasks/main.yml | 23 + .../templates/node-exporter.service | 14 + roles/charts/nvidia-operator/README.md | 3 + roles/charts/nvidia-operator/tasks/main.yml | 5 + .../charts/observability-agent/files/setup.sh | 46 + .../charts/observability-agent/meta/main.yml | 2 + .../charts/observability-agent/tasks/main.yml | 3 + .../files/mysql-db-init-setup.sh | 6 + .../files/setup-observable-server.sh | 132 + .../charts/observability-server/meta/main.yml | 3 + .../observability-server/tasks/main.yml | 39 + roles/charts/openldap/files/setup-openldap.sh | 44 + roles/charts/openldap/meta/main.yml | 3 + roles/charts/openldap/tasks/main.yml | 13 + roles/charts/openldap/templates/.gitignore | 2 + roles/charts/openldap/templates/ingress.yaml | 45 + roles/charts/openobserve/README.md | 3 + roles/charts/openobserve/tasks/main.yml | 5 + roles/charts/postgres/README.md | 3 + roles/charts/postgres/tasks/main.yml | 5 + roles/charts/postgresql/files/post-setup.sh | 16 + .../postgresql/files/setup-postgresql.sh | 13 + roles/charts/postgresql/tasks/main.yml | 3 + roles/charts/prometheus-stack/README.md | 3 + roles/charts/prometheus-stack/tasks/main.yml | 5 + roles/charts/prometheus/files/setup.sh | 13 + roles/charts/prometheus/tasks/main.yml | 3 + roles/charts/ray-cluster/README.md | 3 + roles/charts/ray-cluster/tasks/main.yml | 5 + roles/charts/redis/files/setup-redis.sh | 14 + roles/charts/redis/tasks/main.yml | 3 + 
roles/charts/redpanda/README.md | 3 + roles/charts/redpanda/tasks/main.yml | 5 + roles/charts/sglang/README.md | 3 + roles/charts/sglang/tasks/main.yml | 5 + roles/charts/spark-operator/README.md | 3 + roles/charts/spark-operator/tasks/main.yml | 5 + .../splunk-otel-collector/files/setup.sh | 20 + .../splunk-otel-collector/tasks/main.yml | 2 + roles/charts/tempo/README.md | 3 + roles/charts/tempo/tasks/main.yml | 5 + roles/charts/trino/README.md | 3 + roles/charts/trino/tasks/main.yml | 5 + roles/charts/vllm/README.md | 3 + roles/charts/vllm/tasks/main.yml | 5 + roles/docker/OpenObserve/README.md | 5 + roles/docker/OpenObserve/tasks/main.yml | 5 + .../OpenObserve/templates/docker-compose.yaml | 41 + roles/docker/Tempo/README.md | 5 + roles/docker/Tempo/tasks/main.yml | 5 + .../Tempo/templates/docker-compose.yaml | 41 + roles/docker/VictoriaLogs/README.md | 5 + roles/docker/VictoriaLogs/tasks/main.yml | 5 + .../templates/docker-compose.yaml | 41 + roles/docker/VictoriaMetrics/README.md | 5 + roles/docker/VictoriaMetrics/tasks/main.yml | 5 + .../templates/docker-compose.yaml | 41 + roles/docker/XControl/README.md | 113 + roles/docker/XControl/defaults/main.yml | 112 + roles/docker/XControl/files/nginx/nginx.conf | 5 + roles/docker/XControl/files/run.sh | 6 + roles/docker/XControl/tasks/main.yml | 76 + .../XControl/templates/config/account.yaml | 63 + .../XControl/templates/config/server.yaml | 54 + .../XControl/templates/docker-compose.yaml | 147 + .../templates/nginx/conf.d/accounts.conf | 40 + .../templates/nginx/conf.d/artifact.conf | 47 + .../nginx/conf.d/bootstrap-nginx.conf | 12 + .../templates/nginx/conf.d/default.conf | 35 + .../templates/nginx/conf.d/homepage.conf | 136 + .../templates/nginx/conf.d/rag-server.conf | 69 + roles/docker/clickhouse/README.md | 3 + roles/docker/clickhouse/tasks/main.yml | 5 + roles/docker/embedding-service/README.md | 3 + roles/docker/embedding-service/tasks/main.yml | 5 + roles/docker/grafana/README.md | 20 + 
roles/docker/grafana/defaults/main.yml | 9 + roles/docker/grafana/tasks/main.yml | 33 + .../grafana/templates/docker-compose.yaml.j2 | 17 + roles/docker/harbor/README.md | 99 + roles/docker/harbor/defaults/main.yml | 138 + roles/docker/harbor/tasks/main.yml | 37 + roles/docker/harbor/tasks/post-setup.yml | 19 + roles/docker/harbor/tasks/pre-setup.yml | 40 + .../templates/common/config/core/app.conf | 6 + .../harbor/templates/common/config/core/env | 47 + .../harbor/templates/common/config/db/env | 2 + .../common/config/jobservice/config.yml | 38 + .../templates/common/config/jobservice/env | 13 + .../common/config/log/logrotate.conf | 8 + .../common/config/log/rsyslog_docker.conf | 7 + .../templates/common/config/nginx/nginx.conf | 149 + .../templates/common/config/portal/nginx.conf | 42 + .../common/config/registry/config.yml | 49 + .../templates/common/config/registry/passwd | 1 + .../templates/common/config/registry/root.crt | 0 .../common/config/registryctl/config.yml | 5 + .../templates/common/config/registryctl/env | 2 + .../harbor/templates/docker-compose.yml.j2 | 195 + roles/docker/kafka/README.md | 3 + roles/docker/kafka/tasks/main.yml | 5 + roles/docker/keycloak/README.md | 113 + roles/docker/keycloak/defaults/main.yml | 36 + .../docker/keycloak/files/create_keystore.sh | 32 + roles/docker/keycloak/tasks/main.yml | 33 + roles/docker/keycloak/tasks/post-setup.yml | 31 + roles/docker/keycloak/tasks/pre-setup.yml | 22 + .../keycloak/templates/docker-compose.yml.j2 | 67 + roles/docker/keycloak/templates/nginx.conf.j2 | 70 + roles/docker/loki/README.md | 3 + roles/docker/loki/tasks/main.yml | 5 + roles/docker/minio/README.md | 3 + roles/docker/minio/tasks/main.yml | 5 + roles/docker/mlflow/README.md | 3 + roles/docker/mlflow/tasks/main.yml | 5 + roles/docker/neurapress/defaults/main.yml | 7 + .../docker/neurapress/files/nginx/nginx.conf | 51 + roles/docker/neurapress/files/run.sh | 6 + roles/docker/neurapress/tasks/main.yml | 69 + 
.../neurapress/templates/docker-compose.yaml | 68 + .../nginx/conf.d/bootstrap-nginx.conf | 11 + .../templates/nginx/conf.d/default.conf | 49 + roles/docker/otel/README.md | 5 + roles/docker/otel/tasks/main.yml | 5 + .../docker/otel/templates/docker-compose.yaml | 41 + roles/docker/postgres/README.md | 3 + roles/docker/postgres/tasks/main.yml | 5 + roles/docker/ray/README.md | 3 + roles/docker/ray/tasks/main.yml | 5 + roles/docker/redpanda/README.md | 3 + roles/docker/redpanda/tasks/main.yml | 5 + roles/docker/sglang/README.md | 3 + roles/docker/sglang/tasks/main.yml | 5 + roles/docker/trino/README.md | 3 + roles/docker/trino/tasks/main.yml | 5 + roles/docker/vllm/README.md | 3 + roles/docker/vllm/tasks/main.yml | 5 + roles/docker/zitadel/README.md | 28 + roles/docker/zitadel/defaults/main.yml | 6 + .../zitadel/files/certbot/conf/.gitkeep | 0 .../docker/zitadel/files/certbot/www/.gitkeep | 0 roles/docker/zitadel/files/nginx/nginx.conf | 5 + roles/docker/zitadel/files/run.sh | 6 + roles/docker/zitadel/tasks/main.yml | 105 + .../zitadel/templates/docker-compose.yaml | 187 + .../nginx/conf.d/bootstrap-nginx.conf | 11 + .../templates/nginx/conf.d/default.conf | 42 + roles/github/defaults/main.yml | 3 + roles/github/tasks/main.yml | 11 + roles/vhosts/OpenResty/defaults/main.yml | 3 + roles/vhosts/OpenResty/handlers/main.yml | 4 + roles/vhosts/OpenResty/meta/main.yml | 2 + roles/vhosts/OpenResty/tasks/geoip.yml | 23 + roles/vhosts/OpenResty/tasks/main.yml | 82 + .../OpenResty/templates/artifact.conf.j2 | 56 + .../OpenResty/templates/geo_redirect.conf.j2 | 45 + .../templates/homepage-static.conf.j2 | 128 + .../OpenResty/templates/homepage.conf.j2 | 78 + .../vhosts/OpenResty/templates/nginx.conf.j2 | 32 + .../vhosts/OpenResty/templates/panel.conf.j2 | 1 + roles/vhosts/Redis/meta/main.yml | 2 + roles/vhosts/Redis/tasks/main.yml | 11 + .../alerting/files/setup-observable-server.sh | 102 + roles/vhosts/alerting/meta/main.yml | 2 + roles/vhosts/alerting/tasks/main.yml | 17 + 
.../vhosts/alerting/templates/alerting_rules | 37 + .../vhosts/alerting/templates/recording_rules | 55 + .../alicloud_dns_record/defaults/main.yml | 17 + .../library/alicloud_dns_record.py | 167 + .../vhosts/alicloud_dns_record/tasks/main.yml | 18 + .../alicloud_dns_sync/defaults/main.yml | 7 + .../alicloud_dns_sync/files/dns_sync.py | 59 + .../vhosts/alicloud_dns_sync/tasks/main.yaml | 18 + .../templates/dns_records.yaml.j2 | 7 + roles/vhosts/alloy/defaults/main.yml | 23 + .../files/loki_journal_sources_gateway.yml | 5 + .../files/loki_journal_sources_k3s_agent.yml | 3 + .../files/loki_journal_sources_k3s_server.yml | 3 + .../files/loki_journal_sources_postgresql.yml | 3 + .../alloy/files/loki_journal_sources_vpn.yml | 3 + roles/vhosts/alloy/tasks/main.yml | 55 + roles/vhosts/alloy/templates/config.alloy.j2 | 19 + .../blackbox_exporter/defaults/main.yml | 15 + .../blackbox_exporter/handlers/main.yml | 9 + roles/vhosts/blackbox_exporter/meta/main.yml | 2 + roles/vhosts/blackbox_exporter/tasks/main.yml | 72 + .../templates/blackbox.service.j2 | 16 + .../templates/blackbox.yml.j2 | 15 + .../files/certs_automated_issuance.sh | 32 + .../files/fetch_certs_from_vault.py | 71 + .../cert-manager/files/get_certificate.sh | 35 + .../cert-manager/files/update-certs-secret.sh | 10 + roles/vhosts/cert-manager/meta/main.yml | 2 + roles/vhosts/cert-manager/tasks/main.yml | 7 + roles/vhosts/cert-manager/vars/main.yml | 2 + roles/vhosts/chasquid/handlers/main.yml | 19 + roles/vhosts/chasquid/tasks/main.yml | 166 + .../chasquid/templates/chasquid.conf.j2 | 16 + .../chasquid/templates/chasquid.service.j2 | 24 + roles/vhosts/common/defaults/main.yml | 51 +- roles/vhosts/common/handlers/main.yml | 4 + roles/vhosts/common/tasks/addons/gpu.yml | 22 + roles/vhosts/common/tasks/addons/s3fs.yml | 98 + roles/vhosts/common/tasks/common_debian.yml | 21 + roles/vhosts/common/tasks/common_redhat.yml | 8 + roles/vhosts/common/tasks/main.yml | 79 +- roles/vhosts/common/tasks/manage_keyring.yml | 
85 + roles/vhosts/common/tasks/packages.yml | 48 + .../vhosts/common/tasks/packages_cleanup.yml | 16 + roles/vhosts/common/tasks/repo_setup.yml | 115 + roles/vhosts/common/templates/hostname.j2 | 1 + roles/vhosts/deepflow_agent/tasks/main.yml | 49 + .../templates/deepflow-agent.yaml.j2 | 7 + roles/vhosts/docker/defaults/main.yml | 7 + roles/vhosts/docker/meta/main.yml | 3 + roles/vhosts/docker/tasks/main.yml | 103 + roles/vhosts/dovecot/handlers/main.yml | 10 + roles/vhosts/dovecot/tasks/main.yml | 66 + .../vhosts/dovecot/templates/10-auth.conf.j2 | 3 + .../vhosts/dovecot/templates/10-mail.conf.j2 | 5 + .../dovecot/templates/10-master.conf.j2 | 24 + roles/vhosts/dovecot/templates/10-ssl.conf.j2 | 5 + .../vhosts/dovecot/templates/dovecot.conf.j2 | 3 + roles/vhosts/firewall/defaults/main.yml | 31 + roles/vhosts/firewall/handlers/main.yml | 2 + roles/vhosts/firewall/tasks/main.yml | 98 + .../gpu-k8s-reset/files/reset-gpu-k8s.sh | 17 + roles/vhosts/gpu-k8s-reset/tasks/main.yml | 3 + roles/vhosts/gpu-k8s/defaults/main.yml | 13 + .../gpu-k8s/files/get_labring_registry.sh | 12 + roles/vhosts/gpu-k8s/files/run_sealos.sh | 24 + .../vhosts/gpu-k8s/tasks/install_cluster.yml | 129 + roles/vhosts/gpu-k8s/tasks/install_driver.yml | 22 + roles/vhosts/gpu-k8s/tasks/main.yml | 8 + roles/vhosts/gpu-k8s/tasks/run_test.yml | 16 + roles/vhosts/grafana/defaults/main.yml | 13 + roles/vhosts/grafana/tasks/main.yml | 94 + .../grafana/templates/dashboards.yaml.j2 | 10 + roles/vhosts/grafana/templates/env.conf.j2 | 4 + .../templates/grafana-dash-pull.service.j2 | 7 + .../templates/grafana-dash-pull.timer.j2 | 10 + roles/vhosts/grafana/templates/grafana.ini.j2 | 22 + roles/vhosts/k3s-addon/files/setup-argocd.sh | 102 + .../k3s-addon/files/setup-dns-provider.sh | 36 + roles/vhosts/k3s-addon/files/setup-egress.sh | 24 + roles/vhosts/k3s-addon/files/setup-flagger.sh | 21 + roles/vhosts/k3s-addon/files/setup-fluxcd.sh | 46 + .../k3s-addon/files/setup-ingress-apisix.sh | 38 + 
roles/vhosts/k3s-addon/files/setup-ingress.sh | 145 + .../k3s-addon/files/setup-keda-operator.sh | 6 + .../files/setup-prometheus-operator.sh | 52 + roles/vhosts/k3s-addon/meta/main.yml | 2 + roles/vhosts/k3s-addon/tasks/main.yml | 15 + .../templates/ingress-apisix-dashboard.yaml | 33 + .../templates/ingress-apisix-values.yaml | 24 + .../kubernetes-discovery-config.yaml | 65 + .../kubernetes-discovery-serviceaccount.yaml | 40 + .../templates/kubernetes-discovery.yaml | 47 + .../k3s-cluster-agent/defaults/main.yml | 1 + .../k3s-cluster-agent/tasks/bootstrap.yml | 0 .../k3s-cluster-agent/tasks/destroy.yml | 0 roles/vhosts/k3s-cluster-agent/tasks/main.yml | 2 + .../k3s-cluster-agent/tasks/upgrade.yml | 0 .../templates/install_k3s_agent.sh.j2 | 3 + roles/vhosts/k3s-cluster-agent/vars/main.yml | 5 + .../k3s-cluster-server/defaults/main.yml | 1 + .../k3s-cluster-server/tasks/add-master.yml | 0 .../k3s-cluster-server/tasks/backup.yml | 0 .../k3s-cluster-server/tasks/bootstrap.yml | 0 .../k3s-cluster-server/tasks/destroy.yml | 0 .../vhosts/k3s-cluster-server/tasks/main.yml | 2 + .../k3s-cluster-server/tasks/recovery.yml | 0 .../k3s-cluster-server/tasks/upgrade.yml | 0 .../templates/install_k3s_server.sh.j2 | 3 + roles/vhosts/k3s-cluster-server/vars/main.yml | 17 + roles/vhosts/k3s-reset/files/reset-k3s.sh | 32 + roles/vhosts/k3s-reset/tasks/main.yml | 4 + roles/vhosts/k3s/files/setup-cni-cilium.sh | 19 + roles/vhosts/k3s/files/setup-cni-kubeovn.sh | 17 + roles/vhosts/k3s/files/setup-k3s.sh | 134 + roles/vhosts/k3s/meta/main.yml | 2 + roles/vhosts/k3s/tasks/main.yml | 11 + roles/vhosts/k3s/templates/cni_install.sh | 3657 +++++++++++++++++ roles/vhosts/k8s-node/defaults/main.yml | 3 + roles/vhosts/k8s-node/tasks/apt_setup.yml | 65 + roles/vhosts/k8s-node/tasks/containerd.yml | 33 + roles/vhosts/k8s-node/tasks/gpu.yml | 74 + roles/vhosts/k8s-node/tasks/main.yml | 23 + roles/vhosts/k8s-node/tasks/network.yml | 46 + roles/vhosts/k8s-node/tasks/packages.yml | 60 + 
roles/vhosts/k8s-node/tasks/reboot.yml | 33 + roles/vhosts/k8s-node/tasks/system_config.yml | 103 + roles/vhosts/k8s-node/tasks/user_setup.yml | 36 + .../files/display_network_info.sh | 10 + roles/vhosts/network_info/tasks/main.yml | 15 + roles/vhosts/nginx-proxy/defaults/main.yml | 4 + roles/vhosts/nginx-proxy/handlers/main.yml | 4 + roles/vhosts/nginx-proxy/tasks/main.yml | 28 + .../nginx-proxy/templates/nginx-proxy.conf.j2 | 45 + roles/vhosts/nginx/defaults/main.yml | 12 + roles/vhosts/nginx/handlers/main.yml | 4 + roles/vhosts/nginx/tasks/main.yml | 84 + roles/vhosts/nginx/templates/artifact.conf.j2 | 33 + .../nginx/templates/cn-homepage-https.conf.j2 | 35 + .../templates/cn-homepage-redirect.conf.j2 | 5 + roles/vhosts/nginx/templates/grafana.conf.j2 | 29 + roles/vhosts/nginx/templates/metrics.conf.j2 | 43 + roles/vhosts/node_exporter/meta/main.yml | 2 + roles/vhosts/node_exporter/tasks/main.yml | 78 + .../templates/node-exporter.service | 25 + roles/vhosts/nodejs/defaults/main.yml | 13 + roles/vhosts/nodejs/handlers/main.yml | 2 + roles/vhosts/nodejs/tasks/darwin.yml | 38 + roles/vhosts/nodejs/tasks/main.yml | 115 + roles/vhosts/nodejs/tasks/ubuntu.yml | 53 + .../vhosts/nodejs/templates/npm_global.sh.j2 | 3 + roles/vhosts/openobserve/defaults/main.yml | 8 + roles/vhosts/openobserve/tasks/main.yml | 21 + .../templates/openobserve.service.j2 | 25 + roles/vhosts/otel-collector/meta/main.yml | 2 + roles/vhosts/otel-collector/tasks/main.yml | 114 + .../templates/otel-collector.service | 13 + .../otel-collector/templates/otel-config.yaml | 74 + roles/vhosts/postgres/handlers/main.yml | 5 + roles/vhosts/postgres/meta/main.yml | 2 + roles/vhosts/postgres/tasks/main.yml | 120 + roles/vhosts/process_exporter/meta/main.yml | 2 + roles/vhosts/process_exporter/tasks/main.yml | 71 + .../templates/process-exporter.service | 18 + .../templates/process-exporter.yml | 5 + .../vhosts/prometheus-transfer/meta/main.yml | 2 + .../vhosts/prometheus-transfer/tasks/main.yml | 21 + 
.../templates/prometheus-transfer.service | 16 + .../templates/prometheus-transfer.yml | 9 + .../start-prometheus-transfer-service.sh | 2 + .../stop-prometheus-transfer-service.sh | 2 + roles/vhosts/prometheus/defaults/main.yml | 15 + roles/vhosts/prometheus/tasks/main.yml | 116 + .../vhosts/prometheus/templates/nodes.json.j2 | 3 + .../templates/prometheus.service.j2 | 19 + .../prometheus/templates/prometheus.yml.j2 | 23 + roles/vhosts/promtail-agent/meta/main.yml | 2 + roles/vhosts/promtail-agent/tasks/main.yml | 19 + .../templates/promtail-agent.service | 15 + .../promtail-agent/templates/promtail.yaml | 38 + roles/vhosts/sealos-k8s/defaults/main.yml | 1 + roles/vhosts/sealos-k8s/tasks/main.yml | 115 + .../vhosts/sealos-k8s/templates/hosts.toml.j2 | 4 + .../sealos-k8s/templates/resolved.conf.j2 | 4 + roles/vhosts/secret-manger/tasks/main.yml | 48 + roles/vhosts/ssh-trust/defaults/main.yml | 2 + roles/vhosts/ssh-trust/tasks/main.yml | 23 + roles/vhosts/telegraf/handlers/main.yml | 5 + roles/vhosts/telegraf/meta/main.yml | 2 + roles/vhosts/telegraf/tasks/main.yml | 49 + roles/vhosts/telegraf/templates/telegraf.conf | 32 + roles/vhosts/vault/files/setup.sh | 34 + roles/vhosts/vault/meta/main.yml | 2 + roles/vhosts/vault/readme.md | 4 + roles/vhosts/vault/tasks/main.yml | 4 + roles/vhosts/vault/vars/main.yml | 7 + .../files/enable_ip_forward.sh | 5 + roles/vhosts/wireguard-client/tasks/main.yml | 17 + .../wireguard-client/templates/server.conf | 15 + .../files/enable_ip_forward.sh | 5 + .../wireguard-gateway/handlers/main.yml | 4 + roles/vhosts/wireguard-gateway/meta/main.yml | 2 + roles/vhosts/wireguard-gateway/tasks/main.yml | 45 + .../wireguard-gateway/templates/wg0.conf.j2 | 27 + .../vhosts/xcontrol_server/defaults/main.yml | 9 + roles/vhosts/xcontrol_server/tasks/main.yml | 32 + .../templates/server-qwen-ai.yaml.j2 | 55 + .../templates/xcontrol-server.service.j2 | 15 + roles/vhosts/zot/defaults/main.yml | 33 + roles/vhosts/zot/tasks/main.yml | 102 + 
roles/vhosts/zot/templates/config.json.j2 | 45 + roles/vhosts/zot/templates/zot.service.j2 | 17 + scripts/Fetch_packages_depends.sh | 58 + scripts/Jenkinsfile | 27 + scripts/ansible_playbook_hosts_setup.sh | 40 + scripts/argo_application-demo.yaml | 22 + scripts/argocd_all_in_one.sh | 45 + scripts/artifact/setup-harbor.sh | 66 + scripts/backup_docker_registry_secret.sh | 21 + scripts/check_docker_registry_secret.sh | 33 + scripts/deepflow-agent-batch-tools-v1.0.sh | 265 ++ .../backup_images_v6.3-20250309-17.json | 435 ++ scripts/deepflow/check_k8s_node_config.sh | 215 + scripts/deepflow/clean-failed-pods.sh | 14 + .../deepflow-server-master-controller-pre.sh | 20 + .../deepflow-server-slave-controller-pre.sh | 20 + scripts/deepflow/deepflow_k8s_backup.sh | 161 + scripts/deepflow/deploy-k8s.sh | 22 + scripts/deepflow/df-web-ai-push-all.sh | 83 + scripts/deepflow/images.txt | 6 + scripts/deepflow/pull-all-v6.4.sh | 40 + .../pull_save_scp_image_multi_arch.sh | 181 + scripts/deepflow/setup-agent-all-in-one.sh | 194 + scripts/deepflow/setup-deepflow-agent.sh | 13 + scripts/deploy-open-webui.sh | 1 + scripts/deploy_deepflow_agent.sh | 368 ++ scripts/dynamic_inventory.py | 69 + scripts/fluxcd_all_in_one.sh | 30 + scripts/gather_network_info.yml | 12 + scripts/generate_ssl.sh | 79 + scripts/get-standalone-cert.sh | 38 + .../global-monitor/agent-group-config.yaml | 3 + scripts/global-monitor/config/containerd.toml | 34 + .../config/deepflow-registry.yaml | 23 + scripts/global-monitor/config/nginx.conf | 19 + scripts/global-monitor/config/registry.yaml | 23 + scripts/global-monitor/custom-domain.yaml | 5 + .../deepflow-registry/all.tag.list | 51 + .../deepflow-registry/compose.yaml | 12 + .../deepflow-registry/push_images.sh | 33 + .../deepflow-registry/setup-nerdctl.sh | 17 + .../deepflow-registry/setup-registry.sh | 12 + .../deepflow-registry/show_images.sh | 39 + .../deepflow-sever-values-v6.3.yaml | 117 + .../setup-agent-group-config.sh | 8 + 
scripts/global-monitor/setup-coroot.sh | 11 + .../setup-deepflow-Host-Domain-Group.sh | 5 + .../setup-deepflow-Host-Domain.sh | 8 + .../setup-deepflow-add-domain.sh | 13 + .../setup-deepflow-server-ee-all-in-one.sh | 36 + scripts/global-monitor/setup-deepflow.sh | 27 + scripts/global-monitor/setup-ingress.sh | 56 + .../global-monitor/setup-kubesphere-core.sh | 1 + scripts/global-monitor/setup-signoz.sh | 10 + scripts/gpu-k8s.sh | 28 + scripts/ingress-installer.sh | 106 + .../ingress/deepflow-front-end-ingress.yaml | 28 + scripts/init-remote-xray.sh | 32 + scripts/init-update-server.sh | 134 + scripts/init_ansible_role.sh | 99 + scripts/init_linux_user.sh | 40 + scripts/install-single-gpu-k8s.sh | 199 + .../iptables_whitelist_enforce_final_fixed.sh | 120 + scripts/k3s-cluster/.gitignore | 13 + scripts/k3s-cluster/check-cilium-egress.sh | 31 + .../k3s-cluster/check_cilium_requirements.sh | 140 + scripts/k3s-cluster/cilium-cli.sh | 9 + scripts/k3s-cluster/cilium-fixed.sh | 37 + .../deploy_velero_with_chart_values_yaml.sh | 85 + scripts/k3s-cluster/egress-nat-test.yaml | 46 + .../init_k3s_cluster_agent_role.sh | 46 + .../init_k3s_cluster_server_role.sh | 69 + scripts/k3s-cluster/k3s.service | 37 + scripts/k3s-cluster/k3s.service-without-cni | 38 + scripts/k3s-cluster/k8s_backup_config.yaml | 25 + scripts/k3s-cluster/k8s_backup_tool.sh | 391 ++ scripts/k3s-cluster/k8s_backup_tool_howto.md | 119 + scripts/k3s-cluster/k8s_restore_all.sh | 21 + scripts/k3s-cluster/set-node-label.sh | 6 + scripts/k3s-cluster/setup-cilium-cni.sh | 79 + scripts/k3s-cluster/setup-cilium-helm.sh | 28 + scripts/k3s-cluster/setup-egress-gateway.sh | 77 + scripts/k3s-cluster/setup-k3s-agent.sh | 72 + .../k3s-cluster/setup-k3s-cluster-agent.sh | 72 + .../k3s-cluster/setup-k3s-cluster-with-br0.sh | 42 + scripts/k3s-cluster/setup-k3s-cluster.md | 38 + scripts/k3s-cluster/setup-k3s-cluster.sh | 284 ++ scripts/k3s-cluster/setup-k3s-with-gitops.sh | 206 + scripts/k3s-cluster/setup-k3s-with-ingress.sh | 
226 + scripts/k3s-cluster/setup-nginx-ingress.sh | 81 + scripts/k3s_all_in_one.sh | 93 + .../GatewayAPI-deepflow-example.yaml | 44 + scripts/kong-gateway/GatewayAPI-example.yaml | 81 + .../kong-gateway/GatewayAPI-http-example.yaml | 81 + scripts/kong-gateway/deploy-kong-gateway.sh | 77 + scripts/list_cluster_namespace_map.sh | 6 + scripts/make_k3s_offline_package.sh | 471 +++ scripts/merge_csv.py | 30 + scripts/merge_vars.py | 91 + scripts/netcheck.sh | 62 + scripts/network-config/ubuntu/init-wsl.sh | 40 + scripts/network-config/ubuntu/readme.md | 10 + scripts/network-config/windows/readme.md | 1 + .../windows/start-wsl-bridge.ps1 | 80 + .../pipeline-library/vars/ansibleSteps.groovy | 74 + scripts/pulp-installer.sh | 120 + scripts/pulp-operator-repo-gateway.yaml | 40 + scripts/pulp-operator-repo.yaml | 43 + scripts/pulp-operator.sh | 5 + scripts/registry/.gitignore | 17 + scripts/registry/all.tag.list | 47 + scripts/registry/clean_unlabeled_images.sh | 12 + scripts/registry/push_images.sh | 39 + scripts/registry/setup-nerdctl.sh | 17 + scripts/registry/setup-registry.sh | 260 ++ scripts/registry/show_images.sh | 37 + ...new_docker_registry_secret_with_kubectl.sh | 30 + scripts/rewrite-cover-history.sh | 30 + scripts/rollout_docker_registry_secret.sh | 21 + scripts/secret/README.md | 45 + scripts/secret/hcp/__init__.py | 1 + scripts/secret/hcp/secret.py | 55 + scripts/secret/setup.py | 17 + scripts/secret/tests/__init__.py | 0 scripts/secret/tests/test_secret.py | 29 + scripts/setup-gitea.sh | 83 + scripts/setup-grafana-agent.sh | 74 + scripts/setup-microservice-demo.sh | 11 + scripts/setup-open-telemetry-demo.sh | 116 + scripts/setup-vector.sh | 101 + scripts/sing-box/README.md | 95 + .../sing-box/client-gvisor-tun-reality.json | 72 + scripts/sing-box/client-gvisor-tun-vless.json | 69 + scripts/sing-box/create-reality-keypair.sh | 1 + scripts/sing-box/install-singbox.sh | 136 + scripts/sing-box/server-VLESS-TCP-XTLS.json | 34 + scripts/sing-box/server-reality.json 
| 38 + scripts/update-server/Chart.yaml | 4 + .../update-server/templates/configmap.yaml | 18 + .../update-server/templates/deployment.yaml | 35 + scripts/update-server/templates/route.yaml | 21 + scripts/update-server/templates/service.yaml | 10 + scripts/update-server/values.yaml | 10 + scripts/verify_rew_docker_registry_secret.sh | 63 + scripts/website-homepage/Chart.yaml | 4 + .../website-homepage/templates/configmap.yaml | 29 + .../templates/deployment.yaml | 35 + scripts/website-homepage/templates/route.yaml | 21 + .../website-homepage/templates/service.yaml | 10 + scripts/website-homepage/values.yaml | 10 + setup-docker.yml | 5 + vars/dns_records_svc_plus.yaml | 15 + wireguard_ali_vpn_gw | 29 + wireguard_client | 24 + wireguard_gateway | 7 + 688 files changed, 27226 insertions(+), 32 deletions(-) create mode 100644 alicloud_dns_record.yml create mode 100644 alicloud_dns_sync.yml create mode 100644 apply-branch-protection.yml create mode 100644 common create mode 100644 deepflow/deepflow-agent-playbook/Readme.md create mode 100644 deepflow/deepflow-agent-playbook/inventory/js2_hosts.ini create mode 100644 deepflow/deepflow-agent-playbook/playbook.yml create mode 100644 deepflow/deepflow-agent-playbook/roles/deepflow_upgrade/tasks/main.yml create mode 100644 deploy-docker-harbor.yml create mode 100644 deploy-docker-keycloak.yml create mode 100644 deploy_OpenObserve_docker.yaml create mode 100644 deploy_Tempo_docker.yaml create mode 100644 deploy_VictoriaLogs_docker.yaml create mode 100644 deploy_VictoriaMetrics_docker.yaml create mode 100644 deploy_blackbox_exporters_vhosts.yml create mode 100644 deploy_deepflow_agent create mode 100644 deploy_exporters_vhosts.yml create mode 100644 deploy_grafana_docker.yaml create mode 100644 deploy_monitor_server.yml create mode 100644 deploy_neurapress_docker.yaml create mode 100644 deploy_nginx_vhosts.yml create mode 100644 deploy_nodejs_vhosts.yml create mode 100644 deploy_openresty_vhosts.yml create mode 100644 
deploy_otel_docker.yaml create mode 100644 deploy_postgre_vhosts.yml create mode 100644 deploy_postgres_vhosts.yml create mode 100644 deploy_redis_vhosts.yml create mode 100644 deploy_tiny_monitor_server_vhost.yml create mode 100644 deploy_vhosts_otel-collector.yml create mode 100644 deploy_xcontrol_server._vhosts.yml create mode 100644 deploy_xcontrol_web.yml create mode 100644 deploy_zitadel_docker.yaml create mode 100644 docs/alicloud_dns_sync.md create mode 100644 gpu_k8s_init.yml create mode 100644 gpu_k8s_reset.yml create mode 100644 init-harbor-server create mode 100644 init_chaos_mesh create mode 100644 init_chartmuseum create mode 100644 init_deepflow create mode 100644 init_flagger-loadtester create mode 100644 init_gitlab create mode 100644 init_grafana_alloy create mode 100644 init_harbor_server create mode 100644 init_jenkins create mode 100644 init_k3s_cluster_agent create mode 100644 init_k3s_cluster_server create mode 100644 init_k3s_cluster_std create mode 100644 init_k3s_cluster_with_argo_server create mode 100644 init_observability-agent create mode 100644 init_observability-server create mode 100644 init_openldap create mode 100644 init_splunk-otel-collector create mode 100644 init_telegraf create mode 100644 init_vault create mode 100755 init_vpn_gateway.yml create mode 100644 inventory.ini create mode 100644 keycloak_server create mode 100644 pre_setup.sh create mode 100644 renew_nodes_ssl_certs create mode 100644 roles/README.md create mode 100644 roles/charts/app/meta/main.yml create mode 100755 roles/charts/app/tasks/main.yml create mode 100644 roles/charts/app/templates/.gitignore create mode 100644 roles/charts/app/templates/deploy-app.yaml create mode 100644 roles/charts/argo-server/files/setup-argocd.sh create mode 100644 roles/charts/argo-server/meta/main.yml create mode 100755 roles/charts/argo-server/tasks/main.yml create mode 100644 roles/charts/chaos-mesh/files/setup.sh create mode 100644 roles/charts/chaos-mesh/howto.md create 
mode 100644 roles/charts/chaos-mesh/meta/main.yml create mode 100755 roles/charts/chaos-mesh/tasks/main.yml create mode 100644 roles/charts/chartmuseum/files/setup.sh create mode 100644 roles/charts/chartmuseum/meta/main.yml create mode 100755 roles/charts/chartmuseum/tasks/main.yml create mode 100644 roles/charts/chartmuseum/vars/main.yml create mode 100644 roles/charts/clickhouse/meta/main.yml create mode 100755 roles/charts/clickhouse/tasks/main.yml create mode 100644 roles/charts/clickhouse/templates/.gitignore create mode 100644 roles/charts/clickhouse/templates/clickhouse-cluster/clickhouse-config.yaml create mode 100644 roles/charts/clickhouse/templates/clickhouse-cluster/clickhouse-ingress.yaml create mode 100644 roles/charts/clickhouse/templates/clickhouse-cluster/clickhouse-service.yaml create mode 100644 roles/charts/clickhouse/templates/clickhouse-cluster/clickhouse-statefulset.yml create mode 100644 roles/charts/clickhouse/templates/clickhouse-cluster/clickhouse-user-config.yaml create mode 100644 roles/charts/clickhouse/templates/otel-collector/configmap.yaml create mode 100644 roles/charts/clickhouse/templates/otel-collector/deployment.yaml create mode 100644 roles/charts/clickhouse/templates/otel-collector/ingress.yaml create mode 100644 roles/charts/clickhouse/templates/otel-collector/service.yaml create mode 100755 roles/charts/clickhouse/templates/postsetup.sh create mode 100644 roles/charts/clickhouse/templates/qryn/qryn-deployment.yaml create mode 100644 roles/charts/clickhouse/templates/qryn/qryn-ingress.yaml create mode 100644 roles/charts/clickhouse/templates/qryn/qryn-service.yaml create mode 100644 roles/charts/deepflow/Readme.md create mode 100644 roles/charts/deepflow/files/post-setup.sh create mode 100644 roles/charts/deepflow/files/pre-setup.sh create mode 100644 roles/charts/deepflow/files/setup.sh create mode 100644 roles/charts/deepflow/meta/main.yml create mode 100755 roles/charts/deepflow/tasks/main.yml create mode 100644 
roles/charts/embedding-service/README.md create mode 100644 roles/charts/embedding-service/tasks/main.yml create mode 100644 roles/charts/feast/README.md create mode 100644 roles/charts/feast/tasks/main.yml create mode 100644 roles/charts/flagger-loadtester/files/setup.sh create mode 100644 roles/charts/flagger-loadtester/meta/main.yml create mode 100755 roles/charts/flagger-loadtester/tasks/main.yml create mode 100644 roles/charts/flink-operator/README.md create mode 100644 roles/charts/flink-operator/tasks/main.yml create mode 100755 roles/charts/gitlab/files/post-setup.sh create mode 100755 roles/charts/gitlab/files/pre-setup.sh create mode 100644 roles/charts/gitlab/files/setup-with-oidc.sh create mode 100644 roles/charts/gitlab/files/setup-with_aws-s3.sh create mode 100644 roles/charts/gitlab/files/setup.sh create mode 100644 roles/charts/gitlab/meta/main.yml create mode 100755 roles/charts/gitlab/tasks/main.yml create mode 100644 roles/charts/gitlab/templates/gitlab-backup-cfg create mode 100644 roles/charts/gitlab/templates/provider.yaml create mode 100755 roles/charts/gpu-operator/files/setup.sh create mode 100644 roles/charts/gpu-operator/tasks/main.yml create mode 100644 roles/charts/grafana/README.md create mode 100644 roles/charts/grafana/tasks/main.yml create mode 100644 roles/charts/harbor/files/post-setup.sh create mode 100644 roles/charts/harbor/files/pre-setup.sh create mode 100644 roles/charts/harbor/files/setup-bitnami-harbor.sh create mode 100644 roles/charts/harbor/files/setup-office-harbor.sh create mode 100644 roles/charts/harbor/meta/main.yml create mode 100755 roles/charts/harbor/tasks/main.yml create mode 100644 roles/charts/harbor/templates/harbor-oidc-config.json create mode 100644 roles/charts/harbor/vars/main.yml create mode 100644 roles/charts/helm-repos/tasks/main.yml create mode 100644 roles/charts/iceberg-bucket/README.md create mode 100644 roles/charts/iceberg-bucket/tasks/main.yml create mode 100644 
roles/charts/inference-gateway/README.md create mode 100644 roles/charts/inference-gateway/tasks/main.yml create mode 100644 roles/charts/jenkins/files/pre-setup.sh create mode 100644 roles/charts/jenkins/files/setup.sh create mode 100644 roles/charts/jenkins/howto.md create mode 100644 roles/charts/jenkins/meta/main.yml create mode 100755 roles/charts/jenkins/tasks/main.yml create mode 100644 roles/charts/kafka-cluster/README.md create mode 100644 roles/charts/kafka-cluster/tasks/main.yml create mode 100644 roles/charts/keycloak/files/pre-setup.sh create mode 100644 roles/charts/keycloak/files/setup-keycloak.sh create mode 100644 roles/charts/keycloak/meta/main.yml create mode 100644 roles/charts/keycloak/readme.md create mode 100755 roles/charts/keycloak/tasks/main.yml create mode 100644 roles/charts/keycloak/templates/aws-gloabl-oidc-broker.yaml create mode 100644 roles/charts/keycloak/vars/main.yml create mode 100755 roles/charts/kubernetes-dashboard/files/setup.sh create mode 100644 roles/charts/kubernetes-dashboard/tasks/main.yml create mode 100644 roles/charts/loki/README.md create mode 100644 roles/charts/loki/tasks/main.yml create mode 100755 roles/charts/metrics-server/files/setup.sh create mode 100644 roles/charts/metrics-server/tasks/main.yml create mode 100644 roles/charts/minio/README.md create mode 100644 roles/charts/minio/tasks/main.yml create mode 100644 roles/charts/mlflow/README.md create mode 100644 roles/charts/mlflow/tasks/main.yml create mode 100644 roles/charts/mysql/files/setup-mysql.sh create mode 100755 roles/charts/mysql/tasks/main.yml create mode 100755 roles/charts/node-exporter/tasks/main.yml create mode 100755 roles/charts/node-exporter/templates/node-exporter.service create mode 100644 roles/charts/nvidia-operator/README.md create mode 100644 roles/charts/nvidia-operator/tasks/main.yml create mode 100644 roles/charts/observability-agent/files/setup.sh create mode 100644 roles/charts/observability-agent/meta/main.yml create mode 
100755 roles/charts/observability-agent/tasks/main.yml create mode 100644 roles/charts/observability-server/files/mysql-db-init-setup.sh create mode 100644 roles/charts/observability-server/files/setup-observable-server.sh create mode 100644 roles/charts/observability-server/meta/main.yml create mode 100755 roles/charts/observability-server/tasks/main.yml create mode 100644 roles/charts/openldap/files/setup-openldap.sh create mode 100644 roles/charts/openldap/meta/main.yml create mode 100755 roles/charts/openldap/tasks/main.yml create mode 100644 roles/charts/openldap/templates/.gitignore create mode 100644 roles/charts/openldap/templates/ingress.yaml create mode 100644 roles/charts/openobserve/README.md create mode 100644 roles/charts/openobserve/tasks/main.yml create mode 100644 roles/charts/postgres/README.md create mode 100644 roles/charts/postgres/tasks/main.yml create mode 100644 roles/charts/postgresql/files/post-setup.sh create mode 100644 roles/charts/postgresql/files/setup-postgresql.sh create mode 100755 roles/charts/postgresql/tasks/main.yml create mode 100644 roles/charts/prometheus-stack/README.md create mode 100644 roles/charts/prometheus-stack/tasks/main.yml create mode 100755 roles/charts/prometheus/files/setup.sh create mode 100644 roles/charts/prometheus/tasks/main.yml create mode 100644 roles/charts/ray-cluster/README.md create mode 100644 roles/charts/ray-cluster/tasks/main.yml create mode 100644 roles/charts/redis/files/setup-redis.sh create mode 100755 roles/charts/redis/tasks/main.yml create mode 100644 roles/charts/redpanda/README.md create mode 100644 roles/charts/redpanda/tasks/main.yml create mode 100644 roles/charts/sglang/README.md create mode 100644 roles/charts/sglang/tasks/main.yml create mode 100644 roles/charts/spark-operator/README.md create mode 100644 roles/charts/spark-operator/tasks/main.yml create mode 100644 roles/charts/splunk-otel-collector/files/setup.sh create mode 100755 
roles/charts/splunk-otel-collector/tasks/main.yml create mode 100644 roles/charts/tempo/README.md create mode 100644 roles/charts/tempo/tasks/main.yml create mode 100644 roles/charts/trino/README.md create mode 100644 roles/charts/trino/tasks/main.yml create mode 100644 roles/charts/vllm/README.md create mode 100644 roles/charts/vllm/tasks/main.yml create mode 100644 roles/docker/OpenObserve/README.md create mode 100644 roles/docker/OpenObserve/tasks/main.yml create mode 100644 roles/docker/OpenObserve/templates/docker-compose.yaml create mode 100644 roles/docker/Tempo/README.md create mode 100644 roles/docker/Tempo/tasks/main.yml create mode 100644 roles/docker/Tempo/templates/docker-compose.yaml create mode 100644 roles/docker/VictoriaLogs/README.md create mode 100644 roles/docker/VictoriaLogs/tasks/main.yml create mode 100644 roles/docker/VictoriaLogs/templates/docker-compose.yaml create mode 100644 roles/docker/VictoriaMetrics/README.md create mode 100644 roles/docker/VictoriaMetrics/tasks/main.yml create mode 100644 roles/docker/VictoriaMetrics/templates/docker-compose.yaml create mode 100644 roles/docker/XControl/README.md create mode 100644 roles/docker/XControl/defaults/main.yml create mode 100644 roles/docker/XControl/files/nginx/nginx.conf create mode 100644 roles/docker/XControl/files/run.sh create mode 100644 roles/docker/XControl/tasks/main.yml create mode 100644 roles/docker/XControl/templates/config/account.yaml create mode 100644 roles/docker/XControl/templates/config/server.yaml create mode 100644 roles/docker/XControl/templates/docker-compose.yaml create mode 100644 roles/docker/XControl/templates/nginx/conf.d/accounts.conf create mode 100644 roles/docker/XControl/templates/nginx/conf.d/artifact.conf create mode 100644 roles/docker/XControl/templates/nginx/conf.d/bootstrap-nginx.conf create mode 100644 roles/docker/XControl/templates/nginx/conf.d/default.conf create mode 100644 roles/docker/XControl/templates/nginx/conf.d/homepage.conf create mode 
100644 roles/docker/XControl/templates/nginx/conf.d/rag-server.conf create mode 100644 roles/docker/clickhouse/README.md create mode 100644 roles/docker/clickhouse/tasks/main.yml create mode 100644 roles/docker/embedding-service/README.md create mode 100644 roles/docker/embedding-service/tasks/main.yml create mode 100644 roles/docker/grafana/README.md create mode 100644 roles/docker/grafana/defaults/main.yml create mode 100644 roles/docker/grafana/tasks/main.yml create mode 100644 roles/docker/grafana/templates/docker-compose.yaml.j2 create mode 100644 roles/docker/harbor/README.md create mode 100644 roles/docker/harbor/defaults/main.yml create mode 100644 roles/docker/harbor/tasks/main.yml create mode 100644 roles/docker/harbor/tasks/post-setup.yml create mode 100644 roles/docker/harbor/tasks/pre-setup.yml create mode 100644 roles/docker/harbor/templates/common/config/core/app.conf create mode 100644 roles/docker/harbor/templates/common/config/core/env create mode 100644 roles/docker/harbor/templates/common/config/db/env create mode 100644 roles/docker/harbor/templates/common/config/jobservice/config.yml create mode 100644 roles/docker/harbor/templates/common/config/jobservice/env create mode 100644 roles/docker/harbor/templates/common/config/log/logrotate.conf create mode 100644 roles/docker/harbor/templates/common/config/log/rsyslog_docker.conf create mode 100644 roles/docker/harbor/templates/common/config/nginx/nginx.conf create mode 100644 roles/docker/harbor/templates/common/config/portal/nginx.conf create mode 100644 roles/docker/harbor/templates/common/config/registry/config.yml create mode 100644 roles/docker/harbor/templates/common/config/registry/passwd create mode 100755 roles/docker/harbor/templates/common/config/registry/root.crt create mode 100644 roles/docker/harbor/templates/common/config/registryctl/config.yml create mode 100644 roles/docker/harbor/templates/common/config/registryctl/env create mode 100644 
roles/docker/harbor/templates/docker-compose.yml.j2 create mode 100644 roles/docker/kafka/README.md create mode 100644 roles/docker/kafka/tasks/main.yml create mode 100644 roles/docker/keycloak/README.md create mode 100644 roles/docker/keycloak/defaults/main.yml create mode 100644 roles/docker/keycloak/files/create_keystore.sh create mode 100644 roles/docker/keycloak/tasks/main.yml create mode 100644 roles/docker/keycloak/tasks/post-setup.yml create mode 100644 roles/docker/keycloak/tasks/pre-setup.yml create mode 100644 roles/docker/keycloak/templates/docker-compose.yml.j2 create mode 100644 roles/docker/keycloak/templates/nginx.conf.j2 create mode 100644 roles/docker/loki/README.md create mode 100644 roles/docker/loki/tasks/main.yml create mode 100644 roles/docker/minio/README.md create mode 100644 roles/docker/minio/tasks/main.yml create mode 100644 roles/docker/mlflow/README.md create mode 100644 roles/docker/mlflow/tasks/main.yml create mode 100644 roles/docker/neurapress/defaults/main.yml create mode 100644 roles/docker/neurapress/files/nginx/nginx.conf create mode 100644 roles/docker/neurapress/files/run.sh create mode 100644 roles/docker/neurapress/tasks/main.yml create mode 100644 roles/docker/neurapress/templates/docker-compose.yaml create mode 100644 roles/docker/neurapress/templates/nginx/conf.d/bootstrap-nginx.conf create mode 100644 roles/docker/neurapress/templates/nginx/conf.d/default.conf create mode 100644 roles/docker/otel/README.md create mode 100644 roles/docker/otel/tasks/main.yml create mode 100644 roles/docker/otel/templates/docker-compose.yaml create mode 100644 roles/docker/postgres/README.md create mode 100644 roles/docker/postgres/tasks/main.yml create mode 100644 roles/docker/ray/README.md create mode 100644 roles/docker/ray/tasks/main.yml create mode 100644 roles/docker/redpanda/README.md create mode 100644 roles/docker/redpanda/tasks/main.yml create mode 100644 roles/docker/sglang/README.md create mode 100644 
roles/docker/sglang/tasks/main.yml create mode 100644 roles/docker/trino/README.md create mode 100644 roles/docker/trino/tasks/main.yml create mode 100644 roles/docker/vllm/README.md create mode 100644 roles/docker/vllm/tasks/main.yml create mode 100644 roles/docker/zitadel/README.md create mode 100644 roles/docker/zitadel/defaults/main.yml create mode 100644 roles/docker/zitadel/files/certbot/conf/.gitkeep create mode 100644 roles/docker/zitadel/files/certbot/www/.gitkeep create mode 100644 roles/docker/zitadel/files/nginx/nginx.conf create mode 100644 roles/docker/zitadel/files/run.sh create mode 100644 roles/docker/zitadel/tasks/main.yml create mode 100644 roles/docker/zitadel/templates/docker-compose.yaml create mode 100644 roles/docker/zitadel/templates/nginx/conf.d/bootstrap-nginx.conf create mode 100644 roles/docker/zitadel/templates/nginx/conf.d/default.conf create mode 100644 roles/github/defaults/main.yml create mode 100644 roles/github/tasks/main.yml create mode 100644 roles/vhosts/OpenResty/defaults/main.yml create mode 100644 roles/vhosts/OpenResty/handlers/main.yml create mode 100644 roles/vhosts/OpenResty/meta/main.yml create mode 100644 roles/vhosts/OpenResty/tasks/geoip.yml create mode 100644 roles/vhosts/OpenResty/tasks/main.yml create mode 100644 roles/vhosts/OpenResty/templates/artifact.conf.j2 create mode 100644 roles/vhosts/OpenResty/templates/geo_redirect.conf.j2 create mode 100644 roles/vhosts/OpenResty/templates/homepage-static.conf.j2 create mode 100644 roles/vhosts/OpenResty/templates/homepage.conf.j2 create mode 100644 roles/vhosts/OpenResty/templates/nginx.conf.j2 create mode 100644 roles/vhosts/OpenResty/templates/panel.conf.j2 create mode 100644 roles/vhosts/Redis/meta/main.yml create mode 100644 roles/vhosts/Redis/tasks/main.yml create mode 100644 roles/vhosts/alerting/files/setup-observable-server.sh create mode 100644 roles/vhosts/alerting/meta/main.yml create mode 100755 roles/vhosts/alerting/tasks/main.yml create mode 100644 
roles/vhosts/alerting/templates/alerting_rules create mode 100644 roles/vhosts/alerting/templates/recording_rules create mode 100644 roles/vhosts/alicloud_dns_record/defaults/main.yml create mode 100644 roles/vhosts/alicloud_dns_record/library/alicloud_dns_record.py create mode 100644 roles/vhosts/alicloud_dns_record/tasks/main.yml create mode 100644 roles/vhosts/alicloud_dns_sync/defaults/main.yml create mode 100644 roles/vhosts/alicloud_dns_sync/files/dns_sync.py create mode 100644 roles/vhosts/alicloud_dns_sync/tasks/main.yaml create mode 100644 roles/vhosts/alicloud_dns_sync/templates/dns_records.yaml.j2 create mode 100644 roles/vhosts/alloy/defaults/main.yml create mode 100644 roles/vhosts/alloy/files/loki_journal_sources_gateway.yml create mode 100644 roles/vhosts/alloy/files/loki_journal_sources_k3s_agent.yml create mode 100644 roles/vhosts/alloy/files/loki_journal_sources_k3s_server.yml create mode 100644 roles/vhosts/alloy/files/loki_journal_sources_postgresql.yml create mode 100644 roles/vhosts/alloy/files/loki_journal_sources_vpn.yml create mode 100644 roles/vhosts/alloy/tasks/main.yml create mode 100644 roles/vhosts/alloy/templates/config.alloy.j2 create mode 100644 roles/vhosts/blackbox_exporter/defaults/main.yml create mode 100644 roles/vhosts/blackbox_exporter/handlers/main.yml create mode 100644 roles/vhosts/blackbox_exporter/meta/main.yml create mode 100644 roles/vhosts/blackbox_exporter/tasks/main.yml create mode 100644 roles/vhosts/blackbox_exporter/templates/blackbox.service.j2 create mode 100644 roles/vhosts/blackbox_exporter/templates/blackbox.yml.j2 create mode 100644 roles/vhosts/cert-manager/files/certs_automated_issuance.sh create mode 100644 roles/vhosts/cert-manager/files/fetch_certs_from_vault.py create mode 100644 roles/vhosts/cert-manager/files/get_certificate.sh create mode 100644 roles/vhosts/cert-manager/files/update-certs-secret.sh create mode 100644 roles/vhosts/cert-manager/meta/main.yml create mode 100755 
roles/vhosts/cert-manager/tasks/main.yml create mode 100644 roles/vhosts/cert-manager/vars/main.yml create mode 100644 roles/vhosts/chasquid/handlers/main.yml create mode 100644 roles/vhosts/chasquid/tasks/main.yml create mode 100644 roles/vhosts/chasquid/templates/chasquid.conf.j2 create mode 100644 roles/vhosts/chasquid/templates/chasquid.service.j2 create mode 100644 roles/vhosts/common/tasks/addons/gpu.yml create mode 100644 roles/vhosts/common/tasks/addons/s3fs.yml create mode 100644 roles/vhosts/common/tasks/common_debian.yml create mode 100644 roles/vhosts/common/tasks/common_redhat.yml create mode 100644 roles/vhosts/common/tasks/manage_keyring.yml create mode 100644 roles/vhosts/common/tasks/packages.yml create mode 100644 roles/vhosts/common/tasks/packages_cleanup.yml create mode 100644 roles/vhosts/common/tasks/repo_setup.yml create mode 100755 roles/vhosts/common/templates/hostname.j2 create mode 100644 roles/vhosts/deepflow_agent/tasks/main.yml create mode 100644 roles/vhosts/deepflow_agent/templates/deepflow-agent.yaml.j2 create mode 100644 roles/vhosts/docker/defaults/main.yml create mode 100644 roles/vhosts/docker/meta/main.yml create mode 100644 roles/vhosts/docker/tasks/main.yml create mode 100644 roles/vhosts/dovecot/handlers/main.yml create mode 100644 roles/vhosts/dovecot/tasks/main.yml create mode 100644 roles/vhosts/dovecot/templates/10-auth.conf.j2 create mode 100644 roles/vhosts/dovecot/templates/10-mail.conf.j2 create mode 100644 roles/vhosts/dovecot/templates/10-master.conf.j2 create mode 100644 roles/vhosts/dovecot/templates/10-ssl.conf.j2 create mode 100644 roles/vhosts/dovecot/templates/dovecot.conf.j2 create mode 100644 roles/vhosts/firewall/defaults/main.yml create mode 100644 roles/vhosts/firewall/handlers/main.yml create mode 100644 roles/vhosts/firewall/tasks/main.yml create mode 100755 roles/vhosts/gpu-k8s-reset/files/reset-gpu-k8s.sh create mode 100644 roles/vhosts/gpu-k8s-reset/tasks/main.yml create mode 100644 
roles/vhosts/gpu-k8s/defaults/main.yml create mode 100755 roles/vhosts/gpu-k8s/files/get_labring_registry.sh create mode 100755 roles/vhosts/gpu-k8s/files/run_sealos.sh create mode 100644 roles/vhosts/gpu-k8s/tasks/install_cluster.yml create mode 100644 roles/vhosts/gpu-k8s/tasks/install_driver.yml create mode 100644 roles/vhosts/gpu-k8s/tasks/main.yml create mode 100644 roles/vhosts/gpu-k8s/tasks/run_test.yml create mode 100644 roles/vhosts/grafana/defaults/main.yml create mode 100644 roles/vhosts/grafana/tasks/main.yml create mode 100644 roles/vhosts/grafana/templates/dashboards.yaml.j2 create mode 100644 roles/vhosts/grafana/templates/env.conf.j2 create mode 100644 roles/vhosts/grafana/templates/grafana-dash-pull.service.j2 create mode 100644 roles/vhosts/grafana/templates/grafana-dash-pull.timer.j2 create mode 100644 roles/vhosts/grafana/templates/grafana.ini.j2 create mode 100644 roles/vhosts/k3s-addon/files/setup-argocd.sh create mode 100644 roles/vhosts/k3s-addon/files/setup-dns-provider.sh create mode 100644 roles/vhosts/k3s-addon/files/setup-egress.sh create mode 100644 roles/vhosts/k3s-addon/files/setup-flagger.sh create mode 100644 roles/vhosts/k3s-addon/files/setup-fluxcd.sh create mode 100644 roles/vhosts/k3s-addon/files/setup-ingress-apisix.sh create mode 100644 roles/vhosts/k3s-addon/files/setup-ingress.sh create mode 100644 roles/vhosts/k3s-addon/files/setup-keda-operator.sh create mode 100644 roles/vhosts/k3s-addon/files/setup-prometheus-operator.sh create mode 100644 roles/vhosts/k3s-addon/meta/main.yml create mode 100755 roles/vhosts/k3s-addon/tasks/main.yml create mode 100644 roles/vhosts/k3s-addon/templates/ingress-apisix-dashboard.yaml create mode 100644 roles/vhosts/k3s-addon/templates/ingress-apisix-values.yaml create mode 100644 roles/vhosts/k3s-addon/templates/kubernetes-discovery-config.yaml create mode 100644 roles/vhosts/k3s-addon/templates/kubernetes-discovery-serviceaccount.yaml create mode 100644 
roles/vhosts/k3s-addon/templates/kubernetes-discovery.yaml create mode 100644 roles/vhosts/k3s-cluster-agent/defaults/main.yml create mode 100644 roles/vhosts/k3s-cluster-agent/tasks/bootstrap.yml create mode 100644 roles/vhosts/k3s-cluster-agent/tasks/destroy.yml create mode 100644 roles/vhosts/k3s-cluster-agent/tasks/main.yml create mode 100644 roles/vhosts/k3s-cluster-agent/tasks/upgrade.yml create mode 100644 roles/vhosts/k3s-cluster-agent/templates/install_k3s_agent.sh.j2 create mode 100644 roles/vhosts/k3s-cluster-agent/vars/main.yml create mode 100644 roles/vhosts/k3s-cluster-server/defaults/main.yml create mode 100644 roles/vhosts/k3s-cluster-server/tasks/add-master.yml create mode 100644 roles/vhosts/k3s-cluster-server/tasks/backup.yml create mode 100644 roles/vhosts/k3s-cluster-server/tasks/bootstrap.yml create mode 100644 roles/vhosts/k3s-cluster-server/tasks/destroy.yml create mode 100644 roles/vhosts/k3s-cluster-server/tasks/main.yml create mode 100644 roles/vhosts/k3s-cluster-server/tasks/recovery.yml create mode 100644 roles/vhosts/k3s-cluster-server/tasks/upgrade.yml create mode 100644 roles/vhosts/k3s-cluster-server/templates/install_k3s_server.sh.j2 create mode 100644 roles/vhosts/k3s-cluster-server/vars/main.yml create mode 100644 roles/vhosts/k3s-reset/files/reset-k3s.sh create mode 100755 roles/vhosts/k3s-reset/tasks/main.yml create mode 100644 roles/vhosts/k3s/files/setup-cni-cilium.sh create mode 100644 roles/vhosts/k3s/files/setup-cni-kubeovn.sh create mode 100644 roles/vhosts/k3s/files/setup-k3s.sh create mode 100644 roles/vhosts/k3s/meta/main.yml create mode 100755 roles/vhosts/k3s/tasks/main.yml create mode 100644 roles/vhosts/k3s/templates/cni_install.sh create mode 100644 roles/vhosts/k8s-node/defaults/main.yml create mode 100644 roles/vhosts/k8s-node/tasks/apt_setup.yml create mode 100644 roles/vhosts/k8s-node/tasks/containerd.yml create mode 100644 roles/vhosts/k8s-node/tasks/gpu.yml create mode 100644 
roles/vhosts/k8s-node/tasks/main.yml create mode 100644 roles/vhosts/k8s-node/tasks/network.yml create mode 100644 roles/vhosts/k8s-node/tasks/packages.yml create mode 100644 roles/vhosts/k8s-node/tasks/reboot.yml create mode 100644 roles/vhosts/k8s-node/tasks/system_config.yml create mode 100644 roles/vhosts/k8s-node/tasks/user_setup.yml create mode 100644 roles/vhosts/network_info/files/display_network_info.sh create mode 100755 roles/vhosts/network_info/tasks/main.yml create mode 100644 roles/vhosts/nginx-proxy/defaults/main.yml create mode 100644 roles/vhosts/nginx-proxy/handlers/main.yml create mode 100644 roles/vhosts/nginx-proxy/tasks/main.yml create mode 100644 roles/vhosts/nginx-proxy/templates/nginx-proxy.conf.j2 create mode 100644 roles/vhosts/nginx/defaults/main.yml create mode 100644 roles/vhosts/nginx/handlers/main.yml create mode 100644 roles/vhosts/nginx/tasks/main.yml create mode 100644 roles/vhosts/nginx/templates/artifact.conf.j2 create mode 100644 roles/vhosts/nginx/templates/cn-homepage-https.conf.j2 create mode 100644 roles/vhosts/nginx/templates/cn-homepage-redirect.conf.j2 create mode 100644 roles/vhosts/nginx/templates/grafana.conf.j2 create mode 100644 roles/vhosts/nginx/templates/metrics.conf.j2 create mode 100644 roles/vhosts/node_exporter/meta/main.yml create mode 100644 roles/vhosts/node_exporter/tasks/main.yml create mode 100644 roles/vhosts/node_exporter/templates/node-exporter.service create mode 100644 roles/vhosts/nodejs/defaults/main.yml create mode 100644 roles/vhosts/nodejs/handlers/main.yml create mode 100644 roles/vhosts/nodejs/tasks/darwin.yml create mode 100644 roles/vhosts/nodejs/tasks/main.yml create mode 100644 roles/vhosts/nodejs/tasks/ubuntu.yml create mode 100644 roles/vhosts/nodejs/templates/npm_global.sh.j2 create mode 100644 roles/vhosts/openobserve/defaults/main.yml create mode 100644 roles/vhosts/openobserve/tasks/main.yml create mode 100644 roles/vhosts/openobserve/templates/openobserve.service.j2 create mode 
100644 roles/vhosts/otel-collector/meta/main.yml create mode 100644 roles/vhosts/otel-collector/tasks/main.yml create mode 100644 roles/vhosts/otel-collector/templates/otel-collector.service create mode 100644 roles/vhosts/otel-collector/templates/otel-config.yaml create mode 100644 roles/vhosts/postgres/handlers/main.yml create mode 100644 roles/vhosts/postgres/meta/main.yml create mode 100644 roles/vhosts/postgres/tasks/main.yml create mode 100644 roles/vhosts/process_exporter/meta/main.yml create mode 100644 roles/vhosts/process_exporter/tasks/main.yml create mode 100644 roles/vhosts/process_exporter/templates/process-exporter.service create mode 100644 roles/vhosts/process_exporter/templates/process-exporter.yml create mode 100644 roles/vhosts/prometheus-transfer/meta/main.yml create mode 100755 roles/vhosts/prometheus-transfer/tasks/main.yml create mode 100644 roles/vhosts/prometheus-transfer/templates/prometheus-transfer.service create mode 100644 roles/vhosts/prometheus-transfer/templates/prometheus-transfer.yml create mode 100755 roles/vhosts/prometheus-transfer/templates/start-prometheus-transfer-service.sh create mode 100755 roles/vhosts/prometheus-transfer/templates/stop-prometheus-transfer-service.sh create mode 100644 roles/vhosts/prometheus/defaults/main.yml create mode 100644 roles/vhosts/prometheus/tasks/main.yml create mode 100644 roles/vhosts/prometheus/templates/nodes.json.j2 create mode 100644 roles/vhosts/prometheus/templates/prometheus.service.j2 create mode 100644 roles/vhosts/prometheus/templates/prometheus.yml.j2 create mode 100644 roles/vhosts/promtail-agent/meta/main.yml create mode 100755 roles/vhosts/promtail-agent/tasks/main.yml create mode 100644 roles/vhosts/promtail-agent/templates/promtail-agent.service create mode 100644 roles/vhosts/promtail-agent/templates/promtail.yaml create mode 100644 roles/vhosts/sealos-k8s/defaults/main.yml create mode 100644 roles/vhosts/sealos-k8s/tasks/main.yml create mode 100644 
roles/vhosts/sealos-k8s/templates/hosts.toml.j2 create mode 100644 roles/vhosts/sealos-k8s/templates/resolved.conf.j2 create mode 100755 roles/vhosts/secret-manger/tasks/main.yml create mode 100644 roles/vhosts/ssh-trust/defaults/main.yml create mode 100644 roles/vhosts/ssh-trust/tasks/main.yml create mode 100644 roles/vhosts/telegraf/handlers/main.yml create mode 100644 roles/vhosts/telegraf/meta/main.yml create mode 100755 roles/vhosts/telegraf/tasks/main.yml create mode 100644 roles/vhosts/telegraf/templates/telegraf.conf create mode 100644 roles/vhosts/vault/files/setup.sh create mode 100644 roles/vhosts/vault/meta/main.yml create mode 100644 roles/vhosts/vault/readme.md create mode 100755 roles/vhosts/vault/tasks/main.yml create mode 100644 roles/vhosts/vault/vars/main.yml create mode 100644 roles/vhosts/wireguard-client/files/enable_ip_forward.sh create mode 100755 roles/vhosts/wireguard-client/tasks/main.yml create mode 100755 roles/vhosts/wireguard-client/templates/server.conf create mode 100644 roles/vhosts/wireguard-gateway/files/enable_ip_forward.sh create mode 100644 roles/vhosts/wireguard-gateway/handlers/main.yml create mode 100755 roles/vhosts/wireguard-gateway/meta/main.yml create mode 100755 roles/vhosts/wireguard-gateway/tasks/main.yml create mode 100644 roles/vhosts/wireguard-gateway/templates/wg0.conf.j2 create mode 100644 roles/vhosts/xcontrol_server/defaults/main.yml create mode 100644 roles/vhosts/xcontrol_server/tasks/main.yml create mode 100644 roles/vhosts/xcontrol_server/templates/server-qwen-ai.yaml.j2 create mode 100644 roles/vhosts/xcontrol_server/templates/xcontrol-server.service.j2 create mode 100644 roles/vhosts/zot/defaults/main.yml create mode 100644 roles/vhosts/zot/tasks/main.yml create mode 100644 roles/vhosts/zot/templates/config.json.j2 create mode 100644 roles/vhosts/zot/templates/zot.service.j2 create mode 100644 scripts/Fetch_packages_depends.sh create mode 100644 scripts/Jenkinsfile create mode 100644 
scripts/ansible_playbook_hosts_setup.sh create mode 100644 scripts/argo_application-demo.yaml create mode 100644 scripts/argocd_all_in_one.sh create mode 100644 scripts/artifact/setup-harbor.sh create mode 100644 scripts/backup_docker_registry_secret.sh create mode 100644 scripts/check_docker_registry_secret.sh create mode 100644 scripts/deepflow-agent-batch-tools-v1.0.sh create mode 100755 scripts/deepflow/backup_images_v6.3-20250309-17.json create mode 100755 scripts/deepflow/check_k8s_node_config.sh create mode 100755 scripts/deepflow/clean-failed-pods.sh create mode 100755 scripts/deepflow/deepflow-server-master-controller-pre.sh create mode 100755 scripts/deepflow/deepflow-server-slave-controller-pre.sh create mode 100755 scripts/deepflow/deepflow_k8s_backup.sh create mode 100755 scripts/deepflow/deploy-k8s.sh create mode 100644 scripts/deepflow/df-web-ai-push-all.sh create mode 100644 scripts/deepflow/images.txt create mode 100644 scripts/deepflow/pull-all-v6.4.sh create mode 100644 scripts/deepflow/pull_save_scp_image_multi_arch.sh create mode 100644 scripts/deepflow/setup-agent-all-in-one.sh create mode 100644 scripts/deepflow/setup-deepflow-agent.sh create mode 100644 scripts/deploy-open-webui.sh create mode 100644 scripts/deploy_deepflow_agent.sh create mode 100644 scripts/dynamic_inventory.py create mode 100644 scripts/fluxcd_all_in_one.sh create mode 100644 scripts/gather_network_info.yml create mode 100644 scripts/generate_ssl.sh create mode 100644 scripts/get-standalone-cert.sh create mode 100644 scripts/global-monitor/agent-group-config.yaml create mode 100644 scripts/global-monitor/config/containerd.toml create mode 100644 scripts/global-monitor/config/deepflow-registry.yaml create mode 100644 scripts/global-monitor/config/nginx.conf create mode 100644 scripts/global-monitor/config/registry.yaml create mode 100644 scripts/global-monitor/custom-domain.yaml create mode 100644 scripts/global-monitor/deepflow-registry/all.tag.list create mode 100644 
scripts/global-monitor/deepflow-registry/compose.yaml create mode 100644 scripts/global-monitor/deepflow-registry/push_images.sh create mode 100644 scripts/global-monitor/deepflow-registry/setup-nerdctl.sh create mode 100644 scripts/global-monitor/deepflow-registry/setup-registry.sh create mode 100644 scripts/global-monitor/deepflow-registry/show_images.sh create mode 100644 scripts/global-monitor/deepflow-sever-values-v6.3.yaml create mode 100644 scripts/global-monitor/setup-agent-group-config.sh create mode 100644 scripts/global-monitor/setup-coroot.sh create mode 100644 scripts/global-monitor/setup-deepflow-Host-Domain-Group.sh create mode 100644 scripts/global-monitor/setup-deepflow-Host-Domain.sh create mode 100644 scripts/global-monitor/setup-deepflow-add-domain.sh create mode 100644 scripts/global-monitor/setup-deepflow-server-ee-all-in-one.sh create mode 100644 scripts/global-monitor/setup-deepflow.sh create mode 100644 scripts/global-monitor/setup-ingress.sh create mode 100644 scripts/global-monitor/setup-kubesphere-core.sh create mode 100644 scripts/global-monitor/setup-signoz.sh create mode 100644 scripts/gpu-k8s.sh create mode 100644 scripts/ingress-installer.sh create mode 100644 scripts/ingress/deepflow-front-end-ingress.yaml create mode 100644 scripts/init-remote-xray.sh create mode 100644 scripts/init-update-server.sh create mode 100644 scripts/init_ansible_role.sh create mode 100644 scripts/init_linux_user.sh create mode 100644 scripts/install-single-gpu-k8s.sh create mode 100644 scripts/iptables_whitelist_enforce_final_fixed.sh create mode 100644 scripts/k3s-cluster/.gitignore create mode 100644 scripts/k3s-cluster/check-cilium-egress.sh create mode 100644 scripts/k3s-cluster/check_cilium_requirements.sh create mode 100644 scripts/k3s-cluster/cilium-cli.sh create mode 100644 scripts/k3s-cluster/cilium-fixed.sh create mode 100755 scripts/k3s-cluster/deploy_velero_with_chart_values_yaml.sh create mode 100644 scripts/k3s-cluster/egress-nat-test.yaml 
create mode 100644 scripts/k3s-cluster/init_k3s_cluster_agent_role.sh create mode 100644 scripts/k3s-cluster/init_k3s_cluster_server_role.sh create mode 100644 scripts/k3s-cluster/k3s.service create mode 100644 scripts/k3s-cluster/k3s.service-without-cni create mode 100755 scripts/k3s-cluster/k8s_backup_config.yaml create mode 100755 scripts/k3s-cluster/k8s_backup_tool.sh create mode 100644 scripts/k3s-cluster/k8s_backup_tool_howto.md create mode 100644 scripts/k3s-cluster/k8s_restore_all.sh create mode 100644 scripts/k3s-cluster/set-node-label.sh create mode 100644 scripts/k3s-cluster/setup-cilium-cni.sh create mode 100644 scripts/k3s-cluster/setup-cilium-helm.sh create mode 100644 scripts/k3s-cluster/setup-egress-gateway.sh create mode 100644 scripts/k3s-cluster/setup-k3s-agent.sh create mode 100644 scripts/k3s-cluster/setup-k3s-cluster-agent.sh create mode 100644 scripts/k3s-cluster/setup-k3s-cluster-with-br0.sh create mode 100644 scripts/k3s-cluster/setup-k3s-cluster.md create mode 100644 scripts/k3s-cluster/setup-k3s-cluster.sh create mode 100644 scripts/k3s-cluster/setup-k3s-with-gitops.sh create mode 100644 scripts/k3s-cluster/setup-k3s-with-ingress.sh create mode 100644 scripts/k3s-cluster/setup-nginx-ingress.sh create mode 100644 scripts/k3s_all_in_one.sh create mode 100644 scripts/kong-gateway/GatewayAPI-deepflow-example.yaml create mode 100644 scripts/kong-gateway/GatewayAPI-example.yaml create mode 100644 scripts/kong-gateway/GatewayAPI-http-example.yaml create mode 100644 scripts/kong-gateway/deploy-kong-gateway.sh create mode 100644 scripts/list_cluster_namespace_map.sh create mode 100644 scripts/make_k3s_offline_package.sh create mode 100644 scripts/merge_csv.py create mode 100644 scripts/merge_vars.py create mode 100644 scripts/netcheck.sh create mode 100644 scripts/network-config/ubuntu/init-wsl.sh create mode 100644 scripts/network-config/ubuntu/readme.md create mode 100644 scripts/network-config/windows/readme.md create mode 100644 
scripts/network-config/windows/start-wsl-bridge.ps1 create mode 100644 scripts/pipeline-library/vars/ansibleSteps.groovy create mode 100644 scripts/pulp-installer.sh create mode 100644 scripts/pulp-operator-repo-gateway.yaml create mode 100644 scripts/pulp-operator-repo.yaml create mode 100644 scripts/pulp-operator.sh create mode 100644 scripts/registry/.gitignore create mode 100644 scripts/registry/all.tag.list create mode 100644 scripts/registry/clean_unlabeled_images.sh create mode 100644 scripts/registry/push_images.sh create mode 100644 scripts/registry/setup-nerdctl.sh create mode 100644 scripts/registry/setup-registry.sh create mode 100644 scripts/registry/show_images.sh create mode 100644 scripts/renew_docker_registry_secret_with_kubectl.sh create mode 100644 scripts/rewrite-cover-history.sh create mode 100644 scripts/rollout_docker_registry_secret.sh create mode 100644 scripts/secret/README.md create mode 100644 scripts/secret/hcp/__init__.py create mode 100644 scripts/secret/hcp/secret.py create mode 100644 scripts/secret/setup.py create mode 100644 scripts/secret/tests/__init__.py create mode 100644 scripts/secret/tests/test_secret.py create mode 100644 scripts/setup-gitea.sh create mode 100644 scripts/setup-grafana-agent.sh create mode 100644 scripts/setup-microservice-demo.sh create mode 100644 scripts/setup-open-telemetry-demo.sh create mode 100644 scripts/setup-vector.sh create mode 100644 scripts/sing-box/README.md create mode 100644 scripts/sing-box/client-gvisor-tun-reality.json create mode 100644 scripts/sing-box/client-gvisor-tun-vless.json create mode 100644 scripts/sing-box/create-reality-keypair.sh create mode 100644 scripts/sing-box/install-singbox.sh create mode 100644 scripts/sing-box/server-VLESS-TCP-XTLS.json create mode 100644 scripts/sing-box/server-reality.json create mode 100644 scripts/update-server/Chart.yaml create mode 100644 scripts/update-server/templates/configmap.yaml create mode 100644 
scripts/update-server/templates/deployment.yaml create mode 100644 scripts/update-server/templates/route.yaml create mode 100644 scripts/update-server/templates/service.yaml create mode 100644 scripts/update-server/values.yaml create mode 100644 scripts/verify_rew_docker_registry_secret.sh create mode 100644 scripts/website-homepage/Chart.yaml create mode 100644 scripts/website-homepage/templates/configmap.yaml create mode 100644 scripts/website-homepage/templates/deployment.yaml create mode 100644 scripts/website-homepage/templates/route.yaml create mode 100644 scripts/website-homepage/templates/service.yaml create mode 100644 scripts/website-homepage/values.yaml create mode 100644 setup-docker.yml create mode 100644 vars/dns_records_svc_plus.yaml create mode 100755 wireguard_ali_vpn_gw create mode 100755 wireguard_client create mode 100755 wireguard_gateway diff --git a/alicloud_dns_record.yml b/alicloud_dns_record.yml new file mode 100644 index 0000000..6c84aff --- /dev/null +++ b/alicloud_dns_record.yml @@ -0,0 +1,12 @@ +- name: setup OpenResty server + hosts: global-homepage.svc.plus + become: true + vars: + alicloud_dns_domain: "svc.plus" + alicloud_dns_rr: "www" + alicloud_dns_type: "A" + alicloud_dns_value: "1.2.3.4" + alicloud_access_key: "{{ aliyun_ak }}" + alicloud_secret_key: "{{ aliyun_sk }}" + roles: + - role: vhosts/alicloud_dns_record diff --git a/alicloud_dns_sync.yml b/alicloud_dns_sync.yml new file mode 100644 index 0000000..ab407ba --- /dev/null +++ b/alicloud_dns_sync.yml @@ -0,0 +1,16 @@ +--- +- hosts: localhost + gather_facts: no + + # Load the DNS record definitions from a vars file + vars_files: + - vars/dns_records_svc_plus.yaml # ← swap this file to target a different environment + + # To override the AK/SK from the command line, pass them via --extra-vars + vars: + alicloud_access_key: "{{ aliyun_ak | default('') }}" + alicloud_secret_key: "{{ aliyun_sk | default('') }}" + + roles: + - role: vhosts/alicloud_dns_sync + diff --git a/apply-branch-protection.yml b/apply-branch-protection.yml new file mode 100644 index 0000000..6a6b690 --- /dev/null
+++ b/apply-branch-protection.yml @@ -0,0 +1,7 @@ +--- +- name: Apply branch protection rules + hosts: localhost + connection: local + gather_facts: false + roles: + - github diff --git a/common b/common new file mode 100644 index 0000000..11a2796 --- /dev/null +++ b/common @@ -0,0 +1,8 @@ +--- +- name: Init Linux OS Common setting + hosts: all + user: ubuntu + become: yes + gather_facts: yes + roles: + - vhosts/common diff --git a/deepflow/deepflow-agent-playbook/Readme.md b/deepflow/deepflow-agent-playbook/Readme.md new file mode 100644 index 0000000..7ab68e6 --- /dev/null +++ b/deepflow/deepflow-agent-playbook/Readme.md @@ -0,0 +1,2 @@ +ansible-playbook -i inventory/js2_hosts.ini playbook.yml -e "ansible_ssh_user=ubuntu area=js2" -D -C +ansible-playbook -i inventory/js2_hosts.ini playbook.yml -e "ansible_ssh_user=ubuntu area=js2" -D diff --git a/deepflow/deepflow-agent-playbook/inventory/js2_hosts.ini b/deepflow/deepflow-agent-playbook/inventory/js2_hosts.ini new file mode 100644 index 0000000..5e45dd9 --- /dev/null +++ b/deepflow/deepflow-agent-playbook/inventory/js2_hosts.ini @@ -0,0 +1,8 @@ +[js2] +10.200.11.[1:24] + +[all:vars] +ansible_port=22 +ansible_ssh_user=ubuntu +ansible_host_key_checking=False +ansible_ssh_private_key_file=~/.ssh/id_rsa diff --git a/deepflow/deepflow-agent-playbook/playbook.yml b/deepflow/deepflow-agent-playbook/playbook.yml new file mode 100644 index 0000000..f153d7f --- /dev/null +++ b/deepflow/deepflow-agent-playbook/playbook.yml @@ -0,0 +1,10 @@ +- name: DeepFlow Agent Upgrade for 区域节点 + hosts: all + become: true + gather_facts: false + vars: + area: js2 + upgrade_zip_path: ./DeepFlow-Agent-Upgrade-20250523.zip + roles: + - deepflow_upgrade + diff --git a/deepflow/deepflow-agent-playbook/roles/deepflow_upgrade/tasks/main.yml b/deepflow/deepflow-agent-playbook/roles/deepflow_upgrade/tasks/main.yml new file mode 100644 index 0000000..0301238 --- /dev/null +++ b/deepflow/deepflow-agent-playbook/roles/deepflow_upgrade/tasks/main.yml 
@@ -0,0 +1,16 @@ +- name: Sync upgrade package to remote using rsync + synchronize: + src: "{{ upgrade_zip_path }}" + dest: /tmp/ + mode: push + +- name: Unzip upgrade package + unarchive: + src: "/tmp/{{ upgrade_zip_path | basename }}" + dest: /tmp/ + remote_src: yes + +- name: Execute upgrade script + command: bash update_agent.sh --area {{ area }} + args: + chdir: /tmp/DeepFlow-Agent-Upgrade diff --git a/deploy-docker-harbor.yml b/deploy-docker-harbor.yml new file mode 100644 index 0000000..b97054a --- /dev/null +++ b/deploy-docker-harbor.yml @@ -0,0 +1,5 @@ +--- +- hosts: all + become: yes + roles: + - docker/harbor diff --git a/deploy-docker-keycloak.yml b/deploy-docker-keycloak.yml new file mode 100644 index 0000000..9bd9c42 --- /dev/null +++ b/deploy-docker-keycloak.yml @@ -0,0 +1,5 @@ +--- +- hosts: all + become: yes + roles: + - docker/keycloak diff --git a/deploy_OpenObserve_docker.yaml b/deploy_OpenObserve_docker.yaml new file mode 100644 index 0000000..5bef319 --- /dev/null +++ b/deploy_OpenObserve_docker.yaml @@ -0,0 +1,5 @@ +- name: setup OpenObserve + hosts: all + become: true + roles: + - docker/OpenObserve/ diff --git a/deploy_Tempo_docker.yaml b/deploy_Tempo_docker.yaml new file mode 100644 index 0000000..a7badf6 --- /dev/null +++ b/deploy_Tempo_docker.yaml @@ -0,0 +1,5 @@ +- name: setup Tempo + hosts: all + become: true + roles: + - docker/Tempo/ diff --git a/deploy_VictoriaLogs_docker.yaml b/deploy_VictoriaLogs_docker.yaml new file mode 100644 index 0000000..58eda8f --- /dev/null +++ b/deploy_VictoriaLogs_docker.yaml @@ -0,0 +1,5 @@ +- name: setup VictoriaLogs + hosts: all + become: true + roles: + - docker/VictoriaLogs/ diff --git a/deploy_VictoriaMetrics_docker.yaml b/deploy_VictoriaMetrics_docker.yaml new file mode 100644 index 0000000..6d5efb5 --- /dev/null +++ b/deploy_VictoriaMetrics_docker.yaml @@ -0,0 +1,5 @@ +- name: setup VictoriaMetrics + hosts: all + become: true + roles: + - docker/VictoriaMetrics/ diff --git 
a/deploy_blackbox_exporters_vhosts.yml b/deploy_blackbox_exporters_vhosts.yml new file mode 100644 index 0000000..9d662a2 --- /dev/null +++ b/deploy_blackbox_exporters_vhosts.yml @@ -0,0 +1,50 @@ +- name: Deploy blackbox exporter + hosts: global-homepage.svc.plus + become: true + vars: + hosts: + - name: "www.svc.plus" + path: + - "/docs/" + - "/download/" + - "/login/" + - "/logout/" + - '/register/' + - name: "cn-homepage.svc.plus" + path: + - "/docs/" + - "/download/" + - "/login/" + - "/logout/" + - '/register/' + - name: "dl.svc.plus" + path: + - "/" + roles: + - roles/vhosts/common/ + - roles/vhosts/blackbox_exporter/ +- name: Deploy blackbox exporter + hosts: cn-homepage.svc.plus + become: true + vars: + hosts: + - name: "www.svc.plus" + path: + - "/docs/" + - "/download/" + - "/login/" + - "/logout/" + - '/register/' + - name: "cn-homepage.svc.plus" + path: + - "/docs/" + - "/download/" + - "/login/" + - "/logout/" + - '/register/' + - name: "dl.svc.plus" + path: + - "/" + roles: + - roles/vhosts/common/ + - roles/vhosts/blackbox_exporter/ diff --git a/deploy_deepflow_agent b/deploy_deepflow_agent new file mode 100644 index 0000000..f8ba686 --- /dev/null +++ b/deploy_deepflow_agent @@ -0,0 +1,7 @@ +--- +- name: Deploy or Upgrade DeepFlow Agent + hosts: all + become: true + + roles: + - role: roles/vhosts/deepflow_agent diff --git a/deploy_exporters_vhosts.yml b/deploy_exporters_vhosts.yml new file mode 100644 index 0000000..75ba6b8 --- /dev/null +++ b/deploy_exporters_vhosts.yml @@ -0,0 +1,15 @@ +- name: setup otel exporters + hosts: cn-homepage.svc.plus + become: true + roles: + - roles/vhosts/common/ + - roles/vhosts/node_exporter/ + - roles/vhosts/process_exporter/ + +- name: setup otel exporters + hosts: global-homepage.svc.plus + become: true + roles: + - roles/vhosts/common/ + - roles/vhosts/node_exporter/ + - roles/vhosts/process_exporter/ diff --git a/deploy_grafana_docker.yaml b/deploy_grafana_docker.yaml new file mode 100644 index 0000000..b838032 
--- /dev/null +++ b/deploy_grafana_docker.yaml @@ -0,0 +1,11 @@ +- name: setup grafana (docker) + hosts: all + become: true + vars: + grafana_domain: "{{ domain }}" + grafana_workspace: /opt/grafana + grafana_admin_user: admin + grafana_admin_password: admin + roles: + - vhosts/docker/ + - docker/grafana/ diff --git a/deploy_monitor_server.yml b/deploy_monitor_server.yml new file mode 100644 index 0000000..a00cf2d --- /dev/null +++ b/deploy_monitor_server.yml @@ -0,0 +1,72 @@ +- name: setup otel exporters + hosts: otel.svc.plus + become: true + vars: + group: web + otlp_endpoint: &otel_endpoint https://otel.svc.plus/api/default/ + otlp_auth: &otel_auth "Basic cm9vdEBleGFtcGxlLmNvbTpRN01wRjZBTzZFelRjRjdJ" + otel_prometheus_node_static_configs: &otel_node_static_configs + - targets: ['172.31.2.33:9100'] + labels: + vendor: aws + account: prod + group: core + name: tky-proxy.svc.plus + iid: '172.31.2.33' + - targets: ['167.179.72.223:9100'] + labels: + vendor: aws + account: prod + group: web-system + name: global-homepage.svc.plus + iid: '167.179.72.223' + - targets: ['47.120.61.35:9100'] + labels: + vendor: alicloud + account: prod + group: web-system + name: cn-homepage.svc.plus + iid: '47.120.61.35' + otel_prometheus_process_static_configs: &otel_process_static_configs + - targets: ['172.31.2.33:9256'] + labels: + vendor: aws + account: prod + group: core + name: tky-proxy.svc.plus + iid: '172.31.2.33' + - targets: ['167.179.72.223:9256'] + labels: + vendor: aws + account: prod + group: web-system + name: global-homepage.svc.plus + iid: '167.179.72.223' + - targets: ['47.120.61.35:9256'] + labels: + vendor: alicloud + account: prod + group: web-system + name: cn-homepage.svc.plus + iid: '47.120.61.35' + exporters: + endpoint: *otel_endpoint + roles: + - roles/vhosts/otel-collector/ + +- name: setup otel exporters + hosts: otel.svc.plus + become: true + vars: + group: web + otlp_endpoint: *otel_endpoint + otlp_auth: *otel_auth + otel_prometheus_node_static_configs: 
*otel_node_static_configs + otel_prometheus_process_static_configs: *otel_process_static_configs + exporters: + endpoint: *otel_endpoint + roles: + - roles/vhosts/node_exporter/ + - roles/vhosts/process_exporter/ + - roles/vhosts/grafana/ + - roles/vhosts/openobserve/ diff --git a/deploy_neurapress_docker.yaml b/deploy_neurapress_docker.yaml new file mode 100644 index 0000000..b8b89dc --- /dev/null +++ b/deploy_neurapress_docker.yaml @@ -0,0 +1,11 @@ +- name: setup neurapress + hosts: all + become: true + vars: + neurapress_domain: "{{ domain }}" + neurapress_workspace: /opt/neurapress + neurapress_image: neurapress:prod + neurapress_certbot_email: manbuzhe2009@qq.com + roles: + - vhosts/docker/ + - docker/neurapress/ diff --git a/deploy_nginx_vhosts.yml b/deploy_nginx_vhosts.yml new file mode 100644 index 0000000..f742d56 --- /dev/null +++ b/deploy_nginx_vhosts.yml @@ -0,0 +1,23 @@ +- name: Setup Nginx server + hosts: localhost + become: true + vars: + vhosts: + - name: cn-homepage.svc.plus + domain: + - www.svc.plus + - cn-homepage.svc.plus + ssl_certificate: /etc/ssl/svc.plus.pem + ssl_certificate_key: /etc/ssl/svc.plus.rsa.key + root: /data/update-server/dashboard + type: homepage-static + - name: cn-artifact.svc.plus + domain: + - artifact.svc.plus + - cn-artifact.svc.plus + ssl_certificate: /etc/ssl/svc.plus.pem + ssl_certificate_key: /etc/ssl/svc.plus.rsa.key + root: /data/update-server + type: artifact + roles: + - roles/vhosts/nginx/ diff --git a/deploy_nodejs_vhosts.yml b/deploy_nodejs_vhosts.yml new file mode 100644 index 0000000..4ffd662 --- /dev/null +++ b/deploy_nodejs_vhosts.yml @@ -0,0 +1,37 @@ +--- +- name: Configure Node.js runtime for vhosts + hosts: all + gather_facts: true + become: yes + vars: + # Choose Node.js version + # Examples: "20.x" (LTS), "18.x", "22.x", or specific version like "20.11.0" + nodejs_version: "20.x" + + # Install Yarn package manager (default: true) + # install_yarn: false + + # Add npm global bin to PATH (default: true) 
+ # add_npm_to_path: true + + # Custom npm prefix + # npm_config_prefix: "/usr/local/lib/npm" + + # Additional packages to install globally (optional) + # global_npm_packages: + # - pm2 + # - typescript + # - eslint + # - @angular/cli + + roles: + - role: vhosts/nodejs + + post_tasks: + - name: Install additional global npm packages + npm: + name: "{{ item }}" + state: latest + global: yes + loop: "{{ global_npm_packages | default([]) }}" + when: global_npm_packages is defined and global_npm_packages | length > 0 diff --git a/deploy_openresty_vhosts.yml b/deploy_openresty_vhosts.yml new file mode 100644 index 0000000..51a6d43 --- /dev/null +++ b/deploy_openresty_vhosts.yml @@ -0,0 +1,48 @@ +- name: setup OpenResty server + hosts: cn-homepage.svc.plus + become: true + vars: + vhosts: + - name: cn-homepage.svc.plus + domain: + - www.svc.plus + - cn-homepage.svc.plus + ssl_certificate: /etc/ssl/svc.plus.pem + ssl_certificate_key: /etc/ssl/svc.plus.rsa.key + root: /data/update-server/dashboard + type: homepage-static + - name: cn-artifact.svc.plus + domain: + - artifact.svc.plus + - cn-artifact.svc.plus + ssl_certificate: /etc/ssl/svc.plus.pem + ssl_certificate_key: /etc/ssl/svc.plus.rsa.key + root: /data/update-server + type: artifact + roles: + - roles/vhosts/OpenResty/ +- name: setup OpenResty server + hosts: global-homepage.svc.plus + become: true + vars: + vhosts: + - name: global-homepage.svc.plus + domain: + - www.svc.plus + - global-homepage.svc.plus + ssl_certificate: /etc/ssl/svc.plus.pem + ssl_certificate_key: /etc/ssl/svc.plus.rsa.key + root: /data/update-server/dashboard + type: homepage-static + - name: global-artifact.svc.plus + domain: + - artifact.svc.plus + - global-artifact.svc.plus + ssl_certificate: /etc/ssl/svc.plus.pem + ssl_certificate_key: /etc/ssl/svc.plus.rsa.key + root: /data/update-server + autoindex_paths: + - "/" + type: artifact + roles: + - roles/vhosts/OpenResty/ diff --git a/deploy_otel_docker.yaml b/deploy_otel_docker.yaml new file 
mode 100644 index 0000000..1e14904 --- /dev/null +++ b/deploy_otel_docker.yaml @@ -0,0 +1,5 @@ +- name: setup otel + hosts: all + become: true + roles: + - docker/otel/ diff --git a/deploy_postgre_vhosts.yml b/deploy_postgre_vhosts.yml new file mode 100644 index 0000000..52cc342 --- /dev/null +++ b/deploy_postgre_vhosts.yml @@ -0,0 +1,162 @@ +- name: Setup postgres server + hosts: cn-homepage.svc.plus + become: true + vars: + group: cn-homepage.svc.plus + repo_setup: true + apt_keyrings: &postgresql_common_keyrings + - name: postgresql + content: | + -----BEGIN PGP PUBLIC KEY BLOCK----- + Version: Hockeypuck 2.2 + Comment: Hostname: + + xsFNBE6XR8IBEACVdDKT2HEH1IyHzXkb4nIWAY7echjRxo7MTcj4vbXAyBKOfjja + UrBEJWHN6fjKJXOYWXHLIYg0hOGeW9qcSiaa1/rYIbOzjfGfhE4x0Y+NJHS1db0V + G6GUj3qXaeyqIJGS2z7m0Thy4Lgr/LpZlZ78Nf1fliSzBlMo1sV7PpP/7zUO+aA4 + bKa8Rio3weMXQOZgclzgeSdqtwKnyKTQdXY5MkH1QXyFIk1nTfWwyqpJjHlgtwMi + c2cxjqG5nnV9rIYlTTjYG6RBglq0SmzF/raBnF4Lwjxq4qRqvRllBXdFu5+2pMfC + IZ10HPRdqDCTN60DUix+BTzBUT30NzaLhZbOMT5RvQtvTVgWpeIn20i2NrPWNCUh + hj490dKDLpK/v+A5/i8zPvN4c6MkDHi1FZfaoz3863dylUBR3Ip26oM0hHXf4/2U + A/oA4pCl2W0hc4aNtozjKHkVjRx5Q8/hVYu+39csFWxo6YSB/KgIEw+0W8DiTII3 + RQj/OlD68ZDmGLyQPiJvaEtY9fDrcSpI0Esm0i4sjkNbuuh0Cvwwwqo5EF1zfkVj + Tqz2REYQGMJGc5LUbIpk5sMHo1HWV038TWxlDRwtOdzw08zQA6BeWe9FOokRPeR2 + AqhyaJJwOZJodKZ76S+LDwFkTLzEKnYPCzkoRwLrEdNt1M7wQBThnC5z6wARAQAB + zRxQb3N0Z3JlU1FMIERlYmlhbiBSZXBvc2l0b3J5wsGOBBMBCAA4AhsDBQsJCAcD + BRUKCQgLBRYCAwEAAh4BAheAFiEEuXsK/KoaR/BE8kSgf8x9RqzMTPgFAlhtCD8A + CgkQf8x9RqzMTPgECxAAk8uL+dwveTv6eH21tIHcltt8U3Ofajdo+D/ayO53LiYO + xi27kdHD0zvFMUWXLGxQtWyeqqDRvDagfWglHucIcaLxoxNwL8+e+9hVFIEskQAY + kVToBCKMXTQDLarz8/J030Pmcv3ihbwB+jhnykMuyyNmht4kq0CNgnlcMCdVz0d3 + z/09puryIHJrD+A8y3TD4RM74snQuwc9u5bsckvRtRJKbP3GX5JaFZAqUyZNRJRJ + Tn2OQRBhCpxhlZ2afkAPFIq2aVnEt/Ie6tmeRCzsW3lOxEH2K7MQSfSu/kRz7ELf + Cz3NJHj7rMzC+76Rhsas60t9CjmvMuGONEpctijDWONLCuch3Pdj6XpC+MVxpgBy + 2VUdkunb48YhXNW0jgFGM/BFRj+dMQOUbY8PjJjsmVV0joDruWATQG/M4C7O8iU0 + 
B7o6yVv4m8LDEN9CiR6r7H17m4xZseT3f+0QpMe7iQjz6XxTUFRQxXqzmNnloA1T + 7VjwPqIIzkj/u0V8nICG/ktLzp1OsCFatWXh7LbU+hwYl6gsFH/mFDqVxJ3+DKQi + vyf1NatzEwl62foVjGUSpvh3ymtmtUQ4JUkNDsXiRBWczaiGSuzD9Qi0ONdkAX3b + ewqmN4TfE+XIpCPxxHXwGq9Rv1IFjOdCX0iG436GHyTLC1tTUIKF5xV4Y0+cXIPC + wX0EEwEIACcCGwMFCwkIBwMFFQoJCAsFFgIDAQACHgECF4AFAlLpFRkFCQ6EJy0A + CgkQf8x9RqzMTPhOZA//Zp0e25pcvle7cLc0YuFr9pBv2JIkLzPm83nkcwKmxaWa + yUIG4Sv6pH6hm8+S/CHQij/yFCX+o3ngMw2J9HBUvafZ4bnbI0RGJ70GsAwraQ0V + lkIfg7GUw3TzvoGYO42rZTru9S0K/6nFP6D1HUu+U+AsJONLeb6oypQgInfXQExP + ZyliUnHdipei4WR1YFW6sjSkZT/5C3J1wkAvPl5lvOVthI9Zs6bZlJLZwusKxU0U + M4Btgu1Sf3nnJcHmzisixwS9PMHE+AgPWIGSec/N27a0KmTTvImV6K6nEjXJey0K + 2+EYJuIBsYUNorOGBwDFIhfRk9qGlpgt0KRyguV+AP5qvgry95IrYtrOuE7307Si + dEbSnvO5ezNemE7gT9Z1tM7IMPfmoKph4BfpNoH7aXiQh1Wo+ChdP92hZUtQrY2N + m13cmkxYjQ4ZgMWfYMC+DA/GooSgZM5i6hYqyyfAuUD9kwRN6BqTbuAUAp+hCWYe + N4D88sLYpFh3paDYNKJ+Gf7Yyi6gThcV956RUFDH3ys5Dk0vDL9NiWwdebWfRFbz + oRM3dyGP889aOyLzS3mh6nHzZrNGhW73kslSQek8tjKrB+56hXOnb4HaElTZGDvD + 5wmrrhN94kbyGtz3cydIohvNO9d90+29h0eGEDYti7j7maHkBKUAwlcPvMg5m3bC + wX0EEwEIACcCGwMFCwkIBwMFFQoJCAsFFgIDAQACHgECF4AFAlEqbZUFCQg2wEEA + CgkQf8x9RqzMTPhFMQ//WxAfKMdpSIA9oIC/yPD/dJpY/+DyouOljpE6MucMy/Ar + BECjFTBwi/j9NYM4ynAk34IkhuNexc1i9/05f5RM6+riLCLgAOsADDbHD4miZzoS + xiVr6GQ3YXMbOGld9kV9Sy6mGNjcUov7iFcf5Hy5w3AjPfKuR9zXswyfzIU1YXOb + iiZT38l55pp/BSgvGVQsvbNjsff5CbEKXS7q3xW+WzN0QWF6YsfNVhFjRGj8hKtH + vwKcA02wwjLeLXVTm6915ZUKhZXUFc0vM4Pj4EgNswH8Ojw9AJaKWJIZmLyW+aP+ + wpu6YwVCicxBY59CzBO2pPJDfKFQzUtrErk9irXeuCCLesDyirxJhv8o0JAvmnMA + KOLhNFUrSQ2m+3EnF7zhfz70gHW+EG8X8mL/EN3/dUM09j6TVrjtw43RLxBzwMDe + ariFF9yC+5bLtnGgxjsB9Ik6GV5v34/NEEGf1qBiAzFmDVFRZlrNDkq6gmpvGnA5 + hUWNr+y0i01LjGyaLSWHYjgw2UEQOqcUtTFK9MNzbZze4mVaHMEz9/aMfX25R6qb + iNqCChveIm8mYr5Ds2zdZx+G5bAKdzX7nx2IUAxFQJEE94VLSp3npAaTWv3sHr7d + R8tSyUJ9poDwgw4W9BIcnAM7zvFYbLF5FNggg/26njHCCN70sHt8zGxKQINMc6TC + wX0EEwEIACcCGwMFCwkIBwMFFQoJCAsFFgIDAQACHgECF4AFAlB5KywFCQPDFt8A + 
CgkQf8x9RqzMTPhuCQ//QAjRSAOCQ02qmUAikT+mTB6baOAakkYq6uHbEO7qPZkv + 4E/M+HPIJ4wdnBNeSQjfvdNcZBA/x0hr5EMcBneKKPDj4hJ0panOIRQmNSTThQw9 + OU351gm3YQctAMPRUu1fTJAL/AuZUQf9ESmhyVtWNlH/56HBfYjE4iVeaRkkNLJy + X3vkWdJSMwC/LO3Lw/0M3R8itDsm74F8w4xOdSQ52nSRFRh7PunFtREl+QzQ3EA/ + WB4AIj3VohIGkWDfPFCzV3cyZQiEnjAe9gG5pHsXHUWQsDFZ12t784JgkGyO5wT2 + 6pzTiuApWM3k/9V+o3HJSgH5hn7wuTi3TelEFwP1fNzI5iUUtZdtxbFOfWMnZAyp + EhaLmXNkg4zDkH44r0ss9fR0DAgUav1a25UnbOn4PgIEQy2fgHKHwRpCy20d6oCS + lmgyWsR40EPPYvtGq49A2aK6ibXmdvvFT+Ts8Z+q2SkFpoYFX20mR2nsF0fbt1lf + H65P64dukxeRGteWIeNakDD40bAAOH8+OaoTGVBJ2ACJfLVNM53PEoftavAwUYMr + R910qvwYfd/46rh46g1Frr9SFMKYE9uvIJIgDsQB3QBp71houU4H55M5GD8XURYs + +bfiQpJG1p7eB8e5jZx1SagNWc4XwL2FzQ9svrkbg1Y+359buUiP7T6QXX2zY+/C + RgQQEQgABgUCTpdI7gAKCRDFr3dKWFELWqaPAKD1TtT5c3sZz92Fj97KYmqbNQZP + +ACfSC6+hfvlj4GxmUjp1aepoVTo3wfCwVwEEAEIAAYFAk6XSQsACgkQTFprqxLS + p64F8Q//cCcutwrH50UoRFejg0EIZav6LUKejC6kpLeubbEtuaIH3r2zMblPGc4i + +eMQKo/PqyQrceRXeNNlqO6/exHozYi2meudxa6IudhwJIOn1MQykJbNMSC2sGUp + 1W5M1N5EYgt4hy+qhlfnD66LR4G+9t5FscTJSy84SdiOuqgCOpQmPkVRm1HX5X1+ + dmnzMOCk5LHHQuiacV0qeGO7JcBCVEIDr+uhU1H2u5GPFNHm5u15n25tOxVivb94 + xg6NDjouECBH7cCVuW79YcExH/0X3/9G45rjdHlKPH1OIUJiiX47OTxdG3dAbB4Q + fnViRJhjehFscFvYWSqXo3pgWqUsEvv9qJac2ZEMSz9x2mj0ekWxuM6/hGWxJdB+ + +985rIelPmc7VRAXOjIxWknrXnPCZAMlPlDLu6+vZ5BhFX0Be3y38f7GNCxFkJzl + hWZ4Cj3WojMj+0DaC1eKTj3rJ7OJlt9S9xnO7OOPEUTGyzgNIDAyCiu8F4huLPaT + ape6RupxOMHZeoCVlqx3ouWctelB2oNXcxxiQ/8y+21aHfD4n/CiIFwDvIQjl7dg + mT3u5Lr6yxuosR3QJx1P6rP5ZrDTP9khT30t+HZCbvs5Pq+v/9m6XDmi+NlU7Zuh + Ehy97tL3uBDgoL4b/5BpFL5U9nruPlQzGq1P9jj40dxAaDAX/WLCwFwEEAECAAYF + AlNObS8ACgkQak9cqaePZ1molQf/WYxinFiP38X2HDuzng+krVpQ/H8GMBvrq9i+ + jpg2Q/Rhdd/BbLKeYlndcCWdXTLuh9L4Ey98tAxpHJX0pN1XRe/vrEeYHtaKo/M0 + 1beecsCp9V8WMmbc1SkXM6UG1jzWLN8xKN5mCJrVpD57RlGddxA/XyTqkCl8JhsP + TUtJavACNwzolLJozHIAB0OdRj8S+EvmBb5kcY/9+opaNq4k/uMHt38g2VoKZZIC + G4zXAWe6N/nlCCMhi5iLgf0IrBW5Eqo0pMqnsseB60WJ3WaHkpj73lzxsRq2kW8Z + 
7PKFGy+5bDXX8qEmtKOvhYtYyrwyJavU52pQeLOwY7chDrzhc8LBXAQQAQoABgUC + WK7LHAAKCRB/GCjHdaJGA/o5D/911ePhusgnrS1BFc+IMZEUijmgJhIQ3JY2Rs8o + pz66vTPlnoa+edOyaAWWQUM10NERCzw6VUo+Ss0IeHQfd+YlGsyakMGGVlzojXVq + NASFQqF9A4vuiVNGqoXlIOdo+RStRtvlj0U779CLUclIOpZGHs68dRsI3K2EmSzj + DDgOlq+SbmEEgSN542qtR7vAMBT+GOah9sVVWY+1+0jPOg4HttiT7yn5p1j9yi2v + DKRjHatGV3Q7sLf1oow+z4XHws6ZPsQZqBMaH5xbJuzHVNq4uNIAqSaWvpbmRMjq + dwfSV8LwJoszZIx09a1vnT103AITUhJxRr6kLbwZ4khSmGgol7vTKGdPd06kyln8 + bKLzosHadoM/NQKvzRxao4VZxRvmuLuCIF+Quqbbb830gWDYxdGqvux2iOuiiDKa + lJ/o6ko77qyWsl7hA5L51OG07ZeeHOf38ReUkHcg9cmqdyPY1R0+5upWmcclN11i + qa/QWz5LvFKd4JWbl31rWtSXJJ0QOiSA5ZXjjkbZ08bKDyWl88P3l2bYrh2W+G+h + GiD0Lg9odUCr2m/Url5iiYdtImeTXMxXNQ/9JIzqPaOHgNUMqgbhqDdGqPXOoZPb + 2tXx0AcQQa5mW5ve8dmHdCYCe8GLvW1PCuaD73vjhFSV/s7hoR2QW+p7UmgeYqd3 + 26cEacLAcwQQAQoAHRYhBCoy/gbahgGIctbIPoDB+7VZbdmbBQJbg5y2AAoJEIDB + +7VZbdmbh2IIAK7tjZGvX/axljW2YgcjqN4Dim/ukNa0rBs6m6N1o5msmPYzAxbk + qiwtr05T1v+L4HIE6RO8BvoLEttfij2Gf0V29yL+NOcYOxkqjM3mKNVdO5Oth9mG + fnHEAv/msg/PQ7x24qF2yPxeW0hMcVO24mVN0cQ1s+/D4hMSfE9prPaKhWDcLb/t + 0J4lmekULACK4zwHZKKn9YMD3BGcQceJvqMtguNVnxEJZ9STqv6cxMBLpIvnQIHl + XifNno+VNzqo23NRIpVzImV1zlE2prW9+5o4ljELLerHGVSAAzvrIn8t1uo2gc8I + inHk+X7IEcpkMubJXFj6qwuv2TxcdLHdNFDCwXMEEAEKAB0WIQSTSHXCzDNDnepf + 7whU7TuPprNXZwUCXOUwzQAKCRBU7TuPprNXZ/h+D/4/cxj/GReBRlWQc16vGVCa + 4CAV5yWT2n2ZZvXNYf7Kpx5JD6PDdkLS+r3hlfASn2PeozNPk4Z5g3rqPWioxdML + H3LepPRUoIOnRaKTNko8tPhPuRvOxOEn4SKh6NKQNqc4P6XfCa+26MvNVPaYONQM + 5ClaGRwNvBPfLkGIPOUD12nihb4z02u1sFZtOfX8P5nrhadfxjeNKVXZ4RvaJtFr + K5oFef+2DB+BkZULN+L5AY1MmTA/eDiYHS3m2WxnLZE251g8j0BZh/pO6DCSHxNM + AQMqrZW82o6BCItHJFiQvJ6cyoGmaVgYbMMCWtVmlROTm+6QsxNKR7WEymQ8gaDN + p9bPAFHa2MKGgqIUabj+DY32Wz+wNR9g08tl5X/YJO/MARs3LiY+Qy/iqrhp2r7o + 1FdnFSewy08D92u0w0EDxz2u96vWcDzxr2s1iXbhkhDIw7UGrJwfUqQ0eOtGGuDB + vNQS2mGttkXTUgYS8t1oAS2qPPpxHj0RVLGU6yanJjJTRUfdvX2a/2vSP6nFN8oM + li0O1pCxbkSTSwX9ltjfzstdg1mj7/l55njcgSMtC4cU8gKz8JxFzhGr2VMp8FoN + 
QfhmOlCKZRD3apgKBf18GRt928w7avoGERyY//Z6KM1lzoTYzbBRRq2FEfCnEXVw + jYyiMunW5stvFrnOYK+AL8LA8wQQAQoAHRYhBOL4VIJcPEdF1+gNvuUHu/kqCA+W + BQJmWfFNAAoJEOUHu/kqCA+WODQL/juZhOTrLR8n4cKHCwm1MNmtRA1xd9mPtjpI + jXvn/16MDttapukAxXpjfo3sDsL8nAjla0t8WgdYx/MQywI396YZiaNF7nDAoNCD + wnaEP2i2g+vJRDPniR3+dNwZilITfEVwunHkwh9qCq+NgOPYSkqnShVY+EElIHjG + lrqfSeBgBF3kJi2tWjF+ECSr0bk7OHP4LbcksIFfjTq0U24BA63fcpP9ogomNAvn + SaFSumPET7PRX52OJm0JhbZjs97liXe8lkTjtgMWA+S8t62s/DZRSDC6WxPjZzmo + q8izUhr3hrw4kIQl5hD2AJ0sHHXo/b+ME+08qZMpkPFmWMj5YGtvDd6frqNhqW1e + 6Q5pnzAJv91sjmYMHKZrGfA0vWY/NcCni0MLpFGVpNiwa+mP4DNNCadU7nt2AfFa + LnO5YBT9AbpoYtOrFh4DxrNo0Wss46+Nd4IBDdCofkb4BdlrP9kCCPmSaHxaau+i + pVHMEzodLsS7KmQt4c6gAQW0dwsbp8KmBBAWCgBOFiEEWN4UGzqiopPVV787DEOU + R/WIRFQFAmcsbe8Fgy7/WQADBQF4JoY8W14+XStbQC5da2FsaVwuc2hcLmNoc2hc + LXNcLmFudG9uPiQAAAoJEAxDlEf1iERUAoUA/iKXsf21IPCffbK/XOovLsAsX/oA + cQ5XYIhVsIvuJMSjAP97o/c5cJSFI511AMIh/DN4Yw7pe6YRvamUB8BlJlLdBA== + =BkpO + -----END PGP PUBLIC KEY BLOCK----- + repos: &postgresql_common_repos + - name: postgresql + uri: "http://apt.postgresql.org/pub/repos/apt" + suite: "{{ ansible_distribution_release }}-pgdg" + components: ["main"] + enabled: true + cleanup: + - /etc/apt/sources.list.d/pgdg.list + - /etc/apt/sources.list.d/pgdg.sources + postgresql_use_official_repo: false + roles: + - roles/vhosts/common/ + - roles/vhosts/postgres/ + +- name: Setup postgres server + hosts: global-homepage.svc.plus + become: true + vars: + group: global-homepage.svc.plus + repo_setup: true + apt_keyrings: *postgresql_common_keyrings + repos: *postgresql_common_repos + postgresql_use_official_repo: false + roles: + - roles/vhosts/common/ + - roles/vhosts/postgres/ diff --git a/deploy_postgres_vhosts.yml b/deploy_postgres_vhosts.yml new file mode 100644 index 0000000..405a116 --- /dev/null +++ b/deploy_postgres_vhosts.yml @@ -0,0 +1,8 @@ +- name: Deploy PostgreSQL on vhosts + hosts: "{{ postgresql_target | default('postgresql') }}" + become: true + vars: + group: "{{ group | 
default(postgresql_target | default('postgresql')) }}" + roles: + - roles/vhosts/common/ + - roles/vhosts/postgres/ diff --git a/deploy_redis_vhosts.yml b/deploy_redis_vhosts.yml new file mode 100644 index 0000000..a7ba1a1 --- /dev/null +++ b/deploy_redis_vhosts.yml @@ -0,0 +1,10 @@ +- name: Setup Redis server + hosts: cn-homepage.svc.plus + become: true + roles: + - roles/vhosts/Redis/ +- name: Setup Redis server + hosts: global-homepage.svc.plus + become: true + roles: + - roles/vhosts/Redis/ diff --git a/deploy_tiny_monitor_server_vhost.yml b/deploy_tiny_monitor_server_vhost.yml new file mode 100644 index 0000000..07bee11 --- /dev/null +++ b/deploy_tiny_monitor_server_vhost.yml @@ -0,0 +1,21 @@ +- name: setup tiny monitor server + hosts: cn-homepage.svc.plus + become: true + vars: + group: cn-homepage.svc.plus + roles: + - roles/vhosts/common/ + - roles/vhosts/prometheus/ + - roles/vhosts/grafana/ + - roles/vhosts/nginx/ + +- name: setup tiny monitor server + hosts: global-homepage.svc.plus + become: true + vars: + group: global-homepage.svc.plus + roles: + - roles/vhosts/common/ + - roles/vhosts/prometheus/ + - roles/vhosts/grafana/ + - roles/vhosts/nginx/ diff --git a/deploy_vhosts_otel-collector.yml b/deploy_vhosts_otel-collector.yml new file mode 100644 index 0000000..7d7c992 --- /dev/null +++ b/deploy_vhosts_otel-collector.yml @@ -0,0 +1,55 @@ +- name: setup otel exporters + hosts: otel.svc.plus + become: true + vars: + group: web + otlp_endpoint: https://otel.svc.plus/api/default/ + otlp_auth: "Basic cm9vdEBleGFtcGxlLmNvbTpRN01wRjZBTzZFelRjRjdJ" + otel_prometheus_node_static_configs: + - targets: ['172.31.2.33:9100'] + labels: + vendor: aws + account: prod + group: core + name: tky-proxy.svc.plus + iid: '172.31.2.33' + - targets: ['167.179.72.223:9100'] + labels: + vendor: aws + account: prod + group: web-system + name: global-homepage.svc.plus + iid: '167.179.72.223' + - targets: ['47.120.61.35:9100'] + labels: + vendor: alicloud + account: prod + group: 
web-system + name: cn-homepage.svc.plus + iid: '47.120.61.35' + otel_prometheus_process_static_configs: + - targets: ['172.31.2.33:9256'] + labels: + vendor: aws + account: prod + group: core + name: tky-proxy.svc.plus + iid: '172.31.2.33' + - targets: ['167.179.72.223:9256'] + labels: + vendor: aws + account: prod + group: web-system + name: global-homepage.svc.plus + iid: '167.179.72.223' + - targets: ['47.120.61.35:9256'] + labels: + vendor: alicloud + account: prod + group: web-system + name: cn-homepage.svc.plus + iid: '47.120.61.35' + exporters: + endpoint: https://otel.svc.plus/api/default/ + roles: + - roles/vhosts/otel-collector/ diff --git a/deploy_xcontrol_server._vhosts.yml b/deploy_xcontrol_server._vhosts.yml new file mode 100644 index 0000000..a4be64f --- /dev/null +++ b/deploy_xcontrol_server._vhosts.yml @@ -0,0 +1,8 @@ +- name: setup xcontrol server + hosts: cn-homepage.svc.plus + become: true + vars: + group: cn-homepage.svc.plus + roles: + - roles/vhosts/common/ + - roles/vhosts/xcontrol_server/ diff --git a/deploy_xcontrol_web.yml b/deploy_xcontrol_web.yml new file mode 100644 index 0000000..09639ce --- /dev/null +++ b/deploy_xcontrol_web.yml @@ -0,0 +1,8 @@ +- name: setup xcontrol web + hosts: all + become: true + vars: + group: mail + roles: + #- roles/vhosts/common/ + - roles/vhosts/nodejs/ diff --git a/deploy_zitadel_docker.yaml b/deploy_zitadel_docker.yaml new file mode 100644 index 0000000..9081b4d --- /dev/null +++ b/deploy_zitadel_docker.yaml @@ -0,0 +1,12 @@ +- name: setup zitadel + hosts: all + become: true + vars: + zitadel_target_host: auth.svc.plus + zitadel_domain: "{{ domain }}" + zitadel_masterkey: MasterkeyNeedsToHave32Characters + zitadel_workspace: /opt/zitadel + roles: + #- vhosts/common/ + - vhosts/docker/ + - docker/zitadel/ diff --git a/docs/alicloud_dns_sync.md b/docs/alicloud_dns_sync.md new file mode 100644 index 0000000..11a0a66 --- /dev/null +++ b/docs/alicloud_dns_sync.md @@ -0,0 +1 @@ +ansible-playbook 
alicloud_dns_sync.yml --extra-vars "aliyun_ak=XXXX aliyun_sk=YYYY" diff --git a/gpu_k8s_init.yml b/gpu_k8s_init.yml new file mode 100644 index 0000000..96f87e5 --- /dev/null +++ b/gpu_k8s_init.yml @@ -0,0 +1,15 @@ +- hosts: all + become: true + vars: + # Use the inventory hostname for delegation so Ansible + # applies the correct connection variables + ops_host: "k8s-1" + masters: + - "k8s-1" + nodes: + - "k8s-2" + - "k8s-3" + roles: + - roles/vhosts/common/ + - roles/vhosts/ssh-trust/ + - roles/vhosts/gpu-k8s/ diff --git a/gpu_k8s_reset.yml b/gpu_k8s_reset.yml new file mode 100644 index 0000000..43f8c50 --- /dev/null +++ b/gpu_k8s_reset.yml @@ -0,0 +1,13 @@ +- hosts: all + become: true + vars: + # Use the inventory hostname for delegation so Ansible + # applies the correct connection variables + ops_host: "k8s-1" + masters: + - "k8s-1" + nodes: + - "k8s-2" + - "k8s-3" + roles: + - roles/vhosts/gpu-k8s-reset/ diff --git a/init-harbor-server b/init-harbor-server new file mode 100644 index 0000000..70d8e1f --- /dev/null +++ b/init-harbor-server @@ -0,0 +1,17 @@ +- name: setup harbor + hosts: all + user: root + become: yes + gather_facts: yes + tasks: + - include_role: + name: harbor + vars: + group: master + namespace: harbor + db_namespace: database + update_secret: true + tls: + - secret_name: harbor-tls + keyfile: /etc/ssl/onwalk.net.key + certfile: /etc/ssl/onwalk.net.pem diff --git a/init_chaos_mesh b/init_chaos_mesh new file mode 100644 index 0000000..4f9f530 --- /dev/null +++ b/init_chaos_mesh @@ -0,0 +1,17 @@ +- name: setup chaos-mesh server + hosts: all + user: root + become: yes + gather_facts: yes + tasks: + - include_role: + name: chaos-mesh + vars: + group: master + domain: onwalk.net + namespace: chaos-mesh + update_secret: true + tls: + - secret_name: chaos-mesh-tls + keyfile: /etc/ssl/onwalk.net.key + certfile: /etc/ssl/onwalk.net.pem diff --git a/init_chartmuseum b/init_chartmuseum new file mode 100644 index 0000000..6e1f24c --- /dev/null +++
b/init_chartmuseum @@ -0,0 +1,8 @@ +--- +- name: deploy chartmuseum + hosts: all + user: ubuntu + become: yes + gather_facts: yes + roles: + - chartmuseum diff --git a/init_deepflow b/init_deepflow new file mode 100644 index 0000000..57b59a4 --- /dev/null +++ b/init_deepflow @@ -0,0 +1,16 @@ +- name: setup deepflow server + hosts: all + user: root + become: yes + gather_facts: yes + tasks: + - include_role: + name: deepflow + vars: + group: master + update_secret: true + namespace: monitoring + tls: + - secret_name: obs-tls + keyfile: /etc/ssl/onwalk.net.key + certfile: /etc/ssl/onwalk.net.pem diff --git a/init_flagger-loadtester b/init_flagger-loadtester new file mode 100644 index 0000000..f7d8315 --- /dev/null +++ b/init_flagger-loadtester @@ -0,0 +1,16 @@ +- name: setup flagger-loadtester server + hosts: all + user: root + become: yes + gather_facts: yes + tasks: + - include_role: + name: flagger-loadtester + vars: + group: master + update_secret: true + namespace: loadtester + tls: + - secret_name: obs-tls + keyfile: /etc/ssl/${DOMAIN}.key + certfile: /etc/ssl/${DOMAIN}.pem diff --git a/init_gitlab b/init_gitlab new file mode 100644 index 0000000..3e7411c --- /dev/null +++ b/init_gitlab @@ -0,0 +1,23 @@ +- name: setup gitlab + hosts: all + user: root + become: yes + gather_facts: yes + tasks: + - include_role: + name: gitlab + vars: + group: master + gitlab_version: '7.0.4' + namespace: gitlab + db_namespace: database + domain: onwalk.net + auto_issuance: false + update_secret: true + tls: + - secret_name: gitlab-tls + keyfile: /etc/ssl/onwalk.net.key + certfile: /etc/ssl/onwalk.net.pem + gitlab_oidc_client_id: gitlab-oidc + gitlab_oidc_isser: 'https://keycloak.onwalk.net/realms/cloud-sso' + gitlab_oidc_redirect_uri: 'https://gitlab.onwalk.net/users/auth/openid_connect/callback' diff --git a/init_grafana_alloy b/init_grafana_alloy new file mode 100644 index 0000000..f34d339 --- /dev/null +++ b/init_grafana_alloy @@ -0,0 +1,8 @@ +--- +- name: deploy grafana 
alloy agent + hosts: all + user: ubuntu + become: yes + gather_facts: yes + roles: + - alloy diff --git a/init_harbor_server b/init_harbor_server new file mode 100644 index 0000000..5bb2c7b --- /dev/null +++ b/init_harbor_server @@ -0,0 +1,8 @@ +--- +- name: deploy harbor server + hosts: all + user: ubuntu + become: yes + gather_facts: yes + roles: + - harbor diff --git a/init_jenkins b/init_jenkins new file mode 100644 index 0000000..e30dd66 --- /dev/null +++ b/init_jenkins @@ -0,0 +1,18 @@ +- name: setup jenkins server + hosts: all + user: root + become: yes + gather_facts: yes + tasks: + - include_role: + name: jenkins + vars: + group: master + domain: onwalk.net + namespace: jenkins + update_secret: true + db_namespace: database + tls: + - secret_name: jenkins-tls + keyfile: /etc/ssl/onwalk.net.key + certfile: /etc/ssl/onwalk.net.pem diff --git a/init_k3s_cluster_agent b/init_k3s_cluster_agent new file mode 100644 index 0000000..daa9b1f --- /dev/null +++ b/init_k3s_cluster_agent @@ -0,0 +1,8 @@ +--- +- name: Initialize K3s Cluster Agent + hosts: all + user: ubuntu + become: yes + gather_facts: yes + roles: + - k3s-cluster-agent diff --git a/init_k3s_cluster_server b/init_k3s_cluster_server new file mode 100644 index 0000000..6f33338 --- /dev/null +++ b/init_k3s_cluster_server @@ -0,0 +1,8 @@ +--- +- name: Initialize K3s Cluster Server + hosts: all + user: ubuntu + become: yes + gather_facts: yes + roles: + - k3s-cluster-server diff --git a/init_k3s_cluster_std b/init_k3s_cluster_std new file mode 100644 index 0000000..727c462 --- /dev/null +++ b/init_k3s_cluster_std @@ -0,0 +1,27 @@ +- name: set artifact cluster with vhosts + hosts: all + user: root + become: yes + gather_facts: yes + tasks: + - include_role: + name: k3s-reset + vars: + group: master + cluster_reset: 'enable' + - include_role: + name: k3s + vars: + group: master + cni: default + version: 'v1.27.2+k3s1' + pod_cidr: '10.10.0.0/16' + svc_cidr: '172.16.0.0/16' + enable_api_access: true + - 
include_role: + name: k3s-addon + vars: + group: master + ingress: nginx + external_dns: enable + cert_issuance: vault diff --git a/init_k3s_cluster_with_argo_server b/init_k3s_cluster_with_argo_server new file mode 100644 index 0000000..7c818c1 --- /dev/null +++ b/init_k3s_cluster_with_argo_server @@ -0,0 +1,38 @@ +- name: set artifact cluster with vhosts + hosts: all + user: root + become: yes + gather_facts: yes + tasks: + - include_role: + name: k3s-reset + vars: + group: master + cluster_reset: 'enable' + - include_role: + name: k3s + vars: + group: master + cni: default + version: 'v1.27.2+k3s1' + pod_cidr: '10.10.0.0/16' + svc_cidr: '172.16.0.0/16' + enable_api_access: true + - include_role: + name: k3s-addon + vars: + group: master + ingress: disable + external_dns: disable + cert_issuance: vault + - include_role: + name: argo-server + vars: + group: master + namespace: argocd + domain: onwalk.net + update_secret: true + tls: + - secret_name: argocd-server-tls + keyfile: /etc/ssl/onwalk.net.key + certfile: /etc/ssl/onwalk.net.pem diff --git a/init_observability-agent b/init_observability-agent new file mode 100644 index 0000000..8c2b666 --- /dev/null +++ b/init_observability-agent @@ -0,0 +1,13 @@ +- name: setup observability agent + hosts: all + user: root + become: yes + gather_facts: yes + tasks: + - include_role: + name: observability-agent + vars: + group: master + namespace: monitoring + deepflowserverip: 10.146.0.8 + deepflowk8sclusterid: d-kqjofXyZbg diff --git a/init_observability-server b/init_observability-server new file mode 100644 index 0000000..cba56a0 --- /dev/null +++ b/init_observability-server @@ -0,0 +1,29 @@ +- name: setup observability server + hosts: all + user: root + become: yes + gather_facts: yes + tasks: + - include_role: + name: observability-server + vars: + group: master + update_secret: true + auto_issuance: false + namespace: monitoring + db_namespace: database + tls: + - secret_name: obs-tls + keyfile: /etc/ssl/svc.ink.key 
+ certfile: /etc/ssl/svc.ink.pem + - include_role: + name: flagger-loadtester + vars: + group: master + update_secret: true + auto_issuance: false + namespace: loadtester + tls: + - secret_name: obs-tls + keyfile: /etc/ssl/svc.ink.key + certfile: /etc/ssl/svc.ink.pem diff --git a/init_openldap b/init_openldap new file mode 100644 index 0000000..069d3eb --- /dev/null +++ b/init_openldap @@ -0,0 +1,18 @@ +- name: setup openldap + hosts: all + user: root + become: yes + gather_facts: yes + tasks: + - include_role: + name: openldap + vars: + group: master + namespace: itsm + domain: onwalk.net + update_secret: true + auto_issuance: false + tls: + - secret_name: openldap-tls + keyfile: /etc/ssl/onwalk.net.key + certfile: /etc/ssl/onwalk.net.pem diff --git a/init_splunk-otel-collector b/init_splunk-otel-collector new file mode 100644 index 0000000..ec1e411 --- /dev/null +++ b/init_splunk-otel-collector @@ -0,0 +1,13 @@ +- name: setup splunk otel collector + hosts: all + user: root + become: yes + gather_facts: yes + tasks: + - include_role: + name: splunk-otel-collector + vars: + group: master + namespace: default + splunk_hec_url: https://xxxx.splunkcloud.com:8088/services/collector/event + splunk_hec_token: "token-xxxxxx" diff --git a/init_telegraf b/init_telegraf new file mode 100644 index 0000000..32bfd57 --- /dev/null +++ b/init_telegraf @@ -0,0 +1,10 @@ +- name: Setup telegraf + hosts: all + user: root + become: yes + gather_facts: yes + tasks: + - include_role: + name: telegraf + vars: + update_secret: true diff --git a/init_vault b/init_vault new file mode 100644 index 0000000..45dc7e0 --- /dev/null +++ b/init_vault @@ -0,0 +1,8 @@ +--- +- name: deploy vault server + hosts: all + user: ubuntu + become: yes + gather_facts: yes + roles: + - vault diff --git a/init_vpn_gateway.yml b/init_vpn_gateway.yml new file mode 100755 index 0000000..525b7c7 --- /dev/null +++ b/init_vpn_gateway.yml @@ -0,0 +1,7 @@ +--- +- hosts: vpn-gateway + user: ubuntu + become: yes + 
gather_facts: yes + roles: + - wireguard-gateway diff --git a/inventory.ini b/inventory.ini new file mode 100644 index 0000000..36298ee --- /dev/null +++ b/inventory.ini @@ -0,0 +1,32 @@ +[web] +cn-homepage.svc.plus ansible_host=47.120.61.35 +global-homepage.svc.plus ansible_host=167.179.72.223 + +[deepflow_agents] +192.168.1.101 ansible_user=root ansible_ssh_pass=pass101 +192.168.1.102 ansible_user=admin ansible_ssh_pass=pass102 +192.168.1.103 ansible_user=root ansible_ssh_pass=pass103 ansible_port=2222 +192.168.1.104 ansible_user=ubuntu ansible_ssh_private_key_file=~/.ssh/id_rsa_ubuntu + +[mail] +smtp.svc.plus ansible_host=45.130.167.90 + +[bootstrap] +auth.svc.plus ansible_host=34.92.122.119 ansible_user=root ansible_ssh_private_key_file=~/.ssh/id_rsa + +[all:vars] +ansible_port=22 +ansible_user=root +ansible_host_key_checking=False + +# SSH 密钥或密码(二选一) +# ansible_ssh_private_key_file=~/.ssh/id_rsa +# ansible_ssh_pass=your_password + +# DeepFlow agent 配置变量 +controller_ips=["10.10.10.10", "10.10.10.11"] +vtap_group_id="g-P22vLIMdB6" + +# DeepFlow agent 安装包位置 +agent_base_dir="deepflow-agent-for-linux" +agent_package_name="deepflow-agent-1.0-5407.systemd.x86_64.rpm" diff --git a/keycloak_server b/keycloak_server new file mode 100644 index 0000000..96a381b --- /dev/null +++ b/keycloak_server @@ -0,0 +1,7 @@ +--- +- hosts: all + user: ubuntu + become: yes + gather_facts: yes + roles: + - keycloak diff --git a/pre_setup.sh b/pre_setup.sh new file mode 100644 index 0000000..7379c04 --- /dev/null +++ b/pre_setup.sh @@ -0,0 +1,48 @@ +#!/bin/bash + +# Function to check if a variable is empty +check_empty() { + if [ -z "${!1}" ]; then + echo "$1 is empty. Aborting." 
+ exit 1 + fi +} + +# List of variables to check +variables=("DNS_AK" "DNS_SK" "OSS_AK" "OSS_SK" "ROOT_PASSWORD" "SMTP_PASSWORD" "GITLAB_OIDC_CLIENT_TOKEN" "HARBOR_OIDC_CLIENT_TOKEN" "SSH_USER" "SSH_HOST_IP" "SSH_HOST_DOMAIN" "SSH_PRIVATE_KEY") + +# Loop through variables and check if each one is empty +for var in "${variables[@]}"; do + check_empty "$var" +done + +sudo apt install jq ansible -y + +mkdir -pv ~/.ssh/ +cat > ~/.ssh/id_rsa << EOF +$SSH_PRIVATE_KEY +EOF +sudo chmod 0400 ~/.ssh/id_rsa +md5sum ~/.ssh/id_rsa + +mkdir -pv hosts/ + +cat > hosts/inventory << EOF +[master] +$SSH_HOST_DOMAIN ansible_host=$SSH_HOST_IP + +[all:vars] +ansible_port=22 +ansible_ssh_user=$SSH_USER +ansible_ssh_private_key_file=~/.ssh/id_rsa +ansible_host_key_checking=False +ingress_ip=$SSH_HOST_IP +dns_ak=$DNS_AK +dns_sk=$DNS_SK +oss_ak=$OSS_AK +oss_sk=$OSS_SK +admin_password=$ROOT_PASSWORD +smtp_password=$SMTP_PASSWORD +gitlab_oidc_client_token=$GITLAB_OIDC_CLIENT_TOKEN +harbor_oidc_client_token=$HARBOR_OIDC_CLIENT_TOKEN +EOF diff --git a/renew_nodes_ssl_certs b/renew_nodes_ssl_certs new file mode 100644 index 0000000..3fc0c75 --- /dev/null +++ b/renew_nodes_ssl_certs @@ -0,0 +1,8 @@ +--- +- name: renew nodes ssl certs + hosts: all + user: ubuntu + become: yes + gather_facts: yes + roles: + - cert-manager diff --git a/roles/README.md b/roles/README.md new file mode 100644 index 0000000..0d1196f --- /dev/null +++ b/roles/README.md @@ -0,0 +1,33 @@ +# Playbook roles planning + +This document clarifies what should live under `/playbooks/roles/` for host-level automation (Ansible) versus what should be delivered through Helm charts, and ensures we cover the five tiers across data platforms: data warehouse → big data → ML → DL → large models. + +## Scope rules +- **Ansible roles**: host-coupled configuration that is not itself a cloud resource (GPU driver/runtime, OS tuning, user/SSH prep, rendering on-host config files, database bootstrapping, etc.). 
+- **Helm charts**: anything that runs as a Kubernetes workload (operators, clusters, services running in pods). + +## Base roles shared across tiers (Ansible) +- GPU driver and CUDA stack installation. +- Docker/Containerd runtime setup. +- System parameter tuning (kernel limits, hugepages, network stack), plus user home/SSH layout. +- Database initialization tasks (e.g., bootstrap PostgreSQL/ClickHouse on hosts) and rendering templated configs such as `ClickHouse/users.xml`. + +## Coverage by capability tier +| Tier | Host-focused roles (Ansible) | Kubernetes services (Helm) | +| --- | --- | --- | +| Data warehouse | ClickHouse host bootstrap & config render; PostgreSQL init where needed. | — | +| Big data | JVM/runtime, local disks, and OS tuning for data nodes. | Spark Operator; Flink Operator; Kafka/Redpanda; MinIO. | +| ML | GPU runtime base (drivers, container runtime), Python ML base image prep; user workspace/SSH. | Ray Cluster; MLflow; JupyterHub. | +| DL | Same GPU/system tuning plus inference node bootstrap (tensorRT/cuDNN as needed). | Triton Inference Server; LMDeploy (for deployment runtimes). | +| Large models | Secure SSH/user profiles and config templating for model storage/IO. | vLLM serving; model-specific Helm releases atop Ray/K8s. | + +## Suggested role layout under `/playbooks/roles/` +- `common/` (new): shared tasks for system tuning, users/SSH, and package repos for GPU/runtime support. +- `gpu/`: install GPU drivers + CUDA toolkit. +- `container_runtime/`: install and configure Docker/Containerd with GPU runtime integration. +- `database_init/`: bootstrap on-host databases (e.g., PostgreSQL, ClickHouse), render config files (`users.xml`, etc.). +- `bigdata_node_prep/`: OS/disk tuning for Spark/Flink/Kafka/Redpanda/MinIO hosts. +- `ml_node_prep/`: Python/conda base, SSH workspace prep for ML workloads. +- `dl_inference_node/`: tensorRT/cuDNN dependencies and runtime checks for Triton/LMDeploy nodes. 
+ +Helm-delivered components should live under `playbooks/roles/charts/` or the repo’s Helm release structure and include Spark/Flink Operators, Kafka/Redpanda/MinIO, Ray Cluster, Triton, vLLM/LMDeploy, MLflow, and JupyterHub. diff --git a/roles/charts/app/meta/main.yml b/roles/charts/app/meta/main.yml new file mode 100644 index 0000000..9711b33 --- /dev/null +++ b/roles/charts/app/meta/main.yml @@ -0,0 +1,2 @@ +dependencies: + - role: common diff --git a/roles/charts/app/tasks/main.yml b/roles/charts/app/tasks/main.yml new file mode 100755 index 0000000..f7fdd0e --- /dev/null +++ b/roles/charts/app/tasks/main.yml @@ -0,0 +1,16 @@ +- name: Prep DIR + shell: "mkdir -pv /tmp/app/" + +- name: Prep NameSpace + shell: "kubectl create namespace default || echo true" + +- name: Sync Deploy yaml + template: src=templates/{{ item }} dest=/tmp/app/{{ item }} owner=root group=root mode=0644 force=yes unsafe_writes=yes + with_items: + - deploy-app.yaml + +- name: Setup App + shell: "kubectl apply -f /tmp/app/{{ item }}" + when: inventory_hostname in groups[group] + with_items: + - deploy-app.yaml diff --git a/roles/charts/app/templates/.gitignore b/roles/charts/app/templates/.gitignore new file mode 100644 index 0000000..a194b20 --- /dev/null +++ b/roles/charts/app/templates/.gitignore @@ -0,0 +1,2 @@ +/clickhouse-keeper-k8s.iml +/.idea/ diff --git a/roles/charts/app/templates/deploy-app.yaml b/roles/charts/app/templates/deploy-app.yaml new file mode 100644 index 0000000..aebc9ff --- /dev/null +++ b/roles/charts/app/templates/deploy-app.yaml @@ -0,0 +1,18 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: app +spec: + replicas: 1 + selector: + matchLabels: + app: demo + template: + metadata: + labels: + app: demo + spec: + containers: + - name: demo + image: {{ app_image }}:{{ app_tag }} + imagePullPolicy: Always diff --git a/roles/charts/argo-server/files/setup-argocd.sh b/roles/charts/argo-server/files/setup-argocd.sh new file mode 100644 index 0000000..8aec1f0 --- 
/dev/null +++ b/roles/charts/argo-server/files/setup-argocd.sh @@ -0,0 +1,100 @@ +#!/bin/bash + +# 检查参数是否为空 +check_not_empty() { + if [[ -z $1 ]]; then + echo "Error: $2 is empty. Please provide a value." + exit 1 + fi +} + +helm repo add argo https://argoproj.github.io/argo-helm +helm repo update + +# 使用 Helm 部署 Argo CD +#helm upgrade --install argocd argo/argo-cd -n argocd --create-namespace + +cat < values.yaml +global: + domain: argocd.onwalk.net +server: + service: + type: ClusterIP + servicePortHttp: 80 + servicePortHttps: 443 + servicePortHttpName: http + servicePortHttpsName: https + ingress: + enabled: false + ingressClassName: "nginx" + hostname: argocd.onwalk.net + annotations: + nginx.ingress.kubernetes.io/force-ssl-redirect: "true" + nginx.ingress.kubernetes.io/backend-protocol: "HTTP" + tls: true +repoServer: + extraContainers: + - name: helmfile + image: ghcr.io/helmfile/helmfile:v0.157.0 + # Entrypoint should be Argo CD lightweight CMP server i.e. argocd-cmp-server + command: ["/var/run/argocd/argocd-cmp-server"] + env: + - name: HELM_CACHE_HOME + value: /tmp/helm/cache + - name: HELM_CONFIG_HOME + value: /tmp/helm/config + - name: HELMFILE_CACHE_HOME + value: /tmp/helmfile/cache + - name: HELMFILE_TEMPDIR + value: /tmp/helmfile/tmp + securityContext: + runAsNonRoot: true + runAsUser: 999 + volumeMounts: + - mountPath: /var/run/argocd + name: var-files + - mountPath: /home/argocd/cmp-server/plugins + name: plugins + # Register helmfile plugin into sidecar + - mountPath: /home/argocd/cmp-server/config/plugin.yaml + subPath: helmfile.yaml + name: argocd-cmp-cm + # Starting with v2.4, do NOT mount the same tmp volume as the repo-server container. The filesystem separation helps mitigate path traversal attacks. 
+ - mountPath: /tmp + name: helmfile-tmp + volumes: + - name: argocd-cmp-cm + configMap: + name: argocd-cmp-cm + - name: helmfile-tmp + emptyDir: {} +configs: + cmp: + create: true + plugins: + helmfile: + allowConcurrency: true + discover: + fileName: helmfile.yaml + generate: + command: + - bash + - "-c" + - | + if [[ -v ENV_NAME ]]; then + helmfile -n "$ARGOCD_APP_NAMESPACE" -e $ENV_NAME template --include-crds -q + elif [[ -v ARGOCD_ENV_ENV_NAME ]]; then + helmfile -n "$ARGOCD_APP_NAMESPACE" -e "$ARGOCD_ENV_ENV_NAME" template --include-crds -q + else + helmfile -n "$ARGOCD_APP_NAMESPACE" template --include-crds -q + fi + lockRepo: false +EOF + +helm upgrade --install argocd argo/argo-cd -n argocd -f values.yaml + +# 等待 Argo CD 完全启动 +echo "Waiting for Argo CD to be ready..." +kubectl wait --for=condition=ready pod -l app.kubernetes.io/name=argocd-server -n argocd --timeout=180s + +echo "Argo CD deployment and configuration complete." diff --git a/roles/charts/argo-server/meta/main.yml b/roles/charts/argo-server/meta/main.yml new file mode 100644 index 0000000..83cef7b --- /dev/null +++ b/roles/charts/argo-server/meta/main.yml @@ -0,0 +1,2 @@ +dependencies: + - role: cert-manager diff --git a/roles/charts/argo-server/tasks/main.yml b/roles/charts/argo-server/tasks/main.yml new file mode 100755 index 0000000..fbf7c91 --- /dev/null +++ b/roles/charts/argo-server/tasks/main.yml @@ -0,0 +1,2 @@ +- name: Set ArgoCD Contoller + script: files/setup-argocd.sh diff --git a/roles/charts/chaos-mesh/files/setup.sh b/roles/charts/chaos-mesh/files/setup.sh new file mode 100644 index 0000000..d26f23a --- /dev/null +++ b/roles/charts/chaos-mesh/files/setup.sh @@ -0,0 +1,24 @@ +#!/bin/bash +set -x +export domain=$1 +export secret=$2 +export namespace=$3 + +cat > values.yaml << EOF +chaosDaemon: + runtime: containerd + socketPath: /run/k3s/containerd/containerd.sock +dashboard: + create: true + ingress: + enabled: true + ingressClassName: "nginx" + hosts: + - name: 
chaos-mesh.$domain + tls: true + tlsSecret: $secret +EOF + +helm repo add chaos-mesh https://charts.chaos-mesh.org +helm repo update +helm upgrade --install chaos-mesh chaos-mesh/chaos-mesh -n $namespace --create-namespace --version 2.6.3 -f values.yaml diff --git a/roles/charts/chaos-mesh/howto.md b/roles/charts/chaos-mesh/howto.md new file mode 100644 index 0000000..e978bcb --- /dev/null +++ b/roles/charts/chaos-mesh/howto.md @@ -0,0 +1,124 @@ +# Jenkins Master 部署 + +# Jenkins Node IaC Runner 设置 +1. 安装git terraform + +## GitLab to trigger Jenkins + +1. Gitlab https://gitlab.xxx.com/-/profile/personal_access_tokens + +2. GitLab和Jenkins的集成可以让你在GitLab中的代码更新后自动触发Jenkins的构建任务。以下是配置GitLab插件和Jenkins以实现GitLab触发Jenkins的步骤: +3. 在Jenkins中安装GitLab插件 +首先,你需要在Jenkins中安装GitLab插件。登录到Jenkins的管理界面,然后转到“Manage Jenkins” > “Manage Plugins” > “Available”,在搜索框中输入“GitLab”,找到并安装“GitLab Plugin”。 +4. 在Jenkins中配置GitLab连接 +安装完插件后,你需要配置GitLab的连接。转到“Manage Jenkins” > “Configure System”,滚动到“GitLab”部分,点击“Add GitLab Server” > “Server”,输入你的GitLab服务器URL,并生成并输入一个与你的GitLab账户相关联的API Token。 +5. 在Jenkins中创建一个新的任务 +创建一个新的任务,并在源代码管理部分选择“Git”,输入你的GitLab项目的URL。在构建触发器部分,选择“Build when a change is pushed to GitLab”。 +记录:GitLab webhook URL: https://jenkins.xxx.xxx/project/alicloud-oss-pipeline +6. 在GitLab中配置Webhook +在你的GitLab项目中,转到“Settings” > “Integrations” -> 启用"Jenkins" +- 在URL中输入步骤5记录的 Webhook URL https://jenkins.xxx.xxx/project/alicloud-oss-pipeline +- 选择你想要触发Jenkins任务的事件(例如,当代码被推送时) +- Project name: 输入项目名称 +- Username: Jenkins 用户名 +- Password: Jenkins 认证密码 +- 保存更改, 测试设置,返回状态200为配置正确 + +以上就是配置GitLab插件和Jenkins以实现GitLab触发Jenkins的步骤。在完成这些步骤后,每当你的GitLab项目有更新时,都会自动触发对应的Jenkins构建任务。 + +## 要将GitHub代码仓库与Jenkins关联起来,您需要完成以下步骤: + +1 要在 GitHub 中启用 webhook 功能以触发 Jenkins 构建,请按照以下步骤操作: +2 进入 GitHub 仓库设置:在要设置 webhook 的 GitHub 仓库页面上,点击右上角的“Settings”。 +3 选择 Webhooks 选项:在仓库设置页面的左侧菜单中,选择“Webhooks”。 +4 添加 Webhook:在 Webhooks 页面的右上角,点击“Add webhook”。 + +配置 Webhook: + +1.
Payload URL:输入 Jenkins 服务器的 webhook URL。格式应为 http://your-jenkins-server/github-webhook/。确保替换 your-jenkins-server 为您 Jenkins 服务器的实际地址。 +2. Content type:选择 application/json。 +3. Secret(可选):如果需要额外的安全性,可以输入一个秘密令牌。 +4. SSL verification:选择是否验证 SSL 证书。 +5. Which events would you like to trigger this webhook?:选择触发 webhook 的事件。通常选择 Just the push event(只有推送事件)或 Let me select individual events(让我选择单独的事件)并选择适当的事件(例如,push、pull request 等)。 +添加 Webhook:点击页面底部的“Add webhook”按钮以保存配置。 + +完成以上步骤后,您的 GitHub 仓库就配置好了一个 webhook,可以触发 Jenkins 构建。记得在 Jenkins 中设置相应的任务来响应这些 webhook。 + + +安装Jenkins插件: + +确保您的Jenkins实例已经安装了“GitHub”和“GitHub Integration”插件。您可以在Jenkins管理界面的“插件管理”部分进行安装。 +配置GitHub Webhook: + +在GitHub仓库的设置中,找到“Webhooks”部分并添加一个新的Webhook。 +将“Payload URL”设置为您的Jenkins服务器的URL,通常是这样的格式:http://your-jenkins-server/github-webhook/。 +选择触发Webhook的事件,通常是“Just the push event”或者“Send me everything”。 +确保“Content type”设置为“application/json”。 +点击“Add webhook”保存设置。 +配置Jenkins Job: + +在Jenkins中创建一个新的构建任务或者配置现有的任务。 +在“源码管理”部分,选择“Git”并填写您的GitHub仓库的URL。 +在“构建触发器”部分,选择“GitHub hook trigger for GITScm polling”选项。这样,每当GitHub仓库有新的推送事件时,Jenkins就会自动触发构建。 +测试配置: + +推送一些改动到您的GitHub仓库,检查是否触发了Jenkins构建。 +在Jenkins的构建历史中查看构建是否成功执行。 +通过完成以上步骤,您的GitHub代码仓库就与Jenkins关联起来了,可以实现自动触发构建的功能。 + +要在 Jenkins 中设置 GitHub 服务,您需要进行以下步骤: + +安装 GitHub 插件:首先确保您的 Jenkins 实例已安装 GitHub 插件。如果尚未安装,请转到 Jenkins 的“插件管理”页面,在“可选插件”选项卡中搜索并安装 GitHub 插件。 + +配置 GitHub 服务器:在 Jenkins 管理界面中,转到“系统管理” > “系统设置”。 + +在系统设置页面中,找到并点击“GitHub”部分。 +点击“Add GitHub Server”添加一个新的 GitHub 服务器配置。 +在配置页面中,输入一个描述性的名称,例如“GitHub”。 +在 GitHub API URL 中输入 GitHub 的 API 地址。通常为 https://api.github.com。 +如果您的 GitHub 仓库需要身份验证,请在“凭据”部分选择一个已配置的凭据。如果尚未配置凭据,请点击“Add”添加一个新的凭据,选择类型为“Secret text”或“Username with password”,然后输入您的 GitHub 用户名和密码或访问令牌。 +完成配置后,点击“保存”保存 GitHub 服务器配置。 +验证配置:您可以在配置页面的底部点击“Test connection”来验证您的 GitHub 服务器配置是否正常工作。 + +保存设置:确保在完成配置后点击“保存”保存更改。 + +现在,您已成功配置了 Jenkins 的 GitHub 服务。您可以在 Jenkins 任务中使用这个配置来与 GitHub 仓库进行集成,例如触发构建、拉取代码等操作。 + + +对于 Jenkins 中的 GitHub API URL (https://api.github.com)
的凭据设置,您可以使用 GitHub Personal Access Token。这个 Token 可以通过以下步骤生成: + +在 GitHub 上登录您的账号。 +点击页面右上角的头像,选择“Settings”。 +在左侧边栏中,点击“Developer settings”。 +在左侧边栏中,点击“Personal access tokens”。 +点击“Generate new token”。 +输入一个描述性的名称,选择需要的权限(至少需要 repo 权限来访问仓库),然后点击“Generate token”。 +复制生成的 Token,并保存到一个安全的地方。请注意,这个 Token 只会显示一次,如果您丢失了,请重新生成一个新的 Token。 +在 Jenkins 中使用这个 Token 作为 GitHub API URL (https://api.github.com) 的凭据时,您可以将 Token 添加为 Jenkins 的凭据: + +进入 Jenkins 管理界面,转到“凭据” > “系统”。 +在“系统”页面中,点击“Global credentials (unrestricted)”。 +在凭据页面中,点击“Add credentials”。 +在“Kind”下拉菜单中选择“Secret text”。 +在“Secret”框中粘贴您在 GitHub 上生成的 Personal Access Token。 +输入一个描述性的名称,并点击“OK”保存凭据。 +现在,您可以在 Jenkins 的配置中使用这个凭据来访问 GitHub API (https://api.github.com)。 + +确保 Docker 已安装:在 Jenkins 代理节点上确认 Docker 已正确安装并配置。您可以通过在终端中执行 docker --version 命令来检查 Docker 是否可用。 + +检查 Docker 环境:如果 Docker 已安装,请确保 Docker 服务正在运行。您可以使用 sudo systemctl status docker 命令检查 Docker 服务的状态。 + +确认 Jenkins 全局工具配置:在 Jenkins 管理界面中,转到“系统管理”->“全局工具配置”,确保 Docker 工具已正确配置。如果未配置,您可以添加一个 Docker 工具,并指定正确的安装路径。 + +重启 Jenkins 服务:在进行了上述更改后,尝试重启 Jenkins 服务,以确保新的配置生效。 + +尝试在终端中执行 Docker 命令:在 Jenkins 代理节点上打开终端,尝试手动执行一些 Docker 命令(如 docker pull),看看是否能够正常执行 + +要设置 Jenkins Docker 流水线,你可以按照以下步骤进行操作: + +前提条件 +确保你的 Jenkins 实例已经安装了以下插件: + +Docker Pipeline +Docker Commons + diff --git a/roles/charts/chaos-mesh/meta/main.yml b/roles/charts/chaos-mesh/meta/main.yml new file mode 100644 index 0000000..1f2217b --- /dev/null +++ b/roles/charts/chaos-mesh/meta/main.yml @@ -0,0 +1,2 @@ +dependencies: + - role: secret-manger diff --git a/roles/charts/chaos-mesh/tasks/main.yml b/roles/charts/chaos-mesh/tasks/main.yml new file mode 100755 index 0000000..75fdece --- /dev/null +++ b/roles/charts/chaos-mesh/tasks/main.yml @@ -0,0 +1,4 @@ +- name: Setup chaos-mesh Server + script: files/setup.sh {{ domain }} {{ item.secret_name }} {{ namespace }} + when: inventory_hostname in groups[group] and ( tls is defined) + loop: "{{ tls }}" diff --git a/roles/charts/chartmuseum/files/setup.sh 
b/roles/charts/chartmuseum/files/setup.sh new file mode 100644 index 0000000..f1f870f --- /dev/null +++ b/roles/charts/chartmuseum/files/setup.sh @@ -0,0 +1,37 @@ +#!/bin/bash + +domain=$1 +namespace=$2 +admin_password=$3 +secret_name=$4 +storage_type=$5 + +cat > values.yaml << EOF +env: + open: + STORAGE: local + DISABLE_API: false + AUTH_ANONYMOUS_GET: true + secret: + BASIC_AUTH_USER: admin + BASIC_AUTH_PASS: '$admin_password' +ingress: + enabled: true + hosts: + - name: charts.$domain + path: / + tls: true + tlsSecret: $secret_name + ingressClassName: nginx +persistence: + enabled: true + accessMode: ReadWriteOnce + size: 8Gi + path: /storage + storageClass: "local-path" +EOF + +export KUBECONFIG=/etc/rancher/k3s/k3s.yaml +helm repo add chartmuseum https://chartmuseum.github.io/charts +helm repo update +helm upgrade --install chartmuseum chartmuseum/chartmuseum -f values.yaml -n $namespace diff --git a/roles/charts/chartmuseum/meta/main.yml b/roles/charts/chartmuseum/meta/main.yml new file mode 100644 index 0000000..1f2217b --- /dev/null +++ b/roles/charts/chartmuseum/meta/main.yml @@ -0,0 +1,2 @@ +dependencies: + - role: secret-manger diff --git a/roles/charts/chartmuseum/tasks/main.yml b/roles/charts/chartmuseum/tasks/main.yml new file mode 100755 index 0000000..02d9e65 --- /dev/null +++ b/roles/charts/chartmuseum/tasks/main.yml @@ -0,0 +1,4 @@ +- name: Setup Chartmuseum Server + script: files/setup.sh {{ domain }} {{ namespace }} {{ admin_password }} {{ item.secret_name }} + loop: "{{ tls }}" + when: inventory_hostname in groups[group] diff --git a/roles/charts/chartmuseum/vars/main.yml b/roles/charts/chartmuseum/vars/main.yml new file mode 100644 index 0000000..993b09c --- /dev/null +++ b/roles/charts/chartmuseum/vars/main.yml @@ -0,0 +1,8 @@ +group: master +namespace: harbor +storage_type: oss +update_secret: true +tls: + - secret_name: chartmuseum-tls + keyfile: /etc/ssl/onwalk.net.key + certfile: /etc/ssl/onwalk.net.pem diff --git 
a/roles/charts/clickhouse/meta/main.yml b/roles/charts/clickhouse/meta/main.yml new file mode 100644 index 0000000..1f2217b --- /dev/null +++ b/roles/charts/clickhouse/meta/main.yml @@ -0,0 +1,2 @@ +dependencies: + - role: secret-manger diff --git a/roles/charts/clickhouse/tasks/main.yml b/roles/charts/clickhouse/tasks/main.yml new file mode 100755 index 0000000..3a21731 --- /dev/null +++ b/roles/charts/clickhouse/tasks/main.yml @@ -0,0 +1,48 @@ +- name: Prep DIR + shell: "mkdir -pv /tmp/clickhouse-cluster/ && mkdir -pv /tmp/qryn" + +- name: Prep NameSpace + shell: "kubectl create namespace monitoring || echo true" + +- name: sync clickhouse deploy yaml + template: src=templates/{{ item }} dest=/tmp/{{ item }} owner=root group=root mode=0644 force=yes unsafe_writes=yes + with_items: + - clickhouse-cluster/clickhouse-config.yaml + - clickhouse-cluster/clickhouse-service.yaml + - clickhouse-cluster/clickhouse-user-config.yaml + - clickhouse-cluster/clickhouse-statefulset.yml + - postsetup.sh + +- name: Setup ClickHouse Server + shell: "cd /tmp/clickhouse-cluster && kubectl apply -f ." + when: inventory_hostname in groups[group] + +#- name: Post Setup ClickHouse Server +# shell: "cd /tmp/ && sh postsetup.sh" +# when: inventory_hostname in groups[group] + +- name: get clickhouse node ip + shell: " kubectl get pods -n monitoring -o wide | grep -E '^clickhouse-' | awk '{print $6}' " + register: ck_node_ip_raw + when: inventory_hostname == groups[group][0] # exact match on the first host of the group; "in" was a substring test against that hostname + +- name: Check if ck_node_ip_raw is not empty + fail: + msg: "ck_node_ip_raw is empty, terminating the playbook."
+ when: inventory_hostname == groups[group][0] and (ck_node_ip_raw.stdout_lines | length) == 0 # only the host that ran the lookup has stdout_lines; skipped hosts would error on it + +- name: set fact join command for ck_node_ip + set_fact: + ck_node_ip : "{{ ck_node_ip_raw.stdout_lines[0] }}" + when: inventory_hostname == groups[group][0] # same host guard as the lookup task above it + +- name: sync clickhouse deploy yaml + template: src=templates/{{ item }} dest=/tmp/{{ item }} owner=root group=root mode=0644 force=yes unsafe_writes=yes + with_items: + - qryn/qryn-deployment.yaml + - qryn/qryn-service.yaml + - qryn/qryn-ingress.yaml + +- name: Setup Qryn Server + shell: "cd /tmp/qryn && kubectl apply -f ." + when: inventory_hostname in groups[group] diff --git a/roles/charts/clickhouse/templates/.gitignore b/roles/charts/clickhouse/templates/.gitignore new file mode 100644 index 0000000..a194b20 --- /dev/null +++ b/roles/charts/clickhouse/templates/.gitignore @@ -0,0 +1,2 @@ +/clickhouse-keeper-k8s.iml +/.idea/ diff --git a/roles/charts/clickhouse/templates/clickhouse-cluster/clickhouse-config.yaml b/roles/charts/clickhouse/templates/clickhouse-cluster/clickhouse-config.yaml new file mode 100644 index 0000000..7e48d80 --- /dev/null +++ b/roles/charts/clickhouse/templates/clickhouse-cluster/clickhouse-config.yaml @@ -0,0 +1,94 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: clickhouse-config + namespace: monitoring +data: + keeper.xml: | + + + 0.0.0.0 + + trace + 1 + + + + + + + + + 2181 + + /var/lib/clickhouse/coordination/log + /var/lib/clickhouse/coordination/snapshots + + 10000 + 30000 + trace + 10000 + + + + 0 + clickhouse-0.clickhouse-service.monitoring + 9444 + + + 1 + clickhouse-1.clickhouse-service.monitoring + 9444 + + + 2 + clickhouse-2.clickhouse-service.monitoring + 9444 + + + + + + clickhouse-0.clickhouse-service.monitoring + 2181 + + + clickhouse-1.clickhouse-service.monitoring + 2181 + + + clickhouse-2.clickhouse-service.monitoring + 2181 + + + + + cluster.xml: | + + + + + + + clickhouse-0.clickhouse-service.monitoring + 9000 + + + + + clickhouse-1.clickhouse-service.monitoring + 9000 + + + + + + macros.xml: | + +
+ + testcluster + + 1 + + diff --git a/roles/charts/clickhouse/templates/clickhouse-cluster/clickhouse-ingress.yaml b/roles/charts/clickhouse/templates/clickhouse-cluster/clickhouse-ingress.yaml new file mode 100644 index 0000000..0e1131d --- /dev/null +++ b/roles/charts/clickhouse/templates/clickhouse-cluster/clickhouse-ingress.yaml @@ -0,0 +1,18 @@ +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + name: clickhouse + namespace: monitoring +spec: + ingressClassName: nginx + rules: + - host: clickhouse.{{ domain }} + http: + paths: + - backend: + service: + name: clickhouse-service + port: + number: 8123 + path: / + pathType: Prefix diff --git a/roles/charts/clickhouse/templates/clickhouse-cluster/clickhouse-service.yaml b/roles/charts/clickhouse/templates/clickhouse-cluster/clickhouse-service.yaml new file mode 100644 index 0000000..e1eec7f --- /dev/null +++ b/roles/charts/clickhouse/templates/clickhouse-cluster/clickhouse-service.yaml @@ -0,0 +1,23 @@ +kind: Service +apiVersion: v1 +metadata: + labels: + app: clickhouse + name: clickhouse-service + namespace: monitoring +spec: + ports: + - name: rest + port: 8123 + - name: keeper + port: 2181 + - name: replica-a + port: 9000 + - name: replica-b + port: 9009 + - name: raft + port: 9444 + + clusterIP: None + selector: + app: clickhouse diff --git a/roles/charts/clickhouse/templates/clickhouse-cluster/clickhouse-statefulset.yml b/roles/charts/clickhouse/templates/clickhouse-cluster/clickhouse-statefulset.yml new file mode 100644 index 0000000..3dd7ff6 --- /dev/null +++ b/roles/charts/clickhouse/templates/clickhouse-cluster/clickhouse-statefulset.yml @@ -0,0 +1,103 @@ +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: clickhouse + namespace: monitoring +spec: + selector: + matchLabels: + app: clickhouse + serviceName: clickhouse-service + replicas: 3 + podManagementPolicy: "Parallel" + # podManagementPolicy: OrderedReady + template: + metadata: + labels: + app: clickhouse + spec: + containers: + - 
name: clickhouse + image: clickhouse/clickhouse-server:22.4.5 + imagePullPolicy: IfNotPresent + workingDir: / + command: + - /bin/bash + - -c + - |- + export CK_INDEX=${HOSTNAME##*-} + echo CK_INDEX=${CK_INDEX} + ./entrypoint.sh + env: + - name: HOSTNAME + valueFrom: + fieldRef: + fieldPath: metadata.name + ports: + - name: rest + containerPort: 8123 + - name: keeper + containerPort: 2181 + - name: replica-a + containerPort: 9000 + - name: replica-b + containerPort: 9009 + - name: raft + containerPort: 9444 + volumeMounts: + - name: clickhouse-config + mountPath: /etc/clickhouse-server/config.d/ + - name: clickhouse-user-config + mountPath: /etc/clickhouse-server/users.d/ + - name: clickhouse-meta + mountPath: /var/lib/clickhouse/coordination/ + - name: clickhouse-data + mountPath: /var/lib/clickhouse/ + volumes: + - name: clickhouse-config + configMap: + name: clickhouse-config + items: + - key: keeper.xml + path: keeper.xml + - key: cluster.xml + path: cluster.xml + - key: macros.xml + path: macros.xml + - name: clickhouse-user-config + configMap: + name: clickhouse-user-config + items: + - key: user.xml + path: user.xml + volumeClaimTemplates: + - apiVersion: v1 + kind: PersistentVolumeClaim + metadata: + labels: + app.kubernetes.io/component: clickhouse + app.kubernetes.io/instance: clickhouse + app.kubernetes.io/name: clickhouse + name: clickhouse-meta + spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 1Gi + volumeMode: Filesystem + - apiVersion: v1 + kind: PersistentVolumeClaim + metadata: + labels: + app.kubernetes.io/component: clickhouse + app.kubernetes.io/instance: clickhouse + app.kubernetes.io/name: clickhouse + name: clickhouse-data + spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 5Gi + volumeMode: Filesystem diff --git a/roles/charts/clickhouse/templates/clickhouse-cluster/clickhouse-user-config.yaml b/roles/charts/clickhouse/templates/clickhouse-cluster/clickhouse-user-config.yaml new file mode 
100644 index 0000000..694ee7d --- /dev/null +++ b/roles/charts/clickhouse/templates/clickhouse-cluster/clickhouse-user-config.yaml @@ -0,0 +1,19 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: clickhouse-user-config + namespace: monitoring +data: + user.xml: | + + + + + 10000000000 + 4000 + 4096 + 4096 + random + + + diff --git a/roles/charts/clickhouse/templates/otel-collector/configmap.yaml b/roles/charts/clickhouse/templates/otel-collector/configmap.yaml new file mode 100644 index 0000000..46148e5 --- /dev/null +++ b/roles/charts/clickhouse/templates/otel-collector/configmap.yaml @@ -0,0 +1,142 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: otel-collector-config + namespace: default +data: + config.yaml: | + receivers: + loki: + use_incoming_timestamp: true + protocols: + http: + endpoint: 0.0.0.0:3100 + grpc: + endpoint: 0.0.0.0:3200 + syslog: + protocol: rfc5424 + tcp: + listen_address: "0.0.0.0:5514" + fluentforward: + endpoint: 0.0.0.0:24224 + splunk_hec: + endpoint: 0.0.0.0:8088 + otlp: + protocols: + grpc: + endpoint: 0.0.0.0:4317 + http: + endpoint: 0.0.0.0:4318 + jaeger: + protocols: + grpc: + endpoint: 0.0.0.0:14250 + thrift_http: + endpoint: 0.0.0.0:14268 + zipkin: + endpoint: 0.0.0.0:9411 + skywalking: + protocols: + grpc: + endpoint: 0.0.0.0:11800 + http: + endpoint: 0.0.0.0:12800 + prometheus: + config: + scrape_configs: + - job_name: 'otel-collector' + scrape_interval: 5s + static_configs: + - targets: ['exporter:8080'] + influxdb: + endpoint: 0.0.0.0:8086 + + connectors: + servicegraph: + latency_histogram_buckets: [ 100us, 1ms, 2ms, 6ms, 10ms, 100ms, 250ms ] + dimensions: [ cluster, namespace ] + store: + ttl: 2s + max_items: 1000 + cache_loop: 2m + store_expiration_loop: 2s + virtual_node_peer_attributes: + - db.name + - rpc.service + spanmetrics: + namespace: span.metrics + exemplars: + enabled: false + dimensions_cache_size: 1000 + aggregation_temporality: 'AGGREGATION_TEMPORALITY_CUMULATIVE' + metrics_flush_interval: 30s + 
metrics_expiration: 5m + events: + enabled: false + + processors: + batch: + send_batch_size: 10000 + timeout: 5s + memory_limiter: + check_interval: 2s + limit_mib: 1800 + spike_limit_mib: 500 + resourcedetection/system: + detectors: ['system'] + system: + hostname_sources: ['os'] + resource: + attributes: + - key: service.name + value: "serviceName" + action: upsert + metricstransform: + transforms: + - include: calls_total + action: update + new_name: traces_spanmetrics_calls_total + - include: latency + action: update + new_name: traces_spanmetrics_latency + + exporters: + qryn: + dsn: tcp://clickhouse-server:9000/qryn?username=default&password=************* + timeout: 10s + sending_queue: + queue_size: 100 + retry_on_failure: + enabled: true + initial_interval: 5s + max_interval: 30s + max_elapsed_time: 300s + logs: + format: raw + otlp/spanmetrics: + endpoint: localhost:4317 + tls: + insecure: true + + extensions: + health_check: + pprof: + zpages: + + service: + extensions: [pprof, zpages, health_check] + pipelines: + logs: + receivers: [fluentforward, otlp, loki, syslog, splunk_hec] + processors: [memory_limiter, resourcedetection/system, resource, batch] + exporters: [qryn] + traces: + receivers: [otlp, jaeger, zipkin, skywalking] + processors: [memory_limiter, resourcedetection/system, resource, batch] + exporters: [qryn, spanmetrics, servicegraph] + metrics: + receivers: [prometheus, influxdb, spanmetrics, servicegraph] + processors: [memory_limiter, resourcedetection/system, resource, batch] + exporters: [qryn] + + diff --git a/roles/charts/clickhouse/templates/otel-collector/deployment.yaml b/roles/charts/clickhouse/templates/otel-collector/deployment.yaml new file mode 100644 index 0000000..3017f46 --- /dev/null +++ b/roles/charts/clickhouse/templates/otel-collector/deployment.yaml @@ -0,0 +1,42 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: otel-collector + namespace: default + labels: + app: otel-collector +spec: + replicas: 1 + 
selector: + matchLabels: + app: otel-collector + template: + metadata: + labels: + app: otel-collector + spec: + containers: + - name: otel-collector + image: ghcr.io/metrico/qryn-otel-collector:latest + volumeMounts: + - name: config + mountPath: /etc/otel + subPath: config.yaml + ports: + - containerPort: 3100 + - containerPort: 3200 + - containerPort: 8088 + - containerPort: 5514 + - containerPort: 24224 + - containerPort: 4317 + - containerPort: 4318 + - containerPort: 14250 + - containerPort: 14268 + - containerPort: 9411 + - containerPort: 11800 + - containerPort: 12800 + - containerPort: 8086 + volumes: + - name: config + configMap: + name: otel-collector-config diff --git a/roles/charts/clickhouse/templates/otel-collector/ingress.yaml b/roles/charts/clickhouse/templates/otel-collector/ingress.yaml new file mode 100644 index 0000000..508b25c --- /dev/null +++ b/roles/charts/clickhouse/templates/otel-collector/ingress.yaml @@ -0,0 +1,19 @@ +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + name: otel-collector-ingress + namespace: default + annotations: + nginx.ingress.kubernetes.io/rewrite-target: / +spec: + rules: + - host: your-domain.example.com + http: + paths: + - path: /api + pathType: Prefix + backend: + service: + name: otel-collector + port: + number: 3100 diff --git a/roles/charts/clickhouse/templates/otel-collector/service.yaml b/roles/charts/clickhouse/templates/otel-collector/service.yaml new file mode 100644 index 0000000..7afcf54 --- /dev/null +++ b/roles/charts/clickhouse/templates/otel-collector/service.yaml @@ -0,0 +1,48 @@ +apiVersion: v1 +kind: Service +metadata: + name: otel-collector + namespace: default +spec: + ports: + - port: 3100 + targetPort: 3100 + protocol: TCP + - port: 3200 + targetPort: 3200 + protocol: TCP + - port: 8088 + targetPort: 8088 + protocol: TCP + - port: 5514 + targetPort: 5514 + protocol: TCP + - port: 24224 + targetPort: 24224 + protocol: TCP + - port: 4317 + targetPort: 4317 + protocol: TCP + - port: 
4318 + targetPort: 4318 + protocol: TCP + - port: 14250 + targetPort: 14250 + protocol: TCP + - port: 14268 + targetPort: 14268 + protocol: TCP + - port: 9411 + targetPort: 9411 + protocol: TCP + - port: 11800 + targetPort: 11800 + protocol: TCP + - port: 12800 + targetPort: 12800 + protocol: TCP + - port: 8086 + targetPort: 8086 + protocol: TCP + selector: + app: otel-collector diff --git a/roles/charts/clickhouse/templates/postsetup.sh b/roles/charts/clickhouse/templates/postsetup.sh new file mode 100755 index 0000000..77eddd3 --- /dev/null +++ b/roles/charts/clickhouse/templates/postsetup.sh @@ -0,0 +1,27 @@ +#!/bin/bash + +#检查 ClickHouse 版本 +#clickhouse-client --version | grep -q "21.8" +#if [ $? -ne 0 ]; then +#echo "ClickHouse 的版本必须至少为 21.8" +#exit 1 +#fi + +创建数据库 +for db in deepflow_system event ext_metrics flow_log flow_metrics flow_tag profile; do +clickhouse-client -u admin -p admin -q "CREATE DATABASE $db" +done + +创建用户 +clickhouse-client -u admin -p admin -q "CREATE USER admin IDENTIFIED WITH PLAINTEXT_PASSWORD BY 'admin'" +clickhouse-client -u admin -p admin -q "CREATE USER deepflow IDENTIFIED WITH PLAINTEXT_PASSWORD BY 'deepflow'" + +授权账户 +clickhouse-client -u admin -p admin -q "GRANT ALL ON . 
TO admin" +clickhouse-client -u admin -p admin -q "GRANT SELECT ON deepflow_system.* TO deepflow" +clickhouse-client -u admin -p admin -q "GRANT SELECT ON event.* TO deepflow" +clickhouse-client -u admin -p admin -q "GRANT SELECT ON ext_metrics.* TO deepflow" +clickhouse-client -u admin -p admin -q "GRANT SELECT ON flow_log.* TO deepflow" +clickhouse-client -u admin -p admin -q "GRANT SELECT ON flow_metrics.* TO deepflow" +clickhouse-client -u admin -p admin -q "GRANT SELECT ON flow_tag.* TO deepflow" +clickhouse-client -u admin -p admin -q "GRANT SELECT ON profile.* TO deepflow" diff --git a/roles/charts/clickhouse/templates/qryn/qryn-deployment.yaml b/roles/charts/clickhouse/templates/qryn/qryn-deployment.yaml new file mode 100644 index 0000000..4a2f4bc --- /dev/null +++ b/roles/charts/clickhouse/templates/qryn/qryn-deployment.yaml @@ -0,0 +1,36 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: qryn + namespace: monitoring + labels: + io.metrico.service: qryn +spec: + replicas: 2 + selector: + matchLabels: + io.metrico.service: qryn + strategy: {} + template: + metadata: + annotations: + qryn.cmd: qryn.dev + creationTimestamp: null + labels: + io.metrico.service: qryn + spec: + containers: + - env: + - name: CLICKHOUSE_AUTH + value: "default" + - name: CLICKHOUSE_PORT + value: "8123" + - name: CLICKHOUSE_SERVER + value: "{{ hostvars[groups[group][0]].ck_node_ip }}" + image: qxip/qryn + name: qryn + ports: + - containerPort: 3100 + resources: {} + restartPolicy: Always +status: {} diff --git a/roles/charts/clickhouse/templates/qryn/qryn-ingress.yaml b/roles/charts/clickhouse/templates/qryn/qryn-ingress.yaml new file mode 100644 index 0000000..09f6caf --- /dev/null +++ b/roles/charts/clickhouse/templates/qryn/qryn-ingress.yaml @@ -0,0 +1,24 @@ +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + name: data-gateway + namespace: monitoring + annotations: + nginx.ingress.kubernetes.io/ssl-redirect: "true" +spec: + ingressClassName: nginx + rules: + 
- host: data-gateway.{{ domain }} + http: + paths: + - backend: + service: + name: qryn + port: + number: 3100 + path: / + pathType: Prefix + tls: + - hosts: + - data-gateway.{{ domain }} + secretName: obs-tls diff --git a/roles/charts/clickhouse/templates/qryn/qryn-service.yaml b/roles/charts/clickhouse/templates/qryn/qryn-service.yaml new file mode 100644 index 0000000..178462e --- /dev/null +++ b/roles/charts/clickhouse/templates/qryn/qryn-service.yaml @@ -0,0 +1,15 @@ +apiVersion: v1 +kind: Service +metadata: + creationTimestamp: null + labels: + io.metrico.service: qryn + name: qryn + namespace: monitoring +spec: + ports: + - name: "3100" + port: 3100 + targetPort: 3100 + selector: + io.metrico.service: qryn diff --git a/roles/charts/deepflow/Readme.md b/roles/charts/deepflow/Readme.md new file mode 100644 index 0000000..ab6a38e --- /dev/null +++ b/roles/charts/deepflow/Readme.md @@ -0,0 +1,12 @@ + +# 统计存储数据 + +select formatReadableSize(sum(rows)) as "每天写入行数", formatReadableSize(sum(bytes_on_disk)) as "每天落盘的字节", formatReadableSize(sum(data_uncompressed_bytes)) as "压缩前字节", sum(data_uncompressed_bytes)/sum(bytes_on_disk) as "压缩比", sum(rows)/86400 as "平均每秒写入的行数" from cluster(df_cluster, system.parts) where partition like '%2024-12-03%' limit 10; + + + 可以grafana再 查下确认下,流日志的统计: +select min(partition),max(partition),formatReadableSize(sum(rows)) as "每天写入行数", formatReadableSize(sum(bytes_on_disk)) as "每天落盘的字节", formatReadableSize(sum(data_uncompressed_bytes)) as "压缩前字节", sum(data_uncompressed_bytes)/sum(bytes_on_disk) as "压缩比", sum(rows)/86400 as "平均每秒写入的行数" from cluster(df_cluster, system.parts) where partition like '%2024-12-03%' and table='l4_flow_log_local' limit 10; + +调用日志的统计: +select min(partition),max(partition),formatReadableSize(sum(rows)) as "每天写入行数", formatReadableSize(sum(bytes_on_disk)) as "每天落盘的字节", formatReadableSize(sum(data_uncompressed_bytes)) as "压缩前字节", sum(data_uncompressed_bytes)/sum(bytes_on_disk) as "压缩比", sum(rows)/86400 as "平均每秒写入的行数" from 
cluster(df_cluster, system.parts) where partition like '%2024-12-03%' and table='l7_flow_log_local' limit 10; + diff --git a/roles/charts/deepflow/files/post-setup.sh b/roles/charts/deepflow/files/post-setup.sh new file mode 100644 index 0000000..4f6d70d --- /dev/null +++ b/roles/charts/deepflow/files/post-setup.sh @@ -0,0 +1,7 @@ +sudo apt-get install -y apt-transport-https ca-certificates curl gnupg +curl -fsSL 'https://packages.clickhouse.com/rpm/lts/repodata/repomd.xml.key' | sudo gpg --dearmor -o /usr/share/keyrings/clickhouse-keyring.gpg + +echo "deb [signed-by=/usr/share/keyrings/clickhouse-keyring.gpg] https://packages.clickhouse.com/deb stable main" | sudo tee \ + /etc/apt/sources.list.d/clickhouse.list +sudo apt-get update +sudo apt-get install -y clickhouse-client diff --git a/roles/charts/deepflow/files/pre-setup.sh b/roles/charts/deepflow/files/pre-setup.sh new file mode 100644 index 0000000..1da41ac --- /dev/null +++ b/roles/charts/deepflow/files/pre-setup.sh @@ -0,0 +1,6 @@ +#!/bin/bash +export namespace=$1 + +export MYSQL_ROOT_PASSWORD=$(kubectl get secret --namespace $namespace mysql -o jsonpath="{.data.mysql-root-password}" | base64 -d) + +kubectl run mysql-client --rm --tty -i --restart='Never' --image docker.io/bitnami/mysql:8.0.32-debian-11-r14 --namespace $namespace --env MYSQL_ROOT_PASSWORD=$MYSQL_ROOT_PASSWORD --command -- bash -c "mysql -h mysql.database.svc.cluster.local -uroot -p$MYSQL_ROOT_PASSWORD -e 'create database IF NOT EXISTS jenkins;'" diff --git a/roles/charts/deepflow/files/setup.sh b/roles/charts/deepflow/files/setup.sh new file mode 100644 index 0000000..ef67087 --- /dev/null +++ b/roles/charts/deepflow/files/setup.sh @@ -0,0 +1,29 @@ +#!/bin/bash +set -x +export domain=$1 +export secret=$2 +export namespace=$3 + +cat << EOF > values-custom.yaml +clickhouse: + enabled: true +server: + enabled: true +deepflow-agent: + enabled: true +grafana: + enabled: true + service: + ingress: + enabled: true + ingressClassName: nginx + 
hosts: + - grafana.onwalk.net + tls: + - secretName: obs-tls + hosts: + - grafana.onwalk.net +EOF +helm repo add deepflow https://deepflowio.github.io/deepflow +helm repo update deepflow # use `helm repo update` when helm < 3.7.0 +helm upgrade --install deepflow -n monitoring deepflow/deepflow --create-namespace --version 6.4.9 -f values-custom.yaml diff --git a/roles/charts/deepflow/meta/main.yml b/roles/charts/deepflow/meta/main.yml new file mode 100644 index 0000000..1f2217b --- /dev/null +++ b/roles/charts/deepflow/meta/main.yml @@ -0,0 +1,2 @@ +dependencies: + - role: secret-manger diff --git a/roles/charts/deepflow/tasks/main.yml b/roles/charts/deepflow/tasks/main.yml new file mode 100755 index 0000000..a06e1b1 --- /dev/null +++ b/roles/charts/deepflow/tasks/main.yml @@ -0,0 +1,19 @@ +#- name: get mysql db password +# shell: 'kubectl get secret --namespace database mysql -o jsonpath="{.data.mysql-root-password}" | base64 -d' +# register: mysql_db_password_raw +# when: inventory_hostname in groups[group][0] +# +#- name: set fact join command +# set_fact: +# mysql_db_password : "{{ mysql_db_password_raw.stdout }}" +# when: inventory_hostname in groups[group][0] +# +#- name: DB Pre Setup for Jenkins Server +# script: files/pre-setup.sh {{ db_namespace }} +# when: inventory_hostname in groups[group] +# script: files/setup.sh {{ domain }} {{ item.secret_name }} {{ namespace }} {{ mysql_db_password }} + +- name: Setup Deepflow Cluster + script: files/setup.sh {{ domain }} {{ item.secret_name }} {{ namespace }} + when: inventory_hostname in groups[group] and ( tls is defined) + loop: "{{ tls | default([]) }}" # loop is templated before 'when'; an undefined tls must yield an empty list, not an error diff --git a/roles/charts/embedding-service/README.md b/roles/charts/embedding-service/README.md new file mode 100644 index 0000000..102a312 --- /dev/null +++ b/roles/charts/embedding-service/README.md @@ -0,0 +1,3 @@ +# embedding-service (chart) + +Placeholder role for managing the Helm chart release of embedding-service.
diff --git a/roles/charts/embedding-service/tasks/main.yml b/roles/charts/embedding-service/tasks/main.yml new file mode 100644 index 0000000..bed8247 --- /dev/null +++ b/roles/charts/embedding-service/tasks/main.yml @@ -0,0 +1,5 @@ +--- +# TODO: implement Helm release tasks +- name: Placeholder task + debug: + msg: "Role placeholder. Implement Helm release tasks." diff --git a/roles/charts/feast/README.md b/roles/charts/feast/README.md new file mode 100644 index 0000000..8027d5c --- /dev/null +++ b/roles/charts/feast/README.md @@ -0,0 +1,3 @@ +# feast (chart) + +Placeholder role for managing the Helm chart release of feast. diff --git a/roles/charts/feast/tasks/main.yml b/roles/charts/feast/tasks/main.yml new file mode 100644 index 0000000..bed8247 --- /dev/null +++ b/roles/charts/feast/tasks/main.yml @@ -0,0 +1,5 @@ +--- +# TODO: implement Helm release tasks +- name: Placeholder task + debug: + msg: "Role placeholder. Implement Helm release tasks." diff --git a/roles/charts/flagger-loadtester/files/setup.sh b/roles/charts/flagger-loadtester/files/setup.sh new file mode 100644 index 0000000..cd31a9a --- /dev/null +++ b/roles/charts/flagger-loadtester/files/setup.sh @@ -0,0 +1,47 @@ +#!/bin/bash +set -x + +# 检查参数是否为空 +check_not_empty() { + if [[ -z $1 ]]; then + echo "Error: $2 is empty. Please provide a value." 
+ exit 1 + fi +} + +# 检查参数是否为空 +check_not_empty "$1" "DOMAIN" && DOMAIN=$1 + +helm repo add flagger https://flagger.app +kubectl create ns monitoring || true +helm upgrade -i flaggerloadtester flagger/loadtester --namespace=monitoring + +cat > flagger-loadtester-ingress.yaml << EOF +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + annotations: + nginx.ingress.kubernetes.io/ssl-redirect: "true" + name: flagger + namespace: monitoring +spec: + ingressClassName: apisix + rules: + - host: flaggerloadtester.${DOMAIN} + http: + paths: + - backend: + service: + name: flagger-loadtester + port: + number: 80 + path: / + pathType: Prefix + tls: + - hosts: + - flaggerloadtester.${DOMAIN} + secretName: obs-tls +EOF + +kubectl apply -f flagger-loadtester-ingress.yaml + diff --git a/roles/charts/flagger-loadtester/meta/main.yml b/roles/charts/flagger-loadtester/meta/main.yml new file mode 100644 index 0000000..1f2217b --- /dev/null +++ b/roles/charts/flagger-loadtester/meta/main.yml @@ -0,0 +1,2 @@ +dependencies: + - role: secret-manger diff --git a/roles/charts/flagger-loadtester/tasks/main.yml b/roles/charts/flagger-loadtester/tasks/main.yml new file mode 100755 index 0000000..803e4eb --- /dev/null +++ b/roles/charts/flagger-loadtester/tasks/main.yml @@ -0,0 +1,4 @@ +- name: Setup Loadtester Server + script: files/setup.sh {{ domain }} + when: inventory_hostname in groups[group] + diff --git a/roles/charts/flink-operator/README.md b/roles/charts/flink-operator/README.md new file mode 100644 index 0000000..fae40e4 --- /dev/null +++ b/roles/charts/flink-operator/README.md @@ -0,0 +1,3 @@ +# flink-operator (chart) + +Placeholder role for managing the Helm chart release of flink-operator. 
diff --git a/roles/charts/flink-operator/tasks/main.yml b/roles/charts/flink-operator/tasks/main.yml new file mode 100644 index 0000000..bed8247 --- /dev/null +++ b/roles/charts/flink-operator/tasks/main.yml @@ -0,0 +1,5 @@ +--- +# TODO: implement Helm release tasks +- name: Placeholder task + debug: + msg: "Role placeholder. Implement Helm release tasks." diff --git a/roles/charts/gitlab/files/post-setup.sh b/roles/charts/gitlab/files/post-setup.sh new file mode 100755 index 0000000..c994271 --- /dev/null +++ b/roles/charts/gitlab/files/post-setup.sh @@ -0,0 +1,30 @@ +#!/bin/bash + +kubectl delete hpa --all -A + +# 获取所有部署 +DEPLOYMENTS=$(kubectl get deploy -n gitlab -o jsonpath='{.items[*].metadata.name}') + +# 遍历部署并设置副本数为1 +for DEPLOY in $DEPLOYMENTS +do + echo "Setting replicas=1 for deployment $DEPLOY" + kubectl scale deploy/$DEPLOY -n gitlab --replicas=1 +done + +# 遍历部署并获取 CPU 和内存配置 +for DEPLOY in $DEPLOYMENTS +do + echo "Deployment: $DEPLOY" + echo "====================" + kubectl get deploy $DEPLOY -n gitlab -o=jsonpath='{range .spec.template.spec.containers[*]}{.name}:{"\n"}{"\t"}cpu: {.resources.requests.cpu}{"\n"}{"\t"}mem: {.resources.requests.memory}{"\n"}{end}' + echo "====================" +done + +# 遍历部署并设置 CPU 和内存请求 +#for DEPLOY in $DEPLOYMENTS +#do +# echo "Setting cpu=0.1 and mem=100m for deployment $DEPLOY" +# kubectl patch deployment $DEPLOY -n gitlab -p '{"spec": {"template": {"spec": {"containers": [{"name": "'$DEPLOY'", "resources": {"requests": {"cpu": "0.1", "memory": "100m"}}}]}}}}' +# echo "====================" +#done diff --git a/roles/charts/gitlab/files/pre-setup.sh b/roles/charts/gitlab/files/pre-setup.sh new file mode 100755 index 0000000..34a5c71 --- /dev/null +++ b/roles/charts/gitlab/files/pre-setup.sh @@ -0,0 +1,9 @@ +#!/bin/bash +set +x + +export namespace=$1 +export POSTGRES_PASSWORD=$(kubectl get secret --namespace $namespace postgresql -o jsonpath="{.data.postgres-password}" | base64 -d) + +kubectl run postgresql-client --rm 
--tty -i --restart='Never' --namespace $namespace --image docker.io/bitnami/postgresql:15.2.0-debian-11-r11 --env="PGPASSWORD=$POSTGRES_PASSWORD" --command -- psql --host postgresql -U postgres -d postgres -p 5432 -w -c "CREATE DATABASE gitlabhq_production OWNER postgres;" || echo true + +kubectl run postgresql-client --rm --tty -i --restart='Never' --namespace $namespace --image docker.io/bitnami/postgresql:15.2.0-debian-11-r11 --env="PGPASSWORD=$POSTGRES_PASSWORD" --command -- psql --host postgresql -U postgres -d gitlabhq_production -p 5432 -w -c "CREATE EXTENSION IF NOT EXISTS plpgsql; CREATE EXTENSION IF NOT EXISTS pg_trgm; CREATE EXTENSION IF NOT EXISTS btree_gist;" || echo true diff --git a/roles/charts/gitlab/files/setup-with-oidc.sh b/roles/charts/gitlab/files/setup-with-oidc.sh new file mode 100644 index 0000000..b994887 --- /dev/null +++ b/roles/charts/gitlab/files/setup-with-oidc.sh @@ -0,0 +1,106 @@ +#!/bin/bash + +domain=$1 +namespace=$2 +object_bucket=$3 +gitlab_secret=$4 +gitlab_stmp_secret=$5 +smtp_port=$7 +smtp_domain=$8 +smtp_address=$9 +smtp_username=${10} # positional parameters >= 10 need braces; a bare $10 expands to "$1" followed by "0" +smtp_emailfrom=${11} +smtp_display_name=${12} +oidc_issuer_url=${13} +oidc_client_id=${14} +oidc_client_token=${15} + +cat > gitlab-values.yaml < gitlab-values.yaml < gitlab-values.yaml < values.yaml << EOF +global: + imageRegistry: "$registry" +exposureType: ingress +ingress: + core: + ingressClassName: "nginx" + hostname: images.${domain} + extraTls: + - hosts: + - images.${domain} + secretName: "$secret_name" +externalURL: https://images.${domain} + +postgresql: + enabled: false +redis: + enabled: false +notary: + enabled: false +trivy: + enabled: false + +externalDatabase: + host: postgresql.database.svc.cluster.local + user: postgres + port: 5432 + password: "$pg_db_password" + sslmode: disable + coreDatabase: harbor_core + clairDatabase: harbor_clair + clairUsername: "postgres" + clairPassword: "$pg_db_password" + notaryServerDatabase: harbor_notary_server + notaryServerUsername: "postgres" +
notaryServerPassword: "$pg_db_password" + notarySignerDatabase: harbor_notary_signer + notarySignerUsername: "postgres" + notarySignerPassword: "$pg_db_password" +externalRedis: + host: redis-master.redis.svc.cluster.local + port: 6379 + password: "$redis_password" +persistence: + enabled: true + imageChartStorage: + type: $backend_type + oss: + accesskeyid: $ak + accesskeysecret: $sk + region: "oss-cn-wulanchabu" + bucket: "harbor-oss" + endpoint: "oss-cn-wulanchabu.aliyuncs.com" + s3: + region: ap-east-1 + bucket: artifact-s3 + accesskey: $ak + secretkey: $sk +EOF + +export KUBECONFIG=/etc/rancher/k3s/k3s.yaml +helm repo add bitnami https://charts.bitnami.com/bitnami +helm repo update bitnami +kubectl create ns $namespace || true +helm upgrade --install artifact bitnami/harbor --version=16.7.0 -f values.yaml -n $namespace diff --git a/roles/charts/harbor/files/setup-office-harbor.sh b/roles/charts/harbor/files/setup-office-harbor.sh new file mode 100644 index 0000000..7cf3ec3 --- /dev/null +++ b/roles/charts/harbor/files/setup-office-harbor.sh @@ -0,0 +1,91 @@ +#!/bin/bash + +ak=$1 +sk=$2 +domain=$3 +namespace=$4 +secret_name=$5 +redis_password=$6 +pg_db_password=$7 +storage_type=$8 + +cat > harbor-arm-config.yaml << EOF +portal: + image: + repository: ghcr.io/octohelm/harbor/harbor-portal + tag: v2.7.0@sha256:b3f4e0e990500362b554338579497ad89af5473e024564731563704ceab9305b +core: + image: + repository: ghcr.io/octohelm/harbor/harbor-core + tag: v2.7.0@sha256:dd7f3898f32caf8e03cee046596f03034f4297231458d4de39775dd58709b55a +jobservice: + image: + repository: ghcr.io/octohelm/harbor/harbor-jobservice + tag: v2.7.0@sha256:7abd6694f546172ffec4a87e389e8ba425fa6ee82479782693c120a89a291435 +registry: + registry: + image: + repository: ghcr.io/octohelm/harbor/registry-photon + tag: v2.7.0@sha256:d5f23b2bc4271b2eb1ec002eb0c0c51e708015944316e5bd17c61de73ea54415 + controller: + image: + repository: ghcr.io/svc-design/harbor-multi-arch-images/harbor-registryctl + tag: 
v2.7.0@sha256:ba2412c1a629ca1c2ca4584ba51eb05e964c7eef7b1f9f6ddb39d67512debaf5 +chartmuseum: + enabled: true + image: + repository: ghcr.io/octohelm/harbor/chartmuseum-photon + tag: v2.7.0@sha256:0815066d46474b9403b2d2e5f6f9e2ae44d067d8d2f8523b95ea3d3f20f3d058 +trivy: + enabled: false +notary: + enabled: false +expose: + type: ingress + tls: + enabled: true + certSource: secret + secret: + secretName: $secret_name + notarySecretName: $secret_name + ingress: + hosts: + core: harbor.${domain} + notary: artifact-notary.${domain} + className: "nginx" +externalURL: https://artifact.${domain} +database: + type: external + external: + host: "postgresql.database.svc.cluster.local" + port: "5432" + username: "postgres" + password: "$pg_db_password" + coreDatabase: "registry" + notaryServerDatabase: "notary_server" + notarySignerDatabase: "notary_signer" +redis: + type: external + external: + addr: "redis-master.redis.svc.cluster.local:6379" + password: "$redis_password" +persistence: + imageChartStorage: + type: $storage_type + oss: + accesskeyid: $ak + accesskeysecret: $sk + region: "oss-cn-wulanchabu" + bucket: "harbor-s3" + endpoint: "oss-cn-wulanchabu.aliyuncs.com" + s3: + region: ap-east-1 + bucket: artifact-s3 + accesskey: $ak + secretkey: $sk +EOF + +export KUBECONFIG=/etc/rancher/k3s/k3s.yaml +helm repo add harbor https://helm.goharbor.io +helm repo update +helm upgrade --install artifact harbor/harbor -f harbor-arm-config.yaml --version 1.11.1 -n $namespace diff --git a/roles/charts/harbor/meta/main.yml b/roles/charts/harbor/meta/main.yml new file mode 100644 index 0000000..2d3db3f --- /dev/null +++ b/roles/charts/harbor/meta/main.yml @@ -0,0 +1,4 @@ +dependencies: + - role: redis + - role: postgresql + - role: secret-manger diff --git a/roles/charts/harbor/tasks/main.yml b/roles/charts/harbor/tasks/main.yml new file mode 100755 index 0000000..23916e8 --- /dev/null +++ b/roles/charts/harbor/tasks/main.yml @@ -0,0 +1,38 @@ +- name: get redis password + shell: 
'kubectl get secret --namespace {{ cache_namespace }} redis -o jsonpath="{.data.redis-password}" | base64 -d' + register: redis_command_raw + when: inventory_hostname in groups[group][0] + +- name: set fact join command for redis + set_fact: + redis_password : "{{ redis_command_raw.stdout | default('') }}" # the register result has no stdout on hosts where the lookup above was skipped + +- name: get db password + shell: 'kubectl get secret --namespace {{ db_namespace }} postgresql -o jsonpath="{.data.postgres-password}" | base64 -d' + register: db_command_raw + when: inventory_hostname in groups[group][0] + +- name: set fact join command for pg_db + set_fact: + pg_db_password : "{{ db_command_raw.stdout }}" + when: inventory_hostname in groups[group][0] + +#- name: Show Debug Info +# debug: var=command_raw verbosity=0 + +- name: Pre Setup harbor DB + script: files/pre-setup.sh {{ namespace }} + when: inventory_hostname in groups[group] + +- name: Setup harbor Server + script: files/setup-bitnami-harbor.sh {{ oss_ak }} {{ oss_sk }} {{ domain }} {{ namespace }} {{ item.secret_name }} {{ hostvars[groups[group][0]].redis_password }} {{ hostvars[groups[group][0]].pg_db_password }} {{ backend_type }} {{ registry }} + loop: "{{ tls }}" + when: inventory_hostname in groups[group] + +#- name: Sync harbor-oidc-config.json +# template: src=templates/{{ item }} dest=/tmp/{{ item }} owner=root group=root mode=0644 force=yes unsafe_writes=yes +# with_items: +# - harbor-oidc-config.json + +#- name: Setup harbor oidc config +# script: files/post-setup.sh {{ admin_password }} diff --git a/roles/charts/harbor/templates/harbor-oidc-config.json b/roles/charts/harbor/templates/harbor-oidc-config.json new file mode 100644 index 0000000..a42d602 --- /dev/null +++ b/roles/charts/harbor/templates/harbor-oidc-config.json @@ -0,0 +1,11 @@ +{ + "auth_mode": "oidc_auth", + "oidc_name": "Keycloak-sso", + "oidc_endpoint": "https://keycloak.onwalk.net/realms/cloud-sso", + "oidc_client_id": "harbor-oidc", + "oidc_client_secret": "{{ harbor_oidc_client_token }}", + "oidc_scope":
"openid,profile,email", + "oidc_groups_claim": "groups", + "oidc_auto_onboard": true, + "oidc_user_claim": "preferred_username" +} diff --git a/roles/charts/harbor/vars/main.yml b/roles/charts/harbor/vars/main.yml new file mode 100644 index 0000000..f079258 --- /dev/null +++ b/roles/charts/harbor/vars/main.yml @@ -0,0 +1,9 @@ +group: master +namespace: artifact +db_namespace: database +cache_namespace: redis +update_secret: true +tls: + - secret_name: harbor-tls + keyfile: /etc/ssl/svc.plus.key + certfile: /etc/ssl/svc.plus.pem diff --git a/roles/charts/helm-repos/tasks/main.yml b/roles/charts/helm-repos/tasks/main.yml new file mode 100644 index 0000000..3b02e75 --- /dev/null +++ b/roles/charts/helm-repos/tasks/main.yml @@ -0,0 +1,9 @@ +- name: Enable community plugins and third-party helm charts + shell: | + helm repo add kubernetes-dashboard https://kubernetes.github.io/dashboard/ || true + helm repo add nvidia https://helm.ngc.nvidia.com/nvidia || true + helm repo add prometheus-community https://prometheus-community.github.io/helm-charts || true + helm repo add metrics-server https://kubernetes-sigs.github.io/metrics-server/ || true + helm repo update + ignore_errors: yes + when: is_primary | bool diff --git a/roles/charts/iceberg-bucket/README.md b/roles/charts/iceberg-bucket/README.md new file mode 100644 index 0000000..ec0bf0d --- /dev/null +++ b/roles/charts/iceberg-bucket/README.md @@ -0,0 +1,3 @@ +# iceberg-bucket (chart) + +Placeholder role for managing the Helm chart release of iceberg-bucket. diff --git a/roles/charts/iceberg-bucket/tasks/main.yml b/roles/charts/iceberg-bucket/tasks/main.yml new file mode 100644 index 0000000..bed8247 --- /dev/null +++ b/roles/charts/iceberg-bucket/tasks/main.yml @@ -0,0 +1,5 @@ +--- +# TODO: implement Helm release tasks +- name: Placeholder task + debug: + msg: "Role placeholder. Implement Helm release tasks." 
diff --git a/roles/charts/inference-gateway/README.md b/roles/charts/inference-gateway/README.md new file mode 100644 index 0000000..d0943dd --- /dev/null +++ b/roles/charts/inference-gateway/README.md @@ -0,0 +1,3 @@ +# inference-gateway (chart) + +Placeholder role for managing the Helm chart release of inference-gateway. diff --git a/roles/charts/inference-gateway/tasks/main.yml b/roles/charts/inference-gateway/tasks/main.yml new file mode 100644 index 0000000..bed8247 --- /dev/null +++ b/roles/charts/inference-gateway/tasks/main.yml @@ -0,0 +1,5 @@ +--- +# TODO: implement Helm release tasks +- name: Placeholder task + debug: + msg: "Role placeholder. Implement Helm release tasks." diff --git a/roles/charts/jenkins/files/pre-setup.sh b/roles/charts/jenkins/files/pre-setup.sh new file mode 100644 index 0000000..1da41ac --- /dev/null +++ b/roles/charts/jenkins/files/pre-setup.sh @@ -0,0 +1,6 @@ +#!/bin/bash +export namespace=$1 + +export MYSQL_ROOT_PASSWORD=$(kubectl get secret --namespace $namespace mysql -o jsonpath="{.data.mysql-root-password}" | base64 -d) + +kubectl run mysql-client --rm --tty -i --restart='Never' --image docker.io/bitnami/mysql:8.0.32-debian-11-r14 --namespace $namespace --env MYSQL_ROOT_PASSWORD=$MYSQL_ROOT_PASSWORD --command -- bash -c "mysql -h mysql.database.svc.cluster.local -uroot -p$MYSQL_ROOT_PASSWORD -e 'create database IF NOT EXISTS jenkins;'" diff --git a/roles/charts/jenkins/files/setup.sh b/roles/charts/jenkins/files/setup.sh new file mode 100644 index 0000000..9d8310c --- /dev/null +++ b/roles/charts/jenkins/files/setup.sh @@ -0,0 +1,86 @@ +#!/bin/bash +set -x +export domain=$1 +export secret=$2 +export namespace=$3 +export mysql_db_password=$4 + +cat > values.yaml << EOF + +controller: + agentListenerServiceType: "NodePort" + agentListenerNodePort: 50000 + admin: + username: 'admin' + password: "jenkins" + jenkinsUrlProtocol: "https" + jenkinsHome: "/var/jenkins_home" + jenkinsUrl: https://jenkins.$domain + ingress: + enabled: 
true + annotations: + kubernetes.io/tls-acme: "false" + ingressClassName: nginx + hostName: jenkins.$domain + path: '/' + tls: + - secretName: $secret + hosts: + - jenkins.$domain + installLatestPlugins: true + installPlugins: + - git:5.2.1 + - github:1.38.0 + - github-pullrequest:0.7.0 + - locale:314.v22ce953dfe9e + - database-mysql:1.4 + - database:191.vd5981b_97a_5fa_ + - credentials:1337.v60b_d7b_c7b_c9f + - credentials-binding:642.v737c34dea_6c2 # version bumped to satisfy plugin dependencies + - configuration-as-code:1775.v810dc950b_514 # version bumped to satisfy plugin dependencies + - gitlab-plugin:1.7.16 + - kubernetes:4029.v5712230ccb_f8 + - docker-plugin:1.6 + - docker-workflow:572.v950f58993843 + - docker-commons:439.va_3cb_0a_6a_fb_29 + - pipeline-stage-view:2.33 + - workflow-job:1385.vb_58b_86ea_fff1 + - workflow-cps:3883.vb_3ff2a_e3eea_f + - workflow-aggregator:596.v8c21c963d92d + JCasC: + enabled: true + defaultConfig: true + configScripts: + database: | + unclassified: + globalDatabaseConfiguration: + database: + mysql: + hostname: mysql.database.svc.cluster.local + username: "root" + database: "jenkins" + password: '$mysql_db_password' + properties: "?useSSL=false" + validationQuery: "SELECT 1" +agent: + enabled: true + replicas: 3 + numExecutors: 1 + jenkinsUrl: https://jenkins.$domain + image: + repository: "jenkins/inbound-agent" + tag: "latest" + customJenkinsLabels: [] + +persistence: + enabled: true + storageClass: "local-path" + size: "10Gi" +networkPolicy: + enabled: false +additionalConfig: {} +EOF + +helm repo add jenkins https://charts.jenkins.io +helm repo update +helm upgrade --install jenkins jenkins/jenkins -n "$namespace" --create-namespace -f values.yaml diff --git a/roles/charts/jenkins/howto.md b/roles/charts/jenkins/howto.md new file mode 100644 index 0000000..e978bcb --- /dev/null +++ b/roles/charts/jenkins/howto.md @@ -0,0 +1,124 @@ +# Jenkins Master 部署 + +# Jenkins Node IaC Runner 设置 +1. 安装git terraform + +## GitLab to trigger Jenkins + +1.
Gitlab https://gitlab.xxx.com/-/profile/personal_access_tokens + +2. GitLab和Jenkins的集成可以让你在GitLab中的代码更新后自动触发Jenkins的构建任务。以下是配置GitLab插件和Jenkins以实现GitLab触发Jenkins的步骤: +3. 在Jenkins中安装GitLab插件 +首先,你需要在Jenkins中安装GitLab插件。登录到Jenkins的管理界面,然后转到“Manage Jenkins” > “Manage Plugins” > “Available”,在搜索框中输入“GitLab”,找到并安装“GitLab Plugin”。 +4. 在Jenkins中配置GitLab连接 +安装完插件后,你需要配置GitLab的连接。转到“Manage Jenkins” > “Configure System”,滚动到“GitLab”部分,点击“Add GitLab Server” > “Server”,输入你的GitLab服务器URL,并生成并输入一个与你的GitLab账户相关联的API Token。 +5. 在Jenkins中创建一个新的任务 +创建一个新的任务,并在源代码管理部分选择“Git”,输入你的GitLab项目的URL。在构建触发器部分,选择“Build when a change is pushed to GitLab”。 +记录:GitLab webhook URL: https://jenkins.xxx.xxx/project/alicloud-oss-pipeline +6. 在GitLab中配置Webhook +在你的GitLab项目中,转到“Settings” > “Integrations” -> 启用"Jenkins" +- 在URL中输入步骤5记录的 Webhook URL https://jenkins.xxx.xxx/project/alicloud-oss-pipeline +- 选择你想要触发Jenkins任务的事件(例如,当代码被推送时) +- Project name: 输入项目名称 +- Username: Jenkins 用户名 +- Password: Jenkins 认证密码 +- 保存更改, 测试设置,返回状态200为配置正确 + +以上就是配置GitLab插件和Jenkins以实现GitLab触发Jenkins的步骤。在完成这些步骤后,每当你的GitLab项目有更新时,都会自动触发对应的Jenkins构建任务。 + +## 要将GitHub代码仓库与Jenkins关联起来,您需要完成以下步骤: + +1 要在 GitHub 中启用 webhook 功能以触发 Jenkins 构建,请按照以下步骤操作: +2 进入 GitHub 仓库设置:在要设置 webhook 的 GitHub 仓库页面上,点击右上角的“Settings”。 +3 选择 Webhooks 选项:在仓库设置页面的左侧菜单中,选择“Webhooks”。 +4 添加 Webhook:在 Webhooks 页面的右上角,点击“Add webhook”。 + +配置 Webhook: + +1. Payload URL:输入 Jenkins 服务器的 webhook URL。格式应为 http://your-jenkins-server/github-webhook/。确保替换 your-jenkins-server 为您 Jenkins 服务器的实际地址。 +2. Content type:选择 application/json。 +3. Secret(可选):如果需要额外的安全性,可以输入一个秘密令牌。 +4. SSL verification:选择是否验证 SSL 证书。 +5. 
Which events would you like to trigger this webhook?:选择触发 webhook 的事件。通常选择 Just the push event(只有推送事件)或 Let me select individual events(让我选择单独的事件)并选择适当的事件(例如,push、pull request 等)。 +添加 Webhook:点击页面底部的“Add webhook”按钮以保存配置。 + +完成以上步骤后,您的 GitHub 仓库就配置好了一个 webhook,可以触发 Jenkins 构建。记得在 Jenkins 中设置相应的任务来响应这些 webhook。 + + +安装Jenkins插件: + +确保您的Jenkins实例已经安装了“GitHub”和“GitHub Integration”插件。您可以在Jenkins管理界面的“插件管理”部分进行安装。 +配置GitHub Webhook: + +在GitHub仓库的设置中,找到“Webhooks”部分并添加一个新的Webhook。 +将“Payload URL”设置为您的Jenkins服务器的URL,通常是这样的格式:http://your-jenkins-server/github-webhook/(将 your-jenkins-server 替换为 Jenkins 服务器的实际地址)。 +选择触发Webhook的事件,通常是“Just the push event”或者“Send me everything”。 +确保“Content type”设置为“application/json”。 +点击“Add webhook”保存设置。 +配置Jenkins Job: + +在Jenkins中创建一个新的构建任务或者配置现有的任务。 +在“源码管理”部分,选择“Git”并填写您的GitHub仓库的URL。 +在“构建触发器”部分,选择“GitHub hook trigger for GITScm polling”选项。这样,每当GitHub仓库有新的推送事件时,Jenkins就会自动触发构建。 +测试配置: + +推送一些改动到您的GitHub仓库,检查是否触发了Jenkins构建。 +在Jenkins的构建历史中查看构建是否成功执行。 +通过完成以上步骤,您的GitHub代码仓库就与Jenkins关联起来了,可以实现自动触发构建的功能。 + +要在 Jenkins 中设置 GitHub 服务,您需要进行以下步骤: + +安装 GitHub 插件:首先确保您的 Jenkins 实例已安装 GitHub 插件。如果尚未安装,请转到 Jenkins 的“插件管理”页面,在“可选插件”选项卡中搜索并安装 GitHub 插件。 + +配置 GitHub 服务器:在 Jenkins 管理界面中,转到“系统管理” > “系统设置”。 + +在系统设置页面中,找到并点击“GitHub”部分。 +点击“Add GitHub Server”添加一个新的 GitHub 服务器配置。 +在配置页面中,输入一个描述性的名称,例如“GitHub”。 +在 GitHub API URL 中输入 GitHub 的 API 地址。通常为 https://api.github.com。 +如果您的 GitHub 仓库需要身份验证,请在“凭据”部分选择一个已配置的凭据。如果尚未配置凭据,请点击“Add”添加一个新的凭据,选择类型为“Secret text”或“Username with password”,然后输入您的 GitHub 用户名和密码或访问令牌。 +完成配置后,点击“保存”保存 GitHub 服务器配置。 +验证配置:您可以在配置页面的底部点击“Test connection”来验证您的 GitHub 服务器配置是否正常工作。 + +保存设置:确保在完成配置后点击“保存”保存更改。 + +现在,您已成功配置了 Jenkins 的 GitHub 服务。您可以在 Jenkins 任务中使用这个配置来与 GitHub 仓库进行集成,例如触发构建、拉取代码等操作。 + + +对于 Jenkins 中的 GitHub API URL (https://api.github.com) 的凭据设置,您可以使用 GitHub Personal Access Token。这个 Token 可以通过以下步骤生成: + +在 GitHub 上登录您的账号。 +点击页面右上角的头像,选择“Settings”。 +在左侧边栏中,点击“Developer settings”。 +在左侧边栏中,点击“Personal access tokens”。 +点击“Generate new token”。 +输入一个描述性的名称,选择需要的权限(至少需要 repo
权限来访问仓库),然后点击“Generate token”。 +复制生成的 Token,并保存到一个安全的地方。请注意,这个 Token 只会显示一次,如果您丢失了,请重新生成一个新的 Token。 +在 Jenkins 中使用这个 Token 作为 GitHub API URL (https://api.github.com) 的凭据时,您可以将 Token 添加为 Jenkins 的凭据: + +进入 Jenkins 管理界面,转到“凭据” > “系统”。 +在“系统”页面中,点击“Global credentials (unrestricted)”。 +在凭据页面中,点击“Add credentials”。 +在“Kind”下拉菜单中选择“Secret text”。 +在“Secret”框中粘贴您在 GitHub 上生成的 Personal Access Token。 +输入一个描述性的名称,并点击“OK”保存凭据。 +现在,您可以在 Jenkins 的配置中使用这个凭据来访问 GitHub API (https://api.github.com)。 + +确保 Docker 已安装:在 Jenkins 代理节点上确认 Docker 已正确安装并配置。您可以通过在终端中执行 docker --version 命令来检查 Docker 是否可用。 + +检查 Docker 环境:如果 Docker 已安装,请确保 Docker 服务正在运行。您可以使用 sudo systemctl status docker 命令检查 Docker 服务的状态。 + +确认 Jenkins 全局工具配置:在 Jenkins 管理界面中,转到“系统管理”->“全局工具配置”,确保 Docker 工具已正确配置。如果未配置,您可以添加一个 Docker 工具,并指定正确的安装路径。 + +重启 Jenkins 服务:在进行了上述更改后,尝试重启 Jenkins 服务,以确保新的配置生效。 + +尝试在终端中执行 Docker 命令:在 Jenkins 代理节点上打开终端,尝试手动执行一些 Docker 命令(如 docker pull),看看是否能够正常执行 + +要设置 Jenkins Docker 流水线,你可以按照以下步骤进行操作: + +前提条件 +确保你的 Jenkins 实例已经安装了以下插件: + +Docker Pipeline +Docker Commons + diff --git a/roles/charts/jenkins/meta/main.yml b/roles/charts/jenkins/meta/main.yml new file mode 100644 index 0000000..7e3f81b --- /dev/null +++ b/roles/charts/jenkins/meta/main.yml @@ -0,0 +1,3 @@ +dependencies: + - role: mysql + - role: secret-manger diff --git a/roles/charts/jenkins/tasks/main.yml b/roles/charts/jenkins/tasks/main.yml new file mode 100755 index 0000000..2bd1ab0 --- /dev/null +++ b/roles/charts/jenkins/tasks/main.yml @@ -0,0 +1,18 @@ +- name: get mysql db password + shell: 'kubectl get secret --namespace database mysql -o jsonpath="{.data.mysql-root-password}" | base64 -d' + register: mysql_db_password_raw + when: inventory_hostname in groups[group][0] + +- name: set fact join command + set_fact: + mysql_db_password : "{{ mysql_db_password_raw.stdout }}" + when: inventory_hostname in groups[group][0] + +- name: DB Pre Setup for Jenkins Server + script: files/pre-setup.sh {{ db_namespace }} + when: 
inventory_hostname in groups[group] + +# mysql_db_password is only set as a fact on the first host of the group, so it is read through hostvars; +# default([]) keeps the loop valid when tls is undefined (the "when" guard is evaluated per item, after the loop is templated). +- name: Setup Jenkins Cluster + script: files/setup.sh {{ domain }} {{ item.secret_name }} {{ namespace }} {{ hostvars[groups[group][0]].mysql_db_password }} + when: inventory_hostname in groups[group] and ( tls is defined) + loop: "{{ tls | default([]) }}" diff --git a/roles/charts/kafka-cluster/README.md b/roles/charts/kafka-cluster/README.md new file mode 100644 index 0000000..3b84afa --- /dev/null +++ b/roles/charts/kafka-cluster/README.md @@ -0,0 +1,3 @@ +# kafka-cluster (chart) + +Placeholder role for managing the Helm chart release of kafka-cluster. diff --git a/roles/charts/kafka-cluster/tasks/main.yml b/roles/charts/kafka-cluster/tasks/main.yml new file mode 100644 index 0000000..bed8247 --- /dev/null +++ b/roles/charts/kafka-cluster/tasks/main.yml @@ -0,0 +1,5 @@ +--- +# TODO: implement Helm release tasks +- name: Placeholder task + debug: + msg: "Role placeholder. Implement Helm release tasks." diff --git a/roles/charts/keycloak/files/pre-setup.sh b/roles/charts/keycloak/files/pre-setup.sh new file mode 100644 index 0000000..ae0beaa --- /dev/null +++ b/roles/charts/keycloak/files/pre-setup.sh @@ -0,0 +1,5 @@ +#!/bin/bash +export namespace=$1 +export POSTGRES_PASSWORD=$(kubectl get secret --namespace $namespace postgresql -o jsonpath="{.data.postgres-password}" | base64 -d) + +kubectl run postgresql-client --rm --tty -i --restart='Never' --namespace $namespace --image docker.io/bitnami/postgresql:15.2.0-debian-11-r11 --env="PGPASSWORD=$POSTGRES_PASSWORD" --command -- psql --host postgresql -U postgres -d postgres -p 5432 -w -c "CREATE DATABASE keycloak;" || echo true diff --git a/roles/charts/keycloak/files/setup-keycloak.sh b/roles/charts/keycloak/files/setup-keycloak.sh new file mode 100644 index 0000000..8ffaa7b --- /dev/null +++ b/roles/charts/keycloak/files/setup-keycloak.sh @@ -0,0 +1,39 @@ +#!/bin/bash + +export domain=$1 +export secret=$2 +export namespace=$3 +export keycloak_ui_password=$4 +export keycloak_db_password=$5 + +cat >
keycloak-values.yaml << EOF +proxy: edge +tls: + enabled: false + existingSecret: "$secret" +auth: + adminPassword: "$keycloak_ui_password" +ingress: + enabled: false + ingressClassName: "nginx" + hostname: keycloak.${domain} + tls: true + extraTls: + - hosts: + - keycloak.${domain} + secretName: $secret +postgresql: + enabled: true +#externalDatabase: +# host: "postgresql.database.svc.cluster.local" +# port: 5432 +# user: postgres +# database: keycloak +# password: "$keycloak_db_password" +EOF + +helm repo add bitnami https://charts.bitnami.com/bitnami || echo true +helm repo update +kubectl create ns ${namespace} || echo true +kubectl create secret tls onwalk-tls --cert=/etc/ssl/onwalk.net.pem --key=/etc/ssl/onwalk.net.key -n ${namespace} || echo true +helm upgrade --install keycloak bitnami/keycloak -n $namespace -f keycloak-values.yaml diff --git a/roles/charts/keycloak/meta/main.yml b/roles/charts/keycloak/meta/main.yml new file mode 100644 index 0000000..1faf774 --- /dev/null +++ b/roles/charts/keycloak/meta/main.yml @@ -0,0 +1,3 @@ +dependencies: + - role: postgresql + - role: secret-manger diff --git a/roles/charts/keycloak/readme.md b/roles/charts/keycloak/readme.md new file mode 100644 index 0000000..c2d80d2 --- /dev/null +++ b/roles/charts/keycloak/readme.md @@ -0,0 +1,8 @@ +https://github.com/bitnami/charts/issues/6940 + +Describe the bug +Mixed Content: The page at 'https://keycloak.dev.trademaster.com.br/auth/admin/master/console/' was loaded over HTTPS, but requested an insecure script 'http://keycloak.dev.trademaster.com.br/auth/js/keycloak.js?version=7a4is'. 
This request has been blocked; the content must be served over HTTPS + +extraEnvVars: +name: KEYCLOAK_PROXY +value: reencrypt diff --git a/roles/charts/keycloak/tasks/main.yml b/roles/charts/keycloak/tasks/main.yml new file mode 100755 index 0000000..dab240e --- /dev/null +++ b/roles/charts/keycloak/tasks/main.yml @@ -0,0 +1,29 @@ +- name: Init Keycloak DB + script: files/pre-setup.sh {{ db_namespace }} + +- name: get db password + shell: 'kubectl get secret --namespace database postgresql -o jsonpath="{.data.postgres-password}" | base64 -d' + register: command_raw + when: inventory_hostname in groups[group][0] + +- name: set fact join command + set_fact: + keycloak_db_password : "{{ command_raw.stdout }}" + when: inventory_hostname in groups[group][0] + +#- name: Show Debug Info +# debug: var=command_raw verbosity=0 + +- name: Setup Keycloak Server + script: files/setup-keycloak.sh {{ domain }} {{ item.secret_name }} {{ namespace }} {{ admin_password }} {{ hostvars[groups[group][0]].keycloak_db_password }} + loop: "{{ tls }}" + when: inventory_hostname in groups[group] + +#- name: Sync aws-gloabl-oidc-broker deploy yaml +# template: src=templates/{{ item }} dest=/tmp/{{ item }} owner=root group=root mode=0644 force=yes unsafe_writes=yes +# with_items: +# - aws-gloabl-oidc-broker.yaml + +#- name: Setup aws-gloabl-oidc-broker +# shell: "kubectl apply -f /tmp/aws-gloabl-oidc-broker.yaml" +# when: inventory_hostname in groups[group] diff --git a/roles/charts/keycloak/templates/aws-gloabl-oidc-broker.yaml b/roles/charts/keycloak/templates/aws-gloabl-oidc-broker.yaml new file mode 100644 index 0000000..0aa9fd2 --- /dev/null +++ b/roles/charts/keycloak/templates/aws-gloabl-oidc-broker.yaml @@ -0,0 +1,74 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: aws-global-oidc-broker + namespace: itsm + labels: + io.metrico.service: aws-global-oidc-broker +spec: + replicas: 1 + selector: + matchLabels: + io.metrico.service: aws-global-oidc-broker + strategy: {} + 
template: + metadata: + creationTimestamp: null + labels: + io.metrico.service: aws-global-oidc-broker + spec: + containers: + - name: aws-global-oidc-broker + image: artifact.onwalk.net/public/aws-global-oidc-broker:1.2.0 + ports: + - containerPort: 5000 + resources: {} + env: + - name: KEYCLOAK_CLIENT_ID + value: "aws-oidc" + - name: KEYCLOAK_WELLKNOWN + value: "https://keycloak.apollo-ev.com/realms/cloud-sso/.well-known/openid-configuration" + - name: KEYCLOAK_CLIENT_SECRET + value: "WYyZJGUOOiwooIp700PtykmjYkrsPJPi" + - name: TITLE + value: "aws-oidc" + restartPolicy: Always +--- +apiVersion: v1 +kind: Service +metadata: + creationTimestamp: null + name: aws-global-oidc-broker + namespace: itsm +spec: + ports: + - name: "5000" + port: 5000 + nodePort: 5000 + targetPort: 5000 + selector: + io.metrico.service: aws-global-oidc-broker + type: NodePort +--- +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + name: aws-global-oidc-broker + namespace: itsm +spec: + ingressClassName: nginx + rules: + - host: loki.apollo-ev.com + http: + paths: + - path: / + pathType: Prefix + backend: + service: + name: aws-global-oidc-broker + port: + number: 5000 + tls: + - hosts: + - loki.apollo-ev.com + secretName: keycloak-tls diff --git a/roles/charts/keycloak/vars/main.yml b/roles/charts/keycloak/vars/main.yml new file mode 100644 index 0000000..2be57f6 --- /dev/null +++ b/roles/charts/keycloak/vars/main.yml @@ -0,0 +1,16 @@ +group: master +domain: onwalk.net +namespace: keycloak +update_secret: true +db_namespace: database +admin_password: !vault | + $ANSIBLE_VAULT;1.1;AES256 + 37363730333461643562316539303664363262646238366336353434643461323263323437646362 + 3537373363343563316334333861623663383832363034350a326362646636643665383632383531 + 63646132343636613739383534323838613639656262363039323030353761636164633165303066 + 6230663366373831610a363032643039373330663239373733323931396531333733366364313265 + 3130 +tls: + - secret_name: keycloak-tls + keyfile: 
/etc/ssl/onwalk.net.key + certfile: /etc/ssl/onwalk.net.pem diff --git a/roles/charts/kubernetes-dashboard/files/setup.sh b/roles/charts/kubernetes-dashboard/files/setup.sh new file mode 100755 index 0000000..e29a439 --- /dev/null +++ b/roles/charts/kubernetes-dashboard/files/setup.sh @@ -0,0 +1,11 @@ +#!/bin/bash +helm upgrade --install kubernetes-dashboard kubernetes-dashboard/kubernetes-dashboard \ + --create-namespace \ + --namespace kubernetes-dashboard \ + --set app.scheduling.nodeSelector."kubernetes\.io/hostname"=$1 \ + --set auth.nodeSelector."kubernetes\.io/hostname"=$1 \ + --set api.nodeSelector."kubernetes\.io/hostname"=$1 \ + --set web.nodeSelector."kubernetes\.io/hostname"=$1 \ + --set metricsScraper.nodeSelector."kubernetes\.io/hostname"=$1 \ + --set kong.nodeSelector."kubernetes\.io/hostname"=$1 \ + --set persistence.enabled=false diff --git a/roles/charts/kubernetes-dashboard/tasks/main.yml b/roles/charts/kubernetes-dashboard/tasks/main.yml new file mode 100644 index 0000000..0dcaee4 --- /dev/null +++ b/roles/charts/kubernetes-dashboard/tasks/main.yml @@ -0,0 +1,3 @@ +- name: Install kubernetes dashboard + script: files/setup.sh {{ inventory_hostname }} + when: is_primary | bool diff --git a/roles/charts/loki/README.md b/roles/charts/loki/README.md new file mode 100644 index 0000000..0e10846 --- /dev/null +++ b/roles/charts/loki/README.md @@ -0,0 +1,3 @@ +# loki (chart) + +Placeholder role for managing the Helm chart release of loki. diff --git a/roles/charts/loki/tasks/main.yml b/roles/charts/loki/tasks/main.yml new file mode 100644 index 0000000..bed8247 --- /dev/null +++ b/roles/charts/loki/tasks/main.yml @@ -0,0 +1,5 @@ +--- +# TODO: implement Helm release tasks +- name: Placeholder task + debug: + msg: "Role placeholder. Implement Helm release tasks." 
diff --git a/roles/charts/metrics-server/files/setup.sh b/roles/charts/metrics-server/files/setup.sh new file mode 100755 index 0000000..36f59e4 --- /dev/null +++ b/roles/charts/metrics-server/files/setup.sh @@ -0,0 +1,5 @@ +#!/bin/bash +helm upgrade --install metrics-server metrics-server/metrics-server \ + --namespace kube-system \ + --set nodeSelector."kubernetes\.io/hostname"=$1 \ + --set persistence.enabled=false diff --git a/roles/charts/metrics-server/tasks/main.yml b/roles/charts/metrics-server/tasks/main.yml new file mode 100644 index 0000000..5c293d4 --- /dev/null +++ b/roles/charts/metrics-server/tasks/main.yml @@ -0,0 +1,3 @@ +- name: Install metrics server + script: files/setup.sh {{ inventory_hostname }} + when: is_primary | bool diff --git a/roles/charts/minio/README.md b/roles/charts/minio/README.md new file mode 100644 index 0000000..d872bea --- /dev/null +++ b/roles/charts/minio/README.md @@ -0,0 +1,3 @@ +# minio (chart) + +Placeholder role for managing the Helm chart release of minio. diff --git a/roles/charts/minio/tasks/main.yml b/roles/charts/minio/tasks/main.yml new file mode 100644 index 0000000..bed8247 --- /dev/null +++ b/roles/charts/minio/tasks/main.yml @@ -0,0 +1,5 @@ +--- +# TODO: implement Helm release tasks +- name: Placeholder task + debug: + msg: "Role placeholder. Implement Helm release tasks." diff --git a/roles/charts/mlflow/README.md b/roles/charts/mlflow/README.md new file mode 100644 index 0000000..684e5d2 --- /dev/null +++ b/roles/charts/mlflow/README.md @@ -0,0 +1,3 @@ +# mlflow (chart) + +Placeholder role for managing the Helm chart release of mlflow. diff --git a/roles/charts/mlflow/tasks/main.yml b/roles/charts/mlflow/tasks/main.yml new file mode 100644 index 0000000..bed8247 --- /dev/null +++ b/roles/charts/mlflow/tasks/main.yml @@ -0,0 +1,5 @@ +--- +# TODO: implement Helm release tasks +- name: Placeholder task + debug: + msg: "Role placeholder. Implement Helm release tasks." 
diff --git a/roles/charts/mysql/files/setup-mysql.sh b/roles/charts/mysql/files/setup-mysql.sh new file mode 100644 index 0000000..563a99e --- /dev/null +++ b/roles/charts/mysql/files/setup-mysql.sh @@ -0,0 +1,6 @@ +#!/bin/bash + +helm repo add bitnami https://charts.bitnami.com/bitnami || echo true +helm repo up +kubectl create ns database || echo true +helm upgrade --install mysql bitnami/mysql -n database diff --git a/roles/charts/mysql/tasks/main.yml b/roles/charts/mysql/tasks/main.yml new file mode 100755 index 0000000..8b630bd --- /dev/null +++ b/roles/charts/mysql/tasks/main.yml @@ -0,0 +1,3 @@ +- name: Setup MySQL Server + script: files/setup-mysql.sh + when: inventory_hostname in groups[group] diff --git a/roles/charts/node-exporter/tasks/main.yml b/roles/charts/node-exporter/tasks/main.yml new file mode 100755 index 0000000..13898d7 --- /dev/null +++ b/roles/charts/node-exporter/tasks/main.yml @@ -0,0 +1,25 @@ +# useradd fails when the account already exists; "||" ignores that failure (the original "|" piped its output into echo instead). +- name: create user prometheus + shell: "useradd prometheus -s /sbin/nologin || echo true" + when: inventory_hostname in groups[group] + +- name: clean old file + shell: "rm -f /usr/bin/node_exporter" + when: inventory_hostname in groups[group] + +# -f makes curl exit non-zero on an HTTP error instead of saving the error page as the binary. +- name: download node_exporter binary + shell: "curl -fLo /usr/bin/node_exporter https://mirrors.onwalk.net/tools/linux-amd64/node_exporter && chmod 755 /usr/bin/node_exporter" + when: inventory_hostname in groups[group] + +- name: create node-exporter.service + template: src=templates/node-exporter.service dest=/etc/systemd/system/node-exporter.service owner=root group=root mode=0644 + when: inventory_hostname in groups[group] + +- name: reload node-exporter service + shell: 'systemctl daemon-reload' + when: inventory_hostname in groups[group] + +- name: init node-exporter service + shell: 'systemctl restart node-exporter.service' + when: inventory_hostname in groups[group] diff --git a/roles/charts/node-exporter/templates/node-exporter.service b/roles/charts/node-exporter/templates/node-exporter.service
new file mode 100755 index 0000000..f05a15b --- /dev/null +++ b/roles/charts/node-exporter/templates/node-exporter.service @@ -0,0 +1,14 @@ +[Unit] +Description=Prometheus Node Exporter +After=network.target + +[Service] +Type=simple +User=prometheus +ExecStart=/usr/bin/node_exporter +Restart=on-failure +RestartSec=30 +StartLimitInterval=0 + +[Install] +WantedBy=multi-user.target diff --git a/roles/charts/nvidia-operator/README.md b/roles/charts/nvidia-operator/README.md new file mode 100644 index 0000000..074992e --- /dev/null +++ b/roles/charts/nvidia-operator/README.md @@ -0,0 +1,3 @@ +# nvidia-operator (chart) + +Placeholder role for managing the Helm chart release of nvidia-operator. diff --git a/roles/charts/nvidia-operator/tasks/main.yml b/roles/charts/nvidia-operator/tasks/main.yml new file mode 100644 index 0000000..bed8247 --- /dev/null +++ b/roles/charts/nvidia-operator/tasks/main.yml @@ -0,0 +1,5 @@ +--- +# TODO: implement Helm release tasks +- name: Placeholder task + debug: + msg: "Role placeholder. Implement Helm release tasks." 
diff --git a/roles/charts/observability-agent/files/setup.sh b/roles/charts/observability-agent/files/setup.sh new file mode 100644 index 0000000..1417a83 --- /dev/null +++ b/roles/charts/observability-agent/files/setup.sh @@ -0,0 +1,46 @@ +#!/bin/bash +set -x +export domain=$1 +export deepflowserverip=$2 +export deepflowk8sclusterid=$3 + +cat > values.yaml << EOF +deepflow-agent: + enabled: true + deepflowServerNodeIPS: + - $deepflowserverip + deepflowK8sClusterID: $deepflowk8sclusterid +prometheus: + enabled: true + server: + name: agent + retention: "30m" + extraFlags: + - web.enable-lifecycle + - enable-feature=expand-external-labels + remoteWrite: + - name: remote_prometheus + url: 'https://prometheus.${domain}/api/v1/write' + persistentVolume: + enabled: false + alertmanager: + enabled: false + rometheus-pushgateway: + enabled: false + kube-state-metrics: + enabled: false + prometheus-node-exporter: + enabled: false +promtail: + enabled: true + config: + clients: + - url: https://data-gateway.${domain}/loki/api/v1/push +EOF + +node_name=`kubectl get nodes | awk 'NR>1 {print $1}'` +kubectl create namespace monitoring || echo true +kubectl label nodes $node prometheus=true --overwrite || echo true +helm repo add stable https://charts.onwalk.net/ || echo true +helm repo update +helm upgrade --install observabilityagent stable/observabilityagent -n monitoring -f values.yaml diff --git a/roles/charts/observability-agent/meta/main.yml b/roles/charts/observability-agent/meta/main.yml new file mode 100644 index 0000000..9711b33 --- /dev/null +++ b/roles/charts/observability-agent/meta/main.yml @@ -0,0 +1,2 @@ +dependencies: + - role: common diff --git a/roles/charts/observability-agent/tasks/main.yml b/roles/charts/observability-agent/tasks/main.yml new file mode 100755 index 0000000..ed4f91f --- /dev/null +++ b/roles/charts/observability-agent/tasks/main.yml @@ -0,0 +1,3 @@ +- name: Setup Observability Agent + script: files/setup.sh {{ domain }} {{ deepflowserverip 
}} {{ deepflowk8sclusterid }} + when: inventory_hostname in groups[group] diff --git a/roles/charts/observability-server/files/mysql-db-init-setup.sh b/roles/charts/observability-server/files/mysql-db-init-setup.sh new file mode 100644 index 0000000..a12840b --- /dev/null +++ b/roles/charts/observability-server/files/mysql-db-init-setup.sh @@ -0,0 +1,6 @@ +#!/bin/bash +export namespace=$1 + +export MYSQL_ROOT_PASSWORD=$(kubectl get secret --namespace $namespace mysql -o jsonpath="{.data.mysql-root-password}" | base64 -d) + +kubectl run mysql-client --rm --tty -i --restart='Never' --image docker.io/bitnami/mysql:8.0.32-debian-11-r14 --namespace $namespace --env MYSQL_ROOT_PASSWORD=$MYSQL_ROOT_PASSWORD --command -- bash -c "mysql -h mysql.database.svc.cluster.local -uroot -p$MYSQL_ROOT_PASSWORD -e 'create database IF NOT EXISTS grafana;'" diff --git a/roles/charts/observability-server/files/setup-observable-server.sh b/roles/charts/observability-server/files/setup-observable-server.sh new file mode 100644 index 0000000..9daa528 --- /dev/null +++ b/roles/charts/observability-server/files/setup-observable-server.sh @@ -0,0 +1,132 @@ +#!/bin/bash +set -x +export domain=$1 +export secret=$2 +export namespace=$3 +export mysql_db_password=$4 +export ck_node_ip1=$5 +export ck_node_ip2=$6 +export ck_node_ip3=$7 + +node_name=`kubectl get nodes | awk '{print $1}' | tail -n 1` +kubectl label nodes $node_name app=prometheus --overwrite + +cat > values.yaml << EOF +influxdb: + enabled: true + ingress: + tls: true + enabled: true + secretName: ${secret} + hostname: influxdb.${domain} + className: nginx +deepflow: + enabled: true + clickhouse: + enabled: true + mysql: + enabled: false + grafana: + enabled: true + ingress: + enabled: true + ingressClassName: nginx + hosts: + - grafana.${domain} + tls: + - secretName: ${secret} + hosts: + - grafana.${domain} + global: + #externalClickHouse: + # enabled: true + # type: ep + # clusterName: default + # storagePolicy: default + # 
username: default + # password: '' + # hosts: + # - ip: $ck_node_ip1 + # port: 9000 + # - ip: $ck_node_ip2 + # port: 9000 + # - ip: $ck_node_ip3 + # port: 9000 + externalMySQL: + enabled: true + ip: mysql.database.svc.cluster.local + port: 3306 + username: root + password: $mysql_db_password +prometheus: + enabled: true + alertmanager: + enabled: false + prometheus-pushgateway: + enabled: false + kube-state-metrics: + enabled: true + server: + extraArgs: + enable-feature: remote-write-receiver + ingress: + enabled: true + ingressClassName: nginx + hosts: + - prometheus.${domain} + tls: + - secretName: ${secret} + hosts: + - prometheus.${domain} + alertmanagers: + - static_configs: + - targets: + - alertmanager.${domain} + serverFiles: + prometheus.yml: + rule_files: + - /etc/config/recording_rules.yml + - /etc/config/alerting_rules.yml +alertmanager: + configmapReload: + enabled: true + ingress: + enabled: true + className: "nginx" + hosts: + - host: alertmanager.$domain + paths: + - path: / + pathType: ImplementationSpecific + tls: + - secretName: ${secret} + hosts: + - alertmanager.$domain + config: + global: + resolve_timeout: 5m + smtp_smarthost: 'smtp.qq.com:465' + smtp_from: '11111111@qq.com' + smtp_auth_username: '11111111@qq.com' + smtp_auth_password: '123456' + smtp_require_tls: false + templates: + - '/etc/alertmanager/*.tmpl' + receivers: + - name: 'default-receiver' + email_configs: + - to: '{{ template "email.to" . }}' + html: '{{ template "email.to.html" . 
}}' + route: + group_wait: 10s + group_interval: 5m + receiver: default-receiver + repeat_interval: 1h +EOF + +helm repo add stable https://charts.onwalk.net/ || echo true +helm repo update +kubectl delete deploy observability-server -n ${namespace} || echo true +helm upgrade --install observability-server stable/observableserver -n ${namespace} -f values.yaml +sudo curl -o /usr/bin/deepflow-ctl https://deepflow-ce.oss-cn-beijing.aliyuncs.com/bin/ctl/stable/linux/$(arch | sed 's|x86_64|amd64|' | sed 's|aarch64|arm64|')/deepflow-ctl +sudo chmod a+x /usr/bin/deepflow-ctl diff --git a/roles/charts/observability-server/meta/main.yml b/roles/charts/observability-server/meta/main.yml new file mode 100644 index 0000000..7e3f81b --- /dev/null +++ b/roles/charts/observability-server/meta/main.yml @@ -0,0 +1,3 @@ +dependencies: + - role: mysql + - role: secret-manger diff --git a/roles/charts/observability-server/tasks/main.yml b/roles/charts/observability-server/tasks/main.yml new file mode 100755 index 0000000..af8743b --- /dev/null +++ b/roles/charts/observability-server/tasks/main.yml @@ -0,0 +1,39 @@ +- name: Post Setup MySQL Server + script: files/mysql-db-init-setup.sh {{ db_namespace }} + when: inventory_hostname in groups[group] + +- name: get mysql db password + shell: 'kubectl get secret --namespace database mysql -o jsonpath="{.data.mysql-root-password}" | base64 -d' + register: mysql_db_password_raw + when: inventory_hostname in groups[group][0] + +- name: set fact join command + set_fact: + mysql_db_password : "{{ mysql_db_password_raw.stdout }}" + when: inventory_hostname in groups[group][0] + +#- name: get clickhouse node ips +# shell: "kubectl get pods -n monitoring -o wide | grep clickhouse | awk '{print $6}'" +# register: ck_node_ips_raw +# when: inventory_hostname in groups[group][0] +# +#- name: set fact join command for ck_node_ip1 +# set_fact: +# ck_node_ip1 : "{{ ck_node_ips_raw.stdout_lines[0] }}" +# when: inventory_hostname in groups[group][0] +# 
+#- name: set fact join command for ck_node_ip2 +# set_fact: +# ck_node_ip2 : "{{ ck_node_ips_raw.stdout_lines[1] }}" +# when: inventory_hostname in groups[group][0] +# +#- name: set fact join command for ck_node_ip3 +# set_fact: +# ck_node_ip3 : "{{ ck_node_ips_raw.stdout_lines[2] }}" +# when: inventory_hostname in groups[group][0] + +# mysql_db_password is only set as a fact on the first host of the group, so it is read through hostvars; +# default([]) keeps the loop valid when tls is undefined (the "when" guard is evaluated per item, after the loop is templated). +- name: Setup Observability Server + script: files/setup-observable-server.sh {{ domain }} {{ item.secret_name }} {{ namespace }} {{ hostvars[groups[group][0]].mysql_db_password }} + #script: files/setup-observable-server.sh {{ domain }} {{ item.secret_name }} {{ namespace }} {{ mysql_db_password }} {{ ck_node_ip1 }} {{ ck_node_ip2 }} {{ ck_node_ip3 }} + when: inventory_hostname in groups[group] and ( tls is defined) + loop: "{{ tls | default([]) }}" diff --git a/roles/charts/openldap/files/setup-openldap.sh b/roles/charts/openldap/files/setup-openldap.sh new file mode 100644 index 0000000..803df34 --- /dev/null +++ b/roles/charts/openldap/files/setup-openldap.sh @@ -0,0 +1,44 @@ +#!/bin/bash + +export domain=$1 +export secret=$2 +export namespace=$3 +export password=$4 + +cat > openldap-vaules.yaml << EOF +global: + ldapDomain: $domain + adminPassword: $password + configPassword: $password +service: + type: NodePort + ldapPortNodePort: 389 + sslLdapPortNodePort: 636 +phpldapadmin: + enabled: true + ingress: + enabled: false + ingressClassName: nginx + hosts: + - openldap-admin.${domain} + tls: + - secretName: ${secret} + hosts: + - openldap-admin.${domain} +ltb-passwd: + enabled: true + ingress: + enabled: false + ingressClassName: nginx + hosts: + - openldap-ltb.${domain} + tls: + - secretName: ${secret} + hosts: + - openldap-ltb.${domain} +EOF + +helm repo add openldap https://jp-gouin.github.io/helm-openldap/ +helm repo up +kubectl create ns ${namespace} || echo true +helm upgrade --install openldap openldap/openldap-stack-ha -n ${namespace} --create-namespace -f openldap-vaules.yaml diff --git a/roles/charts/openldap/meta/main.yml b/roles/charts/openldap/meta/main.yml
new file mode 100644 index 0000000..6fc3ce8 --- /dev/null +++ b/roles/charts/openldap/meta/main.yml @@ -0,0 +1,3 @@ +dependencies: + - role: cert-manager + - role: secret-manger diff --git a/roles/charts/openldap/tasks/main.yml b/roles/charts/openldap/tasks/main.yml new file mode 100755 index 0000000..f1266ef --- /dev/null +++ b/roles/charts/openldap/tasks/main.yml @@ -0,0 +1,13 @@ +- name: Setup OpenLdap Server + script: files/setup-openldap.sh {{ domain }} {{ item.secret_name }} {{ namespace }} {{ admin_password }} + loop: "{{ tls }}" + when: inventory_hostname in groups[group] + +- name: sync ldap ingress config + template: src=templates/{{ item }} dest=/tmp/{{ item }} owner=root group=root mode=0644 force=yes unsafe_writes=yes + with_items: + - ingress.yaml + +- name: Setup ldap ingress + shell: "cd /tmp/ && kubectl apply -f ingress.yaml" + when: inventory_hostname in groups[group] diff --git a/roles/charts/openldap/templates/.gitignore b/roles/charts/openldap/templates/.gitignore new file mode 100644 index 0000000..a194b20 --- /dev/null +++ b/roles/charts/openldap/templates/.gitignore @@ -0,0 +1,2 @@ +/clickhouse-keeper-k8s.iml +/.idea/ diff --git a/roles/charts/openldap/templates/ingress.yaml b/roles/charts/openldap/templates/ingress.yaml new file mode 100644 index 0000000..7afd5f1 --- /dev/null +++ b/roles/charts/openldap/templates/ingress.yaml @@ -0,0 +1,45 @@ +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + name: openldap + namespace: itsm +spec: + ingressClassName: nginx + rules: + - host: ldap.onwalk.net + http: + paths: + - backend: + service: + name: openldap-headless + port: + name: http + path: / + pathType: ImplementationSpecific +--- +apiVersion: k8s.nginx.org/v1alpha1 +kind: GlobalConfiguration +metadata: + name: nginx-configuration + namespace: ingress +spec: + listeners: + - name: ldap-tcp + port: 389 + protocol: TCP +--- +apiVersion: k8s.nginx.org/v1alpha1 +kind: TransportServer +metadata: + name: ldap-tcp + namespace: itsm +spec: 
+ listener: + name: ldap-tcp + protocol: TCP + upstreams: + - name: ldap-app + service: openldap-headless + port: 389 + action: + pass: ldap-app diff --git a/roles/charts/openobserve/README.md b/roles/charts/openobserve/README.md new file mode 100644 index 0000000..de83e41 --- /dev/null +++ b/roles/charts/openobserve/README.md @@ -0,0 +1,3 @@ +# openobserve (chart) + +Placeholder role for managing the Helm chart release of openobserve. diff --git a/roles/charts/openobserve/tasks/main.yml b/roles/charts/openobserve/tasks/main.yml new file mode 100644 index 0000000..bed8247 --- /dev/null +++ b/roles/charts/openobserve/tasks/main.yml @@ -0,0 +1,5 @@ +--- +# TODO: implement Helm release tasks +- name: Placeholder task + debug: + msg: "Role placeholder. Implement Helm release tasks." diff --git a/roles/charts/postgres/README.md b/roles/charts/postgres/README.md new file mode 100644 index 0000000..5f58d87 --- /dev/null +++ b/roles/charts/postgres/README.md @@ -0,0 +1,3 @@ +# postgres (chart) + +Placeholder role for managing the Helm chart release of postgres. diff --git a/roles/charts/postgres/tasks/main.yml b/roles/charts/postgres/tasks/main.yml new file mode 100644 index 0000000..bed8247 --- /dev/null +++ b/roles/charts/postgres/tasks/main.yml @@ -0,0 +1,5 @@ +--- +# TODO: implement Helm release tasks +- name: Placeholder task + debug: + msg: "Role placeholder. Implement Helm release tasks." 
diff --git a/roles/charts/postgresql/files/post-setup.sh b/roles/charts/postgresql/files/post-setup.sh new file mode 100644 index 0000000..9e984b8 --- /dev/null +++ b/roles/charts/postgresql/files/post-setup.sh @@ -0,0 +1,19 @@ +#!/bin/bash +# Usage: post-setup.sh <namespace> - creates the extensions and application databases on the "postgresql" service in that namespace. +# Each step ends with "|| echo true", so a failing step (e.g. a database that already exists) does not fail the script. +export namespace="$1" +export POSTGRES_PASSWORD=$(kubectl get secret --namespace "$namespace" postgresql -o jsonpath="{.data.postgres-password}" | base64 -d) + +# One -c per statement: PostgreSQL rejects CREATE DATABASE when it is part of a multi-command string. +kubectl run postgresql-client --rm --tty -i --restart='Never' --namespace "$namespace" --image docker.io/bitnami/postgresql:15.2.0-debian-11-r11 --env="PGPASSWORD=$POSTGRES_PASSWORD" --command -- psql --host postgresql -U postgres -d postgres -p 5432 -w -c "CREATE EXTENSION IF NOT EXISTS pg_trgm;" -c "CREATE EXTENSION IF NOT EXISTS btree_gist;" -c "CREATE DATABASE gitlabhq_production OWNER gitlab;" || echo true + +# NOTE: the "gitlab" role is not created by this script; create it beforehand, e.g.: create user gitlab with encrypted password 'xxxxxx'; +# and, if required: grant all privileges on database gitlabhq_production to gitlab; + +kubectl run postgresql-client --rm --tty -i --restart='Never' --namespace "$namespace" --image docker.io/bitnami/postgresql:15.2.0-debian-11-r11 --env="PGPASSWORD=$POSTGRES_PASSWORD" --command -- psql --host postgresql -U postgres -d postgres -p 5432 -w -c "CREATE DATABASE keycloak;" || echo true + +kubectl run postgresql-client --rm --tty -i --restart='Never' --namespace "$namespace" --image docker.io/bitnami/postgresql:15.2.0-debian-11-r11 --env="PGPASSWORD=$POSTGRES_PASSWORD" --command -- psql --host postgresql -U postgres -d postgres -p 5432 -w -c "CREATE DATABASE registry;" || echo true + +kubectl run postgresql-client --rm --tty -i --restart='Never' --namespace "$namespace" --image docker.io/bitnami/postgresql:15.2.0-debian-11-r11 --env="PGPASSWORD=$POSTGRES_PASSWORD" --command -- psql --host postgresql -U postgres -d postgres -p 5432 -w -c "CREATE DATABASE notary_server;" || echo true + +kubectl run postgresql-client --rm --tty -i --restart='Never' --namespace "$namespace" --image docker.io/bitnami/postgresql:15.2.0-debian-11-r11
--env="PGPASSWORD=$POSTGRES_PASSWORD" --command -- psql --host postgresql -U postgres -d postgres -p 5432 -w -c "CREATE DATABASE notary_signer;" || echo true diff --git a/roles/charts/postgresql/files/setup-postgresql.sh b/roles/charts/postgresql/files/setup-postgresql.sh new file mode 100644 index 0000000..98c7bf7 --- /dev/null +++ b/roles/charts/postgresql/files/setup-postgresql.sh @@ -0,0 +1,13 @@ +#!/bin/bash + +export namespace=$1 +export registry=$2 + +helm repo add bitnami https://charts.bitnami.com/bitnami || echo true +helm repo up +cat > values.yaml << EOF +global: + imageRegistry: "$registry" +EOF +kubectl create ns $namespace || echo true +helm upgrade --install postgresql bitnami/postgresql --version 12.8.2 -n $namespace -f values.yaml diff --git a/roles/charts/postgresql/tasks/main.yml b/roles/charts/postgresql/tasks/main.yml new file mode 100755 index 0000000..4d57687 --- /dev/null +++ b/roles/charts/postgresql/tasks/main.yml @@ -0,0 +1,3 @@ +- name: Install PostgreSQL Server + script: files/setup-postgresql.sh {{ db_namespace }} {{ registry }} + when: inventory_hostname in groups[group] diff --git a/roles/charts/prometheus-stack/README.md b/roles/charts/prometheus-stack/README.md new file mode 100644 index 0000000..24b4e27 --- /dev/null +++ b/roles/charts/prometheus-stack/README.md @@ -0,0 +1,3 @@ +# prometheus-stack (chart) + +Placeholder role for managing the Helm chart release of prometheus-stack. diff --git a/roles/charts/prometheus-stack/tasks/main.yml b/roles/charts/prometheus-stack/tasks/main.yml new file mode 100644 index 0000000..bed8247 --- /dev/null +++ b/roles/charts/prometheus-stack/tasks/main.yml @@ -0,0 +1,5 @@ +--- +# TODO: implement Helm release tasks +- name: Placeholder task + debug: + msg: "Role placeholder. Implement Helm release tasks."
diff --git a/roles/charts/prometheus/files/setup.sh b/roles/charts/prometheus/files/setup.sh new file mode 100755 index 0000000..e07702b --- /dev/null +++ b/roles/charts/prometheus/files/setup.sh @@ -0,0 +1,16 @@ +#!/bin/bash +# Usage: setup.sh <node-hostname> - installs prometheus-community/prometheus into the "chutes" namespace with persistence disabled and node selectors set to the given hostname. +helm repo add prometheus-community https://prometheus-community.github.io/helm-charts +helm repo update prometheus-community +helm upgrade --install prometheus prometheus-community/prometheus \ + --namespace chutes \ + --create-namespace \ + --set server.persistentVolume.enabled=false \ + --set alertmanager.persistentVolume.enabled=false \ + --set prometheus-pushgateway.persistentVolume.enabled=false \ + --set prometheus-server.persistentVolume.enabled=false \ + --set alertmanager.persistence.enabled=false \ + --set server.nodeSelector."kubernetes\.io/hostname"="$1" \ + --set alertmanager.nodeSelector."kubernetes\.io/hostname"="$1" \ + --set pushgateway.nodeSelector."kubernetes\.io/hostname"="$1" \ + --set kubeStateMetrics.nodeSelector."kubernetes\.io/hostname"="$1" diff --git a/roles/charts/prometheus/tasks/main.yml b/roles/charts/prometheus/tasks/main.yml new file mode 100644 index 0000000..4f97501 --- /dev/null +++ b/roles/charts/prometheus/tasks/main.yml @@ -0,0 +1,3 @@ +- name: Install Prometheus + script: files/setup.sh {{ inventory_hostname }} + when: is_primary | bool diff --git a/roles/charts/ray-cluster/README.md b/roles/charts/ray-cluster/README.md new file mode 100644 index 0000000..cd3e16f --- /dev/null +++ b/roles/charts/ray-cluster/README.md @@ -0,0 +1,3 @@ +# ray-cluster (chart) + +Placeholder role for managing the Helm chart release of ray-cluster. diff --git a/roles/charts/ray-cluster/tasks/main.yml b/roles/charts/ray-cluster/tasks/main.yml new file mode 100644 index 0000000..bed8247 --- /dev/null +++ b/roles/charts/ray-cluster/tasks/main.yml @@ -0,0 +1,5 @@ +--- +# TODO: implement Helm release tasks +- name: Placeholder task + debug: + msg: "Role placeholder. Implement Helm release tasks."
diff --git a/roles/charts/redis/files/setup-redis.sh b/roles/charts/redis/files/setup-redis.sh new file mode 100644 index 0000000..f08c99a --- /dev/null +++ b/roles/charts/redis/files/setup-redis.sh @@ -0,0 +1,14 @@ +#!/bin/bash + +export namespace=$1 +export registry=$2 + +cat > values.yaml << EOF +global: + imageRegistry: "$registry" +EOF + +helm repo add bitnami https://charts.bitnami.com/bitnami +helm repo up bitnami +kubectl create ns $namespace || true +helm upgrade --install redis bitnami/redis --set architecture=standalone -n $namespace -f values.yaml diff --git a/roles/charts/redis/tasks/main.yml b/roles/charts/redis/tasks/main.yml new file mode 100755 index 0000000..7951595 --- /dev/null +++ b/roles/charts/redis/tasks/main.yml @@ -0,0 +1,3 @@ +- name: Setup Redis Server + script: files/setup-redis.sh {{ cache_namespace }} {{ registry }} + when: inventory_hostname in groups[group] diff --git a/roles/charts/redpanda/README.md b/roles/charts/redpanda/README.md new file mode 100644 index 0000000..7a83b19 --- /dev/null +++ b/roles/charts/redpanda/README.md @@ -0,0 +1,3 @@ +# redpanda (chart) + +Placeholder role for managing the Helm chart release of redpanda. diff --git a/roles/charts/redpanda/tasks/main.yml b/roles/charts/redpanda/tasks/main.yml new file mode 100644 index 0000000..bed8247 --- /dev/null +++ b/roles/charts/redpanda/tasks/main.yml @@ -0,0 +1,5 @@ +--- +# TODO: implement Helm release tasks +- name: Placeholder task + debug: + msg: "Role placeholder. Implement Helm release tasks." diff --git a/roles/charts/sglang/README.md b/roles/charts/sglang/README.md new file mode 100644 index 0000000..a71d2b3 --- /dev/null +++ b/roles/charts/sglang/README.md @@ -0,0 +1,3 @@ +# sglang (chart) + +Placeholder role for managing the Helm chart release of sglang. 
diff --git a/roles/charts/sglang/tasks/main.yml b/roles/charts/sglang/tasks/main.yml new file mode 100644 index 0000000..bed8247 --- /dev/null +++ b/roles/charts/sglang/tasks/main.yml @@ -0,0 +1,5 @@ +--- +# TODO: implement Helm release tasks +- name: Placeholder task + debug: + msg: "Role placeholder. Implement Helm release tasks." diff --git a/roles/charts/spark-operator/README.md b/roles/charts/spark-operator/README.md new file mode 100644 index 0000000..18811ee --- /dev/null +++ b/roles/charts/spark-operator/README.md @@ -0,0 +1,3 @@ +# spark-operator (chart) + +Placeholder role for managing the Helm chart release of spark-operator. diff --git a/roles/charts/spark-operator/tasks/main.yml b/roles/charts/spark-operator/tasks/main.yml new file mode 100644 index 0000000..bed8247 --- /dev/null +++ b/roles/charts/spark-operator/tasks/main.yml @@ -0,0 +1,5 @@ +--- +# TODO: implement Helm release tasks +- name: Placeholder task + debug: + msg: "Role placeholder. Implement Helm release tasks." 
diff --git a/roles/charts/splunk-otel-collector/files/setup.sh b/roles/charts/splunk-otel-collector/files/setup.sh new file mode 100644 index 0000000..5a3c0c3 --- /dev/null +++ b/roles/charts/splunk-otel-collector/files/setup.sh @@ -0,0 +1,20 @@ +#!/bin/bash + +Splunk_HEC_URL=$1 +Splunk_HEC_TOKEN=$2 + +helm repo add splunk-otel-collector-chart https://signalfx.github.io/splunk-otel-collector-chart +helm repo update + +cat > vaules.yaml << EOF +clusterName: Demo +splunkPlatform: + endpoint: $Splunk_HEC_URL + token: $Splunk_HEC_TOKEN + index: harbor + insecureSkipVerify: true +EOF + +helm upgrade --install splunk-otel-collector splunk-otel-collector-chart/splunk-otel-collector -f vaules.yaml + +curl -k "${Splunk_HEC_URL}" -H "Authorization: Splunk ${Splunk_HEC_TOKEN}" -d '{"event": "Hello, world!", "sourcetype": "manual"}' diff --git a/roles/charts/splunk-otel-collector/tasks/main.yml b/roles/charts/splunk-otel-collector/tasks/main.yml new file mode 100755 index 0000000..09cc640 --- /dev/null +++ b/roles/charts/splunk-otel-collector/tasks/main.yml @@ -0,0 +1,2 @@ +- name: Setup splunk otel collector + script: files/setup.sh {{ splunk_hec_url }} {{ splunk_hec_token }} diff --git a/roles/charts/tempo/README.md b/roles/charts/tempo/README.md new file mode 100644 index 0000000..6f6e9b9 --- /dev/null +++ b/roles/charts/tempo/README.md @@ -0,0 +1,3 @@ +# tempo (chart) + +Placeholder role for managing the Helm chart release of tempo. diff --git a/roles/charts/tempo/tasks/main.yml b/roles/charts/tempo/tasks/main.yml new file mode 100644 index 0000000..bed8247 --- /dev/null +++ b/roles/charts/tempo/tasks/main.yml @@ -0,0 +1,5 @@ +--- +# TODO: implement Helm release tasks +- name: Placeholder task + debug: + msg: "Role placeholder. Implement Helm release tasks." 
diff --git a/roles/charts/trino/README.md b/roles/charts/trino/README.md new file mode 100644 index 0000000..0e89d79 --- /dev/null +++ b/roles/charts/trino/README.md @@ -0,0 +1,3 @@ +# trino (chart) + +Placeholder role for managing the Helm chart release of trino. diff --git a/roles/charts/trino/tasks/main.yml b/roles/charts/trino/tasks/main.yml new file mode 100644 index 0000000..bed8247 --- /dev/null +++ b/roles/charts/trino/tasks/main.yml @@ -0,0 +1,5 @@ +--- +# TODO: implement Helm release tasks +- name: Placeholder task + debug: + msg: "Role placeholder. Implement Helm release tasks." diff --git a/roles/charts/vllm/README.md b/roles/charts/vllm/README.md new file mode 100644 index 0000000..4bd6022 --- /dev/null +++ b/roles/charts/vllm/README.md @@ -0,0 +1,3 @@ +# vllm (chart) + +Placeholder role for managing the Helm chart release of vllm. diff --git a/roles/charts/vllm/tasks/main.yml b/roles/charts/vllm/tasks/main.yml new file mode 100644 index 0000000..bed8247 --- /dev/null +++ b/roles/charts/vllm/tasks/main.yml @@ -0,0 +1,5 @@ +--- +# TODO: implement Helm release tasks +- name: Placeholder task + debug: + msg: "Role placeholder. Implement Helm release tasks." diff --git a/roles/docker/OpenObserve/README.md b/roles/docker/OpenObserve/README.md new file mode 100644 index 0000000..7de9e96 --- /dev/null +++ b/roles/docker/OpenObserve/README.md @@ -0,0 +1,5 @@ +# OpenObserve (docker) + +Placeholder role for docker-compose style deployment of OpenObserve. + +Templates include docker-compose.yaml with bootstrap nginx and certbot services mirroring the Zitadel setup. diff --git a/roles/docker/OpenObserve/tasks/main.yml b/roles/docker/OpenObserve/tasks/main.yml new file mode 100644 index 0000000..c4f5488 --- /dev/null +++ b/roles/docker/OpenObserve/tasks/main.yml @@ -0,0 +1,5 @@ +--- +# TODO: implement docker deployment tasks +- name: Placeholder task + debug: + msg: "Role placeholder. Implement docker deployment tasks." 
diff --git a/roles/docker/OpenObserve/templates/docker-compose.yaml b/roles/docker/OpenObserve/templates/docker-compose.yaml new file mode 100644 index 0000000..580d705 --- /dev/null +++ b/roles/docker/OpenObserve/templates/docker-compose.yaml @@ -0,0 +1,41 @@ +services: + bootstrap-nginx: + profiles: ["bootstrap"] + image: nginx:mainline-alpine + container_name: bootstrap-nginx + volumes: + - "{{ zitadel_workspace }}/certbot/www:/var/www/certbot" + - "{{ zitadel_workspace }}/certbot/conf:/etc/letsencrypt" + - "{{ zitadel_workspace }}/nginx/nginx.conf:/etc/nginx/nginx.conf" + - "{{ zitadel_workspace }}/nginx/conf.d/bootstrap-nginx.conf:/etc/nginx/conf.d/bootstrap-nginx.conf" + ports: + - "80:80" # only port 80 is bound for now + networks: + - app + healthcheck: + test: ["CMD", "wget", "-qO-", "http://localhost"] + interval: 3s + timeout: 2s + retries: 10 + start_period: 3s + certbot: + profiles: ["bootstrap"] + image: certbot/certbot + container_name: certbot + command: > + certonly --webroot + --webroot-path=/var/www/certbot + --email manbuzhe2009@qq.com + --agree-tos + --no-eff-email + --keep-until-expiring + --non-interactive + -d {{ zitadel_domain }} + volumes: + - "{{ zitadel_workspace }}/certbot/conf:/etc/letsencrypt" + - "{{ zitadel_workspace }}/certbot/www:/var/www/certbot" + networks: + - app + +networks: + app: diff --git a/roles/docker/Tempo/README.md b/roles/docker/Tempo/README.md new file mode 100644 index 0000000..10e86ec --- /dev/null +++ b/roles/docker/Tempo/README.md @@ -0,0 +1,5 @@ +# Tempo (docker) + +Placeholder role for docker-compose style deployment of Tempo. + +Templates include docker-compose.yaml with bootstrap nginx and certbot services mirroring the Zitadel setup.
diff --git a/roles/docker/Tempo/tasks/main.yml b/roles/docker/Tempo/tasks/main.yml new file mode 100644 index 0000000..c4f5488 --- /dev/null +++ b/roles/docker/Tempo/tasks/main.yml @@ -0,0 +1,5 @@ +--- +# TODO: implement docker deployment tasks +- name: Placeholder task + debug: + msg: "Role placeholder. Implement docker deployment tasks." diff --git a/roles/docker/Tempo/templates/docker-compose.yaml b/roles/docker/Tempo/templates/docker-compose.yaml new file mode 100644 index 0000000..580d705 --- /dev/null +++ b/roles/docker/Tempo/templates/docker-compose.yaml @@ -0,0 +1,41 @@ +services: + bootstrap-nginx: + profiles: ["bootstrap"] + image: nginx:mainline-alpine + container_name: bootstrap-nginx + volumes: + - "{{ zitadel_workspace }}/certbot/www:/var/www/certbot" + - "{{ zitadel_workspace }}/certbot/conf:/etc/letsencrypt" + - "{{ zitadel_workspace }}/nginx/nginx.conf:/etc/nginx/nginx.conf" + - "{{ zitadel_workspace }}/nginx/conf.d/bootstrap-nginx.conf:/etc/nginx/conf.d/bootstrap-nginx.conf" + ports: + - "80:80" # only port 80 is bound for now + networks: + - app + healthcheck: + test: ["CMD", "wget", "-qO-", "http://localhost"] + interval: 3s + timeout: 2s + retries: 10 + start_period: 3s + certbot: + profiles: ["bootstrap"] + image: certbot/certbot + container_name: certbot + command: > + certonly --webroot + --webroot-path=/var/www/certbot + --email manbuzhe2009@qq.com + --agree-tos + --no-eff-email + --keep-until-expiring + --non-interactive + -d {{ zitadel_domain }} + volumes: + - "{{ zitadel_workspace }}/certbot/conf:/etc/letsencrypt" + - "{{ zitadel_workspace }}/certbot/www:/var/www/certbot" + networks: + - app + +networks: + app: diff --git a/roles/docker/VictoriaLogs/README.md b/roles/docker/VictoriaLogs/README.md new file mode 100644 index 0000000..6ddf8eb --- /dev/null +++ b/roles/docker/VictoriaLogs/README.md @@ -0,0 +1,5 @@ +# VictoriaLogs (docker) + +Placeholder role for docker-compose style deployment of VictoriaLogs.
+ +Templates include docker-compose.yaml with bootstrap nginx and certbot services mirroring the Zitadel setup. diff --git a/roles/docker/VictoriaLogs/tasks/main.yml b/roles/docker/VictoriaLogs/tasks/main.yml new file mode 100644 index 0000000..c4f5488 --- /dev/null +++ b/roles/docker/VictoriaLogs/tasks/main.yml @@ -0,0 +1,5 @@ +--- +# TODO: implement docker deployment tasks +- name: Placeholder task + debug: + msg: "Role placeholder. Implement docker deployment tasks." diff --git a/roles/docker/VictoriaLogs/templates/docker-compose.yaml b/roles/docker/VictoriaLogs/templates/docker-compose.yaml new file mode 100644 index 0000000..580d705 --- /dev/null +++ b/roles/docker/VictoriaLogs/templates/docker-compose.yaml @@ -0,0 +1,41 @@ +services: + bootstrap-nginx: + profiles: ["bootstrap"] + image: nginx:mainline-alpine + container_name: bootstrap-nginx + volumes: + - "{{ zitadel_workspace }}/certbot/www:/var/www/certbot" + - "{{ zitadel_workspace }}/certbot/conf:/etc/letsencrypt" + - "{{ zitadel_workspace }}/nginx/nginx.conf:/etc/nginx/nginx.conf" + - "{{ zitadel_workspace }}/nginx/conf.d/bootstrap-nginx.conf:/etc/nginx/conf.d/bootstrap-nginx.conf" + ports: + - "80:80" # only port 80 is bound for now + networks: + - app + healthcheck: + test: ["CMD", "wget", "-qO-", "http://localhost"] + interval: 3s + timeout: 2s + retries: 10 + start_period: 3s + certbot: + profiles: ["bootstrap"] + image: certbot/certbot + container_name: certbot + command: > + certonly --webroot + --webroot-path=/var/www/certbot + --email manbuzhe2009@qq.com + --agree-tos + --no-eff-email + --keep-until-expiring + --non-interactive + -d {{ zitadel_domain }} + volumes: + - "{{ zitadel_workspace }}/certbot/conf:/etc/letsencrypt" + - "{{ zitadel_workspace }}/certbot/www:/var/www/certbot" + networks: + - app + +networks: + app: diff --git a/roles/docker/VictoriaMetrics/README.md b/roles/docker/VictoriaMetrics/README.md new file mode 100644 index 0000000..9c5c2bc --- /dev/null +++ b/roles/docker/VictoriaMetrics/README.md @@
-0,0 +1,5 @@ +# VictoriaMetrics (docker) + +Placeholder role for docker-compose style deployment of VictoriaMetrics. + +Templates include docker-compose.yaml with bootstrap nginx and certbot services mirroring the Zitadel setup. diff --git a/roles/docker/VictoriaMetrics/tasks/main.yml b/roles/docker/VictoriaMetrics/tasks/main.yml new file mode 100644 index 0000000..c4f5488 --- /dev/null +++ b/roles/docker/VictoriaMetrics/tasks/main.yml @@ -0,0 +1,5 @@ +--- +# TODO: implement docker deployment tasks +- name: Placeholder task + debug: + msg: "Role placeholder. Implement docker deployment tasks." diff --git a/roles/docker/VictoriaMetrics/templates/docker-compose.yaml b/roles/docker/VictoriaMetrics/templates/docker-compose.yaml new file mode 100644 index 0000000..580d705 --- /dev/null +++ b/roles/docker/VictoriaMetrics/templates/docker-compose.yaml @@ -0,0 +1,41 @@ +services: + bootstrap-nginx: + profiles: ["bootstrap"] + image: nginx:mainline-alpine + container_name: bootstrap-nginx + volumes: + - "{{ zitadel_workspace }}/certbot/www:/var/www/certbot" + - "{{ zitadel_workspace }}/certbot/conf:/etc/letsencrypt" + - "{{ zitadel_workspace }}/nginx/nginx.conf:/etc/nginx/nginx.conf" + - "{{ zitadel_workspace }}/nginx/conf.d/bootstrap-nginx.conf:/etc/nginx/conf.d/bootstrap-nginx.conf" + ports: + - "80:80" # only port 80 is bound for now + networks: + - app + healthcheck: + test: ["CMD", "wget", "-qO-", "http://localhost"] + interval: 3s + timeout: 2s + retries: 10 + start_period: 3s + certbot: + profiles: ["bootstrap"] + image: certbot/certbot + container_name: certbot + command: > + certonly --webroot + --webroot-path=/var/www/certbot + --email manbuzhe2009@qq.com + --agree-tos + --no-eff-email + --keep-until-expiring + --non-interactive + -d {{ zitadel_domain }} + volumes: + - "{{ zitadel_workspace }}/certbot/conf:/etc/letsencrypt" + - "{{ zitadel_workspace }}/certbot/www:/var/www/certbot" + networks: + - app + +networks: + app: diff --git a/roles/docker/XControl/README.md
b/roles/docker/XControl/README.md new file mode 100644 index 0000000..e930488 --- /dev/null +++ b/roles/docker/XControl/README.md @@ -0,0 +1,113 @@ +# XControl Docker role + +This role provisions the XControl stack (Postgres, account service, RAG server, dashboard, Redis, and Nginx proxy with Certbot assets). Templates from `templates/` and static assets from `files/` are rendered into `{{ xcontrol_workspace }}` and the Docker Compose stack is started. + +## Layout +``` +files/ +├── docker-compose.yaml +├── config/ +│ ├── account.yaml +│ └── server.yaml +├── certbot/ +│ ├── conf/ +│ └── www/ +├── nginx/ +│ ├── conf.d/ +│ │ ├── accounts.conf +│ │ ├── artifact.conf +│ │ ├── bootstrap-nginx.conf +│ │ ├── default.conf +│ │ ├── homepage.conf +│ │ └── rag-server.conf +│ └── nginx.conf +└── run.sh +``` + +## Defaults +- `xcontrol_deploy_dir`: `/opt/xcontrol` +- `xcontrol_workspace`: `{{ xcontrol_deploy_dir }}` +- `xcontrol_certbot_domains`: `svc.plus` (comma-separated) +- `xcontrol_certbot_email`: `manbuzhe2009@qq.com` +- `xcontrol_homepage_domain`: `{{ xcontrol_primary_domain }}` +- `xcontrol_homepage_alias_domain`: `www.{{ xcontrol_primary_domain }}` +- `xcontrol_homepage_cn_domain`: `cn-homepage.{{ xcontrol_primary_domain }}` +- `xcontrol_accounts_domain`: `accounts.{{ xcontrol_primary_domain }}` +- `xcontrol_rag_domain`: `rag-server.{{ xcontrol_primary_domain }}` +- `xcontrol_rag_api_domain`: `api.{{ xcontrol_primary_domain }}` +- `xcontrol_artifact_domain`: `dl.{{ xcontrol_primary_domain }}` +- `xcontrol_artifact_cn_domain`: `cn-dl.{{ xcontrol_primary_domain }}` +- `xcontrol_db_host`: `db` +- `xcontrol_db_port`: `5432` +- `xcontrol_db_name`: `xcontrol` +- `xcontrol_db_user`: `xcontrol` +- `xcontrol_db_password`: `xcontrol` +- `xcontrol_account_mode`: `server-agent` +- `xcontrol_account_log_level`: `info` +- `xcontrol_account_auth_enable`: `true` +- `xcontrol_account_public_token`: `xcontrol-public-token-2024` +- `xcontrol_account_refresh_secret`: 
`xcontrol-refresh-secret-2024` +- `xcontrol_account_access_secret`: `xcontrol-access-secret-2024` +- `xcontrol_account_access_expiry`: `1h` +- `xcontrol_account_refresh_expiry`: `168h` +- `xcontrol_account_server_addr`: `:8080` +- `xcontrol_account_read_timeout`: `15s` +- `xcontrol_account_write_timeout`: `15s` +- `xcontrol_account_public_url`: `https://accounts.{{ xcontrol_primary_domain }}` +- `xcontrol_account_tls_enabled`: `false` +- `xcontrol_account_tls_redirect_http`: `false` +- `xcontrol_account_store_driver`: `postgres` +- `xcontrol_account_db_name`: `{{ xcontrol_db_name }}` +- `xcontrol_account_db_sslmode`: `disable` +- `xcontrol_account_db_max_open_conns`: `30` +- `xcontrol_account_db_max_idle_conns`: `10` +- `xcontrol_account_session_ttl`: `24h` +- `xcontrol_account_session_cache`: `memory` +- `xcontrol_account_smtp_host`: `smtp.example.com` +- `xcontrol_account_smtp_port`: `587` +- `xcontrol_account_smtp_username`: `apikey` +- `xcontrol_account_smtp_password`: `change-me` +- `xcontrol_account_smtp_from`: `XControl Account ` +- `xcontrol_account_smtp_timeout`: `10s` +- `xcontrol_account_smtp_tls_mode`: `auto` +- `xcontrol_account_smtp_tls_insecure_skip_verify`: `false` +- `xcontrol_account_xray_sync_enabled`: `false` +- `xcontrol_account_xray_sync_interval`: `5m` +- `xcontrol_account_xray_output_path`: `/usr/local/etc/xray/config.json` +- `xcontrol_account_xray_template_path`: `account/config/xray.config.template.json` +- `xcontrol_account_xray_validate_command`: `[]` +- `xcontrol_account_xray_restart_command`: `["systemctl", "restart", "xray.service"]` +- `xcontrol_account_agent_id`: `account-primary` +- `xcontrol_rag_server_addr`: `:8090` +- `xcontrol_rag_read_timeout`: `120s` +- `xcontrol_rag_write_timeout`: `120s` +- `xcontrol_rag_public_url`: `https://{{ xcontrol_rag_api_domain }}` +- `xcontrol_rag_allowed_origins`: `["https://{{ xcontrol_rag_api_domain }}", "https://{{ xcontrol_homepage_alias_domain }}", "https://{{ xcontrol_homepage_domain }}", 
"https://{{ xcontrol_accounts_domain }}", "http://localhost:3000", "http://127.0.0.1:3000"]` +- `xcontrol_rag_auth_enable`: `false` +- `xcontrol_rag_auth_url`: `https://{{ xcontrol_accounts_domain }}` +- `xcontrol_rag_api_base_url`: `https://{{ xcontrol_rag_api_domain }}` +- `xcontrol_rag_public_token`: `xcontrol-public-token-2025` +- `xcontrol_rag_redis_addr`: `""` +- `xcontrol_rag_redis_password`: `""` +- `xcontrol_rag_vectordb_db_name`: `rag` +- `xcontrol_rag_vectordb_sslmode`: `disable` +- `xcontrol_rag_vectordb_pgurl`: `postgres://{{ xcontrol_db_user }}:{{ xcontrol_db_password }}@{{ xcontrol_db_host }}:{{ xcontrol_db_port }}/{{ xcontrol_rag_vectordb_db_name }}?sslmode={{ xcontrol_rag_vectordb_sslmode }}` +- `xcontrol_rag_datasources`: `[{"name": "XControl", "repo": "https://github.com/svc-design/XControl", "path": "docs"}]` +- `xcontrol_rag_sync_repo_proxy`: `""` +- `xcontrol_rag_embedder_provider`: `chutes` +- `xcontrol_rag_embedder_models`: `["bge-m3"]` +- `xcontrol_rag_embedder_baseurl`: `http://127.0.0.1:9000` +- `xcontrol_rag_embedder_endpoint`: `http://127.0.0.1:9000/v1/embeddings` +- `xcontrol_rag_generator_provider`: `chutes` +- `xcontrol_rag_generator_models`: `["deepseek-r1:8b"]` +- `xcontrol_rag_generator_baseurl`: `http://127.0.0.1:11434` +- `xcontrol_rag_generator_endpoint`: `http://127.0.0.1:11434/v1/chat/completions` +- `xcontrol_rag_embedding_max_batch`: `64` +- `xcontrol_rag_embedding_dimension`: `1024` + +## RUN + +``` +ansible-playbook -i inventory.ini deploy_XControl_docker.yaml -e "domain=svc.plus" -D -C -l svc.plus +ansible-playbook -i inventory.ini deploy_XControl_docker.yaml -e "domain=svc.plus" -D -l svc.plus +``` diff --git a/roles/docker/XControl/defaults/main.yml b/roles/docker/XControl/defaults/main.yml new file mode 100644 index 0000000..719ff61 --- /dev/null +++ b/roles/docker/XControl/defaults/main.yml @@ -0,0 +1,112 @@ +--- +# Default deployment directory for XControl Docker stack +xcontrol_deploy_dir: /opt/xcontrol 
+xcontrol_workspace: "{{ xcontrol_deploy_dir }}"
+
+# Comma-separated certbot domains; the first entry is the primary one. List every served subdomain here: all vhosts share this one certificate.
+xcontrol_certbot_domains: svc.plus
+xcontrol_primary_domain: "{{ xcontrol_certbot_domains.split(',')[0] | trim }}"
+xcontrol_certbot_email: manbuzhe2009@qq.com
+
+# Subdomains for individual services
+xcontrol_homepage_domain: "{{ xcontrol_primary_domain }}"
+xcontrol_homepage_alias_domain: "www.{{ xcontrol_primary_domain }}"
+xcontrol_homepage_cn_domain: "cn-homepage.{{ xcontrol_primary_domain }}"
+xcontrol_accounts_domain: "accounts.{{ xcontrol_primary_domain }}"
+xcontrol_rag_domain: "rag-server.{{ xcontrol_primary_domain }}"
+xcontrol_rag_api_domain: "api.{{ xcontrol_primary_domain }}"
+xcontrol_artifact_domain: "dl.{{ xcontrol_primary_domain }}"
+xcontrol_artifact_cn_domain: "cn-dl.{{ xcontrol_primary_domain }}"
+
+# Database defaults
+xcontrol_db_host: db
+xcontrol_db_port: 5432
+xcontrol_db_name: xcontrol
+xcontrol_db_user: xcontrol
+xcontrol_db_password: xcontrol
+
+# Account service configuration defaults (placeholders only: override every token, secret and password via Ansible Vault)
+xcontrol_account_mode: server-agent
+xcontrol_account_log_level: info
+xcontrol_account_auth_enable: true
+xcontrol_account_public_token: xcontrol-public-token-2024
+xcontrol_account_refresh_secret: xcontrol-refresh-secret-2024
+xcontrol_account_access_secret: xcontrol-access-secret-2024
+xcontrol_account_access_expiry: 1h
+xcontrol_account_refresh_expiry: 168h
+xcontrol_account_server_addr: ":8080"
+xcontrol_account_read_timeout: 15s
+xcontrol_account_write_timeout: 15s
+xcontrol_account_public_url: "https://{{ xcontrol_accounts_domain }}"
+xcontrol_account_tls_enabled: false
+xcontrol_account_tls_redirect_http: false
+xcontrol_account_store_driver: postgres
+xcontrol_account_db_name: "{{ xcontrol_db_name }}"
+xcontrol_account_db_sslmode: disable
+xcontrol_account_db_max_open_conns: 30
+xcontrol_account_db_max_idle_conns: 10
+xcontrol_account_session_ttl: 24h
+xcontrol_account_session_cache: memory
+xcontrol_account_smtp_host: smtp.example.com
+xcontrol_account_smtp_port: 587
+xcontrol_account_smtp_username: apikey
+xcontrol_account_smtp_password: change-me
+xcontrol_account_smtp_from: "XControl Account "
+xcontrol_account_smtp_timeout: 10s
+xcontrol_account_smtp_tls_mode: auto
+xcontrol_account_smtp_tls_insecure_skip_verify: false
+xcontrol_account_xray_sync_enabled: false
+xcontrol_account_xray_sync_interval: 5m
+xcontrol_account_xray_output_path: /usr/local/etc/xray/config.json
+xcontrol_account_xray_template_path: account/config/xray.config.template.json
+xcontrol_account_xray_validate_command: []
+xcontrol_account_xray_restart_command:
+  - systemctl
+  - restart
+  - xray.service
+xcontrol_account_agent_id: account-primary
+
+# Image overrides (optional)
+xcontrol_account_image: ghcr.io/cloud-neutral-toolkit/account:latest
+xcontrol_rag_image: manbuzhe2009/rag-server:latest
+xcontrol_dashboard_image: manbuzhe2009/dashboard:latest
+xcontrol_db_image: manbuzhe2009/postgres-runtime:latest
+
+# RAG server configuration defaults
+xcontrol_rag_server_addr: ":8090"
+xcontrol_rag_read_timeout: 120s
+xcontrol_rag_write_timeout: 120s
+xcontrol_rag_public_url: "https://{{ xcontrol_rag_api_domain }}"
+xcontrol_rag_allowed_origins:
+  - "https://{{ xcontrol_rag_api_domain }}"
+  - "https://{{ xcontrol_homepage_alias_domain }}"
+  - "https://{{ xcontrol_homepage_domain }}"
+  - "https://{{ xcontrol_accounts_domain }}"
+  - "http://localhost:3000"
+  - "http://127.0.0.1:3000"
+xcontrol_rag_auth_enable: false
+xcontrol_rag_auth_url: "https://{{ xcontrol_accounts_domain }}"
+xcontrol_rag_api_base_url: "https://{{ xcontrol_rag_api_domain }}"
+xcontrol_rag_public_token: xcontrol-public-token-2025
+xcontrol_rag_redis_addr: ""
+xcontrol_rag_redis_password: ""
+xcontrol_rag_vectordb_db_name: rag
+xcontrol_rag_vectordb_sslmode: disable
+xcontrol_rag_vectordb_pgurl: "postgres://{{ xcontrol_db_user }}:{{ xcontrol_db_password }}@{{ xcontrol_db_host }}:{{ xcontrol_db_port }}/{{ xcontrol_rag_vectordb_db_name }}?sslmode={{ xcontrol_rag_vectordb_sslmode }}"
+xcontrol_rag_datasources:
+  - name: XControl
+    repo: https://github.com/svc-design/XControl
+    path: docs
+xcontrol_rag_sync_repo_proxy: ""
+xcontrol_rag_embedder_provider: chutes
+xcontrol_rag_embedder_models:
+  - bge-m3
+xcontrol_rag_embedder_baseurl: http://127.0.0.1:9000
+xcontrol_rag_embedder_endpoint: http://127.0.0.1:9000/v1/embeddings
+xcontrol_rag_generator_provider: chutes
+xcontrol_rag_generator_models:
+  - deepseek-r1:8b
+xcontrol_rag_generator_baseurl: http://127.0.0.1:11434
+xcontrol_rag_generator_endpoint: http://127.0.0.1:11434/v1/chat/completions
+xcontrol_rag_embedding_max_batch: 64
+xcontrol_rag_embedding_dimension: 1024
diff --git a/roles/docker/XControl/files/nginx/nginx.conf b/roles/docker/XControl/files/nginx/nginx.conf
new file mode 100644
index 0000000..9fe4ac3
--- /dev/null
+++ b/roles/docker/XControl/files/nginx/nginx.conf
@@ -0,0 +1,11 @@
+events {}
+
+http {
+    include       mime.types;
+    default_type  application/octet-stream;
+
+    # Docker's embedded DNS; required by the Lua cosocket that connects to "redis".
+    resolver 127.0.0.11 valid=30s ipv6=off;
+
+    include /etc/nginx/conf.d/*.conf;
+}
diff --git a/roles/docker/XControl/files/run.sh b/roles/docker/XControl/files/run.sh
new file mode 100644
index 0000000..369e383
--- /dev/null
+++ b/roles/docker/XControl/files/run.sh
@@ -0,0 +1,6 @@
+#!/usr/bin/env bash
+set -euo pipefail
+
+# Helper script to start the XControl docker compose stack
+cd "$(dirname "$0")"
+docker compose -f docker-compose.yaml up -d
diff --git a/roles/docker/XControl/tasks/main.yml b/roles/docker/XControl/tasks/main.yml
new file mode 100644
index 0000000..8c10436
--- /dev/null
+++ b/roles/docker/XControl/tasks/main.yml
@@ -0,0 +1,91 @@
+---
+- name: Ensure XControl directories exist
+  become: true
+  ansible.builtin.file:
+    path: "{{ item }}"
+    state: directory
+    mode: "0755"
+  loop:
+    - "{{ xcontrol_workspace }}"
+    - "{{ xcontrol_workspace }}/certbot"
+    - "{{ xcontrol_workspace }}/certbot/conf"
+    - "{{ xcontrol_workspace }}/certbot/www"
+    - "{{ xcontrol_workspace }}/config"
+    - "{{ xcontrol_workspace }}/nginx"
+    - "{{ xcontrol_workspace }}/nginx/conf.d"
+
+# No recursive `mode` here: forcing 0755 onto every file would make the
+# Let's Encrypt private keys under certbot/conf world-readable on each run.
+- name: Ensure XControl workspace ownership
+  become: true
+  ansible.builtin.file:
+    path: "{{ xcontrol_workspace }}"
+    state: directory
+    recurse: true
+    owner: "1000"
+    group: "1000"
+
+- name: Template XControl configuration files
+  become: true
+  ansible.builtin.template:
+    src: "{{ item.src }}"
+    dest: "{{ xcontrol_workspace }}/{{ item.dest }}"
+    mode: "{{ item.mode | default('0644') }}"
+  loop:
+    - { src: 'docker-compose.yaml', dest: 'docker-compose.yaml' }
+    - { src: 'config/account.yaml', dest: 'config/account.yaml' }
+    - { src: 'config/server.yaml', dest: 'config/server.yaml' }
+    - { src: 'nginx/conf.d/default.conf', dest: 'nginx/conf.d/default.conf' }
+    - { src: 'nginx/conf.d/bootstrap-nginx.conf', dest: 'nginx/conf.d/bootstrap-nginx.conf' }
+    - { src: 'nginx/conf.d/accounts.conf', dest: 'nginx/conf.d/accounts.conf' }
+    - { src: 'nginx/conf.d/homepage.conf', dest: 'nginx/conf.d/homepage.conf' }
+    - { src: 'nginx/conf.d/rag-server.conf', dest: 'nginx/conf.d/rag-server.conf' }
+    - { src: 'nginx/conf.d/artifact.conf', dest: 'nginx/conf.d/artifact.conf' }
+
+- name: Copy XControl static files
+  become: true
+  ansible.builtin.copy:
+    src: "{{ item.src }}"
+    dest: "{{ xcontrol_workspace }}/{{ item.dest }}"
+    mode: "{{ item.mode | default('0644') }}"
+  loop:
+    - { src: 'run.sh', dest: 'run.sh', mode: '0755' }
+    - { src: 'nginx/nginx.conf', dest: 'nginx/nginx.conf' }
+
+- name: Check for an existing certificate
+  become: true
+  ansible.builtin.stat:
+    path: "{{ xcontrol_workspace }}/certbot/conf/live/{{ xcontrol_primary_domain }}/fullchain.pem"
+  register: xcontrol_cert
+
+# The bootstrap sequence binds host port 80, which collides with
+# proxy-external-tls once the stack is running, so it only runs while no
+# certificate has been issued yet. Teardown sits in `always` so a failed
+# certbot run does not leave the bootstrap container holding the port.
+- name: Issue the initial certificate via the bootstrap profile
+  when: not xcontrol_cert.stat.exists
+  become: true
+  block:
+    - name: Bootstrap NGINX (80-only for ACME)
+      ansible.builtin.command: docker compose --profile bootstrap -f {{ xcontrol_workspace }}/docker-compose.yaml up -d bootstrap-nginx
+      args:
+        chdir: "{{ xcontrol_workspace }}"
+      changed_when: true
+
+    - name: Run certbot initial ACME challenge
+      ansible.builtin.command: docker compose --profile bootstrap -f {{ xcontrol_workspace }}/docker-compose.yaml run --rm certbot
+      args:
+        chdir: "{{ xcontrol_workspace }}"
+      changed_when: true
+  always:
+    - name: Destroy Bootstrap NGINX (80-only for ACME)
+      ansible.builtin.command: docker compose --profile bootstrap -f {{ xcontrol_workspace }}/docker-compose.yaml down bootstrap-nginx
+      args:
+        chdir: "{{ xcontrol_workspace }}"
+      changed_when: true
+
+- name: Bring up XControl stack
+  become: true
+  ansible.builtin.command: docker compose -f {{ xcontrol_workspace }}/docker-compose.yaml up -d
+  args:
+    chdir: "{{ xcontrol_workspace }}"
+  changed_when: true
diff --git a/roles/docker/XControl/templates/config/account.yaml b/roles/docker/XControl/templates/config/account.yaml
new file mode 100644
index 0000000..b028a0a
--- /dev/null
+++ b/roles/docker/XControl/templates/config/account.yaml
@@ -0,0 +1,65 @@
+# Scalars and lists are rendered with to_json (JSON is valid YAML flow syntax);
+# to_nice_yaml would append a '...' document-end marker or emit block lists inline.
+mode: "{{ xcontrol_account_mode }}"
+
+log:
+  level: {{ xcontrol_account_log_level | to_json }}
+
+auth:
+  enable: {{ xcontrol_account_auth_enable | bool | lower }}
+  token:
+    publicToken: "{{ xcontrol_account_public_token }}"
+    refreshSecret: "{{ xcontrol_account_refresh_secret }}"
+    accessSecret: "{{ xcontrol_account_access_secret }}"
+    accessExpiry: "{{ xcontrol_account_access_expiry }}"
+    refreshExpiry: "{{ xcontrol_account_refresh_expiry }}"
+
+server:
+  addr: "{{ xcontrol_account_server_addr }}"
+  readTimeout: {{ xcontrol_account_read_timeout | to_json }}
+  writeTimeout: {{ xcontrol_account_write_timeout | to_json }}
+  publicUrl: "{{ xcontrol_account_public_url }}"
+  allowedOrigins:
+    - "https://{{ xcontrol_accounts_domain }}"
+    - "https://{{ xcontrol_rag_api_domain }}"
+    - "https://{{ xcontrol_homepage_alias_domain }}"
+    - "http://localhost:3000"
+    - "http://127.0.0.1:3000"
+    - "http://localhost:8080"
+    - "http://127.0.0.1:8080"
+  tls:
+    enabled: {{ xcontrol_account_tls_enabled | bool | lower }}
+    redirectHttp: {{ xcontrol_account_tls_redirect_http | bool | lower }}
+
+store:
+  driver: "{{ xcontrol_account_store_driver }}"
+  dsn: "postgres://{{ xcontrol_db_user }}:{{ xcontrol_db_password }}@{{ xcontrol_db_host }}:{{ xcontrol_db_port }}/{{ xcontrol_account_db_name }}?sslmode={{ xcontrol_account_db_sslmode }}"
+  maxOpenConns: {{ xcontrol_account_db_max_open_conns }}
+  maxIdleConns: {{ xcontrol_account_db_max_idle_conns }}
+
+session:
+  ttl: {{ xcontrol_account_session_ttl | to_json }}
+  cache: "{{ xcontrol_account_session_cache }}"
+
+smtp:
+  host: "{{ xcontrol_account_smtp_host }}"
+  port: {{ xcontrol_account_smtp_port }}
+  username: "{{ xcontrol_account_smtp_username }}"
+  password: "{{ xcontrol_account_smtp_password }}"
+  from: "{{ xcontrol_account_smtp_from }}"
+  timeout: {{ xcontrol_account_smtp_timeout | to_json }}
+  tls:
+    mode: "{{ xcontrol_account_smtp_tls_mode }}"
+    insecureSkipVerify: {{ xcontrol_account_smtp_tls_insecure_skip_verify | bool | lower }}
+
+xray:
+  sync:
+    enabled: {{ xcontrol_account_xray_sync_enabled | bool | lower }}
+    interval: {{ xcontrol_account_xray_sync_interval | to_json }}
+    outputPath: "{{ xcontrol_account_xray_output_path }}"
+    templatePath: "{{ xcontrol_account_xray_template_path }}"
+    validateCommand: {{ xcontrol_account_xray_validate_command | to_json }}
+    restartCommand: {{ xcontrol_account_xray_restart_command | to_json }}
+
+agent:
+  id: "{{ xcontrol_account_agent_id }}"
diff --git a/roles/docker/XControl/templates/config/server.yaml b/roles/docker/XControl/templates/config/server.yaml
new file mode 100644
index 0000000..68a7325
--- /dev/null
+++ b/roles/docker/XControl/templates/config/server.yaml
@@ -0,0 +1,54 @@
+server:
+  addr: "{{ xcontrol_rag_server_addr }}"
+  readTimeout: "{{ xcontrol_rag_read_timeout }}"
+  writeTimeout: "{{ xcontrol_rag_write_timeout }}"
+  publicUrl: "{{ xcontrol_rag_public_url }}"
+  allowedOrigins:
+{% for origin in xcontrol_rag_allowed_origins %}
+    - "{{ origin }}"
+{% endfor %}
+
+auth:
+  enable: {{ xcontrol_rag_auth_enable | bool | lower }}
+  authUrl: "{{ xcontrol_rag_auth_url }}"
+  apiBaseUrl: "{{ xcontrol_rag_api_base_url }}"
+  publicToken: "{{ xcontrol_rag_public_token }}"
+
+global:
+  redis:
+    addr: "{{ xcontrol_rag_redis_addr }}"
+    password: "{{ xcontrol_rag_redis_password }}"
+  vectordb:
+    pgurl: "{{ xcontrol_rag_vectordb_pgurl }}"
+  datasources:
+{% for datasource in xcontrol_rag_datasources %}
+    - name: "{{ datasource.name }}"
+      repo: "{{ datasource.repo }}"
+      path: "{{ datasource.path }}"
+{% endfor %}
+
+sync:
+  repo:
+    proxy: "{{ xcontrol_rag_sync_repo_proxy }}"
+
+models:
+  embedder:
+    provider: "{{ xcontrol_rag_embedder_provider }}"
+    models:
+{% for model in xcontrol_rag_embedder_models %}
+      - "{{ model }}"
+{% endfor %}
+    baseurl: "{{ xcontrol_rag_embedder_baseurl }}"
+    endpoint: "{{ xcontrol_rag_embedder_endpoint }}"
+  generator:
+    provider: "{{ xcontrol_rag_generator_provider }}"
+    models:
+{% for model in xcontrol_rag_generator_models %}
+      - "{{ model }}"
+{% endfor %}
+    baseurl: "{{ xcontrol_rag_generator_baseurl }}"
+    endpoint: "{{ xcontrol_rag_generator_endpoint }}"
+
+embedding:
+  max_batch: {{ xcontrol_rag_embedding_max_batch }}
+  dimension: {{ xcontrol_rag_embedding_dimension }}
diff --git a/roles/docker/XControl/templates/docker-compose.yaml b/roles/docker/XControl/templates/docker-compose.yaml
new file mode 100644
index 0000000..35509d7
--- /dev/null
+++ b/roles/docker/XControl/templates/docker-compose.yaml
@@ -0,0 +1,151 @@
+services:
+  db:
+    image: "{{ xcontrol_db_image }}"
+    container_name: xcontrol-db
+    restart: unless-stopped
+    environment:
+      POSTGRES_DB: "{{ xcontrol_db_name }}"
+      POSTGRES_USER: "{{ xcontrol_db_user }}"
+      POSTGRES_PASSWORD: "{{ xcontrol_db_password }}"
+    healthcheck:
+      test: ["CMD-SHELL", "pg_isready -U {{ xcontrol_db_user }}"]
+      interval: 5s
+      timeout: 60s
+      retries: 10
+      start_period: 5s
+    volumes:
+      - "data:/var/lib/postgresql/data:rw"
+    networks:
+      - db
+
+  account:
+    image: "{{ xcontrol_account_image }}"
+    container_name: account
+    restart: unless-stopped
+    environment:
+      PORT: 8080
+      CONFIG_PATH: /etc/xcontrol/account-compose.yaml
+    volumes:
+      - "{{ xcontrol_workspace }}/config/account.yaml:/etc/xcontrol/account-compose.yaml:ro"
+    depends_on:
+      db:
+        condition: service_healthy
+    ports:
+      - "8080:8080"
+    networks:
+      - app
+      - db
+
+  rag-server:
+    image: "{{ xcontrol_rag_image }}"
+    container_name: rag-server
+    restart: unless-stopped
+    environment:
+      PORT: 8090
+      CONFIG_PATH: /etc/rag-server/server-compose.yaml
+    volumes:
+      - "{{ xcontrol_workspace }}/config/server.yaml:/etc/rag-server/server-compose.yaml:ro"
+    depends_on:
+      db:
+        condition: service_healthy
+    ports:
+      - "8090:8090"
+    networks:
+      - app
+      - db
+
+  dashboard:
+    image: "{{ xcontrol_dashboard_image }}"
+    container_name: dashboard
+    restart: unless-stopped
+    environment:
+      PORT: 3000
+    ports:
+      - "3000:3000"
+    depends_on:
+      account:
+        condition: service_started
+      rag-server:
+        condition: service_started
+    networks:
+      - app
+
+  proxy-external-tls:
+    # OpenResty instead of stock nginx: the vhost configs rely on
+    # access_by_lua_block and lua-resty-redis, which plain nginx cannot load.
+    image: "{{ xcontrol_proxy_image | default('openresty/openresty:alpine') }}"
+    container_name: proxy-external-tls
+    restart: unless-stopped
+    # NOTE(review): homepage.conf (root /dashboard/) and artifact.conf
+    # (root /data/update-server) expect static content that is not mounted here.
+    volumes:
+      - "{{ xcontrol_workspace }}/nginx/nginx.conf:/usr/local/openresty/nginx/conf/nginx.conf:ro"
+      - "{{ xcontrol_workspace }}/nginx/conf.d:/etc/nginx/conf.d:ro"
+      - "{{ xcontrol_workspace }}/certbot/conf:/etc/letsencrypt"
+      - "{{ xcontrol_workspace }}/certbot/www:/var/www/certbot"
+    ports:
+      - "80:80"
+      - "443:443"
+    networks:
+      - app
+    depends_on:
+      account:
+        condition: service_started
+      rag-server:
+        condition: service_started
+      dashboard:
+        condition: service_started
+
+  redis:
+    image: redis:7-alpine
+    container_name: redis
+    restart: unless-stopped
+    command: ["redis-server", "--save", "", "--appendonly", "no"]
+    networks:
+      - app
+
+  bootstrap-nginx:
+    profiles: ["bootstrap"]
+    image: nginx:mainline-alpine
+    container_name: bootstrap-nginx
+    volumes:
+      - "{{ xcontrol_workspace }}/certbot/www:/var/www/certbot"
+      - "{{ xcontrol_workspace }}/certbot/conf:/etc/letsencrypt"
+      - "{{ xcontrol_workspace }}/nginx/nginx.conf:/etc/nginx/nginx.conf"
+      - "{{ xcontrol_workspace }}/nginx/conf.d/bootstrap-nginx.conf:/etc/nginx/conf.d/default.conf"
+    ports:
+      - "80:80"
+    networks:
+      - app
+    healthcheck:
+      test: ["CMD", "wget", "-qO-", "http://localhost"]
+      interval: 3s
+      timeout: 2s
+      retries: 10
+      start_period: 3s
+
+  certbot:
+    profiles: ["bootstrap"]
+    image: certbot/certbot
+    container_name: certbot
+    command: >
+      certonly --webroot
+      --webroot-path=/var/www/certbot
+      --email {{ xcontrol_certbot_email }}
+      --agree-tos
+      --no-eff-email
+      --keep-until-expiring
+      --non-interactive
+      -d {{ xcontrol_certbot_domains }}
+    volumes:
+      - "{{ xcontrol_workspace }}/certbot/conf:/etc/letsencrypt"
+      - "{{ xcontrol_workspace }}/certbot/www:/var/www/certbot"
+    networks:
+      - app
+
+networks:
+  app:
+  db:
+
+volumes:
+  data:
diff --git a/roles/docker/XControl/templates/nginx/conf.d/accounts.conf b/roles/docker/XControl/templates/nginx/conf.d/accounts.conf
new file mode 100644
index 0000000..0d8edbe
--- /dev/null
+++ b/roles/docker/XControl/templates/nginx/conf.d/accounts.conf
@@ -0,0 +1,40 @@
+server {
+    listen 80;
+    server_name {{ xcontrol_accounts_domain }};
+    return 301 https://$host$request_uri;
+}
+
+server {
+    listen 443 ssl;
+    server_name {{ xcontrol_accounts_domain }};
+
+    ssl_certificate /etc/letsencrypt/live/{{ xcontrol_primary_domain }}/fullchain.pem;
+    ssl_certificate_key /etc/letsencrypt/live/{{ xcontrol_primary_domain }}/privkey.pem;
+    ssl_protocols TLSv1.2 TLSv1.3;
+    ssl_ciphers HIGH:!aNULL:!MD5;
+
+    location ^~ /api/auth/ {
+        proxy_pass http://account:8080;
+        proxy_http_version 1.1;
+
+        proxy_set_header Host $host;
+        proxy_set_header X-Real-IP $remote_addr;
+        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
+        proxy_set_header X-Forwarded-Proto $scheme;
+
+        add_header Access-Control-Allow-Origin $cors_origin always;
+        add_header Access-Control-Allow-Methods "GET, POST, PUT, DELETE, OPTIONS" always;
+        add_header Access-Control-Allow-Headers "Authorization, Content-Type, Cookie" always;
+        add_header Access-Control-Allow-Credentials "true" always;
+
+        if ($request_method = OPTIONS) {
+            return 204;
+        }
+
+        add_header Cache-Control "no-store, no-cache, must-revalidate, proxy-revalidate";
+        add_header Pragma "no-cache";
+        add_header Expires "0";
+
+        proxy_cookie_path / "/; Secure; HttpOnly; SameSite=None";
+    }
+}
diff --git a/roles/docker/XControl/templates/nginx/conf.d/artifact.conf b/roles/docker/XControl/templates/nginx/conf.d/artifact.conf
new file mode 100644
index 0000000..547c4b2
--- /dev/null
+++ b/roles/docker/XControl/templates/nginx/conf.d/artifact.conf
@@ -0,0 +1,47 @@
+server {
+    listen 443 ssl;
+    server_name {{ xcontrol_artifact_domain }} {{ xcontrol_artifact_cn_domain }};
+
+    ssl_certificate /etc/letsencrypt/live/{{ xcontrol_primary_domain }}/fullchain.pem;
+    ssl_certificate_key /etc/letsencrypt/live/{{ xcontrol_primary_domain }}/privkey.pem;
+    ssl_protocols TLSv1.2 TLSv1.3;
+    ssl_ciphers HIGH:!aNULL:!MD5;
+
+    root /data/update-server;
+    index index.html;
+
+    location ^~ /.well-known/ { allow all; }
+
+    # JSON only -- keep this ahead of the generic / location
+    location ~* \.json$ {
+        try_files $uri =404;
+        add_header Cache-Control "public, max-age=60, s-maxage=60, stale-while-revalidate=300";
+        default_type application/json;
+    }
+
+    # Directory listing
+    location / {
+        autoindex on;
+        autoindex_exact_size off;
+        autoindex_localtime on;
+        add_header Accept-Ranges bytes;
+        try_files $uri $uri/ =404;
+    }
+
+    # Serve large packages directly
+    location ~* \.(?:dmg|zip|tar\.gz|deb|rpm|exe|pkg|appimage|apk|ipa)$ {
+        expires 7d;
+        access_log off;
+        add_header Cache-Control "public";
+        add_header Accept-Ranges bytes;
+    }
+
+    # Hide dotfiles (but do not block /.well-known/)
+    location ~ /\.(?!well-known/)[^/]+ { deny all; }
+}
+
+server {
+    listen 80;
+    server_name {{ xcontrol_artifact_domain }} {{ xcontrol_artifact_cn_domain }};
+    return 301 https://$host$request_uri;
+}
diff --git a/roles/docker/XControl/templates/nginx/conf.d/bootstrap-nginx.conf b/roles/docker/XControl/templates/nginx/conf.d/bootstrap-nginx.conf
new file mode 100644
index 0000000..e802d50
--- /dev/null
+++ b/roles/docker/XControl/templates/nginx/conf.d/bootstrap-nginx.conf
@@ -0,0 +1,12 @@
+server {
+    listen 80;
+    server_name _;
+
+    location ^~ /.well-known/acme-challenge/ {
+        root /var/www/certbot;
+    }
+
+    location / {
+        return 200 "bootstrap";
+    }
+}
diff --git a/roles/docker/XControl/templates/nginx/conf.d/default.conf b/roles/docker/XControl/templates/nginx/conf.d/default.conf
new file mode 100644
index 0000000..fb6a9e2
--- /dev/null
+++ b/roles/docker/XControl/templates/nginx/conf.d/default.conf
@@ -0,0 +1,49 @@
+# CORS allow-list consumed by accounts.conf and rag-server.conf: the request
+# Origin is echoed back only when it is one of the known front-ends.
+map $http_origin $cors_origin {
+    default "";
+    "https://{{ xcontrol_homepage_domain }}" $http_origin;
+    "https://{{ xcontrol_homepage_alias_domain }}" $http_origin;
+    "https://{{ xcontrol_accounts_domain }}" $http_origin;
+    "https://{{ xcontrol_rag_api_domain }}" $http_origin;
+    "http://localhost:3000" $http_origin;
+    "http://127.0.0.1:3000" $http_origin;
+    "http://localhost:8080" $http_origin;
+    "http://127.0.0.1:8080" $http_origin;
+}
+
+# ----------------------------------------------------
+# 80 - ACME Challenge + Redirect to HTTPS for homepage
+# ----------------------------------------------------
+server {
+    listen 80;
+    server_name {{ xcontrol_homepage_domain }};
+
+    location ^~ /.well-known/acme-challenge/ {
+        root /var/www/certbot;
+    }
+
+    location / {
+        return 301 https://$host$request_uri;
+    }
+}
+
+# ----------------------------------------------------
+# 443 - TLS Termination for homepage
+# ----------------------------------------------------
+server {
+    listen 443 ssl http2;
+    server_name {{ xcontrol_homepage_domain }};
+
+    ssl_certificate /etc/letsencrypt/live/{{ xcontrol_primary_domain }}/fullchain.pem;
+    ssl_certificate_key /etc/letsencrypt/live/{{ xcontrol_primary_domain }}/privkey.pem;
+
+    ssl_protocols TLSv1.2 TLSv1.3;
+    ssl_prefer_server_ciphers on;
+
+    location / {
+        proxy_pass http://dashboard:3000;
+        proxy_set_header Host $host;
+        proxy_set_header X-Forwarded-Proto https;
+    }
+}
diff --git a/roles/docker/XControl/templates/nginx/conf.d/homepage.conf b/roles/docker/XControl/templates/nginx/conf.d/homepage.conf
new file mode 100644
index 0000000..9c1474e
--- /dev/null
+++ b/roles/docker/XControl/templates/nginx/conf.d/homepage.conf
@@ -0,0 +1,141 @@
+server {
+    listen 80;
+    server_name {{ xcontrol_homepage_alias_domain }} {{ xcontrol_homepage_cn_domain }};
+
+    # Certbot HTTP-01 challenge
+    location ^~ /.well-known/acme-challenge/ {
+        root /var/www/certbot;
+    }
+
+    # All HTTP → HTTPS
+    location / {
+        return 301 https://$host$request_uri;
+    }
+}
+
+server {
+    listen 443 ssl;
+    server_name {{ xcontrol_homepage_alias_domain }} {{ xcontrol_homepage_cn_domain }};
+
+    ssl_certificate /etc/letsencrypt/live/{{ xcontrol_primary_domain }}/fullchain.pem;
+    ssl_certificate_key /etc/letsencrypt/live/{{ xcontrol_primary_domain }}/privkey.pem;
+    ssl_protocols TLSv1.2 TLSv1.3;
+    ssl_ciphers HIGH:!aNULL:!MD5;
+
+    # ====== Static root (Next.js export output) ======
+    root /dashboard/;
+    index index.html;
+
+    # (Optional) allow ACME / health-check paths
+    location ^~ /.well-known/ { allow all; }
+
+    # =======================
+    # API reverse proxy
+    # =======================
+    location /api/ {
+        proxy_pass http://account:8080;
+        proxy_http_version 1.1;
+        proxy_set_header Host $host;
+        proxy_set_header X-Real-IP $remote_addr;
+        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
+        proxy_set_header X-Forwarded-Proto $scheme;
+    }
+
+    # Daily per-user rate limit for /api/askai (needs OpenResty + lua-resty-redis)
+    location = /api/askai {
+        access_by_lua_block {
+            local redis = require "resty.redis"
+            local r = redis:new()
+            r:set_timeout(200)
+            local ok, err = r:connect("redis", 6379)
+            if not ok then
+                ngx.log(ngx.ERR, "Redis connect error: ", err)
+                return ngx.exit(500)
+            end
+
+            local user = ngx.var.arg_user or ngx.var.remote_addr
+            local today = os.date("%Y%m%d")
+            local key = "limit:user:" .. user .. ":" .. today
+
+            local count, err = r:incr(key)
+            if not count then  -- INCR failed: count is nil and must not be compared below
+                ngx.log(ngx.ERR, "Redis incr error: ", err)
+                return ngx.exit(500)
+            end
+            if count == 1 then r:expire(key, 86400) end
+            r:set_keepalive(10000, 32)  -- hand the connection back to the pool
+            if count > 200 then
+                ngx.status = 429
+                ngx.header["Content-Type"] = "text/plain; charset=utf-8"
+                ngx.say("Too Many Requests: daily limit reached")
+                return ngx.exit(429)
+            end
+        }
+
+        proxy_pass http://account:8080;
+        proxy_http_version 1.1;
+        proxy_set_header Host $host;
+        proxy_set_header X-Real-IP $remote_addr;
+        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
+        proxy_set_header X-Forwarded-Proto $scheme;
+    }
+
+    # =======================
+    # Serve static files directly (replaces the former Next.js dynamic proxy)
+    # =======================
+
+    # Next.js exported static assets (content-hashed -> long cache)
+    location ^~ /_next/static/ {
+        try_files $uri =404;
+        access_log off;
+        expires 1y;
+        add_header Cache-Control "public, immutable, max-age=31536000";
+    }
+
+    # Other common static assets: medium cache lifetime
+    location ~* \.(?:js|css|png|jpg|jpeg|gif|svg|webp|ico|woff2?|ttf)$ {
+        try_files $uri =404;
+        access_log off;
+        expires 7d;
+        add_header Cache-Control "public, max-age=604800";
+    }
+
+    # Home page and all exported routes: match by file/directory
+    # Misses are meant to reach 404.html; note that try_files tries /index.html first
+    location / {
+        try_files $uri $uri/ /index.html =404;
+    }
+
+    # Explicit handling of the 404/500 routes (Next export generates 404/, 500/ and matching .html files)
+    location = /404.html { internal; }
+    error_page 404 /404.html;
+
+    # Serve /favicon.ico directly when it exists
+    location = /favicon.ico {
+        try_files /favicon.ico =204;
+        access_log off;
+        expires 30d;
+        add_header Cache-Control "public, max-age=2592000";
+    }
+
+    # (Optional) enable directory listing for selected directories (e.g. dl-index, docs, download)
+    # Keep this block if listing pages are wanted; otherwise remove it
+    location ^~ /dl-index/ {
+        autoindex on;
+        autoindex_exact_size off;
+        autoindex_localtime on;
+        try_files $uri $uri/ =404;
+    }
+
+    # Deny access to hidden files (e.g. .env)
+    location ~ /\. {
+        deny all;
+    }
+
+    # (Optional) enable gzip (add br as well if ngx_brotli is available)
+    gzip on;
+    gzip_comp_level 5;
+    gzip_min_length 1k;
+    gzip_types text/plain text/css application/javascript application/json application/xml image/svg+xml;
+    gzip_vary on;
+}
diff --git a/roles/docker/XControl/templates/nginx/conf.d/rag-server.conf b/roles/docker/XControl/templates/nginx/conf.d/rag-server.conf
new file mode 100644
index 0000000..0fee182
--- /dev/null
+++ b/roles/docker/XControl/templates/nginx/conf.d/rag-server.conf
@@ -0,0 +1,74 @@
+server {
+    listen 80;
+    server_name {{ xcontrol_rag_domain }} {{ xcontrol_rag_api_domain }};
+    return 301 https://$host$request_uri;
+}
+
+server {
+    listen 443 ssl;
+    server_name {{ xcontrol_rag_domain }} {{ xcontrol_rag_api_domain }};
+
+    ssl_certificate /etc/letsencrypt/live/{{ xcontrol_primary_domain }}/fullchain.pem;
+    ssl_certificate_key /etc/letsencrypt/live/{{ xcontrol_primary_domain }}/privkey.pem;
+    ssl_protocols TLSv1.2 TLSv1.3;
+    ssl_ciphers HIGH:!aNULL:!MD5;
+
+    location ^~ /api/ {
+        proxy_pass http://rag-server:8090;
+        proxy_http_version 1.1;
+
+        proxy_set_header Host $host;
+        proxy_set_header X-Real-IP $remote_addr;
+        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
+        proxy_set_header X-Forwarded-Proto $scheme;
+
+        add_header Access-Control-Allow-Origin $cors_origin always;
+        add_header Access-Control-Allow-Methods "GET, POST, PUT, DELETE, OPTIONS" always;
+        add_header Access-Control-Allow-Headers "Authorization, Content-Type, Cookie" always;
+        add_header Access-Control-Allow-Credentials "true" always;
+
+        if ($request_method = OPTIONS) {
+            return 204;
+        }
+
+        add_header Cache-Control "no-store";
+    }
+
+    location = /api/askai {
+        access_by_lua_block {
+            local redis = require "resty.redis"
+            local r = redis:new()
+            r:set_timeout(200)
+            local ok, err = r:connect("redis", 6379)
+            if not ok then
+                ngx.log(ngx.ERR, "Redis connect error: ", err)
+                return ngx.exit(500)
+            end
+
+            local user = ngx.var.arg_user or ngx.var.remote_addr
+            local today = os.date("%Y%m%d")
+            local key = "limit:user:" .. user .. ":" .. today
+
+            local count, err = r:incr(key)
+            if not count then  -- INCR failed: count is nil and must not be compared below
+                ngx.log(ngx.ERR, "Redis incr error: ", err)
+                return ngx.exit(500)
+            end
+            if count == 1 then r:expire(key, 86400) end
+            r:set_keepalive(10000, 32)  -- hand the connection back to the pool
+            if count > 200 then
+                ngx.status = 429
+                ngx.header["Content-Type"] = "text/plain; charset=utf-8"
+                ngx.say("Too Many Requests: daily limit reached")
+                return ngx.exit(429)
+            end
+        }
+
+        proxy_pass http://rag-server:8090;
+        proxy_http_version 1.1;
+        proxy_set_header Host $host;
+        proxy_set_header X-Real-IP $remote_addr;
+        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
+        proxy_set_header X-Forwarded-Proto $scheme;
+    }
+}
diff --git a/roles/docker/clickhouse/README.md b/roles/docker/clickhouse/README.md
new file mode 100644
index 0000000..d506f28
--- /dev/null
+++ b/roles/docker/clickhouse/README.md
@@ -0,0 +1,3 @@
+# clickhouse (docker)
+
+Placeholder role for docker-compose style deployment of clickhouse.
diff --git a/roles/docker/clickhouse/tasks/main.yml b/roles/docker/clickhouse/tasks/main.yml
new file mode 100644
index 0000000..c4f5488
--- /dev/null
+++ b/roles/docker/clickhouse/tasks/main.yml
@@ -0,0 +1,5 @@
+---
+# TODO: implement docker deployment tasks
+- name: Placeholder task
+  ansible.builtin.debug:
+    msg: "Role placeholder. Implement docker deployment tasks."
diff --git a/roles/docker/embedding-service/README.md b/roles/docker/embedding-service/README.md
new file mode 100644
index 0000000..39f1cc3
--- /dev/null
+++ b/roles/docker/embedding-service/README.md
@@ -0,0 +1,3 @@
+# embedding-service (docker)
+
+Placeholder role for docker-compose style deployment of embedding-service.
diff --git a/roles/docker/embedding-service/tasks/main.yml b/roles/docker/embedding-service/tasks/main.yml
new file mode 100644
index 0000000..c4f5488
--- /dev/null
+++ b/roles/docker/embedding-service/tasks/main.yml
@@ -0,0 +1,5 @@
+---
+# TODO: implement docker deployment tasks
+- name: Placeholder task
+  ansible.builtin.debug:
+    msg: "Role placeholder. Implement docker deployment tasks."
diff --git a/roles/docker/grafana/README.md b/roles/docker/grafana/README.md
new file mode 100644
index 0000000..3ddf520
--- /dev/null
+++ b/roles/docker/grafana/README.md
@@ -0,0 +1,20 @@
+# Grafana (Docker)
+
+This role deploys Grafana with Docker Compose, creating a persistent data directory and templating a simple `docker-compose.yaml` into `{{ grafana_workspace }}`.
+
+## Defaults
+- `grafana_workspace`: `/opt/grafana`
+- `grafana_image`: `grafana/grafana:10.4.6`
+- `grafana_domain`: `grafana.svc.plus`
+- `grafana_protocol`: `http`
+- `grafana_host_port`: `3000`
+- `grafana_admin_user`: `admin`
+- `grafana_admin_password`: `admin`
+
+## Run
+
+Example playbook execution:
+
+```bash
+ansible-playbook -i inventory.ini playbooks/deploy_grafana_docker.yaml -e "domain=grafana.example.com" -l grafana.example.com
+```
diff --git a/roles/docker/grafana/defaults/main.yml b/roles/docker/grafana/defaults/main.yml
new file mode 100644
index 0000000..f0e08c5
--- /dev/null
+++ b/roles/docker/grafana/defaults/main.yml
@@ -0,0 +1,9 @@
+---
+# Default settings for Grafana Docker deployment
+grafana_workspace: /opt/grafana
+grafana_image: grafana/grafana:10.4.6
+grafana_domain: grafana.svc.plus
+grafana_protocol: http
+grafana_host_port: 3000
+grafana_admin_user: admin
+grafana_admin_password: admin  # bootstrap placeholder only: override via Ansible Vault
diff --git a/roles/docker/grafana/tasks/main.yml b/roles/docker/grafana/tasks/main.yml
new file mode 100644
index 0000000..0f76297
--- /dev/null
+++ b/roles/docker/grafana/tasks/main.yml
@@ -0,0 +1,33 @@
+---
+- name: Ensure Grafana directories exist
+  become: true
+  ansible.builtin.file:
+    path: "{{ item }}"
+    state: directory
+    mode: "0755"
+  loop:
+    - "{{ grafana_workspace }}"
+    - "{{ grafana_workspace }}/data"
+    - "{{ grafana_workspace }}/provisioning"
+
+- name: Ensure Grafana data directory ownership
+  become: true
+  ansible.builtin.file:
+    path: "{{ grafana_workspace }}/data"
+    state: directory
+    owner: "472"
+    group: "472"
+    recurse: true
+
+- name: Template Grafana Docker Compose file
+  become: true
+  ansible.builtin.template:
+    src: docker-compose.yaml.j2
+    dest: "{{ grafana_workspace }}/docker-compose.yaml"
+    mode: "0644"
+
+- name: Launch Grafana with Docker Compose
+  become: true
+  ansible.builtin.command: docker compose -f {{ grafana_workspace }}/docker-compose.yaml up -d
+  args:
+    chdir: "{{ grafana_workspace }}"
diff --git a/roles/docker/grafana/templates/docker-compose.yaml.j2 b/roles/docker/grafana/templates/docker-compose.yaml.j2
new file mode 100644
index 0000000..8856844
--- /dev/null
+++ b/roles/docker/grafana/templates/docker-compose.yaml.j2
@@ -0,0 +1,16 @@
+services:
+  grafana:
+    image: "{{ grafana_image }}"
+    container_name: grafana
+    restart: unless-stopped
+    environment:
+      GF_SECURITY_ADMIN_USER: "{{ grafana_admin_user }}"
+      GF_SECURITY_ADMIN_PASSWORD: "{{ grafana_admin_password }}"
+      GF_SERVER_DOMAIN: "{{ grafana_domain }}"
+      GF_SERVER_ROOT_URL: "{{ grafana_protocol }}://{{ grafana_domain }}/"
+    ports:
+      - "{{ grafana_host_port }}:3000"
+    volumes:
+      - "{{ grafana_workspace }}/data:/var/lib/grafana"
+      - "{{ grafana_workspace }}/provisioning:/etc/grafana/provisioning"
+    user: "472:472"
diff --git a/roles/docker/harbor/README.md b/roles/docker/harbor/README.md
new file mode 100644
index 0000000..0f268e3
--- /dev/null
+++ b/roles/docker/harbor/README.md
@@ -0,0 +1,99 @@
+## Docker 镜像版本
+
+| 服务 | 镜像版本 |
+|-------------|---------------------------------|
+| Harbor Core | `goharbor/harbor-core:v2.12.0` |
+| PostgreSQL | `goharbor/harbor-db:v2.12.0` |
+| Nginx | `goharbor/nginx-photon:v2.12.0` |
+
+# 目录结构
+
+```bash
+playbooks/roles/docker/harbor
+├── defaults/ # 存放默认变量的目录
+│ └── main.yml # 默认配置变量
+├── files/ # 存放静态文件的目录
+│ └── nginx.conf # Nginx 配置文件
+├── tasks/ # 存放任务脚本的目录
+│ ├── main.yml # 主要任务脚本
+│ ├── post-setup.yml # 部署后设置任务
+│ ├── pre-setup.yml # 部署前设置任务
+├── templates/ # 存放模板文件的目录
+│ ├── create_keystore.sh.j2 # 创建 Keystore 和 Truststore 的脚本模板
+│ └── docker-compose.yml.j2 # Docker Compose 配置文件模板
+└── README.md # 项目说明文件
+```
+
+# 使用 Ansible 部署前的准备
+在运行 Playbook 之前,请确保以下准备工作已完成:
+
+1. 主机准备
+操作系统要求:本 Playbook 适用于 Ubuntu 20.04 及以上版本的主机。
+
+主机要求:确保主机上已安装 Docker、Docker Compose 和 Ansible。你可以通过以下命令安装这些工具:
+
+```bash
+# 安装 Docker 和 Docker Compose
+sudo apt-get update
+sudo apt-get install docker.io docker-compose
+```
+主机名称:确保主机名称已正确设置,并且该主机可以访问 DNS 配置的域名。
+
+2. 域名和 SSL 证书
+域名:确保你已经为 Keycloak 设置了域名(例如 keycloak.onwalk.net)。在实际部署前,你需要准备一个有效的域名和 SSL 证书。可以使用 Let’s Encrypt 或其他证书颁发机构获取证书。
+
+证书文件:准备好 SSL 证书(如 onwalk.net.pem)和 SSL 密钥文件(如 onwalk.net.key)。这两个文件将用于配置 Keycloak 和 Nginx 服务的 HTTPS 访问。
+
+证书路径应为 /etc/ssl/onwalk.net.pem,密钥路径应为 /etc/ssl/onwalk.net.key。
+
+3. Ansible 配置文件(如果需要)
+根据需要,你可以创建一个 inventory.ini 文件来指定部署目标主机:
+
+```ini
+[servers]
+your_server_ip_or_hostname ansible_ssh_user=your_user ansible_ssh_private_key_file=your_key
+```
+
+# Ansible Playbook 执行和部署
+
+
+1. 克隆仓库
+首先,克隆该仓库到你的本地机器:
+
+```bash
+git clone https://your_repository_url.git
+cd ansible-playbook
+```
+
+2. 测试执行
+ansible-playbook -i inventory/k3s-cluster playbooks/deploy-docker-harbor.yml -l cn-hw-node.svc.plus -D -C
+
+3. 执行部署
+执行部署任务时,使用以下命令来运行 Ansible Playbook:
+
+ansible-playbook -i inventory.ini playbooks/deploy-docker-harbor.yml -l cn-gateway.svc.plus -D
+
+此命令将会启动以下步骤:
+
+- 安装并配置 Docker 和 Docker Compose。
+- 创建所需的 Keystore 和 Truststore 文件。
+- 启动 Keycloak 和 PostgreSQL 容器,Nginx 容器
+
+4. 验证部署
+部署完成后,你可以通过以下命令检查 Keycloak 和 PostgreSQL 服务是否正常运行:
+
+docker ps -q -f name=postgres
+docker ps -q -f name=keycloak
+docker ps -q -f name=nginx
+
+如果服务正常运行,则会显示容器的 ID。
+
+部署后的配置
+1.
DNS 配置 +确保你的域名(如 keycloak.onwalk.net)已正确解析,并且指向部署 Keycloak 的主机。你可以使用 nslookup 或 dig 工具验证 DNS 解析: + + +## defaults/main.yml encrypt_string + +ansible-vault encrypt_string 'xxxxx' --name 'core_secret' + diff --git a/roles/docker/harbor/defaults/main.yml b/roles/docker/harbor/defaults/main.yml new file mode 100644 index 0000000..6102a5c --- /dev/null +++ b/roles/docker/harbor/defaults/main.yml @@ -0,0 +1,138 @@ +# External endpoint configuration +ext_endpoint: https://images.onwalk.net + +# Harbor Log service configuration +harbor_log_image: goharbor/harbor-log:v2.12.0 +harbor_log_container_name: harbor-log +harbor_log_volume: /var/log/harbor +logrotate_conf_path: ./common/config/log/logrotate.conf +rsyslog_conf_path: ./common/config/log/rsyslog_docker.conf +harbor_log_port: 1514 + +# Registry service configuration +registry_image: goharbor/registry-photon:v2.12.0 +registry_container_name: registry +registry_volume_storage: /data/registry +registry_config_volume: ./common/config/registry/ +registry_cert_path: /data/secret/registry/root.crt +shared_trust_certificates: ./common/config/shared/trust-certificates +harbor_syslog_port: 1514 + +# Registry Controller configuration +registryctl_image: goharbor/harbor-registryctl:v2.12.0 +registryctl_container_name: registryctl +registryctl_env_file: ./common/config/registryctl/env +registryctl_volume_storage: /data/registry +registryctl_config_volume: ./common/config/registry/ +registryctl_config_file: ./common/config/registryctl/config.yml +registry_credential_username: harbor_registry_user + +# PostgreSQL service configuration +postgresql_image: goharbor/harbor-db:v2.12.0 +postgresql_container_name: harbor-db +postgresql_data_volume: /data/database +postgresql_env_file: ./common/config/db/env +postgresql_shm_size: '1gb' +postgresql_host: postgresql +postgresql_port: 5432 +postgresql_username: postgres +postgresql_database: registry + +# Core service configuration +core_image: goharbor/harbor-core:v2.12.0 +core_container_name: 
harbor-core +core_env_file: ./common/config/core/env +core_ca_volume: /data/ca_download/ +core_data_volume: /data/ +core_certificates_volume: ./common/config/core/certificates/ +core_app_conf_path: ./common/config/core/app.conf +core_private_key_path: /data/secret/core/private_key.pem +core_secret_key_path: /data/secret/keys/secretkey + +# Portal service configuration +portal_image: goharbor/harbor-portal:v2.12.0 +portal_container_name: harbor-portal +portal_nginx_conf_path: ./common/config/portal/nginx.conf + +# Cache service configuration +redis_image: goharbor/redis-photon:v2.12.0 +redis_container_name: redis +redis_data_volume: /data/redis + +# Job service configuration +jobservice_image: goharbor/harbor-jobservice:v2.12.0 +jobservice_container_name: harbor-jobservice +jobservice_env_file: ./common/config/jobservice/env +jobservice_config_file: ./common/config/jobservice/config.yml +jobservice_trust_certificates: ./common/config/shared/trust-certificates +jobservice_log_driver: stdout + +# Proxy Service Configuration +proxy_image: goharbor/nginx-photon:v2.12.0 +proxy_container_name: nginx +proxy_restart_policy: always + +# Ali OSS Configuration +oss_accesskeyid: !vault | + $ANSIBLE_VAULT;1.1;AES256 + 36623836396330326132396463623864623134383661623162343235323764626665353432633932 + 3561336662643938386435643162633439666132353835650a326466363033316339653838653761 + 33643864626139643363343533653666303738383637653435346163323339666335323966396464 + 3135663763396238340a303062643539396430613834663563643862343734343230343965323735 + 66326138663430363431353461653364333734656366333635656535653239613235 +oss_accesskeysecret: !vault | + $ANSIBLE_VAULT;1.1;AES256 + 64616533326661396138656437653235376137333437646465633733376362626462623335646634 + 6333373431303235653531636638656261633031346236320a366666616333646261366539646665 + 32613833333762353336333534623561643631336538393933353635383662313339333734623436 + 
6166626431633730390a303836323636343165363339343264656139343036306132653139363963 + 39626432336162636631326430393134653135303535353239366464376338616462 +oss_bucket: harbor-oss +oss_region: oss-cn-wulanchabu +oss_endpoint: harbor-oss.oss-cn-wulanchabu.aliyuncs.com + +# Sensitive variables +harbor_csrf_key: !vault | + $ANSIBLE_VAULT;1.1;AES256 + 63663337656331383635663037643036353832633639636165383030366561663130643731303934 + 3563313234626334646364343966616133306231623765620a306261353633316533396630353164 + 65633236336135303432666130346637393434616664306633316333333836363764613138366637 + 3338373365323666390a343463623862616636363733653031366237616238313031356434303439 + 31666266653836333230343766323966623862383630633662636633393234643131316565353437 + 3130333635373830393235373435383232396635346531623965 +core_secret: !vault | + $ANSIBLE_VAULT;1.1;AES256 + 35616638393966386331633338643332393336663530633239376430393735363430343031613137 + 6434633238313232323437366166633733376239646235380a383137363933326531363961356230 + 65326637653137646130663735363862343462383636326362353532633536366234643930336134 + 6234616561303965320a316230383863363861626534613038313132303862363731633530653938 + 31656439653338623437366363353035303666373734316666326563323531643362 +jobservice_secret: !vault | + $ANSIBLE_VAULT;1.1;AES256 + 38666535373439616132393061356266326361303631383663363638373933366464613061333433 + 6239663433336335323062303333393939313036373038340a316266663233343232626237623733 + 37363664663164646439633338333065333831333662393664303064376231646664306164316338 + 3831393630373033350a663031303333326531656166636436366431386633633832633466363836 + 34643163663935336539333865323830613531386331623663643432313531383861 +harbor_admin_password: !vault | + $ANSIBLE_VAULT;1.1;AES256 + 36336465346533313435383536386231353561663336326635323465313033383264633862333264 + 3037376630353534376565326437653730326130303636370a393737343635393335353233346137 + 
64373532396339663065376534373534623732323762643634396630386430323766363334306663 + 3536636138343666330a323066393939333861656131623837626430666332363237616639323831 + 3532 +postgresql_password: !vault | + $ANSIBLE_VAULT;1.1;AES256 + 66626230333636656666346537343137303439613864616431343531333766336434313136626463 + 3238626161616635653566306162346232643735303236320a393231616534353130306264623231 + 39333032356632616462623736376161326464306433316234353665633136396332363866626336 + 3364333463313035390a353866663663643333393835613664643832613338356530353834633232 + 66346231346264396139333165633361326139383131363861623232646330326664 +registry_credential_password: !vault | + $ANSIBLE_VAULT;1.1;AES256 + 32633462323230656439313165616564373965636632646234396437363432653566653432636638 + 3664633136656437356331623330343463346536613361310a333962633365636335616236383230 + 38333837656637646633663330383132623837613063356331646264333437613132376130663764 + 3530626162323261620a333464316433383037306134386339633036623235376138663832366535 + 65613865323832326363393936376465363964363864616131393933343435623433356564373433 + 6266393231653930373138353332393538336262396238646266 diff --git a/roles/docker/harbor/tasks/main.yml b/roles/docker/harbor/tasks/main.yml new file mode 100644 index 0000000..fddafcb --- /dev/null +++ b/roles/docker/harbor/tasks/main.yml @@ -0,0 +1,37 @@ +--- +# 主任务:创建 Keycloak 服务,启动 Docker Compose 等 + +- name: 执行 pre-setup 操作 + include_tasks: "pre-setup.yml" + +- name: 渲染 Docker Compose Common 配置文件 + template: + src: "templates/{{ item }}" + dest: "/home/ubuntu/harbor/{{ item }}" + loop: + - common/config/portal/nginx.conf + - common/config/core/app.conf + - common/config/core/env + - common/config/jobservice/env + - common/config/jobservice/config.yml + - common/config/nginx/nginx.conf + - common/config/registry/root.crt + - common/config/registry/config.yml + - common/config/registry/passwd + - common/config/db/env + - common/config/log/logrotate.conf + - 
common/config/log/rsyslog_docker.conf + - common/config/registryctl/env + - common/config/registryctl/config.yml + +- name: 渲染 Docker Compose 配置文件 + template: + src: "templates/docker-compose.yml.j2" + dest: "/home/ubuntu/harbor/docker-compose.yml" + +- name: 启动 Docker Compose 服务 + become: true + command: docker-compose -f /home/ubuntu/harbor/docker-compose.yml up -d + +- name: 执行 post-setup 操作 + include_tasks: "post-setup.yml" diff --git a/roles/docker/harbor/tasks/post-setup.yml b/roles/docker/harbor/tasks/post-setup.yml new file mode 100644 index 0000000..05e30f3 --- /dev/null +++ b/roles/docker/harbor/tasks/post-setup.yml @@ -0,0 +1,19 @@ +--- +# post-setup.yml + +- name: 检查容器是否运行并输出状态 + command: docker ps -q -f name={{ item.name }} + register: container_status + loop: + - { name: "{{ core_container_name }}" } + - { name: "{{ proxy_container_name }}" } + - { name: "{{ redis_container_name }}" } + - { name: "{{ portal_container_name }}" } + - { name: "{{ registry_container_name }}" } + - { name: "{{ postgresql_container_name }}" } + - { name: "{{ jobservice_container_name }}" } + - { name: "{{ registryctl_container_name }}" } + changed_when: false + failed_when: container_status.stdout == "" + loop_control: + loop_var: item diff --git a/roles/docker/harbor/tasks/pre-setup.yml b/roles/docker/harbor/tasks/pre-setup.yml new file mode 100644 index 0000000..56211e8 --- /dev/null +++ b/roles/docker/harbor/tasks/pre-setup.yml @@ -0,0 +1,40 @@ +--- +- name: 安装 Docker 和 Docker Compose + apt: + name: + - docker.io + - docker-compose + state: present + update_cache: yes + +- name: 启动并启用 Docker 服务 + systemd: + name: docker + enabled: yes + state: started + +- name: 创建所需的目录结构 /etc/ssl + file: + path: "{{ item }}" + state: directory + mode: '0755' + with_items: + - /etc/ssl +- name: 创建所需的目录结构 common config + file: + path: "/home/ubuntu/harbor/{{ item }}" + state: directory + mode: '0755' + with_items: + - common/config/registryctl + - common/config/db + - 
common/config/jobservice + - common/config/registry + - common/config/portal + - common/config/core + - common/config/core/certificates + - common/config/log + - common/config/shared + - common/config/shared/trust-certificates + - common/config/nginx + - common/config/nginx/conf.d diff --git a/roles/docker/harbor/templates/common/config/core/app.conf b/roles/docker/harbor/templates/common/config/core/app.conf new file mode 100644 index 0000000..28351cd --- /dev/null +++ b/roles/docker/harbor/templates/common/config/core/app.conf @@ -0,0 +1,6 @@ +appname = Harbor +runmode = prod +enablegzip = true + +[prod] +httpport = 8080 diff --git a/roles/docker/harbor/templates/common/config/core/env b/roles/docker/harbor/templates/common/config/core/env new file mode 100644 index 0000000..f5a11e8 --- /dev/null +++ b/roles/docker/harbor/templates/common/config/core/env @@ -0,0 +1,47 @@ +CONFIG_PATH=/etc/core/app.conf +UAA_CA_ROOT=/etc/core/certificates/uaa_ca.pem +_REDIS_URL_CORE=redis://redis:6379?idle_timeout_seconds=30 +SYNC_QUOTA=true +_REDIS_URL_REG=redis://redis:6379/1?idle_timeout_seconds=30 + +LOG_LEVEL=info +EXT_ENDPOINT={{ ext_endpoint }} +DATABASE_TYPE=postgresql +POSTGRESQL_HOST={{ postgresql_host }} +POSTGRESQL_PORT={{ postgresql_port }} +POSTGRESQL_USERNAME={{ postgresql_username }} +POSTGRESQL_PASSWORD={{ postgresql_password }} +POSTGRESQL_DATABASE={{ postgresql_database }} +POSTGRESQL_SSLMODE=disable +POSTGRESQL_MAX_IDLE_CONNS=50 +POSTGRESQL_MAX_OPEN_CONNS=100 +POSTGRESQL_CONN_MAX_LIFETIME=5m +POSTGRESQL_CONN_MAX_IDLE_TIME=0 +REGISTRY_URL=http://registry:5000 +PORTAL_URL=http://portal:8080 +TOKEN_SERVICE_URL=http://core:8080/service/token +HARBOR_ADMIN_PASSWORD={{ harbor_admin_password }} +MAX_JOB_WORKERS=10 +CORE_SECRET={{ core_secret }} +JOBSERVICE_SECRET={{ jobservice_secret }} +WITH_TRIVY=False +CORE_URL=http://core:8080 +CORE_LOCAL_URL=http://127.0.0.1:8080 +JOBSERVICE_URL=http://jobservice:8080 +TRIVY_ADAPTER_URL=http://trivy-adapter:8080 
+REGISTRY_STORAGE_PROVIDER_NAME=oss +READ_ONLY=false +RELOAD_KEY= +REGISTRY_CONTROLLER_URL=http://registryctl:8080 +REGISTRY_CREDENTIAL_USERNAME={{ registry_credential_username }} +REGISTRY_CREDENTIAL_PASSWORD={{ registry_credential_password }} +CSRF_KEY={{ harbor_csrf_key }} +ROBOT_SCANNER_NAME_PREFIX=oEBK0BPU +PERMITTED_REGISTRY_TYPES_FOR_PROXY_CACHE=docker-hub,harbor,azure-acr,ali-acr,aws-ecr,google-gcr,quay,docker-registry,github-ghcr,jfrog-artifactory + +HTTP_PROXY= +HTTPS_PROXY= +NO_PROXY= + +PORT=8080 +QUOTA_UPDATE_PROVIDER=db diff --git a/roles/docker/harbor/templates/common/config/db/env b/roles/docker/harbor/templates/common/config/db/env new file mode 100644 index 0000000..eb5d09e --- /dev/null +++ b/roles/docker/harbor/templates/common/config/db/env @@ -0,0 +1,2 @@ +POSTGRES_PASSWORD={{ postgresql_password }} + diff --git a/roles/docker/harbor/templates/common/config/jobservice/config.yml b/roles/docker/harbor/templates/common/config/jobservice/config.yml new file mode 100644 index 0000000..c09c577 --- /dev/null +++ b/roles/docker/harbor/templates/common/config/jobservice/config.yml @@ -0,0 +1,38 @@ +--- +#Protocol used to serve +protocol: "http" + +#Server listening port +port: 8080 + +#Worker pool +worker_pool: + #Worker concurrency + workers: 10 + backend: "redis" + #Additional config if use 'redis' backend + redis_pool: + #redis://[arbitrary_username:password@]ipaddress:port/database_index + redis_url: redis://redis:6379/2?idle_timeout_seconds=30 + namespace: "harbor_job_service_namespace" + idle_timeout_second: 3600 +#Loggers for the running job +job_loggers: + # The jobLoggers backend name, only support "STD_OUTPUT", "FILE" and/or "DB" + - name: "STD_OUTPUT" + level: "INFO" # INFO/DEBUG/WARNING/ERROR/FATAL + +#Loggers for the job service +loggers: + - name: "STD_OUTPUT" # Same with above + level: "INFO" + + +reaper: + # the max time to wait for a task to finish, if unfinished after max_update_hours, the task will be mark as error, but the task 
will continue to run, default value is 24, + max_update_hours: 24 + # the max time for execution in running state without new task created + max_dangling_hours: 168 + +# the max size of job log returned by API, default is 10M +max_retrieve_size_mb: 10 \ No newline at end of file diff --git a/roles/docker/harbor/templates/common/config/jobservice/env b/roles/docker/harbor/templates/common/config/jobservice/env new file mode 100644 index 0000000..712ad38 --- /dev/null +++ b/roles/docker/harbor/templates/common/config/jobservice/env @@ -0,0 +1,13 @@ +CORE_SECRET={{ core_secret }} +REGISTRY_URL=http://registry:5000 +JOBSERVICE_SECRET={{ jobservice_secret }} +CORE_URL=http://core:8080 +REGISTRY_CONTROLLER_URL=http://registryctl:8080 +JOBSERVICE_WEBHOOK_JOB_MAX_RETRY=3 +JOBSERVICE_WEBHOOK_JOB_HTTP_CLIENT_TIMEOUT=3 + +HTTP_PROXY={{ http_proxy | default('') }} +HTTPS_PROXY={{ https_proxy | default('') }} +NO_PROXY={{ no_proxy | default('') }} +REGISTRY_CREDENTIAL_USERNAME={{ registry_credential_username }} +REGISTRY_CREDENTIAL_PASSWORD={{ registry_credential_password }} diff --git a/roles/docker/harbor/templates/common/config/log/logrotate.conf b/roles/docker/harbor/templates/common/config/log/logrotate.conf new file mode 100644 index 0000000..97f5f93 --- /dev/null +++ b/roles/docker/harbor/templates/common/config/log/logrotate.conf @@ -0,0 +1,8 @@ +/var/log/docker/*.log { + rotate 50 + size 200M + copytruncate + compress + missingok + nodateext +} \ No newline at end of file diff --git a/roles/docker/harbor/templates/common/config/log/rsyslog_docker.conf b/roles/docker/harbor/templates/common/config/log/rsyslog_docker.conf new file mode 100644 index 0000000..0be27a6 --- /dev/null +++ b/roles/docker/harbor/templates/common/config/log/rsyslog_docker.conf @@ -0,0 +1,7 @@ +# Rsyslog configuration file for docker. 
+ +template(name="DynaFile" type="string" string="/var/log/docker/%programname%.log") + +if $programname != "rsyslogd" then { + action(type="omfile" dynaFile="DynaFile") +} \ No newline at end of file diff --git a/roles/docker/harbor/templates/common/config/nginx/nginx.conf b/roles/docker/harbor/templates/common/config/nginx/nginx.conf new file mode 100644 index 0000000..77dd45c --- /dev/null +++ b/roles/docker/harbor/templates/common/config/nginx/nginx.conf @@ -0,0 +1,149 @@ +worker_processes auto; +pid /tmp/nginx.pid; + +events { + worker_connections 3096; + use epoll; + multi_accept on; +} + +http { + client_body_temp_path /tmp/client_body_temp; + proxy_temp_path /tmp/proxy_temp; + fastcgi_temp_path /tmp/fastcgi_temp; + uwsgi_temp_path /tmp/uwsgi_temp; + scgi_temp_path /tmp/scgi_temp; + tcp_nodelay on; + include /etc/nginx/conf.d/*.upstream.conf; + + # this is necessary for us to be able to disable request buffering in all cases + proxy_http_version 1.1; + + upstream core { + server core:8080; + } + + upstream portal { + server portal:8080; + } + + log_format timed_combined '$remote_addr - ' + '"$request" $status $body_bytes_sent ' + '"$http_referer" "$http_user_agent" ' + '$request_time $upstream_response_time $pipe'; + + access_log /dev/stdout timed_combined; + + map $http_x_forwarded_proto $x_forwarded_proto { + default $http_x_forwarded_proto; + "" $scheme; + } + + include /etc/nginx/conf.d/*.server.conf; + + server { + listen 8443 ssl; +# server_name harbordomain.com; + server_tokens off; + # SSL + ssl_certificate /etc/cert/server.crt; + ssl_certificate_key /etc/cert/server.key; + + # Recommendations from https://raymii.org/s/tutorials/Strong_SSL_Security_On_nginx.html + ssl_protocols TLSv1.2 TLSv1.3; + ssl_ciphers '!aNULL:kECDH+AESGCM:ECDH+AESGCM:RSA+AESGCM:kECDH+AES:ECDH+AES:RSA+AES:'; + ssl_prefer_server_ciphers on; + ssl_session_cache shared:SSL:10m; + + # disable any limits to avoid HTTP 413 for large image uploads + client_max_body_size 0; + + # 
required to avoid HTTP 411: see Issue #1486 (https://github.com/docker/docker/issues/1486) + chunked_transfer_encoding on; + + # Add extra headers + add_header Strict-Transport-Security "max-age=31536000; includeSubdomains; preload"; + add_header X-Frame-Options DENY; + add_header Content-Security-Policy "frame-ancestors 'none'"; + + # customized location config file can place to /etc/nginx dir with prefix harbor.https. and suffix .conf + include /etc/nginx/conf.d/harbor.https.*.conf; + + location / { + proxy_pass http://portal/; + proxy_set_header Host $http_host; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Proto $x_forwarded_proto; + + proxy_cookie_path / "/; HttpOnly; Secure"; + + proxy_buffering off; + proxy_request_buffering off; + } + + location /c/ { + proxy_pass http://core/c/; + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Proto $x_forwarded_proto; + + proxy_cookie_path / "/; Secure"; + + proxy_buffering off; + proxy_request_buffering off; + } + + location /api/ { + proxy_pass http://core/api/; + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Proto $x_forwarded_proto; + + proxy_cookie_path / "/; Secure"; + + proxy_buffering off; + proxy_request_buffering off; + } + + location /v1/ { + return 404; + } + + location /v2/ { + proxy_pass http://core/v2/; + proxy_set_header Host $http_host; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Proto $x_forwarded_proto; + proxy_buffering off; + proxy_request_buffering off; + proxy_send_timeout 900; + proxy_read_timeout 900; + } + + location /service/ { + proxy_pass http://core/service/; + proxy_set_header 
Host $http_host; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Proto $x_forwarded_proto; + + proxy_cookie_path / "/; Secure"; + + proxy_buffering off; + proxy_request_buffering off; + } + + location /service/notifications { + return 404; + } + } + server { + listen 8080; + #server_name harbordomain.com; + return 308 https://$host:443$request_uri; + } +} \ No newline at end of file diff --git a/roles/docker/harbor/templates/common/config/portal/nginx.conf b/roles/docker/harbor/templates/common/config/portal/nginx.conf new file mode 100644 index 0000000..3058d65 --- /dev/null +++ b/roles/docker/harbor/templates/common/config/portal/nginx.conf @@ -0,0 +1,42 @@ + +worker_processes auto; +pid /tmp/nginx.pid; + +events { + worker_connections 1024; +} + +http { + + client_body_temp_path /tmp/client_body_temp; + proxy_temp_path /tmp/proxy_temp; + fastcgi_temp_path /tmp/fastcgi_temp; + uwsgi_temp_path /tmp/uwsgi_temp; + scgi_temp_path /tmp/scgi_temp; + + server { + listen 8080; + server_name localhost; + + root /usr/share/nginx/html; + index index.html index.htm; + include /etc/nginx/mime.types; + + gzip on; + gzip_min_length 1000; + gzip_proxied expired no-cache no-store private auth; + gzip_types text/plain text/css application/json application/javascript application/x-javascript text/xml application/xml application/xml+rss text/javascript; + + location /devcenter-api-2.0 { + try_files $uri $uri/ /swagger-ui-index.html; + } + + location / { + try_files $uri $uri/ /index.html; + } + + location = /index.html { + add_header Cache-Control "no-store, no-cache, must-revalidate"; + } + } +} \ No newline at end of file diff --git a/roles/docker/harbor/templates/common/config/registry/config.yml b/roles/docker/harbor/templates/common/config/registry/config.yml new file mode 100644 index 0000000..4d68ee7 --- /dev/null +++ b/roles/docker/harbor/templates/common/config/registry/config.yml @@ 
-0,0 +1,49 @@ +version: 0.1 +log: + level: info + fields: + service: registry +storage: + cache: + layerinfo: redis + oss: + accesskeyid: {{ oss_accesskeyid }} + accesskeysecret: {{ oss_accesskeysecret }} + bucket: {{ oss_bucket }} + region: {{ oss_region }} + endpoint: {{ oss_endpoint }} + internal: False + encrypt: False + secure: true + chunksize: 5242880 + rootdirectory: /docker + maintenance: + uploadpurging: + enabled: false + delete: + enabled: true +redis: + addr: redis:6379 + readtimeout: 10s + writetimeout: 10s + dialtimeout: 10s + password: + db: 1 + pool: + maxidle: 100 + maxactive: 500 + idletimeout: 60s +http: + addr: :5000 + secret: placeholder + debug: + addr: localhost:5001 +auth: + htpasswd: + realm: harbor-registry-basic-realm + path: /etc/registry/passwd +validation: + disabled: true +compatibility: + schema1: + enabled: true diff --git a/roles/docker/harbor/templates/common/config/registry/passwd b/roles/docker/harbor/templates/common/config/registry/passwd new file mode 100644 index 0000000..495c4a1 --- /dev/null +++ b/roles/docker/harbor/templates/common/config/registry/passwd @@ -0,0 +1 @@ +harbor_registry_user:$2y$05$GQPl7njy.t26N5rFZxqDu.CcXQi4eaYqC5heBhtzqz4x6bVfI7.Rq diff --git a/roles/docker/harbor/templates/common/config/registry/root.crt b/roles/docker/harbor/templates/common/config/registry/root.crt new file mode 100755 index 0000000..e69de29 diff --git a/roles/docker/harbor/templates/common/config/registryctl/config.yml b/roles/docker/harbor/templates/common/config/registryctl/config.yml new file mode 100644 index 0000000..2c70735 --- /dev/null +++ b/roles/docker/harbor/templates/common/config/registryctl/config.yml @@ -0,0 +1,5 @@ +--- +protocol: "http" +port: 8080 +log_level: info +registry_config: "/etc/registry/config.yml" \ No newline at end of file diff --git a/roles/docker/harbor/templates/common/config/registryctl/env b/roles/docker/harbor/templates/common/config/registryctl/env new file mode 100644 index 0000000..a7b5b24 
--- /dev/null +++ b/roles/docker/harbor/templates/common/config/registryctl/env @@ -0,0 +1,2 @@ +CORE_SECRET={{ core_secret }} +JOBSERVICE_SECRET={{ jobservice_secret }} diff --git a/roles/docker/harbor/templates/docker-compose.yml.j2 b/roles/docker/harbor/templates/docker-compose.yml.j2 new file mode 100644 index 0000000..90eb501 --- /dev/null +++ b/roles/docker/harbor/templates/docker-compose.yml.j2 @@ -0,0 +1,195 @@ +version: '3' +services: + registry: + image: {{ registry_image }} + container_name: {{ registry_container_name }} + restart: always + cap_drop: + - ALL + cap_add: + - CHOWN + - SETGID + - SETUID + volumes: + - {{ registry_volume_storage }}:/storage:z + - {{ registry_config_volume }}:/etc/registry/:z + - type: bind + source: {{ registry_cert_path }} + target: /etc/registry/root.crt + - type: bind + source: {{ shared_trust_certificates }} + target: /harbor_cust_cert + networks: + - harbor + depends_on: + - postgresql # 移除了 log 依赖 + + registryctl: + image: {{ registryctl_image }} + container_name: {{ registryctl_container_name }} + env_file: + - {{ registryctl_env_file }} + restart: always + cap_drop: + - ALL + cap_add: + - CHOWN + - SETGID + - SETUID + volumes: + - {{ registryctl_volume_storage }}:/storage:z + - {{ registryctl_config_volume }}:/etc/registry/:z + - type: bind + source: {{ registryctl_config_file }} + target: /etc/registryctl/config.yml + - type: bind + source: {{ shared_trust_certificates }} + target: /harbor_cust_cert + networks: + - harbor + depends_on: + - registry # 移除了 log 依赖 + + postgresql: + image: {{ postgresql_image }} + container_name: {{ postgresql_container_name }} + restart: always + cap_drop: + - ALL + cap_add: + - CHOWN + - DAC_OVERRIDE + - SETGID + - SETUID + volumes: + - {{ postgresql_data_volume }}:/var/lib/postgresql/data:z + networks: + - harbor + env_file: + - {{ postgresql_env_file }} + shm_size: '{{ postgresql_shm_size }}' + + core: + image: {{ core_image }} + container_name: {{ core_container_name }} + env_file: 
+ - {{ core_env_file }} + restart: always + cap_drop: + - ALL + cap_add: + - SETGID + - SETUID + volumes: + - {{ core_ca_volume }}:/etc/core/ca/:z + - {{ core_data_volume }}:/data/:z + - {{ core_certificates_volume }}:/etc/core/certificates/:z + - type: bind + source: {{ core_app_conf_path }} + target: /etc/core/app.conf + - type: bind + source: {{ core_private_key_path }} + target: /etc/core/private_key.pem + - type: bind + source: {{ core_secret_key_path }} + target: /etc/core/key + - type: bind + source: {{ shared_trust_certificates }} + target: /harbor_cust_cert + networks: + - harbor + depends_on: + - registry + - redis + - postgresql # 移除了 log 依赖 + + portal: + image: {{ portal_image }} + container_name: {{ portal_container_name }} + restart: always + cap_drop: + - ALL + cap_add: + - CHOWN + - SETGID + - SETUID + - NET_BIND_SERVICE + volumes: + - type: bind + source: {{ portal_nginx_conf_path }} + target: /etc/nginx/nginx.conf + networks: + - harbor + depends_on: + - core # 移除了 log 依赖 + + jobservice: + image: "{{ jobservice_image }}" + container_name: "{{ jobservice_container_name }}" + env_file: + - "{{ jobservice_env_file }}" + restart: always + cap_drop: + - ALL + cap_add: + - CHOWN + - SETGID + - SETUID + volumes: + - /data/job_logs:/var/log/jobs:z + - type: bind + source: "{{ jobservice_config_file }}" + target: /etc/jobservice/config.yml + - type: bind + source: "{{ jobservice_trust_certificates }}" + target: /harbor_cust_cert + networks: + - harbor + depends_on: + - core + + redis: + image: {{ redis_image }} + container_name: {{ redis_container_name }} + restart: always + cap_drop: + - ALL + cap_add: + - CHOWN + - SETGID + - SETUID + volumes: + - {{ redis_data_volume }}:/var/lib/redis + networks: + - harbor + + proxy: + image: {{ proxy_image }} + container_name: {{ proxy_container_name }} + restart: always + cap_drop: + - ALL + cap_add: + - CHOWN + - SETGID + - SETUID + - NET_BIND_SERVICE + volumes: + - ./common/config/nginx:/etc/nginx:z + - 
/data/secret/cert:/etc/cert:z + - type: bind + source: ./common/config/shared/trust-certificates + target: /harbor_cust_cert + ports: + - 80:8080 + - 443:8443 + networks: + - harbor + depends_on: + - registry + - core + - portal + +networks: + harbor: + external: false diff --git a/roles/docker/kafka/README.md b/roles/docker/kafka/README.md new file mode 100644 index 0000000..4f1f90f --- /dev/null +++ b/roles/docker/kafka/README.md @@ -0,0 +1,3 @@ +# kafka (docker) + +Placeholder role for docker-compose style deployment of kafka. diff --git a/roles/docker/kafka/tasks/main.yml b/roles/docker/kafka/tasks/main.yml new file mode 100644 index 0000000..c4f5488 --- /dev/null +++ b/roles/docker/kafka/tasks/main.yml @@ -0,0 +1,5 @@ +--- +# TODO: implement docker deployment tasks +- name: Placeholder task + debug: + msg: "Role placeholder. Implement docker deployment tasks." diff --git a/roles/docker/keycloak/README.md b/roles/docker/keycloak/README.md new file mode 100644 index 0000000..42bdfbe --- /dev/null +++ b/roles/docker/keycloak/README.md @@ -0,0 +1,113 @@ +## Docker 镜像版本 + +| 服务 | 镜像版本 | +|-------------|---------------------------------| +| Keycloak | `bitnami/keycloak:26.0` | +| PostgreSQL | `postgres:16.0-bookworm` | +| Nginx | `nginx:1.27` | + +# 目录结构 + +```text +playbooks/roles/docker/keycloak +├── defaults/ # 存放默认变量的目录 +│ └── main.yml # 默认配置变量 +├── files/ # 存放静态文件的目录 +│ └── create_keystore.sh # 创建 Keystore 和 Truststore 的脚本 +├── tasks/ # 存放任务脚本的目录 +│ ├── main.yml # 主要任务脚本 +│ ├── post-setup.yml # 部署后设置任务 +│ └── pre-setup.yml # 部署前设置任务 +├── templates/ # 存放模板文件的目录 +│ ├── nginx.conf.j2 # Nginx 配置文件模板 +│ └── docker-compose.yml.j2 # Docker Compose 配置文件模板 +└── README.md # 项目说明文件 +``` + +## 使用 Ansible 部署前的准备 +在运行 Playbook 之前,请确保以下准备工作已完成: + +1. 
主机准备 +操作系统要求:本 Playbook 适用于 Ubuntu 20.04 及以上版本的主机。 + +主机要求:确保主机上已安装 Docker、Docker Compose 和 Ansible。你可以通过以下命令安装这些工具: + +```bash +# 安装 Docker 和 Docker Compose +sudo apt-get update +sudo apt-get install docker.io docker-compose +``` +主机名称:确保主机名称已正确设置,并且该主机可以访问 DNS 配置的域名。 + +2. 域名和 SSL 证书 +域名:确保你已经为 Keycloak 设置了域名(例如 keycloak.onwalk.net)。在实际部署前,你需要准备一个有效的域名和 SSL 证书。可以使用 Let’s Encrypt 或其他证书颁发机构获取证书。 + +证书文件:准备好 SSL 证书(如 onwalk.net.pem)和 SSL 密钥文件(如 onwalk.net.key)。这两个文件将用于配置 Keycloak 和 Nginx 服务的 HTTPS 访问。 + +证书路径应为 /etc/ssl/onwalk.net.pem,密钥路径应为 /etc/ssl/onwalk.net.key。 + +3. Ansible 配置文件(如果需要) +根据需要,你可以创建一个 inventory.ini 文件来指定部署目标主机: + +```ini +[servers] +your_server_ip_or_hostname ansible_ssh_user=your_user ansible_ssh_private_key_file=your_key +``` + +# Ansible Playbook 执行和部署 + + +1. 克隆仓库 +首先,克隆该仓库到你的本地机器: + +```bash +git clone https://your_repository_url.git +cd ansible-playbook +``` + +2. 测试执行 +ansible-playbook -i inventory.ini playbooks/deploy-docker-keycloak.yml -l cn-gateway.svc.plus -D -C + +3. 执行部署 +执行部署任务时,使用以下命令来运行 Ansible Playbook: + +ansible-playbook -i inventory.ini playbooks/deploy-docker-keycloak.yml -l cn-gateway.svc.plus -D + +此命令将会启动以下步骤: + +- 安装并配置 Docker 和 Docker Compose。 +- 创建所需的 Keystore 和 Truststore 文件。 +- 启动 Keycloak、PostgreSQL 和 Nginx 容器。 + +4. 验证部署 +部署完成后,你可以通过以下命令检查 Keycloak、PostgreSQL 和 Nginx 服务是否正常运行: + +```bash +docker ps -q -f name=postgres +docker ps -q -f name=keycloak +docker ps -q -f name=nginx +``` + +如果服务正常运行,则会显示容器的 ID。 + +部署后的配置 +1. DNS 配置 +确保你的域名(如 keycloak.onwalk.net)已正确解析,并且指向部署 Keycloak 的主机。你可以使用 nslookup 或 dig 工具验证 DNS 解析: + +```bash +nslookup keycloak.onwalk.net +``` +2. Keycloak 领域设置 +部署后,你需要在 Keycloak 管理控制台进行以下配置: + +创建新的 Realm(领域):登录到 Keycloak 管理界面,使用你在 .env 文件中设置的 KEYCLOAK_ADMIN 和 KEYCLOAK_ADMIN_PASSWORD 登录。然后创建一个新的 Realm(例如 onwalk)。 +配置客户端和身份提供者:根据需要,创建新的客户端并配置身份验证提供者(如 OAuth、SAML 等)。 +3. 
前端配置 +前端应用(如使用 Keycloak 作为身份验证提供者的应用)需要配置与 Keycloak 的集成: + +在前端应用中设置正确的 Keycloak URL,例如 https://keycloak.onwalk.net。 +配置正确的 realm 和 client ID,以便与 Keycloak 实现 SSO(单点登录)。 + +如果遇到任何问题,请检查 Nginx 和 Keycloak 的日志,确保它们正确启动并按预期运行。 +通过使用 Ansible Playbook,你可以快速部署和配置一个完整的 Keycloak 环境。部署完成后,确保 DNS 解析、Keycloak 领域设置以及 HTTPS 配置正确,以便你的应用能够通过安全的 SSL/TLS 通道与 Keycloak 进行交互。 + diff --git a/roles/docker/keycloak/defaults/main.yml b/roles/docker/keycloak/defaults/main.yml new file mode 100644 index 0000000..ae526e2 --- /dev/null +++ b/roles/docker/keycloak/defaults/main.yml @@ -0,0 +1,36 @@ +--- +postgres_name: postgres +postgres_image: postgres:16.0-bookworm +postgres_db: keycloak +postgres_user: keycloak_user +postgres_password: keycloak_password + +keycloak_name: keycloak +keycloak_domain: keycloak.onwalk.net +keycloak_image: bitnami/keycloak:latest +keycloak_admin: admin +keycloak_admin_password: admin_password + +nginx_name: nginx +nginx_image: nginx:latest + +key_alias: mykey +key_password: your_key_password +keycloak_key_store: + file: /etc/ssl/keystore.jks + password: your_key_password +keycloak_trust_store: + file: /etc/ssl/truststore.jks + password: your_trust_password + +# SSL证书路径和文件名 +ca_certificate: + file: /etc/ssl/onwalk.net.pem +ssl_certificate: + path: /etc/ssl/ + name: onwalk.net.pem +ssl_certificate_key: + path: /etc/ssl/ + name: onwalk.net.key + +dhparam_path: /etc/ssl/dhparam.pem diff --git a/roles/docker/keycloak/files/create_keystore.sh b/roles/docker/keycloak/files/create_keystore.sh new file mode 100644 index 0000000..94d0302 --- /dev/null +++ b/roles/docker/keycloak/files/create_keystore.sh @@ -0,0 +1,32 @@ +#!/bin/bash + +# 定义非空检查函数 +check_non_empty() { + if [ -z "$1" ]; then + echo "ERROR: $2 is not set." 
+ exit 1 + fi +} + +# 使用非空检查函数检查所有变量 +check_non_empty "$KEYSTORE_FILE" "KEYSTORE_FILE" +check_non_empty "$TRUSTSTORE_FILE" "TRUSTSTORE_FILE" +check_non_empty "$KEYSTORE_PASSWORD" "KEYSTORE_PASSWORD" +check_non_empty "$TRUSTSTORE_PASSWORD" "TRUSTSTORE_PASSWORD" +check_non_empty "$KEY_ALIAS" "KEY_ALIAS" +check_non_empty "$KEY_PASSWORD" "KEY_PASSWORD" +check_non_empty "$ROOT_CA_CERT" "ROOT_CA_CERT" + +# 1. 创建 Keystore (包括私钥) +echo "Creating keystore..." +keytool -genkeypair -v -keystore "$KEYSTORE_FILE" -keyalg RSA -keysize 2048 -validity 365 -alias "$KEY_ALIAS" -storepass "$KEYSTORE_PASSWORD" -keypass "$KEY_PASSWORD" -dname "CN=localhost, OU=Dev, O=MyCompany, L=City, ST=State, C=US" -noprompt + +# 2. 创建 Truststore 并导入根证书 +echo "Creating truststore and importing root CA certificate..." +keytool -import -file "$ROOT_CA_CERT" -keystore "$TRUSTSTORE_FILE" -alias root-ca -storepass "$TRUSTSTORE_PASSWORD" -noprompt + +echo "Keystore and truststore have been created and configured successfully." + +# 创建 Diffie-Hellman 参数 +#echo "Generating Diffie-Hellman parameters..." 
+#openssl dhparam -out /etc/ssl/dhparam.pem 2048 diff --git a/roles/docker/keycloak/tasks/main.yml b/roles/docker/keycloak/tasks/main.yml new file mode 100644 index 0000000..db6fb0e --- /dev/null +++ b/roles/docker/keycloak/tasks/main.yml @@ -0,0 +1,33 @@ +--- +# 主任务:创建 Keycloak 服务,启动 Docker Compose 等 + +- name: 执行 pre-setup 操作 + include_tasks: "pre-setup.yml" + +- name: 执行 create_keystore.sh 脚本 + script: files/create_keystore.sh + environment: + KEYSTORE_FILE: "{{ keycloak_key_store.file }}" + TRUSTSTORE_FILE: "{{ keycloak_trust_store.file }}" + KEYSTORE_PASSWORD: "{{ keycloak_key_store.password }}" + TRUSTSTORE_PASSWORD: "{{ keycloak_trust_store.password }}" + KEY_ALIAS: "{{ key_alias }}" + KEY_PASSWORD: "{{ key_password }}" + ROOT_CA_CERT: "{{ ca_certificate.file }}" + +- name: 渲染 nginx 配置文件 + template: + src: "templates/nginx.conf.j2" + dest: "/tmp/nginx.conf" + +- name: 渲染 Docker Compose 配置文件 + template: + src: "templates/docker-compose.yml.j2" + dest: "/tmp/docker-compose.yml" + +- name: 启动 Docker Compose 服务 + become: true + command: docker-compose -f /tmp/docker-compose.yml up -d + +- name: 执行 post-setup 操作 + include_tasks: "post-setup.yml" diff --git a/roles/docker/keycloak/tasks/post-setup.yml b/roles/docker/keycloak/tasks/post-setup.yml new file mode 100644 index 0000000..9e3d888 --- /dev/null +++ b/roles/docker/keycloak/tasks/post-setup.yml @@ -0,0 +1,31 @@ +--- +# post-setup.yml + +- name: 检查容器是否运行并输出状态 + command: docker ps -q -f name={{ item.name }} + register: container_status + loop: + - { name: "{{ keycloak_name }}" } + - { name: "{{ postgres_name }}" } + - { name: "{{ nginx_name }}" } + changed_when: false + failed_when: container_status.stdout == "" + loop_control: + loop_var: item + +- name: 显示容器状态 + debug: + msg: | + Docker Compose 容器状态: + {% for item in [keycloak_name, postgres_name, nginx_name] %} + - {{ item }}: {% if container_status.results | selectattr('item.name', 'equalto', item) | map(attribute='stdout') | list | first %} 运行中 {% else %} 
未运行 {% endif %} + {% endfor %} + +- name: 清理临时文件 + file: + path: "/tmp/{{ item }}" + state: absent + with_items: + - docker-compose.yml + # nginx.conf is kept: the nginx container bind-mounts /tmp/nginx.conf (./nginx.conf in the compose file) and could not restart without it + ignore_errors: yes diff --git a/roles/docker/keycloak/tasks/pre-setup.yml b/roles/docker/keycloak/tasks/pre-setup.yml new file mode 100644 index 0000000..8c93817 --- /dev/null +++ b/roles/docker/keycloak/tasks/pre-setup.yml @@ -0,0 +1,22 @@ +--- +- name: 安装 Docker 和 Docker Compose + apt: + name: + - docker.io + - docker-compose + state: present + update_cache: yes + +- name: 启动并启用 Docker 服务 + systemd: + name: docker + enabled: yes + state: started + +- name: 创建所需的目录结构 + file: + path: "{{ item }}" + state: directory + mode: '0755' + with_items: + - /etc/ssl diff --git a/roles/docker/keycloak/templates/docker-compose.yml.j2 b/roles/docker/keycloak/templates/docker-compose.yml.j2 new file mode 100644 index 0000000..0e9d752 --- /dev/null +++ b/roles/docker/keycloak/templates/docker-compose.yml.j2 @@ -0,0 +1,67 @@ +version: '3.7' + +services: + postgres: + container_name: {{ postgres_name }} # container name + image: {{ postgres_image }} + environment: + POSTGRES_DB: {{ postgres_db }} + POSTGRES_USER: {{ postgres_user }} + POSTGRES_PASSWORD: {{ postgres_password }} + volumes: + - postgres_data:/var/lib/postgresql/data + networks: + - keycloak_network + + keycloak: + image: {{ keycloak_image }} + container_name: {{ keycloak_name }} # container name + environment: + KEYCLOAK_PROXY_HEADERS: xforwarded + KEYCLOAK_ADMIN: {{ keycloak_admin }} + KEYCLOAK_ADMIN_PASSWORD: {{ keycloak_admin_password }} + KEYCLOAK_DATABASE_VENDOR: postgresql + KEYCLOAK_DATABASE_HOST: {{ postgres_name }} + KEYCLOAK_DATABASE_PORT: 5432 + KEYCLOAK_DATABASE_USER: {{ postgres_user }} + KEYCLOAK_DATABASE_NAME: {{ postgres_db }} + KEYCLOAK_DATABASE_PASSWORD: {{ postgres_password }} + KEYCLOAK_HTTPS_KEY_STORE_FILE: /etc/ssl/keystore.jks + KEYCLOAK_HTTPS_KEY_STORE_PASSWORD: {{ keycloak_key_store.password }} # keystore password + KEYCLOAK_HTTPS_TRUST_STORE_FILE:
/etc/ssl/truststore.jks + KEYCLOAK_HTTPS_TRUST_STORE_PASSWORD: {{ keycloak_trust_store.password }} # truststore password + ports: + - 8080:8080 + volumes: + - {{ keycloak_key_store.file }}:/etc/ssl/keystore.jks + - {{ keycloak_trust_store.file }}:/etc/ssl/truststore.jks + restart: always + depends_on: + - postgres + networks: + - keycloak_network + + nginx: + image: {{ nginx_image }} + container_name: {{ nginx_name }} # container name + depends_on: + - keycloak # compose service key; depends_on does not accept container names such as keycloak_name + ports: + - "80:80" + - "443:443" + volumes: + - {{ ssl_certificate.path }}{{ ssl_certificate.name }}:/etc/ssl/certs/{{ ssl_certificate.name }} + - {{ ssl_certificate_key.path }}{{ ssl_certificate_key.name }}:/etc/ssl/certs/{{ ssl_certificate_key.name }} + - {{ dhparam_path }}:/etc/nginx/ssl/dhparam.pem + - ./nginx.conf:/etc/nginx/nginx.conf + restart: unless-stopped + networks: + - keycloak_network + +volumes: + postgres_data: + driver: local + +networks: + keycloak_network: + driver: bridge diff --git a/roles/docker/keycloak/templates/nginx.conf.j2 b/roles/docker/keycloak/templates/nginx.conf.j2 new file mode 100644 index 0000000..f58cf42 --- /dev/null +++ b/roles/docker/keycloak/templates/nginx.conf.j2 @@ -0,0 +1,70 @@ +# global settings +worker_processes auto; + +events { + worker_connections 1024; +} + +http { + # Global HTTP settings + include /etc/nginx/mime.types; + default_type application/octet-stream; + + # Logging goes to the container's stdout / stderr + access_log /dev/stdout; + error_log /dev/stderr; + + # Content-Security-Policy header: allow frames from this origin and from keycloak_domain + add_header Content-Security-Policy "frame-src 'self' https://{{ keycloak_domain }};"; + + # Reverse-proxy buffer settings + client_body_buffer_size 16k; # client request body buffer size + proxy_buffer_size 128k; # size of a single proxy buffer + proxy_buffers 4 256k; # proxy buffers: 4 x 256 KB + proxy_max_temp_file_size 512m; # maximum temporary file size + + # Trusted sources for the real client IP. NOTE(review): 0.0.0.0/0 accepts X-Real-IP from any client and allows IP spoofing; restrict to the real upstream proxy ranges + set_real_ip_from 0.0.0.0/0; + real_ip_header X-Real-IP; + real_ip_recursive on; + + # server blocks + server { + listen 80; + server_name {{ keycloak_domain }}; + + # Force HTTP requests to redirect to HTTPS + return 301
https://$host$request_uri; + } + + server { + listen 443 ssl; + server_name {{ keycloak_domain }}; + + # SSL 配置 + ssl_certificate /etc/ssl/certs/{{ ssl_certificate.name }}; + ssl_certificate_key /etc/ssl/certs/{{ ssl_certificate_key.name }}; + + # 日志设置 + access_log /dev/stdout; + error_log /dev/stderr; + + # 配置反向代理 + location / { + proxy_pass http://{{ keycloak_name }}:8080; + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Proto $scheme; + proxy_set_header X-Forwarded-Port 443; + proxy_set_header Cookie $http_cookie; + proxy_redirect off; + } + + # SSL 强化 + ssl_protocols TLSv1.2 TLSv1.3; + ssl_ciphers 'ECDHE-ECDSA-AES128-GCM-SHA256:ECDHE-RSA-AES128-GCM-SHA256:DHE-RSA-AES128-GCM-SHA256'; + ssl_prefer_server_ciphers off; + } +} + diff --git a/roles/docker/loki/README.md b/roles/docker/loki/README.md new file mode 100644 index 0000000..8b1cc00 --- /dev/null +++ b/roles/docker/loki/README.md @@ -0,0 +1,3 @@ +# loki (docker) + +Placeholder role for docker-compose style deployment of loki. diff --git a/roles/docker/loki/tasks/main.yml b/roles/docker/loki/tasks/main.yml new file mode 100644 index 0000000..c4f5488 --- /dev/null +++ b/roles/docker/loki/tasks/main.yml @@ -0,0 +1,5 @@ +--- +# TODO: implement docker deployment tasks +- name: Placeholder task + debug: + msg: "Role placeholder. Implement docker deployment tasks." diff --git a/roles/docker/minio/README.md b/roles/docker/minio/README.md new file mode 100644 index 0000000..05ec370 --- /dev/null +++ b/roles/docker/minio/README.md @@ -0,0 +1,3 @@ +# minio (docker) + +Placeholder role for docker-compose style deployment of minio. 
diff --git a/roles/docker/minio/tasks/main.yml b/roles/docker/minio/tasks/main.yml new file mode 100644 index 0000000..c4f5488 --- /dev/null +++ b/roles/docker/minio/tasks/main.yml @@ -0,0 +1,5 @@ +--- +# TODO: implement docker deployment tasks +- name: Placeholder task + debug: + msg: "Role placeholder. Implement docker deployment tasks." diff --git a/roles/docker/mlflow/README.md b/roles/docker/mlflow/README.md new file mode 100644 index 0000000..b4cd5b0 --- /dev/null +++ b/roles/docker/mlflow/README.md @@ -0,0 +1,3 @@ +# mlflow (docker) + +Placeholder role for docker-compose style deployment of mlflow. diff --git a/roles/docker/mlflow/tasks/main.yml b/roles/docker/mlflow/tasks/main.yml new file mode 100644 index 0000000..c4f5488 --- /dev/null +++ b/roles/docker/mlflow/tasks/main.yml @@ -0,0 +1,5 @@ +--- +# TODO: implement docker deployment tasks +- name: Placeholder task + debug: + msg: "Role placeholder. Implement docker deployment tasks." diff --git a/roles/docker/neurapress/defaults/main.yml b/roles/docker/neurapress/defaults/main.yml new file mode 100644 index 0000000..51a5116 --- /dev/null +++ b/roles/docker/neurapress/defaults/main.yml @@ -0,0 +1,7 @@ +--- +# Default deployment directory for Neurapress Docker stack +neurapress_deploy_dir: /opt/neurapress +neurapress_workspace: "{{ neurapress_deploy_dir }}" +neurapress_domain: write.svc.plus +neurapress_image: neurapress:prod +neurapress_certbot_email: manbuzhe2009@qq.com diff --git a/roles/docker/neurapress/files/nginx/nginx.conf b/roles/docker/neurapress/files/nginx/nginx.conf new file mode 100644 index 0000000..a228fd2 --- /dev/null +++ b/roles/docker/neurapress/files/nginx/nginx.conf @@ -0,0 +1,51 @@ +user nginx; +worker_processes auto; + +# Logs → container stdout / stderr +error_log /dev/stderr warn; +pid /var/run/nginx.pid; + +events { + worker_connections 1024; +} + +http { + include /etc/nginx/mime.types; + default_type application/octet-stream; + + # Access log → stdout + log_format main + 
'$remote_addr - $remote_user [$time_local] "$request" ' + '$status $body_bytes_sent ' + '"$http_referer" "$http_user_agent"'; + + access_log /dev/stdout main; + + # Core performance (safe defaults) + sendfile on; + tcp_nodelay on; + keepalive_timeout 65; + server_tokens off; + + # TLS session cache (in-memory only) + ssl_session_cache shared:SSL:10m; + ssl_session_timeout 10m; + + # Gzip (lightweight) + gzip on; + gzip_comp_level 6; + gzip_min_length 256; + gzip_types + text/plain + text/css + application/json + application/javascript + application/xml + image/svg+xml; + + # Allow uploads (Markdown / images) + client_max_body_size 50m; + + # Load virtual hosts + include /etc/nginx/conf.d/*.conf; +} diff --git a/roles/docker/neurapress/files/run.sh b/roles/docker/neurapress/files/run.sh new file mode 100644 index 0000000..d915823 --- /dev/null +++ b/roles/docker/neurapress/files/run.sh @@ -0,0 +1,6 @@ +#!/usr/bin/env bash +set -euo pipefail + +# Helper script to start the Neurapress docker compose stack +cd "$(dirname "$0")" +docker compose -f docker-compose.yaml up -d diff --git a/roles/docker/neurapress/tasks/main.yml b/roles/docker/neurapress/tasks/main.yml new file mode 100644 index 0000000..300f6d1 --- /dev/null +++ b/roles/docker/neurapress/tasks/main.yml @@ -0,0 +1,69 @@ +--- +- name: Ensure Neurapress directories exist + become: true + ansible.builtin.file: + path: "{{ item }}" + state: directory + mode: "0755" + loop: + - "{{ neurapress_workspace }}" + - "{{ neurapress_workspace }}/certbot" + - "{{ neurapress_workspace }}/certbot/conf" + - "{{ neurapress_workspace }}/certbot/www" + - "{{ neurapress_workspace }}/nginx" + - "{{ neurapress_workspace }}/nginx/conf.d" + +- name: Ensure Neurapress workspace ownership + become: true + ansible.builtin.file: + path: "{{ neurapress_workspace }}" + state: directory + recurse: true + owner: "1000" + group: "1000" + # no mode here: combined with recurse it would chmod every file (certbot private keys included) to 0755 on each run + +- name: Template Neurapress configuration files + become: true +
ansible.builtin.template: + src: "{{ item.src }}" + dest: "{{ neurapress_workspace }}/{{ item.dest }}" + mode: "{{ item.mode | default('0644') }}" + loop: + - { src: 'docker-compose.yaml', dest: 'docker-compose.yaml' } + - { src: 'nginx/conf.d/default.conf', dest: 'nginx/conf.d/default.conf' } + - { src: 'nginx/conf.d/bootstrap-nginx.conf', dest: 'nginx/conf.d/bootstrap-nginx.conf' } + +- name: Copy Neurapress static files + become: true + ansible.builtin.copy: + src: "{{ item.src }}" + dest: "{{ neurapress_workspace }}/{{ item.dest }}" + mode: "{{ item.mode | default('0644') }}" + loop: + - { src: 'run.sh', dest: 'run.sh', mode: '0755' } + - { src: 'nginx/nginx.conf', dest: 'nginx/nginx.conf' } + +- name: Bootstrap NGINX (80-only for ACME) + become: true + command: docker compose --profile bootstrap -f {{ neurapress_workspace }}/docker-compose.yaml up -d bootstrap-nginx + args: + chdir: "{{ neurapress_workspace }}" + +- name: Run certbot initial ACME challenge + become: true + command: docker compose --profile bootstrap -f {{ neurapress_workspace }}/docker-compose.yaml run --rm certbot + args: + chdir: "{{ neurapress_workspace }}" + +- name: Destroy Bootstrap NGINX (80-only for ACME) + become: true + command: docker compose --profile bootstrap -f {{ neurapress_workspace }}/docker-compose.yaml down bootstrap-nginx + args: + chdir: "{{ neurapress_workspace }}" + +- name: Bring up Neurapress stack + become: true + command: docker compose -f {{ neurapress_workspace }}/docker-compose.yaml up -d + args: + chdir: "{{ neurapress_workspace }}" diff --git a/roles/docker/neurapress/templates/docker-compose.yaml b/roles/docker/neurapress/templates/docker-compose.yaml new file mode 100644 index 0000000..96fc210 --- /dev/null +++ b/roles/docker/neurapress/templates/docker-compose.yaml @@ -0,0 +1,68 @@ +services: + app: + image: "{{ neurapress_image }}" + command: pnpm start + ports: + - "3000:3000" + environment: + - NODE_ENV=production + networks: + - app + + nginx: + image: 
nginx:mainline-alpine + container_name: neurapress-nginx + depends_on: + - app + ports: + - "80:80" + - "443:443" + volumes: + - "{{ neurapress_workspace }}/nginx/nginx.conf:/etc/nginx/nginx.conf:ro" + - "{{ neurapress_workspace }}/nginx/conf.d:/etc/nginx/conf.d:ro" + - "{{ neurapress_workspace }}/certbot/conf:/etc/letsencrypt" + - "{{ neurapress_workspace }}/certbot/www:/var/www/certbot" + networks: + - app + + bootstrap-nginx: + profiles: ["bootstrap"] + image: nginx:mainline-alpine + container_name: bootstrap-nginx + volumes: + - "{{ neurapress_workspace }}/nginx/nginx.conf:/etc/nginx/nginx.conf:ro" + - "{{ neurapress_workspace }}/nginx/conf.d/bootstrap-nginx.conf:/etc/nginx/conf.d/default.conf:ro" + - "{{ neurapress_workspace }}/certbot/conf:/etc/letsencrypt" + - "{{ neurapress_workspace }}/certbot/www:/var/www/certbot" + ports: + - "80:80" + networks: + - app + healthcheck: + test: ["CMD", "wget", "-qO-", "http://{{ neurapress_domain }}"] + interval: 3s + timeout: 2s + retries: 10 + start_period: 3s + + certbot: + profiles: ["bootstrap"] + image: certbot/certbot + container_name: certbot + command: > + certonly --webroot + --webroot-path=/var/www/certbot + --email {{ neurapress_certbot_email }} + --agree-tos + --no-eff-email + --keep-until-expiring + --non-interactive + -d {{ neurapress_domain }} + volumes: + - "{{ neurapress_workspace }}/certbot/conf:/etc/letsencrypt" + - "{{ neurapress_workspace }}/certbot/www:/var/www/certbot" + networks: + - app + +networks: + app: diff --git a/roles/docker/neurapress/templates/nginx/conf.d/bootstrap-nginx.conf b/roles/docker/neurapress/templates/nginx/conf.d/bootstrap-nginx.conf new file mode 100644 index 0000000..58baf72 --- /dev/null +++ b/roles/docker/neurapress/templates/nginx/conf.d/bootstrap-nginx.conf @@ -0,0 +1,11 @@ +server { + listen 80; + server_name {{ neurapress_domain }}; + + location ^~ /.well-known/acme-challenge/ { + root /var/www/certbot; + } + + # 不 redirect!不要 https! 
+ # certbot 需要纯 http 验证 +} diff --git a/roles/docker/neurapress/templates/nginx/conf.d/default.conf b/roles/docker/neurapress/templates/nginx/conf.d/default.conf new file mode 100644 index 0000000..3af97ed --- /dev/null +++ b/roles/docker/neurapress/templates/nginx/conf.d/default.conf @@ -0,0 +1,49 @@ +# ---------------------------------------------------- +# 80 - ACME Challenge + Redirect to HTTPS +# ---------------------------------------------------- +server { + listen 80; + server_name {{ neurapress_domain }}; + + # Certbot HTTP-01 challenge + location ^~ /.well-known/acme-challenge/ { + root /var/www/certbot; + } + + # All HTTP → HTTPS + location / { + return 301 https://$host$request_uri; + } +} + +# ---------------------------------------------------- +# 443 - TLS Termination for Neurapress +# ---------------------------------------------------- +server { + listen 443 ssl http2; + server_name {{ neurapress_domain }}; + + ssl_certificate /etc/letsencrypt/live/{{ neurapress_domain }}/fullchain.pem; + ssl_certificate_key /etc/letsencrypt/live/{{ neurapress_domain }}/privkey.pem; + + ssl_protocols TLSv1.2 TLSv1.3; + ssl_prefer_server_ciphers on; + + # Next.js / Neurapress + location / { + proxy_pass http://app:3000; + proxy_http_version 1.1; + + # WebSocket / HMR support + proxy_set_header Upgrade $http_upgrade; + proxy_set_header Connection "upgrade"; + + # Standard headers + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Proto https; + + proxy_read_timeout 300; + } +} diff --git a/roles/docker/otel/README.md b/roles/docker/otel/README.md new file mode 100644 index 0000000..a6a0dae --- /dev/null +++ b/roles/docker/otel/README.md @@ -0,0 +1,5 @@ +# otel (docker) + +Placeholder role for docker-compose style deployment of otel. + +Templates include docker-compose.yaml with bootstrap nginx and certbot services mirroring the Zitadel setup. 
diff --git a/roles/docker/otel/tasks/main.yml b/roles/docker/otel/tasks/main.yml new file mode 100644 index 0000000..c4f5488 --- /dev/null +++ b/roles/docker/otel/tasks/main.yml @@ -0,0 +1,5 @@ +--- +# TODO: implement docker deployment tasks +- name: Placeholder task + debug: + msg: "Role placeholder. Implement docker deployment tasks." diff --git a/roles/docker/otel/templates/docker-compose.yaml b/roles/docker/otel/templates/docker-compose.yaml new file mode 100644 index 0000000..580d705 --- /dev/null +++ b/roles/docker/otel/templates/docker-compose.yaml @@ -0,0 +1,41 @@ +services: + bootstrap-nginx: + profiles: ["bootstrap"] + image: nginx:mainline-alpine + container_name: bootstrap-nginx + volumes: + - "{{ zitadel_workspace }}/certbot/www:/var/www/certbot" + - "{{ zitadel_workspace }}/certbot/conf:/etc/letsencrypt" + - "{{ zitadel_workspace }}/nginx/nginx.conf:/etc/nginx/nginx.conf" + - "{{ zitadel_workspace }}/nginx/conf.d/bootstrap-nginx.conf:/etc/nginx/conf.d/bootstrap-nginx.conf" + ports: + - "80:80" # 暂时只占用80 + networks: + - app + healthcheck: + test: ["CMD", "wget", "-qO-", "http://localhost"] + interval: 3s + timeout: 2s + retries: 10 + start_period: 3s + certbot: + profiles: ["bootstrap"] + image: certbot/certbot + container_name: certbot + command: > + certonly --webroot + --webroot-path=/var/www/certbot + --email manbuzhe2009@qq.com + --agree-tos + --no-eff-email + --keep-until-expiring + --non-interactive + -d {{ zitadel_domain }} + volumes: + - "{{ zitadel_workspace }}/certbot/conf:/etc/letsencrypt" + - "{{ zitadel_workspace }}/certbot/www:/var/www/certbot" + networks: + - app + +networks: + app: diff --git a/roles/docker/postgres/README.md b/roles/docker/postgres/README.md new file mode 100644 index 0000000..e426619 --- /dev/null +++ b/roles/docker/postgres/README.md @@ -0,0 +1,3 @@ +# postgres (docker) + +Placeholder role for docker-compose style deployment of postgres. 
diff --git a/roles/docker/postgres/tasks/main.yml b/roles/docker/postgres/tasks/main.yml new file mode 100644 index 0000000..c4f5488 --- /dev/null +++ b/roles/docker/postgres/tasks/main.yml @@ -0,0 +1,5 @@ +--- +# TODO: implement docker deployment tasks +- name: Placeholder task + debug: + msg: "Role placeholder. Implement docker deployment tasks." diff --git a/roles/docker/ray/README.md b/roles/docker/ray/README.md new file mode 100644 index 0000000..aa2878e --- /dev/null +++ b/roles/docker/ray/README.md @@ -0,0 +1,3 @@ +# ray (docker) + +Placeholder role for docker-compose style deployment of ray. diff --git a/roles/docker/ray/tasks/main.yml b/roles/docker/ray/tasks/main.yml new file mode 100644 index 0000000..c4f5488 --- /dev/null +++ b/roles/docker/ray/tasks/main.yml @@ -0,0 +1,5 @@ +--- +# TODO: implement docker deployment tasks +- name: Placeholder task + debug: + msg: "Role placeholder. Implement docker deployment tasks." diff --git a/roles/docker/redpanda/README.md b/roles/docker/redpanda/README.md new file mode 100644 index 0000000..0be26c7 --- /dev/null +++ b/roles/docker/redpanda/README.md @@ -0,0 +1,3 @@ +# redpanda (docker) + +Placeholder role for docker-compose style deployment of redpanda. diff --git a/roles/docker/redpanda/tasks/main.yml b/roles/docker/redpanda/tasks/main.yml new file mode 100644 index 0000000..c4f5488 --- /dev/null +++ b/roles/docker/redpanda/tasks/main.yml @@ -0,0 +1,5 @@ +--- +# TODO: implement docker deployment tasks +- name: Placeholder task + debug: + msg: "Role placeholder. Implement docker deployment tasks." diff --git a/roles/docker/sglang/README.md b/roles/docker/sglang/README.md new file mode 100644 index 0000000..c6fc437 --- /dev/null +++ b/roles/docker/sglang/README.md @@ -0,0 +1,3 @@ +# sglang (docker) + +Placeholder role for docker-compose style deployment of sglang. 
diff --git a/roles/docker/sglang/tasks/main.yml b/roles/docker/sglang/tasks/main.yml new file mode 100644 index 0000000..c4f5488 --- /dev/null +++ b/roles/docker/sglang/tasks/main.yml @@ -0,0 +1,5 @@ +--- +# TODO: implement docker deployment tasks +- name: Placeholder task + debug: + msg: "Role placeholder. Implement docker deployment tasks." diff --git a/roles/docker/trino/README.md b/roles/docker/trino/README.md new file mode 100644 index 0000000..d501eb5 --- /dev/null +++ b/roles/docker/trino/README.md @@ -0,0 +1,3 @@ +# trino (docker) + +Placeholder role for docker-compose style deployment of trino. diff --git a/roles/docker/trino/tasks/main.yml b/roles/docker/trino/tasks/main.yml new file mode 100644 index 0000000..c4f5488 --- /dev/null +++ b/roles/docker/trino/tasks/main.yml @@ -0,0 +1,5 @@ +--- +# TODO: implement docker deployment tasks +- name: Placeholder task + debug: + msg: "Role placeholder. Implement docker deployment tasks." diff --git a/roles/docker/vllm/README.md b/roles/docker/vllm/README.md new file mode 100644 index 0000000..203a542 --- /dev/null +++ b/roles/docker/vllm/README.md @@ -0,0 +1,3 @@ +# vllm (docker) + +Placeholder role for docker-compose style deployment of vllm. diff --git a/roles/docker/vllm/tasks/main.yml b/roles/docker/vllm/tasks/main.yml new file mode 100644 index 0000000..c4f5488 --- /dev/null +++ b/roles/docker/vllm/tasks/main.yml @@ -0,0 +1,5 @@ +--- +# TODO: implement docker deployment tasks +- name: Placeholder task + debug: + msg: "Role placeholder. Implement docker deployment tasks." diff --git a/roles/docker/zitadel/README.md b/roles/docker/zitadel/README.md new file mode 100644 index 0000000..b7d6990 --- /dev/null +++ b/roles/docker/zitadel/README.md @@ -0,0 +1,28 @@ +# Zitadel Docker role + +This role provisions a Zitadel stack with Postgres, optional TLS termination, login frontend, Nginx proxy, and Certbot assets. 
Templates from `templates/` and static assets from `files/` are rendered into `{{ zitadel_workspace }}` and the Docker Compose stack is started. + +## Layout +``` +files/ +├── certbot/ +│ ├── conf/ +│ └── www/ +├── docker-compose.yaml +├── nginx/ +│ ├── conf.d/ +│ │ └── default.conf +│ └── nginx.conf +└── run.sh +``` + +## Defaults +- `zitadel_deploy_dir`: `/opt/zitadel` +- `zitadel_workspace`: `{{ zitadel_deploy_dir }}` +- `zitadel_domain`: `auth.svc.plus` +- `zitadel_masterkey`: `MasterkeyNeedsToHave32Characters` + +## RUN + +ansible-playbook -i inventory.ini deploy_zitadel_docker.yaml -e "domain=auth.svc.plus" -D -C -l auth.svc.plus +ansible-playbook -i inventory.ini deploy_zitadel_docker.yaml -e "domain=auth.svc.plus" -D -l auth.svc.plus diff --git a/roles/docker/zitadel/defaults/main.yml b/roles/docker/zitadel/defaults/main.yml new file mode 100644 index 0000000..f26d8cd --- /dev/null +++ b/roles/docker/zitadel/defaults/main.yml @@ -0,0 +1,6 @@ +--- +# Default deployment directory for Zitadel Docker stack +zitadel_deploy_dir: /opt/zitadel +zitadel_workspace: "{{ zitadel_deploy_dir }}" +zitadel_domain: auth.svc.plus +zitadel_masterkey: MasterkeyNeedsToHave32Characters diff --git a/roles/docker/zitadel/files/certbot/conf/.gitkeep b/roles/docker/zitadel/files/certbot/conf/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/roles/docker/zitadel/files/certbot/www/.gitkeep b/roles/docker/zitadel/files/certbot/www/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/roles/docker/zitadel/files/nginx/nginx.conf b/roles/docker/zitadel/files/nginx/nginx.conf new file mode 100644 index 0000000..9fe4ac3 --- /dev/null +++ b/roles/docker/zitadel/files/nginx/nginx.conf @@ -0,0 +1,5 @@ +events {} + +http { + include /etc/nginx/conf.d/*.conf; +} diff --git a/roles/docker/zitadel/files/run.sh b/roles/docker/zitadel/files/run.sh new file mode 100644 index 0000000..77897d3 --- /dev/null +++ b/roles/docker/zitadel/files/run.sh @@ -0,0 +1,6 @@ 
+#!/usr/bin/env bash +set -euo pipefail + +# Helper script to start the Zitadel docker compose stack (Compose v2 plugin, same CLI the role's tasks use) +cd "$(dirname "$0")" +docker compose -f docker-compose.yaml up -d diff --git a/roles/docker/zitadel/tasks/main.yml b/roles/docker/zitadel/tasks/main.yml new file mode 100644 index 0000000..e8154e7 --- /dev/null +++ b/roles/docker/zitadel/tasks/main.yml @@ -0,0 +1,105 @@ +--- +- name: Ensure Zitadel directories exist + become: true + ansible.builtin.file: + path: "{{ item }}" + state: directory + mode: "0755" + loop: + - "{{ zitadel_workspace }}" + - "{{ zitadel_workspace }}/certbot" + - "{{ zitadel_workspace }}/certbot/conf" + - "{{ zitadel_workspace }}/certbot/www" + - "{{ zitadel_workspace }}/nginx" + - "{{ zitadel_workspace }}/nginx/conf.d" + +- name: Ensure Zitadel workspace ownership + become: true + ansible.builtin.file: + path: "{{ zitadel_workspace }}" + state: directory + recurse: true + owner: "1000" + group: "1000" + # no mode here: combined with recurse it would chmod every file (certbot private keys, login-client.pat) to 0755 on each run + +- name: Template Zitadel configuration files + become: true + ansible.builtin.template: + src: "{{ item.src }}" + dest: "{{ zitadel_workspace }}/{{ item.dest }}" + mode: "{{ item.mode | default('0644') }}" + loop: + - { src: 'docker-compose.yaml', dest: 'docker-compose.yaml' } + - { src: 'nginx/conf.d/default.conf', dest: 'nginx/conf.d/default.conf' } + - { src: 'nginx/conf.d/bootstrap-nginx.conf', dest: 'nginx/conf.d/bootstrap-nginx.conf' } + +- name: Copy Zitadel static files + become: true + ansible.builtin.copy: + src: "{{ item.src }}" + dest: "{{ zitadel_workspace }}/{{ item.dest }}" + mode: "{{ item.mode | default('0644') }}" + loop: + - { src: 'run.sh', dest: 'run.sh', mode: '0755' } + - { src: 'nginx/nginx.conf', dest: 'nginx/nginx.conf' } + +- name: Bootstrap NGINX (80-only for ACME) + become: true + command: docker compose --profile bootstrap -f {{ zitadel_workspace }}/docker-compose.yaml up -d bootstrap-nginx + args: + chdir: "{{ zitadel_workspace }}" + +- name: Run certbot initial ACME challenge + become: true +
command: docker compose --profile bootstrap -f {{ zitadel_workspace }}/docker-compose.yaml run --rm certbot + args: + chdir: "{{ zitadel_workspace }}" + +- name: Destroy Bootstrap NGINX (80-only for ACME) + become: true + command: docker compose --profile bootstrap -f {{ zitadel_workspace }}/docker-compose.yaml down bootstrap-nginx + args: + chdir: "{{ zitadel_workspace }}" + +# ------------------------------------------------------------------- +# 1. 判断 Zitadel 是否已经初始化 +# (是否已经生成 login-client.pat 或其它初始化标记) +# ------------------------------------------------------------------- +- name: Check if Zitadel initialized + stat: + path: "{{ zitadel_workspace }}/login-client.pat" + register: zitadel_initialized + + +# ------------------------------------------------------------------- +# 2. 如果未初始化,先清理所有可能失败的残留容器、状态 +# ------------------------------------------------------------------- +- name: Zitadel containers and Zitadel postgres volume (cleanup) + become: true + shell: | + docker compose -f {{ zitadel_workspace }}/docker-compose.yaml down || true + docker volume rm zitadel_data || true + args: + chdir: "{{ zitadel_workspace }}" + when: not zitadel_initialized.stat.exists + +# ------------------------------------------------------------------- +# 3. 执行第一次初始化(init + setup) +# ------------------------------------------------------------------- +- name: Run Zitadel init (one-time) + become: true + shell: | + docker compose -f {{ zitadel_workspace }}/docker-compose.yaml run --rm zitadel-init || true + args: + chdir: "{{ zitadel_workspace }}" + when: not zitadel_initialized.stat.exists + +# ------------------------------------------------------------------- +# 4. 
启动正式 Zitadel stack(start-only) +# ------------------------------------------------------------------- +- name: Bring up Zitadel stack + become: true + command: docker compose -f {{ zitadel_workspace }}/docker-compose.yaml up -d + args: + chdir: "{{ zitadel_workspace }}" diff --git a/roles/docker/zitadel/templates/docker-compose.yaml b/roles/docker/zitadel/templates/docker-compose.yaml new file mode 100644 index 0000000..c6bf0ee --- /dev/null +++ b/roles/docker/zitadel/templates/docker-compose.yaml @@ -0,0 +1,187 @@ +services: + + zitadel-external-tls: + extends: + service: zitadel-init + command: 'start-from-setup --masterkey "{{ zitadel_masterkey }}"' + environment: + ZITADEL_EXTERNALPORT: 443 + ZITADEL_EXTERNALSECURE: true + ZITADEL_TLS_ENABLED: false + networks: + - app + - db + depends_on: + db: + condition: 'service_healthy' + zitadel-init: + condition: 'service_completed_successfully' + + zitadel-enabled-tls: + extends: + service: zitadel-init + command: 'start-from-setup --masterkey "{{ zitadel_masterkey }}"' + environment: + ZITADEL_EXTERNALPORT: 443 + ZITADEL_EXTERNALSECURE: true + ZITADEL_TLS_ENABLED: true + ZITADEL_TLS_CERTPATH: /etc/letsencrypt/live/{{ zitadel_domain }}/fullchain.pem + ZITADEL_TLS_KEYPATH: /etc/letsencrypt/live/{{ zitadel_domain }}/privkey.pem + volumes: + - "{{ zitadel_workspace }}/certbot/conf:/etc/letsencrypt" + networks: + - app + - db + depends_on: + zitadel-init: + condition: 'service_completed_successfully' + db: + condition: 'service_healthy' + + zitadel-init: + image: '${ZITADEL_IMAGE:-ghcr.io/zitadel/zitadel:latest}' + command: 'init' + depends_on: + db: + condition: 'service_healthy' + environment: + # Using an external domain other than localhost proofs, that the proxy configuration works. + # If Zitadel can't resolve a requests original host to this domain, + # it will return a 404 Instance not found error. 
+ ZITADEL_EXTERNALDOMAIN: {{ zitadel_domain }} + # In case something doesn't work as expected, + # it can be handy to be able to read the access logs. + ZITADEL_LOGSTORE_ACCESS_STDOUT_ENABLED: true + # For convenience, ZITADEL should not ask to change the initial admin users password. + ZITADEL_FIRSTINSTANCE_ORG_HUMAN_PASSWORDCHANGEREQUIRED: false + # database configuration + ZITADEL_DATABASE_POSTGRES_HOST: db + ZITADEL_DATABASE_POSTGRES_USER_PASSWORD: zitadel_pw + # Set up a service account with IAM_LOGIN_CLIENT role and write the PAT to the file ./login-client.pat + ZITADEL_FIRSTINSTANCE_LOGINCLIENTPATPATH: /current-dir/login-client.pat + ZITADEL_FIRSTINSTANCE_ORG_LOGINCLIENT_MACHINE_USERNAME: login-client + ZITADEL_FIRSTINSTANCE_ORG_LOGINCLIENT_MACHINE_NAME: Automatically Initialized IAM Login Client + ZITADEL_FIRSTINSTANCE_ORG_LOGINCLIENT_PAT_EXPIRATIONDATE: '2029-01-01T00:00:00Z' + # The master key is used to + networks: + - db + healthcheck: + test: [ "CMD", "/app/zitadel", "ready" ] + interval: '10s' + timeout: '5s' + retries: 5 + start_period: '10s' + volumes: + - "{{ zitadel_workspace }}:/current-dir:rw" + + db: + restart: 'always' + image: postgres:17-alpine + environment: + POSTGRES_PASSWORD: postgres + healthcheck: + test: [ "CMD-SHELL", "pg_isready" ] + interval: 5s + timeout: 60s + retries: 10 + start_period: 5s + networks: + - db + volumes: + - 'data:/var/lib/postgresql/data:rw' + + login-external-tls: + restart: 'unless-stopped' + image: 'ghcr.io/zitadel/zitadel-login:latest' + environment: + - ZITADEL_API_URL=http://zitadel-external-tls:8080 + - NEXT_PUBLIC_BASE_PATH=/ui/v2/login + - ZITADEL_SERVICE_USER_TOKEN_FILE=/current-dir/login-client.pat + - CUSTOM_REQUEST_HEADERS=Host:{{ zitadel_domain }} + volumes: + - "{{ zitadel_workspace }}:/current-dir:ro" + networks: + - app + depends_on: + zitadel-external-tls: + condition: 'service_healthy' + + login-enabled-tls: + restart: 'unless-stopped' + image: 'ghcr.io/zitadel/zitadel-login:latest' + 
environment:
+      - ZITADEL_API_URL=https://zitadel-enabled-tls:8080
+      - NEXT_PUBLIC_BASE_PATH=/ui/v2/login
+      - ZITADEL_SERVICE_USER_TOKEN_FILE=/current-dir/login-client.pat
+      - CUSTOM_REQUEST_HEADERS=Host:{{ zitadel_domain }}
+      - NODE_TLS_REJECT_UNAUTHORIZED=0
+    volumes:
+      - "{{ zitadel_workspace }}:/current-dir:ro"
+    networks:
+      - app
+    depends_on:
+      zitadel-enabled-tls:
+        condition: 'service_healthy'
+
+  proxy-external-tls:
+    image: nginx:mainline-alpine
+    container_name: proxy-external-tls
+    restart: unless-stopped
+    volumes:
+      - "{{ zitadel_workspace }}/nginx/nginx.conf:/etc/nginx/nginx.conf"
+      - "{{ zitadel_workspace }}/nginx/conf.d/default.conf:/etc/nginx/conf.d/default.conf:ro"
+      - "{{ zitadel_workspace }}/certbot/conf:/etc/letsencrypt"
+      - "{{ zitadel_workspace }}/certbot/www:/var/www/certbot"
+    ports:
+      - "80:80"
+      - "443:443"
+    networks:
+      - app
+    depends_on:
+      zitadel-external-tls:
+        condition: service_healthy
+
+  bootstrap-nginx:
+    profiles: ["bootstrap"]
+    image: nginx:mainline-alpine
+    container_name: bootstrap-nginx
+    volumes:
+      - "{{ zitadel_workspace }}/certbot/www:/var/www/certbot"
+      - "{{ zitadel_workspace }}/certbot/conf:/etc/letsencrypt"
+      - "{{ zitadel_workspace }}/nginx/nginx.conf:/etc/nginx/nginx.conf"
+      - "{{ zitadel_workspace }}/nginx/conf.d/bootstrap-nginx.conf:/etc/nginx/conf.d/bootstrap-nginx.conf"
+    ports:
+      - "80:80"   # only port 80 is bound for now
+    networks:
+      - app
+    healthcheck:
+      test: ["CMD", "wget", "-qO-", "http://localhost"]
+      interval: 3s
+      timeout: 2s
+      retries: 10
+      start_period: 3s
+  certbot:
+    profiles: ["bootstrap"]
+    image: certbot/certbot
+    container_name: certbot
+    command: >
+      certonly --webroot
+      --webroot-path=/var/www/certbot
+      --email manbuzhe2009@qq.com
+      --agree-tos
+      --no-eff-email
+      --keep-until-expiring
+      --non-interactive
+      -d {{ zitadel_domain }}
+    volumes:
+      - "{{ zitadel_workspace }}/certbot/conf:/etc/letsencrypt"
+      - "{{ zitadel_workspace }}/certbot/www:/var/www/certbot"
+    networks:
+      - app
+
+networks:
+  app:
+  db:
+
+volumes:
+  data:
diff --git a/roles/docker/zitadel/templates/nginx/conf.d/bootstrap-nginx.conf b/roles/docker/zitadel/templates/nginx/conf.d/bootstrap-nginx.conf
new file mode 100644
index 0000000..6264996
--- /dev/null
+++ b/roles/docker/zitadel/templates/nginx/conf.d/bootstrap-nginx.conf
@@ -0,0 +1,11 @@
+server {
+    listen 80;
+    server_name {{ zitadel_domain }};
+
+    location ^~ /.well-known/acme-challenge/ {
+        root /var/www/certbot;
+    }
+
+    # No redirect and no HTTPS here:
+    # certbot needs plain HTTP for the HTTP-01 validation.
+}
diff --git a/roles/docker/zitadel/templates/nginx/conf.d/default.conf b/roles/docker/zitadel/templates/nginx/conf.d/default.conf
new file mode 100644
index 0000000..8dfe8c3
--- /dev/null
+++ b/roles/docker/zitadel/templates/nginx/conf.d/default.conf
@@ -0,0 +1,43 @@
+# ----------------------------------------------------
+# 80 - ACME Challenge + Redirect to HTTPS
+# ----------------------------------------------------
+server {
+    listen 80;
+    server_name {{ zitadel_domain }};
+
+    # Certbot HTTP-01 challenge
+    location ^~ /.well-known/acme-challenge/ {
+        root /var/www/certbot;
+    }
+
+    # All HTTP → HTTPS
+    location / {
+        return 301 https://$host$request_uri;
+    }
+}
+
+# ----------------------------------------------------
+# 443 - TLS Termination
+# ----------------------------------------------------
+server {
+    listen 443 ssl;
+    http2 on;  # the "http2" parameter of "listen" is deprecated on current mainline nginx
+    server_name {{ zitadel_domain }};
+
+    ssl_certificate     /etc/letsencrypt/live/{{ zitadel_domain }}/fullchain.pem;
+    ssl_certificate_key /etc/letsencrypt/live/{{ zitadel_domain }}/privkey.pem;
+
+    ssl_protocols TLSv1.2 TLSv1.3;
+    ssl_prefer_server_ciphers on;
+
+    location /ui/v2/login {
+        proxy_pass http://login-external-tls:3000;
+        proxy_set_header Host $host;
+        proxy_set_header X-Forwarded-Proto https;
+    }
+    location / {
+        grpc_pass grpc://zitadel-external-tls:8080;
+        grpc_set_header Host $host;
+        grpc_set_header X-Forwarded-Proto https;
+    }
+}
diff --git a/roles/github/defaults/main.yml b/roles/github/defaults/main.yml
new file mode 100644
index
0000000..f113e9d
--- /dev/null
+++ b/roles/github/defaults/main.yml
@@ -0,0 +1,3 @@
+---
+owner: ""
+repo: ""
diff --git a/roles/github/tasks/main.yml b/roles/github/tasks/main.yml
new file mode 100644
index 0000000..9bceabf
--- /dev/null
+++ b/roles/github/tasks/main.yml
@@ -0,0 +1,11 @@
+---
+- name: Set branch protection using GitHub CLI
+  ansible.builtin.command: >-
+    gh api repos/{{ owner }}/{{ repo }}/branches/main/protection
+    --method PUT
+    -H "Accept: application/vnd.github+json"
+    --input .github/branch-protection-rules.json
+  args:
+    chdir: "{{ playbook_dir }}/.."
+  environment:
+    GITHUB_TOKEN: "{{ lookup('env', 'GITHUB_TOKEN') }}"
diff --git a/roles/vhosts/OpenResty/defaults/main.yml b/roles/vhosts/OpenResty/defaults/main.yml
new file mode 100644
index 0000000..a72b8e8
--- /dev/null
+++ b/roles/vhosts/OpenResty/defaults/main.yml
@@ -0,0 +1,3 @@
+vhost_defaults:
+  root: /data/update-server
+  autoindex_paths: []
diff --git a/roles/vhosts/OpenResty/handlers/main.yml b/roles/vhosts/OpenResty/handlers/main.yml
new file mode 100644
index 0000000..22c1996
--- /dev/null
+++ b/roles/vhosts/OpenResty/handlers/main.yml
@@ -0,0 +1,4 @@
+- name: Restart OpenResty
+  systemd:
+    name: openresty
+    state: restarted
diff --git a/roles/vhosts/OpenResty/meta/main.yml b/roles/vhosts/OpenResty/meta/main.yml
new file mode 100644
index 0000000..9711b33
--- /dev/null
+++ b/roles/vhosts/OpenResty/meta/main.yml
@@ -0,0 +1,2 @@
+dependencies:
+  - role: common
diff --git a/roles/vhosts/OpenResty/tasks/geoip.yml b/roles/vhosts/OpenResty/tasks/geoip.yml
new file mode 100644
index 0000000..2aee0ee
--- /dev/null
+++ b/roles/vhosts/OpenResty/tasks/geoip.yml
@@ -0,0 +1,23 @@
+- name: Install GeoIP dependencies
+  apt:
+    name: luarocks
+    state: present
+    update_cache: true
+
+- name: Install lua-resty-maxminddb
+  command: "luarocks install lua-resty-maxminddb"
+  args:
+    creates: /usr/local/openresty/site/lualib/resty/maxminddb.lua
+
+- name: Ensure GeoIP database directory exists
+  file:
+
path: /usr/share/GeoIP
+    state: directory
+    mode: "0755"
+
+# yamllint disable rule:line-length
+- name: Download V2Fly GeoIP database
+  get_url:
+    url: https://github.com/v2fly/geoip/releases/latest/download/geoip.dat
+    dest: /usr/share/GeoIP/geoip.dat
+# yamllint enable rule:line-length
diff --git a/roles/vhosts/OpenResty/tasks/main.yml b/roles/vhosts/OpenResty/tasks/main.yml
new file mode 100644
index 0000000..2fdb8fe
--- /dev/null
+++ b/roles/vhosts/OpenResty/tasks/main.yml
@@ -0,0 +1,85 @@
+- name: Install prerequisites for OpenResty
+  apt:
+    name:
+      - curl
+      - gnupg
+      - apt-transport-https
+    state: present
+    update_cache: true
+
+- name: Import OpenResty GPG key
+  shell: |
+    set -o pipefail
+    curl -fsSL https://openresty.org/package/pubkey.gpg | \
+      gpg --dearmor -o /usr/share/keyrings/openresty.gpg \
+      || { rm -f /usr/share/keyrings/openresty.gpg; exit 1; }
+  args:
+    creates: /usr/share/keyrings/openresty.gpg
+    executable: /bin/bash
+
+# yamllint disable rule:line-length
+- name: Add OpenResty apt repository
+  apt_repository:
+    repo: "deb [signed-by=/usr/share/keyrings/openresty.gpg] http://openresty.org/package/ubuntu jammy main"
+    filename: openresty
+    state: present
+# yamllint enable rule:line-length
+
+- name: Install OpenResty
+  apt:
+    name: openresty
+    state: present
+    update_cache: true
+
+- name: Ensure sites-available directory exists
+  file:
+    path: /usr/local/openresty/nginx/conf/sites-available
+    state: directory
+
+- name: Deploy nginx configuration
+  template:
+    src: nginx.conf.j2
+    dest: /usr/local/openresty/nginx/conf/nginx.conf
+  notify: Restart OpenResty
+
+- name: Deploy vhost configurations
+  template:
+    # Use item.template if provided; otherwise select by vhost type
+    src: "{{ item.template | default(item.type ~ '.conf.j2') }}"
+    dest: "/usr/local/openresty/nginx/conf/sites-available/{{ item.name }}.conf"
+  loop: "{{ vhosts | default([]) }}"
+  notify: Restart OpenResty
+
+- name: Ensure artifact root directories exist
+  file:
+    path: "{{ item.root }}"
+    state: directory
+    owner: www-data
+    group: www-data
+    mode: "0755"
+  loop: "{{
vhosts | default([]) | selectattr('type', 'equalto', 'artifact') | selectattr('root', 'defined') | list }}"
+
+- name: Ensure homepage static root directories exist
+  file:
+    path: "{{ item.root | default('/data/update-server/dashboard') }}"
+    state: directory
+    owner: www-data
+    group: www-data
+    mode: "0755"
+  loop: "{{ vhosts | default([]) | selectattr('type', 'equalto', 'homepage-static') | list }}"
+
+- name: Enable and start OpenResty
+  systemd:
+    name: openresty
+    enabled: true
+    state: started
+
+- name: Verify OpenResty core API
+  shell: |
+    curl -fsS -X POST http://127.0.0.1/api/askai \
+      -H "Content-Type: application/json" \
+      -d '{"question":"你好"}'
+  register: openresty_verify
+  retries: 5
+  delay: 3
+  until: openresty_verify.rc == 0
diff --git a/roles/vhosts/OpenResty/templates/artifact.conf.j2 b/roles/vhosts/OpenResty/templates/artifact.conf.j2
new file mode 100644
index 0000000..3fe901b
--- /dev/null
+++ b/roles/vhosts/OpenResty/templates/artifact.conf.j2
@@ -0,0 +1,56 @@
+server {
+    listen 443 ssl;
+    server_name {{ item.domain | join(' ') }};
+
+    ssl_certificate     {{ item.ssl_certificate }};
+    ssl_certificate_key {{ item.ssl_certificate_key }};
+    ssl_protocols       TLSv1.2 TLSv1.3;
+    ssl_ciphers         HIGH:!aNULL:!MD5;
+
+    root {{ item.root | default(vhost_defaults.root) }};
+    index index.html;
+
+    # Recommended: allow ACME / health checks (so the dotfile rule does not block them)
+    location ^~ /.well-known/ { allow all; }
+
+    {% set autoindex_paths = item.autoindex_paths | default(vhost_defaults.autoindex_paths) %}
+
+    # Directory browsing (autoindex on) - can list all of {{ item.root | default(vhost_defaults.root) }}
+    {% if '/' in autoindex_paths %}
+    location / {
+        autoindex on;
+        autoindex_exact_size off;
+        autoindex_localtime on;
+        add_header Accept-Ranges bytes;
+        try_files $uri $uri/ =404;   # keep the original 404 semantics
+    }
+    {% elif autoindex_paths %}
+    {% for path in autoindex_paths %}
+    location {{ path }} {
+        autoindex on;
+        autoindex_exact_size off;
+        autoindex_localtime on;
+    }
+    {% endfor %}
+    {% endif %}
+
+    # Serve common installer packages straight from disk (case-insensitive)
+    #
try_files is not needed here: a match reads the file directly, saving one disk check
+    location ~* \.(?:dmg|zip|tar\.gz|deb|rpm|exe|pkg|appimage|apk|ipa)$ {
+        expires 7d;
+        access_log off;
+        add_header Cache-Control "public";
+        add_header Accept-Ranges bytes;
+    }
+
+    # Hide dotfiles (except /.well-known/, which is allowed above)
+    location ~ /\.(?!well-known/)[^/]+ {
+        deny all;
+    }
+}
+
+server {
+    listen 80;
+    server_name {{ item.domain | join(' ') }};
+    return 301 https://$host$request_uri;
+}
diff --git a/roles/vhosts/OpenResty/templates/geo_redirect.conf.j2 b/roles/vhosts/OpenResty/templates/geo_redirect.conf.j2
new file mode 100644
index 0000000..1196452
--- /dev/null
+++ b/roles/vhosts/OpenResty/templates/geo_redirect.conf.j2
@@ -0,0 +1,46 @@
+# lua_package_path is set once in nginx.conf.j2 (http context, which includes this file); repeating it here is rejected as a duplicate directive.
+
+server {
+    listen 443 ssl http2;
+    server_name {{ item.name }};
+
+    ssl_certificate     {{ item.ssl_certificate }};
+    ssl_certificate_key {{ item.ssl_certificate_key }};
+
+    location / {
+        access_by_lua_block {
+            local region = ngx.var.cookie_region
+            if region == "CN" then
+                return ngx.redirect("https://{{ item.cn_target }}" .. ngx.var.request_uri, 302)
+            elseif region == "GLOBAL" then
+                return ngx.redirect("https://{{ item.global_target }}" .. ngx.var.request_uri, 302)
+            end
+
+            -- NOTE(review): lua-resty-maxminddb appears to document init()/lookup() and a .mmdb database; confirm new()/reader:lookup() and geoip.dat work here.
+            local geoip = require("resty.maxminddb")
+            local reader, err = geoip.new("/usr/share/GeoIP/geoip.dat")
+            if not reader then
+                ngx.log(ngx.ERR, "failed to open MaxMind DB: ", err)
+                ngx.header["Set-Cookie"] = "region=GLOBAL; Path=/; Max-Age=3600"
+                return ngx.redirect("https://{{ item.global_target }}" .. ngx.var.request_uri, 302)
+            end
+
+            local res, err = reader:lookup(ngx.var.remote_addr)
+            if err then
+                ngx.log(ngx.ERR, "failed to lookup IP: ", err)
+                ngx.header["Set-Cookie"] = "region=GLOBAL; Path=/; Max-Age=3600"
+                return ngx.redirect("https://{{ item.global_target }}" ..
ngx.var.request_uri, 302)
+            end
+
+            local country = res and res.country and res.country.iso_code or "XX"
+
+            if country == "CN" then
+                ngx.header["Set-Cookie"] = "region=CN; Path=/; Max-Age=3600"
+                return ngx.redirect("https://{{ item.cn_target }}" .. ngx.var.request_uri, 302)
+            else
+                ngx.header["Set-Cookie"] = "region=GLOBAL; Path=/; Max-Age=3600"
+                return ngx.redirect("https://{{ item.global_target }}" .. ngx.var.request_uri, 302)
+            end
+        }
+    }
+}
diff --git a/roles/vhosts/OpenResty/templates/homepage-static.conf.j2 b/roles/vhosts/OpenResty/templates/homepage-static.conf.j2
new file mode 100644
index 0000000..ebe31b0
--- /dev/null
+++ b/roles/vhosts/OpenResty/templates/homepage-static.conf.j2
@@ -0,0 +1,132 @@
+server {
+    listen 80;
+    server_name {{ item.domain | join(' ') }};
+    return 301 https://$host$request_uri;
+}
+
+server {
+    listen 443 ssl;
+    server_name {{ item.domain | join(' ') }};
+
+    ssl_certificate     {{ item.ssl_certificate }};
+    ssl_certificate_key {{ item.ssl_certificate_key }};
+    ssl_protocols       TLSv1.2 TLSv1.3;
+    ssl_ciphers         HIGH:!aNULL:!MD5;
+
+    # ====== Static root (Next.js export output) ======
+    root {{ item.root | default('/data/update-server/dashboard') }};
+    index index.html;
+
+    # (Optional) allow ACME / health-check paths
+    location ^~ /.well-known/ { allow all; }
+
+    # =======================
+    # API reverse proxy
+    # =======================
+    location /api/ {
+        proxy_pass http://127.0.0.1:8080;
+        proxy_http_version 1.1;
+        proxy_set_header Host $host;
+        proxy_set_header X-Real-IP $remote_addr;
+        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
+        proxy_set_header X-Forwarded-Proto $scheme;
+    }
+
+    # Daily rate limit for /api/askai (keyed by the user query argument, else client IP)
+    location = /api/askai {
+        access_by_lua_block {
+            local redis = require "resty.redis"
+            local r = redis:new()
+            r:set_timeout(200)
+            local ok, err = r:connect("127.0.0.1", 6379)
+            if not ok then
+                ngx.log(ngx.ERR, "Redis connect error: ", err)
+                return ngx.exit(500)
+            end
+
+            local user = ngx.var.arg_user or ngx.var.remote_addr
+            local today = os.date("%Y%m%d")
+            local key = "limit:user:" .. user .. ":" .. today
+
+            local count, err = r:incr(key)
+            if not count then
+                ngx.log(ngx.ERR, "Redis incr error: ", err)
+                return ngx.exit(500)
+            end
+            if count == 1 then r:expire(key, 86400) end
+            if count > 200 then
+                ngx.status = 429
+                ngx.header["Content-Type"] = "text/plain; charset=utf-8"
+                ngx.say("Too Many Requests: daily limit reached")
+                return ngx.exit(429)
+            end
+        }
+
+        proxy_pass http://127.0.0.1:8080;
+        proxy_http_version 1.1;
+        proxy_set_header Host $host;
+        proxy_set_header X-Real-IP $remote_addr;
+        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
+        proxy_set_header X-Forwarded-Proto $scheme;
+    }
+
+    # =======================
+    # Serve static files directly (replaces the former Next.js dynamic proxy)
+    # =======================
+
+    # Next-exported static assets (content-hashed -> long cache)
+    location ^~ /_next/static/ {
+        try_files $uri =404;
+        access_log off;
+        expires 1y;
+        add_header Cache-Control "public, immutable, max-age=31536000";
+    }
+
+    # Other common static assets: medium cache lifetime
+    location ~* \.(?:js|css|png|jpg|jpeg|gif|svg|webp|ico|woff2?|ttf)$ {
+        try_files $uri =404;
+        access_log off;
+        expires 7d;
+        add_header Cache-Control "public, max-age=604800";
+    }
+
+    # Home page and all exported routes: match by file/directory
+    # Misses are handed to 404.html (keeps static-site semantics)
+    location / {
+        try_files $uri $uri/ /index.html =404;
+    }
+
+    # Explicitly handle the 404/500 route directories (Next export generates 404/, 500/ and same-named .html files)
+    location = /404.html { internal; }
+    error_page 404 /404.html;
+
+    # Serve /favicon.ico directly when it exists
+    location = /favicon.ico {
+        try_files /favicon.ico =204;
+        access_log off;
+        expires 30d;
+        add_header Cache-Control "public, max-age=2592000";
+    }
+
+    # (Optional) enable directory listing for selected paths
+{% for path in item.autoindex_paths | default([]) %}
+    location ^~ {{ path }} {
+        autoindex on;
+        autoindex_exact_size off;
+        autoindex_localtime on;
+        try_files $uri $uri/ =404;
+    }
+{% endfor %}
+
+    # Deny access to hidden files (e.g. .env)
+    location ~ /\.
{
+        deny all;
+    }
+
+    # (Optional) enable gzip (br can be added too if ngx_brotli is enabled)
+    gzip on;
+    gzip_comp_level 5;
+    gzip_min_length 1k;
+    gzip_types text/plain text/css application/javascript application/json application/xml image/svg+xml;
+    gzip_vary on;
+}
diff --git a/roles/vhosts/OpenResty/templates/homepage.conf.j2 b/roles/vhosts/OpenResty/templates/homepage.conf.j2
new file mode 100644
index 0000000..30a6dbb
--- /dev/null
+++ b/roles/vhosts/OpenResty/templates/homepage.conf.j2
@@ -0,0 +1,82 @@
+server {
+    listen 80;
+    server_name {{ item.domain | join(' ') }};
+    return 301 https://$host$request_uri;
+}
+
+server {
+    listen 443 ssl;
+    server_name {{ item.domain | join(' ') }};
+
+    ssl_certificate     {{ item.ssl_certificate }};
+    ssl_certificate_key {{ item.ssl_certificate_key }};
+    ssl_protocols       TLSv1.2 TLSv1.3;
+    ssl_ciphers         HIGH:!aNULL:!MD5;
+
+    root /var/www/XControl/ui/homepage/out;
+    index index.html;
+
+    # Reverse-proxy /api/ without redirecting
+    location /api/ {
+        proxy_pass http://127.0.0.1:8080;
+        proxy_http_version 1.1;
+        proxy_set_header Host $host;
+        proxy_set_header X-Real-IP $remote_addr;
+        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
+        proxy_set_header X-Forwarded-Proto $scheme;
+    }
+
+    # Daily rate limit for /api/askai (keyed by the user query argument, else client IP)
+    location = /api/askai {
+        access_by_lua_block {
+            local redis = require "resty.redis"
+            local r = redis:new()
+            r:set_timeout(200)
+            local ok, err = r:connect("127.0.0.1", 6379)
+            if not ok then
+                ngx.log(ngx.ERR, "Redis connect error: ", err)
+                return ngx.exit(500)
+            end
+
+            local user = ngx.var.arg_user or ngx.var.remote_addr
+            local today = os.date("%Y%m%d")
+            local key = "limit:user:" .. user .. ":" .. today
+
+            local count, err = r:incr(key)
+            if not count then
+                ngx.log(ngx.ERR, "Redis incr error: ", err)
+                return ngx.exit(500)
+            end
+            if count == 1 then r:expire(key, 86400) end
+            if count > 200 then
+                ngx.status = 429
+                ngx.say("Too Many Requests: daily limit reached")
+                return ngx.exit(429)
+            end
+        }
+
+        proxy_pass http://127.0.0.1:8080;
+        proxy_http_version 1.1;
+        proxy_set_header Host $host;
+        proxy_set_header X-Real-IP $remote_addr;
+        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
+        proxy_set_header X-Forwarded-Proto $scheme;
+    }
+
+    # Page requests: static pages by default + HTML5 history fallback
+    location / {
+        try_files $uri $uri/ /index.html;
+    }
+
+    # Cache tuning for static assets
+    location ~* \.(?:ico|css|js|gif|jpe?g|png|woff2?)$ {
+        expires 30d;
+        access_log off;
+        add_header Cache-Control "public";
+    }
+
+    # Deny access to hidden files (e.g. .env)
+    location ~ /\. {
+        deny all;
+    }
+}
diff --git a/roles/vhosts/OpenResty/templates/nginx.conf.j2 b/roles/vhosts/OpenResty/templates/nginx.conf.j2
new file mode 100644
index 0000000..8677a55
--- /dev/null
+++ b/roles/vhosts/OpenResty/templates/nginx.conf.j2
@@ -0,0 +1,32 @@
+worker_processes auto;
+user www-data;
+
+events {
+    worker_connections 1024;
+}
+
+http {
+    include       mime.types;
+    default_type  application/octet-stream;
+    lua_package_path "/usr/local/openresty/lualib/?.lua;;";
+
+    sendfile        on;
+    keepalive_timeout  65;
+
+    # Enable gzip (optional)
+    gzip on;
+    gzip_types text/plain text/css application/json application/javascript application/xml+rss;
+
+    # Define the log format (optional)
+    log_format main '$remote_addr - $remote_user [$time_local] '
+                    '"$request" $status $body_bytes_sent '
+                    '"$http_referer" "$http_user_agent" '
+                    '$request_time';
+
+    # Global access log
+    access_log /usr/local/openresty/nginx/logs/access.log main;
+    error_log  /usr/local/openresty/nginx/logs/error.log warn;
+
+    # Include the configs under sites-available
+    include /usr/local/openresty/nginx/conf/sites-available/*.conf;
+}
diff --git a/roles/vhosts/OpenResty/templates/panel.conf.j2 b/roles/vhosts/OpenResty/templates/panel.conf.j2
new file mode 100644
index 0000000..a813a2f
--- /dev/null
+++
b/roles/vhosts/OpenResty/templates/panel.conf.j2
@@ -0,0 +1 @@
+# TODO: implement panel vhost template
diff --git a/roles/vhosts/Redis/meta/main.yml b/roles/vhosts/Redis/meta/main.yml
new file mode 100644
index 0000000..9711b33
--- /dev/null
+++ b/roles/vhosts/Redis/meta/main.yml
@@ -0,0 +1,2 @@
+dependencies:
+  - role: common
diff --git a/roles/vhosts/Redis/tasks/main.yml b/roles/vhosts/Redis/tasks/main.yml
new file mode 100644
index 0000000..c2ec476
--- /dev/null
+++ b/roles/vhosts/Redis/tasks/main.yml
@@ -0,0 +1,11 @@
+- name: Install Redis server
+  apt:
+    name: redis-server
+    state: present
+    update_cache: true
+
+- name: Enable and start Redis service
+  systemd:
+    name: redis-server
+    enabled: true
+    state: started
diff --git a/roles/vhosts/alerting/files/setup-observable-server.sh b/roles/vhosts/alerting/files/setup-observable-server.sh
new file mode 100644
index 0000000..848ebe0
--- /dev/null
+++ b/roles/vhosts/alerting/files/setup-observable-server.sh
@@ -0,0 +1,102 @@
+#!/bin/bash
+
+export domain=$1
+export secret=$2
+export namespace=$3
+export mysql_db_password=$4
+
+kubectl label nodes k3s-server prometheus=true --overwrite
+
+cat > values.yaml << EOF
+deepflow:
+  enabled: true
+  clickhouse:
+    enabled: false
+  mysql:
+    enabled: false
+  grafana:
+    enabled: true
+    ingress:
+      enabled: true
+      ingressClassName: nginx
+      hosts:
+        - grafana.${domain}
+      tls:
+        - secretName: ${secret}
+          hosts:
+            - grafana.${domain}
+  global:
+    externalClickHouse:
+      enabled: true
+      type: ep
+      clusterName: default
+      storagePolicy: default
+      username: default
+      password: ''
+      hosts:
+      - ip: 10.1.2.3
+        port: 9000
+      - ip: 10.1.2.4
+        port: 9000
+      - ip: 10.1.2.5
+        port: 9000
+    externalMySQL:
+      enabled: true
+      ip: mysql.database.svc.cluster.local
+      port: 3306
+      username: root
+      password: '${mysql_db_password}'
+prometheus:
+  enabled: true
+  alertmanager:
+    enabled: false
+  prometheus-pushgateway:
+    enabled: false
+  kube-state-metrics:
+    enabled: false
+  server:
+    ingress:
+
ingressClassName: nginx
+      hosts:
+        - prometheus.${domain}
+      tls:
+        - secretName: ${secret}
+          hosts:
+            - prometheus.${domain}
+    alertmanagers:
+      - static_configs:
+        - targets:
+          - alertmanager.${domain}
+  serverFiles:
+    prometheus.yml:
+      rule_files:
+        - /etc/config/recording_rules.yml
+        - /etc/config/alerting_rules.yml
+alertmanager:
+  configmapReload:
+    enabled: false
+  config:
+    global:
+      resolve_timeout: 5m
+      smtp_smarthost: 'smtp.qq.com:465'
+      smtp_from: '11111111@qq.com'
+      smtp_auth_username: '11111111@qq.com'
+      smtp_auth_password: '123456'
+      smtp_require_tls: false
+    templates:
+      - '/etc/alertmanager/*.tmpl'
+    receivers:
+      - name: 'default-receiver'
+        email_configs:
+          - to: '{{ template "email.to" . }}'
+            html: '{{ template "email.to.html" . }}'
+    route:
+      group_wait: 10s
+      group_interval: 5m
+      receiver: default-receiver
+      repeat_interval: 1h
+EOF
+
+helm repo add stable https://artifact.onwalk.net/chartrepo/public/ || echo true
+helm repo update
+helm upgrade --install observable-server stable/observableserver -n "${namespace}" -f values.yaml
diff --git a/roles/vhosts/alerting/meta/main.yml b/roles/vhosts/alerting/meta/main.yml
new file mode 100644
index 0000000..83cef7b
--- /dev/null
+++ b/roles/vhosts/alerting/meta/main.yml
@@ -0,0 +1,2 @@
+dependencies:
+  - role: cert-manager
diff --git a/roles/vhosts/alerting/tasks/main.yml b/roles/vhosts/alerting/tasks/main.yml
new file mode 100755
index 0000000..cd7f06e
--- /dev/null
+++ b/roles/vhosts/alerting/tasks/main.yml
@@ -0,0 +1,17 @@
+- name: get db password
+  shell: 'kubectl get secret --namespace database postgresql -o jsonpath="{.data.postgres-password}" | base64 -d'
+  register: command_raw
+  when: inventory_hostname == groups[group][0]
+
+- name: set fact db password
+  set_fact:
+    mysql_db_password: "{{ command_raw.stdout_lines[0] }}"
+  when: inventory_hostname == groups[group][0]
+
+- name: Setup observable server
+  script: files/setup-observable-server.sh {{ domain }} {{ secret
}} {{ namespace }} {{ mysql_db_password }}
+  when: inventory_hostname in groups[group]
+
+- name: Check alerting rules config
+  shell: promtool check rules /path/to/example.rules.yml
+  when: inventory_hostname in groups[group]
diff --git a/roles/vhosts/alerting/templates/alerting_rules b/roles/vhosts/alerting/templates/alerting_rules
new file mode 100644
index 0000000..6b8a01e
--- /dev/null
+++ b/roles/vhosts/alerting/templates/alerting_rules
@@ -0,0 +1,37 @@
+data:
+  alerting_rules.yml: |
+    groups:
+    - name: host-monitoring
+      rules:
+      - alert: HighLoad
+        expr: node_load1 > 2.0
+        for: 5m
+        labels:
+          severity: warning
+        annotations:
+          summary: High load on {{ $labels.instance }}
+          description: "Load is {{ $value }} (threshold: 2.0)"
+      - alert: HighCpuUsage
+        expr: 100 - (avg by (instance) (irate(node_cpu_seconds_total{mode="idle"}[5m])) * 100) > 90
+        for: 5m
+        labels:
+          severity: critical
+        annotations:
+          summary: High CPU usage on {{ $labels.instance }}
+          description: "CPU usage is {{ $value }}%"
+      - alert: HighMemoryUsage
+        expr: (node_memory_MemTotal_bytes - node_memory_MemFree_bytes - node_memory_Buffers_bytes - node_memory_Cached_bytes) / node_memory_MemTotal_bytes * 100 > 90
+        for: 5m
+        labels:
+          severity: warning
+        annotations:
+          summary: High memory usage on {{ $labels.instance }}
+          description: "Memory usage is {{ $value }}%"
+      - alert: HighDiskUsage
+        expr: node_filesystem_avail_bytes{fstype="ext4"} / node_filesystem_size_bytes{fstype="ext4"} * 100 < 10
+        for: 5m
+        labels:
+          severity: critical
+        annotations:
+          summary: High disk usage on {{ $labels.instance }}
+          description: "Free disk space is {{ $value }}% (threshold: 10%)"
diff --git a/roles/vhosts/alerting/templates/recording_rules b/roles/vhosts/alerting/templates/recording_rules
new file mode 100644
index 0000000..061e1c5
--- /dev/null
+++ b/roles/vhosts/alerting/templates/recording_rules
@@ -0,0 +1,55 @@
+data:
+  recording_rules.yml: |
+    groups:
+    - name: host-monitoring
+      rules:
+      - record: node_load1
+        expr: node_load1
+      - record: node_cpu_usage
+        expr: 100 - (avg by (instance) (irate(node_cpu_seconds_total{mode="idle"}[5m])) * 100)
+      - record: node_memory_usage
+        expr: (node_memory_MemTotal_bytes - node_memory_MemFree_bytes - node_memory_Buffers_bytes - node_memory_Cached_bytes) / node_memory_MemTotal_bytes * 100
+      - record: node_disk_usage
+        expr: 100 - (avg by (instance) (node_filesystem_avail_bytes{fstype="ext4"} / node_filesystem_size_bytes{fstype="ext4"}) * 100)
+groups:
+- name: 实例存活告警规则
+  rules:
+  - alert: 实例存活告警
+    expr: up == 0
+    for: 1m
+    labels:
+      user: prometheus
+      severity: warning
+    annotations:
+      summary: "主机宕机 !!!"
+      description: "该实例主机已经宕机超过一分钟了。"
+- name: 内存报警规则
+  rules:
+  - alert: 内存使用率告警
+    expr: (1 - (node_memory_MemAvailable_bytes / (node_memory_MemTotal_bytes))) * 100 > 50
+    for: 1m
+    labels:
+      severity: warning
+    annotations:
+      summary: "服务器可用内存不足。"
+      description: "内存使用率已超过50%(当前值:{{ $value }}%)"
+- name: CPU报警规则
+  rules:
+  - alert: CPU使用率告警
+    expr: 100 - (avg by (instance)(irate(node_cpu_seconds_total{mode="idle"}[1m]) )) * 100 > 50
+    for: 1m
+    labels:
+      severity: warning
+    annotations:
+      summary: "CPU使用率正在飙升。"
+      description: "CPU使用率超过50%(当前值:{{ $value }}%)"
+- name: 磁盘使用率报警规则
+  rules:
+  - alert: 磁盘使用率告警
+    expr: 100 - node_filesystem_free_bytes{fstype=~"xfs|ext4"} / node_filesystem_size_bytes{fstype=~"xfs|ext4"} * 100 > 80
+    for: 20m
+    labels:
+      severity: warning
+    annotations:
+      summary: "硬盘分区使用率过高"
+      description: "分区使用大于80%(当前值:{{ $value }}%)"
diff --git a/roles/vhosts/alicloud_dns_record/defaults/main.yml b/roles/vhosts/alicloud_dns_record/defaults/main.yml
new file mode 100644
index 0000000..22aacd1
--- /dev/null
+++ b/roles/vhosts/alicloud_dns_record/defaults/main.yml
@@ -0,0 +1,8 @@
+---
+# Role defaults must be a mapping of variables; the task that was here already
+# lives in tasks/main.yml. Required inputs (alicloud_dns_domain, alicloud_dns_rr,
+# alicloud_dns_type, alicloud_dns_value) are intentionally left undefined.
+alicloud_dns_ttl: 600
+alicloud_access_key: ""
+alicloud_secret_key: ""
+alicloud_security_token: ""
diff --git a/roles/vhosts/alicloud_dns_record/library/alicloud_dns_record.py b/roles/vhosts/alicloud_dns_record/library/alicloud_dns_record.py
new file mode 100644
index 0000000..0f48968
--- /dev/null
+++ b/roles/vhosts/alicloud_dns_record/library/alicloud_dns_record.py
@@ -0,0 +1,195 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+"""Ansible module: ensure one Alibaba Cloud DNS record is present or absent."""
+
+import traceback
+
+from ansible.module_utils.basic import AnsibleModule, missing_required_lib
+
+# Guard the third-party SDK imports so a missing package is reported through
+# fail_json() instead of an import-time traceback.
+try:
+    from alibabacloud_alidns20150109.client import Client as Alidns20150109Client
+    from alibabacloud_credentials.client import Client as CredentialClient
+    from alibabacloud_tea_openapi import models as open_api_models
+    from alibabacloud_tea_util import models as util_models
+    from alibabacloud_alidns20150109 import models as alidns_models
+    HAS_ALIBABACLOUD_SDK = True
+    ALIBABACLOUD_SDK_IMPORT_ERROR = None
+except ImportError:
+    HAS_ALIBABACLOUD_SDK = False
+    ALIBABACLOUD_SDK_IMPORT_ERROR = traceback.format_exc()
+
+
+def create_client(access_key_id=None, access_key_secret=None, security_token=None):
+    """Build an Alidns client.
+
+    Uses the explicit AK/SK pair (plus optional STS token) when both are given;
+    otherwise builds the client from a default CredentialClient().
+    """
+    if access_key_id and access_key_secret:
+        config = open_api_models.Config(
+            access_key_id=access_key_id,
+            access_key_secret=access_key_secret,
+            security_token=security_token
+        )
+        config.endpoint = "alidns.aliyuncs.com"
+        return Alidns20150109Client(config)
+
+    credential = CredentialClient()
+    config = open_api_models.Config(credential=credential)
+    config.endpoint = "alidns.aliyuncs.com"
+    return Alidns20150109Client(config)
+
+
+def find_record(client, domain, rr, record_type):
+    """Return the first record whose RR and type match exactly, or None.
+
+    Only the first page (100 entries) of the keyword search is inspected.
+    """
+    req = alidns_models.DescribeDomainRecordsRequest(
+        domain_name=domain,
+        rr_key_word=rr,
+        type_key_word=record_type,
+        page_size=100
+    )
+    resp = client.describe_domain_records_with_options(
+        req, util_models.RuntimeOptions()
+    )
+    records = resp.body.domain_records.record or []
+
+    for r in records:
+        if r.rr == rr and r.type == record_type:
+            return r
+
+    return None
+
+
+def main():
+    """Module entry point: create, update or delete the requested record."""
+    module = AnsibleModule(
+        argument_spec=dict(
+            state=dict(type='str', choices=['present', 'absent'], default='present'),
+            domain=dict(type='str', required=True),
+            rr=dict(type='str', required=True),
+            type=dict(type='str', required=True),
+            value=dict(type='str'),
+            ttl=dict(type='int', default=600),
+            priority=dict(type='int'),
+
+            # AK/SK (and optional STS token); when empty, create_client() uses CredentialClient()
+            access_key_id=dict(type='str', no_log=True),
+            access_key_secret=dict(type='str', no_log=True),
+            security_token=dict(type='str', no_log=True),
+        ),
+        supports_check_mode=True
+    )
+
+    if not HAS_ALIBABACLOUD_SDK:
+        module.fail_json(
+            msg=missing_required_lib("alibabacloud_alidns20150109"),
+            exception=ALIBABACLOUD_SDK_IMPORT_ERROR,
+        )
+
+    state = module.params["state"]
+    domain = module.params["domain"]
+    rr = module.params["rr"]
+    record_type = module.params["type"]
+    value = module.params["value"]
+    ttl = module.params["ttl"]
+    priority = module.params["priority"]
+
+    access_key_id = module.params["access_key_id"]
+    access_key_secret = module.params["access_key_secret"]
+    security_token = module.params["security_token"]
+
+    try:
+        client = create_client(access_key_id, access_key_secret, security_token)
+    except Exception as e:
+        module.fail_json(msg=f"Failed to create Alicloud DNS client: {e}")
+
+    # Find record
+    try:
+        existing = find_record(client, domain, rr, record_type)
+    except Exception as e:
+        module.fail_json(msg=f"Failed to query DNS records: {e}")
+
+    # ----------------------------
+    # ABSENT (delete)
+    # ----------------------------
+    if state == "absent":
+        if not existing:
+            module.exit_json(changed=False, msg="Record already absent")
+
+        if module.check_mode:
+            module.exit_json(changed=True)
+
+        try:
+            req = alidns_models.DeleteDomainRecordRequest(
+                record_id=existing.record_id
+            )
+            client.delete_domain_record_with_options(req, util_models.RuntimeOptions())
+        except Exception as e:
+            module.fail_json(msg=f"Failed to delete record: {e}")
+
+        module.exit_json(changed=True, msg="Record deleted", record_id=existing.record_id)
+
+    # ----------------------------
+    # PRESENT (create / update)
+    # ----------------------------
+    if not value:
+        module.fail_json(msg="value is required when state=present")
+
+    if existing:
+        need_update = (
+            existing.value != value or
+            existing.ttl != ttl or
+            (priority is not None and existing.priority != priority)
+        )
+
+        if not need_update:
+            module.exit_json(changed=False, msg="Record already up to date", record_id=existing.record_id)
+
+        if module.check_mode:
+            module.exit_json(changed=True)
+
+        try:
+            req = alidns_models.UpdateDomainRecordRequest(
+                record_id=existing.record_id,
+                rr=rr,
+                type=record_type,
+                value=value,
+                ttl=ttl,
+                priority=priority,
+            )
+            client.update_domain_record_with_options(req, util_models.RuntimeOptions())
+        except Exception as e:
+            module.fail_json(msg=f"Failed to update record: {e}")
+
+        module.exit_json(changed=True, msg="Record updated", record_id=existing.record_id)
+
+    # ----------------------------
+    # CREATE
+    # ----------------------------
+    if module.check_mode:
+        module.exit_json(changed=True)
+
+    try:
+        req = alidns_models.AddDomainRecordRequest(
+            domain_name=domain,
+            rr=rr,
+            type=record_type,
+            value=value,
+            ttl=ttl,
+            priority=priority,
+        )
+        resp = client.add_domain_record_with_options(req, util_models.RuntimeOptions())
+        record_id = resp.body.record_id
+    except Exception as e:
+        module.fail_json(msg=f"Failed to create record: {e}")
+
+    module.exit_json(changed=True, msg="Record created", record_id=record_id)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/roles/vhosts/alicloud_dns_record/tasks/main.yml b/roles/vhosts/alicloud_dns_record/tasks/main.yml
new file mode 100644
index 0000000..777570e
--- /dev/null
+++ b/roles/vhosts/alicloud_dns_record/tasks/main.yml
@@ -0,0 +1,18 @@
+---
+- name: Ensure Alicloud DNS Record
+  alicloud_dns_record:
+    state: present
+    domain: "{{ alicloud_dns_domain }}"
+    rr: "{{ alicloud_dns_rr }}"
+    type: "{{ alicloud_dns_type }}"
+    value: "{{ alicloud_dns_value }}"
+    ttl: "{{ alicloud_dns_ttl }}"
+    priority: "{{ alicloud_dns_priority | default(omit) }}"
+    access_key_id: "{{ alicloud_access_key }}"
+    access_key_secret: "{{ alicloud_secret_key }}"
+    security_token: "{{ alicloud_security_token }}"
+
register: dns_result + +- debug: + var: dns_result + diff --git a/roles/vhosts/alicloud_dns_sync/defaults/main.yml b/roles/vhosts/alicloud_dns_sync/defaults/main.yml new file mode 100644 index 0000000..b018f88 --- /dev/null +++ b/roles/vhosts/alicloud_dns_sync/defaults/main.yml @@ -0,0 +1,7 @@ +--- +alicloud_dns_sync_domain: "" +alicloud_dns_sync_records: [] +alicloud_dns_sync_output: "/tmp/dns_records.yaml" + +alicloud_access_key: "" +alicloud_secret_key: "" diff --git a/roles/vhosts/alicloud_dns_sync/files/dns_sync.py b/roles/vhosts/alicloud_dns_sync/files/dns_sync.py new file mode 100644 index 0000000..0e9c3c1 --- /dev/null +++ b/roles/vhosts/alicloud_dns_sync/files/dns_sync.py @@ -0,0 +1,61 @@ +#!/usr/bin/env python3 +import sys +import yaml +from alibabacloud_alidns20150109 import models as alidns_models +from alibabacloud_alidns20150109.client import Client +from alibabacloud_tea_openapi import models as open_api_models + + +def client(ak, sk): + config = open_api_models.Config( + access_key_id=ak, + access_key_secret=sk, + endpoint="alidns.aliyuncs.com", + ) + return Client(config) + + +def sync(domain, records, ak, sk): + c = client(ak, sk) + + # Fetch existing records with a typed request (one page, 500 = API maximum) + resp = c.describe_domain_records( + alidns_models.DescribeDomainRecordsRequest(domain_name=domain, page_size=500) + ) + existing = { (i.rr, i.type): i for i in resp.body.domain_records.record } + + for rec in records: + key = (rec["rr"], rec["type"]) + ttl = rec.get("ttl", 600) + + if key not in existing: + print("CREATE:", rec) + c.add_domain_record(alidns_models.AddDomainRecordRequest( + domain_name=domain, + rr=rec["rr"], + type=rec["type"], + value=rec["value"], + ttl=ttl, + )) + else: + cur = existing[key] + if cur.value != rec["value"] or cur.ttl != ttl: + print("UPDATE:", rec) + c.update_domain_record(alidns_models.UpdateDomainRecordRequest( + record_id=cur.record_id, + rr=rec["rr"], + type=rec["type"], + value=rec["value"], + ttl=ttl, + )) + + +if __name__ == "__main__": + fn = sys.argv[1] + ak = sys.argv[2] + sk = sys.argv[3] + + with open(fn) as fh: + cfg = yaml.safe_load(fh) + for domain, recs in cfg.items(): + sync(domain, recs, ak, sk)
diff --git a/roles/vhosts/alicloud_dns_sync/tasks/main.yaml b/roles/vhosts/alicloud_dns_sync/tasks/main.yaml new file mode 100644 index 0000000..88b8658 --- /dev/null +++ b/roles/vhosts/alicloud_dns_sync/tasks/main.yaml @@ -0,0 +1,18 @@ +--- +- name: Generate DNS records file from template + template: + src: dns_records.yaml.j2 + dest: "{{ alicloud_dns_sync_output }}" + +- name: Upload dns_sync.py + copy: + src: dns_sync.py + dest: /tmp/dns_sync.py + mode: '0755' + +- name: Sync DNS records + command: > + python3 /tmp/dns_sync.py + {{ alicloud_dns_sync_output }} + {{ alicloud_access_key }} + {{ alicloud_secret_key }} diff --git a/roles/vhosts/alicloud_dns_sync/templates/dns_records.yaml.j2 b/roles/vhosts/alicloud_dns_sync/templates/dns_records.yaml.j2 new file mode 100644 index 0000000..f6abbf4 --- /dev/null +++ b/roles/vhosts/alicloud_dns_sync/templates/dns_records.yaml.j2 @@ -0,0 +1,7 @@ +{{ alicloud_dns_sync_domain }}: +{% for rec in alicloud_dns_sync_records %} + - rr: "{{ rec.rr }}" + type: "{{ rec.type }}" + value: "{{ rec.value }}" + ttl: {{ rec.ttl | default(600) }} +{% endfor %} diff --git a/roles/vhosts/alloy/defaults/main.yml b/roles/vhosts/alloy/defaults/main.yml new file mode 100644 index 0000000..85aab39 --- /dev/null +++ b/roles/vhosts/alloy/defaults/main.yml @@ -0,0 +1,23 @@ +# Default variables for alloy +alloy_config_path: "/etc/alloy/config.alloy" + +grafana_gpg_key_url: "https://apt.grafana.com/gpg.key" +grafana_apt_source: "deb [signed-by=/etc/apt/keyrings/grafana.gpg] https://apt.grafana.com stable main" + +loki_endpoint_url: "https://logs-prod-030.grafana.net/loki/api/v1/push" +loki_basic_auth_username: "965018" +loki_basic_auth_password: !vault | + $ANSIBLE_VAULT;1.1;AES256 + 35313466643636366632313038386338303535616334306134663261333237646231653965626333 + 3032306662663236363139653863623263623034363264630a373766353630326131376335386463 + 33363363306539303264346230366239303636366130636233333536646537613932393961343635 + 
6266653566616331310a633861303963643237366362656139303232323066323665623130326163 + 31316366626161396636343966363263313637383665633037323666323266633062653966333566 + 61306664653334366331393265363265383832393361613663633138383730613666623038616531 + 35333031613765616562323663613333313464343732663930356337343836396133363265376666 + 33343836633465356330663266623838646461613633313031343232613066356336386665663165 + 65366530613966373466323934303466336537323433396135623933383239393530333762633963 + 36356562303361396332633166386236666265326339313731666632646335336136373931313131 + 65626231616233333061396135383334623030643532636335656262376464383039303834363938 + 63636466386266643234386536336164353138643839393934393464623037306636613964376462 + 38386466643633643036626239626438393762326434643563383237343762626139 diff --git a/roles/vhosts/alloy/files/loki_journal_sources_gateway.yml b/roles/vhosts/alloy/files/loki_journal_sources_gateway.yml new file mode 100644 index 0000000..ac3af8b --- /dev/null +++ b/roles/vhosts/alloy/files/loki_journal_sources_gateway.yml @@ -0,0 +1,5 @@ +loki_journal_sources: + - name: "xray" + unit: "xray.service" + - name: "xray_tproxy" + unit: "xray-tproxy.service" diff --git a/roles/vhosts/alloy/files/loki_journal_sources_k3s_agent.yml b/roles/vhosts/alloy/files/loki_journal_sources_k3s_agent.yml new file mode 100644 index 0000000..40ade11 --- /dev/null +++ b/roles/vhosts/alloy/files/loki_journal_sources_k3s_agent.yml @@ -0,0 +1,3 @@ +loki_journal_sources: + - name: "k3s_agent" + unit: "k3s-agent.service" diff --git a/roles/vhosts/alloy/files/loki_journal_sources_k3s_server.yml b/roles/vhosts/alloy/files/loki_journal_sources_k3s_server.yml new file mode 100644 index 0000000..63c41af --- /dev/null +++ b/roles/vhosts/alloy/files/loki_journal_sources_k3s_server.yml @@ -0,0 +1,3 @@ +loki_journal_sources: + - name: "k3s" + unit: "k3s.service" diff --git a/roles/vhosts/alloy/files/loki_journal_sources_postgresql.yml 
b/roles/vhosts/alloy/files/loki_journal_sources_postgresql.yml new file mode 100644 index 0000000..eea425c --- /dev/null +++ b/roles/vhosts/alloy/files/loki_journal_sources_postgresql.yml @@ -0,0 +1,3 @@ +loki_journal_sources: + - name: "postgresql" + unit: "postgresql.service" diff --git a/roles/vhosts/alloy/files/loki_journal_sources_vpn.yml b/roles/vhosts/alloy/files/loki_journal_sources_vpn.yml new file mode 100644 index 0000000..9298616 --- /dev/null +++ b/roles/vhosts/alloy/files/loki_journal_sources_vpn.yml @@ -0,0 +1,3 @@ +loki_journal_sources: + - name: "vpn" + unit: "wg-quick@wg0" diff --git a/roles/vhosts/alloy/tasks/main.yml b/roles/vhosts/alloy/tasks/main.yml new file mode 100644 index 0000000..23f680e --- /dev/null +++ b/roles/vhosts/alloy/tasks/main.yml @@ -0,0 +1,55 @@ +--- +- name: Install GPG + apt: + name: gpg + state: present + +- name: Create APT keyrings directory + file: + path: /etc/apt/keyrings/ + state: directory + mode: '0755' + become: yes + +- name: Add Grafana GPG key + shell: | + wget -q -O - {{ grafana_gpg_key_url }} | gpg --dearmor | sudo tee /etc/apt/keyrings/grafana.gpg > /dev/null + become: yes + +- name: Check if Grafana GPG key file is not empty + ansible.builtin.stat: + path: /etc/apt/keyrings/grafana.gpg + register: grafana_gpg_key_stat + +- name: Fail if Grafana GPG key file is empty + ansible.builtin.fail: + msg: "The Grafana GPG key file is empty or does not exist." 
+ when: grafana_gpg_key_stat.stat.size == 0 + +- name: Add Grafana APT source + shell: | + echo "deb [signed-by=/etc/apt/keyrings/grafana.gpg] https://apt.grafana.com stable main" | sudo tee /etc/apt/sources.list.d/grafana.list + become: yes + +- name: Update APT package list and install Grafana Alloy + apt: + name: alloy + state: present + update_cache: yes + +- name: Create Alloy configuration directory + file: + path: /etc/alloy + state: directory + mode: '0770' + +- name: Create Alloy configuration file + template: + src: templates/config.alloy.j2 + dest: "/etc/alloy/config.alloy" + +- name: Reload and restart Alloy service + systemd: + name: alloy + state: restarted + daemon_reload: yes diff --git a/roles/vhosts/alloy/templates/config.alloy.j2 b/roles/vhosts/alloy/templates/config.alloy.j2 new file mode 100644 index 0000000..547c961 --- /dev/null +++ b/roles/vhosts/alloy/templates/config.alloy.j2 @@ -0,0 +1,19 @@ +loki.write "grafanacloud" { + endpoint { + url = "{{ loki_endpoint_url }}" + + basic_auth { + username = "{{ loki_basic_auth_username }}" + password = "{{ loki_basic_auth_password }}" + } + } +} + +{% for source in loki_journal_sources %} +loki.source.journal "{{ source.name }}" { + format_as_json = true + labels = {job = "{{ source.name }}"} + matches = "_SYSTEMD_UNIT={{ source.unit }}" + forward_to = [loki.write.grafanacloud.receiver] +} +{% endfor %} diff --git a/roles/vhosts/blackbox_exporter/defaults/main.yml b/roles/vhosts/blackbox_exporter/defaults/main.yml new file mode 100644 index 0000000..21e17c4 --- /dev/null +++ b/roles/vhosts/blackbox_exporter/defaults/main.yml @@ -0,0 +1,15 @@ +--- +blackbox_version: "0.27.0" +blackbox_user: "blackbox" +blackbox_bin: "/usr/local/bin/blackbox_exporter" +blackbox_conf_dir: "/opt/blackbox" +blackbox_conf_file: "/opt/blackbox/blackbox.yml" +blackbox_listen: ":9115" +blackbox_arch_map: + x86_64: linux-amd64 + amd64: linux-amd64 + aarch64: linux-arm64 + arm64: linux-arm64 +blackbox_download_base_url: 
"https://dl.svc.plus/prometheus/blackbox_exporter" +blackbox_tmp_dir: "/tmp" +dl_business_host: "dl.svc.plus" diff --git a/roles/vhosts/blackbox_exporter/handlers/main.yml b/roles/vhosts/blackbox_exporter/handlers/main.yml new file mode 100644 index 0000000..ec79359 --- /dev/null +++ b/roles/vhosts/blackbox_exporter/handlers/main.yml @@ -0,0 +1,9 @@ +--- +- name: Daemon reload + ansible.builtin.systemd: + daemon_reload: true + +- name: Restart blackbox + ansible.builtin.systemd: + name: blackbox + state: restarted diff --git a/roles/vhosts/blackbox_exporter/meta/main.yml b/roles/vhosts/blackbox_exporter/meta/main.yml new file mode 100644 index 0000000..23d65c7 --- /dev/null +++ b/roles/vhosts/blackbox_exporter/meta/main.yml @@ -0,0 +1,2 @@ +--- +dependencies: [] diff --git a/roles/vhosts/blackbox_exporter/tasks/main.yml b/roles/vhosts/blackbox_exporter/tasks/main.yml new file mode 100644 index 0000000..1945bba --- /dev/null +++ b/roles/vhosts/blackbox_exporter/tasks/main.yml @@ -0,0 +1,72 @@ +--- +- name: Determine archive architecture + ansible.builtin.set_fact: + blackbox_archive_arch: "{{ (blackbox_arch_map | default({})).get((ansible_architecture | lower), 'linux-amd64') }}" + +- name: Compute archive information + ansible.builtin.set_fact: + blackbox_archive_name: "blackbox_exporter-{{ blackbox_version }}.{{ blackbox_archive_arch }}" + blackbox_archive_file: "{{ blackbox_tmp_dir }}/blackbox_exporter-{{ blackbox_version }}.{{ blackbox_archive_arch }}.tar.gz" + blackbox_archive_url: "{{ blackbox_download_base_url }}/{{ blackbox_version }}/blackbox_exporter-{{ blackbox_version }}.{{ blackbox_archive_arch }}.tar.gz" + +- name: Ensure user exists + ansible.builtin.user: + name: "{{ blackbox_user }}" + shell: /usr/sbin/nologin + system: true + create_home: false + +- name: Create config dir + ansible.builtin.file: + path: "{{ blackbox_conf_dir }}" + state: directory + owner: "{{ blackbox_user }}" + group: "{{ blackbox_user }}" + mode: "0750" + +- name: Download 
blackbox_exporter tarball + ansible.builtin.get_url: + url: "{{ blackbox_archive_url }}" + dest: "{{ blackbox_archive_file }}" + mode: "0644" + +- name: Unpack archive + ansible.builtin.unarchive: + src: "{{ blackbox_archive_file }}" + dest: "{{ blackbox_tmp_dir }}" + remote_src: true + register: unpack + +- name: Install binary + ansible.builtin.copy: + src: "{{ blackbox_tmp_dir }}/{{ blackbox_archive_name }}/blackbox_exporter" + dest: "{{ blackbox_bin }}" + mode: "0755" + owner: "root" + group: "root" + remote_src: true + when: unpack is changed + +- name: Render blackbox configuration + ansible.builtin.template: + src: "blackbox.yml.j2" + dest: "{{ blackbox_conf_file }}" + owner: "{{ blackbox_user }}" + group: "{{ blackbox_user }}" + mode: "0640" + notify: Restart blackbox + +- name: Install systemd unit + ansible.builtin.template: + src: "blackbox.service.j2" + dest: "/etc/systemd/system/blackbox.service" + mode: "0644" + notify: + - Daemon reload + - Restart blackbox + +- name: Enable and start service + ansible.builtin.systemd: + name: blackbox + enabled: true + state: started diff --git a/roles/vhosts/blackbox_exporter/templates/blackbox.service.j2 b/roles/vhosts/blackbox_exporter/templates/blackbox.service.j2 new file mode 100644 index 0000000..45d31b1 --- /dev/null +++ b/roles/vhosts/blackbox_exporter/templates/blackbox.service.j2 @@ -0,0 +1,16 @@ +[Unit] +Description=Prometheus Blackbox Exporter +After=network-online.target +Wants=network-online.target + +[Service] +User={{ blackbox_user }} +Group={{ blackbox_user }} +ExecStart={{ blackbox_bin }} \ + --config.file={{ blackbox_conf_file }} \ + --web.listen-address={{ blackbox_listen }} +Restart=always +RestartSec=3 + +[Install] +WantedBy=multi-user.target diff --git a/roles/vhosts/blackbox_exporter/templates/blackbox.yml.j2 b/roles/vhosts/blackbox_exporter/templates/blackbox.yml.j2 new file mode 100644 index 0000000..88782d2 --- /dev/null +++ b/roles/vhosts/blackbox_exporter/templates/blackbox.yml.j2 @@ 
-0,0 +1,15 @@ +modules: + http_dl_download: + prober: http + timeout: 5s + http: + method: GET + follow_redirects: true + valid_status_codes: [200, 302] + headers: + Host: {{ dl_business_host }} + tls_config: + server_name: {{ dl_business_host }} + insecure_skip_verify: false + # fail_if_body_not_matches_regexp: + # - "(?i)\\.zip|\\.tar\\.gz|manifest" diff --git a/roles/vhosts/cert-manager/files/certs_automated_issuance.sh b/roles/vhosts/cert-manager/files/certs_automated_issuance.sh new file mode 100644 index 0000000..4e04058 --- /dev/null +++ b/roles/vhosts/cert-manager/files/certs_automated_issuance.sh @@ -0,0 +1,32 @@ +#!/bin/bash + + +#!/bin/bash +set -x +export domain=$1 +export Ali_Key=$2 +export Ali_Secret=$3 + +rm -fv ${domain}.key ${domain}.pem -f +rm -fv /etc/ssl/${domain}.* -f + +# Try to issue a certificate from ZeroSSL. If it fails, try Let's Encrypt. + +curl https://get.acme.sh | sh -s email=156405189@qq.com +sh ~/.acme.sh/acme.sh --set-default-ca --server zerossl --issue --force --dns dns_ali -d ${domain} -d "*.${domain}" +if [ $? -eq 0 ]; then + echo "Certificate from zerossl successfully issued" +else + sh ~/.acme.sh/acme.sh --set-default-ca --server letsencrypt --issue --force --dns dns_ali -d ${domain} -d "*.${domain}" + if [ $? 
-eq 0 ]; then + echo "Certificate from letsencrypt successfully issued" + else + echo "Command failed" + exit 1 + fi +fi + +cat ~/.acme.sh/${domain}_ecc/${domain}.cer > ${domain}.pem +cat ~/.acme.sh/${domain}_ecc/ca.cer >> ${domain}.pem +cat ~/.acme.sh/${domain}_ecc/${domain}.key > ${domain}.key +sudo cp ${domain}.pem /etc/ssl/ -f && sudo cp ${domain}.key /etc/ssl/ -f diff --git a/roles/vhosts/cert-manager/files/fetch_certs_from_vault.py b/roles/vhosts/cert-manager/files/fetch_certs_from_vault.py new file mode 100644 index 0000000..f18ceb2 --- /dev/null +++ b/roles/vhosts/cert-manager/files/fetch_certs_from_vault.py @@ -0,0 +1,71 @@ +#!/usr/bin/env python3 + +import hvac +import os +import shutil +from datetime import datetime + +# Set your Vault configurations +vault_url = "{{ vault_url }}" +vault_token = "{{ vault_token }}" +vault_secret_path = "{{ vault_secret_path }}" +domain = "{{ domain }}" + +# Connect to Vault +client = hvac.Client(url=vault_url, token=vault_token) + +# Fetch Certificate and Private Key from Vault +vault_result = client.read(vault_secret_path) + +if vault_result and 'data' in vault_result: + certificate = vault_result['data'].get('certificate', '') + private_key = vault_result['data'].get('private_key', '') + + # Paths for certificate and private key files + cert_path = f"/etc/ssl/{domain}.pem" + key_path = f"/etc/ssl/{domain}.key" + + # Check if files already exist + cert_exists = os.path.exists(cert_path) + key_exists = os.path.exists(key_path) + + # Backup existing files with timestamp + backup_dir = "/opt/bak/" + timestamp = datetime.now().strftime("%Y%m%d%H%M%S") + + if cert_exists: + backup_cert_path = f"{backup_dir}{domain}_{timestamp}.pem" + shutil.move(cert_path, backup_cert_path) + print(f"Backup created: {backup_cert_path}") + + if key_exists: + backup_key_path = f"{backup_dir}{domain}_{timestamp}.key" + shutil.move(key_path, backup_key_path) + print(f"Backup created: {backup_key_path}") + + # Write Certificate to File (force 
overwrite) + with open(cert_path, 'w') as cert_file: + cert_file.write(certificate) + + # Write Private Key to File (force overwrite) + with open(key_path, 'w') as key_file: + key_file.write(private_key) + + # Set file permissions + os.chmod(cert_path, 0o644) + os.chown(cert_path, 0, 0) + + os.chmod(key_path, 0o600) + os.chown(key_path, 0, 0) + + if cert_exists: + print(f"Certificate updated: {cert_path}") + else: + print(f"Certificate written: {cert_path}") + + if key_exists: + print(f"Private key updated: {key_path}") + else: + print(f"Private key written: {key_path}") +else: + print("Failed to fetch certificate and private key from Vault.") diff --git a/roles/vhosts/cert-manager/files/get_certificate.sh b/roles/vhosts/cert-manager/files/get_certificate.sh new file mode 100644 index 0000000..4195c04 --- /dev/null +++ b/roles/vhosts/cert-manager/files/get_certificate.sh @@ -0,0 +1,35 @@ +#!/bin/bash + +check_empty() { + if [ -z "$1" ]; then + echo "$2" + exit 1 + fi +} + +check_empty "$1" "Please provide DOMAIN" && export DOMAIN=$1 +check_empty "$2" "Please provide VAULT_ADDR" && export VAULT_ADDR=$2 +check_empty "$3" "Please provide VAULT_TOKEN" && export VAULT_TOKEN=$3 + +SECRET_PATH="certs/$DOMAIN" + +# Output paths +CERTIFICATE_PATH="/etc/ssl/${DOMAIN}.pem" +PRIVATE_KEY_PATH="/etc/ssl/${DOMAIN}.key" + +vault login -address=$VAULT_ADDR $VAULT_TOKEN +# Read certificate from Vault +vault kv get -field=certificate certs/${DOMAIN} > "$CERTIFICATE_PATH" +# Read private key from Vault +vault kv get -field=private_key certs/${DOMAIN} > "$PRIVATE_KEY_PATH" + +# Set permissions for the private key (modify as needed) +chmod 600 "$PRIVATE_KEY_PATH" + +# Check if certificate and private key files are non-empty +if [ ! -s "$CERTIFICATE_PATH" ] || [ ! -s "$PRIVATE_KEY_PATH" ]; then + echo "Certificate or private key is empty. Exiting..." 
+ exit 1 +else + echo "Certificate and private key have been written to $CERTIFICATE_PATH and $PRIVATE_KEY_PATH" +fi diff --git a/roles/vhosts/cert-manager/files/update-certs-secret.sh b/roles/vhosts/cert-manager/files/update-certs-secret.sh new file mode 100644 index 0000000..d7af429 --- /dev/null +++ b/roles/vhosts/cert-manager/files/update-certs-secret.sh @@ -0,0 +1,10 @@ +#!/bin/bash + +export secret=$1 +export key_file=$2 +export cert_file=$3 +export namespace=$4 + +kubectl create namespace $namespace || echo true +kubectl delete secret tls $secret -n $namespace || echo true +kubectl create secret tls $secret --cert=$cert_file --key=$key_file -n $namespace diff --git a/roles/vhosts/cert-manager/meta/main.yml b/roles/vhosts/cert-manager/meta/main.yml new file mode 100644 index 0000000..9711b33 --- /dev/null +++ b/roles/vhosts/cert-manager/meta/main.yml @@ -0,0 +1,2 @@ +dependencies: + - role: common diff --git a/roles/vhosts/cert-manager/tasks/main.yml b/roles/vhosts/cert-manager/tasks/main.yml new file mode 100755 index 0000000..a6a9fca --- /dev/null +++ b/roles/vhosts/cert-manager/tasks/main.yml @@ -0,0 +1,7 @@ +- name: Fetch Certificate and Private Key from Vault + script: files/get_certificate.sh {{ domain }} {{ vault_url }} {{ vault_token }} + when: (inventory_hostname in groups[group]) and ( cert_issuance == 'vault' ) + +- name: certs automated issuance + script: files/certs_automated_issuance.sh {{ domain }} {{ dns_ak }} {{ dns_sk }} + when: (inventory_hostname in groups[group]) and ( cert_issuance == 'auto' ) diff --git a/roles/vhosts/cert-manager/vars/main.yml b/roles/vhosts/cert-manager/vars/main.yml new file mode 100644 index 0000000..e86c3cd --- /dev/null +++ b/roles/vhosts/cert-manager/vars/main.yml @@ -0,0 +1,2 @@ +group: master +cert_issuance: vault diff --git a/roles/vhosts/chasquid/handlers/main.yml b/roles/vhosts/chasquid/handlers/main.yml new file mode 100644 index 0000000..5335fe4 --- /dev/null +++ b/roles/vhosts/chasquid/handlers/main.yml 
@@ -0,0 +1,19 @@ +--- +- name: Restart chasquid + systemd: + name: chasquid + state: restarted + +- name: Reload chasquid + systemd: + name: chasquid + state: reloaded + +- name: Reload systemd daemon + systemd: + daemon_reload: yes + +- name: Reboot system + reboot: + reboot_timeout: 600 + msg: "Rebooting system after IPv6 configuration changes" diff --git a/roles/vhosts/chasquid/tasks/main.yml b/roles/vhosts/chasquid/tasks/main.yml new file mode 100644 index 0000000..383f758 --- /dev/null +++ b/roles/vhosts/chasquid/tasks/main.yml @@ -0,0 +1,166 @@ +--- +- name: Update apt cache + apt: + update_cache: yes + cache_valid_time: 3600 + +- name: Install required packages + apt: + name: + - chasquid + - swaks + - dovecot-core + - dovecot-imapd + - openssl + state: present + +- name: Create support user + user: + name: support + home: /home/support + shell: /usr/sbin/nologin + system: yes + createhome: yes + +- name: Add chasquid user to mail group + user: + name: chasquid + groups: mail + append: yes + +- name: Set capability on chasquid binary + command: setcap 'cap_net_bind_service=+ep' /usr/bin/chasquid + args: + removes: /usr/bin/chasquid + +- name: Disable chasquid socket services + systemd: + name: "{{ item }}" + state: stopped + enabled: no + loop: + - chasquid.socket + - chasquid-smtp.socket + - chasquid-submission.socket + - chasquid-submission_tls.socket + +- name: Disable IPv6 globally + sysctl: + name: "{{ item.name }}" + value: 1 + sysctl_file: /etc/sysctl.conf + state: present + reload: yes + loop: + - { name: 'net.ipv6.conf.all.disable_ipv6' } + - { name: 'net.ipv6.conf.default.disable_ipv6' } + notify: Reboot system + +- name: Create necessary directories + file: + path: "{{ item }}" + state: directory + mode: '0755' + owner: chasquid + group: chasquid + loop: + - "{{ CONFIG_DIR }}/domains/{{ DOMAIN }}/certs" + - "{{ DATA_DIR }}" + - "/var/mail" + +- name: Generate DKIM key + openssl_privatekey: + path: "{{ DKIM_KEY }}" + size: 2048 + type: RSA + 
notify: Restart chasquid + +- name: Extract DKIM public key + command: "openssl rsa -in {{ DKIM_KEY }} -pubout" + register: dkim_pub_result + changed_when: false + +- name: Save DKIM public key + copy: + content: "{{ dkim_pub_result.stdout }}" + dest: "{{ DKIM_PUB }}" + mode: '0644' + owner: chasquid + group: chasquid + notify: Restart chasquid + +- name: Create chasquid configuration + template: + src: chasquid.conf.j2 + dest: "{{ CONFIG_DIR }}/chasquid.conf" + mode: '0644' + owner: chasquid + group: chasquid + notify: Restart chasquid + +- name: Copy TLS certificates + copy: + src: "{{ CERT_PEM }}" + dest: "{{ CONFIG_DIR }}/domains/{{ DOMAIN }}/certs/fullchain.pem" + remote_src: yes + mode: '0640' + owner: root + group: chasquid + notify: Restart chasquid + ignore_errors: "{{ ignore_cert_missing | default(false) }}" + +- name: Copy TLS private key + copy: + src: "{{ CERT_KEY }}" + dest: "{{ CONFIG_DIR }}/domains/{{ DOMAIN }}/certs/key.pem" + remote_src: yes + mode: '0640' + owner: root + group: chasquid + notify: Restart chasquid + ignore_errors: "{{ ignore_cert_missing | default(false) }}" + +- name: Add chasquid user + shell: "printf '%s\n%s\n' {{ USER_PASSWORD | quote }} {{ USER_PASSWORD | quote }} | chasquid-util user-add {{ USER }}@{{ DOMAIN }}" + args: + creates: "{{ CONFIG_DIR }}/users/{{ DOMAIN }}/{{ USER }}" + notify: Restart chasquid + +- name: Create custom chasquid systemd service file + template: + src: chasquid.service.j2 + dest: /etc/systemd/system/chasquid.service + mode: '0644' + owner: root + group: root + notify: + - Reload systemd daemon + - Restart chasquid + +- name: Enable and start chasquid service + systemd: + name: chasquid + state: started + enabled: yes + daemon_reload: yes + +- name: Display DNS records information + debug: + msg: | + 🔧 DNS records for {{ DOMAIN }} ({{ IP }}) + + A smtp.{{ DOMAIN }} {{ IP }} + MX {{ DOMAIN }} smtp.{{ DOMAIN }} + TXT {{ DOMAIN }} "v=spf1 a:smtp.{{ DOMAIN }} ~all" + TXT default._domainkey.{{ DOMAIN }} "v=DKIM1; 
k=rsa; p=" + TXT _dmarc.{{ DOMAIN }} "v=DMARC1; p=quarantine; rua=mailto:dmarc@{{ DOMAIN }}" + +- name: Test email sending with swaks + command: "swaks --server {{ SMTP_SERVER }} --port {{ SMTP_PORT }} --tls --auth PLAIN --auth-user {{ EMAIL_USER }} --auth-password {{ EMAIL_PASSWORD }} --from {{ EMAIL_FROM }} --to {{ EMAIL_TO }}" + register: swaks_result + changed_when: false + failed_when: false + +- name: Display test result + debug: + var: swaks_result diff --git a/roles/vhosts/chasquid/templates/chasquid.conf.j2 b/roles/vhosts/chasquid/templates/chasquid.conf.j2 new file mode 100644 index 0000000..2e6c3de --- /dev/null +++ b/roles/vhosts/chasquid/templates/chasquid.conf.j2 @@ -0,0 +1,16 @@ +hostname: "{{ HOSTNAME }}" +max_data_size_mb: 50 +submission_address: ":587" +submission_over_tls_address: ":465" +monitoring_address: "127.0.0.1:1099" +data_dir: "{{ DATA_DIR }}" +suffix_separators: "+" +dovecot_auth: true + +domain { + name: "{{ DOMAIN }}" + dkim_key: "{{ DKIM_KEY }}" + maildir_base: "/var/mail" +} + +smtp_address: [] diff --git a/roles/vhosts/chasquid/templates/chasquid.service.j2 b/roles/vhosts/chasquid/templates/chasquid.service.j2 new file mode 100644 index 0000000..366477f --- /dev/null +++ b/roles/vhosts/chasquid/templates/chasquid.service.j2 @@ -0,0 +1,24 @@ +[Unit] +Description=Chasquid SMTP Server (standalone) +After=network.target + +[Service] +Type=simple +ExecStart=/usr/bin/chasquid -config_dir {{ CONFIG_DIR }} +Restart=always +RestartSec=5 +User=chasquid +Group=chasquid + +# 允许绑定低端口 +AmbientCapabilities=CAP_NET_BIND_SERVICE +CapabilityBoundingSet=CAP_NET_BIND_SERVICE + +# 安全限制 +ProtectSystem=full +ProtectHome=yes +PrivateTmp=yes +NoNewPrivileges=yes + +[Install] +WantedBy=multi-user.target diff --git a/roles/vhosts/common/defaults/main.yml b/roles/vhosts/common/defaults/main.yml index aadc9eb..f3130c1 100644 --- a/roles/vhosts/common/defaults/main.yml +++ b/roles/vhosts/common/defaults/main.yml @@ -1,12 +1,11 @@ enable_set_timezone: true # 
默认启用 Set timezone enable_set_hostname: true # 默认启用 Set hostname -enable_install_packages: true # 默认不安装额外的软件包 enable_all_hosts_update: false # 默认不更新所有主机的条目 rsyslog_log_rotation: # 可选的日志管理配置 enable: true # 启用 rsyslog 日志管理 rotate_count: 4 # 默认保留的日志文件数量 - rotate_frequency: weekly # 默认每周轮换, 可选:daily, hourly + rotate_frequency: daily # Rotate daily by default; alternatives: weekly, hourly max_log_size: 100M # 默认日志文件最大大小 journald_log_rotation: # 启用 journald 日志管理 @@ -16,3 +15,51 @@ journald_log_rotation: # 启用 journald 日志管理 max_file_sec: 1month # 默认日志文件保存的最大时长 system_max_use: 1G # 默认系统日志最大使用空间 runtime_max_use: 500M # 默认运行时日志最大使用空间 + +# 总开关 +enable_common: true + +repo: + apt: + enabled: false # 是否配置仓库/keys + enable_universe: false # Ubuntu 是否启用 universe 仓库 + auto_update_cache: true # 是否在 repo_setup 后自动 apt update + key_dir: /etc/apt/keyrings # keyring 目录 + keyrings: [] # 自定义 keyring 定义(由调用方传入) + legacy_paths: # 清理的遗留路径 + - /etc/apt/sources.list.d/hashicorp.sources + - /usr/share/keyrings/hashicorp-archive-keyring.gpg + entries: [] # 声明式仓库定义 + yum: + enabled: false # 预留 RPM 系列配置 + entries: [] + cleanup_paths: [] + +packages: + apt: + enabled: true # 是否安装软件包 + base_dependencies: # APT 前置依赖(避免未规范化源时更新) + - ca-certificates + - gnupg + list: # 要安装的软件包 + - auditd + - uidmap + - fuse-overlayfs + yum: + enabled: true + list: + - audit + - shadow-utils + - fuse-overlayfs + +packages_cleanup: + enabled: false + ubuntu: + purge: true + list: + - snapd + - resolvconf + - popularity-contest + - apport + - whoopsie + - modemmanager diff --git a/roles/vhosts/common/handlers/main.yml b/roles/vhosts/common/handlers/main.yml index da6f188..4a4bf1d 100644 --- a/roles/vhosts/common/handlers/main.yml +++ b/roles/vhosts/common/handlers/main.yml @@ -8,3 +8,7 @@ service: name: systemd-journald state: restarted + +- name: apt-update-cache + ansible.builtin.apt: + update_cache: true diff --git a/roles/vhosts/common/tasks/addons/gpu.yml b/roles/vhosts/common/tasks/addons/gpu.yml new file mode 100644 index 0000000..804eefd --- 
/dev/null +++ b/roles/vhosts/common/tasks/addons/gpu.yml @@ -0,0 +1,22 @@ +--- +- name: Addon(GPU) | add NVIDIA repositories + ansible.builtin.shell: | + add-apt-repository -y ppa:graphics-drivers + distribution=$(. /etc/os-release;echo $ID$VERSION_ID) + curl -s -L https://nvidia.github.io/libnvidia-container/gpgkey | apt-key add - + curl -s -L https://nvidia.github.io/libnvidia-container/$distribution/libnvidia-container.list | tee /etc/apt/sources.list.d/nvidia-container-runtime.list + curl -s -L https://nvidia.github.io/nvidia-docker/gpgkey | apt-key add - + curl -s -L https://nvidia.github.io/nvidia-docker/$distribution/nvidia-docker.list | tee /etc/apt/sources.list.d/nvidia-docker.list + apt-get update + become: true + +- name: Addon(GPU) | install NVIDIA driver and container runtime + ansible.builtin.apt: + name: + - nvidia-modprobe + - nvidia-driver-535 + - nvidia-headless-535 + - nvidia-container-toolkit + state: present + update_cache: yes + become: true diff --git a/roles/vhosts/common/tasks/addons/s3fs.yml b/roles/vhosts/common/tasks/addons/s3fs.yml new file mode 100644 index 0000000..5861689 --- /dev/null +++ b/roles/vhosts/common/tasks/addons/s3fs.yml @@ -0,0 +1,98 @@ +--- +- name: "S3FS | 合并默认配置" + ansible.builtin.set_fact: + normalized_s3fs_config: "{{ { + 'bucket': '', + 'mount_point': '', + 'access_key': '', + 'secret_key': '', + 'url': 'https://s3.amazonaws.com', + 'region': 'us-east-1', + 'passwd_file': '~/.passwd-s3fs', + 'use_path_request_style': true, + 'allow_other': true, + 'nonempty': false + } | combine(s3fs_config | default({}), recursive=True) }}" + +- name: "S3FS | 检查 s3fs 配置" + ansible.builtin.fail: + msg: "S3FS 需要配置 s3fs_config.bucket 和 s3fs_config.mount_point" + when: + - normalized_s3fs_config.bucket | length == 0 or normalized_s3fs_config.mount_point | length == 0 + +- name: "S3FS | 检查 AWS 凭证" + ansible.builtin.fail: + msg: "S3FS 需要配置 s3fs_config.access_key 和 s3fs_config.secret_key" + when: + - normalized_s3fs_config.access_key | 
length == 0 or normalized_s3fs_config.secret_key | length == 0 + +- name: "S3FS | 安装 s3fs 软件包" + ansible.builtin.apt: + name: s3fs + state: present + become: true + when: ansible_facts.os_family == 'Debian' + +- name: "S3FS | 安装 s3fs 软件包 (CentOS/RHEL)" + ansible.builtin.yum: + name: s3fs-fuse + state: present + become: true + when: ansible_facts.os_family == 'RedHat' + +- name: "S3FS | 创建密码文件" + ansible.builtin.copy: + content: "{{ normalized_s3fs_config.access_key }}:{{ normalized_s3fs_config.secret_key }}" + dest: "{{ normalized_s3fs_config.passwd_file | expanduser }}" + mode: '0600' + owner: root + group: root + when: normalized_s3fs_config.access_key | length > 0 and normalized_s3fs_config.secret_key | length > 0 + +- name: "S3FS | 创建挂载点目录" + ansible.builtin.file: + path: "{{ normalized_s3fs_config.mount_point }}" + state: directory + mode: '0755' + owner: root + group: root + +- name: "S3FS | 检查是否已挂载" + ansible.builtin.shell: "mount | grep -q '{{ normalized_s3fs_config.mount_point }}' && echo 'mounted' || echo 'not mounted'" + register: s3fs_mount_check + changed_when: false + failed_when: false + +- name: "S3FS | 挂载 S3 存储桶" + ansible.builtin.command: > + s3fs {{ normalized_s3fs_config.bucket }} {{ normalized_s3fs_config.mount_point }} + -o passwd_file={{ normalized_s3fs_config.passwd_file | expanduser }} + {% if normalized_s3fs_config.allow_other %}-o allow_other{% endif %} + -o url={{ normalized_s3fs_config.url }} + {% if normalized_s3fs_config.use_path_request_style %}-o use_path_request_style{% endif %} + args: + creates: "{{ normalized_s3fs_config.mount_point }}/.s3fs_configured" + when: s3fs_mount_check.stdout == 'not mounted' + +- name: "S3FS | 创建挂载标记文件" + ansible.builtin.copy: + content: "S3FS mounted at {{ ansible_date_time.iso8601 }}" + dest: "{{ normalized_s3fs_config.mount_point }}/.s3fs_configured" + mode: '0644' + owner: root + group: root + when: s3fs_mount_check.stdout == 'not mounted' + +- name: "S3FS | 验证挂载" + ansible.builtin.shell: "mount | 
grep '{{ normalized_s3fs_config.mount_point }}'"
+  register: s3fs_verify_mount
+  changed_when: false
+  failed_when: s3fs_verify_mount.rc != 0  # was `true`, which failed the play even when the mount was present
+
+- name: "S3FS | 显示挂载信息"
+  ansible.builtin.debug:
+    msg: |
+      S3 存储桶已成功挂载!
+      存储桶: {{ normalized_s3fs_config.bucket }}
+      挂载点: {{ normalized_s3fs_config.mount_point }}
+      状态: {{ s3fs_verify_mount.stdout }}
diff --git a/roles/vhosts/common/tasks/common_debian.yml b/roles/vhosts/common/tasks/common_debian.yml
new file mode 100644
index 0000000..5d02cdc
--- /dev/null
+++ b/roles/vhosts/common/tasks/common_debian.yml
@@ -0,0 +1,21 @@
+---
+- name: Common(Debian) | repo & keys
+  ansible.builtin.include_tasks: repo_setup.yml
+  vars:
+    repo_config: "{{ repo.apt | default({}) }}"
+  when: (repo.apt.enabled | default(false)) | bool
+  tags: [repo, baseline]
+
+- name: Common(Debian) | packages
+  ansible.builtin.include_tasks: packages.yml
+  vars:
+    package_config: "{{ packages.apt | default({}) }}"
+    package_manager: apt
+  when: (packages.apt.enabled | default(false)) | bool
+  tags: [pkgs, baseline]
+
+- name: Common(Debian) | cleanup optional packages
+  ansible.builtin.include_tasks: packages_cleanup.yml
+  when:
+    - (packages_cleanup.enabled | default(false)) | bool
+  tags: [pkgs, baseline, cleanup]
diff --git a/roles/vhosts/common/tasks/common_redhat.yml b/roles/vhosts/common/tasks/common_redhat.yml
new file mode 100644
index 0000000..a9cf759
--- /dev/null
+++ b/roles/vhosts/common/tasks/common_redhat.yml
@@ -0,0 +1,8 @@
+---
+- name: Common(RedHat) | packages
+  ansible.builtin.include_tasks: packages.yml
+  vars:
+    package_config: "{{ packages.yum | default({}) }}"
+    package_manager: dnf
+  when: (packages.yum.enabled | default(false)) | bool
+  tags: [pkgs, baseline]
diff --git a/roles/vhosts/common/tasks/main.yml b/roles/vhosts/common/tasks/main.yml
index fbf7a91..8cc69b2 100644
--- a/roles/vhosts/common/tasks/main.yml
+++ b/roles/vhosts/common/tasks/main.yml
@@ -1,38 +1,57 @@
-- name: Set timezone
-  shell: "timedatectl set-timezone Asia/Shanghai"
+---
+# 
===== Base system (always) =====
+- name: Base | set timezone
+  ansible.builtin.command: "timedatectl set-timezone Asia/Shanghai"
+  changed_when: false
+  become: true

-- name: Set hostname
-  shell: "hostname -F /etc/hostname"
+- name: Base | render /etc/hostname
+  ansible.builtin.template:
+    src: templates/hostname.j2
+    dest: /etc/hostname
+    owner: root
+    group: root
+    mode: "0644"
+  become: true

-- name: update /etc/hostname
-  template: src=templates/hostname dest=/etc/hostname owner=root group=root mode=0644 unsafe_writes=yes
+- name: Base | set hostname
+  ansible.builtin.hostname:
+    name: "{{ inventory_hostname }}"
+  become: true

-- name: Update /etc/hosts
-  template: src=templates/hosts dest=/etc/hosts owner=root group=root mode=0644 force=yes unsafe_writes=yes
+- name: Base | update /etc/hosts
+  ansible.builtin.template:
+    src: templates/hosts
+    dest: /etc/hosts
+    owner: root
+    group: root
+    mode: "0644"
+  become: true

-- name: Set systemd-resolved and set static DNS
-  include_tasks: setup-systemd-resolved.yml
+- name: Base | harden ssh
+  ansible.builtin.script: files/secure_ssh.sh
+  become: true

-- name: Install packages
-  script: files/install-packages.sh
-  when: (ansible_facts['distribution'] == "Ubuntu") or (ansible_facts['distribution'] == "Debian")
+# ===== Common baseline (OS split) =====
+- name: Common | Debian family baseline
+  ansible.builtin.import_tasks: common_debian.yml
+  when:
+    - enable_common | bool
+    - ansible_facts.os_family == "Debian"

-- name: Include Privoxy sub task for SOCKS5 to HTTP proxy (optional)
-  include_tasks: setup-privoxy.yml
-  when: privoxy.enable | default(false)
+- name: Common | RedHat family baseline
+  ansible.builtin.import_tasks: common_redhat.yml
+  when:
+    - enable_common | bool
+    - ansible_facts.os_family == "RedHat"

-#- name: Include GPU Configuration
-# include_tasks: include_gpu.yaml
-# when: (ansible_facts['distribution'] == "Ubuntu") or (ansible_facts['distribution'] == "Debian")
-# tags:
-# - k3s
-# - gpu
-# - 
nvidia
+# ===== Add-ons (default OFF) =====
+- name: Addon | S3FS mount
+  ansible.builtin.import_tasks: addons/s3fs.yml
+  when: s3fs_enable | default(false) | bool
+  tags: [s3fs, mount]

-#- name: enable ip_forward
-# shell: 'echo "net.ipv4.ip_forward = 1" >> /etc/sysctl.conf; echo "net.ipv4.conf.all.proxy_arp = 1" >> /etc/sysctl.conf ; sysctl -p /etc/sysctl.conf'
-
-
-#- name: Install packages
-# shell: "yum makecache && yum install -y audit container-selinux"
-# when: (ansible_facts['distribution'] != "Ubuntu") or (ansible_facts['distribution'] != "Debian")
+- name: Addon | GPU configuration
+  ansible.builtin.import_tasks: addons/gpu.yml
+  when: gpu_enable | default(false) | bool
+  tags: [gpu, nvidia]
diff --git a/roles/vhosts/common/tasks/manage_keyring.yml b/roles/vhosts/common/tasks/manage_keyring.yml
new file mode 100644
index 0000000..a2e8c8f
--- /dev/null
+++ b/roles/vhosts/common/tasks/manage_keyring.yml
@@ -0,0 +1,85 @@
+---
+# Input variables:
+# - apt_keyring: keyring dictionary defined by the caller
+# - keyring_dest: computed path of the binary keyring
+# - keyring_ascii: computed path of the ASCII key (if needed)
+# - keyring_state: target state (present/absent)
+
+- name: "Keyring | Remove when state=absent"
+  ansible.builtin.file:
+    path: "{{ keyring_dest }}"
+    state: absent
+  when: keyring_state == 'absent'
+  become: true
+
+- name: "Keyring | Ensure present"
+  when: keyring_state != 'absent'
+  become: true
+  block:
+    - name: "Keyring | Download ASCII key from url"
+      ansible.builtin.get_url:
+        url: "{{ apt_keyring.url }}"
+        dest: "{{ keyring_ascii }}"
+        mode: "{{ apt_keyring.asc_mode | default('0644') }}"
+      when: apt_keyring.url is defined
+      register: keyring_ascii_fetch
+
+    - name: "Keyring | Write ASCII key content"
+      ansible.builtin.copy:
+        content: "{{ apt_keyring.content }}"
+        dest: "{{ keyring_ascii }}"
+        mode: "{{ apt_keyring.asc_mode | default('0644') }}"
+      when: apt_keyring.content is defined
+      register: keyring_ascii_write
+
+    - name: "Keyring | Ensure ASCII key file ownership"
+      ansible.builtin.file:
+        path: "{{ 
keyring_ascii }}"
+        owner: root
+        group: root
+        mode: "{{ apt_keyring.asc_mode | default('0644') }}"
+        state: file
+      when: (apt_keyring.content is defined) or (apt_keyring.url is defined)
+
+    - name: "Keyring | Stat binary keyring"
+      ansible.builtin.stat:
+        path: "{{ keyring_dest }}"
+      register: keyring_dest_stat
+
+    - name: "Keyring | Dearmor ASCII key"
+      ansible.builtin.command:
+        cmd: "gpg --batch --yes --dearmor -o {{ keyring_dest }} {{ keyring_ascii }}"  # --batch --yes: overwrite an existing keyring when the ASCII key changed
+      when:
+        - apt_keyring.dearmor | default(true)
+        - (apt_keyring.content is defined) or (apt_keyring.url is defined)
+        - (keyring_ascii_write is defined and keyring_ascii_write.changed)
+          or (keyring_ascii_fetch is defined and keyring_ascii_fetch.changed)
+          or (not keyring_dest_stat.stat.exists)
+
+    - name: "Keyring | Download binary key"
+      ansible.builtin.get_url:
+        url: "{{ apt_keyring.binary_url }}"
+        dest: "{{ keyring_dest }}"
+        mode: "{{ apt_keyring.mode | default('0644') }}"
+      when: apt_keyring.binary_url is defined
+
+    - name: "Keyring | Write binary key content"
+      ansible.builtin.copy:
+        content: "{{ apt_keyring.binary_content }}"
+        dest: "{{ keyring_dest }}"
+        mode: "{{ apt_keyring.mode | default('0644') }}"
+      when: apt_keyring.binary_content is defined
+
+    - name: "Keyring | Refresh binary keyring stat"
+      ansible.builtin.stat:
+        path: "{{ keyring_dest }}"
+      register: keyring_dest_final
+
+    - name: "Keyring | Ensure binary keyring permission"
+      ansible.builtin.file:
+        path: "{{ keyring_dest }}"
+        owner: root
+        group: root
+        mode: "{{ apt_keyring.mode | default('0644') }}"
+        state: file
+      when: keyring_dest_final.stat.exists
diff --git a/roles/vhosts/common/tasks/packages.yml b/roles/vhosts/common/tasks/packages.yml
new file mode 100644
index 0000000..8963f31
--- /dev/null
+++ b/roles/vhosts/common/tasks/packages.yml
@@ -0,0 +1,48 @@
+---
+- name: Normalize package config
+  ansible.builtin.set_fact:
+    normalized_package_list: "{{ package_config.list | default([], true) }}"
+    normalized_base_dependencies: "{{ 
package_config.base_dependencies | default([], true) }}"
+
+# Base dependencies (no update_cache here, to avoid reading sources that are not normalized yet)
+- name: Ensure base APT deps (no update now)
+  ansible.builtin.apt:
+    name: "{{ normalized_base_dependencies }}"
+    state: present
+    update_cache: false
+  when:
+    - package_manager == 'apt'
+    - normalized_base_dependencies | length > 0
+  become: true
+
+# Refresh the repository cache before installing the main packages
+- name: Refresh apt cache before package install
+  ansible.builtin.apt:
+    update_cache: true
+  when:
+    - package_manager == 'apt'
+    - normalized_package_list | length > 0
+  become: true
+
+# Actual installation
+- name: Install packages via apt
+  ansible.builtin.apt:
+    name: "{{ normalized_package_list }}"
+    state: present
+  environment:
+    DEBIAN_FRONTEND: noninteractive
+    APT_LISTCHANGES_FRONTEND: none
+  when:
+    - package_manager == 'apt'
+    - normalized_package_list | length > 0
+  become: true
+
+- name: Install packages via dnf
+  ansible.builtin.dnf:
+    name: "{{ normalized_package_list }}"
+    state: present
+  when:
+    - package_manager == 'dnf'
+    - normalized_package_list | length > 0
+  become: true
+
diff --git a/roles/vhosts/common/tasks/packages_cleanup.yml b/roles/vhosts/common/tasks/packages_cleanup.yml
new file mode 100644
index 0000000..b2efac4
--- /dev/null
+++ b/roles/vhosts/common/tasks/packages_cleanup.yml
@@ -0,0 +1,16 @@
+---
+- name: Cleanup | normalize config
+  ansible.builtin.set_fact:
+    cleanup_config: "{{ packages_cleanup[ansible_facts.distribution | lower] | default({}) }}"
+  tags: [pkgs, baseline, cleanup]
+
+- name: Cleanup | remove optional packages
+  ansible.builtin.apt:
+    name: "{{ cleanup_config.list | default([]) }}"
+    state: absent
+    purge: "{{ cleanup_config.purge | default(true) }}"
+  when:
+    - ansible_facts.distribution == "Ubuntu"
+    - (cleanup_config.list | default([])) | length > 0
+  tags: [pkgs, baseline, cleanup]
+  become: true
diff --git a/roles/vhosts/common/tasks/repo_setup.yml b/roles/vhosts/common/tasks/repo_setup.yml
new file mode 100644
index 0000000..f9c6bd9
--- /dev/null
+++ 
b/roles/vhosts/common/tasks/repo_setup.yml
@@ -0,0 +1,138 @@
+---
+- name: Repo | normalize apt repo config
+  ansible.builtin.set_fact:
+    apt_repo: "{{ {
+      'key_dir': '/etc/apt/keyrings',
+      'enable_universe': false,
+      'auto_update_cache': true,
+      'cache_valid_time': 3600,
+      'bootstrap_packages': ['ca-certificates', 'gnupg'],
+      'legacy_paths': [],
+      'entries': []
+    } | combine(repo_config | default({}), recursive=True) }}"
+
+- name: Repo | ensure keyring dir
+  ansible.builtin.file:
+    path: "{{ apt_repo.key_dir }}"
+    state: directory
+    owner: root
+    group: root
+    mode: "0755"
+  become: true
+
+- name: Repo | ensure bootstrap packages (for key download/dearmor)
+  ansible.builtin.apt:
+    name: "{{ apt_repo.bootstrap_packages }}"
+    state: present
+    update_cache: true
+    cache_valid_time: "{{ apt_repo.cache_valid_time }}"
+  when: (apt_repo.bootstrap_packages | default([])) | length > 0
+  become: true
+
+- name: Repo | remove legacy repo/keyring paths
+  ansible.builtin.file:
+    path: "{{ item }}"
+    state: absent
+  loop: "{{ apt_repo.legacy_paths | default([]) }}"
+  become: true
+
+# Optional Ubuntu universe (including updates/security)
+- name: Repo | enable Ubuntu universe (optional)
+  become: true
+  when:
+    - ansible_facts.distribution == "Ubuntu"
+    - apt_repo.enable_universe | bool
+  block:
+    - name: Repo | universe main
+      ansible.builtin.apt_repository:
+        repo: "deb http://archive.ubuntu.com/ubuntu {{ ansible_distribution_release }} universe"
+        filename: "ubuntu-{{ ansible_distribution_release }}-universe"
+        state: present
+
+    - name: Repo | universe updates
+      ansible.builtin.apt_repository:
+        repo: "deb http://archive.ubuntu.com/ubuntu {{ ansible_distribution_release }}-updates universe"
+        filename: "ubuntu-{{ ansible_distribution_release }}-universe"
+        state: present
+
+    - name: Repo | universe security
+      ansible.builtin.apt_repository:
+        repo: "deb http://security.ubuntu.com/ubuntu {{ ansible_distribution_release }}-security universe"
+        filename: "ubuntu-{{ ansible_distribution_release }}-universe"
+        state: present
+
+# Declarative custom repositories: fetch key -> dearmor -> add repo.
+# Ansible rejects `loop` on a block, so each task loops over the entries itself;
+# entries that are not enabled are skipped per item by the `when` clauses.
+- name: Repo | configure declared apt repositories
+  become: true
+  block:
+    - name: Repo | fetch ASCII key (optional)
+      ansible.builtin.get_url:
+        url: "{{ repo_entry.key_url }}"
+        dest: "{{ apt_repo.key_dir }}/{{ repo_entry.name }}.asc"
+        mode: "0644"
+      loop: "{{ apt_repo.entries | default([]) }}"
+      loop_control:
+        loop_var: repo_entry
+        label: "{{ repo_entry.name }}"
+      when:
+        - repo_entry.enabled | default(false) | bool
+        - repo_entry.key_url is defined and (repo_entry.key_url | length > 0)
+
+    - name: Repo | dearmor key (optional)
+      ansible.builtin.command:
+        cmd: "gpg --dearmor -o {{ apt_repo.key_dir }}/{{ repo_entry.name }}.gpg {{ apt_repo.key_dir }}/{{ repo_entry.name }}.asc"
+        creates: "{{ apt_repo.key_dir }}/{{ repo_entry.name }}.gpg"
+      loop: "{{ apt_repo.entries | default([]) }}"
+      loop_control:
+        loop_var: repo_entry
+        label: "{{ repo_entry.name }}"
+      when:
+        - repo_entry.enabled | default(false) | bool
+        - repo_entry.key_url is defined and (repo_entry.key_url | length > 0)
+
+    - name: Repo | ensure keyring permission
+      ansible.builtin.file:
+        path: "{{ repo_entry.signed_by | default(apt_repo.key_dir ~ '/' ~ repo_entry.name ~ '.gpg') }}"
+        owner: root
+        group: root
+        mode: "0644"
+        state: file
+      loop: "{{ apt_repo.entries | default([]) }}"
+      loop_control:
+        loop_var: repo_entry
+        label: "{{ repo_entry.name }}"
+      when:
+        - repo_entry.enabled | default(false) | bool
+        - (repo_entry.key_url is defined and (repo_entry.key_url | length > 0)) or (repo_entry.signed_by is defined)
+
+    - name: Repo | cleanup repo specific paths (optional)
+      ansible.builtin.file:
+        path: "{{ cleanup_entry.1 }}"
+        state: absent
+      loop: "{{ apt_repo.entries | default([]) | subelements('cleanup', skip_missing=True) }}"
+      loop_control:
+        loop_var: cleanup_entry
+        label: "{{ cleanup_entry.0.name }}: {{ cleanup_entry.1 }}"
+      when: cleanup_entry.0.enabled | default(false) | bool
+
+    - name: Repo | add apt repository (signed-by)
+      ansible.builtin.apt_repository:
+        repo: >-
+          deb [signed-by={{ repo_entry.signed_by | default(apt_repo.key_dir ~ '/' ~ repo_entry.name ~ '.gpg') }}]
+          {{ repo_entry.uri }} {{ repo_entry.suite }} {{ (repo_entry.components | default(['main'])) | join(' ') }}
+        filename: "{{ repo_entry.name }}"
+        state: present
+      loop: "{{ apt_repo.entries | default([]) }}"
+      loop_control:
+        loop_var: repo_entry
+        label: "{{ repo_entry.name }}"
+      when: repo_entry.enabled | default(false) | bool
+
+- name: Repo | update apt cache after repo setup (optional)
+  ansible.builtin.apt:
+    update_cache: true
+    cache_valid_time: "{{ apt_repo.cache_valid_time }}"
+  when: apt_repo.auto_update_cache | bool
+  become: true
diff --git a/roles/vhosts/common/templates/hostname.j2 
b/roles/vhosts/common/templates/hostname.j2
new file mode 100755
index 0000000..1fad51f
--- /dev/null
+++ b/roles/vhosts/common/templates/hostname.j2
@@ -0,0 +1 @@
+{{ inventory_hostname }}
diff --git a/roles/vhosts/deepflow_agent/tasks/main.yml b/roles/vhosts/deepflow_agent/tasks/main.yml
new file mode 100644
index 0000000..1110175
--- /dev/null
+++ b/roles/vhosts/deepflow_agent/tasks/main.yml
@@ -0,0 +1,49 @@
+- name: Copy agent package to /tmp on target
+  copy:
+    src: "{{ agent_base_dir }}/{{ agent_package_name }}"
+    dest: "/tmp/{{ agent_package_name }}"
+    mode: '0644'
+
+- name: Install agent package
+  become: true
+  shell: |
+    case "{{ agent_package_name }}" in
+      *.rpm)
+        rpm -Uvh --force /tmp/{{ agent_package_name }}
+        ;;
+      *.deb)
+        dpkg -i /tmp/{{ agent_package_name }}
+        ;;
+      *)
+        echo "Unsupported package format"
+        exit 1
+        ;;
+    esac
+  args:
+    executable: /bin/bash
+
+- name: Render config to /tmp
+  template:
+    src: deepflow-agent.yaml.j2
+    dest: "/tmp/deepflow-agent.yaml"
+    mode: '0644'
+
+- name: Move config to /etc
+  become: true
+  command: mv /tmp/deepflow-agent.yaml /etc/deepflow-agent.yaml
+
+- name: Restart deepflow-agent service
+  become: true
+  systemd:
+    name: deepflow-agent
+    state: restarted
+    enabled: true
+
+- name: Show service status
+  shell: systemctl is-active deepflow-agent
+  register: agent_status
+  changed_when: false
+
+- name: Report Agent status
+  debug:
+    msg: "DeepFlow Agent status on {{ inventory_hostname }}: {{ agent_status.stdout }}"
diff --git a/roles/vhosts/deepflow_agent/templates/deepflow-agent.yaml.j2 b/roles/vhosts/deepflow_agent/templates/deepflow-agent.yaml.j2
new file mode 100644
index 0000000..4c5111b
--- /dev/null
+++ b/roles/vhosts/deepflow_agent/templates/deepflow-agent.yaml.j2
@@ -0,0 +1,7 @@
+controller-ips:
+{% for ip in controller_ips %}
+  - {{ ip }}
+{% endfor %}
+{% if vtap_group_id is defined %}
+vtap-group-id: "{{ vtap_group_id }}"
+{% endif %}
diff --git a/roles/vhosts/docker/defaults/main.yml 
b/roles/vhosts/docker/defaults/main.yml
new file mode 100644
index 0000000..f25bedc
--- /dev/null
+++ b/roles/vhosts/docker/defaults/main.yml
@@ -0,0 +1,7 @@
+---
+# Default Docker repository channel
+# Available options: stable, test, nightly
+# Default is stable
+
+# The channel used when configuring Docker repositories.
+docker_channel: stable
diff --git a/roles/vhosts/docker/meta/main.yml b/roles/vhosts/docker/meta/main.yml
new file mode 100644
index 0000000..fdda41b
--- /dev/null
+++ b/roles/vhosts/docker/meta/main.yml
@@ -0,0 +1,3 @@
+---
+dependencies:
+  - role: common
diff --git a/roles/vhosts/docker/tasks/main.yml b/roles/vhosts/docker/tasks/main.yml
new file mode 100644
index 0000000..79c3c01
--- /dev/null
+++ b/roles/vhosts/docker/tasks/main.yml
@@ -0,0 +1,103 @@
+---
+- name: Detect supported platform
+  ansible.builtin.set_fact:
+    docker_platform: >-
+      {{
+        'ubuntu' if ansible_distribution == 'Ubuntu' and ansible_distribution_version in ['22.04', '24.04']
+        else 'rocky' if ansible_distribution == 'Rocky' and (ansible_distribution_major_version | int) in [8, 9, 10]
+        else 'unsupported'
+      }}
+
+- name: Determine repository architecture
+  ansible.builtin.set_fact:
+    docker_repo_arch: "{{ {'x86_64': 'amd64', 'aarch64': 'arm64', 'armv7l': 'armhf'}.get(ansible_architecture, ansible_architecture) }}"  # map kernel arch names to Debian arch names used by the Docker apt repo
+  when: ansible_distribution == 'Ubuntu'
+
+- name: Ensure platform is supported
+  ansible.builtin.assert:
+    that: docker_platform != 'unsupported'
+    fail_msg: >-
+      Docker installation is only supported on Ubuntu 22.04/24.04 and Rocky Linux 8/9/10.
+
+- name: Install Docker on Ubuntu
+  when: docker_platform == 'ubuntu'
+  block:
+    - name: Install required packages
+      ansible.builtin.apt:
+        name:
+          - ca-certificates
+          - curl
+          - gnupg
+          - lsb-release
+        state: present
+        update_cache: true
+
+    - name: Ensure apt keyring directory exists
+      ansible.builtin.file:
+        path: /etc/apt/keyrings
+        state: directory
+        mode: '0755'
+
+    - name: Add Docker GPG key
+      ansible.builtin.get_url:
+        url: https://download.docker.com/linux/ubuntu/gpg
+        dest: /etc/apt/keyrings/docker.asc
+        mode: '0644'
+      register: docker_gpg_download
+      until: docker_gpg_download is succeeded
+      retries: 5
+      delay: 3
+
+    - name: Add Docker repository
+      ansible.builtin.apt_repository:
+        repo: >-
+          deb [arch={{ docker_repo_arch }} signed-by=/etc/apt/keyrings/docker.asc]
+          https://download.docker.com/linux/ubuntu {{ ansible_distribution_release }} {{ docker_channel }}
+        state: present
+        filename: docker
+
+    - name: Install Docker Engine packages
+      ansible.builtin.apt:
+        name:
+          - docker-ce
+          - docker-ce-cli
+          - containerd.io
+          - docker-buildx-plugin
+          - docker-compose-plugin
+        state: present
+        update_cache: true
+
+- name: Install Docker on Rocky Linux
+  when: docker_platform == 'rocky'
+  block:
+    - name: Install required packages
+      ansible.builtin.package:
+        name:
+          - dnf-plugins-core
+          - yum-utils
+        state: present
+
+    - name: Configure Docker repository
+      ansible.builtin.yum_repository:
+        name: docker-ce
+        description: Docker CE Repository
+        baseurl: https://download.docker.com/linux/centos/$releasever/$basearch/{{ docker_channel }}
+        enabled: true
+        gpgcheck: true
+        gpgkey: https://download.docker.com/linux/centos/gpg
+
+    - name: Install Docker Engine packages
+      ansible.builtin.package:
+        name:
+          - docker-ce
+          - docker-ce-cli
+          - containerd.io
+          - docker-buildx-plugin
+          - docker-compose-plugin
+        state: present
+
+- name: Ensure Docker service is enabled and running
+  ansible.builtin.service:
+    name: docker
+    state: started
+    enabled: true
diff --git 
a/roles/vhosts/dovecot/handlers/main.yml b/roles/vhosts/dovecot/handlers/main.yml
new file mode 100644
index 0000000..c11e2a9
--- /dev/null
+++ b/roles/vhosts/dovecot/handlers/main.yml
@@ -0,0 +1,10 @@
+---
+- name: Restart dovecot
+  systemd:
+    name: dovecot
+    state: restarted
+
+- name: Reload dovecot
+  systemd:
+    name: dovecot
+    state: reloaded
diff --git a/roles/vhosts/dovecot/tasks/main.yml b/roles/vhosts/dovecot/tasks/main.yml
new file mode 100644
index 0000000..f305c65
--- /dev/null
+++ b/roles/vhosts/dovecot/tasks/main.yml
@@ -0,0 +1,66 @@
+---
+- name: Update apt cache
+  apt:
+    update_cache: yes
+    cache_valid_time: 3600
+
+- name: Install dovecot
+  apt:
+    name:
+      - dovecot-core
+      - dovecot-imapd
+    state: present
+
+- name: Add dovecot user to mail group
+  user:
+    name: dovecot
+    groups: mail
+    append: yes
+
+- name: Create dovecot configuration directories
+  file:
+    path: /etc/dovecot/conf.d
+    state: directory
+    mode: '0755'
+
+- name: Create main dovecot configuration
+  template:
+    src: dovecot.conf.j2
+    dest: /etc/dovecot/dovecot.conf
+    mode: '0644'
+  notify: Restart dovecot
+
+- name: Create 10-mail.conf
+  template:
+    src: 10-mail.conf.j2
+    dest: /etc/dovecot/conf.d/10-mail.conf
+    mode: '0644'
+  notify: Restart dovecot
+
+- name: Create 10-auth.conf
+  template:
+    src: 10-auth.conf.j2
+    dest: /etc/dovecot/conf.d/10-auth.conf
+    mode: '0644'
+  notify: Restart dovecot
+
+- name: Create 10-ssl.conf
+  template:
+    src: 10-ssl.conf.j2
+    dest: /etc/dovecot/conf.d/10-ssl.conf
+    mode: '0644'
+  notify: Restart dovecot
+
+- name: Create 10-master.conf
+  template:
+    src: 10-master.conf.j2
+    dest: /etc/dovecot/conf.d/10-master.conf
+    mode: '0644'
+  notify: Restart dovecot
+
+- name: Enable and start dovecot service
+  systemd:
+    name: dovecot
+    state: started
+    enabled: yes
+    daemon_reload: yes
diff --git a/roles/vhosts/dovecot/templates/10-auth.conf.j2 b/roles/vhosts/dovecot/templates/10-auth.conf.j2
new file mode 100644
index 0000000..c91cdbe
--- /dev/null
+++ 
b/roles/vhosts/dovecot/templates/10-auth.conf.j2
@@ -0,0 +1,3 @@
+auth_mechanisms = plain login
+disable_plaintext_auth = yes
+!include auth-system.conf.ext
diff --git a/roles/vhosts/dovecot/templates/10-mail.conf.j2 b/roles/vhosts/dovecot/templates/10-mail.conf.j2
new file mode 100644
index 0000000..97c1876
--- /dev/null
+++ b/roles/vhosts/dovecot/templates/10-mail.conf.j2
@@ -0,0 +1,5 @@
+mail_location = maildir:~/Maildir
+namespace inbox {
+  inbox = yes
+}
+mail_privileged_group = mail
diff --git a/roles/vhosts/dovecot/templates/10-master.conf.j2 b/roles/vhosts/dovecot/templates/10-master.conf.j2
new file mode 100644
index 0000000..dd2ba2d
--- /dev/null
+++ b/roles/vhosts/dovecot/templates/10-master.conf.j2
@@ -0,0 +1,24 @@
+service imap-login {
+  inet_listener imap {
+    port = 143
+  }
+  inet_listener imaps {
+    port = 993
+    ssl = yes
+  }
+}
+
+service auth {
+  unix_listener auth-userdb {
+    mode = 0600
+    user = dovecot
+  }
+  unix_listener auth-chasquid-userdb {
+    mode = 0660
+    user = chasquid
+  }
+  unix_listener auth-chasquid-client {
+    mode = 0660
+    user = chasquid
+  }
+}
diff --git a/roles/vhosts/dovecot/templates/10-ssl.conf.j2 b/roles/vhosts/dovecot/templates/10-ssl.conf.j2
new file mode 100644
index 0000000..4261581
--- /dev/null
+++ b/roles/vhosts/dovecot/templates/10-ssl.conf.j2
@@ -0,0 +1,5 @@
+ssl = required
+ssl_cert = <{{ CERT_PEM }}
+ssl_key = <{{ CERT_KEY }}
+ssl_min_protocol = TLSv1.2
+ssl_prefer_server_ciphers = yes
diff --git a/roles/vhosts/dovecot/templates/dovecot.conf.j2 b/roles/vhosts/dovecot/templates/dovecot.conf.j2
new file mode 100644
index 0000000..7fb1587
--- /dev/null
+++ b/roles/vhosts/dovecot/templates/dovecot.conf.j2
@@ -0,0 +1,3 @@
+protocols = imap
+listen = *, ::
+!include conf.d/*.conf
diff --git a/roles/vhosts/firewall/defaults/main.yml b/roles/vhosts/firewall/defaults/main.yml
new file mode 100644
index 0000000..f374ede
--- /dev/null
+++ b/roles/vhosts/firewall/defaults/main.yml
@@ -0,0 +1,31 @@
+---
+# Mail server firewall 
configuration
+
+# SSH port (always allow - most important!)
+# Note: SSH is allowed FIRST to ensure you don't lock yourself out
+# This should typically be customized to your actual SSH port (e.g., 2222)
+ssh_port: 22
+
+# Private networks allowed to access LMTP (port 24)
+lmtp_private_networks:
+  - 127.0.0.1
+  - 10.0.0.0/8
+
+# Essential ports to open (SSH first!)
+essential_ports:
+  - { port: "{{ ssh_port }}", protocol: 'tcp', comment: 'SSH (Secure Shell) - Critical!' }
+  - { port: 443, protocol: 'tcp', comment: 'HTTPS - Web SSL' }
+  - { port: 80, protocol: 'tcp', comment: 'HTTP - Web (optional)' }
+
+# Mail ports to open
+mail_ports:
+  - { port: 25, protocol: 'tcp', comment: 'SMTP MX inbound' }
+  - { port: 587, protocol: 'tcp', comment: 'SMTP Submission (AUTH)' }
+  - { port: 465, protocol: 'tcp', comment: 'SMTPS (optional)' }
+  - { port: 993, protocol: 'tcp', comment: 'IMAPS SSL' }
+
+# Ports to deny: plaintext POP3/IMAP, plus POP3S (TLS) because POP3 is not offered
+denied_ports:
+  - { port: 110, protocol: 'tcp', comment: 'Deny POP3 plaintext' }
+  - { port: 143, protocol: 'tcp', comment: 'Deny IMAP plaintext' }
+  - { port: 995, protocol: 'tcp', comment: 'Deny POP3S (POP3 not offered)' }
diff --git a/roles/vhosts/firewall/handlers/main.yml b/roles/vhosts/firewall/handlers/main.yml
new file mode 100644
index 0000000..7c78e0e
--- /dev/null
+++ b/roles/vhosts/firewall/handlers/main.yml
@@ -0,0 +1,2 @@
+---
+# Firewall handlers (reserved for future use)
diff --git a/roles/vhosts/firewall/tasks/main.yml b/roles/vhosts/firewall/tasks/main.yml
new file mode 100644
index 0000000..4384063
--- /dev/null
+++ b/roles/vhosts/firewall/tasks/main.yml
@@ -0,0 +1,98 @@
+---
+- name: Install UFW
+  apt:
+    name: ufw
+    state: present
+
+- name: Set default policies
+  ufw:
+    policy: "{{ item.policy }}"
+    direction: "{{ item.direction }}"
+  loop:
+    - { policy: 'deny', direction: 'incoming' }
+    - { policy: 'allow', direction: 'outgoing' }
+
+- name: Allow essential ports (SSH first!)
+  ufw:
+    port: "{{ item.port }}"
+    protocol: "{{ item.protocol }}"
+    rule: allow
+    comment: "{{ item.comment }}"
+  loop: "{{ essential_ports }}"
+
+- name: Allow necessary mail ports
+  ufw:
+    port: "{{ item.port }}"
+    protocol: "{{ item.protocol }}"
+    rule: allow
+    comment: "{{ item.comment }}"
+  loop: "{{ mail_ports }}"
+
+- name: Allow LMTP from private networks
+  ufw:
+    port: 24
+    protocol: tcp
+    rule: allow
+    from_ip: "{{ item }}"
+    comment: 'LMTP private'
+  loop: "{{ lmtp_private_networks }}"
+
+- name: Deny plaintext ports
+  ufw:
+    port: "{{ item.port }}"
+    protocol: "{{ item.protocol }}"
+    rule: deny
+    comment: "{{ item.comment }}"
+  loop: "{{ denied_ports }}"
+
+- name: Enable UFW
+  ufw:
+    state: enabled
+
+- name: Get UFW numbered status
+  command: ufw status numbered
+  register: ufw_numbered
+  changed_when: false
+
+- name: Get UFW status verbose
+  command: ufw status verbose
+  register: ufw_status
+  changed_when: false
+
+- name: Display UFW status
+  debug:
+    msg: |
+      🔥 UFW Firewall Status
+      ====================
+      {{ ufw_status.stdout }}
+
+- name: Display configured mail rules
+  debug:
+    msg: |
+      🔥 Mail Server Firewall Rules:
+      =============================
+
+      ESSENTIAL PORTS (Open - Applied First!):
+      {% for port in essential_ports %}
+      ✅ {{ port.port }}/{{ port.protocol }} - {{ port.comment }}
+      {% endfor %}
+
+      MAIL PORTS (Open):
+      {% for port in mail_ports %}
+      ✅ {{ port.port }}/{{ port.protocol }} - {{ port.comment }}
+      {% endfor %}
+
+      DENIED PORTS (Blocked):
+      {% for port in denied_ports %}
+      ❌ {{ port.port }}/{{ port.protocol }} - {{ port.comment }}
+      {% endfor %}
+
+      LMTP ACCESS (Private Networks):
+      {% for network in lmtp_private_networks %}
+      ✅ From {{ network }} to port 24/tcp - Local mail delivery
+      {% endfor %}
+
+      ⚠️ SECURITY NOTES:
+      - SSH port {{ ssh_port }} is open - ensure you use key-based authentication
+      - Default policy: deny all incoming, allow all outgoing
+      - Only open ports that are absolutely necessary
diff --git 
a/roles/vhosts/gpu-k8s-reset/files/reset-gpu-k8s.sh b/roles/vhosts/gpu-k8s-reset/files/reset-gpu-k8s.sh
new file mode 100755
index 0000000..01689a6
--- /dev/null
+++ b/roles/vhosts/gpu-k8s-reset/files/reset-gpu-k8s.sh
@@ -0,0 +1,17 @@
+#!/bin/bash
+set -e
+
+if command -v sealos >/dev/null 2>&1; then
+  sudo sealos reset --force || true
+fi
+
+sudo kubeadm reset -f || true
+sudo rm -rf ~/.kube /etc/kubernetes /var/lib/etcd /var/lib/kubelet
+sudo rm -rf /var/lib/cni /etc/cni/net.d
+
+sudo ip link delete cni0 2>/dev/null || true
+sudo ip link delete flannel.1 2>/dev/null || true
+sudo ip link delete docker0 2>/dev/null || true
+sudo ip link delete kube-ipvs0 2>/dev/null || true
+
+sudo iptables-save | grep -v KUBE- | grep -v CNI- | sudo iptables-restore || true
diff --git a/roles/vhosts/gpu-k8s-reset/tasks/main.yml b/roles/vhosts/gpu-k8s-reset/tasks/main.yml
new file mode 100644
index 0000000..7146bff
--- /dev/null
+++ b/roles/vhosts/gpu-k8s-reset/tasks/main.yml
@@ -0,0 +1,3 @@
+- name: Reset GPU Kubernetes cluster
+  script: files/reset-gpu-k8s.sh
+  when: cluster_reset | default('enable') == 'enable'
diff --git a/roles/vhosts/gpu-k8s/defaults/main.yml b/roles/vhosts/gpu-k8s/defaults/main.yml
new file mode 100644
index 0000000..78febbb
--- /dev/null
+++ b/roles/vhosts/gpu-k8s/defaults/main.yml
@@ -0,0 +1,13 @@
+# Default variables for gpu-k8s role
+sealos_version: latest
+kubernetes_version: v1.25.16
+cilium_version: v1.13.4
+helm_version: v3.9.4
+masters: []  # List of master hostnames
+nodes: []  # List of worker hostnames
+master_ips: []  # List of up to three master node IPs
+node_ips: []  # List of worker node IPs
+sealos_cmd_env: '{}'
+kubeadm_init_cmd: "kubeadm init --skip-phases=addon/kube-proxy"
+ssh_user: "{{ ansible_user | default('root') }}"
+ssh_private_key: "{{ ansible_ssh_private_key_file | default('~/.ssh/id_rsa') }}"
diff --git a/roles/vhosts/gpu-k8s/files/get_labring_registry.sh b/roles/vhosts/gpu-k8s/files/get_labring_registry.sh
new file mode 100755
index 0000000..e3a8b8f
--- 
/dev/null
+++ b/roles/vhosts/gpu-k8s/files/get_labring_registry.sh
@@ -0,0 +1,12 @@
+#!/bin/bash
+# Determine the appropriate LabRing container registry based on geolocation.
+# Defaults to the Chinese mainland registry.
+REGISTRY_CN="registry.cn-shanghai.aliyuncs.com/labring"
+REGISTRY_INT="labring"
+# Query external service for country code; fall back to CN on failure.
+COUNTRY=$(curl -fsSL https://ipapi.co/country/ 2>/dev/null || echo "CN")
+if [ "$COUNTRY" = "CN" ]; then
+  echo "$REGISTRY_CN"
+else
+  echo "$REGISTRY_INT"
+fi
diff --git a/roles/vhosts/gpu-k8s/files/run_sealos.sh b/roles/vhosts/gpu-k8s/files/run_sealos.sh
new file mode 100755
index 0000000..a54b052
--- /dev/null
+++ b/roles/vhosts/gpu-k8s/files/run_sealos.sh
@@ -0,0 +1,24 @@
+#!/bin/bash
+set -e
+
+REGISTRY="$1"
+K8S_VERSION="$2"
+CILIUM_VERSION="$3"
+HELM_VERSION="$4"
+MASTERS="$5"
+NODES="$6"
+SSH_USER="$7"
+ANS_USER="$8"
+CMD_ENV=$(echo "$9" | base64 -d)
+KUBEADM_CMD=$(echo "${10}" | base64 -d)
+
+sudo sealos run \
+  ${REGISTRY}/kubernetes:${K8S_VERSION} \
+  ${REGISTRY}/cilium:${CILIUM_VERSION} \
+  ${REGISTRY}/helm:${HELM_VERSION} \
+  --masters ${MASTERS} \
+  --nodes ${NODES} \
+  --user ${SSH_USER} \
+  --pk /home/${ANS_USER}/.ssh/id_rsa \
+  --env "${CMD_ENV}" \
+  --cmd "${KUBEADM_CMD}"
diff --git a/roles/vhosts/gpu-k8s/tasks/install_cluster.yml b/roles/vhosts/gpu-k8s/tasks/install_cluster.yml
new file mode 100644
index 0000000..9353693
--- /dev/null
+++ b/roles/vhosts/gpu-k8s/tasks/install_cluster.yml
@@ -0,0 +1,129 @@
+- name: Determine latest sealos version when requested
+  uri:
+    url: https://api.github.com/repos/labring/sealos/releases/latest
+    return_content: yes
+  register: sealos_latest
+  when: sealos_version is not defined or sealos_version == 'latest'
+
+- name: Set sealos_version fact to latest release
+  set_fact:
+    sealos_version: "{{ sealos_latest.json.tag_name }}"
+  when:
+    - sealos_version is not defined or sealos_version == 'latest'
+    - sealos_latest is defined
+    - 
sealos_latest.json is defined
+
+- name: Resolve master and node IPs from hostnames when needed
+  set_fact:
+    master_ips: >-
+      {{ ((master_ips | default([])) | length > 0)
+      | ternary(master_ips,
+      (masters | default([]) | map('extract', hostvars, 'ansible_host') | list)) }}
+    node_ips: >-
+      {{ ((node_ips | default([])) | length > 0)
+      | ternary(node_ips,
+      (nodes | default([]) | map('extract', hostvars, 'ansible_host') | list)) }}
+  delegate_to: "{{ ops_host | default(masters | default(master_ips, true) | first) }}"
+  delegate_facts: true
+  run_once: true
+  become: false
+
+
+- name: Install sealos CLI
+  shell: |
+    VERSION={{ sealos_version }}
+    wget -q https://github.com/labring/sealos/releases/download/${VERSION}/sealos_${VERSION#v}_linux_amd64.tar.gz
+    tar -xzf sealos_${VERSION#v}_linux_amd64.tar.gz sealos
+    chmod +x sealos
+    mv sealos /usr/bin/sealos
+  args:
+    executable: /bin/bash
+  become: true
+  when: inventory_hostname == (ops_host | default(masters | default(master_ips, true) | first))
+
+- name: Install Helm
+  shell: |
+    curl -fsSL -o get_helm.sh https://raw.githubusercontent.com/helm/helm/main/scripts/get-helm-3
+    chmod 700 get_helm.sh
+    ./get_helm.sh
+    rm -f get_helm.sh
+  args:
+    executable: /bin/bash
+  become: true
+  when: inventory_hostname == (ops_host | default(masters | default(master_ips, true) | first)) and (ansible_facts['distribution'] == 'Ubuntu' or ansible_facts['distribution'] == 'Debian')
+
+- name: Install nerdctl
+  shell: |
+    wget -q https://github.com/containerd/nerdctl/releases/download/v2.0.2/nerdctl-2.0.2-linux-amd64.tar.gz
+    tar -C /usr/local -xzf nerdctl-2.0.2-linux-amd64.tar.gz
+  args:
+    executable: /bin/bash
+  become: true
+  when: inventory_hostname == (ops_host | default(masters | default(master_ips, true) | first))
+
+- name: Determine LabRing registry prefix
+  shell: "{{ role_path }}/files/get_labring_registry.sh"
+  register: labring_registry
+  changed_when: false
+  delegate_to: localhost
+  run_once: true
+  become: false
+  when: inventory_hostname == 
(ops_host | default(masters | default(master_ips) | first)) + +- name: Validate master_ips and node_ips + assert: + that: + - master_ips | length > 0 + - node_ips | length > 0 + fail_msg: "Provide masters/nodes hostnames or master_ips/node_ips with at least one entry" + when: inventory_hostname == (ops_host | default(masters | default(master_ips) | first)) + +- name: Verify passwordless SSH access to all cluster nodes + shell: >- + ssh -o BatchMode=yes -o StrictHostKeyChecking=no \ + -i {{ ssh_private_key }} \ + {{ ssh_user | default(ansible_ssh_user | default(ansible_user, true) | default('root')) }}@{{ item }} hostname + loop: "{{ master_ips + node_ips }}" + delegate_to: "{{ ops_host | default(masters | default(master_ips) | first) }}" + become: false + register: ssh_access + changed_when: false + run_once: true + +- name: Run sealos to create Kubernetes cluster + script: files/run_sealos.sh \ + {{ labring_registry.stdout | trim }} \ + {{ kubernetes_version }} \ + {{ cilium_version }} \ + {{ helm_version }} \ + "{{ master_ips | join(',') }}" \ + "{{ node_ips | join(',') }}" \ + {{ ssh_user }} \ + {{ ansible_user }} \ + "{{ sealos_cmd_env | b64encode }}" \ + "{{ kubeadm_init_cmd | b64encode }}" + args: + executable: /bin/bash + become: true + when: inventory_hostname == (ops_host | default(masters | default(master_ips) | first)) + +- name: Show sealos CLI version + command: sealos version + register: sealos_cli_version + changed_when: false + become: true + when: inventory_hostname == (ops_host | default(masters | default(master_ips) | first)) + +- name: Display Kubernetes cluster status + shell: kubectl get nodes -o wide + args: + executable: /bin/bash + register: k8s_status + changed_when: false + become: true + when: inventory_hostname == (ops_host | default(masters | default(master_ips) | first)) + +- name: Print cluster nodes + debug: + msg: "{{ k8s_status.stdout }}" + when: k8s_status is defined and inventory_hostname == (ops_host | default(masters | 
default(master_ips) | first)) diff --git a/roles/vhosts/gpu-k8s/tasks/install_driver.yml b/roles/vhosts/gpu-k8s/tasks/install_driver.yml new file mode 100644 index 0000000..27d0c8f --- /dev/null +++ b/roles/vhosts/gpu-k8s/tasks/install_driver.yml @@ -0,0 +1,22 @@ +- name: Add NVIDIA repositories + shell: | + add-apt-repository -y ppa:graphics-drivers + curl -s -L https://nvidia.github.io/libnvidia-container/gpgkey | apt-key add - + curl -s -L https://nvidia.github.io/libnvidia-container/stable/deb/nvidia-container-toolkit.list | tee /etc/apt/sources.list.d/nvidia-container-runtime.list + curl -s -L https://nvidia.github.io/nvidia-docker/gpgkey | apt-key add - + curl -s -L https://nvidia.github.io/nvidia-docker/stable/ubuntu22.04/nvidia-docker.list | tee /etc/apt/sources.list.d/nvidia-docker.list + apt-get update + args: + executable: /bin/bash + become: true + +- name: Install NVIDIA driver and container runtime + apt: + name: + - nvidia-modprobe + - nvidia-driver-535 + - nvidia-headless-535 + - nvidia-container-toolkit + state: present + update_cache: yes + become: true diff --git a/roles/vhosts/gpu-k8s/tasks/main.yml b/roles/vhosts/gpu-k8s/tasks/main.yml new file mode 100644 index 0000000..63cc9c9 --- /dev/null +++ b/roles/vhosts/gpu-k8s/tasks/main.yml @@ -0,0 +1,8 @@ +- name: Bootstrap Kubernetes cluster with GPU support + include_tasks: install_cluster.yml + +- name: Install NVIDIA GPU drivers + include_tasks: install_driver.yml + +- name: Validate GPU access with test workload + include_tasks: run_test.yml diff --git a/roles/vhosts/gpu-k8s/tasks/run_test.yml b/roles/vhosts/gpu-k8s/tasks/run_test.yml new file mode 100644 index 0000000..0054d07 --- /dev/null +++ b/roles/vhosts/gpu-k8s/tasks/run_test.yml @@ -0,0 +1,16 @@ +- name: Deploy NVIDIA device plugin + shell: kubectl apply -f https://raw.githubusercontent.com/NVIDIA/k8s-device-plugin/v0.14.5/nvidia-device-plugin.yml + args: + executable: /bin/bash + become: true + when: master_ips|length > 0 and 
inventory_hostname == (ops_host | default(masters | default(master_ips) | first)) + +- name: Run CUDA validation pod + shell: | + kubectl run gpu-test --image=nvidia/cuda:12.3.2-base-ubuntu22.04 --restart=Never -- nvidia-smi + kubectl delete pod gpu-test --wait + args: + executable: /bin/bash + become: true + when: master_ips|length > 0 and inventory_hostname == (ops_host | default(masters | default(master_ips) | first)) + diff --git a/roles/vhosts/grafana/defaults/main.yml b/roles/vhosts/grafana/defaults/main.yml new file mode 100644 index 0000000..cc39a8a --- /dev/null +++ b/roles/vhosts/grafana/defaults/main.yml @@ -0,0 +1,13 @@ +grafana_git_url: https://github.com/svc-design/gitops.git +grafana_root_dir: /srv/grafana/grafana-as-code +grafana_domain: grafana.svc.plus +metrics_domain: metrics.svc.plus +prom_url_for_grafana: "https://{{ metrics_domain }}/prom/" + +# Plugin configuration +grafana_plugins_enable_alpha: true +grafana_plugins_app_tls_skip_verify_insecure: false +grafana_allow_loading_unsigned_plugins: + - zinclabs_openobserve +grafana_admin_user: admin +grafana_admin_password: admin diff --git a/roles/vhosts/grafana/tasks/main.yml b/roles/vhosts/grafana/tasks/main.yml new file mode 100644 index 0000000..9b02200 --- /dev/null +++ b/roles/vhosts/grafana/tasks/main.yml @@ -0,0 +1,94 @@ +- name: Ensure Grafana APT key is present + ansible.builtin.get_url: + url: https://apt.grafana.com/gpg.key + dest: /etc/apt/keyrings/grafana.asc # key is ASCII-armored; apt only reads armored keyrings that carry the .asc suffix + mode: '0644' + when: inventory_hostname in groups[group] + +- name: Add Grafana repository + ansible.builtin.apt_repository: + repo: "deb [signed-by=/etc/apt/keyrings/grafana.asc] https://apt.grafana.com stable main" + filename: grafana + when: inventory_hostname in groups[group] + +- name: Install Grafana + ansible.builtin.apt: + name: grafana + state: present + update_cache: true + when: inventory_hostname in groups[group] + +- name: Clone GitOps dashboards repo + ansible.builtin.git: + repo: "{{ grafana_git_url }}" + dest:
"{{ grafana_root_dir }}" + version: HEAD + depth: 1 + update: true + when: inventory_hostname in groups[group] + +- name: Ensure Grafana provisioning directory exists + ansible.builtin.file: + path: /etc/grafana/provisioning/dashboards + state: directory + mode: '0755' + when: inventory_hostname in groups[group] + +- name: Configure Grafana dashboards provisioning + ansible.builtin.template: + src: dashboards.yaml.j2 + dest: /etc/grafana/provisioning/dashboards/dashboards.yaml + owner: root + group: root + mode: '0644' + when: inventory_hostname in groups[group] + +- name: Ensure Grafana systemd override directory exists + ansible.builtin.file: + path: /etc/systemd/system/grafana-server.service.d + state: directory + mode: '0755' + when: inventory_hostname in groups[group] + +- name: Inject Grafana environment overrides + ansible.builtin.template: + src: env.conf.j2 + dest: /etc/systemd/system/grafana-server.service.d/env.conf + owner: root + group: root + mode: '0644' + when: inventory_hostname in groups[group] + +- name: Configure Grafana ini + ansible.builtin.template: + src: grafana.ini.j2 + dest: /etc/grafana/grafana.ini + owner: root + group: grafana + mode: '0640' + when: inventory_hostname in groups[group] + +- name: Install grafana dashboard pull timer + ansible.builtin.template: + src: grafana-dash-pull.timer.j2 + dest: /etc/systemd/system/grafana-dash-pull.timer + mode: '0644' + when: inventory_hostname in groups[group] + +- name: Install grafana dashboard pull service + ansible.builtin.template: + src: grafana-dash-pull.service.j2 + dest: /etc/systemd/system/grafana-dash-pull.service + mode: '0644' + when: inventory_hostname in groups[group] + +- name: Enable and start Grafana services + ansible.builtin.systemd: + name: "{{ item }}" + enabled: true + state: started + daemon_reload: true + loop: + - grafana-server + - grafana-dash-pull.timer + when: inventory_hostname in groups[group] diff --git a/roles/vhosts/grafana/templates/dashboards.yaml.j2 
b/roles/vhosts/grafana/templates/dashboards.yaml.j2 new file mode 100644 index 0000000..7809060 --- /dev/null +++ b/roles/vhosts/grafana/templates/dashboards.yaml.j2 @@ -0,0 +1,10 @@ +apiVersion: 1 +providers: + - name: 'gitops-dashboards' + type: file + disableDeletion: false + allowUiUpdates: false + updateIntervalSeconds: 30 + options: + path: {{ grafana_root_dir }}/dashboards + foldersFromFilesStructure: true diff --git a/roles/vhosts/grafana/templates/env.conf.j2 b/roles/vhosts/grafana/templates/env.conf.j2 new file mode 100644 index 0000000..78ac6df --- /dev/null +++ b/roles/vhosts/grafana/templates/env.conf.j2 @@ -0,0 +1,4 @@ +[Service] +Environment=GF_SERVER_DOMAIN={{ grafana_domain }} +Environment=GF_SERVER_ROOT_URL=https://{{ grafana_domain }}/ +Environment=PROM_URL={{ prom_url_for_grafana }} diff --git a/roles/vhosts/grafana/templates/grafana-dash-pull.service.j2 b/roles/vhosts/grafana/templates/grafana-dash-pull.service.j2 new file mode 100644 index 0000000..fadef99 --- /dev/null +++ b/roles/vhosts/grafana/templates/grafana-dash-pull.service.j2 @@ -0,0 +1,7 @@ +[Unit] +Description=git pull dashboards + +[Service] +Type=oneshot +WorkingDirectory={{ grafana_root_dir }} +ExecStart=/usr/bin/git pull --ff-only diff --git a/roles/vhosts/grafana/templates/grafana-dash-pull.timer.j2 b/roles/vhosts/grafana/templates/grafana-dash-pull.timer.j2 new file mode 100644 index 0000000..7e3a5a8 --- /dev/null +++ b/roles/vhosts/grafana/templates/grafana-dash-pull.timer.j2 @@ -0,0 +1,10 @@ +[Unit] +Description=git pull dashboards every 5m + +[Timer] +OnBootSec=30s +OnUnitActiveSec=5m +AccuracySec=30s + +[Install] +WantedBy=timers.target diff --git a/roles/vhosts/grafana/templates/grafana.ini.j2 b/roles/vhosts/grafana/templates/grafana.ini.j2 new file mode 100644 index 0000000..bdb19d4 --- /dev/null +++ b/roles/vhosts/grafana/templates/grafana.ini.j2 @@ -0,0 +1,22 @@ +[server] +http_addr = 0.0.0.0 +http_port = 3000 +domain = {{ grafana_domain }} +root_url = https://{{ 
grafana_domain }}/ +serve_from_sub_path = false + +[security] +admin_user = {{ grafana_admin_user }} +admin_password = {{ grafana_admin_password }} + +[auth] +disable_login_form = false +disable_signout_menu = false + +[users] +allow_sign_up = false + +[plugins] +enable_alpha = {{ grafana_plugins_enable_alpha | bool | lower }} +app_tls_skip_verify_insecure = {{ grafana_plugins_app_tls_skip_verify_insecure | bool | lower }} +allow_loading_unsigned_plugins = {{ grafana_allow_loading_unsigned_plugins | join(',') }} diff --git a/roles/vhosts/k3s-addon/files/setup-argocd.sh b/roles/vhosts/k3s-addon/files/setup-argocd.sh new file mode 100644 index 0000000..3374e34 --- /dev/null +++ b/roles/vhosts/k3s-addon/files/setup-argocd.sh @@ -0,0 +1,102 @@ +#!/bin/bash + +# Abort with an error when a required argument is empty +check_not_empty() { + if [[ -z $1 ]]; then + echo "Error: $2 is empty. Please provide a value." + exit 1 + fi +} + +helm repo add argo https://argoproj.github.io/argo-helm +helm repo update + +# Deploy Argo CD with Helm +#helm upgrade --install argocd argo/argo-cd -n argocd --create-namespace + +cat <<'EOF' > values.yaml # quoted delimiter: the embedded plugin script's $VARS must be written literally, not expanded here +global: + domain: argocd.onwalk.net +server: + service: + type: NodePort + nodePortHttp: 80 + nodePortHttps: 443 + servicePortHttp: 80 + servicePortHttps: 443 + servicePortHttpName: http + servicePortHttpsName: https + ingress: + enabled: false + ingressClassName: "nginx" + hostname: argocd.onwalk.net + annotations: + nginx.ingress.kubernetes.io/force-ssl-redirect: "true" + nginx.ingress.kubernetes.io/backend-protocol: "HTTP" + tls: true +repoServer: + extraContainers: + - name: helmfile + image: ghcr.io/helmfile/helmfile:v0.157.0 + # Entrypoint should be Argo CD lightweight CMP server i.e.
argocd-cmp-server + command: ["/var/run/argocd/argocd-cmp-server"] + env: + - name: HELM_CACHE_HOME + value: /tmp/helm/cache + - name: HELM_CONFIG_HOME + value: /tmp/helm/config + - name: HELMFILE_CACHE_HOME + value: /tmp/helmfile/cache + - name: HELMFILE_TEMPDIR + value: /tmp/helmfile/tmp + securityContext: + runAsNonRoot: true + runAsUser: 999 + volumeMounts: + - mountPath: /var/run/argocd + name: var-files + - mountPath: /home/argocd/cmp-server/plugins + name: plugins + # Register helmfile plugin into sidecar + - mountPath: /home/argocd/cmp-server/config/plugin.yaml + subPath: helmfile.yaml + name: argocd-cmp-cm + # Starting with v2.4, do NOT mount the same tmp volume as the repo-server container. The filesystem separation helps mitigate path traversal attacks. + - mountPath: /tmp + name: helmfile-tmp + volumes: + - name: argocd-cmp-cm + configMap: + name: argocd-cmp-cm + - name: helmfile-tmp + emptyDir: {} +configs: + cmp: + create: true + plugins: + helmfile: + allowConcurrency: true + discover: + fileName: helmfile.yaml + generate: + command: + - bash + - "-c" + - | + if [[ -v ENV_NAME ]]; then + helmfile -n "$ARGOCD_APP_NAMESPACE" -e $ENV_NAME template --include-crds -q + elif [[ -v ARGOCD_ENV_ENV_NAME ]]; then + helmfile -n "$ARGOCD_APP_NAMESPACE" -e "$ARGOCD_ENV_ENV_NAME" template --include-crds -q + else + helmfile -n "$ARGOCD_APP_NAMESPACE" template --include-crds -q + fi + lockRepo: false +EOF + +helm upgrade --install argocd argo/argo-cd -n argocd -f values.yaml + +# 等待 Argo CD 完全启动 +echo "Waiting for Argo CD to be ready..." +kubectl wait --for=condition=ready pod -l app.kubernetes.io/name=argocd-server -n argocd --timeout=180s + +echo "Argo CD deployment and configuration complete." 
diff --git a/roles/vhosts/k3s-addon/files/setup-dns-provider.sh b/roles/vhosts/k3s-addon/files/setup-dns-provider.sh new file mode 100644 index 0000000..beb17d5 --- /dev/null +++ b/roles/vhosts/k3s-addon/files/setup-dns-provider.sh @@ -0,0 +1,36 @@ +#!/bin/bash + +# 检查参数是否为空 +check_not_empty() { + if [[ -z $1 ]]; then + echo "Error: $2 is empty. Please provide a value." + exit 1 + fi +} + +# 检查参数是否为空 +check_not_empty "$1" "DNS_AK" && DNS_AK=$1 +check_not_empty "$2" "DNS_SK" && DNS_SK=$2 +check_not_empty "$3" "DOMAIN" && DOMAIN=$3 + +# Deploy external-dns +cat > external-dns-values.yaml << EOF +clusterDomain: admin.local +sources: + - service + - ingress +domainFilters: + - $DOMAIN +policy: upsert-only +provider: alibabacloud +alibabacloud: + accessKeyId: $DNS_AK + accessKeySecret: $DNS_SK + regionId: rg-acfm2akhd255pgi + zoneType: public +EOF + +helm repo add bitnami https://charts.bitnami.com/bitnami || echo true +helm repo update +kubectl create namespace external-dns || echo true +helm upgrade --install external-dns -f external-dns-values.yaml bitnami/external-dns -n external-dns diff --git a/roles/vhosts/k3s-addon/files/setup-egress.sh b/roles/vhosts/k3s-addon/files/setup-egress.sh new file mode 100644 index 0000000..998b079 --- /dev/null +++ b/roles/vhosts/k3s-addon/files/setup-egress.sh @@ -0,0 +1,24 @@ +#!/bin/bash +ip=$1 +namespace=$2 + +cat > /tmp/egress.yaml << EOF +apiVersion: cilium.io/v2 +kind: CiliumEgressGatewayPolicy +metadata: + name: egress-nat-policy +spec: + selectors: + - podSelector: + matchLabels: + role: egress-gateway + io.kubernetes.pod.namespace: $namespace + destinationCIDRs: + - "0.0.0.0/0" + egressGateway: + nodeSelector: + matchLabels: + node.kubernetes.io/name: tky-connector.onwalk.net + egressIP: $ip +EOF +kubectl apply -f /tmp/egress.yaml diff --git a/roles/vhosts/k3s-addon/files/setup-flagger.sh b/roles/vhosts/k3s-addon/files/setup-flagger.sh new file mode 100644 index 0000000..ac8011a --- /dev/null +++ 
b/roles/vhosts/k3s-addon/files/setup-flagger.sh @@ -0,0 +1,21 @@ +#!/bin/bash + +# 检查参数是否为空 +check_not_empty() { + if [[ -z $1 ]]; then + echo "Error: $2 is empty. Please provide a value." + exit 1 + fi +} + +# 检查参数是否为空 +check_not_empty "$1" "DOMAIN" && DOMAIN=$1 + +helm repo add flagger https://flagger.app +helm repo update +kubectl create ns ingress || echo true +helm upgrade -i flagger flagger/flagger \ +--namespace ingress \ +--set prometheus.install=false \ +--set meshProvider=nginx \ +--set metricsServer="https://prometheus.${DOMAIN}" diff --git a/roles/vhosts/k3s-addon/files/setup-fluxcd.sh b/roles/vhosts/k3s-addon/files/setup-fluxcd.sh new file mode 100644 index 0000000..eea72c4 --- /dev/null +++ b/roles/vhosts/k3s-addon/files/setup-fluxcd.sh @@ -0,0 +1,46 @@ +#!/bin/bash + +# 检查参数是否为空 +check_not_empty() { + if [[ -z $1 ]]; then + echo "Error: $2 is empty. Please provide a value." + exit 1 + fi +} + +# 检查参数是否为空 +check_not_empty "$1" "Git repository URL" && git_repo=$1 +check_not_empty "$2" "Cluster name" && cluster_name=$2 + +helm repo add fluxcd https://fluxcd-community.github.io/helm-charts +helm repo update +kubectl create namespace gitops-system || true +helm upgrade --install fluxcd fluxcd/flux2 --version 2.12.1 -n gitops-system + +cat > cluster-config.yaml << EOF +apiVersion: source.toolkit.fluxcd.io/v1beta2 +kind: GitRepository +metadata: + name: stable + namespace: gitops-system +spec: + interval: 1m0s + ref: + branch: main + url: $git_repo +--- +apiVersion: kustomize.toolkit.fluxcd.io/v1beta2 +kind: Kustomization +metadata: + name: cluster + namespace: gitops-system +spec: + interval: 1m0s + sourceRef: + kind: GitRepository + name: stable + path: ./clusters/${cluster_name} + prune: true +EOF + +kubectl apply -f cluster-config.yaml && rm cluster-config.yaml -f diff --git a/roles/vhosts/k3s-addon/files/setup-ingress-apisix.sh b/roles/vhosts/k3s-addon/files/setup-ingress-apisix.sh new file mode 100644 index 0000000..495286a --- /dev/null +++ 
b/roles/vhosts/k3s-addon/files/setup-ingress-apisix.sh @@ -0,0 +1,38 @@ +#!/bin/bash + +ingress_ip=$1 + +cat > values.yaml << EOF +service: + type: NodePort + externalIPs: + - $ingress_ip + http: + enabled: true + servicePort: 80 + tls: + servicePort: 443 + nodePort: 443 +apisix: + ssl: + enabled: true + prometheus: + enabled: true +ingress-controller: + enabled: true + config: + apisix: + serviceNamespace: "ingress" + kubernetes: + enableGatewayAPI: true +metrics: + serviceMonitor: + enabled: true + namespace: "ingress" +EOF + +helm repo add apisix https://charts.apiseven.com || echo true +helm repo update +kubectl create ns ingress || echo true +helm delete nginx -n ingress || echo true +helm upgrade --install apisix apisix/apisix --namespace ingress -f values.yaml diff --git a/roles/vhosts/k3s-addon/files/setup-ingress.sh b/roles/vhosts/k3s-addon/files/setup-ingress.sh new file mode 100644 index 0000000..1dfb5fb --- /dev/null +++ b/roles/vhosts/k3s-addon/files/setup-ingress.sh @@ -0,0 +1,145 @@ +#!/bin/bash +ingress=$1 +ingress_ip=$2 + +if [[ $ingress == "default" ]]; then +export KUBECONFIG=/etc/rancher/k3s/k3s.yaml +helm repo add stable https://kubernetes.github.io/ingress-nginx +helm repo up + +cat > value.yaml < svc-patch.yaml < value.yaml < nginx-cm.yaml << EOF +apiVersion: v1 +kind: ConfigMap +metadata: + name: nginx-nginx-ingress + namespace: ingress +data: + use-ssl-certificate-for-ingress: "false" + external-status-address: $ingress_ip + proxy-connect-timeout: 10s + proxy-read-timeout: 10s + client-header-buffer-size: 64k + client-body-buffer-size: 64k + client-max-body-size: 1000m + proxy-buffers: 8 32k + proxy-body-size: 1024m + proxy-buffer-size: 32k + proxy-connect-timeout: 10s + proxy-read-timeout: 10s +EOF + +cat > nginx-svc-patch.yaml << EOF +spec: + ports: + - name: http + nodePort: 80 + port: 80 + protocol: TCP + targetPort: 80 + - name: https + nodePort: 443 + port: 443 + protocol: TCP + targetPort: 443 +EOF + +helm repo add nginx-stable 
https://helm.nginx.com/stable || echo true +helm repo up +helm delete apisix -n ingress || echo true +kubectl create namespace ingress || echo true +helm upgrade --install nginx nginx-stable/nginx-ingress --version=0.15.0 --namespace ingress -f value.yaml +kubectl apply -f nginx-cm.yaml +kubectl patch svc nginx-nginx-ingress -n ingress --patch-file nginx-svc-patch.yaml + +elif [[ $ingress == "apisix" ]]; then + +cat > values.yaml << EOF +service: + type: NodePort + externalIPs: + - $ingress_ip + http: + enabled: true + servicePort: 80 + tls: + servicePort: 443 + nodePort: 443 +apisix: + ssl: + enabled: true + prometheus: + enabled: true +ingress-controller: + enabled: true + config: + apisix: + serviceNamespace: "ingress" + kubernetes: + enableGatewayAPI: true +metrics: + serviceMonitor: + enabled: true + namespace: "ingress" +EOF + +helm repo add apisix https://charts.apiseven.com || echo true +helm repo update +kubectl create ns ingress || echo true +helm delete nginx -n ingress || echo true +helm upgrade --install apisix apisix/apisix --namespace ingress -f values.yaml + +fi diff --git a/roles/vhosts/k3s-addon/files/setup-keda-operator.sh b/roles/vhosts/k3s-addon/files/setup-keda-operator.sh new file mode 100644 index 0000000..0ee63ac --- /dev/null +++ b/roles/vhosts/k3s-addon/files/setup-keda-operator.sh @@ -0,0 +1,6 @@ +#!/bin/bash + +helm repo add kedacore https://kedacore.github.io/charts +helm repo update +kubectl create namespace kube-system || true +helm upgrade --install keda kedacore/keda --namespace kube-system diff --git a/roles/vhosts/k3s-addon/files/setup-prometheus-operator.sh b/roles/vhosts/k3s-addon/files/setup-prometheus-operator.sh new file mode 100644 index 0000000..c5ed2a0 --- /dev/null +++ b/roles/vhosts/k3s-addon/files/setup-prometheus-operator.sh @@ -0,0 +1,52 @@ +#!/bin/bash + +# 检查参数是否为空 +check_not_empty() { + if [[ -z $1 ]]; then + echo "Error: $2 is empty. Please provide a value." 
+ exit 1 + fi +} + +# Require the DOMAIN argument before doing anything else +check_not_empty "$1" "DOMAIN" && DOMAIN=$1 + +cat > prometheus-values.yaml << EOF +global: + imageRegistry: "artifact.onwalk.net/base" +prometheus: + enabled: true + agentMode: false + prometheusSpec: + remoteWrite: + - name: remote_prometheus + url: 'https://prometheus.${DOMAIN}/api/v1/write' + retention: 30m + resources: + requests: + cpu: 200m + memory: 200Mi + podMonitorNamespaceSelector: { } + podMonitorSelector: + matchLabels: + app.kubernetes.io/component: monitoring +nodeExporter: + enabled: true +kubeStateMetrics: + enabled: true +grafana: + enabled: false +prometheus-windows-exporter: + enabled: false +alertmanager: + enabled: false +defaultRules: + create: false +EOF + +node_name=`kubectl get nodes | awk 'NR>1 {print $1}'` +kubectl create namespace monitoring || echo true +kubectl label nodes $node_name prometheus=true --overwrite || echo true # left unquoted on purpose: node_name may hold several names, one per node +helm repo add prometheus-community https://prometheus-community.github.io/helm-charts +helm repo update +helm upgrade --install prometheus-agent prometheus-community/kube-prometheus-stack --version 55.11.0 -n monitoring -f prometheus-values.yaml diff --git a/roles/vhosts/k3s-addon/meta/main.yml b/roles/vhosts/k3s-addon/meta/main.yml new file mode 100644 index 0000000..83cef7b --- /dev/null +++ b/roles/vhosts/k3s-addon/meta/main.yml @@ -0,0 +1,2 @@ +dependencies: + - role: cert-manager diff --git a/roles/vhosts/k3s-addon/tasks/main.yml b/roles/vhosts/k3s-addon/tasks/main.yml new file mode 100755 index 0000000..a8a61e7 --- /dev/null +++ b/roles/vhosts/k3s-addon/tasks/main.yml @@ -0,0 +1,15 @@ +- name: Enable nginx Ingress + script: files/setup-ingress.sh {{ ingress }} {{ ingress_ip }} + when: inventory_hostname in groups[group] and ( ingress == 'nginx' ) +- name: Remove nginx ingress + shell: 'helm delete nginx -n ingress || true ; helm delete apisix -n ingress || true ;' + when: ( inventory_hostname in groups[group] ) and (ingress == 'disable' ) + ignore_errors: yes + +- name: Setup DNS
Provider + script: files/setup-dns-provider.sh {{ dns_ak }} {{ dns_sk }} {{ domain }} + when: ( inventory_hostname in groups[group] ) and (external_dns == 'enable' ) +- name: Remove DNS Provider + shell: 'helm delete external-dns -n external-dns' + when: ( inventory_hostname in groups[group] ) and (external_dns == 'disable' ) + ignore_errors: yes diff --git a/roles/vhosts/k3s-addon/templates/ingress-apisix-dashboard.yaml b/roles/vhosts/k3s-addon/templates/ingress-apisix-dashboard.yaml new file mode 100644 index 0000000..b43c0e0 --- /dev/null +++ b/roles/vhosts/k3s-addon/templates/ingress-apisix-dashboard.yaml @@ -0,0 +1,33 @@ +apiVersion: apisix.apache.org/v2 +kind: ApisixRoute +metadata: + name: apisix-dashboard + namespace: ingress +spec: + http: + - name: root + match: + hosts: + - apisix-dashboard.onwalk.net + paths: + - '/*' + backends: + - serviceName: apisix-dashboard + servicePort: 80 + plugins: + - config: + http_to_https: true + enable: true + name: redirect +--- +apiVersion: apisix.apache.org/v2 +kind: ApisixTls +metadata: + name: apisix + namespace: ingress +spec: + hosts: + - apisix-dashboard.onwalk.net + secret: + name: apisix-tls + namespace: ingress diff --git a/roles/vhosts/k3s-addon/templates/ingress-apisix-values.yaml b/roles/vhosts/k3s-addon/templates/ingress-apisix-values.yaml new file mode 100644 index 0000000..24f386a --- /dev/null +++ b/roles/vhosts/k3s-addon/templates/ingress-apisix-values.yaml @@ -0,0 +1,24 @@ +ingress-controller: + enabled: true + config: + apisix: + serviceNamespace: ingress +etcd: + replicaCount: 1 +discovery: + enabled: true +admin: + enabled: true +gateway: + enabled: true + type: NodePort + http: + enabled: true + nodePort: 80 + tls: + enabled: true + nodePort: 443 + externalIPs: + - {{ ingress_ip }} +dashboard: + enabled: true diff --git a/roles/vhosts/k3s-addon/templates/kubernetes-discovery-config.yaml b/roles/vhosts/k3s-addon/templates/kubernetes-discovery-config.yaml new file mode 100644 index 0000000..d382fcb 
--- /dev/null +++ b/roles/vhosts/k3s-addon/templates/kubernetes-discovery-config.yaml @@ -0,0 +1,65 @@ +apiVersion: apisix.apache.org/v2 +kind: ApisixUpstream +metadata: + name: bookinfo-upstream + namespace: bookinfo +spec: + discovery: + type: kubernetes + serviceName: apisix/bookinfo/productpage:9080 +--- +apiVersion: apisix.apache.org/v2 +kind: ApisixRoute +metadata: + name: bookinfo + namespace: bookinfo +spec: + http: + - name: root + match: + hosts: + - bookinfo.onwalk.net + paths: + - /* + upstreams: + - name: bookinfo-upstream + plugins: + - config: + http_to_https: true + enable: true + name: redirect +--- +apiVersion: apisix.apache.org/v2 +kind: ApisixTls +metadata: + name: bookinfo + namespace: bookinfo +spec: + hosts: + - bookinfo.onwalk.net + secret: + name: bookinfo-tls + namespace: bookinfo +--- +curl -k --header "Authorization: Bearer tokenxxxxx" https://10.170.0.8:6443/api +--- +kubectl get secret kubernetes-discovery-token -o jsonpath={.data.token} | base64 -d +--- +kubectl edit cm -n ingress apisix + discovery: + kubernetes: + - id: apisix + service: + schema: https + host: "10.170.0.6" + port: "6443" + client: + token: |- + #xxxxxxxxxxxxxxx + default_weight: 50 + namespace_selector: + match: + - bookinfo + - nginx + shared_size: 1m +--- diff --git a/roles/vhosts/k3s-addon/templates/kubernetes-discovery-serviceaccount.yaml b/roles/vhosts/k3s-addon/templates/kubernetes-discovery-serviceaccount.yaml new file mode 100644 index 0000000..734f0df --- /dev/null +++ b/roles/vhosts/k3s-addon/templates/kubernetes-discovery-serviceaccount.yaml @@ -0,0 +1,40 @@ +kind: ServiceAccount +apiVersion: v1 +metadata: + name: kubernetes-discovery +--- +apiVersion: v1 +kind: Secret +metadata: + name: kubernetes-discovery-token + annotations: + kubernetes.io/service-account.name: "kubernetes-discovery" +type: kubernetes.io/service-account-token +--- +kind: ClusterRole +apiVersion: rbac.authorization.k8s.io/v1 +metadata: + name: kubernetes-discovery +rules: +- 
apiGroups: [""] + resources: ["endpoints" ] + verbs: ["get", "list", "watch" ] +- apiGroups: [""] + resources: [ "namespaces"] + verbs: ["get", "list", "watch"] +- apiGroups: [""] + resources: ["services", "endpoints"] + verbs: ["get", "list", "watch"] +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: kubernetes-discovery +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: kubernetes-discovery +subjects: +- kind: ServiceAccount + name: kubernetes-discovery + namespace: default diff --git a/roles/vhosts/k3s-addon/templates/kubernetes-discovery.yaml b/roles/vhosts/k3s-addon/templates/kubernetes-discovery.yaml new file mode 100644 index 0000000..3c5cca9 --- /dev/null +++ b/roles/vhosts/k3s-addon/templates/kubernetes-discovery.yaml @@ -0,0 +1,47 @@ +kind: ServiceAccount +apiVersion: v1 +metadata: + name: kubernetes-discovery + namespace: default +--- + +kind: ClusterRole +apiVersion: rbac.authorization.k8s.io/v1 +metadata: + name: kubernetes-discovery +rules: +- apiGroups: [ "" ] + resources: [ endpoints ] + verbs: [ get,list,watch ] +--- + +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: kubernetes-discovery +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: kubernetes-discovery # must name the ClusterRole declared earlier in this manifest +subjects: + - kind: ServiceAccount + name: kubernetes-discovery + namespace: default +--- +#discovery: +# kubernetes: +# - id: release # a custom name refer to the cluster, pattern ^[a-z0-9]{1,8} +# service: +# schema: https #default https +# host: "1.cluster.com" +# port: "6443" +# client: +# #token: |- +# # eyJhbGciOiJSUzI1NiIsImtpZCI6Ikx5ME1DNWdnbmhQNkZCNlZYMXBsT3pYU3BBS2swYzBPSkN3ZnBESGpkUEEif +# # 6Ikx5ME1DNWdnbmhQNkZCNlZYMXBsT3pYU3BBS2swYzBPSkN3ZnBESGpkUEEifeyJhbGciOiJSUzI1NiIsImtpZCI +# default_weight: 50 # weight assigned to each discovered endpoint.
default 50, minimum 0 +# namespace_selector: +# equal: default +# label_selector: |- +# first="a",second="b" +# shared_size: 1m #default 1m diff --git a/roles/vhosts/k3s-cluster-agent/defaults/main.yml b/roles/vhosts/k3s-cluster-agent/defaults/main.yml new file mode 100644 index 0000000..0b79c3e --- /dev/null +++ b/roles/vhosts/k3s-cluster-agent/defaults/main.yml @@ -0,0 +1 @@ +# Default values for k3s-cluster-agent role diff --git a/roles/vhosts/k3s-cluster-agent/tasks/bootstrap.yml b/roles/vhosts/k3s-cluster-agent/tasks/bootstrap.yml new file mode 100644 index 0000000..e69de29 diff --git a/roles/vhosts/k3s-cluster-agent/tasks/destroy.yml b/roles/vhosts/k3s-cluster-agent/tasks/destroy.yml new file mode 100644 index 0000000..e69de29 diff --git a/roles/vhosts/k3s-cluster-agent/tasks/main.yml b/roles/vhosts/k3s-cluster-agent/tasks/main.yml new file mode 100644 index 0000000..ffeb04b --- /dev/null +++ b/roles/vhosts/k3s-cluster-agent/tasks/main.yml @@ -0,0 +1,2 @@ +- name: Execute action on K3s cluster agent + include_tasks: "{{ action }}.yml" diff --git a/roles/vhosts/k3s-cluster-agent/tasks/upgrade.yml b/roles/vhosts/k3s-cluster-agent/tasks/upgrade.yml new file mode 100644 index 0000000..e69de29 diff --git a/roles/vhosts/k3s-cluster-agent/templates/install_k3s_agent.sh.j2 b/roles/vhosts/k3s-cluster-agent/templates/install_k3s_agent.sh.j2 new file mode 100644 index 0000000..a91e43a --- /dev/null +++ b/roles/vhosts/k3s-cluster-agent/templates/install_k3s_agent.sh.j2 @@ -0,0 +1,3 @@ +#!/bin/bash + +curl -sfL https://rancher-mirror.rancher.cn/k3s/k3s-install.sh | INSTALL_K3S_MIRROR=cn K3S_URL=https://{{ agent.k3s_url }}:6443 K3S_TOKEN={{ agent.server_token }} INSTALL_K3S_EXEC="{{ agent.extra_vars }}" sh - diff --git a/roles/vhosts/k3s-cluster-agent/vars/main.yml b/roles/vhosts/k3s-cluster-agent/vars/main.yml new file mode 100644 index 0000000..1e61379 --- /dev/null +++ b/roles/vhosts/k3s-cluster-agent/vars/main.yml @@ -0,0 +1,5 @@ +action: 'bootstrap' +agent: + node_ip: 
'10.254.0.1' + server_token: 'your_server_token' + extra_vars: '--node-label deployment=true --node-external-ip 110.42.238.110 --node-ip {{ agent.node_ip }} --flannel-iface wg0' diff --git a/roles/vhosts/k3s-cluster-server/defaults/main.yml b/roles/vhosts/k3s-cluster-server/defaults/main.yml new file mode 100644 index 0000000..1b488a9 --- /dev/null +++ b/roles/vhosts/k3s-cluster-server/defaults/main.yml @@ -0,0 +1 @@ +# Default values for k3s-cluster-server role diff --git a/roles/vhosts/k3s-cluster-server/tasks/add-master.yml b/roles/vhosts/k3s-cluster-server/tasks/add-master.yml new file mode 100644 index 0000000..e69de29 diff --git a/roles/vhosts/k3s-cluster-server/tasks/backup.yml b/roles/vhosts/k3s-cluster-server/tasks/backup.yml new file mode 100644 index 0000000..e69de29 diff --git a/roles/vhosts/k3s-cluster-server/tasks/bootstrap.yml b/roles/vhosts/k3s-cluster-server/tasks/bootstrap.yml new file mode 100644 index 0000000..e69de29 diff --git a/roles/vhosts/k3s-cluster-server/tasks/destroy.yml b/roles/vhosts/k3s-cluster-server/tasks/destroy.yml new file mode 100644 index 0000000..e69de29 diff --git a/roles/vhosts/k3s-cluster-server/tasks/main.yml b/roles/vhosts/k3s-cluster-server/tasks/main.yml new file mode 100644 index 0000000..388f080 --- /dev/null +++ b/roles/vhosts/k3s-cluster-server/tasks/main.yml @@ -0,0 +1,2 @@ +- name: Execute action on K3s cluster server + include_tasks: "{{ action }}.yml" diff --git a/roles/vhosts/k3s-cluster-server/tasks/recovery.yml b/roles/vhosts/k3s-cluster-server/tasks/recovery.yml new file mode 100644 index 0000000..e69de29 diff --git a/roles/vhosts/k3s-cluster-server/tasks/upgrade.yml b/roles/vhosts/k3s-cluster-server/tasks/upgrade.yml new file mode 100644 index 0000000..e69de29 diff --git a/roles/vhosts/k3s-cluster-server/templates/install_k3s_server.sh.j2 b/roles/vhosts/k3s-cluster-server/templates/install_k3s_server.sh.j2 new file mode 100644 index 0000000..ae9b8ef --- /dev/null +++ 
b/roles/vhosts/k3s-cluster-server/templates/install_k3s_server.sh.j2
@@ -0,0 +1,6 @@
+#!/bin/bash
+# Installs a K3s server from the pre-staged installer (no download); rendered from the `cluster` variable.
+# Flags are passed directly: `-s -` belongs to the `curl | sh -s -` form only, and here it would be
+# forwarded to `k3s server`, where `-s` is the short form of `--server`.
+
+INSTALL_K3S_SKIP_DOWNLOAD=true bash /usr/local/share/k3s/install.sh --disable={{ cluster.server_disable }} --token='{{ cluster.token }}' --datastore-endpoint='{{ cluster.datastore_endpoint }}' --system-default-registry '{{ cluster.registry }}' --data-dir='{{ cluster.data_dir }}' --kube-apiserver-arg '{{ cluster.apiserver_arg }}' --bind-address='{{ cluster.bind_address }}' --tls-san='{{ cluster.tls_san }}' --advertise-address='{{ cluster.advertise_address }}' --node-ip='{{ cluster.node_ip }}' --node-external-ip '{{ cluster.node_external_ip }}' --flannel-iface '{{ cluster.flannel_iface }}' --cluster-cidr '{{ cluster.cluster_cidr }}' --service-cidr '{{ cluster.service_cidr }}'
diff --git a/roles/vhosts/k3s-cluster-server/vars/main.yml b/roles/vhosts/k3s-cluster-server/vars/main.yml
new file mode 100644
index 0000000..45f4838
--- /dev/null
+++ b/roles/vhosts/k3s-cluster-server/vars/main.yml
@@ -0,0 +1,17 @@
+action: 'bootstrap'
+cluster:
+  name: 'cn-k3s-cluster-1'
+  token: 'your_default_token'
+  server_disable: "traefik,servicelb"
+  datastore_endpoint: "mysql://user:password@tcp(database_url:3306)/k3s"
+  registry: "registry.cn-hangzhou.aliyuncs.com"
+  data_dir: "/opt/rancher/k3s"
+  apiserver_arg: "service-node-port-range=0-50000"
+  bind_address: "0.0.0.0"
+  tls_san: "cn-k3s-server.svc.plus"
+  advertise_address: "8.130.93.47"
+  node_ip: "10.254.0.3"
+  node_external_ip: "8.130.93.47"
+  flannel_iface: "wg0"
+  cluster_cidr: "10.42.0.0/16"
+  service_cidr: "10.43.0.0/16"
diff --git a/roles/vhosts/k3s-reset/files/reset-k3s.sh b/roles/vhosts/k3s-reset/files/reset-k3s.sh
new file mode 100644
index 0000000..719a657
--- /dev/null
+++ b/roles/vhosts/k3s-reset/files/reset-k3s.sh
@@ -0,0 +1,44 @@
+#!/bin/bash
+# Removes Kube-OVN, K3s and leftover CNI state (netns, links, iptables rules) from this node.
+# Best-effort: individual steps may fail on nodes where the component is absent.
+
+# Fetch the Kube-OVN cleanup script into a fresh temp file; a plain `wget` would save
+# re-downloads as cleanup.sh.1 and keep executing the stale cleanup.sh.
+cleanup_script=$(mktemp)
+if wget -O "$cleanup_script" https://raw.githubusercontent.com/kubeovn/kube-ovn/release-1.10/dist/images/cleanup.sh; then
+  bash "$cleanup_script"
+else
+  echo "WARN: could not download kube-ovn cleanup.sh, skipping it" >&2
+fi
+rm -f "$cleanup_script"
+
+rm -rf /var/run/openvswitch
+rm -rf /var/run/ovn
+rm -rf /etc/origin/openvswitch/
+rm -rf /etc/origin/ovn/
+rm -rf /etc/cni/net.d/00-kube-ovn.conflist
+rm -rf /etc/cni/net.d/01-kube-ovn.conflist
+rm -rf /var/log/openvswitch
+rm -rf /var/log/ovn
+rm -fr /var/log/kube-ovn
+
+# Server nodes ship k3s-uninstall.sh, agent nodes k3s-agent-uninstall.sh; run whichever exists.
+for uninstaller in /usr/local/bin/k3s-uninstall.sh /usr/local/bin/k3s-agent-uninstall.sh; do
+  [ -x "$uninstaller" ] && "$uninstaller"
+done
+rm -rvf /opt/rancher/ /etc/rancher/ /var/lib/rancher/ ~/.kube
+
+rm -rvf /etc/cni/net.d/*
+
+# Remove CNI network namespaces
+ip netns show 2>/dev/null | grep cni- | xargs -r -t -n 1 ip netns delete
+# Remove interfaces attached to cni0
+ip link show 2>/dev/null | grep 'master cni0' | while read ignore iface ignore; do
+  iface=${iface%%@*}
+  [ -z "$iface" ] || ip link delete "$iface"
+done
+ip link delete cni0
+ip link delete flannel.1
+rm -rf /var/lib/cni/
+# Drop KUBE-* and CNI-* iptables rules
+iptables-save | grep -v KUBE- | grep -v CNI- | iptables-restore
diff --git a/roles/vhosts/k3s-reset/tasks/main.yml b/roles/vhosts/k3s-reset/tasks/main.yml
new file mode 100755
index 0000000..f500dec
--- /dev/null
+++ b/roles/vhosts/k3s-reset/tasks/main.yml
@@ -0,0 +1,5 @@
+- name: Reset K3S Cluster
+  script: files/reset-k3s.sh
+  # 'enanble' is the historical misspelling; it stays accepted so existing inventories keep working.
+  when: (inventory_hostname in groups[group]) and ((cluster_reset | default('')) in ['enable', 'enanble'])
+
diff --git a/roles/vhosts/k3s/files/setup-cni-cilium.sh b/roles/vhosts/k3s/files/setup-cni-cilium.sh
new file mode 100644
index 0000000..34c9ac2
--- /dev/null
+++ b/roles/vhosts/k3s/files/setup-cni-cilium.sh
@@ -0,0 +1,20 @@
+#!/bin/bash
+# Clears flannel/CNI leftovers, then installs Cilium 1.10.4 with Hubble relay and UI via Helm.
+# Remove CNI network namespaces
+ip netns show 2>/dev/null | grep cni- | xargs -r -t -n 1 ip netns delete
+# Remove interfaces attached to cni0
+ip link show 2>/dev/null | grep 'master cni0' | while read ignore iface ignore; do
+  iface=${iface%%@*}
+  [ -z "$iface" ] || ip link delete "$iface"
+done
+ip link delete cni0
+ip link delete flannel.1
+rm -rf /var/lib/cni/
+# Drop KUBE-* and CNI-* iptables rules
+iptables-save | grep -v KUBE- | grep -v CNI- | iptables-restore
+
+helm repo add cilium https://helm.cilium.io/
+helm upgrade --install cilium cilium/cilium --version 1.10.4 \
+  --namespace kube-system \
+  --set hubble.relay.enabled=true \
+  --set hubble.ui.enabled=true
diff --git a/roles/vhosts/k3s/files/setup-cni-kubeovn.sh 
b/roles/vhosts/k3s/files/setup-cni-kubeovn.sh
new file mode 100644
index 0000000..b1f8139
--- /dev/null
+++ b/roles/vhosts/k3s/files/setup-cni-kubeovn.sh
@@ -0,0 +1,19 @@
+#!/bin/bash
+# Prepares this node for Kube-OVN and installs the kube-ovn Helm chart into kube-system.
+# Usage: setup-cni-kubeovn.sh MASTER_NODE_IP
+export NodeIP=$1
+node_name=$(hostname)
+
+modprobe geneve
+modprobe openvswitch
+modprobe ip_tables
+modprobe iptable_nat
+
+rm -rvf /etc/cni/net.d/*
+
+kubectl taint node "$node_name" node-role.kubernetes.io/control-plane:NoSchedule-
+kubectl label node "$node_name" kubernetes.io/os=linux --overwrite
+kubectl label node "$node_name" kube-ovn/role=master --overwrite
+helm repo add kubeovn https://kubeovn.github.io/kube-ovn/
+helm repo update
+helm upgrade --install kube-ovn kubeovn/kube-ovn --set MASTER_NODES="${NodeIP}" -n kube-system
diff --git a/roles/vhosts/k3s/files/setup-k3s.sh b/roles/vhosts/k3s/files/setup-k3s.sh
new file mode 100644
index 0000000..7a6db79
--- /dev/null
+++ b/roles/vhosts/k3s/files/setup-k3s.sh
@@ -0,0 +1,149 @@
+#!/bin/bash
+# Installs a K3s server and Helm on this host.
+# Usage: setup-k3s.sh VERSION CNI POD_CIDR SVC_CIDR ENABLE_API_ACCESS [ADVERTISE_ADDRESS]
+# CNI is one of: default, kubeovn, cilium.
+set -x
+
+export version=$1
+export cni=$2
+export pod_cidr=$3
+export svc_cidr=$4
+export enable_api_access=$5
+# '-' is not allowed in shell identifiers: `export advertise-address=$6` was rejected by bash.
+# NOTE(review): pod_cidr, svc_cidr and advertise_address are exported but not used further in this script.
+export advertise_address=${6:-}
+
+# Returns 0 when google.com answers a single ping; selects upstream vs. mainland-China mirrors.
+function has_international_network()
+{
+  ping -c 1 google.com > /dev/null 2>&1
+}
+
+# Installs K3s $version; $1 carries extra server flags and is word-split on purpose.
+function setup_k3s()
+{
+  local extra_opts=$1
+  mkdir -pv /opt/rancher/k3s
+
+  if has_international_network; then
+    echo "当前主机在国际网络上"
+    curl -sfL https://get.k3s.io | INSTALL_K3S_VERSION=$version sh -s - $extra_opts
+  else
+    echo "当前主机在大陆网络上"
+    curl -sfL https://rancher-mirror.rancher.cn/k3s/k3s-install.sh | INSTALL_K3S_VERSION=$version INSTALL_K3S_MIRROR=cn sh -s - $extra_opts
+  fi
+  mkdir -pv ~/.kube/ && cp /etc/rancher/k3s/k3s.yaml ~/.kube/config
+}
+
+# Installs Helm: upstream installer when reachable, otherwise a tarball from the onwalk mirror.
+function setup_helm()
+{
+  if has_international_network; then
+    echo "当前主机在国际网络上"
+    curl https://raw.githubusercontent.com/helm/helm/main/scripts/get-helm-3 | bash
+  else
+    echo "当前主机在大陆网络上"
+    case $(uname -m) in
+      x86_64) ARCH=amd64 ;;
+      aarch64) ARCH=arm64 ;;
+      loongarch64) ARCH=loongarch64 ;;
+      *) echo "un-supported arch, exit ..."; exit 1 ;;
+    esac
+    rm -rf helm.tar.gz* /usr/local/bin/helm || echo true
+    # NOTE(review): --no-check-certificate skips TLS verification of the downloaded binary.
+    sudo wget --no-check-certificate https://mirrors.onwalk.net/tools/linux-${ARCH}/helm.tar.gz && sudo tar -xvpf helm.tar.gz -C /usr/local/bin/
+    sudo chmod 755 /usr/local/bin/helm
+  fi
+}
+
+# Optional (call is commented out below): nginx TCP proxy on :8022 in front of the local API server.
+# NOTE(review): the content is a complete nginx.conf yet it is written to sites-available/default,
+# and load_module uses a /usr/lib64 path on an apt-based host - verify before enabling.
+function set_apiserver_l4_proxy()
+{
+  sudo apt update && sudo apt install nginx -y
+  # Quoted delimiter keeps nginx variables ($remote_addr, ...) away from shell expansion.
+  sudo tee /etc/nginx/sites-available/default > /dev/null << 'EOF'
+
+load_module /usr/lib64/nginx/modules/ngx_stream_module.so;
+
+worker_processes 4;
+worker_rlimit_nofile 40000;
+
+events {
+  worker_connections 8192;
+}
+
+stream {
+  log_format logs '$remote_addr - - [$time_local] $protocol $status $bytes_sent $bytes_received $session_time "$upstream_addr"';
+
+  access_log /var/log/nginx/access.log logs;
+
+  upstream K3s_api_server {
+    least_conn;
+    server 127.0.0.1:6443 max_fails=3 fail_timeout=5s;
+  }
+  server {
+    listen 8022;
+    server_name k3s-cluster.onwalk.net;
+    proxy_pass K3s_api_server;
+  }
+}
+EOF
+  sudo systemctl restart nginx
+}
+
+# Optional (call is commented out below): nginx HTTPS reverse proxy for the API server on :6443.
+function set_apiserver_l7_proxy()
+{
+  sudo apt update && sudo apt install nginx -y
+  sudo tee /etc/nginx/sites-available/default > /dev/null << 'EOF'
+
+http {
+  upstream api {
+    server kubernetes.default.svc.cluster.local:6443;
+  }
+
+  server {
+    listen 6443 ssl;
+    ssl_certificate /usr/local/nginx/ssl/apiserver.crt; # kube-apiserver cert
+    ssl_certificate_key /usr/local/nginx/ssl/apiserver.key; # kube-apiserver key
+    ssl_trusted_certificate /usr/local/nginx/ssl/ca.crt; # ca.pem
+
+    location / {
+    }
+
+    location /api/ {
+      rewrite ^/api(/.*)$ $1 break;
+      proxy_pass https://api;
+      proxy_ssl_certificate /etc/nginx/k8s-client-certificate.pem;
+      proxy_ssl_certificate_key /etc/nginx/k8s-client-key.key;
+      proxy_ssl_session_reuse on;
+    }
+  }
+}
+EOF
+  sudo systemctl restart nginx
+}
+
+disable_proxy="--disable-kube-proxy"
+disable_cni="--flannel-backend=none --disable-network-policy"
+default="--disable=traefik,servicelb --data-dir=/opt/rancher/k3s --kube-apiserver-arg service-node-port-range=0-50000"
+
+case $enable_api_access in
+  'true') api_opts="--bind-address=0.0.0.0" ;;
+  *) api_opts="" ;;
+esac
+
+case $cni in
+  'default') opts="$default $api_opts" ;;
+  'kubeovn') opts="$default $disable_cni $api_opts" ;;
+  'cilium') opts="$default $disable_cni $disable_proxy $api_opts" ;;
+  # Abort instead of installing K3s with an empty option set.
+  *) echo "error args" >&2; exit 1 ;;
+esac
+
+setup_k3s "$opts"
+setup_helm
+#set_apiserver_l4_proxy
+#set_apiserver_l7_proxy
diff --git a/roles/vhosts/k3s/meta/main.yml b/roles/vhosts/k3s/meta/main.yml
new file mode 100644
index 0000000..9711b33
--- /dev/null
+++ b/roles/vhosts/k3s/meta/main.yml
@@ -0,0 +1,2 @@
+dependencies:
+  - role: common
diff --git a/roles/vhosts/k3s/tasks/main.yml b/roles/vhosts/k3s/tasks/main.yml
new file mode 100755
index 0000000..af3f2d7
--- /dev/null
+++ b/roles/vhosts/k3s/tasks/main.yml
@@ -0,0 +1,11 @@
+- name: Setup K3S Server
+  script: files/setup-k3s.sh {{ version }} {{ cni }} {{ pod_cidr }} {{ svc_cidr }} {{ enable_api_access }}
+  when: inventory_hostname in groups[group]
+
+- name: Sync K3S CNI Config
+  template: src=templates/cni_install.sh dest=/tmp/ owner=root group=root mode=0644
+  when: ( inventory_hostname in groups[group] ) and (cni == 'kubeovn' )
+- name: Setup K3S CNI
+  shell: 'bash /tmp/cni_install.sh'
+  when: ( inventory_hostname in groups[group] ) and (cni == 'kubeovn' )
+  ignore_errors: yes
diff --git a/roles/vhosts/k3s/templates/cni_install.sh b/roles/vhosts/k3s/templates/cni_install.sh
new file mode 100644
index 0000000..1b004ba
--- /dev/null
+++ b/roles/vhosts/k3s/templates/cni_install.sh
@@ -0,0 +1,3657 @@
+#!/usr/bin/env bash
+set -euo pipefail
+
+IPV6=${IPV6:-false}
+DUAL_STACK=${DUAL_STACK:-false}
+ENABLE_SSL=${ENABLE_SSL:-false}
+ENABLE_VLAN=${ENABLE_VLAN:-false} +CHECK_GATEWAY=${CHECK_GATEWAY:-true} +LOGICAL_GATEWAY=${LOGICAL_GATEWAY:-false} +U2O_INTERCONNECTION=${U2O_INTERCONNECTION:-false} +ENABLE_MIRROR=${ENABLE_MIRROR:-false} +VLAN_NIC=${VLAN_NIC:-} +HW_OFFLOAD=${HW_OFFLOAD:-false} +ENABLE_LB=${ENABLE_LB:-true} +ENABLE_NP=${ENABLE_NP:-true} +ENABLE_EIP_SNAT=${ENABLE_EIP_SNAT:-true} +LS_DNAT_MOD_DL_DST=${LS_DNAT_MOD_DL_DST:-true} +ENABLE_EXTERNAL_VPC=${ENABLE_EXTERNAL_VPC:-true} +CNI_CONFIG_PRIORITY=${CNI_CONFIG_PRIORITY:-01} +ENABLE_LB_SVC=${ENABLE_LB_SVC:-false} +ENABLE_KEEP_VM_IP=${ENABLE_KEEP_VM_IP:-true} + +# exchange link names of OVS bridge and the provider nic +# in the default provider-network +EXCHANGE_LINK_NAME=${EXCHANGE_LINK_NAME:-false} +# The nic to support container network can be a nic name or a group of regex +# separated by comma, if empty will use the nic that the default route use +IFACE=${IFACE:-} +# Specifies the name of the dpdk tunnel iface. +# Note that the dpdk tunnel iface and tunnel ip cidr should be diffierent with Kubernetes api cidr,otherwise the route will be a problem. 
+DPDK_TUNNEL_IFACE=${DPDK_TUNNEL_IFACE:-br-phy} +ENABLE_BIND_LOCAL_IP=${ENABLE_BIND_LOCAL_IP:-true} + +# debug +DEBUG_WRAPPER=${DEBUG_WRAPPER:-} + +CNI_CONF_DIR="/etc/cni/net.d" +CNI_BIN_DIR="/opt/cni/bin" + +REGISTRY="kubeovn" +VERSION="v1.11.5" +IMAGE_PULL_POLICY="IfNotPresent" +POD_CIDR="{{ pod_cidr }}" # Do NOT overlap with NODE/SVC/JOIN CIDR +POD_GATEWAY="{{ pod_gateway }}" +SVC_CIDR="{{ svc_cidr }}" # Do NOT overlap with NODE/POD/JOIN CIDR +JOIN_CIDR="{{ join_cidr }}" # Do NOT overlap with NODE/POD/SVC CIDR +PINGER_EXTERNAL_ADDRESS="114.114.114.114" # Pinger check external ip probe +PINGER_EXTERNAL_DOMAIN="alauda.cn" # Pinger check external domain probe +SVC_YAML_IPFAMILYPOLICY="" +if [ "$IPV6" = "true" ]; then + POD_CIDR="fd00:10:16::/64" # Do NOT overlap with NODE/SVC/JOIN CIDR + POD_GATEWAY="fd00:10:16::1" + SVC_CIDR="fd00:10:96::/112" # Do NOT overlap with NODE/POD/JOIN CIDR + JOIN_CIDR="fd00:100:64::/64" # Do NOT overlap with NODE/POD/SVC CIDR + PINGER_EXTERNAL_ADDRESS="2400:3200::1" + PINGER_EXTERNAL_DOMAIN="google.com" +fi +if [ "$DUAL_STACK" = "true" ]; then + POD_CIDR="10.16.0.0/16,fd00:10:16::/64" # Do NOT overlap with NODE/SVC/JOIN CIDR + POD_GATEWAY="10.16.0.1,fd00:10:16::1" + SVC_CIDR="10.96.0.0/12,fd00:10:96::/112" # Do NOT overlap with NODE/POD/JOIN CIDR + JOIN_CIDR="100.64.0.0/16,fd00:100:64::/64" # Do NOT overlap with NODE/POD/SVC CIDR + PINGER_EXTERNAL_ADDRESS="114.114.114.114,2400:3200::1" + PINGER_EXTERNAL_DOMAIN="google.com" + SVC_YAML_IPFAMILYPOLICY="ipFamilyPolicy: PreferDualStack" +fi + +EXCLUDE_IPS="" # EXCLUDE_IPS for default subnet +LABEL="node-role.kubernetes.io/control-plane" # The node label to deploy OVN DB +DEPRECATED_LABEL="node-role.kubernetes.io/master" # The node label to deploy OVN DB in earlier versions +NETWORK_TYPE="geneve" # geneve or vlan +TUNNEL_TYPE="geneve" # geneve, vxlan or stt. 
ATTENTION: some networkpolicy cannot take effect when using vxlan and stt need custom compile ovs kernel module +POD_NIC_TYPE="veth-pair" # veth-pair or internal-port +POD_DEFAULT_FIP_TYPE="" # iptables, pod can set iptables fip automatically by enable fip annotation + +# VLAN Config only take effect when NETWORK_TYPE is vlan +PROVIDER_NAME="provider" +VLAN_INTERFACE_NAME="" +VLAN_NAME="ovn-vlan" +VLAN_ID="100" + +if [ "$ENABLE_VLAN" = "true" ]; then + NETWORK_TYPE="vlan" + if [ "$VLAN_NIC" != "" ]; then + VLAN_INTERFACE_NAME="$VLAN_NIC" + fi +fi + +# hybrid dpdk +HYBRID_DPDK="false" + +# DPDK +DPDK="false" +DPDK_SUPPORTED_VERSIONS=("19.11") +DPDK_VERSION="" +DPDK_CPU="1000m" # Default CPU configuration for if --dpdk-cpu flag is not included +DPDK_MEMORY="2Gi" # Default Memory configuration for it --dpdk-memory flag is not included + +# performance +MODULES="kube_ovn_fastpath.ko" +RPMS="openvswitch-kmod" +GC_INTERVAL=360 +INSPECT_INTERVAL=20 + +display_help() { + echo "Usage: $0 [option...]" + echo + echo " -h, --help Print Help (this message) and exit" + echo " --with-hybrid-dpdk Install Kube-OVN with nodes which run ovs-dpdk or ovs-kernel" + echo " --with-dpdk= Install Kube-OVN with OVS-DPDK instead of kernel OVS" + echo " --dpdk-cpu=m Configure DPDK to use a specific amount of CPU" + echo " --dpdk-memory=Gi Configure DPDK to use a specific amount of memory" + echo + exit 0 +} + +if [ -n "${1-}" ] +then + set +u + while :; do + case $1 in + -h|--help) + display_help + ;; + --with-hybrid-dpdk) + HYBRID_DPDK="true" + ;; + --with-dpdk=*) + DPDK=true + DPDK_VERSION="${1#*=}" + if [[ ! 
"${DPDK_SUPPORTED_VERSIONS[@]}" = "${DPDK_VERSION}" ]] || [[ -z "${DPDK_VERSION}" ]]; then + echo "Unsupported DPDK version: ${DPDK_VERSION}" + echo "Supported DPDK versions: ${DPDK_SUPPORTED_VERSIONS[*]}" + exit 1 + fi + ;; + --dpdk-cpu=*) + DPDK_CPU="${1#*=}" + if [[ $DPDK_CPU =~ ^[0-9]+(m)$ ]] + then + echo "CPU $DPDK_CPU" + else + echo "$DPDK_CPU is not valid, please use the format --dpdk-cpu=m" + exit 1 + fi + ;; + --dpdk-memory=*) + DPDK_MEMORY="${1#*=}" + if [[ $DPDK_MEMORY =~ ^[0-9]+(Gi)$ ]] + then + echo "MEMORY $DPDK_MEMORY" + else + echo "$DPDK_MEMORY is not valid, please use the format --dpdk-memory=Gi" + exit 1 + fi + ;; + -?*) + echo "Unknown argument $1" + exit 1 + ;; + *) break + esac + shift + done + set -u +fi + +echo "-------------------------------" +echo "Kube-OVN Version: $VERSION" +echo "Default Network Mode: $NETWORK_TYPE" +if [[ $NETWORK_TYPE = "vlan" ]];then + echo "Default Vlan Nic: $VLAN_INTERFACE_NAME" + echo "Default Vlan ID: $VLAN_ID" +fi +echo "Default Subnet CIDR: $POD_CIDR" +echo "Join Subnet CIDR: $JOIN_CIDR" +echo "Enable SVC LB: $ENABLE_LB" +echo "Enable Networkpolicy: $ENABLE_NP" +echo "Enable EIP and SNAT: $ENABLE_EIP_SNAT" +echo "Enable Mirror: $ENABLE_MIRROR" +echo "-------------------------------" + +if [[ $ENABLE_SSL = "true" ]];then + echo "[Step 0/6] Generate SSL key and cert" + exist=$(kubectl get secret -n kube-system kube-ovn-tls --ignore-not-found) + if [[ $exist == "" ]];then + docker run --rm -v "$PWD":/etc/ovn $REGISTRY/kube-ovn:$VERSION bash generate-ssl.sh + kubectl create secret generic -n kube-system kube-ovn-tls --from-file=cacert=cacert.pem --from-file=cert=ovn-cert.pem --from-file=key=ovn-privkey.pem + rm -rf cacert.pem ovn-cert.pem ovn-privkey.pem ovn-req.pem + fi + echo "-------------------------------" + echo "" +fi + +echo "[Step 1/6] Label kube-ovn-master node and label datapath type" +count=$(kubectl get no -l$LABEL --no-headers | wc -l) +node_label="$LABEL" +if [ $count -eq 0 ]; then + 
count=$(kubectl get no -l$DEPRECATED_LABEL --no-headers | wc -l) + node_label="$DEPRECATED_LABEL" + if [ $count -eq 0 ]; then + echo "ERROR: No node with label $LABEL or $DEPRECATED_LABEL found" + exit 1 + fi +fi +kubectl label no -l$node_label kube-ovn/role=master --overwrite + +if [ "$DPDK" = "true" -o "$HYBRID_DPDK" = "true" ]; then + kubectl label no -lovn.kubernetes.io/ovs_dp_type!=userspace ovn.kubernetes.io/ovs_dp_type=kernel --overwrite +fi + +echo "-------------------------------" +echo "" + +echo "[Step 2/6] Install OVN components" +addresses=$(kubectl get no -lkube-ovn/role=master --no-headers -o wide | awk '{print $6}' | tr \\n ',') +count=$(kubectl get no -lkube-ovn/role=master --no-headers | wc -l) +echo "Install OVN DB in $addresses" + +cat < kube-ovn-crd.yaml +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + name: vpc-dnses.kubeovn.io +spec: + group: kubeovn.io + names: + plural: vpc-dnses + singular: vpc-dns + shortNames: + - vpc-dns + kind: VpcDns + listKind: VpcDnsList + scope: Cluster + versions: + - additionalPrinterColumns: + - jsonPath: .status.active + name: Active + type: boolean + - jsonPath: .spec.vpc + name: Vpc + type: string + - jsonPath: .spec.subnet + name: Subnet + type: string + name: v1 + served: true + storage: true + subresources: + status: {} + schema: + openAPIV3Schema: + type: object + properties: + spec: + type: object + properties: + vpc: + type: string + subnet: + type: string + status: + type: object + properties: + active: + type: boolean + conditions: + type: array + items: + type: object + properties: + type: + type: string + status: + type: string + reason: + type: string + message: + type: string + lastUpdateTime: + type: string + lastTransitionTime: + type: string +--- +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + name: switch-lb-rules.kubeovn.io +spec: + group: kubeovn.io + names: + plural: switch-lb-rules + singular: switch-lb-rule + shortNames: + 
- slr + kind: SwitchLBRule + listKind: SwitchLBRuleList + scope: Cluster + versions: + - additionalPrinterColumns: + - jsonPath: .spec.vip + name: vip + type: string + - jsonPath: .status.ports + name: port(s) + type: string + - jsonPath: .status.service + name: service + type: string + - jsonPath: .metadata.creationTimestamp + name: age + type: date + name: v1 + served: true + storage: true + subresources: + status: {} + schema: + openAPIV3Schema: + type: object + properties: + spec: + type: object + properties: + namespace: + type: string + vip: + type: string + sessionAffinity: + type: string + ports: + items: + properties: + name: + type: string + port: + type: integer + minimum: 1 + maximum: 65535 + protocol: + type: string + targetPort: + type: integer + minimum: 1 + maximum: 65535 + type: object + type: array + selector: + items: + type: string + type: array + status: + type: object + properties: + ports: + type: string + service: + type: string +--- +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + name: vpc-nat-gateways.kubeovn.io +spec: + group: kubeovn.io + names: + plural: vpc-nat-gateways + singular: vpc-nat-gateway + shortNames: + - vpc-nat-gw + kind: VpcNatGateway + listKind: VpcNatGatewayList + scope: Cluster + versions: + - additionalPrinterColumns: + - jsonPath: .spec.vpc + name: Vpc + type: string + - jsonPath: .spec.subnet + name: Subnet + type: string + - jsonPath: .spec.lanIp + name: LanIP + type: string + name: v1 + served: true + storage: true + schema: + openAPIV3Schema: + type: object + properties: + spec: + type: object + properties: + lanIp: + type: string + subnet: + type: string + vpc: + type: string + selector: + type: array + items: + type: string + tolerations: + type: array + items: + type: object + properties: + key: + type: string + operator: + type: string + value: + type: string + effect: + type: string + tolerationSeconds: + type: integer +--- +apiVersion: apiextensions.k8s.io/v1 +kind: 
CustomResourceDefinition +metadata: + name: iptables-eips.kubeovn.io +spec: + group: kubeovn.io + names: + plural: iptables-eips + singular: iptables-eip + shortNames: + - eip + kind: IptablesEIP + listKind: IptablesEIPList + scope: Cluster + versions: + - name: v1 + served: true + storage: true + subresources: + status: {} + additionalPrinterColumns: + - jsonPath: .status.ip + name: IP + type: string + - jsonPath: .spec.macAddress + name: Mac + type: string + - jsonPath: .status.nat + name: Nat + type: string + - jsonPath: .spec.natGwDp + name: NatGwDp + type: string + - jsonPath: .status.ready + name: Ready + type: boolean + schema: + openAPIV3Schema: + type: object + properties: + status: + type: object + properties: + ready: + type: boolean + ip: + type: string + nat: + type: string + redo: + type: string + conditions: + type: array + items: + type: object + properties: + type: + type: string + status: + type: string + reason: + type: string + message: + type: string + lastUpdateTime: + type: string + lastTransitionTime: + type: string + spec: + type: object + properties: + v4ip: + type: string + v6ip: + type: string + macAddress: + type: string + natGwDp: + type: string +--- +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + name: iptables-fip-rules.kubeovn.io +spec: + group: kubeovn.io + names: + plural: iptables-fip-rules + singular: iptables-fip-rule + shortNames: + - fip + kind: IptablesFIPRule + listKind: IptablesFIPRuleList + scope: Cluster + versions: + - name: v1 + served: true + storage: true + subresources: + status: {} + additionalPrinterColumns: + - jsonPath: .spec.eip + name: Eip + type: string + - jsonPath: .status.v4ip + name: V4ip + type: string + - jsonPath: .spec.internalIp + name: InternalIp + type: string + - jsonPath: .status.v6ip + name: V6ip + type: string + - jsonPath: .status.ready + name: Ready + type: boolean + - jsonPath: .status.natGwDp + name: NatGwDp + type: string + schema: + openAPIV3Schema: + 
type: object + properties: + status: + type: object + properties: + ready: + type: boolean + v4ip: + type: string + v6ip: + type: string + natGwDp: + type: string + redo: + type: string + conditions: + type: array + items: + type: object + properties: + type: + type: string + status: + type: string + reason: + type: string + message: + type: string + lastUpdateTime: + type: string + lastTransitionTime: + type: string + spec: + type: object + properties: + eip: + type: string + internalIp: + type: string +--- +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + name: iptables-dnat-rules.kubeovn.io +spec: + group: kubeovn.io + names: + plural: iptables-dnat-rules + singular: iptables-dnat-rule + shortNames: + - dnat + kind: IptablesDnatRule + listKind: IptablesDnatRuleList + scope: Cluster + versions: + - name: v1 + served: true + storage: true + subresources: + status: {} + additionalPrinterColumns: + - jsonPath: .spec.eip + name: Eip + type: string + - jsonPath: .spec.protocol + name: Protocol + type: string + - jsonPath: .status.v4ip + name: V4ip + type: string + - jsonPath: .status.v6ip + name: V6ip + type: string + - jsonPath: .spec.internalIp + name: InternalIp + type: string + - jsonPath: .spec.externalPort + name: ExternalPort + type: string + - jsonPath: .spec.internalPort + name: InternalPort + type: string + - jsonPath: .status.natGwDp + name: NatGwDp + type: string + - jsonPath: .status.ready + name: Ready + type: boolean + schema: + openAPIV3Schema: + type: object + properties: + status: + type: object + properties: + ready: + type: boolean + v4ip: + type: string + v6ip: + type: string + natGwDp: + type: string + redo: + type: string + protocol: + type: string + internalIp: + type: string + internalPort: + type: string + externalPort: + type: string + conditions: + type: array + items: + type: object + properties: + type: + type: string + status: + type: string + reason: + type: string + message: + type: string + 
lastUpdateTime: + type: string + lastTransitionTime: + type: string + spec: + type: object + properties: + eip: + type: string + externalPort: + type: string + protocol: + type: string + internalIp: + type: string + internalPort: + type: string +--- +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + name: iptables-snat-rules.kubeovn.io +spec: + group: kubeovn.io + names: + plural: iptables-snat-rules + singular: iptables-snat-rule + shortNames: + - snat + kind: IptablesSnatRule + listKind: IptablesSnatRuleList + scope: Cluster + versions: + - name: v1 + served: true + storage: true + subresources: + status: {} + additionalPrinterColumns: + - jsonPath: .spec.eip + name: EIP + type: string + - jsonPath: .status.v4ip + name: V4ip + type: string + - jsonPath: .status.v6ip + name: V6ip + type: string + - jsonPath: .spec.internalCIDR + name: InternalCIDR + type: string + - jsonPath: .status.natGwDp + name: NatGwDp + type: string + - jsonPath: .status.ready + name: Ready + type: boolean + schema: + openAPIV3Schema: + type: object + properties: + status: + type: object + properties: + ready: + type: boolean + v4ip: + type: string + v6ip: + type: string + natGwDp: + type: string + redo: + type: string + conditions: + type: array + items: + type: object + properties: + type: + type: string + status: + type: string + reason: + type: string + message: + type: string + lastUpdateTime: + type: string + lastTransitionTime: + type: string + spec: + type: object + properties: + eip: + type: string + internalCIDR: + type: string +--- +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + name: ovn-eips.kubeovn.io +spec: + group: kubeovn.io + names: + plural: ovn-eips + singular: ovn-eip + shortNames: + - oeip + kind: OvnEip + listKind: OvnEipList + scope: Cluster + versions: + - name: v1 + served: true + storage: true + subresources: + status: {} + additionalPrinterColumns: + - jsonPath: .spec.v4ip + name: IP + type: string + 
- jsonPath: .spec.macAddress + name: Mac + type: string + - jsonPath: .spec.type + name: Type + type: string + schema: + openAPIV3Schema: + type: object + properties: + status: + type: object + properties: + v4Ip: + type: string + macAddress: + type: string + conditions: + type: array + items: + type: object + properties: + type: + type: string + status: + type: string + reason: + type: string + message: + type: string + lastUpdateTime: + type: string + lastTransitionTime: + type: string + spec: + type: object + properties: + externalSubnet: + type: string + type: + type: string + v4ip: + type: string + macAddress: + type: string +--- +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + name: ovn-fips.kubeovn.io +spec: + group: kubeovn.io + names: + plural: ovn-fips + singular: ovn-fip + shortNames: + - ofip + kind: OvnFip + listKind: OvnFipList + scope: Cluster + versions: + - name: v1 + served: true + storage: true + subresources: + status: {} + additionalPrinterColumns: + - jsonPath: .status.vpc + name: Vpc + type: string + - jsonPath: .status.v4Eip + name: V4Eip + type: string + - jsonPath: .status.v4Ip + name: V4Ip + type: string + - jsonPath: .status.ready + name: Ready + type: boolean + schema: + openAPIV3Schema: + type: object + properties: + status: + type: object + properties: + ready: + type: boolean + v4Eip: + type: string + v4Ip: + type: string + macAddress: + type: string + vpc: + type: string + conditions: + type: array + items: + type: object + properties: + type: + type: string + status: + type: string + reason: + type: string + message: + type: string + lastUpdateTime: + type: string + lastTransitionTime: + type: string + spec: + type: object + properties: + ovnEip: + type: string + ipName: + type: string +--- +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + name: ovn-snat-rules.kubeovn.io +spec: + group: kubeovn.io + names: + plural: ovn-snat-rules + singular: ovn-snat-rule + 
shortNames: + - osnat + kind: OvnSnatRule + listKind: OvnSnatRuleList + scope: Cluster + versions: + - name: v1 + served: true + storage: true + subresources: + status: {} + additionalPrinterColumns: + - jsonPath: .status.vpc + name: Vpc + type: string + - jsonPath: .status.v4Eip + name: V4Eip + type: string + - jsonPath: .status.v4ipCidr + name: V4Ip + type: string + - jsonPath: .status.ready + name: Ready + type: boolean + schema: + openAPIV3Schema: + type: object + properties: + status: + type: object + properties: + ready: + type: boolean + v4Eip: + type: string + v4ipCidr: + type: string + vpc: + type: string + conditions: + type: array + items: + type: object + properties: + type: + type: string + status: + type: string + reason: + type: string + message: + type: string + lastUpdateTime: + type: string + lastTransitionTime: + type: string + spec: + type: object + properties: + ovnEip: + type: string + vpcSubnet: + type: string + ipName: + type: string +--- +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + name: vpcs.kubeovn.io +spec: + group: kubeovn.io + versions: + - additionalPrinterColumns: + - jsonPath: .status.enableExternal + name: EnableExternal + type: boolean + - jsonPath: .status.standby + name: Standby + type: boolean + - jsonPath: .status.subnets + name: Subnets + type: string + - jsonPath: .spec.namespaces + name: Namespaces + type: string + name: v1 + schema: + openAPIV3Schema: + properties: + spec: + properties: + enableExternal: + type: boolean + namespaces: + items: + type: string + type: array + staticRoutes: + items: + properties: + policy: + type: string + cidr: + type: string + nextHopIP: + type: string + type: object + type: array + policyRoutes: + items: + properties: + priority: + type: integer + action: + type: string + match: + type: string + nextHopIP: + type: string + type: object + type: array + vpcPeerings: + items: + properties: + remoteVpc: + type: string + localConnectIP: + type: string + type: 
object + type: array + type: object + status: + properties: + conditions: + items: + properties: + lastTransitionTime: + type: string + lastUpdateTime: + type: string + message: + type: string + reason: + type: string + status: + type: string + type: + type: string + type: object + type: array + default: + type: boolean + defaultLogicalSwitch: + type: string + router: + type: string + standby: + type: boolean + enableExternal: + type: boolean + subnets: + items: + type: string + type: array + vpcPeerings: + items: + type: string + type: array + tcpLoadBalancer: + type: string + tcpSessionLoadBalancer: + type: string + udpLoadBalancer: + type: string + udpSessionLoadBalancer: + type: string + sctpLoadBalancer: + type: string + sctpSessionLoadBalancer: + type: string + type: object + type: object + served: true + storage: true + subresources: + status: {} + names: + kind: Vpc + listKind: VpcList + plural: vpcs + shortNames: + - vpc + singular: vpc + scope: Cluster +--- +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + name: ips.kubeovn.io +spec: + group: kubeovn.io + versions: + - name: v1 + served: true + storage: true + additionalPrinterColumns: + - name: V4IP + type: string + jsonPath: .spec.v4IpAddress + - name: V6IP + type: string + jsonPath: .spec.v6IpAddress + - name: Mac + type: string + jsonPath: .spec.macAddress + - name: Node + type: string + jsonPath: .spec.nodeName + - name: Subnet + type: string + jsonPath: .spec.subnet + schema: + openAPIV3Schema: + type: object + properties: + spec: + type: object + properties: + podName: + type: string + namespace: + type: string + subnet: + type: string + attachSubnets: + type: array + items: + type: string + nodeName: + type: string + ipAddress: + type: string + v4IpAddress: + type: string + v6IpAddress: + type: string + attachIps: + type: array + items: + type: string + macAddress: + type: string + attachMacs: + type: array + items: + type: string + containerID: + type: string + 
podType: + type: string + scope: Cluster + names: + plural: ips + singular: ip + kind: IP + shortNames: + - ip +--- +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + name: vips.kubeovn.io +spec: + group: kubeovn.io + names: + plural: vips + singular: vip + shortNames: + - vip + kind: Vip + listKind: VipList + scope: Cluster + versions: + - name: v1 + served: true + storage: true + additionalPrinterColumns: + - name: V4IP + type: string + jsonPath: .status.v4ip + - name: PV4IP + type: string + jsonPath: .spec.parentV4ip + - name: Mac + type: string + jsonPath: .status.mac + - name: PMac + type: string + jsonPath: .spec.parentMac + - name: V6IP + type: string + jsonPath: .status.v6ip + - name: PV6IP + type: string + jsonPath: .spec.parentV6ip + - name: Subnet + type: string + jsonPath: .spec.subnet + - jsonPath: .status.ready + name: Ready + type: boolean + schema: + openAPIV3Schema: + type: object + properties: + status: + type: object + properties: + ready: + type: boolean + v4ip: + type: string + v6ip: + type: string + mac: + type: string + pv4ip: + type: string + pv6ip: + type: string + pmac: + type: string + conditions: + type: array + items: + type: object + properties: + type: + type: string + status: + type: string + reason: + type: string + message: + type: string + lastUpdateTime: + type: string + lastTransitionTime: + type: string + spec: + type: object + properties: + namespace: + type: string + subnet: + type: string + attachSubnets: + type: array + items: + type: string + v4ip: + type: string + macAddress: + type: string + v6ip: + type: string + parentV4ip: + type: string + parentMac: + type: string + parentV6ip: + type: string +--- +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + name: subnets.kubeovn.io +spec: + group: kubeovn.io + versions: + - name: v1 + served: true + storage: true + subresources: + status: {} + additionalPrinterColumns: + - name: Provider + type: string + jsonPath: 
.spec.provider + - name: Vpc + type: string + jsonPath: .spec.vpc + - name: Protocol + type: string + jsonPath: .spec.protocol + - name: CIDR + type: string + jsonPath: .spec.cidrBlock + - name: Private + type: boolean + jsonPath: .spec.private + - name: NAT + type: boolean + jsonPath: .spec.natOutgoing + - name: Default + type: boolean + jsonPath: .spec.default + - name: GatewayType + type: string + jsonPath: .spec.gatewayType + - name: V4Used + type: number + jsonPath: .status.v4usingIPs + - name: V4Available + type: number + jsonPath: .status.v4availableIPs + - name: V6Used + type: number + jsonPath: .status.v6usingIPs + - name: V6Available + type: number + jsonPath: .status.v6availableIPs + - name: ExcludeIPs + type: string + jsonPath: .spec.excludeIps + - name: U2OInterconnectionIP + type: string + jsonPath: .status.u2oInterconnectionIP + schema: + openAPIV3Schema: + type: object + properties: + status: + type: object + properties: + v4availableIPs: + type: number + v4usingIPs: + type: number + v6availableIPs: + type: number + v6usingIPs: + type: number + activateGateway: + type: string + dhcpV4OptionsUUID: + type: string + dhcpV6OptionsUUID: + type: string + u2oInterconnectionIP: + type: string + u2oInterconnectionVPC: + type: string + conditions: + type: array + items: + type: object + properties: + type: + type: string + status: + type: string + reason: + type: string + message: + type: string + lastUpdateTime: + type: string + lastTransitionTime: + type: string + spec: + type: object + properties: + vpc: + type: string + default: + type: boolean + protocol: + type: string + enum: + - IPv4 + - IPv6 + - Dual + cidrBlock: + type: string + namespaces: + type: array + items: + type: string + gateway: + type: string + provider: + type: string + excludeIps: + type: array + items: + type: string + vips: + type: array + items: + type: string + gatewayType: + type: string + allowSubnets: + type: array + items: + type: string + gatewayNode: + type: string + 
natOutgoing: + type: boolean + u2oRouting: + type: boolean + externalEgressGateway: + type: string + policyRoutingPriority: + type: integer + minimum: 1 + maximum: 32765 + policyRoutingTableID: + type: integer + minimum: 1 + maximum: 2147483647 + not: + enum: + - 252 # compat + - 253 # default + - 254 # main + - 255 # local + private: + type: boolean + vlan: + type: string + logicalGateway: + type: boolean + disableGatewayCheck: + type: boolean + disableInterConnection: + type: boolean + enableDHCP: + type: boolean + dhcpV4Options: + type: string + dhcpV6Options: + type: string + enableIPv6RA: + type: boolean + ipv6RAConfigs: + type: string + acls: + type: array + items: + type: object + properties: + direction: + type: string + enum: + - from-lport + - to-lport + priority: + type: integer + minimum: 0 + maximum: 32767 + match: + type: string + action: + type: string + enum: + - allow-related + - allow-stateless + - allow + - drop + - reject + u2oInterconnection: + type: boolean + scope: Cluster + names: + plural: subnets + singular: subnet + kind: Subnet + shortNames: + - subnet +--- +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + name: vlans.kubeovn.io +spec: + group: kubeovn.io + versions: + - name: v1 + served: true + storage: true + subresources: + status: {} + schema: + openAPIV3Schema: + type: object + properties: + spec: + type: object + properties: + id: + type: integer + minimum: 0 + maximum: 4095 + provider: + type: string + vlanId: + type: integer + description: Deprecated in favor of id + providerInterfaceName: + type: string + description: Deprecated in favor of provider + required: + - provider + status: + type: object + properties: + subnets: + type: array + items: + type: string + additionalPrinterColumns: + - name: ID + type: string + jsonPath: .spec.id + - name: Provider + type: string + jsonPath: .spec.provider + scope: Cluster + names: + plural: vlans + singular: vlan + kind: Vlan + shortNames: + - vlan +--- 
+apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + name: provider-networks.kubeovn.io +spec: + group: kubeovn.io + versions: + - name: v1 + served: true + storage: true + subresources: + status: {} + schema: + openAPIV3Schema: + type: object + properties: + metadata: + type: object + properties: + name: + type: string + maxLength: 12 + not: + enum: + - int + - external + spec: + type: object + properties: + defaultInterface: + type: string + maxLength: 15 + pattern: '^[^/\s]+$' + customInterfaces: + type: array + items: + type: object + properties: + interface: + type: string + maxLength: 15 + pattern: '^[^/\s]+$' + nodes: + type: array + items: + type: string + exchangeLinkName: + type: boolean + excludeNodes: + type: array + items: + type: string + required: + - defaultInterface + status: + type: object + properties: + ready: + type: boolean + readyNodes: + type: array + items: + type: string + notReadyNodes: + type: array + items: + type: string + vlans: + type: array + items: + type: string + conditions: + type: array + items: + type: object + properties: + node: + type: string + type: + type: string + status: + type: string + reason: + type: string + message: + type: string + lastUpdateTime: + type: string + lastTransitionTime: + type: string + additionalPrinterColumns: + - name: DefaultInterface + type: string + jsonPath: .spec.defaultInterface + - name: Ready + type: boolean + jsonPath: .status.ready + scope: Cluster + names: + plural: provider-networks + singular: provider-network + kind: ProviderNetwork + listKind: ProviderNetworkList +--- +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + name: security-groups.kubeovn.io +spec: + group: kubeovn.io + names: + plural: security-groups + singular: security-group + shortNames: + - sg + kind: SecurityGroup + listKind: SecurityGroupList + scope: Cluster + versions: + - name: v1 + served: true + storage: true + schema: + openAPIV3Schema: + type: object 
+ properties: + spec: + type: object + properties: + ingressRules: + type: array + items: + type: object + properties: + ipVersion: + type: string + protocol: + type: string + priority: + type: integer + remoteType: + type: string + remoteAddress: + type: string + remoteSecurityGroup: + type: string + portRangeMin: + type: integer + portRangeMax: + type: integer + policy: + type: string + egressRules: + type: array + items: + type: object + properties: + ipVersion: + type: string + protocol: + type: string + priority: + type: integer + remoteType: + type: string + remoteAddress: + type: string + remoteSecurityGroup: + type: string + portRangeMin: + type: integer + portRangeMax: + type: integer + policy: + type: string + allowSameGroupTraffic: + type: boolean + status: + type: object + properties: + portGroup: + type: string + allowSameGroupTraffic: + type: boolean + ingressMd5: + type: string + egressMd5: + type: string + ingressLastSyncSuccess: + type: boolean + egressLastSyncSuccess: + type: boolean + subresources: + status: {} + conversion: + strategy: None +EOF + +if $DPDK; then + cat <<EOF > ovn.yaml +--- +apiVersion: v1 +kind: ServiceAccount +metadata: + name: ovn + namespace: kube-system + +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + annotations: + rbac.authorization.k8s.io/system-only: "true" + name: system:ovn +rules: + - apiGroups: + - "kubeovn.io" + resources: + - vpcs + - vpcs/status + - vpc-nat-gateways + - subnets + - subnets/status + - ips + - vips + - vips/status + - vlans + - vlans/status + - provider-networks + - provider-networks/status + - security-groups + - security-groups/status + - iptables-eips + - iptables-fip-rules + - iptables-dnat-rules + - iptables-snat-rules + - iptables-eips/status + - iptables-fip-rules/status + - iptables-dnat-rules/status + - iptables-snat-rules/status + - ovn-eips + - ovn-fips + - ovn-snat-rules + - ovn-eips/status + - ovn-fips/status + - ovn-snat-rules/status + - switch-lb-rules + - 
switch-lb-rules/status + - vpc-dnses + - vpc-dnses/status + verbs: + - "*" + - apiGroups: + - "" + resources: + - pods + - pods/exec + - namespaces + - nodes + - configmaps + verbs: + - create + - get + - list + - watch + - patch + - update + - apiGroups: + - "k8s.cni.cncf.io" + resources: + - network-attachment-definitions + verbs: + - create + - delete + - get + - list + - update + - apiGroups: + - "" + - networking.k8s.io + - apps + - extensions + resources: + - networkpolicies + - services + - services/status + - endpoints + - statefulsets + - daemonsets + - deployments + - deployments/scale + verbs: + - create + - delete + - update + - patch + - get + - list + - watch + - apiGroups: + - "" + resources: + - events + verbs: + - create + - patch + - update + - apiGroups: + - coordination.k8s.io + resources: + - leases + verbs: + - "*" + - apiGroups: + - "kubevirt.io" + resources: + - virtualmachines + - virtualmachineinstances + verbs: + - get + - list +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: ovn +roleRef: + name: system:ovn + kind: ClusterRole + apiGroup: rbac.authorization.k8s.io +subjects: + - kind: ServiceAccount + name: ovn + namespace: kube-system + +--- +kind: Service +apiVersion: v1 +metadata: + name: ovn-nb + namespace: kube-system +spec: + ports: + - name: ovn-nb + protocol: TCP + port: 6641 + targetPort: 6641 + type: ClusterIP + ${SVC_YAML_IPFAMILYPOLICY} + selector: + app: ovn-central + ovn-nb-leader: "true" + sessionAffinity: None + +--- +kind: Service +apiVersion: v1 +metadata: + name: ovn-sb + namespace: kube-system +spec: + ports: + - name: ovn-sb + protocol: TCP + port: 6642 + targetPort: 6642 + type: ClusterIP + ${SVC_YAML_IPFAMILYPOLICY} + selector: + app: ovn-central + ovn-sb-leader: "true" + sessionAffinity: None + +--- +kind: Service +apiVersion: v1 +metadata: + name: ovn-northd + namespace: kube-system +spec: + ports: + - name: ovn-northd + protocol: TCP + port: 6643 + targetPort: 6643 + 
type: ClusterIP + ${SVC_YAML_IPFAMILYPOLICY} + selector: + app: ovn-central + ovn-northd-leader: "true" + sessionAffinity: None +--- +kind: Deployment +apiVersion: apps/v1 +metadata: + name: ovn-central + namespace: kube-system + annotations: + kubernetes.io/description: | + OVN components: northd, nb and sb. +spec: + replicas: $count + strategy: + rollingUpdate: + maxSurge: 0 + maxUnavailable: 1 + type: RollingUpdate + selector: + matchLabels: + app: ovn-central + template: + metadata: + labels: + app: ovn-central + component: network + type: infra + spec: + tolerations: + - effect: NoSchedule + operator: Exists + - effect: NoExecute + operator: Exists + - key: CriticalAddonsOnly + operator: Exists + affinity: + podAntiAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + - labelSelector: + matchLabels: + app: ovn-central + topologyKey: kubernetes.io/hostname + priorityClassName: system-cluster-critical + serviceAccountName: ovn + hostNetwork: true + containers: + - name: ovn-central + image: "$REGISTRY/kube-ovn:$VERSION" + imagePullPolicy: $IMAGE_PULL_POLICY + command: ["/kube-ovn/start-db.sh"] + securityContext: + capabilities: + add: ["SYS_NICE"] + env: + - name: ENABLE_SSL + value: "$ENABLE_SSL" + - name: NODE_IPS + value: $addresses + - name: POD_IP + valueFrom: + fieldRef: + fieldPath: status.podIP + - name: POD_NAME + valueFrom: + fieldRef: + fieldPath: metadata.name + - name: POD_NAMESPACE + valueFrom: + fieldRef: + fieldPath: metadata.namespace + - name: POD_IPS + valueFrom: + fieldRef: + fieldPath: status.podIPs + - name: ENABLE_BIND_LOCAL_IP + value: "$ENABLE_BIND_LOCAL_IP" + - name: DEBUG_WRAPPER + value: "$DEBUG_WRAPPER" + resources: + requests: + cpu: 300m + memory: 300Mi + limits: + cpu: 3 + memory: 4Gi + volumeMounts: + - mountPath: /var/run/openvswitch + name: host-run-ovs + - mountPath: /var/run/ovn + name: host-run-ovn + - mountPath: /sys + name: host-sys + readOnly: true + - mountPath: /etc/openvswitch + name: host-config-openvswitch + 
- mountPath: /etc/ovn + name: host-config-ovn + - mountPath: /var/log/openvswitch + name: host-log-ovs + - mountPath: /var/log/ovn + name: host-log-ovn + - mountPath: /etc/localtime + name: localtime + - mountPath: /var/run/tls + name: kube-ovn-tls + readinessProbe: + exec: + command: + - bash + - /kube-ovn/ovn-healthcheck.sh + periodSeconds: 15 + timeoutSeconds: 45 + livenessProbe: + exec: + command: + - bash + - /kube-ovn/ovn-healthcheck.sh + initialDelaySeconds: 30 + periodSeconds: 15 + failureThreshold: 5 + timeoutSeconds: 45 + nodeSelector: + kubernetes.io/os: "linux" + kube-ovn/role: "master" + volumes: + - name: host-run-ovs + hostPath: + path: /run/openvswitch + - name: host-run-ovn + hostPath: + path: /run/ovn + - name: host-sys + hostPath: + path: /sys + - name: host-config-openvswitch + hostPath: + path: /etc/origin/openvswitch + - name: host-config-ovn + hostPath: + path: /etc/origin/ovn + - name: host-log-ovs + hostPath: + path: /var/log/openvswitch + - name: host-log-ovn + hostPath: + path: /var/log/ovn + - name: localtime + hostPath: + path: /etc/localtime + - name: kube-ovn-tls + secret: + optional: true + secretName: kube-ovn-tls + +--- +kind: DaemonSet +apiVersion: apps/v1 +metadata: + name: ovs-ovn + namespace: kube-system + annotations: + kubernetes.io/description: | + This daemon set launches the openvswitch daemon. 
+spec: + selector: + matchLabels: + app: ovs + updateStrategy: + type: OnDelete + template: + metadata: + labels: + app: ovs + component: network + type: infra + spec: + tolerations: + - effect: NoSchedule + operator: Exists + - effect: NoExecute + operator: Exists + - key: CriticalAddonsOnly + operator: Exists + priorityClassName: system-node-critical + serviceAccountName: ovn + hostNetwork: true + hostPID: true + containers: + - name: openvswitch + image: "$REGISTRY/kube-ovn-dpdk:$DPDK_VERSION-$VERSION" + imagePullPolicy: $IMAGE_PULL_POLICY + command: ["/kube-ovn/start-ovs-dpdk.sh"] + securityContext: + runAsUser: 0 + privileged: true + env: + - name: ENABLE_SSL + value: "$ENABLE_SSL" + - name: POD_IP + valueFrom: + fieldRef: + fieldPath: status.podIP + - name: KUBE_NODE_NAME + valueFrom: + fieldRef: + fieldPath: spec.nodeName + - name: OVN_DB_IPS + value: $addresses + volumeMounts: + - mountPath: /var/run/netns + name: host-ns + mountPropagation: HostToContainer + - mountPath: /lib/modules + name: host-modules + readOnly: true + - mountPath: /var/run/openvswitch + name: host-run-ovs + - mountPath: /var/run/ovn + name: host-run-ovn + - mountPath: /sys + name: host-sys + readOnly: true + - mountPath: /etc/cni/net.d + name: cni-conf + - mountPath: /etc/openvswitch + name: host-config-openvswitch + - mountPath: /etc/ovn + name: host-config-ovn + - mountPath: /var/log/openvswitch + name: host-log-ovs + - mountPath: /var/log/ovn + name: host-log-ovn + - mountPath: /opt/ovs-config + name: host-config-ovs + - mountPath: /dev/hugepages + name: hugepage + - mountPath: /etc/localtime + name: localtime + - mountPath: /var/run/tls + name: kube-ovn-tls + readinessProbe: + exec: + command: + - bash + - /kube-ovn/ovs-dpdk-healthcheck.sh + periodSeconds: 5 + timeoutSeconds: 45 + livenessProbe: + exec: + command: + - bash + - /kube-ovn/ovs-dpdk-healthcheck.sh + initialDelaySeconds: 60 + periodSeconds: 5 + failureThreshold: 5 + timeoutSeconds: 45 + resources: + requests: + cpu: 
$DPDK_CPU + memory: $DPDK_MEMORY + limits: + cpu: $DPDK_CPU + memory: $DPDK_MEMORY + hugepages-1Gi: 1Gi + nodeSelector: + kubernetes.io/os: "linux" + ovn.kubernetes.io/ovs_dp_type: "kernel" + volumes: + - name: host-modules + hostPath: + path: /lib/modules + - name: host-run-ovs + hostPath: + path: /run/openvswitch + - name: host-run-ovn + hostPath: + path: /run/ovn + - name: host-sys + hostPath: + path: /sys + - name: host-ns + hostPath: + path: /var/run/netns + - name: cni-conf + hostPath: + path: /etc/cni/net.d + - name: host-config-openvswitch + hostPath: + path: /etc/origin/openvswitch + - name: host-config-ovn + hostPath: + path: /etc/origin/ovn + - name: host-log-ovs + hostPath: + path: /var/log/openvswitch + - name: host-log-ovn + hostPath: + path: /var/log/ovn + - name: host-config-ovs + hostPath: + path: /opt/ovs-config + type: DirectoryOrCreate + - name: hugepage + emptyDir: + medium: HugePages + - name: localtime + hostPath: + path: /etc/localtime + - name: kube-ovn-tls + secret: + optional: true + secretName: kube-ovn-tls +EOF + +else + cat <<EOF > ovn.yaml +--- +apiVersion: v1 +kind: ServiceAccount +metadata: + name: ovn + namespace: kube-system +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + annotations: + rbac.authorization.k8s.io/system-only: "true" + name: system:ovn +rules: + - apiGroups: + - "kubeovn.io" + resources: + - vpcs + - vpcs/status + - vpc-nat-gateways + - subnets + - subnets/status + - ips + - vips + - vips/status + - vlans + - vlans/status + - provider-networks + - provider-networks/status + - security-groups + - security-groups/status + - iptables-eips + - iptables-fip-rules + - iptables-dnat-rules + - iptables-snat-rules + - iptables-eips/status + - iptables-fip-rules/status + - iptables-dnat-rules/status + - iptables-snat-rules/status + - ovn-eips + - ovn-fips + - ovn-snat-rules + - ovn-eips/status + - ovn-fips/status + - ovn-snat-rules/status + - vpc-dnses + - vpc-dnses/status + - switch-lb-rules + - 
switch-lb-rules/status + verbs: + - "*" + - apiGroups: + - "" + resources: + - pods + - pods/exec + - namespaces + - nodes + - configmaps + verbs: + - create + - get + - list + - watch + - patch + - update + - apiGroups: + - "" + - networking.k8s.io + - apps + - extensions + resources: + - networkpolicies + - services + - services/status + - endpoints + - statefulsets + - daemonsets + - deployments + - deployments/scale + verbs: + - create + - delete + - update + - patch + - get + - list + - watch + - apiGroups: + - "" + resources: + - events + verbs: + - create + - patch + - update + - apiGroups: + - coordination.k8s.io + resources: + - leases + verbs: + - "*" + - apiGroups: + - "k8s.cni.cncf.io" + resources: + - network-attachment-definitions + verbs: + - create + - delete + - get + - list + - update + - apiGroups: + - "kubevirt.io" + resources: + - virtualmachines + - virtualmachineinstances + verbs: + - get + - list +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: ovn +roleRef: + name: system:ovn + kind: ClusterRole + apiGroup: rbac.authorization.k8s.io +subjects: + - kind: ServiceAccount + name: ovn + namespace: kube-system +--- +kind: Service +apiVersion: v1 +metadata: + name: ovn-nb + namespace: kube-system +spec: + ports: + - name: ovn-nb + protocol: TCP + port: 6641 + targetPort: 6641 + type: ClusterIP + ${SVC_YAML_IPFAMILYPOLICY} + selector: + app: ovn-central + ovn-nb-leader: "true" + sessionAffinity: None +--- +kind: Service +apiVersion: v1 +metadata: + name: ovn-sb + namespace: kube-system +spec: + ports: + - name: ovn-sb + protocol: TCP + port: 6642 + targetPort: 6642 + type: ClusterIP + ${SVC_YAML_IPFAMILYPOLICY} + selector: + app: ovn-central + ovn-sb-leader: "true" + sessionAffinity: None +--- +kind: Service +apiVersion: v1 +metadata: + name: ovn-northd + namespace: kube-system +spec: + ports: + - name: ovn-northd + protocol: TCP + port: 6643 + targetPort: 6643 + type: ClusterIP + 
${SVC_YAML_IPFAMILYPOLICY} + selector: + app: ovn-central + ovn-northd-leader: "true" + sessionAffinity: None +--- +kind: Deployment +apiVersion: apps/v1 +metadata: + name: ovn-central + namespace: kube-system + annotations: + kubernetes.io/description: | + OVN components: northd, nb and sb. +spec: + replicas: $count + strategy: + rollingUpdate: + maxSurge: 0 + maxUnavailable: 1 + type: RollingUpdate + selector: + matchLabels: + app: ovn-central + template: + metadata: + labels: + app: ovn-central + component: network + type: infra + spec: + tolerations: + - effect: NoSchedule + operator: Exists + - effect: NoExecute + operator: Exists + - key: CriticalAddonsOnly + operator: Exists + affinity: + podAntiAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + - labelSelector: + matchLabels: + app: ovn-central + topologyKey: kubernetes.io/hostname + priorityClassName: system-cluster-critical + serviceAccountName: ovn + hostNetwork: true + containers: + - name: ovn-central + image: "$REGISTRY/kube-ovn:$VERSION" + imagePullPolicy: $IMAGE_PULL_POLICY + command: ["/kube-ovn/start-db.sh"] + securityContext: + capabilities: + add: ["SYS_NICE"] + env: + - name: ENABLE_SSL + value: "$ENABLE_SSL" + - name: NODE_IPS + value: $addresses + - name: POD_IP + valueFrom: + fieldRef: + fieldPath: status.podIP + - name: POD_NAME + valueFrom: + fieldRef: + fieldPath: metadata.name + - name: POD_NAMESPACE + valueFrom: + fieldRef: + fieldPath: metadata.namespace + - name: POD_IPS + valueFrom: + fieldRef: + fieldPath: status.podIPs + - name: ENABLE_BIND_LOCAL_IP + value: "$ENABLE_BIND_LOCAL_IP" + - name: DEBUG_WRAPPER + value: "$DEBUG_WRAPPER" + resources: + requests: + cpu: 300m + memory: 200Mi + limits: + cpu: 3 + memory: 4Gi + volumeMounts: + - mountPath: /var/run/openvswitch + name: host-run-ovs + - mountPath: /var/run/ovn + name: host-run-ovn + - mountPath: /sys + name: host-sys + readOnly: true + - mountPath: /etc/openvswitch + name: host-config-openvswitch + - mountPath: 
/etc/ovn + name: host-config-ovn + - mountPath: /var/log/openvswitch + name: host-log-ovs + - mountPath: /var/log/ovn + name: host-log-ovn + - mountPath: /etc/localtime + name: localtime + - mountPath: /var/run/tls + name: kube-ovn-tls + readinessProbe: + exec: + command: + - bash + - /kube-ovn/ovn-healthcheck.sh + periodSeconds: 15 + timeoutSeconds: 45 + livenessProbe: + exec: + command: + - bash + - /kube-ovn/ovn-healthcheck.sh + initialDelaySeconds: 30 + periodSeconds: 15 + failureThreshold: 5 + timeoutSeconds: 45 + nodeSelector: + kubernetes.io/os: "linux" + kube-ovn/role: "master" + volumes: + - name: host-run-ovs + hostPath: + path: /run/openvswitch + - name: host-run-ovn + hostPath: + path: /run/ovn + - name: host-sys + hostPath: + path: /sys + - name: host-config-openvswitch + hostPath: + path: /etc/origin/openvswitch + - name: host-config-ovn + hostPath: + path: /etc/origin/ovn + - name: host-log-ovs + hostPath: + path: /var/log/openvswitch + - name: host-log-ovn + hostPath: + path: /var/log/ovn + - name: localtime + hostPath: + path: /etc/localtime + - name: kube-ovn-tls + secret: + optional: true + secretName: kube-ovn-tls +--- +kind: DaemonSet +apiVersion: apps/v1 +metadata: + name: ovs-ovn + namespace: kube-system + annotations: + kubernetes.io/description: | + This daemon set launches the openvswitch daemon. 
+spec: + selector: + matchLabels: + app: ovs + updateStrategy: + type: OnDelete + template: + metadata: + labels: + app: ovs + component: network + type: infra + spec: + tolerations: + - effect: NoSchedule + operator: Exists + - effect: NoExecute + operator: Exists + - key: CriticalAddonsOnly + operator: Exists + priorityClassName: system-node-critical + serviceAccountName: ovn + hostNetwork: true + hostPID: true + containers: + - name: openvswitch + image: "$REGISTRY/kube-ovn:$VERSION" + imagePullPolicy: $IMAGE_PULL_POLICY + command: ["/kube-ovn/start-ovs.sh"] + securityContext: + runAsUser: 0 + privileged: true + env: + - name: ENABLE_SSL + value: "$ENABLE_SSL" + - name: POD_IP + valueFrom: + fieldRef: + fieldPath: status.podIP + - name: HW_OFFLOAD + value: "$HW_OFFLOAD" + - name: TUNNEL_TYPE + value: "$TUNNEL_TYPE" + - name: KUBE_NODE_NAME + valueFrom: + fieldRef: + fieldPath: spec.nodeName + - name: OVN_DB_IPS + value: $addresses + - name: DEBUG_WRAPPER + value: "$DEBUG_WRAPPER" + volumeMounts: + - mountPath: /var/run/netns + name: host-ns + mountPropagation: HostToContainer + - mountPath: /lib/modules + name: host-modules + readOnly: true + - mountPath: /var/run/openvswitch + name: host-run-ovs + - mountPath: /var/run/ovn + name: host-run-ovn + - mountPath: /sys + name: host-sys + readOnly: true + - mountPath: /etc/cni/net.d + name: cni-conf + - mountPath: /etc/openvswitch + name: host-config-openvswitch + - mountPath: /etc/ovn + name: host-config-ovn + - mountPath: /var/log/openvswitch + name: host-log-ovs + - mountPath: /var/log/ovn + name: host-log-ovn + - mountPath: /etc/localtime + name: localtime + - mountPath: /var/run/tls + name: kube-ovn-tls + - mountPath: /var/run/containerd + name: cruntime + readinessProbe: + exec: + command: + - bash + - -c + - LOG_ROTATE=true /kube-ovn/ovs-healthcheck.sh + periodSeconds: 5 + timeoutSeconds: 45 + livenessProbe: + exec: + command: + - bash + - /kube-ovn/ovs-healthcheck.sh + initialDelaySeconds: 60 + periodSeconds: 
5 + failureThreshold: 5 + timeoutSeconds: 45 + resources: + requests: + cpu: 200m + memory: 200Mi + limits: + cpu: 1000m + memory: 1000Mi + nodeSelector: + kubernetes.io/os: "linux" + volumes: + - name: host-modules + hostPath: + path: /lib/modules + - name: host-run-ovs + hostPath: + path: /run/openvswitch + - name: host-run-ovn + hostPath: + path: /run/ovn + - name: host-sys + hostPath: + path: /sys + - name: host-ns + hostPath: + path: /var/run/netns + - name: cni-conf + hostPath: + path: /etc/cni/net.d + - name: host-config-openvswitch + hostPath: + path: /etc/origin/openvswitch + - name: host-config-ovn + hostPath: + path: /etc/origin/ovn + - name: host-log-ovs + hostPath: + path: /var/log/openvswitch + - name: host-log-ovn + hostPath: + path: /var/log/ovn + - name: localtime + hostPath: + path: /etc/localtime + - hostPath: + path: /var/run/containerd + name: cruntime + - name: kube-ovn-tls + secret: + optional: true + secretName: kube-ovn-tls +EOF +fi + +kubectl apply -f kube-ovn-crd.yaml +kubectl apply -f ovn.yaml + +if $HYBRID_DPDK; then + +cat <<EOF > ovn-dpdk.yaml +kind: DaemonSet +apiVersion: apps/v1 +metadata: + name: ovs-ovn-dpdk + namespace: kube-system + annotations: + kubernetes.io/description: | + This daemon set launches the openvswitch daemon. 
+spec: + selector: + matchLabels: + app: ovs-dpdk + updateStrategy: + type: OnDelete + template: + metadata: + labels: + app: ovs-dpdk + component: network + type: infra + spec: + tolerations: + - operator: Exists + priorityClassName: system-node-critical + serviceAccountName: ovn + hostNetwork: true + hostPID: true + containers: + - name: openvswitch + image: "$REGISTRY/kube-ovn:${VERSION}-dpdk" + imagePullPolicy: $IMAGE_PULL_POLICY + command: ["/kube-ovn/start-ovs-dpdk-v2.sh"] + securityContext: + runAsUser: 0 + privileged: true + env: + - name: ENABLE_SSL + value: "$ENABLE_SSL" + - name: POD_IP + valueFrom: + fieldRef: + fieldPath: status.podIP + - name: HW_OFFLOAD + value: "$HW_OFFLOAD" + - name: TUNNEL_TYPE + value: "$TUNNEL_TYPE" + - name: DPDK_TUNNEL_IFACE + value: "$DPDK_TUNNEL_IFACE" + - name: KUBE_NODE_NAME + valueFrom: + fieldRef: + fieldPath: spec.nodeName + - name: OVN_DB_IPS + value: $addresses + volumeMounts: + - mountPath: /opt/ovs-config + name: host-config-ovs + - name: shareddir + mountPath: /var/lib/kubelet/pods + - name: hugepage + mountPath: /dev/hugepages + - mountPath: /lib/modules + name: host-modules + readOnly: true + - mountPath: /var/run/openvswitch + name: host-run-ovs + mountPropagation: HostToContainer + - mountPath: /var/run/ovn + name: host-run-ovn + - mountPath: /sys + name: host-sys + - mountPath: /etc/cni/net.d + name: cni-conf + - mountPath: /etc/openvswitch + name: host-config-openvswitch + - mountPath: /etc/ovn + name: host-config-ovn + - mountPath: /var/log/openvswitch + name: host-log-ovs + - mountPath: /var/log/ovn + name: host-log-ovn + - mountPath: /etc/localtime + name: localtime + - mountPath: /var/run/tls + name: kube-ovn-tls + readinessProbe: + exec: + command: + - bash + - -c + - LOG_ROTATE=true /kube-ovn/ovs-healthcheck.sh + periodSeconds: 5 + timeoutSeconds: 45 + livenessProbe: + exec: + command: + - bash + - /kube-ovn/ovs-healthcheck.sh + initialDelaySeconds: 60 + periodSeconds: 5 + failureThreshold: 5 + 
timeoutSeconds: 45 + resources: + requests: + cpu: 200m + hugepages-2Mi: 1Gi + memory: 200Mi + limits: + cpu: 1000m + hugepages-2Mi: 1Gi + memory: 800Mi + nodeSelector: + kubernetes.io/os: "linux" + ovn.kubernetes.io/ovs_dp_type: "userspace" + volumes: + - name: host-config-ovs + hostPath: + path: /opt/ovs-config + type: DirectoryOrCreate + - name: shareddir + hostPath: + path: /var/lib/kubelet/pods + type: '' + - name: hugepage + emptyDir: + medium: HugePages + - name: host-modules + hostPath: + path: /lib/modules + - name: host-run-ovs + hostPath: + path: /run/openvswitch + - name: host-run-ovn + hostPath: + path: /run/ovn + - name: host-sys + hostPath: + path: /sys + - name: cni-conf + hostPath: + path: /etc/cni/net.d + - name: host-config-openvswitch + hostPath: + path: /etc/origin/openvswitch + - name: host-config-ovn + hostPath: + path: /etc/origin/ovn + - name: host-log-ovs + hostPath: + path: /var/log/openvswitch + - name: host-log-ovn + hostPath: + path: /var/log/ovn + - name: localtime + hostPath: + path: /etc/localtime + - name: kube-ovn-tls + secret: + optional: true + secretName: kube-ovn-tls +EOF +kubectl apply -f ovn-dpdk.yaml +fi +kubectl rollout status deployment/ovn-central -n kube-system --timeout 300s +echo "-------------------------------" +echo "" + +echo "[Step 3/6] Install Kube-OVN" + +cat <<EOF > kube-ovn.yaml +--- +kind: Deployment +apiVersion: apps/v1 +metadata: + name: kube-ovn-controller + namespace: kube-system + annotations: + kubernetes.io/description: | + kube-ovn controller +spec: + replicas: $count + selector: + matchLabels: + app: kube-ovn-controller + strategy: + rollingUpdate: + maxSurge: 0% + maxUnavailable: 100% + type: RollingUpdate + template: + metadata: + labels: + app: kube-ovn-controller + component: network + type: infra + spec: + tolerations: + - effect: NoSchedule + operator: Exists + - key: CriticalAddonsOnly + operator: Exists + affinity: + podAntiAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + - 
labelSelector: + matchLabels: + app: kube-ovn-controller + topologyKey: kubernetes.io/hostname + priorityClassName: system-cluster-critical + serviceAccountName: ovn + hostNetwork: true + containers: + - name: kube-ovn-controller + image: "$REGISTRY/kube-ovn:$VERSION" + imagePullPolicy: $IMAGE_PULL_POLICY + args: + - /kube-ovn/start-controller.sh + - --default-cidr=$POD_CIDR + - --default-gateway=$POD_GATEWAY + - --default-gateway-check=$CHECK_GATEWAY + - --default-logical-gateway=$LOGICAL_GATEWAY + - --default-u2o-interconnection=$U2O_INTERCONNECTION + - --default-exclude-ips=$EXCLUDE_IPS + - --node-switch-cidr=$JOIN_CIDR + - --service-cluster-ip-range=$SVC_CIDR + - --network-type=$NETWORK_TYPE + - --default-interface-name=$VLAN_INTERFACE_NAME + - --default-exchange-link-name=$EXCHANGE_LINK_NAME + - --default-vlan-id=$VLAN_ID + - --ls-dnat-mod-dl-dst=$LS_DNAT_MOD_DL_DST + - --pod-nic-type=$POD_NIC_TYPE + - --enable-lb=$ENABLE_LB + - --enable-np=$ENABLE_NP + - --enable-eip-snat=$ENABLE_EIP_SNAT + - --enable-external-vpc=$ENABLE_EXTERNAL_VPC + - --logtostderr=false + - --alsologtostderr=true + - --gc-interval=$GC_INTERVAL + - --inspect-interval=$INSPECT_INTERVAL + - --log_file=/var/log/kube-ovn/kube-ovn-controller.log + - --log_file_max_size=0 + - --enable-lb-svc=$ENABLE_LB_SVC + - --keep-vm-ip=$ENABLE_KEEP_VM_IP + - --pod-default-fip-type=$POD_DEFAULT_FIP_TYPE + env: + - name: ENABLE_SSL + value: "$ENABLE_SSL" + - name: POD_NAME + valueFrom: + fieldRef: + fieldPath: metadata.name + - name: KUBE_NAMESPACE + valueFrom: + fieldRef: + fieldPath: metadata.namespace + - name: KUBE_NODE_NAME + valueFrom: + fieldRef: + fieldPath: spec.nodeName + - name: OVN_DB_IPS + value: $addresses + - name: POD_IPS + valueFrom: + fieldRef: + fieldPath: status.podIPs + - name: ENABLE_BIND_LOCAL_IP + value: "$ENABLE_BIND_LOCAL_IP" + volumeMounts: + - mountPath: /etc/localtime + name: localtime + - mountPath: /var/log/kube-ovn + name: kube-ovn-log + - mountPath: /var/run/tls + name: 
kube-ovn-tls + readinessProbe: + exec: + command: + - /kube-ovn/kube-ovn-controller-healthcheck + periodSeconds: 3 + timeoutSeconds: 45 + livenessProbe: + exec: + command: + - /kube-ovn/kube-ovn-controller-healthcheck + initialDelaySeconds: 300 + periodSeconds: 7 + failureThreshold: 5 + timeoutSeconds: 45 + resources: + requests: + cpu: 200m + memory: 200Mi + limits: + cpu: 1000m + memory: 1Gi + nodeSelector: + kubernetes.io/os: "linux" + volumes: + - name: localtime + hostPath: + path: /etc/localtime + - name: kube-ovn-log + hostPath: + path: /var/log/kube-ovn + - name: kube-ovn-tls + secret: + optional: true + secretName: kube-ovn-tls + +--- +kind: DaemonSet +apiVersion: apps/v1 +metadata: + name: kube-ovn-cni + namespace: kube-system + annotations: + kubernetes.io/description: | + This daemon set launches the kube-ovn cni daemon. +spec: + selector: + matchLabels: + app: kube-ovn-cni + template: + metadata: + labels: + app: kube-ovn-cni + component: network + type: infra + spec: + tolerations: + - effect: NoSchedule + operator: Exists + - effect: NoExecute + operator: Exists + - key: CriticalAddonsOnly + operator: Exists + priorityClassName: system-node-critical + serviceAccountName: ovn + hostNetwork: true + hostPID: true + initContainers: + - name: install-cni + image: "$REGISTRY/kube-ovn:$VERSION" + imagePullPolicy: $IMAGE_PULL_POLICY + command: ["/kube-ovn/install-cni.sh"] + securityContext: + runAsUser: 0 + privileged: true + volumeMounts: + - mountPath: /opt/cni/bin + name: cni-bin + - mountPath: /usr/local/bin + name: local-bin + containers: + - name: cni-server + image: "$REGISTRY/kube-ovn:$VERSION" + imagePullPolicy: $IMAGE_PULL_POLICY + command: + - bash + - /kube-ovn/start-cniserver.sh + args: + - --enable-mirror=$ENABLE_MIRROR + - --encap-checksum=true + - --service-cluster-ip-range=$SVC_CIDR + - --iface=${IFACE} + - --dpdk-tunnel-iface=${DPDK_TUNNEL_IFACE} + - --network-type=$TUNNEL_TYPE + - --default-interface-name=$VLAN_INTERFACE_NAME + - 
--cni-conf-name=${CNI_CONFIG_PRIORITY}-kube-ovn.conflist + - --logtostderr=false + - --alsologtostderr=true + - --log_file=/var/log/kube-ovn/kube-ovn-cni.log + - --log_file_max_size=0 + securityContext: + runAsUser: 0 + privileged: true + env: + - name: ENABLE_SSL + value: "$ENABLE_SSL" + - name: POD_IP + valueFrom: + fieldRef: + fieldPath: status.podIP + - name: KUBE_NODE_NAME + valueFrom: + fieldRef: + fieldPath: spec.nodeName + - name: MODULES + value: $MODULES + - name: RPMS + value: $RPMS + - name: POD_IPS + valueFrom: + fieldRef: + fieldPath: status.podIPs + - name: ENABLE_BIND_LOCAL_IP + value: "$ENABLE_BIND_LOCAL_IP" + - name: DBUS_SYSTEM_BUS_ADDRESS + value: "unix:path=/host/var/run/dbus/system_bus_socket" + volumeMounts: + - name: host-modules + mountPath: /lib/modules + readOnly: true + - name: shared-dir + mountPath: /var/lib/kubelet/pods + - mountPath: /etc/openvswitch + name: systemid + - mountPath: /etc/cni/net.d + name: cni-conf + - mountPath: /run/openvswitch + name: host-run-ovs + mountPropagation: Bidirectional + - mountPath: /run/ovn + name: host-run-ovn + - mountPath: /host/var/run/dbus + name: host-dbus + mountPropagation: HostToContainer + - mountPath: /var/run/netns + name: host-ns + mountPropagation: HostToContainer + - mountPath: /var/log/kube-ovn + name: kube-ovn-log + - mountPath: /var/log/openvswitch + name: host-log-ovs + - mountPath: /var/log/ovn + name: host-log-ovn + - mountPath: /etc/localtime + name: localtime + - mountPath: /tmp + name: tmp + livenessProbe: + failureThreshold: 3 + initialDelaySeconds: 30 + periodSeconds: 7 + successThreshold: 1 + tcpSocket: + port: 10665 + timeoutSeconds: 3 + readinessProbe: + failureThreshold: 3 + initialDelaySeconds: 30 + periodSeconds: 7 + successThreshold: 1 + tcpSocket: + port: 10665 + timeoutSeconds: 3 + resources: + requests: + cpu: 100m + memory: 100Mi + limits: + cpu: 1000m + memory: 1Gi + nodeSelector: + kubernetes.io/os: "linux" + volumes: + - name: host-modules + hostPath: + path: 
/lib/modules + - name: shared-dir + hostPath: + path: /var/lib/kubelet/pods + - name: systemid + hostPath: + path: /etc/origin/openvswitch + - name: host-run-ovs + hostPath: + path: /run/openvswitch + - name: host-run-ovn + hostPath: + path: /run/ovn + - name: cni-conf + hostPath: + path: $CNI_CONF_DIR + - name: cni-bin + hostPath: + path: $CNI_BIN_DIR + - name: host-ns + hostPath: + path: /var/run/netns + - name: host-dbus + hostPath: + path: /var/run/dbus + - name: host-log-ovs + hostPath: + path: /var/log/openvswitch + - name: kube-ovn-log + hostPath: + path: /var/log/kube-ovn + - name: host-log-ovn + hostPath: + path: /var/log/ovn + - name: localtime + hostPath: + path: /etc/localtime + - name: tmp + hostPath: + path: /tmp + - name: local-bin + hostPath: + path: /usr/local/bin + +--- +kind: DaemonSet +apiVersion: apps/v1 +metadata: + name: kube-ovn-pinger + namespace: kube-system + annotations: + kubernetes.io/description: | + This daemon set launches the openvswitch daemon. +spec: + selector: + matchLabels: + app: kube-ovn-pinger + updateStrategy: + type: RollingUpdate + template: + metadata: + labels: + app: kube-ovn-pinger + component: network + type: infra + spec: + priorityClassName: system-node-critical + serviceAccountName: ovn + hostPID: true + containers: + - name: pinger + image: "$REGISTRY/kube-ovn:$VERSION" + command: + - /kube-ovn/kube-ovn-pinger + args: + - --external-address=$PINGER_EXTERNAL_ADDRESS + - --external-dns=$PINGER_EXTERNAL_DOMAIN + - --logtostderr=false + - --alsologtostderr=true + - --log_file=/var/log/kube-ovn/kube-ovn-pinger.log + - --log_file_max_size=0 + imagePullPolicy: $IMAGE_PULL_POLICY + securityContext: + runAsUser: 0 + privileged: false + env: + - name: ENABLE_SSL + value: "$ENABLE_SSL" + - name: POD_IP + valueFrom: + fieldRef: + fieldPath: status.podIP + - name: HOST_IP + valueFrom: + fieldRef: + fieldPath: status.hostIP + - name: POD_NAME + valueFrom: + fieldRef: + fieldPath: metadata.name + - name: NODE_NAME + valueFrom: 
+ fieldRef: + fieldPath: spec.nodeName + volumeMounts: + - mountPath: /lib/modules + name: host-modules + readOnly: true + - mountPath: /run/openvswitch + name: host-run-ovs + - mountPath: /var/run/openvswitch + name: host-run-ovs + - mountPath: /var/run/ovn + name: host-run-ovn + - mountPath: /sys + name: host-sys + readOnly: true + - mountPath: /etc/openvswitch + name: host-config-openvswitch + - mountPath: /var/log/openvswitch + name: host-log-ovs + - mountPath: /var/log/ovn + name: host-log-ovn + - mountPath: /var/log/kube-ovn + name: kube-ovn-log + - mountPath: /etc/localtime + name: localtime + - mountPath: /var/run/tls + name: kube-ovn-tls + resources: + requests: + cpu: 100m + memory: 100Mi + limits: + cpu: 200m + memory: 400Mi + nodeSelector: + kubernetes.io/os: "linux" + volumes: + - name: host-modules + hostPath: + path: /lib/modules + - name: host-run-ovs + hostPath: + path: /run/openvswitch + - name: host-run-ovn + hostPath: + path: /run/ovn + - name: host-sys + hostPath: + path: /sys + - name: host-config-openvswitch + hostPath: + path: /etc/origin/openvswitch + - name: host-log-ovs + hostPath: + path: /var/log/openvswitch + - name: kube-ovn-log + hostPath: + path: /var/log/kube-ovn + - name: host-log-ovn + hostPath: + path: /var/log/ovn + - name: localtime + hostPath: + path: /etc/localtime + - name: kube-ovn-tls + secret: + optional: true + secretName: kube-ovn-tls +--- +kind: Deployment +apiVersion: apps/v1 +metadata: + name: kube-ovn-monitor + namespace: kube-system + annotations: + kubernetes.io/description: | + Metrics for OVN components: northd, nb and sb. 
+spec: + replicas: 1 + strategy: + rollingUpdate: + maxSurge: 1 + maxUnavailable: 1 + type: RollingUpdate + selector: + matchLabels: + app: kube-ovn-monitor + template: + metadata: + labels: + app: kube-ovn-monitor + component: network + type: infra + spec: + tolerations: + - effect: NoSchedule + operator: Exists + - key: CriticalAddonsOnly + operator: Exists + affinity: + podAntiAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + - labelSelector: + matchLabels: + app: kube-ovn-monitor + topologyKey: kubernetes.io/hostname + priorityClassName: system-cluster-critical + serviceAccountName: ovn + hostNetwork: true + containers: + - name: kube-ovn-monitor + image: "$REGISTRY/kube-ovn:$VERSION" + imagePullPolicy: $IMAGE_PULL_POLICY + command: ["/kube-ovn/start-ovn-monitor.sh"] + securityContext: + runAsUser: 0 + privileged: false + env: + - name: ENABLE_SSL + value: "$ENABLE_SSL" + - name: KUBE_NODE_NAME + valueFrom: + fieldRef: + fieldPath: spec.nodeName + - name: POD_IPS + valueFrom: + fieldRef: + fieldPath: status.podIPs + - name: ENABLE_BIND_LOCAL_IP + value: "$ENABLE_BIND_LOCAL_IP" + resources: + requests: + cpu: 200m + memory: 200Mi + limits: + cpu: 200m + memory: 200Mi + volumeMounts: + - mountPath: /var/run/openvswitch + name: host-run-ovs + - mountPath: /var/run/ovn + name: host-run-ovn + - mountPath: /etc/openvswitch + name: host-config-openvswitch + - mountPath: /etc/ovn + name: host-config-ovn + - mountPath: /var/log/openvswitch + name: host-log-ovs + - mountPath: /var/log/ovn + name: host-log-ovn + - mountPath: /etc/localtime + name: localtime + - mountPath: /var/run/tls + name: kube-ovn-tls + readinessProbe: + exec: + command: + - cat + - /var/run/ovn/ovn-controller.pid + periodSeconds: 10 + timeoutSeconds: 45 + livenessProbe: + exec: + command: + - cat + - /var/run/ovn/ovn-controller.pid + initialDelaySeconds: 30 + periodSeconds: 10 + failureThreshold: 5 + timeoutSeconds: 45 + nodeSelector: + kubernetes.io/os: "linux" + kube-ovn/role: "master" 
+ volumes: + - name: host-run-ovs + hostPath: + path: /run/openvswitch + - name: host-run-ovn + hostPath: + path: /run/ovn + - name: host-config-openvswitch + hostPath: + path: /etc/origin/openvswitch + - name: host-config-ovn + hostPath: + path: /etc/origin/ovn + - name: host-log-ovs + hostPath: + path: /var/log/openvswitch + - name: host-log-ovn + hostPath: + path: /var/log/ovn + - name: localtime + hostPath: + path: /etc/localtime + - name: kube-ovn-tls + secret: + optional: true + secretName: kube-ovn-tls +--- +kind: Service +apiVersion: v1 +metadata: + name: kube-ovn-monitor + namespace: kube-system + labels: + app: kube-ovn-monitor +spec: + ports: + - name: metrics + port: 10661 + type: ClusterIP + ${SVC_YAML_IPFAMILYPOLICY} + selector: + app: kube-ovn-monitor + sessionAffinity: None +--- +kind: Service +apiVersion: v1 +metadata: + name: kube-ovn-pinger + namespace: kube-system + labels: + app: kube-ovn-pinger +spec: + ${SVC_YAML_IPFAMILYPOLICY} + selector: + app: kube-ovn-pinger + ports: + - port: 8080 + name: metrics +--- +kind: Service +apiVersion: v1 +metadata: + name: kube-ovn-controller + namespace: kube-system + labels: + app: kube-ovn-controller +spec: + ${SVC_YAML_IPFAMILYPOLICY} + selector: + app: kube-ovn-controller + ports: + - port: 10660 + name: metrics +--- +kind: Service +apiVersion: v1 +metadata: + name: kube-ovn-cni + namespace: kube-system + labels: + app: kube-ovn-cni +spec: + ${SVC_YAML_IPFAMILYPOLICY} + selector: + app: kube-ovn-cni + ports: + - port: 10665 + name: metrics +EOF + +kubectl apply -f kube-ovn.yaml +kubectl rollout status deployment/kube-ovn-controller -n kube-system --timeout 300s +kubectl rollout status daemonset/kube-ovn-cni -n kube-system --timeout 300s +echo "-------------------------------" +echo "" + +echo "[Step 4/6] Delete pod that not in host network mode" +for ns in $(kubectl get ns --no-headers -o custom-columns=NAME:.metadata.name); do + for pod in $(kubectl get pod --no-headers -n "$ns" --field-selector 
spec.restartPolicy=Always -o custom-columns=NAME:.metadata.name,HOST:spec.hostNetwork | awk '{if ($2!="true") print $1}'); do + kubectl delete pod "$pod" -n "$ns" --ignore-not-found + done +done + +sleep 5 +kubectl rollout status daemonset/kube-ovn-pinger -n kube-system --timeout 300s +kubectl rollout status deployment/coredns -n kube-system --timeout 600s +echo "-------------------------------" +echo "" + +echo "[Step 5/6] Add kubectl plugin PATH" + +if ! sh -c "echo \":$PATH:\" | grep -q \":/usr/local/bin:\""; then + echo "Tips:Please join the /usr/local/bin to your PATH. Temporarily, we do it for this execution." + export PATH=/usr/local/bin:$PATH + echo "-------------------------------" + echo "" +fi + +echo "[Step 6/6] Run network diagnose" +kubectl cp kube-system/$(kubectl -n kube-system get pods -o wide | grep cni | awk '{print $1}' | awk 'NR==1{print}'):/kube-ovn/kubectl-ko /usr/local/bin/kubectl-ko +chmod +x /usr/local/bin/kubectl-ko +kubectl ko diagnose all + +echo "-------------------------------" +echo " + ,,,, + ,::, + ,,::,,,, + ,,,,,::::::::::::,,,,, + ,,,::::::::::::::::::::::,,, + ,,::::::::::::::::::::::::::::,, + ,,::::::::::::::::::::::::::::::::,, + ,::::::::::::::::::::::::::::::::::::, + ,:::::::::::::,, ,,:::::,,,::::::::::, + ,,:::::::::::::, ,::, ,:::::::::, + ,:::::::::::::, :x, ,:: :, ,:::::::::, +,:::::::::::::::, ,,, ,::, ,, ,::::::::::, +,:::::::::::::::::,,,,,,:::::,,,,::::::::::::, ,:, ,:, ,xx, ,:::::, ,:, ,:: :::, ,x +,::::::::::::::::::::::::::::::::::::::::::::, :x: ,:xx: , :xx, :xxxxxxxxx, :xx, ,xx:,xxxx, :x +,::::::::::::::::::::::::::::::::::::::::::::, :xxxxx:, ,xx, :x: :xxx:x::, ::xxxx: :xx:, ,:xxx :xx, ,xx: ,xxxxx:, :x +,::::::::::::::::::::::::::::::::::::::::::::, :xxxxx, :xx, :x: :xxx,,:xx,:xx:,:xx, ,,,,,,,,,xxx, ,xx: :xx:xx: ,xxx,:xx::x +,::::::,,::::::::,,::::::::,,:::::::,,,::::::, :x:,xxx: ,xx, :xx :xx: ,xx,xxxxxx:, ,xxxxxxx:,xxx:, ,xxx, :xxx: ,xxx, :xxxx +,::::, ,::::, ,:::::, ,,::::, ,::::, :x: 
,:xx,,:xx::xxxx,,xxx::xx: :xx::::x: ,,,,,, ,xxxxxxxxx, ,xx: ,xxx, :xxx +,::::, ,::::, ,::::, ,::::, ,::::, ,:, ,:, ,,::,,:, ,::::,, ,:::::, ,,:::::, ,, :x: ,:: +,::::, ,::::, ,::::, ,::::, ,::::, + ,,,,, ,::::, ,::::, ,::::, ,:::, ,,,,,,,,,,,,, + ,::::, ,::::, ,::::, ,:::, ,,,:::::::::::::::, + ,::::, ,::::, ,::::, ,::::, ,,,,:::::::::,,,,,,,:::, + ,::::, ,::::, ,::::, ,::::::::::::,,,,, + ,,,, ,::::, ,,,, ,,,::::,,,, + ,::::, + ,,::, +" +echo "Thanks for choosing Kube-OVN! +For more advanced features, please read https://kubeovn.github.io/docs/stable/en/ +If you have any question, please file an issue https://github.com/kubeovn/kube-ovn/issues/new/choose" diff --git a/roles/vhosts/k8s-node/defaults/main.yml b/roles/vhosts/k8s-node/defaults/main.yml new file mode 100644 index 0000000..19cc700 --- /dev/null +++ b/roles/vhosts/k8s-node/defaults/main.yml @@ -0,0 +1,3 @@ +username: "{{ user }}" +ssh_public_keys: "{{ ssh_keys }}" +enable_gpu: "{{ gpu_enabled | default(false) }}" diff --git a/roles/vhosts/k8s-node/tasks/apt_setup.yml b/roles/vhosts/k8s-node/tasks/apt_setup.yml new file mode 100644 index 0000000..cfb84c7 --- /dev/null +++ b/roles/vhosts/k8s-node/tasks/apt_setup.yml @@ -0,0 +1,65 @@ +- name: Update apt cache + apt: + update_cache: yes + +- name: Gather package facts + package_facts: + manager: auto + +- name: Stop and disable unattended upgrades + systemd: + name: unattended-upgrades + state: stopped + enabled: no + ignore_errors: true + +- name: Remove old containerd if exists + apt: + name: containerd.io + state: absent + +- name: Remove unwanted files in sources.list.d + find: + paths: /etc/apt/sources.list.d + patterns: + - "*" + excludes: + - "ubuntu.sources" + - "cuda-*.list" + - "deadsnakes.list" + - "docker.list" + - "nvidia-docker-container.list" + file_type: file + register: apt_files_to_remove + +- name: Delete found files + file: + path: "{{ item.path }}" + state: absent + with_items: "{{ apt_files_to_remove.files }}" + +- name: Remove proxy 
configuration files + file: + path: "{{ item }}" + state: absent + with_fileglob: # NOTE(review): fileglob is a lookup and is evaluated on the control node, not the managed host - confirm intent + - "/etc/apt/apt.conf.d/*proxy*" + +- name: Remove proxy settings from apt.conf + lineinfile: + path: /etc/apt/apt.conf + regexp: "{{ item }}" + state: absent + with_items: + - 'Acquire::http::Proxy' + - 'Acquire::https::Proxy' + when: ansible_path_apt_conf.stat.exists is defined and ansible_path_apt_conf.stat.exists # NOTE(review): ansible_path_apt_conf is not registered in this task file - confirm it is set elsewhere, otherwise this condition is false + +- name: Unset all snap proxy settings + command: snap unset system {{ item }} + loop: + - proxy.http + - proxy.https + - proxy.no-proxy + changed_when: true + ignore_errors: true diff --git a/roles/vhosts/k8s-node/tasks/containerd.yml b/roles/vhosts/k8s-node/tasks/containerd.yml new file mode 100644 index 0000000..cbaa912 --- /dev/null +++ b/roles/vhosts/k8s-node/tasks/containerd.yml @@ -0,0 +1,33 @@ +- name: Install NVIDIA Container Toolkit + apt: + name: nvidia-container-toolkit + state: present + when: enable_gpu | bool # role default; resolves to false when gpu_enabled is unset + +- name: Configure NVIDIA Container Toolkit + shell: | + nvidia-ctk runtime configure --runtime=docker --set-as-default + nvidia-ctk runtime configure --runtime=containerd --set-as-default + nvidia-ctk runtime configure --runtime=crio --set-as-default --config=/etc/crio/crio.conf.d/99-nvidia.conf + register: container_runtime + when: enable_gpu | bool + +- name: Find all microk8s services + shell: systemctl list-units --full --all "snap.microk8s.*" --plain --no-legend | awk '{print $1}' + register: microk8s_services + +- name: Restart all microk8s services + systemd: + name: "{{ item }}" + state: restarted + loop: "{{ microk8s_services.stdout_lines }}" + when: container_runtime.changed + +- name: Fix containerd + shell: | + DISABLED=$(egrep 'disabled_plugins' /etc/containerd/config.toml | grep -vF 'disabled_plugins = []') # -F: "[]" must be matched literally, as a regex it is an unterminated bracket expression + if [ !
-z "$DISABLED" ]; then # DISABLED is the variable assigned on the previous script line + perl -pi -e 's/^\s*disabled_plugins\s*=.*/##disabled_plugins = []/g' /etc/containerd/config.toml + systemctl restart containerd + fi + diff --git a/roles/vhosts/k8s-node/tasks/gpu.yml b/roles/vhosts/k8s-node/tasks/gpu.yml new file mode 100644 index 0000000..9635f0a --- /dev/null +++ b/roles/vhosts/k8s-node/tasks/gpu.yml @@ -0,0 +1,74 @@ +- name: Check CUDA installation + apt: + name: cuda-toolkit-{{ cuda_version }} + state: present + check_mode: yes + register: cuda_check + ignore_errors: yes + when: enable_gpu | bool + +- name: Purge existing NVIDIA/CUDA packages + shell: | + export DEBIAN_FRONTEND=noninteractive + export NEEDRESTART_SUSPEND=y + PKGS=$(dpkg --list | egrep -i 'cuda|nvidia' | egrep -v 'nvidia-kernel|linux-(nvidia|modules|headers|image)' | awk '{print $2}' ) + if [ ! -z "$PKGS" ]; then + echo "$PKGS" | xargs apt -y remove --allow-change-held-packages + echo "$PKGS" | xargs dpkg --purge + fi + ignore_errors: yes + register: gpu_setup + when: not skip_cuda | bool and (enable_gpu | bool) and (cuda_check.failed or cuda_check.changed) + +- name: Download and install CUDA keyring + block: + - get_url: + url: https://developer.download.nvidia.com/compute/cuda/repos/ubuntu{{ ubuntu_major }}{{ ubuntu_minor }}/x86_64/cuda-keyring_1.1-1_all.deb + dest: /tmp/cuda-keyring.deb + - apt: + deb: /tmp/cuda-keyring.deb + when: enable_gpu | bool + +- name: Update apt cache + apt: + update_cache: yes + when: enable_gpu | bool + +- name: GPU Setup Tasks + when: enable_gpu | bool and not skip_cuda | bool + block: + - name: Install NVIDIA packages + apt: + name: + - cuda-toolkit-{{ cuda_version }} + - nvidia-open + - nvidia-fabricmanager-{{ nvidia_version }} + state: present + + - name: Configure NVIDIA Fabric Manager + systemd: + name: nvidia-fabricmanager + enabled: yes + masked: no + +- name: Set NVIDIA device permissions + file: + path: "{{ item }}" + mode: '0666' + with_fileglob: # NOTE(review): fileglob is a lookup and is evaluated on the control node, not the managed host - confirm intent + - /dev/nvidia* + - /dev/nvidiactl + - /dev/nvidia-uvm + -
/dev/nvidia-uvm-tools + +- name: Create NVIDIA character device symlinks + when: enable_gpu | bool + shell: | + ls /dev/nvidia? | egrep 'nvidia[0-9]' | while read i + do + N=$(echo $i | sed 's#/dev/nvidia##'); + MAJ=$(ls -l $i | awk '{print $5}' | cut -d, -f1) + MIN=$(ls -l $i | awk '{print $6}') + mkdir -p /dev/char # create only the parent; /dev/char/MAJ:MIN itself must be the symlink, not a directory + ln -sf $i /dev/char/$MAJ:$MIN + done diff --git a/roles/vhosts/k8s-node/tasks/main.yml b/roles/vhosts/k8s-node/tasks/main.yml new file mode 100644 index 0000000..e47d10c --- /dev/null +++ b/roles/vhosts/k8s-node/tasks/main.yml @@ -0,0 +1,23 @@ +- name: Include apt setup + import_tasks: apt_setup.yml + +- name: Include user setup + import_tasks: user_setup.yml + +- name: Include base packages + import_tasks: packages.yml + +- name: Include GPU configuration + import_tasks: gpu.yml + +- name: Include system configuration + import_tasks: system_config.yml + +- name: Include reboot logic + import_tasks: reboot.yml + +- name: Configure container runtime + import_tasks: containerd.yml + +- name: Configure networking + import_tasks: network.yml diff --git a/roles/vhosts/k8s-node/tasks/network.yml b/roles/vhosts/k8s-node/tasks/network.yml new file mode 100644 index 0000000..574ed7d --- /dev/null +++ b/roles/vhosts/k8s-node/tasks/network.yml @@ -0,0 +1,46 @@ +- name: Install iptables-persistent + package: + name: + - iptables-persistent + - netfilter-persistent + state: present + when: is_primary | bool + +- name: Add forwarding rule + iptables: + chain: FORWARD + in_interface: wg0 + jump: ACCEPT + state: present + when: is_primary | bool + +- name: Get network interface information + ansible.builtin.shell: | + ip -o link show | awk '$2 !~ /^(docker|cali|cilium|veth|vxlan|lo|wg)/ && $2 ~ /^en/ {gsub(/:/, "", $2); print $2}' + register: ethernet_interfaces + changed_when: false + +- name: Add NAT masquerade rules for ethernet interfaces + iptables: + table: nat + chain: POSTROUTING + out_interface: "{{ item }}" + jump: MASQUERADE + state: present +
loop: "{{ ethernet_interfaces.stdout_lines }}" + when: is_primary | bool + +- name: Save iptables rules + shell: | + netfilter-persistent save + netfilter-persistent reload + when: is_primary | bool + +- name: Enable IP forwarding + sysctl: + name: net.ipv4.ip_forward + value: '1' + state: present + sysctl_set: yes + reload: yes + when: is_primary | bool diff --git a/roles/vhosts/k8s-node/tasks/packages.yml b/roles/vhosts/k8s-node/tasks/packages.yml new file mode 100644 index 0000000..8ce999d --- /dev/null +++ b/roles/vhosts/k8s-node/tasks/packages.yml @@ -0,0 +1,60 @@ +- name: Install basic system prerequisites + apt: + name: + - socat + - vim + - jq + - bc + - libclang-dev + - npm + - clang + - libssl-dev + - llvm + - libudev1 + - protobuf-compiler + - python3 + - python3-pip + - python3-venv + - docker.io + - docker-compose + - build-essential + - nginx + - redis + - net-tools + - ffmpeg + - rsyslog + - libpq-dev + - snapd + - iputils-ping + - systemd-timesyncd + state: present + +- name: Set vim as default editor + alternatives: + name: editor + path: /usr/bin/vim + priority: 1 + +- name: Get list of installed lambda packages + shell: dpkg --list | grep lambda | awk '{print $2}' + register: lambda_packages + changed_when: false + +- name: Check if lambda version file exists + stat: + path: /etc/lambda-version + register: lambda_version_file + +- name: Check if another lambda package file exists, backup + stat: + path: /etc/systemd/system/lambda-jupyter.service + register: lambda_file_backup + +- name: Remove lambda packages + apt: + name: "{{ lambda_packages.stdout_lines }}" + state: absent + purge: yes + when: + - lambda_version_file.stat.exists or lambda_file_backup.stat.exists + - lambda_packages.stdout_lines | length > 0 diff --git a/roles/vhosts/k8s-node/tasks/reboot.yml b/roles/vhosts/k8s-node/tasks/reboot.yml new file mode 100644 index 0000000..79e096a --- /dev/null +++ b/roles/vhosts/k8s-node/tasks/reboot.yml @@ -0,0 +1,33 @@ +- name: Reboot + reboot: + 
msg: "Rebooting..." + reboot_command: "reboot" + connect_timeout: 5 + reboot_timeout: 900 + pre_reboot_delay: 0 + post_reboot_delay: 30 + test_command: uptime + when: gpu_setup.changed or ipv6_disabled.changed + register: rebooted + +- name: Wait for system to be ready + wait_for_connection: + timeout: 600 + when: rebooted.changed + +- name: Set hostname again just in case + systemd: + name: set-hostname + enabled: yes + state: restarted + daemon_reload: yes + when: rebooted.changed + +- name: Wait for microk8s to be ready + shell: microk8s status | grep -E "microk8s is running|acting as a node in a cluster" + register: result + until: result.rc == 0 + retries: 30 + delay: 10 + ignore_errors: yes + when: rebooted.changed diff --git a/roles/vhosts/k8s-node/tasks/system_config.yml b/roles/vhosts/k8s-node/tasks/system_config.yml new file mode 100644 index 0000000..57bf5da --- /dev/null +++ b/roles/vhosts/k8s-node/tasks/system_config.yml @@ -0,0 +1,103 @@ +- name: Configure file limits + blockinfile: + path: /etc/security/limits.conf + block: | + * soft nofile 40000 + * hard nofile 40001 + +- name: Configure PAM limits + lineinfile: + path: "{{ item }}" + line: "session required pam_limits.so" + with_items: + - /etc/pam.d/common-session + - /etc/pam.d/common-session-noninteractive + +- name: Set hostname + hostname: + name: "{{ inventory_hostname }}" + use: systemd + +- name: Update /etc/hostname + copy: + content: "{{ inventory_hostname }}" + dest: /etc/hostname + +- name: Ensure preserve_hostname is set to true + lineinfile: + path: /etc/cloud/cloud.cfg + regexp: '^preserve_hostname:' + line: 'preserve_hostname: true' + create: true + owner: root + group: root + mode: '0644' + +- name: Create hostname script + copy: + dest: /usr/local/bin/set-hostname.sh + mode: '0755' + content: | + #!/bin/bash + hostnamectl set-hostname "{{ inventory_hostname }}" + hostname "{{ inventory_hostname }}" + echo -n "{{ inventory_hostname }}" > /etc/hostname + +- name: Create systemd 
service + copy: + dest: /etc/systemd/system/set-hostname.service + content: | + [Unit] + Description=Set system hostname on boot + After=network.target + + [Service] + Type=oneshot + ExecStart=/usr/local/bin/set-hostname.sh + RemainAfterExit=yes + + [Install] + WantedBy=multi-user.target + +- name: Enable and start hostname service + systemd: + name: set-hostname + enabled: yes + state: started + daemon_reload: yes + +- name: Configure and ensure time synchronization + block: + - name: Ensure timesyncd is installed and enabled + systemd: + name: systemd-timesyncd + state: started + enabled: yes + + - name: Configure NTP servers + lineinfile: + path: /etc/systemd/timesyncd.conf + regexp: '^#?NTP=' + line: 'NTP=pool.ntp.org' + + - name: Force time synchronization + shell: | + timedatectl set-ntp true + systemctl restart systemd-timesyncd + +- name: Disable IPv6 + block: + - name: Set sysctl parameters for IPv6 + sysctl: + name: "{{ item.key }}" + value: "{{ item.value }}" + state: present + sysctl_file: /etc/sysctl.d/99-disable-ipv6.conf + reload: yes + with_items: + - { key: "net.ipv6.conf.all.disable_ipv6", value: "1" } + - { key: "net.ipv6.conf.default.disable_ipv6", value: "1" } + - { key: "net.ipv6.conf.lo.disable_ipv6", value: "1" } + register: ipv6_disabled + become: yes + when: not ipv6_enabled | bool diff --git a/roles/vhosts/k8s-node/tasks/user_setup.yml b/roles/vhosts/k8s-node/tasks/user_setup.yml new file mode 100644 index 0000000..943620a --- /dev/null +++ b/roles/vhosts/k8s-node/tasks/user_setup.yml @@ -0,0 +1,36 @@ +- name: Create user + user: + name: "{{ username }}" + shell: /bin/bash + create_home: yes + state: present + +- name: Create SSH directories + file: + path: "{{ item }}" + state: directory + mode: '0700' + with_items: + - "/home/{{ username }}/.ssh" + - "/root/.ssh" + +- name: Add authorized SSH keys + authorized_key: + user: "{{ username }}" # same account the "Create user" task provisions + state: present + key: "{{ item }}" + with_items: "{{ ssh_public_keys }}" + become: yes + +- name:
Set correct ownership for user home + file: + path: "/home/{{ username }}" + owner: "{{ username }}" + group: "{{ username }}" + recurse: yes + +- name: Add user to sudoers + lineinfile: + path: /etc/sudoers + line: '{{ username }} ALL=(ALL) NOPASSWD:ALL' + validate: 'visudo -cf %s' diff --git a/roles/vhosts/network_info/files/display_network_info.sh b/roles/vhosts/network_info/files/display_network_info.sh new file mode 100644 index 0000000..e3cab71 --- /dev/null +++ b/roles/vhosts/network_info/files/display_network_info.sh @@ -0,0 +1,10 @@ +#!/bin/bash +for interface in `ip -br link | awk '/^(ens|cni)/ {print $1}'` +do + status=$(ip link show "$interface" | grep -q "state UP" && echo -n "UP" || echo -n "DOWN") + ip_addr=$(ip -br addr show "$interface" | awk '{print $3}') + default_gw=$(ip route | grep default | awk '{print $3}') + + echo "$interface $status $ip_addr $default_gw" + +done diff --git a/roles/vhosts/network_info/tasks/main.yml b/roles/vhosts/network_info/tasks/main.yml new file mode 100755 index 0000000..a86aa01 --- /dev/null +++ b/roles/vhosts/network_info/tasks/main.yml @@ -0,0 +1,15 @@ +- name: Gather network default gateway + ansible.builtin.shell: | + ip route | grep default | awk '{print $3}' + register: default_gateway + +- name: Gather network interface information and display details + ansible.builtin.debug: + msg: | + Interface: "{{ item }}" + Status: "{{ 'UP' if hostvars[inventory_hostname]['ansible_' + item].active else 'DOWN' }}" + IP Address: "{{ hostvars[inventory_hostname]['ansible_' + item].ipv4.address if hostvars[inventory_hostname]['ansible_' + item].ipv4 is defined else 'N/A' }}" + Netmask: "{{ hostvars[inventory_hostname]['ansible_' + item].ipv4.netmask if hostvars[inventory_hostname]['ansible_' + item].ipv4 is defined else 'N/A' }}" + Gateway: "{{ default_gateway.stdout }}" + loop: "{{ ansible_facts.interfaces }}" + when: "'ens' in item or 'cni' in item or item.startswith('eth')" diff --git a/roles/vhosts/nginx-proxy/defaults/main.yml
b/roles/vhosts/nginx-proxy/defaults/main.yml new file mode 100644 index 0000000..43fc08a --- /dev/null +++ b/roles/vhosts/nginx-proxy/defaults/main.yml @@ -0,0 +1,4 @@ +vhosts_nginx_proxy_domain: global-homepage.svc.plus +vhosts_nginx_proxy_upstream_host: global-homepage.onwalk.net +vhosts_nginx_proxy_ssl_certificate: /etc/ssl/svc.plus.pem +vhosts_nginx_proxy_ssl_certificate_key: /etc/ssl/svc.plus.rsa.key diff --git a/roles/vhosts/nginx-proxy/handlers/main.yml b/roles/vhosts/nginx-proxy/handlers/main.yml new file mode 100644 index 0000000..c10ab50 --- /dev/null +++ b/roles/vhosts/nginx-proxy/handlers/main.yml @@ -0,0 +1,4 @@ +- name: Reload nginx + ansible.builtin.service: + name: nginx + state: reloaded diff --git a/roles/vhosts/nginx-proxy/tasks/main.yml b/roles/vhosts/nginx-proxy/tasks/main.yml new file mode 100644 index 0000000..778b753 --- /dev/null +++ b/roles/vhosts/nginx-proxy/tasks/main.yml @@ -0,0 +1,28 @@ +- name: Install nginx + ansible.builtin.apt: + name: nginx + state: present + update_cache: true + when: + - ansible_facts['distribution'] == 'Ubuntu' + - ansible_facts['distribution_version'] is version('22.04', '>=') + +- name: Deploy nginx proxy configuration + ansible.builtin.template: + src: nginx-proxy.conf.j2 + dest: /etc/nginx/sites-available/nginx-proxy.conf + mode: '0644' + notify: Reload nginx + +- name: Enable nginx proxy site + ansible.builtin.file: + src: /etc/nginx/sites-available/nginx-proxy.conf + dest: /etc/nginx/sites-enabled/nginx-proxy.conf + state: link + notify: Reload nginx + +- name: Ensure nginx is running + ansible.builtin.service: + name: nginx + state: started + enabled: true diff --git a/roles/vhosts/nginx-proxy/templates/nginx-proxy.conf.j2 b/roles/vhosts/nginx-proxy/templates/nginx-proxy.conf.j2 new file mode 100644 index 0000000..c504c68 --- /dev/null +++ b/roles/vhosts/nginx-proxy/templates/nginx-proxy.conf.j2 @@ -0,0 +1,45 @@ +# HTTP redirect to HTTPS +server { + listen 80; + server_name {{ vhosts_nginx_proxy_domain 
}}; + return 301 https://{{ vhosts_nginx_proxy_domain }}$request_uri; +} + +# HTTPS entrypoint +server { + listen 443 ssl http2; + server_name {{ vhosts_nginx_proxy_domain }}; + + ssl_certificate {{ vhosts_nginx_proxy_ssl_certificate }}; + ssl_certificate_key {{ vhosts_nginx_proxy_ssl_certificate_key }}; + + ssl_protocols TLSv1.2 TLSv1.3; + ssl_ciphers HIGH:!aNULL:!MD5; + + location / { + proxy_pass https://{{ vhosts_nginx_proxy_upstream_host }}; + + # Enable TLS SNI + proxy_ssl_server_name on; + + # Spoof browser headers to avoid Cloudflare challenge + proxy_set_header Host {{ vhosts_nginx_proxy_upstream_host }}; + proxy_set_header User-Agent "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/115.0.0.0 Safari/537.36"; + proxy_set_header Accept "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"; + proxy_set_header Referer "https://{{ vhosts_nginx_proxy_upstream_host }}/"; + + # Preserve client IP + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Proto $scheme; + + # HTTP/1.1 and clear connection upgrade headers + proxy_http_version 1.1; + proxy_set_header Connection ""; + + # Optional timeout controls + proxy_connect_timeout 30s; + proxy_send_timeout 30s; + proxy_read_timeout 30s; + } +} diff --git a/roles/vhosts/nginx/defaults/main.yml b/roles/vhosts/nginx/defaults/main.yml new file mode 100644 index 0000000..c1029ce --- /dev/null +++ b/roles/vhosts/nginx/defaults/main.yml @@ -0,0 +1,12 @@ +vhosts_nginx_cn_homepage_domain: cn-homepage.svc.plus +vhosts_nginx_artifact_domain: artifact.svc.plus +vhosts_nginx_ssl_certificate: /etc/ssl/svc.plus.pem +vhosts_nginx_ssl_certificate_key: /etc/ssl/svc.plus.rsa.key +vhosts_nginx_cn_homepage_root: /var/www/XControl/ui/homepage/out +vhosts_nginx_artifact_root: /data/update-server +vhosts_nginx_grafana_domain: grafana.svc.plus +vhosts_nginx_metrics_domain: metrics.svc.plus 
+vhosts_nginx_metrics_backend_addr: 10.10.0.50:8428
+vhosts_nginx_vm_write_path: /api/v1/write
+vhosts_nginx_vm_read_path: /api/v1/read
+vhosts_nginx_receiver_path: /api/v1/receive
diff --git a/roles/vhosts/nginx/handlers/main.yml b/roles/vhosts/nginx/handlers/main.yml
new file mode 100644
index 0000000..c10ab50
--- /dev/null
+++ b/roles/vhosts/nginx/handlers/main.yml
@@ -0,0 +1,4 @@
+- name: Reload nginx
+  ansible.builtin.service:
+    name: nginx
+    state: reloaded
diff --git a/roles/vhosts/nginx/tasks/main.yml b/roles/vhosts/nginx/tasks/main.yml
new file mode 100644
index 0000000..06ddc56
--- /dev/null
+++ b/roles/vhosts/nginx/tasks/main.yml
@@ -0,0 +1,38 @@
+# Install nginx from the distribution repository (Ubuntu 22.04 or newer only).
+- name: Install nginx
+  ansible.builtin.apt:
+    name: nginx
+    state: present
+    update_cache: true
+  when:
+    - ansible_facts['distribution'] == 'Ubuntu'
+    - ansible_facts['distribution_version'] is version('22.04', '>=')
+
+# Each site is rendered from templates/<site>.conf.j2 into sites-available
+# and then symlinked into sites-enabled; the list is shared via a YAML anchor.
+- name: Deploy nginx site configurations
+  ansible.builtin.template:
+    src: "{{ item }}.conf.j2"
+    dest: "/etc/nginx/sites-available/{{ item }}.conf"
+    mode: '0644'
+  loop: &vhosts_nginx_sites
+    - cn-homepage-redirect
+    - cn-homepage-https
+    - artifact
+    - metrics
+    - grafana
+  notify: Reload nginx
+
+- name: Enable nginx sites
+  ansible.builtin.file:
+    src: "/etc/nginx/sites-available/{{ item }}.conf"
+    dest: "/etc/nginx/sites-enabled/{{ item }}.conf"
+    state: link
+  loop: *vhosts_nginx_sites
+  notify: Reload nginx
+
+- name: Ensure nginx is running
+  ansible.builtin.service:
+    name: nginx
+    state: started
+    enabled: true
diff --git a/roles/vhosts/nginx/templates/artifact.conf.j2 b/roles/vhosts/nginx/templates/artifact.conf.j2
new file mode 100644
index 0000000..03a3e6a
--- /dev/null
+++ b/roles/vhosts/nginx/templates/artifact.conf.j2
@@ -0,0 +1,33 @@
+server {
+    listen 443 ssl http2;
+    server_name {{ vhosts_nginx_artifact_domain }};
+
+    ssl_certificate {{ vhosts_nginx_ssl_certificate }};
+    ssl_certificate_key {{ vhosts_nginx_ssl_certificate_key }};
+    ssl_protocols TLSv1.2 TLSv1.3;
+    ssl_ciphers HIGH:!aNULL:!MD5;
+
+    root {{ vhosts_nginx_artifact_root }};
+    index index.html;
+
+    autoindex on;
+    autoindex_exact_size off;
+    autoindex_localtime on;
+
+    location / {
+        add_header Accept-Ranges bytes;
+        try_files $uri $uri/ =404;
+    }
+
+    location ~* \.(dmg|zip|tar\.gz|deb|rpm|exe|pkg|AppImage|apk|ipa)$ {
+        expires 7d;
+        access_log off;
+        add_header Cache-Control "public";
+        add_header Accept-Ranges bytes;
+        try_files $uri =404;
+    }
+
+    location ~ /\. {
+        deny all;
+    }
+}
diff --git a/roles/vhosts/nginx/templates/cn-homepage-https.conf.j2 b/roles/vhosts/nginx/templates/cn-homepage-https.conf.j2
new file mode 100644
index 0000000..111aac5
--- /dev/null
+++ b/roles/vhosts/nginx/templates/cn-homepage-https.conf.j2
@@ -0,0 +1,35 @@
+server {
+    listen 443 ssl http2;
+    server_name {{ vhosts_nginx_cn_homepage_domain }};
+
+    ssl_certificate {{ vhosts_nginx_ssl_certificate }};
+    ssl_certificate_key {{ vhosts_nginx_ssl_certificate_key }};
+    ssl_protocols TLSv1.2 TLSv1.3;
+    ssl_ciphers HIGH:!aNULL:!MD5;
+
+    root {{ vhosts_nginx_cn_homepage_root }};
+    index index.html;
+
+    location /api/ {
+        proxy_pass http://127.0.0.1:8080;
+        proxy_http_version 1.1;
+        proxy_set_header Host $host;
+        proxy_set_header X-Real-IP $remote_addr;
+        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
+        proxy_set_header X-Forwarded-Proto $scheme;
+    }
+
+    location / {
+        try_files $uri $uri/ /index.html;
+    }
+
+    location ~* \.(?:ico|css|js|gif|jpe?g|png|woff2?)$ {
+        expires 30d;
+        access_log off;
+        add_header Cache-Control "public";
+    }
+
+    location ~ /\.
{
+        deny all;
+    }
+}
diff --git a/roles/vhosts/nginx/templates/cn-homepage-redirect.conf.j2 b/roles/vhosts/nginx/templates/cn-homepage-redirect.conf.j2
new file mode 100644
index 0000000..3738790
--- /dev/null
+++ b/roles/vhosts/nginx/templates/cn-homepage-redirect.conf.j2
@@ -0,0 +1,5 @@
+server {
+    listen 80;
+    server_name {{ vhosts_nginx_cn_homepage_domain }};
+    return 301 https://{{ vhosts_nginx_cn_homepage_domain }}$request_uri;
+}
diff --git a/roles/vhosts/nginx/templates/grafana.conf.j2 b/roles/vhosts/nginx/templates/grafana.conf.j2
new file mode 100644
index 0000000..fa8ca8f
--- /dev/null
+++ b/roles/vhosts/nginx/templates/grafana.conf.j2
@@ -0,0 +1,38 @@
+server {
+    listen 443 ssl http2;
+    server_name {{ vhosts_nginx_grafana_domain }};
+
+    ssl_certificate {{ vhosts_nginx_ssl_certificate }};
+    ssl_certificate_key {{ vhosts_nginx_ssl_certificate_key }};
+    ssl_protocols TLSv1.2 TLSv1.3;
+    ssl_ciphers HIGH:!aNULL:!MD5;
+
+    location / {
+        proxy_set_header Host $http_host;
+        proxy_set_header X-Real-IP $remote_addr;
+        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
+        proxy_set_header X-Forwarded-Proto $scheme;
+        proxy_pass http://127.0.0.1:3000;
+    }
+
+    # Grafana Live (WebSocket). The Host header must be forwarded here as
+    # well, otherwise nginx sends "127.0.0.1:3000" and Grafana rejects the
+    # connection because the Origin no longer matches the request host.
+    location /api/live/ {
+        proxy_http_version 1.1;
+        proxy_set_header Upgrade $http_upgrade;
+        proxy_set_header Connection "upgrade";
+        proxy_set_header Host $http_host;
+        proxy_set_header X-Real-IP $remote_addr;
+        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
+        proxy_set_header X-Forwarded-Proto $scheme;
+        proxy_pass http://127.0.0.1:3000;
+    }
+}
+
+# Plain-HTTP requests are redirected to HTTPS.
+server {
+    listen 80;
+    server_name {{ vhosts_nginx_grafana_domain }};
+    return 301 https://$host$request_uri;
+}
diff --git a/roles/vhosts/nginx/templates/metrics.conf.j2 b/roles/vhosts/nginx/templates/metrics.conf.j2
new file mode 100644
index 0000000..db694cc
--- /dev/null
+++ b/roles/vhosts/nginx/templates/metrics.conf.j2
@@ -0,0 +1,51 @@
+# Shared upstream for the ingest endpoints, with an idle keepalive pool.
+upstream metrics_backend { server {{ vhosts_nginx_metrics_backend_addr }}; keepalive 32; }
+
+server {
+  listen 443 ssl http2;
+  server_name {{ vhosts_nginx_metrics_domain }};
+  ssl_certificate {{ vhosts_nginx_ssl_certificate }};
+  ssl_certificate_key {{ vhosts_nginx_ssl_certificate_key }};
+  ssl_protocols TLSv1.2 TLSv1.3;
+  ssl_ciphers HIGH:!aNULL:!MD5;
+
+  # Prometheus Web/API via subpath
+  location /prom/ {
+    proxy_set_header Host $http_host;
+    proxy_set_header X-Real-IP $remote_addr;
+    proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
+    proxy_set_header X-Forwarded-Proto $scheme;
+    proxy_read_timeout 300;
+    proxy_send_timeout 300;
+    proxy_pass http://127.0.0.1:9090/;
+  }
+
+  # Ingest (VM write/read; Receiver write)
+  # Upstream keepalive only takes effect with HTTP/1.1 and an empty
+  # Connection header, so every ingest location sets both.
+  location = {{ vhosts_nginx_vm_write_path }} {
+    client_max_body_size 0; proxy_request_buffering off; proxy_buffering off; proxy_http_version 1.1;
+    proxy_set_header Connection "";
+    proxy_set_header Host $http_host; proxy_set_header X-Real-IP $remote_addr;
+    proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; proxy_set_header X-Forwarded-Proto $scheme;
+    proxy_read_timeout 600; proxy_send_timeout 600;
+    proxy_pass http://metrics_backend$request_uri;
+  }
+  location = {{ vhosts_nginx_vm_read_path }} {
+    client_max_body_size 0; proxy_request_buffering off; proxy_buffering off; proxy_http_version 1.1;
+    proxy_set_header Connection "";
+    proxy_set_header Host $http_host; proxy_set_header X-Real-IP $remote_addr;
+    proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; proxy_set_header X-Forwarded-Proto $scheme;
+    proxy_read_timeout 600; proxy_send_timeout 600;
+    proxy_pass http://metrics_backend$request_uri;
+  }
+  location = {{ vhosts_nginx_receiver_path }} {
+    client_max_body_size 0; proxy_request_buffering off; proxy_buffering off; proxy_http_version 1.1;
+    proxy_set_header Connection "";
+    proxy_set_header Host $http_host; proxy_set_header X-Real-IP $remote_addr;
+    proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; proxy_set_header X-Forwarded-Proto $scheme;
+    proxy_read_timeout 600; proxy_send_timeout 600;
+    proxy_pass http://metrics_backend$request_uri;
+  }
+}
+server { listen 80; server_name {{ vhosts_nginx_metrics_domain }}; return 301 https://$host$request_uri; }
diff --git a/roles/vhosts/node_exporter/meta/main.yml b/roles/vhosts/node_exporter/meta/main.yml
new file mode 100644
index 0000000..9711b33
--- /dev/null
+++ b/roles/vhosts/node_exporter/meta/main.yml
@@ -0,0 +1,2 @@
+dependencies:
+  - role: common
diff --git a/roles/vhosts/node_exporter/tasks/main.yml b/roles/vhosts/node_exporter/tasks/main.yml
new file mode 100644
index 0000000..14b1e62
--- /dev/null
+++ b/roles/vhosts/node_exporter/tasks/main.yml
@@ -0,0 +1,81 @@
+# Installs the node_exporter binary under /opt/metrics-agent and runs it as a
+# systemd service. Every task is limited to hosts listed in groups[group].
+- name: Download node_exporter archive
+  ansible.builtin.get_url:
+    url: >-
+      https://github.com/prometheus/node_exporter/releases/download/v{{
+      node_exporter_version | default('1.8.2') }}/node_exporter-{{
+      node_exporter_version | default('1.8.2') }}.linux-amd64.tar.gz
+    dest: /tmp/node_exporter.tar.gz
+    mode: "0644"
+  when: inventory_hostname in groups[group]
+
+- name: Extract node_exporter
+  ansible.builtin.unarchive:
+    src: /tmp/node_exporter.tar.gz
+    dest: /tmp
+    remote_src: true
+    creates: "/tmp/node_exporter-{{ node_exporter_version | default('1.8.2') }}.linux-amd64"
+  when: inventory_hostname in groups[group]
+
+- name: Ensure metrics-agent directory exists
+  ansible.builtin.file:
+    path: /opt/metrics-agent
+    state: directory
+    owner: root
+    group: root
+    mode: '0755'
+  when: inventory_hostname in groups[group]
+
+- name: Install node_exporter binary
+  ansible.builtin.copy:
+    src: "/tmp/node_exporter-{{ node_exporter_version | default('1.8.2') }}.linux-amd64/node_exporter"
+    dest: /opt/metrics-agent/node_exporter
+    mode: '0755'
+    remote_src: true
+  when: inventory_hostname in groups[group]
+
+- name: Remove node_exporter archive
+  ansible.builtin.file:
+    path: /tmp/node_exporter.tar.gz
+    state: absent
+  when: inventory_hostname in groups[group]
+
+- name: Cleanup extracted directory
+  ansible.builtin.file:
+    path: "/tmp/node_exporter-{{ node_exporter_version | default('1.8.2') }}.linux-amd64"
+    state: absent
+  when: inventory_hostname in groups[group]
+
+- name: Ensure textfile collector directory exists
+  ansible.builtin.file:
+    path: /var/lib/node_exporter
+    state: directory
+    owner: nobody
+    group: nogroup
+    mode: '0755'
+  when: inventory_hostname in groups[group]
+
+# Drops the unit file under its underscore name; the unit written below is node-exporter.service.
+- name: Remove old node_exporter service file
+  ansible.builtin.file:
+    path: /etc/systemd/system/node_exporter.service
+    state: absent
+  when: inventory_hostname in groups[group]
+
+- name: Create node_exporter service
+  ansible.builtin.template:
+    src: node-exporter.service
+    dest: /etc/systemd/system/node-exporter.service
+    owner: root
+    group: root
+    mode: '0644'
+  when: inventory_hostname in groups[group]
+
+- name: Enable and start node_exporter
+  ansible.builtin.systemd:
+    name: node-exporter
+    enabled: true
+    state: restarted
+    daemon_reload: true
+  when: inventory_hostname in groups[group]
diff --git a/roles/vhosts/node_exporter/templates/node-exporter.service b/roles/vhosts/node_exporter/templates/node-exporter.service
new file mode 100644
index 0000000..c28fae7
--- /dev/null
+++ b/roles/vhosts/node_exporter/templates/node-exporter.service
@@ -0,0 +1,25 @@
+[Unit]
+Description=Prometheus Node Exporter (tiny, LAN)
+After=network.target
+
+[Service]
+User=nobody
+Group=nogroup
+ExecStart=/opt/metrics-agent/node_exporter \
+  --web.listen-address=0.0.0.0:9100 \
+  --collector.disable-defaults \
+  --collector.cpu \
+  --collector.meminfo \
+  --collector.loadavg \
+  --collector.filesystem \
+  --collector.diskstats \
+  --collector.netdev \
+  --collector.netclass \
+  --collector.uname \
+  --collector.textfile \
+  --collector.textfile.directory=/var/lib/node_exporter
+Restart=always
+RestartSec=2
+
+[Install]
+WantedBy=multi-user.target
diff --git a/roles/vhosts/nodejs/defaults/main.yml b/roles/vhosts/nodejs/defaults/main.yml
new file mode 100644
index 0000000..fb1c51e
--- /dev/null
+++ b/roles/vhosts/nodejs/defaults/main.yml
@@ -0,0 +1,13 @@
+---
+# Node.js version to install (LTS or specific version)
+# Examples: "20.x", "18.x", "20.11.0"
+nodejs_version: "20.x"
+
+# Install Yarn package manager (default: true)
+install_yarn: true
+
+# Add npm global bin to system PATH (default: true)
+add_npm_to_path: true
+
+# NPM configuration
+npm_config_prefix: "/usr/local/lib/npm"
diff --git
a/roles/vhosts/nodejs/handlers/main.yml b/roles/vhosts/nodejs/handlers/main.yml
new file mode 100644
index 0000000..3ec5bc7
--- /dev/null
+++ b/roles/vhosts/nodejs/handlers/main.yml
@@ -0,0 +1,2 @@
+---
+# Node.js handlers (reserved for future use)
diff --git a/roles/vhosts/nodejs/tasks/darwin.yml b/roles/vhosts/nodejs/tasks/darwin.yml
new file mode 100644
index 0000000..bbcc9e4
--- /dev/null
+++ b/roles/vhosts/nodejs/tasks/darwin.yml
@@ -0,0 +1,38 @@
+---
+- name: Determine Homebrew prefix
+  ansible.builtin.set_fact:
+    nodejs_homebrew_prefix: "{{ '/opt/homebrew' if ansible_facts['architecture'] in ['arm64', 'aarch64'] else '/usr/local' }}"
+    nodejs_homebrew_formula: "node@{{ nodejs_major_version | default(22) }}"
+
+- name: Ensure unversioned Homebrew node formula is absent
+  community.general.homebrew:
+    name: node
+    state: absent
+
+- name: Ensure Homebrew {{ nodejs_homebrew_formula }} formula is installed
+  community.general.homebrew:
+    name: "{{ nodejs_homebrew_formula }}"
+    state: present
+
+- name: Ensure {{ nodejs_homebrew_formula }} is linked as the default node
+  ansible.builtin.command: "brew link --force --overwrite {{ nodejs_homebrew_formula }}"
+  register: nodejs_brew_link
+  changed_when: "'linking' in (nodejs_brew_link.stdout | lower)"
+  failed_when: nodejs_brew_link.rc != 0 and 'already linked' not in (nodejs_brew_link.stdout | lower) and 'already linked' not in (nodejs_brew_link.stderr | lower)
+  environment:
+    PATH: "{{ nodejs_homebrew_prefix }}/bin:/usr/local/bin:/usr/bin:/bin"
+
+- name: Prioritize {{ nodejs_homebrew_formula }} binaries in the default shell path
+  ansible.builtin.lineinfile:
+    path: "{{ ansible_env.HOME }}/.zshrc"
+    line: "export PATH=\"{{ nodejs_homebrew_prefix }}/opt/{{ nodejs_homebrew_formula }}/bin:$PATH\""
+    insertafter: EOF
+    create: true
+
+- name: Pin {{ nodejs_homebrew_formula }} to prevent automatic upgrades
+  ansible.builtin.command: "brew pin {{ nodejs_homebrew_formula }}"
+  register: nodejs_brew_pin
+  changed_when: "'pinned' in (nodejs_brew_pin.stdout | lower)"
+  failed_when: nodejs_brew_pin.rc != 0 and 'already pinned' not in (nodejs_brew_pin.stdout | lower) and 'already pinned' not in (nodejs_brew_pin.stderr | lower)
+  environment:
+    PATH: "{{ nodejs_homebrew_prefix }}/bin:/usr/local/bin:/usr/bin:/bin"
diff --git a/roles/vhosts/nodejs/tasks/main.yml b/roles/vhosts/nodejs/tasks/main.yml
new file mode 100644
index 0000000..78d09ea
--- /dev/null
+++ b/roles/vhosts/nodejs/tasks/main.yml
@@ -0,0 +1,144 @@
+---
+# Install Node.js from the NodeSource apt repository, and optionally Yarn,
+# on Debian/Ubuntu hosts.
+#
+# nodejs_version accepts a release line ("20.x", "20") or an exact version
+# ("20.11.0"). The repository is always chosen by major version; the package
+# is pinned only when an exact version is given.
+
+- name: Derive Node.js major version and optional exact version
+  ansible.builtin.set_fact:
+    nodejs_major: "{{ (nodejs_version | string).split('.')[0] }}"
+    nodejs_exact_version: >-
+      {{ (nodejs_version | string)
+      if (nodejs_version | string) is match('^[0-9]+[.][0-9]+[.][0-9]+$')
+      else '' }}
+
+- name: Check Node.js version
+  ansible.builtin.command: node --version
+  register: node_version_check
+  changed_when: false
+  failed_when: false
+
+- name: Get Node.js version number
+  ansible.builtin.set_fact:
+    nodejs_installed_version: "{{ node_version_check.stdout | default('') | trim | regex_replace('^v', '') }}"
+
+# With an exact version the installed version must match it completely;
+# with a release line only the major version has to match.
+- name: Decide whether Node.js must be installed or changed
+  ansible.builtin.set_fact:
+    nodejs_install_needed: >-
+      {{ (nodejs_installed_version != nodejs_exact_version)
+      if (nodejs_exact_version | length > 0)
+      else (nodejs_installed_version.split('.')[0] != nodejs_major) }}
+
+- name: Install prerequisites
+  ansible.builtin.apt:
+    name:
+      - curl
+      - wget
+      - gnupg
+      - ca-certificates
+    state: present
+    update_cache: true
+    cache_valid_time: 3600
+
+- name: Ensure apt keyring directory exists
+  ansible.builtin.file:
+    path: /etc/apt/keyrings
+    state: directory
+    mode: '0755'
+
+# The key must be in place before the repository is configured, otherwise
+# the following cache update rejects the repository as unsigned. The key is
+# ASCII-armored, hence the .asc suffix.
+- name: Add NodeSource GPG key
+  ansible.builtin.get_url:
+    url: https://deb.nodesource.com/gpgkey/nodesource-repo.gpg.key
+    dest: /etc/apt/keyrings/nodesource.asc
+    mode: '0644'
+  when: nodejs_install_needed | bool
+
+# Written as a whole file so that switching major versions replaces the old
+# repository line instead of appending a second one.
+- name: Add NodeSource repository
+  ansible.builtin.copy:
+    dest: /etc/apt/sources.list.d/nodesource.list
+    content: |
+      deb [signed-by=/etc/apt/keyrings/nodesource.asc] https://deb.nodesource.com/node_{{ nodejs_major }}.x nodistro main
+    mode: '0644'
+  when: nodejs_install_needed | bool
+
+- name: Install Node.js
+  ansible.builtin.apt:
+    name: "{{ ('nodejs=' ~ nodejs_exact_version ~ '-1nodesource1') if (nodejs_exact_version | length > 0) else 'nodejs' }}"
+    state: "{{ 'present' if (nodejs_exact_version | length > 0) else 'latest' }}"
+    update_cache: true
+  when: nodejs_install_needed | bool
+
+- name: Install npm globally
+  community.general.npm:
+    name: npm
+    state: latest
+    global: true
+
+# The Yarn key is ASCII-armored as well and is referenced via signed-by.
+- name: Get Yarn GPG key
+  ansible.builtin.get_url:
+    url: https://dl.yarnpkg.com/debian/pubkey.gpg
+    dest: /etc/apt/keyrings/yarn.asc
+    mode: '0644'
+  when: install_yarn | default(true) | bool
+
+- name: Add Yarn repository
+  ansible.builtin.copy:
+    dest: /etc/apt/sources.list.d/yarn.list
+    content: |
+      deb [signed-by=/etc/apt/keyrings/yarn.asc] https://dl.yarnpkg.com/debian/ stable main
+    mode: '0644'
+  when: install_yarn | default(true) | bool
+
+- name: Install Yarn
+  ansible.builtin.apt:
+    name: yarn
+    state: present
+    update_cache: true
+  when: install_yarn | default(true) | bool
+
+- name: Set npm to use version tags by default
+  ansible.builtin.command: npm config set save-exact true
+  args:
+    creates: /root/.npmrc
+
+- name: Create npm global directory
+  ansible.builtin.file:
+    path: "{{ npm_config_prefix }}"
+    state: directory
+    mode: '0755'
+
+- name: Add npm global bin to PATH
+  ansible.builtin.template:
+    src: npm_global.sh.j2
+    dest: /etc/profile.d/npm_global.sh
+    mode: '0644'
+  when: add_npm_to_path | default(true) | bool
+
+# Versions are queried after installation; a tool that is missing or was
+# skipped is reported as "Not installed" instead of failing the play.
+- name: Collect installed tool versions
+  ansible.builtin.command: "{{ item }} --version"
+  loop:
+    - node
+    - npm
+    - yarn
+  register: nodejs_tool_versions
+  changed_when: false
+  failed_when: false
+
+- name: Verify installations
+  ansible.builtin.debug:
+    msg: "{{ item.item }} version: {{ (item.stdout | default('') | trim) if (item.rc | default(1)) == 0 else 'Not installed' }}"
+  loop: "{{ nodejs_tool_versions.results }}"
+  loop_control:
+    label: "{{ item.item }}"
diff --git a/roles/vhosts/nodejs/tasks/ubuntu.yml b/roles/vhosts/nodejs/tasks/ubuntu.yml
new file mode 100644
index 0000000..3b2d527
--- /dev/null
+++ b/roles/vhosts/nodejs/tasks/ubuntu.yml
@@ -0,0 +1,53 @@
+---
+- name: Ensure prerequisite packages are installed
+  
ansible.builtin.apt:
+    name:
+      - ca-certificates
+      - curl
+      - gnupg
+    state: present
+    update_cache: true
+  become: true
+
+- name: Ensure directory for apt keyrings exists
+  ansible.builtin.file:
+    path: /etc/apt/keyrings
+    state: directory
+    mode: '0755'
+  become: true
+
+- name: Download NodeSource GPG key
+  ansible.builtin.get_url:
+    url: https://deb.nodesource.com/gpgkey/nodesource-repo.gpg.key
+    dest: /etc/apt/keyrings/nodesource.asc  # ASCII-armored key: apt only accepts it with an .asc suffix
+    mode: '0644'
+  register: nodesource_key
+  become: true
+
+- name: Configure NodeSource Node.js {{ nodejs_major_version | default(22) }}.x repository
+  ansible.builtin.copy:
+    dest: "/etc/apt/sources.list.d/nodesource-node{{ nodejs_major_version | default(22) }}.list"
+    content: |
+      deb [signed-by=/etc/apt/keyrings/nodesource.asc arch=amd64,arm64] https://deb.nodesource.com/node_{{ nodejs_major_version | default(22) }}.x nodistro main
+    mode: '0644'
+  register: nodesource_repo
+  become: true
+
+- name: Update apt cache for NodeSource repository
+  ansible.builtin.apt:
+    update_cache: true
+  when: nodesource_key.changed or nodesource_repo.changed
+  become: true
+
+- name: Ensure Node.js {{ nodejs_major_version | default(22) }}.x is installed
+  ansible.builtin.apt:
+    name: nodejs
+    state: present
+    update_cache: true
+  become: true
+
+- name: Hold Node.js package to prevent automatic upgrades
+  ansible.builtin.dpkg_selections:
+    name: nodejs
+    selection: hold
+  become: true
diff --git a/roles/vhosts/nodejs/templates/npm_global.sh.j2 b/roles/vhosts/nodejs/templates/npm_global.sh.j2
new file mode 100644
index 0000000..13652b7
--- /dev/null
+++ b/roles/vhosts/nodejs/templates/npm_global.sh.j2
@@ -0,0 +1,3 @@
+# NPM global packages PATH
+export PATH="{{ npm_config_prefix }}/bin:$PATH"
+export npm_config_prefix="{{ npm_config_prefix }}"
diff --git a/roles/vhosts/openobserve/defaults/main.yml
b/roles/vhosts/openobserve/defaults/main.yml
new file mode 100644
index 0000000..c0d9f82
--- /dev/null
+++ b/roles/vhosts/openobserve/defaults/main.yml
@@ -0,0 +1,11 @@
+openobserve_working_dir: /data
+openobserve_binary: /usr/bin/openobserve
+# Placeholder credentials: override both in inventory or vault.
+openobserve_root_user_email: root@example.com
+openobserve_root_user_password: changeme
+# Root-only file holding the credentials above; loaded by the systemd unit.
+openobserve_env_file: /etc/default/openobserve
+openobserve_memory_cache_max_size: 256
+openobserve_compact_enabled: false
+openobserve_query_parallelism: 1
+openobserve_feature_per_thread_lock: true
diff --git a/roles/vhosts/openobserve/tasks/main.yml b/roles/vhosts/openobserve/tasks/main.yml
new file mode 100644
index 0000000..2569008
--- /dev/null
+++ b/roles/vhosts/openobserve/tasks/main.yml
@@ -0,0 +1,35 @@
+- name: Ensure OpenObserve working directory exists
+  ansible.builtin.file:
+    path: "{{ openobserve_working_dir }}"
+    state: directory
+    mode: '0755'
+  when: inventory_hostname in groups[group]
+
+# The unit file is world-readable, so the root credentials are kept in a
+# separate root-only file that the unit loads through EnvironmentFile=.
+- name: Write OpenObserve root credentials to a root-only environment file
+  ansible.builtin.copy:
+    dest: "{{ openobserve_env_file }}"
+    content: |
+      ZO_ROOT_USER_EMAIL={{ openobserve_root_user_email }}
+      ZO_ROOT_USER_PASSWORD={{ openobserve_root_user_password }}
+    owner: root
+    group: root
+    mode: '0600'
+  no_log: true
+  when: inventory_hostname in groups[group]
+
+- name: Install OpenObserve systemd service
+  ansible.builtin.template:
+    src: openobserve.service.j2
+    dest: /etc/systemd/system/openobserve.service
+    mode: '0644'
+  when: inventory_hostname in groups[group]
+
+- name: Enable and start OpenObserve service
+  ansible.builtin.systemd:
+    name: openobserve
+    enabled: true
+    state: restarted
+    daemon_reload: true
+  when: inventory_hostname in groups[group]
diff --git a/roles/vhosts/openobserve/templates/openobserve.service.j2 b/roles/vhosts/openobserve/templates/openobserve.service.j2
new file mode 100644
index 0000000..dd0e8a5
--- /dev/null
+++ b/roles/vhosts/openobserve/templates/openobserve.service.j2
@@ -0,0 +1,25 @@
+[Unit]
+Description=OpenObserve Service
+After=network.target
+
+[Service]
+Type=simple
+User=root
+Group=root
+WorkingDirectory={{ openobserve_working_dir }}
+ExecStart={{ openobserve_binary }}
+Restart=on-failure
+RestartSec=5
+
+# ZO_ROOT_USER_EMAIL / ZO_ROOT_USER_PASSWORD are read from a root-only file.
+EnvironmentFile={{ openobserve_env_file }}
+Environment=ZO_MEMORY_CACHE_MAX_SIZE={{ openobserve_memory_cache_max_size }}
+Environment=ZO_COMPACT_ENABLED={{ openobserve_compact_enabled | lower }}
+Environment=ZO_QUERY_PARALLELISM={{ openobserve_query_parallelism }}
+Environment=ZO_FEATURE_PER_THREAD_LOCK={{ openobserve_feature_per_thread_lock | lower }}
+
+# Optional: Increase file descriptor limit to avoid errors with many small files
+# LimitNOFILE=262144
+
+[Install]
+WantedBy=multi-user.target
diff --git a/roles/vhosts/otel-collector/meta/main.yml b/roles/vhosts/otel-collector/meta/main.yml
new file mode 100644
index 0000000..9711b33
--- /dev/null
+++ b/roles/vhosts/otel-collector/meta/main.yml
@@ -0,0 +1,2 @@
+dependencies:
+  - role: common
diff --git a/roles/vhosts/otel-collector/tasks/main.yml b/roles/vhosts/otel-collector/tasks/main.yml
new file mode 100644
index 0000000..81cd497
--- /dev/null
+++ b/roles/vhosts/otel-collector/tasks/main.yml
@@ -0,0 +1,126 @@
+---
+- name: Set otel-collector version and arch
+  ansible.builtin.set_fact:
+    otel_collector_version: "{{ otel_collector_version | default('0.133.0') }}"
+    _otel_arch_map:
+      x86_64: amd64
+      amd64: amd64
+      aarch64: arm64
+      arm64: arm64
+  when: inventory_hostname in groups[group]
+
+- name: Resolve otel-collector arch
+  ansible.builtin.set_fact:
+    otel_arch: "{{ _otel_arch_map.get(ansible_architecture, 'amd64') }}"
+  when: inventory_hostname in groups[group]
+
+- name: Ensure openobserve-agent user exists
+  ansible.builtin.user:
+    name: openobserve-agent
+    system: true
+    shell: /usr/sbin/nologin
+    create_home: false
+  when: inventory_hostname in groups[group]
+
+# === Default: tar.gz installation ===
+- name: Download otelcol-contrib archive
+  ansible.builtin.get_url:
+    url: "https://artifact.svc.plus/otel/OpenTelemetry/v{{ otel_collector_version }}/otelcol-contrib_{{ otel_collector_version }}_linux_{{ otel_arch }}.tar.gz"
+    dest: "/tmp/otelcol-contrib_{{ otel_collector_version }}_linux_{{ otel_arch }}.tar.gz"
+    mode: "0644"
+  when: inventory_hostname in groups[group]
+
+# unarchive does not create its destination, so it is created up front.
+- name: Ensure otelcol-contrib extraction directory exists
+  ansible.builtin.file:
+    path: "/tmp/otelcol-contrib_{{ otel_collector_version }}_linux_{{ otel_arch }}"
+    state: directory
+    mode: '0755'
+  when: inventory_hostname in groups[group]
+
+# `creates` points at the extracted binary rather than the directory, which
+# now always exists by the time this task runs.
+- name: Extract otelcol-contrib
+  ansible.builtin.unarchive:
+    src: "/tmp/otelcol-contrib_{{ otel_collector_version }}_linux_{{ otel_arch }}.tar.gz"
+    dest: "/tmp/otelcol-contrib_{{ otel_collector_version }}_linux_{{ otel_arch }}"
+    remote_src: true
+    creates: "/tmp/otelcol-contrib_{{ otel_collector_version }}_linux_{{ otel_arch }}/otelcol-contrib"
+  when: inventory_hostname in groups[group]
+
+- name: Install otelcol-contrib binary
+  ansible.builtin.copy:
+    src: "/tmp/otelcol-contrib_{{ otel_collector_version }}_linux_{{ otel_arch }}/otelcol-contrib"
+    dest: /usr/local/bin/otelcol-contrib
+    mode: '0755'
+    remote_src: true
+  when: inventory_hostname in groups[group]
+
+- name: Remove otelcol-contrib archive
+  ansible.builtin.file:
+    path: "/tmp/otelcol-contrib_{{ otel_collector_version }}_linux_{{ otel_arch }}.tar.gz"
+    state: absent
+  when: inventory_hostname in groups[group]
+
+- name: Cleanup extracted otelcol-contrib directory
+  ansible.builtin.file:
+    path: "/tmp/otelcol-contrib_{{ otel_collector_version }}_linux_{{ otel_arch }}"
+    state: absent
+  when: inventory_hostname in groups[group]
+
+# === Optional: Debian/Ubuntu .deb installation (disabled by default) ===
+# - name: Download otelcol-contrib deb
+#   ansible.builtin.get_url:
+#     url: "https://github.com/open-telemetry/opentelemetry-collector-releases/releases/download/"
+#          "v{{ otel_collector_version }}/otelcol-contrib_{{ otel_collector_version }}_linux_{{ otel_arch }}.deb"
+#     dest: "/tmp/otelcol-contrib_{{ otel_collector_version }}_linux_{{ otel_arch }}.deb"
+#     mode: "0644"
+#   when:
+#     - inventory_hostname in groups[group]
+#     - ansible_os_family == 'Debian'
+#
+# - name: Install otelcol-contrib via deb
+#   ansible.builtin.apt:
+#     deb: "/tmp/otelcol-contrib_{{ otel_collector_version }}_linux_{{ otel_arch }}.deb"
+#     state: present
+#     update_cache: false
+#   when:
+#     - inventory_hostname in groups[group]
+#     - ansible_os_family == 'Debian'
+
+# === Configuration validation ===
+- name: Ensure OpenObserve authorization is provided
+  ansible.builtin.assert:
+    that:
+      - otlp_auth is defined
+      - otlp_auth | string | trim | length > 0
+    fail_msg: otlp_auth must be defined and non-empty for otel collector deployment
+  when: inventory_hostname in groups[group]
+
+# The rendered config embeds the otlp_auth Authorization header, so it is
+# readable only by the collector's service account.
+- name: Deploy otel collector config
+  ansible.builtin.template:
+    src: otel-config.yaml
+    dest: /etc/otel-config.yaml
+    owner: openobserve-agent
+    group: openobserve-agent
+    mode: '0600'
+  when: inventory_hostname in groups[group]
+
+- name: Create otel collector service
+  ansible.builtin.template:
+    src: otel-collector.service
+    dest: /etc/systemd/system/otel-collector.service
+    owner: root
+    group: root
+    mode: '0644'
+  when: inventory_hostname in groups[group]
+
+- name: Enable and start otel collector
+  ansible.builtin.systemd:
+    name: otel-collector
+    enabled: true
+    state: restarted
+    daemon_reload: true
+  when: inventory_hostname in groups[group]
diff --git a/roles/vhosts/otel-collector/templates/otel-collector.service b/roles/vhosts/otel-collector/templates/otel-collector.service
new file mode 100644
index 0000000..4239162
--- /dev/null
+++ b/roles/vhosts/otel-collector/templates/otel-collector.service
@@ -0,0 +1,13 @@
+[Unit]
+Description=OpenTelemetry Collector
+After=network.target network-online.target
+
+[Service]
+User=openobserve-agent
+Group=openobserve-agent
+ExecStart=/usr/local/bin/otelcol-contrib --config /etc/otel-config.yaml
+Restart=always
+RestartSec=10
+
+[Install]
+WantedBy=multi-user.target
diff --git a/roles/vhosts/otel-collector/templates/otel-config.yaml b/roles/vhosts/otel-collector/templates/otel-config.yaml
new file mode 100644
index 0000000..006a428
--- /dev/null
+++ b/roles/vhosts/otel-collector/templates/otel-config.yaml
@@ -0,0 +1,74 @@
+{% set node_static_configs = otel_prometheus_node_static_configs | mandatory('otel_prometheus_node_static_configs must be provided') %}
+{% set process_static_configs = otel_prometheus_process_static_configs | mandatory('otel_prometheus_process_static_configs must be provided') %}
+
+receivers:
+  prometheus:
+    config:
+      global:
+        scrape_interval: 10s
+      scrape_configs:
+        - job_name: 'node-exporter'
+          static_configs:
+{% for
static_config in node_static_configs %}
+            - targets: {{ static_config.targets | to_json }}
+              labels:
+{% for label_key, label_value in static_config.labels.items() %}
+                {{ label_key }}: '{{ label_value }}'
+{% endfor %}
+{% endfor %}
+        - job_name: 'process-exporter'
+          static_configs:
+{% for static_config in process_static_configs %}
+            - targets: {{ static_config.targets | to_json }}
+              labels:
+{% for label_key, label_value in static_config.labels.items() %}
+                {{ label_key }}: '{{ label_value }}'
+{% endfor %}
+{% endfor %}
+  journald:
+    directory: /var/log/journal
+  filelog/std:
+    include: [ /var/log/**log ]
+
+processors:
+  resourcedetection/system:
+    detectors: ["system"]
+    system:
+      hostname_sources: ["os"]
+  memory_limiter:
+    check_interval: 1s
+    limit_percentage: 75
+    spike_limit_percentage: 15
+  batch:
+    send_batch_size: 10000
+    timeout: 10s
+
+extensions:
+  zpages: {}
+
+exporters:
+  otlphttp/openobserve:
+    endpoint: {{ otlp_endpoint | default('https://otel.svc.plus/api/default/') }}
+    headers:
+      Authorization: "{{ otlp_auth }}"
+  otlphttp/openobserve_journald:
+    endpoint: {{ otlp_endpoint | default('https://otel.svc.plus/api/default/') }}
+    headers:
+      Authorization: "{{ otlp_auth }}"
+      stream-name: journald
+
+service:
+  extensions: [zpages]
+  pipelines:
+    metrics:
+      receivers: [prometheus]
+      processors: [resourcedetection/system, memory_limiter, batch]
+      exporters: [otlphttp/openobserve]
+    logs:
+      receivers: [filelog/std]
+      processors: [resourcedetection/system, memory_limiter, batch]
+      exporters: [otlphttp/openobserve]
+    logs/journald:
+      receivers: [journald]
+      processors: [resourcedetection/system, memory_limiter, batch]
+      exporters: [otlphttp/openobserve_journald]
diff --git a/roles/vhosts/postgres/handlers/main.yml b/roles/vhosts/postgres/handlers/main.yml
new file mode 100644
index 0000000..9264747
--- /dev/null
+++ b/roles/vhosts/postgres/handlers/main.yml
@@ -0,0 +1,5 @@
+- name: Restart PostgreSQL
+  ansible.builtin.systemd:
+    name: "{{ postgresql_service_name }}"
+    state: restarted
+    daemon_reload: true
diff --git a/roles/vhosts/postgres/meta/main.yml b/roles/vhosts/postgres/meta/main.yml
new file mode 100644
index 0000000..9711b33
--- /dev/null
+++ b/roles/vhosts/postgres/meta/main.yml
@@ -0,0 +1,2 @@
+dependencies:
+  - role: common
diff --git a/roles/vhosts/postgres/tasks/main.yml b/roles/vhosts/postgres/tasks/main.yml
new file mode 100644
index 0000000..6122c26
--- /dev/null
+++ b/roles/vhosts/postgres/tasks/main.yml
@@ -0,0 +1,121 @@
+- name: Ensure PostgreSQL repository prerequisites are installed
+  ansible.builtin.apt:
+    name: "{{ postgresql_package_dependencies | list }}"
+    state: present
+    update_cache: true
+  when:
+    - ansible_os_family == 'Debian'
+    - ansible_distribution == 'Ubuntu'
+    - ansible_distribution_version is version('22.04', '>=')
+    - postgresql_use_official_repo | bool
+
+- name: Download PostgreSQL repository signing key
+  ansible.builtin.get_url:
+    url: "{{ postgresql_repo_key_url }}"
+    dest: "{{ postgresql_repo_key_path }}"
+    mode: "0644"
+  when:
+    - ansible_os_family == 'Debian'
+    - ansible_distribution == 'Ubuntu'
+    - ansible_distribution_version is version('22.04', '>=')
+    - postgresql_use_official_repo | bool
+
+- name: Configure PostgreSQL apt repository
+  ansible.builtin.apt_repository:
+    repo: "{{ postgresql_repo }}"
+    filename: postgresql
+    state: present
+  register: postgresql_repo_config
+  when:
+    - ansible_os_family == 'Debian'
+    - ansible_distribution == 'Ubuntu'
+    - ansible_distribution_version is version('22.04', '>=')
+    - postgresql_use_official_repo | bool
+
+- name: Refresh apt cache if repository was added
+  ansible.builtin.apt:
+    update_cache: true
+  when:
+    - ansible_os_family == 'Debian'
+    - ansible_distribution == 'Ubuntu'
+    - ansible_distribution_version is version('22.04', '>=')
+    - postgresql_use_official_repo | bool
+    - postgresql_repo_config is defined
+    - postgresql_repo_config is changed
+
+- name: Set package list for PostgreSQL
+  ansible.builtin.set_fact:
+    postgresql_packages: "{{ (postgresql_packages_base + postgresql_extra_packages) | unique | list }}"
+
+- name: Install PostgreSQL packages
+  ansible.builtin.apt:
+    name: "{{ postgresql_packages | list }}"
+    state: present
+    update_cache: true
+  when:
+    - ansible_os_family == 'Debian'
+
+- name: Ensure PostgreSQL service is enabled and started
+  ansible.builtin.systemd:
+    name: "{{ postgresql_service_name }}"
+    enabled: true
+    state: started
+
+- name: Gather facts for PostgreSQL configuration files
+  ansible.builtin.stat:
+    path: "{{ postgresql_conf_path }}"
+  register: postgresql_conf_file
+
+- name: Configure listen_addresses in postgresql.conf
+  ansible.builtin.lineinfile:
+    path: "{{ postgresql_conf_path }}"
+    regexp: '^#?listen_addresses\s*='
+    line: "listen_addresses = '{{ postgresql_listen_addresses }}'"
+  when: postgresql_conf_file.stat.exists
+  notify: Restart PostgreSQL
+
+- name: Configure port in postgresql.conf
+  ansible.builtin.lineinfile:
+    path: "{{ postgresql_conf_path }}"
+    regexp: '^#?port\s*='
+    line: "port = {{ postgresql_port }}"
+  when: postgresql_conf_file.stat.exists
+  notify: Restart PostgreSQL
+
+- name: Configure password_encryption in postgresql.conf
+  ansible.builtin.lineinfile:
+    path: "{{ postgresql_conf_path }}"
+    regexp: '^#?password_encryption\s*='
+    line: "password_encryption = '{{ postgresql_password_encryption }}'"
+  when:
+    - postgresql_conf_file.stat.exists
+    - postgresql_password_encryption | length > 0
+  notify: Restart PostgreSQL
+
+- name: Ensure pg_hba.conf exists
+  ansible.builtin.stat:
+    path: "{{ postgresql_hba_path }}"
+  register: postgresql_hba_file
+
+- name: Configure pg_hba.conf access rules
+  ansible.builtin.blockinfile:
+    path: "{{ postgresql_hba_path }}"
+    marker: "# {mark} ANSIBLE MANAGED BLOCK FOR POSTGRESQL ACCESS"
+    block: |-
+      {% for network in postgresql_allowed_hosts %}
+      host all all {{ network }} {{ postgresql_auth_method }}
+      {% endfor %}
+  when:
+    - postgresql_hba_file.stat.exists
+    - postgresql_allowed_hosts | length > 0
+  notify: Restart PostgreSQL
+
+- name: Remove managed pg_hba.conf block when no networks are defined
+  ansible.builtin.blockinfile:
+    path: "{{ postgresql_hba_path }}"
+    marker: "# {mark} ANSIBLE MANAGED BLOCK FOR POSTGRESQL ACCESS"
+    state: absent
+  when:
+    - postgresql_hba_file.stat.exists
+    - postgresql_allowed_hosts | length == 0
+  notify: Restart PostgreSQL
diff --git a/roles/vhosts/process_exporter/meta/main.yml b/roles/vhosts/process_exporter/meta/main.yml
new file mode 100644
index 0000000..9711b33
--- /dev/null
+++ b/roles/vhosts/process_exporter/meta/main.yml
@@ -0,0 +1,2 @@
+dependencies:
+  - role: common
diff --git a/roles/vhosts/process_exporter/tasks/main.yml b/roles/vhosts/process_exporter/tasks/main.yml
new file mode 100644
index 0000000..8c4dea9
--- /dev/null
+++ b/roles/vhosts/process_exporter/tasks/main.yml
@@ -0,0 +1,71 @@
+- name: Ensure process_exporter user exists
+  ansible.builtin.user:
+    name: process_exporter
+    system: true
+    shell: /usr/sbin/nologin
+    create_home: false
+  when: inventory_hostname in groups[group]
+
+- name: Download process-exporter archive
+  ansible.builtin.get_url:
+    url: >-
+      https://github.com/ncabatoff/process-exporter/releases/download/v{{
+      process_exporter_version | default('0.7.10') }}/process-exporter-{{
+      process_exporter_version | default('0.7.10') }}.linux-amd64.tar.gz
+    dest: /tmp/process-exporter.tar.gz
+    mode: "0644"
+  when: inventory_hostname in groups[group]
+
+- name: Extract process-exporter
+  ansible.builtin.unarchive:
+    src: /tmp/process-exporter.tar.gz
+    dest: /tmp
+    remote_src: true
+    creates: "/tmp/process-exporter-{{ process_exporter_version | default('0.7.10') }}.linux-amd64"
+  when: inventory_hostname in groups[group]
+
+- name: Install process-exporter binary
+  ansible.builtin.copy:
+    src: "/tmp/process-exporter-{{ process_exporter_version | default('0.7.10') }}.linux-amd64/process-exporter"
+    dest: /usr/local/bin/process-exporter
+    mode: '0755'
+    
remote_src: true + when: inventory_hostname in groups[group] + +- name: Remove process-exporter archive + ansible.builtin.file: + path: /tmp/process-exporter.tar.gz + state: absent + when: inventory_hostname in groups[group] + +- name: Cleanup extracted process-exporter directory + ansible.builtin.file: + path: "/tmp/process-exporter-{{ process_exporter_version | default('0.7.10') }}.linux-amd64" + state: absent + when: inventory_hostname in groups[group] + +- name: Deploy process-exporter config + ansible.builtin.template: + src: process-exporter.yml + dest: /etc/process-exporter.yml + owner: process_exporter + group: process_exporter + mode: '0644' + when: inventory_hostname in groups[group] + +- name: Create process-exporter service + ansible.builtin.template: + src: process-exporter.service + dest: /etc/systemd/system/process-exporter.service + owner: root + group: root + mode: '0644' + when: inventory_hostname in groups[group] + +- name: Enable and start process-exporter + ansible.builtin.systemd: + name: process-exporter + enabled: true + state: restarted + daemon_reload: true + when: inventory_hostname in groups[group] diff --git a/roles/vhosts/process_exporter/templates/process-exporter.service b/roles/vhosts/process_exporter/templates/process-exporter.service new file mode 100644 index 0000000..95cb040 --- /dev/null +++ b/roles/vhosts/process_exporter/templates/process-exporter.service @@ -0,0 +1,18 @@ +[Unit] +Description=process-exporter +Wants=network-online.target +After=network-online.target + +[Service] +User=process_exporter +Group=process_exporter +ExecStart=/usr/local/bin/process-exporter --config.path /etc/process-exporter.yml --web.listen-address={{ process_exporter_bind_addr | default('0.0.0.0') }}:{{ process_exporter_port | default('9256') }} +Restart=always + +NoNewPrivileges=yes +PrivateTmp=yes +ProtectSystem=full +ProtectHome=yes + +[Install] +WantedBy=multi-user.target diff --git 
a/roles/vhosts/process_exporter/templates/process-exporter.yml b/roles/vhosts/process_exporter/templates/process-exporter.yml new file mode 100644 index 0000000..2d82c91 --- /dev/null +++ b/roles/vhosts/process_exporter/templates/process-exporter.yml @@ -0,0 +1,5 @@ +{% raw %} +process_names: + - name: "{{.Comm}}" + cmdline: [".+"] +{% endraw %} diff --git a/roles/vhosts/prometheus-transfer/meta/main.yml b/roles/vhosts/prometheus-transfer/meta/main.yml new file mode 100644 index 0000000..cfa117f --- /dev/null +++ b/roles/vhosts/prometheus-transfer/meta/main.yml @@ -0,0 +1,2 @@ +dependencies: + - role: node-exporter diff --git a/roles/vhosts/prometheus-transfer/tasks/main.yml b/roles/vhosts/prometheus-transfer/tasks/main.yml new file mode 100755 index 0000000..dd139d3 --- /dev/null +++ b/roles/vhosts/prometheus-transfer/tasks/main.yml @@ -0,0 +1,21 @@ +- name: Pre setting + shell: "rm -f /usr/bin/prometheus; mkdir -pv /opt/prometheus/data/ && chown prometheus:prometheus /opt/prometheus/data/" + +- name: download prometheus binary + shell: 'curl -Lo /usr/bin/prometheus https://mirrors.onwalk.net/tools/linux-amd64/prometheus && chmod 755 /usr/bin/prometheus' + +- name: create prometheus-transfer service + template: src=templates/prometheus-transfer.service dest=/etc/systemd/system/prometheus-transfer.service owner=root group=root mode=0644 + +- name: create prometheus-transfer config + template: src=templates/prometheus-transfer.yml dest=/etc/prometheus/prometheus-transfer.yml owner=root group=root mode=0644 + +- name: create prometheus-transfer start script + template: src=templates/start-prometheus-transfer-service.sh dest=/usr/bin/start-prometheus-transfer-service.sh owner=root group=root mode=0755 + +- name: create prometheus-transfer stop script + template: src=templates/stop-prometheus-transfer-service.sh dest=/usr/bin/stop-prometheus-transfer-service.sh owner=root group=root mode=0755 + +- name: init prometheus-transfer service + shell: "systemctl enable 
prometheus-transfer && systemctl daemon-reload && systemctl restart prometheus-transfer" + diff --git a/roles/vhosts/prometheus-transfer/templates/prometheus-transfer.service b/roles/vhosts/prometheus-transfer/templates/prometheus-transfer.service new file mode 100644 index 0000000..99b2750 --- /dev/null +++ b/roles/vhosts/prometheus-transfer/templates/prometheus-transfer.service @@ -0,0 +1,16 @@ +[Unit] +Description=Prometheus +Documentation=https://prometheus.io/ +After=network.target + +[Service] +Type=simple +User=prometheus +ExecStart=/usr/bin/start-prometheus-transfer-service.sh +ExecStop=/usr/bin/stop-prometheus-transfer-service.sh +Restart=on-failure +RestartSec=30 +StartLimitInterval=0 + +[Install] +WantedBy=multi-user.target diff --git a/roles/vhosts/prometheus-transfer/templates/prometheus-transfer.yml b/roles/vhosts/prometheus-transfer/templates/prometheus-transfer.yml new file mode 100644 index 0000000..f3b536b --- /dev/null +++ b/roles/vhosts/prometheus-transfer/templates/prometheus-transfer.yml @@ -0,0 +1,9 @@ +global: + scrape_interval: 3s + evaluation_interval: 3s +remote_read: + - url: '{{ remote_read }}' + read_recent: true + basic_auth: + username: '{{ remote_user }}' + password: '{{ remote_token }}' diff --git a/roles/vhosts/prometheus-transfer/templates/start-prometheus-transfer-service.sh b/roles/vhosts/prometheus-transfer/templates/start-prometheus-transfer-service.sh new file mode 100755 index 0000000..f9c01a3 --- /dev/null +++ b/roles/vhosts/prometheus-transfer/templates/start-prometheus-transfer-service.sh @@ -0,0 +1,2 @@ +#!/bin/sh +/usr/bin/prometheus --config.file=/etc/prometheus/prometheus-transfer.yml --web.listen-address="0.0.0.0:9092" --web.enable-lifecycle --storage.tsdb.path="/opt/prometheus/data/" diff --git a/roles/vhosts/prometheus-transfer/templates/stop-prometheus-transfer-service.sh b/roles/vhosts/prometheus-transfer/templates/stop-prometheus-transfer-service.sh new file mode 100755 index 0000000..bb2b391 --- /dev/null +++ 
b/roles/vhosts/prometheus-transfer/templates/stop-prometheus-transfer-service.sh @@ -0,0 +1,2 @@ +#!/bin/sh +pkill -9 prometheus diff --git a/roles/vhosts/prometheus/defaults/main.yml b/roles/vhosts/prometheus/defaults/main.yml new file mode 100644 index 0000000..e1cc6c7 --- /dev/null +++ b/roles/vhosts/prometheus/defaults/main.yml @@ -0,0 +1,15 @@ +prometheus_version: 2.49.0 +prometheus_dir: /opt/prometheus +prometheus_user: prometheus +prometheus_group: prometheus +prometheus_data: /var/lib/prometheus +prometheus_etc: /etc/prometheus +prometheus_file_sd_dir: "{{ prometheus_etc }}/file_sd" +metrics_domain: metrics.svc.plus +metrics_backend_kind: vm +metrics_backend_addr: 10.10.0.50:8428 +vm_write_path: /api/v1/write +vm_read_path: /api/v1/read +receiver_path: /api/v1/receive +enable_remote_write: true +enable_remote_read: true diff --git a/roles/vhosts/prometheus/tasks/main.yml b/roles/vhosts/prometheus/tasks/main.yml new file mode 100644 index 0000000..49927cc --- /dev/null +++ b/roles/vhosts/prometheus/tasks/main.yml @@ -0,0 +1,116 @@ +- name: Create Prometheus directories + ansible.builtin.file: + path: "{{ item }}" + state: directory + mode: '0755' + loop: + - "{{ prometheus_dir }}" + - "{{ prometheus_etc }}" + - "{{ prometheus_data }}" + - "{{ prometheus_file_sd_dir }}" + when: inventory_hostname in groups[group] + +- name: Ensure prometheus user exists + ansible.builtin.user: + name: "{{ prometheus_user }}" + system: true + shell: /usr/sbin/nologin + create_home: false + when: inventory_hostname in groups[group] + +- name: Set Prometheus archive for amd64 + ansible.builtin.set_fact: + prometheus_tar: "prometheus-{{ prometheus_version }}.linux-amd64.tar.gz" + prometheus_src_dir: "prometheus-{{ prometheus_version }}.linux-amd64" + when: + - inventory_hostname in groups[group] + - ansible_architecture in ['x86_64', 'amd64'] + +- name: Set Prometheus archive for arm64 + ansible.builtin.set_fact: + prometheus_tar: "prometheus-{{ prometheus_version 
}}.linux-arm64.tar.gz" + prometheus_src_dir: "prometheus-{{ prometheus_version }}.linux-arm64" + when: + - inventory_hostname in groups[group] + - ansible_architecture in ['aarch64', 'arm64'] + +- name: Download Prometheus archive + ansible.builtin.get_url: + url: "https://github.com/prometheus/prometheus/releases/download/v{{ prometheus_version }}/{{ prometheus_tar }}" + dest: "/tmp/{{ prometheus_tar }}" + mode: '0644' + when: inventory_hostname in groups[group] + +- name: Extract Prometheus archive + ansible.builtin.unarchive: + src: "/tmp/{{ prometheus_tar }}" + dest: /tmp + remote_src: true + creates: "/tmp/{{ prometheus_src_dir }}" + when: inventory_hostname in groups[group] + +- name: Install Prometheus binaries + ansible.builtin.copy: + src: "/tmp/{{ prometheus_src_dir }}/{{ item }}" + dest: "{{ prometheus_dir }}/{{ item }}" + mode: '0755' + remote_src: true + loop: + - prometheus + - promtool + when: inventory_hostname in groups[group] + +- name: Symlink Prometheus binaries + ansible.builtin.file: + src: "{{ prometheus_dir }}/{{ item }}" + dest: "/usr/local/bin/{{ item }}" + state: link + loop: + - prometheus + - promtool + when: inventory_hostname in groups[group] + +- name: Create default file_sd config + ansible.builtin.template: + src: nodes.json.j2 + dest: "{{ prometheus_file_sd_dir }}/nodes.json" + owner: "{{ prometheus_user }}" + group: "{{ prometheus_group }}" + mode: '0644' + when: inventory_hostname in groups[group] + +- name: Deploy Prometheus configuration + ansible.builtin.template: + src: prometheus.yml.j2 + dest: "{{ prometheus_etc }}/prometheus.yml" + owner: "{{ prometheus_user }}" + group: "{{ prometheus_group }}" + mode: '0644' + when: inventory_hostname in groups[group] + +- name: Ensure Prometheus ownership + ansible.builtin.file: + path: "{{ item }}" + state: directory + owner: "{{ prometheus_user }}" + group: "{{ prometheus_group }}" + recurse: true + loop: + - "{{ prometheus_data }}" + - "{{ prometheus_etc }}" + when: 
inventory_hostname in groups[group] + +- name: Install Prometheus service + ansible.builtin.template: + src: prometheus.service.j2 + dest: /etc/systemd/system/prometheus.service + mode: '0644' + when: inventory_hostname in groups[group] + +- name: Enable and start Prometheus + ansible.builtin.systemd: + name: prometheus + enabled: true + state: restarted + daemon_reload: true + when: inventory_hostname in groups[group] diff --git a/roles/vhosts/prometheus/templates/nodes.json.j2 b/roles/vhosts/prometheus/templates/nodes.json.j2 new file mode 100644 index 0000000..b761aba --- /dev/null +++ b/roles/vhosts/prometheus/templates/nodes.json.j2 @@ -0,0 +1,3 @@ +[ + { "targets": ["127.0.0.1:9100"], "labels": { "instance": "localhost" } } +] diff --git a/roles/vhosts/prometheus/templates/prometheus.service.j2 b/roles/vhosts/prometheus/templates/prometheus.service.j2 new file mode 100644 index 0000000..96e9499 --- /dev/null +++ b/roles/vhosts/prometheus/templates/prometheus.service.j2 @@ -0,0 +1,19 @@ +[Unit] +Description=Prometheus Server +After=network-online.target +Wants=network-online.target + +[Service] +User={{ prometheus_user }} +Group={{ prometheus_group }} +ExecStart={{ prometheus_dir }}/prometheus \ + --config.file={{ prometheus_etc }}/prometheus.yml \ + --storage.tsdb.path={{ prometheus_data }} \ + --web.enable-lifecycle \ + --web.external-url=https://{{ metrics_domain }}/prom/ \ + --web.route-prefix=/ +Restart=always +LimitNOFILE=65536 + +[Install] +WantedBy=multi-user.target diff --git a/roles/vhosts/prometheus/templates/prometheus.yml.j2 b/roles/vhosts/prometheus/templates/prometheus.yml.j2 new file mode 100644 index 0000000..4134631 --- /dev/null +++ b/roles/vhosts/prometheus/templates/prometheus.yml.j2 @@ -0,0 +1,23 @@ +global: + scrape_interval: 30s + evaluation_interval: 30s + +scrape_configs: + - job_name: 'node' + file_sd_configs: + - files: ['{{ prometheus_file_sd_dir }}/nodes.json'] +{% if enable_remote_write %} + +remote_write: + - url: https://{{ 
metrics_domain }}{% if metrics_backend_kind == 'receiver' %}{{ receiver_path }}{% else %}{{ vm_write_path }}{% endif %} + queue_config: + max_samples_per_send: 10000 + max_shards: 8 + capacity: 100000 +{% endif %} +{% if enable_remote_read and metrics_backend_kind == 'vm' %} + +remote_read: + - url: https://{{ metrics_domain }}{{ vm_read_path }} + read_recent: true +{% endif %} diff --git a/roles/vhosts/promtail-agent/meta/main.yml b/roles/vhosts/promtail-agent/meta/main.yml new file mode 100644 index 0000000..9711b33 --- /dev/null +++ b/roles/vhosts/promtail-agent/meta/main.yml @@ -0,0 +1,2 @@ +dependencies: + - role: common diff --git a/roles/vhosts/promtail-agent/tasks/main.yml b/roles/vhosts/promtail-agent/tasks/main.yml new file mode 100755 index 0000000..4fa017f --- /dev/null +++ b/roles/vhosts/promtail-agent/tasks/main.yml @@ -0,0 +1,19 @@ +- name: Pre setting + shell: "rm -f /usr/bin/promtail; mkdir -pv /etc/promtail/ && touch /var/log/positions.yaml" + when: inventory_hostname in groups[group] + +- name: Download promtail binary + shell: 'curl -Lo /usr/bin/promtail https://mirrors.onwalk.net/tools/linux-amd64/promtail && chmod 755 /usr/bin/promtail' + when: inventory_hostname in groups[group] + +- name: Create promtail-agent service + template: src=templates/promtail-agent.service dest=/lib/systemd/system/promtail-agent.service owner=root group=root mode=0644 + when: inventory_hostname in groups[group] + +- name: Create promtail-agent config + template: src=templates/promtail.yaml dest=/etc/promtail/promtail.yaml owner=root group=root mode=0644 + when: inventory_hostname in groups[group] + +- name: Init promtail-agent service + shell: "systemctl enable promtail-agent && systemctl daemon-reload && systemctl restart promtail-agent" + when: inventory_hostname in groups[group] diff --git a/roles/vhosts/promtail-agent/templates/promtail-agent.service b/roles/vhosts/promtail-agent/templates/promtail-agent.service new file mode 100644 index 0000000..4d27b40 --- 
/dev/null +++ b/roles/vhosts/promtail-agent/templates/promtail-agent.service @@ -0,0 +1,15 @@ +[Unit] +Description=Promtail log shipping agent +Documentation=https://grafana.io/ +After=network.target + +[Service] +Type=simple +User=root +ExecStart=/usr/bin/promtail -config.file=/etc/promtail/promtail.yaml +Restart=on-failure +RestartSec=30 +StartLimitInterval=0 + +[Install] +WantedBy=multi-user.target diff --git a/roles/vhosts/promtail-agent/templates/promtail.yaml b/roles/vhosts/promtail-agent/templates/promtail.yaml new file mode 100644 index 0000000..8bdb778 --- /dev/null +++ b/roles/vhosts/promtail-agent/templates/promtail.yaml @@ -0,0 +1,38 @@ +server: + http_listen_port: 9080 + grpc_listen_port: 0 + +clients: + - url: http://{{ loki_host }}:{{ loki_port }}/loki/api/v1/push + +positions: + filename: /var/log/positions.yaml + +scrape_configs: +- job_name: system-auth-log + static_configs: + - targets: + - localhost + labels: + instance: {{ inventory_hostname }} + {{ label }} + job: secure + __path__: /var/log/auth.log +- job_name: system-os-log + static_configs: + - targets: + - localhost + labels: + instance: {{ inventory_hostname }} + {{ label }} + job: syslog + __path__: /var/log/syslog +- job_name: system-audit-log + static_configs: + - targets: + - localhost + labels: + instance: {{ inventory_hostname }} + {{ label }} + job: audit + __path__: /var/log/audit/audit.log diff --git a/roles/vhosts/sealos-k8s/defaults/main.yml b/roles/vhosts/sealos-k8s/defaults/main.yml new file mode 100644 index 0000000..e598aef --- /dev/null +++ b/roles/vhosts/sealos-k8s/defaults/main.yml @@ -0,0 +1 @@ +microk8s_channel: "1.31/stable" diff --git a/roles/vhosts/sealos-k8s/tasks/main.yml b/roles/vhosts/sealos-k8s/tasks/main.yml new file mode 100644 index 0000000..cbaae98 --- /dev/null +++ b/roles/vhosts/sealos-k8s/tasks/main.yml @@ -0,0 +1,115 @@ +- name: Install MicroK8s + community.general.snap: + name: microk8s + channel: "{{ microk8s_channel }}" + classic: yes + +- name: Start MicroK8s +
shell: microk8s status | grep "microk8s is running" || microk8s start + changed_when: false + +- name: Configure wireguard IP for k8s comms + block: + - name: Get current kubelet args + slurp: + src: /var/snap/microk8s/current/args/kubelet + register: kubelet_args + + - name: Get current kube-apiserver args + slurp: + src: /var/snap/microk8s/current/args/kube-apiserver + register: kubeapi_args + + - name: Check if node-ip is already configured + set_fact: + has_node_ip: "{{ (kubelet_args.content | b64decode) is regex('--node-ip=') }}" + + - name: Check if advertise-addr is already configured + set_fact: + has_adv_ip: "{{ (kubeapi_args.content | b64decode) is regex('--advertise-address=') }}" + + - name: Add node-ip to kubelet args + lineinfile: + path: /var/snap/microk8s/current/args/kubelet + line: "--node-ip={{ wireguard_ip }}" + create: yes + when: not has_node_ip + register: kubelet_modified + + - name: Add advertise-address to kubeapi args + lineinfile: + path: /var/snap/microk8s/current/args/kube-apiserver + line: "--advertise-address={{ wireguard_ip }}" + create: yes + when: not has_adv_ip + register: kubeapi_modified + + - name: Restart kubelet if config changed + systemd: + name: snap.microk8s.daemon-kubelite + state: restarted + when: kubelet_modified.changed or kubeapi_modified.changed + +- name: Setup Kubernetes access for user + block: + - name: Add user to microk8s group + user: + name: "{{ item }}" + groups: microk8s + append: yes + with_items: + - "{{ username }}" + - "{{ ansible_user }}" + + - name: Ensure .kube directory exists for user + file: + path: "/home/{{ username }}/.kube" + state: directory + mode: '0755' + owner: "{{ username }}" + group: "{{ username }}" + + - name: Generate kubeconfig from microk8s + shell: microk8s config > /home/{{ username }}/.kube/config + args: + creates: "/home/{{ username }}/.kube/config" + + - name: Set kubeconfig permissions + file: + path: "/home/{{ username }}/.kube/config" + mode: '0600' + owner: "{{ 
username }}" + group: "{{ username }}" + +- name: Create containerd config directory + file: + path: /var/snap/microk8s/current/args/certs.d + state: directory + mode: '0755' + +- name: Set registry hostname + set_fact: + registry_hostname: "{{ validator | lower }}.localregistry.chutes.ai" + +- name: Create certs.d directory for registry + file: + path: "/var/snap/microk8s/current/args/certs.d/{{ registry_hostname }}:{{ registry_port }}" + state: directory + mode: '0755' + +- name: Create hosts.toml for registry + template: + src: hosts.toml.j2 + dest: "/var/snap/microk8s/current/args/certs.d/{{ registry_hostname }}:{{ registry_port }}/hosts.toml" + mode: '0644' + +- name: Update DNS resolution config + template: + src: resolved.conf.j2 + dest: "/etc/systemd/resolved.conf" + mode: '0644' + +- name: Restart systemd-resolved + systemd: + name: systemd-resolved + state: restarted diff --git a/roles/vhosts/sealos-k8s/templates/hosts.toml.j2 b/roles/vhosts/sealos-k8s/templates/hosts.toml.j2 new file mode 100644 index 0000000..437c448 --- /dev/null +++ b/roles/vhosts/sealos-k8s/templates/hosts.toml.j2 @@ -0,0 +1,4 @@ +server = "https://{{ registry_hostname }}:{{ registry_port }}" +[host."https://{{ registry_hostname }}:{{ registry_port }}"] + capabilities = ["pull", "resolve", "push"] + skip_verify = true diff --git a/roles/vhosts/sealos-k8s/templates/resolved.conf.j2 b/roles/vhosts/sealos-k8s/templates/resolved.conf.j2 new file mode 100644 index 0000000..b3003c3 --- /dev/null +++ b/roles/vhosts/sealos-k8s/templates/resolved.conf.j2 @@ -0,0 +1,4 @@ +[Resolve] +DNS=8.8.8.8 +FallbackDNS=8.8.4.4 +DNSStubListener=yes diff --git a/roles/vhosts/secret-manger/tasks/main.yml b/roles/vhosts/secret-manger/tasks/main.yml new file mode 100755 index 0000000..f12b12b --- /dev/null +++ b/roles/vhosts/secret-manger/tasks/main.yml @@ -0,0 +1,48 @@ +- name: "cluster {{ ClusterContext }} : Create namespace" + shell: "kubectl create ns {{ namespace }} || echo true" + +# Create General 
Secret for K8S form File + +- name: "cluster {{ ClusterContext }} : Clean OLD Secret" + shell: "kubectl delete secret {{ item.secret_name }} -n {{ namespace }}" + ignore_errors: yes + loop: "{{ generic }}" + when: generic is defined + +- name: "cluster {{ ClusterContext }} Create New Generic Secret from Key/Vaule" + shell: 'kubectl create secret generic {{ item.secret_name }} \ + --from-literal={{ item.sercet_key }}="{{ item.secret_value }}" \ + -n {{ namespace }}' + loop: "{{ generic }}" + when: generic is defined + +# Create General Secret for K8S From Key/Value + +- name: "cluster {{ ClusterContext }} : Clean OLD Secret" + shell: "kubectl delete secret {{ item.secret_name }} -n {{ namespace }}" + ignore_errors: yes + loop: "{{ secrets }}" + when: secrets is defined + +- name: "cluster {{ ClusterContext }} Create New Generic Secret from Key/Vaule" + shell: 'kubectl create secret generic {{ item.secret_name }} \ + --from-literal={{ item.sercet_key }}="{{ item.secret_value }}" \ + -n {{ namespace }}' + loop: "{{ secrets }}" + when: secrets is defined + +# TLS Secret for K8S key/cert + +- name: "cluster {{ ClusterContext }} : Clean OLD Secret" + shell: "kubectl delete secret {{ item.secret_name }} -n {{ namespace }}" + ignore_errors: yes + loop: "{{ tls }}" + when: tls is defined + +- name: "cluster {{ ClusterContext }} : Create New tls secret" + shell: 'kubectl create secret tls {{ item.secret_name }} \ + --key={{ item.keyfile }} \ + --cert={{ item.certfile }} \ + -n {{ namespace }}' + loop: "{{ tls }}" + when: tls is defined diff --git a/roles/vhosts/ssh-trust/defaults/main.yml b/roles/vhosts/ssh-trust/defaults/main.yml new file mode 100644 index 0000000..3cd2c41 --- /dev/null +++ b/roles/vhosts/ssh-trust/defaults/main.yml @@ -0,0 +1,2 @@ +# Default user for generating and installing SSH key +ssh_user: "{{ ansible_user | default('root') }}" diff --git a/roles/vhosts/ssh-trust/tasks/main.yml b/roles/vhosts/ssh-trust/tasks/main.yml new file mode 100644 index
0000000..a1e971f --- /dev/null +++ b/roles/vhosts/ssh-trust/tasks/main.yml @@ -0,0 +1,23 @@ +- name: Ensure SSH key pair exists on ops host + shell: | + test -f ~/.ssh/id_rsa || ssh-keygen -t rsa -b 4096 -f ~/.ssh/id_rsa -N '' -q + args: + executable: /bin/bash + delegate_to: "{{ ops_host }}" + run_once: true + become: true + become_user: "{{ ssh_user }}" + +- name: Fetch ops host public key + slurp: + src: "~/.ssh/id_rsa.pub" + register: ops_pub_key + delegate_to: "{{ ops_host }}" + run_once: true + become: true + become_user: "{{ ssh_user }}" + +- name: Authorize ops host key on cluster hosts + ansible.builtin.authorized_key: + user: "{{ ssh_user }}" + key: "{{ ops_pub_key.content | b64decode }}" diff --git a/roles/vhosts/telegraf/handlers/main.yml b/roles/vhosts/telegraf/handlers/main.yml new file mode 100644 index 0000000..6a906e7 --- /dev/null +++ b/roles/vhosts/telegraf/handlers/main.yml @@ -0,0 +1,5 @@ +--- +- name: restart telegraf + service: + name: telegraf + state: restarted diff --git a/roles/vhosts/telegraf/meta/main.yml b/roles/vhosts/telegraf/meta/main.yml new file mode 100644 index 0000000..9711b33 --- /dev/null +++ b/roles/vhosts/telegraf/meta/main.yml @@ -0,0 +1,2 @@ +dependencies: + - role: common diff --git a/roles/vhosts/telegraf/tasks/main.yml b/roles/vhosts/telegraf/tasks/main.yml new file mode 100755 index 0000000..e6118ff --- /dev/null +++ b/roles/vhosts/telegraf/tasks/main.yml @@ -0,0 +1,49 @@ +- name: Import InfluxData GPG key (CentOS) + rpm_key: + state: present + key: https://repos.influxdata.com/influxdb.key + when: ansible_os_family == "RedHat" + +- name: Add Telegraf YUM repository (CentOS) + yum_repository: + name: influxdata + description: InfluxData Repository - Telegraf + baseurl: https://repos.influxdata.com/rhel/$releasever/$basearch/stable + gpgcheck: yes + gpgkey: https://repos.influxdata.com/influxdb.key + when: ansible_os_family == "RedHat" + +- name: Import InfluxData GPG key (Ubuntu) + apt_key: + url: 
https://repos.influxdata.com/influxdb.key + state: present + when: ansible_os_family == "Debian" + +- name: Add Telegraf APT repository (Ubuntu) + apt_repository: + repo: deb https://repos.influxdata.com/ubuntu {{ ansible_distribution_release }} stable + state: present + when: ansible_os_family == "Debian" + +- name: Install Telegraf + package: + name: telegraf + state: present + +- name: Create Telegraf configuration directory + file: + path: /etc/telegraf/ + state: directory + owner: root + group: root + mode: '0755' + +- name: Install telegraf.conf + template: + src: templates/telegraf.conf + dest: /etc/telegraf/telegraf.conf + owner: root + group: root + mode: '0644' + notify: + - restart telegraf diff --git a/roles/vhosts/telegraf/templates/telegraf.conf b/roles/vhosts/telegraf/templates/telegraf.conf new file mode 100644 index 0000000..382eb98 --- /dev/null +++ b/roles/vhosts/telegraf/templates/telegraf.conf @@ -0,0 +1,32 @@ +[global_tags] +[agent] + interval = "10s" + round_interval = true + metric_batch_size = 1000 + metric_buffer_limit = 10000 + collection_jitter = "0s" + flush_interval = "10s" + flush_jitter = "0s" + precision = "0s" + hostname = "" + omit_hostname = false +[[outputs.influxdb]] +urls = ["https://influxdb.svc-dev.ink"] +[[inputs.cpu]] + percpu = true + totalcpu = true + collect_cpu_time = false + report_active = false + core_tags = false +[[inputs.disk]] + ignore_fs = ["tmpfs", "devtmpfs", "devfs", "iso9660", "overlay", "aufs", "squashfs"] +[[inputs.diskio]] +[[inputs.kernel]] +[[inputs.mem]] +[[inputs.processes]] +[[inputs.swap]] +[[inputs.system]] +[[inputs.net]] + fielddrop = ["icmp_*", "icmpmsg_*", "ip_*", "tcp_*", "udp_*", "udplite_*"] + interfaces = ["eth*", "en*" ] +[[inputs.netstat]] diff --git a/roles/vhosts/vault/files/setup.sh b/roles/vhosts/vault/files/setup.sh new file mode 100644 index 0000000..747a36b --- /dev/null +++ b/roles/vhosts/vault/files/setup.sh @@ -0,0 +1,34 @@ +#!/bin/bash + +# Check whether the argument is empty +check_not_empty() { +
if [[ -z $1 ]]; then + echo "Error: $2 is empty. Please provide a value." + exit 1 + fi +} + +# Check that the required arguments are not empty +check_not_empty "$1" "DOMAIN" && DOMAIN=$1 +check_not_empty "$2" "NAMESPACE" && NAMESPACE=$2 +check_not_empty "$3" "SECRET_NAME" && SECRET_NAME=$3 + +cat > vaules.yaml << EOF +server: + ingress: + enabled: true + ingressClassName: "nginx" + hosts: + - host: vault.$DOMAIN + paths: + - / + tls: + - secretName: $SECRET_NAME + hosts: + - vault.$DOMAIN +EOF + +helm repo add hashicorp https://helm.releases.hashicorp.com +helm repo up +kubectl create ns $NAMESPACE || echo true +helm upgrade --install vault-server hashicorp/vault -n $NAMESPACE --create-namespace -f vaules.yaml diff --git a/roles/vhosts/vault/meta/main.yml b/roles/vhosts/vault/meta/main.yml new file mode 100644 index 0000000..1f2217b --- /dev/null +++ b/roles/vhosts/vault/meta/main.yml @@ -0,0 +1,2 @@ +dependencies: + - role: secret-manger diff --git a/roles/vhosts/vault/readme.md b/roles/vhosts/vault/readme.md new file mode 100644 index 0000000..bdb3af3 --- /dev/null +++ b/roles/vhosts/vault/readme.md @@ -0,0 +1,4 @@ +# Init Vault Server + +kubectl exec -t -i vault-server-0 -n vault -- sh +vault operator init -key-shares=5 -key-threshold=3 diff --git a/roles/vhosts/vault/tasks/main.yml b/roles/vhosts/vault/tasks/main.yml new file mode 100755 index 0000000..0ff84c4 --- /dev/null +++ b/roles/vhosts/vault/tasks/main.yml @@ -0,0 +1,4 @@ +- name: Setup Vault Server + script: files/setup.sh {{ domain }} {{ namespace }} {{ item.secret_name }} + loop: "{{ tls }}" + when: inventory_hostname in groups[group] diff --git a/roles/vhosts/vault/vars/main.yml b/roles/vhosts/vault/vars/main.yml new file mode 100644 index 0000000..b5de537 --- /dev/null +++ b/roles/vhosts/vault/vars/main.yml @@ -0,0 +1,7 @@ +group: master +namespace: vault +update_secret: true +tls: + - secret_name: vault-tls + keyfile: /etc/ssl/svc.plus.key + certfile: /etc/ssl/svc.plus.pem diff --git
a/roles/vhosts/wireguard-client/files/enable_ip_forward.sh b/roles/vhosts/wireguard-client/files/enable_ip_forward.sh new file mode 100644 index 0000000..9b652e1 --- /dev/null +++ b/roles/vhosts/wireguard-client/files/enable_ip_forward.sh @@ -0,0 +1,5 @@ +#!/bin/sh +cat >> /etc/sysctl.conf << EOF +net.ipv4.ip_forward = 1 +EOF +sysctl -p diff --git a/roles/vhosts/wireguard-client/tasks/main.yml b/roles/vhosts/wireguard-client/tasks/main.yml new file mode 100755 index 0000000..20d1165 --- /dev/null +++ b/roles/vhosts/wireguard-client/tasks/main.yml @@ -0,0 +1,17 @@ +- name: Init wireguard env + shell: "sudo mkdir -pv /etc/wireguard/ && \ + sudo touch /etc/wireguard/{{ local.tunnel }}.conf && \ + sudo chown shenlan /etc/wireguard/ && \ + brew install wireguard-tools" + +- name: Configure wireguard tunnel + template: + src: "templates/server.conf" + dest: "/etc/wireguard/{{ local.tunnel }}.conf" + +- name: Stop Wireguard interface + shell: sudo wg-quick down {{ local.tunnel }} + ignore_errors: yes + +- name: Start Wireguard interface + shell: sudo wg-quick up {{ local.tunnel }} diff --git a/roles/vhosts/wireguard-client/templates/server.conf b/roles/vhosts/wireguard-client/templates/server.conf new file mode 100755 index 0000000..bb28ee5 --- /dev/null +++ b/roles/vhosts/wireguard-client/templates/server.conf @@ -0,0 +1,15 @@ +[Interface] +PrivateKey = {{ local.private_key }} +Address = {{ local.network }} +ListenPort = {{ local.listen_port }} +SaveConfig = false +MTU = 1420 + +{% for peer in peers %} +[Peer] +{% if peer.endpoint is defined %} +Endpoint = {{ peer.endpoint }} +{% endif %} +PublicKey = {{ peer.public_key }} +AllowedIPs = {{ peer.allowed_ips }} +{% endfor %} diff --git a/roles/vhosts/wireguard-gateway/files/enable_ip_forward.sh b/roles/vhosts/wireguard-gateway/files/enable_ip_forward.sh new file mode 100644 index 0000000..9b652e1 --- /dev/null +++ b/roles/vhosts/wireguard-gateway/files/enable_ip_forward.sh @@ -0,0 +1,5 @@ +#!/bin/sh +cat >> 
/etc/sysctl.conf << EOF
+net.ipv4.ip_forward = 1
+EOF
+sysctl -p
diff --git a/roles/vhosts/wireguard-gateway/handlers/main.yml b/roles/vhosts/wireguard-gateway/handlers/main.yml
new file mode 100644
index 0000000..cd770a6
--- /dev/null
+++ b/roles/vhosts/wireguard-gateway/handlers/main.yml
@@ -0,0 +1,4 @@
+- name: Restart WireGuard service
+  systemd:
+    name: wg-quick@{{ gateway.public_config.tunnel }}
+    state: restarted
diff --git a/roles/vhosts/wireguard-gateway/meta/main.yml b/roles/vhosts/wireguard-gateway/meta/main.yml
new file mode 100755
index 0000000..9711b33
--- /dev/null
+++ b/roles/vhosts/wireguard-gateway/meta/main.yml
@@ -0,0 +1,2 @@
+dependencies:
+  - role: common
diff --git a/roles/vhosts/wireguard-gateway/tasks/main.yml b/roles/vhosts/wireguard-gateway/tasks/main.yml
new file mode 100755
index 0000000..a3e8c04
--- /dev/null
+++ b/roles/vhosts/wireguard-gateway/tasks/main.yml
@@ -0,0 +1,47 @@
+- name: Install WireGuard Packages
+  shell: "mkdir -pv /etc/wireguard/ && \
+    apt update && \
+    apt install resolvconf wireguard-dkms wireguard-tools -y"
+  tags: wireguard
+
+- name: Check if IP forwarding is enabled
+  command: sysctl -n net.ipv4.ip_forward
+  register: ip_forwarding_status
+  changed_when: false  # read-only probe; must not report a change
+
+- name: Enable IP forwarding if not already enabled
+  block:
+    - name: Append IP forwarding configuration
+      lineinfile:
+        path: /etc/sysctl.conf
+        line: "net.ipv4.ip_forward = 1"
+        state: present
+      when: ip_forwarding_status.stdout != '1'
+
+    - name: Apply sysctl changes
+      command: sysctl -p
+      when: ip_forwarding_status.stdout != '1'
+
+- name: Configure WireGuard tunnel
+  template:
+    src: templates/wg0.conf.j2
+    dest: "/etc/wireguard/{{ gateway.public_config.tunnel }}.conf"
+    owner: root
+    group: root
+    mode: '0600'  # the rendered file contains the gateway private key
+  notify: Restart WireGuard service # triggers the handler
+  tags: wireguard
+
+- name: Stop WireGuard service if running
+  systemd:
+    name: wg-quick@{{ gateway.public_config.tunnel }}
+    state: stopped
+  ignore_errors: yes
+  tags: wireguard
+
+- name: Enable and start WireGuard service
+  systemd:
+    name: wg-quick@{{ gateway.public_config.tunnel }}
+    enabled: yes
+    state: started
+  tags: wireguard
diff --git a/roles/vhosts/wireguard-gateway/templates/wg0.conf.j2 b/roles/vhosts/wireguard-gateway/templates/wg0.conf.j2
new file mode 100644
index 0000000..5fe6007
--- /dev/null
+++ b/roles/vhosts/wireguard-gateway/templates/wg0.conf.j2
@@ -0,0 +1,27 @@
+[Interface]
+PrivateKey = {{ gateway.private_key }}
+Address = {{ gateway.public_config.network }}
+ListenPort = {{ gateway.public_config.listen_port }}
+SaveConfig = false
+MTU = {{ gateway.public_config.mtu | default(1420) }}
+
+{% for rule in gateway.public_config.postup_rules | default([]) %}
+PostUp = {{ rule }}
+{% endfor %}
+
+{% for rule in gateway.public_config.postdown_rules | default([]) %}
+PostDown = {{ rule }}
+{% endfor %}
+
+{% for peer in gateway.public_config.peers | default([]) %}
+[Peer]
+PublicKey = {{ peer.public_key }}
+AllowedIPs = {{ peer.allowed_ips }}
+{% if peer.endpoint is defined %}
+Endpoint = {{ peer.endpoint }}
+{% endif %}
+{% if peer.persistent_keepalive is defined %}
+PersistentKeepalive = {{ peer.persistent_keepalive }}
+{% endif %}
+{% endfor %}
+
diff --git a/roles/vhosts/xcontrol_server/defaults/main.yml b/roles/vhosts/xcontrol_server/defaults/main.yml
new file mode 100644
index 0000000..70b8c50
--- /dev/null
+++ b/roles/vhosts/xcontrol_server/defaults/main.yml
@@ -0,0 +1,9 @@
+xcontrol_config_dir: /etc/XControl
+xcontrol_config_filename: server-qwen-ai.yaml
+xcontrol_service_name: xcontrol-server
+xcontrol_service_unit: xcontrol-server.service
+xcontrol_service_user: root
+xcontrol_service_group: root
+xcontrol_binary_path: /usr/bin/xcontrol-server
+xcontrol_server_embedder_token: ""
+xcontrol_server_generator_token: ""
diff --git a/roles/vhosts/xcontrol_server/tasks/main.yml b/roles/vhosts/xcontrol_server/tasks/main.yml
new file mode 100644
index 0000000..8f80e3c
--- /dev/null
+++ b/roles/vhosts/xcontrol_server/tasks/main.yml
@@ -0,0 +1,32 @@
+- name: Ensure XControl configuration directory exists
+
ansible.builtin.file:
+    path: "{{ xcontrol_config_dir }}"
+    state: directory
+    owner: "{{ xcontrol_service_user }}"
+    group: "{{ xcontrol_service_group }}"
+    mode: '0755'
+  when: inventory_hostname in groups[group]
+
+- name: Deploy XControl server configuration
+  ansible.builtin.template:
+    src: server-qwen-ai.yaml.j2
+    dest: "{{ xcontrol_config_dir }}/{{ xcontrol_config_filename }}"
+    owner: "{{ xcontrol_service_user }}"
+    group: "{{ xcontrol_service_group }}"
+    mode: '0600'  # holds API tokens and a database URL; readable by the service user only
+  when: inventory_hostname in groups[group]
+
+- name: Install XControl server systemd service
+  ansible.builtin.template:
+    src: xcontrol-server.service.j2
+    dest: "/etc/systemd/system/{{ xcontrol_service_unit }}"
+    mode: '0644'
+  when: inventory_hostname in groups[group]
+
+- name: Enable and restart XControl server
+  ansible.builtin.systemd:
+    name: "{{ xcontrol_service_name }}"
+    enabled: true
+    state: restarted
+    daemon_reload: true
+  when: inventory_hostname in groups[group]
diff --git a/roles/vhosts/xcontrol_server/templates/server-qwen-ai.yaml.j2 b/roles/vhosts/xcontrol_server/templates/server-qwen-ai.yaml.j2
new file mode 100644
index 0000000..3add902
--- /dev/null
+++ b/roles/vhosts/xcontrol_server/templates/server-qwen-ai.yaml.j2
@@ -0,0 +1,55 @@
+global:
+  #proxy: socks5://127.0.0.1:1080
+  redis:
+    addr: "127.0.0.1:6379"
+    password: ""
+  vectordb:
+    pgurl: "postgres://shenlan:password@127.0.0.1:5432/shenlan"
+  datasources:
+    - name: Xstream
+      repo: https://github.com/svc-design/Xstream
+      path: docs
+    - name: XControl
+      repo: https://github.com/svc-design/XControl
+      path: docs
+    - name: documents
+      repo: https://github.com/svc-design/documents
+      path: /
+
+sync:
+  repo:
+    proxy: socks5://127.0.0.1:1080
+
+models:
+  embedder:
+    provider: "openai"
+    models: "text-embedding-v4"
+    baseurl: "https://dashscope.aliyuncs.com/compatible-mode"
+    endpoint: "https://dashscope.aliyuncs.com/compatible-mode/v1/embeddings"
+    token: "{{ xcontrol_server_embedder_token | default('') }}"
+  generator:
+    provider: "chutes"
+    models:
+      - 'unsloth/Llama-3.2-3B-Instruct'
+    baseurl: "https://llm.chutes.ai"
+    endpoint: "https://llm.chutes.ai/v1/chat/completions"
+    token: "{{ xcontrol_server_generator_token | default('') }}"
+
+embedding:
+  max_batch: 64
+  dimension: 1024
+  max_chars: 8000
+  rate_limit_tpm: 120000
+
+chunking:
+  embed_toc: true
+  max_tokens: 800
+  overlap_tokens: 80
+  prefer_heading_split: true
+  include_exts: [".md", ".mdx"]
+  ignore_dirs: [".git", "node_modules", "dist", "build"]
+
+api:
+  askai:
+    timeout: 100
+    retries: 3
diff --git a/roles/vhosts/xcontrol_server/templates/xcontrol-server.service.j2 b/roles/vhosts/xcontrol_server/templates/xcontrol-server.service.j2
new file mode 100644
index 0000000..ebae3f0
--- /dev/null
+++ b/roles/vhosts/xcontrol_server/templates/xcontrol-server.service.j2
@@ -0,0 +1,15 @@
+[Unit]
+Description=XControl Server (Qwen AI)
+After=network.target
+
+[Service]
+User={{ xcontrol_service_user }}
+Group={{ xcontrol_service_group }}
+ExecStart={{ xcontrol_binary_path }} --config {{ xcontrol_config_dir }}/{{ xcontrol_config_filename }}
+WorkingDirectory={{ xcontrol_config_dir }}
+Restart=on-failure
+RestartSec=5s
+LimitNOFILE=65535
+
+[Install]
+WantedBy=multi-user.target
diff --git a/roles/vhosts/zot/defaults/main.yml b/roles/vhosts/zot/defaults/main.yml
new file mode 100644
index 0000000..e4ac31b
--- /dev/null
+++ b/roles/vhosts/zot/defaults/main.yml
@@ -0,0 +1,33 @@
+zot_version: v2.1.11
+zot_binary_url: "https://github.com/project-zot/zot/releases/download/{{ zot_version }}/zot-linux-amd64"
+zot_binary_path: /usr/bin/zot
+zot_user: zot
+zot_group: zot
+zot_data_dir: /data/zot
+zot_log_dir: /var/log/zot
+zot_config_dir: /etc/zot
+zot_config_path: /etc/zot/config.json
+zot_htpasswd_path: /etc/zot/htpasswd
+zot_service_name: zot
+zot_service_limits:
+  nofile: 500000
+  memory_high: 30G
+  memory_max: 32G
+zot_http_address: 0.0.0.0
+zot_http_port: 5000
+zot_tls_cert_path: ""
+zot_tls_key_path: ""
+zot_log_level: info
+zot_sync_enabled: false
+zot_sync_registries:
+  - urls:
+      - https://mirror.gcr.io/library
+    onDemand: true
+    maxRetries: 3
+    retryDelay: 5m
+    pollInterval: 6h
+  - urls:
+      - https://docker.io/library
+    onDemand: true
+zot_auth_users: []
+zot_verify_config: false
diff --git a/roles/vhosts/zot/tasks/main.yml b/roles/vhosts/zot/tasks/main.yml
new file mode 100644
index 0000000..1dab2dd
--- /dev/null
+++ b/roles/vhosts/zot/tasks/main.yml
@@ -0,0 +1,105 @@
+- name: Ensure zot group exists
+  ansible.builtin.group:
+    name: "{{ zot_group }}"
+  when: inventory_hostname in groups[group]
+
+- name: Ensure zot user exists
+  ansible.builtin.user:
+    name: "{{ zot_user }}"
+    group: "{{ zot_group }}"
+    create_home: false
+    shell: /usr/sbin/nologin
+  when: inventory_hostname in groups[group]
+
+- name: Ensure zot configuration directory exists
+  ansible.builtin.file:
+    path: "{{ zot_config_dir }}"
+    state: directory
+    owner: root
+    group: root
+    mode: '0755'
+  when: inventory_hostname in groups[group]
+
+- name: Ensure zot data directory exists
+  ansible.builtin.file:
+    path: "{{ zot_data_dir }}"
+    state: directory
+    owner: "{{ zot_user }}"
+    group: "{{ zot_group }}"
+    mode: '0755'
+  when: inventory_hostname in groups[group]
+
+- name: Ensure zot log directory exists
+  ansible.builtin.file:
+    path: "{{ zot_log_dir }}"
+    state: directory
+    owner: "{{ zot_user }}"
+    group: "{{ zot_group }}"
+    mode: '0755'
+  when: inventory_hostname in groups[group]
+
+- name: Download zot binary
+  ansible.builtin.get_url:
+    url: "{{ zot_binary_url }}"
+    dest: "{{ zot_binary_path }}"
+    mode: '0755'
+  when: inventory_hostname in groups[group]
+
+- name: Ensure htpasswd file exists
+  ansible.builtin.file:
+    path: "{{ zot_htpasswd_path }}"
+    state: touch
+    owner: "{{ zot_user }}"
+    group: "{{ zot_group }}"
+    mode: '0640'
+    modification_time: preserve  # keep timestamps so an existing file is not reported as changed
+    access_time: preserve
+  when: (inventory_hostname in groups[group]) and (zot_auth_users | length > 0)
+
+- name: Configure local authentication users
+  community.general.htpasswd:
+    path: "{{ zot_htpasswd_path }}"
+    name: "{{ item.name }}"
+    password: "{{ item.password }}"
+    crypt_scheme: bcrypt
+    mode: '0640'
+    owner: "{{ zot_user }}"
+    group: "{{ zot_group }}"
+  loop: "{{ zot_auth_users }}"
+  no_log: true  # loop items carry plaintext passwords; keep them out of task output
+  when: (inventory_hostname in groups[group]) and (zot_auth_users | length > 0)
+
+- name: Install zot configuration
+  ansible.builtin.template:
+    src: config.json.j2
+    dest: "{{ zot_config_path }}"
+    mode: '0644'
+    owner: root
+    group: root
+  when: inventory_hostname in groups[group]
+
+- name: Install zot systemd service
+  ansible.builtin.template:
+    src: zot.service.j2
+    dest: "/etc/systemd/system/{{ zot_service_name }}.service"
+    mode: '0644'
+    owner: root
+    group: root
+  when: inventory_hostname in groups[group]
+
+- name: Verify zot configuration
+  ansible.builtin.command:
+    cmd: "{{ zot_binary_path }} verify {{ zot_config_path }}"
+  become: true
+  become_user: "{{ zot_user }}"
+  register: zot_verify_result
+  changed_when: false
+  when: (inventory_hostname in groups[group]) and zot_verify_config
+
+- name: Enable and start zot service
+  ansible.builtin.systemd:
+    name: "{{ zot_service_name }}"
+    enabled: true
+    state: restarted
+    daemon_reload: true
+  when: inventory_hostname in groups[group]
diff --git a/roles/vhosts/zot/templates/config.json.j2 b/roles/vhosts/zot/templates/config.json.j2
new file mode 100644
index 0000000..77bdb46
--- /dev/null
+++ b/roles/vhosts/zot/templates/config.json.j2
@@ -0,0 +1,45 @@
+{
+  "distSpecVersion": "1.1.0",
+  "storage": {
+    "rootDirectory": "{{ zot_data_dir }}"
+  },
+  "http": {
+    "address": "{{ zot_http_address }}",
+    "port": "{{ zot_http_port }}"{% if zot_tls_cert_path and zot_tls_key_path %},
+    "tls": {
+      "cert": "{{ zot_tls_cert_path }}",
+      "key": "{{ zot_tls_key_path }}"
+    }{% endif %}{% if zot_auth_users | length > 0 %},
+    "auth": {
+      "htpasswd": {
+        "path": "{{ zot_htpasswd_path }}"
+      }
+    },
+    "accessControl": {
+      "repositories": {
+        "**": {
+          "policies": [
+            {
+              "users": ["*"],
+              "actions": ["read"]
+            },
+            {
+              "users": ["{{ zot_user }}"],
+              "actions": ["*"]
+            }
+          ]
+        }
+      }
+    }{% endif %}
+  },
+  "log": {
+    "level": "{{ zot_log_level }}",
+    "output": "{{ zot_log_dir }}/zot.log"
+  },
+  "extensions": {
+    "sync": {
+      "enable": {{ zot_sync_enabled | string | lower }},
+      "registries": {{ zot_sync_registries | to_json }}
+    }
+  }
+}
diff --git a/roles/vhosts/zot/templates/zot.service.j2 b/roles/vhosts/zot/templates/zot.service.j2
new file mode 100644
index 0000000..7b25ba0
--- /dev/null
+++ b/roles/vhosts/zot/templates/zot.service.j2
@@ -0,0 +1,17 @@
+[Unit]
+Description=OCI Distribution Registry (zot)
+Documentation=https://zotregistry.dev/
+After=network.target auditd.service local-fs.target
+
+[Service]
+Type=simple
+User={{ zot_user }}
+Group={{ zot_group }}
+ExecStart={{ zot_binary_path }} serve {{ zot_config_path }}
+Restart=on-failure
+LimitNOFILE={{ zot_service_limits.nofile }}
+MemoryHigh={{ zot_service_limits.memory_high }}
+MemoryMax={{ zot_service_limits.memory_max }}
+
+[Install]
+WantedBy=multi-user.target
diff --git a/scripts/Fetch_packages_depends.sh b/scripts/Fetch_packages_depends.sh
new file mode 100644
index 0000000..7b71600
--- /dev/null
+++ b/scripts/Fetch_packages_depends.sh
@@ -0,0 +1,58 @@
+#!/bin/bash
+
+# Seed packages whose dependency trees are resolved
+PACKAGES=(plasma-desktop dolphin konsole chromium sddm)
+
+# Output files for the dependency list and the SRPM list
+DEP_FILE="kde_dependencies.txt"
+SRPM_FILE="kde_srpm_list.txt"
+
+# Truncate output left over from a previous run
+> "$DEP_FILE"
+> "$SRPM_FILE"
+
+# Recursively record the dependencies of one package
+get_dependencies() {
+    local package="$1"
+    echo "查询 $package 的依赖关系..."
+    local dependencies=$(dnf repoquery --requires --resolve "$package" 2>/dev/null)
+
+    for dep in $dependencies; do
+        # Skip entries that were already recorded
+        if ! 
grep -qxF -- "$dep" "$DEP_FILE"; then  # exact fixed-string match: package names may contain regex metacharacters
+            echo "$dep" | tee -a "$DEP_FILE"
+            get_dependencies "$dep"
+        fi
+    done
+}
+
+# Walk every seed package
+for pkg in "${PACKAGES[@]}"; do
+    echo "$pkg" | tee -a "$DEP_FILE"
+    get_dependencies "$pkg"
+    echo "------------------------------------------------------"
+done
+
+# Count the collected dependencies
+TOTAL_PACKAGES=$(wc -l < "$DEP_FILE")
+echo "总计依赖包数量: $TOTAL_PACKAGES"
+
+# Resolve the SRPM for every recorded package
+while read -r pkg; do
+    srpm=$(dnf repoquery --source "$pkg" 2>/dev/null)
+    if [ -n "$srpm" ]; then
+        echo "$srpm" | tee -a "$SRPM_FILE"
+    fi
+done < "$DEP_FILE"
+
+# Count the SRPMs
+TOTAL_SRPM=$(wc -l < "$SRPM_FILE")
+echo "总计 SRPM 包数量: $TOTAL_SRPM"
+
+# Download every SRPM
+dnf download --source $(cat "$SRPM_FILE") --setopt=install_weak_deps=False
+
+echo "依赖包列表已保存到 $DEP_FILE"
+echo "SRPM 包列表已保存到 $SRPM_FILE"
+echo "所有 SRPM 包下载完成"
+
diff --git a/scripts/Jenkinsfile b/scripts/Jenkinsfile
new file mode 100644
index 0000000..5c0cbfb
--- /dev/null
+++ b/scripts/Jenkinsfile
@@ -0,0 +1,28 @@
+pipeline {
+    agent any
+    stages {
+        stage('Build') {
+            steps {
+                sh 'ansible-lint'
+            }
+        }
+        stage('Pre Setup') {
+            steps {
+                sh "echo \"${secrets.ANSIBLE_SSH_PASSWORD}\" > ~/.vault_pass.txt"
+                sh "echo 'ansible_password: \'xxxx\'' >> inventory/group_vars/all.yml"
+                sh "echo 'ansible_become_password: \'xxxx\'' >> inventory/group_vars/all.yml"
+            }
+        }
+        stage('Deploy') {
+            steps {
+                // Read the vault password from the file the Pre Setup stage writes (~/.vault_pass.txt).
+                sh "ansible-playbook -u ${secrets.ANSIBLE_SSH_USER} -i inventory.ini -kK playbooks/server.yml -l ${params.instance_name} -e 'ign_install_ver=${params.install_version}' --vault-password-file ~/.vault_pass.txt --diff"
+            }
+        }
+        stage('Postsetup') {
+            steps {
+                echo "Todo"
+            }
+        }
+    }
+}
diff --git a/scripts/ansible_playbook_hosts_setup.sh b/scripts/ansible_playbook_hosts_setup.sh
new file mode 100644
index 0000000..14a574a
--- /dev/null
+++ b/scripts/ansible_playbook_hosts_setup.sh
@@ -0,0 +1,40 @@
+#!/bin/bash
+
+# Function to check if a variable is empty
+check_empty() {
+    if [ -z "${!1}" ]; then
+        echo "$1 is empty. Aborting."
+        exit 1
+    fi
+}
+
+# List of variables to check
+variables=("SSH_USER" "SSH_HOST_IP" "SSH_HOST_DOMAIN" "SSH_PRIVATE_KEY")
+
+# Loop through variables and check if each one is empty
+for var in "${variables[@]}"; do
+    check_empty "$var"
+done
+
+mkdir -pv ~/.ssh/
+cat > ~/.ssh/id_rsa << EOF
+$SSH_PRIVATE_KEY
+EOF
+chmod 0600 ~/.ssh/id_rsa  # owner-only but still writable, so a rerun can overwrite the key; no sudo needed for the user's own file
+md5sum ~/.ssh/id_rsa
+
+mkdir -pv hosts/
+
+cat > hosts/inventory << EOF
+[master]
+$SSH_HOST_DOMAIN ansible_host=$SSH_HOST_IP
+
+[all:vars]
+ansible_port=22
+ansible_ssh_user=$SSH_USER
+ansible_ssh_private_key_file=~/.ssh/id_rsa
+ansible_host_key_checking=False
+ingress_ip=$SSH_HOST_IP
+EOF
+
+cat hosts/inventory
diff --git a/scripts/argo_application-demo.yaml b/scripts/argo_application-demo.yaml
new file mode 100644
index 0000000..31719b5
--- /dev/null
+++ b/scripts/argo_application-demo.yaml
@@ -0,0 +1,22 @@
+apiVersion: argoproj.io/v1alpha1
+kind: Application
+metadata:
+  name: postgresql
+  namespace: itsm-dev-db
+spec:
+  project: postgresql
+  destination:
+    namespace: itsm-dev-db
+    server: https://k3s.onwalk.net
+  source:
+    path: apps/postgresql
+    repoURL: https://github.com/svc-design/gitops.git
+    targetRevision: HEAD
+    plugin:
+      name: kustomized-helm
+  syncPolicy:
+    automated:
+      prune: true
+      selfHeal: true
+    syncOptions:
+      - CreateNamespace=false
diff --git a/scripts/argocd_all_in_one.sh b/scripts/argocd_all_in_one.sh
new file mode 100644
index 0000000..f9f5fc2
--- /dev/null
+++ b/scripts/argocd_all_in_one.sh
@@ -0,0 +1,45 @@
+#!/bin/bash
+
+# Add the Argo CD Helm repository
+helm repo add argo https://argoproj.github.io/argo-helm
+helm repo update
+
+# Deploy Argo CD with Helm (upgrade --install keeps reruns from failing on an existing release)
+helm upgrade --install argocd argo/argo-cd -n argocd --create-namespace
+
+# Wait until Argo CD is fully up
+echo "Waiting for Argo CD to be ready..."
+kubectl wait --for=condition=ready pod -l app.kubernetes.io/name=argocd-server -n argocd --timeout=180s
+
+# Write the Argo CD Application manifest
+cat <<EOF > argocd-application.yaml
+apiVersion: argoproj.io/v1alpha1
+kind: Application
+metadata:
+  name: helmfile-application
+  namespace: argocd
+spec:
+  project: default
+  source:
+    repoURL: <你的 Git 仓库 URL>
+    path: <存放 Helmfile 的路径>
+    targetRevision: HEAD
+    helm:
+      releaseName: helmfile-application
+  destination:
+    server: https://kubernetes.default.svc
+    namespace: default
+  syncPolicy:
+    automated:
+      prune: true
+      selfHeal: true
+EOF
+
+# Replace the placeholders with real values
+sed -i 's|<你的 Git 仓库 URL>|你的实际 Git 仓库 URL|g' argocd-application.yaml
+sed -i 's|<存放 Helmfile 的路径>|你的实际 Helmfile 路径|g' argocd-application.yaml
+
+# Apply the Argo CD Application manifest
+kubectl apply -f argocd-application.yaml
+
+echo "Argo CD deployment and configuration complete."
diff --git a/scripts/artifact/setup-harbor.sh b/scripts/artifact/setup-harbor.sh
new file mode 100644
index 0000000..92aa232
--- /dev/null
+++ b/scripts/artifact/setup-harbor.sh
@@ -0,0 +1,66 @@
+helm repo add harbor https://helm.goharbor.io
+helm repo update
+kubectl create ns harbor || true
+kubectl create secret tls harbor-secret --key=/etc/ssl/onwalk.net.key --cert=/etc/ssl/onwalk.net.pem -n harbor --dry-run=client -o yaml | kubectl apply -f -
+cat > harbor-arm-config.yaml << EOF
+expose:
+  type: ingress
+  tls:
+    enabled: true
+    certSource: secret
+    secret:
+      secretName: harbor-secret
+      notarySecretName: harbor-secret
+  ingress:
+    hosts:
+      core: harbor.onwalk.net
+      notary: artifact-notary.onwalk.net
+    className: "nginx"
+externalURL: https://harbor.onwalk.net
+nginx:
+  image:
+    repository: images.onwalk.net/public/goharbor/nginx-photon
+    tag: v2.12.0
+portal:
+  image:
+    repository: images.onwalk.net/public/goharbor/harbor-portal
+    tag: v2.12.0
+core:
+  image:
+    repository: images.onwalk.net/public/goharbor/harbor-core
+    tag: v2.12.0
+jobservice:
+  image:
+    repository: images.onwalk.net/public/goharbor/harbor-jobservice
+    tag: v2.12.0
+registry:
+  registry:
+    image:
+      repository: images.onwalk.net/public/goharbor/registry-photon
+      tag: v2.12.0
+  controller:
+    image:
+      repository: images.onwalk.net/public/goharbor/harbor-registryctl
+      tag: v2.12.0
+trivy:
+  enabled: true
+  image:
+    repository: images.onwalk.net/public/goharbor/trivy-adapter-photon
+    tag: v2.12.0
+database:
+  type: internal
+  internal:
+    image:
+      repository: images.onwalk.net/public/goharbor/harbor-db
+      tag: v2.12.0
+redis:
+  type: internal
+  internal:
+    image:
+      repository: images.onwalk.net/public/goharbor/redis-photon
+      tag: v2.12.0
+exporter:
+  image:
+    repository: images.onwalk.net/public/goharbor/harbor-exporter
+EOF
+helm upgrade --install harbor harbor/harbor -f harbor-arm-config.yaml -n harbor
diff --git a/scripts/backup_docker_registry_secret.sh b/scripts/backup_docker_registry_secret.sh
new file mode 100644
index 0000000..685b26a
--- /dev/null
+++ b/scripts/backup_docker_registry_secret.sh
@@ -0,0 +1,26 @@
+#!/bin/bash
+
+check_not_empty() {
+  if [[ -z $1 ]]; then
+    echo "Error: $2 is empty. Please provide a value."
+    exit 1
+  fi
+}
+
+function backup_docker_registry_secret()
+{
+
+  # Ensure every required argument is non-empty
+  check_not_empty "$1" "cluster" && local cluster=$1
+  check_not_empty "$2" "namespace" && local namespace=$2
+  check_not_empty "$3" "secret" && local secret=$3
+
+  mkdir -pv ~/Backups/
+  kubectl config set-context --current --namespace "$namespace"
+  kubectl get secret "$secret" -n "$namespace" -o yaml > ~/Backups/"$cluster-$namespace-$secret".yaml
+}
+
+# Run the backup when executed directly; sourcing the file still only defines the functions.
+if [[ "${BASH_SOURCE[0]}" == "$0" ]]; then
+  backup_docker_registry_secret "$@"
+fi
diff --git a/scripts/check_docker_registry_secret.sh b/scripts/check_docker_registry_secret.sh
new file mode 100644
index 0000000..cd8c1f2
--- /dev/null
+++ b/scripts/check_docker_registry_secret.sh
@@ -0,0 +1,33 @@
+#!/bin/bash
+
+check_not_empty() {
+  if [[ -z $1 ]]; then
+    echo "Error: $2 is empty. Please provide a value."
+    exit 1
+  fi
+}
+
+function run() {
+  check_not_empty "$1" "cluster" && local cluster=$1
+  check_not_empty "$2" "namespace" && local namespace=$2
+
+  kubectl config set-context --current --namespace $namespace
+
+  for secret in $(kubectl get secrets -n "$namespace" --field-selector type=kubernetes.io/dockerconfigjson -o jsonpath='{range .items[*]}{.metadata.name}{"\n"}{end}'); do  # select by secret type, not by name suffix
+    echo "$cluster $namespace $secret"
+  done
+}
+
+function print_base64_data() {
+  local namespace=$1
+  local secret=$2
+  local cluster=$3
+  echo "$cluster $namespace $secret"
+  kubectl get secret $secret -n $namespace --output="jsonpath={.data.\.dockerconfigjson}" | base64 --decode || true
+}
+
+cluster="$1"
+namespace="$2"
+
+run "$cluster" "$namespace"
+print_base64_data "$namespace" "$secret" "$cluster"  # $secret is the last name left behind by the loop in run()
diff --git a/scripts/deepflow-agent-batch-tools-v1.0.sh b/scripts/deepflow-agent-batch-tools-v1.0.sh
new file mode 100644
index 0000000..26f3d57
--- /dev/null
+++ b/scripts/deepflow-agent-batch-tools-v1.0.sh
@@ -0,0 +1,265 @@
+#!/bin/bash
+# FIX config vtap-group-id-request 20250612-15:10
+
+set -e
+
+####################################
+# 🌐 Configuration
+####################################
+
+IP_LIST="./ip.list"
+SERVICE_NAME="deepflow-agent"
+PKG_DIR="deepflow-agent-for-linux"
+MAX_PARALLEL=5
+
+CONTROLLER_IP=""
+VTAP_GROUP_ID=""
+LIMIT=""
+
+SSH_OPTS="-o StrictHostKeyChecking=no -o ConnectTimeout=15"
+
+FAILED_FILE="failed_hosts.txt"
+SUCCESS_FILE="success_hosts.txt"
+> "$FAILED_FILE"
+> "$SUCCESS_FILE"
+
+####################################
+# Argument parsing
+####################################
+
+if [[ $# -eq 0 ]]; then
+    echo "用法: $0 {deploy|upgrade|verify} --controller <ip> --group <id> [--limit ip1,ip2]"
+    exit 1
+fi
+
+ACTION="$1"
+shift
+
+while [[ $# -gt 0 ]]; do
+    case "$1" in
+        --controller)
+            CONTROLLER_IP="$2"
+            shift 2
+            ;;
+        --group)
+            VTAP_GROUP_ID="$2"
+            shift 2
+            ;;
+        --limit)
+            LIMIT="$2"
+            shift 2
+            ;;
+        *)
+            echo "未知参数: $1"
+            exit 1
+            ;;
+    esac
+done
+
+if [[ "$ACTION" != "deploy" && "$ACTION" != "upgrade" && "$ACTION" != "verify" ]]; then
+    echo "用法: $0 {deploy|upgrade|verify} --controller <ip> --group <id> [--limit ip1,ip2]"
+    exit 1
+fi
+
+if [[ "$ACTION" != "verify" && ( -z "$CONTROLLER_IP" || -z "$VTAP_GROUP_ID" ) ]]; then
+    echo "❗ deploy/upgrade 必须传入 --controller 和 --group 参数"
+    exit 1
+fi
+
+####################################
+# Core functions
+####################################
+
+worker() {
+    local ip="$1"
+    local user="$2"
+    local pass="$3"
+
+    echo "🔧 [$ACTION] 处理主机 $ip ($user)"
+
+    if [[ "$ACTION" == "verify" ]]; then
+        verify_agent "$ip" "$user" "$pass" && {
+            echo "$ip" >> "$SUCCESS_FILE"
+            return
+        } || {
+            echo "$ip" >> "$FAILED_FILE"
+            return
+        }
+    fi
+
+    remote_info=$(fetch_remote_info "$ip" "$user" "$pass") || {
+        echo "❌ $ip 获取远程信息失败"
+        echo "$ip" >> "$FAILED_FILE"
+        return
+    }
+
+    arch=$(echo "$remote_info" | cut -d'|' -f1)
+    init=$(echo "$remote_info" | cut -d'|' -f2)
+
+    if [[ "$init" == "unknown" ]]; then
+        echo "❌ $ip 不支持的初始化系统: $init"
+        echo "$ip" >> "$FAILED_FILE"
+        return
+    fi
+
+    pkg_path=$(choose_agent_package "$arch" "$init")
+
+    if [[ "$pkg_path" == "UNSUPPORTED" ]]; then
+        echo "❌ $ip 无匹配安装包: $arch/$init"
+        echo "$ip" >> "$FAILED_FILE"
+        return
+    fi
+
+    install_agent "$ip" "$user" "$pass" "$pkg_path" && update_config "$ip" "$user" "$pass" && {
+        echo "✅ $ip $ACTION 完成"
+        echo "$ip" >> "$SUCCESS_FILE"
+    } || {
+        echo "❌ $ip 安装或配置失败"
+        echo "$ip" >> "$FAILED_FILE"
+    }
+
+    echo "-------------------------------------------"
+}
+
+fetch_remote_info() {
+    local ip="$1" user="$2" pass="$3"
+
+    SSHPASS="$pass" sshpass -e ssh $SSH_OPTS "$user@$ip" bash <<'EOF'
+arch=$(uname -m)
+case "$arch" in
+    aarch64|arm64) arch="arm" ;;
+    *) arch="x86" ;;
+esac
+
+if command -v systemctl >/dev/null; then init=systemd;
+elif command -v initctl >/dev/null; then init=upstart;
+else init=unknown; fi
+
+echo "${arch}|${init}"
+EOF
+}
+
+choose_agent_package() {
+    local arch="$1" init="$2"
+
+    shopt -s nullglob
+
+    declare -a patterns
+
+    if [[ "$arch" == "arm" ]]; then
+
patterns=("$PKG_DIR"/deepflow-agent-*.$init.aarch64.*)
+    else
+        patterns=("$PKG_DIR"/deepflow-agent-*.$init-x86.* \
+                  "$PKG_DIR"/deepflow-agent-*.$init.*)
+    fi
+
+    files=()
+
+    for pattern in "${patterns[@]}"; do
+        for file in $pattern; do
+            if [[ "$arch" == "arm" || "$file" != *aarch64* ]]; then files+=("$file"); fi  # the generic x86 fallback glob also matches aarch64 builds
+        done
+    done
+
+    if [[ ${#files[@]} -gt 0 ]]; then
+        latest=$(printf "%s\n" "${files[@]}" | sort -V | tail -1)
+        echo "🎯 选择安装包: $latest" >&2
+        echo "$latest"
+    else
+        echo "UNSUPPORTED"
+    fi
+}
+
+install_agent() {
+    local ip="$1" user="$2" pass="$3" pkg_path="$4"
+    local remote_pkg="/tmp/agent.${pkg_path##*.}"
+
+    SSHPASS="$pass" sshpass -e scp $SSH_OPTS "$pkg_path" "$user@$ip:$remote_pkg"
+
+    SSHPASS="$pass" sshpass -e ssh $SSH_OPTS "$user@$ip" bash <<EOF
+if command -v sudo &>/dev/null; then SUDO="sudo"; else SUDO=""; fi
+
+if [[ "$remote_pkg" == *.rpm ]]; then
+    \$SUDO rpm -Uvh --replacepkgs "$remote_pkg"
+elif [[ "$remote_pkg" == *.deb ]]; then
+    \$SUDO dpkg -i "$remote_pkg" || \$SUDO apt-get install -f -y
+else
+    echo "❌ 不支持的安装包格式"
+    exit 1
+fi
+
+if command -v systemctl &>/dev/null; then
+    \$SUDO systemctl enable $SERVICE_NAME
+    \$SUDO systemctl restart $SERVICE_NAME
+elif command -v service &>/dev/null; then
+    \$SUDO service $SERVICE_NAME restart
+    \$SUDO chkconfig $SERVICE_NAME on
+elif command -v initctl &>/dev/null; then
+    \$SUDO initctl restart $SERVICE_NAME || \$SUDO initctl start $SERVICE_NAME
+else
+    echo "❌ 无法识别服务管理方式"
+fi
+EOF
+}
+
+update_config() {
+    local ip="$1" user="$2" pass="$3"
+    SSHPASS="$pass" sshpass -e ssh $SSH_OPTS "$user@$ip" bash <<EOF
+if command -v sudo &>/dev/null; then SUDO="sudo"; else SUDO=""; fi
+CONFIG_FILE="/etc/deepflow-agent.yaml"
+\$SUDO mkdir -p \$(dirname \$CONFIG_FILE)
+cat <<CFG | \$SUDO tee "\$CONFIG_FILE" >/dev/null
+controller-ips:
+  - $CONTROLLER_IP
+vtap-group-id-request: "$VTAP_GROUP_ID"
+CFG
+\$SUDO chmod 644 "\$CONFIG_FILE"
+\$SUDO chown root:root "\$CONFIG_FILE"
+EOF
+}
+
+verify_agent() {
+    local ip="$1" user="$2" pass="$3"
+    echo "🔍 $ip 状态检查:"
+    SSHPASS="$pass" sshpass -e ssh $SSH_OPTS "$user@$ip" "
+        systemctl is-active $SERVICE_NAME 2>/dev/null || \
+        service 
$SERVICE_NAME status || \
+        initctl status $SERVICE_NAME
+    "
+}
+
+####################################
+# Concurrency control and main loop
+####################################
+
+sem(){
+    while [[ $(jobs -r | wc -l) -ge $MAX_PARALLEL ]]; do
+        sleep 0.5
+    done
+}
+
+while read -r ip user pass || [[ -n "$ip" ]]; do  # also process a final line that has no trailing newline
+    if [[ -n "$LIMIT" ]]; then
+        IFS=',' read -ra LIMIT_IPS <<< "$LIMIT"
+        skip=true
+        for lim_ip in "${LIMIT_IPS[@]}"; do
+            [[ "$ip" == "$lim_ip" ]] && skip=false
+        done
+        $skip && continue
+    fi
+
+    sem
+    worker "$ip" "$user" "$pass" &
+done < "$IP_LIST"
+
+wait
+
+TOTAL_SUCCESS=$(wc -l < "$SUCCESS_FILE")
+TOTAL_FAIL=$(wc -l < "$FAILED_FILE")
+
+echo "🎯 全部任务执行完成: 成功 $TOTAL_SUCCESS 台,失败 $TOTAL_FAIL 台"
+if [[ -s "$FAILED_FILE" ]]; then
+    echo "❗ 失败主机列表已保存: $FAILED_FILE"
+fi
diff --git a/scripts/deepflow/backup_images_v6.3-20250309-17.json b/scripts/deepflow/backup_images_v6.3-20250309-17.json
new file mode 100755
index 0000000..c6a35c4
--- /dev/null
+++ b/scripts/deepflow/backup_images_v6.3-20250309-17.json
@@ -0,0 +1,435 @@
+{
+  "version": "v6.3-20250309-17",
+  "items": [
+    {
+      "kind": "Deployment",
+      "name": "acl-controller-deployment",
+      "containers": [
+        {
+          "name": "deepflow-acl-controller",
+          "image": "images.onwalk.net/private/deepflow-v6.3/acl-controller:v6.3.179"
+        }
+      ]
+    },
+    {
+      "kind": "Deployment",
+      "name": "alarm-deployment",
+      "containers": [
+        {
+          "name": "deepflow-alarm",
+          "image": "images.onwalk.net/private/deepflow-v6.3/alarm:v6.3.686"
+        }
+      ]
+    },
+    {
+      "kind": "Deployment",
+      "name": "cerebro-deployment",
+      "containers": [
+        {
+          "name": "deepflow-cerebro",
+          "image": "images.onwalk.net/private/deepflow-v6.3/cerebro:0.9.0"
+        }
+      ]
+    },
+    {
+      "kind": "Deployment",
+      "name": "deepflow-app",
+      "containers": [
+        {
+          "name": "deepflow-app",
+          "image": "images.onwalk.net/private/deepflow-v6.3/deepflow-app:v6.3.120"
+        }
+      ]
+    },
+    {
+      "kind": "Deployment",
+      "name": "df-help-deployment",
+      "containers": [
+        {
+          "name": "df-help",
+          "image": 
"images.onwalk.net/private/deepflow-v6.3/df-help:v6.3.1086" + } + ] + }, + { + "kind": "Deployment", + "name": "df-web-core-deployment", + "containers": [ + { + "name": "df-web-core", + "image": "images.onwalk.net/private/deepflow-v6.3/df-web-qiankun-core:v6.3.9969" + } + ] + }, + { + "kind": "Deployment", + "name": "df-web-deployment", + "containers": [ + { + "name": "df-web", + "image": "images.onwalk.net/private/deepflow-v6.3/df-web-service:v6.3.580" + } + ] + }, + { + "kind": "Deployment", + "name": "df-web-metrics-explore-deployment", + "containers": [ + { + "name": "df-web-metrics-explore", + "image": "images.onwalk.net/private/deepflow-v6.3/df-web-metrics-explore:v6.3.4318" + } + ] + }, + { + "kind": "Deployment", + "name": "diagnose-deployment", + "containers": [ + { + "name": "deepflow-diagnose", + "image": "images.onwalk.net/private/deepflow-v6.3/diagnose:v6.3.103" + } + ] + }, + { + "kind": "Deployment", + "name": "fauths-deployment-deployment", + "containers": [ + { + "name": "fauths", + "image": "images.onwalk.net/private/deepflow-v6.3/fauths:v6.3.452" + } + ] + }, + { + "kind": "Deployment", + "name": "fpermit-deployment", + "containers": [ + { + "name": "fpermit", + "image": "images.onwalk.net/private/deepflow-v6.3/fpermit:v6.3.254" + } + ] + }, + { + "kind": "Deployment", + "name": "fuser-deployment", + "containers": [ + { + "name": "fuser", + "image": "images.onwalk.net/private/deepflow-v6.3/fuser:v6.3.328" + } + ] + }, + { + "kind": "Deployment", + "name": "grafana-deployment", + "containers": [ + { + "name": "deepflow-grafana", + "image": "images.onwalk.net/private/deepflow-v6.3/grafana:9.2.4" + } + ] + }, + { + "kind": "Deployment", + "name": "kibana-deployment", + "containers": [ + { + "name": "deepflow-kibana", + "image": "images.onwalk.net/private/deepflow-v6.3/kibana:6.8.8" + } + ] + }, + { + "kind": "Deployment", + "name": "manager-deployment", + "containers": [ + { + "name": "deepflow-manager", + "image": 
"images.onwalk.net/private/deepflow-v6.3/manager:v6.3.684" + } + ] + }, + { + "kind": "Deployment", + "name": "masterdeepflow-server", + "containers": [ + { + "name": "deepflow-server", + "image": "images.onwalk.net/private/deepflow-v6.3/deepflow-server:v6.3.4211" + } + ] + }, + { + "kind": "Deployment", + "name": "mntnct-deployment", + "containers": [ + { + "name": "deepflow-mntnct", + "image": "images.onwalk.net/private/deepflow-v6.3/mntnct:v6.3.1317" + } + ] + }, + { + "kind": "Deployment", + "name": "monitor-deployment", + "containers": [ + { + "name": "deepflow-monitor", + "image": "images.onwalk.net/private/deepflow-v6.3/monitor:v6.3.174" + } + ] + }, + { + "kind": "Deployment", + "name": "mysql-deployment", + "containers": [ + { + "name": "deepflow-mysql", + "image": "images.onwalk.net/private/deepflow-v6.3/mysql-server:8.0.26" + } + ] + }, + { + "kind": "Deployment", + "name": "opensource-endpoints-operator", + "containers": [ + { + "name": "endpoints-operator", + "image": "images.onwalk.net/private/deepflow-v6.3/endpoints-operator:0.2.1" + } + ] + }, + { + "kind": "Deployment", + "name": "pcap-deployment", + "containers": [ + { + "name": "pcap", + "image": "images.onwalk.net/private/deepflow-v6.3/pcap:v6.3.188" + } + ] + }, + { + "kind": "Deployment", + "name": "postman-deployment", + "containers": [ + { + "name": "deepflow-postman", + "image": "images.onwalk.net/private/deepflow-v6.3/postman:v6.3.54" + } + ] + }, + { + "kind": "Deployment", + "name": "querier-js-deployment", + "containers": [ + { + "name": "deepflow-querier-js", + "image": "images.onwalk.net/private/deepflow-v6.3/querier-js:v6.3.264" + } + ] + }, + { + "kind": "Deployment", + "name": "rabbitmq-deployment", + "containers": [ + { + "name": "deepflow-rabbitmq", + "image": "images.onwalk.net/private/deepflow-v6.3/rabbitmq:3.10.25" + } + ] + }, + { + "kind": "Deployment", + "name": "redis-deployment", + "containers": [ + { + "name": "deepflow-redis", + "image": 
"images.onwalk.net/private/deepflow-v6.3/redis:6.2.6" + } + ] + }, + { + "kind": "Deployment", + "name": "report-deployment", + "containers": [ + { + "name": "deepflow-report", + "image": "images.onwalk.net/private/deepflow-v6.3/report:v6.3.247" + } + ] + }, + { + "kind": "Deployment", + "name": "statistics-deployment", + "containers": [ + { + "name": "deepflow-statistics", + "image": "images.onwalk.net/private/deepflow-v6.3/statistics:v6.3.2082" + } + ] + }, + { + "kind": "Deployment", + "name": "talker-deployment", + "containers": [ + { + "name": "deepflow-talker", + "image": "images.onwalk.net/private/deepflow-v6.3/talker:v6.3.2958" + } + ] + }, + { + "kind": "Deployment", + "name": "warrant-deployment", + "containers": [ + { + "name": "deepflow-warrant", + "image": "images.onwalk.net/private/deepflow-v6.3/warrant:v6.3.81" + } + ] + }, + { + "kind": "Deployment", + "name": "web-sched-deployment", + "containers": [ + { + "name": "web-sched", + "image": "images.onwalk.net/private/deepflow-v6.3/df-web-sched:v6.3.103" + } + ] + }, + { + "kind": "Deployment", + "name": "web-tools-deployment", + "containers": [ + { + "name": "web-tools", + "image": "images.onwalk.net/private/deepflow-v6.3/web-tools:v6.3.122" + } + ] + }, + { + "kind": "Deployment", + "name": "webssh-deployment", + "containers": [ + { + "name": "webssh", + "image": "images.onwalk.net/private/deepflow-v6.3/webssh:v6.3.22" + } + ] + }, + { + "kind": "StatefulSet", + "name": "masterdeepflow-clickhouse", + "containers": [ + { + "name": "clickhouse", + "image": "images.onwalk.net/private/deepflow-v6.3/clickhouse-server:22.8.20.11" + } + ] + }, + { + "kind": "StatefulSet", + "name": "opensource-loki", + "containers": [ + { + "name": "loki", + "image": "images.onwalk.net/private/deepflow-v6.3/loki:2.4.2" + } + ] + }, + { + "kind": "DaemonSet", + "name": "check", + "containers": [ + { + "name": "deepflow-check", + "image": "images.onwalk.net/private/deepflow-v6.3/mntnct:v6.3.1317" + } + ] + }, + { + "kind": 
"DaemonSet", + "name": "dedicated-agent", + "containers": [ + { + "name": "dedicated-agent", + "image": "images.onwalk.net/private/deepflow-v6.3/deepflow-agent:v6.3.4400" + } + ] + }, + { + "kind": "DaemonSet", + "name": "deepflow-agent", + "containers": [ + { + "name": "deepflow-agent", + "image": "images.onwalk.net/private/deepflow-v6.3/deepflow-agent:v6.3.4211" + } + ] + }, + { + "kind": "DaemonSet", + "name": "elasticsearch-daemonset", + "containers": [ + { + "name": "deepflow-elasticsearch", + "image": "images.onwalk.net/private/deepflow-v6.3/elasticsearch:6.8.8" + } + ] + }, + { + "kind": "DaemonSet", + "name": "front-end-daemonset", + "containers": [ + { + "name": "front-end", + "image": "images.onwalk.net/private/deepflow-v6.3/apientry:v6.3.193" + } + ] + }, + { + "kind": "DaemonSet", + "name": "log-cleaner-daemonset", + "containers": [ + { + "name": "deepflow-log-cleaner", + "image": "images.onwalk.net/private/deepflow-v6.3/log-cleaner:v6.3.11" + } + ] + }, + { + "kind": "DaemonSet", + "name": "opensource-promtail", + "containers": [ + { + "name": "promtail", + "image": "images.onwalk.net/private/deepflow-v6.3/promtail:2.4.2" + } + ] + }, + { + "kind": "DaemonSet", + "name": "telegraf-daemonset", + "containers": [ + { + "name": "deepflow-telegraf", + "image": "images.onwalk.net/private/deepflow-v6.3/telegraf:1.14.1.12" + } + ] + }, + { + "kind": "CronJob", + "name": "database-backup", + "containers": [ + { + "name": "database-backup", + "image": "images.onwalk.net/private/deepflow-v6.3/mntnct:v6.3.1317" + } + ] + } + ] +} diff --git a/scripts/deepflow/check_k8s_node_config.sh b/scripts/deepflow/check_k8s_node_config.sh new file mode 100755 index 0000000..9db95ea --- /dev/null +++ b/scripts/deepflow/check_k8s_node_config.sh @@ -0,0 +1,215 @@ + +#!/bin/bash + +# 获取操作系统信息 +get_os_info() { + if [ -f /etc/os-release ]; then + . 
/etc/os-release + OS_NAME=$NAME + OS_VERSION=$VERSION_ID + elif type lsb_release >/dev/null 2>&1; then + OS_NAME=$(lsb_release -si) + OS_VERSION=$(lsb_release -sr) + else + OS_NAME=$(uname -s) + OS_VERSION=$(uname -r) + fi + echo "当前操作系统: $OS_NAME $OS_VERSION" +} + +# 检查 DNS 解析 +check_dns() { + echo "检查 DNS 解析配置..." + dns_config=$(grep "nameserver" /etc/resolv.conf) + if [[ -n "$dns_config" && "$dns_config" != *"127.0.0.1"* ]]; then + echo "✅ DNS 解析配置正确" + else + echo "❌ DNS 解析配置错误,未设置或包含127.0.0.1" + operations+="\n1. 编辑 /etc/resolv.conf,配置有效的 nameserver,如 114.114.114.114" + fi +} + +# 检查主机名配置 +check_hostname() { + echo "检查主机名配置..." + hostname=$(hostname) + if [[ "$hostname" != *"local"* && "$hostname" != *"_"* && ${#hostname} -le 64 ]]; then + echo "✅ 主机名配置正确:$hostname" + else + echo "❌ 主机名配置不符合要求:$hostname" + operations+="\n2. 修改主机名为合法值,使用 hostnamectl set-hostname 命令" + fi + + # 检查 /etc/hosts 是否包含主机名解析 + hosts_file=$(cat /etc/hosts) + if [[ "$hosts_file" == *"$hostname"* ]]; then + echo "✅ /etc/hosts 中包含主机名解析" + else + echo "❌ /etc/hosts 中未找到主机名解析" + operations+="\n3. 修改 /etc/hosts,添加主机名解析" + fi +} + +# 检查数据盘挂载 +check_disk_mount() { + echo "检查数据盘挂载..." + lsblk_output=$(lsblk) + df_output=$(df -hT) + # 检查是否挂载 /mnt 目录 + if [[ "$df_output" == *"/mnt"* ]]; then + echo "✅ 数据盘已挂载到 /mnt" + # 打印 /mnt 的大小 + mnt_size=$(df -h | grep '/mnt' | awk '{print $2}') + echo "当前 /mnt 大小: $mnt_size" + else + echo "❌ 数据盘未挂载到 /mnt" + operations+="\n4. 挂载数据盘到 /mnt" + fi + + # 检查 /etc/fstab 中是否包含自动挂载配置 + fstab_config=$(grep "/mnt" /etc/fstab) + if [[ -n "$fstab_config" ]]; then + echo "✅ /etc/fstab 中包含数据盘自动挂载配置" + else + echo "❌ /etc/fstab 中未找到数据盘自动挂载配置" + operations+="\n5. 在 /etc/fstab 中添加自动挂载配置" + fi +} + +# 检查免密登录配置 +check_ssh_key() { + echo "检查免密登录配置..." + ssh_config_dir="/root/.ssh" + if [[ -d "$ssh_config_dir" && -f "$ssh_config_dir/authorized_keys" ]]; then + echo "✅ 已配置免密登录" + else + echo "❌ 未配置免密登录" + operations+="\n6. 
配置免密登录:使用 ssh-keygen 和 ssh-copy-id 配置公钥免密登录"
+    fi
+}
+
+# Check swap: passes when `swapon --show` lists no active swap device.
+check_swap() {
+    echo "检查 swap 缓存..."
+    swap_status=$(swapon --show)
+    if [[ -z "$swap_status" ]]; then
+        echo "✅ swap 已关闭"
+    else
+        echo "❌ swap 未关闭"
+        operations+="\n7. 关闭 swap:执行 swapoff -a 并删除 /etc/fstab 中的 swap 条目"
+    fi
+}
+
+# Check the firewall: firewalld on CentOS/RHEL, ufw elsewhere. OS_NAME is matched by substring because os-release NAME is e.g. "CentOS Linux".
+check_firewall() {
+    echo "检查防火墙状态..."
+    if [[ "$OS_NAME" == *CentOS* || "$OS_NAME" == *"Red Hat"* || "$OS_NAME" == *RedHat* ]]; then
+        firewalld_status=$(systemctl is-active firewalld)
+        if [[ "$firewalld_status" == "inactive" ]]; then
+            echo "✅ 防火墙已关闭"
+        else
+            echo "❌ 防火墙未关闭"
+            operations+="\n8. 停止防火墙并禁用:执行 systemctl stop firewalld 和 systemctl disable firewalld"
+        fi
+    else
+        ufw_status=$(ufw status | grep "Status" | awk '{print $2}')
+        if [[ "$ufw_status" == "inactive" ]]; then
+            echo "✅ 防火墙已关闭"
+        else
+            echo "❌ 防火墙未关闭"
+            operations+="\n8. 停止防火墙并禁用:执行 ufw disable"
+        fi
+    fi
+}
+
+# Check the mandatory-access-control layer: SELinux on CentOS/RHEL, AppArmor on Ubuntu (same substring matching as check_firewall).
+check_security() {
+    echo "检查 SELinux 或 AppArmor 状态..."
+    if [[ "$OS_NAME" == *CentOS* || "$OS_NAME" == *"Red Hat"* || "$OS_NAME" == *RedHat* ]]; then
+        selinux_status=$(getenforce)
+        if [[ "$selinux_status" == "Disabled" ]]; then
+            echo "✅ SELinux 已禁用"
+        else
+            echo "❌ SELinux 未禁用"
+            operations+="\n9. 禁用 SELinux:执行 setenforce 0 并修改 /etc/selinux/config"
+        fi
+    elif [[ "$OS_NAME" == *Ubuntu* ]]; then
+        apparmor_status=$(systemctl is-active apparmor)
+        if [[ "$apparmor_status" == "inactive" ]]; then
+            echo "✅ AppArmor 已禁用"
+        else
+            echo "❌ AppArmor 未禁用"
+            operations+="\n9. 禁用 AppArmor:执行 systemctl stop apparmor 并禁用 systemctl disable apparmor"
+        fi
+    else
+        echo "❌ 无法识别 SELinux 或 AppArmor 状态"
+        operations+="\n9. SELinux 或 AppArmor 状态检查适用于 CentOS/RedHat 或 Ubuntu 系统"
+    fi
+}
+
+# Check IPv4 forwarding: passes when sysctl reports net.ipv4.ip_forward = 1.
+check_ip_forward() {
+    echo "检查 IPV4 流量转发..."
+    ipv4_forward_status=$(sysctl net.ipv4.ip_forward | grep -o "net.ipv4.ip_forward = 1")
+    if [[ -n "$ipv4_forward_status" ]]; then
+        echo "✅ IPV4 流量转发已开启"
+    else
+        echo "❌ IPV4 流量转发未开启"
+        operations+="\n10. 
开启 IPV4 流量转发:执行 echo 'net.ipv4.ip_forward=1' >> /etc/sysctl.conf && sysctl -p" + fi + # 打印 /etc/sysctl.conf 中 ip_forward 配置 + ip_forward_config=$(grep "net.ipv4.ip_forward" /etc/sysctl.conf) + echo "当前 /etc/sysctl.conf 中的 IPV4 流量转发配置:$ip_forward_config" +} + +# 检查操作系统连接数限制 +check_conn_limit() { + echo "检查操作系统级别连接数限制..." + + # 获取 ulimit 输出 + ulimit_output=$(ulimit -a) + + # 获取 nofile 和 nproc 配置的值 + nofile_limit=$(ulimit -n) + nproc_limit=$(ulimit -u) + + # 检查 nofile 和 nproc 是否为 1048576 + if [[ "$nofile_limit" -eq 1048576 && "$nproc_limit" -eq 1048576 ]]; then + echo "✅ 系统连接数限制配置正确: nofile = $nofile_limit, nproc = $nproc_limit" + else + echo "❌ 系统连接数限制配置错误" + echo " 当前 nofile = $nofile_limit, nproc = $nproc_limit" + operations+="\n11. 修改连接数限制:编辑 /etc/security/limits.conf 文件并配置 nofile 和 nproc 为 1048576" + fi + + # 检查 /etc/security/limits.conf 文件中的 root 连接数限制配置 + limits_config=$(grep -E "root\s+soft\s+nofile\s+1048576|root\s+hard\s+nofile\s+1048576|root\s+soft\s+nproc\s+1048576|root\s+hard\s+nproc\s+1048576" /etc/security/limits.conf) + if [[ -z "$limits_config" ]]; then + echo "❌ /etc/security/limits.conf 中未设置正确的连接数限制" + operations+="\n12. 请检查 /etc/security/limits.conf 中是否配置了以下项:\nroot soft nofile 1048576\nroot hard nofile 1048576\nroot soft nproc 1048576\nroot hard nproc 1048576" + else + echo "✅ /etc/security/limits.conf 中的关键配置项:" + echo "$limits_config" + fi +} + +# 统一列出检查结果 +operations="" +get_os_info +check_dns +check_hostname +check_disk_mount +check_ssh_key +check_swap +check_firewall +check_security +check_ip_forward +check_conn_limit + +echo -e "\n检查完成。" + +if [[ -n "$operations" ]]; then + echo -e "未通过的检查项及建议操作:$operations" +else + echo "所有检查项通过!" 
+fi
diff --git a/scripts/deepflow/clean-failed-pods.sh b/scripts/deepflow/clean-failed-pods.sh
new file mode 100755
index 0000000..ac35ddf
--- /dev/null
+++ b/scripts/deepflow/clean-failed-pods.sh
@@ -0,0 +1,14 @@
+#!/bin/bash
+# Script: clean-failed-pods.sh
+# Purpose: delete every pod that is not in Running state in the listed namespaces, then delete all jobs there.
+
+# Namespaces to clean up.
+NAMESPACES=("deepflow" "openebs" "kube-system")
+
+# Walk the namespaces; `xargs -r` skips the delete when no pod name is produced.
+for NAMESPACE in "${NAMESPACES[@]}"; do
+    echo "正在删除 $NAMESPACE 命名空间中非 Running 状态的 Pod..."
+    kubectl get pods -n "$NAMESPACE" | grep -v Running | awk 'NR>1 {print $1}' | xargs -r kubectl delete pod -n "$NAMESPACE" --force
+    kubectl delete jobs --all -n "$NAMESPACE"
+    echo "$NAMESPACE 命名空间清理完成!"
+done
diff --git a/scripts/deepflow/deepflow-server-master-controller-pre.sh b/scripts/deepflow/deepflow-server-master-controller-pre.sh
new file mode 100755
index 0000000..1a99089
--- /dev/null
+++ b/scripts/deepflow/deepflow-server-master-controller-pre.sh
@@ -0,0 +1,20 @@
+k8s_node=`kubectl get nodes | awk 'NR>1{print $1}'`
+
+kubectl label node $k8s_node master_controller=enable
+kubectl label node $k8s_node tsdb=enable
+kubectl label node $k8s_node dfdb=enable
+kubectl label node $k8s_node elasticsearch-warm=enable
+
+kubectl describe node | grep Taint
+kubectl taint nodes $k8s_node node-role.kubernetes.io/control-plane:NoSchedule-
+
+
+kubectl label nodes $k8s_node slave_controller-
+
+kubectl get node --show-labels
+
+mkdir -p /usr/local/deepflow
+mount -o ro deepflow-docker-release-v6.5-242.iso /media
+
+rsync -av /media/* /usr/local/deepflow/
+ln -sv /usr/local/deepflow/registry /var/lib/registry -f || rsync -av /usr/local/deepflow/registry/* /var/lib/registry/
diff --git a/scripts/deepflow/deepflow-server-slave-controller-pre.sh b/scripts/deepflow/deepflow-server-slave-controller-pre.sh
new file mode 100755
index 0000000..fb5ea7f
--- /dev/null
+++ b/scripts/deepflow/deepflow-server-slave-controller-pre.sh
@@ -0,0 +1,20 @@
+
+k8s_node=`sudo kubectl get nodes | awk 'NR>1{print $1}'`
+
+sudo kubectl label node 
$k8s_node slave_controller=enable
+sudo kubectl label node $k8s_node tsdb=enable
+sudo kubectl label node $k8s_node dfdb=enable
+sudo kubectl label node $k8s_node elasticsearch-warm=enable
+
+kubectl describe node | grep Taint
+kubectl taint nodes $k8s_node node-role.kubernetes.io/control-plane:NoSchedule-
+
+kubectl label nodes $k8s_node master_controller-
+
+kubectl get node --show-labels
+
+mkdir -p /usr/local/deepflow
+mount -o ro deepflow-docker-release-v6.5-242.iso /media
+
+rsync -av /media/* /usr/local/deepflow/
+rsync -av /usr/local/deepflow/registry/* /var/lib/registry/
diff --git a/scripts/deepflow/deepflow_k8s_backup.sh b/scripts/deepflow/deepflow_k8s_backup.sh
new file mode 100755
index 0000000..07ad09d
--- /dev/null
+++ b/scripts/deepflow/deepflow_k8s_backup.sh
@@ -0,0 +1,161 @@
+#!/bin/bash
+
+NAMESPACE="deepflow"
+VERSION_PREFIX="v6.3"
+TIMESTAMP=$(date +"%Y%m%d-%H")
+BACKUP_FILE="backup_images_${VERSION_PREFIX}-${TIMESTAMP}.json"
+
+# Back up the container images of all Deployments/StatefulSets/DaemonSets/CronJobs in $NAMESPACE to $BACKUP_FILE; exits 1 on failure.
+backup_images() {
+    echo "🔄 开始备份 deepflow 命名空间的 Kubernetes 资源镜像信息..."
+
+    kubectl get deployments,statefulsets,daemonsets,cronjobs -n "$NAMESPACE" -o json | jq --arg version "${VERSION_PREFIX}-${TIMESTAMP}" '
+    {
+        version: $version,
+        items: [
+            .items[] | select(.spec != null) | {
+                kind: .kind,
+                name: .metadata.name,
+                containers: (
+                    if .kind == "CronJob" then
+                        [.spec.jobTemplate.spec.template.spec.containers[]? | {name: .name, image: .image}]
+                    else
+                        [.spec.template.spec.containers[]? | {name: .name, image: .image}]
+                    end
+                )
+            }
+        ]
+    }' > "$BACKUP_FILE"
+    # The redirect creates $BACKUP_FILE even when kubectl or jq fails, so judge success by the pipeline status and a non-empty file.
+    if [[ ${PIPESTATUS[0]} -eq 0 && ${PIPESTATUS[1]} -eq 0 && -s "$BACKUP_FILE" ]]; then
+        echo "✅ 备份成功!文件路径: $BACKUP_FILE"
+        echo "📋 备份内容预览(前10行):"
+        head -n 10 "$BACKUP_FILE"
+    else
+        echo "❌ 备份失败,请检查 Kubernetes 访问权限!"
+        exit 1
+    fi
+}
+
+# Verify that the images currently running in $NAMESPACE match the backup file given as $1.
+check_images() {
+    if [[ ! -f "$1" ]]; then
+        echo "❌ 错误: 备份文件 $1 不存在!请先运行备份。"
+        exit 1
+    fi
+
+    echo "🔍 正在校验当前 Kubernetes 资源与备份文件是否一致..."
+
+    CURRENT_IMAGES=$(kubectl get deployments,statefulsets,daemonsets,cronjobs -n "$NAMESPACE" -o json | jq '
+    {
+        items: [
+            .items[] | select(.spec != null) | {
+                kind: .kind,
+                name: .metadata.name,
+                containers: (
+                    if .kind == "CronJob" then
+                        [.spec.jobTemplate.spec.template.spec.containers[]? | {name: .name, image: .image}]
+                    else
+                        [.spec.template.spec.containers[]? | {name: .name, image: .image}]
+                    end
+                )
+            }
+        ]
+    }')
+
+    BACKUP_IMAGES=$(cat "$1")
+
+    MATCH_COUNT=0
+    MISMATCH_COUNT=0
+    # Feed the loop through process substitution: a `... | while` loop runs in a subshell and would discard both counters.
+    while read -r backup_item; do
+        kind=$(echo "$backup_item" | jq -r '.kind')
+        name=$(echo "$backup_item" | jq -r '.name')
+
+        echo "📌 检查 $kind/$name ..."
+
+        backup_containers=$(echo "$backup_item" | jq -c '.containers[]?')
+        current_containers=$(echo "$CURRENT_IMAGES" | jq -c --arg kind "$kind" --arg name "$name" '.items[] | select(.kind == $kind and .name == $name) | .containers[]?')
+
+        for backup_container in $backup_containers; do
+            container_name=$(echo "$backup_container" | jq -r '.name')
+            backup_image=$(echo "$backup_container" | jq -r '.image')
+
+            current_image=$(echo "$current_containers" | jq -r --arg container_name "$container_name" 'select(.name == $container_name) | .image')
+
+            if [[ "$backup_image" == "$current_image" ]]; then
+                echo " ✅ $container_name 镜像匹配: $backup_image"
+                ((MATCH_COUNT++))
+            else
+                echo " ❌ $container_name 镜像不匹配: 期望 $backup_image,当前 $current_image"
+                ((MISMATCH_COUNT++))
+            fi
+        done
+    done < <(echo "$BACKUP_IMAGES" | jq -c '.items[]')
+
+    echo "📊 校验结果: ✅ 匹配 $MATCH_COUNT 项, ❌ 不匹配 $MISMATCH_COUNT 项"
+
+    if [[ $MISMATCH_COUNT -eq 0 ]]; then
+        echo "✅ 校验通过!当前运行的镜像版本与备份一致。"
+    else
+        echo "❌ 校验失败!请检查上方输出。"
+    fi
+}
+
+# Restore the container images of the workloads in $NAMESPACE from the backup file given as $1.
+restore_images() {
+    if [[ ! -f "$1" ]]; then
+        echo "❌ 错误: 备份文件 $1 不存在!请先运行备份。"
+        exit 1
+    fi
+
+    echo "🔄 开始恢复 deepflow 命名空间的 Kubernetes 资源镜像..."
+
+    cat "$1" | jq -c '.items[]' | while read -r item; do
+        kind=$(echo "$item" | jq -r '.kind')
+        name=$(echo "$item" | jq -r '.name')
+
+        echo "📌 处理 $kind/$name ..."
+
+        containers=$(echo "$item" | jq -c '.containers[]?')
+        for container in $containers; do
+            container_name=$(echo "$container" | jq -r '.name')
+            image=$(echo "$container" | jq -r '.image')
+
+            echo " 🔄 更新容器: $container_name -> 镜像: $image"
+            kubectl set image -n "$NAMESPACE" "$kind/$name" "$container_name=$image" --record
+            if [[ $? -eq 0 ]]; then
+                echo " ✅ 更新成功!"
+            else
+                echo " ❌ 更新失败!请检查日志。"
+            fi
+        done
+    done
+
+    echo "✅ 恢复完成!"
+}
+
+# Dispatch on the sub-command given as $1; check/restore take the backup file path as $2.
+case "$1" in
+    backup)
+        backup_images
+        ;;
+    check)
+        if [[ -z "$2" ]]; then
+            echo "❌ 错误: 需要提供备份文件路径!示例: $0 check backup_images_v6.3-20250309-17.json"
+            exit 1
+        fi
+        check_images "$2"
+        ;;
+    restore)
+        if [[ -z "$2" ]]; then
+            echo "❌ 错误: 需要提供备份文件路径!示例: $0 restore backup_images_v6.3-20250309-17.json"
+            exit 1
+        fi
+        restore_images "$2"
+        ;;
+    *)
+        echo "📌 用法: $0 {backup|check <备份文件>|restore <备份文件>}"
+        exit 1
+        ;;
+esac
diff --git a/scripts/deepflow/deploy-k8s.sh b/scripts/deepflow/deploy-k8s.sh
new file mode 100755
index 0000000..9b73d80
--- /dev/null
+++ b/scripts/deepflow/deploy-k8s.sh
@@ -0,0 +1,27 @@
+#!/bin/bash
+set -e
+
+# Resolve the script's own directory up front: "$0" may be a relative path and the script changes directory below.
+SCRIPT_DIR=$(cd "$(dirname "$0")" && pwd)
+
+# Create the deployment directory (safe to re-run) and unpack the install bundle into it.
+mkdir -p /opt/k8s-deploy
+tar -xvpf sealos-amd64-k8s-1.25.16.tar.gz -C /opt/k8s-deploy
+cd /opt/k8s-deploy/
+cp sealos helm calicoctl nerdctl /usr/bin/
+chmod +x /usr/bin/sealos /usr/bin/helm /usr/bin/calicoctl /usr/bin/nerdctl
+
+# Import the offline images.
+sealos load -i sealos-calico.tar
+sealos load -i sealos-helm.tar
+sealos load -i sealos-k8s-1.25.16.tar
+
+# Single-node deployment (no SSH password needed; run directly as root on the local machine).
+# The helper script prints the registry prefix to pull from, chosen by the region of the node's IP.
+REGISTRY_SCRIPT=$SCRIPT_DIR/../playbooks/roles/vhosts/gpu-k8s/files/get_labring_registry.sh
+REGISTRY_PREFIX=$("$REGISTRY_SCRIPT")
+sealos run \
+    "${REGISTRY_PREFIX}/kubernetes:v1.25.16" \
+    "${REGISTRY_PREFIX}/helm:v3.9.4" \
+    "${REGISTRY_PREFIX}/calico:v3.24.1" \
+    --single
diff --git a/scripts/deepflow/df-web-ai-push-all.sh b/scripts/deepflow/df-web-ai-push-all.sh
new file mode 100644
index 0000000..8889bdc
--- /dev/null
+++ b/scripts/deepflow/df-web-ai-push-all.sh
@@ -0,0 
+1,83 @@ +#!/bin/bash +# df-web-ai-push-all.sh +# 从 -.multi.tar (OCI) 逐个 load → retag → push 到目标仓库(multi-arch) +set -euo pipefail + +LOCAL_REG="${LOCAL_REG:-sealos.hub:5000}" +NERDCTL_BIN="${NERDCTL_BIN:-nerdctl}" +NERDCTL_NS="${NERDCTL_NS:-}" # 如 "k8s.io" +NC="${NERDCTL_BIN} ${NERDCTL_NS:+-n ${NERDCTL_NS}}" + +usage() { + cat < + LOCAL_REG=sealos.hub:5000 $0 + +说明: + - 对每个 tar: + 1) 解析 index.json,取 org.opencontainers.image.ref.name 作为源引用名(SRC_REF) + 2) nerdctl load -i + 3) 将 SRC_REF 重打为 \${LOCAL_REG}/ + 4) nerdctl push \${LOCAL_REG}/ # multi-arch 一次性推送 +EOF +} + +if [[ "${1:-}" =~ ^-h|--help$ ]]; then usage; exit 0; fi + +${NC} login "${LOCAL_REG}" || true + +shopt -s nullglob +TARS=(*.multi.tar) +shopt -u nullglob +[[ ${#TARS[@]} -gt 0 ]] || { echo "⚠️ 未找到 *.multi.tar"; exit 0; } + +get_src_ref_from_tar() { + # 从 OCI index.json 提取 ref.name;优先 manifest 注解,其次 index 注解 + local tar="$1" + local ref="" + ref=$(tar -xOf "$tar" index.json 2>/dev/null | \ + jq -r ' + .manifests[0].annotations["org.opencontainers.image.ref.name"] + // .annotations["org.opencontainers.image.ref.name"] + // empty + ') + echo -n "$ref" +} + +for TAR in "${TARS[@]}"; do + echo + echo "==> Processing $TAR" + + SRC_REF="$(get_src_ref_from_tar "$TAR")" + if [[ -z "$SRC_REF" ]]; then + # 回退:用文件名 -.multi.tar 推导 name:tag + BASE="$(basename "$TAR" .multi.tar)" + if [[ "$BASE" != *:* ]]; then + echo "❌ 无法从 $TAR 提取 SRC_REF,且文件名不含 格式。请改名或使用包含 ref.name 的归档。" + exit 2 + fi + SRC_REF="$BASE" + echo "ℹ️ 未找到 ref.name,使用文件名推导的源引用:$SRC_REF" + else + echo "SRC_REF: $SRC_REF" + fi + + NAME_TAG="${SRC_REF##*/}" # 仅保留 + DEST="${LOCAL_REG}/${NAME_TAG}" + echo "DEST: $DEST" + + echo "==> Load $TAR" + ${NC} load -i "$TAR" + + echo "==> Tag $SRC_REF -> $DEST" + ${NC} tag "$SRC_REF" "$DEST" + + echo "==> Push $DEST (multi-arch)" + ${NC} push "$DEST" + + echo "✅ DONE: $DEST" +done + +echo +echo "All done. 
(multi-arch push)" diff --git a/scripts/deepflow/images.txt b/scripts/deepflow/images.txt new file mode 100644 index 0000000..14d7646 --- /dev/null +++ b/scripts/deepflow/images.txt @@ -0,0 +1,6 @@ +dfcloud-image-registry-vpc.cn-beijing.cr.aliyuncs.com/dev/df-analyze:latest.515 +dfcloud-image-registry-vpc.cn-beijing.cr.aliyuncs.com/dev/df-web-agent:latest.2986036 +dfcloud-image-registry-vpc.cn-beijing.cr.aliyuncs.com/dev/df-web-composer:latest.2984869 +dfcloud-image-registry-vpc.cn-beijing.cr.aliyuncs.com/dev/apientry:latest.268 +hub.deepflow.yunshan.net/dev/alpine:latest +hub.deepflow.yunshan.net/dev/weaviate:1.30.0 diff --git a/scripts/deepflow/pull-all-v6.4.sh b/scripts/deepflow/pull-all-v6.4.sh new file mode 100644 index 0000000..c2dbe75 --- /dev/null +++ b/scripts/deepflow/pull-all-v6.4.sh @@ -0,0 +1,40 @@ +for image in \ +kube.registry.local:5000/acl-controller:v6.4.182 \ +kube.registry.local:5000/alarm:v6.4.703 \ +kube.registry.local:5000/cerebro:0.9.0 \ +kube.registry.local:5000/deepflow-agent:v6.4.4729 \ +kube.registry.local:5000/deepflow-app:v6.4.178 \ +kube.registry.local:5000/df-help:v6.4.1211 \ +kube.registry.local:5000/df-web-qiankun-core:v6.4.11721 \ +kube.registry.local:5000/df-web-service:v6.4.647 \ +kube.registry.local:5000/df-web-metrics-explore:v6.4.5342 \ +kube.registry.local:5000/df-env:v6.4.884 \ +kube.registry.local:5000/fauths:v6.4.482 \ +kube.registry.local:5000/fpermit:v6.4.278 \ +kube.registry.local:5000/apientry:v6.4.210 \ +kube.registry.local:5000/fuser:v6.4.356 \ +kube.registry.local:5000/grafana-agent:v0.38.0 \ +kube.registry.local:5000/grafana-agent-reload:v0.8.0 \ +kube.registry.local:5000/deepflow-init-grafana-ce:latest \ +kube.registry.local:5000/kibana:6.8.8 \ +kube.registry.local:5000/kube-rbac-proxy:v0.14.0 \ +kube.registry.local:5000/kube-state-metrics:v2.9.2 \ +kube.registry.local:5000/manager:v6.4.695 \ +kube.registry.local:5000/mntnct:v6.4.1320 \ +kube.registry.local:5000/mysql-server:8.0.39 \ 
+kube.registry.local:5000/pcap:v6.4.194 \ +kube.registry.local:5000/postman:v6.4.55 \ +kube.registry.local:5000/querier-js:v6.4.303 \ +kube.registry.local:5000/rabbitmq:3.10.25 \ +kube.registry.local:5000/redis:7.0.12 \ +kube.registry.local:5000/report:v6.4.267 \ +kube.registry.local:5000/statistics:v6.4.2171 \ +kube.registry.local:5000/talker:v6.4.2987 \ +kube.registry.local:5000/warrant:v6.4.88 \ +kube.registry.local:5000/df-web-sched:v6.4.213 \ +kube.registry.local:5000/web-tools:v6.4.231 \ +kube.registry.local:5000/webssh:v6.4.25 +do + echo "🔄 Pulling $image ..." + nerdctl --insecure-registry -n k8s.io pull "$image" +done diff --git a/scripts/deepflow/pull_save_scp_image_multi_arch.sh b/scripts/deepflow/pull_save_scp_image_multi_arch.sh new file mode 100644 index 0000000..94e19e7 --- /dev/null +++ b/scripts/deepflow/pull_save_scp_image_multi_arch.sh @@ -0,0 +1,181 @@ +#!/usr/bin/env bash +# deepflow/pull_save_scp_image_multi_arch.sh +# 远端:multi-arch pull(优先 --all-platforms,回退逐平台) +# -> image convert (--oci --all-platforms) 到临时本地引用 +# -> save -o /tmp/-.multi.tar (docker-archive) +# -> scp 回本地 +set -euo pipefail + +REMOTE_HOST="${REMOTE_HOST:-root@10.1.3.179}" +DEST_DIR="${DEST_DIR:-$HOME/Desktop}" +REMOTE_TMPDIR="${REMOTE_TMPDIR:-/tmp}" +RM_REMOTE="${RM_REMOTE:-0}" + +REMOTE_NERDCTL="${REMOTE_NERDCTL:-nerdctl}" +REMOTE_NERDCTL_NS="${REMOTE_NERDCTL_NS:-}" # 例如 "k8s.io" +REMOTE_NC="${REMOTE_NERDCTL} ${REMOTE_NERDCTL_NS:+-n ${REMOTE_NERDCTL_NS}}" + +PLATFORMS_DEFAULT="linux/amd64,linux/arm64" +PLATFORMS="${PLATFORMS:-$PLATFORMS_DEFAULT}" + +usage() { + cat < [image2 ...] 
[--rm-remote] + $0 -f images.txt [--rm-remote] + +流程(远端): + 1) ${REMOTE_NC} pull --all-platforms # 不支持则逐平台 --platform + 2) ${REMOTE_NC} image convert --oci --all-platforms + 3) ${REMOTE_NC} save -o ${REMOTE_TMPDIR}/-.multi.tar + 4) scp 回本地 ${DEST_DIR} + +环境变量: + REMOTE_HOST, DEST_DIR, REMOTE_TMPDIR, REMOTE_NERDCTL, REMOTE_NERDCTL_NS, PLATFORMS, RM_REMOTE +EOF +} + +# ---------- 参数解析 ---------- +IMAGES=() +LIST_FILE="" +if [[ $# -eq 0 ]]; then usage; exit 1; fi + +ARGS=() +while [[ $# -gt 0 ]]; do + case "$1" in + -h|--help) usage; exit 0 ;; + --rm-remote) RM_REMOTE=1; shift ;; + -f) + [[ $# -ge 2 ]] || { echo "❌ 缺少镜像清单文件"; exit 1; } + LIST_FILE="$2"; shift 2 ;; + *) ARGS+=("$1"); shift ;; + esac +done + +if [[ -n "$LIST_FILE" ]]; then + [[ -f "$LIST_FILE" ]] || { echo "❌ 文件不存在: $LIST_FILE"; exit 1; } + while IFS= read -r line; do + line="${line%%#*}"; line="$(echo -n "$line" | xargs || true)" + [[ -n "$line" ]] || continue + IMAGES+=("$line") + done < "$LIST_FILE" +fi +if [[ ${#ARGS[@]} -gt 0 ]]; then IMAGES+=("${ARGS[@]}"); fi +[[ ${#IMAGES[@]} -gt 0 ]] || { echo "❌ 没有可处理的镜像"; exit 1; } + +echo "🖥️ 远端: $REMOTE_HOST" +echo "📂 本地目录: $DEST_DIR" +echo "🧭 命名空间: ${REMOTE_NERDCTL_NS:-}" +echo "🧹 rm-remote: $([[ $RM_REMOTE -eq 1 ]] && echo ON || echo OFF)" +echo "🧩 回退平台: $PLATFORMS" +mkdir -p "$DEST_DIR" + +# ---------- 预检查 ---------- +ssh -o BatchMode=yes "$REMOTE_HOST" "command -v ${REMOTE_NERDCTL} >/dev/null" \ + || { echo "❌ 远端未安装 ${REMOTE_NERDCTL}"; exit 1; } +ssh -o BatchMode=yes "$REMOTE_HOST" "test -d ${REMOTE_TMPDIR}" \ + || { echo "❌ 远端临时目录不存在: ${REMOTE_TMPDIR}"; exit 1; } + +REMOTE_SUPPORTS_ALL_PLATFORMS=0 +if ssh -o BatchMode=yes "$REMOTE_HOST" "${REMOTE_NC} pull --help 2>/dev/null | grep -q -- '--all-platforms'"; then + REMOTE_SUPPORTS_ALL_PLATFORMS=1 +fi + +# ---------- 工具函数 ---------- +rand_suffix() { LC_ALL=C tr -dc a-z0-9 /dev/null 2>&1 || true + fi' RETURN + + # 1) 拉取多架构 + if [[ $REMOTE_SUPPORTS_ALL_PLATFORMS -eq 1 ]]; then + ssh -o BatchMode=yes 
"$REMOTE_HOST" \ + "set -euo pipefail; ${REMOTE_NC} pull --all-platforms $Q_IMAGE" + else + echo "ℹ️ 远端不支持 --all-platforms,逐平台拉取: $PLATFORMS" + IFS=, read -r -a arr <<< "$PLATFORMS" + for p in "${arr[@]}"; do + local QP; QP=$(printf %q "$p") + ssh -o BatchMode=yes "$REMOTE_HOST" \ + "set -euo pipefail; ${REMOTE_NC} pull --platform=$QP $Q_IMAGE" + done + fi + + # 2) 转为 OCI(到临时本地引用),确保包含所有平台 + ssh -o BatchMode=yes "$REMOTE_HOST" \ + "set -euo pipefail; ${REMOTE_NC} image convert --oci --all-platforms $Q_IMAGE $Q_TARGET" + + # 3) 保存为 docker-archive TAR + ssh -o BatchMode=yes "$REMOTE_HOST" \ + "set -euo pipefail; ${REMOTE_NC} save -o $Q_TAR $Q_TARGET" + + # 4) 回传 + scp -q "$REMOTE_HOST:$REMOTE_TAR" "$DEST_PATH" + + # 5) 清理 + if [[ $RM_REMOTE -eq 1 ]]; then + ssh -o BatchMode=yes "$REMOTE_HOST" "rm -f $Q_TAR" + fi + ssh -o BatchMode=yes "$REMOTE_HOST" "${REMOTE_NC} rmi -f $Q_TARGET" >/dev/null 2>&1 || true + + CLEAN_ON_FAILURE=0 + trap - RETURN + echo "✅ OK: $DEST_PATH (docker-archive, multi-arch)" + echo " 加载:nerdctl load -i \"$DEST_PATH\"" + echo " 基本校验:tar tf \"$DEST_PATH\" | egrep 'manifest.json|repositories' | sed -n '1,5p'" + echo " 平台确认(加载后):nerdctl image inspect \"$TARGET_REF\" --mode=native | jq '.[0].Manifest.Manifests[].Platform'" +} + +for img in "${IMAGES[@]}"; do + process_image "$img" +done + +echo +echo "🎉 全部 multi-arch 导出完成。" diff --git a/scripts/deepflow/setup-agent-all-in-one.sh b/scripts/deepflow/setup-agent-all-in-one.sh new file mode 100644 index 0000000..55ec98a --- /dev/null +++ b/scripts/deepflow/setup-agent-all-in-one.sh @@ -0,0 +1,194 @@ +#!/bin/bash +set -e + +cat << EOF > values-custom.yaml +deepflowServerNodeIPS: +- 10.50.1.111 +#deepflowK8sClusterID: "fffffff" # FIXME: K8s ClusterID +image: + repository: hub.deepflow.yunshan.net/public/deepflow-agent + pullPolicy: Always + tag: v6.5 +EOF + +helm repo add deepflow https://deepflowio.github.io/deepflow +helm repo update deepflow # use `helm repo update` when helm < 3.7.0 +helm install 
deepflow-agent -n deepflow deepflow/deepflow-agent --create-namespace -f values-custom.yaml + +######################################################################################################## + +helm repo add grafana https://grafana.github.io/helm-charts +helm repo add prometheus-community https://prometheus-community.github.io/helm-charts +helm repo update +kubectl create ns deepflow || true + +helm upgrade --install kube-state-metrics prometheus-community/kube-state-metrics \ + --namespace deepflow --create-namespace + +helm upgrade --install node-exporter prometheus-community/prometheus-node-exporter \ + --namespace deepflow --create-namespace \ + --set service.type=ClusterIP \ + --set service.port=9100 + +cat > grafana-agent-values.yaml << EOF +global: + image: + registry: "images.onwalk.net/public" +agent: + mode: 'static' + configMap: + create: true + content: '' +logs: + enabled: false +traces: + enabled: false +EOF + +helm upgrade --install grafana-agent grafana/grafana-agent --namespace deepflow -f grafana-agent-values.yaml + +cat > grafana-agent-configmap.yaml << EOF +apiVersion: v1 +data: + config.yaml: |- + server: + log_level: info + log_format: logfmt + metrics: + global: + scrape_interval: 1m + configs: + - name: agent + scrape_configs: + - job_name: kube-state-metrics + static_configs: + - targets: ['10.43.155.169:8080'] + - job_name: node-metrics + static_configs: + - targets: ['10.43.68.133:9100'] + remote_write: + - url: http://deepflow-agent.deepflow.svc.cluster.local/api/v1/prometheus +kind: ConfigMap +metadata: + annotations: + meta.helm.sh/release-name: grafana-agent + meta.helm.sh/release-namespace: deepflow + labels: + app.kubernetes.io/instance: grafana-agent + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/name: grafana-agent + app.kubernetes.io/version: v0.42.0 + helm.sh/chart: grafana-agent-0.42.0 + name: grafana-agent + namespace: deepflow +EOF + +kubectl apply -f grafana-agent-configmap.yaml + +kubectl get pods -n 
deepflow
+
+########################################################################################################
+
+
+helm repo add vector https://helm.vector.dev
+helm repo update
+cat << EOF > vector-values-custom.yaml
+role: Agent
+#nodeSelector:
+#  allow/vector: "false"
+
+# resources -- Set Vector resource requests and limits.
+resources:
+  requests:
+    cpu: 200m
+    memory: 256Mi
+  limits:
+    cpu: 200m
+    memory: 256Mi
+image:
+  repository: images.onwalk.net/public/timberio/vector
+  pullPolicy: Always
+  tag: "0.37.1-distroless-libc"
+podLabels:
+  vector.dev/exclude: "true"
+  app: deepflow
+# extraVolumes -- Additional Volumes to use with Vector Pods.
+  # extraVolumes:
+  # - name: opt-log
+  #   hostPath:
+  #     path: "/opt/log/"
+# extraVolumeMounts -- Additional Volume to mount into Vector Containers.
+  # extraVolumeMounts:
+  # - name: opt-log
+  #   mountPath: "/opt/log/"
+  #   readOnly: true
+customConfig:
+  ## The configuration comes from https://vector.dev/docs/reference/configuration/global-options/#data_dir
+  data_dir: /vector-data-dir
+  api:
+    enabled: true
+    address: 127.0.0.1:8686
+    playground: false
+  sources:
+    kubernetes_logs:
+      type: kubernetes_logs
+      namespace_annotation_fields:
+        namespace_labels: ""
+      node_annotation_fields:
+        node_labels: ""
+      pod_annotation_fields:
+        pod_annotations: ""
+        pod_labels: ""
+
+  transforms:
+    remap_kubernetes_logs:
+      type: remap
+      inputs:
+      - kubernetes_logs
+      source: |-
+        # try to parse json
+        if is_string(.message) && is_json(string!(.message)) {
+            tags = parse_json(.message) ?? {}
+            .message = tags.message # FIXME: the log content key inside json
+            del(tags.message)
+            .json = tags
+        }
+
+        if !exists(.level) {
+            if exists(.json) {
+                .level = .json.level
+                del(.json.level)
+            } else {
+                # match log levels surround by ``[]`` or ``<>`` with ignore case
+                level_tags = parse_regex(.message, r'[\[\\\<](?<level>(?i)INFOR?(MATION)?|WARN(ING)?|DEBUG?|ERROR?|TRACE|FATAL|CRIT(ICAL)?)[\]\\\>]') ?? 
{}
+                if !exists(level_tags.level) {
+                    # match log levels surround by whitespace, required uppercase strictly in case mismatching
+                    level_tags = parse_regex(.message, r'[\s](?<level>INFOR?(MATION)?|WARN(ING)?|DEBUG?|ERROR?|TRACE|FATAL|CRIT(ICAL)?)[\s]') ?? {}
+                }
+                if exists(level_tags.level) {
+                    level_tags.level = upcase(string!(level_tags.level))
+                    .level = level_tags.level
+                }
+            }
+        }
+
+        if !exists(._df_log_type) {
+            # default log type
+            ._df_log_type = "user"
+        }
+
+        if !exists(.app_service) {
+            # FIXME: files 模块没有此字段,请通过日志内容注入应用名称
+            .app_service = .kubernetes.container_name
+        }
+  sinks:
+    http:
+      encoding:
+        codec: json
+      inputs:
+      - remap_kubernetes_logs # NOTE: 注意这里数据源是 transform 模块的 key
+      type: http
+      uri: http://deepflow-agent.deepflow/api/v1/log
+EOF
+helm upgrade --install vector vector/vector --namespace deepflow --create-namespace -f vector-values-custom.yaml
+
diff --git a/scripts/deepflow/setup-deepflow-agent.sh b/scripts/deepflow/setup-deepflow-agent.sh
new file mode 100644
index 0000000..4efa95f
--- /dev/null
+++ b/scripts/deepflow/setup-deepflow-agent.sh
@@ -0,0 +1,13 @@
+cat << EOF > values-custom.yaml
+deepflowServerNodeIPS:
+- 10.50.1.111
+#deepflowK8sClusterID: "fffffff" # FIXME: K8s ClusterID
+image:
+  repository: hub.deepflow.yunshan.net/public/deepflow-agent
+  pullPolicy: Always
+  tag: v6.5
+EOF
+
+helm repo add deepflow https://deepflowio.github.io/deepflow
+helm repo update deepflow # use `helm repo update` when helm < 3.7.0
+helm upgrade --install deepflow-agent -n deepflow deepflow/deepflow-agent --create-namespace -f values-custom.yaml
diff --git a/scripts/deploy-open-webui.sh b/scripts/deploy-open-webui.sh
new file mode 100644
index 0000000..3cb5fa5
--- /dev/null
+++ b/scripts/deploy-open-webui.sh
@@ -0,0 +1 @@
+helm upgrade --install --namespace ai open-webui open-webui/open-webui --set pipelines.enabled=true --set ollama.enabled=false
diff --git a/scripts/deploy_deepflow_agent.sh b/scripts/deploy_deepflow_agent.sh
new file mode 100644
index 
0000000..4d16d41 --- /dev/null +++ b/scripts/deploy_deepflow_agent.sh @@ -0,0 +1,368 @@ +#!/bin/bash + +set -e + +#################################### +# ������ 配置区 +#################################### + +IP_LIST="./ip.list" +SERVICE_NAME="deepflow-agent" +PKG_DIR="deepflow-agent-for-linux" +MAX_PARALLEL=5 + +CONTROLLER_IP="" +VTAP_GROUP_ID="" +LIMIT="" +SUDO_MODE="sudo" # 可选: sudo | sudo-i + +SSH_OPTS="-o StrictHostKeyChecking=no -o ConnectTimeout=15" + +FAILED_FILE="failed_hosts.txt" +SUCCESS_FILE="success_hosts.txt" +> "$FAILED_FILE" +> "$SUCCESS_FILE" + +#################################### +# 参数解析(新增 --sudo-mode) +#################################### + +if [[ $# -eq 0 ]]; then + echo "用法: $0 {deploy|upgrade|verify} --controller --group [--limit ip1,ip2] [--sudo-mode sudo|sudo-i]" + exit 1 +fi + +ACTION="$1" +shift + +while [[ $# -gt 0 ]]; do + case "$1" in + --controller) + CONTROLLER_IP="$2" + shift 2 + ;; + --group) + VTAP_GROUP_ID="$2" + shift 2 + ;; + --limit) + LIMIT="$2" + shift 2 + ;; + --sudo-mode) + case "$2" in + sudo|sudo-i) + SUDO_MODE="$2" + shift 2 + ;; + *) + echo "❌ --sudo-mode 必须是 'sudo' 或 'sudo-i'" + exit 1 + ;; + esac + ;; + *) + echo "未知参数: $1" + exit 1 + ;; + esac +done + +if [[ "$ACTION" != "deploy" && "$ACTION" != "upgrade" && "$ACTION" != "verify" ]]; then + echo "用法: $0 {deploy|upgrade|verify} --controller --group [--limit ip1,ip2] [--sudo-mode sudo|sudo-i]" + exit 1 +fi + +if [[ "$ACTION" != "verify" && ( -z "$CONTROLLER_IP" || -z "$VTAP_GROUP_ID" ) ]]; then + echo "❗ deploy/upgrade 必须传入 --controller 和 --group 参数" + exit 1 +fi + +#################################### +# 核心函数(重点修改:SUDO 处理 + 重启逻辑) +#################################### + +worker() { + local ip="$1" + local user="$2" + local pass="$3" + + echo "������ [$ACTION] 处理主机 $ip ($user)" + + if [[ "$ACTION" == "verify" ]]; then + verify_agent "$ip" "$user" "$pass" && { + echo "$ip" >> "$SUCCESS_FILE" + return + } || { + echo "$ip" >> "$FAILED_FILE" + return + } + fi + + 
remote_info=$(fetch_remote_info "$ip" "$user" "$pass") || { + echo "❌ $ip 获取远程信息失败" + echo "$ip" >> "$FAILED_FILE" + return + } + + arch=$(echo "$remote_info" | cut -d'|' -f1) + init=$(echo "$remote_info" | cut -d'|' -f2) + + if [[ "$init" == "unknown" ]]; then + echo "❌ $ip 不支持的初始化系统: $init" + echo "$ip" >> "$FAILED_FILE" + return + fi + + pkg_path=$(choose_agent_package "$arch" "$init") + + if [[ "$pkg_path" == "UNSUPPORTED" ]]; then + echo "❌ $ip 无匹配安装包: $arch/$init" + echo "$ip" >> "$FAILED_FILE" + return + fi + + # 安装 + 配置 + if install_agent "$ip" "$user" "$pass" "$pkg_path" && update_config "$ip" "$user" "$pass"; then + # ✅ 配置完成后,再次重启服务,确保新配置生效 + restart_agent_service "$ip" "$user" "$pass" && { + echo "✅ $ip $ACTION 完成" + echo "$ip" >> "$SUCCESS_FILE" + } || { + echo "❌ $ip 服务重启失败" + echo "$ip" >> "$FAILED_FILE" + } + else + echo "❌ $ip 安装或配置失败" + echo "$ip" >> "$FAILED_FILE" + fi + + echo "-------------------------------------------" +} + +fetch_remote_info() { + local ip="$1" user="$2" pass="$3" + + sshpass -p "$pass" ssh $SSH_OPTS "$user@$ip" bash <<'EOF' +arch=$(uname -m) +case "$arch" in + aarch64|arm64) arch="arm" ;; + *) arch="x86" ;; +esac + +if command -v systemctl >/dev/null; then init=systemd; +elif command -v initctl >/dev/null; then init=upstart; +else init=unknown; fi + +echo "${arch}|${init}" +EOF +} + +choose_agent_package() { + local arch="$1" init="$2" + + shopt -s nullglob + + declare -a patterns + + if [[ "$arch" == "arm" ]]; then + patterns=("$PKG_DIR"/deepflow-agent-*.$init-arm.* \ + "$PKG_DIR"/deepflow-agent-*.$init-arm64.* \ + "$PKG_DIR"/deepflow-agent-*.$init-aarch64.*) + else + patterns=("$PKG_DIR"/deepflow-agent-*.$init-x86.* \ + "$PKG_DIR"/deepflow-agent-*.$init.*) + fi + + files=() + + for pattern in "${patterns[@]}"; do + for file in $pattern; do + files+=("$file") + done + done + + if [[ ${#files[@]} -gt 0 ]]; then + latest=$(printf "%s\n" "${files[@]}" | sort -V | tail -1) + echo "������ 选择安装包: $latest" >&2 + echo "$latest" + 
else + echo "UNSUPPORTED" + fi +} + +# ✅ 修改 install_agent:支持 sudo 和 sudo-i +install_agent() { + local ip="$1" user="$2" pass="$3" pkg_path="$4" + local remote_pkg="/tmp/agent.${pkg_path##*.}" + + sshpass -p "$pass" scp $SSH_OPTS "$pkg_path" "$user@$ip:$remote_pkg" || { + echo "❌ $ip 上传安装包失败" + return 1 + } + + # 构建 SUDO 前缀 + local SUDO_CMD="" + case "$SUDO_MODE" in + sudo) + SUDO_CMD="sudo" + ;; + sudo-i) + SUDO_CMD="sudo -i" + ;; + *) + SUDO_CMD="sudo" + ;; + esac + + sshpass -p "$pass" ssh $SSH_OPTS "$user@$ip" bash </dev/null; then SUDO="sudo"; else SUDO=""; fi + +# 使用指定模式 +SUDO_MODE_CMD='$SUDO_CMD' + +echo "������ 使用权限模式: \$SUDO_MODE_CMD" + +if [[ "$remote_pkg" == *.rpm ]]; then + \$SUDO_MODE_CMD rpm -Uvh --replacepkgs "$remote_pkg" +elif [[ "$remote_pkg" == *.deb ]]; then + \$SUDO_MODE_CMD dpkg -i "$remote_pkg" || \$SUDO_MODE_CMD apt-get install -f -y +else + echo "❌ 不支持的安装包格式" + exit 1 +fi + +# 服务管理(注意:sudo -i 下 systemctl 可能仍可用) +if command -v systemctl &>/dev/null; then + \$SUDO_MODE_CMD systemctl enable $SERVICE_NAME + \$SUDO_MODE_CMD systemctl restart $SERVICE_NAME +elif command -v service &>/dev/null; then + \$SUDO_MODE_CMD service $SERVICE_NAME restart + \$SUDO_MODE_CMD chkconfig $SERVICE_NAME on +elif command -v initctl &>/dev/null; then + \$SUDO_MODE_CMD initctl restart $SERVICE_NAME || \$SUDO_MODE_CMD initctl start $SERVICE_NAME +else + echo "❌ 无法识别服务管理方式" + exit 1 +fi +EOF +} + +# ✅ 修改 update_config:确保配置写入 /etc/ +update_config() { + local ip="$1" user="$2" pass="$3" + local SUDO_CMD="" + case "$SUDO_MODE" in + sudo) + SUDO_CMD="sudo" + ;; + sudo-i) + SUDO_CMD="sudo -i" + ;; + *) + SUDO_CMD="sudo" + ;; + esac + + sshpass -p "$pass" ssh $SSH_OPTS "$user@$ip" bash </dev/null +controller-ips: + - $CONTROLLER_IP +vtap-group-id: "$VTAP_GROUP_ID" +CFG + +\$SUDO_MODE_CMD chmod 644 "\$CONFIG_FILE" +\$SUDO_MODE_CMD chown root:root "\$CONFIG_FILE" +EOF +} + +# ✅ 新增函数:服务重启 + 状态检查 +restart_agent_service() { + local ip="$1" user="$2" pass="$3" + local SUDO_CMD="" 
+ case "$SUDO_MODE" in + sudo) + SUDO_CMD="sudo" + ;; + sudo-i) + SUDO_CMD="sudo -i" + ;; + *) + SUDO_CMD="sudo" + ;; + esac + + sshpass -p "$pass" ssh $SSH_OPTS "$user@$ip" bash < /dev/null 2>&1; then + echo "❌ deepflow-agent.service 重启后未运行" + exit 1 +fi + +echo "✅ deepflow-agent.service 已成功重启" +EOF +} + +verify_agent() { + local ip="$1" user="$2" pass="$3" + echo "������ $ip 状态检查:" + sshpass -p "$pass" ssh $SSH_OPTS "$user@$ip" " + sudo systemctl is-active $SERVICE_NAME 2>/dev/null || \ + sudo service $SERVICE_NAME status || \ + sudo initctl status $SERVICE_NAME || \ + echo '⚠️ 服务状态未知' + " +} + +#################################### +# 并发控制主逻辑(不变) +#################################### + +sem(){ + while [[ $(jobs -r | wc -l) -ge $MAX_PARALLEL ]]; do + sleep 0.5 + done +} + +while read -r ip user pass; do + [[ -z "$ip" || "$ip" =~ ^# ]] && continue + + if [[ -n "$LIMIT" ]]; then + IFS=',' read -ra LIMIT_IPS <<< "$LIMIT" + skip=true + for lim_ip in "${LIMIT_IPS[@]}"; do + [[ "$ip" == "$lim_ip" ]] && skip=false + done + $skip && continue + fi + + sem + worker "$ip" "$user" "$pass" & +done < "$IP_LIST" + +wait + +TOTAL_SUCCESS=$(wc -l < "$SUCCESS_FILE") +TOTAL_FAIL=$(wc -l < "$FAILED_FILE") + +echo "������ 全部任务执行完成: 成功 $TOTAL_SUCCESS 台,失败 $TOTAL_FAIL 台" +if [[ -s "$FAILED_FILE" ]]; then + echo "❗ 失败主机列表已保存: $FAILED_FILE" +fi diff --git a/scripts/dynamic_inventory.py b/scripts/dynamic_inventory.py new file mode 100644 index 0000000..700ed89 --- /dev/null +++ b/scripts/dynamic_inventory.py @@ -0,0 +1,69 @@ +import os +import sys +from jinja2 import Template + +# Check if required environment variables are set +required_vars = ['SSH_USER', 'SSH_PRIVATE_KEY'] +for var in required_vars: + if var not in os.environ: + print(f"{var} is not set. 
Aborting.") + sys.exit(1) + +# Get the SSH_USER and SSH_PRIVATE_KEY from environment variables +ssh_user = os.environ['SSH_USER'] +ssh_private_key = os.environ['SSH_PRIVATE_KEY'] + +# Check if input is provided +if len(sys.argv) < 2: + print("No groups and nodes provided. Usage: python dynamic_inventory.py 'group_name:host_name:host_ip'") + sys.exit(1) + +# Parse input groups and hosts +input_data = sys.argv[1] +group_nodes = input_data.split() + +# Dictionary to hold groups and their hosts +groups = {} + +for group_node in group_nodes: + group, host_name, host_ip = group_node.split(':') + if group not in groups: + groups[group] = [] + groups[group].append({'host_name': host_name, 'host_ip': host_ip}) + +# Define the inventory template +inventory_template = """ +{% for group, hosts in groups.items() %} +[{{ group }}] +{% for host in hosts %} +{{ host.host_name }} ansible_host={{ host.host_ip }} +{% endfor %} +{% endfor %} + +[all:vars] +ansible_port=22 +ansible_ssh_user={{ ssh_user }} +ansible_ssh_private_key_file=~/.ssh/id_rsa +ansible_host_key_checking=False +""" + +# Create the SSH key directory if it doesn't exist +ssh_dir = os.path.expanduser("~/.ssh") +os.makedirs(ssh_dir, exist_ok=True) + +# Create the SSH key file +ssh_key_path = os.path.join(ssh_dir, 'id_rsa') +with open(ssh_key_path, 'w') as ssh_key_file: + ssh_key_file.write(ssh_private_key) +os.chmod(ssh_key_path, 0o400) # Set permissions to 0400 + +# Render the inventory file +template = Template(inventory_template) +output = template.render(groups=groups, ssh_user=ssh_user) + +# Write to the inventory file +os.makedirs('hosts', exist_ok=True) +with open('hosts/inventory', 'w') as inventory_file: + inventory_file.write(output) + +print("Inventory file created successfully!") diff --git a/scripts/fluxcd_all_in_one.sh b/scripts/fluxcd_all_in_one.sh new file mode 100644 index 0000000..67b18eb --- /dev/null +++ b/scripts/fluxcd_all_in_one.sh @@ -0,0 +1,30 @@ +helm repo add stable https://charts.onwalk.net 
+helm repo update +kubectl create namespace gitops-system || true +helm upgrade --install fluxcd stable/flux2 --version 2.12.1 -n gitops-system -f fluxcd-values.yaml + +cat > cluster-config.yaml << EOF +apiVersion: source.toolkit.fluxcd.io/v1beta2 +kind: GitRepository +metadata: + name: stable + namespace: gitops-system +spec: + interval: 1m0s + ref: + branch: main + url: https://github.com/svc-design/gitops.git +--- +apiVersion: kustomize.toolkit.fluxcd.io/v1beta2 +kind: Kustomization +metadata: + name: cluster + namespace: gitops-system +spec: + interval: 1m0s + sourceRef: + kind: GitRepository + name: stable + path: ./clusters/k3s-local + prune: true +EOF diff --git a/scripts/gather_network_info.yml b/scripts/gather_network_info.yml new file mode 100644 index 0000000..7c4257c --- /dev/null +++ b/scripts/gather_network_info.yml @@ -0,0 +1,12 @@ +--- +- name: Gather and display specific network interface information based on dynamic filter + hosts: "{{ target_group }}" + user: ubuntu + become: yes + gather_facts: yes + tasks: + - include_role: + name: network_info + vars: + target_group: master + interface_filter: "^(ens|cni)" diff --git a/scripts/generate_ssl.sh b/scripts/generate_ssl.sh new file mode 100644 index 0000000..836f409 --- /dev/null +++ b/scripts/generate_ssl.sh @@ -0,0 +1,79 @@ +#!/bin/bash + +# 获取参数 +DOMAIN="$1" +VALID_DAYS="$2" +OUTPUT_DIR="$3" + +# 确保参数不为空 +if [[ -z "$DOMAIN" || -z "$VALID_DAYS" || -z "$OUTPUT_DIR" ]]; then + echo "Usage: $0 " + exit 1 +fi + +# 确保输出目录存在 +mkdir -p "$OUTPUT_DIR" + +CERT_FILE="$DOMAIN.cert" +KEY_FILE="$DOMAIN.key" + +echo "Generating self-signed SSL certificate for domain: $DOMAIN (with SAN), valid for $VALID_DAYS days" + +# 1. 生成 CA 私钥 +openssl genrsa -out "$OUTPUT_DIR/ca.key" 2048 + +# 2. 生成 CA 证书(自签的根证书) +openssl req -x509 -new -nodes \ + -key "$OUTPUT_DIR/ca.key" \ + -sha256 -days "$VALID_DAYS" \ + -out "$OUTPUT_DIR/ca.cert" \ + -subj "/C=CN/ST=State/L=City/O=Company/OU=Org/CN=Custom-CA" + +# 3. 
生成服务器私钥 +openssl genrsa -out "$OUTPUT_DIR/$KEY_FILE" 2048 + +# 4. 创建 OpenSSL 配置文件(兼容 Linux & macOS) +SAN_CONFIG="$OUTPUT_DIR/san.cnf" +cat < "$SAN_CONFIG" +[ req ] +default_bits = 2048 +prompt = no +default_md = sha256 +req_extensions = req_ext +distinguished_name = dn + +[ dn ] +C = CN +ST = State +L = City +O = Company +OU = Org +CN = $DOMAIN + +[ req_ext ] +subjectAltName = @alt_names + +[ alt_names ] +DNS.1 = $DOMAIN +EOF + +# 5. 生成 CSR(证书签名请求) +openssl req -new -key "$OUTPUT_DIR/$KEY_FILE" \ + -out "$OUTPUT_DIR/$DOMAIN.csr" \ + -config "$SAN_CONFIG" + +# 6. 用 CA 证书签发服务器证书,保留 SAN +openssl x509 -req \ + -in "$OUTPUT_DIR/$DOMAIN.csr" \ + -CA "$OUTPUT_DIR/ca.cert" \ + -CAkey "$OUTPUT_DIR/ca.key" \ + -CAcreateserial \ + -out "$OUTPUT_DIR/$CERT_FILE" \ + -days "$VALID_DAYS" \ + -sha256 \ + -extensions req_ext -extfile "$SAN_CONFIG" + +# 7. 清理 CSR 和配置文件 +rm -f "$OUTPUT_DIR/$DOMAIN.csr" "$SAN_CONFIG" + +echo "✅ Self-signed SSL certificate (with SAN) for $DOMAIN generated in $OUTPUT_DIR!" diff --git a/scripts/get-standalone-cert.sh b/scripts/get-standalone-cert.sh new file mode 100644 index 0000000..ffa2813 --- /dev/null +++ b/scripts/get-standalone-cert.sh @@ -0,0 +1,38 @@ +#!/bin/bash + +set -e + +DOMAIN="sing-box.onwalk.net" +SSL_KEY="/etc/ssl/${DOMAIN}.key" +SSL_PEM="/etc/ssl/${DOMAIN}.pem" + +# 1. 安装 acme.sh(如果未安装) +if [ ! -d "$HOME/.acme.sh" ]; then + echo "Installing acme.sh..." + curl https://get.acme.sh | sh + export PATH="$HOME/.acme.sh:$PATH" +else + echo "acme.sh already installed." +fi + +# 2. 申请 RSA 证书(使用 HTTP-01 验证,需 80 端口可用) +echo "Issuing certificate for $DOMAIN using standalone mode..." +~/.acme.sh/acme.sh --issue --standalone -d "$DOMAIN" --keylength 2048 + +# 3. 安装证书到指定位置 +echo "Installing cert to $SSL_PEM and $SSL_KEY..." +~/.acme.sh/acme.sh --install-cert -d "$DOMAIN" \ + --key-file "$SSL_KEY" \ + --fullchain-file "$SSL_PEM" \ + --reloadcmd "systemctl restart sing-box" + +# 4. 
设置权限 +chmod 600 "$SSL_KEY" +chmod 644 "$SSL_PEM" +echo "Certificate successfully installed." + +# 5. 提示 +echo "Done. Cert saved at:" +echo " Key: $SSL_KEY" +echo " Cert: $SSL_PEM" + diff --git a/scripts/global-monitor/agent-group-config.yaml b/scripts/global-monitor/agent-group-config.yaml new file mode 100644 index 0000000..d5afe19 --- /dev/null +++ b/scripts/global-monitor/agent-group-config.yaml @@ -0,0 +1,3 @@ +vtap_group_id: g-3lSjoT4zjY +platform_enabled: 1 +tap_interface_regex: ^(tap.*|cali.*|veth.*|eth.*|en[ospx].*|lxc.*|lo|docker.*|br.*|wg.*)$ diff --git a/scripts/global-monitor/config/containerd.toml b/scripts/global-monitor/config/containerd.toml new file mode 100644 index 0000000..0f3e3a8 --- /dev/null +++ b/scripts/global-monitor/config/containerd.toml @@ -0,0 +1,34 @@ +version = 2 +root = "CONTAINERD_ROOT_DIR" +state = "/run/containerd" +oom_score = 0 + +[grpc] + max_recv_message_size = 16777216 + max_send_message_size = 16777216 + +[debug] + level = "info" + +[metrics] + address = "" + grpc_histogram = false + +[plugins] + [plugins."io.containerd.grpc.v1.cri"] + sandbox_image = "REGISTRY_DOMAIN/library/pause:3.3" + max_container_log_line_size = -1 + [plugins."io.containerd.grpc.v1.cri".containerd] + default_runtime_name = "runc" + snapshotter = "overlayfs" + [plugins."io.containerd.grpc.v1.cri".containerd.runtimes] + [plugins."io.containerd.grpc.v1.cri".containerd.runtimes.runc] + runtime_type = "io.containerd.runc.v2" + runtime_engine = "" + runtime_root = "" + [plugins."io.containerd.grpc.v1.cri".containerd.runtimes.runc.options] + systemdCgroup = true + [plugins."io.containerd.grpc.v1.cri".registry] + [plugins."io.containerd.grpc.v1.cri".registry.mirrors] + [plugins."io.containerd.grpc.v1.cri".registry.mirrors."docker.io"] + endpoint = ["https://registry-1.docker.io"] \ No newline at end of file diff --git a/scripts/global-monitor/config/deepflow-registry.yaml b/scripts/global-monitor/config/deepflow-registry.yaml new file mode 100644 index 
0000000..7aece34 --- /dev/null +++ b/scripts/global-monitor/config/deepflow-registry.yaml @@ -0,0 +1,23 @@ +version: 0.1 +log: + fields: + service: registry +storage: + cache: + blobdescriptor: inmemory + filesystem: + rootdirectory: /var/lib/registry + delete: + enabled: true +http: + addr: :5000 + headers: + X-Content-Type-Options: [nosniff] + tls: + certificate: /etc/docker/registry/domain.crt + key: /etc/docker/registry/domain.key +health: + storagedriver: + enabled: true + interval: 10s + threshold: 3 \ No newline at end of file diff --git a/scripts/global-monitor/config/nginx.conf b/scripts/global-monitor/config/nginx.conf new file mode 100644 index 0000000..b534744 --- /dev/null +++ b/scripts/global-monitor/config/nginx.conf @@ -0,0 +1,19 @@ +# This server listen at 8080 for download files and packages +server { + listen NGINX_PORT; + listen [::]:NGINX_PORT; + server_name kube.registry.local; + gzip_static on; + autoindex on; + autoindex_exact_size off; + autoindex_format html; #修改为html + autoindex_localtime on; + location / { + root /usr/share/nginx; + index index.html index.htm; + } + error_page 500 502 503 504 /50x.html; + location = /50x.html { + root html; + } +} diff --git a/scripts/global-monitor/config/registry.yaml b/scripts/global-monitor/config/registry.yaml new file mode 100644 index 0000000..7aece34 --- /dev/null +++ b/scripts/global-monitor/config/registry.yaml @@ -0,0 +1,23 @@ +version: 0.1 +log: + fields: + service: registry +storage: + cache: + blobdescriptor: inmemory + filesystem: + rootdirectory: /var/lib/registry + delete: + enabled: true +http: + addr: :5000 + headers: + X-Content-Type-Options: [nosniff] + tls: + certificate: /etc/docker/registry/domain.crt + key: /etc/docker/registry/domain.key +health: + storagedriver: + enabled: true + interval: 10s + threshold: 3 \ No newline at end of file diff --git a/scripts/global-monitor/custom-domain.yaml b/scripts/global-monitor/custom-domain.yaml new file mode 100644 index 0000000..6851918 
--- /dev/null +++ b/scripts/global-monitor/custom-domain.yaml @@ -0,0 +1,5 @@ +name: "" +type: kubernetes +config: + controller_ip: 35.72.247.255 + node_port_name_regex: ^(cni|eth|flannel|vxlan.calico|wg|ens|tunl|en[ospx]) diff --git a/scripts/global-monitor/deepflow-registry/all.tag.list b/scripts/global-monitor/deepflow-registry/all.tag.list new file mode 100644 index 0000000..e8e0550 --- /dev/null +++ b/scripts/global-monitor/deepflow-registry/all.tag.list @@ -0,0 +1,51 @@ +local-registry.onwalk.net:5000/acl-controller:v6.3.179 +local-registry.onwalk.net:5000/alarm:v6.3.686 +local-registry.onwalk.net:5000/apientry:v6.3.193 +local-registry.onwalk.net:5000/cerebro:0.9.0 +local-registry.onwalk.net:5000/clickhouse-server:22.8.20.11 +local-registry.onwalk.net:5000/deepflow-agent:v6.3.4211 +local-registry.onwalk.net:5000/deepflow-app:v6.3.120 +local-registry.onwalk.net:5000/deepflow-init-grafana-ce:latest +local-registry.onwalk.net:5000/deepflow-init-grafana-ds-dh-ce:latest +local-registry.onwalk.net:5000/deepflow-server:v6.3.4211 +local-registry.onwalk.net:5000/df-env:v6.3.809 +local-registry.onwalk.net:5000/df-help:v6.3.1086 +local-registry.onwalk.net:5000/df-web-metrics-explore:v6.3.4318 +local-registry.onwalk.net:5000/df-web-qiankun-core:v6.3.9969 +local-registry.onwalk.net:5000/df-web-sched:v6.3.103 +local-registry.onwalk.net:5000/df-web-service:v6.3.580 +local-registry.onwalk.net:5000/diagnose:v6.3.103 +local-registry.onwalk.net:5000/elasticsearch:6.8.8 +local-registry.onwalk.net:5000/endpoints-operator:0.2.1 +local-registry.onwalk.net:5000/fauths:v6.3.452 +local-registry.onwalk.net:5000/fpermit:v6.3.254 +local-registry.onwalk.net:5000/fuser:v6.3.328 +local-registry.onwalk.net:5000/grafana-dashboards:v6.3.809 +local-registry.onwalk.net:5000/grafana:9.2.4 +local-registry.onwalk.net:5000/influxdb:v6.3.14424 +local-registry.onwalk.net:5000/kibana:6.8.8 +local-registry.onwalk.net:5000/kubectl:v6.3.809 +local-registry.onwalk.net:5000/log-cleaner:v6.3.11 
+local-registry.onwalk.net:5000/loki:2.4.2 +local-registry.onwalk.net:5000/manager:v6.3.684 +local-registry.onwalk.net:5000/mntnct:v6.3.1317 +local-registry.onwalk.net:5000/monitor:v6.3.174 +local-registry.onwalk.net:5000/mysql-server:8.0.26 +local-registry.onwalk.net:5000/openebs/linux-utils:3.3.0 +local-registry.onwalk.net:5000/openebs/node-disk-manager:2.0.0 +local-registry.onwalk.net:5000/openebs/node-disk-operator:2.0.0 +local-registry.onwalk.net:5000/openebs/provisioner-localpv:3.3.0 +local-registry.onwalk.net:5000/pcap:v6.3.188 +local-registry.onwalk.net:5000/postman:v6.3.54 +local-registry.onwalk.net:5000/promtail:2.4.2 +local-registry.onwalk.net:5000/querier-js:v6.3.264 +local-registry.onwalk.net:5000/rabbitmq:3.10.25 +local-registry.onwalk.net:5000/redis:6.2.6 +local-registry.onwalk.net:5000/report:v6.3.247 +local-registry.onwalk.net:5000/statistics:v6.3.2082 +local-registry.onwalk.net:5000/talker:v6.3.2958 +local-registry.onwalk.net:5000/telegraf:1.14.1.12 +local-registry.onwalk.net:5000/trident:v6.3.1666 +local-registry.onwalk.net:5000/warrant:v6.3.81 +local-registry.onwalk.net:5000/web-tools:v6.3.122 +local-registry.onwalk.net:5000/webssh:v6.3.22 diff --git a/scripts/global-monitor/deepflow-registry/compose.yaml b/scripts/global-monitor/deepflow-registry/compose.yaml new file mode 100644 index 0000000..db6f21a --- /dev/null +++ b/scripts/global-monitor/deepflow-registry/compose.yaml @@ -0,0 +1,12 @@ +version: '3' +services: + registry: + image: registry:2.7.1 + container_name: deepflow-registry + restart: always + network_mode: host + volumes: + - /usr/local/deepflow/registry:/var/lib/registry + - /opt/deepflow-registry/config/deepflow-registry.yaml:/etc/docker/registry/config.yml + - /etc/ssl/onwalk.net.pem:/etc/docker/registry/domain.crt + - /etc/ssl/onwalk.net.key:/etc/docker/registry/domain.key diff --git a/scripts/global-monitor/deepflow-registry/push_images.sh b/scripts/global-monitor/deepflow-registry/push_images.sh new file mode 100644 index 
0000000..cfa0a27
--- /dev/null
+++ b/scripts/global-monitor/deepflow-registry/push_images.sh
@@ -0,0 +1,49 @@
+#!/bin/bash
+# Mirror every image listed in all.tag.list from the local registry to the
+# target registry, using nerdctl against the k3s containerd socket.
+
+set +x
+
+# containerd socket and registry addresses
+CONTAINERD_ADDRESS="/run/k3s/containerd/containerd.sock"
+LOCAL_REGISTRY="local-registry.onwalk.net:5000"
+TARGET_REGISTRY="global-images.onwalk.net/private/deepflow-v6.3"
+
+# Input file: one image reference per line
+input_file="all.tag.list"
+
+# Log in to the target registry
+echo "Logging in to $TARGET_REGISTRY..."
+sudo CONTAINERD_ADDRESS=$CONTAINERD_ADDRESS nerdctl login $TARGET_REGISTRY
+
+# Number of images that could not be pulled, tagged or pushed
+failed=0
+
+# Read all.tag.list and process each image
+while IFS= read -r line; do
+    # Skip blank lines so nerdctl is never called with an empty reference
+    [[ -z "$line" ]] && continue
+
+    # Rewrite the local registry prefix to the target registry
+    image_tag="${line//$LOCAL_REGISTRY/$TARGET_REGISTRY}"
+
+    # Pull, retag and push; stop at the first failing step for this image
+    echo "Tagging $line as $image_tag and pushing to $TARGET_REGISTRY"
+    if sudo CONTAINERD_ADDRESS=$CONTAINERD_ADDRESS nerdctl pull "$line" &&
+       sudo CONTAINERD_ADDRESS=$CONTAINERD_ADDRESS nerdctl tag "$line" "$image_tag" &&
+       sudo CONTAINERD_ADDRESS=$CONTAINERD_ADDRESS nerdctl push "$image_tag"; then
+        # Remove the local copy only once the push has succeeded
+        echo "Cleaning up local image: $line"
+        sudo CONTAINERD_ADDRESS=$CONTAINERD_ADDRESS nerdctl rmi "$line"
+    else
+        echo "Failed to mirror image: $line" >&2
+        failed=$((failed + 1))
+    fi
+done < "$input_file"
+
+if [[ $failed -gt 0 ]]; then
+    echo "$failed image(s) failed to push." >&2
+    exit 1
+fi
+
+echo "All images processed and pushed successfully."
diff --git a/scripts/global-monitor/deepflow-registry/setup-nerdctl.sh b/scripts/global-monitor/deepflow-registry/setup-nerdctl.sh
new file mode 100644
index 0000000..4bdd1ed
--- /dev/null
+++ b/scripts/global-monitor/deepflow-registry/setup-nerdctl.sh
@@ -0,0 +1,24 @@
+#!/bin/bash
+# Download the nerdctl "full" bundle and write a nerdctl config that points
+# at the k3s containerd socket.
+
+# NOTE(review): the archive is only downloaded here, never extracted; nerdctl
+# must already be on PATH for the final command to work - confirm intent.
+wget https://github.com/containerd/nerdctl/releases/download/v2.0.2/nerdctl-full-2.0.2-linux-amd64.tar.gz
+
+sudo mkdir -pv /etc/nerdctl
+sudo touch /etc/nerdctl/nerdctl.toml
+
+# Write through "sudo tee": with "sudo cat > file" the redirection is opened
+# by the calling shell, which fails when the caller is not root.
+sudo tee /etc/nerdctl/nerdctl.toml > /dev/null << EOF
+debug = false
+debug_full = false
+address = "unix:///run/k3s/containerd/containerd.sock"
+namespace = "k8s.io"
+cni_path = "/var/lib/nerdctl/cni/bin"
+cni_netconfpath = "/var/lib/nerdctl/cni/net.d"
+EOF
+
+# Smoke test: list containers in the k8s.io namespace
+sudo CONTAINERD_ADDRESS=/run/k3s/containerd/containerd.sock nerdctl --namespace k8s.io ps
diff --git a/scripts/global-monitor/deepflow-registry/setup-registry.sh b/scripts/global-monitor/deepflow-registry/setup-registry.sh
new file mode 100644
index 0000000..e68f79e
--- /dev/null
+++ b/scripts/global-monitor/deepflow-registry/setup-registry.sh
@@ -0,0 +1,14 @@
+#!/bin/bash
+# Start the DeepFlow image registry from compose.yaml using nerdctl compose.
+
+# Reference only (was a bare URL that the shell tried to execute as a command):
+# https://github.com/containerd/nerdctl/releases/download/v2.0.2/nerdctl-2.0.2-linux-amd64.tar.gz
+
+sudo cp compose.yaml /opt/deepflow-registry/compose.yaml
+sudo nerdctl compose -f /opt/deepflow-registry/compose.yaml down
+sudo nerdctl compose -f /opt/deepflow-registry/compose.yaml up -d
+
+# When the runtime is containerd
+sudo nerdctl load -i /usr/local/deepflow/registry.tar
+sudo CONTAINERD_ADDRESS=/run/k3s/containerd/containerd.sock nerdctl --namespace k8s.io compose -f /opt/deepflow-registry/compose.yaml up -d
+#nerdctl run -d -e REGISTRY_HTTP_ADDR=0.0.0.0:5000 --net=host -v /usr/local/deepflow/registry:/var/lib/registry --restart=always --name registry hub.deepflow.yunshan.net/dev/registry:latest
diff --git a/scripts/global-monitor/deepflow-registry/show_images.sh b/scripts/global-monitor/deepflow-registry/show_images.sh
new file mode 100644
index 0000000..011fc6b
--- /dev/null
+++ 
b/scripts/global-monitor/deepflow-registry/show_images.sh @@ -0,0 +1,39 @@ +#!/bin/bash + +# 设置协议和 registry 地址(https:// 或 http://) +PROTOCOL="https://" +REGISTRY="local-registry.onwalk.net:5000" + +# 获取仓库列表 +repos=$(curl -s -X GET "$PROTOCOL$REGISTRY/v2/_catalog" | jq -r '.repositories[]') + +# 要隐藏的仓库列表 +hidden_repos=("") + +# 创建或清空输出文件 +output_file="all.tag.list" +> "$output_file" + +# 遍历每个仓库,获取对应的标签列表 +for repo in $repos; do + # 如果是隐藏的仓库,跳过 + if [[ " ${hidden_repos[@]} " =~ " ${repo} " ]]; then + continue + fi + + # 获取标签列表 + tags=$(curl -s -X GET "$PROTOCOL$REGISTRY/v2/$repo/tags/list" | jq -r '.tags[]') + + # 如果仓库有标签,则按格式输出到文件 + if [ -n "$tags" ]; then + for tag in $tags; do + # 输出格式:local-registry.onwalk.net:5000/repository:tag + echo "$REGISTRY/$repo:$tag" >> "$output_file" + done + fi +done + +# 排序并去重 +sort -u "$output_file" -o "$output_file" + +echo "Tags have been saved to $output_file and sorted." diff --git a/scripts/global-monitor/deepflow-sever-values-v6.3.yaml b/scripts/global-monitor/deepflow-sever-values-v6.3.yaml new file mode 100644 index 0000000..ac75ece --- /dev/null +++ b/scripts/global-monitor/deepflow-sever-values-v6.3.yaml @@ -0,0 +1,117 @@ +global: + ## 此文件中的字段会替换掉 values.yaml 中的默认值,如需修改 values.yaml,都需要在此修改 + ## 需保证与 values.yaml 格式一致 + ## Tips: grep redis_tag: values.yaml -A 16 -B1 >> values-custom.yaml + deepflowVersion: DeepFlow-6.3.36 + imagePullSecrets: + - deepflow-registry-secret + image: + repository: global-images.onwalk.net/private/deepflow-v6.3 + pullPolicy: Always + node_type: master + master_region_domain_prefix: '' + current_region_domain_prefix: 'master-' + s3StorageEnabled: false + ## 计费模式 license 授权模式 / voucher 按量计费 + billing_method: license + ## + ## 【可选】用于部署ESXi环境中的采集器虚拟机,需先在页面创建采集器组及配置流量采集模式为虚拟镜像,再 `-uo trident` 升级 esxi 采集器。 + tridentConfigmap: + ## 自定义采集器配置 + customConfig: + vtap-group-id-request: "__FIX_ME__" + # controller-port: 20035 + + +## 各组件自定义参数 +## Example + +## server 和 clickhouse 副本数控制,server 副本数必须 >= 
clickhouse 数量。 +deepflow: + server: + replicas: 1 + resources: + ## limit.memory 需配置为节点资源的 70% and > 2Gi,如 128G,则配置为 128x0.7~=90G。 + limits: + memory: 5G + clickhouse: + replicas: 1 + +alarm: + alarmEmail: stats01@yunshan.net + + +## df-web-metrics-explore +dfWebMetricsExplore: + customConfig: + app_config_user.js: |- + window.APP_USER_CONFIG = {} + +## df-web-core +dfWebCore: + customConfig: + app_config_user.js: |- + window.APP_USER_CONFIG = {} + +## 主从区域通信配置 +customResource: + clusterEndpointMasterToSlave: + ## 创建内建负载均衡器配置开关 + enabled: false + slaveRegionList: + ## 此处从区域名称和部署从区域时的域名前缀一致 + ## 从区域一 + - name: slave1- + ## 访问从区域组件端口,如遇 nodePort 冲突,更换端口后可修改如下配置,如需新增组件访问,添加 service 名称及端口即可。 + ports: + influxdb: + ## 集群内 server 暴露端口,无需更改 + port: 20044 + ## 从区域组件端口,如果各从区域 nodePort 端口不相同可修改 targetPort + targetPort: 20044 + elasticsearch: + port: 20042 + targetPort: 20042 + deepflow-server: + port: 20416 + targetPort: 30416 + ## 从区域所有节点IP + ips: + - 1.1.1.1 + - 2.2.2.2 + ## 从区域二 + - name: slave2- + ports: + influxdb: + ## 集群内 server 暴露端口,无需更改 + port: 20044 + ## 从区域组件端口,如果各从区域 nodePort 端口不相同可修改 targetPort + targetPort: 20044 + elasticsearch: + port: 20042 + targetPort: 20042 + deepflow-server: + port: 20416 + targetPort: 30416 + ips: + - 3.3.3.3 + - 4.4.4.4 + ## 从区域访问主区域配置 + clusterEndpointSlaveToMaster: + ## 主区域组件端口,如需新增组件访问,添加 service 名称及端口即可。 + ## 填写主区域 server 所在节点 IP + master_controller_ips: + - __FIXME_MASTER_CONTROLLER1_IP__ + - __FIXME_MASTER_CONTROLLER2_IP__ + - __FIXME_MASTER_CONTROLLER3_IP__ + ports: + talker: + port: 20013 + targetPort: 20013 + mysql: + port: 30130 + targetPort: 30130 + manager: + port: 20403 + targetPort: 20403 + diff --git a/scripts/global-monitor/setup-agent-group-config.sh b/scripts/global-monitor/setup-agent-group-config.sh new file mode 100644 index 0000000..5c020d9 --- /dev/null +++ b/scripts/global-monitor/setup-agent-group-config.sh @@ -0,0 +1,8 @@ +deepflow-ctl agent-group create vm-group +deepflow-ctl agent-group list vm-group + +cat 
> agent-group-config.yaml << EOF +vtap_group_id: g-3lSjoT4zjY +tap_interface_regex: ^(tap.*|cali.*|veth.*|eth.*|en[ospx].*|lxc.*|lo|docker.*|br.*|wg.*)$ +EOF +deepflow-ctl agent-group-config create -f agent-group-config.yaml diff --git a/scripts/global-monitor/setup-coroot.sh b/scripts/global-monitor/setup-coroot.sh new file mode 100644 index 0000000..740cea1 --- /dev/null +++ b/scripts/global-monitor/setup-coroot.sh @@ -0,0 +1,11 @@ +helm repo add coroot https://coroot.github.io/helm-charts +helm repo update coroot +helm install --namespace coroot --create-namespace --set corootCE.service.type=NodePort coroot coroot/coroot +export NODE_PORT=$(kubectl get --namespace coroot -o jsonpath="{.spec.ports[0].nodePort}" services coroot) +export NODE_IP=$(kubectl get nodes --namespace coroot -o jsonpath="{.items[0].status.addresses[0].address}") +echo http://$NODE_IP:$NODE_PORT +curl -sfL https://raw.githubusercontent.com/coroot/coroot-node-agent/main/install.sh | \ + COLLECTOR_ENDPOINT=http://35.75.12.83:35412 \ + API_KEY=8npswdyt \ + SCRAPE_INTERVAL=15s \ + sh - diff --git a/scripts/global-monitor/setup-deepflow-Host-Domain-Group.sh b/scripts/global-monitor/setup-deepflow-Host-Domain-Group.sh new file mode 100644 index 0000000..88b60b0 --- /dev/null +++ b/scripts/global-monitor/setup-deepflow-Host-Domain-Group.sh @@ -0,0 +1,5 @@ +unset AGENT_GROUP +AGENT_GROUP="legacy-host" # FIXME: domain name + +deepflow-ctl agent-group create $AGENT_GROUP +deepflow-ctl agent-group list $AGENT_GROUP # Get agent-group ID diff --git a/scripts/global-monitor/setup-deepflow-Host-Domain.sh b/scripts/global-monitor/setup-deepflow-Host-Domain.sh new file mode 100644 index 0000000..2ba0f30 --- /dev/null +++ b/scripts/global-monitor/setup-deepflow-Host-Domain.sh @@ -0,0 +1,8 @@ +unset DOMAIN_NAME +DOMAIN_NAME="vm-group" # FIXME: domain name + +cat << EOF | deepflow-ctl domain create -f - +name: $DOMAIN_NAME +type: agent_sync +EOF + diff --git 
a/scripts/global-monitor/setup-deepflow-add-domain.sh b/scripts/global-monitor/setup-deepflow-add-domain.sh new file mode 100644 index 0000000..543a610 --- /dev/null +++ b/scripts/global-monitor/setup-deepflow-add-domain.sh @@ -0,0 +1,13 @@ +#!/bin/bash +export CLUSTER_NAME=$1 + +cat > custom-domain.yaml << EOF +name: "$CLUSTER_NAME" +type: kubernetes +config: + controller_ip: 35.72.247.255 + node_port_name_regex: ^(cni|eth|flannel|vxlan.calico|wg|ens|tunl|en[ospx]) +EOF + +deepflow-ctl domain create -f custom-domain.yaml +deepflow-ctl domain list $CLUSTER_NAME diff --git a/scripts/global-monitor/setup-deepflow-server-ee-all-in-one.sh b/scripts/global-monitor/setup-deepflow-server-ee-all-in-one.sh new file mode 100644 index 0000000..5a357d9 --- /dev/null +++ b/scripts/global-monitor/setup-deepflow-server-ee-all-in-one.sh @@ -0,0 +1,36 @@ +#!/bin/bash + +sudo mkdir -pv /opt/rancher/k3s +curl -sfL https://get.k3s.io | sudo sh -s - --disable=traefik,servicelb \ + --data-dir=/opt/rancher/k3s \ + --kube-apiserver-arg service-node-port-range=0-50000 + +sudo mkdir -pv ~/.kube/ +sudo cp /etc/rancher/k3s/k3s.yaml ~/.kube/config +sudo chown -R ubuntu:ubuntu ~/.kube/ + +sudo snap install helm --classic + +k8s_node=`sudo kubectl get nodes | awk 'NR>1{print $1}'` + +sudo kubectl label node $k8s_node master_controller=enable +sudo kubectl label node $k8s_node tsdb=enable +sudo kubectl label node $k8s_node dfdb=enable + +sudo kubectl create ns deepflow || true +sudo kubectl create secret docker-registry deepflow-registry-secret \ + --docker-server=global-images.onwalk.net \ + --docker-username=admin \ + --docker-password=Harbor12345 \ + --docker-email=manbuzhe2009@qq.com \ + -n deepflow + +sudo kubectl create ns openebs || true +sudo kubectl create secret docker-registry deepflow-registry-secret \ + --docker-server=global-images.onwalk.net \ + --docker-username=admin \ + --docker-password=Harbor12345 \ + --docker-email=manbuzhe2009@qq.com \ + -n openebs + 
+/usr/local/deepflow/bin/deepflow-deploy -i diff --git a/scripts/global-monitor/setup-deepflow.sh b/scripts/global-monitor/setup-deepflow.sh new file mode 100644 index 0000000..54095d8 --- /dev/null +++ b/scripts/global-monitor/setup-deepflow.sh @@ -0,0 +1,27 @@ +helm repo add deepflow https://deepflowio.github.io/deepflow +helm repo update deepflow +cat > values.yaml << EOF +global: + replicas: 1 + storageEngine: clickhouse +byconity: + enabled: false +EOF +helm upgrade --install deepflow -n deepflow deepflow/deepflow --version 6.5.012 --create-namespace -f values.yaml +curl -o /usr/bin/deepflow-ctl https://deepflow-ce.oss-cn-beijing.aliyuncs.com/bin/ctl/v6.4.9/linux/$(arch | sed 's|x86_64|amd64|' | sed 's|aarch64|arm64|')/deepflow-ctl +chmod a+x /usr/bin/deepflow-ctl + +NODE_PORT=$(kubectl get --namespace deepflow -o jsonpath="{.spec.ports[0].nodePort}" services deepflow-grafana) +NODE_IP=$(kubectl get nodes -o jsonpath="{.items[0].status.addresses[0].address}") +echo -e "Grafana URL: http://$NODE_IP:$NODE_PORT \nGrafana auth: admin:deepflow" + + +kubectl delete deployment.apps/deepflow-byconity-daemon-manager -n deepflow +kubectl delete deployment.apps/deepflow-byconity-fdbcli -n deepflow +kubectl delete deployment.apps/deepflow-byconity-resource-manager -n deepflow +kubectl delete deployment.apps/deepflow-fdb-operator -n deepflow +kubectl delete statefulset.apps/deepflow-byconity-server -n deepflow +kubectl delete statefulset.apps/deepflow-byconity-tso -n deepflow +kubectl delete statefulset.apps/deepflow-byconity-vw-vw-default -n deepflow +kubectl delete statefulset.apps/deepflow-byconity-vw-vw-write -n deepflow +kubectl delete svc -n deepflow `kubectl get svc -n deepflow | grep deepflow-byconity | awk '{print $1}' | xargs` diff --git a/scripts/global-monitor/setup-ingress.sh b/scripts/global-monitor/setup-ingress.sh new file mode 100644 index 0000000..7f1920d --- /dev/null +++ b/scripts/global-monitor/setup-ingress.sh @@ -0,0 +1,56 @@ +cat > value.yaml < 
nginx-cm.yaml < nginx-svc-patch.yaml < values-custom.yaml +clickhouse: + enabled: true +EOF +helm upgrade --install signoz -n signoz signoz/signoz --create-namespace -f values-custom.yaml diff --git a/scripts/gpu-k8s.sh b/scripts/gpu-k8s.sh new file mode 100644 index 0000000..f0efe4e --- /dev/null +++ b/scripts/gpu-k8s.sh @@ -0,0 +1,28 @@ +#!/bin/bash +set -e + +# Install offline packages required for GPU support +install_all_offline_packages() { + echo "Installing GPU driver and toolkit packages" + # Implementation assumes packages are available locally + sudo apt-get update + sudo apt-get install -y nvidia-driver-535 nvidia-headless-535 nvidia-container-toolkit +} + +# Deploy the NVIDIA GPU operator +deploy_plugin() { + helm repo add nvidia https://helm.ngc.nvidia.com/nvidia || true + helm upgrade --install gpu-operator nvidia/gpu-operator \ + --namespace gpu-operator \ + --create-namespace \ + --set nodeSelector.kubernetes.io/gpu="true" \ + --set driver.enabled=true \ + --set toolkit.enabled=true \ + --set devicePlugin.enabled=true \ + --set operator.runtimeClass="nvidia-container-runtime" \ + --set operator.defaultRuntime=containerd \ + --set containerRuntime.socketPath=/var/run/containerd/containerd.sock +} + +install_all_offline_packages +deploy_plugin diff --git a/scripts/ingress-installer.sh b/scripts/ingress-installer.sh new file mode 100644 index 0000000..e5e91cf --- /dev/null +++ b/scripts/ingress-installer.sh @@ -0,0 +1,106 @@ +#!/bin/bash +set -e + +INGRESS_IP="${1:-$(hostname -I | awk '{print $1}')}" +NODE_LABEL="$2" + +echo "🚀 Ingress离线部署开始,IP: ${INGRESS_IP}" + +# 解压 nerdctl 并安装 +echo "📦 安装nerdctl..." +tar xzvf nerdctl.tar.gz -C /usr/local/bin/ + +echo "🚀 尝试导入镜像..." 
+ +if command -v docker &>/dev/null && docker info &>/dev/null; then + echo "✅ 检测到 Docker 正常运行,使用 docker load 导入镜像" + docker load -i images/nginx-ingress.tar + docker load -i images/kube-webhook-certgen.tar + +elif [ -S /run/k3s/containerd/containerd.sock ]; then + echo "⚠️ Docker 不可用,检测到 K3s 的 containerd socket,使用 nerdctl 导入" + + # 设置 nerdctl 环境变量,连接到 K3s 的 containerd + export CONTAINERD_ADDRESS=/run/k3s/containerd/containerd.sock + + # 确保 nerdctl 可执行 + if ! command -v nerdctl &>/dev/null; then + echo "❌ nerdctl 未安装或未在 PATH 中,请检查" + exit 1 + fi + + nerdctl --namespace k8s.io load -i images/nginx-ingress.tar + nerdctl --namespace k8s.io load -i images/kube-webhook-certgen.tar + +elif [ -S /run/containerd/containerd.sock ]; then + echo "⚠️ Docker 和 K3s containerd 都不可用,退而使用默认 containerd socket" + + export CONTAINERD_ADDRESS=/run/containerd/containerd.sock + + if ! command -v nerdctl &>/dev/null; then + echo "❌ nerdctl 未安装或未在 PATH 中,请检查" + exit 1 + fi + + nerdctl --namespace k8s.io load -i images/nginx-ingress.tar + nerdctl --namespace k8s.io load -i images/kube-webhook-certgen.tar + +else + echo "❌ 没有可用的容器运行时(docker/containerd),无法导入镜像" + exit 1 +fi + +# 创建命名空间 +kubectl create namespace ingress || true + +# 生成 Helm values.yaml +cat > values.yaml <> values.yaml < "$CHART_DIR/Chart.yaml" < "$CHART_DIR/values.yaml" < "$CHART_DIR/templates/deployment.yaml" < "$CHART_DIR/templates/service.yaml" < "$CHART_DIR/templates/configmap.yaml" < "$CHART_DIR/templates/route.yaml" < ${ROLE_NAME}/defaults/main.yml << EOF +# Default variables for ${ROLE_NAME} +loki_journal_sources: + - name: "xray" + unit: "xray.service" + - name: "xray_tproxy" + unit: "xray-tproxy.service" + +loki_endpoint_url: "https://logs-prod-030.grafana.net/loki/api/v1/push" +loki_basic_auth_username: "{{ loki_username }}" +loki_basic_auth_password: "{{ loki_password }}" +EOF + +# Create tasks/main.yml file +cat > ${ROLE_NAME}/tasks/main.yml << EOF +--- +- name: Install GPG + apt: + name: gpg + state: present + +- 
name: Create APT keyrings directory + file: + path: /etc/apt/keyrings/ + state: directory + mode: '0755' + +- name: Add Grafana GPG key + ansible.builtin.get_url: + url: "{{ grafana_gpg_key_url }}" + dest: /etc/apt/keyrings/grafana.gpg + mode: '0644' + +- name: Add Grafana Alloy APT source + apt_repository: + repo: "{{ grafana_apt_source }}" + state: present + +- name: Update APT package list and install Grafana Alloy + apt: + name: alloy + state: present + update_cache: yes + +- name: Create Alloy configuration directory + file: + path: /etc/alloy + state: directory + mode: '0755' + +- name: Create Alloy configuration file + template: + src: config.alloy.j2 + dest: "{{ alloy_config_path }}" + mode: '0644' + +- name: Reload and restart Alloy service + systemd: + name: alloy + state: restarted + daemon_reload: yes +EOF + +# Create templates/config.alloy.j2 file +cat > ${ROLE_NAME}/templates/config.alloy.j2 << EOF +loki.write "grafanacloud" { + endpoint { + url = "{{ loki_endpoint_url }}" + + basic_auth { + username = "{{ loki_basic_auth_username }}" + password = "{{ loki_basic_auth_password }}" + } + } +} + +{% for source in loki_journal_sources %} +loki.source.journal "{{ source.name }}" { + format_as_json = true + labels = {job = "{{ source.name }}"} + matches = "_SYSTEMD_UNIT={{ source.unit }}" + forward_to = [loki.write.grafanacloud.receiver] +} +{% endfor %} +EOF + +# Create files/grafana.gpg file (an empty file is created here; you can manually add the content) +touch ${ROLE_NAME}/files/grafana.gpg + +echo "Ansible Role directory structure for '${ROLE_NAME}' has been initialized." 
diff --git a/scripts/init_linux_user.sh b/scripts/init_linux_user.sh
new file mode 100644
index 0000000..f819843
--- /dev/null
+++ b/scripts/init_linux_user.sh
@@ -0,0 +1,54 @@
+#!/bin/bash
+#
+# Create a Linux user with a password, SSH access and passwordless sudo.
+#
+# Usage: init_linux_user.sh <username> <password>
+#
+# The account gets a home directory, membership of the "sudo" group, a copy of
+# SSH_KEY_PATH as its authorized_keys file, and a NOPASSWD sudoers drop-in.
+# NOTE(review): the password arrives as a command-line argument, so it can end
+# up in shell history; the interface is kept for compatibility.
+set -euo pipefail
+
+# Both the username and the password must be supplied.
+if [ -z "${1:-}" ] || [ -z "${2:-}" ]; then
+    echo "Usage: $0 <username> <password>"
+    exit 1
+fi
+
+USERNAME="$1"   # first argument: login name
+PASSWORD="$2"   # second argument: password
+SSH_KEY_PATH="/root/.ssh/authorized_keys"   # replace with the actual public-key file path
+HOME_DIR="/home/$USERNAME"
+SSH_DIR="$HOME_DIR/.ssh"
+AUTHORIZED_KEYS="$SSH_DIR/authorized_keys"
+SUDOERS_FILE="/etc/sudoers.d/$USERNAME"
+
+# Create the user with a home directory. An existing account is reused so the
+# script can be re-run to refresh the password, keys and sudoers entry.
+if ! id -u "$USERNAME" >/dev/null 2>&1; then
+    sudo useradd -m -s /bin/bash -G sudo "$USERNAME"
+fi
+USER_GROUP="$(id -gn "$USERNAME")"
+
+# Set the password; printf avoids echo's option/escape handling.
+printf '%s:%s\n' "$USERNAME" "$PASSWORD" | sudo chpasswd
+
+# Create ~/.ssh with owner-only permissions.
+sudo mkdir -p "$SSH_DIR"
+sudo chmod 700 "$SSH_DIR"
+sudo chown "$USERNAME:$USER_GROUP" "$SSH_DIR"
+
+# Copy the public keys into place with the right owner and mode in one step,
+# without building a shell command string from the username.
+sudo install -m 600 -o "$USERNAME" -g "$USER_GROUP" "$SSH_KEY_PATH" "$AUTHORIZED_KEYS"
+
+# Grant passwordless sudo. The entry is syntax-checked before installation
+# because a broken file in /etc/sudoers.d can lock everyone out of sudo.
+SUDOERS_TMP="$(mktemp)"
+trap 'rm -f "$SUDOERS_TMP"' EXIT
+printf '%s ALL=(ALL) NOPASSWD:ALL\n' "$USERNAME" > "$SUDOERS_TMP"
+sudo visudo -cf "$SUDOERS_TMP" >/dev/null
+sudo install -m 0440 -o root -g root "$SUDOERS_TMP" "$SUDOERS_FILE"
+
+echo "User $USERNAME has been created, password set, and configured with sudo privileges successfully."
diff --git a/scripts/install-single-gpu-k8s.sh b/scripts/install-single-gpu-k8s.sh new file mode 100644 index 0000000..8da3e45 --- /dev/null +++ b/scripts/install-single-gpu-k8s.sh @@ -0,0 +1,199 @@ +#!/bin/bash +set -euo pipefail + +# === 全局变量 === +MASTER_IP=$(hostname -I | awk '{print $1}') +USER=${USER:-$(whoami)} +SSH_KEY="${HOME}/.ssh/id_rsa" +K8S_VERSION="labring/kubernetes:v1.25.16" +CILIUM_VERSION="labring/cilium:v1.13.4" +HELM_VERSION="labring/helm:v3.9.4" +NVIDIA_DRIVER_VERSION="nvidia-driver-535" +NVIDIA_PLUGIN_VERSION="v0.17.1" +NERDCTL_VERSION="2.1.2" +PROXY_ADDR="http://127.0.0.1:1081" +USE_PROXY=${USE_PROXY:-false} + +# === 选项代理 === +configure_proxy() { + if [ "$USE_PROXY" = true ]; then + export http_proxy=$PROXY_ADDR + export https_proxy=$PROXY_ADDR + export HTTP_PROXY=$PROXY_ADDR + export HTTPS_PROXY=$PROXY_ADDR + echo "🌐 代理已启用: $PROXY_ADDR" + else + unset http_proxy https_proxy HTTP_PROXY HTTPS_PROXY + echo "🌐 代理已关闭" + fi +} + +proxy_curl() { + if [ "$USE_PROXY" = true ]; then + curl --proxy "$PROXY_ADDR" "$@" + else + curl "$@" + fi +} + +install_base() { + echo "[1/8] 安装基础依赖" + sudo apt-get update -y + sudo apt-get install -y curl gnupg2 ca-certificates lsb-release \ + apt-transport-https software-properties-common openssh-client \ + openssh-server uidmap +} + +install_containerd() { + echo "[2/8] 安装 containerd + nerdctl" + sudo apt-get purge -y docker.io docker-ce docker-ce-cli containerd.io || true + sudo apt-get install -y containerd + + tmpdir=$(mktemp -d) + archive="nerdctl-full-${NERDCTL_VERSION}-linux-amd64.tar.gz" + url="https://github.com/containerd/nerdctl/releases/download/v${NERDCTL_VERSION}/${archive}" + echo "🔽 下载 nerdctl: $url" + proxy_curl -fLo "${tmpdir}/${archive}" "$url" + + echo "📆 解压 nerdctl 到 /usr/local" + sudo tar -xzf "${tmpdir}/${archive}" -C /usr/local + + sudo mkdir -p /etc/containerd + sudo containerd config default | sudo tee /etc/containerd/config.toml > /dev/null + sudo systemctl enable --now containerd + nerdctl 
--version && echo "✅ nerdctl 安装成功" || echo "❌ nerdctl 安装失败" +} + +install_nvidia() { + echo "[3/8] 安装 NVIDIA 驱动和容器工具" + distribution="ubuntu22.04" + proxy_curl -sL https://nvidia.github.io/nvidia-docker/gpgkey | \ + sudo gpg --dearmor -o /usr/share/keyrings/nvidia-container-toolkit-keyring.gpg + + proxy_curl -sL https://nvidia.github.io/nvidia-docker/${distribution}/nvidia-docker.list | \ + sed 's|^deb |deb [signed-by=/usr/share/keyrings/nvidia-container-toolkit-keyring.gpg] |' | \ + sudo tee /etc/apt/sources.list.d/nvidia-docker.list + + sudo apt-get update -y + sudo apt-get install -y ${NVIDIA_DRIVER_VERSION} nvidia-container-toolkit + sudo nvidia-ctk runtime configure --runtime=containerd --set-as-default + sudo systemctl restart containerd + + if ! command -v nvidia-smi >/dev/null; then echo "❌ nvidia-smi 未找到"; exit 1; fi + nvidia-smi || { echo "❌ NVIDIA 驱动有问题"; exit 1; } +} + +install_sealos() { + echo "[4/8] 安装 Sealos" + if ! command -v sealos &>/dev/null; then + proxy_curl -sfL https://raw.githubusercontent.com/labring/sealos/main/scripts/install.sh | bash + fi +} + +setup_ssh() { + echo "[5/8] 配置 SSH 免密" + [ ! 
-f "${SSH_KEY}" ] && ssh-keygen -f "${SSH_KEY}" -N "" + cat "${SSH_KEY}.pub" >> ~/.ssh/authorized_keys + chmod 600 ~/.ssh/authorized_keys && chmod 700 ~/.ssh + sudo systemctl enable --now ssh || sudo systemctl enable --now sshd +} + +deploy_k8s() { + echo "[6/8] 使用 Sealos 部署 K8s" + sealos run "${K8S_VERSION}" "${CILIUM_VERSION}" "${HELM_VERSION}" \ + --masters "${MASTER_IP}" --user "${USER}" --pk "${SSH_KEY}" \ + --env '{}' --cmd "kubeadm init --skip-phases=addon/kube-proxy" + + echo "[6.1] 禁用 sealos containerd, 启用系统 containerd" + sudo systemctl disable --now sealos-containerd || true + sudo systemctl enable --now containerd + sleep 3 + sudo systemctl status containerd --no-pager | grep Active +} + +deploy_plugin() { + echo "[7/8] 部署 NVIDIA Device Plugin" + plugin_url="https://raw.githubusercontent.com/NVIDIA/k8s-device-plugin/${NVIDIA_PLUGIN_VERSION}/deployments/static/nvidia-device-plugin.yml" + if [ "$USE_PROXY" = true ]; then + HTTPS_PROXY=$PROXY_ADDR HTTP_PROXY=$PROXY_ADDR \ + kubectl apply -f "$plugin_url" + else + kubectl apply -f "$plugin_url" + fi + sleep 15 + kubectl -n kube-system get pods | grep nvidia || echo "⚠️ 插件未启动" + kubectl describe node | grep -A10 Capacity | grep -i nvidia +} + +run_test() { + echo "[8/8] 运行 CUDA vectoradd GPU 测试" + kubectl apply -f - <>> 模式: $ACTION" +echo ">>> 所有非白名单来源将被拒绝" +echo "" + +is_ipv6() { + [[ "$1" == *:* ]] +} + +run_cmd() { + local cmd="$1" + echo "[RUN] $cmd" + eval "$cmd" +} + +# 生成 iptables 规则 +generate_iptables_rules() { + + # 放行 ICMP 和 ICMPv6 规则(优先级最高) + echo "iptables -I INPUT -p icmp -j ACCEPT" + echo "ip6tables -I INPUT -p ipv6-icmp -j ACCEPT" + + # 生成允许的 IP 规则 + for ip in "${ALLOW_ALL_IPS[@]}"; do + echo "iptables -I INPUT -s $ip -j ACCEPT" + done + + # 生成允许的 CIDR 规则 + for cidr in "${ALLOW_CIDRS[@]}"; do + echo "iptables -I INPUT -s $cidr -j ACCEPT" + done + + # 默认 DROP 规则 + echo "iptables -A INPUT -j DROP" +} + +# 删除指定 iptables 规则 +delete_iptables_rules() { + + # 删除放行 ICMP 和 ICMPv6 规则(优先级最高) + echo 
"iptables -D INPUT -p icmp -j ACCEPT" + echo "ip6tables -D INPUT -p ipv6-icmp -j ACCEPT" + + # 删除允许的 IP 规则 + for ip in "${ALLOW_ALL_IPS[@]}"; do + echo "iptables -D INPUT -s $ip -j ACCEPT" + done + + # 删除允许的 CIDR 规则 + for cidr in "${ALLOW_CIDRS[@]}"; do + echo "iptables -D INPUT -s $cidr -j ACCEPT" + done + + # 删除默认 DROP 规则 + echo "iptables -D INPUT -j DROP" +} + +# 查看当前规则 +show_iptables_rules() { + echo "============= iptables -S =============" + iptables -S INPUT | sed 's/^-A /iptables -C /' + echo "============= ip6tables -S =============" + ip6tables -S INPUT | sed 's/^-A /ip6tables -C /' +} + +# 执行操作 +case "$ACTION" in + add) + generate_iptables_rules > iptables_rules.sh + echo "[INFO] 规则已生成并保存为 iptables_rules.sh 文件" + bash iptables_rules.sh + ;; + delete) + delete_iptables_rules > delete_iptables_rules.sh + echo "[INFO] 删除规则已保存为 delete_iptables_rules.sh 文件" + bash delete_iptables_rules.sh + ;; + show) + show_iptables_rules + ;; + *) + echo "无效的操作: $ACTION" + exit 1 + ;; +esac + +echo ">>> 操作完成。" + diff --git a/scripts/k3s-cluster/.gitignore b/scripts/k3s-cluster/.gitignore new file mode 100644 index 0000000..ede35ae --- /dev/null +++ b/scripts/k3s-cluster/.gitignore @@ -0,0 +1,13 @@ +initialize.sh +desktop-dev +scripts/get_hcp_secret.py +scripts/get_hcp_secret.sh +scripts/get_hcp_secret.test +sync.sh +../ca.cert +../ca.key +../ca.srl +../kube.registry.local.cert +../kube.registry.local.key +aws/ +awscliv2.zip diff --git a/scripts/k3s-cluster/check-cilium-egress.sh b/scripts/k3s-cluster/check-cilium-egress.sh new file mode 100644 index 0000000..e1be353 --- /dev/null +++ b/scripts/k3s-cluster/check-cilium-egress.sh @@ -0,0 +1,31 @@ +#!/bin/bash + +set -e + +POD_NAME=${1:-test-pod} +NAMESPACE=${2:-default} + +echo "🔍 获取 Pod IP..." 
+POD_IP=$(kubectl get pod "$POD_NAME" -n "$NAMESPACE" -o jsonpath='{.status.podIP}')
+NODE_NAME=$(kubectl get pod "$POD_NAME" -n "$NAMESPACE" -o jsonpath='{.spec.nodeName}')
+echo "✅ Pod IP: $POD_IP"
+echo "✅ Node: $NODE_NAME"
+
+echo -e "\n🧠 查询 Cilium egress gateway BPF policy 命中情况..."
+kubectl -n kube-system exec ds/cilium -- cilium-dbg bpf egress list | grep "$POD_IP" || echo "❌ 没有命中 egress policy"
+
+echo -e "\n🌐 在节点上检查 SNAT 规则 (iptables POSTROUTING)..."
+ssh "$NODE_NAME" "sudo iptables -t nat -L POSTROUTING -n -v --line-numbers | grep -E '10\.42|SNAT|wg0|eth0'"
+
+echo -e "\n🌍 从 Pod 内 curl ifconfig.me 获取出口 IP..."
+kubectl exec -n "$NAMESPACE" "$POD_NAME" -- curl -s --max-time 5 ifconfig.me || echo "❌ curl 出口失败"
+
+echo -e "\n🚦 路由确认:从 Pod 查看 route 表..."
+kubectl exec -n "$NAMESPACE" "$POD_NAME" -- ip route
+
+echo -e "\n🎯 检查目标 Gateway IP 是否可达 (ping 网关)..."
+GATEWAY_IP="172.30.0.11"
+kubectl exec -n "$NAMESPACE" "$POD_NAME" -- ping -c 3 "$GATEWAY_IP" || echo "❌ 无法 ping 通 $GATEWAY_IP"
+
+echo -e "\n✅ 检查完成"
+
diff --git a/scripts/k3s-cluster/check_cilium_requirements.sh b/scripts/k3s-cluster/check_cilium_requirements.sh
new file mode 100644
index 0000000..d95df5f
--- /dev/null
+++ b/scripts/k3s-cluster/check_cilium_requirements.sh
@@ -0,0 +1,195 @@
+#!/bin/bash
+#
+# Check the host prerequisites for running Cilium: a mounted bpffs, a list of
+# kernel modules, and a list of kernel config options.
+#
+# Usage: check_cilium_requirements.sh [-f]
+#   -f  auto-fix what can be fixed at runtime (mount bpffs, modprobe modules)
+set -e
+
+echo "🔍 检查 Cilium 运行环境依赖项..."
+
+# Kernel modules that are expected to be listed by lsmod.
+# NOTE(review): several entries (net_cls, net_cls_act, net_sch_ingress,
+# net_sch_fq, xt_tproxy) look like config-option spellings rather than module
+# names - confirm against the target kernel before relying on -f.
+REQUIRED_MODULES=(
+    "vxlan" "geneve" "ip_set" "xt_set" "xt_comment"
+    "xt_mark" "xt_socket" "xt_tproxy" "xt_conntrack"
+    "xfrm_user" "xfrm_algo" "xfrm_ipcomp" "ipcomp"
+    "net_cls" "net_cls_act" "net_sch_ingress"
+    "net_sch_fq" "crypto_user"
+)
+
+# Kernel config options that must be built in (=y) or built as a module (=m).
+REQUIRED_CONFIGS=(
+    "CONFIG_BPF"
+    "CONFIG_BPF_SYSCALL"
+    "CONFIG_NET_CLS_BPF"
+    "CONFIG_BPF_JIT"
+    "CONFIG_NET_CLS_ACT"
+    "CONFIG_NET_SCH_INGRESS"
+    "CONFIG_CRYPTO_SHA1"
+    "CONFIG_CRYPTO_USER_API_HASH"
+    "CONFIG_CGROUPS"
+    "CONFIG_CGROUP_BPF"
+    "CONFIG_PERF_EVENTS"
+    "CONFIG_VXLAN"
+    "CONFIG_FIB_RULES"
+    "CONFIG_NET_SCH_FQ"
+)
+
+# Text of the running kernel's config; filled in by load_kernel_config.
+KERNEL_CONFIG_TEXT=""
+
+# Return 0 when module $1 appears in the first column of lsmod (exact name).
+module_loaded() {
+    lsmod | grep -q "^$1[[:space:]]"
+}
+
+# Read the running kernel's config into KERNEL_CONFIG_TEXT.
+# Returns 1 when neither /boot/config-<release> nor /proc/config.gz exists.
+load_kernel_config() {
+    local boot_cfg
+    boot_cfg="/boot/config-$(uname -r)"
+    if [ -f "$boot_cfg" ]; then
+        KERNEL_CONFIG_TEXT="$(cat "$boot_cfg")"
+    elif [ -f /proc/config.gz ]; then
+        KERNEL_CONFIG_TEXT="$(zcat /proc/config.gz)"
+    else
+        return 1
+    fi
+}
+
+# Return 0 when option $1 is set to y or m in KERNEL_CONFIG_TEXT. The match is
+# anchored to the whole line so a longer line that merely contains the text
+# cannot satisfy it.
+kernel_config_enabled() {
+    grep -Eq "^$1=(y|m)\$" <<< "$KERNEL_CONFIG_TEXT"
+}
+
+# Mount bpffs on /sys/fs/bpf.
+auto_mount_bpffs() {
+    echo "👉 自动挂载 bpffs..."
+    sudo mount bpffs /sys/fs/bpf -t bpf
+    echo "✅ bpffs 已挂载"
+}
+
+# modprobe the modules given as arguments (default: all of REQUIRED_MODULES)
+# that are not loaded yet. A failed modprobe is reported instead of aborting
+# the script under `set -e`, so the remaining checks still run.
+auto_load_modules() {
+    echo "👉 自动加载内核模块..."
+    local mod mods=("$@")
+    if [ ${#mods[@]} -eq 0 ]; then
+        mods=("${REQUIRED_MODULES[@]}")
+    fi
+    for mod in "${mods[@]}"; do
+        if module_loaded "$mod"; then
+            continue
+        fi
+        if sudo modprobe "$mod"; then
+            echo "✅ $mod 已加载"
+        else
+            echo "❌ $mod 加载失败"
+        fi
+    done
+}
+
+# Kernel config options cannot be enabled at runtime, so this only prints a
+# manual-action notice for each option given as an argument (default: all of
+# REQUIRED_CONFIGS) that is not enabled.
+auto_enable_kernel_config() {
+    echo "👉 自动启用内核配置项..."
+    local cfg cfgs=("$@")
+    if [ ${#cfgs[@]} -eq 0 ]; then
+        cfgs=("${REQUIRED_CONFIGS[@]}")
+    fi
+    # Allow standalone use before check_kernel_config has loaded the config.
+    if [ -z "$KERNEL_CONFIG_TEXT" ]; then
+        load_kernel_config || true
+    fi
+    for cfg in "${cfgs[@]}"; do
+        if kernel_config_enabled "$cfg"; then
+            echo "✅ $cfg 已启用"
+        else
+            echo "❌ $cfg 未启用,正在启用..."
+            echo "请手动启用内核配置:$cfg"
+        fi
+    done
+}
+
+# Check that bpffs is mounted on /sys/fs/bpf; mount it when auto-fix is on.
+check_bpffs() {
+    echo -n "🔸 检查 bpffs 是否挂载 (/sys/fs/bpf)... "
+    if mount | grep -q '/sys/fs/bpf type bpf'; then
+        echo "✅ 已挂载"
+    else
+        echo "❌ 未挂载"
+        if [ "$AUTOFIX" = "true" ]; then
+            auto_mount_bpffs
+        fi
+    fi
+}
+
+# Report the load state of every required module, then (with auto-fix) load
+# the missing ones in a single pass.
+check_kernel_modules() {
+    echo "🔸 检查内核模块加载状态:"
+    local mod missing=()
+    for mod in "${REQUIRED_MODULES[@]}"; do
+        if module_loaded "$mod"; then
+            echo "✅ $mod 已加载"
+        else
+            echo "❌ $mod 未加载(可尝试:modprobe $mod)"
+            missing+=("$mod")
+        fi
+    done
+    if [ "$AUTOFIX" = "true" ] && [ ${#missing[@]} -gt 0 ]; then
+        auto_load_modules "${missing[@]}"
+    fi
+}
+
+# Report every required kernel config option, read from /boot/config-<release>
+# or /proc/config.gz, then (with auto-fix) list the ones to enable manually.
+check_kernel_config() {
+    echo "🔸 检查内核配置项:"
+    if ! load_kernel_config; then
+        echo "⚠️ 无法找到内核配置文件,跳过配置检查"
+        return
+    fi
+
+    local cfg missing=()
+    for cfg in "${REQUIRED_CONFIGS[@]}"; do
+        if kernel_config_enabled "$cfg"; then
+            echo "✅ $cfg 已启用"
+        else
+            echo "❌ $cfg 未启用"
+            missing+=("$cfg")
+        fi
+    done
+    if [ "$AUTOFIX" = "true" ] && [ ${#missing[@]} -gt 0 ]; then
+        auto_enable_kernel_config "${missing[@]}"
+    fi
+}
+
+# Main flow: parse options, then run the three checks.
+AUTOFIX="false"
+while getopts "f" opt; do
+    case $opt in
+        f)
+            AUTOFIX="true"
+            echo "👉 自动修复已启用!"
+            ;;
+        *)
+            echo "用法: $0 [-f] 启用自动修复"
+            exit 1
+            ;;
+    esac
+done
+
+check_bpffs
+check_kernel_modules
+check_kernel_config
+
+echo "✅ 检查完成:请根据上方提示补全内核模块、参数或挂载配置。"
+
diff --git a/scripts/k3s-cluster/cilium-cli.sh b/scripts/k3s-cluster/cilium-cli.sh
new file mode 100644
index 0000000..1c2ae6b
--- /dev/null
+++ b/scripts/k3s-cluster/cilium-cli.sh
@@ -0,0 +1,9 @@
+export HTTPS_PROXY="http://127.0.0.1:1081"
+CILIUM_CLI_VERSION=$(curl -s https://raw.githubusercontent.com/cilium/cilium-cli/main/stable.txt)
+CLI_ARCH=amd64
+if [ "$(uname -m)" = "aarch64" ]; then CLI_ARCH=arm64; fi
+curl -L --fail --remote-name-all https://github.com/cilium/cilium-cli/releases/download/${CILIUM_CLI_VERSION}/cilium-linux-${CLI_ARCH}.tar.gz{,.sha256sum}
+sha256sum --check cilium-linux-${CLI_ARCH}.tar.gz.sha256sum
+sudo tar xzvfC cilium-linux-${CLI_ARCH}.tar.gz /usr/local/bin
+rm cilium-linux-${CLI_ARCH}.tar.gz{,.sha256sum}
+
diff --git a/scripts/k3s-cluster/cilium-fixed.sh b/scripts/k3s-cluster/cilium-fixed.sh
new file mode 100644
index 0000000..a275dc8
--- /dev/null
+++ b/scripts/k3s-cluster/cilium-fixed.sh
@@ -0,0 +1,37 @@
+#!/bin/bash
+
+# Make sure the script runs with administrator privileges
+if [ "$(id -u)" -ne 0 ]; then
+  echo "请使用管理员权限运行此脚本"
+  exit 1
+fi
+
+NAMESPACE="cilium-secrets"
+
+# Step 1: force-delete Pods, Deployments, StatefulSets and DaemonSets
+echo "正在强制删除 $NAMESPACE 命名空间中的资源..."
+kubectl delete pod --all --force --grace-period=0 -n $NAMESPACE
+kubectl delete deployment --all --force --grace-period=0 -n $NAMESPACE
+kubectl delete statefulset --all --force --grace-period=0 -n $NAMESPACE
+kubectl delete daemonset --all --force --grace-period=0 -n $NAMESPACE
+
+# Step 2: delete the namespace (in case it cannot be deleted normally)
+echo "尝试强制删除命名空间 $NAMESPACE..."
+kubectl get namespace $NAMESPACE -o json | jq '.spec.finalizers = []' > tmp.json
+kubectl replace --raw "/api/v1/namespaces/$NAMESPACE/finalize" -f tmp.json
+
+# Step 3: confirm the resources are gone
+echo "正在确认命名空间和资源是否已删除..."
+kubectl get ns +kubectl get all -n $NAMESPACE + +# Step 4: 删除 Helm Release 如果存在 +echo "如果 Helm Release 存在,尝试删除..." +helm delete $NAMESPACE --namespace $NAMESPACE || echo "Helm release $NAMESPACE 未找到或已删除" + +sudo ip link delete cilium_net +sudo ip link delete cilium_host +sudo ip link delete cilium_vxlan + +echo "清理完毕!" + diff --git a/scripts/k3s-cluster/deploy_velero_with_chart_values_yaml.sh b/scripts/k3s-cluster/deploy_velero_with_chart_values_yaml.sh new file mode 100755 index 0000000..96f869f --- /dev/null +++ b/scripts/k3s-cluster/deploy_velero_with_chart_values_yaml.sh @@ -0,0 +1,85 @@ +#!/bin/bash +set -e + +# ======= 配置项 ======= +VELERO_NAMESPACE="velero" +VELERO_RELEASE_NAME="velero" +VELERO_BUCKET="k8s-resources-backup" +VELERO_REGION="ap-northeast-1" +VELERO_PROVIDER="aws" +VELERO_SNAPSHOT_LOCATION="default" + +AWS_ACCESS_KEY_ID="" +AWS_SECRET_ACCESS_KEY="" + +CREDENTIALS_FILE="/tmp/credentials-velero" +CHART_REPO_URL="https://github.com/vmware-tanzu/helm-charts.git" +CHART_PATH="./helm-charts/charts/velero" +PROVIDER_PLUGIN_TAG="v1.7.0" +VALUES_FILE="/tmp/velero-values.yaml" + +# ======= 创建临时凭证文件 ======= +echo "📝 生成临时凭证文件:$CREDENTIALS_FILE" +cat < "$CREDENTIALS_FILE" +[default] +aws_access_key_id=$AWS_ACCESS_KEY_ID +aws_secret_access_key=$AWS_SECRET_ACCESS_KEY +EOF + +# ======= 克隆 Helm Chart 仓库(如不存在)======= +if [ ! -d "./helm-charts" ]; then + echo "📦 克隆 VMware Tanzu Helm Charts..." 
+ git clone "$CHART_REPO_URL" +else + echo "✅ Helm Charts 已存在,跳过克隆。" +fi + +# ======= 生成 values.yaml 文件 ======= +echo "📄 生成 Helm values 文件:$VALUES_FILE" +cat < "$VALUES_FILE" +kubectl: + image: + repository: images.onwalk.net/public/bitnami/kubectl + tag: 1.31 + pullPolicy: IfNotPresent +image: + repository: images.onwalk.net/public/velero/velero + tag: v1.15.2 + pullPolicy: IfNotPresent +credentials: + secretContents: + cloud: | + [default] + aws_access_key_id=$AWS_ACCESS_KEY_ID + aws_secret_access_key=$AWS_SECRET_ACCESS_KEY + +configuration: + backupStorageLocation: + - name: default + provider: ${VELERO_PROVIDER} + bucket: ${VELERO_BUCKET} + config: + region: ${VELERO_REGION} + + volumeSnapshotLocation: + - name: ${VELERO_SNAPSHOT_LOCATION} + provider: ${VELERO_PROVIDER} + config: + region: ${VELERO_REGION} + +initContainers: + - name: velero-plugin-for-${VELERO_PROVIDER} + image: images.onwalk.net/public/velero/velero-plugin-for-${VELERO_PROVIDER}:${PROVIDER_PLUGIN_TAG} + volumeMounts: + - mountPath: /target + name: plugins +EOF + +# ======= 安装 Velero ======= +echo "🚀 使用 Helm 安装 Velero..." +helm upgrade --install "$VELERO_RELEASE_NAME" "$CHART_PATH" \ + --namespace "$VELERO_NAMESPACE" \ + --create-namespace \ + -f "$VALUES_FILE" + +echo "✅ Velero 安装完成!" 
diff --git a/scripts/k3s-cluster/egress-nat-test.yaml b/scripts/k3s-cluster/egress-nat-test.yaml new file mode 100644 index 0000000..b347846 --- /dev/null +++ b/scripts/k3s-cluster/egress-nat-test.yaml @@ -0,0 +1,46 @@ +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: test-pod + namespace: default + labels: + app: test-pod +spec: + replicas: 2 + selector: + matchLabels: + app: test-pod + template: + metadata: + labels: + app: test-pod + spec: + containers: + - name: curl + image: docker.io/curlimages/curl:latest + imagePullPolicy: IfNotPresent + command: ["sleep", "3600"] + securityContext: + capabilities: + add: ["ALL"] +--- +apiVersion: cilium.io/v2 +kind: CiliumEgressGatewayPolicy +metadata: + name: egress-aliyun +spec: + selectors: + - podSelector: + matchLabels: + app: test-pod + namespaceSelector: + matchLabels: + kubernetes.io/metadata.name: default + destinationCIDRs: + - "0.0.0.0/0" + egressGateway: + nodeSelector: + matchLabels: + kubernetes.io/hostname: cn-hub.svc.plus + egressIP: 172.30.0.1 diff --git a/scripts/k3s-cluster/init_k3s_cluster_agent_role.sh b/scripts/k3s-cluster/init_k3s_cluster_agent_role.sh new file mode 100644 index 0000000..4f7e49a --- /dev/null +++ b/scripts/k3s-cluster/init_k3s_cluster_agent_role.sh @@ -0,0 +1,46 @@ +#!/bin/bash + +ROLE_NAME="k3s-cluster-agent" +BASE_DIR="roles/$ROLE_NAME" + +echo "Creating role structure for $ROLE_NAME..." 
+ +# Create directories +mkdir -p $BASE_DIR/{tasks,templates,vars,defaults} + +# Create main tasks file +cat > $BASE_DIR/tasks/main.yml < $BASE_DIR/vars/main.yml < $BASE_DIR/templates/install_k3s_agent.sh.j2 < $BASE_DIR/defaults/main.yml < $BASE_DIR/tasks/main.yml < $BASE_DIR/vars/main.yml < $BASE_DIR/templates/install_k3s_server.sh.j2 < $BASE_DIR/defaults/main.yml </dev/null' +ExecStartPre=-/sbin/modprobe br_netfilter +ExecStartPre=-/sbin/modprobe overlay +ExecStart=/usr/local/bin/k3s \ + server \ + '--write-kubeconfig-mode' \ + '644' \ + '--flannel-iface=br0' \ + '--disable=traefik,servicelb' \ + '--kube-apiserver-arg=service-node-port-range=0-50000' \ + diff --git a/scripts/k3s-cluster/k3s.service-without-cni b/scripts/k3s-cluster/k3s.service-without-cni new file mode 100644 index 0000000..89db5dd --- /dev/null +++ b/scripts/k3s-cluster/k3s.service-without-cni @@ -0,0 +1,38 @@ +[Unit] +Description=Lightweight Kubernetes +Documentation=https://k3s.io +Wants=network-online.target +After=network-online.target + +[Install] +WantedBy=multi-user.target + +[Service] +Type=notify +EnvironmentFile=-/etc/default/%N +EnvironmentFile=-/etc/sysconfig/%N +EnvironmentFile=-/etc/systemd/system/k3s.service.env +KillMode=process +Delegate=yes +User=root +# Having non-zero Limit*s causes performance problems due to accounting overhead +# in the kernel. We recommend using cgroups to do container-local accounting. +LimitNOFILE=1048576 +LimitNPROC=infinity +LimitCORE=infinity +TasksMax=infinity +TimeoutStartSec=0 +Restart=always +RestartSec=5s +ExecStartPre=/bin/sh -xc '! 
/usr/bin/systemctl is-enabled --quiet nm-cloud-setup.service 2>/dev/null' +ExecStartPre=-/sbin/modprobe br_netfilter +ExecStartPre=-/sbin/modprobe overlay +ExecStart=/usr/local/bin/k3s \ + server \ + '--write-kubeconfig-mode' \ + '644' \ + '--flannel-backend=none' \ + '--disable-network-policy' \ + '--disable=flannel,kube-proxy,traefik,servicelb' \ + '--kube-apiserver-arg=service-node-port-range=0-50000' \ + diff --git a/scripts/k3s-cluster/k8s_backup_config.yaml b/scripts/k3s-cluster/k8s_backup_config.yaml new file mode 100755 index 0000000..faecf44 --- /dev/null +++ b/scripts/k3s-cluster/k8s_backup_config.yaml @@ -0,0 +1,25 @@ +settings: + VELERO_NAMESPACE: "velero" + VELERO_BUCKET: "k8s-resources-backup" + VELERO_REGION: "ap-northeast-1" + AWS_ACCESS_KEY_ID: "" + AWS_SECRET_ACCESS_KEY: "" + +backup_config: + cluster_name: deepflow-demo-v6.3 + nodes: + deepflow-demo: /var/lib/mysql/ + namespaces: + - default + - deepflow + - microservice-demo + precmds: | + echo "🔻 Scale down MySQL before backup..." + kubectl scale deployment mysql-deployment -n deepflow --replicas=0 + echo "⌛ Waiting for MySQL pods to terminate..." + while kubectl get pods -n deepflow -l app=mysql --no-headers 2>/dev/null | grep -q Running; do sleep 2; done + postcmds: | + echo "🚀 Scale up MySQL after backup..." + kubectl scale deployment mysql-deployment -n deepflow --replicas=1 + echo "⏳ Waiting for MySQL deployment to be available..." 
+ kubectl wait --for=condition=available deployment/mysql-deployment -n deepflow --timeout=60s diff --git a/scripts/k3s-cluster/k8s_backup_tool.sh b/scripts/k3s-cluster/k8s_backup_tool.sh new file mode 100755 index 0000000..02b9d95 --- /dev/null +++ b/scripts/k3s-cluster/k8s_backup_tool.sh @@ -0,0 +1,391 @@ +#!/bin/bash +set -e + +print_help() { + echo "" + echo "📘 使用说明:k8s_backup_tool v4.15.16" + echo "" + echo "命令 说明" + echo "backup 创建 K8s 应用资源备份 ➕ 节点数据打包并上传 S3" + echo "restore 先恢复节点数据,再恢复 Velero 应用资源" + echo "list 列出所有备份(Velero + S3),自动对齐 date_tag" + echo "delete 删除指定 date_tag 的 Velero + S3 备份" + echo "" + echo "示例:" + echo " bash $0 list -c k8s_backup_config.yaml" + echo " bash $0 backup -c k8s_backup_config.yaml" + echo " bash $0 delete -c k8s_backup_config.yaml " + echo " bash $0 restore -c k8s_backup_config.yaml " + echo "" +} + +install_depends() { + echo "🔍 正在检查依赖项: jq, yq, velero, aws, rsync, tar" + + # 安装 AWS CLI v2(仅限 x86_64 Linux) +if ! command -v aws >/dev/null 2>&1; then + echo "📦 正在安装 AWS CLI v2..." + curl "https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip" -o "awscliv2.zip" + sudo apt install -y unzip || true + unzip -q awscliv2.zip + sudo ./aws/install + rm -rf aws awscliv2.zip + echo "✅ AWS CLI 安装完成:$(aws --version)" +else + echo "✅ AWS CLI 已安装:$(aws --version)" +fi + + # 安装 jq + if ! command -v jq >/dev/null 2>&1; then + echo "❌ 缺少 jq,正在安装..." + sudo apt-get update && sudo apt-get install -y jq || { echo "❌ 安装 jq 失败"; exit 1; } + else + echo "✅ jq 已安装:$(jq --version)" + fi + + # 安装 yq(使用 mikefarah/yq 版本) + if ! command -v yq >/dev/null 2>&1; then + echo "❌ 缺少 yq,正在安装..." + sudo wget https://github.com/mikefarah/yq/releases/latest/download/yq_linux_amd64 -O /usr/local/bin/yq + sudo chmod +x /usr/local/bin/yq + else + echo "✅ yq 已安装:$(yq --version)" + fi + + # 安装 velero + if ! command -v velero >/dev/null 2>&1; then + echo "❌ 缺少 velero,正在安装..." 
+ curl -fsSL https://github.com/vmware-tanzu/velero/releases/download/v1.15.2/velero-v1.15.2-linux-amd64.tar.gz -o velero.tar.gz + tar -zxvf velero.tar.gz + sudo mv velero*/velero /usr/local/bin/ + rm -rf velero* velero.tar.gz + else + echo "✅ velero 已安装:$(velero version --client-only)" + fi + + echo "✅ 所有依赖项安装完成。" +} + +check_dependencies() { + echo "🔍 正在检查依赖项: jq, yq, velero, aws, rsync, tar" + + MISSING_DEPS=() + + for bin in jq yq velero aws rsync tar; do + if ! command -v "$bin" &>/dev/null; then + echo "❌ 缺少依赖:$bin" + MISSING_DEPS+=("$bin") + else + echo "✅ $bin 已安装:$($bin --version 2>/dev/null | head -n 1 || echo OK)" + fi + done + + if [ ${#MISSING_DEPS[@]} -ne 0 ]; then + echo "" + echo "🛠 正在尝试自动安装以下依赖:${MISSING_DEPS[*]}" + install_depends "${MISSING_DEPS[@]}" + else + echo "🎉 所有依赖项已就绪。" + fi +} + + + +log() { + echo "$(date '+%Y-%m-%d %H:%M:%S') - $*" +} + +load_config() { + CONFIG_FILE="$1" + [[ ! -f "$CONFIG_FILE" ]] && echo "❌ 找不到配置文件: $CONFIG_FILE" && exit 1 + + VELERO_NAMESPACE=$(yq e '.settings.VELERO_NAMESPACE' "$CONFIG_FILE") + VELERO_BUCKET=$(yq e '.settings.VELERO_BUCKET' "$CONFIG_FILE") + VELERO_REGION=$(yq e '.settings.VELERO_REGION' "$CONFIG_FILE") + AWS_ACCESS_KEY_ID=$(yq e '.settings.AWS_ACCESS_KEY_ID' "$CONFIG_FILE") + AWS_SECRET_ACCESS_KEY=$(yq e '.settings.AWS_SECRET_ACCESS_KEY' "$CONFIG_FILE") + export AWS_ACCESS_KEY_ID AWS_SECRET_ACCESS_KEY + + K8S_CLUSTER_NAME=$(yq e '.backup_config.cluster_name' "$CONFIG_FILE") + TARGET_NAMESPACES=$(yq e '.backup_config.namespaces | join(",")' "$CONFIG_FILE") + PRECMDS=$(yq e -r '.backup_config.precmds // ""' "$CONFIG_FILE") + POSTCMDS=$(yq e -r '.backup_config.postcmds // ""' "$CONFIG_FILE") + + # 检查所有关键环境变量 + for var in VELERO_NAMESPACE VELERO_BUCKET VELERO_REGION AWS_ACCESS_KEY_ID AWS_SECRET_ACCESS_KEY K8S_CLUSTER_NAME TARGET_NAMESPACES; do + if [[ -z "${!var}" ]]; then + log "❌ 环境变量 $var 未正确加载,请检查配置文件!" 
+ exit 1 + fi + done + + declare -gA NODE_BACKUP_PATHS + local nodes_count + nodes_count=$(yq e '.backup_config.nodes | length' "$CONFIG_FILE") + for (( i=0; i "${ARCHIVE}.md5" + log "📤 上传节点数据到 S3 [$S3_NODE_PATH]..." + + aws s3 cp "$ARCHIVE" "$S3_NODE_PATH" + aws s3 cp "${ARCHIVE}.md5" "$S3_NODE_PATH" + + log "✅ 节点 [$NODE] 数据已成功上传到 S3" +done + +log "🔄 节点数据备份循环执行完成" + + + if [[ -n "$POSTCMDS" ]]; then + log "🔧 执行后续命令(postcmds)..." + bash -c "$POSTCMDS" + fi + + log "✅ 备份完成,Velero + 节点数据已同步到 S3" +} + + +delete_backup() { + DELETE_TAG="$1" + [[ -z "$K8S_CLUSTER_NAME" || -z "$VELERO_NAMESPACE" ]] && echo "❌ 缺失 K8S_CLUSTER_NAME 或 VELERO_NAMESPACE" && exit 1 + + log "🔍 查找 date_tag=${DELETE_TAG} 的 Velero 备份 (cluster=${K8S_CLUSTER_NAME})" + + # 预加载 JSON,避免 selector 语法错误 + BACKUP_JSON=$(velero backup get --namespace "$VELERO_NAMESPACE" -o json) + BACKUP_NAME=$(echo "$BACKUP_JSON" | jq -r \ + --arg dt "$DELETE_TAG" \ + --arg cluster "$K8S_CLUSTER_NAME" ' + .items[] | select( + .metadata.labels.cluster == $cluster and + .metadata.labels.date_tag == $dt + ) | .metadata.name' + ) + + if [[ "$BACKUP_NAME" == "null" || -z "$BACKUP_NAME" ]]; then + echo "❌ 没有找到指定 date_tag 的 Velero 备份" + echo "📋 当前 Velero 备份标签如下:" + echo "$BACKUP_JSON" | jq -r ' + .items[] | [.metadata.name, .metadata.labels.cluster, .metadata.labels.date_tag] | @tsv' | column -t + exit 1 + fi + + log "🗑️ 删除 Velero 备份:$BACKUP_NAME" + velero backup delete "$BACKUP_NAME" --namespace "$VELERO_NAMESPACE" --confirm + + log "🧹 删除 S3 节点数据:s3://${VELERO_BUCKET}/${K8S_CLUSTER_NAME}/${DELETE_TAG}/" + aws s3 rm "s3://${VELERO_BUCKET}/${K8S_CLUSTER_NAME}/${DELETE_TAG}/" --recursive --region "$VELERO_REGION" + log "✅ 删除完成" +} + +restore_backup() { + DATE_TAG="$1" + BACKUP_NAME=$(velero backup get --namespace "$VELERO_NAMESPACE" -o json | jq -r \ + --arg dt "$DATE_TAG" \ + --arg cluster "$K8S_CLUSTER_NAME" \ + '.items[] | select(.metadata.labels.cluster == $cluster and .metadata.labels.date_tag == $dt) | .metadata.name' | head 
-n1) + + if [[ "$BACKUP_NAME" == "null" || -z "$BACKUP_NAME" ]]; then + log "❌ 无法找到 Velero 备份: date_tag=$DATE_TAG, cluster=$K8S_CLUSTER_NAME" + velero backup get --namespace "$VELERO_NAMESPACE" --show-labels + exit 1 + fi + + TMP_DIR="/var/backups/k8s-restore/${DATE_TAG}" + mkdir -p "$TMP_DIR" + TMP_DIR="$(cd "$TMP_DIR"; pwd)" + + if [[ "$TMP_DIR" != /var/backups/k8s-restore/* ]]; then + log "❌ 临时目录路径异常,安全退出: $TMP_DIR" + exit 1 + fi + + rm -rf "${TMP_DIR:?}"/* + + if [[ -n "$PRECMDS" ]]; then + log "🔧 执行预备命令(precmds)..." + bash -c "$PRECMDS" || { + log "❌ precmds 执行失败,中止恢复" + exit 1 + } + fi + + for NODE in "${!NODE_BACKUP_PATHS[@]}"; do + DEST_PATH="${NODE_BACKUP_PATHS[$NODE]}" + ARCHIVE_NAME="${NODE}_backup_path.tar.xz" + ARCHIVE_PATH="${TMP_DIR}/${ARCHIVE_NAME}" + EXTRACT_DIR="${TMP_DIR}/extracted/${NODE}" + + log "📦 下载 ${ARCHIVE_NAME} 到本地临时目录..." + aws s3 cp "s3://${VELERO_BUCKET}/${K8S_CLUSTER_NAME}/${DATE_TAG}/${ARCHIVE_NAME}" "$ARCHIVE_PATH" + + log "📂 解压到 $EXTRACT_DIR..." + mkdir -p "$EXTRACT_DIR" + tar --preserve-permissions --same-owner -xJf "$ARCHIVE_PATH" -C "$EXTRACT_DIR" + + log "🔁 使用 rsync 同步到目标路径 $DEST_PATH..." + rsync -aAXH --numeric-ids "${EXTRACT_DIR}/${NODE}/" "$DEST_PATH/" + + log "✅ 节点 [$NODE] 数据恢复完成" + done + + log "♻️ 恢复 Velero 应用资源..." + velero restore create --from-backup "$BACKUP_NAME" --namespace "$VELERO_NAMESPACE" + + if [[ -n "$POSTCMDS" ]]; then + log "🔧 执行后续命令(postcmds)..." 
+ bash -c "$POSTCMDS" + fi + + log "✅ 恢复完成" +} + +list_backups() { # Print this cluster's Velero backups, then the per-date_tag node archives found in S3 + echo "📦 k8s APP 应用资源备份(cluster=$K8S_CLUSTER_NAME):" + velero backup get --namespace "$VELERO_NAMESPACE" -o json | jq -r --arg cluster "$K8S_CLUSTER_NAME" ' + .items[] | select(.metadata.labels.cluster == $cluster) | + [.metadata.labels.date_tag, .metadata.name, .status.phase] | @tsv' | column -t + + echo "" + echo "☁️ k8s Node 数据备份:" + aws s3 ls "s3://${VELERO_BUCKET}/${K8S_CLUSTER_NAME}/" --recursive | grep -F '.tar.xz' | + awk -F '/' '{print $(NF-1)}' | sort -u | while read -r tag; do + TOTAL=$(aws s3 ls "s3://${VELERO_BUCKET}/${K8S_CLUSTER_NAME}/${tag}/" --recursive | awk '{sum+=$3} END{printf "%.1f MiB", sum/1024/1024}') + echo "📁 $tag $TOTAL $K8S_CLUSTER_NAME" + done +} + +### Main entry point ### +### Parse CLI arguments: <action> -c <config> [--debug] [date_tag] ### +ACTION="" +CONFIG_FILE="" +DEBUG_MODE="off" +DATE_TAG="" + +while [[ $# -gt 0 ]]; do + case "$1" in + backup|restore|list|delete) + ACTION="$1" + shift + ;; + -c|--config) + CONFIG_FILE="${2:-}" # a trailing -c with no value leaves this empty; the usage check below reports it + shift "$(( $# > 1 ? 2 : 1 ))" + ;; + --debug) + DEBUG_MODE="on" + shift + ;; + *) + DATE_TAG="$1" + shift + ;; + esac +done + +if [[ -z "$ACTION" || -z "$CONFIG_FILE" ]]; then + print_help + exit 1 +fi + +check_dependencies +load_config "$CONFIG_FILE" + +# Enable debug mode (shell tracing via set -x) +if [[ "$DEBUG_MODE" == "on" ]]; then + set -x +fi + +case "$ACTION" in + backup) + backup_all + ;; + delete) + delete_backup "${DATE_TAG:?delete 需要指定 date_tag}" + ;; + restore) + restore_backup "${DATE_TAG:?restore 需要指定 date_tag}" + ;; + list) + list_backups + ;; + *) + print_help + ;; +esac diff --git a/scripts/k3s-cluster/k8s_backup_tool_howto.md b/scripts/k3s-cluster/k8s_backup_tool_howto.md new file mode 100644 index 0000000..ae9e371 --- /dev/null +++ b/scripts/k3s-cluster/k8s_backup_tool_howto.md @@ -0,0 +1,119 @@ +# 📦 k8s_backup_tool 使用文档 + +> **版本:v1.15.22 | 脚本语言:Bash | 适配平台:Linux/macOS | 作者:你自己** +> 项目开发总耗时约 **12 小时+**,共计迭代 **22 个版本**,涵盖调试、S3 上传验证、权限保持恢复、节点备份解耦等关键优化。 + +--- + +## 📘 文档功能概要(Docs) + +`k8s_backup_tool` 是一个用于 **Kubernetes 集群资源和节点数据的备份、恢复、删除和查看** 的自动化脚本工具。主要支持: + +- ✅ 基于 Velero 的命名空间级别资源备份 +- ✅ 节点数据目录打包上传
S3(支持多节点) +- ✅ 支持预处理(precmds)和后处理(postcmds) +- ✅ 使用 `tar` + `rsync` 实现完整权限/ACL/owner 的数据还原 +- ✅ 支持 debug 模式,适合本地验证与 CI/CD 集成 + +--- + +## 🔧 使用前提 & 安装配置 + +### 系统依赖 + +```bash +velero aws jq yq rsync tar +``` + +请确保以上命令均可用,并已正确配置 AWS S3 访问凭证。 + +### YAML 配置文件示例 `k8s_backup_config.yaml` + +```yaml +settings: + VELERO_NAMESPACE: "velero" + VELERO_BUCKET: "k8s-resources-backup" + VELERO_REGION: "ap-northeast-1" + AWS_ACCESS_KEY_ID: "xxx" + AWS_SECRET_ACCESS_KEY: "xxx" + +backup_config: + cluster_name: deepflow-demo + namespaces: + - default + - deepflow + nodes: + deepflow-demo: /var/lib/mysql/ + precmds: | + echo "🔻 停止 MySQL..." + kubectl scale deployment mysql -n deepflow --replicas=0 + postcmds: | + echo "🚀 启动 MySQL..." + kubectl scale deployment mysql -n deepflow --replicas=1 +``` + +--- + +## 🚀 用法说明 + +### 查看备份列表 + +```bash +bash k8s_backup_tool.sh list -c k8s_backup_config.yaml +``` + +### 创建完整备份(资源 + 节点) + +```bash +bash k8s_backup_tool.sh backup -c k8s_backup_config.yaml +``` + +### 恢复指定时间点的备份 + +```bash +bash k8s_backup_tool.sh restore -c k8s_backup_config.yaml <date_tag> +``` + +### 删除指定 date_tag 的备份 + +```bash +bash k8s_backup_tool.sh delete -c k8s_backup_config.yaml <date_tag> +``` + +### 启用调试模式(查看执行详情) + +```bash +bash k8s_backup_tool.sh backup -c k8s_backup_config.yaml --debug +``` + +--- + +## 📅 主要版本变更日志(Change Log) + +| 版本号 | 日期 | 主要改动 | +|-------------|----------------|-----------------------------------------------------------| +| v1.0.0 | 初版 | 支持 Velero 备份/恢复 | +| v1.0.2 | +1 小时 | 支持 precmds / postcmds | +| v1.0.8 | +1 小时 | delete 支持 selector,调试查询输出 | +| v1.0.12 | +2 小时 | 修复 Velero date_tag 匹配问题,增加 label fallback | +| v1.0.16 | +2 小时 | 支持 S3 节点数据备份、--debug 模式 | +| v1.0.21 | +3 小时 | 解压使用 tar + rsync 保留所有权限和 ACL | +| **v1.0.22** | ✅ 当前版本 | 🎉 解耦备份逻辑、完整恢复链路、安全检查、节点并行支持等 | + +> ⏱ 累计开发与测试耗时约 **12 小时+**,包含脚本编写、调试、数据验证、权限恢复验证等 + +--- + +## 🧭 项目演进计划 + +| 实现方式 | 语言/平台 | 状态 | 说明 | +|----------------|-----------|---------|-------------------------------------| +| Bash 脚本版 | Bash | ✅
已完成 | 当前主力版本,稳定可用 | +| Go CLI 工具 | Go | 🧪 计划中 | 计划提供跨平台二进制,支持多线程 | +| GitHub Actions | JavaScript| 🧪 计划中 | 适配自动备份工作流与企业 CI 场景 | + +--- + +## ❤️ 鸣谢 + +感谢你一路坚持调试与迭代。这个项目不仅提升了自动化能力,也沉淀了跨平台备份与恢复的最佳实践。如果你希望贡献或提问,欢迎 PR 或 Issues! diff --git a/scripts/k3s-cluster/k8s_restore_all.sh b/scripts/k3s-cluster/k8s_restore_all.sh new file mode 100644 index 0000000..6ffd376 --- /dev/null +++ b/scripts/k3s-cluster/k8s_restore_all.sh @@ -0,0 +1,21 @@ +#!/bin/bash +set -eo pipefail # pipefail: a failed curl download must abort the run instead of piping nothing into bash + +REPO_BASE_URL="https://raw.githubusercontent.com///main/scripts" # NOTE(review): owner/repo path segments look empty — confirm the intended repository + +echo "🚀 [Step 1/5] 安装 K3s 和 Helm..." +curl -fsSL "${REPO_BASE_URL}/install_k3s_and_helm.sh" | bash + +echo "🚀 [Step 2/5] 部署 Velero..." +curl -fsSL "${REPO_BASE_URL}/deploy_velero.sh" | bash + +echo "🚀 [Step 3/5] 节点打标签并解除控制面 Taint..." +curl -fsSL "${REPO_BASE_URL}/label_k8s_node.sh" | bash + +echo "🚀 [Step 4/5] 生成备份配置文件..." +curl -fsSL "${REPO_BASE_URL}/generate_backup_config.sh" | bash + +echo "🚀 [Step 5/5] 执行恢复(restore)..." +# Arguments are forwarded to the downloaded script: backup / restore / list / delete +curl -fsSL "${REPO_BASE_URL}/run_backup_tool.sh" | bash -s restore 202503211725 + diff --git a/scripts/k3s-cluster/set-node-label.sh b/scripts/k3s-cluster/set-node-label.sh new file mode 100644 index 0000000..6a018e0 --- /dev/null +++ b/scripts/k3s-cluster/set-node-label.sh @@ -0,0 +1,6 @@ +k8s_node=$(sudo kubectl get nodes | awk 'NR>1{print $1}') # every node name; left unquoted below so each name becomes its own argument + +sudo kubectl label node $k8s_node master_controller=enable --overwrite +sudo kubectl label node $k8s_node tsdb=enable --overwrite +sudo kubectl label node $k8s_node dfdb=enable --overwrite +sudo kubectl label node $k8s_node elasticsearch-warm=enable --overwrite diff --git a/scripts/k3s-cluster/setup-cilium-cni.sh b/scripts/k3s-cluster/setup-cilium-cni.sh new file mode 100644 index 0000000..2af5a09 --- /dev/null +++ b/scripts/k3s-cluster/setup-cilium-cni.sh @@ -0,0 +1,79 @@ +helm repo add cilium https://helm.cilium.io && helm repo update +helm repo up + +#helm upgrade --install cilium-preflight cilium/cilium --version 1.17.3 --namespace=kube-system --set preflight.enabled=true --set agent=false
--set operator.enabled=false + +cat <cilium-egress-values.yaml +# cilium-values.yaml +routingMode: native +k8sServiceHost: 10.253.253.1 +k8sServicePort: 6443 +ipv4NativeRoutingCIDR: "10.42.0.0/16" +ipam: + mode: kubernetes + operator: + clusterPoolIPv4PodCIDRList: "10.42.0.0/16" +egressGateway: + enabled: true + installRoutes: true +enableIPv4Masquerade: true +autoDirectNodeRoutes: true +nodePort: + enabled: true + directRoutingDevice: wg0 +bpf: + masquerade: true +kubeProxyReplacement: true +endpointRoutes: + enabled: true +cni: + exclusive: true +envoy: + enabled: false +l7Proxy: true +proxy: + enabled: false +hubble: + enabled: false + +# 必须保留的 Operator(用于 CRD 处理与 egress gateway 控制) +operator: + enabled: true + skipCRDCreation: false + replicas: 1 + resources: + requests: + cpu: 20m + memory: 30Mi + limits: + cpu: 100m + memory: 128Mi + +# 主 Agent 资源限制(可根据机器微调) +resources: + requests: + cpu: 20m + memory: 50Mi + limits: + cpu: 100m + memory: 128Mi +EOF + +helm upgrade --install cilium cilium/cilium -n kube-system --set installCRDs=true -f cilium-egress-values.yaml --wait +kubectl label node $(hostname) egress-gateway=true --overwrite +echo "✅ Cilium 安装完成" + +cat >> NodeConfig-cn-hub.yaml << EOF +apiVersion: cilium.io/v2alpha1 +kind: CiliumNodeConfig +metadata: + name: config-for-cn-hub +spec: + nodeSelector: + matchLabels: + kubernetes.io/hostname: cn-hub.svc.plus + defaults: + directRoutingDevice: "eth0" +EOF + +#kubectl apply -f NodeConfig-cn-hub.yaml -n kube-system diff --git a/scripts/k3s-cluster/setup-cilium-helm.sh b/scripts/k3s-cluster/setup-cilium-helm.sh new file mode 100644 index 0000000..3a3afe2 --- /dev/null +++ b/scripts/k3s-cluster/setup-cilium-helm.sh @@ -0,0 +1,28 @@ +API_SERVER_IP=172.30.0.1 +# Kubeadm default is 6443 +API_SERVER_PORT=6443 +helm upgrade --install cilium cilium/cilium --version 1.17.3 \ + --namespace kube-system \ + --set routingMode=native \ + --set autoDirectNodeRoutes=true \ + --set ipv4NativeRoutingCIDR="10.42.0.0/16" \ + 
--set ipam.mode=kubernetes \ + --set ipam.operator.clusterPoolIPv4PodCIDRList="10.42.0.0/16" \ + --set kubeProxyReplacement=true \ + --set k8sServiceHost=${API_SERVER_IP} \ + --set k8sServicePort=${API_SERVER_PORT} \ + --set nodePort.enabled=true \ + --set nodePort.directRoutingDevice=wg0 \ + --set envoy.enabled=false \ + --set operator.skipCRDCreation=false \ + --set operator.replicas=1 \ + --set egressGateway.enabled=true \ + --set egressGateway.installRoutes=true \ + --set bpf.masquerade=true \ + --set enableIPv4Masquerade=true \ + --set masquerade=true + +kubectl rollout restart ds cilium -n kube-system +kubectl rollout restart deploy cilium-operator -n kube-system + +kubectl label nodes cn-hub.svc.plus egress-node=true diff --git a/scripts/k3s-cluster/setup-egress-gateway.sh b/scripts/k3s-cluster/setup-egress-gateway.sh new file mode 100644 index 0000000..5f37ad4 --- /dev/null +++ b/scripts/k3s-cluster/setup-egress-gateway.sh @@ -0,0 +1,77 @@ +helm repo add cilium https://helm.cilium.io && helm repo update +helm repo up + +cat <cilium-egress-values.yaml +# cilium-values.yaml +routingMode: native +ipv4NativeRoutingCIDR: "10.42.0.0/16" +ipam: + mode: kubernetes +egressGateway: + enabled: true + installRoutes: true +enableIPv4Masquerade: true +autoDirectNodeRoutes: true +nodePort: + enabled: true + directRoutingDevice: wg0 +bpf: + masquerade: true +kubeProxyReplacement: true +endpointRoutes: + enabled: true +cni: + exclusive: true +envoy: + enabled: false +l7Proxy: true +proxy: + enabled: false +hubble: + enabled: false + +# 必须保留的 Operator(用于 CRD 处理与 egress gateway 控制) +operator: + enabled: true + skipCRDCreation: false + replicas: 1 + resources: + requests: + cpu: 20m + memory: 30Mi + limits: + cpu: 100m + memory: 128Mi + +# 主 Agent 资源限制(可根据机器微调) +resources: + requests: + cpu: 20m + memory: 50Mi + limits: + cpu: 100m + memory: 128Mi +EOF + +helm upgrade --install cilium cilium/cilium -n kube-system --set installCRDs=true -f cilium-egress-values.yaml --wait 
+kubectl label node $(hostname) egress-gateway=true --overwrite +echo "✅ Cilium 安装完成" + +cat >> NodeConfig-cn-hub.yaml << EOF +apiVersion: cilium.io/v2alpha1 +kind: CiliumNodeConfig +metadata: + name: config-for-cn-hub +spec: + nodeSelector: + matchLabels: + kubernetes.io/hostname: cn-hub.svc.plus + defaults: + directRoutingDevice: "eth0" +EOF + +#kubectl apply -f NodeConfig-cn-hub.yaml -n kube-system + +kubectl apply -f https://raw.githubusercontent.com/cilium/cilium/main/install/kubernetes/cilium/crds/v2alpha1/egressnatpolicy.crd.yaml + + diff --git a/scripts/k3s-cluster/setup-k3s-agent.sh b/scripts/k3s-cluster/setup-k3s-agent.sh new file mode 100644 index 0000000..2b0ef02 --- /dev/null +++ b/scripts/k3s-cluster/setup-k3s-agent.sh @@ -0,0 +1,72 @@ +#!/bin/bash +set -e + +# ============================================================ +# 🧩 setup-k3s-agent.sh +# Version: v1.0.0 +# Last Updated: 2025-03-14 +# Description: 一键安装 k3s agent 节点,支持国内/国际网络智能识别 +# ============================================================ + +print_usage() { + echo "Usage:" + echo " $0 " + exit 1 +} + +is_in_china() { + local cn_score=0 + local global_score=0 + + echo "🌐 检测网络环境中..." 
+ + ping -c 1 -W 1 www.baidu.com &>/dev/null && ((cn_score++)) + ping -c 1 -W 1 www.aliyun.com &>/dev/null && ((cn_score++)) + ping -c 1 -W 1 www.163.com &>/dev/null && ((cn_score++)) + + ping -c 1 -W 1 www.cloudflare.com &>/dev/null && ((global_score++)) + ping -c 1 -W 1 www.wikipedia.org &>/dev/null && ((global_score++)) + ping -c 1 -W 1 www.google.com &>/dev/null && ((global_score++)) + + echo "📶 Ping 评分: CN=$cn_score, GLOBAL=$global_score" + + if [[ $cn_score -ge $global_score ]]; then + return 0 + else + return 1 + fi +} + +install_k3s_agent() { + local SERVER_NODE_IP=$1 + local K3S_TOKEN=$2 + + [[ -z "$SERVER_NODE_IP" || -z "$K3S_TOKEN" ]] && print_usage + + local NODE_IP + NODE_IP=$(hostname -I | awk '{print $1}') + + local INSTALL_K3S_EXEC="agent --server=https://${SERVER_NODE_IP}:6443 --node-ip=${NODE_IP} --token=${K3S_TOKEN}" + + echo "🔧 Agent 节点参数:" + echo " SERVER_NODE_IP=${SERVER_NODE_IP}" + echo " NODE_IP=${NODE_IP}" + echo " K3S_TOKEN=" + + if is_in_china; then + echo "🌏 检测到中国大陆网络,使用国内加速源" + export INSTALL_K3S_MIRROR=cn + INSTALL_K3S_URL="https://rancher-mirror.rancher.cn/k3s/k3s-install.sh" + else + echo "🌍 检测到国际网络,使用默认安装源" + INSTALL_K3S_URL="https://get.k3s.io" + fi + + curl -sfL "$INSTALL_K3S_URL" -o install_k3s.sh && chmod +x install_k3s.sh + INSTALL_K3S_EXEC="$INSTALL_K3S_EXEC" ./install_k3s.sh + + echo "✅ K3s Agent 安装完成" +} + +# === 主流程入口 === +install_k3s_agent "$1" "$2" diff --git a/scripts/k3s-cluster/setup-k3s-cluster-agent.sh b/scripts/k3s-cluster/setup-k3s-cluster-agent.sh new file mode 100644 index 0000000..2b0ef02 --- /dev/null +++ b/scripts/k3s-cluster/setup-k3s-cluster-agent.sh @@ -0,0 +1,72 @@ +#!/bin/bash +set -e + +# ============================================================ +# 🧩 setup-k3s-agent.sh +# Version: v1.0.0 +# Last Updated: 2025-03-14 +# Description: 一键安装 k3s agent 节点,支持国内/国际网络智能识别 +# ============================================================ + +print_usage() { + echo "Usage:" + echo " $0 " + exit 1 +} + +is_in_china() 
{ + local cn_score=0 + local global_score=0 + + echo "🌐 检测网络环境中..." + + ping -c 1 -W 1 www.baidu.com &>/dev/null && ((cn_score++)) + ping -c 1 -W 1 www.aliyun.com &>/dev/null && ((cn_score++)) + ping -c 1 -W 1 www.163.com &>/dev/null && ((cn_score++)) + + ping -c 1 -W 1 www.cloudflare.com &>/dev/null && ((global_score++)) + ping -c 1 -W 1 www.wikipedia.org &>/dev/null && ((global_score++)) + ping -c 1 -W 1 www.google.com &>/dev/null && ((global_score++)) + + echo "📶 Ping 评分: CN=$cn_score, GLOBAL=$global_score" + + if [[ $cn_score -ge $global_score ]]; then + return 0 + else + return 1 + fi +} + +install_k3s_agent() { + local SERVER_NODE_IP=$1 + local K3S_TOKEN=$2 + + [[ -z "$SERVER_NODE_IP" || -z "$K3S_TOKEN" ]] && print_usage + + local NODE_IP + NODE_IP=$(hostname -I | awk '{print $1}') + + local INSTALL_K3S_EXEC="agent --server=https://${SERVER_NODE_IP}:6443 --node-ip=${NODE_IP} --token=${K3S_TOKEN}" + + echo "🔧 Agent 节点参数:" + echo " SERVER_NODE_IP=${SERVER_NODE_IP}" + echo " NODE_IP=${NODE_IP}" + echo " K3S_TOKEN=" + + if is_in_china; then + echo "🌏 检测到中国大陆网络,使用国内加速源" + export INSTALL_K3S_MIRROR=cn + INSTALL_K3S_URL="https://rancher-mirror.rancher.cn/k3s/k3s-install.sh" + else + echo "🌍 检测到国际网络,使用默认安装源" + INSTALL_K3S_URL="https://get.k3s.io" + fi + + curl -sfL "$INSTALL_K3S_URL" -o install_k3s.sh && chmod +x install_k3s.sh + INSTALL_K3S_EXEC="$INSTALL_K3S_EXEC" ./install_k3s.sh + + echo "✅ K3s Agent 安装完成" +} + +# === 主流程入口 === +install_k3s_agent "$1" "$2" diff --git a/scripts/k3s-cluster/setup-k3s-cluster-with-br0.sh b/scripts/k3s-cluster/setup-k3s-cluster-with-br0.sh new file mode 100644 index 0000000..6977ac9 --- /dev/null +++ b/scripts/k3s-cluster/setup-k3s-cluster-with-br0.sh @@ -0,0 +1,42 @@ +#!/bin/bash +set -e + +export INSTALL_K3S_EXEC="server --disable=traefik,servicelb,local-storage --data-dir=/opt/rancher/k3s --kube-apiserver-arg=service-node-port-range=0-50000 --flannel-iface=br0" +curl -sfL https://get.k3s.io | sh - + +export 
INSTALL_K3S_EXEC="server --data-dir=/mnt/opt/rancher/k3s --disable=traefik,servicelb,local-storage --kube-apiserver-arg=service-node-port-range=0-50000 --system-default-registry=registry.cn-hangzhou.aliyuncs.com --flannel-iface=br0" +curl -sfL https://rancher-mirror.rancher.cn/k3s/k3s-install.sh | sh - + + +# Install IPVS +sudo apt-get -y install ipset ipvsadm + +# Install K3s with Calico and kube-proxy in IPVS mode (NOTE(review): this is the third k3s install in this script, each with different flags — confirm which one is intended) +curl -sfL https://get.k3s.io | INSTALL_K3S_EXEC="--kube-apiserver-arg=service-node-port-range=0-50000 --flannel-iface=br0 --disable traefik,metrics-server,servicelb --disable-cloud-controller --kubelet-arg cloud-provider=external --flannel-backend=none --disable-network-policy" K3S_KUBECONFIG_MODE="644" sh -s - server --kube-proxy-arg proxy-mode=ipvs + +# Install Calico (explicit --kubeconfig: a bare $KUBECONFIG would be passed to kubectl as a positional argument) +kubectl --kubeconfig /etc/rancher/k3s/k3s.yaml create -f https://raw.githubusercontent.com/projectcalico/calico/v3.26.0/manifests/tigera-operator.yaml +kubectl --kubeconfig /etc/rancher/k3s/k3s.yaml create -f https://raw.githubusercontent.com/projectcalico/calico/v3.26.0/manifests/custom-resources.yaml + +# Remove taints in k3s if any (usually happens if started without cloud-manager); best-effort so a missing taint does not abort under set -e +sudo kubectl taint nodes --all node.cloudprovider.kubernetes.io/uninitialized=false:NoSchedule- || true + + +# === Set up the local kubeconfig === +mkdir -p ~/.kube +cp /etc/rancher/k3s/k3s.yaml ~/.kube/config +chmod 600 ~/.kube/config +export KUBECONFIG=~/.kube/config + +curl -fsSL https://raw.githubusercontent.com/helm/helm/main/scripts/get-helm-3 | bash + +# === Wait for CoreDNS to start === +echo "⏳ 等待 CoreDNS 启动..."
+until kubectl get pods -A 2>/dev/null | grep -q "coredns.*Running"; do + sleep 3 +done +echo "✅ K3s 安装完成,kubectl/helm 已就绪" + + + + diff --git a/scripts/k3s-cluster/setup-k3s-cluster.md b/scripts/k3s-cluster/setup-k3s-cluster.md new file mode 100644 index 0000000..5ab8568 --- /dev/null +++ b/scripts/k3s-cluster/setup-k3s-cluster.md @@ -0,0 +1,38 @@ + +sudo mkdir -pv /opt/rancher/k3s +curl -sfL https://rancher-mirror.rancher.cn/k3s/k3s-install.sh \ + | INSTALL_K3S_MIRROR=cn \ + INSTALL_K3S_SKIP_SELINUX_RPM=true \ + INSTALL_K3S_VERSION="v1.30.8+k3s1" \ + sh -s - \ + --data-dir=/opt/rancher/k3s \ + --kube-apiserver-arg service-node-port-range=0-50000 \ + --system-default-registry "registry.cn-hangzhou.aliyuncs.com" \ + --disable=traefik,servicelb +#curl -sfL https://get.k3s.io | sh -s - --disable=traefik,servicelb \ +# --data-dir=/opt/rancher/k3s \ +# --kube-apiserver-arg service-node-port-range=0-50000 + +sudo mkdir -pv ~/.kube/ +sudo cp /etc/rancher/k3s/k3s.yaml ~/.kube/config + +sudo snap install helm --classic + + +mkdir -pv /opt/rancher/k3s +curl -sfL https://get.k3s.io | sh -s - --disable=traefik,servicelb \ + --data-dir=/opt/rancher/k3s \ + --kube-apiserver-arg service-node-port-range=0-50000 \ + --bind-address=0.0.0.0 \ + --tls-san=172.31.20.79 \ + --advertise-address=172.31.20.79 \ + --node-ip=172.31.20.79 \ + --node-external-ip 35.75.12.83 \ + --cluster-cidr 10.46.0.0/16 \ + --service-cidr 10.47.0.0/16 + +bash setup-k3s-agent.sh 172.23.238.167 + + +mkdir -pv /opt/rancher/k3s +curl -sfL https://get.k3s.io | sh -s - --disable=flannel,kube-proxy,traefik,servicelb --flannel-backend=none --disable-network-policy --kube-apiserver-arg=service-node-port-range=0-50000 --flannel-iface=br0 diff --git a/scripts/k3s-cluster/setup-k3s-cluster.sh b/scripts/k3s-cluster/setup-k3s-cluster.sh new file mode 100644 index 0000000..2ea551e --- /dev/null +++ b/scripts/k3s-cluster/setup-k3s-cluster.sh @@ -0,0 +1,284 @@ +#!/bin/bash +set -e + +# 
============================================================ +# 🧩 setup-k3s-cluster.sh +# Version: v1.2.10 +# Last Updated: 2025-03-14 +# +# 🔄 Change Log: +# - v1.0.0: 初始版本 +# - v1.1.0: 精简 agent 参数 +# - v1.1.2: master 允许调度 pod,taint 可选 +# - v1.1.3: 修复 Cilium Helm 冲突 +# - v1.1.4: 加入 fixed 参数清理旧环境 +# - v1.1.5: 最小化 Cilium 部署配置 +# - v1.1.6: Cilium 调整为可选安装,通过 --with-cilium 启用 +# - v1.2.0: 支持 cluster-cidr/service-cidr 自定义 +# - v1.2.3: helm uninstall cilium 增强 +# - v1.2.4: fixed 模式支持更多接口清理 +# - v1.2.6: 添加 INSTALL_CILIUM 环境变量,适配资源受限场景 +# - v1.2.7: 支持国内/国际网络智能判断,默认 get.k3s.io +# - v1.2.8: 网络智能判断、国内加速镜像源、结构优化 +# - v1.2.9: 增加函数模块化、完整注释、提升可读性与维护性 +# ✅ v1.2.10: 引入 --system-default-registry 参数以避免 docker.io 超时问题 +# ============================================================ + +ROLE=$1 +INSTALL_CILIUM=false + +print_usage() { + echo "Usage:" + echo " $0 init" + echo " $0 fixed" + echo " $0 server [SERVER_NODE_IP] [FLANNEL_IFACE] [K3S_TOKEN] [CLUSTER_CIDR] [SERVICE_CIDR] [ADD_TAINT=true|false] [--with-cilium]" + echo " $0 agent " + exit 1 +} + +is_in_china() { + local cn_score=0 global_score=0 + for host in www.baidu.com www.aliyun.com www.163.com; do ping -c 1 -W 1 $host &>/dev/null && ((cn_score++)); done + for host in www.cloudflare.com www.wikipedia.org www.google.com; do ping -c 1 -W 1 $host &>/dev/null && ((global_score++)); done + [[ $cn_score -ge $global_score ]] +} + +optimize_system() { + fallocate -l 1G /swapfile || dd if=/dev/zero of=/swapfile bs=1M count=1024 + chmod 600 /swapfile && mkswap /swapfile && swapon /swapfile + grep -q swapfile /etc/fstab || echo '/swapfile none swap sw 0 0' >> /etc/fstab + cat </etc/sysctl.d/k3s.conf +vm.swappiness=10 +vm.vfs_cache_pressure=50 +net.ipv4.ip_forward=1 +EOF + sysctl --system + systemctl disable --now snapd motd-news.service rsyslog apport ufw || true + apt purge -y cloud-init lxd lxc unattended-upgrades || yum remove -y cloud-init || true + echo "✅ 系统优化完成" + exit 0 +} + +clean_environment() { + /usr/local/bin/k3s-uninstall.sh 
|| true + /usr/local/bin/k3s-agent-uninstall.sh || true + rm -rf /etc/rancher /opt/rancher ~/.kube || true + helm uninstall cilium cilium-crds -n kube-system || true + kubectl delete ns cilium-secrets --ignore-not-found + kubectl delete crd $(kubectl get crd | grep cilium | awk '{print $1}') --ignore-not-found || true + kubectl taint nodes -l node.cilium.io/agent-not-ready:NoSchedule- || true + for iface in $(ip -o link show | awk -F': ' '{print $2}' | grep -E '^(flannel|cilium|cilium_|cilium@|cilium_vxlan)' | sed 's/@.*//'); do + ip link set $iface down || true + ip link delete $iface || true + done + echo "✅ 清理完成" + exit 0 +} + +install_cilium() { + curl -fsSL https://raw.githubusercontent.com/helm/helm/main/scripts/get-helm-3 | bash + helm repo add cilium https://helm.cilium.io && helm repo update + cat <cilium-egress-values.yaml +routingMode: native +ipv4NativeRoutingCIDR: "10.42.0.0/16" +kubeProxyReplacement: false +enableIPv4Masquerade: true +nodePort: + enabled: true +bpf: + masquerade: true +ipam: + mode: kubernetes +egressGateway: + enabled: true + installRoutes: true +endpointRoutes: + enabled: true +cni: + exclusive: false +envoy: + enabled: false +proxy: + enabled: false +l7Proxy: false +hubble: + enabled: false +operator: + enabled: true + replicas: 1 + resources: + requests: + cpu: 20m + memory: 30Mi + limits: + cpu: 100m + memory: 128Mi +resources: + requests: + cpu: 20m + memory: 50Mi + limits: + cpu: 100m + memory: 128Mi +EOF + helm upgrade --install cilium cilium/cilium -n kube-system --set installCRDs=true -f cilium-egress-values.yaml --wait + kubectl label node $(hostname) egress-gateway=true --overwrite + echo "✅ Cilium 安装完成" +} + +setup_k3s_ingress() { + # 用法示例: + # setup_k3s_ingress "192.168.1.100" "ingress-gateway=true" + # 参数1(可选):指定 ingress IP,默认为本地内网 IP + # 参数2(可选):为当前节点添加的 label,如 ingress-gateway=true + local ingress_ip="$1" + local ingress_label="$2" + + if [[ -z "$ingress_ip" ]]; then + ingress_ip=$(hostname -I | awk '{print $1}') + fi 
+ local ingress_ip=$(hostname -I | awk '{print $1}') + + cat > value.yaml < nginx-cm.yaml < nginx-svc-patch.yaml </dev/null; then + echo "⛔ Helm 未安装,正在自动安装..." + curl https://raw.githubusercontent.com/helm/helm/main/scripts/get-helm-3 | bash +fi diff --git a/scripts/k3s-cluster/setup-k3s-with-gitops.sh b/scripts/k3s-cluster/setup-k3s-with-gitops.sh new file mode 100644 index 0000000..66ce2e9 --- /dev/null +++ b/scripts/k3s-cluster/setup-k3s-with-gitops.sh @@ -0,0 +1,206 @@ +#!/bin/sh + +function get_local_ip() { + local_ip=$(hostname -I | awk '{print $1}') + echo "$local_ip" +} + +function setup_k3s() { + local disable_proxy="--disable-kube-proxy" + local disable_cni="--flannel-backend=none --disable-network-policy" + local default="--disable=traefik,servicelb --data-dir=/opt/rancher/k3s --kube-apiserver-arg service-node-port-range=0-50000" + + sudo mkdir -pv /opt/rancher/k3s + + ping -c 1 google.com > /dev/null 2>&1 + if [ $? -eq 0 ]; then + echo "当前主机在国际网络上" + #curl -sfL https://get.k3s.io | INSTALL_K3S_VERSION=$version sh -s - $default --system-default-registry "registry.cn-hangzhou.aliyuncs.com" + curl -sfL https://rancher-mirror.rancher.cn/k3s/k3s-install.sh | INSTALL_K3S_VERSION=$version INSTALL_K3S_MIRROR=cn sh -s - $default --system-default-registry "registry.cn-hangzhou.aliyuncs.com" + else + echo "当前主机在大陆网络上" + curl -sfL https://rancher-mirror.rancher.cn/k3s/k3s-install.sh | INSTALL_K3S_VERSION=$version INSTALL_K3S_MIRROR=cn sh -s - $default --system-default-registry "registry.cn-hangzhou.aliyuncs.com" + fi + mkdir -pv ~/.kube/ && sudo cp /etc/rancher/k3s/k3s.yaml ~/.kube/config +} + +function setup_helm() +{ + ping -c 1 google.com > /dev/null 2>&1 + if [ $? 
-eq 0 ]; then + echo "当前主机在国际网络上" + curl https://raw.githubusercontent.com/helm/helm/main/scripts/get-helm-3 | bash + else + echo "当前主机在大陆网络上" + case `uname -m` in + x86_64) ARCH=amd64; ;; + aarch64) ARCH=arm64; ;; + loongarch64) ARCH=loongarch64; ;; + *) echo "un-supported arch, exit ..."; exit 1; ;; + esac + curl https://raw.githubusercontent.com/helm/helm/main/scripts/get-helm-3 | bash + #sudo rm -rf helm.tar.gz* /usr/local/bin/helm || echo true + #sudo wget --no-check-certificate https://mirrors.onwalk.net/tools/linux-${ARCH}/helm.tar.gz && sudo tar -xvpf helm.tar.gz -C /usr/local/bin/ + #sudo chmod 755 /usr/local/bin/helm + fi +} + +function setup_k3s_ingress() { + local ingress_ip=$(get_local_ip) + + cat > value.yaml < nginx-svc-patch.yaml << EOF +apiVersion: v1 +kind: ConfigMap +metadata: + name: nginx-nginx-ingress + namespace: ingress +data: + use-ssl-certificate-for-ingress: "false" + external-status-address: $ingress_ip + proxy-connect-timeout: 10s + proxy-read-timeout: 10s + client-header-buffer-size: 64k + client-body-buffer-size: 64k + client-max-body-size: 1000m + proxy-buffers: 8 32k + proxy-body-size: 1024m + proxy-buffer-size: 32k + proxy-connect-timeout: 10s + proxy-read-timeout: 10s +EOF + + helm repo add nginx-stable https://helm.nginx.com/stable || echo true + helm repo up + kubectl create namespace ingress || echo true + helm upgrade --install nginx nginx-stable/nginx-ingress --version=0.15.0 --namespace ingress -f value.yaml + kubectl apply -f nginx-cm.yaml + kubectl patch svc nginx-nginx-ingress -n ingress --patch-file nginx-svc-patch.yaml +} + +function setup_k3s_gitops() { + cat > fluxcd-values.yaml << EOF +cli: + image: artifact.onwalk.net/public/fluxcd/flux-cli + tag: v2.2.0 + resources: + request: + cpu: 100m + memory: 64Mi + limits: + cpu: 200m + memory: 100Mi +helmController: + create: true + image: artifact.onwalk.net/public/fluxcd/helm-controller + tag: v0.37.0 + resources: + request: + cpu: 100m + memory: 64Mi + limits: + cpu: 
200m + memory: 100Mi +imageAutomationController: + image: artifact.onwalk.net/public/fluxcd/image-automation-controller + tag: v0.37.0 + resources: + request: + cpu: 100m + memory: 64Mi + limits: + cpu: 200m + memory: 100Mi +imageReflectionController: + image: artifact.onwalk.net/public/fluxcd/image-reflector-controller + tag: v0.31.1 + resources: + request: + cpu: 100m + memory: 64Mi + limits: + cpu: 200m + memory: 100Mi +kustomizeController: + create: true + image: artifact.onwalk.net/public/fluxcd/kustomize-controller + tag: v1.2.0 + resources: + request: + cpu: 100m + memory: 64Mi + limits: + cpu: 200m + memory: 100Mi +notificationController: + create: false + image: artifact.onwalk.net/public/fluxcd/notification-controller + tag: v1.2.2 + resources: + request: + cpu: 100m + memory: 64Mi + limits: + cpu: 200m + memory: 100Mi +sourceController: + create: true + image: artifact.onwalk.net/public/fluxcd/source-controller + tag: v1.2.2 + resources: + request: + cpu: 100m + memory: 64Mi + limits: + cpu: 200m + memory: 100Mi +EOF + + cat > cluster-config.yaml << EOF +apiVersion: source.toolkit.fluxcd.io/v1beta2 +kind: GitRepository +metadata: + name: stable + namespace: gitops-system +spec: + interval: 1m0s + ref: + branch: main + url: https://github.com/svc-design/gitops.git +--- +apiVersion: kustomize.toolkit.fluxcd.io/v1beta2 +kind: Kustomization +metadata: + name: cluster + namespace: gitops-system +spec: + interval: 1m0s + sourceRef: + kind: GitRepository + name: stable + path: ./clusters/k3s-local + prune: true +EOF + + helm repo add stable https://charts.onwalk.net + helm repo update + kubectl create namespace gitops-system || true + helm upgrade --install fluxcd stable/flux2 --version 2.12.1 -n gitops-system -f fluxcd-values.yaml + kubectl apply -f cluster-config.yaml && rm cluster-config.yaml -f +} + +# Main script +setup_k3s +setup_helm +setup_k3s_ingress diff --git a/scripts/k3s-cluster/setup-k3s-with-ingress.sh 
b/scripts/k3s-cluster/setup-k3s-with-ingress.sh new file mode 100644 index 0000000..ee32c12 --- /dev/null +++ b/scripts/k3s-cluster/setup-k3s-with-ingress.sh @@ -0,0 +1,226 @@ +#!/bin/sh + +function get_local_ip() { + local_ip=$(hostname -I | awk '{print $1}') + echo "$local_ip" +} + +function setup_k3s() { + local disable_proxy="--disable-kube-proxy" + local disable_cni="--flannel-backend=none --disable-network-policy" + local default="--disable=traefik,servicelb --data-dir=/opt/rancher/k3s --kube-apiserver-arg service-node-port-range=0-50000" + + sudo mkdir -pv /opt/rancher/k3s + + ping -c 1 google.com > /dev/null 2>&1 + if [ $? -eq 0 ]; then + echo "当前主机在国际网络上" + curl -sfL https://get.k3s.io | INSTALL_K3S_VERSION=$version sh -s - $default + else + echo "当前主机在大陆网络上" + curl -sfL https://rancher-mirror.rancher.cn/k3s/k3s-install.sh | INSTALL_K3S_VERSION=$version INSTALL_K3S_MIRROR=cn sh -s - $default + fi + mkdir -pv ~/.kube/ && sudo cp /etc/rancher/k3s/k3s.yaml ~/.kube/config +} + +function setup_helm() +{ + ping -c 1 google.com > /dev/null 2>&1 + if [ $? 
-eq 0 ]; then + echo "当前主机在国际网络上" + curl https://raw.githubusercontent.com/helm/helm/main/scripts/get-helm-3 | bash + else + echo "当前主机在大陆网络上" + case `uname -m` in + x86_64) ARCH=amd64; ;; + aarch64) ARCH=arm64; ;; + loongarch64) ARCH=loongarch64; ;; + *) echo "un-supported arch, exit ..."; exit 1; ;; + esac + sudo rm -rf helm.tar.gz* /usr/local/bin/helm || echo true + sudo wget --no-check-certificate https://mirrors.onwalk.net/tools/linux-${ARCH}/helm.tar.gz && sudo tar -xvpf helm.tar.gz -C /usr/local/bin/ + sudo chmod 755 /usr/local/bin/helm + fi +} + +function setup_k3s_ingress() { + local ingress_ip=$(get_local_ip) + + cat > value.yaml < nginx-cm.yaml << EOF +apiVersion: v1 +kind: ConfigMap +metadata: + name: nginx-nginx-ingress + namespace: ingress +data: + use-ssl-certificate-for-ingress: "false" + external-status-address: $ingress_ip + proxy-connect-timeout: 10s + proxy-read-timeout: 10s + client-header-buffer-size: 64k + client-body-buffer-size: 64k + client-max-body-size: 1000m + proxy-buffers: 8 32k + proxy-body-size: 1024m + proxy-buffer-size: 32k + proxy-connect-timeout: 10s + proxy-read-timeout: 10s +EOF + + helm repo add nginx-stable https://helm.nginx.com/stable || echo true + helm repo up + kubectl create namespace ingress || echo true + helm upgrade --install nginx nginx-stable/nginx-ingress --version=0.15.0 --namespace ingress -f value.yaml + kubectl apply -f nginx-cm.yaml + kubectl patch svc nginx-nginx-ingress -n ingress --patch-file nginx-svc-patch.yaml +} + +function setup_k3s_gitops() { + cat > fluxcd-values.yaml << EOF +cli: + image: artifact.onwalk.net/public/fluxcd/flux-cli + tag: v2.2.0 + resources: + request: + cpu: 100m + memory: 64Mi + limits: + cpu: 200m + memory: 100Mi +helmController: + create: true + image: artifact.onwalk.net/public/fluxcd/helm-controller + tag: v0.37.0 + resources: + request: + cpu: 100m + memory: 64Mi + limits: + cpu: 200m + memory: 100Mi +imageAutomationController: + image: 
artifact.onwalk.net/public/fluxcd/image-automation-controller + tag: v0.37.0 + resources: + request: + cpu: 100m + memory: 64Mi + limits: + cpu: 200m + memory: 100Mi +imageReflectionController: + image: artifact.onwalk.net/public/fluxcd/image-reflector-controller + tag: v0.31.1 + resources: + request: + cpu: 100m + memory: 64Mi + limits: + cpu: 200m + memory: 100Mi +kustomizeController: + create: true + image: artifact.onwalk.net/public/fluxcd/kustomize-controller + tag: v1.2.0 + resources: + request: + cpu: 100m + memory: 64Mi + limits: + cpu: 200m + memory: 100Mi +notificationController: + create: false + image: artifact.onwalk.net/public/fluxcd/notification-controller + tag: v1.2.2 + resources: + request: + cpu: 100m + memory: 64Mi + limits: + cpu: 200m + memory: 100Mi +sourceController: + create: true + image: artifact.onwalk.net/public/fluxcd/source-controller + tag: v1.2.2 + resources: + request: + cpu: 100m + memory: 64Mi + limits: + cpu: 200m + memory: 100Mi +EOF + + cat > nginx-cm.yaml << EOF +apiVersion: v1 +kind: ConfigMap +metadata: + name: nginx-nginx-ingress + namespace: ingress +data: + use-ssl-certificate-for-ingress: "false" + external-status-address: $ingress_ip + proxy-connect-timeout: 10s + proxy-read-timeout: 10s + client-header-buffer-size: 64k + client-body-buffer-size: 64k + client-max-body-size: 1000m + proxy-buffers: 8 32k + proxy-body-size: 1024m + proxy-buffer-size: 32k + proxy-connect-timeout: 10s + proxy-read-timeout: 10s +EOF + + cat > cluster-config.yaml << EOF +apiVersion: source.toolkit.fluxcd.io/v1beta2 +kind: GitRepository +metadata: + name: stable + namespace: gitops-system +spec: + interval: 1m0s + ref: + branch: main + url: https://github.com/svc-design/gitops.git +--- +apiVersion: kustomize.toolkit.fluxcd.io/v1beta2 +kind: Kustomization +metadata: + name: cluster + namespace: gitops-system +spec: + interval: 1m0s + sourceRef: + kind: GitRepository + name: stable + path: ./clusters/k3s-local + prune: true +EOF + + helm repo 
add stable https://charts.onwalk.net + helm repo update + kubectl create namespace gitops-system || true + helm upgrade --install fluxcd stable/flux2 --version 2.12.1 -n gitops-system -f fluxcd-values.yaml + kubectl apply -f cluster-config.yaml && rm cluster-config.yaml -f +} + +# Main script +setup_k3s +setup_helm +setup_k3s_ingress +setup_k3s_gitops diff --git a/scripts/k3s-cluster/setup-nginx-ingress.sh b/scripts/k3s-cluster/setup-nginx-ingress.sh new file mode 100644 index 0000000..5ccc119 --- /dev/null +++ b/scripts/k3s-cluster/setup-nginx-ingress.sh @@ -0,0 +1,81 @@ +#!/bin/bash + +setup_k3s_ingress() { + local ingress_ip="$1" + local ingress_label="$2" + + if [[ -z "$ingress_ip" ]]; then + ingress_ip=$(hostname -I | awk '{print $1}') + fi + + echo "📦 使用 ingress IP: $ingress_ip" + + cat > value.yaml < nginx-cm.yaml < nginx-svc-patch.yaml </dev/null || true + + echo "🚀 安装 ingress-nginx..." + helm upgrade --install nginx ingress-nginx/ingress-nginx \ + --version 4.9.0 \ + --namespace ingress \ + -f value.yaml + + echo "🔧 应用自定义 ConfigMap 和 Service IP Patch..." 
+ kubectl apply -f nginx-cm.yaml + kubectl patch svc nginx-ingress-nginx-controller -n ingress --patch-file nginx-svc-patch.yaml + + if [[ -n "$ingress_label" ]]; then + echo "🏷️ 设置节点标签: $ingress_label" + kubectl label nodes --selector="kubernetes.io/hostname=$(hostname)" "$ingress_label" --overwrite || true + fi + + echo "✅ NGINX Ingress Controller 安装完成,IP: $ingress_ip" +} + +# Example invocation (a specific IP can be passed in) +setup_k3s_ingress 8.130.10.142 + diff --git a/scripts/k3s_all_in_one.sh b/scripts/k3s_all_in_one.sh new file mode 100644 index 0000000..9d5431d --- /dev/null +++ b/scripts/k3s_all_in_one.sh @@ -0,0 +1,94 @@ +#!/bin/bash +# Requires bash: this script uses the non-POSIX 'function' keyword and 'local', which a POSIX sh such as dash rejects. + +function get_local_ip() { + local_ip=$(hostname -I | awk '{print $1}') + echo "$local_ip" +} + +function setup_k3s() { + local disable_proxy="--disable-kube-proxy" + local disable_cni="--flannel-backend=none --disable-network-policy" + local default="--disable=traefik,servicelb --data-dir=/opt/rancher/k3s --kube-apiserver-arg service-node-port-range=0-50000" + + sudo mkdir -pv /opt/rancher/k3s + + ping -c 1 google.com > /dev/null 2>&1 + if [ $? -eq 0 ]; then + echo "当前主机在国际网络上" + curl -sfL https://get.k3s.io | INSTALL_K3S_VERSION=$version sh -s - $default + else + echo "当前主机在大陆网络上" + curl -sfL https://rancher-mirror.rancher.cn/k3s/k3s-install.sh | INSTALL_K3S_VERSION=$version INSTALL_K3S_MIRROR=cn sh -s - $default + fi + mkdir -pv ~/.kube/ && sudo cp /etc/rancher/k3s/k3s.yaml ~/.kube/config +} + +function setup_helm() +{ + ping -c 1 google.com > /dev/null 2>&1 + if [ $? 
-eq 0 ]; then + echo "当前主机在国际网络上" + curl https://raw.githubusercontent.com/helm/helm/main/scripts/get-helm-3 | bash + else + echo "当前主机在大陆网络上" + case `uname -m` in + x86_64) ARCH=amd64; ;; + aarch64) ARCH=arm64; ;; + loongarch64) ARCH=loongarch64; ;; + *) echo "un-supported arch, exit ..."; exit 1; ;; + esac + sudo rm -rf helm.tar.gz* /usr/local/bin/helm || echo true + sudo wget --no-check-certificate https://mirrors.onwalk.net/tools/linux-${ARCH}/helm.tar.gz && sudo tar -xvpf helm.tar.gz -C /usr/local/bin/ + sudo chmod 755 /usr/local/bin/helm + fi +} + +function setup_k3s_ingress() { + local ingress_ip=$(get_local_ip) + + cat > value.yaml < nginx-cm.yaml << EOF +apiVersion: v1 +kind: ConfigMap +metadata: + name: nginx-nginx-ingress + namespace: ingress +data: + use-ssl-certificate-for-ingress: "false" + external-status-address: $ingress_ip + proxy-connect-timeout: 10s + proxy-read-timeout: 10s + client-header-buffer-size: 64k + client-body-buffer-size: 64k + client-max-body-size: 1000m + proxy-buffers: 8 32k + proxy-body-size: 1024m + proxy-buffer-size: 32k + proxy-connect-timeout: 10s + proxy-read-timeout: 10s +EOF + + helm repo add nginx-stable https://helm.nginx.com/stable || echo true + helm repo up + kubectl create namespace ingress || echo true + helm upgrade --install nginx nginx-stable/nginx-ingress --version=0.15.0 --namespace ingress -f value.yaml + kubectl apply -f nginx-cm.yaml + kubectl patch svc nginx-nginx-ingress -n ingress --patch-file nginx-svc-patch.yaml +} + +# Main script +setup_k3s +setup_helm +setup_k3s_ingress diff --git a/scripts/kong-gateway/GatewayAPI-deepflow-example.yaml b/scripts/kong-gateway/GatewayAPI-deepflow-example.yaml new file mode 100644 index 0000000..fb6cd0a --- /dev/null +++ b/scripts/kong-gateway/GatewayAPI-deepflow-example.yaml @@ -0,0 +1,44 @@ +kubectl create secret tls onwalk-tls --cert=/etc/ssl/onwalk.net.pem --key=/etc/ssl/onwalk.net.key -n deepflow +echo " +apiVersion: gateway.networking.k8s.io/v1 +kind: Gateway 
+metadata: + name: deepflow-gateway + namespace: deepflow + annotations: + konghq.com/publish-service: kong/kong-gateway-proxy +spec: + gatewayClassName: kong + listeners: + - name: https + port: 443 + protocol: HTTPS + hostname: "deepflow-demo.onwalk.net" # 匹配的域名 + tls: + mode: Terminate + certificateRefs: + - name: onwalk-tls # 引用存放证书的 Secret + allowedRoutes: + namespaces: + from: All +--- +apiVersion: gateway.networking.k8s.io/v1 +kind: HTTPRoute +metadata: + name: deepflow-demo-route + namespace: deepflow +spec: + parentRefs: + - name: deepflow-gateway + namespace: deepflow + hostnames: + - deepflow-demo.onwalk.net # 匹配的域名 + rules: + - matches: + - path: + type: PathPrefix + value: / # 匹配所有路径请求 + backendRefs: + - name: front-end # 目标服务名 + port: 80 # 后端服务的端口 +" | kubectl apply -f - diff --git a/scripts/kong-gateway/GatewayAPI-example.yaml b/scripts/kong-gateway/GatewayAPI-example.yaml new file mode 100644 index 0000000..aef9ead --- /dev/null +++ b/scripts/kong-gateway/GatewayAPI-example.yaml @@ -0,0 +1,81 @@ +apiVersion: gateway.networking.k8s.io/v1 +kind: Gateway +metadata: + name: example-gateway + namespace: kong + annotations: + konghq.com/publish-service: kong/kong-gateway-proxy +spec: + gatewayClassName: kong + listeners: + - name: https + port: 443 + protocol: HTTPS + hostname: "*.onwalk.net" # ⭐ 通配符域名,匹配所有子域 + tls: + mode: Terminate + certificateRefs: + - name: onwalk-tls # 存放 *.onwalk.net 证书的 Secret + allowedRoutes: + namespaces: + from: All +--- +apiVersion: gateway.networking.k8s.io/v1 +kind: HTTPRoute +metadata: + name: ui-route + namespace: ai +spec: + parentRefs: + - name: example-gateway + namespace: kong + hostnames: + - open-webui.onwalk.net + rules: + - matches: + - path: + type: PathPrefix + value: / + backendRefs: + - name: open-webui + port: 80 +--- +apiVersion: gateway.networking.k8s.io/v1 +kind: HTTPRoute +metadata: + name: keycloak-route + namespace: keycloak +spec: + parentRefs: + - name: example-gateway + namespace: kong + hostnames: + 
- keycloak.onwalk.net + rules: + - matches: + - path: + type: PathPrefix + value: / + backendRefs: + - name: keycloak + port: 80 +--- +apiVersion: gateway.networking.k8s.io/v1 +kind: HTTPRoute +metadata: + name: argocd-route + namespace: argocd +spec: + parentRefs: + - name: example-gateway + namespace: kong # ⭐ 必须指定! + hostnames: + - argocd.onwalk.net # ⭐ 注意要匹配实际访问域名 + rules: + - matches: + - path: + type: PathPrefix + value: / + backendRefs: + - name: argocd-server + port: 80 diff --git a/scripts/kong-gateway/GatewayAPI-http-example.yaml b/scripts/kong-gateway/GatewayAPI-http-example.yaml new file mode 100644 index 0000000..9c53c16 --- /dev/null +++ b/scripts/kong-gateway/GatewayAPI-http-example.yaml @@ -0,0 +1,81 @@ +kubectl create secret tls onwalk-tls --cert=/etc/ssl/onwalk.net.pem --key=/etc/ssl/onwalk.net.key +echo " +apiVersion: apps/v1 +kind: Deployment +metadata: + name: nginx + namespace: default +spec: + replicas: 1 # 可根据需要调整副本数 + selector: + matchLabels: + app: nginx + template: + metadata: + labels: + app: nginx + spec: + containers: + - name: nginx + image: nginx:latest # 使用最新的 Nginx 镜像 + ports: + - containerPort: 80 # Nginx 默认的 HTTPS 端口 +--- +apiVersion: v1 +kind: Service +metadata: + name: nginx-svc + namespace: default +spec: + selector: + app: nginx + ports: + - protocol: TCP + port: 80 # 公开的服务端口 + targetPort: 80 # 容器内部的端口 + type: ClusterIP # 可以根据需要选择 NodePort 或 LoadBalancer 类型 +--- +apiVersion: gateway.networking.k8s.io/v1 +kind: Gateway +metadata: + name: demo-gateway + namespace: default + annotations: + konghq.com/publish-service: kong/kong-gateway-proxy +spec: + gatewayClassName: kong + listeners: + - name: https + port: 443 + protocol: HTTPS + hostname: "demo.onwalk.net" # 匹配的域名 + tls: + mode: Terminate + certificateRefs: + - name: onwalk-tls # 引用存放证书的 Secret + allowedRoutes: + namespaces: + from: All +--- +apiVersion: gateway.networking.k8s.io/v1 +kind: HTTPRoute +metadata: + name: demo-route + namespace: default +spec: + parentRefs: + - 
name: demo-gateway + namespace: default + hostnames: + - demo.onwalk.net # 匹配的域名 + rules: + - matches: + - path: + type: PathPrefix + value: / # 匹配所有路径请求 + backendRefs: + - name: nginx-svc # 目标服务名 + port: 80 # 后端服务的端口 +" | kubectl apply -f - + +curl -ksv https://demo.onwalk.net/ --resolve demo.onwalk.net:443:172.30.0.10 diff --git a/scripts/kong-gateway/deploy-kong-gateway.sh b/scripts/kong-gateway/deploy-kong-gateway.sh new file mode 100644 index 0000000..8feaf14 --- /dev/null +++ b/scripts/kong-gateway/deploy-kong-gateway.sh @@ -0,0 +1,77 @@ +kubectl apply -f https://github.com/kubernetes-sigs/gateway-api/releases/download/v1.1.0/standard-install.yaml + +helm repo add kong https://charts.konghq.com +helm repo update +cat > kong-values.yaml < ${dest_path}" + curl -sLo "$dest_path" "$src_url" + fi +} + +export_airgap_images() { + local arch=$1 + local out="${BASE_DIR}/images/k3s-airgap-images-${arch}.tar" + local ns="k8s.io" + + nerd() { + sudo nerdctl --namespace $ns --address /run/k3s/containerd/containerd.sock "$@" + } + + # ---- 核心镜像列表 ---- + local core_imgs=( + docker.io/rancher/mirrored-pause:3.6 + docker.io/rancher/mirrored-metrics-server:v0.6.3 + docker.io/rancher/mirrored-coredns-coredns:1.10.1 + docker.io/rancher/mirrored-prometheus-node-exporter:v1.3.1 + docker.io/rancher/mirrored-kube-state-metrics-kube-state-metrics:v2.12.0 + ) + + echo "[INFO] 拉取核心镜像…" + for img in "${core_imgs[@]}"; do + nerd pull "$img" + done + + echo "[INFO] 保存离线包 → $out" + mkdir -p "$(dirname "$out")" + nerd save -o "$out" "${core_imgs[@]}" + + echo "[OK] 完成:$out 已生成" +} + +######################################## +# 写 node‑exporter YAML → addons/node-exporter.yaml +######################################## +generate_node_exporter_yaml() { + local ADDON_DIR=${BASE_DIR}/addons + mkdir -p "$ADDON_DIR" + + cat > "${ADDON_DIR}/node-exporter.yaml" <<'EOF' +apiVersion: v1 +kind: ServiceAccount +metadata: + name: node-exporter + namespace: kube-system +--- +apiVersion: 
rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: {name: node-exporter} +rules: +- apiGroups: [""] + resources: ["nodes", "nodes/proxy", "services", "endpoints"] + verbs: ["get", "list", "watch"] +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: {name: node-exporter} +roleRef: {apiGroup: rbac.authorization.k8s.io, kind: ClusterRole, name: node-exporter} +subjects: +- kind: ServiceAccount + name: node-exporter + namespace: kube-system +--- +apiVersion: apps/v1 +kind: DaemonSet +metadata: + name: node-exporter + namespace: kube-system +spec: + selector: {matchLabels: {app: node-exporter}} + template: + metadata: {labels: {app: node-exporter}} + spec: + hostPID: true + hostNetwork: true + serviceAccountName: node-exporter + containers: + - name: node-exporter + image: docker.io/rancher/mirrored-prometheus-node-exporter:v1.3.1 + imagePullPolicy: IfNotPresent + args: + - "--path.procfs=/host/proc" + - "--path.sysfs=/host/sys" + - "--path.rootfs=/host/root" + securityContext: {privileged: true} + resources: + requests: {cpu: "50m", memory: "30Mi"} + volumeMounts: + - {name: proc, mountPath: /host/proc, readOnly: true} + - {name: sys, mountPath: /host/sys, readOnly: true} + - {name: rootfs, mountPath: /host/root, readOnly: true} + volumes: + - {name: proc, hostPath: {path: /proc}} + - {name: sys, hostPath: {path: /sys}} + - {name: rootfs, hostPath: {path: /}} +--- +apiVersion: v1 +kind: Service +metadata: + name: node-exporter + namespace: kube-system + labels: {app: node-exporter} +spec: + clusterIP: None + selector: {app: node-exporter} + ports: + - {name: metrics, port: 9100, targetPort: 9100} +EOF + echo "[OK] 生成 ${ADDON_DIR}/node-exporter.yaml" +} + +######################################## +# 写 kube‑state‑metrics YAML → addons/kube-state-metrics.yaml +######################################## +generate_kube_state_metrics_yaml() { + local ADDON_DIR=${BASE_DIR}/addons + mkdir -p "$ADDON_DIR" + + cat > 
"${ADDON_DIR}/kube-state-metrics.yaml" <<'EOF' +apiVersion: v1 +kind: ServiceAccount +metadata: + name: kube-state-metrics + namespace: kube-system +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: {name: kube-state-metrics} +rules: +- apiGroups: [""] + resources: + ["pods","nodes","namespaces","services","endpoints", + "persistentvolumes","persistentvolumeclaims", + "configmaps","secrets","limitranges","replicationcontrollers"] + verbs: ["get","list","watch"] +- apiGroups: ["apps"] + resources: ["statefulsets","daemonsets","deployments","replicasets"] + verbs: ["get","list","watch"] +- apiGroups: ["batch"] + resources: ["cronjobs","jobs"] + verbs: ["get","list","watch"] +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: {name: kube-state-metrics} +roleRef: {apiGroup: rbac.authorization.k8s.io, kind: ClusterRole, name: kube-state-metrics} +subjects: +- kind: ServiceAccount + name: kube-state-metrics + namespace: kube-system +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: kube-state-metrics + namespace: kube-system +spec: + replicas: 1 + selector: {matchLabels: {app: kube-state-metrics}} + template: + metadata: {labels: {app: kube-state-metrics}} + spec: + serviceAccountName: kube-state-metrics + containers: + - name: kube-state-metrics + image: docker.io/rancher/mirrored-kube-state-metrics-kube-state-metrics:v2.12.0 + imagePullPolicy: IfNotPresent + ports: + - {name: metrics, containerPort: 8080} + - {name: telemetry, containerPort: 8081} + resources: + requests: {cpu: "40m", memory: "60Mi"} +--- +apiVersion: v1 +kind: Service +metadata: + name: kube-state-metrics + namespace: kube-system + labels: {app: kube-state-metrics} +spec: + selector: {app: kube-state-metrics} + ports: + - {name: metrics, port: 8080, targetPort: 8080} + - {name: telemetry, port: 8081, targetPort: 8081} +EOF + echo "[OK] 生成 ${ADDON_DIR}/kube-state-metrics.yaml" +} + +for ARCH in "${ARCH_LIST[@]}"; do + echo -e 
"\n[INFO] 准备架构:${ARCH}" + + safe_copy "${K3S_URL_BASE}/k3s" "${BASE_DIR}/bin/k3s-${ARCH}" + chmod +x "${BASE_DIR}/bin/k3s-${ARCH}" + + safe_copy "https://dl.k8s.io/release/v1.29.1/bin/linux/${ARCH}/kubectl" "${BASE_DIR}/bin/kubectl-${ARCH}" + chmod +x "${BASE_DIR}/bin/kubectl-${ARCH}" + + TMP_HELM="/tmp/helm-${ARCH}.tgz" + safe_copy "https://get.helm.sh/helm-${HELM_VERSION}-linux-${ARCH}.tar.gz" "$TMP_HELM" + tar -xzf "$TMP_HELM" -C /tmp + mv "/tmp/linux-${ARCH}/helm" "${BASE_DIR}/bin/helm-${ARCH}" + chmod +x "${BASE_DIR}/bin/helm-${ARCH}" + + safe_copy "https://github.com/containerd/nerdctl/releases/download/v${NERDCTL_VERSION}/nerdctl-${NERDCTL_VERSION}-linux-${ARCH}.tar.gz" \ + "/tmp/nerdctl-${NERDCTL_VERSION}-linux-${ARCH}.tar.gz" + tar -xzf "/tmp/nerdctl-${NERDCTL_VERSION}-linux-${ARCH}.tar.gz" -C /tmp + cp "/tmp/nerdctl" "${BASE_DIR}/bin/nerdctl-${ARCH}" + chmod +x "${BASE_DIR}/bin/nerdctl-${ARCH}" + + safe_copy "https://github.com/containernetworking/plugins/releases/download/${CNI_VERSION}/cni-plugins-linux-${ARCH}-${CNI_VERSION}.tgz" \ + "${BASE_DIR}/cni-plugins/cni-plugins-linux-${ARCH}-${CNI_VERSION}.tgz" + + export_airgap_images "$ARCH" + + generate_node_exporter_yaml + generate_kube_state_metrics_yaml +done + +safe_copy "https://get.k3s.io" "${BASE_DIR}/install/k3s-official-install.sh" +chmod +x "${BASE_DIR}/install/k3s-official-install.sh" + +# 生成 install-server.sh +cat > "${BASE_DIR}/install-server.sh" <<'EOF' +#!/bin/bash +set -e + +ARCH=$(uname -m) +case "$ARCH" in + x86_64 | amd64) ARCH="amd64" ;; # Intel/AMD 64 位 + aarch64 | arm64) ARCH="arm64" ;; # ARM 64 位 + *) + echo "[ERROR] 不支持的架构:$ARCH" + exit 1 + ;; +esac + +# 路径定义 +BIN_DIR="./bin" +K3S_BIN="${BIN_DIR}/k3s-${ARCH}" +HELM_BIN="${BIN_DIR}/helm-${ARCH}" +KUBECTL_BIN="${BIN_DIR}/kubectl-${ARCH}" +NERDCTL_BIN="${BIN_DIR}/nerdctl-${ARCH}" + +echo "[INFO] 安装 CLI 工具(${ARCH})到 /usr/local/bin" + +install_bin() { + local src=$1 + local dst=$2 + echo " ↳ $dst" + sudo cp "$src" "$dst" + sudo chmod +x 
"$dst" +} + +install_bin "$K3S_BIN" /usr/local/bin/k3s +install_bin "$HELM_BIN" /usr/local/bin/helm +install_bin "$KUBECTL_BIN" /usr/local/bin/kubectl +install_bin "$NERDCTL_BIN" /usr/local/bin/nerdctl + +echo "[INFO] 执行官方离线安装脚本" +INSTALL_K3S_SKIP_DOWNLOAD=true \ +INSTALL_K3S_EXEC="server \ + --write-kubeconfig-mode 644 \ + --disable=traefik,servicelb,local-storage \ + --kube-apiserver-arg=service-node-port-range=0-50000" \ +bash "install/k3s-official-install.sh" + +echo "[INFO] 准备 airgap 镜像" +sudo nerdctl \ +--namespace k8s.io \ +--address /run/k3s/containerd/containerd.sock load -i images/k3s-airgap-images-amd64.tar + +echo "[INFO] 等待 K3s 启动..." +sleep 5 + +echo "[INFO] 应用默认组件(如存在)" +mkdir -pv ~/.kube/ +cp -v /etc/rancher/k3s/k3s.yaml ~/.kube/config +kubectl apply -f addons/node-exporter.yaml || true +kubectl apply -f addons/kube-state-metrics.yaml || true + +echo "[SUCCESS] 离线 K3s 安装完成 ✅" +EOF + +chmod +x "${BASE_DIR}/install-server.sh" + +# 生成 install-agent.sh +cat > "${BASE_DIR}/install-agent.sh" <<'EOF' +#!/bin/bash +set -e + +ARCH=$(uname -m) +case "$ARCH" in + x86_64 | amd64) ARCH="amd64" ;; + aarch64 | arm64) ARCH="arm64" ;; + *) + echo "[ERROR] 不支持的架构:$ARCH" + exit 1 + ;; +esac + +if [[ -z "$K3S_TOKEN" || -z "$K3S_URL" ]]; then + echo "[ERROR] 你必须设置环境变量 K3S_TOKEN 和 K3S_URL" + echo "例如:" + echo " export K3S_TOKEN=K10xxxxxxxx" + echo " export K3S_URL=https://:6443" + exit 1 +fi + +echo "[INFO] 安装 CLI 工具(${ARCH})到 /usr/local/bin" + +# 路径定义 +BIN_DIR="./bin" +K3S_BIN="${BIN_DIR}/k3s-${ARCH}" +NERDCTL_BIN="${BIN_DIR}/nerdctl-${ARCH}" + + +install_bin() { + local src=$1 + local dst=$2 + echo " ↳ $dst" + sudo cp "$src" "$dst" + sudo chmod +x "$dst" +} + +echo "[INFO] 安装 CLI 工具(${ARCH})到 /usr/local/bin" + +install_bin "$K3S_BIN" /usr/local/bin/k3s +install_bin "$NERDCTL_BIN" /usr/local/bin/nerdctl + +sudo chmod +x /usr/local/bin/k3s +sudo chmod +x /usr/local/bin/neddctl + +echo "[INFO] 执行官方 agent 安装脚本(使用离线模式)" +INSTALL_K3S_SKIP_DOWNLOAD=true \ 
+INSTALL_K3S_EXEC="agent" \ +bash install/k3s-official-install.sh + +echo "[INFO] 准备 airgap 镜像" +sudo nerdctl \ +--namespace k8s.io \ +--address /run/k3s/containerd/containerd.sock load -i images/k3s-airgap-images-${ARCH}.tar + +echo "[SUCCESS] Agent 节点已完成离线安装 ✅" + +EOF + +chmod +x "${BASE_DIR}/install-agent.sh" +echo "[OK] 已生成 install-agent.sh ✅" + +cat > "${BASE_DIR}/README.md" <:6443 +export K3S_TOKEN=K10xxxxxxxx +bash ./install-agent.sh +\`\`\` + +### 3. 验证安装状态 + +\`\`\`bash +kubectl get nodes +kubectl get pods -A +\`\`\` + +--- + +## 🛠️ 使用 nerdctl 操作 K3s 内部 containerd + +\`\`\`bash +./bin/nerdctl-\$(uname -m) \\ + --namespace k8s.io \\ + --address /run/k3s/containerd/containerd.sock \\ + images +\`\`\` + +--- + +## 📂 目录结构示例 + +\`\`\` +${BASE_DIR}/ +├── bin/ +│ ├── k3s-(amd64/arm64) +│ ├── helm-(amd64/arm64) +│ ├── kubectl-(amd64/arm64) +│ └── nerdctl-(amd64/arm64) +├── images/ +│ └── k3s-airgap-images-amd64.tar +├── addons/ +│ ├── metrics-server.yaml +│ ├── node-exporter.yaml +│ └── kube-state-metrics.yaml +├── install-agent.sh +├── install-server.sh +├── README.md +\`\`\` + +--- +EOF + +echo -e "\n✅ [DONE] 离线安装包构建完成:${BASE_DIR}/" +tree "${BASE_DIR}" || ls -R "${BASE_DIR}" diff --git a/scripts/merge_csv.py b/scripts/merge_csv.py new file mode 100644 index 0000000..4b975ad --- /dev/null +++ b/scripts/merge_csv.py @@ -0,0 +1,30 @@ +import sys +import glob +import pandas as pd + +def merge_csv_files(src_pattern, dest_file): + # 获取匹配的源 CSV 文件列表 + csv_files = glob.glob(src_pattern) + + if not csv_files: + print(f"没有找到匹配的文件: {src_pattern}") + return + + print(f"找到以下文件: {csv_files}") + + # 使用 pandas 读取所有 CSV 文件并合并 + combined_df = pd.concat([pd.read_csv(file) for file in csv_files], ignore_index=True) + + # 将合并后的数据写入目标 CSV 文件 + combined_df.to_csv(dest_file, index=False) + print(f"合并完成,结果已保存到 {dest_file}") + +if __name__ == "__main__": + if len(sys.argv) != 3: + print("使用方法: python merge_csv.py <源文件模式> <目标文件>") + sys.exit(1) + + src_pattern = sys.argv[1] + dest_file = 
sys.argv[2] + + merge_csv_files(src_pattern, dest_file) diff --git a/scripts/merge_vars.py b/scripts/merge_vars.py new file mode 100644 index 0000000..65ab071 --- /dev/null +++ b/scripts/merge_vars.py @@ -0,0 +1,91 @@ + +import os +import sys +import yaml +import json +from secret.hcp import secret + +def check_env_vars(vars): + """检查环境变量是否存在并且非空""" + for var in vars: + value = os.environ.get(var) + if value is None or value == "": + print(f"Error: Environment variable '{var}' is not set or is empty.") + sys.exit(1) + +def main(): + # 定义需要检查的环境变量 + required_vars = [ + "DOMAIN", + "CLUSTER_NAME", + "SUDO_PASSWORD", + "HCP_API_URL", + "HCP_CLIENT_ID", + "HCP_CLIENT_SECRET", + "GATEWAY_PUBLIC_CONFIG" + ] + + # 检查环境变量 + check_env_vars(required_vars) + + # 从环境变量获取输入 + domain = os.environ.get("DOMAIN") + cluster_name = os.environ.get("CLUSTER_NAME") + ansible_become_pass = os.environ.get("SUDO_PASSWORD") + hcp_api_url = os.environ.get("HCP_API_URL") + hcp_client_id = os.environ.get("HCP_CLIENT_ID") + hcp_client_secret = os.environ.get("HCP_CLIENT_SECRET") + gateway_public_config = os.environ.get("GATEWAY_PUBLIC_CONFIG") + + # 检查并去掉开头的 '$' + if gateway_public_config.startswith('$'): + gateway_public_config = gateway_public_config[1:] + + # 获取 HCP API 令牌 + api_token = secret.get_hcp_api_token(hcp_client_id, hcp_client_secret) + + # 获取密钥数据 + secret_data = secret.get_secret_data(hcp_api_url, api_token) + + # 将 gateway_public_config 转换为字典 + public_config_dict = yaml.safe_load(gateway_public_config) + + # 从密钥数据中提取 private_key + private_key_name = f"{public_config_dict.get('name', '')}_private_key" + private_key = secret.get_secret_value_by_name(secret_data, private_key_name) + + if private_key is None: + print(f"Error: Secret value for '{private_key_name}' not found.") + sys.exit(1) + + # 填充 private_key + public_config_dict['private_key'] = private_key + + # 填充 peers 部分的 public_key + for peer in public_config_dict.get('peers', []): + peer_name = peer.get('name', '') + 
public_key_name = f"{peer_name}_public_key" + public_key = secret.get_secret_value_by_name(secret_data, public_key_name) + + if public_key is None: + print(f"Error: Secret value for '{public_key_name}' not found.") + sys.exit(1) + + peer['public_key'] = public_key + + # 构建最终的配置字典 + final_config = { + "domain": domain, + "cluster_name": cluster_name, + "ansible_become_pass": ansible_become_pass, + "gateway": { + "public_config": public_config_dict + } + } + + # 输出为 JSON + with open("extra_vars.json", "w") as json_file: + json.dump(final_config, json_file, indent=2) + +if __name__ == "__main__": + main() diff --git a/scripts/netcheck.sh b/scripts/netcheck.sh new file mode 100644 index 0000000..a065f79 --- /dev/null +++ b/scripts/netcheck.sh @@ -0,0 +1,62 @@ +#!/usr/bin/env bash +# netcheck.sh — Diagnose DNS / TLS / Route problems for a given target + +TARGET=${1:-fonts.gstatic.com} # 默认检测 fonts.gstatic.com,也可自定义 +PROXY=${https_proxy:-""} + +if [ -z "$1" ]; then + echo "Usage: $0 " + echo "Example: $0 accounts.google.com" + echo + echo "No argument supplied, using default target: $TARGET" +fi + +echo "=== 🌐 Network Diagnostic for $TARGET ===" +echo "Time: $(date)" +echo + +echo "1️⃣ Checking DNS resolution..." +dig +short "$TARGET" || nslookup "$TARGET" +echo + +IP=$(dig +short "$TARGET" | grep -m1 -Eo '([0-9]{1,3}\.){3}[0-9]{1,3}') +if [ -z "$IP" ]; then + echo "❌ DNS failed — cannot resolve $TARGET" + exit 1 +fi +echo "✅ DNS OK → $TARGET resolved to $IP" +echo + +echo "2️⃣ Checking basic connectivity..." +ping -c 3 -W 2 "$IP" >/dev/null 2>&1 && echo "✅ Ping reachable ($IP)" || echo "⚠️ Ping not reachable (may be ICMP blocked)" +echo + +echo "3️⃣ Checking route path..." +traceroute -m 15 -w 2 "$IP" || echo "⚠️ Traceroute failed — possibly blocked or proxied" +echo + +echo "4️⃣ Testing HTTPS handshake (TLS)..." 
+if [ -n "$PROXY" ]; then + echo "Using proxy: $PROXY" +fi + +curl -v --connect-timeout 10 -4 -I "https://$TARGET" 2>&1 | egrep "Trying|Connected|SSL|error|subject|issuer|HTTP" +RC=$? +echo + +if [ $RC -eq 0 ]; then + echo "✅ TLS handshake successful — outbound HTTPS working" +else + echo "❌ TLS handshake failed — outbound 443 likely filtered or intercepted" +fi + +echo +echo "5️⃣ Summary:" +if [ $RC -ne 0 ]; then + echo "→ Problem most likely in:" + echo " • DNS (if Step 1 failed)" + echo " • Firewall/Proxy (if Step 2/3 OK but Step 4 fails)" + echo " • TLS interception (if Step 4 shows certificate mismatch)" +else + echo "✅ Everything looks fine — network path and TLS normal" +fi diff --git a/scripts/network-config/ubuntu/init-wsl.sh b/scripts/network-config/ubuntu/init-wsl.sh new file mode 100644 index 0000000..8a18432 --- /dev/null +++ b/scripts/network-config/ubuntu/init-wsl.sh @@ -0,0 +1,40 @@ +#!/bin/bash + +set -e + +# ✅ 1. 安装 openssh-server +echo "🔧 安装 openssh-server..." +sudo apt update +sudo apt install -y openssh-server + +# ✅ 2. 配置 sshd 默认启动(适配 systemd) +echo "📦 启用 SSH 服务..." +sudo systemctl enable ssh +sudo systemctl start ssh + +# ✅ 3. 配置静态 IP(通过 systemd-networkd) +echo "🌐 配置静态 IP 地址 10.253.0.2..." 
+sudo mkdir -p /etc/systemd/network + +cat <@$WSLStaticIP" diff --git a/scripts/pipeline-library/vars/ansibleSteps.groovy b/scripts/pipeline-library/vars/ansibleSteps.groovy new file mode 100644 index 0000000..28359fd --- /dev/null +++ b/scripts/pipeline-library/vars/ansibleSteps.groovy @@ -0,0 +1,74 @@ +// pipeline-library/vars/ansibleSteps.groovy + +// 检出代码 +def checkoutCode() { + stage('Checkout repository and submodules') { + agent { + docker { image 'your-docker-image' } // 替换为您的 Docker 镜像 + } + steps { + checkout scm + } + } +} + +// 预先设置 +def preSetup(String sshPassword) { + stage('Pre Setup') { + agent { + docker { image 'your-docker-image' } // 替换为您的 Docker 镜像 + } + steps { + script { + sh "echo \"${sshPassword}\" > ~/.vault_pass.txt" + sh "echo 'ansible_password: \'xxxx\'' >> inventory/group_vars/all.yml" + sh "echo 'ansible_become_password: \'xxxx\'' >> inventory/group_vars/all.yml" + } + } + } +} + +// 部署 +def deploy(String sshUser, String instanceName, String installVersion) { + stage('Deploy Ignition Server') { + agent { + docker { image 'your-docker-image' } // 替换为您的 Docker 镜像 + } + steps { + script { + sh "export ANSIBLE_HOST_KEY_CHECKING=False" + sh "ansible-playbook -u ${sshUser} -i inventor.ini -kK playbooks/server.yml -l ${instanceName} -e 'ign_install_ver=${installVersion}' --vault-password-file .vault_pass.txt --diff" + } + } + } +} + +// 后续设置 +def postSetup() { + stage('Post Setup') { + agent { + docker { image 'your-docker-image' } // 替换为您的 Docker 镜像 + } + steps { + script { + sh "export ANSIBLE_HOST_KEY_CHECKING=False" + } + } + } +} + +// 检查 +def check() { + stage('Check') { + agent { + docker { image 'your-docker-image' } // 替换为您的 Docker 镜像 + } + steps { + script { + // Add your check logic here + } + } + } +} + +return this // 返回以便导出所有函数 diff --git a/scripts/pulp-installer.sh b/scripts/pulp-installer.sh new file mode 100644 index 0000000..465df22 --- /dev/null +++ b/scripts/pulp-installer.sh @@ -0,0 +1,120 @@ +#!/bin/bash +set -e + +echo 
"🚀 开始离线安装 Pulp Operator..." + +# 安装 nerdctl(如存在) +if [ -f nerdctl.tar.gz ]; then + echo "📦 解压 nerdctl..." + tar xzvf nerdctl.tar.gz -C /usr/local/bin/ +fi + +# 导入镜像 +echo "🚀 导入 pulp-operator 镜像..." + +IMAGES=( + "images/pulp-operator.tar" + "images/kube-rbac-proxy.tar" +) + +if command -v docker &>/dev/null && docker info &>/dev/null; then + for img in "${IMAGES[@]}"; do + docker load -i "$img" + done +elif [ -S /run/k3s/containerd/containerd.sock ]; then + export CONTAINERD_ADDRESS=/run/k3s/containerd/containerd.sock + for img in "${IMAGES[@]}"; do + nerdctl --namespace k8s.io load -i "$img" + done +elif [ -S /run/containerd/containerd.sock ]; then + export CONTAINERD_ADDRESS=/run/containerd/containerd.sock + for img in "${IMAGES[@]}"; do + nerdctl --namespace k8s.io load -i "$img" + done +else + echo "❌ 没有可用的容器运行时" + exit 1 +fi + +# 创建命名空间 +kubectl create namespace pulp || true + +# 安装 chart +echo "📦 安装本地 Helm Chart..." +helm upgrade --install pulp-operator ./charts/pulp-operator/ -n pulp + +# 等待 CRD 注册 +sleep 10 + +# 生成默认 CR yaml(可改为 values 覆盖渲染) +echo "📝 生成 CR manifests/pulp-cr.yaml..." +mkdir -p manifests +cat > manifests/pulp-cr.yaml <" | awk '{print $3}' | while read image_id; do + echo "Deleting image: $image_id" + sudo CONTAINERD_ADDRESS=$CONTAINERD_ADDRESS nerdctl --namespace k8s.io rmi "$image_id" +done + +echo "Cleanup complete." diff --git a/scripts/registry/push_images.sh b/scripts/registry/push_images.sh new file mode 100644 index 0000000..62562bd --- /dev/null +++ b/scripts/registry/push_images.sh @@ -0,0 +1,39 @@ +#!/bin/bash + +set +x + +# 设置容器和仓库地址 +CONTAINERD_ADDRESS="/run/k3s/containerd/containerd.sock" +LOCAL_REGISTRY="local-registry.onwalk.net:5000" +TARGET_REGISTRY="images.onwalk.net/private/deepflow-v6.5" + +# 设置输出文件 +input_file="all.tag.list" + +# 登录到目标 registry +echo "Logging in to $TARGET_REGISTRY..." 
+sudo CONTAINERD_ADDRESS=$CONTAINERD_ADDRESS nerdctl login $TARGET_REGISTRY
+
+# 读取 all.tag.list 并处理每个镜像
+while IFS= read -r line; do
+    # 如果行为空,跳过
+    if [ -z "$line" ]; then
+        continue
+    fi
+
+    # 替换 local-registry 地址为目标地址, 也删除 :5000 端口
+    target_tag="${line//$LOCAL_REGISTRY/$TARGET_REGISTRY}"
+
+    # 打标签并推送镜像
+    echo "Tagging and Pushing $line -> $target_tag"
+    sudo CONTAINERD_ADDRESS=$CONTAINERD_ADDRESS nerdctl pull "$line"
+    sudo CONTAINERD_ADDRESS=$CONTAINERD_ADDRESS nerdctl tag "$line" "$target_tag"
+    sudo CONTAINERD_ADDRESS=$CONTAINERD_ADDRESS nerdctl push "$target_tag"
+
+    # 清理本地镜像
+    echo "Cleaning up local image: $line"
+    sudo CONTAINERD_ADDRESS=$CONTAINERD_ADDRESS nerdctl rmi "$line"
+    echo "Cleaning up local image: $target_tag"
+    sudo CONTAINERD_ADDRESS=$CONTAINERD_ADDRESS nerdctl rmi "$target_tag"
+done < "$input_file"
+
diff --git a/scripts/registry/setup-nerdctl.sh b/scripts/registry/setup-nerdctl.sh
new file mode 100644
index 0000000..749662b
--- /dev/null
+++ b/scripts/registry/setup-nerdctl.sh
@@ -0,0 +1,25 @@
+#!/bin/bash
+# Write the system-wide nerdctl configuration for the K3s containerd socket
+# and check that nerdctl can talk to it.
+
+# Fetch the nerdctl release tarball into the current directory.
+# NOTE(review): the archive is only downloaded, never unpacked; the last
+# command relies on a nerdctl binary that is already on PATH.
+wget https://github.com/containerd/nerdctl/releases/download/v2.0.2/nerdctl-2.0.2-linux-amd64.tar.gz
+
+sudo mkdir -pv /etc/nerdctl
+sudo touch /etc/nerdctl/nerdctl.toml
+
+# Write through `sudo tee`: with `sudo cat > file` the redirection is opened by
+# the calling (unprivileged) shell and fails unless the script runs as root.
+sudo tee /etc/nerdctl/nerdctl.toml > /dev/null << EOF
+debug = false
+debug_full = false
+address = "unix:///run/k3s/containerd/containerd.sock"
+namespace = "k8s.io"
+cni_path = "/var/lib/nerdctl/cni/bin"
+cni_netconfpath = "/var/lib/nerdctl/cni/net.d"
+EOF
+
+# Smoke test: list containers in the k8s.io namespace over the K3s socket.
+sudo CONTAINERD_ADDRESS=/run/k3s/containerd/containerd.sock nerdctl --namespace k8s.io ps
diff --git a/scripts/registry/setup-registry.sh b/scripts/registry/setup-registry.sh
new file mode 100644
index 0000000..070557d
--- /dev/null
+++ b/scripts/registry/setup-registry.sh
@@ -0,0 +1,260 @@
+#!/bin/bash
+
+#https://github.com/containerd/nerdctl/releases/download/v2.0.2/nerdctl-2.0.2-linux-amd64.tar.gz
+#https://github.com/containerd/nerdctl/releases/download/v2.0.2/nerdctl-full-2.0.2-linux-amd64.tar.gz
+#wget
https://github.com/containernetworking/plugins/releases/download/v1.6.2/cni-plugins-linux-amd64-v1.6.2.tgz + +#!/bin/bash +set -e + +# ============================================= +# ✅ 环境变量检查(可配置) +# ============================================= +: "${REGISTRY_DOMAIN:=kube.registry.local}" +: "${REGISTRY_PORT:=5000}" +: "${NERDCTL_VERSION:=v2.0.2}" +: "${CNI_VERSION:=v1.6.2}" +: "${CNI_DIR:=/opt/cni/bin}" +: "${CERT_DIR:=/opt/registry/certs}" +: "${CONFIG_DIR:=/opt/registry/config}" +: "${REGISTRY_DATA:=/var/lib/registry}" +: "${REGISTRY_YAML:=registry.yaml}" +: "${COMPOSE_YAML:=compose.yaml}" +: "${TAR_FILE:=registry.tar}" + +# ============================================= +# ✅ 自动检测 containerd.sock +# ============================================= +if [[ -S "/run/k3s/containerd/containerd.sock" ]]; then + export CONTAINERD_ADDRESS="/run/k3s/containerd/containerd.sock" +elif [[ -S "/run/containerd/containerd.sock" ]]; then + export CONTAINERD_ADDRESS="/run/containerd/containerd.sock" +elif [[ -S "/var/run/containerd/containerd.sock" ]]; then + export CONTAINERD_ADDRESS="/var/run/containerd/containerd.sock" +else + echo "❌ 未检测到有效的 containerd.sock,请确认 containerd 是否正常运行。" + exit 1 +fi + +export NERDCTL_NAMESPACE="k8s.io" + +# ============================================= +echo "📦 准备 nerdctl 全功能版..." +if ! command -v nerdctl &>/dev/null; then + if [ ! -f /tmp/nerdctl-full.tgz ]; then + echo "⬇️ 下载 nerdctl..." + wget -O /tmp/nerdctl-full.tgz \ + "https://github.com/containerd/nerdctl/releases/download/${NERDCTL_VERSION}/nerdctl-full-${NERDCTL_VERSION#v}-linux-amd64.tar.gz" + else + echo "📦 已存在 nerdctl-full.tgz,跳过下载" + fi + + echo "📦 解压 nerdctl 到 /usr/local..." + sudo tar -C /usr/local -xzf /tmp/nerdctl-full.tgz + echo "✅ nerdctl 安装完成: $(nerdctl --version)" +else + echo "✅ nerdctl 已存在: $(nerdctl --version)" +fi + +# ============================================= +echo "📦 安装 CNI 插件..." +if [ ! -f "${CNI_DIR}/bridge" ]; then + if [ ! 
-f /tmp/cni.tgz ]; then + echo "⬇️ 下载 CNI 插件..." + wget -O /tmp/cni.tgz \ + "https://github.com/containernetworking/plugins/releases/download/${CNI_VERSION}/cni-plugins-linux-amd64-${CNI_VERSION}.tgz" + else + echo "📦 已存在 cni.tgz,跳过下载" + fi + + sudo mkdir -p "${CNI_DIR}" + sudo tar -C "${CNI_DIR}" -xzf /tmp/cni.tgz + echo "✅ CNI 插件已安装到: ${CNI_DIR}" +else + echo "✅ CNI 插件已存在: ${CNI_DIR}/bridge" +fi + +# ============================================= +echo "📦 解压 SSL 证书..." + +if [ ! -f "ssl_certificates.tar.gz" ]; then + echo "⬇️ 未找到 ssl_certificates.tar.gz,尝试从 GitHub 下载..." + wget -O ssl_certificates.tar.gz \ + "https://github.com/svc-design/ansible/releases/download/release-self-signed-cert_kube.registry.local/ssl_certificates.tar.gz" || { + echo "❌ 无法下载 ssl_certificates.tar.gz,终止执行" + exit 1 + } +else + if [ -f "ssl_certificates.tar.gz" ]; then + mkdir -p "$CERT_DIR" + tar -xvpf ssl_certificates.tar.gz -C "$CERT_DIR" + echo "✅ 证书已解压至: $CERT_DIR" + fi +fi + +# ============================================= + +# ============ 生成 registry-config ============ +echo "⚙️ 准备 registry 配置..." +sudo mkdir -pv "$CONFIG_DIR" +sudo mkdir -pv "$REGISTRY_DATA" +echo "📝 写入 registry-config.yaml..." +sudo cat > "${CONFIG_DIR}/${REGISTRY_YAML}" < /dev/null +version: 0.1 +log: + fields: + service: registry +storage: + cache: + blobdescriptor: inmemory + filesystem: + rootdirectory: /var/lib/registry + delete: + enabled: true +http: + addr: :${REGISTRY_PORT} + headers: + X-Content-Type-Options: [nosniff] + tls: + certificate: /etc/docker/registry/domain.crt + key: /etc/docker/registry/domain.key +health: + storagedriver: + enabled: true + interval: 10s + threshold: 3 +EOF +echo "✅ registry.yaml 已创建" + +# ========== 生成 compose.yaml ========== +echo "🛠️ 生成 compose 配置..." 
+cat < /dev/null +services: + registry: + image: registry:latest + container_name: registry + restart: always + network_mode: host + volumes: + - /var/lib/registry:/var/lib/registry + - ${CONFIG_DIR}/registry.yaml:/etc/docker/registry/config.yml + - ${CERT_DIR}/kube.registry.local.cert:/etc/docker/registry/domain.crt + - ${CERT_DIR}/kube.registry.local.key:/etc/docker/registry/domain.key +EOF +echo "✅ compose.yaml 已创建" + +# ============================================= +echo "📦 导入本地 registry 镜像..." +if [ -f "/usr/local/deepflow/$TAR_FILE" ]; then + sudo CONTAINERD_ADDRESS="$CONTAINERD_ADDRESS" nerdctl --namespace $NERDCTL_NAMESPACE load -i "/usr/local/deepflow/$TAR_FILE" +else + echo "⚠️ 本地镜像文件不存在:/usr/local/deepflow/$TAR_FILE" +fi + +# ============================================= +echo "🔁 重启 registry 服务..." +sudo CONTAINERD_ADDRESS="$CONTAINERD_ADDRESS" nerdctl --namespace $NERDCTL_NAMESPACE compose -f "$CONFIG_DIR/compose.yaml" down || true +sudo CONTAINERD_ADDRESS="$CONTAINERD_ADDRESS" nerdctl --namespace $NERDCTL_NAMESPACE compose -f "$CONFIG_DIR/compose.yaml" up -d + +# ============================================= +echo "🔗 添加 hosts 映射..." +if ! grep -q "$REGISTRY_DOMAIN" /etc/hosts; then + echo "127.0.0.1 $REGISTRY_DOMAIN" | sudo tee -a /etc/hosts + echo "✅ /etc/hosts 已添加 $REGISTRY_DOMAIN" +else + echo "✅ hosts 中已存在 $REGISTRY_DOMAIN" +fi + +echo "✅ Registry 启动成功: https://$REGISTRY_DOMAIN:$REGISTRY_PORT" + +# ============================================= +echo "🔐 安装 CA 证书到系统信任目录..." + +CA_CERT="${CERT_DIR}/ca.cert" +if [ ! 
-f "$CA_CERT" ]; then
+    echo "❌ 未找到 CA 证书: $CA_CERT"
+else
+    if grep -qi "ubuntu\|debian" /etc/os-release; then
+        sudo cp "$CA_CERT" "/usr/local/share/ca-certificates/kube-registry-ca.crt"
+        sudo update-ca-certificates
+        echo "✅ 已导入 CA 到 Ubuntu/Debian 系统信任目录"
+    elif grep -qi "rhel\|centos\|rocky" /etc/os-release; then
+        sudo cp "$CA_CERT" "/etc/pki/ca-trust/source/anchors/kube-registry-ca.crt"
+        sudo update-ca-trust extract
+        echo "✅ 已导入 CA 到 RHEL/CentOS 系统信任目录"
+    else
+        echo "⚠️ 未知发行版,跳过系统 CA 导入"
+    fi
+fi
+
+# =============================================
+echo "🐳 安装 CA 到容器运行时 (Docker/Containerd)..."
+
+# --- Docker CA: the certs.d directory is named <registry host>:<port> ---
+if command -v docker &>/dev/null; then
+    echo "🔧 配置 Docker..."
+    DOCKER_CA_DIR="/etc/docker/certs.d/${REGISTRY_DOMAIN}:${REGISTRY_PORT}"
+    sudo mkdir -p "$DOCKER_CA_DIR"
+    sudo cp "$CA_CERT" "${DOCKER_CA_DIR}/ca.crt"
+    echo "✅ 已导入 CA 到 Docker: $DOCKER_CA_DIR"
+    sudo systemctl restart docker
+fi
+
+# --- Containerd CA: runs when the binary exists or the detected socket (CONTAINERD_ADDRESS) is present ---
+if command -v containerd &>/dev/null || [ -S "$CONTAINERD_ADDRESS" ]; then
+    echo "🔧 配置 Containerd..."
+
+    # The CA is copied to /etc/containerd/certs.d/<registry domain>/ca.crt,
+    # the same directory the K3s section below writes to.
+    CONTAINERD_CA_DIR="/etc/containerd/certs.d/${REGISTRY_DOMAIN}"
+    sudo mkdir -p "$CONTAINERD_CA_DIR"
+    sudo cp "$CA_CERT" "${CONTAINERD_CA_DIR}/ca.crt"
+    echo "✅ 已导入 CA 到 Containerd: $CONTAINERD_CA_DIR"
+    sudo systemctl restart containerd || echo "⚠️ containerd 重启失败,可能在 K3s 中不适用"
+fi
+
+
+# --- K3s CA: only when the K3s-managed containerd socket exists ---
+if [[ -S "/run/k3s/containerd/containerd.sock" ]]; then
+    echo "🔧 检测到 K3s 环境,准备导入 CA..."
+
+    K3S_CA_DIR="/etc/containerd/certs.d/${REGISTRY_DOMAIN}"
+    sudo mkdir -p "$K3S_CA_DIR"
+    sudo cp "$CA_CERT" "${K3S_CA_DIR}/ca.crt"
+
+    echo "✅ 已导入 CA 到 K3s containerd: $K3S_CA_DIR"
+
+    echo "🔁 重启 k3s..."
+ sudo systemctl restart k3s || echo "⚠️ K3s 重启失败,请手动确认" +fi diff --git a/scripts/registry/show_images.sh b/scripts/registry/show_images.sh new file mode 100644 index 0000000..3b9c4a5 --- /dev/null +++ b/scripts/registry/show_images.sh @@ -0,0 +1,37 @@ +#!/bin/bash + +# 设置协议和 registry 地址(https:// 或 http://) +PROTOCOL="https://" +REGISTRY="local-registry.onwalk.net:5000" + +# 获取仓库列表 +repos=$(curl -s -X GET "$PROTOCOL$REGISTRY/v2/_catalog" | jq -r '.repositories[]') + +# 要隐藏的仓库列表 +hidden_repos=("") + +# 创建或清空输出文件 +output_file="all.tag.list" +> "$output_file" + +# 遍历每个仓库,获取对应的标签列表 +for repo in $repos; do + # 如果是隐藏的仓库,跳过 + if [[ " ${hidden_repos[@]} " =~ " ${repo} " ]]; then + continue + fi + + # 获取标签列表 + tags=$(curl -s -X GET "$PROTOCOL$REGISTRY/v2/$repo/tags/list" | jq -r '.tags[]') + + # 如果仓库有标签,则按格式输出到文件 + if [ -n "$tags" ]; then + for tag in $tags; do + # 输出格式:local-registry.onwalk.net:5000/repository:tag + echo "$REGISTRY/$repo:$tag" >> "$output_file" + done + fi +done + +# 排序并去重 +sort -u "$output_file" -o "$output_file" diff --git a/scripts/renew_docker_registry_secret_with_kubectl.sh b/scripts/renew_docker_registry_secret_with_kubectl.sh new file mode 100644 index 0000000..31eff3f --- /dev/null +++ b/scripts/renew_docker_registry_secret_with_kubectl.sh @@ -0,0 +1,30 @@ +#!/bin/sh + +check_not_empty() { + if [[ -z $1 ]]; then + echo "Error: $2 is empty. Please provide a value." 
+ exit 1 + fi +} + +function renew_docker_registry_secret() +{ + + # 检查参数是否为空 + check_not_empty "$1" "cluster" && local cluster=$1 + check_not_empty "$2" "namespace" && local namespace=$2 + check_not_empty "$3" "secret" && local secret=$3 + check_not_empty "$4" "username" && local username=$4 + check_not_empty "$5" "password" && local password=$5 + + fuze k8s clusters connect $cluster && kubectl config set-context --current --namespace $namespace + kubectl delete secret $secret -n $namespace || true + kubectl create secret docker-registry $secret \ + --docker-server=artifact.onwalk.net \ + --docker-username=$username \ + --docker-password=$password \ + --docker-email=manbzuhe2009@qq.com \ + -n $namespace + + kubectl get secret $secret -n $namespace --output="jsonpath={.data.\.dockerconfigjson}" | base64 --decode || true +} diff --git a/scripts/rewrite-cover-history.sh b/scripts/rewrite-cover-history.sh new file mode 100644 index 0000000..3135e24 --- /dev/null +++ b/scripts/rewrite-cover-history.sh @@ -0,0 +1,30 @@ +#!/usr/bin/env bash +set -euo pipefail + +# 用法:bash rewrite-cover-history.sh path1 [path2 ...] +[ $# -gt 0 ] || { echo "Usage: $0 [path ...]"; exit 1; } +PATHS="$*" + +# 记录 remotes,并打个回滚点(本地) +git remote -v | tee remotes.before.txt +git tag "pre-redact-$(date +%Y%m%d-%H%M%S)" + +# 核心:对历史中每个提交,若文件存在则用 HEAD 版本覆盖 +git filter-branch --force --tree-filter ' +for p in '"$PATHS"'; do + [ -e "$p" ] && git show HEAD:"$p" > "$p" || true +done +' -- --all + +# 清理原始引用与垃圾对象(避免泄漏对象残留) +git for-each-ref --format="delete %(refname)" refs/original/ | git update-ref --stdin || true +git reflog expire --expire=now --all +git gc --prune=now --aggressive + +# 强制推送到所有 remotes(分支与标签) +for r in $(git remote); do + git push --force-with-lease "$r" --all + git push --force "$r" --tags +done + +echo "✅ Done. 
协作者请重新克隆或：git fetch --all && git reset --hard origin/$(git rev-parse --abbrev-ref HEAD) && git gc --prune=now"
diff --git a/scripts/rollout_docker_registry_secret.sh b/scripts/rollout_docker_registry_secret.sh
new file mode 100644
index 0000000..7836ffe
--- /dev/null
+++ b/scripts/rollout_docker_registry_secret.sh
@@ -0,0 +1,30 @@
+#!/bin/bash
+# Helper for re-applying a backed-up docker-registry secret manifest.
+# This file only defines functions (it uses bash features: [[ ]], function, local).
+
+# check_not_empty VALUE NAME
+# Print an error naming NAME and exit with status 1 when VALUE is empty.
+check_not_empty() {
+    if [[ -z $1 ]]; then
+        echo "Error: $2 is empty. Please provide a value."
+        exit 1
+    fi
+}
+
+# rollout_docker_registry_secret CLUSTER NAMESPACE SECRET
+# Point the current kubectl context at NAMESPACE and apply
+# ~/Backups/CLUSTER-NAMESPACE-SECRET.yaml.
+function rollout_docker_registry_secret()
+{
+
+    # Validate every argument before it is used.
+    check_not_empty "$1" "cluster" && local cluster=$1
+    check_not_empty "$2" "namespace" && local namespace=$2
+    check_not_empty "$3" "secret" && local secret=$3
+
+    mkdir -pv ~/Backups/
+    kubectl config set-context --current --namespace "$namespace"
+    # The manifest sits directly under ~/Backups/; the path previously repeated the
+    # directory ("~/Backups/~/Backups/..."), and the second "~" is never expanded.
+    kubectl apply -f "$HOME/Backups/$cluster-$namespace-$secret.yaml"
+}
diff --git a/scripts/secret/README.md b/scripts/secret/README.md
new file mode 100644
index 0000000..44fa3a2
--- /dev/null
+++ b/scripts/secret/README.md
@@ -0,0 +1,45 @@
+# Secret Management Script
+
+This script is designed to fetch and manage secrets from HCP Cloud Secrets. It retrieves secrets based on environment variables and writes the final configuration to a JSON file.
+
+# Prerequisites
+
+1. **Python 3**: Ensure Python 3 is installed on your system.
+2. **Python Libraries**: This script requires the `requests`, `pyyaml`, and `secret` libraries. You can install these dependencies using pip:
+
+```bash
+pip install requests pyyaml
+```
+
+# Environment Variables
+
+The script requires the following environment variables:
+
+- HCP_API_URL: The API URL for fetching secrets from HCP.
+- HCP_CLIENT_ID: The client ID for HCP authentication.
+- HCP_CLIENT_SECRET: The client secret for HCP authentication.
+
+# Usage
+
+To use this script, follow these steps:
+Set Environment Variables: Ensure all required environment variables are set. For example:
+
+```
+export HCP_API_URL="https://api.cloud.hashicorp.com/secrets/..."
+export HCP_CLIENT_ID="your_client_id" +export HCP_CLIENT_SECRET="your_client_secret" +``` + +# Functions + +## get_hcp_api_token(client_id, client_secret) +Obtains an HCP API token using the provided client ID and secret. + +## get_secret_data(api_url, api_token) +Fetches secret data from HCP Cloud using the provided API URL and token. + +## get_secret_value_by_name(secret_data, secret_name) +Extracts the value of a secret from the fetched secret data based on the provided name. + +# License +This script is licensed under the GPLv3 License. See the LICENSE file for more details. diff --git a/scripts/secret/hcp/__init__.py b/scripts/secret/hcp/__init__.py new file mode 100644 index 0000000..87f0cc7 --- /dev/null +++ b/scripts/secret/hcp/__init__.py @@ -0,0 +1 @@ +from .secret import get_hcp_api_token, get_secret_data, get_secret_value_by_name diff --git a/scripts/secret/hcp/secret.py b/scripts/secret/hcp/secret.py new file mode 100644 index 0000000..0141be9 --- /dev/null +++ b/scripts/secret/hcp/secret.py @@ -0,0 +1,55 @@ +import requests + +def get_hcp_api_token(client_id, client_secret): + """Obtain the HCP API token using client credentials.""" + url = "https://auth.idp.hashicorp.com/oauth2/token" + headers = { + "Content-Type": "application/x-www-form-urlencoded" + } + data = { + "client_id": client_id, + "client_secret": client_secret, + "grant_type": "client_credentials", + "audience": "https://api.hashicorp.cloud" + } + + response = requests.post(url, headers=headers, data=data) + response.raise_for_status() # Raise an error for bad responses + return response.json().get("access_token") + +def get_secret_data(api_url, api_token): + """ + Fetch the secret data from HCP Cloud using the API URL and token. + + Parameters: + - api_url: The URL to fetch secret data from HCP Cloud. + - api_token: The API token for authentication. + + Returns: + - The JSON response containing the secret data. 
+    """
+    headers = {
+        "Authorization": f"Bearer {api_token}"
+    }
+
+    response = requests.get(api_url, headers=headers)
+    response.raise_for_status()  # Raise an error for bad responses
+    return response.json()
+
+def get_secret_value_by_name(secret_data, secret_name):
+    """
+    Get the version value by the specified name from the fetched secret data.
+
+    Parameters:
+    - secret_data: The JSON data containing secrets fetched from HCP Cloud.
+    - secret_name: The name of the secret to fetch the version value for.
+
+    Returns:
+    - The value of the secret for the specified name.
+    """
+    secrets = secret_data.get('secrets', [])
+    for secret_info in secrets:
+        if secret_info.get('name') == secret_name:
+            return secret_info.get('version', {}).get('value')
+
+    return None
diff --git a/scripts/secret/setup.py b/scripts/secret/setup.py
new file mode 100644
index 0000000..a6edd2f
--- /dev/null
+++ b/scripts/secret/setup.py
@@ -0,0 +1,20 @@
+"""Packaging script for the HCP secret helper library (the ``hcp`` package)."""
+from setuptools import setup, find_packages
+
+setup(
+    name="hcp_secret",
+    version="0.1",
+    # The importable package next to this file is ``hcp`` (hcp/__init__.py);
+    # the previous filter ('secret', 'secret.hcp') matched no directory, so the
+    # built distribution contained no packages.
+    packages=find_packages(include=['hcp', 'hcp.*']),
+    install_requires=[
+        "requests",
+    ],
+    # The tests import only the standard-library unittest module, so no extra
+    # test dependency is declared.
+    description="A library to fetch secrets from HCP Cloud",
+    author="Haitao Pan",
+    author_email="manbuzhe2008@gmail.com",
+    url="https://github.com/yourusername/hcp_secret",
+)
diff --git a/scripts/secret/tests/__init__.py b/scripts/secret/tests/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/scripts/secret/tests/test_secret.py b/scripts/secret/tests/test_secret.py
new file mode 100644
index 0000000..5fa84cd
--- /dev/null
+++ b/scripts/secret/tests/test_secret.py
@@ -0,0 +1,29 @@
+import unittest
+from hcp import get_hcp_api_token, get_secret_data, get_secret_value_by_name
+
+class TestHCPSecret(unittest.TestCase):
+
+    def test_get_hcp_api_token(self):
+        # Mock the API response and test the token retrieval
+        pass  # Add actual test logic here
+
+    def test_get_secret_data(self):
+        # Mock the API
response and test secret data fetching + pass # Add actual test logic here + + def test_get_secret_value_by_name(self): + secret_data = { + "secrets": [ + { + "name": "cn_gateway_private_key", + "version": { + "value": "test_value" + } + } + ] + } + value = get_secret_value_by_name(secret_data, "cn_gateway_private_key") + self.assertEqual(value, "test_value") + +if __name__ == "__main__": + unittest.main() diff --git a/scripts/setup-gitea.sh b/scripts/setup-gitea.sh new file mode 100644 index 0000000..3877d9b --- /dev/null +++ b/scripts/setup-gitea.sh @@ -0,0 +1,83 @@ +helm repo add gitea https://dl.gitea.com/charts +helm repo update +kubectl create ns gitea || true +cat > gitea-values.yaml < grafana-agent-values.yaml << EOF +global: + image: + registry: "images.onwalk.net/public" +agent: + mode: 'static' + configMap: + create: true + content: '' +logs: + enabled: false +traces: + enabled: false +EOF + +helm upgrade --install grafana-agent grafana/grafana-agent --namespace deepflow -f grafana-agent-values.yaml + +cat > grafana-agent-configmap.yaml << EOF +apiVersion: v1 +data: + config.yaml: |- + server: + log_level: info + log_format: logfmt + metrics: + global: + scrape_interval: 1m + configs: + - name: agent + scrape_configs: + - job_name: kube-state-metrics + static_configs: + - targets: ['10.43.155.169:8080'] + - job_name: node-metrics + static_configs: + - targets: ['10.43.68.133:9100'] + remote_write: + - url: http://deepflow-agent.deepflow.svc.cluster.local/api/v1/prometheus +kind: ConfigMap +metadata: + annotations: + meta.helm.sh/release-name: grafana-agent + meta.helm.sh/release-namespace: deepflow + labels: + app.kubernetes.io/instance: grafana-agent + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/name: grafana-agent + app.kubernetes.io/version: v0.42.0 + helm.sh/chart: grafana-agent-0.42.0 + name: grafana-agent + namespace: deepflow +EOF + +kubectl apply -f grafana-agent-configmap.yaml + +kubectl get pods -n deepflow diff --git 
a/scripts/setup-microservice-demo.sh b/scripts/setup-microservice-demo.sh new file mode 100644 index 0000000..4ea8af8 --- /dev/null +++ b/scripts/setup-microservice-demo.sh @@ -0,0 +1,11 @@ +#git clone https://github.com/aliyun/alibabacloud-microservice-demo.git +kubectl create ns microservice-demo || true +kubectl delete secret tls otel-demo-secret -n microservice-demo || true +kubectl create secret tls otel-demo-secret --key=/etc/ssl/onwalk.net.key --cert=/etc/ssl/onwalk.net.pem -n microservice-demo || true +cat > microservice-demo-config.yaml << EOF +image: + prefix: images.onwalk.net/public/microservice-demo/ + version: 1.0.0-SNAPSHOT +EOF +helm package alibabacloud-microservice-demo/helm-chart/ +helm upgrade --install microservice-demo /root/microservice-demo-0.1.0.tgz -n microservice-demo -f microservice-demo-config.yaml diff --git a/scripts/setup-open-telemetry-demo.sh b/scripts/setup-open-telemetry-demo.sh new file mode 100644 index 0000000..180a9ef --- /dev/null +++ b/scripts/setup-open-telemetry-demo.sh @@ -0,0 +1,116 @@ +helm repo add open-telemetry https://open-telemetry.github.io/opentelemetry-helm-charts +helm repo update +kubectl create ns otel || true +kubectl delete secret tls otel-demo-secret -n otel || true +kubectl create secret tls otel-demo-secret --key=/etc/ssl/onwalk.net.key --cert=/etc/ssl/onwalk.net.pem -n otel || true +cat > otel-demo-config.yaml << EOF +default: + image: + repository: images.onwalk.net/public/opentelemetry/demo + tag: "" + pullPolicy: IfNotPresent +components: + accountingService: + enabled: true + initContainers: + - name: wait-for-kafka + image: images.onwalk.net/public/base/busybox:latest + adService: + enabled: true + cartService: + enabled: true + initContainers: + - name: wait-for-valkey + image: images.onwalk.net/public/base/busybox:latest + checkoutService: + enabled: true + initContainers: + - name: wait-for-kafka + image: images.onwalk.net/public/base/busybox:latest + currencyService: + enabled: true + 
emailService: + enabled: true + frauddetectionService: + enabled: true + initContainers: + - name: wait-for-kafka + image: images.onwalk.net/public/base/busybox:latest + frontend: + enabled: true + frontendProxy: + enabled: true + ingress: + enabled: true + ingressClassName: nginx + hosts: + - host: otel-demo.onwalk.net + paths: + - path: / + pathType: Prefix + port: 8080 + - path: /jaeger/ui/ + pathType: Prefix + port: 8080 + - path: /grafana/ + pathType: Prefix + port: 8080 + - path: /loadgen/ + pathType: Prefix + port: 8080 + - path: /feature/ + pathType: Prefix + port: 8080 + tls: + - secretName: otel-demo-secret + hosts: + - otel-demo.onwalk.net + imageprovider: + enabled: true + loadgenerator: + enabled: true + paymentService: + enabled: true + productCatalogService: + enabled: true + quoteService: + enabled: true + recommendationService: + enabled: true + shippingService: + enabled: true + flagd: + enabled: false + imageOverride: + repository: "ghcr.io/open-feature/flagd" + tag: "v0.11.4" + initContainers: + - name: init-config + image: images.onwalk.net/public/base/busybox:latest + kafka: + enabled: true + valkey: + enabled: true + imageOverride: + repository: "images.onwalk.net/public/opentelemetry/valkey" + tag: "7.2-alpine" +grafana: + enabled: true + global: + imageRegistry: images.onwalk.net/public +prometheus: + enabled: true +jaeger: + enabled: true + allInOne: + image: + repository: "images.onwalk.net/public/jaegertracing/all-in-one" + tag: "1.53.0" +opentelemetry-collector: + enabled: true + image: + repository: "images.onwalk.net/public/opentelemetry/opentelemetry-collector-contrib" +opensearch: + enabled: false +EOF +helm upgrade --install otel-demo open-telemetry/opentelemetry-demo --version=0.33.3 -n otel -f otel-demo-config.yaml diff --git a/scripts/setup-vector.sh b/scripts/setup-vector.sh new file mode 100644 index 0000000..761f15d --- /dev/null +++ b/scripts/setup-vector.sh @@ -0,0 +1,101 @@ +helm repo add vector https://helm.vector.dev 
+helm repo update +cat << EOF > vector-values-custom.yaml +role: Agent +#nodeSelector: +# allow/vector: "false" + +# resources -- Set Vector resource requests and limits. +resources: + requests: + cpu: 200m + memory: 256Mi + limits: + cpu: 200m + memory: 256Mi +image: + repository: images.onwalk.net/public/timberio/vector + pullPolicy: Always + tag: "0.37.1-distroless-libc" +podLabels: + vector.dev/exclude: "true" + app: deepflow +# extraVolumes -- Additional Volumes to use with Vector Pods. + # extraVolumes: + # - name: opt-log + # hostPath: + # path: "/opt/log/" +# extraVolumeMounts -- Additional Volume to mount into Vector Containers. + # extraVolumeMounts: + # - name: opt-log + # mountPath: "/opt/log/" + # readOnly: true +customConfig: + ## The configuration comes from https://vector.dev/docs/reference/configuration/global-options/#data_dir + data_dir: /vector-data-dir + api: + enabled: true + address: 127.0.0.1:8686 + playground: false + sources: + kubernetes_logs: + type: kubernetes_logs + namespace_annotation_fields: + namespace_labels: "" + node_annotation_fields: + node_labels: "" + pod_annotation_fields: + pod_annotations: "" + pod_labels: "" + + transforms: + remap_kubernetes_logs: + type: remap + inputs: + - kubernetes_logs + source: |- + # try to parse json + if is_string(.message) && is_json(string!(.message)) { + tags = parse_json(.message) ?? {} + .message = tags.message # FIXME: the log content key inside json + del(tags.message) + .json = tags + } + + if !exists(.level) { + if exists(.json) { + .level = .json.level + del(.json.level) + } else { + # match log levels surround by ``[]`` or ``<>`` with ignore case + level_tags = parse_regex(.message, r'[\[\\\<](?(?i)INFOR?(MATION)?|WARN(ING)?|DEBUG?|ERROR?|TRACE|FATAL|CRIT(ICAL)?)[\]\\\>]') ?? 
{} + if !exists(level_tags.level) { + # match log levels surround by whitespace, required uppercase strictly in case mismatching + level_tags = parse_regex(.message, r'[\s](?INFOR?(MATION)?|WARN(ING)?|DEBUG?|ERROR?|TRACE|FATAL|CRIT(ICAL)?)[\s]') ?? {} + } + if exists(level_tags.level) { + level_tags.level = upcase(string!(level_tags.level)) + .level = level_tags.level + } + } + } + + if !exists(._df_log_type) { + # default log type + ._df_log_type = "user" + } + + if !exists(.app_service) { + # FIXME: files 模块没有此字段,请通过日志内容注入应用名称 + .app_service = .kubernetes.container_name + } + sinks: + http: + encoding: + codec: json + inputs: + - remap_kubernetes_logs # NOTE: 注意这里数据源是 transform 模块的 key + type: http + uri: http://deepflow-agent.deepflow/api/v1/log +EOF +helm upgrade --install vector vector/vector --namespace deepflow --create-namespace -f vector-values-custom.yaml diff --git a/scripts/sing-box/README.md b/scripts/sing-box/README.md new file mode 100644 index 0000000..05eb390 --- /dev/null +++ b/scripts/sing-box/README.md @@ -0,0 +1,95 @@ + +# Sing-box VLESS + Reality 一键部署脚本 + +该脚本用于在 Linux 服务器上快速部署一个基于 sing-box 的隐匿代理服务,采用 `VLESS + Reality` 协议,结合 systemd 自启动支持,适用于高隐蔽性代理通信场景。 + +--- + +## 🧩 功能特性 + +- 🚀 自动安装并配置 sing-box(如未安装) +- 🔐 自动生成 Reality 密钥对(无需手动管理) +- 📄 自动生成服务端配置文件(支持伪装 SNI) +- ⚙️ 自动创建并启用 systemd 启动服务 +- 📦 自动输出客户端配置片段,支持 Windows/macOS/Linux + +--- + +## 🖥️ 支持平台 + +- 服务端:Debian / Ubuntu / CentOS / Arch / 兼容 Linux 系统 +- 客户端平台:macOS / Windows / Linux(任意 sing-box 客户端) + +--- + +## ⚙️ 使用方式 + +### 一键安装(推荐) + +```bash +bash <(curl -fsSL https://your.cdn/installer/install-singbox.sh) \ + --ip 123.123.123.123 \ + --sni www.bing.com \ + --client-platform macos + + 参数说明: + +参数 示例值 说明 +--ip 123.123.123.123 当前服务器公网 IP +--sni www.bing.com Reality 伪装域名 +--client-platform macos / windows / linux 客户端类型(影响输出说明) + +📂 脚本行为说明 +部署完成后,脚本会生成: + +文件路径 说明 +/etc/sing-box/config-server.json sing-box 服务端配置 +/etc/systemd/system/sing-box.service systemd 启动配置 +/usr/local/bin/sing-box 
主程序(如未安装将自动下载) + +并自动执行: + +bash +复制 +编辑 +systemctl daemon-reload +systemctl enable --now sing-box +🔐 示例输出 +部署成功后会输出如下: + +css +复制 +编辑 +✅ 服务端已部署成功! +👉 Reality 公钥: yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy +👉 ShortID: abcd +👉 UUID: xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx + +📦 推荐客户端配置如下: +{ + "outbounds": [ + { + "type": "vless", + ... + } + ] +} +🧱 安全建议 +建议使用 Cloudflare DNS 或境外解析加快 SNI 匹配 + +Reality 不需要 TLS 证书即可启用加密通信 + +可进一步结合 iptables 或 fail2ban 做入站控制 + +🛠️ 后续扩展(可选) +你可以基于本项目扩展支持: + +fallback 到 nginx / 80 端口 + +多用户(多个 UUID) + +动态配置(通过 API 控制) + +客户端同步配置工具 + + diff --git a/scripts/sing-box/client-gvisor-tun-reality.json b/scripts/sing-box/client-gvisor-tun-reality.json new file mode 100644 index 0000000..9ee45ae --- /dev/null +++ b/scripts/sing-box/client-gvisor-tun-reality.json @@ -0,0 +1,72 @@ +{ + "log": { + "level": "debug" + }, + "dns": { + "servers": [ + { + "tag": "direct_dns", + "address": "223.5.5.5", + "detour": "direct" + } + ] + }, + "inbounds": [ + { + "type": "tun", + "tag": "tun-in", + "interface_name": "sing-tun", + "mtu": 1500, + "stack": "gvisor", + "endpoint_independent_nat": true, + "address": ["172.19.0.1/30"], + "auto_route": true, + "strict_route": true + } + ], + "outbounds": [ + { + "type": "direct", + "tag": "direct" + }, + { + "type": "vless", + "tag": "proxy-out", + "server": "your.server.ip", // 替换为你的服务端IP或域名 + "server_port": 443, + "uuid": "your-uuid", // 与服务端一致 + "flow": "", + "tls": { + "enabled": true, + "server_name": "fake-sni.com", // 可伪装的域名,如 bing.com + "utls": { + "enabled": true, + "fingerprint": "chrome" + }, + "reality": { + "enabled": true, + "public_key": "your-server-pubkey", // 服务端生成的 Reality 公钥 + "short_id": "abcd" // 与服务端一致 + } + } + } + ], + "route": { + "auto_detect_interface": true, + "rules": [ + { + "geoip": ["cn"], + "outbound": "direct" + }, + { + "ip_cidr": ["0.0.0.0/0"], + "outbound": "proxy-out" + }, + { + "protocol": ["dns"], + "action": "hijack-dns" + } + ] + } +} + diff --git 
a/scripts/sing-box/client-gvisor-tun-vless.json b/scripts/sing-box/client-gvisor-tun-vless.json new file mode 100644 index 0000000..091c31b --- /dev/null +++ b/scripts/sing-box/client-gvisor-tun-vless.json @@ -0,0 +1,69 @@ +{ + "log": { + "level": "info" + }, + "dns": { + "servers": [ + { + "tag": "direct_dns", + "address": "223.5.5.5", + "detour": "direct" + } + ] + }, + "inbounds": [ + { + "type": "tun", + "tag": "tun-in", + "interface_name": "sing-tun", // 虚拟网卡名,不会真实创建(gVisor 模式) + "mtu": 1500, + "stack": "gvisor", // 用户态 TCP/IP 栈 + "endpoint_independent_nat": true, + "address": [ + "172.19.0.1/30" + ], + "auto_route": true, + "strict_route": true + } + ], + "outbounds": [ + { + "type": "vless", + "tag": "proxy-out", + "server": "your.domain.com", // ✅ 你的 TLS 证书域名 + "server_port": 443, + "uuid": "your-uuid", // ✅ 与服务端一致 UUID + "flow": "xtls-rprx-vision", // ✅ 开启 XTLS-Vision 加速 + "tls": { + "enabled": true, + "server_name": "your.domain.com", // ✅ 与证书一致 + "utls": { + "enabled": true, + "fingerprint": "chrome" // 可选 uTLS 指纹伪装 + } + // ❌ 无 Reality 字段 + } + }, + { + "type": "direct", + "tag": "direct" + } + ], + "route": { + "auto_detect_interface": true, + "rules": [ + { + "geoip": ["cn"], + "outbound": "direct" // 国内走直连 + }, + { + "ip_cidr": ["0.0.0.0/0"], + "outbound": "proxy-out" // 其余走 VLESS XTLS 出口 + }, + { + "protocol": ["dns"], + "action": "hijack-dns" // 劫持系统 DNS + } + ] + } +} diff --git a/scripts/sing-box/create-reality-keypair.sh b/scripts/sing-box/create-reality-keypair.sh new file mode 100644 index 0000000..88d9f80 --- /dev/null +++ b/scripts/sing-box/create-reality-keypair.sh @@ -0,0 +1 @@ +sing-box generate reality-keypair diff --git a/scripts/sing-box/install-singbox.sh b/scripts/sing-box/install-singbox.sh new file mode 100644 index 0000000..7157d4b --- /dev/null +++ b/scripts/sing-box/install-singbox.sh @@ -0,0 +1,136 @@ +#!/bin/bash + +set -e + +# 参数 +SERVER_IP="$1" +SNI="$2" +CLIENT_PLATFORM="$3" + +# 示例用法提示 +if [[ -z "$SERVER_IP" || -z "$SNI" 
|| -z "$CLIENT_PLATFORM" ]]; then + echo "用法: $0 --ip <服务器IP> --sni <伪装域名> --client-platform " + exit 1 +fi + +UUID=$(uuidgen) +KEYPAIR=$(sing-box generate reality-keypair) +PRIVATE_KEY=$(echo "$KEYPAIR" | grep PrivateKey | awk '{print $2}') +PUBLIC_KEY=$(echo "$KEYPAIR" | grep PublicKey | awk '{print $2}') +SHORT_ID=$(head /dev/urandom | tr -dc a-z0-9 | head -c 4) + +# 安装 sing-box(以 Debian 为例) +if ! command -v sing-box &>/dev/null; then + echo "🔧 安装 sing-box..." + curl -fsSL https://sing-box.app/install | bash +fi + +# 创建配置目录 +mkdir -p /etc/sing-box + +# 写入服务端配置 +cat > /etc/sing-box/config-server.json < /etc/systemd/system/sing-box.service <