From 53656acb2c4383aa47a7a2e4aac75e3097a530a6 Mon Sep 17 00:00:00 2001 From: Haitao Pan Date: Fri, 23 May 2025 21:19:18 +0800 Subject: [PATCH] add playbooks & scripts --- README.md | 91 +- ansible.cfg | 15 + inventory/group_vars/all.yml | 5 + inventory/hosts/all | 19 + inventory/hosts/vpn | 2 + inventory/k3s-cluster | 12 + playbooks/common | 8 + playbooks/deploy-docker-harbor.yml | 5 + playbooks/deploy-docker-keycloak.yml | 5 + playbooks/init-harbor-server | 17 + playbooks/init_chaos_mesh | 17 + playbooks/init_chartmuseum | 8 + playbooks/init_deepflow | 16 + playbooks/init_flagger-loadtester | 16 + playbooks/init_gitlab | 23 + playbooks/init_grafana_alloy | 8 + playbooks/init_harbor_server | 8 + playbooks/init_jenkins | 18 + playbooks/init_k3s_cluster_agent | 8 + playbooks/init_k3s_cluster_server | 8 + playbooks/init_k3s_cluster_std | 27 + playbooks/init_k3s_cluster_with_argo_server | 38 + playbooks/init_observability-agent | 13 + playbooks/init_observability-server | 29 + playbooks/init_openldap | 18 + playbooks/init_splunk-otel-collector | 13 + playbooks/init_telegraf | 10 + playbooks/init_vault | 8 + playbooks/init_vpn_gateway.yml | 7 + playbooks/keycloak_server | 7 + .../roles/docker/keycloak/defaults/main.yml | 14 + .../roles/docker/keycloak/files/nginx.conf | 37 + .../roles/docker/keycloak/tasks/main.yml | 29 + .../keycloak/templates/docker-compose.yml.j2 | 64 + playbooks/pre_setup.sh | 48 + playbooks/renew_nodes_ssl_certs | 8 + playbooks/roles/charts/app/meta/main.yml | 2 + playbooks/roles/charts/app/tasks/main.yml | 16 + .../roles/charts/app/templates/.gitignore | 2 + .../charts/app/templates/deploy-app.yaml | 18 + .../charts/argo-server/files/setup-argocd.sh | 100 + .../roles/charts/argo-server/meta/main.yml | 2 + .../roles/charts/argo-server/tasks/main.yml | 2 + .../roles/charts/chaos-mesh/files/setup.sh | 24 + playbooks/roles/charts/chaos-mesh/howto.md | 124 + .../roles/charts/chaos-mesh/meta/main.yml | 2 + .../roles/charts/chaos-mesh/tasks/main.yml | 4 
+ .../roles/charts/chartmuseum/files/setup.sh | 37 + .../roles/charts/chartmuseum/meta/main.yml | 2 + .../roles/charts/chartmuseum/tasks/main.yml | 4 + .../roles/charts/chartmuseum/vars/main.yml | 8 + .../roles/charts/clickhouse/meta/main.yml | 2 + .../roles/charts/clickhouse/tasks/main.yml | 48 + .../charts/clickhouse/templates/.gitignore | 2 + .../clickhouse-cluster/clickhouse-config.yaml | 94 + .../clickhouse-ingress.yaml | 18 + .../clickhouse-service.yaml | 23 + .../clickhouse-statefulset.yml | 103 + .../clickhouse-user-config.yaml | 19 + .../templates/otel-collector/configmap.yaml | 142 + .../templates/otel-collector/deployment.yaml | 42 + .../templates/otel-collector/ingress.yaml | 19 + .../templates/otel-collector/service.yaml | 48 + .../charts/clickhouse/templates/postsetup.sh | 27 + .../templates/qryn/qryn-deployment.yaml | 36 + .../templates/qryn/qryn-ingress.yaml | 24 + .../templates/qryn/qryn-service.yaml | 15 + playbooks/roles/charts/deepflow/Readme.md | 12 + .../roles/charts/deepflow/files/post-setup.sh | 7 + .../roles/charts/deepflow/files/pre-setup.sh | 6 + .../roles/charts/deepflow/files/setup.sh | 29 + playbooks/roles/charts/deepflow/meta/main.yml | 2 + .../roles/charts/deepflow/tasks/main.yml | 19 + .../charts/flagger-loadtester/files/setup.sh | 47 + .../charts/flagger-loadtester/meta/main.yml | 2 + .../charts/flagger-loadtester/tasks/main.yml | 4 + .../roles/charts/gitlab/files/post-setup.sh | 30 + .../roles/charts/gitlab/files/pre-setup.sh | 9 + .../charts/gitlab/files/setup-with-oidc.sh | 106 + .../charts/gitlab/files/setup-with_aws-s3.sh | 154 + playbooks/roles/charts/gitlab/files/setup.sh | 119 + playbooks/roles/charts/gitlab/meta/main.yml | 5 + playbooks/roles/charts/gitlab/tasks/main.yml | 58 + .../charts/gitlab/templates/gitlab-backup-cfg | 5 + .../charts/gitlab/templates/provider.yaml | 18 + .../roles/charts/harbor/files/post-setup.sh | 14 + .../roles/charts/harbor/files/pre-setup.sh | 13 + .../harbor/files/setup-bitnami-harbor.sh | 85 + 
.../harbor/files/setup-office-harbor.sh | 91 + playbooks/roles/charts/harbor/meta/main.yml | 4 + playbooks/roles/charts/harbor/tasks/main.yml | 38 + .../harbor/templates/harbor-oidc-config.json | 11 + playbooks/roles/charts/harbor/vars/main.yml | 9 + .../roles/charts/jenkins/files/pre-setup.sh | 6 + playbooks/roles/charts/jenkins/files/setup.sh | 86 + playbooks/roles/charts/jenkins/howto.md | 124 + playbooks/roles/charts/jenkins/meta/main.yml | 3 + playbooks/roles/charts/jenkins/tasks/main.yml | 18 + .../roles/charts/keycloak/files/pre-setup.sh | 5 + .../charts/keycloak/files/setup-keycloak.sh | 39 + playbooks/roles/charts/keycloak/meta/main.yml | 3 + playbooks/roles/charts/keycloak/readme.md | 8 + .../roles/charts/keycloak/tasks/main.yml | 29 + .../templates/aws-gloabl-oidc-broker.yaml | 74 + playbooks/roles/charts/keycloak/vars/main.yml | 16 + .../roles/charts/mysql/files/setup-mysql.sh | 6 + playbooks/roles/charts/mysql/tasks/main.yml | 3 + .../roles/charts/node-exporter/tasks/main.yml | 23 + .../templates/node-exporter.service | 14 + .../charts/observability-agent/files/setup.sh | 46 + .../charts/observability-agent/meta/main.yml | 2 + .../charts/observability-agent/tasks/main.yml | 3 + .../files/mysql-db-init-setup.sh | 6 + .../files/setup-observable-server.sh | 132 + .../charts/observability-server/meta/main.yml | 3 + .../observability-server/tasks/main.yml | 39 + .../charts/openldap/files/setup-openldap.sh | 44 + playbooks/roles/charts/openldap/meta/main.yml | 3 + .../roles/charts/openldap/tasks/main.yml | 13 + .../charts/openldap/templates/.gitignore | 2 + .../charts/openldap/templates/ingress.yaml | 45 + .../charts/postgresql/files/post-setup.sh | 16 + .../postgresql/files/setup-postgresql.sh | 13 + .../roles/charts/postgresql/tasks/main.yml | 3 + .../roles/charts/redis/files/setup-redis.sh | 14 + playbooks/roles/charts/redis/tasks/main.yml | 3 + .../splunk-otel-collector/files/setup.sh | 20 + .../splunk-otel-collector/tasks/main.yml | 2 + 
playbooks/roles/docker/harbor/README.md | 99 + .../roles/docker/harbor/defaults/main.yml | 138 + playbooks/roles/docker/harbor/tasks/main.yml | 37 + .../roles/docker/harbor/tasks/post-setup.yml | 19 + .../roles/docker/harbor/tasks/pre-setup.yml | 40 + .../templates/common/config/core/app.conf | 6 + .../harbor/templates/common/config/core/env | 47 + .../harbor/templates/common/config/db/env | 2 + .../common/config/jobservice/config.yml | 38 + .../templates/common/config/jobservice/env | 13 + .../common/config/log/logrotate.conf | 8 + .../common/config/log/rsyslog_docker.conf | 7 + .../templates/common/config/nginx/nginx.conf | 149 + .../templates/common/config/portal/nginx.conf | 42 + .../common/config/registry/config.yml | 49 + .../templates/common/config/registry/passwd | 1 + .../templates/common/config/registry/root.crt | 0 .../common/config/registryctl/config.yml | 5 + .../templates/common/config/registryctl/env | 2 + .../harbor/templates/docker-compose.yml.j2 | 195 + playbooks/roles/docker/keycloak/README.md | 113 + .../roles/docker/keycloak/defaults/main.yml | 36 + .../docker/keycloak/files/create_keystore.sh | 32 + .../roles/docker/keycloak/tasks/main.yml | 33 + .../docker/keycloak/tasks/post-setup.yml | 31 + .../roles/docker/keycloak/tasks/pre-setup.yml | 22 + .../keycloak/templates/docker-compose.yml.j2 | 67 + .../docker/keycloak/templates/nginx.conf.j2 | 70 + .../K8S-Dashboard-2025-01015.json | 6734 +++++++++++++++++ .../Node-Exporter-Dashboard-202501015.json | 5890 ++++++++++++++ .../alerting/files/setup-observable-server.sh | 102 + playbooks/roles/vhosts/alerting/meta/main.yml | 2 + .../roles/vhosts/alerting/tasks/main.yml | 17 + .../vhosts/alerting/templates/alerting_rules | 37 + .../vhosts/alerting/templates/recording_rules | 55 + .../roles/vhosts/alloy/defaults/main.yml | 23 + .../files/loki_journal_sources_gateway.yml | 5 + .../files/loki_journal_sources_k3s_agent.yml | 3 + .../files/loki_journal_sources_k3s_server.yml | 3 + 
.../files/loki_journal_sources_postgresql.yml | 3 + .../alloy/files/loki_journal_sources_vpn.yml | 3 + playbooks/roles/vhosts/alloy/tasks/main.yml | 55 + .../vhosts/alloy/templates/config.alloy.j2 | 19 + .../files/certs_automated_issuance.sh | 32 + .../files/fetch_certs_from_vault.py | 71 + .../cert-manager/files/get_certificate.sh | 35 + .../cert-manager/files/update-certs-secret.sh | 10 + .../roles/vhosts/cert-manager/meta/main.yml | 2 + .../roles/vhosts/cert-manager/tasks/main.yml | 7 + .../roles/vhosts/cert-manager/vars/main.yml | 2 + .../roles/vhosts/common/defaults/main.yml | 38 + .../vhosts/common/files/install-packages.sh | 7 + .../roles/vhosts/common/files/secure_ssh.sh | 11 + .../roles/vhosts/common/handlers/main.yml | 10 + .../common/tasks/configure_journald.yml | 7 + .../common/tasks/configure_logrotate.yaml | 7 + .../vhosts/common/tasks/include_gpu.yaml | 17 + playbooks/roles/vhosts/common/tasks/main.yml | 34 + .../vhosts/common/tasks/set_hostname.yaml | 12 + .../vhosts/common/tasks/set_timezone.yaml | 2 + .../vhosts/common/templates/authorized_keys | 3 + .../roles/vhosts/common/templates/hostname | 1 + playbooks/roles/vhosts/common/templates/hosts | 26 + .../common/templates/journald_logrotate.j2 | 5 + .../common/templates/logrotate-monitor-agent | 8 + .../common/templates/rsyslog_logrotate.j2 | 23 + .../vhosts/k3s-addon/files/setup-argocd.sh | 102 + .../k3s-addon/files/setup-dns-provider.sh | 36 + .../vhosts/k3s-addon/files/setup-egress.sh | 24 + .../vhosts/k3s-addon/files/setup-flagger.sh | 21 + .../vhosts/k3s-addon/files/setup-fluxcd.sh | 46 + .../k3s-addon/files/setup-ingress-apisix.sh | 38 + .../vhosts/k3s-addon/files/setup-ingress.sh | 145 + .../k3s-addon/files/setup-keda-operator.sh | 6 + .../files/setup-prometheus-operator.sh | 52 + .../roles/vhosts/k3s-addon/meta/main.yml | 2 + .../roles/vhosts/k3s-addon/tasks/main.yml | 15 + .../templates/ingress-apisix-dashboard.yaml | 33 + .../templates/ingress-apisix-values.yaml | 24 + 
.../kubernetes-discovery-config.yaml | 65 + .../kubernetes-discovery-serviceaccount.yaml | 40 + .../templates/kubernetes-discovery.yaml | 47 + .../k3s-cluster-agent/defaults/main.yml | 1 + .../k3s-cluster-agent/tasks/bootstrap.yml | 0 .../k3s-cluster-agent/tasks/destroy.yml | 0 .../vhosts/k3s-cluster-agent/tasks/main.yml | 2 + .../k3s-cluster-agent/tasks/upgrade.yml | 0 .../templates/install_k3s_agent.sh.j2 | 3 + .../vhosts/k3s-cluster-agent/vars/main.yml | 5 + .../k3s-cluster-server/defaults/main.yml | 1 + .../k3s-cluster-server/tasks/add-master.yml | 0 .../k3s-cluster-server/tasks/backup.yml | 0 .../k3s-cluster-server/tasks/bootstrap.yml | 0 .../k3s-cluster-server/tasks/destroy.yml | 0 .../vhosts/k3s-cluster-server/tasks/main.yml | 2 + .../k3s-cluster-server/tasks/recovery.yml | 0 .../k3s-cluster-server/tasks/upgrade.yml | 0 .../templates/install_k3s_server.sh.j2 | 3 + .../vhosts/k3s-cluster-server/vars/main.yml | 17 + .../roles/vhosts/k3s-reset/files/reset-k3s.sh | 32 + .../roles/vhosts/k3s-reset/tasks/main.yml | 4 + .../vhosts/k3s/files/setup-cni-cilium.sh | 19 + .../vhosts/k3s/files/setup-cni-kubeovn.sh | 17 + playbooks/roles/vhosts/k3s/files/setup-k3s.sh | 134 + playbooks/roles/vhosts/k3s/meta/main.yml | 2 + playbooks/roles/vhosts/k3s/tasks/main.yml | 11 + .../roles/vhosts/k3s/templates/cni_install.sh | 3657 +++++++++ .../files/display_network_info.sh | 10 + .../roles/vhosts/network_info/tasks/main.yml | 15 + .../vhosts/prometheus-transfer/meta/main.yml | 2 + .../vhosts/prometheus-transfer/tasks/main.yml | 21 + .../templates/prometheus-transfer.service | 16 + .../templates/prometheus-transfer.yml | 9 + .../start-prometheus-transfer-service.sh | 2 + .../stop-prometheus-transfer-service.sh | 2 + .../roles/vhosts/promtail-agent/meta/main.yml | 2 + .../vhosts/promtail-agent/tasks/main.yml | 19 + .../templates/promtail-agent.service | 15 + .../promtail-agent/templates/promtail.yaml | 38 + .../roles/vhosts/secret-manger/tasks/main.yml | 48 + 
.../roles/vhosts/telegraf/handlers/main.yml | 5 + playbooks/roles/vhosts/telegraf/meta/main.yml | 2 + .../roles/vhosts/telegraf/tasks/main.yml | 49 + .../vhosts/telegraf/templates/telegraf.conf | 32 + playbooks/roles/vhosts/vault/files/setup.sh | 34 + playbooks/roles/vhosts/vault/meta/main.yml | 2 + playbooks/roles/vhosts/vault/readme.md | 4 + playbooks/roles/vhosts/vault/tasks/main.yml | 4 + playbooks/roles/vhosts/vault/vars/main.yml | 7 + .../files/enable_ip_forward.sh | 5 + .../vhosts/wireguard-client/tasks/main.yml | 17 + .../wireguard-client/templates/server.conf | 15 + .../files/enable_ip_forward.sh | 5 + .../wireguard-gateway/handlers/main.yml | 4 + .../vhosts/wireguard-gateway/meta/main.yml | 2 + .../vhosts/wireguard-gateway/tasks/main.yml | 45 + .../wireguard-gateway/templates/wg0.conf.j2 | 27 + playbooks/wireguard_ali_vpn_gw | 29 + playbooks/wireguard_client | 24 + playbooks/wireguard_gateway | 7 + scripts/Fetch_packages_depends.sh | 58 + scripts/Jenkinsfile | 27 + scripts/ansible_playbook_hosts_setup.sh | 40 + scripts/artifact/setup-harbor.sh | 66 + .../backup_images_v6.3-20250309-17.json | 435 ++ scripts/deepflow/check_k8s_node_config.sh | 215 + scripts/deepflow/clean-failed-pods.sh | 14 + .../deepflow-server-master-controller-pre.sh | 20 + .../deepflow-server-slave-controller-pre.sh | 20 + scripts/deepflow/deepflow_k8s_backup.sh | 161 + scripts/deepflow/deploy-k8s.sh | 19 + scripts/deepflow/pull-all-v6.4.sh | 40 + scripts/deepflow/pull_save_scp_image.sh | 32 + scripts/deepflow/setup-agent-all-in-one.sh | 194 + scripts/deepflow/setup-deepflow-agent.sh | 13 + scripts/deploy-open-webui.sh | 1 + scripts/dynamic_inventory.py | 69 + scripts/gather_network_info.yml | 12 + scripts/generate_ssl.sh | 79 + .../global-monitor/agent-group-config.yaml | 3 + scripts/global-monitor/config/containerd.toml | 34 + .../config/deepflow-registry.yaml | 23 + scripts/global-monitor/config/nginx.conf | 19 + scripts/global-monitor/config/registry.yaml | 23 + 
scripts/global-monitor/custom-domain.yaml | 5 + .../deepflow-registry/all.tag.list | 51 + .../deepflow-registry/compose.yaml | 12 + .../deepflow-registry/push_images.sh | 33 + .../deepflow-registry/setup-nerdctl.sh | 17 + .../deepflow-registry/setup-registry.sh | 12 + .../deepflow-registry/show_images.sh | 39 + .../deepflow-sever-values-v6.3.yaml | 117 + .../setup-agent-group-config.sh | 8 + scripts/global-monitor/setup-coroot.sh | 11 + .../setup-deepflow-Host-Domain-Group.sh | 5 + .../setup-deepflow-Host-Domain.sh | 8 + .../setup-deepflow-add-domain.sh | 13 + .../setup-deepflow-server-ee-all-in-one.sh | 36 + scripts/global-monitor/setup-deepflow.sh | 27 + scripts/global-monitor/setup-ingress.sh | 56 + .../global-monitor/setup-kubesphere-core.sh | 1 + scripts/global-monitor/setup-signoz.sh | 10 + scripts/ingress-installer.sh | 106 + scripts/init_ansible_role.sh | 99 + scripts/init_linux_user.sh | 40 + scripts/k3s-cluster/.gitignore | 13 + scripts/k3s-cluster/check-cilium-egress.sh | 31 + .../k3s-cluster/check_cilium_requirements.sh | 140 + scripts/k3s-cluster/cilium-cli.sh | 9 + scripts/k3s-cluster/cilium-fixed.sh | 37 + .../deploy_velero_with_chart_values_yaml.sh | 85 + scripts/k3s-cluster/egress-nat-test.yaml | 46 + .../init_k3s_cluster_agent_role.sh | 46 + .../init_k3s_cluster_server_role.sh | 69 + scripts/k3s-cluster/k3s.service | 37 + scripts/k3s-cluster/k3s.service-without-cni | 38 + scripts/k3s-cluster/k8s_backup_config.yaml | 25 + scripts/k3s-cluster/k8s_backup_tool.sh | 391 + scripts/k3s-cluster/k8s_backup_tool_howto.md | 119 + scripts/k3s-cluster/k8s_restore_all.sh | 21 + scripts/k3s-cluster/set-node-label.sh | 6 + scripts/k3s-cluster/setup-cilium-cni.sh | 79 + scripts/k3s-cluster/setup-cilium-helm.sh | 28 + scripts/k3s-cluster/setup-egress-gateway.sh | 77 + scripts/k3s-cluster/setup-k3s-agent.sh | 72 + .../k3s-cluster/setup-k3s-cluster-agent.sh | 72 + .../k3s-cluster/setup-k3s-cluster-with-br0.sh | 23 + scripts/k3s-cluster/setup-k3s-cluster.md | 38 + 
scripts/k3s-cluster/setup-k3s-cluster.sh | 284 + scripts/k3s-cluster/setup-k3s-with-gitops.sh | 206 + scripts/k3s-cluster/setup-k3s-with-ingress.sh | 226 + scripts/k3s-cluster/setup-nginx-ingress.sh | 81 + .../GatewayAPI-deepflow-example.yaml | 44 + scripts/kong-gateway/GatewayAPI-example.yaml | 81 + .../kong-gateway/GatewayAPI-http-example.yaml | 81 + scripts/kong-gateway/deploy-kong-gateway.sh | 77 + scripts/make_k3s_offline_package.sh | 471 ++ scripts/merge_csv.py | 30 + scripts/merge_vars.py | 91 + scripts/network-config/ubuntu/init-wsl.sh | 40 + scripts/network-config/ubuntu/readme.md | 10 + scripts/network-config/windows/readme.md | 1 + .../windows/start-wsl-bridge.ps1 | 80 + .../pipeline-library/vars/ansibleSteps.groovy | 74 + scripts/pulp-installer.sh | 120 + scripts/registry/.gitignore | 17 + scripts/registry/all.tag.list | 47 + scripts/registry/clean_unlabeled_images.sh | 12 + scripts/registry/push_images.sh | 39 + scripts/registry/setup-nerdctl.sh | 17 + scripts/registry/setup-registry.sh | 260 + scripts/registry/show_images.sh | 37 + scripts/secret/README.md | 45 + scripts/secret/hcp/__init__.py | 1 + scripts/secret/hcp/secret.py | 55 + scripts/secret/setup.py | 17 + scripts/secret/tests/__init__.py | 0 scripts/secret/tests/test_secret.py | 29 + scripts/setup-gitea.sh | 83 + scripts/setup-grafana-agent.sh | 74 + scripts/setup-microservice-demo.sh | 11 + scripts/setup-open-telemetry-demo.sh | 116 + scripts/setup-vector.sh | 101 + 371 files changed, 29979 insertions(+), 1 deletion(-) create mode 100644 ansible.cfg create mode 100644 inventory/group_vars/all.yml create mode 100644 inventory/hosts/all create mode 100644 inventory/hosts/vpn create mode 100644 inventory/k3s-cluster create mode 100644 playbooks/common create mode 100644 playbooks/deploy-docker-harbor.yml create mode 100644 playbooks/deploy-docker-keycloak.yml create mode 100644 playbooks/init-harbor-server create mode 100644 playbooks/init_chaos_mesh create mode 100644 
playbooks/init_chartmuseum create mode 100644 playbooks/init_deepflow create mode 100644 playbooks/init_flagger-loadtester create mode 100644 playbooks/init_gitlab create mode 100644 playbooks/init_grafana_alloy create mode 100644 playbooks/init_harbor_server create mode 100644 playbooks/init_jenkins create mode 100644 playbooks/init_k3s_cluster_agent create mode 100644 playbooks/init_k3s_cluster_server create mode 100644 playbooks/init_k3s_cluster_std create mode 100644 playbooks/init_k3s_cluster_with_argo_server create mode 100644 playbooks/init_observability-agent create mode 100644 playbooks/init_observability-server create mode 100644 playbooks/init_openldap create mode 100644 playbooks/init_splunk-otel-collector create mode 100644 playbooks/init_telegraf create mode 100644 playbooks/init_vault create mode 100755 playbooks/init_vpn_gateway.yml create mode 100644 playbooks/keycloak_server create mode 100644 playbooks/playbooks/roles/docker/keycloak/defaults/main.yml create mode 100644 playbooks/playbooks/roles/docker/keycloak/files/nginx.conf create mode 100644 playbooks/playbooks/roles/docker/keycloak/tasks/main.yml create mode 100644 playbooks/playbooks/roles/docker/keycloak/templates/docker-compose.yml.j2 create mode 100644 playbooks/pre_setup.sh create mode 100644 playbooks/renew_nodes_ssl_certs create mode 100644 playbooks/roles/charts/app/meta/main.yml create mode 100755 playbooks/roles/charts/app/tasks/main.yml create mode 100644 playbooks/roles/charts/app/templates/.gitignore create mode 100644 playbooks/roles/charts/app/templates/deploy-app.yaml create mode 100644 playbooks/roles/charts/argo-server/files/setup-argocd.sh create mode 100644 playbooks/roles/charts/argo-server/meta/main.yml create mode 100755 playbooks/roles/charts/argo-server/tasks/main.yml create mode 100644 playbooks/roles/charts/chaos-mesh/files/setup.sh create mode 100644 playbooks/roles/charts/chaos-mesh/howto.md create mode 100644 playbooks/roles/charts/chaos-mesh/meta/main.yml 
create mode 100755 playbooks/roles/charts/chaos-mesh/tasks/main.yml create mode 100644 playbooks/roles/charts/chartmuseum/files/setup.sh create mode 100644 playbooks/roles/charts/chartmuseum/meta/main.yml create mode 100755 playbooks/roles/charts/chartmuseum/tasks/main.yml create mode 100644 playbooks/roles/charts/chartmuseum/vars/main.yml create mode 100644 playbooks/roles/charts/clickhouse/meta/main.yml create mode 100755 playbooks/roles/charts/clickhouse/tasks/main.yml create mode 100644 playbooks/roles/charts/clickhouse/templates/.gitignore create mode 100644 playbooks/roles/charts/clickhouse/templates/clickhouse-cluster/clickhouse-config.yaml create mode 100644 playbooks/roles/charts/clickhouse/templates/clickhouse-cluster/clickhouse-ingress.yaml create mode 100644 playbooks/roles/charts/clickhouse/templates/clickhouse-cluster/clickhouse-service.yaml create mode 100644 playbooks/roles/charts/clickhouse/templates/clickhouse-cluster/clickhouse-statefulset.yml create mode 100644 playbooks/roles/charts/clickhouse/templates/clickhouse-cluster/clickhouse-user-config.yaml create mode 100644 playbooks/roles/charts/clickhouse/templates/otel-collector/configmap.yaml create mode 100644 playbooks/roles/charts/clickhouse/templates/otel-collector/deployment.yaml create mode 100644 playbooks/roles/charts/clickhouse/templates/otel-collector/ingress.yaml create mode 100644 playbooks/roles/charts/clickhouse/templates/otel-collector/service.yaml create mode 100755 playbooks/roles/charts/clickhouse/templates/postsetup.sh create mode 100644 playbooks/roles/charts/clickhouse/templates/qryn/qryn-deployment.yaml create mode 100644 playbooks/roles/charts/clickhouse/templates/qryn/qryn-ingress.yaml create mode 100644 playbooks/roles/charts/clickhouse/templates/qryn/qryn-service.yaml create mode 100644 playbooks/roles/charts/deepflow/Readme.md create mode 100644 playbooks/roles/charts/deepflow/files/post-setup.sh create mode 100644 playbooks/roles/charts/deepflow/files/pre-setup.sh 
create mode 100644 playbooks/roles/charts/deepflow/files/setup.sh create mode 100644 playbooks/roles/charts/deepflow/meta/main.yml create mode 100755 playbooks/roles/charts/deepflow/tasks/main.yml create mode 100644 playbooks/roles/charts/flagger-loadtester/files/setup.sh create mode 100644 playbooks/roles/charts/flagger-loadtester/meta/main.yml create mode 100755 playbooks/roles/charts/flagger-loadtester/tasks/main.yml create mode 100755 playbooks/roles/charts/gitlab/files/post-setup.sh create mode 100755 playbooks/roles/charts/gitlab/files/pre-setup.sh create mode 100644 playbooks/roles/charts/gitlab/files/setup-with-oidc.sh create mode 100644 playbooks/roles/charts/gitlab/files/setup-with_aws-s3.sh create mode 100644 playbooks/roles/charts/gitlab/files/setup.sh create mode 100644 playbooks/roles/charts/gitlab/meta/main.yml create mode 100755 playbooks/roles/charts/gitlab/tasks/main.yml create mode 100644 playbooks/roles/charts/gitlab/templates/gitlab-backup-cfg create mode 100644 playbooks/roles/charts/gitlab/templates/provider.yaml create mode 100644 playbooks/roles/charts/harbor/files/post-setup.sh create mode 100644 playbooks/roles/charts/harbor/files/pre-setup.sh create mode 100644 playbooks/roles/charts/harbor/files/setup-bitnami-harbor.sh create mode 100644 playbooks/roles/charts/harbor/files/setup-office-harbor.sh create mode 100644 playbooks/roles/charts/harbor/meta/main.yml create mode 100755 playbooks/roles/charts/harbor/tasks/main.yml create mode 100644 playbooks/roles/charts/harbor/templates/harbor-oidc-config.json create mode 100644 playbooks/roles/charts/harbor/vars/main.yml create mode 100644 playbooks/roles/charts/jenkins/files/pre-setup.sh create mode 100644 playbooks/roles/charts/jenkins/files/setup.sh create mode 100644 playbooks/roles/charts/jenkins/howto.md create mode 100644 playbooks/roles/charts/jenkins/meta/main.yml create mode 100755 playbooks/roles/charts/jenkins/tasks/main.yml create mode 100644 
playbooks/roles/charts/keycloak/files/pre-setup.sh create mode 100644 playbooks/roles/charts/keycloak/files/setup-keycloak.sh create mode 100644 playbooks/roles/charts/keycloak/meta/main.yml create mode 100644 playbooks/roles/charts/keycloak/readme.md create mode 100755 playbooks/roles/charts/keycloak/tasks/main.yml create mode 100644 playbooks/roles/charts/keycloak/templates/aws-gloabl-oidc-broker.yaml create mode 100644 playbooks/roles/charts/keycloak/vars/main.yml create mode 100644 playbooks/roles/charts/mysql/files/setup-mysql.sh create mode 100755 playbooks/roles/charts/mysql/tasks/main.yml create mode 100755 playbooks/roles/charts/node-exporter/tasks/main.yml create mode 100755 playbooks/roles/charts/node-exporter/templates/node-exporter.service create mode 100644 playbooks/roles/charts/observability-agent/files/setup.sh create mode 100644 playbooks/roles/charts/observability-agent/meta/main.yml create mode 100755 playbooks/roles/charts/observability-agent/tasks/main.yml create mode 100644 playbooks/roles/charts/observability-server/files/mysql-db-init-setup.sh create mode 100644 playbooks/roles/charts/observability-server/files/setup-observable-server.sh create mode 100644 playbooks/roles/charts/observability-server/meta/main.yml create mode 100755 playbooks/roles/charts/observability-server/tasks/main.yml create mode 100644 playbooks/roles/charts/openldap/files/setup-openldap.sh create mode 100644 playbooks/roles/charts/openldap/meta/main.yml create mode 100755 playbooks/roles/charts/openldap/tasks/main.yml create mode 100644 playbooks/roles/charts/openldap/templates/.gitignore create mode 100644 playbooks/roles/charts/openldap/templates/ingress.yaml create mode 100644 playbooks/roles/charts/postgresql/files/post-setup.sh create mode 100644 playbooks/roles/charts/postgresql/files/setup-postgresql.sh create mode 100755 playbooks/roles/charts/postgresql/tasks/main.yml create mode 100644 playbooks/roles/charts/redis/files/setup-redis.sh create mode 100755 
playbooks/roles/charts/redis/tasks/main.yml create mode 100644 playbooks/roles/charts/splunk-otel-collector/files/setup.sh create mode 100755 playbooks/roles/charts/splunk-otel-collector/tasks/main.yml create mode 100644 playbooks/roles/docker/harbor/README.md create mode 100644 playbooks/roles/docker/harbor/defaults/main.yml create mode 100644 playbooks/roles/docker/harbor/tasks/main.yml create mode 100644 playbooks/roles/docker/harbor/tasks/post-setup.yml create mode 100644 playbooks/roles/docker/harbor/tasks/pre-setup.yml create mode 100644 playbooks/roles/docker/harbor/templates/common/config/core/app.conf create mode 100644 playbooks/roles/docker/harbor/templates/common/config/core/env create mode 100644 playbooks/roles/docker/harbor/templates/common/config/db/env create mode 100644 playbooks/roles/docker/harbor/templates/common/config/jobservice/config.yml create mode 100644 playbooks/roles/docker/harbor/templates/common/config/jobservice/env create mode 100644 playbooks/roles/docker/harbor/templates/common/config/log/logrotate.conf create mode 100644 playbooks/roles/docker/harbor/templates/common/config/log/rsyslog_docker.conf create mode 100644 playbooks/roles/docker/harbor/templates/common/config/nginx/nginx.conf create mode 100644 playbooks/roles/docker/harbor/templates/common/config/portal/nginx.conf create mode 100644 playbooks/roles/docker/harbor/templates/common/config/registry/config.yml create mode 100644 playbooks/roles/docker/harbor/templates/common/config/registry/passwd create mode 100755 playbooks/roles/docker/harbor/templates/common/config/registry/root.crt create mode 100644 playbooks/roles/docker/harbor/templates/common/config/registryctl/config.yml create mode 100644 playbooks/roles/docker/harbor/templates/common/config/registryctl/env create mode 100644 playbooks/roles/docker/harbor/templates/docker-compose.yml.j2 create mode 100644 playbooks/roles/docker/keycloak/README.md create mode 100644 
playbooks/roles/docker/keycloak/defaults/main.yml create mode 100644 playbooks/roles/docker/keycloak/files/create_keystore.sh create mode 100644 playbooks/roles/docker/keycloak/tasks/main.yml create mode 100644 playbooks/roles/docker/keycloak/tasks/post-setup.yml create mode 100644 playbooks/roles/docker/keycloak/tasks/pre-setup.yml create mode 100644 playbooks/roles/docker/keycloak/templates/docker-compose.yml.j2 create mode 100644 playbooks/roles/docker/keycloak/templates/nginx.conf.j2 create mode 100644 playbooks/roles/grafana-dashboard/K8S-Dashboard-2025-01015.json create mode 100644 playbooks/roles/grafana-dashboard/Node-Exporter-Dashboard-202501015.json create mode 100644 playbooks/roles/vhosts/alerting/files/setup-observable-server.sh create mode 100644 playbooks/roles/vhosts/alerting/meta/main.yml create mode 100755 playbooks/roles/vhosts/alerting/tasks/main.yml create mode 100644 playbooks/roles/vhosts/alerting/templates/alerting_rules create mode 100644 playbooks/roles/vhosts/alerting/templates/recording_rules create mode 100644 playbooks/roles/vhosts/alloy/defaults/main.yml create mode 100644 playbooks/roles/vhosts/alloy/files/loki_journal_sources_gateway.yml create mode 100644 playbooks/roles/vhosts/alloy/files/loki_journal_sources_k3s_agent.yml create mode 100644 playbooks/roles/vhosts/alloy/files/loki_journal_sources_k3s_server.yml create mode 100644 playbooks/roles/vhosts/alloy/files/loki_journal_sources_postgresql.yml create mode 100644 playbooks/roles/vhosts/alloy/files/loki_journal_sources_vpn.yml create mode 100644 playbooks/roles/vhosts/alloy/tasks/main.yml create mode 100644 playbooks/roles/vhosts/alloy/templates/config.alloy.j2 create mode 100644 playbooks/roles/vhosts/cert-manager/files/certs_automated_issuance.sh create mode 100644 playbooks/roles/vhosts/cert-manager/files/fetch_certs_from_vault.py create mode 100644 playbooks/roles/vhosts/cert-manager/files/get_certificate.sh create mode 100644 
playbooks/roles/vhosts/cert-manager/files/update-certs-secret.sh create mode 100644 playbooks/roles/vhosts/cert-manager/meta/main.yml create mode 100755 playbooks/roles/vhosts/cert-manager/tasks/main.yml create mode 100644 playbooks/roles/vhosts/cert-manager/vars/main.yml create mode 100644 playbooks/roles/vhosts/common/defaults/main.yml create mode 100644 playbooks/roles/vhosts/common/files/install-packages.sh create mode 100644 playbooks/roles/vhosts/common/files/secure_ssh.sh create mode 100644 playbooks/roles/vhosts/common/handlers/main.yml create mode 100644 playbooks/roles/vhosts/common/tasks/configure_journald.yml create mode 100644 playbooks/roles/vhosts/common/tasks/configure_logrotate.yaml create mode 100644 playbooks/roles/vhosts/common/tasks/include_gpu.yaml create mode 100644 playbooks/roles/vhosts/common/tasks/main.yml create mode 100644 playbooks/roles/vhosts/common/tasks/set_hostname.yaml create mode 100644 playbooks/roles/vhosts/common/tasks/set_timezone.yaml create mode 100755 playbooks/roles/vhosts/common/templates/authorized_keys create mode 100755 playbooks/roles/vhosts/common/templates/hostname create mode 100644 playbooks/roles/vhosts/common/templates/hosts create mode 100644 playbooks/roles/vhosts/common/templates/journald_logrotate.j2 create mode 100644 playbooks/roles/vhosts/common/templates/logrotate-monitor-agent create mode 100644 playbooks/roles/vhosts/common/templates/rsyslog_logrotate.j2 create mode 100644 playbooks/roles/vhosts/k3s-addon/files/setup-argocd.sh create mode 100644 playbooks/roles/vhosts/k3s-addon/files/setup-dns-provider.sh create mode 100644 playbooks/roles/vhosts/k3s-addon/files/setup-egress.sh create mode 100644 playbooks/roles/vhosts/k3s-addon/files/setup-flagger.sh create mode 100644 playbooks/roles/vhosts/k3s-addon/files/setup-fluxcd.sh create mode 100644 playbooks/roles/vhosts/k3s-addon/files/setup-ingress-apisix.sh create mode 100644 playbooks/roles/vhosts/k3s-addon/files/setup-ingress.sh create mode 100644 
playbooks/roles/vhosts/k3s-addon/files/setup-keda-operator.sh create mode 100644 playbooks/roles/vhosts/k3s-addon/files/setup-prometheus-operator.sh create mode 100644 playbooks/roles/vhosts/k3s-addon/meta/main.yml create mode 100755 playbooks/roles/vhosts/k3s-addon/tasks/main.yml create mode 100644 playbooks/roles/vhosts/k3s-addon/templates/ingress-apisix-dashboard.yaml create mode 100644 playbooks/roles/vhosts/k3s-addon/templates/ingress-apisix-values.yaml create mode 100644 playbooks/roles/vhosts/k3s-addon/templates/kubernetes-discovery-config.yaml create mode 100644 playbooks/roles/vhosts/k3s-addon/templates/kubernetes-discovery-serviceaccount.yaml create mode 100644 playbooks/roles/vhosts/k3s-addon/templates/kubernetes-discovery.yaml create mode 100644 playbooks/roles/vhosts/k3s-cluster-agent/defaults/main.yml create mode 100644 playbooks/roles/vhosts/k3s-cluster-agent/tasks/bootstrap.yml create mode 100644 playbooks/roles/vhosts/k3s-cluster-agent/tasks/destroy.yml create mode 100644 playbooks/roles/vhosts/k3s-cluster-agent/tasks/main.yml create mode 100644 playbooks/roles/vhosts/k3s-cluster-agent/tasks/upgrade.yml create mode 100644 playbooks/roles/vhosts/k3s-cluster-agent/templates/install_k3s_agent.sh.j2 create mode 100644 playbooks/roles/vhosts/k3s-cluster-agent/vars/main.yml create mode 100644 playbooks/roles/vhosts/k3s-cluster-server/defaults/main.yml create mode 100644 playbooks/roles/vhosts/k3s-cluster-server/tasks/add-master.yml create mode 100644 playbooks/roles/vhosts/k3s-cluster-server/tasks/backup.yml create mode 100644 playbooks/roles/vhosts/k3s-cluster-server/tasks/bootstrap.yml create mode 100644 playbooks/roles/vhosts/k3s-cluster-server/tasks/destroy.yml create mode 100644 playbooks/roles/vhosts/k3s-cluster-server/tasks/main.yml create mode 100644 playbooks/roles/vhosts/k3s-cluster-server/tasks/recovery.yml create mode 100644 playbooks/roles/vhosts/k3s-cluster-server/tasks/upgrade.yml create mode 100644 
playbooks/roles/vhosts/k3s-cluster-server/templates/install_k3s_server.sh.j2 create mode 100644 playbooks/roles/vhosts/k3s-cluster-server/vars/main.yml create mode 100644 playbooks/roles/vhosts/k3s-reset/files/reset-k3s.sh create mode 100755 playbooks/roles/vhosts/k3s-reset/tasks/main.yml create mode 100644 playbooks/roles/vhosts/k3s/files/setup-cni-cilium.sh create mode 100644 playbooks/roles/vhosts/k3s/files/setup-cni-kubeovn.sh create mode 100644 playbooks/roles/vhosts/k3s/files/setup-k3s.sh create mode 100644 playbooks/roles/vhosts/k3s/meta/main.yml create mode 100755 playbooks/roles/vhosts/k3s/tasks/main.yml create mode 100644 playbooks/roles/vhosts/k3s/templates/cni_install.sh create mode 100644 playbooks/roles/vhosts/network_info/files/display_network_info.sh create mode 100755 playbooks/roles/vhosts/network_info/tasks/main.yml create mode 100644 playbooks/roles/vhosts/prometheus-transfer/meta/main.yml create mode 100755 playbooks/roles/vhosts/prometheus-transfer/tasks/main.yml create mode 100644 playbooks/roles/vhosts/prometheus-transfer/templates/prometheus-transfer.service create mode 100644 playbooks/roles/vhosts/prometheus-transfer/templates/prometheus-transfer.yml create mode 100755 playbooks/roles/vhosts/prometheus-transfer/templates/start-prometheus-transfer-service.sh create mode 100755 playbooks/roles/vhosts/prometheus-transfer/templates/stop-prometheus-transfer-service.sh create mode 100644 playbooks/roles/vhosts/promtail-agent/meta/main.yml create mode 100755 playbooks/roles/vhosts/promtail-agent/tasks/main.yml create mode 100644 playbooks/roles/vhosts/promtail-agent/templates/promtail-agent.service create mode 100644 playbooks/roles/vhosts/promtail-agent/templates/promtail.yaml create mode 100755 playbooks/roles/vhosts/secret-manger/tasks/main.yml create mode 100644 playbooks/roles/vhosts/telegraf/handlers/main.yml create mode 100644 playbooks/roles/vhosts/telegraf/meta/main.yml create mode 100755 playbooks/roles/vhosts/telegraf/tasks/main.yml 
create mode 100644 playbooks/roles/vhosts/telegraf/templates/telegraf.conf create mode 100644 playbooks/roles/vhosts/vault/files/setup.sh create mode 100644 playbooks/roles/vhosts/vault/meta/main.yml create mode 100644 playbooks/roles/vhosts/vault/readme.md create mode 100755 playbooks/roles/vhosts/vault/tasks/main.yml create mode 100644 playbooks/roles/vhosts/vault/vars/main.yml create mode 100644 playbooks/roles/vhosts/wireguard-client/files/enable_ip_forward.sh create mode 100755 playbooks/roles/vhosts/wireguard-client/tasks/main.yml create mode 100755 playbooks/roles/vhosts/wireguard-client/templates/server.conf create mode 100644 playbooks/roles/vhosts/wireguard-gateway/files/enable_ip_forward.sh create mode 100644 playbooks/roles/vhosts/wireguard-gateway/handlers/main.yml create mode 100755 playbooks/roles/vhosts/wireguard-gateway/meta/main.yml create mode 100755 playbooks/roles/vhosts/wireguard-gateway/tasks/main.yml create mode 100644 playbooks/roles/vhosts/wireguard-gateway/templates/wg0.conf.j2 create mode 100755 playbooks/wireguard_ali_vpn_gw create mode 100755 playbooks/wireguard_client create mode 100755 playbooks/wireguard_gateway create mode 100644 scripts/Fetch_packages_depends.sh create mode 100644 scripts/Jenkinsfile create mode 100644 scripts/ansible_playbook_hosts_setup.sh create mode 100644 scripts/artifact/setup-harbor.sh create mode 100755 scripts/deepflow/backup_images_v6.3-20250309-17.json create mode 100755 scripts/deepflow/check_k8s_node_config.sh create mode 100755 scripts/deepflow/clean-failed-pods.sh create mode 100755 scripts/deepflow/deepflow-server-master-controller-pre.sh create mode 100755 scripts/deepflow/deepflow-server-slave-controller-pre.sh create mode 100755 scripts/deepflow/deepflow_k8s_backup.sh create mode 100755 scripts/deepflow/deploy-k8s.sh create mode 100644 scripts/deepflow/pull-all-v6.4.sh create mode 100644 scripts/deepflow/pull_save_scp_image.sh create mode 100644 scripts/deepflow/setup-agent-all-in-one.sh create 
mode 100644 scripts/deepflow/setup-deepflow-agent.sh create mode 100644 scripts/deploy-open-webui.sh create mode 100644 scripts/dynamic_inventory.py create mode 100644 scripts/gather_network_info.yml create mode 100644 scripts/generate_ssl.sh create mode 100644 scripts/global-monitor/agent-group-config.yaml create mode 100644 scripts/global-monitor/config/containerd.toml create mode 100644 scripts/global-monitor/config/deepflow-registry.yaml create mode 100644 scripts/global-monitor/config/nginx.conf create mode 100644 scripts/global-monitor/config/registry.yaml create mode 100644 scripts/global-monitor/custom-domain.yaml create mode 100644 scripts/global-monitor/deepflow-registry/all.tag.list create mode 100644 scripts/global-monitor/deepflow-registry/compose.yaml create mode 100644 scripts/global-monitor/deepflow-registry/push_images.sh create mode 100644 scripts/global-monitor/deepflow-registry/setup-nerdctl.sh create mode 100644 scripts/global-monitor/deepflow-registry/setup-registry.sh create mode 100644 scripts/global-monitor/deepflow-registry/show_images.sh create mode 100644 scripts/global-monitor/deepflow-sever-values-v6.3.yaml create mode 100644 scripts/global-monitor/setup-agent-group-config.sh create mode 100644 scripts/global-monitor/setup-coroot.sh create mode 100644 scripts/global-monitor/setup-deepflow-Host-Domain-Group.sh create mode 100644 scripts/global-monitor/setup-deepflow-Host-Domain.sh create mode 100644 scripts/global-monitor/setup-deepflow-add-domain.sh create mode 100644 scripts/global-monitor/setup-deepflow-server-ee-all-in-one.sh create mode 100644 scripts/global-monitor/setup-deepflow.sh create mode 100644 scripts/global-monitor/setup-ingress.sh create mode 100644 scripts/global-monitor/setup-kubesphere-core.sh create mode 100644 scripts/global-monitor/setup-signoz.sh create mode 100644 scripts/ingress-installer.sh create mode 100644 scripts/init_ansible_role.sh create mode 100644 scripts/init_linux_user.sh create mode 100644 
scripts/k3s-cluster/.gitignore create mode 100644 scripts/k3s-cluster/check-cilium-egress.sh create mode 100644 scripts/k3s-cluster/check_cilium_requirements.sh create mode 100644 scripts/k3s-cluster/cilium-cli.sh create mode 100644 scripts/k3s-cluster/cilium-fixed.sh create mode 100755 scripts/k3s-cluster/deploy_velero_with_chart_values_yaml.sh create mode 100644 scripts/k3s-cluster/egress-nat-test.yaml create mode 100644 scripts/k3s-cluster/init_k3s_cluster_agent_role.sh create mode 100644 scripts/k3s-cluster/init_k3s_cluster_server_role.sh create mode 100644 scripts/k3s-cluster/k3s.service create mode 100644 scripts/k3s-cluster/k3s.service-without-cni create mode 100755 scripts/k3s-cluster/k8s_backup_config.yaml create mode 100755 scripts/k3s-cluster/k8s_backup_tool.sh create mode 100644 scripts/k3s-cluster/k8s_backup_tool_howto.md create mode 100644 scripts/k3s-cluster/k8s_restore_all.sh create mode 100644 scripts/k3s-cluster/set-node-label.sh create mode 100644 scripts/k3s-cluster/setup-cilium-cni.sh create mode 100644 scripts/k3s-cluster/setup-cilium-helm.sh create mode 100644 scripts/k3s-cluster/setup-egress-gateway.sh create mode 100644 scripts/k3s-cluster/setup-k3s-agent.sh create mode 100644 scripts/k3s-cluster/setup-k3s-cluster-agent.sh create mode 100644 scripts/k3s-cluster/setup-k3s-cluster-with-br0.sh create mode 100644 scripts/k3s-cluster/setup-k3s-cluster.md create mode 100644 scripts/k3s-cluster/setup-k3s-cluster.sh create mode 100644 scripts/k3s-cluster/setup-k3s-with-gitops.sh create mode 100644 scripts/k3s-cluster/setup-k3s-with-ingress.sh create mode 100644 scripts/k3s-cluster/setup-nginx-ingress.sh create mode 100644 scripts/kong-gateway/GatewayAPI-deepflow-example.yaml create mode 100644 scripts/kong-gateway/GatewayAPI-example.yaml create mode 100644 scripts/kong-gateway/GatewayAPI-http-example.yaml create mode 100644 scripts/kong-gateway/deploy-kong-gateway.sh create mode 100644 scripts/make_k3s_offline_package.sh create mode 100644 
scripts/merge_csv.py create mode 100644 scripts/merge_vars.py create mode 100644 scripts/network-config/ubuntu/init-wsl.sh create mode 100644 scripts/network-config/ubuntu/readme.md create mode 100644 scripts/network-config/windows/readme.md create mode 100644 scripts/network-config/windows/start-wsl-bridge.ps1 create mode 100644 scripts/pipeline-library/vars/ansibleSteps.groovy create mode 100644 scripts/pulp-installer.sh create mode 100644 scripts/registry/.gitignore create mode 100644 scripts/registry/all.tag.list create mode 100644 scripts/registry/clean_unlabeled_images.sh create mode 100644 scripts/registry/push_images.sh create mode 100644 scripts/registry/setup-nerdctl.sh create mode 100644 scripts/registry/setup-registry.sh create mode 100644 scripts/registry/show_images.sh create mode 100644 scripts/secret/README.md create mode 100644 scripts/secret/hcp/__init__.py create mode 100644 scripts/secret/hcp/secret.py create mode 100644 scripts/secret/setup.py create mode 100644 scripts/secret/tests/__init__.py create mode 100644 scripts/secret/tests/test_secret.py create mode 100644 scripts/setup-gitea.sh create mode 100644 scripts/setup-grafana-agent.sh create mode 100644 scripts/setup-microservice-demo.sh create mode 100644 scripts/setup-open-telemetry-demo.sh create mode 100644 scripts/setup-vector.sh diff --git a/README.md b/README.md index d010a60..e078dcb 100644 --- a/README.md +++ b/README.md @@ -1 +1,90 @@ -# gitops \ No newline at end of file +# ansible-playbook + +This repository contains a collection of Ansible playbooks and roles for various infrastructure setups and service management tasks. + +## Playbook 角色说明 + +1. playbooks/roles/docker:适用于简单的、单机环境的部署,主要使用 Docker 和 Docker Compose 进行容器化管理。 +2. playbooks/roles/charts:面向大规模的 Kubernetes 集群,使用 Helm 和标准化 Chart 部署模式进行高可用和可扩展的管理。 +3. 
playbooks/roles/vhosts:传统的非容器化部署方式,通常涉及手动配置服务器和虚拟主机,适用于不使用容器的应用场景。 + + +## Role Summary + +| Role Name | Description | Docker | Charts | VHosts | CICD | Validate | Last Update | +|-------------------------|-------------------------------------------------------|--------|--------|--------|---------|----------|--------------| +| `common` | 通用角色,包含一些常用的功能,如日志记录、监控等。 | | | ✔ | | yes | 2025-02-14 | +| `keycloak` | 用于管理身份认证和授权服务。 | ✔ | | | github | yes | 2024-11-10 | +| `harbor` | 容器镜像仓库角色,用于存储和管理容器镜像。 | ✔ | | | github | yes | 2024-11-14 | +| `app` | 参考模板。 | | | | | | | +| `nginx` | 用于设置 Nginx | | ✔ | ✔ | | | | +| `grafana` | 用于设置 Grafana | | ✔ | ✔ | | | | +| `grafana-loki` | 用于设置 Grafana-loki | | ✔ | ✔ | | | | +| `Grafana-tempo` | 用于设置 Grafana-tempo | | ✔ | ✔ | | | | +| `prometheus` | 用于设置 Prometheus | | ✔ | ✔ | | | | +| `prometheus-transfer` | 用于 Prometheus 数据传输设置。 | | | ✔ | | | | +| `vector` | 用于配置日志收集代理。 | | | ✔ | | | | +| `node-exporter` | 用于导出系统和硬件的监控数据。 | | ✔ | | | | | +| `observability-agent` | 用于管理 Observability 代理。 | | ✔ | ✔ | | | | +| `observability-server` | 用于设置 Observability 服务端。 | | ✔ | ✔ | | | | +| `wireguard-client` | 用于设置 WireGuard 客户端。 | | | ✔ | | | | +| `wireguard-gateway` | 用于设置 WireGuard 网关。 | | | ✔ | | | | +| `vault` | 用于管理敏感数据和密钥。 | | | ✔ | | | | +| `postgresql` | PostgreSQL 数据库角色,用于提供 PostgreSQL 数据库服务。 | | ✔ | | | | | +| `redis` | Redis 数据库角色,用于提供 Redis 数据库服务。 | | ✔ | | | | | +| `chartmuseum` | 图表仓库角色,用于存储和管理 Kubernetes 图表。 | | ✔ | | | | | +| `gitlab` | 代码仓库角色,用于存储和管理代码。 | | ✔ | | | | | +| `mysql` | MySQL 数据库角色,用于提供 MySQL 数据库服务。 | | ✔ | | | | | +| `argo-server` | 用于设置和管理 Argo Server。 | | ✔ | | | | | +| `deepflow` | 用于流量监控与网络性能分析的 DeepFlow 服务。 | | ✔ | | | | | +| `jenkins` | Jenkins 自动化构建工具角色,用于 CI/CD 管道。 | | ✔ | | | | | +| `chaos-mesh` | 用于 Chaos Engineering 测试的 Chaos Mesh 角色。 | | ✔ | | | | | +| `flagger-loadtester` | 用于负载测试的 Flagger Loadtester 角色。 | | ✔ | | | | | +| `splunk-otel-collector` | 用于配置 Splunk OpenTelemetry Collector。 | | ✔ | | | | | +| 
`openldap` | 用于设置和管理 OpenLDAP 身份认证服务。 | | ✔ | | | | | +| `alerting` | 用于设置和管理警报系统。 | | | ✔ | | | | +| `k3s` | 用于创建 Kubernetes 集群。 | | | ✔ | | | | +| `k3s-reset` | 用于重置 Kubernetes 集群。 | | | ✔ | | | | +| `k3s-addon` | 用于安装 Kubernetes 集群插件。 | | | ✔ | | | | +| `secret-manger` | 密钥管理角色,用于管理密钥。 | | | ✔ | | | | +| `cert-manager` | 证书管理角色,用于管理证书。 | | | ✔ | | | | + +表格说明 +- Docker:是否属于 Docker 角色。 +- Charts:是否属于 Helm Chart 角色。 +- VHosts:是否属于虚拟主机管理相关角色。 +- CICD:是否启用 CICD 管道,标明是否集成了自动化流程。 +- Validate:是否经过验证测试。 +- Last Update:最后更新时间。 + +## Usage Examples + +- Linux OS Setup + +ansible-playbook -i inventory/hosts/all playbooks/common -D -C +ansible-playbook -i inventory/hosts/all playbooks/common -D + +- Gather Network Information + +ansible-playbook -i inventory scripts/gather_network_info.yml -e target_group=master + +- Display network information on all nodes + +ansible -i inventory all -m script -a 'playbooks/roles/vhosts/network_info/files/display_network_info.sh' + +- Deploy Keycloak Server + +ansible-playbook -i inventory/hosts/core playbooks/keycloak_server -D + +- Set up WireGuard Gateway + +ansible-playbook -i inventory/hosts/vpn playbooks/wireguard_gateway -D + +- Set up Grafana Alloy + +ansible-playbook -i inventory/k3s-cluster playbooks/init_grafana_alloy -D -C -l cn-k3s-server.svc.plus -e @playbooks/roles/alloy/files/loki_journal_sources_k3s_server.yml -e "ansible_become_pass='xxxx'" + + +- Setup VPN gateway + +ansible-playbook -i inventory/hosts/all playbooks/common -l gateway -D diff --git a/ansible.cfg b/ansible.cfg new file mode 100644 index 0000000..4cee05d --- /dev/null +++ b/ansible.cfg @@ -0,0 +1,15 @@ +[inventory] +cache: yes +cache_plugin: ansible.builtin.jsonfile + +[defaults] +vault_password_file = ~/.vault_password +timeout = 10 +forks = 10 +poll_interval = 10 +transport = smart +gathering = smart +stdout_callback = skippy +host_key_checking = False +deprecation_warnings = False +interpreter_python = /usr/bin/python3 diff --git a/inventory/group_vars/all.yml 
b/inventory/group_vars/all.yml new file mode 100644 index 0000000..f04e15a --- /dev/null +++ b/inventory/group_vars/all.yml @@ -0,0 +1,5 @@ +ansible_port: 22 +ansible_ssh_user: ubuntu +ansible_ssh_private_key_file: ~/.ssh/id_rsa +ansible_host_key_checking: False + diff --git a/inventory/hosts/all b/inventory/hosts/all new file mode 100644 index 0000000..c8aba05 --- /dev/null +++ b/inventory/hosts/all @@ -0,0 +1,19 @@ +[all] +hw-node.svc.plus ansible_host=139.9.139.22 ansible_ssh_user=root +cn-gateway.svc.plus ansible_host=8.130.10.142 ansible_ssh_user=root +us-gateway.svc.plus ansible_host=52.196.108.28 ansible_ssh_user=ubuntu +global-gateway.svc.plus ansible_host=54.183.199.99 ansible_ssh_user=ubuntu +canada-gateway.svc.plus ansible_host=3.96.167.208 ansible_ssh_user=ubuntu +vault.onwalk.net ansible_host=3.101.151.231 ansible_ssh_user=ubuntu +ldap.svc.plus ansible_host=35.182.63.247 ansible_ssh_user=ubuntu +keycloak.svc.plus ansible_host=3.99.126.158 ansible_ssh_user=ubuntu +observability.onwalk.net ansible_host=54.153.80.120 ansible_ssh_user=ubuntu +argocd.svc.plus ansible_host=13.57.247.27 ansible_ssh_user=ubuntu + +[gateway] +vpn-gateway.svc.plus ansible_host=167.179.72.223 ansible_ssh_user=root + +[all:vars] +ansible_port=22 +ansible_ssh_private_key_file=~/.ssh/id_rsa +ansible_host_key_checking=False diff --git a/inventory/hosts/vpn b/inventory/hosts/vpn new file mode 100644 index 0000000..24ab9dd --- /dev/null +++ b/inventory/hosts/vpn @@ -0,0 +1,2 @@ +[vpn-gateway] +xproxy.onwalk.net ansible_host=43.206.158.21 diff --git a/inventory/k3s-cluster b/inventory/k3s-cluster new file mode 100644 index 0000000..117c650 --- /dev/null +++ b/inventory/k3s-cluster @@ -0,0 +1,12 @@ +[all] +cn-gateway.svc.plus ansible_host=10.254.0.1 +cn-k3s-server.svc.plus ansible_host=10.254.0.3 +cn-hw-node.svc.plus ansible_host=10.254.0.4 +global-gateway.svc.plus ansible_host=10.255.0.1 +global-k3s-server.svc.plus ansible_host=10.255.0.3 + +[all:vars] +ansible_port=22 
+ansible_ssh_user=ubuntu +ansible_ssh_private_key_file=~/.ssh/id_rsa +ansible_host_key_checking=False diff --git a/playbooks/common b/playbooks/common new file mode 100644 index 0000000..11a2796 --- /dev/null +++ b/playbooks/common @@ -0,0 +1,8 @@ +--- +- name: Init Linux OS Common setting + hosts: all + user: ubuntu + become: yes + gather_facts: yes + roles: + - vhosts/common diff --git a/playbooks/deploy-docker-harbor.yml b/playbooks/deploy-docker-harbor.yml new file mode 100644 index 0000000..b97054a --- /dev/null +++ b/playbooks/deploy-docker-harbor.yml @@ -0,0 +1,5 @@ +--- +- hosts: all + become: yes + roles: + - docker/harbor diff --git a/playbooks/deploy-docker-keycloak.yml b/playbooks/deploy-docker-keycloak.yml new file mode 100644 index 0000000..9bd9c42 --- /dev/null +++ b/playbooks/deploy-docker-keycloak.yml @@ -0,0 +1,5 @@ +--- +- hosts: all + become: yes + roles: + - docker/keycloak diff --git a/playbooks/init-harbor-server b/playbooks/init-harbor-server new file mode 100644 index 0000000..70d8e1f --- /dev/null +++ b/playbooks/init-harbor-server @@ -0,0 +1,17 @@ +- name: setup harbor + hosts: all + user: root + become: yes + gather_facts: yes + tasks: + - include_role: + name: harbor + vars: + group: master + namespace: harbor + db_namespace: database + update_secret: true + tls: + - secret_name: harbor-tls + keyfile: /etc/ssl/onwalk.net.key + certfile: /etc/ssl/onwalk.net.pem diff --git a/playbooks/init_chaos_mesh b/playbooks/init_chaos_mesh new file mode 100644 index 0000000..4f9f530 --- /dev/null +++ b/playbooks/init_chaos_mesh @@ -0,0 +1,17 @@ +- name: setup chaos-mesh server + hosts: all + user: root + become: yes + gather_facts: yes + tasks: + - include_role: + name: chaos-mesh + vars: + group: master + domain: onwalk.net + namespace: chaos-mesh + update_secret: true + tls: + - secret_name: chaos-mesh-tls + keyfile: /etc/ssl/onwalk.net.key + certfile: /etc/ssl/onwalk.net.pem diff --git a/playbooks/init_chartmuseum b/playbooks/init_chartmuseum new 
file mode 100644 index 0000000..6e1f24c --- /dev/null +++ b/playbooks/init_chartmuseum @@ -0,0 +1,8 @@ +--- +- name: deploy chartmuseum + hosts: all + user: ubuntu + become: yes + gather_facts: yes + roles: + - chartmuseum diff --git a/playbooks/init_deepflow b/playbooks/init_deepflow new file mode 100644 index 0000000..57b59a4 --- /dev/null +++ b/playbooks/init_deepflow @@ -0,0 +1,16 @@ +- name: setup deepflow server + hosts: all + user: root + become: yes + gather_facts: yes + tasks: + - include_role: + name: deepflow + vars: + group: master + update_secret: true + namespace: monitoring + tls: + - secret_name: obs-tls + keyfile: /etc/ssl/onwalk.net.key + certfile: /etc/ssl/onwalk.net.pem diff --git a/playbooks/init_flagger-loadtester b/playbooks/init_flagger-loadtester new file mode 100644 index 0000000..f7d8315 --- /dev/null +++ b/playbooks/init_flagger-loadtester @@ -0,0 +1,16 @@ +- name: setup flagger-loadtester server + hosts: all + user: root + become: yes + gather_facts: yes + tasks: + - include_role: + name: flagger-loadtester + vars: + group: master + update_secret: true + namespace: loadtester + tls: + - secret_name: obs-tls + keyfile: /etc/ssl/${DOMAIN}.key + certfile: /etc/ssl/${DOMAIN}.pem diff --git a/playbooks/init_gitlab b/playbooks/init_gitlab new file mode 100644 index 0000000..3e7411c --- /dev/null +++ b/playbooks/init_gitlab @@ -0,0 +1,23 @@ +- name: setup gitlab + hosts: all + user: root + become: yes + gather_facts: yes + tasks: + - include_role: + name: gitlab + vars: + group: master + gitlab_version: '7.0.4' + namespace: gitlab + db_namespace: database + domain: onwalk.net + auto_issuance: false + update_secret: true + tls: + - secret_name: gitlab-tls + keyfile: /etc/ssl/onwalk.net.key + certfile: /etc/ssl/onwalk.net.pem + gitlab_oidc_client_id: gitlab-oidc + gitlab_oidc_isser: 'https://keycloak.onwalk.net/realms/cloud-sso' + gitlab_oidc_redirect_uri: 'https://gitlab.onwalk.net/users/auth/openid_connect/callback' diff --git 
a/playbooks/init_grafana_alloy b/playbooks/init_grafana_alloy new file mode 100644 index 0000000..f34d339 --- /dev/null +++ b/playbooks/init_grafana_alloy @@ -0,0 +1,8 @@ +--- +- name: deploy grafana alloy agent + hosts: all + user: ubuntu + become: yes + gather_facts: yes + roles: + - alloy diff --git a/playbooks/init_harbor_server b/playbooks/init_harbor_server new file mode 100644 index 0000000..5bb2c7b --- /dev/null +++ b/playbooks/init_harbor_server @@ -0,0 +1,8 @@ +--- +- name: deploy harbor server + hosts: all + user: ubuntu + become: yes + gather_facts: yes + roles: + - harbor diff --git a/playbooks/init_jenkins b/playbooks/init_jenkins new file mode 100644 index 0000000..e30dd66 --- /dev/null +++ b/playbooks/init_jenkins @@ -0,0 +1,18 @@ +- name: setup jenkins server + hosts: all + user: root + become: yes + gather_facts: yes + tasks: + - include_role: + name: jenkins + vars: + group: master + domain: onwalk.net + namespace: jenkins + update_secret: true + db_namespace: database + tls: + - secret_name: jenkins-tls + keyfile: /etc/ssl/onwalk.net.key + certfile: /etc/ssl/onwalk.net.pem diff --git a/playbooks/init_k3s_cluster_agent b/playbooks/init_k3s_cluster_agent new file mode 100644 index 0000000..daa9b1f --- /dev/null +++ b/playbooks/init_k3s_cluster_agent @@ -0,0 +1,8 @@ +--- +- name: Initialize K3s Cluster Agent + hosts: all + user: ubuntu + become: yes + gather_facts: yes + roles: + - k3s-cluster-agent diff --git a/playbooks/init_k3s_cluster_server b/playbooks/init_k3s_cluster_server new file mode 100644 index 0000000..6f33338 --- /dev/null +++ b/playbooks/init_k3s_cluster_server @@ -0,0 +1,8 @@ +--- +- name: Initialize K3s Cluster Server + hosts: all + user: ubuntu + become: yes + gather_facts: yes + roles: + - k3s-cluster-server diff --git a/playbooks/init_k3s_cluster_std b/playbooks/init_k3s_cluster_std new file mode 100644 index 0000000..727c462 --- /dev/null +++ b/playbooks/init_k3s_cluster_std @@ -0,0 +1,27 @@ +- name: set artifact cluster with 
vhosts + hosts: all + user: root + become: yes + gather_facts: yes + tasks: + - include_role: + name: k3s-reset + vars: + group: master + cluster_reset: 'enable' + - include_role: + name: k3s + vars: + group: master + cni: default + version: 'v1.27.2+k3s1' + pod_cidr: '10.10.0.0/16' + svc_cidr: '172.16.0.0/16' + enable_api_access: true + - include_role: + name: k3s-addon + vars: + group: master + ingress: nginx + external_dns: enable + cert_issuance: vault diff --git a/playbooks/init_k3s_cluster_with_argo_server b/playbooks/init_k3s_cluster_with_argo_server new file mode 100644 index 0000000..7c818c1 --- /dev/null +++ b/playbooks/init_k3s_cluster_with_argo_server @@ -0,0 +1,38 @@ +- name: set artifact cluster with vhosts + hosts: all + user: root + become: yes + gather_facts: yes + tasks: + - include_role: + name: k3s-reset + vars: + group: master + cluster_reset: 'enable' + - include_role: + name: k3s + vars: + group: master + cni: default + version: 'v1.27.2+k3s1' + pod_cidr: '10.10.0.0/16' + svc_cidr: '172.16.0.0/16' + enable_api_access: true + - include_role: + name: k3s-addon + vars: + group: master + ingress: disable + external_dns: disable + cert_issuance: vault + - include_role: + name: argo-server + vars: + group: master + namespace: argocd + domain: onwalk.net + update_secret: true + tls: + - secret_name: argocd-server-tls + keyfile: /etc/ssl/onwalk.net.key + certfile: /etc/ssl/onwalk.net.pem diff --git a/playbooks/init_observability-agent b/playbooks/init_observability-agent new file mode 100644 index 0000000..8c2b666 --- /dev/null +++ b/playbooks/init_observability-agent @@ -0,0 +1,13 @@ +- name: setup observability agent + hosts: all + user: root + become: yes + gather_facts: yes + tasks: + - include_role: + name: observability-agent + vars: + group: master + namespace: monitoring + deepflowserverip: 10.146.0.8 + deepflowk8sclusterid: d-kqjofXyZbg diff --git a/playbooks/init_observability-server b/playbooks/init_observability-server new file mode 
100644 index 0000000..cba56a0 --- /dev/null +++ b/playbooks/init_observability-server @@ -0,0 +1,29 @@ +- name: setup observability server + hosts: all + user: root + become: yes + gather_facts: yes + tasks: + - include_role: + name: observability-server + vars: + group: master + update_secret: true + auto_issuance: false + namespace: monitoring + db_namespace: database + tls: + - secret_name: obs-tls + keyfile: /etc/ssl/svc.ink.key + certfile: /etc/ssl/svc.ink.pem + - include_role: + name: flagger-loadtester + vars: + group: master + update_secret: true + auto_issuance: false + namespace: loadtester + tls: + - secret_name: obs-tls + keyfile: /etc/ssl/svc.ink.key + certfile: /etc/ssl/svc.ink.pem diff --git a/playbooks/init_openldap b/playbooks/init_openldap new file mode 100644 index 0000000..069d3eb --- /dev/null +++ b/playbooks/init_openldap @@ -0,0 +1,18 @@ +- name: setup openldap + hosts: all + user: root + become: yes + gather_facts: yes + tasks: + - include_role: + name: openldap + vars: + group: master + namespace: itsm + domain: onwalk.net + update_secret: true + auto_issuance: false + tls: + - secret_name: openldap-tls + keyfile: /etc/ssl/onwalk.net.key + certfile: /etc/ssl/onwalk.net.pem diff --git a/playbooks/init_splunk-otel-collector b/playbooks/init_splunk-otel-collector new file mode 100644 index 0000000..ec1e411 --- /dev/null +++ b/playbooks/init_splunk-otel-collector @@ -0,0 +1,13 @@ +- name: setup splunk otel collector + hosts: all + user: root + become: yes + gather_facts: yes + tasks: + - include_role: + name: splunk-otel-collector + vars: + group: master + namespace: default + splunk_hec_url: https://xxxx.splunkcloud.com:8088/services/collector/event + splunk_hec_token: "token-xxxxxx" diff --git a/playbooks/init_telegraf b/playbooks/init_telegraf new file mode 100644 index 0000000..32bfd57 --- /dev/null +++ b/playbooks/init_telegraf @@ -0,0 +1,10 @@ +- name: Setup telegraf + hosts: all + user: root + become: yes + gather_facts: yes + tasks: + - 
include_role: + name: telegraf + vars: + update_secret: true diff --git a/playbooks/init_vault b/playbooks/init_vault new file mode 100644 index 0000000..45dc7e0 --- /dev/null +++ b/playbooks/init_vault @@ -0,0 +1,8 @@ +--- +- name: deploy vault server + hosts: all + user: ubuntu + become: yes + gather_facts: yes + roles: + - vault diff --git a/playbooks/init_vpn_gateway.yml b/playbooks/init_vpn_gateway.yml new file mode 100755 index 0000000..525b7c7 --- /dev/null +++ b/playbooks/init_vpn_gateway.yml @@ -0,0 +1,7 @@ +--- +- hosts: vpn-gateway + user: ubuntu + become: yes + gather_facts: yes + roles: + - wireguard-gateway diff --git a/playbooks/keycloak_server b/playbooks/keycloak_server new file mode 100644 index 0000000..96a381b --- /dev/null +++ b/playbooks/keycloak_server @@ -0,0 +1,7 @@ +--- +- hosts: all + user: ubuntu + become: yes + gather_facts: yes + roles: + - keycloak diff --git a/playbooks/playbooks/roles/docker/keycloak/defaults/main.yml b/playbooks/playbooks/roles/docker/keycloak/defaults/main.yml new file mode 100644 index 0000000..34df4b1 --- /dev/null +++ b/playbooks/playbooks/roles/docker/keycloak/defaults/main.yml @@ -0,0 +1,14 @@ +--- +postgres_db: keycloak +postgres_user: keycloak_user +postgres_password: keycloak_password + +keycloak_admin: admin +keycloak_admin_password: admin_password + +keycloak_key_store_password: a4h3ljbn +keycloak_trust_store_password: a4h3ljbn + +ssl_certificate_path: /etc/ssl/onwalk.net.pem +ssl_certificate_key_path: /etc/ssl/onwalk.net.key +dhparam_path: /etc/ssl/dhparam.pem diff --git a/playbooks/playbooks/roles/docker/keycloak/files/nginx.conf b/playbooks/playbooks/roles/docker/keycloak/files/nginx.conf new file mode 100644 index 0000000..17db275 --- /dev/null +++ b/playbooks/playbooks/roles/docker/keycloak/files/nginx.conf @@ -0,0 +1,37 @@ +server { + listen 80; + server_name keycloak.onwalk.net; + + # 强制 HTTP 请求重定向到 HTTPS + return 301 https://$host$request_uri; +} + +server { + listen 443 ssl; + server_name 
keycloak.onwalk.net; + + # SSL configuration + ssl_certificate /etc/ssl/certs/onwalk.net.pem; + ssl_certificate_key /etc/ssl/certs/onwalk.net.key; + + # Logging (to the container's stdout/stderr) + access_log /dev/stdout; + error_log /dev/stderr; + + # Reverse proxy to the keycloak compose service (127.0.0.1 would be the nginx container itself) + location / { + proxy_pass https://keycloak:8443; + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Proto $scheme; + proxy_set_header X-Forwarded-Port $server_port; + proxy_set_header Cookie $http_cookie; + proxy_redirect off; + } + + # TLS hardening + ssl_protocols TLSv1.2 TLSv1.3; + ssl_ciphers 'ECDHE-ECDSA-AES128-GCM-SHA256:ECDHE-RSA-AES128-GCM-SHA256:DHE-RSA-AES128-GCM-SHA256'; + ssl_prefer_server_ciphers off; +} diff --git a/playbooks/playbooks/roles/docker/keycloak/tasks/main.yml b/playbooks/playbooks/roles/docker/keycloak/tasks/main.yml new file mode 100644 index 0000000..7f63148 --- /dev/null +++ b/playbooks/playbooks/roles/docker/keycloak/tasks/main.yml @@ -0,0 +1,29 @@ +- name: 执行 pre-setup 操作 + include_tasks: "{{ playbook_dir }}/roles/docker/keycloak/tasks/pre-setup.yml" + +- name: 渲染 .env 配置文件 + template: + src: "{{ playbook_dir }}/roles/docker/keycloak/templates/.env.j2" + dest: "{{ playbook_dir }}/roles/docker/keycloak/files/.env" + +- name: 执行 create_keystore.sh 脚本 + script: "{{ playbook_dir }}/roles/docker/keycloak/files/create_keystore.sh" + args: + chdir: "/home/ubuntu" + +- name: 渲染 Docker Compose 配置文件 + template: + src: "{{ playbook_dir }}/roles/docker/keycloak/templates/docker-compose.yml.j2" + dest: "{{ playbook_dir }}/roles/docker/keycloak/files/docker-compose.yml" + +- name: 启动 Docker Compose 服务 + become: true + docker_compose: + project_src: "{{ playbook_dir }}/roles/docker/keycloak" + files: + - "{{ playbook_dir }}/roles/docker/keycloak/files/docker-compose.yml" + restarted: true + state: present + +- name: 执行 post-setup 操作 + include_tasks: "{{ playbook_dir }}/roles/docker/keycloak/tasks/post-setup.yml" diff --git 
a/playbooks/playbooks/roles/docker/keycloak/templates/docker-compose.yml.j2 b/playbooks/playbooks/roles/docker/keycloak/templates/docker-compose.yml.j2 new file mode 100644 index 0000000..bd8a3b8 --- /dev/null +++ b/playbooks/playbooks/roles/docker/keycloak/templates/docker-compose.yml.j2 @@ -0,0 +1,64 @@ +version: '3.7' + +services: + postgres: + image: postgres:16.0-bookworm + environment: + POSTGRES_DB: {{ postgres_db }} + POSTGRES_USER: {{ postgres_user }} + POSTGRES_PASSWORD: {{ postgres_password }} + volumes: + - postgres_data:/var/lib/postgresql/data + networks: + - keycloak_network + + keycloak: + image: bitnami/keycloak:latest + environment: + KEYCLOAK_ADMIN: {{ keycloak_admin }} + KEYCLOAK_ADMIN_PASSWORD: {{ keycloak_admin_password }} + KEYCLOAK_DATABASE_VENDOR: postgresql + KEYCLOAK_DATABASE_HOST: postgres + KEYCLOAK_DATABASE_PORT: 5432 + KEYCLOAK_DATABASE_USER: {{ postgres_user }} + KEYCLOAK_DATABASE_NAME: {{ postgres_db }} + KEYCLOAK_DATABASE_PASSWORD: {{ postgres_password }} + KEYCLOAK_ENABLE_HTTPS: "true" # quoted: Compose rejects bare YAML booleans in environment + KEYCLOAK_HTTPS_KEY_STORE_FILE: /etc/ssl/keystore.jks + KEYCLOAK_HTTPS_KEY_STORE_PASSWORD: {{ keycloak_key_store_password }} + KEYCLOAK_HTTPS_TRUST_STORE_FILE: /etc/ssl/truststore.jks + KEYCLOAK_HTTPS_TRUST_STORE_PASSWORD: {{ keycloak_trust_store_password }} + ports: + - 8080:8080 + volumes: + - /etc/ssl/keystore.jks:/etc/ssl/keystore.jks + - /etc/ssl/truststore.jks:/etc/ssl/truststore.jks + restart: always + depends_on: + - postgres + networks: + - keycloak_network + + nginx: + image: nginx:latest + depends_on: + - keycloak + ports: + - "80:80" + - "443:443" + volumes: + - /etc/ssl/onwalk.net.pem:/etc/ssl/certs/onwalk.net.pem + - /etc/ssl/onwalk.net.key:/etc/ssl/certs/onwalk.net.key + - /etc/ssl/dhparam.pem:/etc/nginx/ssl/dhparam.pem + - ./nginx.conf:/etc/nginx/conf.d/default.conf:ro # file holds only server{} blocks, so it must load inside http{} via conf.d + restart: unless-stopped + networks: + - keycloak_network + +volumes: + postgres_data: + driver: local + +networks: + keycloak_network: + driver: bridge diff --git 
a/playbooks/pre_setup.sh b/playbooks/pre_setup.sh new file mode 100644 index 0000000..7379c04 --- /dev/null +++ b/playbooks/pre_setup.sh @@ -0,0 +1,48 @@ +#!/bin/bash + +# Function to check if a variable is empty +check_empty() { + if [ -z "${!1}" ]; then + echo "$1 is empty. Aborting." + exit 1 + fi +} + +# List of variables to check +variables=("DNS_AK" "DNS_SK" "OSS_AK" "OSS_SK" "ROOT_PASSWORD" "SMTP_PASSWORD" "GITLAB_OIDC_CLIENT_TOKEN" "HARBOR_OIDC_CLIENT_TOKEN" "SSH_USER" "SSH_HOST_IP" "SSH_HOST_DOMAIN" "SSH_PRIVATE_KEY") + +# Loop through variables and check if each one is empty +for var in "${variables[@]}"; do + check_empty "$var" +done + +sudo apt install jq ansible -y + +mkdir -pv ~/.ssh/ +cat > ~/.ssh/id_rsa << EOF +$SSH_PRIVATE_KEY +EOF +sudo chmod 0400 ~/.ssh/id_rsa +md5sum ~/.ssh/id_rsa + +mkdir -pv hosts/ + +cat > hosts/inventory << EOF +[master] +$SSH_HOST_DOMAIN ansible_host=$SSH_HOST_IP + +[all:vars] +ansible_port=22 +ansible_ssh_user=$SSH_USER +ansible_ssh_private_key_file=~/.ssh/id_rsa +ansible_host_key_checking=False +ingress_ip=$SSH_HOST_IP +dns_ak=$DNS_AK +dns_sk=$DNS_SK +oss_ak=$OSS_AK +oss_sk=$OSS_SK +admin_password=$ROOT_PASSWORD +smtp_password=$SMTP_PASSWORD +gitlab_oidc_client_token=$GITLAB_OIDC_CLIENT_TOKEN +harbor_oidc_client_token=$HARBOR_OIDC_CLIENT_TOKEN +EOF diff --git a/playbooks/renew_nodes_ssl_certs b/playbooks/renew_nodes_ssl_certs new file mode 100644 index 0000000..3fc0c75 --- /dev/null +++ b/playbooks/renew_nodes_ssl_certs @@ -0,0 +1,8 @@ +--- +- name: renew nodes ssl certs + hosts: all + user: ubuntu + become: yes + gather_facts: yes + roles: + - cert-manager diff --git a/playbooks/roles/charts/app/meta/main.yml b/playbooks/roles/charts/app/meta/main.yml new file mode 100644 index 0000000..9711b33 --- /dev/null +++ b/playbooks/roles/charts/app/meta/main.yml @@ -0,0 +1,2 @@ +dependencies: + - role: common diff --git a/playbooks/roles/charts/app/tasks/main.yml b/playbooks/roles/charts/app/tasks/main.yml new file mode 100755 
index 0000000..f7fdd0e --- /dev/null +++ b/playbooks/roles/charts/app/tasks/main.yml @@ -0,0 +1,16 @@ +- name: Prep DIR + shell: "mkdir -pv /tmp/app/" + +- name: Prep NameSpace + shell: "kubectl create namespace default || echo true" + +- name: Sync Deploy yaml + template: src=templates/{{ item }} dest=/tmp/app/{{ item }} owner=root group=root mode=0644 force=yes unsafe_writes=yes + with_items: + - deploy-app.yaml + +- name: Setup App + shell: "kubectl apply -f /tmp/app/{{ item }}" + when: inventory_hostname in groups[group] + with_items: + - deploy-app.yaml diff --git a/playbooks/roles/charts/app/templates/.gitignore b/playbooks/roles/charts/app/templates/.gitignore new file mode 100644 index 0000000..a194b20 --- /dev/null +++ b/playbooks/roles/charts/app/templates/.gitignore @@ -0,0 +1,2 @@ +/clickhouse-keeper-k8s.iml +/.idea/ diff --git a/playbooks/roles/charts/app/templates/deploy-app.yaml b/playbooks/roles/charts/app/templates/deploy-app.yaml new file mode 100644 index 0000000..aebc9ff --- /dev/null +++ b/playbooks/roles/charts/app/templates/deploy-app.yaml @@ -0,0 +1,18 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: app +spec: + replicas: 1 + selector: + matchLabels: + app: demo + template: + metadata: + labels: + app: demo + spec: + containers: + - name: demo + image: {{ app_image }}:{{ app_tag }} + imagePullPolicy: Always diff --git a/playbooks/roles/charts/argo-server/files/setup-argocd.sh b/playbooks/roles/charts/argo-server/files/setup-argocd.sh new file mode 100644 index 0000000..8aec1f0 --- /dev/null +++ b/playbooks/roles/charts/argo-server/files/setup-argocd.sh @@ -0,0 +1,100 @@ +#!/bin/bash + +# 检查参数是否为空 +check_not_empty() { + if [[ -z $1 ]]; then + echo "Error: $2 is empty. Please provide a value." 
+ exit 1 + fi +} + +helm repo add argo https://argoproj.github.io/argo-helm +helm repo update + +# Deploy Argo CD with Helm +#helm upgrade --install argocd argo/argo-cd -n argocd --create-namespace + +cat <<'EOF' > values.yaml +global: + domain: argocd.onwalk.net +server: + service: + type: ClusterIP + servicePortHttp: 80 + servicePortHttps: 443 + servicePortHttpName: http + servicePortHttpsName: https + ingress: + enabled: false + ingressClassName: "nginx" + hostname: argocd.onwalk.net + annotations: + nginx.ingress.kubernetes.io/force-ssl-redirect: "true" + nginx.ingress.kubernetes.io/backend-protocol: "HTTP" + tls: true +repoServer: + extraContainers: + - name: helmfile + image: ghcr.io/helmfile/helmfile:v0.157.0 + # Entrypoint should be Argo CD lightweight CMP server i.e. argocd-cmp-server + command: ["/var/run/argocd/argocd-cmp-server"] + env: + - name: HELM_CACHE_HOME + value: /tmp/helm/cache + - name: HELM_CONFIG_HOME + value: /tmp/helm/config + - name: HELMFILE_CACHE_HOME + value: /tmp/helmfile/cache + - name: HELMFILE_TEMPDIR + value: /tmp/helmfile/tmp + securityContext: + runAsNonRoot: true + runAsUser: 999 + volumeMounts: + - mountPath: /var/run/argocd + name: var-files + - mountPath: /home/argocd/cmp-server/plugins + name: plugins + # Register helmfile plugin into sidecar + - mountPath: /home/argocd/cmp-server/config/plugin.yaml + subPath: helmfile.yaml + name: argocd-cmp-cm + # Starting with v2.4, do NOT mount the same tmp volume as the repo-server container. The filesystem separation helps mitigate path traversal attacks. 
+ - mountPath: /tmp + name: helmfile-tmp + volumes: + - name: argocd-cmp-cm + configMap: + name: argocd-cmp-cm + - name: helmfile-tmp + emptyDir: {} +configs: + cmp: + create: true + plugins: + helmfile: + allowConcurrency: true + discover: + fileName: helmfile.yaml + generate: + command: + - bash + - "-c" + - | + if [[ -v ENV_NAME ]]; then + helmfile -n "$ARGOCD_APP_NAMESPACE" -e $ENV_NAME template --include-crds -q + elif [[ -v ARGOCD_ENV_ENV_NAME ]]; then + helmfile -n "$ARGOCD_APP_NAMESPACE" -e "$ARGOCD_ENV_ENV_NAME" template --include-crds -q + else + helmfile -n "$ARGOCD_APP_NAMESPACE" template --include-crds -q + fi + lockRepo: false +EOF + +helm upgrade --install argocd argo/argo-cd -n argocd -f values.yaml + +# 等待 Argo CD 完全启动 +echo "Waiting for Argo CD to be ready..." +kubectl wait --for=condition=ready pod -l app.kubernetes.io/name=argocd-server -n argocd --timeout=180s + +echo "Argo CD deployment and configuration complete." diff --git a/playbooks/roles/charts/argo-server/meta/main.yml b/playbooks/roles/charts/argo-server/meta/main.yml new file mode 100644 index 0000000..83cef7b --- /dev/null +++ b/playbooks/roles/charts/argo-server/meta/main.yml @@ -0,0 +1,2 @@ +dependencies: + - role: cert-manager diff --git a/playbooks/roles/charts/argo-server/tasks/main.yml b/playbooks/roles/charts/argo-server/tasks/main.yml new file mode 100755 index 0000000..fbf7c91 --- /dev/null +++ b/playbooks/roles/charts/argo-server/tasks/main.yml @@ -0,0 +1,2 @@ +- name: Set ArgoCD Contoller + script: files/setup-argocd.sh diff --git a/playbooks/roles/charts/chaos-mesh/files/setup.sh b/playbooks/roles/charts/chaos-mesh/files/setup.sh new file mode 100644 index 0000000..d26f23a --- /dev/null +++ b/playbooks/roles/charts/chaos-mesh/files/setup.sh @@ -0,0 +1,24 @@ +#!/bin/bash +set -x +export domain=$1 +export secret=$2 +export namespace=$3 + +cat > values.yaml << EOF +chaosDaemon: + runtime: containerd + socketPath: /run/k3s/containerd/containerd.sock +dashboard: + create: true 
+ ingress: + enabled: true + ingressClassName: "nginx" + hosts: + - name: chaos-mesh.$domain + tls: true + tlsSecret: $secret +EOF + +helm repo add chaos-mesh https://charts.chaos-mesh.org +helm repo update +helm upgrade --install chaos-mesh chaos-mesh/chaos-mesh -n $namespace --create-namespace --version 2.6.3 -f values.yaml diff --git a/playbooks/roles/charts/chaos-mesh/howto.md b/playbooks/roles/charts/chaos-mesh/howto.md new file mode 100644 index 0000000..e978bcb --- /dev/null +++ b/playbooks/roles/charts/chaos-mesh/howto.md @@ -0,0 +1,124 @@ +# Jenkins Master 部署 + +# Jenkins Node IaC Runner 设置 +1. 安装git terraform + +## GitLab to trigger Jenkins + +1. Gitlab https://gitlab.xxx.com/-/profile/personal_access_tokens + +2. GitLab和Jenkins的集成可以让你在GitLab中的代码更新后自动触发Jenkins的构建任务。以下是配置GitLab插件和Jenkins以实现GitLab触发Jenkins的步骤: +3. 在Jenkins中安装GitLab插件 +首先,你需要在Jenkins中安装GitLab插件。登录到Jenkins的管理界面,然后转到“Manage Jenkins” > “Manage Plugins” > “Available”,在搜索框中输入“GitLab”,找到并安装“GitLab Plugin”。 +4. 在Jenkins中配置GitLab连接 +安装完插件后,你需要配置GitLab的连接。转到“Manage Jenkins” > “Configure System”,滚动到“GitLab”部分,点击“Add GitLab Server” > “Server”,输入你的GitLab服务器URL,并生成并输入一个与你的GitLab账户相关联的API Token。 +5. 在Jenkins中创建一个新的任务 +创建一个新的任务,并在源代码管理部分选择“Git”,输入你的GitLab项目的URL。在构建触发器部分,选择“Build when a change is pushed to GitLab”。 +记录:GitLab webhook URL: https://jenkins.xxx.xxx/project/alicloud-oss-pipeline +6. 
在GitLab中配置Webhook +在你的GitLab项目中,转到“Settings” > “Integrations” -> 启用"Jenkins" +- 在URL中输入步骤5记录的 Webhook URL https://jenkins.xxx.xxx/project/alicloud-oss-pipeline +- 选择你想要触发Jenkins任务的事件(例如,当代码被推送时) +- Project name: 输入项目名称 +- Username: Jenkins 用户名 +- Password: Jenkins 认证密码 +- 保存更改, 测试设置,返回状态200为配置正确 + +以上就是配置GitLab插件和Jenkins以实现GitLab触发Jenkins的步骤。在完成这些步骤后,每当你的GitLab项目有更新时,都会自动触发对应的Jenkins构建任务。 + +## 要将GitHub代码仓库与Jenkins关联起来,您需要完成以下步骤: + +1 要在 GitHub 中启用 webhook 功能以触发 Jenkins 构建,请按照以下步骤操作: +2 进入 GitHub 仓库设置:在要设置 webhook 的 GitHub 仓库页面上,点击右上角的“Settings”。 +3 选择 Webhooks 选项:在仓库设置页面的左侧菜单中,选择“Webhooks”。 +4 添加 Webhook:在 Webhooks 页面的右上角,点击“Add webhook”。 + +配置 Webhook: + +1. Payload URL:输入 Jenkins 服务器的 webhook URL。格式应为 http://your-jenkins-server/github-webhook/。确保替换 your-jenkins-server 为您 Jenkins 服务器的实际地址。 +2. Content type:选择 application/json。 +3. Secret(可选):如果需要额外的安全性,可以输入一个秘密令牌。 +4. SSL verification:选择是否验证 SSL 证书。 +5. Which events would you like to trigger this webhook?:选择触发 webhook 的事件。通常选择 Just the push event(只有推送事件)或 Let me select individual events(让我选择单独的事件)并选择适当的事件(例如,push、pull request 等)。 +添加 Webhook:点击页面底部的“Add webhook”按钮以保存配置。 + +完成以上步骤后,您的 GitHub 仓库就配置好了一个 webhook,可以触发 Jenkins 构建。记得在 Jenkins 中设置相应的任务来响应这些 webhook。 + + +安装Jenkins插件: + +确保您的Jenkins实例已经安装了“GitHub”和“GitHub Integration”插件。您可以在Jenkins管理界面的“插件管理”部分进行安装。 +配置GitHub Webhook: + +在GitHub仓库的设置中,找到“Webhooks”部分并添加一个新的Webhook。 +将“Payload URL”设置为您的Jenkins服务器的URL,通常是这样的格式:http://your-jenkins-server/github-webhook/。 +选择触发Webhook的事件,通常是“Just the push event”或者“Send me everything”。 +确保“Content type”设置为“application/json”。 +点击“Add webhook”保存设置。 +配置Jenkins Job: + +在Jenkins中创建一个新的构建任务或者配置现有的任务。 +在“源码管理”部分,选择“Git”并填写您的GitHub仓库的URL。 +在“构建触发器”部分,选择“GitHub hook trigger for GITScm polling”选项。这样,每当GitHub仓库有新的推送事件时,Jenkins就会自动触发构建。 +测试配置: + +推送一些改动到您的GitHub仓库,检查是否触发了Jenkins构建。 +在Jenkins的构建历史中查看构建是否成功执行。 +通过完成以上步骤,您的GitHub代码仓库就与Jenkins关联起来了,可以实现自动触发构建的功能。 + +要在 Jenkins 中设置 GitHub 服务,您需要进行以下步骤: + +安装 GitHub 插件:首先确保您的 Jenkins 实例已安装 GitHub 
插件。如果尚未安装,请转到 
Jenkins 的“插件管理”页面,在“可选插件”选项卡中搜索并安装 GitHub 插件。 + +配置 GitHub 服务器:在 Jenkins 管理界面中,转到“系统管理” > “系统设置”。 + +在系统设置页面中,找到并点击“GitHub”部分。 +点击“Add GitHub Server”添加一个新的 GitHub 服务器配置。 +在配置页面中,输入一个描述性的名称,例如“GitHub”。 +在 GitHub API URL 中输入 GitHub 的 API 地址。通常为 https://api.github.com。 +如果您的 GitHub 仓库需要身份验证,请在“凭据”部分选择一个已配置的凭据。如果尚未配置凭据,请点击“Add”添加一个新的凭据,选择类型为“Secret text”或“Username with password”,然后输入您的 GitHub 用户名和密码或访问令牌。 +完成配置后,点击“保存”保存 GitHub 服务器配置。 +验证配置:您可以在配置页面的底部点击“Test connection”来验证您的 GitHub 服务器配置是否正常工作。 + +保存设置:确保在完成配置后点击“保存”保存更改。 + +现在,您已成功配置了 Jenkins 的 GitHub 服务。您可以在 Jenkins 任务中使用这个配置来与 GitHub 仓库进行集成,例如触发构建、拉取代码等操作。 + + +对于 Jenkins 中的 GitHub API URL (https://api.github.com) 的凭据设置,您可以使用 GitHub Personal Access Token。这个 Token 可以通过以下步骤生成: + +在 GitHub 上登录您的账号。 +点击页面右上角的头像,选择“Settings”。 +在左侧边栏中,点击“Developer settings”。 +在左侧边栏中,点击“Personal access tokens”。 +点击“Generate new token”。 +输入一个描述性的名称,选择需要的权限(至少需要 repo 权限来访问仓库),然后点击“Generate token”。 +复制生成的 Token,并保存到一个安全的地方。请注意,这个 Token 只会显示一次,如果您丢失了,请重新生成一个新的 Token。 +在 Jenkins 中使用这个 Token 作为 GitHub API URL (https://api.github.com) 的凭据时,您可以将 Token 添加为 Jenkins 的凭据: + +进入 Jenkins 管理界面,转到“凭据” > “系统”。 +在“系统”页面中,点击“Global credentials (unrestricted)”。 +在凭据页面中,点击“Add credentials”。 +在“Kind”下拉菜单中选择“Secret text”。 +在“Secret”框中粘贴您在 GitHub 上生成的 Personal Access Token。 +输入一个描述性的名称,并点击“OK”保存凭据。 +现在,您可以在 Jenkins 的配置中使用这个凭据来访问 GitHub API (https://api.github.com)。 + +确保 Docker 已安装:在 Jenkins 代理节点上确认 Docker 已正确安装并配置。您可以通过在终端中执行 docker --version 命令来检查 Docker 是否可用。 + +检查 Docker 环境:如果 Docker 已安装,请确保 Docker 服务正在运行。您可以使用 sudo systemctl status docker 命令检查 Docker 服务的状态。 + +确认 Jenkins 全局工具配置:在 Jenkins 管理界面中,转到“系统管理”->“全局工具配置”,确保 Docker 工具已正确配置。如果未配置,您可以添加一个 Docker 工具,并指定正确的安装路径。 + +重启 Jenkins 服务:在进行了上述更改后,尝试重启 Jenkins 服务,以确保新的配置生效。 + +尝试在终端中执行 Docker 命令:在 Jenkins 代理节点上打开终端,尝试手动执行一些 Docker 命令(如 docker pull),看看是否能够正常执行 + +要设置 Jenkins Docker 流水线,你可以按照以下步骤进行操作: + +前提条件 +确保你的 Jenkins 实例已经安装了以下插件: + +Docker Pipeline +Docker Commons + diff --git 
a/playbooks/roles/charts/chaos-mesh/meta/main.yml b/playbooks/roles/charts/chaos-mesh/meta/main.yml new file mode 100644 index 0000000..1f2217b --- /dev/null +++ b/playbooks/roles/charts/chaos-mesh/meta/main.yml @@ -0,0 +1,2 @@ +dependencies: + - role: secret-manger diff --git a/playbooks/roles/charts/chaos-mesh/tasks/main.yml b/playbooks/roles/charts/chaos-mesh/tasks/main.yml new file mode 100755 index 0000000..75fdece --- /dev/null +++ b/playbooks/roles/charts/chaos-mesh/tasks/main.yml @@ -0,0 +1,4 @@ +- name: Setup chaos-mesh Server + script: files/setup.sh {{ domain }} {{ item.secret_name }} {{ namespace }} + when: inventory_hostname in groups[group] and ( tls is defined) + loop: "{{ tls }}" diff --git a/playbooks/roles/charts/chartmuseum/files/setup.sh b/playbooks/roles/charts/chartmuseum/files/setup.sh new file mode 100644 index 0000000..f1f870f --- /dev/null +++ b/playbooks/roles/charts/chartmuseum/files/setup.sh @@ -0,0 +1,37 @@ +#!/bin/bash + +domain=$1 +namespace=$2 +admin_password=$3 +secret_name=$4 +storage_type=$5 + +cat > values.yaml << EOF +env: + open: + STORAGE: local + DISABLE_API: false + AUTH_ANONYMOUS_GET: true + secret: + BASIC_AUTH_USER: admin + BASIC_AUTH_PASS: '$admin_password' +ingress: + enabled: true + hosts: + - name: charts.$domain + path: / + tls: true + tlsSecret: $secret_name + ingressClassName: nginx +persistence: + enabled: true + accessMode: ReadWriteOnce + size: 8Gi + path: /storage + storageClass: "local-path" +EOF + +export KUBECONFIG=/etc/rancher/k3s/k3s.yaml +helm repo add chartmuseum https://chartmuseum.github.io/charts +helm repo update +helm upgrade --install chartmuseum chartmuseum/chartmuseum -f values.yaml -n $namespace diff --git a/playbooks/roles/charts/chartmuseum/meta/main.yml b/playbooks/roles/charts/chartmuseum/meta/main.yml new file mode 100644 index 0000000..1f2217b --- /dev/null +++ b/playbooks/roles/charts/chartmuseum/meta/main.yml @@ -0,0 +1,2 @@ +dependencies: + - role: secret-manger diff --git 
a/playbooks/roles/charts/chartmuseum/tasks/main.yml b/playbooks/roles/charts/chartmuseum/tasks/main.yml new file mode 100755 index 0000000..02d9e65 --- /dev/null +++ b/playbooks/roles/charts/chartmuseum/tasks/main.yml @@ -0,0 +1,4 @@ +- name: Setup Chartmuseum Server + script: files/setup.sh {{ domain }} {{ namespace }} {{ admin_password }} {{ item.secret_name }} + loop: "{{ tls }}" + when: inventory_hostname in groups[group] diff --git a/playbooks/roles/charts/chartmuseum/vars/main.yml b/playbooks/roles/charts/chartmuseum/vars/main.yml new file mode 100644 index 0000000..993b09c --- /dev/null +++ b/playbooks/roles/charts/chartmuseum/vars/main.yml @@ -0,0 +1,8 @@ +group: master +namespace: harbor +storage_type: oss +update_secret: true +tls: + - secret_name: chartmuseum-tls + keyfile: /etc/ssl/onwalk.net.key + certfile: /etc/ssl/onwalk.net.pem diff --git a/playbooks/roles/charts/clickhouse/meta/main.yml b/playbooks/roles/charts/clickhouse/meta/main.yml new file mode 100644 index 0000000..1f2217b --- /dev/null +++ b/playbooks/roles/charts/clickhouse/meta/main.yml @@ -0,0 +1,2 @@ +dependencies: + - role: secret-manger diff --git a/playbooks/roles/charts/clickhouse/tasks/main.yml b/playbooks/roles/charts/clickhouse/tasks/main.yml new file mode 100755 index 0000000..3a21731 --- /dev/null +++ b/playbooks/roles/charts/clickhouse/tasks/main.yml @@ -0,0 +1,48 @@ +- name: Prep DIR + shell: "mkdir -pv /tmp/clickhouse-cluster/ && mkdir -pv /tmp/qryn" + +- name: Prep NameSpace + shell: "kubectl create namespace monitoring || echo true" + +- name: sync clickhouse deploy yaml + template: src=templates/{{ item }} dest=/tmp/{{ item }} owner=root group=root mode=0644 force=yes unsafe_writes=yes + with_items: + - clickhouse-cluster/clickhouse-config.yaml + - clickhouse-cluster/clickhouse-service.yaml + - clickhouse-cluster/clickhouse-user-config.yaml + - clickhouse-cluster/clickhouse-statefulset.yml + - postsetup.sh + +- name: Setup ClickHouse Server + shell: "cd 
/tmp/clickhouse-cluster && kubectl apply -f ." + when: inventory_hostname in groups[group] + +#- name: Post Setup ClickHouse Server +# shell: "cd /tmp/ && sh postsetup.sh" +# when: inventory_hostname in groups[group] + +- name: get clickhouse node ip + shell: " kubectl get pods -n monitoring -o wide | grep -E '^clickhouse-' | awk '{print $6}' " + register: ck_node_ip_raw + when: inventory_hostname == groups[group][0] + +- name: Check if ck_node_ip_raw is not empty + fail: + msg: "ck_node_ip_raw is empty, terminating the playbook." + when: inventory_hostname == groups[group][0] and ck_node_ip_raw.stdout_lines | length == 0 + +- name: set fact join command for ck_node_ip + set_fact: + ck_node_ip : "{{ ck_node_ip_raw.stdout_lines[0] }}" + when: inventory_hostname == groups[group][0] + +- name: sync clickhouse deploy yaml + template: src=templates/{{ item }} dest=/tmp/{{ item }} owner=root group=root mode=0644 force=yes unsafe_writes=yes + with_items: + - qryn/qryn-deployment.yaml + - qryn/qryn-service.yaml + - qryn/qryn-ingress.yaml + +- name: Setup Qryn Server + shell: "cd /tmp/qryn && kubectl apply -f ." 
+ when: inventory_hostname in groups[group] diff --git a/playbooks/roles/charts/clickhouse/templates/.gitignore b/playbooks/roles/charts/clickhouse/templates/.gitignore new file mode 100644 index 0000000..a194b20 --- /dev/null +++ b/playbooks/roles/charts/clickhouse/templates/.gitignore @@ -0,0 +1,2 @@ +/clickhouse-keeper-k8s.iml +/.idea/ diff --git a/playbooks/roles/charts/clickhouse/templates/clickhouse-cluster/clickhouse-config.yaml b/playbooks/roles/charts/clickhouse/templates/clickhouse-cluster/clickhouse-config.yaml new file mode 100644 index 0000000..7e48d80 --- /dev/null +++ b/playbooks/roles/charts/clickhouse/templates/clickhouse-cluster/clickhouse-config.yaml @@ -0,0 +1,94 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: clickhouse-config + namespace: monitoring +data: + keeper.xml: | + + + 0.0.0.0 + + trace + 1 + + + + + + + + + 2181 + + /var/lib/clickhouse/coordination/log + /var/lib/clickhouse/coordination/snapshots + + 10000 + 30000 + trace + 10000 + + + + 0 + clickhouse-0.clickhouse-service.monitoring + 9444 + + + 1 + clickhouse-1.clickhouse-service.monitoring + 9444 + + + 2 + clickhouse-2.clickhouse-service.monitoring + 9444 + + + + + + clickhouse-0.clickhouse-service.monitoring + 2181 + + + clickhouse-1.clickhouse-service.monitoring + 2181 + + + clickhouse-2.clickhouse-service.monitoring + 2181 + + + + + cluster.xml: | + + + + + + + clickhouse-0.clickhouse-service.monitoring + 9000 + + + + + clickhouse-1.clickhouse-service.monitoring + 9000 + + + + + + macros.xml: | + + + + testcluster + + 1 + + diff --git a/playbooks/roles/charts/clickhouse/templates/clickhouse-cluster/clickhouse-ingress.yaml b/playbooks/roles/charts/clickhouse/templates/clickhouse-cluster/clickhouse-ingress.yaml new file mode 100644 index 0000000..0e1131d --- /dev/null +++ b/playbooks/roles/charts/clickhouse/templates/clickhouse-cluster/clickhouse-ingress.yaml @@ -0,0 +1,18 @@ +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + name: clickhouse + namespace: 
monitoring +spec: + ingressClassName: nginx + rules: + - host: clickhouse.{{ domain }} + http: + paths: + - backend: + service: + name: clickhouse-service + port: + number: 8123 + path: / + pathType: Prefix diff --git a/playbooks/roles/charts/clickhouse/templates/clickhouse-cluster/clickhouse-service.yaml b/playbooks/roles/charts/clickhouse/templates/clickhouse-cluster/clickhouse-service.yaml new file mode 100644 index 0000000..e1eec7f --- /dev/null +++ b/playbooks/roles/charts/clickhouse/templates/clickhouse-cluster/clickhouse-service.yaml @@ -0,0 +1,23 @@ +kind: Service +apiVersion: v1 +metadata: + labels: + app: clickhouse + name: clickhouse-service + namespace: monitoring +spec: + ports: + - name: rest + port: 8123 + - name: keeper + port: 2181 + - name: replica-a + port: 9000 + - name: replica-b + port: 9009 + - name: raft + port: 9444 + + clusterIP: None + selector: + app: clickhouse diff --git a/playbooks/roles/charts/clickhouse/templates/clickhouse-cluster/clickhouse-statefulset.yml b/playbooks/roles/charts/clickhouse/templates/clickhouse-cluster/clickhouse-statefulset.yml new file mode 100644 index 0000000..3dd7ff6 --- /dev/null +++ b/playbooks/roles/charts/clickhouse/templates/clickhouse-cluster/clickhouse-statefulset.yml @@ -0,0 +1,103 @@ +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: clickhouse + namespace: monitoring +spec: + selector: + matchLabels: + app: clickhouse + serviceName: clickhouse-service + replicas: 3 + podManagementPolicy: "Parallel" + # podManagementPolicy: OrderedReady + template: + metadata: + labels: + app: clickhouse + spec: + containers: + - name: clickhouse + image: clickhouse/clickhouse-server:22.4.5 + imagePullPolicy: IfNotPresent + workingDir: / + command: + - /bin/bash + - -c + - |- + export CK_INDEX=${HOSTNAME##*-} + echo CK_INDEX=${CK_INDEX} + ./entrypoint.sh + env: + - name: HOSTNAME + valueFrom: + fieldRef: + fieldPath: metadata.name + ports: + - name: rest + containerPort: 8123 + - name: keeper + 
containerPort: 2181 + - name: replica-a + containerPort: 9000 + - name: replica-b + containerPort: 9009 + - name: raft + containerPort: 9444 + volumeMounts: + - name: clickhouse-config + mountPath: /etc/clickhouse-server/config.d/ + - name: clickhouse-user-config + mountPath: /etc/clickhouse-server/users.d/ + - name: clickhouse-meta + mountPath: /var/lib/clickhouse/coordination/ + - name: clickhouse-data + mountPath: /var/lib/clickhouse/ + volumes: + - name: clickhouse-config + configMap: + name: clickhouse-config + items: + - key: keeper.xml + path: keeper.xml + - key: cluster.xml + path: cluster.xml + - key: macros.xml + path: macros.xml + - name: clickhouse-user-config + configMap: + name: clickhouse-user-config + items: + - key: user.xml + path: user.xml + volumeClaimTemplates: + - apiVersion: v1 + kind: PersistentVolumeClaim + metadata: + labels: + app.kubernetes.io/component: clickhouse + app.kubernetes.io/instance: clickhouse + app.kubernetes.io/name: clickhouse + name: clickhouse-meta + spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 1Gi + volumeMode: Filesystem + - apiVersion: v1 + kind: PersistentVolumeClaim + metadata: + labels: + app.kubernetes.io/component: clickhouse + app.kubernetes.io/instance: clickhouse + app.kubernetes.io/name: clickhouse + name: clickhouse-data + spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 5Gi + volumeMode: Filesystem diff --git a/playbooks/roles/charts/clickhouse/templates/clickhouse-cluster/clickhouse-user-config.yaml b/playbooks/roles/charts/clickhouse/templates/clickhouse-cluster/clickhouse-user-config.yaml new file mode 100644 index 0000000..694ee7d --- /dev/null +++ b/playbooks/roles/charts/clickhouse/templates/clickhouse-cluster/clickhouse-user-config.yaml @@ -0,0 +1,19 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: clickhouse-user-config + namespace: monitoring +data: + user.xml: | + + + + + 10000000000 + 4000 + 4096 + 4096 + random + + + diff --git 
a/playbooks/roles/charts/clickhouse/templates/otel-collector/configmap.yaml b/playbooks/roles/charts/clickhouse/templates/otel-collector/configmap.yaml new file mode 100644 index 0000000..46148e5 --- /dev/null +++ b/playbooks/roles/charts/clickhouse/templates/otel-collector/configmap.yaml @@ -0,0 +1,142 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: otel-collector-config + namespace: default +data: + config.yaml: | + receivers: + loki: + use_incoming_timestamp: true + protocols: + http: + endpoint: 0.0.0.0:3100 + grpc: + endpoint: 0.0.0.0:3200 + syslog: + protocol: rfc5424 + tcp: + listen_address: "0.0.0.0:5514" + fluentforward: + endpoint: 0.0.0.0:24224 + splunk_hec: + endpoint: 0.0.0.0:8088 + otlp: + protocols: + grpc: + endpoint: 0.0.0.0:4317 + http: + endpoint: 0.0.0.0:4318 + jaeger: + protocols: + grpc: + endpoint: 0.0.0.0:14250 + thrift_http: + endpoint: 0.0.0.0:14268 + zipkin: + endpoint: 0.0.0.0:9411 + skywalking: + protocols: + grpc: + endpoint: 0.0.0.0:11800 + http: + endpoint: 0.0.0.0:12800 + prometheus: + config: + scrape_configs: + - job_name: 'otel-collector' + scrape_interval: 5s + static_configs: + - targets: ['exporter:8080'] + influxdb: + endpoint: 0.0.0.0:8086 + + connectors: + servicegraph: + latency_histogram_buckets: [ 100us, 1ms, 2ms, 6ms, 10ms, 100ms, 250ms ] + dimensions: [ cluster, namespace ] + store: + ttl: 2s + max_items: 1000 + cache_loop: 2m + store_expiration_loop: 2s + virtual_node_peer_attributes: + - db.name + - rpc.service + spanmetrics: + namespace: span.metrics + exemplars: + enabled: false + dimensions_cache_size: 1000 + aggregation_temporality: 'AGGREGATION_TEMPORALITY_CUMULATIVE' + metrics_flush_interval: 30s + metrics_expiration: 5m + events: + enabled: false + + processors: + batch: + send_batch_size: 10000 + timeout: 5s + memory_limiter: + check_interval: 2s + limit_mib: 1800 + spike_limit_mib: 500 + resourcedetection/system: + detectors: ['system'] + system: + hostname_sources: ['os'] + resource: + attributes: + 
- key: service.name + value: "serviceName" + action: upsert + metricstransform: + transforms: + - include: calls_total + action: update + new_name: traces_spanmetrics_calls_total + - include: latency + action: update + new_name: traces_spanmetrics_latency + + exporters: + qryn: + dsn: tcp://clickhouse-server:9000/qryn?username=default&password=************* + timeout: 10s + sending_queue: + queue_size: 100 + retry_on_failure: + enabled: true + initial_interval: 5s + max_interval: 30s + max_elapsed_time: 300s + logs: + format: raw + otlp/spanmetrics: + endpoint: localhost:4317 + tls: + insecure: true + + extensions: + health_check: + pprof: + zpages: + + service: + extensions: [pprof, zpages, health_check] + pipelines: + logs: + receivers: [fluentforward, otlp, loki, syslog, splunk_hec] + processors: [memory_limiter, resourcedetection/system, resource, batch] + exporters: [qryn] + traces: + receivers: [otlp, jaeger, zipkin, skywalking] + processors: [memory_limiter, resourcedetection/system, resource, batch] + exporters: [qryn, spanmetrics, servicegraph] + metrics: + receivers: [prometheus, influxdb, spanmetrics, servicegraph] + processors: [memory_limiter, resourcedetection/system, resource, batch] + exporters: [qryn] + + diff --git a/playbooks/roles/charts/clickhouse/templates/otel-collector/deployment.yaml b/playbooks/roles/charts/clickhouse/templates/otel-collector/deployment.yaml new file mode 100644 index 0000000..3017f46 --- /dev/null +++ b/playbooks/roles/charts/clickhouse/templates/otel-collector/deployment.yaml @@ -0,0 +1,42 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: otel-collector + namespace: default + labels: + app: otel-collector +spec: + replicas: 1 + selector: + matchLabels: + app: otel-collector + template: + metadata: + labels: + app: otel-collector + spec: + containers: + - name: otel-collector + image: ghcr.io/metrico/qryn-otel-collector:latest + volumeMounts: + - name: config + mountPath: /etc/otel + subPath: config.yaml + 
ports: + - containerPort: 3100 + - containerPort: 3200 + - containerPort: 8088 + - containerPort: 5514 + - containerPort: 24224 + - containerPort: 4317 + - containerPort: 4318 + - containerPort: 14250 + - containerPort: 14268 + - containerPort: 9411 + - containerPort: 11800 + - containerPort: 12800 + - containerPort: 8086 + volumes: + - name: config + configMap: + name: otel-collector-config diff --git a/playbooks/roles/charts/clickhouse/templates/otel-collector/ingress.yaml b/playbooks/roles/charts/clickhouse/templates/otel-collector/ingress.yaml new file mode 100644 index 0000000..508b25c --- /dev/null +++ b/playbooks/roles/charts/clickhouse/templates/otel-collector/ingress.yaml @@ -0,0 +1,19 @@ +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + name: otel-collector-ingress + namespace: default + annotations: + nginx.ingress.kubernetes.io/rewrite-target: / +spec: + rules: + - host: your-domain.example.com + http: + paths: + - path: /api + pathType: Prefix + backend: + service: + name: otel-collector + port: + number: 3100 diff --git a/playbooks/roles/charts/clickhouse/templates/otel-collector/service.yaml b/playbooks/roles/charts/clickhouse/templates/otel-collector/service.yaml new file mode 100644 index 0000000..7afcf54 --- /dev/null +++ b/playbooks/roles/charts/clickhouse/templates/otel-collector/service.yaml @@ -0,0 +1,48 @@ +apiVersion: v1 +kind: Service +metadata: + name: otel-collector + namespace: default +spec: + ports: + - port: 3100 + targetPort: 3100 + protocol: TCP + - port: 3200 + targetPort: 3200 + protocol: TCP + - port: 8088 + targetPort: 8088 + protocol: TCP + - port: 5514 + targetPort: 5514 + protocol: TCP + - port: 24224 + targetPort: 24224 + protocol: TCP + - port: 4317 + targetPort: 4317 + protocol: TCP + - port: 4318 + targetPort: 4318 + protocol: TCP + - port: 14250 + targetPort: 14250 + protocol: TCP + - port: 14268 + targetPort: 14268 + protocol: TCP + - port: 9411 + targetPort: 9411 + protocol: TCP + - port: 11800 + 
targetPort: 11800 + protocol: TCP + - port: 12800 + targetPort: 12800 + protocol: TCP + - port: 8086 + targetPort: 8086 + protocol: TCP + selector: + app: otel-collector diff --git a/playbooks/roles/charts/clickhouse/templates/postsetup.sh b/playbooks/roles/charts/clickhouse/templates/postsetup.sh new file mode 100755 index 0000000..77eddd3 --- /dev/null +++ b/playbooks/roles/charts/clickhouse/templates/postsetup.sh @@ -0,0 +1,27 @@ +#!/bin/bash + +#检查 ClickHouse 版本 +#clickhouse-client --version | grep -q "21.8" +#if [ $? -ne 0 ]; then +#echo "ClickHouse 的版本必须至少为 21.8" +#exit 1 +#fi + +创建数据库 +for db in deepflow_system event ext_metrics flow_log flow_metrics flow_tag profile; do +clickhouse-client -u admin -p admin -q "CREATE DATABASE $db" +done + +创建用户 +clickhouse-client -u admin -p admin -q "CREATE USER admin IDENTIFIED WITH PLAINTEXT_PASSWORD BY 'admin'" +clickhouse-client -u admin -p admin -q "CREATE USER deepflow IDENTIFIED WITH PLAINTEXT_PASSWORD BY 'deepflow'" + +授权账户 +clickhouse-client -u admin -p admin -q "GRANT ALL ON . 
TO admin" +clickhouse-client -u admin -p admin -q "GRANT SELECT ON deepflow_system.* TO deepflow" +clickhouse-client -u admin -p admin -q "GRANT SELECT ON event.* TO deepflow" +clickhouse-client -u admin -p admin -q "GRANT SELECT ON ext_metrics.* TO deepflow" +clickhouse-client -u admin -p admin -q "GRANT SELECT ON flow_log.* TO deepflow" +clickhouse-client -u admin -p admin -q "GRANT SELECT ON flow_metrics.* TO deepflow" +clickhouse-client -u admin -p admin -q "GRANT SELECT ON flow_tag.* TO deepflow" +clickhouse-client -u admin -p admin -q "GRANT SELECT ON profile.* TO deepflow" diff --git a/playbooks/roles/charts/clickhouse/templates/qryn/qryn-deployment.yaml b/playbooks/roles/charts/clickhouse/templates/qryn/qryn-deployment.yaml new file mode 100644 index 0000000..4a2f4bc --- /dev/null +++ b/playbooks/roles/charts/clickhouse/templates/qryn/qryn-deployment.yaml @@ -0,0 +1,36 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: qryn + namespace: monitoring + labels: + io.metrico.service: qryn +spec: + replicas: 2 + selector: + matchLabels: + io.metrico.service: qryn + strategy: {} + template: + metadata: + annotations: + qryn.cmd: qryn.dev + creationTimestamp: null + labels: + io.metrico.service: qryn + spec: + containers: + - env: + - name: CLICKHOUSE_AUTH + value: "default" + - name: CLICKHOUSE_PORT + value: "8123" + - name: CLICKHOUSE_SERVER + value: "{{ hostvars[groups[group][0]].ck_node_ip }}" + image: qxip/qryn + name: qryn + ports: + - containerPort: 3100 + resources: {} + restartPolicy: Always +status: {} diff --git a/playbooks/roles/charts/clickhouse/templates/qryn/qryn-ingress.yaml b/playbooks/roles/charts/clickhouse/templates/qryn/qryn-ingress.yaml new file mode 100644 index 0000000..09f6caf --- /dev/null +++ b/playbooks/roles/charts/clickhouse/templates/qryn/qryn-ingress.yaml @@ -0,0 +1,24 @@ +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + name: data-gateway + namespace: monitoring + annotations: + 
nginx.ingress.kubernetes.io/ssl-redirect: "true" +spec: + ingressClassName: nginx + rules: + - host: data-gateway.{{ domain }} + http: + paths: + - backend: + service: + name: qryn + port: + number: 3100 + path: / + pathType: Prefix + tls: + - hosts: + - data-gateway.{{ domain }} + secretName: obs-tls diff --git a/playbooks/roles/charts/clickhouse/templates/qryn/qryn-service.yaml b/playbooks/roles/charts/clickhouse/templates/qryn/qryn-service.yaml new file mode 100644 index 0000000..178462e --- /dev/null +++ b/playbooks/roles/charts/clickhouse/templates/qryn/qryn-service.yaml @@ -0,0 +1,15 @@ +apiVersion: v1 +kind: Service +metadata: + creationTimestamp: null + labels: + io.metrico.service: qryn + name: qryn + namespace: monitoring +spec: + ports: + - name: "3100" + port: 3100 + targetPort: 3100 + selector: + io.metrico.service: qryn diff --git a/playbooks/roles/charts/deepflow/Readme.md b/playbooks/roles/charts/deepflow/Readme.md new file mode 100644 index 0000000..ab6a38e --- /dev/null +++ b/playbooks/roles/charts/deepflow/Readme.md @@ -0,0 +1,12 @@ + +# 统计存储数据 + +select formatReadableSize(sum(rows)) as "每天写入行数", formatReadableSize(sum(bytes_on_disk)) as "每天落盘的字节", formatReadableSize(sum(data_uncompressed_bytes)) as "压缩前字节", sum(data_uncompressed_bytes)/sum(bytes_on_disk) as "压缩比", sum(rows)/86400 as "平均每秒写入的行数" from cluster(df_cluster, system.parts) where partition like '%2024-12-03%' limit 10; + + + 可以grafana再 查下确认下,流日志的统计: +select min(partition),max(partition),formatReadableSize(sum(rows)) as "每天写入行数", formatReadableSize(sum(bytes_on_disk)) as "每天落盘的字节", formatReadableSize(sum(data_uncompressed_bytes)) as "压缩前字节", sum(data_uncompressed_bytes)/sum(bytes_on_disk) as "压缩比", sum(rows)/86400 as "平均每秒写入的行数" from cluster(df_cluster, system.parts) where partition like '%2024-12-03%' and table='l4_flow_log_local' limit 10; + +调用日志的统计: +select min(partition),max(partition),formatReadableSize(sum(rows)) as "每天写入行数", formatReadableSize(sum(bytes_on_disk)) as "每天落盘的字节", 
formatReadableSize(sum(data_uncompressed_bytes)) as "压缩前字节", sum(data_uncompressed_bytes)/sum(bytes_on_disk) as "压缩比", sum(rows)/86400 as "平均每秒写入的行数" from cluster(df_cluster, system.parts) where partition like '%2024-12-03%' and table='l7_flow_log_local' limit 10; + diff --git a/playbooks/roles/charts/deepflow/files/post-setup.sh b/playbooks/roles/charts/deepflow/files/post-setup.sh new file mode 100644 index 0000000..4f6d70d --- /dev/null +++ b/playbooks/roles/charts/deepflow/files/post-setup.sh @@ -0,0 +1,7 @@ +sudo apt-get install -y apt-transport-https ca-certificates curl gnupg +curl -fsSL 'https://packages.clickhouse.com/rpm/lts/repodata/repomd.xml.key' | sudo gpg --dearmor -o /usr/share/keyrings/clickhouse-keyring.gpg + +echo "deb [signed-by=/usr/share/keyrings/clickhouse-keyring.gpg] https://packages.clickhouse.com/deb stable main" | sudo tee \ + /etc/apt/sources.list.d/clickhouse.list +sudo apt-get update +sudo apt-get install -y clickhouse-client diff --git a/playbooks/roles/charts/deepflow/files/pre-setup.sh b/playbooks/roles/charts/deepflow/files/pre-setup.sh new file mode 100644 index 0000000..1da41ac --- /dev/null +++ b/playbooks/roles/charts/deepflow/files/pre-setup.sh @@ -0,0 +1,6 @@ +#!/bin/bash +export namespace=$1 + +export MYSQL_ROOT_PASSWORD=$(kubectl get secret --namespace $namespace mysql -o jsonpath="{.data.mysql-root-password}" | base64 -d) + +kubectl run mysql-client --rm --tty -i --restart='Never' --image docker.io/bitnami/mysql:8.0.32-debian-11-r14 --namespace $namespace --env MYSQL_ROOT_PASSWORD=$MYSQL_ROOT_PASSWORD --command -- bash -c "mysql -h mysql.database.svc.cluster.local -uroot -p$MYSQL_ROOT_PASSWORD -e 'create database IF NOT EXISTS jenkins;'" diff --git a/playbooks/roles/charts/deepflow/files/setup.sh b/playbooks/roles/charts/deepflow/files/setup.sh new file mode 100644 index 0000000..ef67087 --- /dev/null +++ b/playbooks/roles/charts/deepflow/files/setup.sh @@ -0,0 +1,29 @@ +#!/bin/bash +set -x +export domain=$1 +export 
secret=$2 +export namespace=$3 # args: $1=domain $2=TLS secret $3=namespace; each falls back to the former hard-coded value when omitted + +cat << EOF > values-custom.yaml +clickhouse: + enabled: true +server: + enabled: true +deepflow-agent: + enabled: true +grafana: + enabled: true + service: + ingress: + enabled: true + ingressClassName: nginx + hosts: + - grafana.${domain:-onwalk.net} + tls: + - secretName: ${secret:-obs-tls} + hosts: + - grafana.${domain:-onwalk.net} +EOF +helm repo add deepflow https://deepflowio.github.io/deepflow +helm repo update deepflow # use `helm repo update` when helm < 3.7.0 +helm upgrade --install deepflow -n "${namespace:-monitoring}" deepflow/deepflow --create-namespace --version 6.4.9 -f values-custom.yaml diff --git a/playbooks/roles/charts/deepflow/meta/main.yml b/playbooks/roles/charts/deepflow/meta/main.yml new file mode 100644 index 0000000..1f2217b --- /dev/null +++ b/playbooks/roles/charts/deepflow/meta/main.yml @@ -0,0 +1,2 @@ +dependencies: + - role: secret-manger diff --git a/playbooks/roles/charts/deepflow/tasks/main.yml b/playbooks/roles/charts/deepflow/tasks/main.yml new file mode 100755 index 0000000..a06e1b1 --- /dev/null +++ b/playbooks/roles/charts/deepflow/tasks/main.yml @@ -0,0 +1,19 @@ +#- name: get mysql db password +# shell: 'kubectl get secret --namespace database mysql -o jsonpath="{.data.mysql-root-password}" | base64 -d' +# register: mysql_db_password_raw +# when: inventory_hostname in groups[group][0] +# +#- name: set fact join command +# set_fact: +# mysql_db_password : "{{ mysql_db_password_raw.stdout }}" +# when: inventory_hostname in groups[group][0] +# +#- name: DB Pre Setup for Jenkins Server +# script: files/pre-setup.sh {{ db_namespace }} +# when: inventory_hostname in groups[group] +# script: files/setup.sh {{ domain }} {{ item.secret_name }} {{ namespace }} {{ mysql_db_password }} + +- name: Setup Deepflow Cluster + script: files/setup.sh {{ domain }} {{ item.secret_name }} {{ namespace }} + when: inventory_hostname in groups[group] and ( tls is defined) + loop: "{{ tls }}" diff --git a/playbooks/roles/charts/flagger-loadtester/files/setup.sh
b/playbooks/roles/charts/flagger-loadtester/files/setup.sh new file mode 100644 index 0000000..cd31a9a --- /dev/null +++ b/playbooks/roles/charts/flagger-loadtester/files/setup.sh @@ -0,0 +1,47 @@ +#!/bin/bash +set -x + +# 检查参数是否为空 +check_not_empty() { + if [[ -z $1 ]]; then + echo "Error: $2 is empty. Please provide a value." + exit 1 + fi +} + +# 检查参数是否为空 +check_not_empty "$1" "DOMAIN" && DOMAIN=$1 + +helm repo add flagger https://flagger.app +kubectl create ns monitoring || true +helm upgrade -i flaggerloadtester flagger/loadtester --namespace=monitoring + +cat > flagger-loadtester-ingress.yaml << EOF +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + annotations: + nginx.ingress.kubernetes.io/ssl-redirect: "true" + name: flagger + namespace: monitoring +spec: + ingressClassName: apisix + rules: + - host: flaggerloadtester.${DOMAIN} + http: + paths: + - backend: + service: + name: flagger-loadtester + port: + number: 80 + path: / + pathType: Prefix + tls: + - hosts: + - flaggerloadtester.${DOMAIN} + secretName: obs-tls +EOF + +kubectl apply -f flagger-loadtester-ingress.yaml + diff --git a/playbooks/roles/charts/flagger-loadtester/meta/main.yml b/playbooks/roles/charts/flagger-loadtester/meta/main.yml new file mode 100644 index 0000000..1f2217b --- /dev/null +++ b/playbooks/roles/charts/flagger-loadtester/meta/main.yml @@ -0,0 +1,2 @@ +dependencies: + - role: secret-manger diff --git a/playbooks/roles/charts/flagger-loadtester/tasks/main.yml b/playbooks/roles/charts/flagger-loadtester/tasks/main.yml new file mode 100755 index 0000000..803e4eb --- /dev/null +++ b/playbooks/roles/charts/flagger-loadtester/tasks/main.yml @@ -0,0 +1,4 @@ +- name: Setup Loadtester Server + script: files/setup.sh {{ domain }} + when: inventory_hostname in groups[group] + diff --git a/playbooks/roles/charts/gitlab/files/post-setup.sh b/playbooks/roles/charts/gitlab/files/post-setup.sh new file mode 100755 index 0000000..c994271 --- /dev/null +++ 
b/playbooks/roles/charts/gitlab/files/post-setup.sh @@ -0,0 +1,30 @@ +#!/bin/bash + +kubectl delete hpa --all -n gitlab # only GitLab's autoscalers; left in place they would undo the replicas=1 set below + +# Collect the names of every Deployment in the gitlab namespace +DEPLOYMENTS=$(kubectl get deploy -n gitlab -o jsonpath='{.items[*].metadata.name}') + +# Pin each Deployment to a single replica +for DEPLOY in $DEPLOYMENTS +do + echo "Setting replicas=1 for deployment $DEPLOY" + kubectl scale deploy/$DEPLOY -n gitlab --replicas=1 +done + +# Print the per-container CPU and memory requests of each Deployment +for DEPLOY in $DEPLOYMENTS +do + echo "Deployment: $DEPLOY" + echo "====================" + kubectl get deploy $DEPLOY -n gitlab -o=jsonpath='{range .spec.template.spec.containers[*]}{.name}:{"\n"}{"\t"}cpu: {.resources.requests.cpu}{"\n"}{"\t"}mem: {.resources.requests.memory}{"\n"}{end}' + echo "====================" +done + +# Disabled: patch the CPU and memory requests of each Deployment +#for DEPLOY in $DEPLOYMENTS +#do +# echo "Setting cpu=0.1 and mem=100m for deployment $DEPLOY" +# kubectl patch deployment $DEPLOY -n gitlab -p '{"spec": {"template": {"spec": {"containers": [{"name": "'$DEPLOY'", "resources": {"requests": {"cpu": "0.1", "memory": "100m"}}}]}}}}' +# echo "====================" +#done diff --git a/playbooks/roles/charts/gitlab/files/pre-setup.sh b/playbooks/roles/charts/gitlab/files/pre-setup.sh new file mode 100755 index 0000000..34a5c71 --- /dev/null +++ b/playbooks/roles/charts/gitlab/files/pre-setup.sh @@ -0,0 +1,9 @@ +#!/bin/bash +set +x + +export namespace=$1 +export POSTGRES_PASSWORD=$(kubectl get secret --namespace $namespace postgresql -o jsonpath="{.data.postgres-password}" | base64 -d) + +kubectl run postgresql-client --rm --tty -i --restart='Never' --namespace $namespace --image docker.io/bitnami/postgresql:15.2.0-debian-11-r11 --env="PGPASSWORD=$POSTGRES_PASSWORD" --command -- psql --host postgresql -U postgres -d postgres -p 5432 -w -c "CREATE DATABASE gitlabhq_production OWNER postgres;" || echo true + +kubectl run postgresql-client --rm --tty -i --restart='Never' --namespace $namespace --image docker.io/bitnami/postgresql:15.2.0-debian-11-r11 --env="PGPASSWORD=$POSTGRES_PASSWORD"
--command -- psql --host postgresql -U postgres -d gitlabhq_production -p 5432 -w -c "CREATE EXTENSION IF NOT EXISTS plpgsql; CREATE EXTENSION IF NOT EXISTS pg_trgm; CREATE EXTENSION IF NOT EXISTS btree_gist;" || echo true diff --git a/playbooks/roles/charts/gitlab/files/setup-with-oidc.sh b/playbooks/roles/charts/gitlab/files/setup-with-oidc.sh new file mode 100644 index 0000000..b994887 --- /dev/null +++ b/playbooks/roles/charts/gitlab/files/setup-with-oidc.sh @@ -0,0 +1,106 @@ +#!/bin/bash + +domain=$1 +namespace=$2 +object_bucket=$3 +gitlab_secret=$4 +gitlab_stmp_secret=$5 +smtp_port=$7 +smtp_domain=$8 +smtp_address=$9 +smtp_username=${10} # braces are required from the 10th argument on: bash reads $10 as ${1}0 +smtp_emailfrom=${11} +smtp_display_name=${12} +oidc_issuer_url=${13} +oidc_client_id=${14} +oidc_client_token=${15} + +cat > gitlab-values.yaml < gitlab-values.yaml < gitlab-values.yaml < values.yaml << EOF +global: + imageRegistry: "$registry" +exposureType: ingress +ingress: + core: + ingressClassName: "nginx" + hostname: images.${domain} + extraTls: + - hosts: + - images.${domain} + secretName: "$secret_name" +externalURL: https://images.${domain} + +postgresql: + enabled: false +redis: + enabled: false +notary: + enabled: false +trivy: + enabled: false + +externalDatabase: + host: postgresql.database.svc.cluster.local + user: postgres + port: 5432 + password: "$pg_db_password" + sslmode: disable + coreDatabase: harbor_core + clairDatabase: harbor_clair + clairUsername: "postgres" + clairPassword: "$pg_db_password" + notaryServerDatabase: harbor_notary_server + notaryServerUsername: "postgres" + notaryServerPassword: "$pg_db_password" + notarySignerDatabase: harbor_notary_signer + notarySignerUsername: "postgres" + notarySignerPassword: "$pg_db_password" +externalRedis: + host: redis-master.redis.svc.cluster.local + port: 6379 + password: "$redis_password" +persistence: + enabled: true + imageChartStorage: + type: $backend_type + oss: + accesskeyid: $ak + accesskeysecret: $sk + region: "oss-cn-wulanchabu" + bucket: "harbor-oss" +
endpoint: "oss-cn-wulanchabu.aliyuncs.com" + s3: + region: ap-east-1 + bucket: artifact-s3 + accesskey: $ak + secretkey: $sk +EOF + +export KUBECONFIG=/etc/rancher/k3s/k3s.yaml +helm repo add bitnami https://charts.bitnami.com/bitnami +helm repo update bitnami +kubectl create ns $namespace || true +helm upgrade --install artifact bitnami/harbor --version=16.7.0 -f values.yaml -n $namespace diff --git a/playbooks/roles/charts/harbor/files/setup-office-harbor.sh b/playbooks/roles/charts/harbor/files/setup-office-harbor.sh new file mode 100644 index 0000000..7cf3ec3 --- /dev/null +++ b/playbooks/roles/charts/harbor/files/setup-office-harbor.sh @@ -0,0 +1,91 @@ +#!/bin/bash + +ak=$1 +sk=$2 +domain=$3 +namespace=$4 +secret_name=$5 +redis_password=$6 +pg_db_password=$7 +storage_type=$8 + +cat > harbor-arm-config.yaml << EOF +portal: + image: + repository: ghcr.io/octohelm/harbor/harbor-portal + tag: v2.7.0@sha256:b3f4e0e990500362b554338579497ad89af5473e024564731563704ceab9305b +core: + image: + repository: ghcr.io/octohelm/harbor/harbor-core + tag: v2.7.0@sha256:dd7f3898f32caf8e03cee046596f03034f4297231458d4de39775dd58709b55a +jobservice: + image: + repository: ghcr.io/octohelm/harbor/harbor-jobservice + tag: v2.7.0@sha256:7abd6694f546172ffec4a87e389e8ba425fa6ee82479782693c120a89a291435 +registry: + registry: + image: + repository: ghcr.io/octohelm/harbor/registry-photon + tag: v2.7.0@sha256:d5f23b2bc4271b2eb1ec002eb0c0c51e708015944316e5bd17c61de73ea54415 + controller: + image: + repository: ghcr.io/svc-design/harbor-multi-arch-images/harbor-registryctl + tag: v2.7.0@sha256:ba2412c1a629ca1c2ca4584ba51eb05e964c7eef7b1f9f6ddb39d67512debaf5 +chartmuseum: + enabled: true + image: + repository: ghcr.io/octohelm/harbor/chartmuseum-photon + tag: v2.7.0@sha256:0815066d46474b9403b2d2e5f6f9e2ae44d067d8d2f8523b95ea3d3f20f3d058 +trivy: + enabled: false +notary: + enabled: false +expose: + type: ingress + tls: + enabled: true + certSource: secret + secret: + secretName: $secret_name 
+ notarySecretName: $secret_name + ingress: + hosts: + core: harbor.${domain} + notary: artifact-notary.${domain} + className: "nginx" +externalURL: https://artifact.${domain} +database: + type: external + external: + host: "postgresql.database.svc.cluster.local" + port: "5432" + username: "postgres" + password: "$pg_db_password" + coreDatabase: "registry" + notaryServerDatabase: "notary_server" + notarySignerDatabase: "notary_signer" +redis: + type: external + external: + addr: "redis-master.redis.svc.cluster.local:6379" + password: "$redis_password" +persistence: + imageChartStorage: + type: $storage_type + oss: + accesskeyid: $ak + accesskeysecret: $sk + region: "oss-cn-wulanchabu" + bucket: "harbor-s3" + endpoint: "oss-cn-wulanchabu.aliyuncs.com" + s3: + region: ap-east-1 + bucket: artifact-s3 + accesskey: $ak + secretkey: $sk +EOF + +export KUBECONFIG=/etc/rancher/k3s/k3s.yaml +helm repo add harbor https://helm.goharbor.io +helm repo update +helm upgrade --install artifact harbor/harbor -f harbor-arm-config.yaml --version 1.11.1 -n $namespace diff --git a/playbooks/roles/charts/harbor/meta/main.yml b/playbooks/roles/charts/harbor/meta/main.yml new file mode 100644 index 0000000..2d3db3f --- /dev/null +++ b/playbooks/roles/charts/harbor/meta/main.yml @@ -0,0 +1,4 @@ +dependencies: + - role: redis + - role: postgresql + - role: secret-manger diff --git a/playbooks/roles/charts/harbor/tasks/main.yml b/playbooks/roles/charts/harbor/tasks/main.yml new file mode 100755 index 0000000..23916e8 --- /dev/null +++ b/playbooks/roles/charts/harbor/tasks/main.yml @@ -0,0 +1,38 @@ +- name: get redis password + shell: 'kubectl get secret --namespace {{ cache_namespace }} redis -o jsonpath="{.data.redis-password}" | base64 -d' + register: redis_command_raw + when: inventory_hostname in groups[group][0] + +- name: set fact join command for redis + set_fact: + redis_password : "{{ redis_command_raw.stdout | default('') }}" # the register above is skipped on other hosts, leaving no stdout + +- name: get db password + shell: 'kubectl get secret --namespace {{
db_namespace }} postgresql -o jsonpath="{.data.postgres-password}" | base64 -d' + register: db_command_raw + when: inventory_hostname in groups[group][0] + +- name: set fact join command for pg_db + set_fact: + pg_db_password : "{{ db_command_raw.stdout }}" + when: inventory_hostname in groups[group][0] + +#- name: Show Debug Info +# debug: var=command_raw verbosity=0 + +- name: Pre Setup harbor DB + script: files/pre-setup.sh {{ namespace }} + when: inventory_hostname in groups[group] + +- name: Setup harbor Server + script: files/setup-bitnami-harbor.sh {{ oss_ak }} {{ oss_sk }} {{ domain }} {{ namespace }} {{ item.secret_name }} {{ hostvars[groups[group][0]].redis_password }} {{ hostvars[groups[group][0]].pg_db_password }} {{ backend_type }} {{ registry }} + loop: "{{ tls }}" + when: inventory_hostname in groups[group] + +#- name: Sync harbor-oidc-config.json +# template: src=templates/{{ item }} dest=/tmp/{{ item }} owner=root group=root mode=0644 force=yes unsafe_writes=yes +# with_items: +# - harbor-oidc-config.json + +#- name: Setup harbor oidc config +# script: files/post-setup.sh {{ admin_password }} diff --git a/playbooks/roles/charts/harbor/templates/harbor-oidc-config.json b/playbooks/roles/charts/harbor/templates/harbor-oidc-config.json new file mode 100644 index 0000000..a42d602 --- /dev/null +++ b/playbooks/roles/charts/harbor/templates/harbor-oidc-config.json @@ -0,0 +1,11 @@ +{ + "auth_mode": "oidc_auth", + "oidc_name": "Keycloak-sso", + "oidc_endpoint": "https://keycloak.onwalk.net/realms/cloud-sso", + "oidc_client_id": "harbor-oidc", + "oidc_client_secret": "{{ harbor_oidc_client_token }}", + "oidc_scope": "openid,profile,email", + "oidc_groups_claim": "groups", + "oidc_auto_onboard": true, + "oidc_user_claim": "preferred_username" +} diff --git a/playbooks/roles/charts/harbor/vars/main.yml b/playbooks/roles/charts/harbor/vars/main.yml new file mode 100644 index 0000000..f079258 --- /dev/null +++ b/playbooks/roles/charts/harbor/vars/main.yml @@
-0,0 +1,9 @@ +group: master +namespace: artifact +db_namespace: database +cache_namespace: redis +update_secret: true +tls: + - secret_name: harbor-tls + keyfile: /etc/ssl/svc.plus.key + certfile: /etc/ssl/svc.plus.pem diff --git a/playbooks/roles/charts/jenkins/files/pre-setup.sh b/playbooks/roles/charts/jenkins/files/pre-setup.sh new file mode 100644 index 0000000..1da41ac --- /dev/null +++ b/playbooks/roles/charts/jenkins/files/pre-setup.sh @@ -0,0 +1,6 @@ +#!/bin/bash +export namespace=$1 + +export MYSQL_ROOT_PASSWORD=$(kubectl get secret --namespace $namespace mysql -o jsonpath="{.data.mysql-root-password}" | base64 -d) + +kubectl run mysql-client --rm --tty -i --restart='Never' --image docker.io/bitnami/mysql:8.0.32-debian-11-r14 --namespace $namespace --env MYSQL_ROOT_PASSWORD=$MYSQL_ROOT_PASSWORD --command -- bash -c "mysql -h mysql.database.svc.cluster.local -uroot -p$MYSQL_ROOT_PASSWORD -e 'create database IF NOT EXISTS jenkins;'" diff --git a/playbooks/roles/charts/jenkins/files/setup.sh b/playbooks/roles/charts/jenkins/files/setup.sh new file mode 100644 index 0000000..9d8310c --- /dev/null +++ b/playbooks/roles/charts/jenkins/files/setup.sh @@ -0,0 +1,86 @@ +#!/bin/bash +set -x +export domain=$1 +export secret=$2 +export namespace=$3 +export mysql_db_password=$4 + +cat > values.yaml << EOF + +controller: + agentListenerServiceType: "NodePort" + agentListenerNodePort: 50000 + admin: + username: 'admin' + password: "jenkins" + jenkinsUrlProtocol: "https" + jenkinsHome: "/var/jenkins_home" + jenkinsUrl: https://jenkins.$domain + ingress: + enabled: true + annotations: + kubernetes.io/tls-acme: "false" + ingressClassName: nginx + hostName: jenkins.$domain + path: '/' + tls: + - secretName: $secret + hosts: + - jenkins.$domain + installLatestPlugins: true + installPlugins: + - git:5.2.1 + - github:1.38.0 + - github-pullrequest:0.7.0 + - locale:314.v22ce953dfe9e + - database-mysql:1.4 + - database:191.vd5981b_97a_5fa_ + - credentials:1337.v60b_d7b_c7b_c9f + 
- credentials-binding:642.v737c34dea_6c2 # 更新版本以满足依赖关系 + - configuration-as-code:1775.v810dc950b_514 # 更新版本以满足依赖关系 + - gitlab-plugin:1.7.16 + - kubernetes:4029.v5712230ccb_f8 + - docker-plugin:1.6 + - docker-workflow:572.v950f58993843 + - docker-commons:439.va_3cb_0a_6a_fb_29 + - pipeline-stage-view:2.33 + - workflow-job:1385.vb_58b_86ea_fff1 + - workflow-cps:3883.vb_3ff2a_e3eea_f + - workflow-aggregator:596.v8c21c963d92d + JCasC: + enabled: true + defaultConfig: true + configScripts: + database: | + unclassified: + globalDatabaseConfiguration: + database: + mysql: + hostname: mysql.database.svc.cluster.local + username: "root" + database: "jenkins" + password: $mysql_db_password + properties: "?useSSL=false" + validationQuery: "SELECT 1" +agent: + enabled: true + replicas: 3 + numExecutors: 1 + jenkinsUrl: https://jenkins.$domain + image: + repository: "jenkins/inbound-agent" + tag: "latest" + customJenkinsLabels: [] + +persistence: + enabled: true + storageClass: "local-path" + size: "10Gi" +networkPolicy: + enabled: false +additionalConfig: {} +EOF + +helm repo add jenkins https://charts.jenkins.io +helm repo update +helm upgrade --install jenkins jenkins/jenkins -n $namespace --create-namespace -f values.yaml diff --git a/playbooks/roles/charts/jenkins/howto.md b/playbooks/roles/charts/jenkins/howto.md new file mode 100644 index 0000000..e978bcb --- /dev/null +++ b/playbooks/roles/charts/jenkins/howto.md @@ -0,0 +1,124 @@ +# Jenkins Mater 部署 + +# Jenkins Node IaC Runner 设置 +1. 安装git terraform + +## GitLab to trigger Jenkins + +1. Gitlab https://gitlab.xxx.com/-/profile/personal_access_tokens + +2. GitLab和Jenkins的集成可以让你在GitLab中的代码更新后自动触发Jenkins的构建任务。以下是配置GitLab插件和Jenkins以实现GitLab触发Jenkins的步骤: +3. 在Jenkins中安装GitLab插件 +首先,你需要在Jenkins中安装GitLab插件。登录到Jenkins的管理界面,然后转到“Manage Jenkins” > “Manage Plugins” > “Available”,在搜索框中输入“GitLab”,找到并安装“GitLab Plugin”。 +4. 
在Jenkins中配置GitLab连接 +安装完插件后,你需要配置GitLab的连接。转到“Manage Jenkins” > “Configure System”,滚动到“GitLab”部分,点击“Add GitLab Server” > “Server”,输入你的GitLab服务器URL,并生成并输入一个与你的GitLab账户相关联的API Token。 +5. 在Jenkins中创建一个新的任务 +创建一个新的任务,并在源代码管理部分选择“Git”,输入你的GitLab项目的URL。在构建触发器部分,选择“Build when a change is pushed to GitLab”。 +记录:GitLab webhook URL: https://jenkins.xxx.xxx/project/alicloud-oss-pipeline +6. 在GitLab中配置Webhook +在你的GitLab项目中,转到“Settings” > “Integrations” -> 启用"Jenkins" +- 在URL中输入步骤5记录的 Webhook URL https://jenkins.xxx.xxx/project/alicloud-oss-pipeline +- 选择你想要触发Jenkins任务的事件(例如,当代码被推送时) +- Project name: 输入项目名称 +- Username: Jenkins 用户名 +- Password: Jenkins 认证密码 +- 保存更改, 测试设置,返回状态200为配置正确 + +以上就是配置GitLab插件和Jenkins以实现GitLab触发Jenkins的步骤。在完成这些步骤后,每当你的GitLab项目有更新时,都会自动触发对应的Jenkins构建任务。 + +## 要将GitHub代码仓库与Jenkins关联起来,您需要完成以下步骤: + +1 要在 GitHub 中启用 webhook 功能以触发 Jenkins 构建,请按照以下步骤操作: +2 进入 GitHub 仓库设置:在要设置 webhook 的 GitHub 仓库页面上,点击右上角的“Settings”。 +3 选择 Webhooks 选项:在仓库设置页面的左侧菜单中,选择“Webhooks”。 +4 添加 Webhook:在 Webhooks 页面的右上角,点击“Add webhook”。 + +配置 Webhook: + +1. Payload URL:输入 Jenkins 服务器的 webhook URL。格式应为 http://your-jenkins-server/github-webhook/。确保替换 your-jenkins-server 为您 Jenkins 服务器的实际地址。 +2. Content type:选择 application/json。 +3. Secret(可选):如果需要额外的安全性,可以输入一个秘密令牌。 +4. SSL verification:选择是否验证 SSL 证书。 +5. 
Which events would you like to trigger this webhook?:选择触发 webhook 的事件。通常选择 Just the push event(只有推送事件)或 Let me select individual events(让我选择单独的事件)并选择适当的事件(例如,push、pull request 等)。 +添加 Webhook:点击页面底部的“Add webhook”按钮以保存配置。 + +完成以上步骤后,您的 GitHub 仓库就配置好了一个 webhook,可以触发 Jenkins 构建。记得在 Jenkins 中设置相应的任务来响应这些 webhook。 + + +安装Jenkins插件: + +确保您的Jenkins实例已经安装了“GitHub”和“GitHub Integration”插件。您可以在Jenkins管理界面的“插件管理”部分进行安装。 +配置GitHub Webhook: + +在GitHub仓库的设置中,找到“Webhooks”部分并添加一个新的Webhook。 +将“Payload URL”设置为您的Jenkins服务器的URL,通常是这样的格式:http://your-jenkins-server/github-webhook/。 +选择触发Webhook的事件,通常是“Just the push event”或者“Send me everything”。 +确保“Content type”设置为“application/json”。 +点击“Add webhook”保存设置。 +配置Jenkins Job: + +在Jenkins中创建一个新的构建任务或者配置现有的任务。 +在“源码管理”部分,选择“Git”并填写您的GitHub仓库的URL。 +在“构建触发器”部分,选择“GitHub hook trigger for GITScm polling”选项。这样,每当GitHub仓库有新的推送事件时,Jenkins就会自动触发构建。 +测试配置: + +推送一些改动到您的GitHub仓库,检查是否触发了Jenkins构建。 +在Jenkins的构建历史中查看构建是否成功执行。 +通过完成以上步骤,您的GitHub代码仓库就与Jenkins关联起来了,可以实现自动触发构建的功能。 + +要在 Jenkins 中设置 GitHub 服务,您需要进行以下步骤: + +安装 GitHub 插件:首先确保您的 Jenkins 实例已安装 GitHub 插件。如果尚未安装,请转到 Jenkins 的“插件管理”页面,在“可选插件”选项卡中搜索并安装 GitHub 插件。 + +配置 GitHub 服务器:在 Jenkins 管理界面中,转到“系统管理” > “系统设置”。 + +在系统设置页面中,找到并点击“GitHub”部分。 +点击“Add GitHub Server”添加一个新的 GitHub 服务器配置。 +在配置页面中,输入一个描述性的名称,例如“GitHub”。 +在 GitHub API URL 中输入 GitHub 的 API 地址。通常为 https://api.github.com。 +如果您的 GitHub 仓库需要身份验证,请在“凭据”部分选择一个已配置的凭据。如果尚未配置凭据,请点击“Add”添加一个新的凭据,选择类型为“Secret text”或“Username with password”,然后输入您的 GitHub 用户名和密码或访问令牌。 +完成配置后,点击“保存”保存 GitHub 服务器配置。 +验证配置:您可以在配置页面的底部点击“Test connection”来验证您的 GitHub 服务器配置是否正常工作。 + +保存设置:确保在完成配置后点击“保存”保存更改。 + +现在,您已成功配置了 Jenkins 的 GitHub 服务。您可以在 Jenkins 任务中使用这个配置来与 GitHub 仓库进行集成,例如触发构建、拉取代码等操作。 + + +对于 Jenkins 中的 GitHub API URL (https://api.github.com) 的凭据设置,您可以使用 GitHub Personal Access Token。这个 Token 可以通过以下步骤生成: + +在 GitHub 上登录您的账号。 +点击页面右上角的头像,选择“Settings”。 +在左侧边栏中,点击“Developer settings”。 +在左侧边栏中,点击“Personal access tokens”。 +点击“Generate new token”。 +输入一个描述性的名称,选择需要的权限(至少需要 repo
权限来访问仓库),然后点击“Generate token”。 +复制生成的 Token,并保存到一个安全的地方。请注意,这个 Token 只会显示一次,如果您丢失了,请重新生成一个新的 Token。 +在 Jenkins 中使用这个 Token 作为 GitHub API URL (https://api.github.com) 的凭据时,您可以将 Token 添加为 Jenkins 的凭据: + +进入 Jenkins 管理界面,转到“凭据” > “系统”。 +在“系统”页面中,点击“Global credentials (unrestricted)”。 +在凭据页面中,点击“Add credentials”。 +在“Kind”下拉菜单中选择“Secret text”。 +在“Secret”框中粘贴您在 GitHub 上生成的 Personal Access Token。 +输入一个描述性的名称,并点击“OK”保存凭据。 +现在,您可以在 Jenkins 的配置中使用这个凭据来访问 GitHub API (https://api.github.com)。 + +确保 Docker 已安装:在 Jenkins 代理节点上确认 Docker 已正确安装并配置。您可以通过在终端中执行 docker --version 命令来检查 Docker 是否可用。 + +检查 Docker 环境:如果 Docker 已安装,请确保 Docker 服务正在运行。您可以使用 sudo systemctl status docker 命令检查 Docker 服务的状态。 + +确认 Jenkins 全局工具配置:在 Jenkins 管理界面中,转到“系统管理”->“全局工具配置”,确保 Docker 工具已正确配置。如果未配置,您可以添加一个 Docker 工具,并指定正确的安装路径。 + +重启 Jenkins 服务:在进行了上述更改后,尝试重启 Jenkins 服务,以确保新的配置生效。 + +尝试在终端中执行 Docker 命令:在 Jenkins 代理节点上打开终端,尝试手动执行一些 Docker 命令(如 docker pull),看看是否能够正常执行 + +要设置 Jenkins Docker 流水线,你可以按照以下步骤进行操作: + +前提条件 +确保你的 Jenkins 实例已经安装了以下插件: + +Docker Pipeline +Docker Commons + diff --git a/playbooks/roles/charts/jenkins/meta/main.yml b/playbooks/roles/charts/jenkins/meta/main.yml new file mode 100644 index 0000000..7e3f81b --- /dev/null +++ b/playbooks/roles/charts/jenkins/meta/main.yml @@ -0,0 +1,3 @@ +dependencies: + - role: mysql + - role: secret-manger diff --git a/playbooks/roles/charts/jenkins/tasks/main.yml b/playbooks/roles/charts/jenkins/tasks/main.yml new file mode 100755 index 0000000..2bd1ab0 --- /dev/null +++ b/playbooks/roles/charts/jenkins/tasks/main.yml @@ -0,0 +1,18 @@ +- name: get mysql db password + shell: 'kubectl get secret --namespace database mysql -o jsonpath="{.data.mysql-root-password}" | base64 -d' + register: mysql_db_password_raw + when: inventory_hostname in groups[group][0] + +- name: set fact join command + set_fact: + mysql_db_password : "{{ mysql_db_password_raw.stdout }}" + when: inventory_hostname in groups[group][0] + +- name: DB Pre Setup for Jenkins Server + script: 
files/pre-setup.sh {{ db_namespace }} + when: inventory_hostname in groups[group] + +- name: Setup Jenkins Cluster + script: files/setup.sh {{ domain }} {{ item.secret_name }} {{ namespace }} {{ hostvars[groups[group][0]].mysql_db_password }} + when: inventory_hostname in groups[group] and ( tls is defined) + loop: "{{ tls }}" diff --git a/playbooks/roles/charts/keycloak/files/pre-setup.sh b/playbooks/roles/charts/keycloak/files/pre-setup.sh new file mode 100644 index 0000000..ae0beaa --- /dev/null +++ b/playbooks/roles/charts/keycloak/files/pre-setup.sh @@ -0,0 +1,5 @@ +#!/bin/bash +export namespace=$1 +export POSTGRES_PASSWORD=$(kubectl get secret --namespace $namespace postgresql -o jsonpath="{.data.postgres-password}" | base64 -d) + +kubectl run postgresql-client --rm --tty -i --restart='Never' --namespace $namespace --image docker.io/bitnami/postgresql:15.2.0-debian-11-r11 --env="PGPASSWORD=$POSTGRES_PASSWORD" --command -- psql --host postgresql -U postgres -d postgres -p 5432 -w -c "CREATE DATABASE keycloak;" || echo true diff --git a/playbooks/roles/charts/keycloak/files/setup-keycloak.sh b/playbooks/roles/charts/keycloak/files/setup-keycloak.sh new file mode 100644 index 0000000..8ffaa7b --- /dev/null +++ b/playbooks/roles/charts/keycloak/files/setup-keycloak.sh @@ -0,0 +1,39 @@ +#!/bin/bash + +export domain=$1 +export secret=$2 +export namespace=$3 +export keycloak_ui_password=$4 +export keycloak_db_password=$5 + +cat > keycloak-values.yaml << EOF +proxy: edge +tls: + enabled: false + existingSecret: "$secret" +auth: + adminPassword: "$keycloak_ui_password" +ingress: + enabled: false + ingressClassName: "nginx" + hostname: keycloak.${domain} + tls: true + extraTls: + - hosts: + - keycloak.${domain} + secretName: $secret +postgresql: + enabled: true +#externalDatabase: +# host: "postgresql.database.svc.cluster.local" +# port: 5432 +# user: postgres +# database: keycloak +# password: "$keycloak_db_password" +EOF + +helm repo add bitnami https://charts.bitnami.com/bitnami || echo true
+helm repo update +kubectl create ns ${namespace} || echo true +kubectl create secret tls onwalk-tls --cert=/etc/ssl/onwalk.net.pem --key=/etc/ssl/onwalk.net.key -n ${namespace} || echo true +helm upgrade --install keycloak bitnami/keycloak -n $namespace -f keycloak-values.yaml diff --git a/playbooks/roles/charts/keycloak/meta/main.yml b/playbooks/roles/charts/keycloak/meta/main.yml new file mode 100644 index 0000000..1faf774 --- /dev/null +++ b/playbooks/roles/charts/keycloak/meta/main.yml @@ -0,0 +1,3 @@ +dependencies: + - role: postgresql + - role: secret-manger diff --git a/playbooks/roles/charts/keycloak/readme.md b/playbooks/roles/charts/keycloak/readme.md new file mode 100644 index 0000000..c2d80d2 --- /dev/null +++ b/playbooks/roles/charts/keycloak/readme.md @@ -0,0 +1,8 @@ +https://github.com/bitnami/charts/issues/6940 + +Describe the bug +Mixed Content: The page at 'https://keycloak.dev.trademaster.com.br/auth/admin/master/console/' was loaded over HTTPS, but requested an insecure script 'http://keycloak.dev.trademaster.com.br/auth/js/keycloak.js?version=7a4is'. 
This request has been blocked; the content must be served over HTTPS + +extraEnvVars: +name: KEYCLOAK_PROXY +value: reencrypt diff --git a/playbooks/roles/charts/keycloak/tasks/main.yml b/playbooks/roles/charts/keycloak/tasks/main.yml new file mode 100755 index 0000000..dab240e --- /dev/null +++ b/playbooks/roles/charts/keycloak/tasks/main.yml @@ -0,0 +1,29 @@ +- name: Init Keycloak DB + script: files/pre-setup.sh {{ db_namespace }} + +- name: get db password + shell: 'kubectl get secret --namespace database postgresql -o jsonpath="{.data.postgres-password}" | base64 -d' + register: command_raw + when: inventory_hostname in groups[group][0] + +- name: set fact join command + set_fact: + keycloak_db_password : "{{ command_raw.stdout }}" + when: inventory_hostname in groups[group][0] + +#- name: Show Debug Info +# debug: var=command_raw verbosity=0 + +- name: Setup Keycloak Server + script: files/setup-keycloak.sh {{ domain }} {{ item.secret_name }} {{ namespace }} {{ admin_password }} {{ hostvars[groups[group][0]].keycloak_db_password }} + loop: "{{ tls }}" + when: inventory_hostname in groups[group] + +#- name: Sync aws-gloabl-oidc-broker deploy yaml +# template: src=templates/{{ item }} dest=/tmp/{{ item }} owner=root group=root mode=0644 force=yes unsafe_writes=yes +# with_items: +# - aws-gloabl-oidc-broker.yaml + +#- name: Setup aws-gloabl-oidc-broker +# shell: "kubectl apply -f /tmp/aws-gloabl-oidc-broker.yaml" +# when: inventory_hostname in groups[group] diff --git a/playbooks/roles/charts/keycloak/templates/aws-gloabl-oidc-broker.yaml b/playbooks/roles/charts/keycloak/templates/aws-gloabl-oidc-broker.yaml new file mode 100644 index 0000000..0aa9fd2 --- /dev/null +++ b/playbooks/roles/charts/keycloak/templates/aws-gloabl-oidc-broker.yaml @@ -0,0 +1,74 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: aws-global-oidc-broker + namespace: itsm + labels: + io.metrico.service: aws-global-oidc-broker +spec: + replicas: 1 + selector: + matchLabels: + 
io.metrico.service: aws-global-oidc-broker + strategy: {} + template: + metadata: + creationTimestamp: null + labels: + io.metrico.service: aws-global-oidc-broker + spec: + containers: + - name: aws-global-oidc-broker + image: artifact.onwalk.net/public/aws-global-oidc-broker:1.2.0 + ports: + - containerPort: 5000 + resources: {} + env: + - name: KEYCLOAK_CLIENT_ID + value: "aws-oidc" + - name: KEYCLOAK_WELLKNOWN + value: "https://keycloak.apollo-ev.com/realms/cloud-sso/.well-known/openid-configuration" + - name: KEYCLOAK_CLIENT_SECRET + value: "WYyZJGUOOiwooIp700PtykmjYkrsPJPi" + - name: TITLE + value: "aws-oidc" + restartPolicy: Always +--- +apiVersion: v1 +kind: Service +metadata: + creationTimestamp: null + name: aws-global-oidc-broker + namespace: itsm +spec: + ports: + - name: "5000" + port: 5000 + nodePort: 5000 + targetPort: 5000 + selector: + io.metrico.service: aws-global-oidc-broker + type: NodePort +--- +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + name: aws-global-oidc-broker + namespace: itsm +spec: + ingressClassName: nginx + rules: + - host: loki.apollo-ev.com + http: + paths: + - path: / + pathType: Prefix + backend: + service: + name: aws-global-oidc-broker + port: + number: 5000 + tls: + - hosts: + - loki.apollo-ev.com + secretName: keycloak-tls diff --git a/playbooks/roles/charts/keycloak/vars/main.yml b/playbooks/roles/charts/keycloak/vars/main.yml new file mode 100644 index 0000000..2be57f6 --- /dev/null +++ b/playbooks/roles/charts/keycloak/vars/main.yml @@ -0,0 +1,16 @@ +group: master +domain: onwalk.net +namespace: keycloak +update_secret: true +db_namespace: database +admin_password: !vault | + $ANSIBLE_VAULT;1.1;AES256 + 37363730333461643562316539303664363262646238366336353434643461323263323437646362 + 3537373363343563316334333861623663383832363034350a326362646636643665383632383531 + 63646132343636613739383534323838613639656262363039323030353761636164633165303066 + 
6230663366373831610a363032643039373330663239373733323931396531333733366364313265
+          3130
+tls:
+  - secret_name: keycloak-tls
+    keyfile: /etc/ssl/onwalk.net.key
+    certfile: /etc/ssl/onwalk.net.pem
diff --git a/playbooks/roles/charts/mysql/files/setup-mysql.sh b/playbooks/roles/charts/mysql/files/setup-mysql.sh
new file mode 100644
index 0000000..563a99e
--- /dev/null
+++ b/playbooks/roles/charts/mysql/files/setup-mysql.sh
@@ -0,0 +1,6 @@
+#!/bin/bash
+
+helm repo add bitnami https://charts.bitnami.com/bitnami || echo true
+helm repo up
+kubectl create ns database || echo true
+helm upgrade --install mysql bitnami/mysql -n database
diff --git a/playbooks/roles/charts/mysql/tasks/main.yml b/playbooks/roles/charts/mysql/tasks/main.yml
new file mode 100755
index 0000000..8b630bd
--- /dev/null
+++ b/playbooks/roles/charts/mysql/tasks/main.yml
@@ -0,0 +1,3 @@
+- name: Setup MySQL Server
+  script: files/setup-mysql.sh
+  when: inventory_hostname in groups[group]
diff --git a/playbooks/roles/charts/node-exporter/tasks/main.yml b/playbooks/roles/charts/node-exporter/tasks/main.yml
new file mode 100755
index 0000000..13898d7
--- /dev/null
+++ b/playbooks/roles/charts/node-exporter/tasks/main.yml
@@ -0,0 +1,23 @@
+- name: create user prometheus
+  shell: "useradd prometheus -s /sbin/nologin || echo true"  # best-effort: '||' (not a pipe) so a failing useradd, e.g. on re-runs, is tolerated
+  when: inventory_hostname in groups[group]
+
+- name: clean old file
+  shell: "rm -f /usr/bin/node_exporter"
+  when: inventory_hostname in groups[group]
+
+- name: download node_exporter binary
+  shell: "curl -Lo /usr/bin/node_exporter https://mirrors.onwalk.net/tools/linux-amd64/node_exporter && chmod 755 /usr/bin/node_exporter"
+  when: inventory_hostname in groups[group]
+
+- name: create node-exporter.service
+  template: src=templates/node-exporter.service dest=/etc/systemd/system/node-exporter.service owner=root group=root mode=0644
+  when: inventory_hostname in groups[group]
+
+- name: reload node-exporter service
+  shell: 'systemctl daemon-reload'  # pick up the unit file written by the previous task
+  when: inventory_hostname in groups[group]
+
+- name: init node-exporter service
+  shell: 'systemctl restart node-exporter.service'
+  when: inventory_hostname in groups[group]
diff --git a/playbooks/roles/charts/node-exporter/templates/node-exporter.service b/playbooks/roles/charts/node-exporter/templates/node-exporter.service
new file mode 100755
index 0000000..f05a15b
--- /dev/null
+++ b/playbooks/roles/charts/node-exporter/templates/node-exporter.service
@@ -0,0 +1,14 @@
+[Unit]
+Description=Prometheus Node Exporter
+After=network.target
+
+[Service]
+Type=simple
+User=prometheus
+ExecStart=/usr/bin/node_exporter
+Restart=on-failure
+RestartSec=30
+StartLimitInterval=0
+
+[Install]
+WantedBy=multi-user.target
diff --git a/playbooks/roles/charts/observability-agent/files/setup.sh b/playbooks/roles/charts/observability-agent/files/setup.sh
new file mode 100644
index 0000000..1417a83
--- /dev/null
+++ b/playbooks/roles/charts/observability-agent/files/setup.sh
@@ -0,0 +1,46 @@
+#!/bin/bash
+set -x
+export domain=$1  # base domain; used for the prometheus.<domain> and data-gateway.<domain> URLs below
+export deepflowserverip=$2  # written to deepflowServerNodeIPS in values.yaml
+export deepflowk8sclusterid=$3  # written to deepflowK8sClusterID in values.yaml
+
+cat > values.yaml << EOF
+deepflow-agent:
+  enabled: true
+  deepflowServerNodeIPS:
+  - $deepflowserverip
+  deepflowK8sClusterID: $deepflowk8sclusterid
+prometheus:
+  enabled: true
+  server:
+    name: agent
+    retention: "30m"
+    extraFlags:
+    - web.enable-lifecycle
+    - enable-feature=expand-external-labels
+    remoteWrite:
+    - name: remote_prometheus
+      url: 'https://prometheus.${domain}/api/v1/write'
+    persistentVolume:
+      enabled: false
+  alertmanager:
+    enabled: false
+  prometheus-pushgateway:
+    enabled: false
+  kube-state-metrics:
+    enabled: false
+  prometheus-node-exporter:
+    enabled: false
+promtail:
+  enabled: true
+  config:
+    clients:
+    - url: https://data-gateway.${domain}/loki/api/v1/push
+EOF
+
+node_name=`kubectl get nodes | awk 'NR>1 {print $1}'`  # every node name, header row skipped
+kubectl create namespace monitoring || echo true
+kubectl label nodes $node_name prometheus=true --overwrite || echo true  # unquoted on purpose: one argument per node
+helm repo add stable https://charts.onwalk.net/ || echo true
+helm repo update
+helm upgrade --install observabilityagent stable/observabilityagent -n monitoring -f values.yaml
diff --git a/playbooks/roles/charts/observability-agent/meta/main.yml b/playbooks/roles/charts/observability-agent/meta/main.yml
new file mode 100644
index 0000000..9711b33
--- /dev/null
+++ b/playbooks/roles/charts/observability-agent/meta/main.yml
@@ -0,0 +1,2 @@
+dependencies:
+  - role: common
diff --git a/playbooks/roles/charts/observability-agent/tasks/main.yml b/playbooks/roles/charts/observability-agent/tasks/main.yml
new file mode 100755
index 0000000..ed4f91f
--- /dev/null
+++ b/playbooks/roles/charts/observability-agent/tasks/main.yml
@@ -0,0 +1,3 @@
+- name: Setup Observability Agent
+  script: files/setup.sh {{ domain }} {{ deepflowserverip }} {{ deepflowk8sclusterid }}
+  when: inventory_hostname in groups[group]
diff --git a/playbooks/roles/charts/observability-server/files/mysql-db-init-setup.sh b/playbooks/roles/charts/observability-server/files/mysql-db-init-setup.sh
new file mode 100644
index 0000000..a12840b
--- /dev/null
+++ b/playbooks/roles/charts/observability-server/files/mysql-db-init-setup.sh
@@ -0,0 +1,6 @@
+#!/bin/bash
+export namespace=$1  # namespace holding the "mysql" secret; the client pod also runs there
+
+export MYSQL_ROOT_PASSWORD=$(kubectl get secret --namespace $namespace mysql -o jsonpath="{.data.mysql-root-password}" | base64 -d)
+
+kubectl run mysql-client --rm --tty -i --restart='Never' --image docker.io/bitnami/mysql:8.0.32-debian-11-r14 --namespace $namespace --env MYSQL_ROOT_PASSWORD=$MYSQL_ROOT_PASSWORD --command -- bash -c "mysql -h mysql.database.svc.cluster.local -uroot -p$MYSQL_ROOT_PASSWORD -e 'create database IF NOT EXISTS grafana;'"
diff --git a/playbooks/roles/charts/observability-server/files/setup-observable-server.sh b/playbooks/roles/charts/observability-server/files/setup-observable-server.sh
new file mode 100644
index 0000000..9daa528
--- /dev/null
+++ b/playbooks/roles/charts/observability-server/files/setup-observable-server.sh
@@ -0,0 +1,132 @@
+#!/bin/bash +set -x +export domain=$1 +export secret=$2 +export namespace=$3 +export mysql_db_password=$4 +export ck_node_ip1=$5 +export ck_node_ip2=$6 +export ck_node_ip3=$7 + +node_name=`kubectl get nodes | awk '{print $1}' | tail -n 1` +kubectl label nodes $node_name app=prometheus --overwrite + +cat > values.yaml << EOF +influxdb: + enabled: true + ingress: + tls: true + enabled: true + secretName: ${secret} + hostname: influxdb.${domain} + className: nginx +deepflow: + enabled: true + clickhouse: + enabled: true + mysql: + enabled: false + grafana: + enabled: true + ingress: + enabled: true + ingressClassName: nginx + hosts: + - grafana.${domain} + tls: + - secretName: ${secret} + hosts: + - grafana.${domain} + global: + #externalClickHouse: + # enabled: true + # type: ep + # clusterName: default + # storagePolicy: default + # username: default + # password: '' + # hosts: + # - ip: $ck_node_ip1 + # port: 9000 + # - ip: $ck_node_ip2 + # port: 9000 + # - ip: $ck_node_ip3 + # port: 9000 + externalMySQL: + enabled: true + ip: mysql.database.svc.cluster.local + port: 3306 + username: root + password: $mysql_db_password +prometheus: + enabled: true + alertmanager: + enabled: false + prometheus-pushgateway: + enabled: false + kube-state-metrics: + enabled: true + server: + extraArgs: + enable-feature: remote-write-receiver + ingress: + enabled: true + ingressClassName: nginx + hosts: + - prometheus.${domain} + tls: + - secretName: ${secret} + hosts: + - prometheus.${domain} + alertmanagers: + - static_configs: + - targets: + - alertmanager.${domain} + serverFiles: + prometheus.yml: + rule_files: + - /etc/config/recording_rules.yml + - /etc/config/alerting_rules.yml +alertmanager: + configmapReload: + enabled: true + ingress: + enabled: true + className: "nginx" + hosts: + - host: alertmanager.$domain + paths: + - path: / + pathType: ImplementationSpecific + tls: + - secretName: ${secret} + hosts: + - alertmanager.$domain + config: + global: + resolve_timeout: 5m + 
smtp_smarthost: 'smtp.qq.com:465'
+      smtp_from: '11111111@qq.com'
+      smtp_auth_username: '11111111@qq.com'
+      smtp_auth_password: '123456'
+      smtp_require_tls: false
+    templates:
+      - '/etc/alertmanager/*.tmpl'
+    receivers:
+      - name: 'default-receiver'
+        email_configs:
+          - to: '{{ template "email.to" . }}'
+            html: '{{ template "email.to.html" . }}'
+    route:
+      group_wait: 10s
+      group_interval: 5m
+      receiver: default-receiver
+      repeat_interval: 1h
+EOF
+
+helm repo add stable https://charts.onwalk.net/ || echo true
+helm repo update
+kubectl delete deploy observability-server -n ${namespace} || echo true
+helm upgrade --install observability-server stable/observableserver -n ${namespace} -f values.yaml
+sudo curl -o /usr/bin/deepflow-ctl https://deepflow-ce.oss-cn-beijing.aliyuncs.com/bin/ctl/stable/linux/$(arch | sed 's|x86_64|amd64|' | sed 's|aarch64|arm64|')/deepflow-ctl
+sudo chmod a+x /usr/bin/deepflow-ctl
diff --git a/playbooks/roles/charts/observability-server/meta/main.yml b/playbooks/roles/charts/observability-server/meta/main.yml
new file mode 100644
index 0000000..7e3f81b
--- /dev/null
+++ b/playbooks/roles/charts/observability-server/meta/main.yml
@@ -0,0 +1,3 @@
+dependencies:
+  - role: mysql
+  - role: secret-manger
diff --git a/playbooks/roles/charts/observability-server/tasks/main.yml b/playbooks/roles/charts/observability-server/tasks/main.yml
new file mode 100755
index 0000000..af8743b
--- /dev/null
+++ b/playbooks/roles/charts/observability-server/tasks/main.yml
@@ -0,0 +1,39 @@
+- name: Post Setup MySQL Server
+  script: files/mysql-db-init-setup.sh {{ db_namespace }}
+  when: inventory_hostname in groups[group]
+
+- name: get mysql db password
+  shell: 'kubectl get secret --namespace database mysql -o jsonpath="{.data.mysql-root-password}" | base64 -d'
+  register: mysql_db_password_raw
+  when: inventory_hostname == groups[group][0]  # first host of the group only; '==' because 'in' against a string is a substring test
+
+- name: set fact join command
+  set_fact:
+    mysql_db_password : "{{ mysql_db_password_raw.stdout }}"  # stored on the first host only; the setup task reads it through hostvars
+  when: inventory_hostname == groups[group][0]
+
+#- name: get clickhouse node ips
+#  shell: "kubectl get pods -n monitoring -o wide | grep clickhouse | awk '{print $6}'"
+#  register: ck_node_ips_raw
+#  when: inventory_hostname in groups[group][0]
+#
+#- name: set fact join command for ck_node_ip1
+#  set_fact:
+#    ck_node_ip1 : "{{ ck_node_ips_raw.stdout_lines[0] }}"
+#  when: inventory_hostname in groups[group][0]
+#
+#- name: set fact join command for ck_node_ip2
+#  set_fact:
+#    ck_node_ip2 : "{{ ck_node_ips_raw.stdout_lines[1] }}"
+#  when: inventory_hostname in groups[group][0]
+#
+#- name: set fact join command for ck_node_ip3
+#  set_fact:
+#    ck_node_ip3 : "{{ ck_node_ips_raw.stdout_lines[2] }}"
+#  when: inventory_hostname in groups[group][0]
+
+- name: Setup Observability Server
+  script: files/setup-observable-server.sh {{ domain }} {{ item.secret_name }} {{ namespace }} {{ hostvars[groups[group][0]].mysql_db_password }}
+  #script: files/setup-observable-server.sh {{ domain }} {{ item.secret_name }} {{ namespace }} {{ mysql_db_password }} {{ ck_node_ip1 }} {{ ck_node_ip2 }} {{ ck_node_ip3 }}
+  when: inventory_hostname in groups[group] and ( tls is defined)
+  loop: "{{ tls | default([]) }}"  # the loop is templated before 'when' runs, so fall back to an empty list when tls is unset
diff --git a/playbooks/roles/charts/openldap/files/setup-openldap.sh b/playbooks/roles/charts/openldap/files/setup-openldap.sh
new file mode 100644
index 0000000..803df34
--- /dev/null
+++ b/playbooks/roles/charts/openldap/files/setup-openldap.sh
@@ -0,0 +1,44 @@
+#!/bin/bash
+
+export domain=$1
+export secret=$2
+export namespace=$3
+export password=$4
+
+cat > openldap-vaules.yaml << EOF
+global:
+  ldapDomain: $domain
+  adminPassword: $password
+  configPassword: $password
+service:
+  type: NodePort
+  ldapPortNodePort: 389
+  sslLdapPortNodePort: 636
+phpldapadmin:
+  enabled: true
+  ingress:
+    enabled: false
+    ingressClassName: nginx
+    hosts:
+    - openldap-admin.${domain}
+    tls:
+    - secretName: ${secret}
+      hosts:
+      - openldap-admin.${domain}
+ltb-passwd:
+  enabled: true
+  ingress:
+    enabled: false
+    ingressClassName: nginx
+
hosts: + - openldap-ltb.${domain} + tls: + - secretName: ${secret} + hosts: + - openldap-ltb.${domain} +EOF + +helm repo add openldap https://jp-gouin.github.io/helm-openldap/ +helm repo up +kubectl create ns ${namespace} || echo true +helm upgrade --install openldap openldap/openldap-stack-ha -n ${namespace} --create-namespace -f openldap-vaules.yaml diff --git a/playbooks/roles/charts/openldap/meta/main.yml b/playbooks/roles/charts/openldap/meta/main.yml new file mode 100644 index 0000000..6fc3ce8 --- /dev/null +++ b/playbooks/roles/charts/openldap/meta/main.yml @@ -0,0 +1,3 @@ +dependencies: + - role: cert-manager + - role: secret-manger diff --git a/playbooks/roles/charts/openldap/tasks/main.yml b/playbooks/roles/charts/openldap/tasks/main.yml new file mode 100755 index 0000000..f1266ef --- /dev/null +++ b/playbooks/roles/charts/openldap/tasks/main.yml @@ -0,0 +1,13 @@ +- name: Setup OpenLdap Server + script: files/setup-openldap.sh {{ domain }} {{ item.secret_name }} {{ namespace }} {{ admin_password }} + loop: "{{ tls }}" + when: inventory_hostname in groups[group] + +- name: sync ldap ingress config + template: src=templates/{{ item }} dest=/tmp/{{ item }} owner=root group=root mode=0644 force=yes unsafe_writes=yes + with_items: + - ingress.yaml + +- name: Setup ldap ingress + shell: "cd /tmp/ && kubectl apply -f ingress.yaml" + when: inventory_hostname in groups[group] diff --git a/playbooks/roles/charts/openldap/templates/.gitignore b/playbooks/roles/charts/openldap/templates/.gitignore new file mode 100644 index 0000000..a194b20 --- /dev/null +++ b/playbooks/roles/charts/openldap/templates/.gitignore @@ -0,0 +1,2 @@ +/clickhouse-keeper-k8s.iml +/.idea/ diff --git a/playbooks/roles/charts/openldap/templates/ingress.yaml b/playbooks/roles/charts/openldap/templates/ingress.yaml new file mode 100644 index 0000000..7afd5f1 --- /dev/null +++ b/playbooks/roles/charts/openldap/templates/ingress.yaml @@ -0,0 +1,45 @@ +apiVersion: networking.k8s.io/v1 +kind: 
Ingress +metadata: + name: openldap + namespace: itsm +spec: + ingressClassName: nginx + rules: + - host: ldap.onwalk.net + http: + paths: + - backend: + service: + name: openldap-headless + port: + name: http + path: / + pathType: ImplementationSpecific +--- +apiVersion: k8s.nginx.org/v1alpha1 +kind: GlobalConfiguration +metadata: + name: nginx-configuration + namespace: ingress +spec: + listeners: + - name: ldap-tcp + port: 389 + protocol: TCP +--- +apiVersion: k8s.nginx.org/v1alpha1 +kind: TransportServer +metadata: + name: ldap-tcp + namespace: itsm +spec: + listener: + name: ldap-tcp + protocol: TCP + upstreams: + - name: ldap-app + service: openldap-headless + port: 389 + action: + pass: ldap-app diff --git a/playbooks/roles/charts/postgresql/files/post-setup.sh b/playbooks/roles/charts/postgresql/files/post-setup.sh new file mode 100644 index 0000000..9e984b8 --- /dev/null +++ b/playbooks/roles/charts/postgresql/files/post-setup.sh @@ -0,0 +1,16 @@ +#!/bin/bash +export namespace=$1 +export POSTGRES_PASSWORD=$(kubectl get secret --namespace $namespace postgresql -o jsonpath="{.data.postgres-password}" | base64 -d) + +kubectl run postgresql-client --rm --tty -i --restart='Never' --namespace $namespace --image docker.io/bitnami/postgresql:15.2.0-debian-11-r11 --env="PGPASSWORD=$POSTGRES_PASSWORD" --command -- psql --host postgresql -U postgres -d postgres -p 5432 -w -c "CREATE EXTENSION IF NOT EXISTS pg_trgm; CREATE EXTENSION IF NOT EXISTS btree_gist; CREATE DATABASE gitlabhq_production OWNER gitlab;" || echo true + +#create user gitlab with encrypted password 'xxxxxx' +#grant all privileges on database gitlabhq_production to gitlab; + +kubectl run postgresql-client --rm --tty -i --restart='Never' --namespace $namespace --image docker.io/bitnami/postgresql:15.2.0-debian-11-r11 --env="PGPASSWORD=$POSTGRES_PASSWORD" --command -- psql --host postgresql -U postgres -d postgres -p 5432 -w -c "CREATE DATABASE keycloak;" || echo true + +kubectl run postgresql-client 
--rm --tty -i --restart='Never' --namespace $namespace --image docker.io/bitnami/postgresql:15.2.0-debian-11-r11 --env="PGPASSWORD=$POSTGRES_PASSWORD" --command -- psql --host postgresql -U postgres -d postgres -p 5432 -w -c "CREATE DATABASE registry;" || echo true + +kubectl run postgresql-client --rm --tty -i --restart='Never' --namespace $namespace --image docker.io/bitnami/postgresql:15.2.0-debian-11-r11 --env="PGPASSWORD=$POSTGRES_PASSWORD" --command -- psql --host postgresql -U postgres -d postgres -p 5432 -w -c "CREATE DATABASE notary_server;" || echo true + +kubectl run postgresql-client --rm --tty -i --restart='Never' --namespace $namespace --image docker.io/bitnami/postgresql:15.2.0-debian-11-r11 --env="PGPASSWORD=$POSTGRES_PASSWORD" --command -- psql --host postgresql -U postgres -d postgres -p 5432 -w -c "CREATE DATABASE notary_signer;" || echo true diff --git a/playbooks/roles/charts/postgresql/files/setup-postgresql.sh b/playbooks/roles/charts/postgresql/files/setup-postgresql.sh new file mode 100644 index 0000000..98c7bf7 --- /dev/null +++ b/playbooks/roles/charts/postgresql/files/setup-postgresql.sh @@ -0,0 +1,13 @@ +#!/bin/bash + +export namespace=$1 +export registry=$2 + +helm repo add bitnami https://charts.bitnami.com/bitnami || echo true +helm repo up +cat > values.yaml << EOF +global: + imageRegistry: "$registry" +EOF +kubectl create ns $namespace || echo true +helm upgrade --install postgresql bitnami/postgresql --version 12.8.2 -n $namespace -f values.yaml diff --git a/playbooks/roles/charts/postgresql/tasks/main.yml b/playbooks/roles/charts/postgresql/tasks/main.yml new file mode 100755 index 0000000..4d57687 --- /dev/null +++ b/playbooks/roles/charts/postgresql/tasks/main.yml @@ -0,0 +1,3 @@ +- name: Install PostgreSQL Server + script: files/setup-postgresql.sh {{ db_namespace }} {{ registry }} + when: inventory_hostname in groups[group] diff --git a/playbooks/roles/charts/redis/files/setup-redis.sh 
b/playbooks/roles/charts/redis/files/setup-redis.sh new file mode 100644 index 0000000..f08c99a --- /dev/null +++ b/playbooks/roles/charts/redis/files/setup-redis.sh @@ -0,0 +1,14 @@ +#!/bin/bash + +export namespace=$1 +export registry=$2 + +cat > values.yaml << EOF +global: + imageRegistry: "$registry" +EOF + +helm repo add bitnami https://charts.bitnami.com/bitnami +helm repo up bitnami +kubectl create ns $namespace || true +helm upgrade --install redis bitnami/redis --set architecture=standalone -n $namespace -f values.yaml diff --git a/playbooks/roles/charts/redis/tasks/main.yml b/playbooks/roles/charts/redis/tasks/main.yml new file mode 100755 index 0000000..7951595 --- /dev/null +++ b/playbooks/roles/charts/redis/tasks/main.yml @@ -0,0 +1,3 @@ +- name: Setup Redis Server + script: files/setup-redis.sh {{ cache_namespace }} {{ registry }} + when: inventory_hostname in groups[group] diff --git a/playbooks/roles/charts/splunk-otel-collector/files/setup.sh b/playbooks/roles/charts/splunk-otel-collector/files/setup.sh new file mode 100644 index 0000000..5a3c0c3 --- /dev/null +++ b/playbooks/roles/charts/splunk-otel-collector/files/setup.sh @@ -0,0 +1,20 @@ +#!/bin/bash + +Splunk_HEC_URL=$1 +Splunk_HEC_TOKEN=$2 + +helm repo add splunk-otel-collector-chart https://signalfx.github.io/splunk-otel-collector-chart +helm repo update + +cat > vaules.yaml << EOF +clusterName: Demo +splunkPlatform: + endpoint: $Splunk_HEC_URL + token: $Splunk_HEC_TOKEN + index: harbor + insecureSkipVerify: true +EOF + +helm upgrade --install splunk-otel-collector splunk-otel-collector-chart/splunk-otel-collector -f vaules.yaml + +curl -k "${Splunk_HEC_URL}" -H "Authorization: Splunk ${Splunk_HEC_TOKEN}" -d '{"event": "Hello, world!", "sourcetype": "manual"}' diff --git a/playbooks/roles/charts/splunk-otel-collector/tasks/main.yml b/playbooks/roles/charts/splunk-otel-collector/tasks/main.yml new file mode 100755 index 0000000..09cc640 --- /dev/null +++ 
b/playbooks/roles/charts/splunk-otel-collector/tasks/main.yml @@ -0,0 +1,2 @@ +- name: Setup splunk otel collector + script: files/setup.sh {{ splunk_hec_url }} {{ splunk_hec_token }} diff --git a/playbooks/roles/docker/harbor/README.md b/playbooks/roles/docker/harbor/README.md new file mode 100644 index 0000000..0f268e3 --- /dev/null +++ b/playbooks/roles/docker/harbor/README.md @@ -0,0 +1,99 @@ +## Docker 镜像版本 + +| 服务 | 镜像版本 | +|-------------|---------------------------------| +| Keycloak | `bitnami/keycloak:26.0` | +| PostgreSQL | `postgres:16.0-bookworm` | +| Nginx | `nginx:1.27` | + +# 目录结构 +```ii +```bash +playbooks/roles/docker/keycloak +├── defaults/ # 存放默认变量的目录 +│ └── main.yml # 默认配置变量 +├── files/ # 存放静态文件的目录 +│ └── nginx.conf # Nginx 配置文件 +├── tasks/ # 存放任务脚本的目录 +│ ├── main.yml # 主要任务脚本 +│ ├── post-setup.yml # 部署后设置任务 +│ ├── pre-setup.yml # 部署前设置任务 +├── templates/ # 存放模板文件的目录 +│ ├── create_keystore.sh.j2 # 创建 Keystore 和 Truststore 的脚本模板 +│ └── docker-compose.yml.j2 # Docker Compose 配置文件模板 +└── README.md # 项目说明文件 +```bash +```````` +使用 Ansible 部署前的准备 +在运行 Playbook 之前,请确保以下准备工作已完成: + +1. 主机准备 +操作系统要求:本 Playbook 适用于 Ubuntu 20.04 及以上版本的主机。 + +主机要求:确保主机上已安装 Docker、Docker Compose 和 Ansible。你可以通过以下命令安装这些工具: + +bash +复制代码 +# 安装 Docker 和 Docker Compose +sudo apt-get update +sudo apt-get install docker.io docker-compose +主机名称:确保主机名称已正确设置,并且该主机可以访问 DNS 配置的域名。 + +2. 域名和 SSL 证书 +域名:确保你已经为 Keycloak 设置了域名(例如 keycloak.onwalk.net)。在实际部署前,你需要准备一个有效的域名和 SSL 证书。可以使用 Let’s Encrypt 或其他证书颁发机构获取证书。 + +证书文件:准备好 SSL 证书(如 onwalk.net.pem)和 SSL 密钥文件(如 onwalk.net.key)。这两个文件将用于配置 Keycloak 和 Nginx 服务的 HTTPS 访问。 + +证书路径应为 /etc/ssl/onwalk.net.pem,密钥路径应为 /etc/ssl/onwalk.net.key。 + +3. Ansible 配置文件(如果需要) +根据需要,你可以创建一个 inventory.ini 文件来指定部署目标主机: + +ini +复制代码 +[servers] +your_server_ip_or_hostname ansible_ssh_user=your_user ansible_ssh_private_key_file=your_key + +# Ansible Playbook 执行和部署 + + +1. 
克隆仓库 +首先,克隆该仓库到你的本地机器: + +bash +复制代码 +git clone https://your_repository_url.git +cd ansible-playbook + +2. 测试执行 +ansible-playbook -i inventory/k3s-cluster playbooks/deploy-docker-harbor.yml -l cn-hw-node.svc.plus -D -C + +2. 执行部署 +执行部署任务时,使用以下命令来运行 Ansible Playbook: + +ansible-playbook -i inventory.ini playbooks/deploy-docker-keycloak.yml -l cn-gateway.svc.plus -D + +此命令将会启动以下步骤: + +- 安装并配置 Docker 和 Docker Compose。 +- 创建所需的 Keystore 和 Truststore 文件。 +- 启动 Keycloak 和 PostgreSQL 容器,Nginx 容器 + +3. 验证部署 +部署完成后,你可以通过以下命令检查 Keycloak 和 PostgreSQL 服务是否正常运行: + +docker ps -q -f name=postgres +docker ps -q -f name=keycloak +docker ps -q -f name=nginx + +如果服务正常运行,则会显示容器的 ID。 + +部署后的配置 +1. DNS 配置 +确保你的域名(如 keycloak.onwalk.net)已正确解析,并且指向部署 Keycloak 的主机。你可以使用 nslookup 或 dig 工具验证 DNS 解析: + + +## defaults/main.yml encrypt_string + +ansible-vault encrypt_string 'xxxxx' --name 'core_secret' + diff --git a/playbooks/roles/docker/harbor/defaults/main.yml b/playbooks/roles/docker/harbor/defaults/main.yml new file mode 100644 index 0000000..6102a5c --- /dev/null +++ b/playbooks/roles/docker/harbor/defaults/main.yml @@ -0,0 +1,138 @@ +# External endpoint configuration +ext_endpoint: https://images.onwalk.net + +# Harbor Log service configuration +harbor_log_image: goharbor/harbor-log:v2.12.0 +harbor_log_container_name: harbor-log +harbor_log_volume: /var/log/harbor +logrotate_conf_path: ./common/config/log/logrotate.conf +rsyslog_conf_path: ./common/config/log/rsyslog_docker.conf +harbor_log_port: 1514 + +# Registry service configuration +registry_image: goharbor/registry-photon:v2.12.0 +registry_container_name: registry +registry_volume_storage: /data/registry +registry_config_volume: ./common/config/registry/ +registry_cert_path: /data/secret/registry/root.crt +shared_trust_certificates: ./common/config/shared/trust-certificates +harbor_syslog_port: 1514 + +# Registry Controller configuration +registryctl_image: goharbor/harbor-registryctl:v2.12.0 +registryctl_container_name: 
registryctl +registryctl_env_file: ./common/config/registryctl/env +registryctl_volume_storage: /data/registry +registryctl_config_volume: ./common/config/registry/ +registryctl_config_file: ./common/config/registryctl/config.yml +registry_credential_username: harbor_registry_user + +# PostgreSQL service configuration +postgresql_image: goharbor/harbor-db:v2.12.0 +postgresql_container_name: harbor-db +postgresql_data_volume: /data/database +postgresql_env_file: ./common/config/db/env +postgresql_shm_size: '1gb' +postgresql_host: postgresql +postgresql_port: 5432 +postgresql_username: postgres +postgresql_database: registry + +# Core service configuration +core_image: goharbor/harbor-core:v2.12.0 +core_container_name: harbor-core +core_env_file: ./common/config/core/env +core_ca_volume: /data/ca_download/ +core_data_volume: /data/ +core_certificates_volume: ./common/config/core/certificates/ +core_app_conf_path: ./common/config/core/app.conf +core_private_key_path: /data/secret/core/private_key.pem +core_secret_key_path: /data/secret/keys/secretkey + +# Portal service configuration +portal_image: goharbor/harbor-portal:v2.12.0 +portal_container_name: harbor-portal +portal_nginx_conf_path: ./common/config/portal/nginx.conf + +# Cache service configuration +redis_image: goharbor/redis-photon:v2.12.0 +redis_container_name: redis +redis_data_volume: /data/redis + +# Job service configuration +jobservice_image: goharbor/harbor-jobservice:v2.12.0 +jobservice_container_name: harbor-jobservice +jobservice_env_file: ./common/config/jobservice/env +jobservice_config_file: ./common/config/jobservice/config.yml +jobservice_trust_certificates: ./common/config/shared/trust-certificates +jobservice_log_driver: stdout + +# Proxy Service Configuration +proxy_image: goharbor/nginx-photon:v2.12.0 +proxy_container_name: nginx +proxy_restart_policy: always + +# Ali OSS Configuration +oss_accesskeyid: !vault | + $ANSIBLE_VAULT;1.1;AES256 + 
36623836396330326132396463623864623134383661623162343235323764626665353432633932 + 3561336662643938386435643162633439666132353835650a326466363033316339653838653761 + 33643864626139643363343533653666303738383637653435346163323339666335323966396464 + 3135663763396238340a303062643539396430613834663563643862343734343230343965323735 + 66326138663430363431353461653364333734656366333635656535653239613235 +oss_accesskeysecret: !vault | + $ANSIBLE_VAULT;1.1;AES256 + 64616533326661396138656437653235376137333437646465633733376362626462623335646634 + 6333373431303235653531636638656261633031346236320a366666616333646261366539646665 + 32613833333762353336333534623561643631336538393933353635383662313339333734623436 + 6166626431633730390a303836323636343165363339343264656139343036306132653139363963 + 39626432336162636631326430393134653135303535353239366464376338616462 +oss_bucket: harbor-oss +oss_region: oss-cn-wulanchabu +oss_endpoint: harbor-oss.oss-cn-wulanchabu.aliyuncs.com + +# Sensitive variables +harbor_csrf_key: !vault | + $ANSIBLE_VAULT;1.1;AES256 + 63663337656331383635663037643036353832633639636165383030366561663130643731303934 + 3563313234626334646364343966616133306231623765620a306261353633316533396630353164 + 65633236336135303432666130346637393434616664306633316333333836363764613138366637 + 3338373365323666390a343463623862616636363733653031366237616238313031356434303439 + 31666266653836333230343766323966623862383630633662636633393234643131316565353437 + 3130333635373830393235373435383232396635346531623965 +core_secret: !vault | + $ANSIBLE_VAULT;1.1;AES256 + 35616638393966386331633338643332393336663530633239376430393735363430343031613137 + 6434633238313232323437366166633733376239646235380a383137363933326531363961356230 + 65326637653137646130663735363862343462383636326362353532633536366234643930336134 + 6234616561303965320a316230383863363861626534613038313132303862363731633530653938 + 31656439653338623437366363353035303666373734316666326563323531643362 +jobservice_secret: 
!vault | + $ANSIBLE_VAULT;1.1;AES256 + 38666535373439616132393061356266326361303631383663363638373933366464613061333433 + 6239663433336335323062303333393939313036373038340a316266663233343232626237623733 + 37363664663164646439633338333065333831333662393664303064376231646664306164316338 + 3831393630373033350a663031303333326531656166636436366431386633633832633466363836 + 34643163663935336539333865323830613531386331623663643432313531383861 +harbor_admin_password: !vault | + $ANSIBLE_VAULT;1.1;AES256 + 36336465346533313435383536386231353561663336326635323465313033383264633862333264 + 3037376630353534376565326437653730326130303636370a393737343635393335353233346137 + 64373532396339663065376534373534623732323762643634396630386430323766363334306663 + 3536636138343666330a323066393939333861656131623837626430666332363237616639323831 + 3532 +postgresql_password: !vault | + $ANSIBLE_VAULT;1.1;AES256 + 66626230333636656666346537343137303439613864616431343531333766336434313136626463 + 3238626161616635653566306162346232643735303236320a393231616534353130306264623231 + 39333032356632616462623736376161326464306433316234353665633136396332363866626336 + 3364333463313035390a353866663663643333393835613664643832613338356530353834633232 + 66346231346264396139333165633361326139383131363861623232646330326664 +registry_credential_password: !vault | + $ANSIBLE_VAULT;1.1;AES256 + 32633462323230656439313165616564373965636632646234396437363432653566653432636638 + 3664633136656437356331623330343463346536613361310a333962633365636335616236383230 + 38333837656637646633663330383132623837613063356331646264333437613132376130663764 + 3530626162323261620a333464316433383037306134386339633036623235376138663832366535 + 65613865323832326363393936376465363964363864616131393933343435623433356564373433 + 6266393231653930373138353332393538336262396238646266 diff --git a/playbooks/roles/docker/harbor/tasks/main.yml b/playbooks/roles/docker/harbor/tasks/main.yml new file mode 100644 index 0000000..fddafcb --- 
/dev/null +++ b/playbooks/roles/docker/harbor/tasks/main.yml @@ -0,0 +1,37 @@ +--- +# 主任务:创建 Keycloak 服务,启动 Docker Compose 等 + +- name: 执行 pre-setup 操作 + include_tasks: "pre-setup.yml" + +- name: 渲染 Docker Compose Common 配置文件 + template: + src: "templates/{{ item }}" + dest: "/home/ubuntu/harbor/{{ item }}" + loop: + - common/config/portal/nginx.conf + - common/config/core/app.conf + - common/config/core/env + - common/config/jobservice/env + - common/config/jobservice/config.yml + - common/config/nginx/nginx.conf + - common/config/registry/root.crt + - common/config/registry/config.yml + - common/config/registry/passwd + - common/config/db/env + - common/config/log/logrotate.conf + - common/config/log/rsyslog_docker.conf + - common/config/registryctl/env + - common/config/registryctl/config.yml + +- name: 渲染 Docker Compose 配置文件 + template: + src: "templates/docker-compose.yml.j2" + dest: "/home/ubuntu/harbor/docker-compose.yml" + +- name: 启动 Docker Compose 服务 + become: true + command: docker-compose -f /home/ubuntu/harbor/docker-compose.yml up -d + +- name: 执行 post-setup 操作 + include_tasks: "post-setup.yml" diff --git a/playbooks/roles/docker/harbor/tasks/post-setup.yml b/playbooks/roles/docker/harbor/tasks/post-setup.yml new file mode 100644 index 0000000..05e30f3 --- /dev/null +++ b/playbooks/roles/docker/harbor/tasks/post-setup.yml @@ -0,0 +1,19 @@ +--- +# post-setup.yml + +- name: 检查容器是否运行并输出状态 + command: docker ps -q -f name={{ item.name }} + register: container_status + loop: + - { name: "{{ core_container_name }}" } + - { name: "{{ proxy_container_name }}" } + - { name: "{{ redis_container_name }}" } + - { name: "{{ portal_container_name }}" } + - { name: "{{ registry_container_name }}" } + - { name: "{{ postgresql_container_name }}" } + - { name: "{{ jobservice_container_name }}" } + - { name: "{{ registryctl_container_name }}" } + changed_when: false + failed_when: container_status.stdout == "" + loop_control: + loop_var: item diff --git 
a/playbooks/roles/docker/harbor/tasks/pre-setup.yml b/playbooks/roles/docker/harbor/tasks/pre-setup.yml new file mode 100644 index 0000000..56211e8 --- /dev/null +++ b/playbooks/roles/docker/harbor/tasks/pre-setup.yml @@ -0,0 +1,40 @@ +--- +- name: 安装 Docker 和 Docker Compose + apt: + name: + - docker.io + - docker-compose + state: present + update_cache: yes + +- name: 启动并启用 Docker 服务 + systemd: + name: docker + enabled: yes + state: started + +- name: 创建所需的目录结构 /etc/ssl + file: + path: "{{ item }}" + state: directory + mode: '0755' + with_items: + - /etc/ssl +- name: 创建所需的目录结构 common config + file: + path: "/home/ubuntu/harbor/{{ item }}" + state: directory + mode: '0755' + with_items: + - common/config/registryctl + - common/config/db + - common/config/jobservice + - common/config/registry + - common/config/portal + - common/config/core + - common/config/core/certificates + - common/config/log + - common/config/shared + - common/config/shared/trust-certificates + - common/config/nginx + - common/config/nginx/conf.d diff --git a/playbooks/roles/docker/harbor/templates/common/config/core/app.conf b/playbooks/roles/docker/harbor/templates/common/config/core/app.conf new file mode 100644 index 0000000..28351cd --- /dev/null +++ b/playbooks/roles/docker/harbor/templates/common/config/core/app.conf @@ -0,0 +1,6 @@ +appname = Harbor +runmode = prod +enablegzip = true + +[prod] +httpport = 8080 diff --git a/playbooks/roles/docker/harbor/templates/common/config/core/env b/playbooks/roles/docker/harbor/templates/common/config/core/env new file mode 100644 index 0000000..f5a11e8 --- /dev/null +++ b/playbooks/roles/docker/harbor/templates/common/config/core/env @@ -0,0 +1,47 @@ +CONFIG_PATH=/etc/core/app.conf +UAA_CA_ROOT=/etc/core/certificates/uaa_ca.pem +_REDIS_URL_CORE=redis://redis:6379?idle_timeout_seconds=30 +SYNC_QUOTA=true +_REDIS_URL_REG=redis://redis:6379/1?idle_timeout_seconds=30 + +LOG_LEVEL=info +EXT_ENDPOINT={{ ext_endpoint }} +DATABASE_TYPE=postgresql 
+POSTGRESQL_HOST={{ postgresql_host }} +POSTGRESQL_PORT={{ postgresql_port }} +POSTGRESQL_USERNAME={{ postgresql_username }} +POSTGRESQL_PASSWORD={{ postgresql_password }} +POSTGRESQL_DATABASE={{ postgresql_database }} +POSTGRESQL_SSLMODE=disable +POSTGRESQL_MAX_IDLE_CONNS=50 +POSTGRESQL_MAX_OPEN_CONNS=100 +POSTGRESQL_CONN_MAX_LIFETIME=5m +POSTGRESQL_CONN_MAX_IDLE_TIME=0 +REGISTRY_URL=http://registry:5000 +PORTAL_URL=http://portal:8080 +TOKEN_SERVICE_URL=http://core:8080/service/token +HARBOR_ADMIN_PASSWORD={{ harbor_admin_password }} +MAX_JOB_WORKERS=10 +CORE_SECRET={{ core_secret }} +JOBSERVICE_SECRET={{ jobservice_secret }} +WITH_TRIVY=False +CORE_URL=http://core:8080 +CORE_LOCAL_URL=http://127.0.0.1:8080 +JOBSERVICE_URL=http://jobservice:8080 +TRIVY_ADAPTER_URL=http://trivy-adapter:8080 +REGISTRY_STORAGE_PROVIDER_NAME=oss +READ_ONLY=false +RELOAD_KEY= +REGISTRY_CONTROLLER_URL=http://registryctl:8080 +REGISTRY_CREDENTIAL_USERNAME={{ registry_credential_username }} +REGISTRY_CREDENTIAL_PASSWORD={{ registry_credential_password }} +CSRF_KEY={{ harbor_csrf_key }} +ROBOT_SCANNER_NAME_PREFIX=oEBK0BPU +PERMITTED_REGISTRY_TYPES_FOR_PROXY_CACHE=docker-hub,harbor,azure-acr,ali-acr,aws-ecr,google-gcr,quay,docker-registry,github-ghcr,jfrog-artifactory + +HTTP_PROXY= +HTTPS_PROXY= +NO_PROXY= + +PORT=8080 +QUOTA_UPDATE_PROVIDER=db diff --git a/playbooks/roles/docker/harbor/templates/common/config/db/env b/playbooks/roles/docker/harbor/templates/common/config/db/env new file mode 100644 index 0000000..eb5d09e --- /dev/null +++ b/playbooks/roles/docker/harbor/templates/common/config/db/env @@ -0,0 +1,2 @@ +POSTGRES_PASSWORD={{ postgresql_password }} + diff --git a/playbooks/roles/docker/harbor/templates/common/config/jobservice/config.yml b/playbooks/roles/docker/harbor/templates/common/config/jobservice/config.yml new file mode 100644 index 0000000..c09c577 --- /dev/null +++ b/playbooks/roles/docker/harbor/templates/common/config/jobservice/config.yml @@ -0,0 +1,38 @@ +--- 
+#Protocol used to serve +protocol: "http" + +#Server listening port +port: 8080 + +#Worker pool +worker_pool: + #Worker concurrency + workers: 10 + backend: "redis" + #Additional config if use 'redis' backend + redis_pool: + #redis://[arbitrary_username:password@]ipaddress:port/database_index + redis_url: redis://redis:6379/2?idle_timeout_seconds=30 + namespace: "harbor_job_service_namespace" + idle_timeout_second: 3600 +#Loggers for the running job +job_loggers: + # The jobLoggers backend name, only support "STD_OUTPUT", "FILE" and/or "DB" + - name: "STD_OUTPUT" + level: "INFO" # INFO/DEBUG/WARNING/ERROR/FATAL + +#Loggers for the job service +loggers: + - name: "STD_OUTPUT" # Same with above + level: "INFO" + + +reaper: + # the max time to wait for a task to finish, if unfinished after max_update_hours, the task will be mark as error, but the task will continue to run, default value is 24, + max_update_hours: 24 + # the max time for execution in running state without new task created + max_dangling_hours: 168 + +# the max size of job log returned by API, default is 10M +max_retrieve_size_mb: 10 \ No newline at end of file diff --git a/playbooks/roles/docker/harbor/templates/common/config/jobservice/env b/playbooks/roles/docker/harbor/templates/common/config/jobservice/env new file mode 100644 index 0000000..712ad38 --- /dev/null +++ b/playbooks/roles/docker/harbor/templates/common/config/jobservice/env @@ -0,0 +1,13 @@ +CORE_SECRET={{ core_secret }} +REGISTRY_URL=http://registry:5000 +JOBSERVICE_SECRET={{ jobservice_secret }} +CORE_URL=http://core:8080 +REGISTRY_CONTROLLER_URL=http://registryctl:8080 +JOBSERVICE_WEBHOOK_JOB_MAX_RETRY=3 +JOBSERVICE_WEBHOOK_JOB_HTTP_CLIENT_TIMEOUT=3 + +HTTP_PROXY={{ http_proxy | default('') }} +HTTPS_PROXY={{ https_proxy | default('') }} +NO_PROXY={{ no_proxy | default('') }} +REGISTRY_CREDENTIAL_USERNAME={{ registry_credential_username }} +REGISTRY_CREDENTIAL_PASSWORD={{ registry_credential_password }} diff --git 
a/playbooks/roles/docker/harbor/templates/common/config/log/logrotate.conf b/playbooks/roles/docker/harbor/templates/common/config/log/logrotate.conf new file mode 100644 index 0000000..97f5f93 --- /dev/null +++ b/playbooks/roles/docker/harbor/templates/common/config/log/logrotate.conf @@ -0,0 +1,8 @@ +/var/log/docker/*.log { + rotate 50 + size 200M + copytruncate + compress + missingok + nodateext +} \ No newline at end of file diff --git a/playbooks/roles/docker/harbor/templates/common/config/log/rsyslog_docker.conf b/playbooks/roles/docker/harbor/templates/common/config/log/rsyslog_docker.conf new file mode 100644 index 0000000..0be27a6 --- /dev/null +++ b/playbooks/roles/docker/harbor/templates/common/config/log/rsyslog_docker.conf @@ -0,0 +1,7 @@ +# Rsyslog configuration file for docker. + +template(name="DynaFile" type="string" string="/var/log/docker/%programname%.log") + +if $programname != "rsyslogd" then { + action(type="omfile" dynaFile="DynaFile") +} \ No newline at end of file diff --git a/playbooks/roles/docker/harbor/templates/common/config/nginx/nginx.conf b/playbooks/roles/docker/harbor/templates/common/config/nginx/nginx.conf new file mode 100644 index 0000000..77dd45c --- /dev/null +++ b/playbooks/roles/docker/harbor/templates/common/config/nginx/nginx.conf @@ -0,0 +1,149 @@ +worker_processes auto; +pid /tmp/nginx.pid; + +events { + worker_connections 3096; + use epoll; + multi_accept on; +} + +http { + client_body_temp_path /tmp/client_body_temp; + proxy_temp_path /tmp/proxy_temp; + fastcgi_temp_path /tmp/fastcgi_temp; + uwsgi_temp_path /tmp/uwsgi_temp; + scgi_temp_path /tmp/scgi_temp; + tcp_nodelay on; + include /etc/nginx/conf.d/*.upstream.conf; + + # this is necessary for us to be able to disable request buffering in all cases + proxy_http_version 1.1; + + upstream core { + server core:8080; + } + + upstream portal { + server portal:8080; + } + + log_format timed_combined '$remote_addr - ' + '"$request" $status $body_bytes_sent ' + 
'"$http_referer" "$http_user_agent" ' + '$request_time $upstream_response_time $pipe'; + + access_log /dev/stdout timed_combined; + + map $http_x_forwarded_proto $x_forwarded_proto { + default $http_x_forwarded_proto; + "" $scheme; + } + + include /etc/nginx/conf.d/*.server.conf; + + server { + listen 8443 ssl; +# server_name harbordomain.com; + server_tokens off; + # SSL + ssl_certificate /etc/cert/server.crt; + ssl_certificate_key /etc/cert/server.key; + + # Recommendations from https://raymii.org/s/tutorials/Strong_SSL_Security_On_nginx.html + ssl_protocols TLSv1.2 TLSv1.3; + ssl_ciphers '!aNULL:kECDH+AESGCM:ECDH+AESGCM:RSA+AESGCM:kECDH+AES:ECDH+AES:RSA+AES:'; + ssl_prefer_server_ciphers on; + ssl_session_cache shared:SSL:10m; + + # disable any limits to avoid HTTP 413 for large image uploads + client_max_body_size 0; + + # required to avoid HTTP 411: see Issue #1486 (https://github.com/docker/docker/issues/1486) + chunked_transfer_encoding on; + + # Add extra headers + add_header Strict-Transport-Security "max-age=31536000; includeSubdomains; preload"; + add_header X-Frame-Options DENY; + add_header Content-Security-Policy "frame-ancestors 'none'"; + + # customized location config file can place to /etc/nginx dir with prefix harbor.https. 
and suffix .conf + include /etc/nginx/conf.d/harbor.https.*.conf; + + location / { + proxy_pass http://portal/; + proxy_set_header Host $http_host; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Proto $x_forwarded_proto; + + proxy_cookie_path / "/; HttpOnly; Secure"; + + proxy_buffering off; + proxy_request_buffering off; + } + + location /c/ { + proxy_pass http://core/c/; + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Proto $x_forwarded_proto; + + proxy_cookie_path / "/; Secure"; + + proxy_buffering off; + proxy_request_buffering off; + } + + location /api/ { + proxy_pass http://core/api/; + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Proto $x_forwarded_proto; + + proxy_cookie_path / "/; Secure"; + + proxy_buffering off; + proxy_request_buffering off; + } + + location /v1/ { + return 404; + } + + location /v2/ { + proxy_pass http://core/v2/; + proxy_set_header Host $http_host; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Proto $x_forwarded_proto; + proxy_buffering off; + proxy_request_buffering off; + proxy_send_timeout 900; + proxy_read_timeout 900; + } + + location /service/ { + proxy_pass http://core/service/; + proxy_set_header Host $http_host; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Proto $x_forwarded_proto; + + proxy_cookie_path / "/; Secure"; + + proxy_buffering off; + proxy_request_buffering off; + } + + location /service/notifications { + return 404; + } + } + server { + listen 8080; + #server_name harbordomain.com; + return 308 
https://$host:443$request_uri; + } +} \ No newline at end of file diff --git a/playbooks/roles/docker/harbor/templates/common/config/portal/nginx.conf b/playbooks/roles/docker/harbor/templates/common/config/portal/nginx.conf new file mode 100644 index 0000000..3058d65 --- /dev/null +++ b/playbooks/roles/docker/harbor/templates/common/config/portal/nginx.conf @@ -0,0 +1,42 @@ + +worker_processes auto; +pid /tmp/nginx.pid; + +events { + worker_connections 1024; +} + +http { + + client_body_temp_path /tmp/client_body_temp; + proxy_temp_path /tmp/proxy_temp; + fastcgi_temp_path /tmp/fastcgi_temp; + uwsgi_temp_path /tmp/uwsgi_temp; + scgi_temp_path /tmp/scgi_temp; + + server { + listen 8080; + server_name localhost; + + root /usr/share/nginx/html; + index index.html index.htm; + include /etc/nginx/mime.types; + + gzip on; + gzip_min_length 1000; + gzip_proxied expired no-cache no-store private auth; + gzip_types text/plain text/css application/json application/javascript application/x-javascript text/xml application/xml application/xml+rss text/javascript; + + location /devcenter-api-2.0 { + try_files $uri $uri/ /swagger-ui-index.html; + } + + location / { + try_files $uri $uri/ /index.html; + } + + location = /index.html { + add_header Cache-Control "no-store, no-cache, must-revalidate"; + } + } +} \ No newline at end of file diff --git a/playbooks/roles/docker/harbor/templates/common/config/registry/config.yml b/playbooks/roles/docker/harbor/templates/common/config/registry/config.yml new file mode 100644 index 0000000..4d68ee7 --- /dev/null +++ b/playbooks/roles/docker/harbor/templates/common/config/registry/config.yml @@ -0,0 +1,49 @@ +version: 0.1 +log: + level: info + fields: + service: registry +storage: + cache: + layerinfo: redis + oss: + accesskeyid: {{ oss_accesskeyid }} + accesskeysecret: {{ oss_accesskeysecret }} + bucket: {{ oss_bucket }} + region: {{ oss_region }} + endpoint: {{ oss_endpoint }} + internal: False + encrypt: False + secure: true + 
chunksize: 5242880 + rootdirectory: /docker + maintenance: + uploadpurging: + enabled: false + delete: + enabled: true +redis: + addr: redis:6379 + readtimeout: 10s + writetimeout: 10s + dialtimeout: 10s + password: + db: 1 + pool: + maxidle: 100 + maxactive: 500 + idletimeout: 60s +http: + addr: :5000 + secret: placeholder + debug: + addr: localhost:5001 +auth: + htpasswd: + realm: harbor-registry-basic-realm + path: /etc/registry/passwd +validation: + disabled: true +compatibility: + schema1: + enabled: true diff --git a/playbooks/roles/docker/harbor/templates/common/config/registry/passwd b/playbooks/roles/docker/harbor/templates/common/config/registry/passwd new file mode 100644 index 0000000..495c4a1 --- /dev/null +++ b/playbooks/roles/docker/harbor/templates/common/config/registry/passwd @@ -0,0 +1 @@ +harbor_registry_user:$2y$05$GQPl7njy.t26N5rFZxqDu.CcXQi4eaYqC5heBhtzqz4x6bVfI7.Rq diff --git a/playbooks/roles/docker/harbor/templates/common/config/registry/root.crt b/playbooks/roles/docker/harbor/templates/common/config/registry/root.crt new file mode 100755 index 0000000..e69de29 diff --git a/playbooks/roles/docker/harbor/templates/common/config/registryctl/config.yml b/playbooks/roles/docker/harbor/templates/common/config/registryctl/config.yml new file mode 100644 index 0000000..2c70735 --- /dev/null +++ b/playbooks/roles/docker/harbor/templates/common/config/registryctl/config.yml @@ -0,0 +1,5 @@ +--- +protocol: "http" +port: 8080 +log_level: info +registry_config: "/etc/registry/config.yml" \ No newline at end of file diff --git a/playbooks/roles/docker/harbor/templates/common/config/registryctl/env b/playbooks/roles/docker/harbor/templates/common/config/registryctl/env new file mode 100644 index 0000000..a7b5b24 --- /dev/null +++ b/playbooks/roles/docker/harbor/templates/common/config/registryctl/env @@ -0,0 +1,2 @@ +CORE_SECRET={{ core_secret }} +JOBSERVICE_SECRET={{ jobservice_secret }} diff --git 
a/playbooks/roles/docker/harbor/templates/docker-compose.yml.j2 b/playbooks/roles/docker/harbor/templates/docker-compose.yml.j2 new file mode 100644 index 0000000..90eb501 --- /dev/null +++ b/playbooks/roles/docker/harbor/templates/docker-compose.yml.j2 @@ -0,0 +1,195 @@ +version: '3' +services: + registry: + image: {{ registry_image }} + container_name: {{ registry_container_name }} + restart: always + cap_drop: + - ALL + cap_add: + - CHOWN + - SETGID + - SETUID + volumes: + - {{ registry_volume_storage }}:/storage:z + - {{ registry_config_volume }}:/etc/registry/:z + - type: bind + source: {{ registry_cert_path }} + target: /etc/registry/root.crt + - type: bind + source: {{ shared_trust_certificates }} + target: /harbor_cust_cert + networks: + - harbor + depends_on: + - postgresql # 移除了 log 依赖 + + registryctl: + image: {{ registryctl_image }} + container_name: {{ registryctl_container_name }} + env_file: + - {{ registryctl_env_file }} + restart: always + cap_drop: + - ALL + cap_add: + - CHOWN + - SETGID + - SETUID + volumes: + - {{ registryctl_volume_storage }}:/storage:z + - {{ registryctl_config_volume }}:/etc/registry/:z + - type: bind + source: {{ registryctl_config_file }} + target: /etc/registryctl/config.yml + - type: bind + source: {{ shared_trust_certificates }} + target: /harbor_cust_cert + networks: + - harbor + depends_on: + - registry # 移除了 log 依赖 + + postgresql: + image: {{ postgresql_image }} + container_name: {{ postgresql_container_name }} + restart: always + cap_drop: + - ALL + cap_add: + - CHOWN + - DAC_OVERRIDE + - SETGID + - SETUID + volumes: + - {{ postgresql_data_volume }}:/var/lib/postgresql/data:z + networks: + - harbor + env_file: + - {{ postgresql_env_file }} + shm_size: '{{ postgresql_shm_size }}' + + core: + image: {{ core_image }} + container_name: {{ core_container_name }} + env_file: + - {{ core_env_file }} + restart: always + cap_drop: + - ALL + cap_add: + - SETGID + - SETUID + volumes: + - {{ core_ca_volume }}:/etc/core/ca/:z + - 
{{ core_data_volume }}:/data/:z + - {{ core_certificates_volume }}:/etc/core/certificates/:z + - type: bind + source: {{ core_app_conf_path }} + target: /etc/core/app.conf + - type: bind + source: {{ core_private_key_path }} + target: /etc/core/private_key.pem + - type: bind + source: {{ core_secret_key_path }} + target: /etc/core/key + - type: bind + source: {{ shared_trust_certificates }} + target: /harbor_cust_cert + networks: + - harbor + depends_on: + - registry + - redis + - postgresql # 移除了 log 依赖 + + portal: + image: {{ portal_image }} + container_name: {{ portal_container_name }} + restart: always + cap_drop: + - ALL + cap_add: + - CHOWN + - SETGID + - SETUID + - NET_BIND_SERVICE + volumes: + - type: bind + source: {{ portal_nginx_conf_path }} + target: /etc/nginx/nginx.conf + networks: + - harbor + depends_on: + - core # 移除了 log 依赖 + + jobservice: + image: "{{ jobservice_image }}" + container_name: "{{ jobservice_container_name }}" + env_file: + - "{{ jobservice_env_file }}" + restart: always + cap_drop: + - ALL + cap_add: + - CHOWN + - SETGID + - SETUID + volumes: + - /data/job_logs:/var/log/jobs:z + - type: bind + source: "{{ jobservice_config_file }}" + target: /etc/jobservice/config.yml + - type: bind + source: "{{ jobservice_trust_certificates }}" + target: /harbor_cust_cert + networks: + - harbor + depends_on: + - core + + redis: + image: {{ redis_image }} + container_name: {{ redis_container_name }} + restart: always + cap_drop: + - ALL + cap_add: + - CHOWN + - SETGID + - SETUID + volumes: + - {{ redis_data_volume }}:/var/lib/redis + networks: + - harbor + + proxy: + image: {{ proxy_image }} + container_name: {{ proxy_container_name }} + restart: always + cap_drop: + - ALL + cap_add: + - CHOWN + - SETGID + - SETUID + - NET_BIND_SERVICE + volumes: + - ./common/config/nginx:/etc/nginx:z + - /data/secret/cert:/etc/cert:z + - type: bind + source: ./common/config/shared/trust-certificates + target: /harbor_cust_cert + ports: + - 80:8080 + - 443:8443 + 
networks: + - harbor + depends_on: + - registry + - core + - portal + +networks: + harbor: + external: false diff --git a/playbooks/roles/docker/keycloak/README.md b/playbooks/roles/docker/keycloak/README.md new file mode 100644 index 0000000..42bdfbe --- /dev/null +++ b/playbooks/roles/docker/keycloak/README.md @@ -0,0 +1,113 @@ +## Docker 镜像版本 + +| 服务 | 镜像版本 | +|-------------|---------------------------------| +| Keycloak | `bitnami/keycloak:latest`(`keycloak_image` 默认值;建议固定为 `26.0`) | +| PostgreSQL | `postgres:16.0-bookworm` | +| Nginx | `nginx:latest`(`nginx_image` 默认值;建议固定为 `1.27`) | + +# 目录结构 + +```text +playbooks/roles/docker/keycloak +├── defaults/ # 存放默认变量的目录 +│ └── main.yml # 默认配置变量 +├── files/ # 存放静态文件的目录 +│ └── create_keystore.sh # 创建 Keystore 和 Truststore 的脚本 +├── tasks/ # 存放任务脚本的目录 +│ ├── main.yml # 主要任务脚本 +│ ├── post-setup.yml # 部署后设置任务 +│ └── pre-setup.yml # 部署前设置任务 +├── templates/ # 存放模板文件的目录 +│ ├── docker-compose.yml.j2 # Docker Compose 配置文件模板 +│ └── nginx.conf.j2 # Nginx 配置文件模板 +└── README.md # 项目说明文件 +``` + +# 使用 Ansible 部署前的准备 +在运行 Playbook 之前,请确保以下准备工作已完成: + +1. 主机准备 +操作系统要求:本 Playbook 适用于 Ubuntu 20.04 及以上版本的主机。 + +主机要求:确保主机上已安装 Docker、Docker Compose 和 Ansible。你可以通过以下命令安装这些工具: + +```bash +# 安装 Docker 和 Docker Compose +sudo apt-get update +sudo apt-get install docker.io docker-compose +``` +主机名称:确保主机名称已正确设置,并且该主机可以访问 DNS 配置的域名。 + +2. 域名和 SSL 证书 +域名:确保你已经为 Keycloak 设置了域名(例如 keycloak.onwalk.net)。在实际部署前,你需要准备一个有效的域名和 SSL 证书。可以使用 Let’s Encrypt 或其他证书颁发机构获取证书。 + +证书文件:准备好 SSL 证书(如 onwalk.net.pem)和 SSL 密钥文件(如 onwalk.net.key)。这两个文件将用于配置 Keycloak 和 Nginx 服务的 HTTPS 访问。 + +证书路径应为 /etc/ssl/onwalk.net.pem,密钥路径应为 /etc/ssl/onwalk.net.key。 + +3. Ansible 配置文件(如果需要) +根据需要,你可以创建一个 inventory.ini 文件来指定部署目标主机: + +```ini +[servers] +your_server_ip_or_hostname ansible_ssh_user=your_user ansible_ssh_private_key_file=your_key +``` + +# Ansible Playbook 执行和部署 + + +1. 克隆仓库 +首先,克隆该仓库到你的本地机器: + +```bash +git clone https://your_repository_url.git +cd ansible-playbook +``` + +2. 
测试执行 +`ansible-playbook -i inventory.ini playbooks/deploy-docker-keycloak.yml -l cn-gateway.svc.plus -D -C` + +3. 执行部署 +执行部署任务时,使用以下命令来运行 Ansible Playbook: + +`ansible-playbook -i inventory.ini playbooks/deploy-docker-keycloak.yml -l cn-gateway.svc.plus -D` + +此命令将会启动以下步骤: + +- 安装并配置 Docker 和 Docker Compose。 +- 创建所需的 Keystore 和 Truststore 文件。 +- 启动 Keycloak、PostgreSQL 和 Nginx 容器。 + +4. 验证部署 +部署完成后,你可以通过以下命令检查 Keycloak、PostgreSQL 和 Nginx 容器是否正常运行: + +```bash +docker ps -q -f name=postgres +docker ps -q -f name=keycloak +docker ps -q -f name=nginx +``` + +如果服务正常运行,则会显示容器的 ID。 + +# 部署后的配置 +1. DNS 配置 +确保你的域名(如 keycloak.onwalk.net)已正确解析,并且指向部署 Keycloak 的主机。你可以使用 nslookup 或 dig 工具验证 DNS 解析: + +```bash +nslookup keycloak.onwalk.net +``` +2. Keycloak 领域设置 +部署后,你需要在 Keycloak 管理控制台进行以下配置: + +创建新的 Realm(领域):登录到 Keycloak 管理界面,使用你在 defaults/main.yml 中设置的 keycloak_admin 和 keycloak_admin_password 登录。然后创建一个新的 Realm(例如 onwalk)。 +配置客户端和身份提供者:根据需要,创建新的客户端并配置身份验证提供者(如 OAuth、SAML 等)。 +3. 前端配置 +前端应用(如使用 Keycloak 作为身份验证提供者的应用)需要配置与 Keycloak 的集成: + +在前端应用中设置正确的 Keycloak URL,例如 https://keycloak.onwalk.net。 +配置正确的 realm 和 client ID,以便与 Keycloak 实现 SSO(单点登录)。 + +如果遇到任何问题,请检查 Nginx 和 Keycloak 的日志,确保它们正确启动并按预期运行。 +通过使用 Ansible Playbook,你可以快速部署和配置一个完整的 Keycloak 环境。部署完成后,确保 DNS 解析、Keycloak 领域设置以及 HTTPS 配置正确,以便你的应用能够通过安全的 SSL/TLS 通道与 Keycloak 进行交互。 + diff --git a/playbooks/roles/docker/keycloak/defaults/main.yml b/playbooks/roles/docker/keycloak/defaults/main.yml new file mode 100644 index 0000000..ae526e2 --- /dev/null +++ b/playbooks/roles/docker/keycloak/defaults/main.yml @@ -0,0 +1,36 @@ +--- +postgres_name: postgres +postgres_image: postgres:16.0-bookworm +postgres_db: keycloak +postgres_user: keycloak_user +postgres_password: keycloak_password + +keycloak_name: keycloak +keycloak_domain: keycloak.onwalk.net +keycloak_image: bitnami/keycloak:latest +keycloak_admin: admin +keycloak_admin_password: admin_password + +nginx_name: nginx +nginx_image: nginx:latest + +key_alias: mykey +key_password: your_key_password 
+keycloak_key_store: + file: /etc/ssl/keystore.jks + password: your_key_password +keycloak_trust_store: + file: /etc/ssl/truststore.jks + password: your_trust_password + +# SSL certificate paths and file names +ca_certificate: + file: /etc/ssl/onwalk.net.pem +ssl_certificate: + path: /etc/ssl/ + name: onwalk.net.pem +ssl_certificate_key: + path: /etc/ssl/ + name: onwalk.net.key + +dhparam_path: /etc/ssl/dhparam.pem diff --git a/playbooks/roles/docker/keycloak/files/create_keystore.sh b/playbooks/roles/docker/keycloak/files/create_keystore.sh new file mode 100644 index 0000000..94d0302 --- /dev/null +++ b/playbooks/roles/docker/keycloak/files/create_keystore.sh @@ -0,0 +1,32 @@ +#!/bin/bash +set -e # abort on the first failing command so a keytool error fails the calling Ansible task +# Abort with an error message when a required variable is empty ($1 = value, $2 = variable name) +check_non_empty() { + if [ -z "$1" ]; then + echo "ERROR: $2 is not set." + exit 1 + fi +} + +# Validate every environment variable the script depends on +check_non_empty "$KEYSTORE_FILE" "KEYSTORE_FILE" +check_non_empty "$TRUSTSTORE_FILE" "TRUSTSTORE_FILE" +check_non_empty "$KEYSTORE_PASSWORD" "KEYSTORE_PASSWORD" +check_non_empty "$TRUSTSTORE_PASSWORD" "TRUSTSTORE_PASSWORD" +check_non_empty "$KEY_ALIAS" "KEY_ALIAS" +check_non_empty "$KEY_PASSWORD" "KEY_PASSWORD" +check_non_empty "$ROOT_CA_CERT" "ROOT_CA_CERT" + +# 1. Create the keystore with a new key pair; skipped when the file already exists, because keytool refuses to overwrite an existing alias +echo "Creating keystore..." +[ -f "$KEYSTORE_FILE" ] || keytool -genkeypair -v -keystore "$KEYSTORE_FILE" -keyalg RSA -keysize 2048 -validity 365 -alias "$KEY_ALIAS" -storepass "$KEYSTORE_PASSWORD" -keypass "$KEY_PASSWORD" -dname "CN=localhost, OU=Dev, O=MyCompany, L=City, ST=State, C=US" -noprompt + +# 2. Create the truststore and import the root CA certificate; skipped when the truststore already exists so the script can be re-run +echo "Creating truststore and importing root CA certificate..." +[ -f "$TRUSTSTORE_FILE" ] || keytool -import -file "$ROOT_CA_CERT" -keystore "$TRUSTSTORE_FILE" -alias root-ca -storepass "$TRUSTSTORE_PASSWORD" -noprompt + +echo "Keystore and truststore have been created and configured successfully." + +# Diffie-Hellman parameter generation (currently disabled) +#echo "Generating Diffie-Hellman parameters..." 
+#openssl dhparam -out /etc/ssl/dhparam.pem 2048 diff --git a/playbooks/roles/docker/keycloak/tasks/main.yml b/playbooks/roles/docker/keycloak/tasks/main.yml new file mode 100644 index 0000000..db6fb0e --- /dev/null +++ b/playbooks/roles/docker/keycloak/tasks/main.yml @@ -0,0 +1,40 @@ +--- +# Main tasks: create the keystores, render the configs and start the Keycloak stack with Docker Compose. +# The rendered files go to a persistent directory instead of /tmp: the compose file bind-mounts ./nginx.conf, so the file must still exist whenever the nginx container is restarted. + +- name: 执行 pre-setup 操作 + include_tasks: "pre-setup.yml" + +- name: 执行 create_keystore.sh 脚本 + script: files/create_keystore.sh + environment: + KEYSTORE_FILE: "{{ keycloak_key_store.file }}" + TRUSTSTORE_FILE: "{{ keycloak_trust_store.file }}" + KEYSTORE_PASSWORD: "{{ keycloak_key_store.password }}" + TRUSTSTORE_PASSWORD: "{{ keycloak_trust_store.password }}" + KEY_ALIAS: "{{ key_alias }}" + KEY_PASSWORD: "{{ key_password }}" + ROOT_CA_CERT: "{{ ca_certificate.file }}" + +- name: 创建持久化部署目录 + file: + path: "{{ keycloak_deploy_dir | default('/opt/keycloak') }}" + state: directory + mode: '0755' + +- name: 渲染 nginx 配置文件 + template: + src: "templates/nginx.conf.j2" + dest: "{{ keycloak_deploy_dir | default('/opt/keycloak') }}/nginx.conf" + +- name: 渲染 Docker Compose 配置文件 + template: + src: "templates/docker-compose.yml.j2" + dest: "{{ keycloak_deploy_dir | default('/opt/keycloak') }}/docker-compose.yml" + +- name: 启动 Docker Compose 服务 + become: true + command: docker-compose -f {{ keycloak_deploy_dir | default('/opt/keycloak') }}/docker-compose.yml up -d + +- name: 执行 post-setup 操作 + include_tasks: "post-setup.yml" diff --git a/playbooks/roles/docker/keycloak/tasks/post-setup.yml b/playbooks/roles/docker/keycloak/tasks/post-setup.yml new file mode 100644 index 0000000..9e3d888 --- /dev/null +++ b/playbooks/roles/docker/keycloak/tasks/post-setup.yml @@ -0,0 +1,31 @@ +--- +# post-setup.yml + +- name: 检查容器是否运行并输出状态 + command: docker ps -q -f name={{ item.name }} + register: container_status + loop: + - { name: "{{ keycloak_name }}" } + - { name: "{{ postgres_name }}" } + - { name: "{{ nginx_name }}" } + changed_when: false + failed_when: container_status.stdout == "" + loop_control: + loop_var: item + +- name: 显示容器状态 + debug: + msg: | + Docker Compose 容器状态: + {% for item in [keycloak_name, postgres_name, nginx_name] %} + - {{ item }}: {% if container_status.results | selectattr('item.name', 'equalto', 
item) | map(attribute='stdout') | list | first %} 运行中 {% else %} 未运行 {% endif %} + {% endfor %} + +- name: 清理临时文件 + file: + path: "/tmp/{{ item }}" + state: absent + with_items: + - docker-compose.yml + - nginx.conf + ignore_errors: yes diff --git a/playbooks/roles/docker/keycloak/tasks/pre-setup.yml b/playbooks/roles/docker/keycloak/tasks/pre-setup.yml new file mode 100644 index 0000000..8c93817 --- /dev/null +++ b/playbooks/roles/docker/keycloak/tasks/pre-setup.yml @@ -0,0 +1,22 @@ +--- +- name: 安装 Docker 和 Docker Compose + apt: + name: + - docker.io + - docker-compose + state: present + update_cache: yes + +- name: 启动并启用 Docker 服务 + systemd: + name: docker + enabled: yes + state: started + +- name: 创建所需的目录结构 + file: + path: "{{ item }}" + state: directory + mode: '0755' + with_items: + - /etc/ssl diff --git a/playbooks/roles/docker/keycloak/templates/docker-compose.yml.j2 b/playbooks/roles/docker/keycloak/templates/docker-compose.yml.j2 new file mode 100644 index 0000000..0e9d752 --- /dev/null +++ b/playbooks/roles/docker/keycloak/templates/docker-compose.yml.j2 @@ -0,0 +1,67 @@ +version: '3.7' + +services: + postgres: + container_name: {{ postgres_name }} # 设置容器名称 + image: {{ postgres_image }} + environment: + POSTGRES_DB: {{ postgres_db }} + POSTGRES_USER: {{ postgres_user }} + POSTGRES_PASSWORD: {{ postgres_password }} + volumes: + - postgres_data:/var/lib/postgresql/data + networks: + - keycloak_network + + keycloak: + image: {{ keycloak_image }} + container_name: {{ keycloak_name }} # 设置容器名称 + environment: + KEYCLOAK_PROXY_HEADERS: xforwarded + KEYCLOAK_ADMIN: {{ keycloak_admin }} + KEYCLOAK_ADMIN_PASSWORD: {{ keycloak_admin_password }} + KEYCLOAK_DATABASE_VENDOR: postgresql + KEYCLOAK_DATABASE_HOST: {{ postgres_name }} + KEYCLOAK_DATABASE_PORT: 5432 + KEYCLOAK_DATABASE_USER: {{ postgres_user }} + KEYCLOAK_DATABASE_NAME: {{ postgres_db }} + KEYCLOAK_DATABASE_PASSWORD: {{ postgres_password }} + KEYCLOAK_HTTPS_KEY_STORE_FILE: /etc/ssl/keystore.jks + 
KEYCLOAK_HTTPS_KEY_STORE_PASSWORD: {{ keycloak_key_store.password }} # keystore password + KEYCLOAK_HTTPS_TRUST_STORE_FILE: /etc/ssl/truststore.jks + KEYCLOAK_HTTPS_TRUST_STORE_PASSWORD: {{ keycloak_trust_store.password }} # truststore password + ports: + - 8080:8080 # NOTE(review): publishes Keycloak's plain-HTTP port on every host interface; nginx reaches it over keycloak_network, so consider 127.0.0.1:8080:8080 + volumes: + - {{ keycloak_key_store.file }}:/etc/ssl/keystore.jks + - {{ keycloak_trust_store.file }}:/etc/ssl/truststore.jks + restart: always + depends_on: + - postgres + networks: + - keycloak_network + + nginx: + image: {{ nginx_image }} + container_name: {{ nginx_name }} # container name + depends_on: + - keycloak # depends_on takes the Compose service key, not the container_name variable + ports: + - "80:80" + - "443:443" + volumes: + - {{ ssl_certificate.path }}{{ ssl_certificate.name }}:/etc/ssl/certs/{{ ssl_certificate.name }} + - {{ ssl_certificate_key.path }}{{ ssl_certificate_key.name }}:/etc/ssl/certs/{{ ssl_certificate_key.name }} + - {{ dhparam_path }}:/etc/nginx/ssl/dhparam.pem # NOTE(review): the openssl dhparam call in create_keystore.sh is commented out - confirm this file exists on the host + - ./nginx.conf:/etc/nginx/nginx.conf + restart: unless-stopped + networks: + - keycloak_network + +volumes: + postgres_data: + driver: local + +networks: + keycloak_network: + driver: bridge diff --git a/playbooks/roles/docker/keycloak/templates/nginx.conf.j2 b/playbooks/roles/docker/keycloak/templates/nginx.conf.j2 new file mode 100644 index 0000000..f58cf42 --- /dev/null +++ b/playbooks/roles/docker/keycloak/templates/nginx.conf.j2 @@ -0,0 +1,70 @@ +# global settings +worker_processes auto; + +events { + worker_connections 1024; +} + +http { + # Global settings + include /etc/nginx/mime.types; + default_type application/octet-stream; + + # Logging + access_log /dev/stdout; + error_log /dev/stderr; + + # Content-Security-Policy header: frames may only be loaded from this origin and keycloak_domain + add_header Content-Security-Policy "frame-src 'self' https://{{ keycloak_domain }};"; + + # Reverse-proxy buffers + client_body_buffer_size 16k; # client request body buffer size + proxy_buffer_size 128k; # size of the single proxy buffer + proxy_buffers 4 256k; # proxy buffers (4 x 256 KB) + proxy_max_temp_file_size 512m; # maximum temporary file size + + # Source of the real client IP. NOTE(review): 0.0.0.0/0 accepts the X-Real-IP header from any client, so the address can be spoofed; restrict this to the upstream proxy's address range + set_real_ip_from 0.0.0.0/0; + real_ip_header 
X-Real-IP; + real_ip_recursive on; + + # server 配置 + server { + listen 80; + server_name {{ keycloak_domain }}; + + # 强制 HTTP 请求重定向到 HTTPS + return 301 https://$host$request_uri; + } + + server { + listen 443 ssl; + server_name {{ keycloak_domain }}; + + # SSL 配置 + ssl_certificate /etc/ssl/certs/{{ ssl_certificate.name }}; + ssl_certificate_key /etc/ssl/certs/{{ ssl_certificate_key.name }}; + + # 日志设置 + access_log /dev/stdout; + error_log /dev/stderr; + + # 配置反向代理 + location / { + proxy_pass http://{{ keycloak_name }}:8080; + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Proto $scheme; + proxy_set_header X-Forwarded-Port 443; + proxy_set_header Cookie $http_cookie; + proxy_redirect off; + } + + # SSL 强化 + ssl_protocols TLSv1.2 TLSv1.3; + ssl_ciphers 'ECDHE-ECDSA-AES128-GCM-SHA256:ECDHE-RSA-AES128-GCM-SHA256:DHE-RSA-AES128-GCM-SHA256'; + ssl_prefer_server_ciphers off; + } +} + diff --git a/playbooks/roles/grafana-dashboard/K8S-Dashboard-2025-01015.json b/playbooks/roles/grafana-dashboard/K8S-Dashboard-2025-01015.json new file mode 100644 index 0000000..d00c5e1 --- /dev/null +++ b/playbooks/roles/grafana-dashboard/K8S-Dashboard-2025-01015.json @@ -0,0 +1,6734 @@ +{ + "__inputs": [ + { + "name": "DS_指标", + "label": "指标", + "description": "", + "type": "datasource", + "pluginId": "prometheus", + "pluginName": "Prometheus" + } + ], + "__elements": {}, + "__requires": [ + { + "type": "panel", + "id": "bargauge", + "name": "Bar gauge", + "version": "" + }, + { + "type": "grafana", + "id": "grafana", + "name": "Grafana", + "version": "11.5.0-80683" + }, + { + "type": "datasource", + "id": "prometheus", + "name": "Prometheus", + "version": "1.0.0" + }, + { + "type": "panel", + "id": "stat", + "name": "Stat", + "version": "" + }, + { + "type": "panel", + "id": "table", + "name": "Table", + "version": "" + }, + { + "type": "panel", + "id": "timeseries", + 
"name": "Time series", + "version": "" + } + ], + "annotations": { + "list": [ + { + "$$hashKey": "object:247", + "builtIn": 1, + "datasource": { + "type": "datasource", + "uid": "grafana" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "target": { + "limit": 100, + "matchAny": false, + "tags": [], + "type": "dashboard" + }, + "type": "dashboard" + } + ] + }, + "description": "【中文版本】2024.05.13更新,kubernetes资源全面展示!包含K8S整体资源总览、微服务资源明细、Pod资源明细及K8S网络带宽,优化重要指标展示。https://grafana.com/orgs/starsliao/dashboards", + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 0, + "id": null, + "links": [ + { + "icon": "bolt", + "tags": [], + "targetBlank": true, + "title": "Update", + "tooltip": "查看更多仪表板", + "type": "link", + "url": "https://grafana.com/orgs/starsliao/dashboards" + }, + { + "$$hashKey": "object:831", + "icon": "question", + "tags": [ + "node_exporter" + ], + "targetBlank": true, + "title": "GitHub", + "tooltip": "查看更多仪表板", + "type": "link", + "url": "https://github.com/starsliao" + } + ], + "panels": [ + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 54, + "panels": [], + "title": "节点资源总览:所选节点:【$Node】", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "fieldConfig": { + "defaults": { + "decimals": 1, + "mappings": [], + "max": 1, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "orange", + "value": 0.8 + }, + { + "color": "red", + "value": 0.9 + } + ] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 4, + "x": 0, + "y": 1 + }, + "id": 44, + "options": { + "displayMode": "basic", + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": false + }, + "maxVizHeight": 300, + "minVizHeight": 10, + "minVizWidth": 0, + "namePlacement": "auto", + 
"orientation": "vertical", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "showUnfilled": false, + "sizing": "auto", + "text": {}, + "valueMode": "color" + }, + "pluginVersion": "11.5.0-80683", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "editorMode": "code", + "exemplar": true, + "expr": "sum(container_memory_working_set_bytes{origin_prometheus=~\"$origin_prometheus\",container!=\"\",node=~\"^$Node$\"}) / sum(kube_node_status_allocatable{origin_prometheus=~\"$origin_prometheus\",resource=\"memory\", unit=\"byte\", node=~\"^$Node$\"})", + "format": "time_series", + "hide": false, + "instant": true, + "interval": "", + "legendFormat": "内存使用率", + "refId": "C" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "editorMode": "code", + "exemplar": true, + "expr": "sum(kube_pod_container_resource_requests{origin_prometheus=~\"$origin_prometheus\",resource=\"memory\", unit=\"byte\",node=~\"^$Node$\"}) / sum(kube_node_status_allocatable{origin_prometheus=~\"$origin_prometheus\",resource=\"memory\", unit=\"byte\", node=~\"^$Node$\"})", + "format": "time_series", + "hide": false, + "instant": true, + "interval": "", + "legendFormat": "内存请求率", + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "editorMode": "code", + "exemplar": true, + "expr": "sum(kube_pod_container_resource_limits{origin_prometheus=~\"$origin_prometheus\",resource=\"memory\", unit=\"byte\",node=~\"^$Node$\"}) / sum(kube_node_status_allocatable{origin_prometheus=~\"$origin_prometheus\",resource=\"memory\", unit=\"byte\", node=~\"^$Node$\"})", + "format": "time_series", + "hide": false, + "instant": true, + "interval": "", + "intervalFactor": 1, + "legendFormat": "内存限制率", + "refId": "B", + "step": 10 + } + ], + "title": "节点内存比例", + "type": "bargauge" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "fieldConfig": { + 
"defaults": { + "decimals": 1, + "mappings": [], + "max": 1, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "orange", + "value": 0.7 + }, + { + "color": "red", + "value": 0.9 + } + ] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 4, + "x": 4, + "y": 1 + }, + "id": 45, + "options": { + "displayMode": "basic", + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": false + }, + "maxVizHeight": 300, + "minVizHeight": 10, + "minVizWidth": 0, + "namePlacement": "auto", + "orientation": "vertical", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "showUnfilled": false, + "sizing": "auto", + "text": {}, + "valueMode": "color" + }, + "pluginVersion": "11.5.0-80683", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "editorMode": "code", + "exemplar": true, + "expr": "sum (irate(container_cpu_usage_seconds_total{origin_prometheus=~\"$origin_prometheus\",container!=\"\",node=~\"^$Node$\"}[2m])) / sum(kube_node_status_allocatable{origin_prometheus=~\"$origin_prometheus\",resource=\"cpu\", unit=\"core\", node=~\"^$Node$\"})", + "format": "time_series", + "hide": false, + "instant": true, + "interval": "", + "legendFormat": "CPU使用率", + "refId": "C" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "editorMode": "code", + "exemplar": true, + "expr": "sum(kube_pod_container_resource_requests{origin_prometheus=~\"$origin_prometheus\",resource=\"cpu\", unit=\"core\",node=~\"^$Node$\"}) / sum(kube_node_status_allocatable{origin_prometheus=~\"$origin_prometheus\",resource=\"cpu\", unit=\"core\", node=~\"^$Node$\"})", + "format": "time_series", + "instant": true, + "interval": "", + "legendFormat": "CPU请求率", + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "editorMode": "code", + 
"exemplar": true, + "expr": "sum(kube_pod_container_resource_limits{origin_prometheus=~\"$origin_prometheus\",resource=\"cpu\", unit=\"core\",node=~\"^$Node$\"}) / sum(kube_node_status_allocatable{origin_prometheus=~\"$origin_prometheus\",resource=\"cpu\", unit=\"core\", node=~\"^$Node$\"})", + "format": "time_series", + "instant": true, + "interval": "", + "legendFormat": "CPU限制率", + "refId": "B" + } + ], + "title": "节点CPU比例", + "type": "bargauge" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "description": "集群节点数,节点POD数,节点POD上限", + "fieldConfig": { + "defaults": { + "mappings": [], + "max": 100, + "min": 1, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "orange", + "value": 1000 + }, + { + "color": "red", + "value": 2000 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 3, + "x": 8, + "y": 1 + }, + "id": 74, + "options": { + "displayMode": "basic", + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": false + }, + "maxVizHeight": 300, + "minVizHeight": 10, + "minVizWidth": 0, + "namePlacement": "auto", + "orientation": "vertical", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "showUnfilled": false, + "sizing": "auto", + "text": {}, + "valueMode": "color" + }, + "pluginVersion": "11.5.0-80683", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "expr": "count(kube_node_info{origin_prometheus=~\"$origin_prometheus\"})", + "instant": true, + "interval": "", + "legendFormat": "节点数", + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "expr": "count(kube_pod_info{origin_prometheus=~\"$origin_prometheus\",created_by_kind!~\"|Job\",node=~\"^$Node$\"})", + "hide": false, + "instant": true, + "interval": "", + "legendFormat": "Pod数", + "refId": "B" + }, + { + "datasource": { + 
"type": "prometheus", + "uid": "${DS_指标}" + }, + "exemplar": true, + "expr": "sum(kube_node_status_allocatable{origin_prometheus=~\"$origin_prometheus\",resource=\"pods\", unit=\"integer\",node=~\"^$Node$\"})", + "hide": false, + "instant": true, + "interval": "", + "legendFormat": "上限Pod", + "refId": "C" + } + ], + "title": "节点与Pod", + "type": "bargauge" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "align": "center", + "cellOptions": { + "type": "color-text" + }, + "filterable": false, + "inspect": false + }, + "mappings": [], + "noValue": "0", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "none" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "空间" + }, + "properties": [ + { + "id": "custom.width", + "value": 59 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Pod" + }, + "properties": [ + { + "id": "custom.width", + "value": 21 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "SVC" + }, + "properties": [ + { + "id": "custom.width", + "value": 7 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "微服务" + }, + "properties": [ + { + "id": "custom.width", + "value": 4 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "配置" + }, + "properties": [ + { + "id": "custom.width", + "value": 16 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "密码" + }, + "properties": [ + { + "id": "custom.width", + "value": 33 + } + ] + } + ] + }, + "gridPos": { + "h": 8, + "w": 5, + "x": 11, + "y": 1 + }, + "id": 51, + "options": { + "cellHeight": "sm", + "footer": { + "countRows": false, + "fields": "", + "reducer": [ + "sum" + ], + "show": true + }, + "showHeader": true, + "sortBy": [ + { + "desc": true, + "displayName": "微服务" + } + ] + }, + "pluginVersion": 
"11.5.0-80683", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "editorMode": "code", + "expr": "count(kube_pod_info{origin_prometheus=~\"$origin_prometheus\",node=~\"^$Node$\"}) by (namespace)", + "format": "table", + "instant": true, + "interval": "", + "legendFormat": "", + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "editorMode": "code", + "expr": "count(kube_service_info{origin_prometheus=~\"$origin_prometheus\"}) by(namespace)", + "format": "table", + "hide": false, + "instant": true, + "interval": "", + "legendFormat": "", + "refId": "C" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "editorMode": "code", + "expr": "count by (namespace)({__name__=~\"kube_deployment_metadata_generation|kube_daemonset_metadata_generation|kube_statefulset_metadata_generation\",origin_prometheus=~\"$origin_prometheus\"})", + "format": "table", + "hide": false, + "instant": true, + "interval": "", + "legendFormat": "__auto", + "refId": "D" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "editorMode": "code", + "expr": "count(kube_configmap_info{origin_prometheus=~\"$origin_prometheus\"}) by(namespace)", + "format": "table", + "hide": false, + "instant": true, + "interval": "", + "legendFormat": "configmap", + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "editorMode": "code", + "expr": "count(kube_secret_info{origin_prometheus=~\"$origin_prometheus\"}) by(namespace)", + "format": "table", + "hide": false, + "instant": true, + "interval": "", + "legendFormat": "secret", + "refId": "E" + } + ], + "title": "命名空间资源统计", + "transformations": [ + { + "id": "seriesToColumns", + "options": { + "byField": "namespace" + } + }, + { + "id": "organize", + "options": { + "excludeByName": { + "Time": true, + "Time 1": true, + "Time 2": true, + "Time 3": true, + "Time 4": true, + "Time 5": true + }, + 
"includeByName": {}, + "indexByName": { + "Time 1": 2, + "Time 2": 4, + "Time 3": 6, + "Value #A": 3, + "Value #C": 5, + "Value #D": 1, + "namespace": 0 + }, + "renameByName": { + "Time 1": "", + "Time 2": "", + "Value #A": "Pod", + "Value #B": "配置", + "Value #C": "SVC", + "Value #D": "微服务", + "Value #E": "密码", + "namespace": "空间" + } + } + } + ], + "type": "table" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "series", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 30, + "gradientMode": "opacity", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineStyle": { + "fill": "solid" + }, + "lineWidth": 2, + "pointSize": 4, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "binbps" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 16, + "y": 1 + }, + "id": 32, + "options": { + "legend": { + "calcs": [ + "max" + ], + "displayMode": "list", + "placement": "bottom", + "showLegend": true, + "width": 200 + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "11.5.0-80683", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "editorMode": "code", + "exemplar": true, + "expr": "sum 
(irate(container_network_receive_bytes_total{origin_prometheus=~\"$origin_prometheus\",node=~\"^$Node$\",namespace=~\"$NameSpace\"}[2m]))*8", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "接收", + "metric": "network", + "range": true, + "refId": "A", + "step": 10 + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "editorMode": "code", + "exemplar": true, + "expr": "sum (irate(container_network_transmit_bytes_total{origin_prometheus=~\"$origin_prometheus\",node=~\"^$Node$\",namespace=~\"$NameSpace\"}[2m]))*8", + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "发送", + "metric": "network", + "refId": "B", + "step": 10 + } + ], + "title": "$NameSpace:网络总览(可关联节点与命名空间)", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "fieldConfig": { + "defaults": { + "decimals": 1, + "mappings": [], + "max": 2000000000000, + "min": 1, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "orange", + "value": 100000000000 + }, + { + "color": "red", + "value": 2000000000000 + } + ] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 4, + "x": 0, + "y": 5 + }, + "id": 71, + "options": { + "displayMode": "basic", + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": false + }, + "maxVizHeight": 300, + "minVizHeight": 10, + "minVizWidth": 0, + "namePlacement": "auto", + "orientation": "vertical", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "showUnfilled": false, + "sizing": "auto", + "text": {}, + "valueMode": "color" + }, + "pluginVersion": "11.5.0-80683", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "exemplar": true, + "expr": "sum(kube_node_status_allocatable{origin_prometheus=~\"$origin_prometheus\",resource=\"memory\", 
unit=\"byte\", node=~\"^$Node$\"})", + "instant": true, + "interval": "", + "legendFormat": "总内存", + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "exemplar": true, + "expr": "sum (container_memory_working_set_bytes{origin_prometheus=~\"$origin_prometheus\",container!=\"\",node=~\"^$Node$\"})", + "instant": true, + "interval": "", + "legendFormat": "使用量", + "refId": "C" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "exemplar": true, + "expr": "sum(kube_pod_container_resource_requests{origin_prometheus=~\"$origin_prometheus\",resource=\"memory\", unit=\"byte\",node=~\"^$Node$\"})", + "instant": true, + "interval": "", + "legendFormat": "请求量", + "refId": "D" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "exemplar": true, + "expr": "sum(kube_pod_container_resource_limits{origin_prometheus=~\"$origin_prometheus\",resource=\"memory\", unit=\"byte\",node=~\"^$Node$\"})", + "instant": true, + "interval": "", + "legendFormat": "限制量", + "refId": "B" + } + ], + "title": "节点内存信息", + "type": "bargauge" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "fieldConfig": { + "defaults": { + "decimals": 1, + "mappings": [], + "max": 500, + "min": 1, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "#EAB839", + "value": 500 + }, + { + "color": "red", + "value": 1000 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 4, + "x": 4, + "y": 5 + }, + "id": 72, + "options": { + "displayMode": "basic", + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": false + }, + "maxVizHeight": 300, + "minVizHeight": 10, + "minVizWidth": 0, + "namePlacement": "auto", + "orientation": "vertical", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "showUnfilled": false, + "sizing": "auto", + 
"text": {}, + "valueMode": "color" + }, + "pluginVersion": "11.5.0-80683", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "exemplar": true, + "expr": "sum(kube_node_status_allocatable{origin_prometheus=~\"$origin_prometheus\",resource=\"cpu\", unit=\"core\", node=~\"^$Node$\"})", + "instant": true, + "interval": "", + "legendFormat": "总核数", + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "exemplar": true, + "expr": "sum (irate(container_cpu_usage_seconds_total{origin_prometheus=~\"$origin_prometheus\",id=\"/\",node=~\"^$Node$\"}[2m]))", + "instant": true, + "interval": "", + "legendFormat": "使用量", + "refId": "C" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "exemplar": true, + "expr": "sum(kube_pod_container_resource_requests{origin_prometheus=~\"$origin_prometheus\",resource=\"cpu\", unit=\"core\",node=~\"^$Node$\"})", + "instant": true, + "interval": "", + "legendFormat": "请求量", + "refId": "D" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "exemplar": true, + "expr": "sum(kube_pod_container_resource_limits{origin_prometheus=~\"$origin_prometheus\",resource=\"cpu\", unit=\"core\",node=~\"^$Node$\"})", + "instant": true, + "interval": "", + "legendFormat": "限制量", + "refId": "B" + } + ], + "title": "节点CPU核数", + "type": "bargauge" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "fieldConfig": { + "defaults": { + "decimals": 1, + "mappings": [], + "max": 8000000000000, + "min": 1, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "#EAB839", + "value": 5000000000000 + }, + { + "color": "red", + "value": 10000000000000 + } + ] + }, + "unit": "bytes" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "使用率" + }, + "properties": [ + { + "id": "unit", + "value": "percentunit" + }, + { + "id": "max" + }, + { + "id": 
"min", + "value": 0 + }, + { + "id": "thresholds", + "value": { + "mode": "percentage", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "orange", + "value": 80 + }, + { + "color": "red", + "value": 90 + } + ] + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "总量" + }, + "properties": [ + { + "id": "decimals", + "value": 0 + } + ] + } + ] + }, + "gridPos": { + "h": 4, + "w": 3, + "x": 8, + "y": 5 + }, + "id": 73, + "options": { + "displayMode": "basic", + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": false + }, + "maxVizHeight": 300, + "minVizHeight": 10, + "minVizWidth": 0, + "namePlacement": "auto", + "orientation": "vertical", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "showUnfilled": false, + "sizing": "auto", + "text": {}, + "valueMode": "color" + }, + "pluginVersion": "11.5.0-80683", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "expr": "sum (container_fs_usage_bytes{origin_prometheus=~\"$origin_prometheus\",device=~\"^/dev/.*$\",id=\"/\",node=~\"^$Node$\"}) / sum (container_fs_limit_bytes{origin_prometheus=~\"$origin_prometheus\",device=~\"^/dev/.*$\",id=\"/\",node=~\"^$Node$\"})", + "instant": true, + "interval": "", + "legendFormat": "使用率", + "refId": "C" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "expr": "sum (container_fs_usage_bytes{origin_prometheus=~\"$origin_prometheus\",device=~\"^/dev/.*$\",id=\"/\",node=~\"^$Node$\"})", + "instant": true, + "interval": "", + "legendFormat": "使用量", + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "expr": "sum (container_fs_limit_bytes{origin_prometheus=~\"$origin_prometheus\",device=~\"^/dev/.*$\",id=\"/\",node=~\"^$Node$\"})", + "instant": true, + "interval": "", + "legendFormat": "总量", + "refId": "B" + } + ], + "title": "节点存储信息", + "type": "bargauge" + }, + { + 
"datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [ + { + "matcher": { + "id": "byRegexp", + "options": "/异常.*/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "red", + "mode": "fixed" + } + } + ] + } + ] + }, + "gridPos": { + "h": 2, + "w": 24, + "x": 0, + "y": 9 + }, + "id": 88, + "maxPerRow": 2, + "options": { + "colorMode": "background", + "graphMode": "none", + "justifyMode": "center", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "text": {}, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "10.4.3", + "repeat": "origin_prometheus", + "repeatDirection": "v", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "editorMode": "code", + "exemplar": false, + "expr": "count({__name__=~\"kube_deployment_metadata_generation|kube_daemonset_metadata_generation|kube_statefulset_metadata_generation\",origin_prometheus=~\"$origin_prometheus\"})", + "hide": false, + "instant": true, + "legendFormat": "工作负载", + "range": false, + "refId": "F" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "editorMode": "code", + "exemplar": false, + "expr": "count(kube_pod_info{origin_prometheus=~\"$origin_prometheus\"})", + "hide": false, + "instant": true, + "legendFormat": "总Pod", + "range": false, + "refId": "E" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "editorMode": "code", + "exemplar": false, + "expr": "count by(key,origin_prometheus)(kube_node_spec_taint{origin_prometheus=~\"$origin_prometheus\",key=~\"node.kubernetes.io.*\"})", + "format": "time_series", + "hide": 
false, + "instant": true, + "legendFormat": "{{key}}", + "range": false, + "refId": "D" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "editorMode": "code", + "exemplar": false, + "expr": "count by(origin_prometheus)(kube_node_info{origin_prometheus=~\"$origin_prometheus\"})", + "hide": false, + "instant": true, + "legendFormat": "总节点", + "range": false, + "refId": "C" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "editorMode": "code", + "exemplar": false, + "expr": "count by(origin_prometheus)(kube_node_info{origin_prometheus=~\"$origin_prometheus\"}) - count by(origin_prometheus)(kube_node_spec_taint{origin_prometheus=~\"$origin_prometheus\",key!~\"node.kubernetes.io.*\"})", + "hide": false, + "instant": true, + "legendFormat": "正常节点", + "range": false, + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "editorMode": "code", + "exemplar": false, + "expr": "count by(key,origin_prometheus)(kube_node_spec_taint{origin_prometheus=~\"$origin_prometheus\",key!~\"node.kubernetes.io.*\"})", + "hide": false, + "instant": true, + "legendFormat": "{{key}}", + "range": false, + "refId": "A" + } + ], + "transformations": [ + { + "id": "renameByRegex", + "options": { + "regex": "(node.kubernetes.io/)(.*)", + "renamePattern": "异常:$2" + } + } + ], + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 15, + "gradientMode": "opacity", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": 
"linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "总内存" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "red", + "mode": "fixed" + } + }, + { + "id": "custom.lineWidth", + "value": 2 + } + ] + } + ] + }, + "gridPos": { + "h": 5, + "w": 8, + "x": 0, + "y": 11 + }, + "id": 79, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": false + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "11.5.0-80683", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "editorMode": "code", + "exemplar": true, + "expr": "sum(kube_node_status_allocatable{origin_prometheus=~\"$origin_prometheus\",resource=\"memory\", unit=\"byte\", node=~\"^$Node$\"})", + "instant": false, + "interval": "", + "legendFormat": "总内存", + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "exemplar": true, + "expr": "sum (container_memory_working_set_bytes{origin_prometheus=~\"$origin_prometheus\",container!=\"\",node=~\"^$Node$\"})", + "instant": false, + "interval": "", + "legendFormat": "使用量", + "refId": "C" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "exemplar": true, + "expr": "sum(kube_pod_container_resource_requests{origin_prometheus=~\"$origin_prometheus\",resource=\"memory\", unit=\"byte\",node=~\"^$Node$\"})", + "hide": true, + "instant": false, + "interval": "", + "legendFormat": "请求量", + "refId": "D" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "exemplar": true, + "expr": 
"sum(kube_pod_container_resource_limits{origin_prometheus=~\"$origin_prometheus\",resource=\"memory\", unit=\"byte\",node=~\"^$Node$\"})", + "hide": true, + "instant": false, + "interval": "", + "legendFormat": "限制量", + "refId": "B" + } + ], + "title": "内存使用量【$Node】", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 15, + "gradientMode": "opacity", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "none" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "总核数" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "red", + "mode": "fixed" + } + }, + { + "id": "custom.lineWidth", + "value": 2 + } + ] + } + ] + }, + "gridPos": { + "h": 5, + "w": 8, + "x": 8, + "y": 11 + }, + "id": 80, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": false + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "11.5.0-80683", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "editorMode": "code", + "exemplar": true, + "expr": 
"sum(kube_node_status_allocatable{origin_prometheus=~\"$origin_prometheus\",resource=\"cpu\", unit=\"core\", node=~\"^$Node$\"})", + "instant": false, + "interval": "", + "legendFormat": "总核数", + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "exemplar": true, + "expr": "sum (irate(container_cpu_usage_seconds_total{origin_prometheus=~\"$origin_prometheus\",id=\"/\",node=~\"^$Node$\"}[2m]))", + "instant": false, + "interval": "", + "legendFormat": "使用量", + "refId": "C" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "exemplar": true, + "expr": "sum(kube_pod_container_resource_requests{origin_prometheus=~\"$origin_prometheus\",resource=\"cpu\", unit=\"core\",node=~\"^$Node$\"})", + "hide": true, + "instant": false, + "interval": "", + "legendFormat": "请求量", + "refId": "D" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "exemplar": true, + "expr": "sum(kube_pod_container_resource_limits{origin_prometheus=~\"$origin_prometheus\",resource=\"cpu\", unit=\"core\",node=~\"^$Node$\"})", + "hide": true, + "instant": true, + "interval": "", + "legendFormat": "限制量", + "refId": "B" + } + ], + "title": "CPU使用核【$Node】", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "description": "集群节点数,节点POD数,节点POD上限", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "series", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 15, + "gradientMode": "opacity", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + 
"group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "none" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "上限Pod" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "red", + "mode": "fixed" + } + }, + { + "id": "custom.lineWidth", + "value": 2 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "节点数" + }, + "properties": [ + { + "id": "custom.axisPlacement", + "value": "right" + }, + { + "id": "custom.drawStyle", + "value": "points" + }, + { + "id": "color", + "value": { + "fixedColor": "blue", + "mode": "fixed" + } + }, + { + "id": "custom.pointSize", + "value": 3 + } + ] + } + ] + }, + "gridPos": { + "h": 5, + "w": 8, + "x": 16, + "y": 11 + }, + "id": 81, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "11.5.0-80683", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "exemplar": true, + "expr": "count(kube_node_info{origin_prometheus=~\"$origin_prometheus\"})", + "hide": false, + "instant": false, + "interval": "", + "legendFormat": "节点数", + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "editorMode": "code", + "exemplar": true, + "expr": "count(kube_pod_info{origin_prometheus=~\"$origin_prometheus\",created_by_kind!~\"|Job\",node=~\"^$Node$\"})", + "hide": false, + "instant": false, + "interval": "", + "legendFormat": "Pod数", + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "exemplar": true, + "expr": "sum(kube_node_status_allocatable{origin_prometheus=~\"$origin_prometheus\",resource=\"pods\", 
unit=\"integer\",node=~\"^$Node$\"})", + "hide": false, + "instant": false, + "interval": "", + "legendFormat": "上限Pod", + "refId": "C" + } + ], + "title": "Pod数与节点数【$Node】", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "percent" + }, + "overrides": [ + { + "matcher": { + "id": "byRegexp", + "options": "/总核数.*/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#C4162A", + "mode": "fixed" + } + } + ] + } + ] + }, + "gridPos": { + "h": 6, + "w": 8, + "x": 0, + "y": 16 + }, + "id": 75, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": false + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "11.5.0-80683", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "exemplar": true, + "expr": "sum (irate(container_cpu_usage_seconds_total{origin_prometheus=~\"$origin_prometheus\",container!=\"\",node=~\"^$Node$\"}[2m]))by (node) / 
sum(kube_node_status_allocatable{origin_prometheus=~\"$origin_prometheus\",resource=\"cpu\", unit=\"core\", node=~\"^$Node$\"})by (node)*100", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "legendFormat": "{{node}}", + "refId": "I" + } + ], + "title": "$Node:节点CPU明细", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "percent" + }, + "overrides": [] + }, + "gridPos": { + "h": 6, + "w": 8, + "x": 8, + "y": 16 + }, + "id": 76, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": false + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "11.5.0-80683", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "exemplar": true, + "expr": "sum (container_memory_working_set_bytes{origin_prometheus=~\"$origin_prometheus\",container!=\"\",node=~\"^$Node$\"})by (node) / sum(kube_node_status_allocatable{origin_prometheus=~\"$origin_prometheus\",resource=\"memory\", 
unit=\"byte\", node=~\"^$Node$\"})by (node)*100", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "legendFormat": "{{node}}", + "refId": "I" + } + ], + "title": "$Node:节点内存明细", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "series", + "axisLabel": "←流入/流出→", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "binbps" + }, + "overrides": [ + { + "matcher": { + "id": "byRegexp", + "options": "/流入.*/" + }, + "properties": [ + { + "id": "custom.transform", + "value": "negative-Y" + } + ] + } + ] + }, + "gridPos": { + "h": 6, + "w": 8, + "x": 16, + "y": 16 + }, + "id": 78, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": false + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "11.5.0-80683", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "exemplar": true, + "expr": "sum (irate(container_network_receive_bytes_total{origin_prometheus=~\"$origin_prometheus\",node=~\"^$Node$\"}[2m]))by (node) *8", + "format": 
"time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "流入:{{node}}", + "metric": "network", + "refId": "A", + "step": 10 + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "exemplar": true, + "expr": "sum (irate(container_network_transmit_bytes_total{origin_prometheus=~\"$origin_prometheus\",node=~\"^$Node$\"}[2m]))by (node) *8", + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "流出:{{node}}", + "metric": "network", + "refId": "B", + "step": 10 + } + ], + "title": "$Node:节点网络总览", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "center", + "cellOptions": { + "type": "auto" + }, + "filterable": false, + "inspect": false + }, + "mappings": [], + "max": 1, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "CPU限制" + }, + "properties": [ + { + "id": "custom.width", + "value": 76 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "内存使用" + }, + "properties": [ + { + "id": "custom.width", + "value": 71 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "内存限制" + }, + "properties": [ + { + "id": "custom.width", + "value": 74 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "磁盘使用" + }, + "properties": [ + { + "id": "custom.width", + "value": 74 + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": ".*%" + }, + "properties": [ + { + "id": "custom.cellOptions", + "value": { + "mode": "gradient", + "type": "color-background" + } + }, + { + "id": "color", + "value": { + "mode": "continuous-GrYlRd" + } + }, + { + "id": "custom.width", + "value": 85 + }, + { + "id": "unit", + "value": "percentunit" + }, + { + "id": "decimals", + 
"value": 0 + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "(内存使用|内存总|内存请求|内存限制|磁盘使用|磁盘总)" + }, + "properties": [ + { + "id": "unit", + "value": "bytes" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "节点" + }, + "properties": [ + { + "id": "custom.width", + "value": 96 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "内存请求" + }, + "properties": [ + { + "id": "custom.width", + "value": 76 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "CPU请求" + }, + "properties": [ + { + "id": "custom.width", + "value": 75 + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "(CPU总|内存总|磁盘总|Pod上限)" + }, + "properties": [ + { + "id": "custom.cellOptions", + "value": { + "mode": "gradient", + "type": "color-background" + } + }, + { + "id": "thresholds", + "value": { + "mode": "absolute", + "steps": [ + { + "color": "blue", + "value": null + } + ] + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Pod上限" + }, + "properties": [ + { + "id": "custom.width", + "value": 66 + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "CPU核使用$|内存使用$|磁盘使用$|Pod数" + }, + "properties": [ + { + "id": "custom.cellOptions", + "value": { + "type": "color-text" + } + }, + { + "id": "color", + "value": { + "fixedColor": "orange", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/.*总/" + }, + "properties": [ + { + "id": "custom.cellOptions", + "value": { + "mode": "gradient", + "type": "color-background" + } + }, + { + "id": "custom.cellOptions", + "value": { + "mode": "gradient", + "type": "color-background" + } + }, + { + "id": "color", + "value": { + "fixedColor": "blue", + "mode": "fixed" + } + }, + { + "id": "decimals", + "value": 0 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Pod数" + }, + "properties": [ + { + "id": "custom.width", + "value": 58 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "CPU核总" + }, + "properties": [ + { + 
"id": "custom.width", + "value": 69 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "内存总" + }, + "properties": [ + { + "id": "custom.width", + "value": 75 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "磁盘总" + }, + "properties": [ + { + "id": "custom.width", + "value": 74 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "CPU核使用" + }, + "properties": [ + { + "id": "custom.width", + "value": 74 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "内存使用%" + }, + "properties": [ + { + "id": "custom.width", + "value": 102 + } + ] + } + ] + }, + "gridPos": { + "h": 11, + "w": 24, + "x": 0, + "y": 22 + }, + "id": 52, + "options": { + "cellHeight": "sm", + "footer": { + "countRows": false, + "enablePagination": true, + "fields": "", + "reducer": [ + "sum" + ], + "show": false + }, + "showHeader": true, + "sortBy": [ + { + "desc": true, + "displayName": "内存使用%" + } + ] + }, + "pluginVersion": "11.5.0-80683", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "exemplar": false, + "expr": "count(kube_pod_info{origin_prometheus=~\"$origin_prometheus\",created_by_kind!~\"|Job\",node=~\"^$Node$\"}) by (node)", + "format": "table", + "hide": false, + "instant": true, + "interval": "", + "legendFormat": "pod数", + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "exemplar": false, + "expr": "kube_node_status_condition{origin_prometheus=~\"$origin_prometheus\",status=\"true\",node=~\"^$Node$\"} == 1", + "format": "table", + "hide": true, + "instant": true, + "interval": "", + "legendFormat": "状态", + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "exemplar": false, + "expr": "sum (irate(container_cpu_usage_seconds_total{origin_prometheus=~\"$origin_prometheus\",container!=\"\",node=~\"^$Node$\"}[2m])) by (node)", + "format": "table", + "hide": false, + "instant": true, + "interval": "", + "legendFormat": "", 
+ "refId": "I" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "exemplar": false, + "expr": "kube_node_status_allocatable{origin_prometheus=~\"$origin_prometheus\",resource=\"cpu\", unit=\"core\", node=~\"^$Node$\"} - 0", + "format": "table", + "hide": false, + "instant": true, + "interval": "", + "legendFormat": "", + "refId": "C" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "exemplar": false, + "expr": "sum(kube_pod_container_resource_requests{origin_prometheus=~\"$origin_prometheus\",resource=\"cpu\", unit=\"core\",node=~\"^$Node$\"}) by (node)", + "format": "table", + "hide": false, + "instant": true, + "interval": "", + "legendFormat": "", + "refId": "E" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "exemplar": false, + "expr": "sum(kube_pod_container_resource_limits{origin_prometheus=~\"$origin_prometheus\",resource=\"cpu\", unit=\"core\",node=~\"^$Node$\"}) by (node)", + "format": "table", + "hide": false, + "instant": true, + "interval": "", + "legendFormat": "", + "refId": "F" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "exemplar": false, + "expr": "sum (container_memory_working_set_bytes{origin_prometheus=~\"$origin_prometheus\",container!=\"\",node=~\"^$Node$\"}) by (node)", + "format": "table", + "hide": false, + "instant": true, + "interval": "", + "legendFormat": "", + "refId": "J" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "exemplar": false, + "expr": "sum(kube_node_status_allocatable{origin_prometheus=~\"$origin_prometheus\",resource=\"memory\", unit=\"byte\", node=~\"^$Node$\"}) by (node) - 0", + "format": "table", + "hide": false, + "instant": true, + "interval": "", + "legendFormat": "", + "refId": "D" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "exemplar": false, + "expr": 
"sum(kube_pod_container_resource_requests{origin_prometheus=~\"$origin_prometheus\",resource=\"memory\", unit=\"byte\",node=~\"^$Node$\"}) by (node)", + "format": "table", + "hide": false, + "instant": true, + "interval": "", + "legendFormat": "", + "refId": "G" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "exemplar": false, + "expr": "sum(kube_pod_container_resource_limits{origin_prometheus=~\"$origin_prometheus\",resource=\"memory\", unit=\"byte\",node=~\"^$Node$\"}) by (node)", + "format": "table", + "hide": false, + "instant": true, + "interval": "", + "legendFormat": "", + "refId": "H" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "exemplar": false, + "expr": "sum (container_fs_usage_bytes{origin_prometheus=~\"$origin_prometheus\",device=~\"^/dev/.*$\",id=\"/\",node=~\"^$Node$\"}) by (node)", + "format": "table", + "hide": false, + "instant": true, + "interval": "", + "legendFormat": "", + "refId": "K" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "exemplar": false, + "expr": "sum (container_fs_limit_bytes{origin_prometheus=~\"$origin_prometheus\",device=~\"^/dev/.*$\",id=\"/\",node=~\"^$Node$\"}) by (node)", + "format": "table", + "hide": false, + "instant": true, + "interval": "", + "legendFormat": "", + "refId": "L" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "exemplar": false, + "expr": "sum (container_memory_working_set_bytes{origin_prometheus=~\"$origin_prometheus\",container!=\"\",node=~\"^$Node$\"})by (node) / sum(kube_node_status_allocatable{origin_prometheus=~\"$origin_prometheus\",resource=\"memory\", unit=\"byte\", node=~\"^$Node$\"})by (node)", + "format": "table", + "hide": false, + "instant": true, + "interval": "", + "legendFormat": "内存使用%", + "refId": "M" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "exemplar": false, + "expr": 
"sum(kube_pod_container_resource_requests{origin_prometheus=~\"$origin_prometheus\",resource=\"memory\", unit=\"byte\",node=~\"^$Node$\"})by (node) / sum(kube_node_status_allocatable{origin_prometheus=~\"$origin_prometheus\",resource=\"memory\", unit=\"byte\", node=~\"^$Node$\"})by (node)", + "format": "table", + "hide": false, + "instant": true, + "interval": "", + "legendFormat": "内存请求%", + "refId": "N" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "exemplar": false, + "expr": "sum(kube_pod_container_resource_limits{origin_prometheus=~\"$origin_prometheus\",resource=\"memory\", unit=\"byte\",node=~\"^$Node$\"})by (node) / sum(kube_node_status_allocatable{origin_prometheus=~\"$origin_prometheus\",resource=\"memory\", unit=\"byte\", node=~\"^$Node$\"})by (node)", + "format": "table", + "hide": false, + "instant": true, + "interval": "", + "legendFormat": "内存限制%", + "refId": "O" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "exemplar": false, + "expr": "sum (irate(container_cpu_usage_seconds_total{origin_prometheus=~\"$origin_prometheus\",container!=\"\",node=~\"^$Node$\"}[2m]))by (node) / sum(kube_node_status_allocatable{origin_prometheus=~\"$origin_prometheus\",resource=\"cpu\", unit=\"core\", node=~\"^$Node$\"})by (node)", + "format": "table", + "hide": false, + "instant": true, + "interval": "", + "legendFormat": "CPU使用%", + "refId": "P" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "exemplar": false, + "expr": "sum(kube_pod_container_resource_requests{origin_prometheus=~\"$origin_prometheus\",resource=\"cpu\", unit=\"core\",node=~\"^$Node$\"})by (node) / sum(kube_node_status_allocatable{origin_prometheus=~\"$origin_prometheus\",resource=\"cpu\", unit=\"core\", node=~\"^$Node$\"})by (node)", + "format": "table", + "hide": false, + "instant": true, + "interval": "", + "legendFormat": "CPU请求%", + "refId": "Q" + }, + { + "datasource": { + "type": "prometheus", + "uid": 
"${DS_指标}" + }, + "exemplar": false, + "expr": "sum(kube_pod_container_resource_limits{origin_prometheus=~\"$origin_prometheus\",resource=\"cpu\", unit=\"core\",node=~\"^$Node$\"})by (node) / sum(kube_node_status_allocatable{origin_prometheus=~\"$origin_prometheus\",resource=\"cpu\", unit=\"core\", node=~\"^$Node$\"})by (node)", + "format": "table", + "hide": false, + "instant": true, + "interval": "", + "legendFormat": "内存限制%", + "refId": "R" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "exemplar": false, + "expr": "sum (container_fs_usage_bytes{origin_prometheus=~\"$origin_prometheus\",device=~\"^/dev/.*$\",id=\"/\",node=~\"^$Node$\"})by (node) / sum (container_fs_limit_bytes{origin_prometheus=~\"$origin_prometheus\",device=~\"^/dev/.*$\",id=\"/\",node=~\"^$Node$\"})by (node)", + "format": "table", + "hide": false, + "instant": true, + "interval": "", + "legendFormat": "磁盘使用%", + "refId": "S" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "exemplar": false, + "expr": "sum(kube_node_status_allocatable{origin_prometheus=~\"$origin_prometheus\",resource=\"pods\", unit=\"integer\",node=~\"^$Node$\"})by (node)", + "format": "table", + "hide": false, + "instant": true, + "interval": "", + "legendFormat": "Pod上限", + "refId": "T" + } + ], + "title": "$Node:节点信息明细", + "transformations": [ + { + "id": "merge", + "options": {} + }, + { + "id": "organize", + "options": { + "excludeByName": { + "Time": true, + "Time 1": true, + "Time 10": true, + "Time 11": true, + "Time 12": true, + "Time 13": true, + "Time 14": true, + "Time 15": true, + "Time 16": true, + "Time 17": true, + "Time 18": true, + "Time 19": true, + "Time 2": true, + "Time 20": true, + "Time 3": true, + "Time 4": true, + "Time 5": true, + "Time 6": true, + "Time 7": true, + "Time 8": true, + "Time 9": true, + "Value #B": true, + "Value #E": false, + "Value #F": false, + "__name__": true, + "app_kubernetes_io_name": true, + "app_kubernetes_io_name 
1": true, + "app_kubernetes_io_name 2": true, + "app_kubernetes_io_name 3": true, + "app_kubernetes_io_version": true, + "app_kubernetes_io_version 1": true, + "app_kubernetes_io_version 2": true, + "app_kubernetes_io_version 3": true, + "condition": true, + "instance": true, + "instance 1": true, + "instance 2": true, + "instance 3": true, + "job": true, + "job 1": true, + "job 2": true, + "job 3": true, + "k8s_namespace": true, + "k8s_namespace 1": true, + "k8s_namespace 2": true, + "k8s_namespace 3": true, + "k8s_sname": true, + "k8s_sname 1": true, + "k8s_sname 2": true, + "k8s_sname 3": true, + "origin_prometheus": true, + "origin_prometheus 1": true, + "origin_prometheus 2": true, + "origin_prometheus 3": true, + "resource": true, + "status": true, + "unit": true + }, + "includeByName": {}, + "indexByName": { + "Time": 22, + "Value #A": 2, + "Value #C": 6, + "Value #D": 8, + "Value #E": 16, + "Value #F": 17, + "Value #G": 18, + "Value #H": 19, + "Value #I": 7, + "Value #J": 9, + "Value #K": 11, + "Value #L": 10, + "Value #M": 4, + "Value #N": 13, + "Value #O": 15, + "Value #P": 3, + "Value #Q": 12, + "Value #R": 14, + "Value #S": 5, + "Value #T": 1, + "instance": 23, + "job": 24, + "node": 0, + "origin_prometheus": 25, + "resource": 20, + "unit": 21 + }, + "renameByName": { + "Value #A": "Pod数", + "Value #C": "CPU核总", + "Value #D": "内存总", + "Value #E": "CPU请求", + "Value #F": "CPU限制", + "Value #G": "内存请求", + "Value #H": "内存限制", + "Value #I": "CPU核使用", + "Value #J": "内存使用", + "Value #K": "磁盘使用", + "Value #L": "磁盘总", + "Value #M": "内存使用%", + "Value #N": "内存请求%", + "Value #O": "内存限制%", + "Value #P": "CPU使用%", + "Value #Q": "CPU请求%", + "Value #R": "CPU限制%", + "Value #S": "磁盘使用%", + "Value #T": "Pod上限", + "condition": "状态", + "node": "节点" + } + } + }, + { + "id": "filterFieldsByName", + "options": {} + } + ], + "type": "table" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": 
"palette-classic" + }, + "custom": { + "align": "center", + "cellOptions": { + "type": "color-background" + }, + "inspect": false + }, + "decimals": 0, + "links": [], + "mappings": [ + { + "options": { + "0": { + "color": "red", + "index": 0 + } + }, + "type": "value" + }, + { + "options": { + "match": "null", + "result": { + "color": "red", + "index": 1 + } + }, + "type": "special" + } + ], + "noValue": "0", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "bytes" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "使用率" + }, + "properties": [ + { + "id": "unit", + "value": "percent" + }, + { + "id": "custom.width", + "value": 54 + }, + { + "id": "color", + "value": { + "fixedColor": "purple", + "mode": "fixed" + } + }, + { + "id": "decimals" + }, + { + "id": "mappings", + "value": [ + { + "options": { + "from": 75, + "result": { + "color": "semi-dark-red", + "index": 0 + }, + "to": 110 + }, + "type": "range" + } + ] + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "挂载Pod数" + }, + "properties": [ + { + "id": "unit", + "value": "none" + }, + { + "id": "custom.width", + "value": 59 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "命名空间" + }, + "properties": [ + { + "id": "custom.width", + "value": 58 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "PVC" + }, + "properties": [ + { + "id": "custom.width", + "value": 94 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "使用量" + }, + "properties": [ + { + "id": "custom.width", + "value": 57 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "总量" + }, + "properties": [ + { + "id": "custom.width", + "value": 54 + } + ] + } + ] + }, + "gridPos": { + "h": 8, + "w": 6, + "x": 0, + "y": 33 + }, + "id": 92, + "options": { + "cellHeight": "sm", + "footer": { + "countRows": false, + "fields": "", + "reducer": [ + "sum" + ], + "show": false + }, + "showHeader": true, + "sortBy": 
[ + { + "desc": true, + "displayName": "使用率" + } + ] + }, + "pluginVersion": "11.5.0-80683", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "editorMode": "code", + "exemplar": false, + "expr": "max by (namespace,persistentvolumeclaim) (kubelet_volume_stats_used_bytes{origin_prometheus=~\"$origin_prometheus\"})", + "format": "table", + "instant": true, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{namespace}}:{{ persistentvolumeclaim }}", + "metric": "container_memory_usage:sort_desc", + "range": false, + "refId": "A", + "step": 10 + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "editorMode": "code", + "exemplar": false, + "expr": "min by (namespace,persistentvolumeclaim) (kubelet_volume_stats_available_bytes{origin_prometheus=~\"$origin_prometheus\"}) + max by (namespace,persistentvolumeclaim) (kubelet_volume_stats_used_bytes{origin_prometheus=~\"$origin_prometheus\"})", + "format": "table", + "hide": false, + "instant": true, + "interval": "", + "intervalFactor": 1, + "legendFormat": "__auto", + "metric": "container_memory_usage:sort_desc", + "range": false, + "refId": "B", + "step": 10 + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "editorMode": "code", + "exemplar": false, + "expr": "max by (namespace,persistentvolumeclaim) (kubelet_volume_stats_used_bytes{origin_prometheus=~\"$origin_prometheus\"}) /(min by (namespace,persistentvolumeclaim) (kubelet_volume_stats_available_bytes{origin_prometheus=~\"$origin_prometheus\"}) + max by (namespace,persistentvolumeclaim) (kubelet_volume_stats_used_bytes{origin_prometheus=~\"$origin_prometheus\"}))*100", + "format": "table", + "hide": false, + "instant": true, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{namespace}}:{{ persistentvolumeclaim }}", + "metric": "container_memory_usage:sort_desc", + "range": false, + "refId": "C", + "step": 10 + }, + { + "datasource": { + "type": 
"prometheus", + "uid": "${DS_指标}" + }, + "editorMode": "code", + "exemplar": false, + "expr": "count by (namespace,persistentvolumeclaim)(kube_pod_spec_volumes_persistentvolumeclaims_info{origin_prometheus=~\"$origin_prometheus\"})", + "format": "table", + "hide": false, + "instant": true, + "interval": "", + "intervalFactor": 1, + "legendFormat": "__auto", + "metric": "container_memory_usage:sort_desc", + "range": false, + "refId": "D", + "step": 10 + } + ], + "title": "PVC存储使用情况", + "transformations": [ + { + "id": "merge", + "options": {} + }, + { + "id": "organize", + "options": { + "excludeByName": { + "Time": true + }, + "includeByName": {}, + "indexByName": {}, + "renameByName": { + "Value #A": "使用量", + "Value #B": "总量", + "Value #C": "使用率", + "Value #D": "挂载Pod数", + "namespace": "命名空间", + "persistentvolumeclaim": "PVC" + } + } + } + ], + "type": "table" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 9, + "x": 6, + "y": 33 + }, + "id": 86, + "options": { + "legend": { + "calcs": [ + 
"last", + "max" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true, + "sortBy": "Max", + "sortDesc": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "11.5.0-80683", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "editorMode": "code", + "exemplar": true, + "expr": "sum(irate(container_cpu_usage_seconds_total{origin_prometheus=~\"$origin_prometheus\",container !=\"\",container!=\"POD\"}[2m])) by (namespace)>0.5", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{ namespace }}", + "metric": "container_cpu", + "refId": "A", + "step": 10 + } + ], + "title": "各命名空间CPU使用核(>0.5)", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "decimals": 0, + "links": [], + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 9, + "x": 15, + "y": 33 + }, + "id": 85, + "options": { + "legend": { + "calcs": [ + "last", + "max" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true, + 
"sortBy": "Max", + "sortDesc": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "11.5.0-80683", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "editorMode": "code", + "exemplar": true, + "expr": "sum (container_memory_working_set_bytes{origin_prometheus=~\"$origin_prometheus\",container !=\"\",container!=\"POD\"}) by (namespace) > 1*1024*1024*1024", + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{namespace} {{ pod }}", + "metric": "container_memory_usage:sort_desc", + "range": true, + "refId": "A", + "step": 10 + } + ], + "title": "命名空间WSS内存使用(>1G)", + "type": "timeseries" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 41 + }, + "id": 49, + "panels": [], + "title": "Pod资源总览:所选Pod:【$Pod】", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "center", + "cellOptions": { + "type": "auto" + }, + "filterable": false, + "inspect": false + }, + "displayName": "", + "mappings": [], + "max": 100, + "min": 0, + "thresholds": { + "mode": "percentage", + "steps": [ + { + "color": "green" + }, + { + "color": "#EAB839", + "value": 80 + }, + { + "color": "red", + "value": 90 + } + ] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "命名空间" + }, + "properties": [ + { + "id": "custom.width", + "value": 96 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Pod名称" + }, + "properties": [ + { + "id": "custom.width", + "value": 207 + }, + { + "id": "custom.align", + "value": "right" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "使用核数" + }, + "properties": [ + { + "id": "custom.width", + "value": 71 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "重启" + }, + "properties": [ + { + "id": "custom.width", + "value": 38 + }, + { + "id": 
"thresholds", + "value": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "#EAB839", + "value": 1 + }, + { + "color": "red", + "value": 3 + } + ] + } + }, + { + "id": "custom.cellOptions", + "value": { + "mode": "gradient", + "type": "color-background" + } + }, + { + "id": "decimals" + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": ".*%" + }, + "properties": [ + { + "id": "unit", + "value": "percentunit" + }, + { + "id": "custom.cellOptions", + "value": { + "type": "color-background" + } + }, + { + "id": "color", + "value": { + "mode": "continuous-GrYlRd" + } + }, + { + "id": "decimals", + "value": 1 + }, + { + "id": "custom.width", + "value": 55 + }, + { + "id": "min", + "value": 0 + }, + { + "id": "max", + "value": 1 + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": ".*限制" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "blue", + "mode": "fixed" + } + }, + { + "id": "custom.cellOptions", + "value": { + "mode": "gradient", + "type": "color-background" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "节点" + }, + "properties": [ + { + "id": "custom.width", + "value": 100 + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "使用核$|WSS$|RSS$|存活|流入|流出" + }, + "properties": [ + { + "id": "custom.cellOptions", + "value": { + "type": "color-text" + } + }, + { + "id": "color", + "value": { + "fixedColor": "orange", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "容器名" + }, + "properties": [ + { + "id": "custom.width", + "value": 57 + }, + { + "id": "custom.align", + "value": "left" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "存活" + }, + "properties": [ + { + "id": "unit", + "value": "s" + }, + { + "id": "custom.width", + "value": 80 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "使用核" + }, + "properties": [ + { + "id": "custom.width", + "value": 62 + } + ] + }, + { + "matcher": { + "id": 
"byName", + "options": "CPU限制" + }, + "properties": [ + { + "id": "custom.width", + "value": 58 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "内存限制" + }, + "properties": [ + { + "id": "custom.width", + "value": 68 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "内存需求" + }, + "properties": [ + { + "id": "custom.width", + "value": 88 + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "WSS$|RSS$|内存需求$|内存限制$|磁盘.*$" + }, + "properties": [ + { + "id": "unit", + "value": "bytes" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "WSS" + }, + "properties": [ + { + "id": "custom.width", + "value": 81 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "RSS" + }, + "properties": [ + { + "id": "custom.width", + "value": 74 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "CPU需求" + }, + "properties": [ + { + "id": "custom.width", + "value": 72 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "磁盘限制" + }, + "properties": [ + { + "id": "custom.width", + "value": 83 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "磁盘使用" + }, + "properties": [ + { + "id": "custom.width", + "value": 72 + }, + { + "id": "custom.cellOptions", + "value": { + "type": "color-background" + } + }, + { + "id": "thresholds", + "value": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "orange", + "value": 10737418240 + }, + { + "color": "red", + "value": 16106127360 + } + ] + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "WSS%" + }, + "properties": [ + { + "id": "custom.width", + "value": 77 + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/流入|流出/" + }, + "properties": [ + { + "id": "unit", + "value": "binbps" + }, + { + "id": "custom.width", + "value": 80 + } + ] + } + ] + }, + "gridPos": { + "h": 8, + "w": 24, + "x": 0, + "y": 42 + }, + "id": 47, + "options": { + "cellHeight": "sm", + "footer": { + "countRows": false, + "fields": 
"", + "reducer": [ + "sum" + ], + "show": false + }, + "showHeader": true, + "sortBy": [ + { + "desc": true, + "displayName": "WSS%" + } + ] + }, + "pluginVersion": "11.5.0-80683", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum(irate(container_cpu_usage_seconds_total{origin_prometheus=~\"$origin_prometheus\",pod=~\"$Pod\",container =~\"$Container\",container !=\"\",container!=\"POD\",node=~\"^$Node$\",namespace=~\"$NameSpace\"}[2m])) by (container, pod,node,namespace) / (sum(container_spec_cpu_quota{origin_prometheus=~\"$origin_prometheus\",pod=~\"$Pod\",container =~\"$Container\",container !=\"\",container!=\"POD\",node=~\"^$Node$\",namespace=~\"$NameSpace\"}/100000) by (container, pod,node,namespace)) ", + "format": "table", + "hide": false, + "instant": true, + "interval": "", + "legendFormat": "", + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "exemplar": false, + "expr": "sum(irate(container_cpu_usage_seconds_total{origin_prometheus=~\"$origin_prometheus\",pod=~\"$Pod\",container =~\"$Container\",container !=\"\",container!=\"POD\",node=~\"^$Node$\",namespace=~\"$NameSpace\"}[2m])) by (container, pod,node,namespace)", + "format": "table", + "hide": false, + "instant": true, + "interval": "", + "legendFormat": "CPU核数使用", + "refId": "Q" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "exemplar": false, + "expr": "sum(kube_pod_container_resource_requests{origin_prometheus=~\"$origin_prometheus\",resource=\"cpu\", unit=\"core\",pod=~\"$Pod\",container =~\"$Container\",node=~\"^$Node$\",namespace=~\"$NameSpace\"}) by (container,pod,node,namespace)", + "format": "table", + "hide": false, + "instant": true, + "interval": "", + "legendFormat": "", + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "exemplar": false, + "expr": 
"sum(kube_pod_container_resource_limits{origin_prometheus=~\"$origin_prometheus\",resource=\"cpu\", unit=\"core\",pod=~\"$Pod\",container =~\"$Container\",node=~\"^$Node$\",namespace=~\"$NameSpace\"}) by (container,pod,node,namespace)", + "format": "table", + "instant": true, + "interval": "", + "legendFormat": "", + "refId": "C" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum (container_memory_working_set_bytes{origin_prometheus=~\"$origin_prometheus\",pod=~\"$Pod\",container =~\"$Container\",container !=\"\",container!=\"POD\",node=~\"^$Node$\",namespace=~\"$NameSpace\"}) by (container, pod,node,namespace)/ sum(container_spec_memory_limit_bytes{origin_prometheus=~\"$origin_prometheus\",pod=~\"$Pod\",container =~\"$Container\",container !=\"\",container!=\"POD\",node=~\"^$Node$\",namespace=~\"$NameSpace\"}) by (container, pod,node,namespace)", + "format": "table", + "hide": false, + "instant": true, + "interval": "", + "legendFormat": "wss%", + "refId": "I" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "exemplar": false, + "expr": "sum (container_memory_working_set_bytes{origin_prometheus=~\"$origin_prometheus\",pod=~\"$Pod\",container =~\"$Container\",container !=\"\",container!=\"POD\",node=~\"^$Node$\",namespace=~\"$NameSpace\"}) by (container, pod,node,namespace)", + "format": "table", + "hide": false, + "instant": true, + "interval": "", + "legendFormat": "wss", + "refId": "D" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum (container_memory_rss{origin_prometheus=~\"$origin_prometheus\",pod=~\"$Pod\",container =~\"$Container\",container !=\"\",container!=\"POD\",node=~\"^$Node$\",namespace=~\"$NameSpace\"}) by (container, pod,node,namespace)/ sum(container_spec_memory_limit_bytes{origin_prometheus=~\"$origin_prometheus\",pod=~\"$Pod\",container 
=~\"$Container\",container !=\"\",container!=\"POD\",node=~\"^$Node$\",namespace=~\"$NameSpace\"}) by (container, pod,node,namespace)", + "format": "table", + "hide": false, + "instant": true, + "interval": "", + "legendFormat": "rss%", + "refId": "L" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "exemplar": false, + "expr": "sum (container_memory_rss{origin_prometheus=~\"$origin_prometheus\",pod=~\"$Pod\",container =~\"$Container\",container !=\"\",container!=\"POD\",node=~\"^$Node$\",namespace=~\"$NameSpace\"}) by (container, pod,node,namespace)", + "format": "table", + "hide": false, + "instant": true, + "interval": "", + "legendFormat": "rss", + "refId": "K" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum(kube_pod_container_resource_requests{origin_prometheus=~\"$origin_prometheus\",resource=\"memory\", unit=\"byte\",pod=~\"$Pod\",container =~\"$Container\",node=~\"^$Node$\",namespace=~\"$NameSpace\"}) by (container,pod,node,namespace)", + "format": "table", + "hide": false, + "instant": true, + "interval": "", + "legendFormat": "", + "refId": "E" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "exemplar": false, + "expr": "sum(kube_pod_container_resource_limits{origin_prometheus=~\"$origin_prometheus\",resource=\"memory\", unit=\"byte\",pod=~\"$Pod\",container =~\"$Container\",node=~\"^$Node$\",namespace=~\"$NameSpace\"}) by (container,pod,node,namespace)", + "format": "table", + "hide": false, + "instant": true, + "interval": "", + "legendFormat": "", + "refId": "F" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum(container_fs_usage_bytes{origin_prometheus=~\"$origin_prometheus\",pod=~\"$Pod\",container =~\"$Container\",container !=\"\",container!=\"POD\",node=~\"^$Node$\",namespace=~\"$NameSpace\"}) by 
(container,pod,node,namespace)", + "format": "table", + "instant": true, + "interval": "", + "legendFormat": "", + "refId": "J" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "editorMode": "code", + "exemplar": false, + "expr": "kube_pod_container_status_restarts_total{origin_prometheus=~\"$origin_prometheus\",pod=~\"$Pod\",container =~\"$Container\",container !=\"\",container!=\"POD\",namespace=~\"$NameSpace\"} * on (pod) group_left(node) kube_pod_info{origin_prometheus=~\"$origin_prometheus\",pod=~\"$Pod\",node=~\"^$Node$\",namespace=~\"$NameSpace\"}", + "format": "table", + "hide": false, + "instant": true, + "interval": "", + "legendFormat": "", + "refId": "H" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "editorMode": "code", + "exemplar": false, + "expr": "time() - kube_pod_created{origin_prometheus=~\"$origin_prometheus\",pod=~\"$Pod\",namespace=~\"$NameSpace\"} * on(pod) group_right kube_pod_container_info{origin_prometheus=~\"$origin_prometheus\",pod=~\"$Pod\",namespace=~\"$NameSpace\",container =~\"$Container\"}", + "format": "table", + "hide": false, + "instant": true, + "interval": "", + "legendFormat": "__auto", + "refId": "R" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum(container_fs_limit_bytes{origin_prometheus=~\"$origin_prometheus\",pod=~\"$Pod\",container =~\"$Container\",container !=\"\",container!=\"POD\",node=~\"^$Node$\",namespace=~\"$NameSpace\"}) by (container,pod,node,namespace)", + "format": "table", + "hide": false, + "instant": true, + "interval": "", + "legendFormat": "", + "refId": "S" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum(sum(irate(container_network_receive_bytes_total{origin_prometheus=~\"$origin_prometheus\",pod=~\"$Pod\",node=~\"^$Node$\",namespace=~\"$NameSpace\"}[2m])) by (pod)* 
on(pod) group_right kube_pod_container_info{origin_prometheus=~\"$origin_prometheus\",pod=~\"$Pod\",namespace=~\"$NameSpace\",container =~\"$Container\"}) by(pod) *8", + "format": "table", + "hide": false, + "instant": true, + "interval": "", + "legendFormat": "__auto", + "refId": "T" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum(sum(irate(container_network_transmit_bytes_total{origin_prometheus=~\"$origin_prometheus\",pod=~\"$Pod\",node=~\"^$Node$\",namespace=~\"$NameSpace\"}[2m])) by (pod)* on(pod) group_right kube_pod_container_info{origin_prometheus=~\"$origin_prometheus\",pod=~\"$Pod\",namespace=~\"$NameSpace\",container =~\"$Container\"}) by(pod) *8", + "format": "table", + "hide": false, + "instant": true, + "interval": "", + "legendFormat": "__auto", + "refId": "U" + } + ], + "title": "$Node:Pod资源明细(可关联节点)", + "transformations": [ + { + "id": "merge", + "options": {} + }, + { + "id": "organize", + "options": { + "excludeByName": { + "Time": true, + "Time 1": true, + "Time 10": true, + "Time 11": true, + "Time 12": true, + "Time 13": true, + "Time 2": true, + "Time 3": true, + "Time 4": true, + "Time 5": true, + "Time 6": true, + "Time 7": true, + "Time 8": true, + "Time 9": true, + "Value #G": false, + "__name__": true, + "app_kubernetes_io_name": true, + "app_kubernetes_io_name 1": true, + "app_kubernetes_io_name 2": true, + "app_kubernetes_io_version": true, + "app_kubernetes_io_version 1": true, + "app_kubernetes_io_version 2": true, + "container 1": true, + "container 10": true, + "container 11": true, + "container 12": true, + "container 2": true, + "container 3": true, + "container 4": true, + "container 5": true, + "container 6": true, + "container 7": true, + "container 8": true, + "container 9": true, + "created_by_kind": true, + "created_by_name": true, + "host_ip": true, + "instance": true, + "instance 1": true, + "instance 2": true, + "job": true, + "job 
1": true, + "job 2": true, + "k8s_namespace": true, + "k8s_namespace 1": true, + "k8s_namespace 2": true, + "k8s_sname": true, + "k8s_sname 1": true, + "k8s_sname 2": true, + "namespace": false, + "namespace 1": true, + "namespace 10": true, + "namespace 11": true, + "namespace 12": false, + "namespace 2": true, + "namespace 3": true, + "namespace 4": true, + "namespace 5": true, + "namespace 6": true, + "namespace 7": true, + "namespace 8": true, + "namespace 9": true, + "node 1": true, + "node 10": true, + "node 11": false, + "node 12": true, + "node 2": true, + "node 3": true, + "node 4": true, + "node 5": true, + "node 6": true, + "node 7": true, + "node 8": true, + "node 9": true, + "origin_prometheus": true, + "origin_prometheus 1": true, + "origin_prometheus 2": true, + "phase": true, + "pod_ip": true, + "priority_class": true, + "uid": true + }, + "includeByName": {}, + "indexByName": { + "Time": 21, + "Value #A": 4, + "Value #B": 16, + "Value #C": 7, + "Value #D": 10, + "Value #E": 17, + "Value #F": 9, + "Value #G": 23, + "Value #H": 14, + "Value #I": 5, + "Value #J": 13, + "Value #K": 11, + "Value #L": 6, + "Value #M": 24, + "Value #N": 25, + "Value #O": 26, + "Value #P": 27, + "Value #Q": 8, + "Value #R": 15, + "Value #S": 12, + "container": 2, + "instance": 18, + "ip": 28, + "job": 19, + "namespace": 1, + "node": 0, + "origin_prometheus": 20, + "pod": 3, + "uid": 22 + }, + "renameByName": { + "Value #A": "CPU%", + "Value #B": "CPU需求", + "Value #C": "CPU限制", + "Value #D": "WSS", + "Value #E": "内存需求", + "Value #F": "内存限制", + "Value #H": "重启", + "Value #I": "WSS%", + "Value #J": "磁盘使用", + "Value #K": "RSS", + "Value #L": "RSS%", + "Value #M": "堆内存", + "Value #N": "max堆", + "Value #O": "非堆", + "Value #P": "max非堆", + "Value #Q": "使用核", + "Value #R": "存活", + "Value #S": "磁盘限制", + "Value #T": "流入", + "Value #U": "流出", + "container": "容器名", + "instance": "", + "ip": "POD IP", + "namespace": "命名空间", + "namespace 1": "", + "namespace 12": "命名空间", + "node": "节点", 
+ "node 1": "", + "node 11": "节点", + "pod": "Pod名称", + "priority_class": "" + } + } + }, + { + "id": "filterFieldsByName", + "options": { + "include": { + "names": [ + "节点", + "命名空间", + "容器名", + "Pod名称", + "CPU%", + "WSS%", + "RSS%", + "CPU限制", + "使用核", + "内存限制", + "WSS", + "RSS", + "磁盘限制", + "磁盘使用", + "重启", + "CPU需求", + "内存需求", + "流入", + "流出", + "存活" + ] + } + } + } + ], + "type": "table" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "percent" + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 8, + "x": 0, + "y": 50 + }, + "id": 58, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "11.5.0-80683", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "editorMode": "code", + "exemplar": true, + "expr": "max(irate(container_cpu_usage_seconds_total{origin_prometheus=~\"$origin_prometheus\",pod=~\"$Pod\",container =~\"$Container\",container 
!=\"\",container!=\"POD\",node=~\"^$Node$\",namespace=~\"$NameSpace\"}[2m])) by (container, pod) / (max(container_spec_cpu_quota{origin_prometheus=~\"$origin_prometheus\",pod=~\"$Pod\",container =~\"$Container\",container !=\"\",container!=\"POD\",node=~\"^$Node$\",namespace=~\"$NameSpace\"}/100000) by (container, pod)) * 100", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{ pod }}", + "metric": "container_cpu", + "refId": "A", + "step": 10 + } + ], + "title": "Pod容器CPU使用率(最大100%可关联节点)", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "percent" + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 8, + "x": 8, + "y": 50 + }, + "id": 27, + "options": { + "legend": { + "calcs": [ + "max", + "last", + "mean" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "11.5.0-80683", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "editorMode": "code", + 
"exemplar": false, + "expr": "max (container_memory_working_set_bytes{origin_prometheus=~\"$origin_prometheus\",pod=~\"$Pod\",container =~\"$Container\",container !=\"\",container!=\"POD\",node=~\"^$Node$\",namespace=~\"$NameSpace\"}) by (container, pod)/ max(container_spec_memory_limit_bytes{origin_prometheus=~\"$origin_prometheus\",pod=~\"$Pod\",container =~\"$Container\",container !=\"\",container!=\"POD\",node=~\"^$Node$\",namespace=~\"$NameSpace\"}) by (container, pod) * 100", + "interval": "", + "intervalFactor": 1, + "legendFormat": "WSS:{{ pod }}", + "metric": "container_memory_usage:sort_desc", + "range": true, + "refId": "A", + "step": 10 + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "editorMode": "code", + "exemplar": false, + "expr": "max (container_memory_rss{origin_prometheus=~\"$origin_prometheus\",pod=~\"$Pod\",container =~\"$Container\",container !=\"\",container!=\"POD\",node=~\"^$Node$\",namespace=~\"$NameSpace\"}) by (container, pod)/ max(container_spec_memory_limit_bytes{origin_prometheus=~\"$origin_prometheus\",pod=~\"$Pod\",container =~\"$Container\",container !=\"\",container!=\"POD\",node=~\"^$Node$\",namespace=~\"$NameSpace\"}) by (container, pod) * 100", + "interval": "", + "intervalFactor": 1, + "legendFormat": "RSS:{{ pod }}", + "metric": "container_memory_usage:sort_desc", + "range": true, + "refId": "B", + "step": 10 + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "editorMode": "code", + "exemplar": true, + "expr": "(cass_jvm_heap{service=~\"$Container\"} * on (pod_ip) group_right(service) kube_pod_info{origin_prometheus=~\"$origin_prometheus\",pod=~\"$Pod\",namespace=~\"$NameSpace\"}) / (cass_jvm_heap_max{service=~\"$Container\"} * on (pod_ip) group_right(service) kube_pod_info{origin_prometheus=~\"$origin_prometheus\",pod=~\"$Pod\",namespace=~\"$NameSpace\"}) * 100", + "hide": true, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "Heap:{{ 
pod }}", + "metric": "container_memory_usage:sort_desc", + "refId": "C", + "step": 10 + } + ], + "title": "Pod容器内存使用率(可关联节点)", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "binbps" + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 8, + "x": 16, + "y": 50 + }, + "id": 77, + "options": { + "legend": { + "calcs": [ + "max", + "last", + "mean" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "11.5.0-80683", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "editorMode": "code", + "exemplar": false, + "expr": "max(max(irate(container_network_receive_bytes_total{origin_prometheus=~\"$origin_prometheus\",pod=~\"$Pod\",node=~\"^$Node$\",namespace=~\"$NameSpace\"}[2m])) by (pod)* on(pod) group_right kube_pod_container_info{origin_prometheus=~\"$origin_prometheus\",pod=~\"$Pod\",namespace=~\"$NameSpace\",container =~\"$Container\"}) by(pod) *8", + "hide": false, + "interval": "", + "intervalFactor": 1, + 
"legendFormat": "流入:{{ pod}}", + "metric": "network", + "range": true, + "refId": "A", + "step": 10 + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "editorMode": "code", + "exemplar": false, + "expr": "max(max(irate(container_network_transmit_bytes_total{origin_prometheus=~\"$origin_prometheus\",pod=~\"$Pod\",node=~\"^$Node$\",namespace=~\"$NameSpace\"}[2m])) by (pod)* on(pod) group_right kube_pod_container_info{origin_prometheus=~\"$origin_prometheus\",pod=~\"$Pod\",namespace=~\"$NameSpace\",container =~\"$Container\"}) by(pod) *8", + "interval": "", + "intervalFactor": 1, + "legendFormat": "流出:{{ pod}}", + "metric": "network", + "range": true, + "refId": "B", + "step": 10 + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "editorMode": "code", + "expr": "sum(sum(irate(container_network_receive_bytes_total{origin_prometheus=~\"$origin_prometheus\",pod=~\"$Pod\",image!=\"\",name=~\"^k8s_.*\",node=~\"^$Node$\",namespace=~\"$NameSpace\",pod=~\".*$Container.*\"}[2m])) by (pod)* on(pod) group_right kube_pod_container_info) by(pod) *8", + "hide": true, + "interval": "", + "intervalFactor": 1, + "legendFormat": "__auto", + "metric": "network", + "range": true, + "refId": "C", + "step": 10 + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "editorMode": "code", + "expr": "sum(sum(irate(container_network_transmit_bytes_total{origin_prometheus=~\"$origin_prometheus\",pod=~\"$Pod\",image!=\"\",name=~\"^k8s_.*\",node=~\"^$Node$\",namespace=~\"$NameSpace\",pod=~\".*$Container.*\"}[2m])) by (pod)* on(pod) group_right kube_pod_container_info) by(pod) *8", + "hide": true, + "interval": "", + "intervalFactor": 1, + "legendFormat": "__auto", + "metric": "network", + "range": true, + "refId": "D", + "step": 10 + } + ], + "title": "Pod每秒网络带宽 (可关联节点)", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "fieldConfig": { + "defaults": { + "color": { + 
"mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "none" + }, + "overrides": [ + { + "matcher": { + "id": "byRegexp", + "options": "/限制.*/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "red", + "mode": "fixed" + } + } + ] + } + ] + }, + "gridPos": { + "h": 9, + "w": 8, + "x": 0, + "y": 59 + }, + "id": 82, + "options": { + "legend": { + "calcs": [ + "max", + "last", + "mean" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Max", + "sortDesc": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "10.4.1", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "editorMode": "code", + "exemplar": true, + "expr": "max(irate(container_cpu_usage_seconds_total{origin_prometheus=~\"$origin_prometheus\",pod=~\"$Pod\",container =~\"$Container\",container !=\"\",container!=\"POD\",node=~\"^$Node$\",namespace=~\"$NameSpace\"}[2m])) by (container, pod,node,namespace)", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "CPU使用:{{ pod }}", + "metric": "container_cpu", + "refId": "A", + "step": 10 + }, + { + "datasource": { + "type": "prometheus", + "uid": 
"${DS_指标}" + }, + "editorMode": "code", + "exemplar": true, + "expr": "max(max(kube_pod_container_resource_limits{origin_prometheus=~\"$origin_prometheus\",resource=\"cpu\", unit=\"core\",pod=~\"$Pod\",container =~\"$Container\",node=~\"^$Node$\",namespace=~\"$NameSpace\"}) by (container,pod,node,namespace)) by(container)", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "Pod CPU限制:{{ container}}", + "metric": "container_cpu", + "refId": "B", + "step": 10 + } + ], + "title": "Pod容器CPU核使用", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" + }, + "overrides": [ + { + "matcher": { + "id": "byRegexp", + "options": "/.*限制/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "red", + "mode": "fixed" + } + } + ] + } + ] + }, + "gridPos": { + "h": 9, + "w": 8, + "x": 8, + "y": 59 + }, + "id": 84, + "options": { + "legend": { + "calcs": [ + "max", + "last", + "mean" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Max", + "sortDesc": true + }, + "tooltip": { + "mode": 
"multi", + "sort": "desc" + } + }, + "pluginVersion": "10.4.1", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "editorMode": "code", + "exemplar": false, + "expr": "max (container_memory_working_set_bytes{origin_prometheus=~\"$origin_prometheus\",pod=~\"$Pod\",container =~\"$Container\",container !=\"\",container!=\"POD\",node=~\"^$Node$\",namespace=~\"$NameSpace\"}) by (container, pod,node,namespace)", + "interval": "", + "intervalFactor": 1, + "legendFormat": "WSS:{{ pod }}", + "metric": "container_memory_usage:sort_desc", + "range": true, + "refId": "A", + "step": 10 + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "editorMode": "code", + "exemplar": false, + "expr": "max(max(kube_pod_container_resource_limits{origin_prometheus=~\"$origin_prometheus\",resource=\"memory\", unit=\"byte\",pod=~\"$Pod\",container =~\"$Container\",node=~\"^$Node$\",namespace=~\"$NameSpace\"}) by (container,pod,node,namespace)) by(container)", + "hide": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "Pod内存限制:{{ container}}", + "metric": "container_memory_usage:sort_desc", + "range": true, + "refId": "B", + "step": 10 + } + ], + "title": "Pod容器WSS内存使用(可关联节点)", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": 
{ + "mode": "off" + } + }, + "links": [], + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 8, + "x": 16, + "y": 59 + }, + "id": 83, + "options": { + "legend": { + "calcs": [ + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "10.4.1", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "editorMode": "code", + "exemplar": true, + "expr": "max (container_memory_rss{origin_prometheus=~\"$origin_prometheus\",pod=~\"$Pod\",container =~\"$Container\",container !=\"\",container!=\"POD\",node=~\"^$Node$\",namespace=~\"$NameSpace\"}) by (container, pod,node,namespace)", + "interval": "", + "intervalFactor": 1, + "legendFormat": "RSS:{{ pod }}", + "metric": "container_memory_usage:sort_desc", + "range": true, + "refId": "A", + "step": 10 + } + ], + "title": "Pod容器RSS内存使用(可关联节点)", + "type": "timeseries" + }, + { + "collapsed": true, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 68 + }, + "id": 61, + "panels": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "center", + "cellOptions": { + "type": "auto" + }, + "filterable": false, + "inspect": false + }, + "displayName": "", + "mappings": [], + "thresholds": { + "mode": "percentage", + "steps": [ + { + "color": "green" + } + ] + }, + "unit": "none" + }, + "overrides": [ + { + "matcher": { + "id": "byRegexp", + "options": ".*%" + }, + "properties": [ + { + "id": "unit", + "value": "percentunit" + }, + { + "id": "custom.cellOptions", + "value": { + "mode": "gradient", + "type": "gauge", + "valueDisplayMode": "color" + } + }, + { + "id": "max", + "value": 1 + 
}, + { + "id": "min", + "value": 0 + }, + { + "id": "color", + "value": { + "mode": "continuous-GrYlRd" + } + }, + { + "id": "decimals", + "value": 0 + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": ".*内存使用$|.*内存限制$|.*内存需求$|.*磁盘使用$|.*磁盘限制$" + }, + "properties": [ + { + "id": "unit", + "value": "bytes" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "命名空间" + }, + "properties": [ + { + "id": "custom.width", + "value": 92 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "容器名" + }, + "properties": [ + { + "id": "custom.width", + "value": 187 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "总CPU核使用" + }, + "properties": [ + { + "id": "custom.width", + "value": 100 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Pod" + }, + "properties": [ + { + "id": "custom.width", + "value": 44 + }, + { + "id": "custom.cellOptions", + "value": { + "type": "color-background" + } + }, + { + "id": "color", + "value": { + "fixedColor": "blue", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "平均CPU使用%" + }, + "properties": [ + { + "id": "custom.width", + "value": 116 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "平均RSS内存使用%" + }, + "properties": [ + { + "id": "custom.width", + "value": 141 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "平均WSS内存使用%" + }, + "properties": [ + { + "id": "custom.width", + "value": 165 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "总CPU限制" + }, + "properties": [ + { + "id": "custom.width", + "value": 86 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "总内存限制" + }, + "properties": [ + { + "id": "custom.width", + "value": 86 + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/.*限制$/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "blue", + "mode": "fixed" + } + }, + { + "id": "custom.cellOptions", + "value": { + "type": "color-background" + } + } + ] 
+ }, + { + "matcher": { + "id": "byRegexp", + "options": "/.*内存使用$|.*核使用$/" + }, + "properties": [ + { + "id": "custom.cellOptions", + "value": { + "type": "color-text" + } + }, + { + "id": "color", + "value": { + "fixedColor": "orange", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "总RSS内存使用" + }, + "properties": [ + { + "id": "custom.width", + "value": 107 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "总WSS内存使用" + }, + "properties": [ + { + "id": "custom.width", + "value": 113 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "平均磁盘使用" + }, + "properties": [ + { + "id": "custom.width", + "value": 96 + }, + { + "id": "custom.cellOptions", + "value": { + "type": "color-background" + } + }, + { + "id": "thresholds", + "value": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "orange", + "value": 10737418240 + }, + { + "color": "red", + "value": 16106127360 + } + ] + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "平均磁盘限制" + }, + "properties": [ + { + "id": "custom.width", + "value": 96 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "总CPU需求" + }, + "properties": [ + { + "id": "custom.width", + "value": 80 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "总内存需求" + }, + "properties": [ + { + "id": "custom.width", + "value": 80 + } + ] + } + ] + }, + "gridPos": { + "h": 9, + "w": 24, + "x": 0, + "y": 3 + }, + "id": 87, + "options": { + "cellHeight": "sm", + "footer": { + "countRows": false, + "enablePagination": true, + "fields": "", + "reducer": [ + "sum" + ], + "show": false + }, + "showHeader": true, + "sortBy": [ + { + "desc": true, + "displayName": "平均WSS内存使用%" + } + ] + }, + "pluginVersion": "10.4.1", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "editorMode": "code", + "exemplar": false, + "expr": 
"sum(irate(container_cpu_usage_seconds_total{origin_prometheus=~\"$origin_prometheus\",container =~\"$Container\",container !=\"\",container!=\"POD\",namespace=~\"$NameSpace\"}[2m])) by (container) / (sum(container_spec_cpu_quota{origin_prometheus=~\"$origin_prometheus\",container =~\"$Container\",container !=\"\",container!=\"POD\",namespace=~\"$NameSpace\"}/100000) by (container))", + "format": "table", + "hide": false, + "instant": true, + "interval": "", + "legendFormat": "", + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum(irate(container_cpu_usage_seconds_total{origin_prometheus=~\"$origin_prometheus\",container =~\"$Container\",container !=\"\",container!=\"POD\",namespace=~\"$NameSpace\"}[2m])) by (container)", + "format": "table", + "hide": false, + "instant": true, + "interval": "", + "legendFormat": "总核数使用", + "refId": "L" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "exemplar": false, + "expr": "sum(kube_pod_container_resource_requests{origin_prometheus=~\"$origin_prometheus\",resource=\"cpu\", unit=\"core\",container =~\"$Container\",container !=\"\",container!=\"POD\",namespace=~\"$NameSpace\"}) by (container)", + "format": "table", + "hide": false, + "instant": true, + "interval": "", + "legendFormat": "", + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "exemplar": false, + "expr": "sum(kube_pod_container_resource_limits{origin_prometheus=~\"$origin_prometheus\",resource=\"cpu\", unit=\"core\",container =~\"$Container\",container !=\"\",container!=\"POD\",namespace=~\"$NameSpace\"}) by (container)", + "format": "table", + "instant": true, + "interval": "", + "legendFormat": "", + "refId": "C" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum 
(container_memory_working_set_bytes{origin_prometheus=~\"$origin_prometheus\",container =~\"$Container\",container !=\"\",container!=\"POD\",namespace=~\"$NameSpace\"}) by (container)/ sum(container_spec_memory_limit_bytes{origin_prometheus=~\"$origin_prometheus\",container =~\"$Container\",container !=\"\",container!=\"POD\",namespace=~\"$NameSpace\"}) by (container)", + "format": "table", + "hide": false, + "instant": true, + "interval": "", + "legendFormat": "", + "refId": "I" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "exemplar": false, + "expr": "sum (container_memory_working_set_bytes{origin_prometheus=~\"$origin_prometheus\",container =~\"$Container\",container !=\"\",container!=\"POD\",namespace=~\"$NameSpace\"}) by (container)", + "format": "table", + "hide": false, + "instant": true, + "interval": "", + "legendFormat": "", + "refId": "D" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum (container_memory_rss{origin_prometheus=~\"$origin_prometheus\",container =~\"$Container\",container !=\"\",container!=\"POD\",namespace=~\"$NameSpace\"}) by (container)/ sum(container_spec_memory_limit_bytes{origin_prometheus=~\"$origin_prometheus\",container =~\"$Container\",container !=\"\",container!=\"POD\",namespace=~\"$NameSpace\"}) by (container)", + "format": "table", + "hide": false, + "instant": true, + "interval": "", + "legendFormat": "平均内存%(RSS)", + "refId": "H" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "exemplar": false, + "expr": "sum (container_memory_rss{origin_prometheus=~\"$origin_prometheus\",container =~\"$Container\",container !=\"\",container!=\"POD\",namespace=~\"$NameSpace\"}) by (container)", + "format": "table", + "hide": false, + "instant": true, + "interval": "", + "legendFormat": "总内存使用(RSS) ", + "refId": "K" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + 
"editorMode": "code", + "exemplar": false, + "expr": "sum by(container) (kube_pod_container_resource_requests{origin_prometheus=~\"$origin_prometheus\",resource=\"memory\", unit=\"byte\",container =~\"$Container\",container !=\"\",container!=\"POD\",namespace=~\"$NameSpace\"})", + "format": "table", + "hide": false, + "instant": true, + "interval": "", + "legendFormat": "", + "refId": "E" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "exemplar": false, + "expr": "sum(kube_pod_container_resource_limits{origin_prometheus=~\"$origin_prometheus\",resource=\"memory\", unit=\"byte\",container =~\"$Container\",container !=\"\",container!=\"POD\",namespace=~\"$NameSpace\"}) by (container)", + "format": "table", + "hide": false, + "instant": true, + "interval": "", + "legendFormat": "", + "refId": "F" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "editorMode": "code", + "exemplar": false, + "expr": "avg(container_fs_usage_bytes{origin_prometheus=~\"$origin_prometheus\",container =~\"$Container\",container !=\"\",container!=\"POD\",namespace=~\"$NameSpace\"}) by (container)", + "format": "table", + "instant": true, + "interval": "", + "legendFormat": "", + "refId": "J" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "editorMode": "code", + "exemplar": false, + "expr": "count(kube_pod_container_info{origin_prometheus=~\"$origin_prometheus\",container =~\"$Container\",container !=\"\",container!=\"POD\",namespace=~\"$NameSpace\"}) by(container,namespace)", + "format": "table", + "instant": true, + "interval": "", + "legendFormat": "", + "refId": "G" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "editorMode": "code", + "exemplar": false, + "expr": "avg(container_fs_limit_bytes{origin_prometheus=~\"$origin_prometheus\",container =~\"$Container\",container !=\"\",container!=\"POD\",namespace=~\"$NameSpace\"}) by (container)", + "format": "table", + "hide": 
false, + "instant": true, + "interval": "", + "legendFormat": "", + "refId": "M" + } + ], + "title": "微服务(容器名)资源统计", + "transformations": [ + { + "id": "merge", + "options": {} + }, + { + "id": "organize", + "options": { + "excludeByName": { + "Time": true, + "Time 1": true, + "Time 10": true, + "Time 11": true, + "Time 12": true, + "Time 2": true, + "Time 3": true, + "Time 4": true, + "Time 5": true, + "Time 6": true, + "Time 7": true, + "Time 8": true, + "Time 9": true + }, + "includeByName": {}, + "indexByName": { + "Time": 15, + "Value #A": 3, + "Value #B": 13, + "Value #C": 6, + "Value #D": 9, + "Value #E": 14, + "Value #F": 8, + "Value #G": 2, + "Value #H": 5, + "Value #I": 4, + "Value #J": 12, + "Value #K": 10, + "Value #L": 7, + "Value #M": 11, + "container": 1, + "namespace": 0 + }, + "renameByName": { + "Time 1": "", + "Value #A": "平均CPU使用%", + "Value #B": "总CPU需求", + "Value #C": "总CPU限制", + "Value #D": "总WSS内存使用", + "Value #E": "总内存需求", + "Value #F": "总内存限制", + "Value #G": "Pod", + "Value #H": "平均RSS内存使用%", + "Value #I": "平均WSS内存使用%", + "Value #J": "平均磁盘使用", + "Value #K": "总RSS内存使用", + "Value #L": "总CPU核使用", + "Value #M": "平均磁盘限制", + "container": "容器名", + "namespace": "命名空间" + } + } + }, + { + "id": "filterFieldsByName", + "options": {} + } + ], + "type": "table" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + 
}, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "percent" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 8, + "x": 0, + "y": 12 + }, + "id": 24, + "options": { + "legend": { + "calcs": [ + "max", + "last", + "mean" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "10.4.1", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "exemplar": true, + "expr": "sum(irate(container_cpu_usage_seconds_total{origin_prometheus=~\"$origin_prometheus\",container =~\"$Container\",container !=\"\",container!=\"POD\",namespace=~\"$NameSpace\"}[2m])) by (container) / (sum(container_spec_cpu_quota{origin_prometheus=~\"$origin_prometheus\",container =~\"$Container\",container !=\"\",container!=\"POD\",namespace=~\"$NameSpace\"}/100000) by (container)) * 100", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{ container}}", + "metric": "container_cpu", + "refId": "A", + "step": 10 + } + ], + "title": "微服务(容器名)平均CPU使用率(最大100%)", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + 
"group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "percent" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 8, + "x": 8, + "y": 12 + }, + "id": 89, + "options": { + "legend": { + "calcs": [ + "max", + "last", + "mean" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "10.4.1", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "expr": "sum (container_memory_working_set_bytes{origin_prometheus=~\"$origin_prometheus\",container =~\"$Container\",container !=\"\",container!=\"POD\",namespace=~\"$NameSpace\"}) by (container)/ sum(container_spec_memory_limit_bytes{origin_prometheus=~\"$origin_prometheus\",container =~\"$Container\",container !=\"\",container!=\"POD\",namespace=~\"$NameSpace\"}) by (container) * 100", + "interval": "", + "intervalFactor": 1, + "legendFormat": "WSS:{{ container }}", + "metric": "container_memory_usage:sort_desc", + "refId": "A", + "step": 10 + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "expr": "sum (container_memory_rss{origin_prometheus=~\"$origin_prometheus\",container =~\"$Container\",container !=\"\",container!=\"POD\",namespace=~\"$NameSpace\"}) by (container)/ sum(container_spec_memory_limit_bytes{origin_prometheus=~\"$origin_prometheus\",container =~\"$Container\",container !=\"\",container!=\"POD\",namespace=~\"$NameSpace\"}) by (container) * 100", + "interval": "", + "intervalFactor": 1, + "legendFormat": "RSS:{{ container }}", + "metric": "container_memory_usage:sort_desc", + "refId": "B", + "step": 10 + } + ], + "title": "微服务(容器名)平均内存使用率", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, 
+ "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "binbps" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 8, + "x": 16, + "y": 12 + }, + "id": 16, + "options": { + "legend": { + "calcs": [ + "mean", + "last", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "10.4.1", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "editorMode": "code", + "exemplar": true, + "expr": "sum(sum(irate(container_network_receive_bytes_total{origin_prometheus=~\"$origin_prometheus\",node=~\"^$Node$\",namespace=~\"$NameSpace\"}[2m])) by (pod)* on(pod) group_right kube_pod_container_info{origin_prometheus=~\"$origin_prometheus\",namespace=~\"$NameSpace\",container =~\"$Container\"}) by(container) *8", + "hide": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "流入:{{ container }}", + "metric": "network", + "range": true, + "refId": "A", + "step": 10 + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "editorMode": "code", + "exemplar": true, + "expr": 
"sum(sum(irate(container_network_transmit_bytes_total{origin_prometheus=~\"$origin_prometheus\",node=~\"^$Node$\",namespace=~\"$NameSpace\"}[2m])) by (pod)* on(pod) group_right kube_pod_container_info{origin_prometheus=~\"$origin_prometheus\",namespace=~\"$NameSpace\",container =~\"$Container\"}) by(container) *8", + "interval": "", + "intervalFactor": 1, + "legendFormat": "流出:{{ container }}", + "metric": "network", + "range": true, + "refId": "B", + "step": 10 + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "expr": "sum (rate (container_network_receive_bytes_total{origin_prometheus=~\"$origin_prometheus\",pod=~\"$Pod\",image!=\"\",name=~\"^k8s_.*\",node=~\"^$Node$\",namespace=~\"$NameSpace\",pod=~\".*$Container.*\"}[2m])) by (pod)", + "hide": true, + "interval": "", + "intervalFactor": 1, + "legendFormat": "-> {{ pod }}", + "metric": "network", + "refId": "C", + "step": 10 + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "expr": "- sum (rate (container_network_transmit_bytes_total{origin_prometheus=~\"$origin_prometheus\",pod=~\"$Pod\",image!=\"\",name=~\"^k8s_.*\",node=~\"^$Node$\",namespace=~\"$NameSpace\",pod=~\".*$Container.*\"}[2m])) by (pod)", + "hide": true, + "interval": "", + "intervalFactor": 1, + "legendFormat": "<- {{ pod }}", + "metric": "network", + "refId": "D", + "step": 10 + } + ], + "title": "微服务(容器名)每秒网络带宽 (可关联节点)", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, 
+ "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "none" + }, + "overrides": [ + { + "matcher": { + "id": "byRegexp", + "options": "/CPU限制.*/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "red", + "mode": "fixed" + } + } + ] + } + ] + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 0, + "y": 19 + }, + "id": 91, + "options": { + "legend": { + "calcs": [ + "max", + "last", + "mean" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Max", + "sortDesc": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "10.4.1", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "editorMode": "code", + "exemplar": true, + "expr": "sum(kube_pod_container_resource_limits{origin_prometheus=~\"$origin_prometheus\",resource=\"cpu\", unit=\"core\",container =~\"$Container\",container !=\"\",container!=\"POD\",namespace=~\"$NameSpace\"}) by (container)", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "CPU限制:{{ container}}", + "metric": "container_cpu", + "refId": "A", + "step": 10 + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "editorMode": "code", + "exemplar": true, + "expr": "sum(irate(container_cpu_usage_seconds_total{origin_prometheus=~\"$origin_prometheus\",container =~\"$Container\",container !=\"\",container!=\"POD\",namespace=~\"$NameSpace\"}[2m])) by (container)", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "CPU核使用:{{ container}}", + "metric": "container_cpu", + "refId": "B", + "step": 10 + } + ], + 
"title": "微服务(容器名)整体CPU使用核", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" + }, + "overrides": [ + { + "matcher": { + "id": "byRegexp", + "options": "/内存限制.*/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "red", + "mode": "fixed" + } + } + ] + } + ] + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 8, + "y": 19 + }, + "id": 90, + "options": { + "legend": { + "calcs": [ + "max", + "last", + "mean" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Max", + "sortDesc": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "10.4.1", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "editorMode": "code", + "expr": "sum (container_memory_working_set_bytes{origin_prometheus=~\"$origin_prometheus\",container =~\"$Container\",container !=\"\",container!=\"POD\",namespace=~\"$NameSpace\"}) by (container)", + "interval": "", + "intervalFactor": 1, + "legendFormat": "WSS:{{ container }}", + "metric": "container_memory_usage:sort_desc", + 
"range": true, + "refId": "A", + "step": 10 + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "editorMode": "code", + "expr": "sum(container_spec_memory_limit_bytes{origin_prometheus=~\"$origin_prometheus\",container =~\"$Container\",container !=\"\",container!=\"POD\",namespace=~\"$NameSpace\"}) by (container)", + "interval": "", + "intervalFactor": 1, + "legendFormat": "内存限制:{{ container }}", + "metric": "container_memory_usage:sort_desc", + "range": true, + "refId": "B", + "step": 10 + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "editorMode": "code", + "expr": "sum (container_memory_rss{origin_prometheus=~\"$origin_prometheus\",container =~\"$Container\",container !=\"\",container!=\"POD\",namespace=~\"$NameSpace\"}) by (container)", + "hide": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "RSS:{{ container }}", + "metric": "container_memory_usage:sort_desc", + "range": true, + "refId": "C", + "step": 10 + } + ], + "title": "微服务(容器名)整体内存使用量", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "none" + 
}, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 16, + "y": 19 + }, + "id": 59, + "options": { + "legend": { + "calcs": [ + "max", + "last", + "mean" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "10.4.1", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "editorMode": "code", + "expr": "count(kube_pod_container_info{origin_prometheus=~\"$origin_prometheus\",container =~\"$Container\",container !=\"\",container!=\"POD\",namespace=~\"$NameSpace\"}) by(container,namespace)", + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{namespace}}:{{ container }}", + "metric": "container_memory_usage:sort_desc", + "range": true, + "refId": "A", + "step": 10 + } + ], + "title": "微服务(容器名)Pod数", + "type": "timeseries" + } + ], + "title": "微服务(容器名)资源总览:所选微服务:【$Container】", + "type": "row" + } + ], + "refresh": "", + "schemaVersion": 40, + "tags": [ + "StarsL.cn", + "Prometheus", + "Kubernetes" + ], + "templating": { + "list": [ + { + "current": {}, + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "definition": "label_values(kube_node_info,origin_prometheus)", + "includeAll": false, + "label": "K8S", + "name": "origin_prometheus", + "options": [], + "query": { + "query": "label_values(kube_node_info,origin_prometheus)", + "refId": "StandardVariableQuery" + }, + "refresh": 2, + "regex": "", + "sort": 5, + "type": "query" + }, + { + "allValue": ".*", + "current": {}, + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "definition": "label_values(kube_node_info{origin_prometheus=~\"$origin_prometheus\"},node)", + "includeAll": true, + "label": "节点", + "name": "Node", + "options": [], + "query": { + "query": "label_values(kube_node_info{origin_prometheus=~\"$origin_prometheus\"},node)", + "refId": "StandardVariableQuery" + }, + "refresh": 2, + "regex": "", + "sort": 5, + 
"type": "query" + }, + { + "allValue": ".*", + "current": {}, + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "definition": "label_values(kube_namespace_created{origin_prometheus=~\"$origin_prometheus\"},namespace)", + "includeAll": true, + "label": "命名空间", + "name": "NameSpace", + "options": [], + "query": { + "query": "label_values(kube_namespace_created{origin_prometheus=~\"$origin_prometheus\"},namespace)", + "refId": "PrometheusVariableQueryEditor-VariableQuery" + }, + "refresh": 2, + "regex": "", + "sort": 5, + "type": "query" + }, + { + "allValue": ".*", + "current": {}, + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "definition": "label_values(kube_pod_container_info{origin_prometheus=~\"$origin_prometheus\",namespace=~\"$NameSpace\"},container)", + "includeAll": true, + "label": "微服务(容器名)", + "name": "Container", + "options": [], + "query": { + "query": "label_values(kube_pod_container_info{origin_prometheus=~\"$origin_prometheus\",namespace=~\"$NameSpace\"},container)", + "refId": "StandardVariableQuery" + }, + "refresh": 2, + "regex": "", + "sort": 5, + "type": "query" + }, + { + "allValue": ".*", + "current": {}, + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "definition": "label_values(kube_pod_container_info{origin_prometheus=~\"$origin_prometheus\",namespace=~\"$NameSpace\",container=~\"$Container\"},pod)", + "includeAll": true, + "label": "Pod", + "name": "Pod", + "options": [], + "query": { + "query": "label_values(kube_pod_container_info{origin_prometheus=~\"$origin_prometheus\",namespace=~\"$NameSpace\",container=~\"$Container\"},pod)", + "refId": "StandardVariableQuery" + }, + "refresh": 2, + "regex": "", + "sort": 5, + "type": "query" + } + ] + }, + "time": { + "from": "now-30m", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "30s", + "1m", + "5m", + "15m", + "30m", + "1h" + ] + }, + "timezone": "browser", + "title": "K8S Dashboard", + "uid": "StarsL_cn_K8S", + 
"version": 4, + "weekStart": "" +} \ No newline at end of file diff --git a/playbooks/roles/grafana-dashboard/Node-Exporter-Dashboard-202501015.json b/playbooks/roles/grafana-dashboard/Node-Exporter-Dashboard-202501015.json new file mode 100644 index 0000000..31a9be3 --- /dev/null +++ b/playbooks/roles/grafana-dashboard/Node-Exporter-Dashboard-202501015.json @@ -0,0 +1,5890 @@ +{ + "__inputs": [ + { + "name": "DS_指标", + "label": "指标", + "description": "", + "type": "datasource", + "pluginId": "prometheus", + "pluginName": "Prometheus" + } + ], + "__elements": {}, + "__requires": [ + { + "type": "panel", + "id": "bargauge", + "name": "Bar gauge", + "version": "" + }, + { + "type": "grafana", + "id": "grafana", + "name": "Grafana", + "version": "11.5.0-80683" + }, + { + "type": "datasource", + "id": "prometheus", + "name": "Prometheus", + "version": "1.0.0" + }, + { + "type": "panel", + "id": "stat", + "name": "Stat", + "version": "" + }, + { + "type": "panel", + "id": "table", + "name": "Table", + "version": "" + }, + { + "type": "panel", + "id": "timeseries", + "name": "Time series", + "version": "" + } + ], + "annotations": { + "list": [ + { + "$$hashKey": "object:2875", + "builtIn": 1, + "datasource": { + "type": "datasource", + "uid": "grafana" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "target": { + "limit": 100, + "matchAny": false, + "tags": [], + "type": "dashboard" + }, + "type": "dashboard" + } + ] + }, + "description": "【中文版本】2024.05.22更新,优化重要指标展示。使用Grafana10新样式重建,新增健康评分概念,并新增了整体资源消耗信息的一些图表。包含整体资源展示与资源明细图表:CPU 内存 磁盘 IO 网络等监控指标。https://github.com/starsliao/TenSunS", + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 0, + "id": null, + "links": [ + { + "$$hashKey": "object:2300", + "icon": "bolt", + "tags": [], + "targetBlank": true, + "title": "Update", + "tooltip": "更多仪表板", + "type": "link", + "url": "https://grafana.com/orgs/starsliao/dashboards" + }, + { + 
"$$hashKey": "object:2301", + "icon": "question", + "tags": [], + "targetBlank": true, + "title": "GitHub", + "tooltip": "GITHUB:TenSunS", + "type": "link", + "url": "https://github.com/starsliao/TenSunS" + }, + { + "$$hashKey": "object:2302", + "asDropdown": true, + "icon": "external link", + "tags": [], + "targetBlank": true, + "title": "", + "type": "dashboards" + } + ], + "panels": [ + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 187, + "panels": [], + "title": "🏡资源总览:当前JOB【$origin_prometheus:$job】当前选中主机【$show_name】实例:$instance", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "description": "分区使用率、磁盘读取、磁盘写入、下载带宽、上传带宽,如果有多个网卡或者多个分区,是采集的使用率最高的网卡或者分区的数值。\n\n连接数:CurrEstab - 当前状态为 ESTABLISHED 或 CLOSE-WAIT 的 TCP 连接数。\n\n健康值是一个新增的指标,根据CPU,内存,IO计算出来的一个值,低于90分说明系统的资源使用情况需要注意了,这是一个正在测试的指标,参数可能需要根据实际情况再优化。", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "center", + "cellOptions": { + "type": "auto" + }, + "filterable": false, + "inspect": false + }, + "decimals": 1, + "mappings": [], + "max": 100, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "none" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "内存" + }, + "properties": [ + { + "id": "unit", + "value": "bytes" + }, + { + "id": "decimals" + }, + { + "id": "custom.width", + "value": 66 + }, + { + "id": "color", + "value": { + "fixedColor": "blue", + "mode": "fixed" + } + }, + { + "id": "custom.cellOptions", + "value": { + "type": "color-text" + } + }, + { + "id": "decimals", + "value": 0 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "启动(天)" + }, + "properties": [ + { + "id": "unit", + "value": "none" + }, + { + "id": "custom.width", + "value": 69 + }, + { + "id": "decimals" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "磁盘读取*" + }, + "properties": [ 
+ { + "id": "unit", + "value": "binBps" + }, + { + "id": "custom.cellOptions", + "value": { + "mode": "gradient", + "type": "color-background" + } + }, + { + "id": "thresholds", + "value": { + "mode": "absolute", + "steps": [ + { + "color": "rgba(50, 172, 45, 0.97)", + "value": null + }, + { + "color": "rgba(237, 129, 40, 0.89)", + "value": 10485760 + }, + { + "color": "rgba(245, 54, 54, 0.9)", + "value": 20485760 + } + ] + } + }, + { + "id": "custom.width", + "value": 93 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "磁盘写入*" + }, + "properties": [ + { + "id": "unit", + "value": "binBps" + }, + { + "id": "custom.cellOptions", + "value": { + "mode": "gradient", + "type": "color-background" + } + }, + { + "id": "thresholds", + "value": { + "mode": "absolute", + "steps": [ + { + "color": "rgba(50, 172, 45, 0.97)", + "value": null + }, + { + "color": "rgba(237, 129, 40, 0.89)", + "value": 10485760 + }, + { + "color": "rgba(245, 54, 54, 0.9)", + "value": 20485760 + } + ] + } + }, + { + "id": "custom.width", + "value": 95 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "下载带宽*" + }, + "properties": [ + { + "id": "unit", + "value": "binbps" + }, + { + "id": "custom.cellOptions", + "value": { + "mode": "gradient", + "type": "color-background" + } + }, + { + "id": "thresholds", + "value": { + "mode": "absolute", + "steps": [ + { + "color": "rgba(50, 172, 45, 0.97)", + "value": null + }, + { + "color": "rgba(237, 129, 40, 0.89)", + "value": 30485760 + }, + { + "color": "rgba(245, 54, 54, 0.9)", + "value": 104857600 + } + ] + } + }, + { + "id": "custom.width", + "value": 91 + }, + { + "id": "decimals" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "上传带宽*" + }, + "properties": [ + { + "id": "unit", + "value": "binbps" + }, + { + "id": "custom.cellOptions", + "value": { + "mode": "gradient", + "type": "color-background" + } + }, + { + "id": "thresholds", + "value": { + "mode": "absolute", + "steps": [ + { + "color": "rgba(50, 172, 
45, 0.97)", + "value": null + }, + { + "color": "rgba(237, 129, 40, 0.89)", + "value": 30485760 + }, + { + "color": "rgba(245, 54, 54, 0.9)", + "value": 104857600 + } + ] + } + }, + { + "id": "custom.width", + "value": 85 + }, + { + "id": "decimals" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "负载" + }, + "properties": [ + { + "id": "decimals", + "value": 2 + }, + { + "id": "custom.width", + "value": 63 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "连接数" + }, + "properties": [ + { + "id": "custom.cellOptions", + "value": { + "mode": "gradient", + "type": "color-background" + } + }, + { + "id": "thresholds", + "value": { + "mode": "absolute", + "steps": [ + { + "color": "rgba(50, 172, 45, 0.97)", + "value": null + }, + { + "color": "rgba(237, 129, 40, 0.89)", + "value": 1000 + }, + { + "color": "rgba(245, 54, 54, 0.9)", + "value": 1500 + } + ] + } + }, + { + "id": "custom.width", + "value": 59 + }, + { + "id": "decimals" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "TCP_tw" + }, + "properties": [ + { + "id": "custom.cellOptions", + "value": { + "mode": "gradient", + "type": "color-background" + } + }, + { + "id": "thresholds", + "value": { + "mode": "absolute", + "steps": [ + { + "color": "rgba(50, 172, 45, 0.97)", + "value": null + }, + { + "color": "rgba(237, 129, 40, 0.89)", + "value": 5000 + }, + { + "color": "rgba(245, 54, 54, 0.9)", + "value": 20000 + } + ] + } + }, + { + "id": "custom.width", + "value": 69 + }, + { + "id": "decimals" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "CPU" + }, + "properties": [ + { + "id": "custom.width", + "value": 63 + }, + { + "id": "decimals", + "value": 0 + }, + { + "id": "custom.cellOptions", + "value": { + "type": "color-text" + } + }, + { + "id": "color", + "value": { + "fixedColor": "blue", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "IP" + }, + "properties": [ + { + "id": "custom.width", + "value": 111 + }, + { 
+ "id": "custom.filterable", + "value": true + }, + { + "id": "custom.align", + "value": "left" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "主机名" + }, + "properties": [ + { + "id": "custom.filterable", + "value": true + }, + { + "id": "custom.width" + }, + { + "id": "custom.align", + "value": "left" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "健康值" + }, + "properties": [ + { + "id": "custom.width", + "value": 75 + }, + { + "id": "thresholds", + "value": { + "mode": "absolute", + "steps": [ + { + "color": "red", + "value": null + }, + { + "color": "orange", + "value": 80 + }, + { + "color": "green", + "value": 90 + } + ] + } + }, + { + "id": "color", + "value": { + "mode": "thresholds" + } + }, + { + "id": "custom.cellOptions", + "value": { + "mode": "gradient", + "type": "color-background" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/.*使用率.*/" + }, + "properties": [ + { + "id": "unit", + "value": "percent" + }, + { + "id": "custom.cellOptions", + "value": { + "mode": "gradient", + "type": "gauge" + } + }, + { + "id": "color", + "value": { + "mode": "continuous-GrYlRd" + } + }, + { + "id": "custom.width", + "value": 110 + } + ] + } + ] + }, + "gridPos": { + "h": 11, + "w": 24, + "x": 0, + "y": 1 + }, + "id": 198, + "options": { + "cellHeight": "sm", + "footer": { + "countRows": false, + "enablePagination": true, + "fields": [ + "Value #B", + "Value #C", + "Value #L", + "Value #H", + "Value #I", + "Value #M", + "Value #N", + "Value #J", + "Value #K" + ], + "reducer": [ + "sum" + ], + "show": false + }, + "showHeader": true, + "sortBy": [ + { + "desc": false, + "displayName": "健康值" + } + ] + }, + "pluginVersion": "11.5.0-80683", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "editorMode": "code", + "exemplar": false, + "expr": "node_uname_info{origin_prometheus=~\"$origin_prometheus\",job=~\"$job\"} - 0", + "format": "table", + "hide": false, + "instant": true, + 
"interval": "", + "legendFormat": "主机名", + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum(time() - node_boot_time_seconds{origin_prometheus=~\"$origin_prometheus\",job=~\"$job\"})by(instance)/86400", + "format": "table", + "hide": false, + "instant": true, + "interval": "", + "legendFormat": "运行时间", + "refId": "D" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "editorMode": "code", + "exemplar": false, + "expr": "node_memory_MemTotal_bytes{origin_prometheus=~\"$origin_prometheus\",job=~\"$job\"} - 0", + "format": "table", + "hide": false, + "instant": true, + "interval": "", + "legendFormat": "总内存", + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "editorMode": "code", + "exemplar": false, + "expr": "count(node_cpu_seconds_total{origin_prometheus=~\"$origin_prometheus\",job=~\"$job\",mode='system'}) by (instance)", + "format": "table", + "hide": false, + "instant": true, + "interval": "", + "legendFormat": "总核数", + "refId": "C" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "editorMode": "code", + "exemplar": false, + "expr": "node_load5{origin_prometheus=~\"$origin_prometheus\",job=~\"$job\"}", + "format": "table", + "instant": true, + "interval": "", + "legendFormat": "5分钟负载", + "refId": "L" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "editorMode": "code", + "exemplar": false, + "expr": "(1 - avg(irate(node_cpu_seconds_total{origin_prometheus=~\"$origin_prometheus\",job=~\"$job\",mode=\"idle\"}[$interval])) by (instance)) * 100", + "format": "table", + "hide": false, + "instant": true, + "interval": "", + "legendFormat": "CPU使用率", + "refId": "F" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "editorMode": "code", + "exemplar": false, + "expr": "(1 - 
(node_memory_MemAvailable_bytes{origin_prometheus=~\"$origin_prometheus\",job=~\"$job\"} / (node_memory_MemTotal_bytes{origin_prometheus=~\"$origin_prometheus\",job=~\"$job\"})))* 100", + "format": "table", + "hide": false, + "instant": true, + "interval": "", + "legendFormat": "内存使用率", + "refId": "G" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "editorMode": "code", + "exemplar": false, + "expr": "max((node_filesystem_size_bytes{origin_prometheus=~\"$origin_prometheus\",job=~\"$job\",fstype=~\"ext.?|xfs\"}-node_filesystem_free_bytes{origin_prometheus=~\"$origin_prometheus\",job=~\"$job\",fstype=~\"ext.?|xfs\"}) *100/(node_filesystem_avail_bytes {origin_prometheus=~\"$origin_prometheus\",job=~\"$job\",fstype=~\"ext.?|xfs\"}+(node_filesystem_size_bytes{origin_prometheus=~\"$origin_prometheus\",job=~\"$job\",fstype=~\"ext.?|xfs\"}-node_filesystem_free_bytes{origin_prometheus=~\"$origin_prometheus\",job=~\"$job\",fstype=~\"ext.?|xfs\"})))by(instance)", + "format": "table", + "hide": false, + "instant": true, + "interval": "", + "legendFormat": "__auto", + "refId": "E" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "editorMode": "code", + "exemplar": false, + "expr": "max(irate(node_disk_read_bytes_total{origin_prometheus=~\"$origin_prometheus\",job=~\"$job\"}[$interval])) by (instance)", + "format": "table", + "hide": false, + "instant": true, + "interval": "", + "legendFormat": "最大读取", + "refId": "H" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "editorMode": "code", + "exemplar": false, + "expr": "max(irate(node_disk_written_bytes_total{origin_prometheus=~\"$origin_prometheus\",job=~\"$job\"}[$interval])) by (instance)", + "format": "table", + "hide": false, + "instant": true, + "interval": "", + "legendFormat": "最大写入", + "refId": "I" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "editorMode": "code", + "exemplar": false, + "expr": 
"node_netstat_Tcp_CurrEstab{origin_prometheus=~\"$origin_prometheus\",job=~\"$job\"} - 0", + "format": "table", + "hide": false, + "instant": true, + "interval": "", + "legendFormat": "连接数", + "refId": "M" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "editorMode": "code", + "exemplar": false, + "expr": "node_sockstat_TCP_tw{origin_prometheus=~\"$origin_prometheus\",job=~\"$job\"} - 0", + "format": "table", + "hide": false, + "instant": true, + "interval": "", + "legendFormat": "TIME_WAIT", + "refId": "N" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "editorMode": "code", + "exemplar": false, + "expr": "max(irate(node_network_receive_bytes_total{origin_prometheus=~\"$origin_prometheus\",job=~\"$job\"}[$interval])*8) by (instance)", + "format": "table", + "hide": false, + "instant": true, + "interval": "", + "legendFormat": "下载带宽", + "refId": "J" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "editorMode": "code", + "exemplar": false, + "expr": "max(irate(node_network_transmit_bytes_total{origin_prometheus=~\"$origin_prometheus\",job=~\"$job\"}[$interval])*8) by (instance)", + "format": "table", + "hide": false, + "instant": true, + "interval": "", + "legendFormat": "上传带宽", + "refId": "K" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "editorMode": "code", + "exemplar": false, + "expr": "((1-(1 - avg(irate(node_cpu_seconds_total{origin_prometheus=~\"$origin_prometheus\",job=~\"$job\",mode=\"idle\"}[$interval])) by (instance))^1.3)^(1/3)*0.5 + \r\n(1-(1 - avg(node_memory_MemAvailable_bytes{origin_prometheus=~\"$origin_prometheus\",job=~\"$job\"} / node_memory_MemTotal_bytes{origin_prometheus=~\"$origin_prometheus\",job=~\"$job\"})by (instance))^6)^(1/3)*0.3 + \r\n(1 - max(irate(node_disk_io_time_seconds_total{origin_prometheus=~\"$origin_prometheus\",job=~\"$job\"}[$interval]))by (instance)^1.1)^(1/2)*0.2)*100", + "format": "table", + "hide": 
false, + "instant": true, + "interval": "", + "legendFormat": "__auto", + "refId": "O" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "editorMode": "code", + "exemplar": false, + "expr": "max(irate(node_disk_io_time_seconds_total{origin_prometheus=~\"$origin_prometheus\",job=~\"$job\"}[$interval])) by (instance) *100", + "format": "table", + "hide": false, + "instant": true, + "interval": "", + "legendFormat": "IOutil使用率", + "refId": "P" + } + ], + "title": "服务器资源总览表【主机总数:$total】", + "transformations": [ + { + "id": "merge", + "options": { + "reducers": [] + } + }, + { + "id": "organize", + "options": { + "excludeByName": { + "exp": false, + "iid": false + }, + "includeByName": {}, + "indexByName": { + "Time": 20, + "Value #A": 36, + "Value #B": 7, + "Value #C": 8, + "Value #D": 4, + "Value #E": 13, + "Value #F": 10, + "Value #G": 11, + "Value #H": 14, + "Value #I": 15, + "Value #J": 18, + "Value #K": 19, + "Value #L": 9, + "Value #M": 16, + "Value #N": 17, + "Value #O": 6, + "Value #P": 12, + "__name__": 37, + "account": 21, + "cservice": 22, + "domainname": 23, + "exp": 5, + "group": 24, + "iaccount": 25, + "igroup": 26, + "iid": 3, + "iname": 27, + "instance": 2, + "job": 28, + "machine": 29, + "name": 1, + "nodename": 0, + "origin_prometheus": 30, + "region": 31, + "release": 32, + "sysname": 33, + "vendor": 34, + "version": 35 + }, + "renameByName": { + "Value #B": "内存", + "Value #C": "CPU", + "Value #D": "启动(天)", + "Value #E": "分区使用率*", + "Value #F": "CPU使用率", + "Value #G": "内存使用率", + "Value #H": "磁盘读取*", + "Value #I": "磁盘写入*", + "Value #J": "下载带宽*", + "Value #K": "上传带宽*", + "Value #L": "负载", + "Value #M": "连接数", + "Value #N": "TCP_tw", + "Value #O": "健康值", + "Value #P": "IOutil使用率*", + "exp": "到期日", + "iid": "实例ID", + "instance": "IP", + "name": "", + "nodename": "主机名" + } + } + }, + { + "id": "filterFieldsByName", + "options": { + "include": { + "names": [ + "主机名", + "IP", + "启动(天)", + "健康值", + "内存", + "CPU", + "负载", + 
"CPU使用率", + "内存使用率", + "IOutil使用率*", + "分区使用率*", + "磁盘读取*", + "磁盘写入*", + "连接数", + "TCP_tw", + "下载带宽*", + "上传带宽*" + ] + } + } + } + ], + "type": "table" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "description": "- P99:数据集按升序排列,第99分位置大的数据。(即升序排列后排在99%位置的数据)\n- 该表格需要在Prometheus增加记录规则(参考看板下载页)\n- 增加记录规则1小时后才会有展示的数据。\n- 时间范围[7d:1h]表示要查看过去 7 天内每小时的数据点。", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "align": "center", + "cellOptions": { + "type": "color-text" + }, + "inspect": false + }, + "mappings": [], + "max": 100, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [ + { + "matcher": { + "id": "byRegexp", + "options": "/.*%/" + }, + "properties": [ + { + "id": "unit", + "value": "percent" + }, + { + "id": "decimals", + "value": 1 + }, + { + "id": "custom.width", + "value": 72 + }, + { + "id": "color", + "value": { + "mode": "continuous-GrYlRd" + } + }, + { + "id": "custom.cellOptions", + "value": { + "type": "color-background" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "IP" + }, + "properties": [ + { + "id": "custom.width", + "value": 91 + }, + { + "id": "mappings", + "value": [ + { + "options": { + "pattern": "/(.+):.+/", + "result": { + "index": 0, + "text": "$1" + } + }, + "type": "regex" + } + ] + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "P99内存使用率" + }, + "properties": [ + { + "id": "custom.width", + "value": 79 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "P99CPU使用率" + }, + "properties": [ + { + "id": "custom.width", + "value": 101 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "名称" + }, + "properties": [ + { + "id": "custom.width", + "value": 128 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "主机名" + }, + "properties": [ + { + "id": "custom.width", + "value": 
138 + } + ] + } + ] + }, + "gridPos": { + "h": 7, + "w": 6, + "x": 0, + "y": 12 + }, + "id": 200, + "options": { + "cellHeight": "sm", + "footer": { + "countRows": false, + "fields": "", + "reducer": [ + "sum" + ], + "show": false + }, + "showHeader": true, + "sortBy": [ + { + "desc": true, + "displayName": "P99内存%" + } + ] + }, + "pluginVersion": "11.5.0-80683", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "editorMode": "code", + "exemplar": false, + "expr": "node_uname_info{origin_prometheus=~\"$origin_prometheus\",job=~\"$job\"} - 0", + "format": "table", + "instant": true, + "interval": "", + "legendFormat": "主机名", + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "editorMode": "code", + "exemplar": false, + "expr": "quantile_over_time(0.99, cpu:usage:rate1m{origin_prometheus=~\"$origin_prometheus\",job=~\"$job\"}[7d:1h])", + "format": "table", + "hide": false, + "instant": true, + "interval": "", + "legendFormat": "CPU使用率P99", + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "editorMode": "code", + "exemplar": false, + "expr": "quantile_over_time(0.99, mem:usage:rate1m{origin_prometheus=~\"$origin_prometheus\",job=~\"$job\"}[7d:1h])", + "format": "table", + "hide": false, + "instant": true, + "interval": "", + "legendFormat": "内存使用率P99", + "refId": "C" + } + ], + "title": "最近7天P99资源使用率", + "transformations": [ + { + "id": "seriesToColumns", + "options": { + "byField": "instance" + } + }, + { + "id": "filterFieldsByName", + "options": { + "include": { + "pattern": "/^Value #[^A]|^instance$|^nodename$/" + } + } + }, + { + "id": "organize", + "options": { + "excludeByName": {}, + "includeByName": {}, + "indexByName": { + "Value #B": 2, + "Value #C": 3, + "instance": 1, + "nodename": 0 + }, + "renameByName": { + "Value #B": "P99CPU%", + "Value #C": "P99内存%", + "instance": "IP", + "name": "", + "name 1": "", + "nodename": "主机名" + } + } + 
} + ], + "type": "table" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "series", + "axisLabel": "总5分钟负载", + "axisPlacement": "auto", + "axisSoftMin": 0, + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "none" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "平均%" + }, + "properties": [ + { + "id": "unit", + "value": "percent" + }, + { + "id": "custom.axisLabel", + "value": "总平均使用率" + }, + { + "id": "custom.pointSize", + "value": 3 + }, + { + "id": "custom.lineWidth", + "value": 1 + }, + { + "id": "custom.showPoints", + "value": "always" + }, + { + "id": "custom.axisSoftMin" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "核数" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#C4162A", + "mode": "fixed" + } + }, + { + "id": "custom.pointSize", + "value": 3 + }, + { + "id": "custom.drawStyle", + "value": "points" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "5m负载" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "orange", + "mode": "fixed" + } + } + ] + } + ] + }, + "gridPos": { + "h": 7, + "w": 6, + "x": 6, + "y": 12 + }, + "id": 191, + 
"maxDataPoints": 100, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "maxHeight": 600, + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "11.5.0-80683", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "editorMode": "code", + "exemplar": true, + "expr": "sum(node_load5{origin_prometheus=~\"$origin_prometheus\",job=~\"$job\"})", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "5m负载", + "range": true, + "refId": "A", + "step": 240 + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "editorMode": "code", + "exemplar": true, + "expr": "count(node_cpu_seconds_total{origin_prometheus=~\"$origin_prometheus\",job=~\"$job\",mode='system'})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "核数", + "refId": "B", + "step": 240 + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "editorMode": "code", + "exemplar": true, + "expr": "avg(1 - avg(irate(node_cpu_seconds_total{origin_prometheus=~\"$origin_prometheus\",job=~\"$job\",mode=\"idle\"}[$interval])) by (instance)) * 100", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "平均%", + "range": true, + "refId": "F", + "step": 240 + } + ], + "title": "整体总负载与整体平均CPU使用率", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "series", + "axisLabel": "总已用内存", + "axisPlacement": "auto", + "axisSoftMin": 0, + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, 
+ "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "decimals": 0, + "links": [], + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "总内存" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#C4162A", + "mode": "fixed" + } + }, + { + "id": "custom.drawStyle", + "value": "points" + }, + { + "id": "custom.pointSize", + "value": 3 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "平均%" + }, + "properties": [ + { + "id": "unit", + "value": "percent" + }, + { + "id": "decimals", + "value": 1 + }, + { + "id": "custom.axisLabel", + "value": "总平均使用率" + }, + { + "id": "custom.showPoints", + "value": "always" + }, + { + "id": "custom.lineWidth", + "value": 1 + }, + { + "id": "custom.pointSize", + "value": 3 + }, + { + "id": "custom.axisSoftMin" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "总已用" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "orange", + "mode": "fixed" + } + } + ] + } + ] + }, + "gridPos": { + "h": 7, + "w": 6, + "x": 12, + "y": 12 + }, + "id": 195, + "maxDataPoints": 100, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "maxHeight": 600, + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "11.5.0-80683", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "editorMode": "code", + "exemplar": true, + "expr": 
"sum(node_memory_MemTotal_bytes{origin_prometheus=~\"$origin_prometheus\",job=~\"$job\"} - node_memory_MemAvailable_bytes{origin_prometheus=~\"$origin_prometheus\",job=~\"$job\"})", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "总已用", + "range": true, + "refId": "B", + "step": 4 + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "editorMode": "code", + "exemplar": true, + "expr": "sum(node_memory_MemTotal_bytes{origin_prometheus=~\"$origin_prometheus\",job=~\"$job\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "总内存", + "refId": "A", + "step": 4 + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "editorMode": "code", + "exemplar": true, + "expr": "(sum(node_memory_MemTotal_bytes{origin_prometheus=~\"$origin_prometheus\",job=~\"$job\"} - node_memory_MemAvailable_bytes{origin_prometheus=~\"$origin_prometheus\",job=~\"$job\"}) / sum(node_memory_MemTotal_bytes{origin_prometheus=~\"$origin_prometheus\",job=~\"$job\"}))*100", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "平均%", + "range": true, + "refId": "H" + } + ], + "title": "整体总内存与整体平均内存使用率", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "series", + "axisLabel": "总磁盘使用量", + "axisPlacement": "auto", + "axisSoftMin": 0, + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": 
"linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "平均%" + }, + "properties": [ + { + "id": "unit", + "value": "percent" + }, + { + "id": "decimals", + "value": 2 + }, + { + "id": "custom.axisLabel", + "value": "总平均使用率" + }, + { + "id": "custom.lineWidth", + "value": 1 + }, + { + "id": "custom.showPoints", + "value": "always" + }, + { + "id": "custom.pointSize", + "value": 3 + }, + { + "id": "custom.axisSoftMin" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "总磁盘" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#C4162A", + "mode": "fixed" + } + }, + { + "id": "custom.drawStyle", + "value": "points" + }, + { + "id": "custom.pointSize", + "value": 3 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "使用量" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "orange", + "mode": "fixed" + } + } + ] + } + ] + }, + "gridPos": { + "h": 7, + "w": 6, + "x": 18, + "y": 12 + }, + "id": 197, + "maxDataPoints": 100, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "maxHeight": 600, + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "11.5.0-80683", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "editorMode": "code", + "exemplar": true, + "expr": "sum(avg(node_filesystem_size_bytes{origin_prometheus=~\"$origin_prometheus\",job=~\"$job\",fstype=~\"xfs|ext.*\"})by(device,instance)) - 
sum(avg(node_filesystem_free_bytes{origin_prometheus=~\"$origin_prometheus\",job=~\"$job\",fstype=~\"xfs|ext.*\"})by(device,instance))", + "format": "time_series", + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "使用量", + "refId": "C" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "editorMode": "code", + "exemplar": true, + "expr": "sum(avg(node_filesystem_size_bytes{origin_prometheus=~\"$origin_prometheus\",job=~\"$job\",fstype=~\"xfs|ext.*\"})by(device,instance))", + "format": "time_series", + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "总磁盘", + "refId": "E" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "editorMode": "code", + "exemplar": true, + "expr": "(sum(avg(node_filesystem_size_bytes{origin_prometheus=~\"$origin_prometheus\",job=~\"$job\",fstype=~\"xfs|ext.*\"})by(device,instance)) - sum(avg(node_filesystem_free_bytes{origin_prometheus=~\"$origin_prometheus\",job=~\"$job\",fstype=~\"xfs|ext.*\"})by(device,instance))) *100/(sum(avg(node_filesystem_avail_bytes{origin_prometheus=~\"$origin_prometheus\",job=~\"$job\",fstype=~\"xfs|ext.*\"})by(device,instance))+(sum(avg(node_filesystem_size_bytes{origin_prometheus=~\"$origin_prometheus\",job=~\"$job\",fstype=~\"xfs|ext.*\"})by(device,instance)) - sum(avg(node_filesystem_free_bytes{origin_prometheus=~\"$origin_prometheus\",job=~\"$job\",fstype=~\"xfs|ext.*\"})by(device,instance))))", + "format": "time_series", + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "平均%", + "refId": "A" + } + ], + "title": "整体总磁盘与整体平均磁盘使用率", + "type": "timeseries" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 19 + }, + "id": 189, + "panels": [], + "title": "🧮资源明细:【$show_name】【$instance】", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "description": 
"本看板中的:磁盘总量、使用量、可用量、使用率保持和df命令的Size、Used、Avail、Use% 列的值一致,并且Use%的值会四舍五入保留一位小数,会更加准确。\n\n注:df中Use%算法为:(size - free) * 100 / (avail + (size - free)),结果是整除则为该值,非整除则为该值+1,结果的单位是%。\n参考df命令源码:", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "center", + "cellOptions": { + "type": "auto" + }, + "inspect": false + }, + "displayName": "", + "mappings": [], + "thresholds": { + "mode": "percentage", + "steps": [ + { + "color": "green" + } + ] + }, + "unit": "none" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "分区" + }, + "properties": [ + { + "id": "custom.width", + "value": 81 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "剩余空间" + }, + "properties": [ + { + "id": "unit", + "value": "bytes" + }, + { + "id": "decimals" + }, + { + "id": "custom.cellOptions", + "value": { + "type": "color-text" + } + }, + { + "id": "thresholds", + "value": { + "mode": "absolute", + "steps": [ + { + "color": "red" + }, + { + "color": "orange", + "value": 10000000000 + }, + { + "color": "green", + "value": 20000000000 + } + ] + } + }, + { + "id": "custom.width", + "value": 72 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "使用率" + }, + "properties": [ + { + "id": "unit", + "value": "percent" + }, + { + "id": "decimals", + "value": 0 + }, + { + "id": "custom.cellOptions", + "value": { + "mode": "gradient", + "type": "gauge" + } + }, + { + "id": "custom.width", + "value": 115 + }, + { + "id": "min", + "value": 0 + }, + { + "id": "max", + "value": 100 + }, + { + "id": "color", + "value": { + "mode": "continuous-GrYlRd" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "总空间" + }, + "properties": [ + { + "id": "unit", + "value": "bytes" + }, + { + "id": "custom.width", + "value": 75 + }, + { + "id": "color", + "value": { + "fixedColor": "blue", + "mode": "fixed" + } + }, + { + "id": "custom.cellOptions", + "value": { + "type": "color-text" + } + } + ] + }, + { + "matcher": { 
+ "id": "byName", + "options": "类型" + }, + "properties": [ + { + "id": "custom.width", + "value": 51 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "设备名" + }, + "properties": [ + { + "id": "custom.width", + "value": 120 + } + ] + } + ] + }, + "gridPos": { + "h": 6, + "w": 8, + "x": 0, + "y": 20 + }, + "id": 181, + "links": [ + { + "targetBlank": true, + "title": "https://github.com/coreutils/coreutils/blob/master/src/df.c", + "url": "https://github.com/coreutils/coreutils/blob/master/src/df.c" + } + ], + "options": { + "cellHeight": "sm", + "footer": { + "countRows": false, + "fields": "", + "reducer": [ + "sum" + ], + "show": false + }, + "showHeader": true, + "sortBy": [ + { + "desc": true, + "displayName": "使用率" + } + ] + }, + "pluginVersion": "11.5.0-80683", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "editorMode": "code", + "exemplar": false, + "expr": "node_filesystem_size_bytes{instance=~\"$instance\",fstype=~\"ext.*|xfs|nfs\",mountpoint !~\".*pod.*\"}-0", + "format": "table", + "hide": false, + "instant": true, + "interval": "", + "intervalFactor": 1, + "legendFormat": "总量", + "refId": "C" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "editorMode": "code", + "exemplar": false, + "expr": "node_filesystem_avail_bytes {instance=~\"$instance\",fstype=~\"ext.*|xfs|nfs\",mountpoint !~\".*pod.*\"}-0", + "format": "table", + "hide": false, + "instant": true, + "interval": "10s", + "intervalFactor": 1, + "legendFormat": "", + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "exemplar": false, + "expr": "(node_filesystem_size_bytes{instance=~\"$instance\",fstype=~\"ext.*|xfs|nfs\",mountpoint !~\".*pod.*\"}-node_filesystem_free_bytes{instance=~\"$instance\",fstype=~\"ext.*|xfs|nfs\",mountpoint !~\".*pod.*\"}) *100/(node_filesystem_avail_bytes {instance=~\"$instance\",fstype=~\"ext.*|xfs|nfs\",mountpoint 
!~\".*pod.*\"}+(node_filesystem_size_bytes{instance=~\"$instance\",fstype=~\"ext.*|xfs|nfs\",mountpoint !~\".*pod.*\"}-node_filesystem_free_bytes{instance=~\"$instance\",fstype=~\"ext.*|xfs|nfs\",mountpoint !~\".*pod.*\"}))", + "format": "table", + "hide": false, + "instant": true, + "interval": "", + "intervalFactor": 1, + "legendFormat": "", + "refId": "B" + } + ], + "title": "【$show_name】:分区可用空间(EXT.*/XFS/NFS)", + "transformations": [ + { + "id": "merge", + "options": { + "reducers": [] + } + }, + { + "id": "filterFieldsByName", + "options": { + "include": { + "names": [ + "device", + "fstype", + "mountpoint", + "Value #C", + "Value #A", + "Value #B" + ] + } + } + }, + { + "id": "organize", + "options": { + "excludeByName": {}, + "indexByName": {}, + "renameByName": { + "Value #A": "剩余空间", + "Value #B": "使用率", + "Value #C": "总空间", + "device": "设备名", + "fstype": "类型", + "mountpoint": "分区" + } + } + } + ], + "type": "table" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "decimals": 1, + "mappings": [ + { + "options": { + "0": { + "text": "N/A" + } + }, + "type": "value" + } + ], + "max": 100, + "min": 0.1, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "#EAB839", + "value": 70 + }, + { + "color": "red", + "value": 90 + } + ] + }, + "unit": "percent" + }, + "overrides": [] + }, + "gridPos": { + "h": 6, + "w": 4, + "x": 8, + "y": 20 + }, + "id": 177, + "options": { + "displayMode": "lcd", + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": false + }, + "maxVizHeight": 300, + "minVizHeight": 45, + "minVizWidth": 0, + "namePlacement": "auto", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "showUnfilled": true, + "sizing": "auto", + "valueMode": "color" + }, + "pluginVersion": "11.5.0-80683", + 
"targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "exemplar": false, + "expr": "100 - (avg(irate(node_cpu_seconds_total{instance=~\"$instance\",mode=\"idle\"}[$interval])) * 100)", + "instant": true, + "interval": "", + "legendFormat": "总CPU使用率", + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "editorMode": "code", + "exemplar": false, + "expr": "avg(irate(node_cpu_seconds_total{instance=~\"$instance\",mode=\"iowait\"}[$interval])) * 100", + "hide": true, + "instant": true, + "interval": "", + "legendFormat": "IOwait使用率", + "refId": "C" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "exemplar": false, + "expr": "(1 - (node_memory_MemAvailable_bytes{instance=~\"$instance\"} / (node_memory_MemTotal_bytes{instance=~\"$instance\"})))* 100", + "instant": true, + "interval": "", + "legendFormat": "内存使用率", + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "editorMode": "code", + "exemplar": false, + "expr": "(node_filesystem_size_bytes{instance=~\"$instance\",fstype=~\"ext.*|xfs\",mountpoint=\"$maxmount\"}-node_filesystem_free_bytes{instance=~\"$instance\",fstype=~\"ext.*|xfs\",mountpoint=\"$maxmount\"})*100 /(node_filesystem_avail_bytes {instance=~\"$instance\",fstype=~\"ext.*|xfs\",mountpoint=\"$maxmount\"}+(node_filesystem_size_bytes{instance=~\"$instance\",fstype=~\"ext.*|xfs\",mountpoint=\"$maxmount\"}-node_filesystem_free_bytes{instance=~\"$instance\",fstype=~\"ext.*|xfs\",mountpoint=\"$maxmount\"}))", + "hide": false, + "instant": true, + "interval": "", + "legendFormat": "最大分区使用率({{mountpoint}})", + "refId": "D" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "editorMode": "code", + "exemplar": false, + "expr": "(1 - ((node_memory_SwapFree_bytes{instance=~\"$instance\"} + 1)/ (node_memory_SwapTotal_bytes{instance=~\"$instance\"} + 1))) * 100", + "instant": true, + "interval": "", + 
"legendFormat": "交换分区使用率", + "refId": "F" + } + ], + "type": "bargauge" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + } + ] + }, + "unit": "none" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "运行时间" + }, + "properties": [ + { + "id": "thresholds", + "value": { + "mode": "absolute", + "steps": [ + { + "color": "red" + }, + { + "color": "orange", + "value": 3600 + }, + { + "color": "green", + "value": 7200 + } + ] + } + }, + { + "id": "unit", + "value": "s" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "总内存" + }, + "properties": [ + { + "id": "unit", + "value": "bytes" + }, + { + "id": "decimals", + "value": 0 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "CPU iowait" + }, + "properties": [ + { + "id": "unit", + "value": "percent" + }, + { + "id": "thresholds", + "value": { + "mode": "percentage", + "steps": [ + { + "color": "green" + }, + { + "color": "orange", + "value": 40 + }, + { + "color": "red", + "value": 60 + } + ] + } + }, + { + "id": "decimals", + "value": 2 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "总文件描述符" + }, + "properties": [ + { + "id": "unit", + "value": "short" + }, + { + "id": "thresholds", + "value": { + "mode": "absolute", + "steps": [ + { + "color": "red" + }, + { + "color": "orange", + "value": 50000 + }, + { + "color": "green", + "value": 200000 + } + ] + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "最大打开文件" + }, + "properties": [ + { + "id": "unit", + "value": "none" + }, + { + "id": "thresholds", + "value": { + "mode": "absolute", + "steps": [ + { + "color": "red" + }, + { + "color": "orange", + "value": 10000 + }, + { + "color": "green", + "value": 50000 + } + ] + } + } + ] + } + ] + }, + "gridPos": { + "h": 6, + "w": 4, + "x": 12, + "y": 20 
+ }, + "id": 206, + "interval": "15s", + "options": { + "colorMode": "background", + "graphMode": "none", + "justifyMode": "center", + "orientation": "auto", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "text": { + "valueSize": 15 + }, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "11.5.0-80683", + "targets": [ + { + "alias": "", + "bucketAggs": [ + { + "id": "2", + "settings": { + "interval": "auto" + }, + "type": "date_histogram" + } + ], + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "exemplar": false, + "expr": "avg(time() - node_boot_time_seconds{instance=~\"$instance\"})", + "hide": false, + "instant": true, + "interval": "", + "legendFormat": "运行时间", + "metrics": [ + { + "id": "1", + "type": "count" + } + ], + "query": "", + "refId": "C", + "timeField": "@timestamp" + }, + { + "alias": "", + "bucketAggs": [ + { + "id": "2", + "settings": { + "interval": "auto" + }, + "type": "date_histogram" + } + ], + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "exemplar": false, + "expr": "count(node_cpu_seconds_total{instance=~\"$instance\", mode='system'})", + "instant": true, + "interval": "", + "legendFormat": "CPU 核数", + "metrics": [ + { + "id": "1", + "type": "count" + } + ], + "query": "", + "refId": "A", + "timeField": "@timestamp" + }, + { + "alias": "", + "bucketAggs": [ + { + "id": "2", + "settings": { + "interval": "auto" + }, + "type": "date_histogram" + } + ], + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "exemplar": false, + "expr": "sum(node_memory_MemTotal_bytes{instance=~\"$instance\"})", + "hide": false, + "instant": true, + "interval": "", + "legendFormat": "总内存", + "metrics": [ + { + "id": "1", + "type": "count" + } + ], + "query": "", + "refId": "B", + "timeField": "@timestamp" + }, + { + "alias": "", + "bucketAggs": [ + { + "id": "2", + 
"settings": { + "interval": "auto" + }, + "type": "date_histogram" + } + ], + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "exemplar": false, + "expr": "avg(irate(node_cpu_seconds_total{instance=~\"$instance\",mode=\"iowait\"}[$interval])) * 100", + "hide": false, + "instant": true, + "interval": "", + "legendFormat": "CPU iowait", + "metrics": [ + { + "id": "1", + "type": "count" + } + ], + "query": "", + "refId": "D", + "timeField": "@timestamp" + }, + { + "alias": "", + "bucketAggs": [ + { + "id": "2", + "settings": { + "interval": "auto" + }, + "type": "date_histogram" + } + ], + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "exemplar": false, + "expr": "node_filefd_maximum{instance=~\"$instance\"}", + "hide": false, + "instant": true, + "interval": "", + "legendFormat": "总文件描述符", + "metrics": [ + { + "id": "1", + "type": "count" + } + ], + "query": "", + "refId": "E", + "timeField": "@timestamp" + }, + { + "alias": "", + "bucketAggs": [ + { + "id": "2", + "settings": { + "interval": "auto" + }, + "type": "date_histogram" + } + ], + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "editorMode": "code", + "exemplar": false, + "expr": "process_max_fds{job=\"node_exporter\",instance=~\"$instance\"}", + "hide": false, + "instant": true, + "interval": "", + "legendFormat": "最大打开文件", + "metrics": [ + { + "id": "1", + "type": "count" + } + ], + "query": "", + "refId": "F", + "timeField": "@timestamp" + } + ], + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "bars", + "fillOpacity": 60, + "gradientMode": "opacity", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + 
}, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "cn-shenzhen.i-wz9cq1dcb6zwc39ehw59_cni0_in" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "light-red", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "cn-shenzhen.i-wz9cq1dcb6zwc39ehw59_cni0_in下载" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "green", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "cn-shenzhen.i-wz9cq1dcb6zwc39ehw59_cni0_out上传" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "yellow", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "cn-shenzhen.i-wz9cq1dcb6zwc39ehw59_eth0_in下载" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "purple", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "cn-shenzhen.i-wz9cq1dcb6zwc39ehw59_eth0_out" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "purple", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "cn-shenzhen.i-wz9cq1dcb6zwc39ehw59_eth0_out上传" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "blue", + "mode": "fixed" + } + } + ] + } + ] + }, + "gridPos": { + "h": 6, + "w": 8, + "x": 16, + "y": 20 + }, + "id": 183, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + 
"tooltip": { + "maxHeight": 600, + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "11.5.0-80683", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "exemplar": true, + "expr": "increase(node_network_receive_bytes_total{instance=~\"$instance\",device=~\"$device\"}[1m])", + "interval": "1m", + "intervalFactor": 2, + "legendFormat": "{{device}}_in下载", + "metric": "", + "refId": "A", + "step": 600, + "target": "" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "exemplar": true, + "expr": "increase(node_network_transmit_bytes_total{instance=~\"$instance\",device=~\"$device\"}[1m])", + "hide": false, + "interval": "1m", + "intervalFactor": 2, + "legendFormat": "{{device}}_out上传", + "refId": "B", + "step": 600 + } + ], + "title": "每分钟流量$device", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "series", + "axisLabel": "总使用率", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "decimals": 0, + "links": [], + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "percent" + }, + "overrides": [ + { + "matcher": { + "id": "byRegexp", + "options": "/.*总使用率/" + }, + "properties": [ + { + "id": "color", + 
"value": { + "fixedColor": "dark-red", + "mode": "fixed" + } + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "custom.lineWidth", + "value": 2 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "磁盘IO使用率" + }, + "properties": [ + { + "id": "custom.axisPlacement", + "value": "right" + }, + { + "id": "custom.axisLabel", + "value": "磁盘IO使用率" + }, + { + "id": "color", + "value": { + "fixedColor": "#0ad4ff", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "用户使用率" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "yellow", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "系统使用率" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "green", + "mode": "fixed" + } + } + ] + } + ] + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 0, + "y": 26 + }, + "id": 207, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Max", + "sortDesc": true + }, + "tooltip": { + "maxHeight": 600, + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "11.5.0-80683", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "expr": "(1 - avg(irate(node_cpu_seconds_total{instance=~\"$instance\",mode=\"idle\"}[$interval])) by (instance))*100", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "总使用率", + "refId": "F", + "step": 240 + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "editorMode": "code", + "expr": "avg(irate(node_cpu_seconds_total{instance=~\"$instance\",mode=\"system\"}[$interval])) by (instance) *100", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "系统使用率", + "refId": "A", + "step": 20 + }, + { + "datasource": { + "type": "prometheus", + 
"uid": "${DS_指标}" + }, + "expr": "avg(irate(node_cpu_seconds_total{instance=~\"$instance\",mode=\"user\"}[$interval])) by (instance) *100", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "用户使用率", + "refId": "B", + "step": 240 + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "expr": "avg(irate(node_cpu_seconds_total{instance=~\"$instance\",mode=\"iowait\"}[$interval])) by (instance) *100", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "磁盘IO使用率", + "refId": "D", + "step": 240 + } + ], + "title": "CPU使用率", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "series", + "axisLabel": "剩余内存", + "axisPlacement": "left", + "axisSoftMin": 0, + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "可用" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#9ac48a", + "mode": "fixed" + } + }, + { + "id": "custom.lineWidth", + "value": 2 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "总内存" + }, + 
"properties": [ + { + "id": "color", + "value": { + "fixedColor": "#C4162A", + "mode": "fixed" + } + }, + { + "id": "custom.lineWidth", + "value": 1 + }, + { + "id": "custom.pointSize", + "value": 3 + }, + { + "id": "custom.showPoints", + "value": "always" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "使用率" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#00d1ff", + "mode": "fixed" + } + }, + { + "id": "custom.lineWidth", + "value": 1 + }, + { + "id": "unit", + "value": "percent" + }, + { + "id": "custom.axisLabel", + "value": "内存使用率" + }, + { + "id": "custom.pointSize", + "value": 3 + }, + { + "id": "custom.showPoints", + "value": "always" + }, + { + "id": "custom.axisPlacement", + "value": "right" + }, + { + "id": "custom.axisSoftMin" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "已用" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "orange", + "mode": "fixed" + } + } + ] + } + ] + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 8, + "y": 26 + }, + "id": 156, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Max", + "sortDesc": true + }, + "tooltip": { + "maxHeight": 600, + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "11.5.0-80683", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "expr": "node_memory_MemAvailable_bytes{instance=~\"$instance\"}", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "可用", + "refId": "F", + "step": 4 + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "exemplar": true, + "expr": "node_memory_MemTotal_bytes{instance=~\"$instance\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "2m", + "intervalFactor": 1, + "legendFormat": "总内存", + "refId": "A", + "step": 4 + }, + { + 
"datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "expr": "node_memory_MemTotal_bytes{instance=~\"$instance\"} - node_memory_MemAvailable_bytes{instance=~\"$instance\"}", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "已用", + "refId": "B", + "step": 4 + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "exemplar": true, + "expr": "(1 - (node_memory_MemAvailable_bytes{instance=~\"$instance\"} / (node_memory_MemTotal_bytes{instance=~\"$instance\"})))* 100", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "使用率", + "refId": "H" + } + ], + "title": "内存信息", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "continuous-GrYlRd" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "series", + "axisLabel": "容量使用率", + "axisPlacement": "auto", + "axisSoftMax": 100, + "axisSoftMin": 0, + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "percent" + }, + "overrides": [ + { + "matcher": { + "id": "byRegexp", + "options": "/Inodes.*/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#00d1ff", + "mode": "fixed" + } + }, + { + "id": 
"custom.lineWidth", + "value": 0 + }, + { + "id": "custom.showPoints", + "value": "always" + }, + { + "id": "custom.pointSize", + "value": 3 + }, + { + "id": "custom.axisLabel", + "value": "Inodes使用率" + } + ] + } + ] + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 16, + "y": 26 + }, + "id": 174, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "maxHeight": 600, + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "11.5.0-80683", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "editorMode": "code", + "exemplar": true, + "expr": "(node_filesystem_size_bytes{instance=~\"$instance\",fstype=~\"ext.*|xfs|nfs\",mountpoint !~\".*pod.*\"}-node_filesystem_free_bytes{instance=~\"$instance\",fstype=~\"ext.*|xfs|nfs\",mountpoint !~\".*pod.*\"}) *100/(node_filesystem_avail_bytes {instance=~\"$instance\",fstype=~\"ext.*|xfs|nfs\",mountpoint !~\".*pod.*\"}+(node_filesystem_size_bytes{instance=~\"$instance\",fstype=~\"ext.*|xfs|nfs\",mountpoint !~\".*pod.*\"}-node_filesystem_free_bytes{instance=~\"$instance\",fstype=~\"ext.*|xfs|nfs\",mountpoint !~\".*pod.*\"}))", + "format": "time_series", + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "容量%:{{mountpoint}}", + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "editorMode": "code", + "exemplar": true, + "expr": "(1 - node_filesystem_files_free{instance=~\"$instance\",fstype=~\"ext.?|xfs|nfs\",mountpoint !~\".*pod.*\"} / node_filesystem_files{instance=~\"$instance\",fstype=~\"ext.?|xfs|nfs\",mountpoint !~\".*pod.*\"}) * 100", + "hide": false, + "interval": "", + "legendFormat": "Inodes%:{{mountpoint}}", + "range": true, + "refId": "B" + } + ], + "title": "磁盘使用率", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "fieldConfig": { + 
"defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "series", + "axisLabel": "1分钟负载", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "none" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "15分钟负载" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "purple", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "1分钟负载" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "orange", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "5分钟负载" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "blue", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/.*总核数/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#C4162A", + "mode": "fixed" + } + }, + { + "id": "custom.lineWidth", + "value": 1 + }, + { + "id": "custom.pointSize", + "value": 4 + }, + { + "id": "custom.showPoints", + "value": "always" + } + ] + } + ] + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 0, + "y": 34 + }, + "id": 13, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + 
"tooltip": { + "maxHeight": 600, + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "11.5.0-80683", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "exemplar": true, + "expr": "node_load1{instance=~\"$instance\"}", + "format": "time_series", + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "1分钟负载", + "metric": "", + "refId": "A", + "step": 20, + "target": "" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "exemplar": true, + "expr": " sum(count(node_cpu_seconds_total{instance=~\"$instance\", mode='system'}) by (cpu,instance)) by(instance)", + "format": "time_series", + "instant": false, + "interval": "2m", + "intervalFactor": 1, + "legendFormat": "CPU总核数", + "refId": "D", + "step": 20 + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "expr": "node_load5{instance=~\"$instance\"}", + "format": "time_series", + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "5分钟负载", + "refId": "B", + "step": 20 + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "expr": "node_load15{instance=~\"$instance\"}", + "format": "time_series", + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "15分钟负载", + "refId": "C", + "step": 20 + } + ], + "title": "系统平均负载", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "bars", + "fillOpacity": 100, + "gradientMode": "opacity", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + 
"scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "none" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "等待IO完成阻塞的进程" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "red", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "运行态的进程" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "green", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byValue", + "options": { + "op": "gte", + "reducer": "allIsZero", + "value": 0 + } + }, + "properties": [ + { + "id": "custom.hideFrom", + "value": { + "legend": true, + "tooltip": true, + "viz": false + } + } + ] + }, + { + "matcher": { + "id": "byValue", + "options": { + "op": "gte", + "reducer": "allIsNull", + "value": 0 + } + }, + "properties": [ + { + "id": "custom.hideFrom", + "value": { + "legend": true, + "tooltip": true, + "viz": false + } + } + ] + } + ] + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 8, + "y": 34 + }, + "id": 202, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "maxHeight": 600, + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "11.5.0-80683", + "targets": [ + { + "calculatedInterval": "2m", + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "datasourceErrors": {}, + "errors": {}, + "exemplar": true, + "expr": "node_procs_running{instance=~\"$instance\"}", + "format": "time_series", + "interval": "1m", + "intervalFactor": 1, + "legendFormat": "运行态的进程", + "metric": "", + "prometheusLink": 
"/api/datasources/proxy/1/graph#%5B%7B%22expr%22%3A%22node_procs_running%7Binstance%3D%5C%22%24host%5C%22%7D%22%2C%22range_input%22%3A%2243200s%22%2C%22end_input%22%3A%222015-9-18%2013%3A46%22%2C%22step_input%22%3A%22%22%2C%22stacked%22%3Atrue%2C%22tab%22%3A0%7D%5D", + "refId": "A", + "step": 5, + "target": "" + }, + { + "calculatedInterval": "2m", + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "datasourceErrors": {}, + "errors": {}, + "exemplar": true, + "expr": "node_procs_blocked{instance=~\"$instance\"}", + "format": "time_series", + "interval": "1m", + "intervalFactor": 1, + "legendFormat": "等待IO完成阻塞的进程", + "metric": "", + "prometheusLink": "/api/datasources/proxy/1/graph#%5B%7B%22expr%22%3A%22node_procs_blocked%7Binstance%3D%5C%22%24host%5C%22%7D%22%2C%22range_input%22%3A%2243200s%22%2C%22end_input%22%3A%222015-9-18%2013%3A46%22%2C%22step_input%22%3A%22%22%2C%22stacked%22%3Atrue%2C%22tab%22%3A0%7D%5D", + "refId": "B", + "step": 5, + "target": "" + } + ], + "title": "进程运行状态", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "series", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "opacity", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + 
"value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "总使用FD" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "red", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "总使用FD占比" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "yellow", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "每秒上下文切换" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "dark-blue", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/.*占比/" + }, + "properties": [ + { + "id": "custom.lineWidth", + "value": 0 + }, + { + "id": "unit", + "value": "percent" + }, + { + "id": "custom.showPoints", + "value": "always" + }, + { + "id": "custom.pointSize", + "value": 3 + }, + { + "id": "custom.axisSoftMax", + "value": 100 + }, + { + "id": "custom.axisSoftMin", + "value": 0 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "进程使用FD占比" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "dark-orange", + "mode": "fixed" + } + } + ] + } + ] + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 16, + "y": 34 + }, + "id": 16, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "maxHeight": 600, + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "11.5.0-80683", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "editorMode": "code", + "exemplar": true, + "expr": "irate(node_context_switches_total{instance=~\"$instance\"}[$interval])", + "interval": "", + "intervalFactor": 1, + "legendFormat": "每秒上下文切换", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "editorMode": "code", + "exemplar": true, + "expr": "node_filefd_allocated{instance=~\"$instance\"}", + 
"format": "time_series", + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "总使用FD", + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "editorMode": "code", + "exemplar": true, + "expr": "(node_filefd_allocated{instance=~\"$instance\"}/node_filefd_maximum{instance=~\"$instance\"}) *100", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "总使用FD占比", + "refId": "C" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "editorMode": "code", + "exemplar": true, + "expr": "(process_open_fds{instance=~\"$instance\"}/process_max_fds{instance=~\"$instance\"}) *100", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "进程使用FD占比", + "refId": "D" + } + ], + "title": "文件描述符(FD)/每秒上下文切换", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "description": "Read time seconds 每个磁盘分区读操作花费的秒数\n\nWrite time seconds 每个磁盘分区写操作花费的秒数\n\nIO time seconds 每个磁盘分区输入/输出操作花费的秒数\n\nIO time weighted seconds每个磁盘分区输入/输出操作花费的加权秒数", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "读取(-)/写入(+)", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "opacity", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "thresholds": { + "mode": "absolute", + 
"steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "s" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "vda" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#6ED0E0", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/,*_读取$/" + }, + "properties": [ + { + "id": "custom.transform", + "value": "negative-Y" + } + ] + }, + { + "matcher": { + "id": "byValue", + "options": { + "op": "gte", + "reducer": "allIsZero", + "value": 0 + } + }, + "properties": [ + { + "id": "custom.hideFrom", + "value": { + "legend": true, + "tooltip": true, + "viz": false + } + } + ] + }, + { + "matcher": { + "id": "byValue", + "options": { + "op": "gte", + "reducer": "allIsNull", + "value": 0 + } + }, + "properties": [ + { + "id": "custom.hideFrom", + "value": { + "legend": true, + "tooltip": true, + "viz": false + } + } + ] + } + ] + }, + "gridPos": { + "h": 8, + "w": 6, + "x": 0, + "y": 42 + }, + "id": 160, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "maxHeight": 600, + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "11.5.0-80683", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "expr": "irate(node_disk_read_time_seconds_total{instance=~\"$instance\"}[$interval]) / irate(node_disk_reads_completed_total{instance=~\"$instance\"}[$interval])", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{device}}_读取", + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "expr": "irate(node_disk_write_time_seconds_total{instance=~\"$instance\"}[$interval]) / irate(node_disk_writes_completed_total{instance=~\"$instance\"}[$interval])", + "format": "time_series", + "hide": false, 
+ "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{device}}_写入", + "refId": "C" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "expr": "irate(node_disk_io_time_seconds_total{instance=~\"$instance\"}[$interval])", + "format": "time_series", + "hide": true, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{device}}", + "refId": "A", + "step": 10 + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "expr": "irate(node_disk_io_time_weighted_seconds_total{instance=~\"$instance\"}[$interval])", + "format": "time_series", + "hide": true, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{device}}_加权", + "refId": "D" + } + ], + "title": "每次IO读写的耗时(参考:小于100ms)", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "description": "Reads completed: 每个磁盘分区每秒读完成次数\n\nWrites completed: 每个磁盘分区每秒写完成次数\n\nIO now 每个磁盘分区每秒正在处理的输入/输出请求数", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "读取(-)/写入(+)I/O ops/sec", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "none" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "vda_write" + }, + 
"properties": [ + { + "id": "color", + "value": { + "fixedColor": "#6ED0E0", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/.*_读取$/" + }, + "properties": [ + { + "id": "custom.transform", + "value": "negative-Y" + } + ] + }, + { + "matcher": { + "id": "byValue", + "options": { + "op": "gte", + "reducer": "allIsZero", + "value": 0 + } + }, + "properties": [ + { + "id": "custom.hideFrom", + "value": { + "legend": true, + "tooltip": true, + "viz": false + } + } + ] + }, + { + "matcher": { + "id": "byValue", + "options": { + "op": "gte", + "reducer": "allIsNull", + "value": 0 + } + }, + "properties": [ + { + "id": "custom.hideFrom", + "value": { + "legend": true, + "tooltip": true, + "viz": false + } + } + ] + } + ] + }, + "gridPos": { + "h": 8, + "w": 6, + "x": 6, + "y": 42 + }, + "id": 161, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "maxHeight": 600, + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "11.5.0-80683", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "expr": "irate(node_disk_reads_completed_total{instance=~\"$instance\"}[$interval])", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{device}}_读取", + "refId": "A", + "step": 10 + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "expr": "irate(node_disk_writes_completed_total{instance=~\"$instance\"}[$interval])", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{device}}_写入", + "refId": "B", + "step": 10 + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "expr": "node_disk_io_now{instance=~\"$instance\"}", + "format": "time_series", + "hide": true, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{device}}", 
+ "refId": "C" + } + ], + "title": "磁盘读写速率(IOPS)", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "description": "Read bytes 每个磁盘分区每秒读取的字节数\nWritten bytes 每个磁盘分区每秒写入的字节数", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "读取(-)/写入(+)", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "Bps" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "vda_write" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#6ED0E0", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/.*_读取$/" + }, + "properties": [ + { + "id": "custom.transform", + "value": "negative-Y" + } + ] + }, + { + "matcher": { + "id": "byValue", + "options": { + "op": "gte", + "reducer": "allIsZero", + "value": 0 + } + }, + "properties": [ + { + "id": "custom.hideFrom", + "value": { + "legend": true, + "tooltip": true, + "viz": false + } + } + ] + }, + { + "matcher": { + "id": "byValue", + "options": { + "op": "gte", + "reducer": "allIsNull", + "value": 0 + } + }, + "properties": [ + { + "id": "custom.hideFrom", + "value": { + "legend": true, + "tooltip": true, + "viz": false + } + } + ] + } + ] + }, + "gridPos": { +
"h": 8, + "w": 6, + "x": 12, + "y": 42 + }, + "id": 168, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "maxHeight": 600, + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "11.5.0-80683", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "expr": "irate(node_disk_read_bytes_total{instance=~\"$instance\"}[$interval])", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{device}}_读取", + "refId": "A", + "step": 10 + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "expr": "irate(node_disk_written_bytes_total{instance=~\"$instance\"}[$interval])", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{device}}_写入", + "refId": "B", + "step": 10 + } + ], + "title": "每秒磁盘读写容量", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "description": "每一秒钟的自然时间内,花费在I/O上的耗时。(wall-clock time)\n\nnode_disk_io_time_seconds_total:\n磁盘花费在输入/输出操作上的秒数。该值为累加值。(Milliseconds Spent Doing I/Os)\n\nirate(node_disk_io_time_seconds_total[1m]):\n计算每秒的速率:(last值-last前一个值)/时间戳差值,即:1秒钟内磁盘花费在I/O操作的时间占比。", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "opacity", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + 
"thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "percentunit" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Idle - Waiting for something to happen" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#052B51", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "guest" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#9AC48A", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "idle" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#052B51", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "iowait" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#EAB839", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "irq" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#BF1B00", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "nice" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#C15C17", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "sdb_每秒I/O操作%" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#d683ce", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "softirq" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E24D42", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "steal" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#FCE2DE", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "system" + }, + "properties": [ + { + "id": "color", + 
"value": { + "fixedColor": "#508642", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "user" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#5195CE", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "磁盘花费在I/O操作占比" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#ba43a9", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byValue", + "options": { + "op": "gte", + "reducer": "allIsZero", + "value": 0 + } + }, + "properties": [ + { + "id": "custom.hideFrom", + "value": { + "legend": true, + "tooltip": true, + "viz": false + } + } + ] + }, + { + "matcher": { + "id": "byValue", + "options": { + "op": "gte", + "reducer": "allIsNull", + "value": 0 + } + }, + "properties": [ + { + "id": "custom.hideFrom", + "value": { + "legend": true, + "tooltip": true, + "viz": false + } + } + ] + } + ] + }, + "gridPos": { + "h": 8, + "w": 6, + "x": 18, + "y": 42 + }, + "id": 175, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "maxHeight": 600, + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "11.5.0-80683", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "exemplar": true, + "expr": "irate(node_disk_io_time_seconds_total{instance=~\"$instance\"}[$interval])", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{device}}_每秒I/O操作%", + "refId": "C" + } + ], + "title": "每1秒内I/O操作耗时占比(I/O Util)", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "description": "Sockets_used - 已使用的所有协议套接字总量\n\nCurrEstab - 当前状态为 ESTABLISHED 或 CLOSE-WAIT 的 TCP 连接数\n\nTCP_alloc - 已分配(已建立、已申请到sk_buff)的TCP套接字数量\n\nTCP_tw - 等待关闭的TCP连接数\n\nUDP_inuse - 正在使用的 UDP 套接字数量\n\nRetransSegs - TCP 重传报文数\n\nOutSegs - TCP 
发送的报文数\n\nInSegs - TCP 接收的报文数", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "series", + "axisLabel": "CurrEstab", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "none" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "TCP_alloc" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "blue", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/.*Sockets_used/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E02F44", + "mode": "fixed" + } + }, + { + "id": "custom.lineWidth", + "value": 1 + }, + { + "id": "custom.axisLabel", + "value": "已使用的所有协议套接字总量" + }, + { + "id": "custom.showPoints", + "value": "always" + } + ] + } + ] + }, + "gridPos": { + "h": 8, + "w": 16, + "x": 0, + "y": 50 + }, + "id": 158, + "options": { + "legend": { + "calcs": [ + "last", + "max" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true, + "sortBy": "Max", + "sortDesc": true + }, + "tooltip": { + "maxHeight": 600, + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "11.5.0-80683", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "expr": 
"node_netstat_Tcp_CurrEstab{instance=~\"$instance\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "CurrEstab", + "refId": "A", + "step": 20 + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "expr": "node_sockstat_TCP_tw{instance=~\"$instance\"}", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "TCP_tw", + "refId": "D" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "exemplar": true, + "expr": "node_sockstat_sockets_used{instance=~\"$instance\"}", + "hide": false, + "interval": "2m", + "intervalFactor": 1, + "legendFormat": "Sockets_used", + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "expr": "node_sockstat_UDP_inuse{instance=~\"$instance\"}", + "interval": "", + "legendFormat": "UDP_inuse", + "refId": "C" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "expr": "node_sockstat_TCP_alloc{instance=~\"$instance\"}", + "interval": "", + "legendFormat": "TCP_alloc", + "refId": "E" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "expr": "irate(node_netstat_Tcp_PassiveOpens{instance=~\"$instance\"}[$interval])", + "hide": true, + "interval": "", + "legendFormat": "{{instance}}_Tcp_PassiveOpens", + "refId": "G" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "expr": "irate(node_netstat_Tcp_ActiveOpens{instance=~\"$instance\"}[$interval])", + "hide": true, + "interval": "", + "legendFormat": "{{instance}}_Tcp_ActiveOpens", + "refId": "F" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "expr": "irate(node_netstat_Tcp_InSegs{instance=~\"$instance\"}[$interval])", + "interval": "", + "legendFormat": "Tcp_InSegs", + "refId": "H" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "expr": 
"irate(node_netstat_Tcp_OutSegs{instance=~\"$instance\"}[$interval])", + "interval": "", + "legendFormat": "Tcp_OutSegs", + "refId": "I" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "expr": "irate(node_netstat_Tcp_RetransSegs{instance=~\"$instance\"}[$interval])", + "hide": false, + "interval": "", + "legendFormat": "Tcp_RetransSegs", + "refId": "J" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "expr": "irate(node_netstat_TcpExt_ListenDrops{instance=~\"$instance\"}[$interval])", + "hide": true, + "interval": "", + "legendFormat": "", + "refId": "K" + } + ], + "title": "网络Socket连接信息", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "上传(-)/下载(+)", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bps" + }, + "overrides": [ + { + "matcher": { + "id": "byRegexp", + "options": "/.*_out上传$/" + }, + "properties": [ + { + "id": "custom.transform", + "value": "negative-Y" + }, + { + "id": "color", + "value": { + "fixedColor": "blue", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byValue", + "options": { + "op": "gte", + "reducer": 
"allIsZero", + "value": 0 + } + }, + "properties": [ + { + "id": "custom.hideFrom", + "value": { + "legend": true, + "tooltip": true, + "viz": false + } + } + ] + }, + { + "matcher": { + "id": "byValue", + "options": { + "op": "gte", + "reducer": "allIsNull", + "value": 0 + } + }, + "properties": [ + { + "id": "custom.hideFrom", + "value": { + "legend": true, + "tooltip": true, + "viz": false + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/.*_in下载$/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "yellow", + "mode": "fixed" + } + } + ] + } + ] + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 16, + "y": 50 + }, + "id": 157, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "maxHeight": 600, + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "11.5.0-80683", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "exemplar": true, + "expr": "irate(node_network_receive_bytes_total{instance=~\"$instance\",device=~\"$device\"}[$interval])*8", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{device}}_in下载", + "refId": "A", + "step": 4 + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "expr": "irate(node_network_transmit_bytes_total{instance=~\"$instance\",device=~\"$device\"}[$interval])*8", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{device}}_out上传", + "refId": "B", + "step": 4 + } + ], + "title": "每秒网络带宽使用$device", + "type": "timeseries" + } + ], + "refresh": "", + "schemaVersion": 40, + "tags": [ + "Prometheus", + "node_exporter", + "StarsL.cn", + "TenSunS" + ], + "templating": { + "list": [ + { + "current": {}, + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "definition": "label_values(origin_prometheus)", + "includeAll": 
false, + "label": "数据源", + "name": "origin_prometheus", + "options": [], + "query": { + "qryType": 1, + "query": "label_values(origin_prometheus)", + "refId": "PrometheusVariableQueryEditor-VariableQuery" + }, + "refresh": 2, + "regex": "", + "sort": 5, + "type": "query" + }, + { + "current": {}, + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "definition": "label_values(node_uname_info{origin_prometheus=~\"$origin_prometheus\"},job)", + "includeAll": false, + "label": "JOB", + "name": "job", + "options": [], + "query": { + "qryType": 1, + "query": "label_values(node_uname_info{origin_prometheus=~\"$origin_prometheus\"},job)", + "refId": "PrometheusVariableQueryEditor-VariableQuery" + }, + "refresh": 2, + "regex": "", + "sort": 5, + "type": "query" + }, + { + "current": {}, + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "definition": "label_values(node_uname_info{origin_prometheus=~\"$origin_prometheus\", job=~\"$job\"},nodename)", + "includeAll": true, + "label": "名称", + "name": "name", + "options": [], + "query": { + "qryType": 1, + "query": "label_values(node_uname_info{origin_prometheus=~\"$origin_prometheus\", job=~\"$job\"},nodename)", + "refId": "PrometheusVariableQueryEditor-VariableQuery" + }, + "refresh": 2, + "regex": "", + "sort": 5, + "type": "query" + }, + { + "current": {}, + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "definition": "label_values(node_uname_info{origin_prometheus=~\"$origin_prometheus\", job=~\"$job\", nodename=~\"$name\"},instance)", + "includeAll": false, + "label": "IP", + "name": "instance", + "options": [], + "query": { + "qryType": 1, + "query": "label_values(node_uname_info{origin_prometheus=~\"$origin_prometheus\", job=~\"$job\", nodename=~\"$name\"},instance)", + "refId": "PrometheusVariableQueryEditor-VariableQuery" + }, + "refresh": 2, + "regex": "", + "sort": 5, + "type": "query" + }, + { + "auto": false, + "auto_count": 100, + "auto_min": "1m", + 
"current": { + "text": "3m", + "value": "3m" + }, + "hide": 2, + "label": "间隔", + "name": "interval", + "options": [ + { + "selected": true, + "text": "3m", + "value": "3m" + } + ], + "query": "3m", + "refresh": 2, + "type": "interval" + }, + { + "current": {}, + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "definition": "query_result(count(node_uname_info{origin_prometheus=~\"$origin_prometheus\",job=~\"$job\"}))", + "hide": 2, + "includeAll": false, + "label": "主机数", + "name": "total", + "options": [], + "query": { + "qryType": 3, + "query": "query_result(count(node_uname_info{origin_prometheus=~\"$origin_prometheus\",job=~\"$job\"}))", + "refId": "PrometheusVariableQueryEditor-VariableQuery" + }, + "refresh": 2, + "regex": "/{} (.*) .*/", + "type": "query" + }, + { + "current": {}, + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "definition": "label_values(node_network_info{origin_prometheus=~\"$origin_prometheus\", job=~\"$job\", instance=~\"$instance\", device!~\"'tap.*|veth.*|br.*|docker.*|virbr.*|lo.*|cni.*'\"},device)", + "includeAll": true, + "label": "网卡", + "multi": true, + "name": "device", + "options": [], + "query": { + "qryType": 1, + "query": "label_values(node_network_info{origin_prometheus=~\"$origin_prometheus\", job=~\"$job\", instance=~\"$instance\", device!~\"'tap.*|veth.*|br.*|docker.*|virbr.*|lo.*|cni.*'\"},device)", + "refId": "PrometheusVariableQueryEditor-VariableQuery" + }, + "refresh": 2, + "regex": "", + "sort": 5, + "type": "query" + }, + { + "current": {}, + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "definition": "query_result(topk(1,sort_desc(max(node_filesystem_size_bytes{origin_prometheus=~\"$origin_prometheus\",instance=~\"$instance\",fstype=~\"ext.?|xfs\",mountpoint!~\".*pods.*\"}) by (mountpoint))))", + "hide": 2, + "includeAll": false, + "label": "最大挂载目录", + "name": "maxmount", + "options": [], + "query": { + "qryType": 3, + "query": 
"query_result(topk(1,sort_desc(max(node_filesystem_size_bytes{origin_prometheus=~\"$origin_prometheus\",instance=~\"$instance\",fstype=~\"ext.?|xfs\",mountpoint!~\".*pods.*\"}) by (mountpoint))))", + "refId": "PrometheusVariableQueryEditor-VariableQuery" + }, + "refresh": 2, + "regex": "/.*\\\"(.*)\\\".*/", + "type": "query" + }, + { + "current": {}, + "datasource": { + "type": "prometheus", + "uid": "${DS_指标}" + }, + "definition": "label_values(node_uname_info{origin_prometheus=~\"$origin_prometheus\", job=~\"$job\", nodename=~\"$name\", instance=~\"$instance\"},nodename)", + "hide": 2, + "includeAll": false, + "label": "展示使用的名称", + "name": "show_name", + "options": [], + "query": { + "qryType": 1, + "query": "label_values(node_uname_info{origin_prometheus=~\"$origin_prometheus\", job=~\"$job\", nodename=~\"$name\", instance=~\"$instance\"},nodename)", + "refId": "PrometheusVariableQueryEditor-VariableQuery" + }, + "refresh": 2, + "regex": "", + "sort": 5, + "type": "query" + } + ] + }, + "time": { + "from": "now-5m", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "30s", + "1m", + "3m", + "5m", + "15m", + "30m" + ] + }, + "timezone": "browser", + "title": "Node Exporter Dashboard", + "uid": "StarsL-JOB-node", + "version": 6, + "weekStart": "" +} \ No newline at end of file diff --git a/playbooks/roles/vhosts/alerting/files/setup-observable-server.sh b/playbooks/roles/vhosts/alerting/files/setup-observable-server.sh new file mode 100644 index 0000000..848ebe0 --- /dev/null +++ b/playbooks/roles/vhosts/alerting/files/setup-observable-server.sh @@ -0,0 +1,102 @@ +#!/bin/bash + +export domain=$1 +export secret=$2 +export namespace=$3 +export mysql_db_password=$4 + +kubectl label nodes k3s-server prometheus=true --overwrite + +cat > values.yaml << EOF +deepflow: + enabled: true + clickhouse: + enabled: false + mysql: + enabled: false + grafana: + enabled: true + ingress: + enabled: true + ingressClassName: nginx + hosts: + - grafana.${domain} + tls: + 
- secretName: ${secret} + hosts: + - grafana.${domain} + global: + externalClickHouse: + enabled: true + type: ep + clusterName: default + storagePolicy: default + username: default + password: '' + hosts: + - ip: 10.1.2.3 + port: 9000 + - ip: 10.1.2.4 + port: 9000 + - ip: 10.1.2.5 + port: 9000 + externalMySQL: + enabled: true + ip: mysql.database.svc.cluster.local + port: 3306 + username: root + password: '${mysql_db_password}' +prometheus: + enabled: true + alertmanager: + enabled: false + prometheus-pushgateway: + enabled: false + kube-state-metrics: + enabled: false + server: + ingress: + ingressClassName: nginx + hosts: + - prometheus.${domain} + tls: + - secretName: ${secret} + hosts: + - prometheus.${domain} + alertmanagers: + - static_configs: + - targets: + - alertmanager.${domain} + serverFiles: + prometheus.yml: + rule_files: + - /etc/config/recording_rules.yml + - /etc/config/alerting_rules.yml +alertmanager: + configmapReload: + enabled: false + config: + global: + resolve_timeout: 5m + smtp_smarthost: 'smtp.qq.com:465' + smtp_from: '11111111@qq.com' + smtp_auth_username: '11111111@qq.com' + smtp_auth_password: '123456' + smtp_require_tls: false + templates: + - '/etc/alertmanager/*.tmpl' + receivers: + - name: 'default-receiver' + email_configs: + - to: '{{ template "email.to" . }}' + html: '{{ template "email.to.html" .
}}' + route: + group_wait: 10s + group_interval: 5m + receiver: default-receiver + repeat_interval: 1h +EOF + +helm repo add stable https://artifact.onwalk.net/chartrepo/public/ || echo true +helm repo update +helm upgrade --install observable-server stable/observableserver -n "${namespace}" -f values.yaml diff --git a/playbooks/roles/vhosts/alerting/meta/main.yml b/playbooks/roles/vhosts/alerting/meta/main.yml new file mode 100644 index 0000000..83cef7b --- /dev/null +++ b/playbooks/roles/vhosts/alerting/meta/main.yml @@ -0,0 +1,2 @@ +dependencies: + - role: cert-manager diff --git a/playbooks/roles/vhosts/alerting/tasks/main.yml b/playbooks/roles/vhosts/alerting/tasks/main.yml new file mode 100755 index 0000000..cd7f06e --- /dev/null +++ b/playbooks/roles/vhosts/alerting/tasks/main.yml @@ -0,0 +1,17 @@ +- name: get db password + shell: 'kubectl get secret --namespace database postgresql -o jsonpath="{.data.postgres-password}" | base64 -d' + register: command_raw + when: inventory_hostname in groups[group][0] + +- name: set fact join command + set_fact: + mysql_db_password : "{{ command_raw.stdout_lines[0] }}" + when: inventory_hostname in groups[group][0] + +- name: Setup OpenLdap Server + script: files/setup-observable-server.sh {{ domain }} {{ secret }} {{ namespace }} {{ mysql_db_password }} + when: inventory_hostname in groups[group] + +- name: Check alerting rules config + shell: promtool check rules /path/to/example.rules.yml + when: inventory_hostname in groups[group] diff --git a/playbooks/roles/vhosts/alerting/templates/alerting_rules b/playbooks/roles/vhosts/alerting/templates/alerting_rules new file mode 100644 index 0000000..6b8a01e --- /dev/null +++ b/playbooks/roles/vhosts/alerting/templates/alerting_rules @@ -0,0 +1,37 @@ +data: + alerting_rules.yml: | + groups: + - name: host-monitoring + rules: + - alert: HighLoad + expr: node_load1 > 2.0 + for: 5m + labels: + severity: warning + annotations: + summary: High load on {{
$labels.instance }} + description: "Load is {{ $value }} (threshold: 2.0)" + - alert: HighCpuUsage + expr: 100 - (avg by (instance) (irate(node_cpu_seconds_total{mode="idle"}[5m])) * 100) > 90 + for: 5m + labels: + severity: critical + annotations: + summary: High CPU usage on {{ $labels.instance }} + description: "CPU usage is {{ $value }}%" + - alert: HighMemoryUsage + expr: (node_memory_MemTotal_bytes - node_memory_MemFree_bytes - node_memory_Buffers_bytes - node_memory_Cached_bytes) / node_memory_MemTotal_bytes * 100 > 90 + for: 5m + labels: + severity: warning + annotations: + summary: High memory usage on {{ $labels.instance }} + description: "Memory usage is {{ $value }}%" + - alert: HighDiskUsage + expr: node_filesystem_avail_bytes{fstype="ext4"} / node_filesystem_size_bytes{fstype="ext4"} * 100 < 10 + for: 5m + labels: + severity: critical + annotations: + summary: High disk usage on {{ $labels.instance }} + description: "Disk free space is {{ $value }}%" diff --git a/playbooks/roles/vhosts/alerting/templates/recording_rules b/playbooks/roles/vhosts/alerting/templates/recording_rules new file mode 100644 index 0000000..061e1c5 --- /dev/null +++ b/playbooks/roles/vhosts/alerting/templates/recording_rules @@ -0,0 +1,55 @@ +data: + recording_rules.yml: | + groups: + - name: host-monitoring + rules: + - record: node_load1 + expr: node_load1 + - record: node_cpu_usage + expr: 100 - (avg by (instance) (irate(node_cpu_seconds_total{mode="idle"}[5m])) * 100) + - record: node_memory_usage + expr: (node_memory_MemTotal_bytes - node_memory_MemFree_bytes - node_memory_Buffers_bytes - node_memory_Cached_bytes) / node_memory_MemTotal_bytes * 100 + - record: node_disk_usage + expr: 100 - (avg by (instance) (node_filesystem_avail_bytes{fstype="ext4"} / node_filesystem_size_bytes{fstype="ext4"}) * 100) +groups: +- name: 实例存活告警规则 + rules: + - alert: 实例存活告警 + expr: up == 0 + for: 1m + labels: + user: prometheus + severity: warning + annotations: + summary: "主机宕机 !!!"
+ description: "该实例主机已经宕机超过一分钟了。" +- name: 内存报警规则 + rules: + - alert: 内存使用率告警 + expr: (1 - (node_memory_MemAvailable_bytes / (node_memory_MemTotal_bytes))) * 100 > 50 + for: 1m + labels: + severity: warning + annotations: + summary: "服务器可用内存不足。" + description: "内存使用率已超过50%(当前值:{{ $value }}%)" +- name: CPU报警规则 + rules: + - alert: CPU使用率告警 + expr: 100 - (avg by (instance)(irate(node_cpu_seconds_total{mode="idle"}[1m]) )) * 100 > 50 + for: 1m + labels: + severity: warning + annotations: + summary: "CPU使用率正在飙升。" + description: "CPU使用率超过50%(当前值:{{ $value }}%)" +- name: 磁盘使用率报警规则 + rules: + - alert: 磁盘使用率告警 + expr: 100 - node_filesystem_free_bytes{fstype=~"xfs|ext4"} / node_filesystem_size_bytes{fstype=~"xfs|ext4"} * 100 > 80 + for: 20m + labels: + severity: warning + annotations: + summary: "硬盘分区使用率过高" + description: "分区使用大于80%(当前值:{{ $value }}%)" diff --git a/playbooks/roles/vhosts/alloy/defaults/main.yml b/playbooks/roles/vhosts/alloy/defaults/main.yml new file mode 100644 index 0000000..85aab39 --- /dev/null +++ b/playbooks/roles/vhosts/alloy/defaults/main.yml @@ -0,0 +1,23 @@ +# Default variables for alloy +alloy_config_path: "/etc/alloy/config.alloy" + +grafana_gpg_key_url: "https://apt.grafana.com/gpg.key" +grafana_apt_source: "deb [signed-by=/etc/apt/keyrings/grafana.gpg] https://apt.grafana.com stable main" + +loki_endpoint_url: "https://logs-prod-030.grafana.net/loki/api/v1/push" +loki_basic_auth_username: "965018" +loki_basic_auth_password: !vault | + $ANSIBLE_VAULT;1.1;AES256 + 35313466643636366632313038386338303535616334306134663261333237646231653965626333 + 3032306662663236363139653863623263623034363264630a373766353630326131376335386463 + 33363363306539303264346230366239303636366130636233333536646537613932393961343635 + 6266653566616331310a633861303963643237366362656139303232323066323665623130326163 + 31316366626161396636343966363263313637383665633037323666323266633062653966333566 + 
61306664653334366331393265363265383832393361613663633138383730613666623038616531 + 35333031613765616562323663613333313464343732663930356337343836396133363265376666 + 33343836633465356330663266623838646461613633313031343232613066356336386665663165 + 65366530613966373466323934303466336537323433396135623933383239393530333762633963 + 36356562303361396332633166386236666265326339313731666632646335336136373931313131 + 65626231616233333061396135383334623030643532636335656262376464383039303834363938 + 63636466386266643234386536336164353138643839393934393464623037306636613964376462 + 38386466643633643036626239626438393762326434643563383237343762626139 diff --git a/playbooks/roles/vhosts/alloy/files/loki_journal_sources_gateway.yml b/playbooks/roles/vhosts/alloy/files/loki_journal_sources_gateway.yml new file mode 100644 index 0000000..ac3af8b --- /dev/null +++ b/playbooks/roles/vhosts/alloy/files/loki_journal_sources_gateway.yml @@ -0,0 +1,5 @@ +loki_journal_sources: + - name: "xray" + unit: "xray.service" + - name: "xray_tproxy" + unit: "xray-tproxy.service" diff --git a/playbooks/roles/vhosts/alloy/files/loki_journal_sources_k3s_agent.yml b/playbooks/roles/vhosts/alloy/files/loki_journal_sources_k3s_agent.yml new file mode 100644 index 0000000..40ade11 --- /dev/null +++ b/playbooks/roles/vhosts/alloy/files/loki_journal_sources_k3s_agent.yml @@ -0,0 +1,3 @@ +loki_journal_sources: + - name: "k3s_agent" + unit: "k3s-agent.service" diff --git a/playbooks/roles/vhosts/alloy/files/loki_journal_sources_k3s_server.yml b/playbooks/roles/vhosts/alloy/files/loki_journal_sources_k3s_server.yml new file mode 100644 index 0000000..63c41af --- /dev/null +++ b/playbooks/roles/vhosts/alloy/files/loki_journal_sources_k3s_server.yml @@ -0,0 +1,3 @@ +loki_journal_sources: + - name: "k3s" + unit: "k3s.service" diff --git a/playbooks/roles/vhosts/alloy/files/loki_journal_sources_postgresql.yml b/playbooks/roles/vhosts/alloy/files/loki_journal_sources_postgresql.yml new file mode 100644 index 
0000000..eea425c
--- /dev/null
+++ b/playbooks/roles/vhosts/alloy/files/loki_journal_sources_postgresql.yml
@@ -0,0 +1,3 @@
+loki_journal_sources:  # one entry per unit; config.alloy.j2 loops over this list
+  - name: "postgresql"
+    unit: "postgresql.service"
diff --git a/playbooks/roles/vhosts/alloy/files/loki_journal_sources_vpn.yml b/playbooks/roles/vhosts/alloy/files/loki_journal_sources_vpn.yml
new file mode 100644
index 0000000..9298616
--- /dev/null
+++ b/playbooks/roles/vhosts/alloy/files/loki_journal_sources_vpn.yml
@@ -0,0 +1,3 @@
+loki_journal_sources:
+  - name: "vpn"
+    unit: "wg-quick@wg0.service"  # full unit name: the template matches _SYSTEMD_UNIT exactly, which includes the .service suffix
diff --git a/playbooks/roles/vhosts/alloy/tasks/main.yml b/playbooks/roles/vhosts/alloy/tasks/main.yml
new file mode 100644
index 0000000..23f680e
--- /dev/null
+++ b/playbooks/roles/vhosts/alloy/tasks/main.yml
@@ -0,0 +1,55 @@
+---
+- name: Install GPG  # gpg is needed below to de-armor the repository key
+  apt:
+    name: gpg
+    state: present
+
+- name: Create APT keyrings directory
+  file:
+    path: /etc/apt/keyrings/
+    state: directory
+    mode: '0755'
+  become: yes
+
+- name: Add Grafana GPG key  # a failed download is not reported by this pipeline; the stat/fail tasks below catch an empty key file
+  shell: |
+    wget -q -O - {{ grafana_gpg_key_url }} | gpg --dearmor | sudo tee /etc/apt/keyrings/grafana.gpg > /dev/null
+  become: yes
+
+- name: Check if Grafana GPG key file is not empty
+  ansible.builtin.stat:
+    path: /etc/apt/keyrings/grafana.gpg
+  register: grafana_gpg_key_stat
+
+- name: Fail if Grafana GPG key file is empty
+  ansible.builtin.fail:
+    msg: "The Grafana GPG key file is empty or does not exist."
+  when: not grafana_gpg_key_stat.stat.exists or grafana_gpg_key_stat.stat.size == 0  # stat.size is undefined when the file is missing, so test existence first
+
+- name: Add Grafana APT source  # source line comes from the role default grafana_apt_source
+  shell: |
+    echo "{{ grafana_apt_source }}" | sudo tee /etc/apt/sources.list.d/grafana.list
+  become: yes
+
+- name: Update APT package list and install Grafana Alloy
+  apt:
+    name: alloy
+    state: present
+    update_cache: yes
+
+- name: Create Alloy configuration directory  # directory derived from alloy_config_path (default /etc/alloy)
+  file:
+    path: "{{ alloy_config_path | dirname }}"
+    state: directory
+    mode: '0770'
+
+- name: Create Alloy configuration file
+  template:
+    src: templates/config.alloy.j2
+    dest: "{{ alloy_config_path }}"
+
+- name: Reload and restart Alloy service
+  systemd:
+    name: alloy
+    state: restarted
+    daemon_reload: yes
diff --git a/playbooks/roles/vhosts/alloy/templates/config.alloy.j2 b/playbooks/roles/vhosts/alloy/templates/config.alloy.j2
new file mode 100644
index 0000000..547c961
--- /dev/null
+++ b/playbooks/roles/vhosts/alloy/templates/config.alloy.j2
@@ -0,0 +1,19 @@
+loki.write "grafanacloud" {
+  endpoint {
+    url = "{{ loki_endpoint_url }}"
+
+    basic_auth {
+      username = "{{ loki_basic_auth_username }}"
+      password = "{{ loki_basic_auth_password }}"
+    }
+  }
+}
+
+{% for source in loki_journal_sources %}
+loki.source.journal "{{ source.name }}" {
+  format_as_json = true
+  labels         = {job = "{{ source.name }}"}
+  matches        = "_SYSTEMD_UNIT={{ source.unit }}"
+  forward_to     = [loki.write.grafanacloud.receiver]
+}
+{% endfor %}
diff --git a/playbooks/roles/vhosts/cert-manager/files/certs_automated_issuance.sh b/playbooks/roles/vhosts/cert-manager/files/certs_automated_issuance.sh
new file mode 100644
index 0000000..4e04058
--- /dev/null
+++ b/playbooks/roles/vhosts/cert-manager/files/certs_automated_issuance.sh
@@ -0,0 +1,32 @@
+#!/bin/bash
+
+
+#!/bin/bash
+export domain=$1
+export Ali_Key=$2
+export Ali_Secret=$3
+set -x  # tracing starts only after the credentials are exported, so the secret is not echoed into the log
+
+rm -fv ${domain}.key ${domain}.pem -f
+rm -fv /etc/ssl/${domain}.* -f
+
+# Try to issue a certificate from ZeroSSL. If it fails, try Let's Encrypt.
+
+curl https://get.acme.sh | sh -s email=156405189@qq.com
+sh ~/.acme.sh/acme.sh --set-default-ca --server zerossl --issue --force --dns dns_ali -d ${domain} -d "*.${domain}"
+if [ $? -eq 0 ]; then
+  echo "Certificate from zerossl successfully issued"
+else
+  sh ~/.acme.sh/acme.sh --set-default-ca --server letsencrypt --issue --force --dns dns_ali -d ${domain} -d "*.${domain}"
+  if [ $? -eq 0 ]; then
+    echo "Certificate from letsencrypt successfully issued"
+  else
+    echo "Command failed"
+    exit 1
+  fi
+fi
+
+cat ~/.acme.sh/${domain}_ecc/${domain}.cer > ${domain}.pem
+cat ~/.acme.sh/${domain}_ecc/ca.cer >> ${domain}.pem
+cat ~/.acme.sh/${domain}_ecc/${domain}.key > ${domain}.key
+sudo cp ${domain}.pem /etc/ssl/ -f && sudo cp ${domain}.key /etc/ssl/ -f
diff --git a/playbooks/roles/vhosts/cert-manager/files/fetch_certs_from_vault.py b/playbooks/roles/vhosts/cert-manager/files/fetch_certs_from_vault.py
new file mode 100644
index 0000000..f18ceb2
--- /dev/null
+++ b/playbooks/roles/vhosts/cert-manager/files/fetch_certs_from_vault.py
@@ -0,0 +1,71 @@
+#!/usr/bin/env python3
+
+import hvac
+import os
+import shutil
+from datetime import datetime
+
+# Set your Vault configurations
+vault_url = "{{ vault_url }}"
+vault_token = "{{ vault_token }}"
+vault_secret_path = "{{ vault_secret_path }}"
+domain = "{{ domain }}"
+
+# Connect to Vault
+client = hvac.Client(url=vault_url, token=vault_token)
+
+# Fetch Certificate and Private Key from Vault
+vault_result = client.read(vault_secret_path)
+
+if vault_result and 'data' in vault_result:
+    certificate = vault_result['data'].get('certificate', '')
+    private_key = vault_result['data'].get('private_key', '')
+
+    # Paths for certificate and private key files
+    cert_path = f"/etc/ssl/{domain}.pem"
+    key_path = f"/etc/ssl/{domain}.key"
+
+    # Check if files already exist
+    cert_exists = os.path.exists(cert_path)
+    key_exists = os.path.exists(key_path)
+
+    # Backup existing files with timestamp
+    backup_dir = "/opt/bak/"
+    timestamp
= datetime.now().strftime("%Y%m%d%H%M%S")
+    os.makedirs(backup_dir, exist_ok=True)  # shutil.move below fails if the backup directory is missing
+    if cert_exists:
+        backup_cert_path = f"{backup_dir}{domain}_{timestamp}.pem"
+        shutil.move(cert_path, backup_cert_path)
+        print(f"Backup created: {backup_cert_path}")
+
+    if key_exists:
+        backup_key_path = f"{backup_dir}{domain}_{timestamp}.key"
+        shutil.move(key_path, backup_key_path)
+        print(f"Backup created: {backup_key_path}")
+
+    # Write Certificate to File (force overwrite)
+    with open(cert_path, 'w') as cert_file:
+        cert_file.write(certificate)
+
+    # Write Private Key to File (force overwrite)
+    with open(key_path, 'w') as key_file:
+        key_file.write(private_key)
+
+    # Set file permissions
+    os.chmod(cert_path, 0o644)
+    os.chown(cert_path, 0, 0)
+
+    os.chmod(key_path, 0o600)
+    os.chown(key_path, 0, 0)
+
+    if cert_exists:
+        print(f"Certificate updated: {cert_path}")
+    else:
+        print(f"Certificate written: {cert_path}")
+
+    if key_exists:
+        print(f"Private key updated: {key_path}")
+    else:
+        print(f"Private key written: {key_path}")
+else:
+    raise SystemExit("Failed to fetch certificate and private key from Vault.")  # non-zero exit so the caller sees the failure
diff --git a/playbooks/roles/vhosts/cert-manager/files/get_certificate.sh b/playbooks/roles/vhosts/cert-manager/files/get_certificate.sh
new file mode 100644
index 0000000..4195c04
--- /dev/null
+++ b/playbooks/roles/vhosts/cert-manager/files/get_certificate.sh
@@ -0,0 +1,35 @@
+#!/bin/bash
+
+check_empty() {
+  if [ -z "$1" ]; then
+    echo "$2"
+    exit 1
+  fi
+}
+
+check_empty "$1" "Please provide DOMAIN" && export DOMAIN=$1
+check_empty "$2" "Please provide VAULT_ADDR" && export VAULT_ADDR=$2
+check_empty "$3" "Please provide VAULT_TOKEN" && export VAULT_TOKEN=$3
+
+SECRET_PATH="certs/$DOMAIN"
+
+# Output paths
+CERTIFICATE_PATH="/etc/ssl/${DOMAIN}.pem"
+PRIVATE_KEY_PATH="/etc/ssl/${DOMAIN}.key"
+
+vault login -address=$VAULT_ADDR $VAULT_TOKEN
+# Read certificate from Vault
+vault kv get -field=certificate certs/${DOMAIN} > "$CERTIFICATE_PATH"
+# Read private key from Vault
+vault kv get -field=private_key
certs/${DOMAIN} > "$PRIVATE_KEY_PATH"
+
+# Set permissions for the private key (modify as needed)
+chmod 600 "$PRIVATE_KEY_PATH"
+
+# Check if certificate and private key files are non-empty
+if [ ! -s "$CERTIFICATE_PATH" ] || [ ! -s "$PRIVATE_KEY_PATH" ]; then
+  echo "Certificate or private key is empty. Exiting..."
+  exit 1
+else
+  echo "Certificate and private key have been written to $CERTIFICATE_PATH and $PRIVATE_KEY_PATH"
+fi
diff --git a/playbooks/roles/vhosts/cert-manager/files/update-certs-secret.sh b/playbooks/roles/vhosts/cert-manager/files/update-certs-secret.sh
new file mode 100644
index 0000000..d7af429
--- /dev/null
+++ b/playbooks/roles/vhosts/cert-manager/files/update-certs-secret.sh
@@ -0,0 +1,10 @@
+#!/bin/bash
+
+export secret=$1
+export key_file=$2
+export cert_file=$3
+export namespace=$4
+
+kubectl create namespace $namespace || echo true  # tolerate failure, e.g. the namespace already exists
+kubectl delete secret $secret -n $namespace --ignore-not-found || echo true  # "tls" is a create subcommand; in delete it would be read as a second secret name
+kubectl create secret tls $secret --cert=$cert_file --key=$key_file -n $namespace
diff --git a/playbooks/roles/vhosts/cert-manager/meta/main.yml b/playbooks/roles/vhosts/cert-manager/meta/main.yml
new file mode 100644
index 0000000..9711b33
--- /dev/null
+++ b/playbooks/roles/vhosts/cert-manager/meta/main.yml
@@ -0,0 +1,2 @@
+dependencies:
+  - role: common
diff --git a/playbooks/roles/vhosts/cert-manager/tasks/main.yml b/playbooks/roles/vhosts/cert-manager/tasks/main.yml
new file mode 100755
index 0000000..a6a9fca
--- /dev/null
+++ b/playbooks/roles/vhosts/cert-manager/tasks/main.yml
@@ -0,0 +1,7 @@
+- name: Fetch Certificate and Private Key from Vault  # runs only on hosts of the configured group when cert_issuance is 'vault'
+  script: files/get_certificate.sh {{ domain }} {{ vault_url }} {{ vault_token }}
+  when: (inventory_hostname in groups[group]) and ( cert_issuance == 'vault' )
+
+- name: certs automated issuance  # runs only on hosts of the configured group when cert_issuance is 'auto'
+  script: files/certs_automated_issuance.sh {{ domain }} {{ dns_ak }} {{ dns_sk }}
+  when: (inventory_hostname in groups[group]) and ( cert_issuance == 'auto' )
diff --git
a/playbooks/roles/vhosts/cert-manager/vars/main.yml b/playbooks/roles/vhosts/cert-manager/vars/main.yml
new file mode 100644
index 0000000..e86c3cd
--- /dev/null
+++ b/playbooks/roles/vhosts/cert-manager/vars/main.yml
@@ -0,0 +1,2 @@
+group: master
+cert_issuance: vault
diff --git a/playbooks/roles/vhosts/common/defaults/main.yml b/playbooks/roles/vhosts/common/defaults/main.yml
new file mode 100644
index 0000000..368939d
--- /dev/null
+++ b/playbooks/roles/vhosts/common/defaults/main.yml
@@ -0,0 +1,38 @@
+enable_set_timezone: true              # "Set timezone" is enabled by default
+enable_set_hostname: true              # "Set hostname" is enabled by default
+enable_install_packages: false         # extra packages are not installed by default
+enable_all_hosts_update: false         # entries for all hosts are not written by default
+
+rsyslog_log_rotation:                  # optional log-management settings
+  enable: true                         # enable rsyslog log management
+  rotate_count: 4                      # number of rotated log files kept
+  rotate_frequency: weekly             # weekly by default; alternatives: daily, hourly
+  max_log_size: 100M                   # maximum size of a log file
+
+journald_log_rotation:                 # journald log-management settings
+  enable: true                         # enable journald log management
+  max_log_size: 100M                   # maximum size of a log file
+  max_files: 100                       # maximum number of journal files kept
+  max_file_sec: 1month                 # maximum time span stored in one journal file
+  system_max_use: 1G                   # maximum disk space used by the system journal
+  runtime_max_use: 500M                # maximum space used by the runtime journal
+
+  #config_temp:
+  #  k8s-node:
+  #    dns_servers:
+  #      - "8.8.8.8"
+  #      - "114.114.114.114"
+  #    swap_off: true
+  #    ip_forward: true
+  #    disk:
+  #      - name: /dev/sdb1
+  #        mount: /mnt
+  #      - name: /var/lib/containerd
+  #        mount: /mnt/lib/containerd
+  #        type: bind
+  #      - name: /var/log/deepflow
+  #        mount: /mnt/log/deepflow
+  #        type: bind
+  #    selinux_enable: false
+  #    ssh_auth:
+  #      key: /root/.ssh/id_rsa.pub
diff --git a/playbooks/roles/vhosts/common/files/install-packages.sh b/playbooks/roles/vhosts/common/files/install-packages.sh
new file mode 100644
index 0000000..14bb78f
--- /dev/null
+++ b/playbooks/roles/vhosts/common/files/install-packages.sh
@@ -0,0 +1,7 @@
+#!/bin/bash
+
+export DEBIAN_FRONTEND=noninteractive
+curl -fsSL https://apt.releases.hashicorp.com/gpg | sudo gpg --dearmor --yes -o /usr/share/keyrings/hashicorp-archive-keyring.gpg  # gpg writes the file itself; a shell redirect would run as the unprivileged caller
+echo "deb [signed-by=/usr/share/keyrings/hashicorp-archive-keyring.gpg] https://apt.releases.hashicorp.com $(lsb_release -cs) main" | sudo tee /etc/apt/sources.list.d/hashicorp.list > /dev/null  # tee performs the privileged write
+sudo apt-get update
+sudo apt-get install -y vault auditd
diff --git a/playbooks/roles/vhosts/common/files/secure_ssh.sh b/playbooks/roles/vhosts/common/files/secure_ssh.sh
new file mode 100644
index 0000000..4c6f5f0
--- /dev/null
+++ b/playbooks/roles/vhosts/common/files/secure_ssh.sh
@@ -0,0 +1,11 @@
+#!/bin/bash
+
+# Restrict permissions on the ~/.ssh/ directory
+sudo chmod 700 ~/.ssh
+
+# Restrict permissions on ~/.ssh/authorized_keys
+sudo chmod 600 ~/.ssh/authorized_keys
+
+# Mark authorized_keys immutable (chattr +i) so it cannot be deleted
+sudo chattr +i ~/.ssh/authorized_keys || true
+
diff --git a/playbooks/roles/vhosts/common/handlers/main.yml b/playbooks/roles/vhosts/common/handlers/main.yml
new file mode 100644
index 0000000..da6f188
--- /dev/null
+++ b/playbooks/roles/vhosts/common/handlers/main.yml
@@ -0,0 +1,10 @@
+---
+- name: Restart logrotate service  # notified by configure_logrotate.yaml. NOTE(review): confirm a "logrotate" service unit exists on the targets
+  service:
+    name: logrotate
+    state: restarted
+
+- name: Restart systemd-journald service  # notified by configure_journald.yml
+  service:
+    name: systemd-journald
+    state: restarted
diff --git a/playbooks/roles/vhosts/common/tasks/configure_journald.yml b/playbooks/roles/vhosts/common/tasks/configure_journald.yml
new file mode 100644
index 0000000..993aeba
--- /dev/null
+++ b/playbooks/roles/vhosts/common/tasks/configure_journald.yml
@@ -0,0 +1,7 @@
+---
+- name: Configure journald log rotation using template  # replaces /etc/systemd/journald.conf when journald_log_rotation.enable is true
+  template:
+    src: journald_logrotate.j2
+    dest: /etc/systemd/journald.conf
+  when: journald_log_rotation.enable
+  notify: Restart systemd-journald service
diff --git a/playbooks/roles/vhosts/common/tasks/configure_logrotate.yaml b/playbooks/roles/vhosts/common/tasks/configure_logrotate.yaml
new file mode 100644
index 0000000..90332b8
--- /dev/null
+++ b/playbooks/roles/vhosts/common/tasks/configure_logrotate.yaml
@@ -0,0 +1,7 @@
+---
+- name: Configure logrotate for rsyslog using template
+  template:
+    src: rsyslog_logrotate.j2
+
dest: /etc/logrotate.d/rsyslog
+  when: rsyslog_log_rotation.enable
+  notify: Restart logrotate service
diff --git a/playbooks/roles/vhosts/common/tasks/include_gpu.yaml b/playbooks/roles/vhosts/common/tasks/include_gpu.yaml
new file mode 100644
index 0000000..cb55513
--- /dev/null
+++ b/playbooks/roles/vhosts/common/tasks/include_gpu.yaml
@@ -0,0 +1,17 @@
+- name: Add NVIDIA repository  # adds the graphics-drivers PPA and the nvidia-container-runtime APT source
+  shell: |
+    add-apt-repository -y ppa:graphics-drivers
+    curl -s -L https://nvidia.github.io/nvidia-container-runtime/gpgkey | apt-key add -
+    distribution=$(. /etc/os-release;echo $ID$VERSION_ID)
+    curl -s -L https://nvidia.github.io/nvidia-container-runtime/$distribution/nvidia-container-runtime.list | tee /etc/apt/sources.list.d/nvidia-container-runtime.list
+    apt-get update
+
+- name: Install NVIDIA driver and container runtime
+  apt:
+    name:
+      - nvidia-modprobe
+      - nvidia-driver-535
+      - nvidia-headless-535
+      - nvidia-container-runtime
+    state: present
+    update_cache: yes
diff --git a/playbooks/roles/vhosts/common/tasks/main.yml b/playbooks/roles/vhosts/common/tasks/main.yml
new file mode 100644
index 0000000..acf68fe
--- /dev/null
+++ b/playbooks/roles/vhosts/common/tasks/main.yml
@@ -0,0 +1,34 @@
+- name: Set timezone
+  shell: "timedatectl set-timezone Asia/Shanghai"
+
+- name: update /etc/hostname  # must run before "Set hostname", which reads this file
+  template: src=templates/hostname dest=/etc/hostname owner=root group=root mode=0644 unsafe_writes=yes
+
+- name: Set hostname  # applies the name just written to /etc/hostname
+  shell: "hostname -F /etc/hostname"
+
+- name: Update /etc/hosts
+  template: src=templates/hosts dest=/etc/hosts owner=root group=root mode=0644 force=yes unsafe_writes=yes
+
+- name: Run secure_ssh.sh script
+  script: files/secure_ssh.sh
+
+- name: Install packages  # Debian-family hosts only; the script uses apt
+  script: files/install-packages.sh
+  when: (ansible_facts['distribution'] == "Ubuntu") or (ansible_facts['distribution'] == "Debian")
+
+#- name: Include GPU Configuration
+#  include_tasks: include_gpu.yaml
+#  when: (ansible_facts['distribution'] == "Ubuntu") or (ansible_facts['distribution']
== "Debian")
+#  tags:
+#    - k3s
+#    - gpu
+#    - nvidia
+
+#- name: enable ip_forward
+#  shell: 'echo "net.ipv4.ip_forward = 1" >> /etc/sysctl.conf; echo "net.ipv4.conf.all.proxy_arp = 1" >> /etc/sysctl.conf ; sysctl -p /etc/sysctl.conf'
+
+
+#- name: Install packages
+#  shell: "yum makecache && yum install -y audit container-selinux"
+#  when: (ansible_facts['distribution'] != "Ubuntu") or (ansible_facts['distribution'] != "Debian")
diff --git a/playbooks/roles/vhosts/common/tasks/set_hostname.yaml b/playbooks/roles/vhosts/common/tasks/set_hostname.yaml
new file mode 100644
index 0000000..edd3dda
--- /dev/null
+++ b/playbooks/roles/vhosts/common/tasks/set_hostname.yaml
@@ -0,0 +1,13 @@
+- name: Check if hostnamectl is available  # pure probe: never reports "changed" or "failed"; only rc is used below
+  command: which hostnamectl
+  register: hostnamectl_check
+  failed_when: false
+  changed_when: false
+
+- name: Set hostname using hostnamectl if available
+  shell: "hostnamectl set-hostname {{ inventory_hostname }}"
+  when: hostnamectl_check.rc == 0
+
+- name: Set hostname using hostname -F if hostnamectl is not available  # fallback: read the name from /etc/hostname
+  shell: "hostname -F /etc/hostname"
+  when: hostnamectl_check.rc != 0
diff --git a/playbooks/roles/vhosts/common/tasks/set_timezone.yaml b/playbooks/roles/vhosts/common/tasks/set_timezone.yaml
new file mode 100644
index 0000000..1c85cd7
--- /dev/null
+++ b/playbooks/roles/vhosts/common/tasks/set_timezone.yaml
@@ -0,0 +1,2 @@
+- name: Set timezone
+  shell: "timedatectl set-timezone Asia/Shanghai"
diff --git a/playbooks/roles/vhosts/common/templates/authorized_keys b/playbooks/roles/vhosts/common/templates/authorized_keys
new file mode 100755
index 0000000..f7bb4d5
--- /dev/null
+++ b/playbooks/roles/vhosts/common/templates/authorized_keys
@@ -0,0 +1,3 @@
+{% for item in ssh_keys %}
+{{ item }}
+{% endfor %}
diff --git a/playbooks/roles/vhosts/common/templates/hostname b/playbooks/roles/vhosts/common/templates/hostname
new file mode 100755
index 0000000..1fad51f
--- /dev/null
+++ b/playbooks/roles/vhosts/common/templates/hostname
@@ -0,0 +1 @@
+{{
inventory_hostname }}
diff --git a/playbooks/roles/vhosts/common/templates/hosts b/playbooks/roles/vhosts/common/templates/hosts
new file mode 100644
index 0000000..2a13249
--- /dev/null
+++ b/playbooks/roles/vhosts/common/templates/hosts
@@ -0,0 +1,26 @@
+# IPv4 localhost configuration
+127.0.0.1 localhost localhost.localdomain localhost4 localhost4.localdomain4
+
+# IPv6 localhost configuration
+::1 localhost localhost.localdomain localhost6 localhost6.localdomain6
+
+# IPv6 Local addresses (desirable for IPv6 capable hosts)
+::1 ip6-localhost ip6-loopback
+fe00::0 ip6-localnet
+ff00::0 ip6-mcastprefix
+ff02::1 ip6-allnodes
+ff02::2 ip6-allrouters
+
+{{ ansible_default_ipv4.address }} {{ inventory_hostname }}
+
+{% if enable_all_hosts_update is defined and enable_all_hosts_update %}
+  {% for item in groups['all'] %}
+  {{ hostvars[item]['ansible_host'] }} {{ item }}
+  {% endfor %}
+{% endif %}
+
+{% if extra_domain is defined %}
+{% for ip, domain_name in extra_domain.items() %}
+{{ ip }} {{ domain_name }}
+{% endfor %}
+{% endif %}
diff --git a/playbooks/roles/vhosts/common/templates/journald_logrotate.j2 b/playbooks/roles/vhosts/common/templates/journald_logrotate.j2
new file mode 100644
index 0000000..3bb8d1e
--- /dev/null
+++ b/playbooks/roles/vhosts/common/templates/journald_logrotate.j2
@@ -0,0 +1,9 @@
+[Journal]
+# Maximum disk space the system journal may use
+SystemMaxUse={{ journald_log_rotation.system_max_use }}
+# Maximum number of journal files kept
+SystemMaxFiles={{ journald_log_rotation.max_files }}
+# Maximum time span stored in one journal file before it is rotated
+MaxFileSec={{ journald_log_rotation.max_file_sec }}
+# Maximum space the runtime journal may use
+RuntimeMaxUse={{ journald_log_rotation.runtime_max_use }}
diff --git a/playbooks/roles/vhosts/common/templates/logrotate-monitor-agent b/playbooks/roles/vhosts/common/templates/logrotate-monitor-agent
new file mode 100644
index 0000000..b4120ce
--- /dev/null
+++ b/playbooks/roles/vhosts/common/templates/logrotate-monitor-agent
@@ -0,0 +1,8 @@
+/var/log/prometheus-agent.log
+/var/log/prometheus-transfer.log {
+    rotate 12
+    monthly
+    compress
+    missingok
+    notifempty
+}
diff --git a/playbooks/roles/vhosts/common/templates/rsyslog_logrotate.j2 b/playbooks/roles/vhosts/common/templates/rsyslog_logrotate.j2
new file mode 100644
index 0000000..f31334b
--- /dev/null
+++ b/playbooks/roles/vhosts/common/templates/rsyslog_logrotate.j2
@@ -0,0 +1,24 @@
+# /var/log/mail* covers every log file whose name starts with "mail". Comments must sit on their own line, outside the path list.
+/var/log/syslog
+/var/log/mail*
+/var/log/daemon.log
+/var/log/kern.log
+/var/log/auth.log
+/var/log/user.log
+/var/log/lpr.log
+/var/log/cron.log
+/var/log/debug
+/var/log/messages
+{
+    rotate {{ rsyslog_log_rotation.rotate_count }}
+    {{ rsyslog_log_rotation.rotate_frequency }}
+    missingok
+    notifempty
+    compress
+    delaycompress
+    sharedscripts
+    postrotate
+        /usr/lib/rsyslog/rsyslog-rotate
+    endscript
+    maxsize {{ rsyslog_log_rotation.max_log_size }}
+}
diff --git a/playbooks/roles/vhosts/k3s-addon/files/setup-argocd.sh b/playbooks/roles/vhosts/k3s-addon/files/setup-argocd.sh
new file mode 100644
index 0000000..3374e34
--- /dev/null
+++ b/playbooks/roles/vhosts/k3s-addon/files/setup-argocd.sh
@@ -0,0 +1,102 @@
+#!/bin/bash
+
+# Check that a parameter is not empty
+check_not_empty() {
+  if [[ -z $1 ]]; then
+    echo "Error: $2 is empty. Please provide a value."
+    exit 1
+  fi
+}
+
+helm repo add argo https://argoproj.github.io/argo-helm
+helm repo update
+
+# Deploy Argo CD with Helm
+#helm upgrade --install argocd argo/argo-cd -n argocd --create-namespace
+
+cat > values.yaml <<'EOF'
+global:
+  domain: argocd.onwalk.net
+server:
+  service:
+    type: NodePort
+    nodePortHttp: 80
+    nodePortHttps: 443
+    servicePortHttp: 80
+    servicePortHttps: 443
+    servicePortHttpName: http
+    servicePortHttpsName: https
+  ingress:
+    enabled: false
+    ingressClassName: "nginx"
+    hostname: argocd.onwalk.net
+    annotations:
+      nginx.ingress.kubernetes.io/force-ssl-redirect: "true"
+      nginx.ingress.kubernetes.io/backend-protocol: "HTTP"
+    tls: true
+repoServer:
+  extraContainers:
+    - name: helmfile
+      image: ghcr.io/helmfile/helmfile:v0.157.0
+      # Entrypoint should be Argo CD lightweight CMP server i.e. argocd-cmp-server
+      command: ["/var/run/argocd/argocd-cmp-server"]
+      env:
+        - name: HELM_CACHE_HOME
+          value: /tmp/helm/cache
+        - name: HELM_CONFIG_HOME
+          value: /tmp/helm/config
+        - name: HELMFILE_CACHE_HOME
+          value: /tmp/helmfile/cache
+        - name: HELMFILE_TEMPDIR
+          value: /tmp/helmfile/tmp
+      securityContext:
+        runAsNonRoot: true
+        runAsUser: 999
+      volumeMounts:
+        - mountPath: /var/run/argocd
+          name: var-files
+        - mountPath: /home/argocd/cmp-server/plugins
+          name: plugins
+        # Register helmfile plugin into sidecar
+        - mountPath: /home/argocd/cmp-server/config/plugin.yaml
+          subPath: helmfile.yaml
+          name: argocd-cmp-cm
+        # Starting with v2.4, do NOT mount the same tmp volume as the repo-server container. The filesystem separation helps mitigate path traversal attacks.
+        - mountPath: /tmp
+          name: helmfile-tmp
+  volumes:
+    - name: argocd-cmp-cm
+      configMap:
+        name: argocd-cmp-cm
+    - name: helmfile-tmp
+      emptyDir: {}
+configs:
+  cmp:
+    create: true
+    plugins:
+      helmfile:
+        allowConcurrency: true
+        discover:
+          fileName: helmfile.yaml
+        generate:
+          command:
+            - bash
+            - "-c"
+            - |
+              if [[ -v ENV_NAME ]]; then
+                helmfile -n "$ARGOCD_APP_NAMESPACE" -e $ENV_NAME template --include-crds -q
+              elif [[ -v ARGOCD_ENV_ENV_NAME ]]; then
+                helmfile -n "$ARGOCD_APP_NAMESPACE" -e "$ARGOCD_ENV_ENV_NAME" template --include-crds -q
+              else
+                helmfile -n "$ARGOCD_APP_NAMESPACE" template --include-crds -q
+              fi
+        lockRepo: false
+EOF
+
+helm upgrade --install argocd argo/argo-cd -n argocd --create-namespace -f values.yaml
+
+# Wait until Argo CD has fully started
+echo "Waiting for Argo CD to be ready..."
+kubectl wait --for=condition=ready pod -l app.kubernetes.io/name=argocd-server -n argocd --timeout=180s
+
+echo "Argo CD deployment and configuration complete."
diff --git a/playbooks/roles/vhosts/k3s-addon/files/setup-dns-provider.sh b/playbooks/roles/vhosts/k3s-addon/files/setup-dns-provider.sh
new file mode 100644
index 0000000..beb17d5
--- /dev/null
+++ b/playbooks/roles/vhosts/k3s-addon/files/setup-dns-provider.sh
@@ -0,0 +1,36 @@
+#!/bin/bash
+
+# Check that a parameter is not empty
+check_not_empty() {
+  if [[ -z $1 ]]; then
+    echo "Error: $2 is empty. Please provide a value."
+    exit 1
+  fi
+}
+
+# Validate the three positional arguments: DNS access key, DNS secret key, domain
+check_not_empty "$1" "DNS_AK" && DNS_AK=$1
+check_not_empty "$2" "DNS_SK" && DNS_SK=$2
+check_not_empty "$3" "DOMAIN" && DOMAIN=$3
+
+# Deploy external-dns
+cat > external-dns-values.yaml << EOF
+clusterDomain: admin.local
+sources:
+  - service
+  - ingress
+domainFilters:
+  - $DOMAIN
+policy: upsert-only
+provider: alibabacloud
+alibabacloud:
+  accessKeyId: $DNS_AK
+  accessKeySecret: $DNS_SK
+  regionId: rg-acfm2akhd255pgi
+  zoneType: public
+EOF
+
+helm repo add bitnami https://charts.bitnami.com/bitnami || echo true  # tolerate failure, e.g. the repo is already added
+helm repo update
+kubectl create namespace external-dns || echo true  # tolerate failure, e.g. the namespace already exists
+helm upgrade --install external-dns -f external-dns-values.yaml bitnami/external-dns -n external-dns  # NOTE(review): the values file holding the access key stays in the working directory
diff --git a/playbooks/roles/vhosts/k3s-addon/files/setup-egress.sh b/playbooks/roles/vhosts/k3s-addon/files/setup-egress.sh
new file mode 100644
index 0000000..998b079
--- /dev/null
+++ b/playbooks/roles/vhosts/k3s-addon/files/setup-egress.sh
@@ -0,0 +1,24 @@
+#!/bin/bash
+ip=$1  # egress IP written into the policy
+namespace=$2  # namespace whose pods labelled role=egress-gateway are selected
+
+cat > /tmp/egress.yaml << EOF
+apiVersion: cilium.io/v2
+kind: CiliumEgressGatewayPolicy
+metadata:
+  name: egress-nat-policy
+spec:
+  selectors:
+  - podSelector:
+      matchLabels:
+        role: egress-gateway
+        io.kubernetes.pod.namespace: $namespace
+  destinationCIDRs:
+  - "0.0.0.0/0"
+  egressGateway:
+    nodeSelector:
+      matchLabels:
+        node.kubernetes.io/name: tky-connector.onwalk.net
+    egressIP: $ip
+EOF
+kubectl apply -f /tmp/egress.yaml
diff --git a/playbooks/roles/vhosts/k3s-addon/files/setup-flagger.sh b/playbooks/roles/vhosts/k3s-addon/files/setup-flagger.sh
new file mode 100644
index 0000000..ac8011a
--- /dev/null
+++ b/playbooks/roles/vhosts/k3s-addon/files/setup-flagger.sh
@@ -0,0 +1,21 @@
+#!/bin/bash
+
+# Check that a parameter is not empty
+check_not_empty() {
+  if [[ -z $1 ]]; then
+    echo "Error: $2 is empty. Please provide a value."
+    exit 1
+  fi
+}
+
+# Validate the single positional argument: the base domain
+check_not_empty "$1" "DOMAIN" && DOMAIN=$1
+
+helm repo add flagger https://flagger.app
+helm repo update
+kubectl create ns ingress || echo true  # tolerate failure, e.g. the namespace already exists
+helm upgrade -i flagger flagger/flagger \
+--namespace ingress \
+--set prometheus.install=false \
+--set meshProvider=nginx \
+--set metricsServer="https://prometheus.${DOMAIN}"
diff --git a/playbooks/roles/vhosts/k3s-addon/files/setup-fluxcd.sh b/playbooks/roles/vhosts/k3s-addon/files/setup-fluxcd.sh
new file mode 100644
index 0000000..eea72c4
--- /dev/null
+++ b/playbooks/roles/vhosts/k3s-addon/files/setup-fluxcd.sh
@@ -0,0 +1,46 @@
+#!/bin/bash
+
+# Check that a parameter is not empty
+check_not_empty() {
+  if [[ -z $1 ]]; then
+    echo "Error: $2 is empty. Please provide a value."
+    exit 1
+  fi
+}
+
+# Validate the positional arguments: Git repository URL and cluster name
+check_not_empty "$1" "Git repository URL" && git_repo=$1
+check_not_empty "$2" "Cluster name" && cluster_name=$2
+
+helm repo add fluxcd https://fluxcd-community.github.io/helm-charts
+helm repo update
+kubectl create namespace gitops-system || true  # tolerate failure, e.g. the namespace already exists
+helm upgrade --install fluxcd fluxcd/flux2 --version 2.12.1 -n gitops-system
+
+cat > cluster-config.yaml << EOF
+apiVersion: source.toolkit.fluxcd.io/v1beta2
+kind: GitRepository
+metadata:
+  name: stable
+  namespace: gitops-system
+spec:
+  interval: 1m0s
+  ref:
+    branch: main
+  url: $git_repo
+---
+apiVersion: kustomize.toolkit.fluxcd.io/v1beta2
+kind: Kustomization
+metadata:
+  name: cluster
+  namespace: gitops-system
+spec:
+  interval: 1m0s
+  sourceRef:
+    kind: GitRepository
+    name: stable
+  path: ./clusters/${cluster_name}
+  prune: true
+EOF
+
+kubectl apply -f cluster-config.yaml && rm cluster-config.yaml -f  # the generated manifest is removed only when apply succeeds
diff --git a/playbooks/roles/vhosts/k3s-addon/files/setup-ingress-apisix.sh b/playbooks/roles/vhosts/k3s-addon/files/setup-ingress-apisix.sh
new file mode 100644
index 0000000..495286a
--- /dev/null
+++ b/playbooks/roles/vhosts/k3s-addon/files/setup-ingress-apisix.sh
@@ -0,0 +1,38 @@
+#!/bin/bash
+
+ingress_ip=$1
+
+cat > values.yaml << EOF
+service:
+  type: NodePort
+  externalIPs:
+    - $ingress_ip
+  http:
+    enabled: true
+    servicePort: 80
+  tls:
+    servicePort: 443
+    nodePort: 443
+apisix:
+  ssl:
+    enabled: true
+  prometheus:
+    enabled: true
+ingress-controller:
+  enabled: true
+  config:
+    apisix:
+      serviceNamespace: "ingress"
+    kubernetes:
+      enableGatewayAPI: true
+metrics:
+  serviceMonitor:
+    enabled: true
+    namespace: "ingress"
+EOF
+
+helm repo add apisix https://charts.apiseven.com || echo true  # tolerate failure, e.g. the repo is already added
+helm repo update
+kubectl create ns ingress || echo true  # tolerate failure, e.g. the namespace already exists
+helm delete nginx -n ingress || echo true  # remove the "nginx" release, if present, before installing apisix
+helm upgrade --install apisix apisix/apisix --namespace ingress -f values.yaml
diff --git a/playbooks/roles/vhosts/k3s-addon/files/setup-ingress.sh b/playbooks/roles/vhosts/k3s-addon/files/setup-ingress.sh
new file mode 100644
index 0000000..1dfb5fb
--- /dev/null
+++ b/playbooks/roles/vhosts/k3s-addon/files/setup-ingress.sh
@@ -0,0 +1,145 @@
+#!/bin/bash
+ingress=$1  # ingress flavour; this script compares it against "default" and "apisix"
+ingress_ip=$2  # written into the ConfigMap as external-status-address
+
+if [[ $ingress == "default" ]]; then
+export KUBECONFIG=/etc/rancher/k3s/k3s.yaml
+helm repo add stable https://kubernetes.github.io/ingress-nginx
+helm repo up  # NOTE(review): the next line looks truncated; the hunk declares 145 lines and several here-document bodies appear to be missing. Restore from the original commit
+
+cat > value.yaml < svc-patch.yaml < value.yaml < nginx-cm.yaml << EOF
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: nginx-nginx-ingress
+  namespace: ingress
+data:
+  use-ssl-certificate-for-ingress: "false"
+  external-status-address: $ingress_ip
+  proxy-connect-timeout: 10s
+  proxy-read-timeout: 10s
+  client-header-buffer-size: 64k
+  client-body-buffer-size: 64k
+  client-max-body-size: 1000m
+  proxy-buffers: 8 32k
+  proxy-body-size: 1024m
+  proxy-buffer-size: 32k
+  proxy-connect-timeout: 10s
+  proxy-read-timeout: 10s
+EOF
+
+cat > nginx-svc-patch.yaml << EOF
+spec:
+  ports:
+  - name: http
+    nodePort: 80
+    port: 80
+    protocol: TCP
+    targetPort: 80
+  - name: https
+    nodePort: 443
+    port: 443
+    protocol: TCP
+    targetPort: 443
+EOF
+
+helm repo add nginx-stable https://helm.nginx.com/stable || echo true  # tolerate failure, e.g. the repo is already added
+helm repo up
+helm delete apisix -n ingress || echo true  # remove the "apisix" release, if present, before installing nginx
+kubectl create namespace ingress || echo true  # tolerate failure, e.g. the namespace already exists
+helm upgrade --install nginx nginx-stable/nginx-ingress --version=0.15.0 --namespace ingress -f value.yaml
+kubectl apply -f nginx-cm.yaml
+kubectl patch svc nginx-nginx-ingress -n ingress --patch-file nginx-svc-patch.yaml  # applies the nodePort 80/443 patch written above
+
+elif [[ $ingress == "apisix" ]]; then  # same steps as the standalone setup-ingress-apisix.sh in this patch
+
+cat > values.yaml << EOF
+service:
+  type: NodePort
+  externalIPs:
+    - $ingress_ip
+  http:
+    enabled: true
+    servicePort: 80
+  tls:
+    servicePort: 443
+    nodePort: 443
+apisix:
+  ssl:
+    enabled: true
+  prometheus:
+    enabled: true
+ingress-controller:
+  enabled: true
+  config:
+    apisix:
+      serviceNamespace: "ingress"
+    kubernetes:
+      enableGatewayAPI: true
+metrics:
+  serviceMonitor:
+    enabled: true
+    namespace: "ingress"
+EOF
+
+helm repo add apisix https://charts.apiseven.com || echo true
+helm repo update
+kubectl create ns ingress || echo true
+helm delete nginx -n ingress || echo true  # remove the "nginx" release, if present, before installing apisix
+helm upgrade --install apisix apisix/apisix --namespace ingress -f values.yaml
+
+fi
diff --git a/playbooks/roles/vhosts/k3s-addon/files/setup-keda-operator.sh b/playbooks/roles/vhosts/k3s-addon/files/setup-keda-operator.sh
new file mode 100644
index 0000000..0ee63ac
--- /dev/null
+++ b/playbooks/roles/vhosts/k3s-addon/files/setup-keda-operator.sh
@@ -0,0 +1,6 @@
+#!/bin/bash
+
+helm repo add kedacore https://kedacore.github.io/charts
+helm repo update
+kubectl create namespace kube-system || true  # failure is ignored
+helm upgrade --install keda kedacore/keda --namespace kube-system
diff --git a/playbooks/roles/vhosts/k3s-addon/files/setup-prometheus-operator.sh b/playbooks/roles/vhosts/k3s-addon/files/setup-prometheus-operator.sh
new file mode 100644
index 0000000..c5ed2a0
--- /dev/null
+++ b/playbooks/roles/vhosts/k3s-addon/files/setup-prometheus-operator.sh
@@ -0,0 +1,52 @@
+#!/bin/bash
+
+# Check that a parameter is not empty
+check_not_empty() {
+  if [[ -z $1 ]]; then
+    echo "Error: $2 is empty. Please provide a value."
+ exit 1 + fi +} + +# Require the DOMAIN argument (first positional parameter) before continuing +check_not_empty "$1" "DOMAIN" && DOMAIN=$1 + +cat > prometheus-values.yaml << EOF +global: + imageRegistry: "artifact.onwalk.net/base" +prometheus: + enabled: true + agentMode: false + prometheusSpec: + remoteWrite: + - name: remote_prometheus + url: 'https://prometheus.${DOMAIN}/api/v1/write' + retention: 30m + resources: + requests: + cpu: 200m + memory: 200Mi + podMonitorNamespaceSelector: { } + podMonitorSelector: + matchLabels: + app.kubernetes.io/component: monitoring +nodeExporter: + enabled: true +kubeStateMetrics: + enabled: true +grafana: + enabled: false +prometheus-windows-exporter: + enabled: false +alertmanager: + enabled: false +defaultRules: + create: false +EOF + +node_name=`kubectl get nodes | awk 'NR>1 {print $1}'` +kubectl create namespace monitoring || echo true +kubectl label nodes $node_name prometheus=true --overwrite || echo true +helm repo add prometheus-community https://prometheus-community.github.io/helm-charts +helm repo update +helm upgrade --install prometheus-agent prometheus-community/kube-prometheus-stack --version 55.11.0 -n monitoring -f prometheus-values.yaml diff --git a/playbooks/roles/vhosts/k3s-addon/meta/main.yml b/playbooks/roles/vhosts/k3s-addon/meta/main.yml new file mode 100644 index 0000000..83cef7b --- /dev/null +++ b/playbooks/roles/vhosts/k3s-addon/meta/main.yml @@ -0,0 +1,2 @@ +dependencies: + - role: cert-manager diff --git a/playbooks/roles/vhosts/k3s-addon/tasks/main.yml b/playbooks/roles/vhosts/k3s-addon/tasks/main.yml new file mode 100755 index 0000000..a8a61e7 --- /dev/null +++ b/playbooks/roles/vhosts/k3s-addon/tasks/main.yml @@ -0,0 +1,15 @@ +- name: Enable nginx Ingress + script: files/setup-ingress.sh {{ ingress }} {{ ingress_ip }} + when: inventory_hostname in groups[group] and ( ingress == 'nginx' ) +- name: Remove nginx ingress + shell: 'helm delete nginx -n ingress || true ; helm delete apisix -n ingress || true ;' + when: ( inventory_hostname in groups[group] ) and 
(ingress == 'disable' ) + ignore_errors: yes + +- name: Setup DNS Provider + script: files/setup-dns-provider.sh {{ dns_ak }} {{ dns_sk }} {{ domain }} + when: ( inventory_hostname in groups[group] ) and (external_dns == 'enable' ) +- name: Remove DNS Provider + shell: 'helm delete external-dns -n external-dns' + when: ( inventory_hostname in groups[group] ) and (external_dns == 'disable' ) + ignore_errors: yes diff --git a/playbooks/roles/vhosts/k3s-addon/templates/ingress-apisix-dashboard.yaml b/playbooks/roles/vhosts/k3s-addon/templates/ingress-apisix-dashboard.yaml new file mode 100644 index 0000000..b43c0e0 --- /dev/null +++ b/playbooks/roles/vhosts/k3s-addon/templates/ingress-apisix-dashboard.yaml @@ -0,0 +1,33 @@ +apiVersion: apisix.apache.org/v2 +kind: ApisixRoute +metadata: + name: apisix-dashboard + namespace: ingress +spec: + http: + - name: root + match: + hosts: + - apisix-dashboard.onwalk.net + paths: + - '/*' + backends: + - serviceName: apisix-dashboard + servicePort: 80 + plugins: + - config: + http_to_https: true + enable: true + name: redirect +--- +apiVersion: apisix.apache.org/v2 +kind: ApisixTls +metadata: + name: apisix + namespace: ingress +spec: + hosts: + - apisix-dashboard.onwalk.net + secret: + name: apisix-tls + namespace: ingress diff --git a/playbooks/roles/vhosts/k3s-addon/templates/ingress-apisix-values.yaml b/playbooks/roles/vhosts/k3s-addon/templates/ingress-apisix-values.yaml new file mode 100644 index 0000000..24f386a --- /dev/null +++ b/playbooks/roles/vhosts/k3s-addon/templates/ingress-apisix-values.yaml @@ -0,0 +1,24 @@ +ingress-controller: + enabled: true + config: + apisix: + serviceNamespace: ingress +etcd: + replicaCount: 1 +discovery: + enabled: true +admin: + enabled: true +gateway: + enabled: true + type: NodePort + http: + enabled: true + nodePort: 80 + tls: + enabled: true + nodePort: 443 + externalIPs: + - {{ ingress_ip }} +dashboard: + enabled: true diff --git 
a/playbooks/roles/vhosts/k3s-addon/templates/kubernetes-discovery-config.yaml b/playbooks/roles/vhosts/k3s-addon/templates/kubernetes-discovery-config.yaml new file mode 100644 index 0000000..d382fcb --- /dev/null +++ b/playbooks/roles/vhosts/k3s-addon/templates/kubernetes-discovery-config.yaml @@ -0,0 +1,65 @@ +apiVersion: apisix.apache.org/v2 +kind: ApisixUpstream +metadata: + name: bookinfo-upstream + namespace: bookinfo +spec: + discovery: + type: kubernetes + serviceName: apisix/bookinfo/productpage:9080 +--- +apiVersion: apisix.apache.org/v2 +kind: ApisixRoute +metadata: + name: bookinfo + namespace: bookinfo +spec: + http: + - name: root + match: + hosts: + - bookinfo.onwalk.net + paths: + - /* + upstreams: + - name: bookinfo-upstream + plugins: + - config: + http_to_https: true + enable: true + name: redirect +--- +apiVersion: apisix.apache.org/v2 +kind: ApisixTls +metadata: + name: bookinfo + namespace: bookinfo +spec: + hosts: + - bookinfo.onwalk.net + secret: + name: bookinfo-tls + namespace: bookinfo +--- +curl -k --header "Authorization: Bearer tokenxxxxx" https://10.170.0.8:6443/api +--- +kubectl get secret kubernetes-discovery-token -o jsonpath={.data.token} | base64 -d +--- +kubectl edit cm -n ingress apisix + discovery: + kubernetes: + - id: apisix + service: + schema: https + host: "10.170.0.6" + port: "6443" + client: + token: |- + #xxxxxxxxxxxxxxx + default_weight: 50 + namespace_selector: + match: + - bookinfo + - nginx + shared_size: 1m +--- diff --git a/playbooks/roles/vhosts/k3s-addon/templates/kubernetes-discovery-serviceaccount.yaml b/playbooks/roles/vhosts/k3s-addon/templates/kubernetes-discovery-serviceaccount.yaml new file mode 100644 index 0000000..734f0df --- /dev/null +++ b/playbooks/roles/vhosts/k3s-addon/templates/kubernetes-discovery-serviceaccount.yaml @@ -0,0 +1,40 @@ +kind: ServiceAccount +apiVersion: v1 +metadata: + name: kubernetes-discovery +--- +apiVersion: v1 +kind: Secret +metadata: + name: kubernetes-discovery-token + 
annotations: + kubernetes.io/service-account.name: "kubernetes-discovery" +type: kubernetes.io/service-account-token +--- +kind: ClusterRole +apiVersion: rbac.authorization.k8s.io/v1 +metadata: + name: kubernetes-discovery +rules: +- apiGroups: [""] + resources: ["endpoints" ] + verbs: ["get", "list", "watch" ] +- apiGroups: [""] + resources: [ "namespaces"] + verbs: ["get", "list", "watch"] +- apiGroups: [""] + resources: ["services", "endpoints"] + verbs: ["get", "list", "watch"] +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: kubernetes-discovery +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: kubernetes-discovery +subjects: +- kind: ServiceAccount + name: kubernetes-discovery + namespace: default diff --git a/playbooks/roles/vhosts/k3s-addon/templates/kubernetes-discovery.yaml b/playbooks/roles/vhosts/k3s-addon/templates/kubernetes-discovery.yaml new file mode 100644 index 0000000..3c5cca9 --- /dev/null +++ b/playbooks/roles/vhosts/k3s-addon/templates/kubernetes-discovery.yaml @@ -0,0 +1,47 @@ +kind: ServiceAccount +apiVersion: v1 +metadata: + name: kubernetes-discovery + namespace: default +--- + +kind: ClusterRole +apiVersion: rbac.authorization.k8s.io/v1 +metadata: + name: kubernetes-discovery +rules: +- apiGroups: [ "" ] + resources: [ endpoints ] + verbs: [ get,list,watch ] +--- + +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: kubernetes-discovery +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: kubernetes-discovery +subjects: + - kind: ServiceAccount + name: kubernetes-discovery + namespace: default +--- +#discovery: +# kubernetes: +# - id: release # a custom name refer to the cluster, pattern ^[a-z0-9]{1,8} +# service: +# schema: https #default https +# host: "1.cluster.com" +# port: "6443" +# client: +# #token: |- +# # eyJhbGciOiJSUzI1NiIsImtpZCI6Ikx5ME1DNWdnbmhQNkZCNlZYMXBsT3pYU3BBS2swYzBPSkN3ZnBESGpkUEEif +# # 
6Ikx5ME1DNWdnbmhQNkZCNlZYMXBsT3pYU3BBS2swYzBPSkN3ZnBESGpkUEEifeyJhbGciOiJSUzI1NiIsImtpZCI +# default_weight: 50 # weight assigned to each discovered endpoint. default 50, minimum 0 +# namespace_selector: +# equal: default +# label_selector: |- +# first="a",second="b" +# shared_size: 1m #default 1m diff --git a/playbooks/roles/vhosts/k3s-cluster-agent/defaults/main.yml b/playbooks/roles/vhosts/k3s-cluster-agent/defaults/main.yml new file mode 100644 index 0000000..0b79c3e --- /dev/null +++ b/playbooks/roles/vhosts/k3s-cluster-agent/defaults/main.yml @@ -0,0 +1 @@ +# Default values for k3s-cluster-agent role diff --git a/playbooks/roles/vhosts/k3s-cluster-agent/tasks/bootstrap.yml b/playbooks/roles/vhosts/k3s-cluster-agent/tasks/bootstrap.yml new file mode 100644 index 0000000..e69de29 diff --git a/playbooks/roles/vhosts/k3s-cluster-agent/tasks/destroy.yml b/playbooks/roles/vhosts/k3s-cluster-agent/tasks/destroy.yml new file mode 100644 index 0000000..e69de29 diff --git a/playbooks/roles/vhosts/k3s-cluster-agent/tasks/main.yml b/playbooks/roles/vhosts/k3s-cluster-agent/tasks/main.yml new file mode 100644 index 0000000..ffeb04b --- /dev/null +++ b/playbooks/roles/vhosts/k3s-cluster-agent/tasks/main.yml @@ -0,0 +1,2 @@ +- name: Execute action on K3s cluster agent + include_tasks: "{{ action }}.yml" diff --git a/playbooks/roles/vhosts/k3s-cluster-agent/tasks/upgrade.yml b/playbooks/roles/vhosts/k3s-cluster-agent/tasks/upgrade.yml new file mode 100644 index 0000000..e69de29 diff --git a/playbooks/roles/vhosts/k3s-cluster-agent/templates/install_k3s_agent.sh.j2 b/playbooks/roles/vhosts/k3s-cluster-agent/templates/install_k3s_agent.sh.j2 new file mode 100644 index 0000000..a91e43a --- /dev/null +++ b/playbooks/roles/vhosts/k3s-cluster-agent/templates/install_k3s_agent.sh.j2 @@ -0,0 +1,3 @@ +#!/bin/bash + +curl -sfL https://rancher-mirror.rancher.cn/k3s/k3s-install.sh | INSTALL_K3S_MIRROR=cn K3S_URL=https://{{ agent.k3s_url }}:6443 K3S_TOKEN={{ agent.server_token }} 
INSTALL_K3S_EXEC="{{ agent.extra_vars }}" sh - diff --git a/playbooks/roles/vhosts/k3s-cluster-agent/vars/main.yml b/playbooks/roles/vhosts/k3s-cluster-agent/vars/main.yml new file mode 100644 index 0000000..1e61379 --- /dev/null +++ b/playbooks/roles/vhosts/k3s-cluster-agent/vars/main.yml @@ -0,0 +1,5 @@ +action: 'bootstrap' +agent: + node_ip: '10.254.0.1' + server_token: 'your_server_token' + extra_vars: '--node-label deployment=true --node-external-ip 110.42.238.110 --node-ip {{ agent.node_ip }} --flannel-iface wg0' diff --git a/playbooks/roles/vhosts/k3s-cluster-server/defaults/main.yml b/playbooks/roles/vhosts/k3s-cluster-server/defaults/main.yml new file mode 100644 index 0000000..1b488a9 --- /dev/null +++ b/playbooks/roles/vhosts/k3s-cluster-server/defaults/main.yml @@ -0,0 +1 @@ +# Default values for k3s-cluster-server role diff --git a/playbooks/roles/vhosts/k3s-cluster-server/tasks/add-master.yml b/playbooks/roles/vhosts/k3s-cluster-server/tasks/add-master.yml new file mode 100644 index 0000000..e69de29 diff --git a/playbooks/roles/vhosts/k3s-cluster-server/tasks/backup.yml b/playbooks/roles/vhosts/k3s-cluster-server/tasks/backup.yml new file mode 100644 index 0000000..e69de29 diff --git a/playbooks/roles/vhosts/k3s-cluster-server/tasks/bootstrap.yml b/playbooks/roles/vhosts/k3s-cluster-server/tasks/bootstrap.yml new file mode 100644 index 0000000..e69de29 diff --git a/playbooks/roles/vhosts/k3s-cluster-server/tasks/destroy.yml b/playbooks/roles/vhosts/k3s-cluster-server/tasks/destroy.yml new file mode 100644 index 0000000..e69de29 diff --git a/playbooks/roles/vhosts/k3s-cluster-server/tasks/main.yml b/playbooks/roles/vhosts/k3s-cluster-server/tasks/main.yml new file mode 100644 index 0000000..388f080 --- /dev/null +++ b/playbooks/roles/vhosts/k3s-cluster-server/tasks/main.yml @@ -0,0 +1,2 @@ +- name: Execute action on K3s cluster server + include_tasks: "{{ action }}.yml" diff --git a/playbooks/roles/vhosts/k3s-cluster-server/tasks/recovery.yml 
b/playbooks/roles/vhosts/k3s-cluster-server/tasks/recovery.yml new file mode 100644 index 0000000..e69de29 diff --git a/playbooks/roles/vhosts/k3s-cluster-server/tasks/upgrade.yml b/playbooks/roles/vhosts/k3s-cluster-server/tasks/upgrade.yml new file mode 100644 index 0000000..e69de29 diff --git a/playbooks/roles/vhosts/k3s-cluster-server/templates/install_k3s_server.sh.j2 b/playbooks/roles/vhosts/k3s-cluster-server/templates/install_k3s_server.sh.j2 new file mode 100644 index 0000000..ae9b8ef --- /dev/null +++ b/playbooks/roles/vhosts/k3s-cluster-server/templates/install_k3s_server.sh.j2 @@ -0,0 +1,3 @@ +#!/bin/bash + +INSTALL_K3S_SKIP_DOWNLOAD=true bash /usr/local/share/k3s/install.sh -s - --disable={{ cluster.server_disable }} --token='{{ cluster.token }}' --datastore-endpoint='{{ cluster.datastore_endpoint }}' --system-default-registry '{{ cluster.registry }}' --data-dir='{{ cluster.data_dir }}' --kube-apiserver-arg '{{ cluster.apiserver_arg }}' --bind-address='{{ cluster.bind_address }}' --tls-san='{{ cluster.tls_san }}' --advertise-address='{{ cluster.advertise_address }}' --node-ip='{{ cluster.node_ip }}' --node-external-ip '{{ cluster.node_external_ip }}' --flannel-iface '{{ cluster.flannel_iface }}' --cluster-cidr '{{ cluster.cluster_cidr }}' --service-cidr '{{ cluster.service_cidr }}' diff --git a/playbooks/roles/vhosts/k3s-cluster-server/vars/main.yml b/playbooks/roles/vhosts/k3s-cluster-server/vars/main.yml new file mode 100644 index 0000000..45f4838 --- /dev/null +++ b/playbooks/roles/vhosts/k3s-cluster-server/vars/main.yml @@ -0,0 +1,17 @@ +action: 'bootstrap' +cluster: + name: 'cn-k3s-cluster-1' + token: 'your_default_token' + server_disable: "traefik,servicelb" + datastore_endpoint: "mysql://user:password@tcp(database_url:3306)/k3s" + registry: "registry.cn-hangzhou.aliyuncs.com" + data_dir: "/opt/rancher/k3s" + apiserver_arg: "service-node-port-range=0-50000" + bind_address: "0.0.0.0" + tls_san: "cn-k3s-server.svc.plus" + advertise_address: 
"8.130.93.47" + node_ip: "10.254.0.3" + node_external_ip: "8.130.93.47" + flannel_iface: "wg0" + cluster_cidr: "10.42.0.0/16" + service_cidr: "10.43.0.0/16" diff --git a/playbooks/roles/vhosts/k3s-reset/files/reset-k3s.sh b/playbooks/roles/vhosts/k3s-reset/files/reset-k3s.sh new file mode 100644 index 0000000..719a657 --- /dev/null +++ b/playbooks/roles/vhosts/k3s-reset/files/reset-k3s.sh @@ -0,0 +1,32 @@ +#!/bin/bash + +wget https://raw.githubusercontent.com/kubeovn/kube-ovn/release-1.10/dist/images/cleanup.sh +bash cleanup.sh + +rm -rf /var/run/openvswitch +rm -rf /var/run/ovn +rm -rf /etc/origin/openvswitch/ +rm -rf /etc/origin/ovn/ +rm -rf /etc/cni/net.d/00-kube-ovn.conflist +rm -rf /etc/cni/net.d/01-kube-ovn.conflist +rm -rf /var/log/openvswitch +rm -rf /var/log/ovn +rm -fr /var/log/kube-ovn + +/usr/local/bin/k3s-uninstall.sh +rm -rvf /opt/rancher/ /etc/rancher/ /var/lib/rancher/ ~/.kube + +rm -rvf /etc/cni/net.d/* + +# Delete the cni-* network namespaces +ip netns show 2>/dev/null | grep cni- | xargs -r -t -n 1 ip netns delete +# Delete every interface enslaved to the cni0 bridge (strip the "@peer" suffix first) +ip link show 2>/dev/null | grep 'master cni0' | while read ignore iface ignore; do + iface=${iface%%@*} + [ -z "$iface" ] || ip link delete $iface +done +ip link delete cni0 +ip link delete flannel.1 +rm -rf /var/lib/cni/ +# Reload iptables without the KUBE-* and CNI-* entries +iptables-save | grep -v KUBE- | grep -v CNI- | iptables-restore diff --git a/playbooks/roles/vhosts/k3s-reset/tasks/main.yml b/playbooks/roles/vhosts/k3s-reset/tasks/main.yml new file mode 100755 index 0000000..f500dec --- /dev/null +++ b/playbooks/roles/vhosts/k3s-reset/tasks/main.yml @@ -0,0 +1,4 @@ +- name: Reset K3S Cluster + script: files/reset-k3s.sh + when: (inventory_hostname in groups[group] ) and ( cluster_reset == 'enable' ) + diff --git a/playbooks/roles/vhosts/k3s/files/setup-cni-cilium.sh b/playbooks/roles/vhosts/k3s/files/setup-cni-cilium.sh new file mode 100644 index 0000000..34c9ac2 --- /dev/null +++ b/playbooks/roles/vhosts/k3s/files/setup-cni-cilium.sh @@ -0,0 +1,19 @@ + +# Delete the cni-* network namespaces +ip netns 
show 2>/dev/null | grep cni- | xargs -r -t -n 1 ip netns delete +# Delete every interface enslaved to the cni0 bridge (strip the "@peer" suffix first) +ip link show 2>/dev/null | grep 'master cni0' | while read ignore iface ignore; do + iface=${iface%%@*} + [ -z "$iface" ] || ip link delete $iface +done +ip link delete cni0 +ip link delete flannel.1 +rm -rf /var/lib/cni/ +# Reload iptables without the KUBE-* and CNI-* entries +iptables-save | grep -v KUBE- | grep -v CNI- | iptables-restore + +helm repo add cilium https://helm.cilium.io/ +helm install cilium cilium/cilium --version 1.10.4 \ + --namespace kube-system\ + --set hubble.relay.enabled=true \ + --set hubble.ui.enabled=true diff --git a/playbooks/roles/vhosts/k3s/files/setup-cni-kubeovn.sh b/playbooks/roles/vhosts/k3s/files/setup-cni-kubeovn.sh new file mode 100644 index 0000000..b1f8139 --- /dev/null +++ b/playbooks/roles/vhosts/k3s/files/setup-cni-kubeovn.sh @@ -0,0 +1,17 @@ +#!/bin/bash +export NodeIP=$1 +node_name=`hostname` + +modprobe geneve +modprobe openvswitch +modprobe ip_tables +modprobe iptable_nat + +rm -rvf /etc/cni/net.d/* + +kubectl taint node $node_name node-role.kubernetes.io/control-plane:NoSchedule- +kubectl label node $node_name kubernetes.io/os=linux --overwrite +kubectl label node $node_name kube-ovn/role=master --overwrite +helm repo add kubeovn https://kubeovn.github.io/kube-ovn/ +helm repo up +helm upgrade --install kube-ovn kubeovn/kube-ovn --set MASTER_NODES=${NodeIP} -n kube-system diff --git a/playbooks/roles/vhosts/k3s/files/setup-k3s.sh b/playbooks/roles/vhosts/k3s/files/setup-k3s.sh new file mode 100644 index 0000000..7a6db79 --- /dev/null +++ b/playbooks/roles/vhosts/k3s/files/setup-k3s.sh @@ -0,0 +1,134 @@ +#!/bin/bash +set -x + +export version=$1 +export cni=$2 +export pod_cidr=$3 +export svc_cidr=$4 +export enable_api_access=$5 +export advertise_address=$6 + +function setup_k3s() +{ + local extra_opts=$1 + mkdir -pv /opt/rancher/k3s + + ping -c 1 google.com > /dev/null 2>&1 + if [ $? 
-eq 0 ]; then + echo "当前主机在国际网络上" + curl -sfL https://get.k3s.io | INSTALL_K3S_VERSION=$version sh -s - $extra_opts + else + echo "当前主机在大陆网络上" + curl -sfL https://rancher-mirror.rancher.cn/k3s/k3s-install.sh | INSTALL_K3S_VERSION=$version INSTALL_K3S_MIRROR=cn sh -s - $extra_opts + fi + mkdir -pv ~/.kube/ && cp /etc/rancher/k3s/k3s.yaml ~/.kube/config +} + +function setup_helm() +{ + ping -c 1 google.com > /dev/null 2>&1 + if [ $? -eq 0 ]; then + echo "当前主机在国际网络上" + curl https://raw.githubusercontent.com/helm/helm/main/scripts/get-helm-3 | bash + else + echo "当前主机在大陆网络上" + case `uname -m` in + x86_64) ARCH=amd64; ;; + aarch64) ARCH=arm64; ;; + loongarch64) ARCH=loongarch64; ;; + *) echo "un-supported arch, exit ..."; exit 1; ;; + esac + rm -rf helm.tar.gz* /usr/local/bin/helm || echo true + sudo wget --no-check-certificate https://mirrors.onwalk.net/tools/linux-${ARCH}/helm.tar.gz && sudo tar -xvpf helm.tar.gz -C /usr/local/bin/ + sudo chmod 755 /usr/local/bin/helm + fi +} + + +function set_apiserver_l4_proxy() +{ + sudo apt update && sudo apt install nginx -y +cat > /etc/nginx/sites-available/default << 'EOF' + +load_module /usr/lib64/nginx/modules/ngx_stream_module.so; + +worker_processes 4; +worker_rlimit_nofile 40000; + + +events { + worker_connections 8192; +} + +stream { + log_format logs '$remote_addr - - [$time_local] $protocol $status $bytes_sent $bytes_received $session_time "$upstream_addr"'; + + access_log /var/log/nginx/access.log logs; + + upstream K3s_api_server { + least_conn; + server 127.0.0.1:6443 max_fails=3 fail_timeout=5s; + } + server { + listen 8022; + server_name k3s-cluster.onwalk.net; + proxy_pass K3s_api_server; + } +} +EOF + sudo systemctl restart nginx +} + +###### function set_apiserver_l7_proxy ####### +function set_apiserver_l7_proxy() +{ + sudo apt update && sudo apt install nginx -y +cat > /etc/nginx/sites-available/default << 'EOF' + +http { + upstream api { + server kubernetes.default.svc.cluster.local:6443; + } + + server { + listen 6443 ssl; + 
ssl_certificate /usr/local/nginx/ssl/apiserver.crt; # kube-apiserver cert + ssl_certificate_key /usr/local/nginx/ssl/apiserver.key; # kube-apiserver key + ssl_trusted_certificate /usr/local/nginx/ssl/ca.crt; # ca.pem + + location / { + } + + location /api/ { + rewrite ^/api(/.*)$ $1 break; + proxy_pass https://api; + proxy_ssl_certificate /etc/nginx/k8s-client-certificate.pem; + proxy_ssl_certificate_key /etc/nginx/k8s-client-key.key; + proxy_ssl_session_reuse on; + } + } +} +EOF + sudo systemctl restart nginx +} + +disable_proxy="--disable-kube-proxy" +disable_cni="--flannel-backend=none --disable-network-policy" +default="--disable=traefik,servicelb --data-dir=/opt/rancher/k3s --kube-apiserver-arg service-node-port-range=0-50000" + +case $enable_api_access in + 'true') api_opts="--bind-address=0.0.0.0" ;; + *) api_opts="" ;; +esac + +case $cni in + 'default') opts="$default $api_opts" ;; + 'kubeovn') opts="$default $disable_cni $api_opts" ;; + 'cilium') opts="$default $disable_cni $disable_proxy $api_opts" ;; + *) echo "error args"; exit 1 ;; +esac + +setup_k3s "$opts" +setup_helm +#set_apiserver_l4_proxy +#set_apiserver_l7_proxy diff --git a/playbooks/roles/vhosts/k3s/meta/main.yml b/playbooks/roles/vhosts/k3s/meta/main.yml new file mode 100644 index 0000000..9711b33 --- /dev/null +++ b/playbooks/roles/vhosts/k3s/meta/main.yml @@ -0,0 +1,2 @@ +dependencies: + - role: common diff --git a/playbooks/roles/vhosts/k3s/tasks/main.yml b/playbooks/roles/vhosts/k3s/tasks/main.yml new file mode 100755 index 0000000..af3f2d7 --- /dev/null +++ b/playbooks/roles/vhosts/k3s/tasks/main.yml @@ -0,0 +1,11 @@ +- name: Setup K3S Server + script: files/setup-k3s.sh {{ version }} {{ cni }} {{ pod_cidr }} {{ svc_cidr }} {{ enable_api_access }} + when: inventory_hostname in groups[group] + +- name: Sync K3S CNI Config + template: src=templates/cni_install.sh dest=/tmp/ owner=root group=root mode=0644 + when: ( inventory_hostname in groups[group] ) and (cni == 'kubeovn' ) +- name: Setup K3S 
CNI + shell: 'bash /tmp/cni_install.sh' + when: ( inventory_hostname in groups[group] ) and (cni == 'kubeovn' ) + ignore_errors: yes diff --git a/playbooks/roles/vhosts/k3s/templates/cni_install.sh b/playbooks/roles/vhosts/k3s/templates/cni_install.sh new file mode 100644 index 0000000..1b004ba --- /dev/null +++ b/playbooks/roles/vhosts/k3s/templates/cni_install.sh @@ -0,0 +1,3657 @@ +#!/usr/bin/env bash +set -euo pipefail + +IPV6=${IPV6:-false} +DUAL_STACK=${DUAL_STACK:-false} +ENABLE_SSL=${ENABLE_SSL:-false} +ENABLE_VLAN=${ENABLE_VLAN:-false} +CHECK_GATEWAY=${CHECK_GATEWAY:-true} +LOGICAL_GATEWAY=${LOGICAL_GATEWAY:-false} +U2O_INTERCONNECTION=${U2O_INTERCONNECTION:-false} +ENABLE_MIRROR=${ENABLE_MIRROR:-false} +VLAN_NIC=${VLAN_NIC:-} +HW_OFFLOAD=${HW_OFFLOAD:-false} +ENABLE_LB=${ENABLE_LB:-true} +ENABLE_NP=${ENABLE_NP:-true} +ENABLE_EIP_SNAT=${ENABLE_EIP_SNAT:-true} +LS_DNAT_MOD_DL_DST=${LS_DNAT_MOD_DL_DST:-true} +ENABLE_EXTERNAL_VPC=${ENABLE_EXTERNAL_VPC:-true} +CNI_CONFIG_PRIORITY=${CNI_CONFIG_PRIORITY:-01} +ENABLE_LB_SVC=${ENABLE_LB_SVC:-false} +ENABLE_KEEP_VM_IP=${ENABLE_KEEP_VM_IP:-true} + +# exchange link names of OVS bridge and the provider nic +# in the default provider-network +EXCHANGE_LINK_NAME=${EXCHANGE_LINK_NAME:-false} +# The nic to support container network can be a nic name or a group of regex +# separated by comma, if empty will use the nic that the default route use +IFACE=${IFACE:-} +# Specifies the name of the dpdk tunnel iface. +# Note that the dpdk tunnel iface and tunnel ip cidr should be diffierent with Kubernetes api cidr,otherwise the route will be a problem. 
+DPDK_TUNNEL_IFACE=${DPDK_TUNNEL_IFACE:-br-phy} +ENABLE_BIND_LOCAL_IP=${ENABLE_BIND_LOCAL_IP:-true} + +# debug +DEBUG_WRAPPER=${DEBUG_WRAPPER:-} + +CNI_CONF_DIR="/etc/cni/net.d" +CNI_BIN_DIR="/opt/cni/bin" + +REGISTRY="kubeovn" +VERSION="v1.11.5" +IMAGE_PULL_POLICY="IfNotPresent" +POD_CIDR="{{ pod_cidr }}" # Do NOT overlap with NODE/SVC/JOIN CIDR +POD_GATEWAY="{{ pod_gateway }}" +SVC_CIDR="{{ svc_cidr }}" # Do NOT overlap with NODE/POD/JOIN CIDR +JOIN_CIDR="{{ join_cidr }}" # Do NOT overlap with NODE/POD/SVC CIDR +PINGER_EXTERNAL_ADDRESS="114.114.114.114" # Pinger check external ip probe +PINGER_EXTERNAL_DOMAIN="alauda.cn" # Pinger check external domain probe +SVC_YAML_IPFAMILYPOLICY="" +if [ "$IPV6" = "true" ]; then + POD_CIDR="fd00:10:16::/64" # Do NOT overlap with NODE/SVC/JOIN CIDR + POD_GATEWAY="fd00:10:16::1" + SVC_CIDR="fd00:10:96::/112" # Do NOT overlap with NODE/POD/JOIN CIDR + JOIN_CIDR="fd00:100:64::/64" # Do NOT overlap with NODE/POD/SVC CIDR + PINGER_EXTERNAL_ADDRESS="2400:3200::1" + PINGER_EXTERNAL_DOMAIN="google.com" +fi +if [ "$DUAL_STACK" = "true" ]; then + POD_CIDR="10.16.0.0/16,fd00:10:16::/64" # Do NOT overlap with NODE/SVC/JOIN CIDR + POD_GATEWAY="10.16.0.1,fd00:10:16::1" + SVC_CIDR="10.96.0.0/12,fd00:10:96::/112" # Do NOT overlap with NODE/POD/JOIN CIDR + JOIN_CIDR="100.64.0.0/16,fd00:100:64::/64" # Do NOT overlap with NODE/POD/SVC CIDR + PINGER_EXTERNAL_ADDRESS="114.114.114.114,2400:3200::1" + PINGER_EXTERNAL_DOMAIN="google.com" + SVC_YAML_IPFAMILYPOLICY="ipFamilyPolicy: PreferDualStack" +fi + +EXCLUDE_IPS="" # EXCLUDE_IPS for default subnet +LABEL="node-role.kubernetes.io/control-plane" # The node label to deploy OVN DB +DEPRECATED_LABEL="node-role.kubernetes.io/master" # The node label to deploy OVN DB in earlier versions +NETWORK_TYPE="geneve" # geneve or vlan +TUNNEL_TYPE="geneve" # geneve, vxlan or stt. 
ATTENTION: some networkpolicy cannot take effect when using vxlan and stt need custom compile ovs kernel module +POD_NIC_TYPE="veth-pair" # veth-pair or internal-port +POD_DEFAULT_FIP_TYPE="" # iptables, pod can set iptables fip automatically by enable fip annotation + +# VLAN Config only take effect when NETWORK_TYPE is vlan +PROVIDER_NAME="provider" +VLAN_INTERFACE_NAME="" +VLAN_NAME="ovn-vlan" +VLAN_ID="100" + +if [ "$ENABLE_VLAN" = "true" ]; then + NETWORK_TYPE="vlan" + if [ "$VLAN_NIC" != "" ]; then + VLAN_INTERFACE_NAME="$VLAN_NIC" + fi +fi + +# hybrid dpdk +HYBRID_DPDK="false" + +# DPDK +DPDK="false" +DPDK_SUPPORTED_VERSIONS=("19.11") +DPDK_VERSION="" +DPDK_CPU="1000m" # Default CPU configuration for if --dpdk-cpu flag is not included +DPDK_MEMORY="2Gi" # Default Memory configuration for it --dpdk-memory flag is not included + +# performance +MODULES="kube_ovn_fastpath.ko" +RPMS="openvswitch-kmod" +GC_INTERVAL=360 +INSPECT_INTERVAL=20 + +display_help() { + echo "Usage: $0 [option...]" + echo + echo " -h, --help Print Help (this message) and exit" + echo " --with-hybrid-dpdk Install Kube-OVN with nodes which run ovs-dpdk or ovs-kernel" + echo " --with-dpdk= Install Kube-OVN with OVS-DPDK instead of kernel OVS" + echo " --dpdk-cpu=m Configure DPDK to use a specific amount of CPU" + echo " --dpdk-memory=Gi Configure DPDK to use a specific amount of memory" + echo + exit 0 +} + +if [ -n "${1-}" ] +then + set +u + while :; do + case $1 in + -h|--help) + display_help + ;; + --with-hybrid-dpdk) + HYBRID_DPDK="true" + ;; + --with-dpdk=*) + DPDK=true + DPDK_VERSION="${1#*=}" + if [[ ! 
"${DPDK_SUPPORTED_VERSIONS[@]}" = "${DPDK_VERSION}" ]] || [[ -z "${DPDK_VERSION}" ]]; then + echo "Unsupported DPDK version: ${DPDK_VERSION}" + echo "Supported DPDK versions: ${DPDK_SUPPORTED_VERSIONS[*]}" + exit 1 + fi + ;; + --dpdk-cpu=*) + DPDK_CPU="${1#*=}" + if [[ $DPDK_CPU =~ ^[0-9]+(m)$ ]] + then + echo "CPU $DPDK_CPU" + else + echo "$DPDK_CPU is not valid, please use the format --dpdk-cpu=m" + exit 1 + fi + ;; + --dpdk-memory=*) + DPDK_MEMORY="${1#*=}" + if [[ $DPDK_MEMORY =~ ^[0-9]+(Gi)$ ]] + then + echo "MEMORY $DPDK_MEMORY" + else + echo "$DPDK_MEMORY is not valid, please use the format --dpdk-memory=Gi" + exit 1 + fi + ;; + -?*) + echo "Unknown argument $1" + exit 1 + ;; + *) break + esac + shift + done + set -u +fi + +echo "-------------------------------" +echo "Kube-OVN Version: $VERSION" +echo "Default Network Mode: $NETWORK_TYPE" +if [[ $NETWORK_TYPE = "vlan" ]];then + echo "Default Vlan Nic: $VLAN_INTERFACE_NAME" + echo "Default Vlan ID: $VLAN_ID" +fi +echo "Default Subnet CIDR: $POD_CIDR" +echo "Join Subnet CIDR: $JOIN_CIDR" +echo "Enable SVC LB: $ENABLE_LB" +echo "Enable Networkpolicy: $ENABLE_NP" +echo "Enable EIP and SNAT: $ENABLE_EIP_SNAT" +echo "Enable Mirror: $ENABLE_MIRROR" +echo "-------------------------------" + +if [[ $ENABLE_SSL = "true" ]];then + echo "[Step 0/6] Generate SSL key and cert" + exist=$(kubectl get secret -n kube-system kube-ovn-tls --ignore-not-found) + if [[ $exist == "" ]];then + docker run --rm -v "$PWD":/etc/ovn $REGISTRY/kube-ovn:$VERSION bash generate-ssl.sh + kubectl create secret generic -n kube-system kube-ovn-tls --from-file=cacert=cacert.pem --from-file=cert=ovn-cert.pem --from-file=key=ovn-privkey.pem + rm -rf cacert.pem ovn-cert.pem ovn-privkey.pem ovn-req.pem + fi + echo "-------------------------------" + echo "" +fi + +echo "[Step 1/6] Label kube-ovn-master node and label datapath type" +count=$(kubectl get no -l$LABEL --no-headers | wc -l) +node_label="$LABEL" +if [ $count -eq 0 ]; then + 
count=$(kubectl get no -l$DEPRECATED_LABEL --no-headers | wc -l) + node_label="$DEPRECATED_LABEL" + if [ $count -eq 0 ]; then + echo "ERROR: No node with label $LABEL or $DEPRECATED_LABEL found" + exit 1 + fi +fi +kubectl label no -l$node_label kube-ovn/role=master --overwrite + +if [ "$DPDK" = "true" -o "$HYBRID_DPDK" = "true" ]; then + kubectl label no -lovn.kubernetes.io/ovs_dp_type!=userspace ovn.kubernetes.io/ovs_dp_type=kernel --overwrite +fi + +echo "-------------------------------" +echo "" + +echo "[Step 2/6] Install OVN components" +addresses=$(kubectl get no -lkube-ovn/role=master --no-headers -o wide | awk '{print $6}' | tr \\n ',') +count=$(kubectl get no -lkube-ovn/role=master --no-headers | wc -l) +echo "Install OVN DB in $addresses" + +cat < kube-ovn-crd.yaml +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + name: vpc-dnses.kubeovn.io +spec: + group: kubeovn.io + names: + plural: vpc-dnses + singular: vpc-dns + shortNames: + - vpc-dns + kind: VpcDns + listKind: VpcDnsList + scope: Cluster + versions: + - additionalPrinterColumns: + - jsonPath: .status.active + name: Active + type: boolean + - jsonPath: .spec.vpc + name: Vpc + type: string + - jsonPath: .spec.subnet + name: Subnet + type: string + name: v1 + served: true + storage: true + subresources: + status: {} + schema: + openAPIV3Schema: + type: object + properties: + spec: + type: object + properties: + vpc: + type: string + subnet: + type: string + status: + type: object + properties: + active: + type: boolean + conditions: + type: array + items: + type: object + properties: + type: + type: string + status: + type: string + reason: + type: string + message: + type: string + lastUpdateTime: + type: string + lastTransitionTime: + type: string +--- +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + name: switch-lb-rules.kubeovn.io +spec: + group: kubeovn.io + names: + plural: switch-lb-rules + singular: switch-lb-rule + shortNames: + 
- slr + kind: SwitchLBRule + listKind: SwitchLBRuleList + scope: Cluster + versions: + - additionalPrinterColumns: + - jsonPath: .spec.vip + name: vip + type: string + - jsonPath: .status.ports + name: port(s) + type: string + - jsonPath: .status.service + name: service + type: string + - jsonPath: .metadata.creationTimestamp + name: age + type: date + name: v1 + served: true + storage: true + subresources: + status: {} + schema: + openAPIV3Schema: + type: object + properties: + spec: + type: object + properties: + namespace: + type: string + vip: + type: string + sessionAffinity: + type: string + ports: + items: + properties: + name: + type: string + port: + type: integer + minimum: 1 + maximum: 65535 + protocol: + type: string + targetPort: + type: integer + minimum: 1 + maximum: 65535 + type: object + type: array + selector: + items: + type: string + type: array + status: + type: object + properties: + ports: + type: string + service: + type: string +--- +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + name: vpc-nat-gateways.kubeovn.io +spec: + group: kubeovn.io + names: + plural: vpc-nat-gateways + singular: vpc-nat-gateway + shortNames: + - vpc-nat-gw + kind: VpcNatGateway + listKind: VpcNatGatewayList + scope: Cluster + versions: + - additionalPrinterColumns: + - jsonPath: .spec.vpc + name: Vpc + type: string + - jsonPath: .spec.subnet + name: Subnet + type: string + - jsonPath: .spec.lanIp + name: LanIP + type: string + name: v1 + served: true + storage: true + schema: + openAPIV3Schema: + type: object + properties: + spec: + type: object + properties: + lanIp: + type: string + subnet: + type: string + vpc: + type: string + selector: + type: array + items: + type: string + tolerations: + type: array + items: + type: object + properties: + key: + type: string + operator: + type: string + value: + type: string + effect: + type: string + tolerationSeconds: + type: integer +--- +apiVersion: apiextensions.k8s.io/v1 +kind: 
CustomResourceDefinition +metadata: + name: iptables-eips.kubeovn.io +spec: + group: kubeovn.io + names: + plural: iptables-eips + singular: iptables-eip + shortNames: + - eip + kind: IptablesEIP + listKind: IptablesEIPList + scope: Cluster + versions: + - name: v1 + served: true + storage: true + subresources: + status: {} + additionalPrinterColumns: + - jsonPath: .status.ip + name: IP + type: string + - jsonPath: .spec.macAddress + name: Mac + type: string + - jsonPath: .status.nat + name: Nat + type: string + - jsonPath: .spec.natGwDp + name: NatGwDp + type: string + - jsonPath: .status.ready + name: Ready + type: boolean + schema: + openAPIV3Schema: + type: object + properties: + status: + type: object + properties: + ready: + type: boolean + ip: + type: string + nat: + type: string + redo: + type: string + conditions: + type: array + items: + type: object + properties: + type: + type: string + status: + type: string + reason: + type: string + message: + type: string + lastUpdateTime: + type: string + lastTransitionTime: + type: string + spec: + type: object + properties: + v4ip: + type: string + v6ip: + type: string + macAddress: + type: string + natGwDp: + type: string +--- +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + name: iptables-fip-rules.kubeovn.io +spec: + group: kubeovn.io + names: + plural: iptables-fip-rules + singular: iptables-fip-rule + shortNames: + - fip + kind: IptablesFIPRule + listKind: IptablesFIPRuleList + scope: Cluster + versions: + - name: v1 + served: true + storage: true + subresources: + status: {} + additionalPrinterColumns: + - jsonPath: .spec.eip + name: Eip + type: string + - jsonPath: .status.v4ip + name: V4ip + type: string + - jsonPath: .spec.internalIp + name: InternalIp + type: string + - jsonPath: .status.v6ip + name: V6ip + type: string + - jsonPath: .status.ready + name: Ready + type: boolean + - jsonPath: .status.natGwDp + name: NatGwDp + type: string + schema: + openAPIV3Schema: + 
type: object + properties: + status: + type: object + properties: + ready: + type: boolean + v4ip: + type: string + v6ip: + type: string + natGwDp: + type: string + redo: + type: string + conditions: + type: array + items: + type: object + properties: + type: + type: string + status: + type: string + reason: + type: string + message: + type: string + lastUpdateTime: + type: string + lastTransitionTime: + type: string + spec: + type: object + properties: + eip: + type: string + internalIp: + type: string +--- +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + name: iptables-dnat-rules.kubeovn.io +spec: + group: kubeovn.io + names: + plural: iptables-dnat-rules + singular: iptables-dnat-rule + shortNames: + - dnat + kind: IptablesDnatRule + listKind: IptablesDnatRuleList + scope: Cluster + versions: + - name: v1 + served: true + storage: true + subresources: + status: {} + additionalPrinterColumns: + - jsonPath: .spec.eip + name: Eip + type: string + - jsonPath: .spec.protocol + name: Protocol + type: string + - jsonPath: .status.v4ip + name: V4ip + type: string + - jsonPath: .status.v6ip + name: V6ip + type: string + - jsonPath: .spec.internalIp + name: InternalIp + type: string + - jsonPath: .spec.externalPort + name: ExternalPort + type: string + - jsonPath: .spec.internalPort + name: InternalPort + type: string + - jsonPath: .status.natGwDp + name: NatGwDp + type: string + - jsonPath: .status.ready + name: Ready + type: boolean + schema: + openAPIV3Schema: + type: object + properties: + status: + type: object + properties: + ready: + type: boolean + v4ip: + type: string + v6ip: + type: string + natGwDp: + type: string + redo: + type: string + protocol: + type: string + internalIp: + type: string + internalPort: + type: string + externalPort: + type: string + conditions: + type: array + items: + type: object + properties: + type: + type: string + status: + type: string + reason: + type: string + message: + type: string + 
lastUpdateTime: + type: string + lastTransitionTime: + type: string + spec: + type: object + properties: + eip: + type: string + externalPort: + type: string + protocol: + type: string + internalIp: + type: string + internalPort: + type: string +--- +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + name: iptables-snat-rules.kubeovn.io +spec: + group: kubeovn.io + names: + plural: iptables-snat-rules + singular: iptables-snat-rule + shortNames: + - snat + kind: IptablesSnatRule + listKind: IptablesSnatRuleList + scope: Cluster + versions: + - name: v1 + served: true + storage: true + subresources: + status: {} + additionalPrinterColumns: + - jsonPath: .spec.eip + name: EIP + type: string + - jsonPath: .status.v4ip + name: V4ip + type: string + - jsonPath: .status.v6ip + name: V6ip + type: string + - jsonPath: .spec.internalCIDR + name: InternalCIDR + type: string + - jsonPath: .status.natGwDp + name: NatGwDp + type: string + - jsonPath: .status.ready + name: Ready + type: boolean + schema: + openAPIV3Schema: + type: object + properties: + status: + type: object + properties: + ready: + type: boolean + v4ip: + type: string + v6ip: + type: string + natGwDp: + type: string + redo: + type: string + conditions: + type: array + items: + type: object + properties: + type: + type: string + status: + type: string + reason: + type: string + message: + type: string + lastUpdateTime: + type: string + lastTransitionTime: + type: string + spec: + type: object + properties: + eip: + type: string + internalCIDR: + type: string +--- +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + name: ovn-eips.kubeovn.io +spec: + group: kubeovn.io + names: + plural: ovn-eips + singular: ovn-eip + shortNames: + - oeip + kind: OvnEip + listKind: OvnEipList + scope: Cluster + versions: + - name: v1 + served: true + storage: true + subresources: + status: {} + additionalPrinterColumns: + - jsonPath: .spec.v4ip + name: IP + type: string + 
- jsonPath: .spec.macAddress + name: Mac + type: string + - jsonPath: .spec.type + name: Type + type: string + schema: + openAPIV3Schema: + type: object + properties: + status: + type: object + properties: + v4Ip: + type: string + macAddress: + type: string + conditions: + type: array + items: + type: object + properties: + type: + type: string + status: + type: string + reason: + type: string + message: + type: string + lastUpdateTime: + type: string + lastTransitionTime: + type: string + spec: + type: object + properties: + externalSubnet: + type: string + type: + type: string + v4ip: + type: string + macAddress: + type: string +--- +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + name: ovn-fips.kubeovn.io +spec: + group: kubeovn.io + names: + plural: ovn-fips + singular: ovn-fip + shortNames: + - ofip + kind: OvnFip + listKind: OvnFipList + scope: Cluster + versions: + - name: v1 + served: true + storage: true + subresources: + status: {} + additionalPrinterColumns: + - jsonPath: .status.vpc + name: Vpc + type: string + - jsonPath: .status.v4Eip + name: V4Eip + type: string + - jsonPath: .status.v4Ip + name: V4Ip + type: string + - jsonPath: .status.ready + name: Ready + type: boolean + schema: + openAPIV3Schema: + type: object + properties: + status: + type: object + properties: + ready: + type: boolean + v4Eip: + type: string + v4Ip: + type: string + macAddress: + type: string + vpc: + type: string + conditions: + type: array + items: + type: object + properties: + type: + type: string + status: + type: string + reason: + type: string + message: + type: string + lastUpdateTime: + type: string + lastTransitionTime: + type: string + spec: + type: object + properties: + ovnEip: + type: string + ipName: + type: string +--- +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + name: ovn-snat-rules.kubeovn.io +spec: + group: kubeovn.io + names: + plural: ovn-snat-rules + singular: ovn-snat-rule + 
shortNames: + - osnat + kind: OvnSnatRule + listKind: OvnSnatRuleList + scope: Cluster + versions: + - name: v1 + served: true + storage: true + subresources: + status: {} + additionalPrinterColumns: + - jsonPath: .status.vpc + name: Vpc + type: string + - jsonPath: .status.v4Eip + name: V4Eip + type: string + - jsonPath: .status.v4ipCidr + name: V4Ip + type: string + - jsonPath: .status.ready + name: Ready + type: boolean + schema: + openAPIV3Schema: + type: object + properties: + status: + type: object + properties: + ready: + type: boolean + v4Eip: + type: string + v4ipCidr: + type: string + vpc: + type: string + conditions: + type: array + items: + type: object + properties: + type: + type: string + status: + type: string + reason: + type: string + message: + type: string + lastUpdateTime: + type: string + lastTransitionTime: + type: string + spec: + type: object + properties: + ovnEip: + type: string + vpcSubnet: + type: string + ipName: + type: string +--- +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + name: vpcs.kubeovn.io +spec: + group: kubeovn.io + versions: + - additionalPrinterColumns: + - jsonPath: .status.enableExternal + name: EnableExternal + type: boolean + - jsonPath: .status.standby + name: Standby + type: boolean + - jsonPath: .status.subnets + name: Subnets + type: string + - jsonPath: .spec.namespaces + name: Namespaces + type: string + name: v1 + schema: + openAPIV3Schema: + properties: + spec: + properties: + enableExternal: + type: boolean + namespaces: + items: + type: string + type: array + staticRoutes: + items: + properties: + policy: + type: string + cidr: + type: string + nextHopIP: + type: string + type: object + type: array + policyRoutes: + items: + properties: + priority: + type: integer + action: + type: string + match: + type: string + nextHopIP: + type: string + type: object + type: array + vpcPeerings: + items: + properties: + remoteVpc: + type: string + localConnectIP: + type: string + type: 
object + type: array + type: object + status: + properties: + conditions: + items: + properties: + lastTransitionTime: + type: string + lastUpdateTime: + type: string + message: + type: string + reason: + type: string + status: + type: string + type: + type: string + type: object + type: array + default: + type: boolean + defaultLogicalSwitch: + type: string + router: + type: string + standby: + type: boolean + enableExternal: + type: boolean + subnets: + items: + type: string + type: array + vpcPeerings: + items: + type: string + type: array + tcpLoadBalancer: + type: string + tcpSessionLoadBalancer: + type: string + udpLoadBalancer: + type: string + udpSessionLoadBalancer: + type: string + sctpLoadBalancer: + type: string + sctpSessionLoadBalancer: + type: string + type: object + type: object + served: true + storage: true + subresources: + status: {} + names: + kind: Vpc + listKind: VpcList + plural: vpcs + shortNames: + - vpc + singular: vpc + scope: Cluster +--- +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + name: ips.kubeovn.io +spec: + group: kubeovn.io + versions: + - name: v1 + served: true + storage: true + additionalPrinterColumns: + - name: V4IP + type: string + jsonPath: .spec.v4IpAddress + - name: V6IP + type: string + jsonPath: .spec.v6IpAddress + - name: Mac + type: string + jsonPath: .spec.macAddress + - name: Node + type: string + jsonPath: .spec.nodeName + - name: Subnet + type: string + jsonPath: .spec.subnet + schema: + openAPIV3Schema: + type: object + properties: + spec: + type: object + properties: + podName: + type: string + namespace: + type: string + subnet: + type: string + attachSubnets: + type: array + items: + type: string + nodeName: + type: string + ipAddress: + type: string + v4IpAddress: + type: string + v6IpAddress: + type: string + attachIps: + type: array + items: + type: string + macAddress: + type: string + attachMacs: + type: array + items: + type: string + containerID: + type: string + 
podType: + type: string + scope: Cluster + names: + plural: ips + singular: ip + kind: IP + shortNames: + - ip +--- +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + name: vips.kubeovn.io +spec: + group: kubeovn.io + names: + plural: vips + singular: vip + shortNames: + - vip + kind: Vip + listKind: VipList + scope: Cluster + versions: + - name: v1 + served: true + storage: true + additionalPrinterColumns: + - name: V4IP + type: string + jsonPath: .status.v4ip + - name: PV4IP + type: string + jsonPath: .spec.parentV4ip + - name: Mac + type: string + jsonPath: .status.mac + - name: PMac + type: string + jsonPath: .spec.parentMac + - name: V6IP + type: string + jsonPath: .status.v6ip + - name: PV6IP + type: string + jsonPath: .spec.parentV6ip + - name: Subnet + type: string + jsonPath: .spec.subnet + - jsonPath: .status.ready + name: Ready + type: boolean + schema: + openAPIV3Schema: + type: object + properties: + status: + type: object + properties: + ready: + type: boolean + v4ip: + type: string + v6ip: + type: string + mac: + type: string + pv4ip: + type: string + pv6ip: + type: string + pmac: + type: string + conditions: + type: array + items: + type: object + properties: + type: + type: string + status: + type: string + reason: + type: string + message: + type: string + lastUpdateTime: + type: string + lastTransitionTime: + type: string + spec: + type: object + properties: + namespace: + type: string + subnet: + type: string + attachSubnets: + type: array + items: + type: string + v4ip: + type: string + macAddress: + type: string + v6ip: + type: string + parentV4ip: + type: string + parentMac: + type: string + parentV6ip: + type: string +--- +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + name: subnets.kubeovn.io +spec: + group: kubeovn.io + versions: + - name: v1 + served: true + storage: true + subresources: + status: {} + additionalPrinterColumns: + - name: Provider + type: string + jsonPath: 
.spec.provider + - name: Vpc + type: string + jsonPath: .spec.vpc + - name: Protocol + type: string + jsonPath: .spec.protocol + - name: CIDR + type: string + jsonPath: .spec.cidrBlock + - name: Private + type: boolean + jsonPath: .spec.private + - name: NAT + type: boolean + jsonPath: .spec.natOutgoing + - name: Default + type: boolean + jsonPath: .spec.default + - name: GatewayType + type: string + jsonPath: .spec.gatewayType + - name: V4Used + type: number + jsonPath: .status.v4usingIPs + - name: V4Available + type: number + jsonPath: .status.v4availableIPs + - name: V6Used + type: number + jsonPath: .status.v6usingIPs + - name: V6Available + type: number + jsonPath: .status.v6availableIPs + - name: ExcludeIPs + type: string + jsonPath: .spec.excludeIps + - name: U2OInterconnectionIP + type: string + jsonPath: .status.u2oInterconnectionIP + schema: + openAPIV3Schema: + type: object + properties: + status: + type: object + properties: + v4availableIPs: + type: number + v4usingIPs: + type: number + v6availableIPs: + type: number + v6usingIPs: + type: number + activateGateway: + type: string + dhcpV4OptionsUUID: + type: string + dhcpV6OptionsUUID: + type: string + u2oInterconnectionIP: + type: string + u2oInterconnectionVPC: + type: string + conditions: + type: array + items: + type: object + properties: + type: + type: string + status: + type: string + reason: + type: string + message: + type: string + lastUpdateTime: + type: string + lastTransitionTime: + type: string + spec: + type: object + properties: + vpc: + type: string + default: + type: boolean + protocol: + type: string + enum: + - IPv4 + - IPv6 + - Dual + cidrBlock: + type: string + namespaces: + type: array + items: + type: string + gateway: + type: string + provider: + type: string + excludeIps: + type: array + items: + type: string + vips: + type: array + items: + type: string + gatewayType: + type: string + allowSubnets: + type: array + items: + type: string + gatewayNode: + type: string + 
natOutgoing: + type: boolean + u2oRouting: + type: boolean + externalEgressGateway: + type: string + policyRoutingPriority: + type: integer + minimum: 1 + maximum: 32765 + policyRoutingTableID: + type: integer + minimum: 1 + maximum: 2147483647 + not: + enum: + - 252 # compat + - 253 # default + - 254 # main + - 255 # local + private: + type: boolean + vlan: + type: string + logicalGateway: + type: boolean + disableGatewayCheck: + type: boolean + disableInterConnection: + type: boolean + enableDHCP: + type: boolean + dhcpV4Options: + type: string + dhcpV6Options: + type: string + enableIPv6RA: + type: boolean + ipv6RAConfigs: + type: string + acls: + type: array + items: + type: object + properties: + direction: + type: string + enum: + - from-lport + - to-lport + priority: + type: integer + minimum: 0 + maximum: 32767 + match: + type: string + action: + type: string + enum: + - allow-related + - allow-stateless + - allow + - drop + - reject + u2oInterconnection: + type: boolean + scope: Cluster + names: + plural: subnets + singular: subnet + kind: Subnet + shortNames: + - subnet +--- +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + name: vlans.kubeovn.io +spec: + group: kubeovn.io + versions: + - name: v1 + served: true + storage: true + subresources: + status: {} + schema: + openAPIV3Schema: + type: object + properties: + spec: + type: object + properties: + id: + type: integer + minimum: 0 + maximum: 4095 + provider: + type: string + vlanId: + type: integer + description: Deprecated in favor of id + providerInterfaceName: + type: string + description: Deprecated in favor of provider + required: + - provider + status: + type: object + properties: + subnets: + type: array + items: + type: string + additionalPrinterColumns: + - name: ID + type: string + jsonPath: .spec.id + - name: Provider + type: string + jsonPath: .spec.provider + scope: Cluster + names: + plural: vlans + singular: vlan + kind: Vlan + shortNames: + - vlan +--- 
+apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + name: provider-networks.kubeovn.io +spec: + group: kubeovn.io + versions: + - name: v1 + served: true + storage: true + subresources: + status: {} + schema: + openAPIV3Schema: + type: object + properties: + metadata: + type: object + properties: + name: + type: string + maxLength: 12 + not: + enum: + - int + - external + spec: + type: object + properties: + defaultInterface: + type: string + maxLength: 15 + pattern: '^[^/\s]+$' + customInterfaces: + type: array + items: + type: object + properties: + interface: + type: string + maxLength: 15 + pattern: '^[^/\s]+$' + nodes: + type: array + items: + type: string + exchangeLinkName: + type: boolean + excludeNodes: + type: array + items: + type: string + required: + - defaultInterface + status: + type: object + properties: + ready: + type: boolean + readyNodes: + type: array + items: + type: string + notReadyNodes: + type: array + items: + type: string + vlans: + type: array + items: + type: string + conditions: + type: array + items: + type: object + properties: + node: + type: string + type: + type: string + status: + type: string + reason: + type: string + message: + type: string + lastUpdateTime: + type: string + lastTransitionTime: + type: string + additionalPrinterColumns: + - name: DefaultInterface + type: string + jsonPath: .spec.defaultInterface + - name: Ready + type: boolean + jsonPath: .status.ready + scope: Cluster + names: + plural: provider-networks + singular: provider-network + kind: ProviderNetwork + listKind: ProviderNetworkList +--- +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + name: security-groups.kubeovn.io +spec: + group: kubeovn.io + names: + plural: security-groups + singular: security-group + shortNames: + - sg + kind: SecurityGroup + listKind: SecurityGroupList + scope: Cluster + versions: + - name: v1 + served: true + storage: true + schema: + openAPIV3Schema: + type: object 
+ properties: + spec: + type: object + properties: + ingressRules: + type: array + items: + type: object + properties: + ipVersion: + type: string + protocol: + type: string + priority: + type: integer + remoteType: + type: string + remoteAddress: + type: string + remoteSecurityGroup: + type: string + portRangeMin: + type: integer + portRangeMax: + type: integer + policy: + type: string + egressRules: + type: array + items: + type: object + properties: + ipVersion: + type: string + protocol: + type: string + priority: + type: integer + remoteType: + type: string + remoteAddress: + type: string + remoteSecurityGroup: + type: string + portRangeMin: + type: integer + portRangeMax: + type: integer + policy: + type: string + allowSameGroupTraffic: + type: boolean + status: + type: object + properties: + portGroup: + type: string + allowSameGroupTraffic: + type: boolean + ingressMd5: + type: string + egressMd5: + type: string + ingressLastSyncSuccess: + type: boolean + egressLastSyncSuccess: + type: boolean + subresources: + status: {} + conversion: + strategy: None +EOF + +if $DPDK; then + cat <<EOF > ovn.yaml +--- +apiVersion: v1 +kind: ServiceAccount +metadata: + name: ovn + namespace: kube-system + +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + annotations: + rbac.authorization.k8s.io/system-only: "true" + name: system:ovn +rules: + - apiGroups: + - "kubeovn.io" + resources: + - vpcs + - vpcs/status + - vpc-nat-gateways + - subnets + - subnets/status + - ips + - vips + - vips/status + - vlans + - vlans/status + - provider-networks + - provider-networks/status + - security-groups + - security-groups/status + - iptables-eips + - iptables-fip-rules + - iptables-dnat-rules + - iptables-snat-rules + - iptables-eips/status + - iptables-fip-rules/status + - iptables-dnat-rules/status + - iptables-snat-rules/status + - ovn-eips + - ovn-fips + - ovn-snat-rules + - ovn-eips/status + - ovn-fips/status + - ovn-snat-rules/status + - switch-lb-rules + - 
switch-lb-rules/status + - vpc-dnses + - vpc-dnses/status + verbs: + - "*" + - apiGroups: + - "" + resources: + - pods + - pods/exec + - namespaces + - nodes + - configmaps + verbs: + - create + - get + - list + - watch + - patch + - update + - apiGroups: + - "k8s.cni.cncf.io" + resources: + - network-attachment-definitions + verbs: + - create + - delete + - get + - list + - update + - apiGroups: + - "" + - networking.k8s.io + - apps + - extensions + resources: + - networkpolicies + - services + - services/status + - endpoints + - statefulsets + - daemonsets + - deployments + - deployments/scale + verbs: + - create + - delete + - update + - patch + - get + - list + - watch + - apiGroups: + - "" + resources: + - events + verbs: + - create + - patch + - update + - apiGroups: + - coordination.k8s.io + resources: + - leases + verbs: + - "*" + - apiGroups: + - "kubevirt.io" + resources: + - virtualmachines + - virtualmachineinstances + verbs: + - get + - list +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: ovn +roleRef: + name: system:ovn + kind: ClusterRole + apiGroup: rbac.authorization.k8s.io +subjects: + - kind: ServiceAccount + name: ovn + namespace: kube-system + +--- +kind: Service +apiVersion: v1 +metadata: + name: ovn-nb + namespace: kube-system +spec: + ports: + - name: ovn-nb + protocol: TCP + port: 6641 + targetPort: 6641 + type: ClusterIP + ${SVC_YAML_IPFAMILYPOLICY} + selector: + app: ovn-central + ovn-nb-leader: "true" + sessionAffinity: None + +--- +kind: Service +apiVersion: v1 +metadata: + name: ovn-sb + namespace: kube-system +spec: + ports: + - name: ovn-sb + protocol: TCP + port: 6642 + targetPort: 6642 + type: ClusterIP + ${SVC_YAML_IPFAMILYPOLICY} + selector: + app: ovn-central + ovn-sb-leader: "true" + sessionAffinity: None + +--- +kind: Service +apiVersion: v1 +metadata: + name: ovn-northd + namespace: kube-system +spec: + ports: + - name: ovn-northd + protocol: TCP + port: 6643 + targetPort: 6643 + 
type: ClusterIP + ${SVC_YAML_IPFAMILYPOLICY} + selector: + app: ovn-central + ovn-northd-leader: "true" + sessionAffinity: None +--- +kind: Deployment +apiVersion: apps/v1 +metadata: + name: ovn-central + namespace: kube-system + annotations: + kubernetes.io/description: | + OVN components: northd, nb and sb. +spec: + replicas: $count + strategy: + rollingUpdate: + maxSurge: 0 + maxUnavailable: 1 + type: RollingUpdate + selector: + matchLabels: + app: ovn-central + template: + metadata: + labels: + app: ovn-central + component: network + type: infra + spec: + tolerations: + - effect: NoSchedule + operator: Exists + - effect: NoExecute + operator: Exists + - key: CriticalAddonsOnly + operator: Exists + affinity: + podAntiAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + - labelSelector: + matchLabels: + app: ovn-central + topologyKey: kubernetes.io/hostname + priorityClassName: system-cluster-critical + serviceAccountName: ovn + hostNetwork: true + containers: + - name: ovn-central + image: "$REGISTRY/kube-ovn:$VERSION" + imagePullPolicy: $IMAGE_PULL_POLICY + command: ["/kube-ovn/start-db.sh"] + securityContext: + capabilities: + add: ["SYS_NICE"] + env: + - name: ENABLE_SSL + value: "$ENABLE_SSL" + - name: NODE_IPS + value: $addresses + - name: POD_IP + valueFrom: + fieldRef: + fieldPath: status.podIP + - name: POD_NAME + valueFrom: + fieldRef: + fieldPath: metadata.name + - name: POD_NAMESPACE + valueFrom: + fieldRef: + fieldPath: metadata.namespace + - name: POD_IPS + valueFrom: + fieldRef: + fieldPath: status.podIPs + - name: ENABLE_BIND_LOCAL_IP + value: "$ENABLE_BIND_LOCAL_IP" + - name: DEBUG_WRAPPER + value: "$DEBUG_WRAPPER" + resources: + requests: + cpu: 300m + memory: 300Mi + limits: + cpu: 3 + memory: 4Gi + volumeMounts: + - mountPath: /var/run/openvswitch + name: host-run-ovs + - mountPath: /var/run/ovn + name: host-run-ovn + - mountPath: /sys + name: host-sys + readOnly: true + - mountPath: /etc/openvswitch + name: host-config-openvswitch + 
- mountPath: /etc/ovn + name: host-config-ovn + - mountPath: /var/log/openvswitch + name: host-log-ovs + - mountPath: /var/log/ovn + name: host-log-ovn + - mountPath: /etc/localtime + name: localtime + - mountPath: /var/run/tls + name: kube-ovn-tls + readinessProbe: + exec: + command: + - bash + - /kube-ovn/ovn-healthcheck.sh + periodSeconds: 15 + timeoutSeconds: 45 + livenessProbe: + exec: + command: + - bash + - /kube-ovn/ovn-healthcheck.sh + initialDelaySeconds: 30 + periodSeconds: 15 + failureThreshold: 5 + timeoutSeconds: 45 + nodeSelector: + kubernetes.io/os: "linux" + kube-ovn/role: "master" + volumes: + - name: host-run-ovs + hostPath: + path: /run/openvswitch + - name: host-run-ovn + hostPath: + path: /run/ovn + - name: host-sys + hostPath: + path: /sys + - name: host-config-openvswitch + hostPath: + path: /etc/origin/openvswitch + - name: host-config-ovn + hostPath: + path: /etc/origin/ovn + - name: host-log-ovs + hostPath: + path: /var/log/openvswitch + - name: host-log-ovn + hostPath: + path: /var/log/ovn + - name: localtime + hostPath: + path: /etc/localtime + - name: kube-ovn-tls + secret: + optional: true + secretName: kube-ovn-tls + +--- +kind: DaemonSet +apiVersion: apps/v1 +metadata: + name: ovs-ovn + namespace: kube-system + annotations: + kubernetes.io/description: | + This daemon set launches the openvswitch daemon. 
+spec: + selector: + matchLabels: + app: ovs + updateStrategy: + type: OnDelete + template: + metadata: + labels: + app: ovs + component: network + type: infra + spec: + tolerations: + - effect: NoSchedule + operator: Exists + - effect: NoExecute + operator: Exists + - key: CriticalAddonsOnly + operator: Exists + priorityClassName: system-node-critical + serviceAccountName: ovn + hostNetwork: true + hostPID: true + containers: + - name: openvswitch + image: "$REGISTRY/kube-ovn-dpdk:$DPDK_VERSION-$VERSION" + imagePullPolicy: $IMAGE_PULL_POLICY + command: ["/kube-ovn/start-ovs-dpdk.sh"] + securityContext: + runAsUser: 0 + privileged: true + env: + - name: ENABLE_SSL + value: "$ENABLE_SSL" + - name: POD_IP + valueFrom: + fieldRef: + fieldPath: status.podIP + - name: KUBE_NODE_NAME + valueFrom: + fieldRef: + fieldPath: spec.nodeName + - name: OVN_DB_IPS + value: $addresses + volumeMounts: + - mountPath: /var/run/netns + name: host-ns + mountPropagation: HostToContainer + - mountPath: /lib/modules + name: host-modules + readOnly: true + - mountPath: /var/run/openvswitch + name: host-run-ovs + - mountPath: /var/run/ovn + name: host-run-ovn + - mountPath: /sys + name: host-sys + readOnly: true + - mountPath: /etc/cni/net.d + name: cni-conf + - mountPath: /etc/openvswitch + name: host-config-openvswitch + - mountPath: /etc/ovn + name: host-config-ovn + - mountPath: /var/log/openvswitch + name: host-log-ovs + - mountPath: /var/log/ovn + name: host-log-ovn + - mountPath: /opt/ovs-config + name: host-config-ovs + - mountPath: /dev/hugepages + name: hugepage + - mountPath: /etc/localtime + name: localtime + - mountPath: /var/run/tls + name: kube-ovn-tls + readinessProbe: + exec: + command: + - bash + - /kube-ovn/ovs-dpdk-healthcheck.sh + periodSeconds: 5 + timeoutSeconds: 45 + livenessProbe: + exec: + command: + - bash + - /kube-ovn/ovs-dpdk-healthcheck.sh + initialDelaySeconds: 60 + periodSeconds: 5 + failureThreshold: 5 + timeoutSeconds: 45 + resources: + requests: + cpu: 
$DPDK_CPU + memory: $DPDK_MEMORY + limits: + cpu: $DPDK_CPU + memory: $DPDK_MEMORY + hugepages-1Gi: 1Gi + nodeSelector: + kubernetes.io/os: "linux" + ovn.kubernetes.io/ovs_dp_type: "kernel" + volumes: + - name: host-modules + hostPath: + path: /lib/modules + - name: host-run-ovs + hostPath: + path: /run/openvswitch + - name: host-run-ovn + hostPath: + path: /run/ovn + - name: host-sys + hostPath: + path: /sys + - name: host-ns + hostPath: + path: /var/run/netns + - name: cni-conf + hostPath: + path: /etc/cni/net.d + - name: host-config-openvswitch + hostPath: + path: /etc/origin/openvswitch + - name: host-config-ovn + hostPath: + path: /etc/origin/ovn + - name: host-log-ovs + hostPath: + path: /var/log/openvswitch + - name: host-log-ovn + hostPath: + path: /var/log/ovn + - name: host-config-ovs + hostPath: + path: /opt/ovs-config + type: DirectoryOrCreate + - name: hugepage + emptyDir: + medium: HugePages + - name: localtime + hostPath: + path: /etc/localtime + - name: kube-ovn-tls + secret: + optional: true + secretName: kube-ovn-tls +EOF + +else + cat <<EOF > ovn.yaml +--- +apiVersion: v1 +kind: ServiceAccount +metadata: + name: ovn + namespace: kube-system +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + annotations: + rbac.authorization.k8s.io/system-only: "true" + name: system:ovn +rules: + - apiGroups: + - "kubeovn.io" + resources: + - vpcs + - vpcs/status + - vpc-nat-gateways + - subnets + - subnets/status + - ips + - vips + - vips/status + - vlans + - vlans/status + - provider-networks + - provider-networks/status + - security-groups + - security-groups/status + - iptables-eips + - iptables-fip-rules + - iptables-dnat-rules + - iptables-snat-rules + - iptables-eips/status + - iptables-fip-rules/status + - iptables-dnat-rules/status + - iptables-snat-rules/status + - ovn-eips + - ovn-fips + - ovn-snat-rules + - ovn-eips/status + - ovn-fips/status + - ovn-snat-rules/status + - vpc-dnses + - vpc-dnses/status + - switch-lb-rules + - 
switch-lb-rules/status + verbs: + - "*" + - apiGroups: + - "" + resources: + - pods + - pods/exec + - namespaces + - nodes + - configmaps + verbs: + - create + - get + - list + - watch + - patch + - update + - apiGroups: + - "" + - networking.k8s.io + - apps + - extensions + resources: + - networkpolicies + - services + - services/status + - endpoints + - statefulsets + - daemonsets + - deployments + - deployments/scale + verbs: + - create + - delete + - update + - patch + - get + - list + - watch + - apiGroups: + - "" + resources: + - events + verbs: + - create + - patch + - update + - apiGroups: + - coordination.k8s.io + resources: + - leases + verbs: + - "*" + - apiGroups: + - "k8s.cni.cncf.io" + resources: + - network-attachment-definitions + verbs: + - create + - delete + - get + - list + - update + - apiGroups: + - "kubevirt.io" + resources: + - virtualmachines + - virtualmachineinstances + verbs: + - get + - list +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: ovn +roleRef: + name: system:ovn + kind: ClusterRole + apiGroup: rbac.authorization.k8s.io +subjects: + - kind: ServiceAccount + name: ovn + namespace: kube-system +--- +kind: Service +apiVersion: v1 +metadata: + name: ovn-nb + namespace: kube-system +spec: + ports: + - name: ovn-nb + protocol: TCP + port: 6641 + targetPort: 6641 + type: ClusterIP + ${SVC_YAML_IPFAMILYPOLICY} + selector: + app: ovn-central + ovn-nb-leader: "true" + sessionAffinity: None +--- +kind: Service +apiVersion: v1 +metadata: + name: ovn-sb + namespace: kube-system +spec: + ports: + - name: ovn-sb + protocol: TCP + port: 6642 + targetPort: 6642 + type: ClusterIP + ${SVC_YAML_IPFAMILYPOLICY} + selector: + app: ovn-central + ovn-sb-leader: "true" + sessionAffinity: None +--- +kind: Service +apiVersion: v1 +metadata: + name: ovn-northd + namespace: kube-system +spec: + ports: + - name: ovn-northd + protocol: TCP + port: 6643 + targetPort: 6643 + type: ClusterIP + 
${SVC_YAML_IPFAMILYPOLICY} + selector: + app: ovn-central + ovn-northd-leader: "true" + sessionAffinity: None +--- +kind: Deployment +apiVersion: apps/v1 +metadata: + name: ovn-central + namespace: kube-system + annotations: + kubernetes.io/description: | + OVN components: northd, nb and sb. +spec: + replicas: $count + strategy: + rollingUpdate: + maxSurge: 0 + maxUnavailable: 1 + type: RollingUpdate + selector: + matchLabels: + app: ovn-central + template: + metadata: + labels: + app: ovn-central + component: network + type: infra + spec: + tolerations: + - effect: NoSchedule + operator: Exists + - effect: NoExecute + operator: Exists + - key: CriticalAddonsOnly + operator: Exists + affinity: + podAntiAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + - labelSelector: + matchLabels: + app: ovn-central + topologyKey: kubernetes.io/hostname + priorityClassName: system-cluster-critical + serviceAccountName: ovn + hostNetwork: true + containers: + - name: ovn-central + image: "$REGISTRY/kube-ovn:$VERSION" + imagePullPolicy: $IMAGE_PULL_POLICY + command: ["/kube-ovn/start-db.sh"] + securityContext: + capabilities: + add: ["SYS_NICE"] + env: + - name: ENABLE_SSL + value: "$ENABLE_SSL" + - name: NODE_IPS + value: $addresses + - name: POD_IP + valueFrom: + fieldRef: + fieldPath: status.podIP + - name: POD_NAME + valueFrom: + fieldRef: + fieldPath: metadata.name + - name: POD_NAMESPACE + valueFrom: + fieldRef: + fieldPath: metadata.namespace + - name: POD_IPS + valueFrom: + fieldRef: + fieldPath: status.podIPs + - name: ENABLE_BIND_LOCAL_IP + value: "$ENABLE_BIND_LOCAL_IP" + - name: DEBUG_WRAPPER + value: "$DEBUG_WRAPPER" + resources: + requests: + cpu: 300m + memory: 200Mi + limits: + cpu: 3 + memory: 4Gi + volumeMounts: + - mountPath: /var/run/openvswitch + name: host-run-ovs + - mountPath: /var/run/ovn + name: host-run-ovn + - mountPath: /sys + name: host-sys + readOnly: true + - mountPath: /etc/openvswitch + name: host-config-openvswitch + - mountPath: 
/etc/ovn + name: host-config-ovn + - mountPath: /var/log/openvswitch + name: host-log-ovs + - mountPath: /var/log/ovn + name: host-log-ovn + - mountPath: /etc/localtime + name: localtime + - mountPath: /var/run/tls + name: kube-ovn-tls + readinessProbe: + exec: + command: + - bash + - /kube-ovn/ovn-healthcheck.sh + periodSeconds: 15 + timeoutSeconds: 45 + livenessProbe: + exec: + command: + - bash + - /kube-ovn/ovn-healthcheck.sh + initialDelaySeconds: 30 + periodSeconds: 15 + failureThreshold: 5 + timeoutSeconds: 45 + nodeSelector: + kubernetes.io/os: "linux" + kube-ovn/role: "master" + volumes: + - name: host-run-ovs + hostPath: + path: /run/openvswitch + - name: host-run-ovn + hostPath: + path: /run/ovn + - name: host-sys + hostPath: + path: /sys + - name: host-config-openvswitch + hostPath: + path: /etc/origin/openvswitch + - name: host-config-ovn + hostPath: + path: /etc/origin/ovn + - name: host-log-ovs + hostPath: + path: /var/log/openvswitch + - name: host-log-ovn + hostPath: + path: /var/log/ovn + - name: localtime + hostPath: + path: /etc/localtime + - name: kube-ovn-tls + secret: + optional: true + secretName: kube-ovn-tls +--- +kind: DaemonSet +apiVersion: apps/v1 +metadata: + name: ovs-ovn + namespace: kube-system + annotations: + kubernetes.io/description: | + This daemon set launches the openvswitch daemon. 
+spec: + selector: + matchLabels: + app: ovs + updateStrategy: + type: OnDelete + template: + metadata: + labels: + app: ovs + component: network + type: infra + spec: + tolerations: + - effect: NoSchedule + operator: Exists + - effect: NoExecute + operator: Exists + - key: CriticalAddonsOnly + operator: Exists + priorityClassName: system-node-critical + serviceAccountName: ovn + hostNetwork: true + hostPID: true + containers: + - name: openvswitch + image: "$REGISTRY/kube-ovn:$VERSION" + imagePullPolicy: $IMAGE_PULL_POLICY + command: ["/kube-ovn/start-ovs.sh"] + securityContext: + runAsUser: 0 + privileged: true + env: + - name: ENABLE_SSL + value: "$ENABLE_SSL" + - name: POD_IP + valueFrom: + fieldRef: + fieldPath: status.podIP + - name: HW_OFFLOAD + value: "$HW_OFFLOAD" + - name: TUNNEL_TYPE + value: "$TUNNEL_TYPE" + - name: KUBE_NODE_NAME + valueFrom: + fieldRef: + fieldPath: spec.nodeName + - name: OVN_DB_IPS + value: $addresses + - name: DEBUG_WRAPPER + value: "$DEBUG_WRAPPER" + volumeMounts: + - mountPath: /var/run/netns + name: host-ns + mountPropagation: HostToContainer + - mountPath: /lib/modules + name: host-modules + readOnly: true + - mountPath: /var/run/openvswitch + name: host-run-ovs + - mountPath: /var/run/ovn + name: host-run-ovn + - mountPath: /sys + name: host-sys + readOnly: true + - mountPath: /etc/cni/net.d + name: cni-conf + - mountPath: /etc/openvswitch + name: host-config-openvswitch + - mountPath: /etc/ovn + name: host-config-ovn + - mountPath: /var/log/openvswitch + name: host-log-ovs + - mountPath: /var/log/ovn + name: host-log-ovn + - mountPath: /etc/localtime + name: localtime + - mountPath: /var/run/tls + name: kube-ovn-tls + - mountPath: /var/run/containerd + name: cruntime + readinessProbe: + exec: + command: + - bash + - -c + - LOG_ROTATE=true /kube-ovn/ovs-healthcheck.sh + periodSeconds: 5 + timeoutSeconds: 45 + livenessProbe: + exec: + command: + - bash + - /kube-ovn/ovs-healthcheck.sh + initialDelaySeconds: 60 + periodSeconds: 
5 + failureThreshold: 5 + timeoutSeconds: 45 + resources: + requests: + cpu: 200m + memory: 200Mi + limits: + cpu: 1000m + memory: 1000Mi + nodeSelector: + kubernetes.io/os: "linux" + volumes: + - name: host-modules + hostPath: + path: /lib/modules + - name: host-run-ovs + hostPath: + path: /run/openvswitch + - name: host-run-ovn + hostPath: + path: /run/ovn + - name: host-sys + hostPath: + path: /sys + - name: host-ns + hostPath: + path: /var/run/netns + - name: cni-conf + hostPath: + path: /etc/cni/net.d + - name: host-config-openvswitch + hostPath: + path: /etc/origin/openvswitch + - name: host-config-ovn + hostPath: + path: /etc/origin/ovn + - name: host-log-ovs + hostPath: + path: /var/log/openvswitch + - name: host-log-ovn + hostPath: + path: /var/log/ovn + - name: localtime + hostPath: + path: /etc/localtime + - hostPath: + path: /var/run/containerd + name: cruntime + - name: kube-ovn-tls + secret: + optional: true + secretName: kube-ovn-tls +EOF +fi + +kubectl apply -f kube-ovn-crd.yaml +kubectl apply -f ovn.yaml + +if $HYBRID_DPDK; then + +cat < ovn-dpdk.yaml +kind: DaemonSet +apiVersion: apps/v1 +metadata: + name: ovs-ovn-dpdk + namespace: kube-system + annotations: + kubernetes.io/description: | + This daemon set launches the openvswitch daemon. 
+spec: + selector: + matchLabels: + app: ovs-dpdk + updateStrategy: + type: OnDelete + template: + metadata: + labels: + app: ovs-dpdk + component: network + type: infra + spec: + tolerations: + - operator: Exists + priorityClassName: system-node-critical + serviceAccountName: ovn + hostNetwork: true + hostPID: true + containers: + - name: openvswitch + image: "$REGISTRY/kube-ovn:${VERSION}-dpdk" + imagePullPolicy: $IMAGE_PULL_POLICY + command: ["/kube-ovn/start-ovs-dpdk-v2.sh"] + securityContext: + runAsUser: 0 + privileged: true + env: + - name: ENABLE_SSL + value: "$ENABLE_SSL" + - name: POD_IP + valueFrom: + fieldRef: + fieldPath: status.podIP + - name: HW_OFFLOAD + value: "$HW_OFFLOAD" + - name: TUNNEL_TYPE + value: "$TUNNEL_TYPE" + - name: DPDK_TUNNEL_IFACE + value: "$DPDK_TUNNEL_IFACE" + - name: KUBE_NODE_NAME + valueFrom: + fieldRef: + fieldPath: spec.nodeName + - name: OVN_DB_IPS + value: $addresses + volumeMounts: + - mountPath: /opt/ovs-config + name: host-config-ovs + - name: shareddir + mountPath: /var/lib/kubelet/pods + - name: hugepage + mountPath: /dev/hugepages + - mountPath: /lib/modules + name: host-modules + readOnly: true + - mountPath: /var/run/openvswitch + name: host-run-ovs + mountPropagation: HostToContainer + - mountPath: /var/run/ovn + name: host-run-ovn + - mountPath: /sys + name: host-sys + - mountPath: /etc/cni/net.d + name: cni-conf + - mountPath: /etc/openvswitch + name: host-config-openvswitch + - mountPath: /etc/ovn + name: host-config-ovn + - mountPath: /var/log/openvswitch + name: host-log-ovs + - mountPath: /var/log/ovn + name: host-log-ovn + - mountPath: /etc/localtime + name: localtime + - mountPath: /var/run/tls + name: kube-ovn-tls + readinessProbe: + exec: + command: + - bash + - -c + - LOG_ROTATE=true /kube-ovn/ovs-healthcheck.sh + periodSeconds: 5 + timeoutSeconds: 45 + livenessProbe: + exec: + command: + - bash + - /kube-ovn/ovs-healthcheck.sh + initialDelaySeconds: 60 + periodSeconds: 5 + failureThreshold: 5 + 
timeoutSeconds: 45 + resources: + requests: + cpu: 200m + hugepages-2Mi: 1Gi + memory: 200Mi + limits: + cpu: 1000m + hugepages-2Mi: 1Gi + memory: 800Mi + nodeSelector: + kubernetes.io/os: "linux" + ovn.kubernetes.io/ovs_dp_type: "userspace" + volumes: + - name: host-config-ovs + hostPath: + path: /opt/ovs-config + type: DirectoryOrCreate + - name: shareddir + hostPath: + path: /var/lib/kubelet/pods + type: '' + - name: hugepage + emptyDir: + medium: HugePages + - name: host-modules + hostPath: + path: /lib/modules + - name: host-run-ovs + hostPath: + path: /run/openvswitch + - name: host-run-ovn + hostPath: + path: /run/ovn + - name: host-sys + hostPath: + path: /sys + - name: cni-conf + hostPath: + path: /etc/cni/net.d + - name: host-config-openvswitch + hostPath: + path: /etc/origin/openvswitch + - name: host-config-ovn + hostPath: + path: /etc/origin/ovn + - name: host-log-ovs + hostPath: + path: /var/log/openvswitch + - name: host-log-ovn + hostPath: + path: /var/log/ovn + - name: localtime + hostPath: + path: /etc/localtime + - name: kube-ovn-tls + secret: + optional: true + secretName: kube-ovn-tls +EOF +kubectl apply -f ovn-dpdk.yaml +fi +kubectl rollout status deployment/ovn-central -n kube-system --timeout 300s +echo "-------------------------------" +echo "" + +echo "[Step 3/6] Install Kube-OVN" + +cat < kube-ovn.yaml +--- +kind: Deployment +apiVersion: apps/v1 +metadata: + name: kube-ovn-controller + namespace: kube-system + annotations: + kubernetes.io/description: | + kube-ovn controller +spec: + replicas: $count + selector: + matchLabels: + app: kube-ovn-controller + strategy: + rollingUpdate: + maxSurge: 0% + maxUnavailable: 100% + type: RollingUpdate + template: + metadata: + labels: + app: kube-ovn-controller + component: network + type: infra + spec: + tolerations: + - effect: NoSchedule + operator: Exists + - key: CriticalAddonsOnly + operator: Exists + affinity: + podAntiAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + - 
labelSelector: + matchLabels: + app: kube-ovn-controller + topologyKey: kubernetes.io/hostname + priorityClassName: system-cluster-critical + serviceAccountName: ovn + hostNetwork: true + containers: + - name: kube-ovn-controller + image: "$REGISTRY/kube-ovn:$VERSION" + imagePullPolicy: $IMAGE_PULL_POLICY + args: + - /kube-ovn/start-controller.sh + - --default-cidr=$POD_CIDR + - --default-gateway=$POD_GATEWAY + - --default-gateway-check=$CHECK_GATEWAY + - --default-logical-gateway=$LOGICAL_GATEWAY + - --default-u2o-interconnection=$U2O_INTERCONNECTION + - --default-exclude-ips=$EXCLUDE_IPS + - --node-switch-cidr=$JOIN_CIDR + - --service-cluster-ip-range=$SVC_CIDR + - --network-type=$NETWORK_TYPE + - --default-interface-name=$VLAN_INTERFACE_NAME + - --default-exchange-link-name=$EXCHANGE_LINK_NAME + - --default-vlan-id=$VLAN_ID + - --ls-dnat-mod-dl-dst=$LS_DNAT_MOD_DL_DST + - --pod-nic-type=$POD_NIC_TYPE + - --enable-lb=$ENABLE_LB + - --enable-np=$ENABLE_NP + - --enable-eip-snat=$ENABLE_EIP_SNAT + - --enable-external-vpc=$ENABLE_EXTERNAL_VPC + - --logtostderr=false + - --alsologtostderr=true + - --gc-interval=$GC_INTERVAL + - --inspect-interval=$INSPECT_INTERVAL + - --log_file=/var/log/kube-ovn/kube-ovn-controller.log + - --log_file_max_size=0 + - --enable-lb-svc=$ENABLE_LB_SVC + - --keep-vm-ip=$ENABLE_KEEP_VM_IP + - --pod-default-fip-type=$POD_DEFAULT_FIP_TYPE + env: + - name: ENABLE_SSL + value: "$ENABLE_SSL" + - name: POD_NAME + valueFrom: + fieldRef: + fieldPath: metadata.name + - name: KUBE_NAMESPACE + valueFrom: + fieldRef: + fieldPath: metadata.namespace + - name: KUBE_NODE_NAME + valueFrom: + fieldRef: + fieldPath: spec.nodeName + - name: OVN_DB_IPS + value: $addresses + - name: POD_IPS + valueFrom: + fieldRef: + fieldPath: status.podIPs + - name: ENABLE_BIND_LOCAL_IP + value: "$ENABLE_BIND_LOCAL_IP" + volumeMounts: + - mountPath: /etc/localtime + name: localtime + - mountPath: /var/log/kube-ovn + name: kube-ovn-log + - mountPath: /var/run/tls + name: 
kube-ovn-tls + readinessProbe: + exec: + command: + - /kube-ovn/kube-ovn-controller-healthcheck + periodSeconds: 3 + timeoutSeconds: 45 + livenessProbe: + exec: + command: + - /kube-ovn/kube-ovn-controller-healthcheck + initialDelaySeconds: 300 + periodSeconds: 7 + failureThreshold: 5 + timeoutSeconds: 45 + resources: + requests: + cpu: 200m + memory: 200Mi + limits: + cpu: 1000m + memory: 1Gi + nodeSelector: + kubernetes.io/os: "linux" + volumes: + - name: localtime + hostPath: + path: /etc/localtime + - name: kube-ovn-log + hostPath: + path: /var/log/kube-ovn + - name: kube-ovn-tls + secret: + optional: true + secretName: kube-ovn-tls + +--- +kind: DaemonSet +apiVersion: apps/v1 +metadata: + name: kube-ovn-cni + namespace: kube-system + annotations: + kubernetes.io/description: | + This daemon set launches the kube-ovn cni daemon. +spec: + selector: + matchLabels: + app: kube-ovn-cni + template: + metadata: + labels: + app: kube-ovn-cni + component: network + type: infra + spec: + tolerations: + - effect: NoSchedule + operator: Exists + - effect: NoExecute + operator: Exists + - key: CriticalAddonsOnly + operator: Exists + priorityClassName: system-node-critical + serviceAccountName: ovn + hostNetwork: true + hostPID: true + initContainers: + - name: install-cni + image: "$REGISTRY/kube-ovn:$VERSION" + imagePullPolicy: $IMAGE_PULL_POLICY + command: ["/kube-ovn/install-cni.sh"] + securityContext: + runAsUser: 0 + privileged: true + volumeMounts: + - mountPath: /opt/cni/bin + name: cni-bin + - mountPath: /usr/local/bin + name: local-bin + containers: + - name: cni-server + image: "$REGISTRY/kube-ovn:$VERSION" + imagePullPolicy: $IMAGE_PULL_POLICY + command: + - bash + - /kube-ovn/start-cniserver.sh + args: + - --enable-mirror=$ENABLE_MIRROR + - --encap-checksum=true + - --service-cluster-ip-range=$SVC_CIDR + - --iface=${IFACE} + - --dpdk-tunnel-iface=${DPDK_TUNNEL_IFACE} + - --network-type=$TUNNEL_TYPE + - --default-interface-name=$VLAN_INTERFACE_NAME + - 
--cni-conf-name=${CNI_CONFIG_PRIORITY}-kube-ovn.conflist + - --logtostderr=false + - --alsologtostderr=true + - --log_file=/var/log/kube-ovn/kube-ovn-cni.log + - --log_file_max_size=0 + securityContext: + runAsUser: 0 + privileged: true + env: + - name: ENABLE_SSL + value: "$ENABLE_SSL" + - name: POD_IP + valueFrom: + fieldRef: + fieldPath: status.podIP + - name: KUBE_NODE_NAME + valueFrom: + fieldRef: + fieldPath: spec.nodeName + - name: MODULES + value: $MODULES + - name: RPMS + value: $RPMS + - name: POD_IPS + valueFrom: + fieldRef: + fieldPath: status.podIPs + - name: ENABLE_BIND_LOCAL_IP + value: "$ENABLE_BIND_LOCAL_IP" + - name: DBUS_SYSTEM_BUS_ADDRESS + value: "unix:path=/host/var/run/dbus/system_bus_socket" + volumeMounts: + - name: host-modules + mountPath: /lib/modules + readOnly: true + - name: shared-dir + mountPath: /var/lib/kubelet/pods + - mountPath: /etc/openvswitch + name: systemid + - mountPath: /etc/cni/net.d + name: cni-conf + - mountPath: /run/openvswitch + name: host-run-ovs + mountPropagation: Bidirectional + - mountPath: /run/ovn + name: host-run-ovn + - mountPath: /host/var/run/dbus + name: host-dbus + mountPropagation: HostToContainer + - mountPath: /var/run/netns + name: host-ns + mountPropagation: HostToContainer + - mountPath: /var/log/kube-ovn + name: kube-ovn-log + - mountPath: /var/log/openvswitch + name: host-log-ovs + - mountPath: /var/log/ovn + name: host-log-ovn + - mountPath: /etc/localtime + name: localtime + - mountPath: /tmp + name: tmp + livenessProbe: + failureThreshold: 3 + initialDelaySeconds: 30 + periodSeconds: 7 + successThreshold: 1 + tcpSocket: + port: 10665 + timeoutSeconds: 3 + readinessProbe: + failureThreshold: 3 + initialDelaySeconds: 30 + periodSeconds: 7 + successThreshold: 1 + tcpSocket: + port: 10665 + timeoutSeconds: 3 + resources: + requests: + cpu: 100m + memory: 100Mi + limits: + cpu: 1000m + memory: 1Gi + nodeSelector: + kubernetes.io/os: "linux" + volumes: + - name: host-modules + hostPath: + path: 
/lib/modules + - name: shared-dir + hostPath: + path: /var/lib/kubelet/pods + - name: systemid + hostPath: + path: /etc/origin/openvswitch + - name: host-run-ovs + hostPath: + path: /run/openvswitch + - name: host-run-ovn + hostPath: + path: /run/ovn + - name: cni-conf + hostPath: + path: $CNI_CONF_DIR + - name: cni-bin + hostPath: + path: $CNI_BIN_DIR + - name: host-ns + hostPath: + path: /var/run/netns + - name: host-dbus + hostPath: + path: /var/run/dbus + - name: host-log-ovs + hostPath: + path: /var/log/openvswitch + - name: kube-ovn-log + hostPath: + path: /var/log/kube-ovn + - name: host-log-ovn + hostPath: + path: /var/log/ovn + - name: localtime + hostPath: + path: /etc/localtime + - name: tmp + hostPath: + path: /tmp + - name: local-bin + hostPath: + path: /usr/local/bin + +--- +kind: DaemonSet +apiVersion: apps/v1 +metadata: + name: kube-ovn-pinger + namespace: kube-system + annotations: + kubernetes.io/description: | + This daemon set launches the openvswitch daemon. +spec: + selector: + matchLabels: + app: kube-ovn-pinger + updateStrategy: + type: RollingUpdate + template: + metadata: + labels: + app: kube-ovn-pinger + component: network + type: infra + spec: + priorityClassName: system-node-critical + serviceAccountName: ovn + hostPID: true + containers: + - name: pinger + image: "$REGISTRY/kube-ovn:$VERSION" + command: + - /kube-ovn/kube-ovn-pinger + args: + - --external-address=$PINGER_EXTERNAL_ADDRESS + - --external-dns=$PINGER_EXTERNAL_DOMAIN + - --logtostderr=false + - --alsologtostderr=true + - --log_file=/var/log/kube-ovn/kube-ovn-pinger.log + - --log_file_max_size=0 + imagePullPolicy: $IMAGE_PULL_POLICY + securityContext: + runAsUser: 0 + privileged: false + env: + - name: ENABLE_SSL + value: "$ENABLE_SSL" + - name: POD_IP + valueFrom: + fieldRef: + fieldPath: status.podIP + - name: HOST_IP + valueFrom: + fieldRef: + fieldPath: status.hostIP + - name: POD_NAME + valueFrom: + fieldRef: + fieldPath: metadata.name + - name: NODE_NAME + valueFrom: 
+ fieldRef: + fieldPath: spec.nodeName + volumeMounts: + - mountPath: /lib/modules + name: host-modules + readOnly: true + - mountPath: /run/openvswitch + name: host-run-ovs + - mountPath: /var/run/openvswitch + name: host-run-ovs + - mountPath: /var/run/ovn + name: host-run-ovn + - mountPath: /sys + name: host-sys + readOnly: true + - mountPath: /etc/openvswitch + name: host-config-openvswitch + - mountPath: /var/log/openvswitch + name: host-log-ovs + - mountPath: /var/log/ovn + name: host-log-ovn + - mountPath: /var/log/kube-ovn + name: kube-ovn-log + - mountPath: /etc/localtime + name: localtime + - mountPath: /var/run/tls + name: kube-ovn-tls + resources: + requests: + cpu: 100m + memory: 100Mi + limits: + cpu: 200m + memory: 400Mi + nodeSelector: + kubernetes.io/os: "linux" + volumes: + - name: host-modules + hostPath: + path: /lib/modules + - name: host-run-ovs + hostPath: + path: /run/openvswitch + - name: host-run-ovn + hostPath: + path: /run/ovn + - name: host-sys + hostPath: + path: /sys + - name: host-config-openvswitch + hostPath: + path: /etc/origin/openvswitch + - name: host-log-ovs + hostPath: + path: /var/log/openvswitch + - name: kube-ovn-log + hostPath: + path: /var/log/kube-ovn + - name: host-log-ovn + hostPath: + path: /var/log/ovn + - name: localtime + hostPath: + path: /etc/localtime + - name: kube-ovn-tls + secret: + optional: true + secretName: kube-ovn-tls +--- +kind: Deployment +apiVersion: apps/v1 +metadata: + name: kube-ovn-monitor + namespace: kube-system + annotations: + kubernetes.io/description: | + Metrics for OVN components: northd, nb and sb. 
+spec: + replicas: 1 + strategy: + rollingUpdate: + maxSurge: 1 + maxUnavailable: 1 + type: RollingUpdate + selector: + matchLabels: + app: kube-ovn-monitor + template: + metadata: + labels: + app: kube-ovn-monitor + component: network + type: infra + spec: + tolerations: + - effect: NoSchedule + operator: Exists + - key: CriticalAddonsOnly + operator: Exists + affinity: + podAntiAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + - labelSelector: + matchLabels: + app: kube-ovn-monitor + topologyKey: kubernetes.io/hostname + priorityClassName: system-cluster-critical + serviceAccountName: ovn + hostNetwork: true + containers: + - name: kube-ovn-monitor + image: "$REGISTRY/kube-ovn:$VERSION" + imagePullPolicy: $IMAGE_PULL_POLICY + command: ["/kube-ovn/start-ovn-monitor.sh"] + securityContext: + runAsUser: 0 + privileged: false + env: + - name: ENABLE_SSL + value: "$ENABLE_SSL" + - name: KUBE_NODE_NAME + valueFrom: + fieldRef: + fieldPath: spec.nodeName + - name: POD_IPS + valueFrom: + fieldRef: + fieldPath: status.podIPs + - name: ENABLE_BIND_LOCAL_IP + value: "$ENABLE_BIND_LOCAL_IP" + resources: + requests: + cpu: 200m + memory: 200Mi + limits: + cpu: 200m + memory: 200Mi + volumeMounts: + - mountPath: /var/run/openvswitch + name: host-run-ovs + - mountPath: /var/run/ovn + name: host-run-ovn + - mountPath: /etc/openvswitch + name: host-config-openvswitch + - mountPath: /etc/ovn + name: host-config-ovn + - mountPath: /var/log/openvswitch + name: host-log-ovs + - mountPath: /var/log/ovn + name: host-log-ovn + - mountPath: /etc/localtime + name: localtime + - mountPath: /var/run/tls + name: kube-ovn-tls + readinessProbe: + exec: + command: + - cat + - /var/run/ovn/ovn-controller.pid + periodSeconds: 10 + timeoutSeconds: 45 + livenessProbe: + exec: + command: + - cat + - /var/run/ovn/ovn-controller.pid + initialDelaySeconds: 30 + periodSeconds: 10 + failureThreshold: 5 + timeoutSeconds: 45 + nodeSelector: + kubernetes.io/os: "linux" + kube-ovn/role: "master" 
+ volumes: + - name: host-run-ovs + hostPath: + path: /run/openvswitch + - name: host-run-ovn + hostPath: + path: /run/ovn + - name: host-config-openvswitch + hostPath: + path: /etc/origin/openvswitch + - name: host-config-ovn + hostPath: + path: /etc/origin/ovn + - name: host-log-ovs + hostPath: + path: /var/log/openvswitch + - name: host-log-ovn + hostPath: + path: /var/log/ovn + - name: localtime + hostPath: + path: /etc/localtime + - name: kube-ovn-tls + secret: + optional: true + secretName: kube-ovn-tls +--- +kind: Service +apiVersion: v1 +metadata: + name: kube-ovn-monitor + namespace: kube-system + labels: + app: kube-ovn-monitor +spec: + ports: + - name: metrics + port: 10661 + type: ClusterIP + ${SVC_YAML_IPFAMILYPOLICY} + selector: + app: kube-ovn-monitor + sessionAffinity: None +--- +kind: Service +apiVersion: v1 +metadata: + name: kube-ovn-pinger + namespace: kube-system + labels: + app: kube-ovn-pinger +spec: + ${SVC_YAML_IPFAMILYPOLICY} + selector: + app: kube-ovn-pinger + ports: + - port: 8080 + name: metrics +--- +kind: Service +apiVersion: v1 +metadata: + name: kube-ovn-controller + namespace: kube-system + labels: + app: kube-ovn-controller +spec: + ${SVC_YAML_IPFAMILYPOLICY} + selector: + app: kube-ovn-controller + ports: + - port: 10660 + name: metrics +--- +kind: Service +apiVersion: v1 +metadata: + name: kube-ovn-cni + namespace: kube-system + labels: + app: kube-ovn-cni +spec: + ${SVC_YAML_IPFAMILYPOLICY} + selector: + app: kube-ovn-cni + ports: + - port: 10665 + name: metrics +EOF + +kubectl apply -f kube-ovn.yaml +kubectl rollout status deployment/kube-ovn-controller -n kube-system --timeout 300s +kubectl rollout status daemonset/kube-ovn-cni -n kube-system --timeout 300s +echo "-------------------------------" +echo "" + +echo "[Step 4/6] Delete pod that not in host network mode" +for ns in $(kubectl get ns --no-headers -o custom-columns=NAME:.metadata.name); do + for pod in $(kubectl get pod --no-headers -n "$ns" --field-selector 
spec.restartPolicy=Always -o custom-columns=NAME:.metadata.name,HOST:spec.hostNetwork | awk '{if ($2!="true") print $1}'); do + kubectl delete pod "$pod" -n "$ns" --ignore-not-found + done +done + +sleep 5 +kubectl rollout status daemonset/kube-ovn-pinger -n kube-system --timeout 300s +kubectl rollout status deployment/coredns -n kube-system --timeout 600s +echo "-------------------------------" +echo "" + +echo "[Step 5/6] Add kubectl plugin PATH" + +if ! sh -c "echo \":$PATH:\" | grep -q \":/usr/local/bin:\""; then + echo "Tips:Please join the /usr/local/bin to your PATH. Temporarily, we do it for this execution." + export PATH=/usr/local/bin:$PATH + echo "-------------------------------" + echo "" +fi + +echo "[Step 6/6] Run network diagnose" +kubectl cp kube-system/$(kubectl -n kube-system get pods -o wide | grep cni | awk '{print $1}' | awk 'NR==1{print}'):/kube-ovn/kubectl-ko /usr/local/bin/kubectl-ko +chmod +x /usr/local/bin/kubectl-ko +kubectl ko diagnose all + +echo "-------------------------------" +echo " + ,,,, + ,::, + ,,::,,,, + ,,,,,::::::::::::,,,,, + ,,,::::::::::::::::::::::,,, + ,,::::::::::::::::::::::::::::,, + ,,::::::::::::::::::::::::::::::::,, + ,::::::::::::::::::::::::::::::::::::, + ,:::::::::::::,, ,,:::::,,,::::::::::, + ,,:::::::::::::, ,::, ,:::::::::, + ,:::::::::::::, :x, ,:: :, ,:::::::::, +,:::::::::::::::, ,,, ,::, ,, ,::::::::::, +,:::::::::::::::::,,,,,,:::::,,,,::::::::::::, ,:, ,:, ,xx, ,:::::, ,:, ,:: :::, ,x +,::::::::::::::::::::::::::::::::::::::::::::, :x: ,:xx: , :xx, :xxxxxxxxx, :xx, ,xx:,xxxx, :x +,::::::::::::::::::::::::::::::::::::::::::::, :xxxxx:, ,xx, :x: :xxx:x::, ::xxxx: :xx:, ,:xxx :xx, ,xx: ,xxxxx:, :x +,::::::::::::::::::::::::::::::::::::::::::::, :xxxxx, :xx, :x: :xxx,,:xx,:xx:,:xx, ,,,,,,,,,xxx, ,xx: :xx:xx: ,xxx,:xx::x +,::::::,,::::::::,,::::::::,,:::::::,,,::::::, :x:,xxx: ,xx, :xx :xx: ,xx,xxxxxx:, ,xxxxxxx:,xxx:, ,xxx, :xxx: ,xxx, :xxxx +,::::, ,::::, ,:::::, ,,::::, ,::::, :x: 
,:xx,,:xx::xxxx,,xxx::xx: :xx::::x: ,,,,,, ,xxxxxxxxx, ,xx: ,xxx, :xxx +,::::, ,::::, ,::::, ,::::, ,::::, ,:, ,:, ,,::,,:, ,::::,, ,:::::, ,,:::::, ,, :x: ,:: +,::::, ,::::, ,::::, ,::::, ,::::, + ,,,,, ,::::, ,::::, ,::::, ,:::, ,,,,,,,,,,,,, + ,::::, ,::::, ,::::, ,:::, ,,,:::::::::::::::, + ,::::, ,::::, ,::::, ,::::, ,,,,:::::::::,,,,,,,:::, + ,::::, ,::::, ,::::, ,::::::::::::,,,,, + ,,,, ,::::, ,,,, ,,,::::,,,, + ,::::, + ,,::, +" +echo "Thanks for choosing Kube-OVN! +For more advanced features, please read https://kubeovn.github.io/docs/stable/en/ +If you have any question, please file an issue https://github.com/kubeovn/kube-ovn/issues/new/choose" diff --git a/playbooks/roles/vhosts/network_info/files/display_network_info.sh b/playbooks/roles/vhosts/network_info/files/display_network_info.sh new file mode 100644 index 0000000..e3cab71 --- /dev/null +++ b/playbooks/roles/vhosts/network_info/files/display_network_info.sh @@ -0,0 +1,13 @@ +#!/bin/bash +# Print one line per ens*/cni* link: name, UP/DOWN state, first address, default gateway. +# The default gateway is host-wide, so it is looked up once instead of once per interface. +# "ip -br link" may print a link as name@parent; the @parent suffix is not part of the device name and is stripped. +default_gw=$(ip route | grep default | awk '{print $3}') +for interface in $(ip -br link | awk '/^(ens|cni)/ {sub(/@.*/, "", $1); print $1}') +do + status=$(ip link show "$interface" | grep -q "state UP" && echo -n "UP" || echo -n "DOWN") + ip_addr=$(ip -br addr show "$interface" | awk '{print $3}') + + echo "$interface $status $ip_addr $default_gw" + +done diff --git a/playbooks/roles/vhosts/network_info/tasks/main.yml b/playbooks/roles/vhosts/network_info/tasks/main.yml new file mode 100755 index 0000000..a86aa01 --- /dev/null +++ b/playbooks/roles/vhosts/network_info/tasks/main.yml @@ -0,0 +1,16 @@ +- name: Gather network default gateway + ansible.builtin.shell: | + ip route | grep default | awk '{print $3}' + register: default_gateway + changed_when: false + +- name: Gather network interface information and display details + ansible.builtin.debug: + msg: | + Interface: "{{ item }}" + Status: "{{ 'UP' if hostvars[inventory_hostname]['ansible_' + item].active else 'DOWN' }}" + IP Address: "{{ hostvars[inventory_hostname]['ansible_' +
item].ipv4.address if hostvars[inventory_hostname]['ansible_' + item].ipv4 is defined else 'N/A' }}" + Netmask: "{{ hostvars[inventory_hostname]['ansible_' + item].ipv4.netmask if hostvars[inventory_hostname]['ansible_' + item].ipv4 is defined else 'N/A' }}" + Gateway: "{{ default_gateway.stdout }}" + loop: "{{ ansible_facts.interfaces }}" + when: "'ens' in item or 'cni' in item or item.startswith('eth')" diff --git a/playbooks/roles/vhosts/prometheus-transfer/meta/main.yml b/playbooks/roles/vhosts/prometheus-transfer/meta/main.yml new file mode 100644 index 0000000..cfa117f --- /dev/null +++ b/playbooks/roles/vhosts/prometheus-transfer/meta/main.yml @@ -0,0 +1,2 @@ +dependencies: + - role: node-exporter diff --git a/playbooks/roles/vhosts/prometheus-transfer/tasks/main.yml b/playbooks/roles/vhosts/prometheus-transfer/tasks/main.yml new file mode 100755 index 0000000..dd139d3 --- /dev/null +++ b/playbooks/roles/vhosts/prometheus-transfer/tasks/main.yml @@ -0,0 +1,24 @@ +# Installs Prometheus as the prometheus-transfer systemd service: binary, unit file, config and start/stop wrappers. +- name: Pre setting + shell: "rm -f /usr/bin/prometheus; mkdir -pv /opt/prometheus/data/ /etc/prometheus/ && chown prometheus:prometheus /opt/prometheus/data/" + +# curl -f: fail on an HTTP error instead of saving the error page as the binary. +- name: download prometheus binary + shell: 'curl -fLo /usr/bin/prometheus https://mirrors.onwalk.net/tools/linux-amd64/prometheus && chmod 755 /usr/bin/prometheus' + +- name: create prometheus-transfer service + template: src=templates/prometheus-transfer.service dest=/etc/systemd/system/prometheus-transfer.service owner=root group=root mode=0644 + +# The rendered config embeds the remote-read password, so only root and the prometheus group may read it. +- name: create prometheus-transfer config + template: src=templates/prometheus-transfer.yml dest=/etc/prometheus/prometheus-transfer.yml owner=root group=prometheus mode=0640 + +- name: create prometheus-transfer start script + template: src=templates/start-prometheus-transfer-service.sh dest=/usr/bin/start-prometheus-transfer-service.sh owner=root group=root mode=0755 + +- name: create prometheus-transfer stop script + template: src=templates/stop-prometheus-transfer-service.sh
dest=/usr/bin/stop-prometheus-transfer-service.sh owner=root group=root mode=0755 + +- name: init prometheus-transfer service + shell: "systemctl enable prometheus-transfer && systemctl daemon-reload && systemctl restart prometheus-transfer" + diff --git a/playbooks/roles/vhosts/prometheus-transfer/templates/prometheus-transfer.service b/playbooks/roles/vhosts/prometheus-transfer/templates/prometheus-transfer.service new file mode 100644 index 0000000..99b2750 --- /dev/null +++ b/playbooks/roles/vhosts/prometheus-transfer/templates/prometheus-transfer.service @@ -0,0 +1,16 @@ +[Unit] +Description=Prometheus +Documentation=https://prometheus.io/ +After=network.target + +[Service] +Type=simple +User=prometheus +ExecStart=/usr/bin/start-prometheus-transfer-service.sh +ExecStop=/usr/bin/stop-prometheus-transfer-service.sh +Restart=on-failure +RestartSec=30 +StartLimitInterval=0 + +[Install] +WantedBy=multi-user.target diff --git a/playbooks/roles/vhosts/prometheus-transfer/templates/prometheus-transfer.yml b/playbooks/roles/vhosts/prometheus-transfer/templates/prometheus-transfer.yml new file mode 100644 index 0000000..f3b536b --- /dev/null +++ b/playbooks/roles/vhosts/prometheus-transfer/templates/prometheus-transfer.yml @@ -0,0 +1,9 @@ +global: + scrape_interval: 3s + evaluation_interval: 3s +remote_read: + - url: '{{ remote_read }}' + read_recent: true + basic_auth: + username: '{{ remote_user }}' + password: '{{ remote_token }}' diff --git a/playbooks/roles/vhosts/prometheus-transfer/templates/start-prometheus-transfer-service.sh b/playbooks/roles/vhosts/prometheus-transfer/templates/start-prometheus-transfer-service.sh new file mode 100755 index 0000000..f9c01a3 --- /dev/null +++ b/playbooks/roles/vhosts/prometheus-transfer/templates/start-prometheus-transfer-service.sh @@ -0,0 +1,3 @@ +#!/bin/sh +# exec replaces this wrapper shell, so the service's main process is prometheus itself rather than sh. +exec /usr/bin/prometheus --config.file=/etc/prometheus/prometheus-transfer.yml --web.listen-address="0.0.0.0:9092" --web.enable-lifecycle
--storage.tsdb.path="/opt/prometheus/data/" diff --git a/playbooks/roles/vhosts/prometheus-transfer/templates/stop-prometheus-transfer-service.sh b/playbooks/roles/vhosts/prometheus-transfer/templates/stop-prometheus-transfer-service.sh new file mode 100755 index 0000000..bb2b391 --- /dev/null +++ b/playbooks/roles/vhosts/prometheus-transfer/templates/stop-prometheus-transfer-service.sh @@ -0,0 +1,2 @@ +#!/bin/sh +pkill -9 prometheus diff --git a/playbooks/roles/vhosts/promtail-agent/meta/main.yml b/playbooks/roles/vhosts/promtail-agent/meta/main.yml new file mode 100644 index 0000000..9711b33 --- /dev/null +++ b/playbooks/roles/vhosts/promtail-agent/meta/main.yml @@ -0,0 +1,2 @@ +dependencies: + - role: common diff --git a/playbooks/roles/vhosts/promtail-agent/tasks/main.yml b/playbooks/roles/vhosts/promtail-agent/tasks/main.yml new file mode 100755 index 0000000..4fa017f --- /dev/null +++ b/playbooks/roles/vhosts/promtail-agent/tasks/main.yml @@ -0,0 +1,19 @@ +- name: Pre setting + shell: "rm -f /usr/bin/promtail; mkdir -pv /etc/promtail/ && touch /var/log/positions.yaml" + when: inventory_hostname in groups[group] + +- name: Download promtail binary + shell: 'curl -Lo /usr/bin/promtail https://mirrors.onwalk.net/tools/linux-amd64/promtail && chmod 755 /usr/bin/promtail' + when: inventory_hostname in groups[group] + +- name: Create promtail-agent service + template: src=templates/promtail-agent.service dest=/lib/systemd/system/promtail-agent.service owner=root group=root mode=0644 + when: inventory_hostname in groups[group] + +- name: Create promtail-agent config + template: src=templates/promtail.yaml dest=/etc/promtail/promtail.yaml owner=root group=root mode=0644 + when: inventory_hostname in groups[group] + +- name: Init promtail-agent service + shell: "systemctl enable promtail-agent && systemctl daemon-reload && systemctl restart promtail-agent" + when: inventory_hostname in groups[group] diff --git 
a/playbooks/roles/vhosts/promtail-agent/templates/promtail-agent.service b/playbooks/roles/vhosts/promtail-agent/templates/promtail-agent.service
new file mode 100644
index 0000000..4d27b40
--- /dev/null
+++ b/playbooks/roles/vhosts/promtail-agent/templates/promtail-agent.service
@@ -0,0 +1,15 @@
+[Unit]
+Description=Promtail log shipping agent
+Documentation=https://grafana.com/docs/loki/latest/
+After=network.target
+
+[Service]
+Type=simple
+User=root
+ExecStart=/usr/bin/promtail -config.file=/etc/promtail/promtail.yaml
+Restart=on-failure
+RestartSec=30
+StartLimitInterval=0
+
+[Install]
+WantedBy=multi-user.target
diff --git a/playbooks/roles/vhosts/promtail-agent/templates/promtail.yaml b/playbooks/roles/vhosts/promtail-agent/templates/promtail.yaml
new file mode 100644
index 0000000..8bdb778
--- /dev/null
+++ b/playbooks/roles/vhosts/promtail-agent/templates/promtail.yaml
@@ -0,0 +1,38 @@
+server:
+  http_listen_port: 9080
+  grpc_listen_port: 0
+
+clients:
+  - url: http://{{ loki_host }}:{{ loki_port }}/loki/api/v1/push
+
+positions:
+  filename: /var/log/positions.yaml
+
+scrape_configs:
+- job_name: system-auth-log
+  static_configs:
+  - targets:
+      - localhost
+    labels:
+      instance: {{ inventory_hostname }}
+      {{ label }}
+      job: secure
+      __path__: /var/log/auth.log
+- job_name: system-os-log
+  static_configs:
+  - targets:
+      - localhost
+    labels:
+      instance: {{ inventory_hostname }}
+      {{ label }}
+      job: syslog
+      __path__: /var/log/syslog
+- job_name: system-audit-log
+  static_configs:
+  - targets:
+      - localhost
+    labels:
+      instance: {{ inventory_hostname }}
+      {{ label }}
+      job: audit
+      __path__: /var/log/audit/audit.log
diff --git a/playbooks/roles/vhosts/secret-manger/tasks/main.yml b/playbooks/roles/vhosts/secret-manger/tasks/main.yml
new file mode 100755
index 0000000..f12b12b
--- /dev/null
+++ b/playbooks/roles/vhosts/secret-manger/tasks/main.yml
@@ -0,0 +1,55 @@
+# Recreate Kubernetes secrets in `namespace` with kubectl.
+# Optional list variables: `generic` and `secrets` (items: secret_name,
+# sercet_key [sic - the spelling callers must use], secret_value) and `tls`
+# (items: secret_name, keyfile, certfile). An undefined list is treated as
+# empty: `loop` is templated before `when`, so a `when: x is defined` guard
+# cannot protect a loop over x; `default([])` is used instead.
+
+- name: "cluster {{ ClusterContext }} : Create namespace"
+  shell: "kubectl create ns {{ namespace }} || echo true"
+
+# Generic secrets from key/value pairs (variable: generic)
+
+- name: "cluster {{ ClusterContext }} : Clean OLD Secret"
+  shell: "kubectl delete secret {{ item.secret_name }} -n {{ namespace }}"
+  ignore_errors: yes
+  loop: "{{ generic | default([]) }}"
+
+- name: "cluster {{ ClusterContext }} Create New Generic Secret from Key/Vaule"
+  # Folded block scalar: the former single-quoted string kept a literal
+  # backslash before each folded line break, which the shell read as an
+  # escaped space. `quote` stops the shell from expanding the secret value.
+  shell: >-
+    kubectl create secret generic {{ item.secret_name }}
+    --from-literal={{ item.sercet_key }}={{ item.secret_value | quote }}
+    -n {{ namespace }}
+  loop: "{{ generic | default([]) }}"
+
+# Generic secrets from key/value pairs (variable: secrets)
+
+- name: "cluster {{ ClusterContext }} : Clean OLD Secret"
+  shell: "kubectl delete secret {{ item.secret_name }} -n {{ namespace }}"
+  ignore_errors: yes
+  # Same list as the create task below (was `secret`, which never matched it).
+  loop: "{{ secrets | default([]) }}"
+
+- name: "cluster {{ ClusterContext }} Create New Generic Secret from Key/Vaule"
+  shell: >-
+    kubectl create secret generic {{ item.secret_name }}
+    --from-literal={{ item.sercet_key }}={{ item.secret_value | quote }}
+    -n {{ namespace }}
+  loop: "{{ secrets | default([]) }}"
+
+# TLS secrets from key/cert files (variable: tls)
+
+- name: "cluster {{ ClusterContext }} : Clean OLD Secret"
+  shell: "kubectl delete secret {{ item.secret_name }} -n {{ namespace }}"
+  ignore_errors: yes
+  loop: "{{ tls | default([]) }}"
+
+- name: "cluster {{ ClusterContext }} : Create New tls secret"
+  shell: >-
+    kubectl create secret tls {{ item.secret_name }}
+    --key={{ item.keyfile }}
+    --cert={{ item.certfile }}
+    -n {{ namespace }}
+  loop: "{{ tls | default([]) }}"
diff --git a/playbooks/roles/vhosts/telegraf/handlers/main.yml b/playbooks/roles/vhosts/telegraf/handlers/main.yml
new file mode 100644
index 0000000..6a906e7
--- /dev/null
+++ b/playbooks/roles/vhosts/telegraf/handlers/main.yml
@@ -0,0 +1,5 @@
+---
+- name: restart telegraf
+  service:
+    name: telegraf
+    state: restarted
diff --git a/playbooks/roles/vhosts/telegraf/meta/main.yml b/playbooks/roles/vhosts/telegraf/meta/main.yml
new file
mode 100644
index 0000000..9711b33
--- /dev/null
+++ b/playbooks/roles/vhosts/telegraf/meta/main.yml
@@ -0,0 +1,2 @@
+dependencies:
+  - role: common
diff --git a/playbooks/roles/vhosts/telegraf/tasks/main.yml b/playbooks/roles/vhosts/telegraf/tasks/main.yml
new file mode 100755
index 0000000..e6118ff
--- /dev/null
+++ b/playbooks/roles/vhosts/telegraf/tasks/main.yml
@@ -0,0 +1,54 @@
+# Install Telegraf from the InfluxData package repository and deploy its
+# configuration; a configuration change notifies the "restart telegraf" handler.
+
+- name: Import InfluxData GPG key (CentOS)
+  rpm_key:
+    state: present
+    key: https://repos.influxdata.com/influxdb.key
+  when: ansible_os_family == "RedHat"
+
+- name: Add Telegraf YUM repository (CentOS)
+  yum_repository:
+    name: influxdata
+    description: InfluxData Repository - Telegraf
+    baseurl: https://repos.influxdata.com/rhel/$releasever/$basearch/stable
+    gpgcheck: yes
+    gpgkey: https://repos.influxdata.com/influxdb.key
+  when: ansible_os_family == "RedHat"
+
+- name: Import InfluxData GPG key (Ubuntu)
+  apt_key:
+    url: https://repos.influxdata.com/influxdb.key
+    state: present
+  when: ansible_os_family == "Debian"
+
+- name: Add Telegraf APT repository (Ubuntu)
+  apt_repository:
+    repo: deb https://repos.influxdata.com/ubuntu {{ ansible_distribution_release }} stable
+    state: present
+  when: ansible_os_family == "Debian"
+
+- name: Install Telegraf
+  package:
+    name: telegraf
+    state: present
+
+- name: Create Telegraf configuration directory
+  file:
+    path: /etc/telegraf/
+    state: directory
+    owner: root
+    group: root
+    mode: '0755'
+
+- name: Install telegraf.conf
+  template:
+    # The role ships templates/telegraf.conf (no .j2 suffix); the previous
+    # src "templates/telegraf.conf.j2" named a file this patch does not add.
+    src: telegraf.conf
+    dest: /etc/telegraf/telegraf.conf
+    owner: root
+    group: root
+    mode: '0644'
+  notify:
+    - restart telegraf
diff --git a/playbooks/roles/vhosts/telegraf/templates/telegraf.conf b/playbooks/roles/vhosts/telegraf/templates/telegraf.conf
new file mode 100644
index 0000000..382eb98
--- /dev/null
+++ b/playbooks/roles/vhosts/telegraf/templates/telegraf.conf
@@ -0,0 +1,32 @@
+[global_tags]
+[agent]
+  interval = "10s"
+  round_interval = true
+  metric_batch_size = 1000
+
metric_buffer_limit = 10000
+  collection_jitter = "0s"
+  flush_interval = "10s"
+  flush_jitter = "0s"
+  precision = "0s"
+  hostname = ""
+  omit_hostname = false
+[[outputs.influxdb]]
+urls = ["https://influxdb.svc-dev.ink"]
+[[inputs.cpu]]
+  percpu = true
+  totalcpu = true
+  collect_cpu_time = false
+  report_active = false
+  core_tags = false
+[[inputs.disk]]
+  ignore_fs = ["tmpfs", "devtmpfs", "devfs", "iso9660", "overlay", "aufs", "squashfs"]
+[[inputs.diskio]]
+[[inputs.kernel]]
+[[inputs.mem]]
+[[inputs.processes]]
+[[inputs.swap]]
+[[inputs.system]]
+[[inputs.net]]
+  fielddrop = ["icmp_*", "icmpmsg_*", "ip_*", "tcp_*", "udp_*", "udplite_*"]
+  interfaces = ["eth*", "en*" ]
+[[inputs.netstat]]
diff --git a/playbooks/roles/vhosts/vault/files/setup.sh b/playbooks/roles/vhosts/vault/files/setup.sh
new file mode 100644
index 0000000..747a36b
--- /dev/null
+++ b/playbooks/roles/vhosts/vault/files/setup.sh
@@ -0,0 +1,34 @@
+#!/bin/bash
+
+# Exit with an error if the value in $1 is empty; $2 is the name shown in the message
+check_not_empty() {
+  if [[ -z $1 ]]; then
+    echo "Error: $2 is empty. Please provide a value."
+    exit 1
+  fi
+}
+
+# Validate the three positional arguments and store them in named variables
+check_not_empty "$1" "DOMAIN" && DOMAIN=$1
+check_not_empty "$2" "NAMESPACE" && NAMESPACE=$2
+check_not_empty "$3" "SECRET_NAME" && SECRET_NAME=$3
+
+cat > vaules.yaml << EOF
+server:
+  ingress:
+    enabled: true
+    ingressClassName: "nginx"
+    hosts:
+      - host: vault.$DOMAIN
+        paths:
+          - /
+    tls:
+      - secretName: $SECRET_NAME
+        hosts:
+          - vault.$DOMAIN
+EOF
+
+helm repo add hashicorp https://helm.releases.hashicorp.com
+helm repo up
+kubectl create ns $NAMESPACE || echo true
+helm upgrade --install vault-server hashicorp/vault -n $NAMESPACE --create-namespace -f vaules.yaml
diff --git a/playbooks/roles/vhosts/vault/meta/main.yml b/playbooks/roles/vhosts/vault/meta/main.yml
new file mode 100644
index 0000000..1f2217b
--- /dev/null
+++ b/playbooks/roles/vhosts/vault/meta/main.yml
@@ -0,0 +1,2 @@
+dependencies:
+  - role: secret-manger
diff --git a/playbooks/roles/vhosts/vault/readme.md b/playbooks/roles/vhosts/vault/readme.md
new file mode 100644
index 0000000..bdb3af3
--- /dev/null
+++ b/playbooks/roles/vhosts/vault/readme.md
@@ -0,0 +1,4 @@
+# Init Vault Server
+
+kubectl exec -t -i vault-server-0 -n vault -- sh
+vault operator init -key-shares=5 -key-threshold=3
diff --git a/playbooks/roles/vhosts/vault/tasks/main.yml b/playbooks/roles/vhosts/vault/tasks/main.yml
new file mode 100755
index 0000000..0ff84c4
--- /dev/null
+++ b/playbooks/roles/vhosts/vault/tasks/main.yml
@@ -0,0 +1,4 @@
+- name: Setup Vault Server
+  script: files/setup.sh {{ domain }} {{ namespace }} {{ item.secret_name }}
+  loop: "{{ tls }}"
+  when: inventory_hostname in groups[group]
diff --git a/playbooks/roles/vhosts/vault/vars/main.yml b/playbooks/roles/vhosts/vault/vars/main.yml
new file mode 100644
index 0000000..b5de537
--- /dev/null
+++ b/playbooks/roles/vhosts/vault/vars/main.yml
@@ -0,0 +1,7 @@
+group: master
+namespace: vault
+update_secret: true
+tls:
+  - secret_name: vault-tls
+    keyfile: /etc/ssl/svc.plus.key
+    certfile: /etc/ssl/svc.plus.pem
diff --git
a/playbooks/roles/vhosts/wireguard-client/files/enable_ip_forward.sh b/playbooks/roles/vhosts/wireguard-client/files/enable_ip_forward.sh
new file mode 100644
index 0000000..9b652e1
--- /dev/null
+++ b/playbooks/roles/vhosts/wireguard-client/files/enable_ip_forward.sh
@@ -0,0 +1,6 @@
+#!/bin/sh
+# Persistently enable IPv4 forwarding. The setting is appended only when it is
+# not already present, so repeated runs do not pile up duplicate lines.
+grep -qxF 'net.ipv4.ip_forward = 1' /etc/sysctl.conf ||
+  echo 'net.ipv4.ip_forward = 1' >> /etc/sysctl.conf
+sysctl -p
diff --git a/playbooks/roles/vhosts/wireguard-client/tasks/main.yml b/playbooks/roles/vhosts/wireguard-client/tasks/main.yml
new file mode 100755
index 0000000..20d1165
--- /dev/null
+++ b/playbooks/roles/vhosts/wireguard-client/tasks/main.yml
@@ -0,0 +1,22 @@
+# Set up a WireGuard tunnel on a local workstation (tools come from Homebrew).
+# `wireguard_owner` is the account that must own /etc/wireguard/; it defaults
+# to the previously hard-coded user so existing invocations behave the same.
+- name: Init wireguard env
+  shell: "sudo mkdir -pv /etc/wireguard/ && \
+    sudo touch /etc/wireguard/{{ local.tunnel }}.conf && \
+    sudo chown {{ wireguard_owner | default('shenlan') }} /etc/wireguard/ && \
+    brew install wireguard-tools"
+
+- name: Configure wireguard tunnel
+  template:
+    src: "templates/server.conf"
+    dest: "/etc/wireguard/{{ local.tunnel }}.conf"
+    # The rendered file contains the private key: keep it owner-only.
+    mode: '0600'
+
+- name: Stop Wireguard interface
+  shell: sudo wg-quick down {{ local.tunnel }}
+  ignore_errors: yes
+
+- name: Start Wireguard interface
+  shell: sudo wg-quick up {{ local.tunnel }}
diff --git a/playbooks/roles/vhosts/wireguard-client/templates/server.conf b/playbooks/roles/vhosts/wireguard-client/templates/server.conf
new file mode 100755
index 0000000..bb28ee5
--- /dev/null
+++ b/playbooks/roles/vhosts/wireguard-client/templates/server.conf
@@ -0,0 +1,15 @@
+[Interface]
+PrivateKey = {{ local.private_key }}
+Address = {{ local.network }}
+ListenPort = {{ local.listen_port }}
+SaveConfig = false
+MTU = 1420
+
+{% for peer in peers %}
+[Peer]
+{% if peer.endpoint is defined %}
+Endpoint = {{ peer.endpoint }}
+{% endif %}
+PublicKey = {{ peer.public_key }}
+AllowedIPs = {{ peer.allowed_ips }}
+{% endfor %}
diff --git a/playbooks/roles/vhosts/wireguard-gateway/files/enable_ip_forward.sh b/playbooks/roles/vhosts/wireguard-gateway/files/enable_ip_forward.sh
new file mode 100644
index 0000000..9b652e1
--- /dev/null
+++ b/playbooks/roles/vhosts/wireguard-gateway/files/enable_ip_forward.sh
@@ -0,0 +1,6 @@
+#!/bin/sh
+# Persistently enable IPv4 forwarding. The setting is appended only when it is
+# not already present, so repeated runs do not pile up duplicate lines.
+grep -qxF 'net.ipv4.ip_forward = 1' /etc/sysctl.conf ||
+  echo 'net.ipv4.ip_forward = 1' >> /etc/sysctl.conf
+sysctl -p
diff --git a/playbooks/roles/vhosts/wireguard-gateway/handlers/main.yml b/playbooks/roles/vhosts/wireguard-gateway/handlers/main.yml
new file mode 100644
index 0000000..cd770a6
--- /dev/null
+++ b/playbooks/roles/vhosts/wireguard-gateway/handlers/main.yml
@@ -0,0 +1,4 @@
+- name: Restart WireGuard service
+  systemd:
+    name: wg-quick@{{ gateway.public_config.tunnel }}
+    state: restarted
diff --git a/playbooks/roles/vhosts/wireguard-gateway/meta/main.yml b/playbooks/roles/vhosts/wireguard-gateway/meta/main.yml
new file mode 100755
index 0000000..9711b33
--- /dev/null
+++ b/playbooks/roles/vhosts/wireguard-gateway/meta/main.yml
@@ -0,0 +1,2 @@
+dependencies:
+  - role: common
diff --git a/playbooks/roles/vhosts/wireguard-gateway/tasks/main.yml b/playbooks/roles/vhosts/wireguard-gateway/tasks/main.yml
new file mode 100755
index 0000000..a3e8c04
--- /dev/null
+++ b/playbooks/roles/vhosts/wireguard-gateway/tasks/main.yml
@@ -0,0 +1,46 @@
+- name: Install WireGuard Packages
+  shell: "mkdir -pv /etc/wireguard/ && \
+    apt update && \
+    apt install resolvconf wireguard-dkms wireguard-tools -y"
+  tags: wireguard
+
+- name: Check if IP forwarding is enabled
+  command: sysctl -n net.ipv4.ip_forward
+  register: ip_forwarding_status
+
+- name: Enable IP forwarding if not already enabled
+  block:
+    - name: Append IP forwarding configuration
+      lineinfile:
+        path: /etc/sysctl.conf
+        line: "net.ipv4.ip_forward = 1"
+        state: present
+      when: ip_forwarding_status.stdout != '1'
+
+    - name: Apply sysctl changes
+      command: sysctl -p
+      when: ip_forwarding_status.stdout != '1'
+
+- name: Configure WireGuard tunnel
+  template:
+    src: templates/wg0.conf.j2
+    dest: "/etc/wireguard/{{ gateway.public_config.tunnel }}.conf"
+    owner: root
+    group: root
+    mode: '0600'  # the rendered file contains the private key
+  notify: Restart WireGuard service # triggers the handler
+  tags: wireguard
+
+- name:
Stop WireGuard service if running
+  systemd:
+    name: wg-quick@{{ gateway.public_config.tunnel }}
+    state: stopped
+  ignore_errors: yes
+  tags: wireguard
+
+- name: Enable and start WireGuard service
+  systemd:
+    name: wg-quick@{{ gateway.public_config.tunnel }}
+    enabled: yes
+    state: started
+  tags: wireguard
diff --git a/playbooks/roles/vhosts/wireguard-gateway/templates/wg0.conf.j2 b/playbooks/roles/vhosts/wireguard-gateway/templates/wg0.conf.j2
new file mode 100644
index 0000000..5fe6007
--- /dev/null
+++ b/playbooks/roles/vhosts/wireguard-gateway/templates/wg0.conf.j2
@@ -0,0 +1,27 @@
+[Interface]
+PrivateKey = {{ gateway.private_key }}
+Address = {{ gateway.public_config.network }}
+ListenPort = {{ gateway.public_config.listen_port }}
+SaveConfig = false
+MTU = {{ gateway.public_config.mtu | default(1420) }}
+
+{% for rule in gateway.public_config.postup_rules %}
+PostUp = {{ rule }}
+{% endfor %}
+
+{% for rule in gateway.public_config.postdown_rules %}
+PostDown = {{ rule }}
+{% endfor %}
+
+{% for peer in gateway.public_config.peers %}
+[Peer]
+PublicKey = {{ peer.public_key }}
+AllowedIPs = {{ peer.allowed_ips }}
+{% if peer.endpoint is defined %}
+Endpoint = {{ peer.endpoint }}
+{% endif %}
+{% if peer.persistent_keepalive is defined %}
+PersistentKeepalive = {{ peer.persistent_keepalive }}
+{% endif %}
+{% endfor %}
+
diff --git a/playbooks/wireguard_ali_vpn_gw b/playbooks/wireguard_ali_vpn_gw
new file mode 100755
index 0000000..4e5e2c6
--- /dev/null
+++ b/playbooks/wireguard_ali_vpn_gw
@@ -0,0 +1,35 @@
+- name: set wireguard nodes gateway
+  hosts: all
+  user: ubuntu
+  become: 'yes'
+  gather_facts: 'yes'
+  tasks:
+    # NOTE(review): this patch only adds roles named wireguard-client and
+    # wireguard-gateway under roles/vhosts - confirm "wireguard-gw" exists.
+    - include_role:
+        name: wireguard-gw
+      vars:
+        group: ali-vpn-gateway
+        nameserver:
+          - 127.0.0.53
+        extra_resolv_config:
+          - "options timeout:2 attempts:3 rotate single-request-reopen"
+        gateway:
+          # Never commit the private key. Provide wireguard_ali_private_key via
+          # Ansible Vault or --extra-vars; the key that used to be inlined here
+          # was exposed in the patch and must be rotated.
+          private_key: "{{ wireguard_ali_private_key }}"
+          interface: eth0
+          tunnel: wg0
+          network: 10.255.0.2/32
+          listen_port: '51820'
+          persistent_keepalive: '60'
+          peers:
+            - name: aws_vpc
+              endpoint: "52.81.109.27:51820"
+              public_key: "nL2vjhMnBnGhWcW0frclcL/+Os17wSof6rMtxJrjmRo="
+              allowed_ips: "10.255.0.1/32,10.0.0.0/16"
+            - name: home-desktop
+              public_key: "ZzsemBUYKDu52AMQEJwQvIziwJ4Vomp7t4TEtCQ/ywI="
+              allowed_ips: 10.255.0.3/32
diff --git a/playbooks/wireguard_client b/playbooks/wireguard_client
new file mode 100755
index 0000000..2cc3a7e
--- /dev/null
+++ b/playbooks/wireguard_client
@@ -0,0 +1,24 @@
+- name: Config wireguard for peer
+  hosts: localhost
+  connection: local
+  gather_facts: false
+  tasks:
+    - include_role:
+        name: wireguard-client
+      vars:
+        group: local
+        local:
+          private_key: ""
+          tunnel: wg0
+          network: 10.255.0.3/32
+          listen_port: '54321'
+          persistent_keepalive: '60'
+        peers:
+          - name: aws_vpc
+            endpoint: "52.80.191.64:51820"
+            public_key: ""
+            allowed_ips: "10.255.0.1/32,10.0.0.0/16"
+          - name: ali_vpc
+            endpoint: "8.130.97.92:51820"
+            public_key: ""
+            allowed_ips: "10.255.0.2/32,10.2.0.0/16"
diff --git a/playbooks/wireguard_gateway b/playbooks/wireguard_gateway
new file mode 100755
index 0000000..525b7c7
--- /dev/null
+++ b/playbooks/wireguard_gateway
@@ -0,0 +1,7 @@
+---
+- hosts: vpn-gateway
+  user: ubuntu
+  become: yes
+  gather_facts: yes
+  roles:
+    - wireguard-gateway
diff --git a/scripts/Fetch_packages_depends.sh b/scripts/Fetch_packages_depends.sh
new file mode 100644
index 0000000..7b71600
--- /dev/null
+++ b/scripts/Fetch_packages_depends.sh
@@ -0,0 +1,58 @@
+#!/bin/bash
+
+# Packages whose dependency trees are queried
+PACKAGES=(plasma-desktop dolphin konsole chromium sddm)
+
+# Output files for the dependency list and the SRPM list
+DEP_FILE="kde_dependencies.txt"
+SRPM_FILE="kde_srpm_list.txt"
+
+# Truncate output left over from a previous run
+> "$DEP_FILE"
+> "$SRPM_FILE"
+
+# Recursively record the dependencies of the package given in $1
+get_dependencies() {
+    local package="$1"
+    echo "查询 $package 的依赖关系..."
+    local dependencies=$(dnf repoquery --requires --resolve "$package" 2>/dev/null)
+
+    for dep in $dependencies; do
+        # Skip entries that are already recorded
+        if !
grep -qxF -- "$dep" "$DEP_FILE"; then
+            echo "$dep" | tee -a "$DEP_FILE"
+            get_dependencies "$dep"
+        fi
+    done
+}
+
+# Walk every seed package
+for pkg in "${PACKAGES[@]}"; do
+    echo "$pkg" | tee -a "$DEP_FILE"
+    get_dependencies "$pkg"
+    echo "------------------------------------------------------"
+done
+
+# Count the collected dependency entries
+TOTAL_PACKAGES=$(wc -l < "$DEP_FILE")
+echo "总计依赖包数量: $TOTAL_PACKAGES"
+
+# Look up the source RPM of every collected package
+while read -r pkg; do
+    srpm=$(dnf repoquery --source "$pkg" 2>/dev/null)
+    if [ -n "$srpm" ]; then
+        echo "$srpm" | tee -a "$SRPM_FILE"
+    fi
+done < "$DEP_FILE"
+
+# Count the SRPM entries
+TOTAL_SRPM=$(wc -l < "$SRPM_FILE")
+echo "总计 SRPM 包数量: $TOTAL_SRPM"
+
+# Download every listed SRPM
+dnf download --source $(cat "$SRPM_FILE") --setopt=install_weak_deps=False
+
+echo "依赖包列表已保存到 $DEP_FILE"
+echo "SRPM 包列表已保存到 $SRPM_FILE"
+echo "所有 SRPM 包下载完成"
+
diff --git a/scripts/Jenkinsfile b/scripts/Jenkinsfile
new file mode 100644
index 0000000..5c0cbfb
--- /dev/null
+++ b/scripts/Jenkinsfile
@@ -0,0 +1,33 @@
+pipeline {
+    agent any
+    stages {
+        stage('Build') {
+            steps {
+                sh 'ansible-lint'
+            }
+        }
+        stage('Pre Setup') {
+            steps {
+                // NOTE(review): `secrets` is not a standard Pipeline global - confirm where
+                // it is defined, or bind these values with withCredentials instead.
+                sh "echo \"${secrets.ANSIBLE_SSH_PASSWORD}\" > ~/.vault_pass.txt"
+                sh "echo 'ansible_password: \'xxxx\'' >> inventory/group_vars/all.yml"
+                sh "echo 'ansible_become_password: \'xxxx\'' >> inventory/group_vars/all.yml"
+            }
+        }
+        stage('Deploy') {
+            steps {
+                // Read the vault password from the file written in 'Pre Setup'
+                // (~/.vault_pass.txt); the relative path used before was resolved
+                // against the workspace, not the home directory.
+                // NOTE(review): "inventor.ini" looks like a typo for an inventory path - confirm.
+                sh "ansible-playbook -u ${secrets.ANSIBLE_SSH_USER} -i inventor.ini -kK playbooks/server.yml -l ${params.instance_name} -e 'ign_install_ver=${params.install_version}' --vault-password-file ~/.vault_pass.txt --diff"
+            }
+        }
+        stage('Postsetup') {
+            steps {
+                echo "Todo"
+            }
+        }
+    }
+}
diff --git a/scripts/ansible_playbook_hosts_setup.sh b/scripts/ansible_playbook_hosts_setup.sh
new file mode 100644
index 0000000..14a574a
--- /dev/null
+++ b/scripts/ansible_playbook_hosts_setup.sh
@@ -0,0 +1,40 @@
+#!/bin/bash
+
+# Function to check if a variable is empty
+check_empty() {
+  if [ -z "${!1}" ]; then
+    echo "$1 is empty. Aborting."
+ exit 1 + fi +} + +# List of variables to check +variables=("SSH_USER" "SSH_HOST_IP" "SSH_HOST_DOMAIN" "SSH_PRIVATE_KEY") + +# Loop through variables and check if each one is empty +for var in "${variables[@]}"; do + check_empty "$var" +done + +mkdir -pv ~/.ssh/ +cat > ~/.ssh/id_rsa << EOF +$SSH_PRIVATE_KEY +EOF +sudo chmod 0400 ~/.ssh/id_rsa +md5sum ~/.ssh/id_rsa + +mkdir -pv hosts/ + +cat > hosts/inventory << EOF +[master] +$SSH_HOST_DOMAIN ansible_host=$SSH_HOST_IP + +[all:vars] +ansible_port=22 +ansible_ssh_user=$SSH_USER +ansible_ssh_private_key_file=~/.ssh/id_rsa +ansible_host_key_checking=False +ingress_ip=$SSH_HOST_IP +EOF + +cat hosts/inventory diff --git a/scripts/artifact/setup-harbor.sh b/scripts/artifact/setup-harbor.sh new file mode 100644 index 0000000..92aa232 --- /dev/null +++ b/scripts/artifact/setup-harbor.sh @@ -0,0 +1,66 @@ +helm repo add harbor https://helm.goharbor.io +helm repo update +kubectl create ns harbor || true +kubectl create secret tls harbor-secret --key=/etc/ssl/onwalk.net.key --cert=/etc/ssl/onwalk.net.pem -n harbor +cat > harbor-arm-config.yaml << EOF +expose: + type: ingress + tls: + enabled: true + certSource: secret + secret: + secretName: harbor-secret + notarySecretName: harbor-secret + ingress: + hosts: + core: harbor.onwalk.net + notary: artifact-notary.onwalk.net + className: "nginx" +externalURL: https://harbor.onwalk.net +nginx: + image: + repository: images.onwalk.net/public/goharbor/nginx-photon + tag: v2.12.0 +portal: + image: + repository: images.onwalk.net/public/goharbor/harbor-portal + tag: v2.12.0 +core: + image: + repository: images.onwalk.net/public/goharbor/harbor-core + tag: v2.12.0 +jobservice: + image: + repository: images.onwalk.net/public/goharbor/harbor-jobservice + tag: v2.12.0 +registry: + registry: + image: + repository: images.onwalk.net/public/goharbor/registry-photon + tag: v2.12.0 + controller: + image: + repository: images.onwalk.net/public/goharbor/harbor-registryctl + tag: v2.12.0 +trivy: + 
enabled: true + image: + repository: images.onwalk.net/public/goharbor/trivy-adapter-photon + tag: v2.12.0 +database: + type: internal + internal: + image: + repository: images.onwalk.net/public/goharbor/harbor-db + tag: v2.12.0 +redis: + type: internal + internal: + image: + repository: images.onwalk.net/public/goharbor/redis-photon + tag: v2.12.0 +exporter: + image: + repository: images.onwalk.net/public/goharbor/harbor-exporter +EOF +helm upgrade --install harbor harbor/harbor -f harbor-arm-config.yaml -n harbor diff --git a/scripts/deepflow/backup_images_v6.3-20250309-17.json b/scripts/deepflow/backup_images_v6.3-20250309-17.json new file mode 100755 index 0000000..c6a35c4 --- /dev/null +++ b/scripts/deepflow/backup_images_v6.3-20250309-17.json @@ -0,0 +1,435 @@ +{ + "version": "v6.3-20250309-17", + "items": [ + { + "kind": "Deployment", + "name": "acl-controller-deployment", + "containers": [ + { + "name": "deepflow-acl-controller", + "image": "images.onwalk.net/private/deepflow-v6.3/acl-controller:v6.3.179" + } + ] + }, + { + "kind": "Deployment", + "name": "alarm-deployment", + "containers": [ + { + "name": "deepflow-alarm", + "image": "images.onwalk.net/private/deepflow-v6.3/alarm:v6.3.686" + } + ] + }, + { + "kind": "Deployment", + "name": "cerebro-deployment", + "containers": [ + { + "name": "deepflow-cerebro", + "image": "images.onwalk.net/private/deepflow-v6.3/cerebro:0.9.0" + } + ] + }, + { + "kind": "Deployment", + "name": "deepflow-app", + "containers": [ + { + "name": "deepflow-app", + "image": "images.onwalk.net/private/deepflow-v6.3/deepflow-app:v6.3.120" + } + ] + }, + { + "kind": "Deployment", + "name": "df-help-deployment", + "containers": [ + { + "name": "df-help", + "image": "images.onwalk.net/private/deepflow-v6.3/df-help:v6.3.1086" + } + ] + }, + { + "kind": "Deployment", + "name": "df-web-core-deployment", + "containers": [ + { + "name": "df-web-core", + "image": "images.onwalk.net/private/deepflow-v6.3/df-web-qiankun-core:v6.3.9969" + } + 
] + }, + { + "kind": "Deployment", + "name": "df-web-deployment", + "containers": [ + { + "name": "df-web", + "image": "images.onwalk.net/private/deepflow-v6.3/df-web-service:v6.3.580" + } + ] + }, + { + "kind": "Deployment", + "name": "df-web-metrics-explore-deployment", + "containers": [ + { + "name": "df-web-metrics-explore", + "image": "images.onwalk.net/private/deepflow-v6.3/df-web-metrics-explore:v6.3.4318" + } + ] + }, + { + "kind": "Deployment", + "name": "diagnose-deployment", + "containers": [ + { + "name": "deepflow-diagnose", + "image": "images.onwalk.net/private/deepflow-v6.3/diagnose:v6.3.103" + } + ] + }, + { + "kind": "Deployment", + "name": "fauths-deployment-deployment", + "containers": [ + { + "name": "fauths", + "image": "images.onwalk.net/private/deepflow-v6.3/fauths:v6.3.452" + } + ] + }, + { + "kind": "Deployment", + "name": "fpermit-deployment", + "containers": [ + { + "name": "fpermit", + "image": "images.onwalk.net/private/deepflow-v6.3/fpermit:v6.3.254" + } + ] + }, + { + "kind": "Deployment", + "name": "fuser-deployment", + "containers": [ + { + "name": "fuser", + "image": "images.onwalk.net/private/deepflow-v6.3/fuser:v6.3.328" + } + ] + }, + { + "kind": "Deployment", + "name": "grafana-deployment", + "containers": [ + { + "name": "deepflow-grafana", + "image": "images.onwalk.net/private/deepflow-v6.3/grafana:9.2.4" + } + ] + }, + { + "kind": "Deployment", + "name": "kibana-deployment", + "containers": [ + { + "name": "deepflow-kibana", + "image": "images.onwalk.net/private/deepflow-v6.3/kibana:6.8.8" + } + ] + }, + { + "kind": "Deployment", + "name": "manager-deployment", + "containers": [ + { + "name": "deepflow-manager", + "image": "images.onwalk.net/private/deepflow-v6.3/manager:v6.3.684" + } + ] + }, + { + "kind": "Deployment", + "name": "masterdeepflow-server", + "containers": [ + { + "name": "deepflow-server", + "image": "images.onwalk.net/private/deepflow-v6.3/deepflow-server:v6.3.4211" + } + ] + }, + { + "kind": "Deployment", + 
"name": "mntnct-deployment", + "containers": [ + { + "name": "deepflow-mntnct", + "image": "images.onwalk.net/private/deepflow-v6.3/mntnct:v6.3.1317" + } + ] + }, + { + "kind": "Deployment", + "name": "monitor-deployment", + "containers": [ + { + "name": "deepflow-monitor", + "image": "images.onwalk.net/private/deepflow-v6.3/monitor:v6.3.174" + } + ] + }, + { + "kind": "Deployment", + "name": "mysql-deployment", + "containers": [ + { + "name": "deepflow-mysql", + "image": "images.onwalk.net/private/deepflow-v6.3/mysql-server:8.0.26" + } + ] + }, + { + "kind": "Deployment", + "name": "opensource-endpoints-operator", + "containers": [ + { + "name": "endpoints-operator", + "image": "images.onwalk.net/private/deepflow-v6.3/endpoints-operator:0.2.1" + } + ] + }, + { + "kind": "Deployment", + "name": "pcap-deployment", + "containers": [ + { + "name": "pcap", + "image": "images.onwalk.net/private/deepflow-v6.3/pcap:v6.3.188" + } + ] + }, + { + "kind": "Deployment", + "name": "postman-deployment", + "containers": [ + { + "name": "deepflow-postman", + "image": "images.onwalk.net/private/deepflow-v6.3/postman:v6.3.54" + } + ] + }, + { + "kind": "Deployment", + "name": "querier-js-deployment", + "containers": [ + { + "name": "deepflow-querier-js", + "image": "images.onwalk.net/private/deepflow-v6.3/querier-js:v6.3.264" + } + ] + }, + { + "kind": "Deployment", + "name": "rabbitmq-deployment", + "containers": [ + { + "name": "deepflow-rabbitmq", + "image": "images.onwalk.net/private/deepflow-v6.3/rabbitmq:3.10.25" + } + ] + }, + { + "kind": "Deployment", + "name": "redis-deployment", + "containers": [ + { + "name": "deepflow-redis", + "image": "images.onwalk.net/private/deepflow-v6.3/redis:6.2.6" + } + ] + }, + { + "kind": "Deployment", + "name": "report-deployment", + "containers": [ + { + "name": "deepflow-report", + "image": "images.onwalk.net/private/deepflow-v6.3/report:v6.3.247" + } + ] + }, + { + "kind": "Deployment", + "name": "statistics-deployment", + "containers": [ 
+ { + "name": "deepflow-statistics", + "image": "images.onwalk.net/private/deepflow-v6.3/statistics:v6.3.2082" + } + ] + }, + { + "kind": "Deployment", + "name": "talker-deployment", + "containers": [ + { + "name": "deepflow-talker", + "image": "images.onwalk.net/private/deepflow-v6.3/talker:v6.3.2958" + } + ] + }, + { + "kind": "Deployment", + "name": "warrant-deployment", + "containers": [ + { + "name": "deepflow-warrant", + "image": "images.onwalk.net/private/deepflow-v6.3/warrant:v6.3.81" + } + ] + }, + { + "kind": "Deployment", + "name": "web-sched-deployment", + "containers": [ + { + "name": "web-sched", + "image": "images.onwalk.net/private/deepflow-v6.3/df-web-sched:v6.3.103" + } + ] + }, + { + "kind": "Deployment", + "name": "web-tools-deployment", + "containers": [ + { + "name": "web-tools", + "image": "images.onwalk.net/private/deepflow-v6.3/web-tools:v6.3.122" + } + ] + }, + { + "kind": "Deployment", + "name": "webssh-deployment", + "containers": [ + { + "name": "webssh", + "image": "images.onwalk.net/private/deepflow-v6.3/webssh:v6.3.22" + } + ] + }, + { + "kind": "StatefulSet", + "name": "masterdeepflow-clickhouse", + "containers": [ + { + "name": "clickhouse", + "image": "images.onwalk.net/private/deepflow-v6.3/clickhouse-server:22.8.20.11" + } + ] + }, + { + "kind": "StatefulSet", + "name": "opensource-loki", + "containers": [ + { + "name": "loki", + "image": "images.onwalk.net/private/deepflow-v6.3/loki:2.4.2" + } + ] + }, + { + "kind": "DaemonSet", + "name": "check", + "containers": [ + { + "name": "deepflow-check", + "image": "images.onwalk.net/private/deepflow-v6.3/mntnct:v6.3.1317" + } + ] + }, + { + "kind": "DaemonSet", + "name": "dedicated-agent", + "containers": [ + { + "name": "dedicated-agent", + "image": "images.onwalk.net/private/deepflow-v6.3/deepflow-agent:v6.3.4400" + } + ] + }, + { + "kind": "DaemonSet", + "name": "deepflow-agent", + "containers": [ + { + "name": "deepflow-agent", + "image": 
"images.onwalk.net/private/deepflow-v6.3/deepflow-agent:v6.3.4211" + } + ] + }, + { + "kind": "DaemonSet", + "name": "elasticsearch-daemonset", + "containers": [ + { + "name": "deepflow-elasticsearch", + "image": "images.onwalk.net/private/deepflow-v6.3/elasticsearch:6.8.8" + } + ] + }, + { + "kind": "DaemonSet", + "name": "front-end-daemonset", + "containers": [ + { + "name": "front-end", + "image": "images.onwalk.net/private/deepflow-v6.3/apientry:v6.3.193" + } + ] + }, + { + "kind": "DaemonSet", + "name": "log-cleaner-daemonset", + "containers": [ + { + "name": "deepflow-log-cleaner", + "image": "images.onwalk.net/private/deepflow-v6.3/log-cleaner:v6.3.11" + } + ] + }, + { + "kind": "DaemonSet", + "name": "opensource-promtail", + "containers": [ + { + "name": "promtail", + "image": "images.onwalk.net/private/deepflow-v6.3/promtail:2.4.2" + } + ] + }, + { + "kind": "DaemonSet", + "name": "telegraf-daemonset", + "containers": [ + { + "name": "deepflow-telegraf", + "image": "images.onwalk.net/private/deepflow-v6.3/telegraf:1.14.1.12" + } + ] + }, + { + "kind": "CronJob", + "name": "database-backup", + "containers": [ + { + "name": "database-backup", + "image": "images.onwalk.net/private/deepflow-v6.3/mntnct:v6.3.1317" + } + ] + } + ] +} diff --git a/scripts/deepflow/check_k8s_node_config.sh b/scripts/deepflow/check_k8s_node_config.sh new file mode 100755 index 0000000..9db95ea --- /dev/null +++ b/scripts/deepflow/check_k8s_node_config.sh @@ -0,0 +1,215 @@ + +#!/bin/bash + +# 获取操作系统信息 +get_os_info() { + if [ -f /etc/os-release ]; then + . /etc/os-release + OS_NAME=$NAME + OS_VERSION=$VERSION_ID + elif type lsb_release >/dev/null 2>&1; then + OS_NAME=$(lsb_release -si) + OS_VERSION=$(lsb_release -sr) + else + OS_NAME=$(uname -s) + OS_VERSION=$(uname -r) + fi + echo "当前操作系统: $OS_NAME $OS_VERSION" +} + +# 检查 DNS 解析 +check_dns() { + echo "检查 DNS 解析配置..." 
+ dns_config=$(grep "nameserver" /etc/resolv.conf) + if [[ -n "$dns_config" && "$dns_config" != *"127.0.0.1"* ]]; then + echo "✅ DNS 解析配置正确" + else + echo "❌ DNS 解析配置错误,未设置或包含127.0.0.1" + operations+="\n1. 编辑 /etc/resolv.conf,配置有效的 nameserver,如 114.114.114.114" + fi +} + +# 检查主机名配置 +check_hostname() { + echo "检查主机名配置..." + hostname=$(hostname) + if [[ "$hostname" != *"local"* && "$hostname" != *"_"* && ${#hostname} -le 64 ]]; then + echo "✅ 主机名配置正确:$hostname" + else + echo "❌ 主机名配置不符合要求:$hostname" + operations+="\n2. 修改主机名为合法值,使用 hostnamectl set-hostname 命令" + fi + + # 检查 /etc/hosts 是否包含主机名解析 + hosts_file=$(cat /etc/hosts) + if [[ "$hosts_file" == *"$hostname"* ]]; then + echo "✅ /etc/hosts 中包含主机名解析" + else + echo "❌ /etc/hosts 中未找到主机名解析" + operations+="\n3. 修改 /etc/hosts,添加主机名解析" + fi +} + +# 检查数据盘挂载 +check_disk_mount() { + echo "检查数据盘挂载..." + lsblk_output=$(lsblk) + df_output=$(df -hT) + # 检查是否挂载 /mnt 目录 + if [[ "$df_output" == *"/mnt"* ]]; then + echo "✅ 数据盘已挂载到 /mnt" + # 打印 /mnt 的大小 + mnt_size=$(df -h | grep '/mnt' | awk '{print $2}') + echo "当前 /mnt 大小: $mnt_size" + else + echo "❌ 数据盘未挂载到 /mnt" + operations+="\n4. 挂载数据盘到 /mnt" + fi + + # 检查 /etc/fstab 中是否包含自动挂载配置 + fstab_config=$(grep "/mnt" /etc/fstab) + if [[ -n "$fstab_config" ]]; then + echo "✅ /etc/fstab 中包含数据盘自动挂载配置" + else + echo "❌ /etc/fstab 中未找到数据盘自动挂载配置" + operations+="\n5. 在 /etc/fstab 中添加自动挂载配置" + fi +} + +# 检查免密登录配置 +check_ssh_key() { + echo "检查免密登录配置..." + ssh_config_dir="/root/.ssh" + if [[ -d "$ssh_config_dir" && -f "$ssh_config_dir/authorized_keys" ]]; then + echo "✅ 已配置免密登录" + else + echo "❌ 未配置免密登录" + operations+="\n6. 配置免密登录:使用 ssh-keygen 和 ssh-copy-id 配置公钥免密登录" + fi +} + +# 检查 swap 状态 +check_swap() { + echo "检查 swap 缓存..." + swap_status=$(swapon --show) + if [[ -z "$swap_status" ]]; then + echo "✅ swap 已关闭" + else + echo "❌ swap 未关闭" + operations+="\n7. 关闭 swap:执行 swapoff -a 并删除 /etc/fstab 中的 swap 条目" + fi +} + +# 检查防火墙状态 +check_firewall() { + echo "检查防火墙状态..." 
+    if [[ "$OS_NAME" == *CentOS* || "$OS_NAME" == *"Red Hat"* || "$OS_NAME" == *RedHat* ]]; then  # os-release NAME is e.g. "CentOS Linux", so match by substring
+        firewalld_status=$(systemctl is-active firewalld)
+        if [[ "$firewalld_status" == "inactive" ]]; then
+            echo "✅ 防火墙已关闭"
+        else
+            echo "❌ 防火墙未关闭"
+            operations+="\n8. 停止防火墙并禁用:执行 systemctl stop firewalld 和 systemctl disable firewalld"
+        fi
+    else
+        ufw_status=$(ufw status | grep "Status" | awk '{print $2}')
+        if [[ "$ufw_status" == "inactive" ]]; then
+            echo "✅ 防火墙已关闭"
+        else
+            echo "❌ 防火墙未关闭"
+            operations+="\n8. 停止防火墙并禁用:执行 ufw disable"
+        fi
+    fi
+}
+
+# Check SELinux (CentOS/Red Hat) or AppArmor (Ubuntu); OS_NAME is matched by substring because os-release NAME carries a suffix
+check_security() {
+    echo "检查 SELinux 或 AppArmor 状态..."
+    if [[ "$OS_NAME" == *CentOS* || "$OS_NAME" == *"Red Hat"* || "$OS_NAME" == *RedHat* ]]; then
+        selinux_status=$(getenforce)
+        if [[ "$selinux_status" == "Disabled" ]]; then
+            echo "✅ SELinux 已禁用"
+        else
+            echo "❌ SELinux 未禁用"
+            operations+="\n9. 禁用 SELinux:执行 setenforce 0 并修改 /etc/selinux/config"
+        fi
+    elif [[ "$OS_NAME" == "Ubuntu" ]]; then
+        apparmor_status=$(systemctl is-active apparmor)
+        if [[ "$apparmor_status" == "inactive" ]]; then
+            echo "✅ AppArmor 已禁用"
+        else
+            echo "❌ AppArmor 未禁用"
+            operations+="\n9. 禁用 AppArmor:执行 systemctl stop apparmor 并禁用 systemctl disable apparmor"
+        fi
+    else
+        echo "❌ 无法识别 SELinux 或 AppArmor 状态"
+        operations+="\n9. SELinux 或 AppArmor 状态检查适用于 CentOS/RedHat 或 Ubuntu 系统"
+    fi
+}
+
+# Check that IPv4 forwarding is enabled in the running kernel
+check_ip_forward() {
+    echo "检查 IPV4 流量转发..."
+    ipv4_forward_status=$(sysctl net.ipv4.ip_forward | grep -o "net.ipv4.ip_forward = 1")
+    if [[ -n "$ipv4_forward_status" ]]; then
+        echo "✅ IPV4 流量转发已开启"
+    else
+        echo "❌ IPV4 流量转发未开启"
+        operations+="\n10. 开启 IPV4 流量转发:执行 echo 'net.ipv4.ip_forward=1' >> /etc/sysctl.conf && sysctl -p"
+    fi
+    # Also print the persistent setting from /etc/sysctl.conf for reference
+    ip_forward_config=$(grep "net.ipv4.ip_forward" /etc/sysctl.conf)
+    echo "当前 /etc/sysctl.conf 中的 IPV4 流量转发配置:$ip_forward_config"
+}
+
+# Check the OS-level open-file / process limits
+check_conn_limit() {
+    echo "检查操作系统级别连接数限制..."
+
+    # Capture the full ulimit listing
+    ulimit_output=$(ulimit -a)
+
+    # Read the effective nofile and nproc limits of this shell
+    nofile_limit=$(ulimit -n)
+    nproc_limit=$(ulimit -u)
+
+    # Both limits are expected to be 1048576
+    if [[ "$nofile_limit" -eq 1048576 && "$nproc_limit" -eq 1048576 ]]; then
+        echo "✅ 系统连接数限制配置正确: nofile = $nofile_limit, nproc = $nproc_limit"
+    else
+        echo "❌ 系统连接数限制配置错误"
+        echo " 当前 nofile = $nofile_limit, nproc = $nproc_limit"
+        operations+="\n11. 修改连接数限制:编辑 /etc/security/limits.conf 文件并配置 nofile 和 nproc 为 1048576"
+    fi
+
+    # Look for the persistent root limits in /etc/security/limits.conf
+    limits_config=$(grep -E "root\s+soft\s+nofile\s+1048576|root\s+hard\s+nofile\s+1048576|root\s+soft\s+nproc\s+1048576|root\s+hard\s+nproc\s+1048576" /etc/security/limits.conf)
+    if [[ -z "$limits_config" ]]; then
+        echo "❌ /etc/security/limits.conf 中未设置正确的连接数限制"
+        operations+="\n12. 请检查 /etc/security/limits.conf 中是否配置了以下项:\nroot soft nofile 1048576\nroot hard nofile 1048576\nroot soft nproc 1048576\nroot hard nproc 1048576"
+    else
+        echo "✅ /etc/security/limits.conf 中的关键配置项:"
+        echo "$limits_config"
+    fi
+}
+
+# Run every check; each failed check appends its remediation hint to $operations
+operations=""
+get_os_info
+check_dns
+check_hostname
+check_disk_mount
+check_ssh_key
+check_swap
+check_firewall
+check_security
+check_ip_forward
+check_conn_limit
+
+echo -e "\n检查完成。"
+
+if [[ -n "$operations" ]]; then
+    echo -e "未通过的检查项及建议操作:$operations"
+else
+    echo "所有检查项通过!"
+fi
diff --git a/scripts/deepflow/clean-failed-pods.sh b/scripts/deepflow/clean-failed-pods.sh
new file mode 100755
index 0000000..ac35ddf
--- /dev/null
+++ b/scripts/deepflow/clean-failed-pods.sh
@@ -0,0 +1,14 @@
+#!/bin/bash
+# Script: clean-failed-pods.sh
+# Purpose: delete every Pod that is not in Running state, and all Jobs, in the listed namespaces
+
+# Namespaces to clean
+NAMESPACES=("deepflow" "openebs" "kube-system")
+
+# Process each namespace in turn
+for NAMESPACE in "${NAMESPACES[@]}"; do
+    echo "正在删除 $NAMESPACE 命名空间中非 Running 状态的 Pod..."
+    kubectl get pods -n "$NAMESPACE" | grep -v Running | awk 'NR>1 {print $1}' | xargs -r kubectl delete pod -n "$NAMESPACE" --force  # -r: do nothing when no pod matched
+    kubectl delete jobs --all -n "$NAMESPACE"
+    echo "$NAMESPACE 命名空间清理完成!"
+done
diff --git a/scripts/deepflow/deepflow-server-master-controller-pre.sh b/scripts/deepflow/deepflow-server-master-controller-pre.sh
new file mode 100755
index 0000000..1a99089
--- /dev/null
+++ b/scripts/deepflow/deepflow-server-master-controller-pre.sh
@@ -0,0 +1,20 @@
+k8s_node=`kubectl get nodes | awk 'NR>1{print $1}'`
+
+kubectl label node $k8s_node master_controller=enable
+kubectl label node $k8s_node tsdb=enable
+kubectl label node $k8s_node dfdb=enable
+kubectl label node $k8s_node elasticsearch-warm=enable
+
+kubectl describe node | grep Taint
+kubectl taint nodes $k8s_node node-role.kubernetes.io/control-plane:NoSchedule-
+
+
+kubectl label nodes $k8s_node slave_controller-
+
+kubectl get node --show-labels
+
+mkdir -p /usr/local/deepflow
+mount -o ro deepflow-docker-release-v6.5-242.iso /media
+
+rsync -av /media/* /usr/local/deepflow/
+ln -sv /usr/local/deepflow/registry /var/lib/registry -f || rsync -av /usr/local/deepflow/registry/* /var/lib/registry/
diff --git a/scripts/deepflow/deepflow-server-slave-controller-pre.sh b/scripts/deepflow/deepflow-server-slave-controller-pre.sh
new file mode 100755
index 0000000..fb5ea7f
--- /dev/null
+++ b/scripts/deepflow/deepflow-server-slave-controller-pre.sh
@@ -0,0 +1,20 @@
+
+k8s_node=`sudo kubectl get nodes | awk 'NR>1{print $1}'`
+
+sudo kubectl label node $k8s_node slave_controller=enable
+sudo kubectl label node $k8s_node tsdb=enable
+sudo kubectl label node $k8s_node dfdb=enable
+sudo kubectl label node $k8s_node elasticsearch-warm=enable
+
+kubectl describe node | grep Taint
+kubectl taint nodes $k8s_node node-role.kubernetes.io/control-plane:NoSchedule-
+
+kubectl label nodes $k8s_node master_controller-
+
+kubectl get node --show-labels
+
+mkdir -p /usr/local/deepflow
+mount -o ro
deepflow-docker-release-v6.5-242.iso /media
+
+rsync -av /media/* /usr/local/deepflow/
+rsync -av /usr/local/deepflow/registry/* /var/lib/registry/
diff --git a/scripts/deepflow/deepflow_k8s_backup.sh b/scripts/deepflow/deepflow_k8s_backup.sh
new file mode 100755
index 0000000..07ad09d
--- /dev/null
+++ b/scripts/deepflow/deepflow_k8s_backup.sh
@@ -0,0 +1,161 @@
+#!/bin/bash
+
+NAMESPACE="deepflow"
+VERSION_PREFIX="v6.3"
+TIMESTAMP=$(date +"%Y%m%d-%H")
+BACKUP_FILE="backup_images_${VERSION_PREFIX}-${TIMESTAMP}.json"
+
+# Dump kind/name/container images of all workloads in $NAMESPACE to $BACKUP_FILE; exits 1 if kubectl failed or the dump is empty
+backup_images() {
+    echo "🔄 开始备份 deepflow 命名空间的 Kubernetes 资源镜像信息..."
+
+    kubectl get deployments,statefulsets,daemonsets,cronjobs -n "$NAMESPACE" -o json | jq '
+    {
+        version: "'${VERSION_PREFIX}-${TIMESTAMP}'",
+        items: [
+            .items[] | select(.spec != null) | {
+                kind: .kind,
+                name: .metadata.name,
+                containers: (
+                    if .kind == "CronJob" then
+                        [.spec.jobTemplate.spec.template.spec.containers[]? | {name: .name, image: .image}]
+                    else
+                        [.spec.template.spec.containers[]? | {name: .name, image: .image}]
+                    end
+                )
+            }
+        ]
+    }' > "$BACKUP_FILE"
+
+    if [[ ${PIPESTATUS[0]} -eq 0 && -s "$BACKUP_FILE" ]]; then  # the redirection always creates the file, so test kubectl's status and a non-empty result
+        echo "✅ 备份成功!文件路径: $BACKUP_FILE"
+        echo "📋 备份内容预览(前10行):"
+        head -n 10 "$BACKUP_FILE"
+    else
+        echo "❌ 备份失败,请检查 Kubernetes 访问权限!"
+        exit 1
+    fi
+}
+
+# Compare the images currently deployed in $NAMESPACE with backup file $1 and print a match/mismatch summary
+check_images() {
+    if [[ ! -f "$1" ]]; then
+        echo "❌ 错误: 备份文件 $1 不存在!请先运行备份。"
+        exit 1
+    fi
+
+    echo "🔍 正在校验当前 Kubernetes 资源与备份文件是否一致..."
+
+    CURRENT_IMAGES=$(kubectl get deployments,statefulsets,daemonsets,cronjobs -n "$NAMESPACE" -o json | jq '
+    {
+        items: [
+            .items[] | select(.spec != null) | {
+                kind: .kind,
+                name: .metadata.name,
+                containers: (
+                    if .kind == "CronJob" then
+                        [.spec.jobTemplate.spec.template.spec.containers[]? | {name: .name, image: .image}]
+                    else
+                        [.spec.template.spec.containers[]? | {name: .name, image: .image}]
+                    end
+                )
+            }
+        ]
+    }')
+
+    BACKUP_IMAGES=$(cat "$1")
+
+    MATCH_COUNT=0
+    MISMATCH_COUNT=0
+
+    while read -r backup_item; do
+        kind=$(echo "$backup_item" | jq -r '.kind')
+        name=$(echo "$backup_item" | jq -r '.name')
+
+        echo "📌 检查 $kind/$name ..."
+
+        backup_containers=$(echo "$backup_item" | jq -c '.containers[]?')
+        current_containers=$(echo "$CURRENT_IMAGES" | jq -c --arg kind "$kind" --arg name "$name" '.items[] | select(.kind == $kind and .name == $name) | .containers[]?')
+
+        for backup_container in $backup_containers; do
+            container_name=$(echo "$backup_container" | jq -r '.name')
+            backup_image=$(echo "$backup_container" | jq -r '.image')
+
+            current_image=$(echo "$current_containers" | jq -r --arg container_name "$container_name" 'select(.name == $container_name) | .image')
+
+            if [[ "$backup_image" == "$current_image" ]]; then
+                echo " ✅ $container_name 镜像匹配: $backup_image"
+                ((MATCH_COUNT++))
+            else
+                echo " ❌ $container_name 镜像不匹配: 期望 $backup_image,当前 $current_image"
+                ((MISMATCH_COUNT++))
+            fi
+        done
+    done < <(echo "$BACKUP_IMAGES" | jq -c '.items[]')  # process substitution, not a pipe: the counters must survive the loop
+
+    echo "📊 校验结果: ✅ 匹配 $MATCH_COUNT 项, ❌ 不匹配 $MISMATCH_COUNT 项"
+
+    if [[ $MISMATCH_COUNT -eq 0 ]]; then
+        echo "✅ 校验通过!当前运行的镜像版本与备份一致。"
+    else
+        echo "❌ 校验失败!请检查上方输出。"
+    fi
+}
+
+# Re-apply every container image recorded in backup file $1 with `kubectl set image`
+restore_images() {
+    if [[ ! -f "$1" ]]; then
+        echo "❌ 错误: 备份文件 $1 不存在!请先运行备份。"
+        exit 1
+    fi
+
+    echo "🔄 开始恢复 deepflow 命名空间的 Kubernetes 资源镜像..."
+
+    cat "$1" | jq -c '.items[]' | while read -r item; do
+        kind=$(echo "$item" | jq -r '.kind')
+        name=$(echo "$item" | jq -r '.name')
+
+        echo "📌 处理 $kind/$name ..."
+
+        containers=$(echo "$item" | jq -c '.containers[]?')
+        for container in $containers; do
+            container_name=$(echo "$container" | jq -r '.name')
+            image=$(echo "$container" | jq -r '.image')
+
+            echo " 🔄 更新容器: $container_name -> 镜像: $image"
+            kubectl set image -n "$NAMESPACE" "$kind/$name" "$container_name=$image" --record
+            if [[ $? -eq 0 ]]; then
+                echo " ✅ 更新成功!"
+            else
+                echo " ❌ 更新失败!请检查日志。"
+            fi
+        done
+    done
+
+    echo "✅ 恢复完成!"
+}
+
+# Dispatch on the sub-command given as $1
+case "$1" in
+    backup)
+        backup_images
+        ;;
+    check)
+        if [[ -z "$2" ]]; then
+            echo "❌ 错误: 需要提供备份文件路径!示例: $0 check backup_images_v6.3-20250309-17.json"
+            exit 1
+        fi
+        check_images "$2"
+        ;;
+    restore)
+        if [[ -z "$2" ]]; then
+            echo "❌ 错误: 需要提供备份文件路径!示例: $0 restore backup_images_v6.3-20250309-17.json"
+            exit 1
+        fi
+        restore_images "$2"
+        ;;
+    *)
+        echo "📌 用法: $0 {backup|check <备份文件>|restore <备份文件>}"
+        exit 1
+        ;;
+esac
diff --git a/scripts/deepflow/deploy-k8s.sh b/scripts/deepflow/deploy-k8s.sh
new file mode 100755
index 0000000..5e9e697
--- /dev/null
+++ b/scripts/deepflow/deploy-k8s.sh
@@ -0,0 +1,19 @@
+
+# Create the deployment directory (-p keeps re-runs working) and unpack the bundle into it
+
+mkdir -p /opt/k8s-deploy && tar -xvpf sealos-amd64-k8s-1.25.16.tar.gz -C /opt/k8s-deploy
+cd /opt/k8s-deploy/ && \
+cp sealos helm calicoctl nerdctl /usr/bin/ && \
+chmod +x /usr/bin/sealos /usr/bin/helm /usr/bin/calicoctl /usr/bin/nerdctl
+
+# Load the offline images
+sealos load -i sealos-calico.tar
+sealos load -i sealos-helm.tar
+sealos load -i sealos-k8s-1.25.16.tar
+
+# Single-node deployment (no SSH password needed; run directly as root on this host)
+sealos run \
+    registry.cn-shanghai.aliyuncs.com/labring/kubernetes:v1.25.16 \
+    registry.cn-shanghai.aliyuncs.com/labring/helm:v3.9.4 \
+    registry.cn-shanghai.aliyuncs.com/labring/calico:v3.24.1 \
+    --single
diff --git a/scripts/deepflow/pull-all-v6.4.sh b/scripts/deepflow/pull-all-v6.4.sh
new file mode 100644
index 0000000..c2dbe75
--- /dev/null
+++ b/scripts/deepflow/pull-all-v6.4.sh
@@ -0,0 +1,40 @@
+for image in \
+kube.registry.local:5000/acl-controller:v6.4.182 \
+kube.registry.local:5000/alarm:v6.4.703 \
+kube.registry.local:5000/cerebro:0.9.0 \
+kube.registry.local:5000/deepflow-agent:v6.4.4729 \
+kube.registry.local:5000/deepflow-app:v6.4.178 \
+kube.registry.local:5000/df-help:v6.4.1211 \
+kube.registry.local:5000/df-web-qiankun-core:v6.4.11721 \
+kube.registry.local:5000/df-web-service:v6.4.647 \
+kube.registry.local:5000/df-web-metrics-explore:v6.4.5342 \
+kube.registry.local:5000/df-env:v6.4.884 \ +kube.registry.local:5000/fauths:v6.4.482 \ +kube.registry.local:5000/fpermit:v6.4.278 \ +kube.registry.local:5000/apientry:v6.4.210 \ +kube.registry.local:5000/fuser:v6.4.356 \ +kube.registry.local:5000/grafana-agent:v0.38.0 \ +kube.registry.local:5000/grafana-agent-reload:v0.8.0 \ +kube.registry.local:5000/deepflow-init-grafana-ce:latest \ +kube.registry.local:5000/kibana:6.8.8 \ +kube.registry.local:5000/kube-rbac-proxy:v0.14.0 \ +kube.registry.local:5000/kube-state-metrics:v2.9.2 \ +kube.registry.local:5000/manager:v6.4.695 \ +kube.registry.local:5000/mntnct:v6.4.1320 \ +kube.registry.local:5000/mysql-server:8.0.39 \ +kube.registry.local:5000/pcap:v6.4.194 \ +kube.registry.local:5000/postman:v6.4.55 \ +kube.registry.local:5000/querier-js:v6.4.303 \ +kube.registry.local:5000/rabbitmq:3.10.25 \ +kube.registry.local:5000/redis:7.0.12 \ +kube.registry.local:5000/report:v6.4.267 \ +kube.registry.local:5000/statistics:v6.4.2171 \ +kube.registry.local:5000/talker:v6.4.2987 \ +kube.registry.local:5000/warrant:v6.4.88 \ +kube.registry.local:5000/df-web-sched:v6.4.213 \ +kube.registry.local:5000/web-tools:v6.4.231 \ +kube.registry.local:5000/webssh:v6.4.25 +do + echo "🔄 Pulling $image ..." 
+    nerdctl --insecure-registry -n k8s.io pull "$image"
+done
diff --git a/scripts/deepflow/pull_save_scp_image.sh b/scripts/deepflow/pull_save_scp_image.sh
new file mode 100644
index 0000000..a5a53cc
--- /dev/null
+++ b/scripts/deepflow/pull_save_scp_image.sh
@@ -0,0 +1,32 @@
+#!/bin/bash
+set -e
+
+REMOTE_HOST="root@10.1.3.179"
+
+if [ -z "$1" ]; then
+    echo "❌ 用法: $0 <image>"
+    echo "示例: $0 dfcloud-image-registry-vpc.cn-beijing.cr.aliyuncs.com/dev/df-web-ai:v6.6.18839"
+    exit 1
+fi
+
+IMAGE="$1"
+
+# Derive the image name and tag; only a leading "v" is dropped from the tag for the file name
+NAME_TAG="${IMAGE##*/}" # df-web-ai:v6.6.18839
+NAME="${NAME_TAG%%:*}" # df-web-ai
+TAG="${NAME_TAG##*:}" # v6.6.18839
+FILE_NAME="${NAME}-${TAG#v}.tar" # df-web-ai-6.6.18839.tar
+
+echo "📦 镜像: $IMAGE"
+echo "📁 导出文件名: $FILE_NAME"
+
+echo "🚀 在远程拉取镜像..."
+ssh $REMOTE_HOST docker pull "$IMAGE"
+
+echo "💾 在远程保存镜像为 /tmp/$FILE_NAME..."
+ssh $REMOTE_HOST "docker save $IMAGE > /tmp/$FILE_NAME"
+
+echo "📥 拷贝镜像回本地 ~/Desktop..."
+scp $REMOTE_HOST:/tmp/$FILE_NAME ~/Desktop
+
+echo "✅ 完成!镜像保存于:~/Desktop/$FILE_NAME"
diff --git a/scripts/deepflow/setup-agent-all-in-one.sh b/scripts/deepflow/setup-agent-all-in-one.sh
new file mode 100644
index 0000000..55ec98a
--- /dev/null
+++ b/scripts/deepflow/setup-agent-all-in-one.sh
@@ -0,0 +1,194 @@
+#!/bin/bash
+set -e
+
+cat << EOF > values-custom.yaml
+deepflowServerNodeIPS:
+- 10.50.1.111
+#deepflowK8sClusterID: "fffffff" # FIXME: K8s ClusterID
+image:
+  repository: hub.deepflow.yunshan.net/public/deepflow-agent
+  pullPolicy: Always
+  tag: v6.5
+EOF
+
+helm repo add deepflow https://deepflowio.github.io/deepflow
+helm repo update deepflow # use `helm repo update` when helm < 3.7.0
+helm upgrade --install deepflow-agent -n deepflow deepflow/deepflow-agent --create-namespace -f values-custom.yaml # upgrade --install: a plain install aborts the script (set -e) on re-run
+
+########################################################################################################
+
+helm repo add grafana https://grafana.github.io/helm-charts
+helm repo add prometheus-community
https://prometheus-community.github.io/helm-charts
+helm repo update
+kubectl create ns deepflow || true
+
+helm upgrade --install kube-state-metrics prometheus-community/kube-state-metrics \
+  --namespace deepflow --create-namespace
+
+helm upgrade --install node-exporter prometheus-community/prometheus-node-exporter \
+  --namespace deepflow --create-namespace \
+  --set service.type=ClusterIP \
+  --set service.port=9100
+
+cat > grafana-agent-values.yaml << EOF
+global:
+  image:
+    registry: "images.onwalk.net/public"
+agent:
+  mode: 'static'
+  configMap:
+    create: true
+    content: ''
+logs:
+  enabled: false
+traces:
+  enabled: false
+EOF
+
+helm upgrade --install grafana-agent grafana/grafana-agent --namespace deepflow -f grafana-agent-values.yaml
+# Scrape targets below use the Service DNS names of the two releases installed above; ClusterIPs differ per cluster
+cat > grafana-agent-configmap.yaml << EOF
+apiVersion: v1
+data:
+  config.yaml: |-
+    server:
+      log_level: info
+      log_format: logfmt
+    metrics:
+      global:
+        scrape_interval: 1m
+      configs:
+        - name: agent
+          scrape_configs:
+            - job_name: kube-state-metrics
+              static_configs:
+                - targets: ['kube-state-metrics.deepflow.svc:8080']
+            - job_name: node-metrics
+              static_configs:
+                - targets: ['node-exporter-prometheus-node-exporter.deepflow.svc:9100']
+          remote_write:
+            - url: http://deepflow-agent.deepflow.svc.cluster.local/api/v1/prometheus
+kind: ConfigMap
+metadata:
+  annotations:
+    meta.helm.sh/release-name: grafana-agent
+    meta.helm.sh/release-namespace: deepflow
+  labels:
+    app.kubernetes.io/instance: grafana-agent
+    app.kubernetes.io/managed-by: Helm
+    app.kubernetes.io/name: grafana-agent
+    app.kubernetes.io/version: v0.42.0
+    helm.sh/chart: grafana-agent-0.42.0
+  name: grafana-agent
+  namespace: deepflow
+EOF
+
+kubectl apply -f grafana-agent-configmap.yaml
+
+kubectl get pods -n deepflow
+
+########################################################################################################
+
+
+helm repo add vector https://helm.vector.dev
+helm repo update
+cat << EOF > vector-values-custom.yaml
+role: Agent
+#nodeSelector:
+#  allow/vector: "false"
+
+# resources -- Set Vector resource requests and limits.
+resources:
+  requests:
+    cpu: 200m
+    memory: 256Mi
+  limits:
+    cpu: 200m
+    memory: 256Mi
+image:
+  repository: images.onwalk.net/public/timberio/vector
+  pullPolicy: Always
+  tag: "0.37.1-distroless-libc"
+podLabels:
+  vector.dev/exclude: "true"
+  app: deepflow
+# extraVolumes -- Additional Volumes to use with Vector Pods.
+  # extraVolumes:
+  # - name: opt-log
+  #   hostPath:
+  #     path: "/opt/log/"
+# extraVolumeMounts -- Additional Volume to mount into Vector Containers.
+  # extraVolumeMounts:
+  # - name: opt-log
+  #   mountPath: "/opt/log/"
+  #   readOnly: true
+customConfig:
+  ## The configuration comes from https://vector.dev/docs/reference/configuration/global-options/#data_dir
+  data_dir: /vector-data-dir
+  api:
+    enabled: true
+    address: 127.0.0.1:8686
+    playground: false
+  sources:
+    kubernetes_logs:
+      type: kubernetes_logs
+      namespace_annotation_fields:
+        namespace_labels: ""
+      node_annotation_fields:
+        node_labels: ""
+      pod_annotation_fields:
+        pod_annotations: ""
+        pod_labels: ""
+
+  transforms:
+    remap_kubernetes_logs:
+      type: remap
+      inputs:
+      - kubernetes_logs
+      source: |-
+        # try to parse json
+        if is_string(.message) && is_json(string!(.message)) {
+          tags = parse_json(.message) ?? {}
+          .message = tags.message # FIXME: the log content key inside json
+          del(tags.message)
+          .json = tags
+        }
+
+        if !exists(.level) {
+          if exists(.json) {
+            .level = .json.level
+            del(.json.level)
+          } else {
+            # match log levels surround by ``[]`` or ``<>`` with ignore case
+            level_tags = parse_regex(.message, r'[\[\\\<](?<level>(?i)INFOR?(MATION)?|WARN(ING)?|DEBUG?|ERROR?|TRACE|FATAL|CRIT(ICAL)?)[\]\\\>]') ?? {}
+            if !exists(level_tags.level) {
+              # match log levels surround by whitespace, required uppercase strictly in case mismatching
+              level_tags = parse_regex(.message, r'[\s](?<level>INFOR?(MATION)?|WARN(ING)?|DEBUG?|ERROR?|TRACE|FATAL|CRIT(ICAL)?)[\s]') ??
{}
+            }
+            if exists(level_tags.level) {
+              level_tags.level = upcase(string!(level_tags.level))
+              .level = level_tags.level
+            }
+          }
+        }
+
+        if !exists(._df_log_type) {
+          # default log type
+          ._df_log_type = "user"
+        }
+
+        if !exists(.app_service) {
+          # FIXME: files 模块没有此字段,请通过日志内容注入应用名称
+          .app_service = .kubernetes.container_name
+        }
+  sinks:
+    http:
+      encoding:
+        codec: json
+      inputs:
+      - remap_kubernetes_logs # NOTE: 注意这里数据源是 transform 模块的 key
+      type: http
+      uri: http://deepflow-agent.deepflow/api/v1/log
+EOF
+helm upgrade --install vector vector/vector --namespace deepflow --create-namespace -f vector-values-custom.yaml
+
diff --git a/scripts/deepflow/setup-deepflow-agent.sh b/scripts/deepflow/setup-deepflow-agent.sh
new file mode 100644
index 0000000..4efa95f
--- /dev/null
+++ b/scripts/deepflow/setup-deepflow-agent.sh
@@ -0,0 +1,13 @@
+cat << EOF > values-custom.yaml
+deepflowServerNodeIPS:
+- 10.50.1.111
+#deepflowK8sClusterID: "fffffff" # FIXME: K8s ClusterID
+image:
+  repository: hub.deepflow.yunshan.net/public/deepflow-agent
+  pullPolicy: Always
+  tag: v6.5
+EOF
+
+helm repo add deepflow https://deepflowio.github.io/deepflow
+helm repo update deepflow # use `helm repo update` when helm < 3.7.0
+helm upgrade --install deepflow-agent -n deepflow deepflow/deepflow-agent --create-namespace -f values-custom.yaml # upgrade --install so the script can be re-run
diff --git a/scripts/deploy-open-webui.sh b/scripts/deploy-open-webui.sh
new file mode 100644
index 0000000..3cb5fa5
--- /dev/null
+++ b/scripts/deploy-open-webui.sh
@@ -0,0 +1 @@
+helm upgrade --install --namespace ai open-webui open-webui/open-webui --set pipelines.enabled=true --set ollama.enabled=false
diff --git a/scripts/dynamic_inventory.py b/scripts/dynamic_inventory.py
new file mode 100644
index 0000000..700ed89
--- /dev/null
+++ b/scripts/dynamic_inventory.py
@@ -0,0 +1,77 @@
+import os
+import sys
+from jinja2 import Template
+
+# Check if required environment variables are set
+required_vars = ['SSH_USER', 'SSH_PRIVATE_KEY']
+for var in required_vars:
+    if var not in os.environ:
+        print(f"{var} is not set. Aborting.")
+        sys.exit(1)
+
+# Get the SSH_USER and SSH_PRIVATE_KEY from environment variables
+ssh_user = os.environ['SSH_USER']
+ssh_private_key = os.environ['SSH_PRIVATE_KEY']
+
+# Check if input is provided
+if len(sys.argv) < 2:
+    print("No groups and nodes provided. Usage: python dynamic_inventory.py 'group_name:host_name:host_ip'")
+    sys.exit(1)
+
+# Parse input groups and hosts
+input_data = sys.argv[1]
+group_nodes = input_data.split()
+
+# Dictionary to hold groups and their hosts
+groups = {}
+
+for group_node in group_nodes:
+    # maxsplit=2 keeps a host_ip that itself contains ':' (e.g. IPv6) in one piece
+    try:
+        group, host_name, host_ip = group_node.split(':', 2)
+    except ValueError:
+        print(f"Invalid entry '{group_node}'. Expected 'group_name:host_name:host_ip'.")
+        sys.exit(1)
+    if group not in groups:
+        groups[group] = []
+    groups[group].append({'host_name': host_name, 'host_ip': host_ip})
+
+# Define the inventory template
+inventory_template = """
+{% for group, hosts in groups.items() %}
+[{{ group }}]
+{% for host in hosts %}
+{{ host.host_name }} ansible_host={{ host.host_ip }}
+{% endfor %}
+{% endfor %}
+
+[all:vars]
+ansible_port=22
+ansible_ssh_user={{ ssh_user }}
+ansible_ssh_private_key_file=~/.ssh/id_rsa
+ansible_host_key_checking=False
+"""
+
+# Create the SSH key directory if it doesn't exist
+ssh_dir = os.path.expanduser("~/.ssh")
+os.makedirs(ssh_dir, exist_ok=True)
+
+# Create the SSH key file: owner-only from the moment it exists, and newline-terminated as OpenSSH expects
+ssh_key_path = os.path.join(ssh_dir, 'id_rsa')
+if os.path.exists(ssh_key_path):
+    os.chmod(ssh_key_path, 0o600)  # a previous run left it 0400; make it writable again
+key_fd = os.open(ssh_key_path, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600)
+with os.fdopen(key_fd, 'w') as ssh_key_file:
+    ssh_key_file.write(ssh_private_key.rstrip('\n') + '\n')
+os.chmod(ssh_key_path, 0o400) # Set permissions to 0400
+
+# Render the inventory file
+template = Template(inventory_template)
+output = template.render(groups=groups, ssh_user=ssh_user)
+
+# Write to the inventory file
+os.makedirs('hosts', exist_ok=True)
+with open('hosts/inventory', 'w') as inventory_file:
+    inventory_file.write(output)
+
+print("Inventory file created successfully!")
diff --git a/scripts/gather_network_info.yml b/scripts/gather_network_info.yml
new file mode 100644
index 0000000..7c4257c
--- /dev/null
+++ b/scripts/gather_network_info.yml
@@
-0,0 +1,12 @@
+---
+- name: Gather and display specific network interface information based on dynamic filter
+  hosts: "{{ target_group }}"
+  user: ubuntu
+  become: yes
+  gather_facts: yes
+  tasks:
+    - include_role:
+        name: network_info
+  vars:  # NOTE(review): kept at play level so target_group resolves in 'hosts' - confirm against the original indentation
+    target_group: master
+    interface_filter: "^(ens|cni)"
diff --git a/scripts/generate_ssl.sh b/scripts/generate_ssl.sh
new file mode 100644
index 0000000..836f409
--- /dev/null
+++ b/scripts/generate_ssl.sh
@@ -0,0 +1,79 @@
+#!/bin/bash
+set -e
+# Positional arguments: domain, validity in days, output directory
+DOMAIN="$1"
+VALID_DAYS="$2"
+OUTPUT_DIR="$3"
+
+# All three arguments are required
+if [[ -z "$DOMAIN" || -z "$VALID_DAYS" || -z "$OUTPUT_DIR" ]]; then
+    echo "Usage: $0 <domain> <valid_days> <output_dir>"
+    exit 1
+fi
+
+# Make sure the output directory exists
+mkdir -p "$OUTPUT_DIR"
+
+CERT_FILE="$DOMAIN.cert"
+KEY_FILE="$DOMAIN.key"
+
+echo "Generating self-signed SSL certificate for domain: $DOMAIN (with SAN), valid for $VALID_DAYS days"
+
+# 1. Generate the CA private key
+openssl genrsa -out "$OUTPUT_DIR/ca.key" 2048
+
+# 2. Generate the CA certificate (self-signed root)
+openssl req -x509 -new -nodes \
+    -key "$OUTPUT_DIR/ca.key" \
+    -sha256 -days "$VALID_DAYS" \
+    -out "$OUTPUT_DIR/ca.cert" \
+    -subj "/C=CN/ST=State/L=City/O=Company/OU=Org/CN=Custom-CA"
+
+# 3. Generate the server private key
+openssl genrsa -out "$OUTPUT_DIR/$KEY_FILE" 2048
+
+# 4. Write the OpenSSL request config (works on Linux and macOS); the here-doc expands $DOMAIN
+SAN_CONFIG="$OUTPUT_DIR/san.cnf"
+cat <<EOF > "$SAN_CONFIG"
+[ req ]
+default_bits = 2048
+prompt = no
+default_md = sha256
+req_extensions = req_ext
+distinguished_name = dn
+
+[ dn ]
+C = CN
+ST = State
+L = City
+O = Company
+OU = Org
+CN = $DOMAIN
+
+[ req_ext ]
+subjectAltName = @alt_names
+
+[ alt_names ]
+DNS.1 = $DOMAIN
+EOF
+
+# 5. Generate the CSR (certificate signing request)
+openssl req -new -key "$OUTPUT_DIR/$KEY_FILE" \
+    -out "$OUTPUT_DIR/$DOMAIN.csr" \
+    -config "$SAN_CONFIG"
+
+# 6. Sign the server certificate with the CA, carrying the SAN extension over
+openssl x509 -req \
+    -in "$OUTPUT_DIR/$DOMAIN.csr" \
+    -CA "$OUTPUT_DIR/ca.cert" \
+    -CAkey "$OUTPUT_DIR/ca.key" \
+    -CAcreateserial \
+    -out "$OUTPUT_DIR/$CERT_FILE" \
+    -days "$VALID_DAYS" \
+    -sha256 \
+    -extensions req_ext -extfile "$SAN_CONFIG"
+
+# 7. Remove the CSR and the temporary config
+rm -f "$OUTPUT_DIR/$DOMAIN.csr" "$SAN_CONFIG"
+
+echo "✅ Self-signed SSL certificate (with SAN) for $DOMAIN generated in $OUTPUT_DIR!"
diff --git a/scripts/global-monitor/agent-group-config.yaml b/scripts/global-monitor/agent-group-config.yaml
new file mode 100644
index 0000000..d5afe19
--- /dev/null
+++ b/scripts/global-monitor/agent-group-config.yaml
@@ -0,0 +1,3 @@
+vtap_group_id: g-3lSjoT4zjY
+platform_enabled: 1
+tap_interface_regex: ^(tap.*|cali.*|veth.*|eth.*|en[ospx].*|lxc.*|lo|docker.*|br.*|wg.*)$
diff --git a/scripts/global-monitor/config/containerd.toml b/scripts/global-monitor/config/containerd.toml
new file mode 100644
index 0000000..0f3e3a8
--- /dev/null
+++ b/scripts/global-monitor/config/containerd.toml
@@ -0,0 +1,34 @@
+version = 2
+root = "CONTAINERD_ROOT_DIR"
+state = "/run/containerd"
+oom_score = 0
+
+[grpc]
+  max_recv_message_size = 16777216
+  max_send_message_size = 16777216
+
+[debug]
+  level = "info"
+
+[metrics]
+  address = ""
+  grpc_histogram = false
+
+[plugins]
+  [plugins."io.containerd.grpc.v1.cri"]
+    sandbox_image = "REGISTRY_DOMAIN/library/pause:3.3"
+    max_container_log_line_size = -1
+    [plugins."io.containerd.grpc.v1.cri".containerd]
+      default_runtime_name = "runc"
+      snapshotter = "overlayfs"
+      [plugins."io.containerd.grpc.v1.cri".containerd.runtimes]
+        [plugins."io.containerd.grpc.v1.cri".containerd.runtimes.runc]
+          runtime_type = "io.containerd.runc.v2"
+          runtime_engine = ""
+          runtime_root = ""
+          [plugins."io.containerd.grpc.v1.cri".containerd.runtimes.runc.options]
+            systemdCgroup = true
+    [plugins."io.containerd.grpc.v1.cri".registry]
+      [plugins."io.containerd.grpc.v1.cri".registry.mirrors]
+        [plugins."io.containerd.grpc.v1.cri".registry.mirrors."docker.io"]
+          endpoint = ["https://registry-1.docker.io"]
\ No newline at end of file
diff --git a/scripts/global-monitor/config/deepflow-registry.yaml b/scripts/global-monitor/config/deepflow-registry.yaml
new file mode 100644
index 0000000..7aece34
--- /dev/null
+++
b/scripts/global-monitor/config/deepflow-registry.yaml @@ -0,0 +1,23 @@ +version: 0.1 +log: + fields: + service: registry +storage: + cache: + blobdescriptor: inmemory + filesystem: + rootdirectory: /var/lib/registry + delete: + enabled: true +http: + addr: :5000 + headers: + X-Content-Type-Options: [nosniff] + tls: + certificate: /etc/docker/registry/domain.crt + key: /etc/docker/registry/domain.key +health: + storagedriver: + enabled: true + interval: 10s + threshold: 3 \ No newline at end of file diff --git a/scripts/global-monitor/config/nginx.conf b/scripts/global-monitor/config/nginx.conf new file mode 100644 index 0000000..b534744 --- /dev/null +++ b/scripts/global-monitor/config/nginx.conf @@ -0,0 +1,19 @@ +# This server listen at 8080 for download files and packages +server { + listen NGINX_PORT; + listen [::]:NGINX_PORT; + server_name kube.registry.local; + gzip_static on; + autoindex on; + autoindex_exact_size off; + autoindex_format html; #修改为html + autoindex_localtime on; + location / { + root /usr/share/nginx; + index index.html index.htm; + } + error_page 500 502 503 504 /50x.html; + location = /50x.html { + root html; + } +} diff --git a/scripts/global-monitor/config/registry.yaml b/scripts/global-monitor/config/registry.yaml new file mode 100644 index 0000000..7aece34 --- /dev/null +++ b/scripts/global-monitor/config/registry.yaml @@ -0,0 +1,23 @@ +version: 0.1 +log: + fields: + service: registry +storage: + cache: + blobdescriptor: inmemory + filesystem: + rootdirectory: /var/lib/registry + delete: + enabled: true +http: + addr: :5000 + headers: + X-Content-Type-Options: [nosniff] + tls: + certificate: /etc/docker/registry/domain.crt + key: /etc/docker/registry/domain.key +health: + storagedriver: + enabled: true + interval: 10s + threshold: 3 \ No newline at end of file diff --git a/scripts/global-monitor/custom-domain.yaml b/scripts/global-monitor/custom-domain.yaml new file mode 100644 index 0000000..6851918 --- /dev/null +++ 
b/scripts/global-monitor/custom-domain.yaml @@ -0,0 +1,5 @@ +name: "" +type: kubernetes +config: + controller_ip: 35.72.247.255 + node_port_name_regex: ^(cni|eth|flannel|vxlan.calico|wg|ens|tunl|en[ospx]) diff --git a/scripts/global-monitor/deepflow-registry/all.tag.list b/scripts/global-monitor/deepflow-registry/all.tag.list new file mode 100644 index 0000000..e8e0550 --- /dev/null +++ b/scripts/global-monitor/deepflow-registry/all.tag.list @@ -0,0 +1,51 @@ +local-registry.onwalk.net:5000/acl-controller:v6.3.179 +local-registry.onwalk.net:5000/alarm:v6.3.686 +local-registry.onwalk.net:5000/apientry:v6.3.193 +local-registry.onwalk.net:5000/cerebro:0.9.0 +local-registry.onwalk.net:5000/clickhouse-server:22.8.20.11 +local-registry.onwalk.net:5000/deepflow-agent:v6.3.4211 +local-registry.onwalk.net:5000/deepflow-app:v6.3.120 +local-registry.onwalk.net:5000/deepflow-init-grafana-ce:latest +local-registry.onwalk.net:5000/deepflow-init-grafana-ds-dh-ce:latest +local-registry.onwalk.net:5000/deepflow-server:v6.3.4211 +local-registry.onwalk.net:5000/df-env:v6.3.809 +local-registry.onwalk.net:5000/df-help:v6.3.1086 +local-registry.onwalk.net:5000/df-web-metrics-explore:v6.3.4318 +local-registry.onwalk.net:5000/df-web-qiankun-core:v6.3.9969 +local-registry.onwalk.net:5000/df-web-sched:v6.3.103 +local-registry.onwalk.net:5000/df-web-service:v6.3.580 +local-registry.onwalk.net:5000/diagnose:v6.3.103 +local-registry.onwalk.net:5000/elasticsearch:6.8.8 +local-registry.onwalk.net:5000/endpoints-operator:0.2.1 +local-registry.onwalk.net:5000/fauths:v6.3.452 +local-registry.onwalk.net:5000/fpermit:v6.3.254 +local-registry.onwalk.net:5000/fuser:v6.3.328 +local-registry.onwalk.net:5000/grafana-dashboards:v6.3.809 +local-registry.onwalk.net:5000/grafana:9.2.4 +local-registry.onwalk.net:5000/influxdb:v6.3.14424 +local-registry.onwalk.net:5000/kibana:6.8.8 +local-registry.onwalk.net:5000/kubectl:v6.3.809 +local-registry.onwalk.net:5000/log-cleaner:v6.3.11 
+local-registry.onwalk.net:5000/loki:2.4.2 +local-registry.onwalk.net:5000/manager:v6.3.684 +local-registry.onwalk.net:5000/mntnct:v6.3.1317 +local-registry.onwalk.net:5000/monitor:v6.3.174 +local-registry.onwalk.net:5000/mysql-server:8.0.26 +local-registry.onwalk.net:5000/openebs/linux-utils:3.3.0 +local-registry.onwalk.net:5000/openebs/node-disk-manager:2.0.0 +local-registry.onwalk.net:5000/openebs/node-disk-operator:2.0.0 +local-registry.onwalk.net:5000/openebs/provisioner-localpv:3.3.0 +local-registry.onwalk.net:5000/pcap:v6.3.188 +local-registry.onwalk.net:5000/postman:v6.3.54 +local-registry.onwalk.net:5000/promtail:2.4.2 +local-registry.onwalk.net:5000/querier-js:v6.3.264 +local-registry.onwalk.net:5000/rabbitmq:3.10.25 +local-registry.onwalk.net:5000/redis:6.2.6 +local-registry.onwalk.net:5000/report:v6.3.247 +local-registry.onwalk.net:5000/statistics:v6.3.2082 +local-registry.onwalk.net:5000/talker:v6.3.2958 +local-registry.onwalk.net:5000/telegraf:1.14.1.12 +local-registry.onwalk.net:5000/trident:v6.3.1666 +local-registry.onwalk.net:5000/warrant:v6.3.81 +local-registry.onwalk.net:5000/web-tools:v6.3.122 +local-registry.onwalk.net:5000/webssh:v6.3.22 diff --git a/scripts/global-monitor/deepflow-registry/compose.yaml b/scripts/global-monitor/deepflow-registry/compose.yaml new file mode 100644 index 0000000..db6f21a --- /dev/null +++ b/scripts/global-monitor/deepflow-registry/compose.yaml @@ -0,0 +1,12 @@ +version: '3' +services: + registry: + image: registry:2.7.1 + container_name: deepflow-registry + restart: always + network_mode: host + volumes: + - /usr/local/deepflow/registry:/var/lib/registry + - /opt/deepflow-registry/config/deepflow-registry.yaml:/etc/docker/registry/config.yml + - /etc/ssl/onwalk.net.pem:/etc/docker/registry/domain.crt + - /etc/ssl/onwalk.net.key:/etc/docker/registry/domain.key diff --git a/scripts/global-monitor/deepflow-registry/push_images.sh b/scripts/global-monitor/deepflow-registry/push_images.sh new file mode 100644 index 
0000000..cfa0a27
--- /dev/null
+++ b/scripts/global-monitor/deepflow-registry/push_images.sh
@@ -0,0 +1,64 @@
+#!/bin/bash
+#
+# Mirror every image listed in a tag list from the local registry to the
+# target registry, using nerdctl against the k3s containerd socket.
+#
+# Usage: push_images.sh [tag-list-file]   (default: all.tag.list)
+
+set -uo pipefail
+
+# containerd socket and registry addresses
+CONTAINERD_ADDRESS="/run/k3s/containerd/containerd.sock"
+LOCAL_REGISTRY="local-registry.onwalk.net:5000"
+TARGET_REGISTRY="global-images.onwalk.net/private/deepflow-v6.3"
+
+# Input file: one image reference per line (as written by show_images.sh)
+input_file="${1:-all.tag.list}"
+
+if [ ! -r "$input_file" ]; then
+    echo "Image list not found or unreadable: $input_file" >&2
+    exit 1
+fi
+
+# Run nerdctl as root against the k3s containerd socket.
+run_nerdctl() {
+    sudo CONTAINERD_ADDRESS="$CONTAINERD_ADDRESS" nerdctl "$@"
+}
+
+# Log in to the target registry; nothing can be pushed without it
+echo "Logging in to $TARGET_REGISTRY..."
+run_nerdctl login "$TARGET_REGISTRY" || exit 1
+
+failed=0
+
+# Read the list on fd 3 so that sudo/nerdctl cannot consume it via stdin.
+# The "|| [ -n ... ]" keeps a last line that has no trailing newline.
+while IFS= read -r line <&3 || [ -n "$line" ]; do
+    # Skip blank lines and comments
+    case "$line" in
+        ''|'#'*) continue ;;
+    esac
+
+    # Swap the local registry prefix for the target one
+    image_tag="${line//$LOCAL_REGISTRY/$TARGET_REGISTRY}"
+
+    # Pull, retag and push; count the image as failed if any step fails
+    echo "Tagging $line as $image_tag and pushing to $TARGET_REGISTRY"
+    if ! { run_nerdctl pull "$line" &&
+           run_nerdctl tag "$line" "$image_tag" &&
+           run_nerdctl push "$image_tag"; }; then
+        echo "Failed to mirror $line" >&2
+        failed=$((failed + 1))
+    fi
+
+    # Drop both local tags; removing only the source tag leaves the image referenced
+    echo "Cleaning up local image: $line"
+    run_nerdctl rmi "$line" "$image_tag" || true
+done 3< "$input_file"
+
+if [ "$failed" -ne 0 ]; then
+    echo "$failed image(s) could not be mirrored." >&2
+    exit 1
+fi
+
+echo "All images processed and pushed successfully."
diff --git a/scripts/global-monitor/deepflow-registry/setup-nerdctl.sh b/scripts/global-monitor/deepflow-registry/setup-nerdctl.sh
new file mode 100644
index 0000000..4bdd1ed
--- /dev/null
+++ b/scripts/global-monitor/deepflow-registry/setup-nerdctl.sh
@@ -0,0 +1,30 @@
+#!/bin/bash
+#
+# Install the nerdctl CLI and point it at the k3s containerd socket.
+
+set -euo pipefail
+
+NERDCTL_VERSION="2.0.2"
+NERDCTL_TARBALL="nerdctl-full-${NERDCTL_VERSION}-linux-amd64.tar.gz"
+
+wget -O "$NERDCTL_TARBALL" "https://github.com/containerd/nerdctl/releases/download/v${NERDCTL_VERSION}/${NERDCTL_TARBALL}"
+
+# The download alone installs nothing: unpack the nerdctl binary into
+# /usr/local/bin so the smoke test below can find it.
+sudo tar -C /usr/local -xzf "$NERDCTL_TARBALL" bin/nerdctl
+
+sudo mkdir -pv /etc/nerdctl
+
+# "sudo cat > file" would open the file as the calling user (the redirection
+# is done by the unprivileged shell), so write through "sudo tee" instead.
+sudo tee /etc/nerdctl/nerdctl.toml > /dev/null << EOF
+debug = false
+debug_full = false
+address = "unix:///run/k3s/containerd/containerd.sock"
+namespace = "k8s.io"
+cni_path = "/var/lib/nerdctl/cni/bin"
+cni_netconfpath = "/var/lib/nerdctl/cni/net.d"
+EOF
+
+# Smoke test: list containers in the k8s.io namespace
+sudo CONTAINERD_ADDRESS=/run/k3s/containerd/containerd.sock nerdctl --namespace k8s.io ps
diff --git a/scripts/global-monitor/deepflow-registry/setup-registry.sh b/scripts/global-monitor/deepflow-registry/setup-registry.sh
new file mode 100644
index 0000000..e68f79e
--- /dev/null
+++ b/scripts/global-monitor/deepflow-registry/setup-registry.sh
@@ -0,0 +1,16 @@
+#!/bin/bash
+#
+# Deploy the DeepFlow image registry with nerdctl compose.
+# nerdctl must already be installed; release tarball for reference:
+# https://github.com/containerd/nerdctl/releases/download/v2.0.2/nerdctl-2.0.2-linux-amd64.tar.gz
+
+# compose.yaml is copied below, so the target directory has to exist first
+sudo mkdir -p /opt/deepflow-registry
+sudo cp compose.yaml /opt/deepflow-registry/compose.yaml
+sudo nerdctl compose -f /opt/deepflow-registry/compose.yaml down
+sudo nerdctl compose -f /opt/deepflow-registry/compose.yaml up -d
+
+# When the runtime is containerd
+sudo nerdctl load -i /usr/local/deepflow/registry.tar
+sudo CONTAINERD_ADDRESS=/run/k3s/containerd/containerd.sock nerdctl --namespace k8s.io compose -f /opt/deepflow-registry/compose.yaml up -d
+#nerdctl run -d -e REGISTRY_HTTP_ADDR=0.0.0.0:5000 --net=host -v /usr/local/deepflow/registry:/var/lib/registry --restart=always --name registry hub.deepflow.yunshan.net/dev/registry:latest
diff --git a/scripts/global-monitor/deepflow-registry/show_images.sh b/scripts/global-monitor/deepflow-registry/show_images.sh
new file mode 100644
index 0000000..011fc6b
--- /dev/null
+++ 
b/scripts/global-monitor/deepflow-registry/show_images.sh @@ -0,0 +1,39 @@ +#!/bin/bash + +# 设置协议和 registry 地址(https:// 或 http://) +PROTOCOL="https://" +REGISTRY="local-registry.onwalk.net:5000" + +# 获取仓库列表 +repos=$(curl -s -X GET "$PROTOCOL$REGISTRY/v2/_catalog" | jq -r '.repositories[]') + +# 要隐藏的仓库列表 +hidden_repos=("") + +# 创建或清空输出文件 +output_file="all.tag.list" +> "$output_file" + +# 遍历每个仓库,获取对应的标签列表 +for repo in $repos; do + # 如果是隐藏的仓库,跳过 + if [[ " ${hidden_repos[@]} " =~ " ${repo} " ]]; then + continue + fi + + # 获取标签列表 + tags=$(curl -s -X GET "$PROTOCOL$REGISTRY/v2/$repo/tags/list" | jq -r '.tags[]') + + # 如果仓库有标签,则按格式输出到文件 + if [ -n "$tags" ]; then + for tag in $tags; do + # 输出格式:local-registry.onwalk.net:5000/repository:tag + echo "$REGISTRY/$repo:$tag" >> "$output_file" + done + fi +done + +# 排序并去重 +sort -u "$output_file" -o "$output_file" + +echo "Tags have been saved to $output_file and sorted." diff --git a/scripts/global-monitor/deepflow-sever-values-v6.3.yaml b/scripts/global-monitor/deepflow-sever-values-v6.3.yaml new file mode 100644 index 0000000..ac75ece --- /dev/null +++ b/scripts/global-monitor/deepflow-sever-values-v6.3.yaml @@ -0,0 +1,117 @@ +global: + ## 此文件中的字段会替换掉 values.yaml 中的默认值,如需修改 values.yaml,都需要在此修改 + ## 需保证与 values.yaml 格式一致 + ## Tips: grep redis_tag: values.yaml -A 16 -B1 >> values-custom.yaml + deepflowVersion: DeepFlow-6.3.36 + imagePullSecrets: + - deepflow-registry-secret + image: + repository: global-images.onwalk.net/private/deepflow-v6.3 + pullPolicy: Always + node_type: master + master_region_domain_prefix: '' + current_region_domain_prefix: 'master-' + s3StorageEnabled: false + ## 计费模式 license 授权模式 / voucher 按量计费 + billing_method: license + ## + ## 【可选】用于部署ESXi环境中的采集器虚拟机,需先在页面创建采集器组及配置流量采集模式为虚拟镜像,再 `-uo trident` 升级 esxi 采集器。 + tridentConfigmap: + ## 自定义采集器配置 + customConfig: + vtap-group-id-request: "__FIX_ME__" + # controller-port: 20035 + + +## 各组件自定义参数 +## Example + +## server 和 clickhouse 副本数控制,server 副本数必须 >= 
clickhouse 数量。 +deepflow: + server: + replicas: 1 + resources: + ## limit.memory 需配置为节点资源的 70% and > 2Gi,如 128G,则配置为 128x0.7~=90G。 + limits: + memory: 5G + clickhouse: + replicas: 1 + +alarm: + alarmEmail: stats01@yunshan.net + + +## df-web-metrics-explore +dfWebMetricsExplore: + customConfig: + app_config_user.js: |- + window.APP_USER_CONFIG = {} + +## df-web-core +dfWebCore: + customConfig: + app_config_user.js: |- + window.APP_USER_CONFIG = {} + +## 主从区域通信配置 +customResource: + clusterEndpointMasterToSlave: + ## 创建内建负载均衡器配置开关 + enabled: false + slaveRegionList: + ## 此处从区域名称和部署从区域时的域名前缀一致 + ## 从区域一 + - name: slave1- + ## 访问从区域组件端口,如遇 nodePort 冲突,更换端口后可修改如下配置,如需新增组件访问,添加 service 名称及端口即可。 + ports: + influxdb: + ## 集群内 server 暴露端口,无需更改 + port: 20044 + ## 从区域组件端口,如果各从区域 nodePort 端口不相同可修改 targetPort + targetPort: 20044 + elasticsearch: + port: 20042 + targetPort: 20042 + deepflow-server: + port: 20416 + targetPort: 30416 + ## 从区域所有节点IP + ips: + - 1.1.1.1 + - 2.2.2.2 + ## 从区域二 + - name: slave2- + ports: + influxdb: + ## 集群内 server 暴露端口,无需更改 + port: 20044 + ## 从区域组件端口,如果各从区域 nodePort 端口不相同可修改 targetPort + targetPort: 20044 + elasticsearch: + port: 20042 + targetPort: 20042 + deepflow-server: + port: 20416 + targetPort: 30416 + ips: + - 3.3.3.3 + - 4.4.4.4 + ## 从区域访问主区域配置 + clusterEndpointSlaveToMaster: + ## 主区域组件端口,如需新增组件访问,添加 service 名称及端口即可。 + ## 填写主区域 server 所在节点 IP + master_controller_ips: + - __FIXME_MASTER_CONTROLLER1_IP__ + - __FIXME_MASTER_CONTROLLER2_IP__ + - __FIXME_MASTER_CONTROLLER3_IP__ + ports: + talker: + port: 20013 + targetPort: 20013 + mysql: + port: 30130 + targetPort: 30130 + manager: + port: 20403 + targetPort: 20403 + diff --git a/scripts/global-monitor/setup-agent-group-config.sh b/scripts/global-monitor/setup-agent-group-config.sh new file mode 100644 index 0000000..5c020d9 --- /dev/null +++ b/scripts/global-monitor/setup-agent-group-config.sh @@ -0,0 +1,8 @@ +deepflow-ctl agent-group create vm-group +deepflow-ctl agent-group list vm-group + +cat 
> agent-group-config.yaml << EOF +vtap_group_id: g-3lSjoT4zjY +tap_interface_regex: ^(tap.*|cali.*|veth.*|eth.*|en[ospx].*|lxc.*|lo|docker.*|br.*|wg.*)$ +EOF +deepflow-ctl agent-group-config create -f agent-group-config.yaml diff --git a/scripts/global-monitor/setup-coroot.sh b/scripts/global-monitor/setup-coroot.sh new file mode 100644 index 0000000..740cea1 --- /dev/null +++ b/scripts/global-monitor/setup-coroot.sh @@ -0,0 +1,11 @@ +helm repo add coroot https://coroot.github.io/helm-charts +helm repo update coroot +helm install --namespace coroot --create-namespace --set corootCE.service.type=NodePort coroot coroot/coroot +export NODE_PORT=$(kubectl get --namespace coroot -o jsonpath="{.spec.ports[0].nodePort}" services coroot) +export NODE_IP=$(kubectl get nodes --namespace coroot -o jsonpath="{.items[0].status.addresses[0].address}") +echo http://$NODE_IP:$NODE_PORT +curl -sfL https://raw.githubusercontent.com/coroot/coroot-node-agent/main/install.sh | \ + COLLECTOR_ENDPOINT=http://35.75.12.83:35412 \ + API_KEY=8npswdyt \ + SCRAPE_INTERVAL=15s \ + sh - diff --git a/scripts/global-monitor/setup-deepflow-Host-Domain-Group.sh b/scripts/global-monitor/setup-deepflow-Host-Domain-Group.sh new file mode 100644 index 0000000..88b60b0 --- /dev/null +++ b/scripts/global-monitor/setup-deepflow-Host-Domain-Group.sh @@ -0,0 +1,5 @@ +unset AGENT_GROUP +AGENT_GROUP="legacy-host" # FIXME: domain name + +deepflow-ctl agent-group create $AGENT_GROUP +deepflow-ctl agent-group list $AGENT_GROUP # Get agent-group ID diff --git a/scripts/global-monitor/setup-deepflow-Host-Domain.sh b/scripts/global-monitor/setup-deepflow-Host-Domain.sh new file mode 100644 index 0000000..2ba0f30 --- /dev/null +++ b/scripts/global-monitor/setup-deepflow-Host-Domain.sh @@ -0,0 +1,8 @@ +unset DOMAIN_NAME +DOMAIN_NAME="vm-group" # FIXME: domain name + +cat << EOF | deepflow-ctl domain create -f - +name: $DOMAIN_NAME +type: agent_sync +EOF + diff --git 
a/scripts/global-monitor/setup-deepflow-add-domain.sh b/scripts/global-monitor/setup-deepflow-add-domain.sh new file mode 100644 index 0000000..543a610 --- /dev/null +++ b/scripts/global-monitor/setup-deepflow-add-domain.sh @@ -0,0 +1,13 @@ +#!/bin/bash +export CLUSTER_NAME=$1 + +cat > custom-domain.yaml << EOF +name: "$CLUSTER_NAME" +type: kubernetes +config: + controller_ip: 35.72.247.255 + node_port_name_regex: ^(cni|eth|flannel|vxlan.calico|wg|ens|tunl|en[ospx]) +EOF + +deepflow-ctl domain create -f custom-domain.yaml +deepflow-ctl domain list $CLUSTER_NAME diff --git a/scripts/global-monitor/setup-deepflow-server-ee-all-in-one.sh b/scripts/global-monitor/setup-deepflow-server-ee-all-in-one.sh new file mode 100644 index 0000000..5a357d9 --- /dev/null +++ b/scripts/global-monitor/setup-deepflow-server-ee-all-in-one.sh @@ -0,0 +1,36 @@ +#!/bin/bash + +sudo mkdir -pv /opt/rancher/k3s +curl -sfL https://get.k3s.io | sudo sh -s - --disable=traefik,servicelb \ + --data-dir=/opt/rancher/k3s \ + --kube-apiserver-arg service-node-port-range=0-50000 + +sudo mkdir -pv ~/.kube/ +sudo cp /etc/rancher/k3s/k3s.yaml ~/.kube/config +sudo chown -R ubuntu:ubuntu ~/.kube/ + +sudo snap install helm --classic + +k8s_node=`sudo kubectl get nodes | awk 'NR>1{print $1}'` + +sudo kubectl label node $k8s_node master_controller=enable +sudo kubectl label node $k8s_node tsdb=enable +sudo kubectl label node $k8s_node dfdb=enable + +sudo kubectl create ns deepflow || true +sudo kubectl create secret docker-registry deepflow-registry-secret \ + --docker-server=global-images.onwalk.net \ + --docker-username=admin \ + --docker-password=Harbor12345 \ + --docker-email=manbuzhe2009@qq.com \ + -n deepflow + +sudo kubectl create ns openebs || true +sudo kubectl create secret docker-registry deepflow-registry-secret \ + --docker-server=global-images.onwalk.net \ + --docker-username=admin \ + --docker-password=Harbor12345 \ + --docker-email=manbuzhe2009@qq.com \ + -n openebs + 
+/usr/local/deepflow/bin/deepflow-deploy -i diff --git a/scripts/global-monitor/setup-deepflow.sh b/scripts/global-monitor/setup-deepflow.sh new file mode 100644 index 0000000..54095d8 --- /dev/null +++ b/scripts/global-monitor/setup-deepflow.sh @@ -0,0 +1,27 @@ +helm repo add deepflow https://deepflowio.github.io/deepflow +helm repo update deepflow +cat > values.yaml << EOF +global: + replicas: 1 + storageEngine: clickhouse +byconity: + enabled: false +EOF +helm upgrade --install deepflow -n deepflow deepflow/deepflow --version 6.5.012 --create-namespace -f values.yaml +curl -o /usr/bin/deepflow-ctl https://deepflow-ce.oss-cn-beijing.aliyuncs.com/bin/ctl/v6.4.9/linux/$(arch | sed 's|x86_64|amd64|' | sed 's|aarch64|arm64|')/deepflow-ctl +chmod a+x /usr/bin/deepflow-ctl + +NODE_PORT=$(kubectl get --namespace deepflow -o jsonpath="{.spec.ports[0].nodePort}" services deepflow-grafana) +NODE_IP=$(kubectl get nodes -o jsonpath="{.items[0].status.addresses[0].address}") +echo -e "Grafana URL: http://$NODE_IP:$NODE_PORT \nGrafana auth: admin:deepflow" + + +kubectl delete deployment.apps/deepflow-byconity-daemon-manager -n deepflow +kubectl delete deployment.apps/deepflow-byconity-fdbcli -n deepflow +kubectl delete deployment.apps/deepflow-byconity-resource-manager -n deepflow +kubectl delete deployment.apps/deepflow-fdb-operator -n deepflow +kubectl delete statefulset.apps/deepflow-byconity-server -n deepflow +kubectl delete statefulset.apps/deepflow-byconity-tso -n deepflow +kubectl delete statefulset.apps/deepflow-byconity-vw-vw-default -n deepflow +kubectl delete statefulset.apps/deepflow-byconity-vw-vw-write -n deepflow +kubectl delete svc -n deepflow `kubectl get svc -n deepflow | grep deepflow-byconity | awk '{print $1}' | xargs` diff --git a/scripts/global-monitor/setup-ingress.sh b/scripts/global-monitor/setup-ingress.sh new file mode 100644 index 0000000..7f1920d --- /dev/null +++ b/scripts/global-monitor/setup-ingress.sh @@ -0,0 +1,56 @@ +cat > value.yaml < 
nginx-cm.yaml < nginx-svc-patch.yaml < values-custom.yaml +clickhouse: + enabled: true +EOF +helm upgrade --install signoz -n signoz signoz/signoz --create-namespace -f values-custom.yaml diff --git a/scripts/ingress-installer.sh b/scripts/ingress-installer.sh new file mode 100644 index 0000000..e5e91cf --- /dev/null +++ b/scripts/ingress-installer.sh @@ -0,0 +1,106 @@ +#!/bin/bash +set -e + +INGRESS_IP="${1:-$(hostname -I | awk '{print $1}')}" +NODE_LABEL="$2" + +echo "🚀 Ingress离线部署开始,IP: ${INGRESS_IP}" + +# 解压 nerdctl 并安装 +echo "📦 安装nerdctl..." +tar xzvf nerdctl.tar.gz -C /usr/local/bin/ + +echo "🚀 尝试导入镜像..." + +if command -v docker &>/dev/null && docker info &>/dev/null; then + echo "✅ 检测到 Docker 正常运行,使用 docker load 导入镜像" + docker load -i images/nginx-ingress.tar + docker load -i images/kube-webhook-certgen.tar + +elif [ -S /run/k3s/containerd/containerd.sock ]; then + echo "⚠️ Docker 不可用,检测到 K3s 的 containerd socket,使用 nerdctl 导入" + + # 设置 nerdctl 环境变量,连接到 K3s 的 containerd + export CONTAINERD_ADDRESS=/run/k3s/containerd/containerd.sock + + # 确保 nerdctl 可执行 + if ! command -v nerdctl &>/dev/null; then + echo "❌ nerdctl 未安装或未在 PATH 中,请检查" + exit 1 + fi + + nerdctl --namespace k8s.io load -i images/nginx-ingress.tar + nerdctl --namespace k8s.io load -i images/kube-webhook-certgen.tar + +elif [ -S /run/containerd/containerd.sock ]; then + echo "⚠️ Docker 和 K3s containerd 都不可用,退而使用默认 containerd socket" + + export CONTAINERD_ADDRESS=/run/containerd/containerd.sock + + if ! 
command -v nerdctl &>/dev/null; then + echo "❌ nerdctl 未安装或未在 PATH 中,请检查" + exit 1 + fi + + nerdctl --namespace k8s.io load -i images/nginx-ingress.tar + nerdctl --namespace k8s.io load -i images/kube-webhook-certgen.tar + +else + echo "❌ 没有可用的容器运行时(docker/containerd),无法导入镜像" + exit 1 +fi + +# 创建命名空间 +kubectl create namespace ingress || true + +# 生成 Helm values.yaml +cat > values.yaml <> values.yaml < ${ROLE_NAME}/defaults/main.yml << EOF +# Default variables for ${ROLE_NAME} +loki_journal_sources: + - name: "xray" + unit: "xray.service" + - name: "xray_tproxy" + unit: "xray-tproxy.service" + +loki_endpoint_url: "https://logs-prod-030.grafana.net/loki/api/v1/push" +loki_basic_auth_username: "{{ loki_username }}" +loki_basic_auth_password: "{{ loki_password }}" +EOF + +# Create tasks/main.yml file +cat > ${ROLE_NAME}/tasks/main.yml << EOF +--- +- name: Install GPG + apt: + name: gpg + state: present + +- name: Create APT keyrings directory + file: + path: /etc/apt/keyrings/ + state: directory + mode: '0755' + +- name: Add Grafana GPG key + ansible.builtin.get_url: + url: "{{ grafana_gpg_key_url }}" + dest: /etc/apt/keyrings/grafana.gpg + mode: '0644' + +- name: Add Grafana Alloy APT source + apt_repository: + repo: "{{ grafana_apt_source }}" + state: present + +- name: Update APT package list and install Grafana Alloy + apt: + name: alloy + state: present + update_cache: yes + +- name: Create Alloy configuration directory + file: + path: /etc/alloy + state: directory + mode: '0755' + +- name: Create Alloy configuration file + template: + src: config.alloy.j2 + dest: "{{ alloy_config_path }}" + mode: '0644' + +- name: Reload and restart Alloy service + systemd: + name: alloy + state: restarted + daemon_reload: yes +EOF + +# Create templates/config.alloy.j2 file +cat > ${ROLE_NAME}/templates/config.alloy.j2 << EOF +loki.write "grafanacloud" { + endpoint { + url = "{{ loki_endpoint_url }}" + + basic_auth { + username = "{{ loki_basic_auth_username }}" + password = "{{ 
loki_basic_auth_password }}"
+    }
+  }
+}
+
+{% for source in loki_journal_sources %}
+loki.source.journal "{{ source.name }}" {
+  format_as_json = true
+  labels = {job = "{{ source.name }}"}
+  matches = "_SYSTEMD_UNIT={{ source.unit }}"
+  forward_to = [loki.write.grafanacloud.receiver]
+}
+{% endfor %}
+EOF
+
+# Create files/grafana.gpg file (an empty file is created here; you can manually add the content)
+touch ${ROLE_NAME}/files/grafana.gpg
+
+echo "Ansible Role directory structure for '${ROLE_NAME}' has been initialized."
diff --git a/scripts/init_linux_user.sh b/scripts/init_linux_user.sh
new file mode 100644
index 0000000..f819843
--- /dev/null
+++ b/scripts/init_linux_user.sh
@@ -0,0 +1,56 @@
+#!/bin/bash
+#
+# Create a login user with a password, copy root's authorized SSH keys to it
+# and grant it passwordless sudo.
+#
+# Usage: init_linux_user.sh <username> <password>
+# NOTE: the password is passed on the command line and is therefore visible
+# in the process list and shell history; change it after first login.
+
+set -euo pipefail
+
+# Both a username and a password are required
+if [ "$#" -lt 2 ] || [ -z "$1" ] || [ -z "$2" ]; then
+    echo "Usage: $0 <username> <password>" >&2
+    exit 1
+fi
+
+USERNAME="$1"  # first argument: login name
+PASSWORD="$2"  # second argument: initial password
+SSH_KEY_PATH="/root/.ssh/authorized_keys"  # replace with the real public key file
+HOME_DIR="/home/$USERNAME"
+SSH_DIR="$HOME_DIR/.ssh"
+AUTHORIZED_KEYS="$SSH_DIR/authorized_keys"
+SUDOERS_FILE="/etc/sudoers.d/$USERNAME"
+
+# The name ends up in file paths and in a sudoers.d file name (sudo skips
+# drop-in files whose name contains a dot), so only accept a conservative set.
+if ! [[ "$USERNAME" =~ ^[a-z_][a-z0-9_-]*$ ]]; then
+    echo "Invalid username: $USERNAME" >&2
+    exit 1
+fi
+
+# Create the user with a home directory unless it already exists
+if id "$USERNAME" >/dev/null 2>&1; then
+    echo "User $USERNAME already exists, skipping creation."
+else
+    sudo useradd -m -s /bin/bash -G sudo "$USERNAME"
+fi
+
+# Set the user's password
+echo "$USERNAME:$PASSWORD" | sudo chpasswd
+
+# Create ~/.ssh with owner and mode set in one step
+sudo install -d -m 700 -o "$USERNAME" -g "$USERNAME" "$SSH_DIR"
+
+# Copy the public keys; install sets owner and mode in one step and, unlike
+# "sudo bash -c 'cat ... > ...'", never re-parses the paths as shell code
+sudo install -m 600 -o "$USERNAME" -g "$USERNAME" "$SSH_KEY_PATH" "$AUTHORIZED_KEYS"
+
+# Allow sudo without a password; the drop-in is set to 0440 and syntax-checked
+# so that a bad file is caught here rather than at the next sudo call
+echo "$USERNAME ALL=(ALL) NOPASSWD:ALL" | sudo tee "$SUDOERS_FILE" >/dev/null
+sudo chmod 0440 "$SUDOERS_FILE"
+sudo visudo -cf "$SUDOERS_FILE"
+
+echo "User $USERNAME has been created, password set, and configured with sudo privileges successfully."
diff --git a/scripts/k3s-cluster/.gitignore b/scripts/k3s-cluster/.gitignore new file mode 100644 index 0000000..ede35ae --- /dev/null +++ b/scripts/k3s-cluster/.gitignore @@ -0,0 +1,13 @@ +initialize.sh +desktop-dev +scripts/get_hcp_secret.py +scripts/get_hcp_secret.sh +scripts/get_hcp_secret.test +sync.sh +../ca.cert +../ca.key +../ca.srl +../kube.registry.local.cert +../kube.registry.local.key +aws/ +awscliv2.zip diff --git a/scripts/k3s-cluster/check-cilium-egress.sh b/scripts/k3s-cluster/check-cilium-egress.sh new file mode 100644 index 0000000..e1be353 --- /dev/null +++ b/scripts/k3s-cluster/check-cilium-egress.sh @@ -0,0 +1,31 @@ +#!/bin/bash + +set -e + +POD_NAME=${1:-test-pod} +NAMESPACE=${2:-default} + +echo "🔍 获取 Pod IP..." +POD_IP=$(kubectl get pod "$POD_NAME" -n "$NAMESPACE" -o jsonpath='{.status.podIP}') +NODE_NAME=$(kubectl get pod "$POD_NAME" -n "$NAMESPACE" -o jsonpath='{.spec.nodeName}') +echo "✅ Pod IP: $POD_IP" +echo "✅ Node: $NODE_NAME" + +echo -e "\n🧠 查询 Cilium egress gateway BPF policy 命中情况..." +kubectl -n kube-system exec ds/cilium -- cilium-dbg bpf egress list | grep "$POD_IP" || echo "❌ 没有命中 egress policy" + +echo -e "\n🌐 在节点上检查 SNAT 规则 (iptables POSTROUTING)..." +ssh "$NODE_NAME" "sudo iptables -t nat -L POSTROUTING -n -v --line-numbers | grep -E '10\.42|SNAT|wg0|eth0'" + +echo -e "\n🌍 从 Pod 内 curl ifconfig.me 获取出口 IP..." +kubectl exec -n "$NAMESPACE" "$POD_NAME" -- curl -s --max-time 5 ifconfig.me || echo "❌ curl 出口失败" + +echo -e "\n🚦 路由确认:从 Pod 查看 route 表..." +kubectl exec -n "$NAMESPACE" "$POD_NAME" -- ip route + +echo -e "\n🎯 检查目标 Gateway IP 是否可达 (ping 网关)..." 
+GATEWAY_IP="172.30.0.11" +kubectl exec -n "$NAMESPACE" "$POD_NAME" -- ping -c 3 "$GATEWAY_IP" || echo "❌ 无法 ping 通 $GATEWAY_IP" + +echo -e "\n✅ 检查完成" + diff --git a/scripts/k3s-cluster/check_cilium_requirements.sh b/scripts/k3s-cluster/check_cilium_requirements.sh new file mode 100644 index 0000000..d95df5f --- /dev/null +++ b/scripts/k3s-cluster/check_cilium_requirements.sh @@ -0,0 +1,140 @@ +#!/bin/bash +set -e + +echo "🔍 检查 Cilium 运行环境依赖项..." + +# 自动挂载 bpffs +auto_mount_bpffs() { + echo "👉 自动挂载 bpffs..." + sudo mount bpffs /sys/fs/bpf -t bpf + echo "✅ bpffs 已挂载" +} + +# 自动加载内核模块 +auto_load_modules() { + echo "👉 自动加载内核模块..." + for mod in "${REQUIRED_MODULES[@]}"; do + if ! lsmod | grep -q "$mod"; then + sudo modprobe "$mod" + echo "✅ $mod 已加载" + fi + done +} + +# 自动启用内核配置项 +auto_enable_kernel_config() { + echo "👉 自动启用内核配置项..." + local CONFIG_FILE="/boot/config-$(uname -r)" + + for cfg in "${REQUIRED_CONFIGS[@]}"; do + if ! grep -q "${cfg}=y" "$CONFIG_FILE" && ! grep -q "${cfg}=m" "$CONFIG_FILE"; then + echo "❌ $cfg 未启用,正在启用..." + # 更新配置文件(需要手动修改,或者重新编译内核) + echo "请手动启用内核配置:$cfg" + else + echo "✅ $cfg 已启用" + fi + done +} + +# 检查 bpffs 是否挂载 +check_bpffs() { + echo -n "🔸 检查 bpffs 是否挂载 (/sys/fs/bpf)... 
" + if mount | grep -q '/sys/fs/bpf type bpf'; then + echo "✅ 已挂载" + else + echo "❌ 未挂载" + if [ "$AUTOFIX" = "true" ]; then + auto_mount_bpffs + fi + fi +} + +# 检查内核模块 +check_kernel_modules() { + REQUIRED_MODULES=( + "vxlan" "geneve" "ip_set" "xt_set" "xt_comment" + "xt_mark" "xt_socket" "xt_tproxy" "xt_conntrack" + "xfrm_user" "xfrm_algo" "xfrm_ipcomp" "ipcomp" + "net_cls" "net_cls_act" "net_sch_ingress" + "net_sch_fq" "crypto_user" + ) + echo "🔸 检查内核模块加载状态:" + for mod in "${REQUIRED_MODULES[@]}"; do + if lsmod | grep -q "$mod"; then + echo "✅ $mod 已加载" + else + echo "❌ $mod 未加载(可尝试:modprobe $mod)" + if [ "$AUTOFIX" = "true" ]; then + auto_load_modules + fi + fi + done +} + +# 检查内核配置项是否开启(通过 /boot/config-$(uname -r) 或 /proc/config.gz) +check_kernel_config() { + echo "🔸 检查内核配置项:" + CONFIG_FILE="" + if [ -f "/boot/config-$(uname -r)" ]; then + CONFIG_FILE="/boot/config-$(uname -r)" + elif [ -f "/proc/config.gz" ]; then + zcat /proc/config.gz > /tmp/kernel_config_check + CONFIG_FILE="/tmp/kernel_config_check" + else + echo "⚠️ 无法找到内核配置文件,跳过配置检查" + return + fi + + REQUIRED_CONFIGS=( + "CONFIG_BPF" + "CONFIG_BPF_SYSCALL" + "CONFIG_NET_CLS_BPF" + "CONFIG_BPF_JIT" + "CONFIG_NET_CLS_ACT" + "CONFIG_NET_SCH_INGRESS" + "CONFIG_CRYPTO_SHA1" + "CONFIG_CRYPTO_USER_API_HASH" + "CONFIG_CGROUPS" + "CONFIG_CGROUP_BPF" + "CONFIG_PERF_EVENTS" + "CONFIG_VXLAN" + "CONFIG_FIB_RULES" + "CONFIG_NET_SCH_FQ" + ) + + for cfg in "${REQUIRED_CONFIGS[@]}"; do + if grep -q "${cfg}=y" "$CONFIG_FILE" || grep -q "${cfg}=m" "$CONFIG_FILE"; then + echo "✅ $cfg 已启用" + else + echo "❌ $cfg 未启用" + if [ "$AUTOFIX" = "true" ]; then + auto_enable_kernel_config + fi + fi + done + + [ -f /tmp/kernel_config_check ] && rm /tmp/kernel_config_check +} + +# 主执行流程 +AUTOFIX="false" +while getopts "f" opt; do + case $opt in + f) + AUTOFIX="true" + echo "👉 自动修复已启用!" 
+ ;; + *) + echo "用法: $0 [-f] 启用自动修复" + exit 1 + ;; + esac +done + +check_bpffs +check_kernel_modules +check_kernel_config + +echo "✅ 检查完成:请根据上方提示补全内核模块、参数或挂载配置。" + diff --git a/scripts/k3s-cluster/cilium-cli.sh b/scripts/k3s-cluster/cilium-cli.sh new file mode 100644 index 0000000..1c2ae6b --- /dev/null +++ b/scripts/k3s-cluster/cilium-cli.sh @@ -0,0 +1,9 @@ +export HTTPS_PROXY="http://127.0.0.1:1081" +CILIUM_CLI_VERSION=$(curl -s https://raw.githubusercontent.com/cilium/cilium-cli/main/stable.txt) +CLI_ARCH=amd64 +if [ "$(uname -m)" = "aarch64" ]; then CLI_ARCH=arm64; fi +curl -L --fail --remote-name-all https://github.com/cilium/cilium-cli/releases/download/${CILIUM_CLI_VERSION}/cilium-linux-${CLI_ARCH}.tar.gz{,.sha256sum} +sha256sum --check cilium-linux-${CLI_ARCH}.tar.gz.sha256sum +sudo tar xzvfC cilium-linux-${CLI_ARCH}.tar.gz /usr/local/bin +rm cilium-linux-${CLI_ARCH}.tar.gz{,.sha256sum} + diff --git a/scripts/k3s-cluster/cilium-fixed.sh b/scripts/k3s-cluster/cilium-fixed.sh new file mode 100644 index 0000000..a275dc8 --- /dev/null +++ b/scripts/k3s-cluster/cilium-fixed.sh @@ -0,0 +1,37 @@ +#!/bin/bash + +# 确保你有管理员权限 +if [ "$(id -u)" -ne 0 ]; then + echo "请使用管理员权限运行此脚本" + exit 1 +fi + +NAMESPACE="cilium-secrets" + +# Step 1: 强制删除 Pod、Deployment、StatefulSet 和 DaemonSet +echo "正在强制删除 $NAMESPACE 命名空间中的资源..." +kubectl delete pod --all --force --grace-period=0 -n $NAMESPACE +kubectl delete deployment --all --force --grace-period=0 -n $NAMESPACE +kubectl delete statefulset --all --force --grace-period=0 -n $NAMESPACE +kubectl delete daemonset --all --force --grace-period=0 -n $NAMESPACE + +# Step 2: 删除命名空间(如果它无法删除) +echo "尝试强制删除命名空间 $NAMESPACE..." +kubectl get namespace $NAMESPACE -o json | jq '.spec.finalizers = []' > tmp.json +kubectl replace --raw "/api/v1/namespaces/$NAMESPACE/finalize" -f tmp.json + +# Step 3: 确认资源删除 +echo "正在确认命名空间和资源是否已删除..." +kubectl get ns +kubectl get all -n $NAMESPACE + +# Step 4: 删除 Helm Release 如果存在 +echo "如果 Helm Release 存在,尝试删除..." 
+helm delete $NAMESPACE --namespace $NAMESPACE || echo "Helm release $NAMESPACE 未找到或已删除"
+
+sudo ip link delete cilium_net
+sudo ip link delete cilium_host
+sudo ip link delete cilium_vxlan
+
+echo "清理完毕!"
+
diff --git a/scripts/k3s-cluster/deploy_velero_with_chart_values_yaml.sh b/scripts/k3s-cluster/deploy_velero_with_chart_values_yaml.sh
new file mode 100755
index 0000000..96f869f
--- /dev/null
+++ b/scripts/k3s-cluster/deploy_velero_with_chart_values_yaml.sh
@@ -0,0 +1,104 @@
+#!/bin/bash
+#
+# Deploy Velero with Helm from a local clone of the vmware-tanzu chart
+# repository, using a generated values file (AWS provider plugin).
+#
+# AWS credentials are read from the environment:
+#   AWS_ACCESS_KEY_ID=... AWS_SECRET_ACCESS_KEY=... ./deploy_velero_with_chart_values_yaml.sh
+
+set -euo pipefail
+
+# ======= Settings =======
+VELERO_NAMESPACE="velero"
+VELERO_RELEASE_NAME="velero"
+VELERO_BUCKET="k8s-resources-backup"
+VELERO_REGION="ap-northeast-1"
+VELERO_PROVIDER="aws"
+VELERO_SNAPSHOT_LOCATION="default"
+
+# Take the keys from the environment instead of overwriting them with "".
+AWS_ACCESS_KEY_ID="${AWS_ACCESS_KEY_ID:-}"
+AWS_SECRET_ACCESS_KEY="${AWS_SECRET_ACCESS_KEY:-}"
+
+CREDENTIALS_FILE="/tmp/credentials-velero"
+CHART_REPO_URL="https://github.com/vmware-tanzu/helm-charts.git"
+CHART_PATH="./helm-charts/charts/velero"
+PROVIDER_PLUGIN_TAG="v1.7.0"
+VALUES_FILE="/tmp/velero-values.yaml"
+
+# Empty keys would only produce a credentials secret with blank values
+if [ -z "$AWS_ACCESS_KEY_ID" ] || [ -z "$AWS_SECRET_ACCESS_KEY" ]; then
+  echo "❌ AWS_ACCESS_KEY_ID / AWS_SECRET_ACCESS_KEY must be set" >&2
+  exit 1
+fi
+
+# Both generated files hold the secret key: pre-create them with mode 0600
+# so the redirections below never leave them world-readable in /tmp
+install -m 600 /dev/null "$CREDENTIALS_FILE"
+install -m 600 /dev/null "$VALUES_FILE"
+
+# ======= Create the temporary credentials file =======
+echo "📝 生成临时凭证文件:$CREDENTIALS_FILE"
+cat <<EOF > "$CREDENTIALS_FILE"
+[default]
+aws_access_key_id=$AWS_ACCESS_KEY_ID
+aws_secret_access_key=$AWS_SECRET_ACCESS_KEY
+EOF
+
+# ======= Clone the Helm chart repository (if missing) =======
+if [ ! -d "./helm-charts" ]; then
+  echo "📦 克隆 VMware Tanzu Helm Charts..."
+  git clone "$CHART_REPO_URL"
+else
+  echo "✅ Helm Charts 已存在,跳过克隆。"
+fi
+
+# ======= Generate the Helm values file =======
+echo "📄 生成 Helm values 文件:$VALUES_FILE"
+cat <<EOF > "$VALUES_FILE"
+kubectl:
+  image:
+    repository: images.onwalk.net/public/bitnami/kubectl
+    tag: "1.31"
+    pullPolicy: IfNotPresent
+image:
+  repository: images.onwalk.net/public/velero/velero
+  tag: v1.15.2
+  pullPolicy: IfNotPresent
+credentials:
+  secretContents:
+    cloud: |
+      [default]
+      aws_access_key_id=$AWS_ACCESS_KEY_ID
+      aws_secret_access_key=$AWS_SECRET_ACCESS_KEY
+
+configuration:
+  backupStorageLocation:
+  - name: default
+    provider: ${VELERO_PROVIDER}
+    bucket: ${VELERO_BUCKET}
+    config:
+      region: ${VELERO_REGION}
+
+  volumeSnapshotLocation:
+  - name: ${VELERO_SNAPSHOT_LOCATION}
+    provider: ${VELERO_PROVIDER}
+    config:
+      region: ${VELERO_REGION}
+
+initContainers:
+  - name: velero-plugin-for-${VELERO_PROVIDER}
+    image: images.onwalk.net/public/velero/velero-plugin-for-${VELERO_PROVIDER}:${PROVIDER_PLUGIN_TAG}
+    volumeMounts:
+      - mountPath: /target
+        name: plugins
+EOF
+
+# ======= Install Velero =======
+echo "🚀 使用 Helm 安装 Velero..."
+helm upgrade --install "$VELERO_RELEASE_NAME" "$CHART_PATH" \
+  --namespace "$VELERO_NAMESPACE" \
+  --create-namespace \
+  -f "$VALUES_FILE"
+
+echo "✅ Velero 安装完成!"
diff --git a/scripts/k3s-cluster/egress-nat-test.yaml b/scripts/k3s-cluster/egress-nat-test.yaml new file mode 100644 index 0000000..b347846 --- /dev/null +++ b/scripts/k3s-cluster/egress-nat-test.yaml @@ -0,0 +1,46 @@ +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: test-pod + namespace: default + labels: + app: test-pod +spec: + replicas: 2 + selector: + matchLabels: + app: test-pod + template: + metadata: + labels: + app: test-pod + spec: + containers: + - name: curl + image: docker.io/curlimages/curl:latest + imagePullPolicy: IfNotPresent + command: ["sleep", "3600"] + securityContext: + capabilities: + add: ["ALL"] +--- +apiVersion: cilium.io/v2 +kind: CiliumEgressGatewayPolicy +metadata: + name: egress-aliyun +spec: + selectors: + - podSelector: + matchLabels: + app: test-pod + namespaceSelector: + matchLabels: + kubernetes.io/metadata.name: default + destinationCIDRs: + - "0.0.0.0/0" + egressGateway: + nodeSelector: + matchLabels: + kubernetes.io/hostname: cn-hub.svc.plus + egressIP: 172.30.0.1 diff --git a/scripts/k3s-cluster/init_k3s_cluster_agent_role.sh b/scripts/k3s-cluster/init_k3s_cluster_agent_role.sh new file mode 100644 index 0000000..4f7e49a --- /dev/null +++ b/scripts/k3s-cluster/init_k3s_cluster_agent_role.sh @@ -0,0 +1,46 @@ +#!/bin/bash + +ROLE_NAME="k3s-cluster-agent" +BASE_DIR="roles/$ROLE_NAME" + +echo "Creating role structure for $ROLE_NAME..." 
+ +# Create directories +mkdir -p $BASE_DIR/{tasks,templates,vars,defaults} + +# Create main tasks file +cat > $BASE_DIR/tasks/main.yml < $BASE_DIR/vars/main.yml < $BASE_DIR/templates/install_k3s_agent.sh.j2 < $BASE_DIR/defaults/main.yml < $BASE_DIR/tasks/main.yml < $BASE_DIR/vars/main.yml < $BASE_DIR/templates/install_k3s_server.sh.j2 < $BASE_DIR/defaults/main.yml </dev/null' +ExecStartPre=-/sbin/modprobe br_netfilter +ExecStartPre=-/sbin/modprobe overlay +ExecStart=/usr/local/bin/k3s \ + server \ + '--write-kubeconfig-mode' \ + '644' \ + '--flannel-iface=br0' \ + '--disable=traefik,servicelb' \ + '--kube-apiserver-arg=service-node-port-range=0-50000' \ + diff --git a/scripts/k3s-cluster/k3s.service-without-cni b/scripts/k3s-cluster/k3s.service-without-cni new file mode 100644 index 0000000..89db5dd --- /dev/null +++ b/scripts/k3s-cluster/k3s.service-without-cni @@ -0,0 +1,38 @@ +[Unit] +Description=Lightweight Kubernetes +Documentation=https://k3s.io +Wants=network-online.target +After=network-online.target + +[Install] +WantedBy=multi-user.target + +[Service] +Type=notify +EnvironmentFile=-/etc/default/%N +EnvironmentFile=-/etc/sysconfig/%N +EnvironmentFile=-/etc/systemd/system/k3s.service.env +KillMode=process +Delegate=yes +User=root +# Having non-zero Limit*s causes performance problems due to accounting overhead +# in the kernel. We recommend using cgroups to do container-local accounting. +LimitNOFILE=1048576 +LimitNPROC=infinity +LimitCORE=infinity +TasksMax=infinity +TimeoutStartSec=0 +Restart=always +RestartSec=5s +ExecStartPre=/bin/sh -xc '! 
/usr/bin/systemctl is-enabled --quiet nm-cloud-setup.service 2>/dev/null' +ExecStartPre=-/sbin/modprobe br_netfilter +ExecStartPre=-/sbin/modprobe overlay +ExecStart=/usr/local/bin/k3s \ + server \ + '--write-kubeconfig-mode' \ + '644' \ + '--flannel-backend=none' \ + '--disable-network-policy' \ + '--disable=flannel,kube-proxy,traefik,servicelb' \ + '--kube-apiserver-arg=service-node-port-range=0-50000' \ + diff --git a/scripts/k3s-cluster/k8s_backup_config.yaml b/scripts/k3s-cluster/k8s_backup_config.yaml new file mode 100755 index 0000000..faecf44 --- /dev/null +++ b/scripts/k3s-cluster/k8s_backup_config.yaml @@ -0,0 +1,25 @@ +settings: + VELERO_NAMESPACE: "velero" + VELERO_BUCKET: "k8s-resources-backup" + VELERO_REGION: "ap-northeast-1" + AWS_ACCESS_KEY_ID: "" + AWS_SECRET_ACCESS_KEY: "" + +backup_config: + cluster_name: deepflow-demo-v6.3 + nodes: + deepflow-demo: /var/lib/mysql/ + namespaces: + - default + - deepflow + - microservice-demo + precmds: | + echo "🔻 Scale down MySQL before backup..." + kubectl scale deployment mysql-deployment -n deepflow --replicas=0 + echo "⌛ Waiting for MySQL pods to terminate..." + while kubectl get pods -n deepflow -l app=mysql --no-headers 2>/dev/null | grep -q Running; do sleep 2; done + postcmds: | + echo "🚀 Scale up MySQL after backup..." + kubectl scale deployment mysql-deployment -n deepflow --replicas=1 + echo "⏳ Waiting for MySQL deployment to be available..." 
+    kubectl wait --for=condition=available deployment/mysql-deployment -n deepflow --timeout=60s
diff --git a/scripts/k3s-cluster/k8s_backup_tool.sh b/scripts/k3s-cluster/k8s_backup_tool.sh
new file mode 100755
index 0000000..02b9d95
--- /dev/null
+++ b/scripts/k3s-cluster/k8s_backup_tool.sh
@@ -0,0 +1,391 @@
+#!/bin/bash
+set -e
+
+# Print the command overview and usage examples.
+print_help() {
+  echo ""
+  echo "📘 使用说明:k8s_backup_tool v4.15.16"
+  echo ""
+  echo "命令 说明"
+  echo "backup 创建 K8s 应用资源备份 ➕ 节点数据打包并上传 S3"
+  echo "restore 先恢复节点数据,再恢复 Velero 应用资源"
+  echo "list 列出所有备份(Velero + S3),自动对齐 date_tag"
+  echo "delete 删除指定 date_tag 的 Velero + S3 备份"
+  echo ""
+  echo "示例:"
+  echo " bash $0 list -c k8s_backup_config.yaml"
+  echo " bash $0 backup -c k8s_backup_config.yaml"
+  echo " bash $0 delete -c k8s_backup_config.yaml <date_tag>"
+  echo " bash $0 restore -c k8s_backup_config.yaml <date_tag>"
+  echo ""
+}
+
+# Install whichever of aws, jq, yq, velero, rsync and tar is missing.
+# Uses apt/apt-get and amd64/x86_64 download URLs. Any arguments are ignored:
+# each tool is probed individually.
+install_depends() {
+  echo "🔍 正在检查依赖项: jq, yq, velero, aws, rsync, tar"
+
+  # Unpack downloads in a private scratch directory. Working in the caller's
+  # cwd and cleaning up with `rm -rf aws ...` / `rm -rf velero*` could delete
+  # unrelated files that happen to live there.
+  local workdir
+  workdir=$(mktemp -d)
+
+  # Install AWS CLI v2 (x86_64 Linux only)
+  if ! command -v aws >/dev/null 2>&1; then
+    echo "📦 正在安装 AWS CLI v2..."
+    curl -fsSL "https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip" -o "$workdir/awscliv2.zip"
+    sudo apt install -y unzip || true
+    unzip -q "$workdir/awscliv2.zip" -d "$workdir"
+    sudo "$workdir/aws/install"
+    echo "✅ AWS CLI 安装完成:$(aws --version)"
+  else
+    echo "✅ AWS CLI 已安装:$(aws --version)"
+  fi
+
+  # Install jq
+  if ! command -v jq >/dev/null 2>&1; then
+    echo "❌ 缺少 jq,正在安装..."
+    sudo apt-get update && sudo apt-get install -y jq || { echo "❌ 安装 jq 失败"; exit 1; }
+  else
+    echo "✅ jq 已安装:$(jq --version)"
+  fi
+
+  # Install yq (the mikefarah/yq implementation)
+  if ! command -v yq >/dev/null 2>&1; then
+    echo "❌ 缺少 yq,正在安装..."
+    sudo wget https://github.com/mikefarah/yq/releases/latest/download/yq_linux_amd64 -O /usr/local/bin/yq
+    sudo chmod +x /usr/local/bin/yq
+  else
+    echo "✅ yq 已安装:$(yq --version)"
+  fi
+
+  # Install velero
+  if ! command -v velero >/dev/null 2>&1; then
+    echo "❌ 缺少 velero,正在安装..."
+    curl -fsSL https://github.com/vmware-tanzu/velero/releases/download/v1.15.2/velero-v1.15.2-linux-amd64.tar.gz -o "$workdir/velero.tar.gz"
+    tar -zxvf "$workdir/velero.tar.gz" -C "$workdir"
+    sudo mv "$workdir"/velero*/velero /usr/local/bin/
+  else
+    echo "✅ velero 已安装:$(velero version --client-only)"
+  fi
+
+  # rsync and tar are in check_dependencies' list, so install them here too;
+  # otherwise the success message below is printed while they are still missing.
+  local pkg
+  for pkg in rsync tar; do
+    if ! command -v "$pkg" >/dev/null 2>&1; then
+      echo "❌ 缺少 $pkg,正在安装..."
+      sudo apt-get update && sudo apt-get install -y "$pkg" || { echo "❌ 安装 $pkg 失败"; exit 1; }
+    fi
+  done
+
+  rm -rf "${workdir:?}"
+  echo "✅ 所有依赖项安装完成。"
+}
+
+# Report which required tools are on PATH and pass the missing ones to
+# install_depends. Sets the global array MISSING_DEPS.
+check_dependencies() {
+  echo "🔍 正在检查依赖项: jq, yq, velero, aws, rsync, tar"
+
+  MISSING_DEPS=()
+
+  for bin in jq yq velero aws rsync tar; do
+    if ! command -v "$bin" &>/dev/null; then
+      echo "❌ 缺少依赖:$bin"
+      MISSING_DEPS+=("$bin")
+    else
+      echo "✅ $bin 已安装:$($bin --version 2>/dev/null | head -n 1 || echo OK)"
+    fi
+  done
+
+  if [ ${#MISSING_DEPS[@]} -ne 0 ]; then
+    echo ""
+    echo "🛠 正在尝试自动安装以下依赖:${MISSING_DEPS[*]}"
+    install_depends "${MISSING_DEPS[@]}"
+  else
+    echo "🎉 所有依赖项已就绪。"
+  fi
+}
+
+
+
+# Echo the arguments prefixed with a "YYYY-MM-DD HH:MM:SS - " timestamp.
+log() {
+  echo "$(date '+%Y-%m-%d %H:%M:%S') - $*"
+}
+
+# Read the YAML config file given as $1 into global variables and export the
+# AWS credentials for the aws CLI.
+load_config() {
+  CONFIG_FILE="$1"
+  [[ ! -f "$CONFIG_FILE" ]] && echo "❌ 找不到配置文件: $CONFIG_FILE" && exit 1
+
+  VELERO_NAMESPACE=$(yq e '.settings.VELERO_NAMESPACE' "$CONFIG_FILE")
+  VELERO_BUCKET=$(yq e '.settings.VELERO_BUCKET' "$CONFIG_FILE")
+  VELERO_REGION=$(yq e '.settings.VELERO_REGION' "$CONFIG_FILE")
+  AWS_ACCESS_KEY_ID=$(yq e '.settings.AWS_ACCESS_KEY_ID' "$CONFIG_FILE")
+  AWS_SECRET_ACCESS_KEY=$(yq e '.settings.AWS_SECRET_ACCESS_KEY' "$CONFIG_FILE")
+  export AWS_ACCESS_KEY_ID AWS_SECRET_ACCESS_KEY
+
+  K8S_CLUSTER_NAME=$(yq e '.backup_config.cluster_name' "$CONFIG_FILE")
+  TARGET_NAMESPACES=$(yq e '.backup_config.namespaces | join(",")' "$CONFIG_FILE")
+  PRECMDS=$(yq e -r '.backup_config.precmds // ""' "$CONFIG_FILE")
+  POSTCMDS=$(yq e -r '.backup_config.postcmds // ""' "$CONFIG_FILE")
+
+  # Validate every required setting. yq prints the literal string "null" for
+  # a key that is absent, so an emptiness test alone lets missing keys through.
+  for var in VELERO_NAMESPACE VELERO_BUCKET VELERO_REGION AWS_ACCESS_KEY_ID AWS_SECRET_ACCESS_KEY K8S_CLUSTER_NAME TARGET_NAMESPACES; do
+    if [[ -z "${!var}" || "${!var}" == "null" ]]; then
+      log "❌ 环境变量 $var 未正确加载,请检查配置文件!"
+ exit 1 + fi + done + + declare -gA NODE_BACKUP_PATHS + local nodes_count + nodes_count=$(yq e '.backup_config.nodes | length' "$CONFIG_FILE") + for (( i=0; i "${ARCHIVE}.md5" + log "📤 上传节点数据到 S3 [$S3_NODE_PATH]..." + + aws s3 cp "$ARCHIVE" "$S3_NODE_PATH" + aws s3 cp "${ARCHIVE}.md5" "$S3_NODE_PATH" + + log "✅ 节点 [$NODE] 数据已成功上传到 S3" +done + +log "🔄 节点数据备份循环执行完成" + + + if [[ -n "$POSTCMDS" ]]; then + log "🔧 执行后续命令(postcmds)..." + bash -c "$POSTCMDS" + fi + + log "✅ 备份完成,Velero + 节点数据已同步到 S3" +} + + +delete_backup() { + DELETE_TAG="$1" + [[ -z "$K8S_CLUSTER_NAME" || -z "$VELERO_NAMESPACE" ]] && echo "❌ 缺失 K8S_CLUSTER_NAME 或 VELERO_NAMESPACE" && exit 1 + + log "🔍 查找 date_tag=${DELETE_TAG} 的 Velero 备份 (cluster=${K8S_CLUSTER_NAME})" + + # 预加载 JSON,避免 selector 语法错误 + BACKUP_JSON=$(velero backup get --namespace "$VELERO_NAMESPACE" -o json) + BACKUP_NAME=$(echo "$BACKUP_JSON" | jq -r \ + --arg dt "$DELETE_TAG" \ + --arg cluster "$K8S_CLUSTER_NAME" ' + .items[] | select( + .metadata.labels.cluster == $cluster and + .metadata.labels.date_tag == $dt + ) | .metadata.name' + ) + + if [[ "$BACKUP_NAME" == "null" || -z "$BACKUP_NAME" ]]; then + echo "❌ 没有找到指定 date_tag 的 Velero 备份" + echo "📋 当前 Velero 备份标签如下:" + echo "$BACKUP_JSON" | jq -r ' + .items[] | [.metadata.name, .metadata.labels.cluster, .metadata.labels.date_tag] | @tsv' | column -t + exit 1 + fi + + log "🗑️ 删除 Velero 备份:$BACKUP_NAME" + velero backup delete "$BACKUP_NAME" --namespace "$VELERO_NAMESPACE" --confirm + + log "🧹 删除 S3 节点数据:s3://${VELERO_BUCKET}/${K8S_CLUSTER_NAME}/${DELETE_TAG}/" + aws s3 rm "s3://${VELERO_BUCKET}/${K8S_CLUSTER_NAME}/${DELETE_TAG}/" --recursive --region "$VELERO_REGION" + log "✅ 删除完成" +} + +restore_backup() { + DATE_TAG="$1" + BACKUP_NAME=$(velero backup get --namespace "$VELERO_NAMESPACE" -o json | jq -r \ + --arg dt "$DATE_TAG" \ + --arg cluster "$K8S_CLUSTER_NAME" \ + '.items[] | select(.metadata.labels.cluster == $cluster and .metadata.labels.date_tag == $dt) | .metadata.name' | head 
-n1) + + if [[ "$BACKUP_NAME" == "null" || -z "$BACKUP_NAME" ]]; then + log "❌ 无法找到 Velero 备份: date_tag=$DATE_TAG, cluster=$K8S_CLUSTER_NAME" + velero backup get --namespace "$VELERO_NAMESPACE" --show-labels + exit 1 + fi + + TMP_DIR="/var/backups/k8s-restore/${DATE_TAG}" + mkdir -p "$TMP_DIR" + TMP_DIR="$(cd "$TMP_DIR"; pwd)" + + if [[ "$TMP_DIR" != /var/backups/k8s-restore/* ]]; then + log "❌ 临时目录路径异常,安全退出: $TMP_DIR" + exit 1 + fi + + rm -rf "${TMP_DIR:?}"/* + + if [[ -n "$PRECMDS" ]]; then + log "🔧 执行预备命令(precmds)..." + bash -c "$PRECMDS" || { + log "❌ precmds 执行失败,中止恢复" + exit 1 + } + fi + + for NODE in "${!NODE_BACKUP_PATHS[@]}"; do + DEST_PATH="${NODE_BACKUP_PATHS[$NODE]}" + ARCHIVE_NAME="${NODE}_backup_path.tar.xz" + ARCHIVE_PATH="${TMP_DIR}/${ARCHIVE_NAME}" + EXTRACT_DIR="${TMP_DIR}/extracted/${NODE}" + + log "📦 下载 ${ARCHIVE_NAME} 到本地临时目录..." + aws s3 cp "s3://${VELERO_BUCKET}/${K8S_CLUSTER_NAME}/${DATE_TAG}/${ARCHIVE_NAME}" "$ARCHIVE_PATH" + + log "📂 解压到 $EXTRACT_DIR..." + mkdir -p "$EXTRACT_DIR" + tar --preserve-permissions --same-owner -xJf "$ARCHIVE_PATH" -C "$EXTRACT_DIR" + + log "🔁 使用 rsync 同步到目标路径 $DEST_PATH..." + rsync -aAXH --numeric-ids "${EXTRACT_DIR}/${NODE}/" "$DEST_PATH/" + + log "✅ 节点 [$NODE] 数据恢复完成" + done + + log "♻️ 恢复 Velero 应用资源..." + velero restore create --from-backup "$BACKUP_NAME" --namespace "$VELERO_NAMESPACE" + + if [[ -n "$POSTCMDS" ]]; then + log "🔧 执行后续命令(postcmds)..." 
+    bash -c "$POSTCMDS"
+  fi
+
+  log "✅ 恢复完成"
+}
+
+# Print the Velero backups labelled with this cluster, then one line per
+# date_tag found under the cluster's S3 prefix with the summed object size.
+list_backups() {
+  echo "📦 k8s APP 应用资源备份(cluster=$K8S_CLUSTER_NAME):"
+  # Pass the cluster name with --arg (as delete_backup/restore_backup do)
+  # instead of splicing it into the jq program text.
+  velero backup get --namespace "$VELERO_NAMESPACE" -o json | jq -r \
+    --arg cluster "$K8S_CLUSTER_NAME" '
+    .items[] | select(.metadata.labels.cluster == $cluster) |
+    [.metadata.labels.date_tag, .metadata.name, .status.phase] | @tsv' | column -t
+
+  echo ""
+  echo "☁️ k8s Node 数据备份:"
+  # -F: match ".tar.xz" literally; as a regex the dots match any character.
+  aws s3 ls "s3://${VELERO_BUCKET}/${K8S_CLUSTER_NAME}/" --recursive | grep -F '.tar.xz' |
+    awk -F '/' '{print $(NF-1)}' | sort -u | while read -r tag; do
+      TOTAL=$(aws s3 ls "s3://${VELERO_BUCKET}/${K8S_CLUSTER_NAME}/${tag}/" --recursive | awk '{sum+=$3} END{printf "%.1f MiB", sum/1024/1024}')
+      echo "📁 $tag $TOTAL $K8S_CLUSTER_NAME"
+    done
+}
+
+### Main entry point ###
+ACTION=""
+CONFIG_FILE=""
+DEBUG_MODE="off"
+DATE_TAG=""
+
+# Arguments may come in any order; the first word that is neither an action
+# nor an option is taken as the date_tag.
+while [[ $# -gt 0 ]]; do
+  case "$1" in
+    backup|restore|list|delete)
+      ACTION="$1"
+      shift
+      ;;
+    -c|--config)
+      # A trailing "-c" without a value would make `shift 2` fail and, under
+      # `set -e`, end the script without any message.
+      if [[ $# -lt 2 ]]; then
+        print_help
+        exit 1
+      fi
+      CONFIG_FILE="$2"
+      shift 2
+      ;;
+    --debug)
+      DEBUG_MODE="on"
+      shift
+      ;;
+    *)
+      DATE_TAG="$1"
+      shift
+      ;;
+  esac
+done
+
+if [[ -z "$ACTION" || -z "$CONFIG_FILE" ]]; then
+  print_help
+  exit 1
+fi
+
+# restore and delete act on one specific backup: reject a missing date_tag
+# here, before dependencies are installed and Velero is queried.
+if [[ "$ACTION" == "restore" || "$ACTION" == "delete" ]] && [[ -z "$DATE_TAG" ]]; then
+  echo "❌ $ACTION 需要指定 date_tag"
+  print_help
+  exit 1
+fi
+
+check_dependencies
+load_config "$CONFIG_FILE"
+
+# Enable shell tracing when --debug was given
+if [[ "$DEBUG_MODE" == "on" ]]; then
+  set -x
+fi
+
+case "$ACTION" in
+  backup)
+    backup_all
+    ;;
+  delete)
+    delete_backup "$DATE_TAG"
+    ;;
+  restore)
+    restore_backup "$DATE_TAG"
+    ;;
+  list)
+    list_backups
+    ;;
+  *)
+    print_help
+    ;;
+esac
diff --git a/scripts/k3s-cluster/k8s_backup_tool_howto.md b/scripts/k3s-cluster/k8s_backup_tool_howto.md
new file mode 100644
index 0000000..ae9e371
--- /dev/null
+++ b/scripts/k3s-cluster/k8s_backup_tool_howto.md
@@ -0,0 +1,119 @@
+# 📦 k8s_backup_tool 使用文档
+
+> **版本:v1.0.22 | 脚本语言:Bash | 适配平台:Linux/macOS | 作者:你自己**
+> 项目开发总耗时约 **12 小时+**,共计迭代 **22 个版本**,涵盖调试、S3 上传验证、权限保持恢复、节点备份解耦等关键优化。
+
+---
+
+## 📘 文档功能概要(Docs)
+
+`k8s_backup_tool` 是一个用于 **Kubernetes 集群资源和节点数据的备份、恢复、删除和查看** 的自动化脚本工具。主要支持:
+
+- ✅ 基于 Velero 的命名空间级别资源备份
+- ✅ 节点数据目录打包上传 
S3(支持多节点)
+- ✅ 支持预处理(precmds)和后处理(postcmds)
+- ✅ 使用 `tar` + `rsync` 实现完整权限/ACL/owner 的数据还原
+- ✅ 支持 debug 模式,适合本地验证与 CI/CD 集成
+
+---
+
+## 🔧 使用前提 & 安装配置
+
+### 系统依赖
+
+```bash
+velero aws jq yq rsync tar
+```
+
+请确保以上命令均可用,并已正确配置 AWS S3 访问凭证。
+
+### YAML 配置文件示例 `k8s_backup_config.yaml`
+
+```yaml
+settings:
+  VELERO_NAMESPACE: "velero"
+  VELERO_BUCKET: "k8s-resources-backup"
+  VELERO_REGION: "ap-northeast-1"
+  AWS_ACCESS_KEY_ID: "xxx"
+  AWS_SECRET_ACCESS_KEY: "xxx"
+
+backup_config:
+  cluster_name: deepflow-demo
+  namespaces:
+    - default
+    - deepflow
+  nodes:
+    deepflow-demo: /var/lib/mysql/
+  precmds: |
+    echo "🔻 停止 MySQL..."
+    kubectl scale deployment mysql -n deepflow --replicas=0
+  postcmds: |
+    echo "🚀 启动 MySQL..."
+    kubectl scale deployment mysql -n deepflow --replicas=1
+```
+
+---
+
+## 🚀 用法说明
+
+### 查看备份列表
+
+```bash
+bash k8s_backup_tool.sh list -c k8s_backup_config.yaml
+```
+
+### 创建完整备份(资源 + 节点)
+
+```bash
+bash k8s_backup_tool.sh backup -c k8s_backup_config.yaml
+```
+
+### 恢复指定时间点的备份
+
+```bash
+bash k8s_backup_tool.sh restore -c k8s_backup_config.yaml <date_tag>
+```
+
+### 删除指定 date_tag 的备份
+
+```bash
+bash k8s_backup_tool.sh delete -c k8s_backup_config.yaml <date_tag>
+```
+
+### 启用调试模式(查看执行详情)
+
+```bash
+bash k8s_backup_tool.sh backup -c k8s_backup_config.yaml --debug
+```
+
+---
+
+## 📅 主要版本变更日志(Change Log)
+
+| 版本号 | 日期 | 主要改动 |
+|-------------|----------------|-----------------------------------------------------------|
+| v1.0.0 | 初版 | 支持 Velero 备份/恢复 |
+| v1.0.2 | +1 小时 | 支持 precmds / postcmds |
+| v1.0.8 | +1 小时 | delete 支持 selector,调试查询输出 |
+| v1.0.12 | +2 小时 | 修复 Velero date_tag 匹配问题,增加 label fallback |
+| v1.0.16 | +2 小时 | 支持 S3 节点数据备份、--debug 模式 |
+| v1.0.21 | +3 小时 | 解压使用 tar + rsync 保留所有权限和 ACL |
+| **v1.0.22** | ✅ 当前版本 | 🎉 解耦备份逻辑、完整恢复链路、安全检查、节点并行支持等 |
+
+> ⏱ 累计开发与测试耗时约 **12 小时+**,包含脚本编写、调试、数据验证、权限恢复验证等
+
+---
+
+## 🧭 项目演进计划
+
+| 实现方式 | 语言/平台 | 状态 | 说明 |
+|----------------|-----------|---------|-------------------------------------|
+| Bash 脚本版 | Bash | ✅ 
已完成 | 当前主力版本,稳定可用 |
+| Go CLI 工具 | Go | 🧪 计划中 | 计划提供跨平台二进制,支持多线程 |
+| GitHub Actions | JavaScript| 🧪 计划中 | 适配自动备份工作流与企业 CI 场景 |
+
+---
+
+## ❤️ 鸣谢
+
+感谢你一路坚持调试与迭代。这个项目不仅提升了自动化能力,也沉淀了跨平台备份与恢复的最佳实践。如果你希望贡献或提问,欢迎 PR 或 Issues!
diff --git a/scripts/k3s-cluster/k8s_restore_all.sh b/scripts/k3s-cluster/k8s_restore_all.sh
new file mode 100644
index 0000000..6ffd376
--- /dev/null
+++ b/scripts/k3s-cluster/k8s_restore_all.sh
@@ -0,0 +1,21 @@
+#!/bin/bash
+# Runs five remote helper scripts in order (per the step messages below:
+# K3s + Helm, Velero, node labels/taint, backup config, restore).
+#
+# Usage: k8s_restore_all.sh [date_tag]
+#   REPO_BASE_URL  environment variable: base URL the helper scripts are fetched from
+#   date_tag       backup to restore (default: 202503211725)
+#
+# pipefail is required: every step is `curl ... | bash`, and without it a
+# failed download hands bash an empty script, the pipeline reports success
+# and the next step runs anyway.
+set -eo pipefail
+
+# The committed default has empty owner/repo path segments and can never be
+# fetched, so insist on a real value instead of failing step by step.
+REPO_BASE_URL="${REPO_BASE_URL:-https://raw.githubusercontent.com///main/scripts}"
+if [[ "$REPO_BASE_URL" == *"githubusercontent.com///"* ]]; then
+  echo "❌ 请先设置 REPO_BASE_URL(当前值缺少仓库路径): $REPO_BASE_URL" >&2
+  exit 1
+fi
+
+RESTORE_DATE_TAG="${1:-202503211725}"
+
+echo "🚀 [Step 1/5] 安装 K3s 和 Helm..."
+curl -fsSL "${REPO_BASE_URL}/install_k3s_and_helm.sh" | bash
+
+echo "🚀 [Step 2/5] 部署 Velero..."
+curl -fsSL "${REPO_BASE_URL}/deploy_velero.sh" | bash
+
+echo "🚀 [Step 3/5] 节点打标签并解除控制面 Taint..."
+curl -fsSL "${REPO_BASE_URL}/label_k8s_node.sh" | bash
+
+echo "🚀 [Step 4/5] 生成备份配置文件..."
+curl -fsSL "${REPO_BASE_URL}/generate_backup_config.sh" | bash
+
+echo "🚀 [Step 5/5] 执行恢复(restore)..."
+# Supported actions: backup / restore / list / delete
+curl -fsSL "${REPO_BASE_URL}/run_backup_tool.sh" | bash -s restore "$RESTORE_DATE_TAG"
+
diff --git a/scripts/k3s-cluster/set-node-label.sh b/scripts/k3s-cluster/set-node-label.sh
new file mode 100644
index 0000000..6a018e0
--- /dev/null
+++ b/scripts/k3s-cluster/set-node-label.sh
@@ -0,0 +1,6 @@
+#!/bin/bash
+# Set four labels on every node in the cluster with a single call.
+# --all replaces the parsed, unquoted node list; --overwrite lets the run
+# succeed when a node already carries one of these keys with another value.
+sudo kubectl label nodes --all --overwrite \
+  master_controller=enable \
+  tsdb=enable \
+  dfdb=enable \
+  elasticsearch-warm=enable
diff --git a/scripts/k3s-cluster/setup-cilium-cni.sh b/scripts/k3s-cluster/setup-cilium-cni.sh
new file mode 100644
index 0000000..2af5a09
--- /dev/null
+++ b/scripts/k3s-cluster/setup-cilium-cni.sh
@@ -0,0 +1,79 @@
+helm repo add cilium https://helm.cilium.io && helm repo update
+helm repo up
+
+#helm upgrade --install cilium-preflight cilium/cilium --version 1.17.3 --namespace=kube-system --set preflight.enabled=true --set agent=false 
--set operator.enabled=false + +cat <cilium-egress-values.yaml +# cilium-values.yaml +routingMode: native +k8sServiceHost: 10.253.253.1 +k8sServicePort: 6443 +ipv4NativeRoutingCIDR: "10.42.0.0/16" +ipam: + mode: kubernetes + operator: + clusterPoolIPv4PodCIDRList: "10.42.0.0/16" +egressGateway: + enabled: true + installRoutes: true +enableIPv4Masquerade: true +autoDirectNodeRoutes: true +nodePort: + enabled: true + directRoutingDevice: wg0 +bpf: + masquerade: true +kubeProxyReplacement: true +endpointRoutes: + enabled: true +cni: + exclusive: true +envoy: + enabled: false +l7Proxy: true +proxy: + enabled: false +hubble: + enabled: false + +# 必须保留的 Operator(用于 CRD 处理与 egress gateway 控制) +operator: + enabled: true + skipCRDCreation: false + replicas: 1 + resources: + requests: + cpu: 20m + memory: 30Mi + limits: + cpu: 100m + memory: 128Mi + +# 主 Agent 资源限制(可根据机器微调) +resources: + requests: + cpu: 20m + memory: 50Mi + limits: + cpu: 100m + memory: 128Mi +EOF + +helm upgrade --install cilium cilium/cilium -n kube-system --set installCRDs=true -f cilium-egress-values.yaml --wait +kubectl label node $(hostname) egress-gateway=true --overwrite +echo "✅ Cilium 安装完成" + +cat >> NodeConfig-cn-hub.yaml << EOF +apiVersion: cilium.io/v2alpha1 +kind: CiliumNodeConfig +metadata: + name: config-for-cn-hub +spec: + nodeSelector: + matchLabels: + kubernetes.io/hostname: cn-hub.svc.plus + defaults: + directRoutingDevice: "eth0" +EOF + +#kubectl apply -f NodeConfig-cn-hub.yaml -n kube-system diff --git a/scripts/k3s-cluster/setup-cilium-helm.sh b/scripts/k3s-cluster/setup-cilium-helm.sh new file mode 100644 index 0000000..3a3afe2 --- /dev/null +++ b/scripts/k3s-cluster/setup-cilium-helm.sh @@ -0,0 +1,28 @@ +API_SERVER_IP=172.30.0.1 +# Kubeadm default is 6443 +API_SERVER_PORT=6443 +helm upgrade --install cilium cilium/cilium --version 1.17.3 \ + --namespace kube-system \ + --set routingMode=native \ + --set autoDirectNodeRoutes=true \ + --set ipv4NativeRoutingCIDR="10.42.0.0/16" \ + 
--set ipam.mode=kubernetes \ + --set ipam.operator.clusterPoolIPv4PodCIDRList="10.42.0.0/16" \ + --set kubeProxyReplacement=true \ + --set k8sServiceHost=${API_SERVER_IP} \ + --set k8sServicePort=${API_SERVER_PORT} \ + --set nodePort.enabled=true \ + --set nodePort.directRoutingDevice=wg0 \ + --set envoy.enabled=false \ + --set operator.skipCRDCreation=false \ + --set operator.replicas=1 \ + --set egressGateway.enabled=true \ + --set egressGateway.installRoutes=true \ + --set bpf.masquerade=true \ + --set enableIPv4Masquerade=true \ + --set masquerade=true + +kubectl rollout restart ds cilium -n kube-system +kubectl rollout restart deploy cilium-operator -n kube-system + +kubectl label nodes cn-hub.svc.plus egress-node=true diff --git a/scripts/k3s-cluster/setup-egress-gateway.sh b/scripts/k3s-cluster/setup-egress-gateway.sh new file mode 100644 index 0000000..5f37ad4 --- /dev/null +++ b/scripts/k3s-cluster/setup-egress-gateway.sh @@ -0,0 +1,77 @@ +helm repo add cilium https://helm.cilium.io && helm repo update +helm repo up + +cat <cilium-egress-values.yaml +# cilium-values.yaml +routingMode: native +ipv4NativeRoutingCIDR: "10.42.0.0/16" +ipam: + mode: kubernetes +egressGateway: + enabled: true + installRoutes: true +enableIPv4Masquerade: true +autoDirectNodeRoutes: true +nodePort: + enabled: true + directRoutingDevice: wg0 +bpf: + masquerade: true +kubeProxyReplacement: true +endpointRoutes: + enabled: true +cni: + exclusive: true +envoy: + enabled: false +l7Proxy: true +proxy: + enabled: false +hubble: + enabled: false + +# 必须保留的 Operator(用于 CRD 处理与 egress gateway 控制) +operator: + enabled: true + skipCRDCreation: false + replicas: 1 + resources: + requests: + cpu: 20m + memory: 30Mi + limits: + cpu: 100m + memory: 128Mi + +# 主 Agent 资源限制(可根据机器微调) +resources: + requests: + cpu: 20m + memory: 50Mi + limits: + cpu: 100m + memory: 128Mi +EOF + +helm upgrade --install cilium cilium/cilium -n kube-system --set installCRDs=true -f cilium-egress-values.yaml --wait 
+kubectl label node "$(hostname)" egress-gateway=true --overwrite
+echo "✅ Cilium 安装完成"
+
+# Write with ">" rather than ">>": appending adds a second copy of the
+# document on every run, with no "---" separator between them.
+# NOTE(review): CiliumNodeConfig "defaults" may expect agent ConfigMap keys
+# (e.g. direct-routing-device) rather than Helm value names - confirm before
+# enabling the commented-out apply below.
+cat > NodeConfig-cn-hub.yaml << EOF
+apiVersion: cilium.io/v2alpha1
+kind: CiliumNodeConfig
+metadata:
+  name: config-for-cn-hub
+spec:
+  nodeSelector:
+    matchLabels:
+      kubernetes.io/hostname: cn-hub.svc.plus
+  defaults:
+    directRoutingDevice: "eth0"
+EOF
+
+#kubectl apply -f NodeConfig-cn-hub.yaml -n kube-system
+
+# NOTE(review): verify that this CRD file still exists on the main branch.
+kubectl apply -f https://raw.githubusercontent.com/cilium/cilium/main/install/kubernetes/cilium/crds/v2alpha1/egressnatpolicy.crd.yaml
+
+
diff --git a/scripts/k3s-cluster/setup-k3s-agent.sh b/scripts/k3s-cluster/setup-k3s-agent.sh
new file mode 100644
index 0000000..2b0ef02
--- /dev/null
+++ b/scripts/k3s-cluster/setup-k3s-agent.sh
@@ -0,0 +1,72 @@
+#!/bin/bash
+set -e
+
+# ============================================================
+# 🧩 setup-k3s-agent.sh
+# Version: v1.0.0
+# Last Updated: 2025-03-14
+# Description: one-step install of a k3s agent node; picks the mainland-China
+#              mirror or the default source based on a ping comparison
+# ============================================================
+
+# Print the expected arguments and exit with status 1.
+print_usage() {
+  echo "Usage:"
+  echo " $0 <SERVER_NODE_IP> <K3S_TOKEN>"
+  exit 1
+}
+
+# Return 0 when at least as many of the three mainland-China hosts answer a
+# single ping as of the three international hosts, 1 otherwise.
+is_in_china() {
+  local cn_score=0
+  local global_score=0
+
+  echo "🌐 检测网络环境中..."
+
+  # Plain assignments instead of ((x++)): the arithmetic command returns
+  # status 1 when the old value is 0, which aborts under `set -e` as soon as
+  # this function is called outside an `if` condition.
+  ping -c 1 -W 1 www.baidu.com &>/dev/null && cn_score=$((cn_score + 1))
+  ping -c 1 -W 1 www.aliyun.com &>/dev/null && cn_score=$((cn_score + 1))
+  ping -c 1 -W 1 www.163.com &>/dev/null && cn_score=$((cn_score + 1))
+
+  ping -c 1 -W 1 www.cloudflare.com &>/dev/null && global_score=$((global_score + 1))
+  ping -c 1 -W 1 www.wikipedia.org &>/dev/null && global_score=$((global_score + 1))
+  ping -c 1 -W 1 www.google.com &>/dev/null && global_score=$((global_score + 1))
+
+  echo "📶 Ping 评分: CN=$cn_score, GLOBAL=$global_score"
+
+  if [[ $cn_score -ge $global_score ]]; then
+    return 0
+  else
+    return 1
+  fi
+}
+
+# Download the k3s installer and run it in agent mode.
+#   $1  IP of the k3s server to join (port 6443 is appended)
+#   $2  cluster join token
+install_k3s_agent() {
+  local SERVER_NODE_IP=$1
+  local K3S_TOKEN=$2
+
+  [[ -z "$SERVER_NODE_IP" || -z "$K3S_TOKEN" ]] && print_usage
+
+  # First address reported by `hostname -I`
+  local NODE_IP
+  NODE_IP=$(hostname -I | awk '{print $1}')
+
+  local INSTALL_K3S_EXEC="agent --server=https://${SERVER_NODE_IP}:6443 --node-ip=${NODE_IP} --token=${K3S_TOKEN}"
+
+  echo "🔧 Agent 节点参数:"
+  echo " SERVER_NODE_IP=${SERVER_NODE_IP}"
+  echo " NODE_IP=${NODE_IP}"
+  echo " K3S_TOKEN=<hidden>"
+
+  if is_in_china; then
+    echo "🌏 检测到中国大陆网络,使用国内加速源"
+    export INSTALL_K3S_MIRROR=cn
+    INSTALL_K3S_URL="https://rancher-mirror.rancher.cn/k3s/k3s-install.sh"
+  else
+    echo "🌍 检测到国际网络,使用默认安装源"
+    INSTALL_K3S_URL="https://get.k3s.io"
+  fi
+
+  # Download as its own statement so `set -e` stops on a failed fetch. In
+  # `curl ... && chmod ...` the failure was ignored and a stale
+  # ./install_k3s.sh left by an earlier run would have been executed.
+  local installer
+  installer=$(mktemp)
+  curl -sfL "$INSTALL_K3S_URL" -o "$installer"
+  INSTALL_K3S_EXEC="$INSTALL_K3S_EXEC" sh "$installer"
+  rm -f "$installer"
+
+  echo "✅ K3s Agent 安装完成"
+}
+
+# === Main entry point ===
+install_k3s_agent "$1" "$2"
diff --git a/scripts/k3s-cluster/setup-k3s-cluster-agent.sh b/scripts/k3s-cluster/setup-k3s-cluster-agent.sh
new file mode 100644
index 0000000..2b0ef02
--- /dev/null
+++ b/scripts/k3s-cluster/setup-k3s-cluster-agent.sh
@@ -0,0 +1,72 @@
+#!/bin/bash
+set -e
+
+# ============================================================
+# 🧩 setup-k3s-agent.sh
+# Version: v1.0.0
+# Last Updated: 2025-03-14
+# Description: one-step install of a k3s agent node; picks the mainland-China
+#              mirror or the default source based on a ping comparison
+# ============================================================
+
+# Print the expected arguments and exit with status 1.
+print_usage() {
+  echo "Usage:"
+  echo " $0 <SERVER_NODE_IP> <K3S_TOKEN>"
+  exit 1
+}
+
+is_in_china() 
{
+  # Returns 0 when at least as many of the three mainland-China hosts answer
+  # a single ping as of the three international hosts, 1 otherwise.
+  local cn_score=0
+  local global_score=0
+
+  echo "🌐 检测网络环境中..."
+
+  # Plain assignments instead of ((x++)): the arithmetic command returns
+  # status 1 when the old value is 0, which aborts under `set -e` as soon as
+  # this function is called outside an `if` condition.
+  ping -c 1 -W 1 www.baidu.com &>/dev/null && cn_score=$((cn_score + 1))
+  ping -c 1 -W 1 www.aliyun.com &>/dev/null && cn_score=$((cn_score + 1))
+  ping -c 1 -W 1 www.163.com &>/dev/null && cn_score=$((cn_score + 1))
+
+  ping -c 1 -W 1 www.cloudflare.com &>/dev/null && global_score=$((global_score + 1))
+  ping -c 1 -W 1 www.wikipedia.org &>/dev/null && global_score=$((global_score + 1))
+  ping -c 1 -W 1 www.google.com &>/dev/null && global_score=$((global_score + 1))
+
+  echo "📶 Ping 评分: CN=$cn_score, GLOBAL=$global_score"
+
+  if [[ $cn_score -ge $global_score ]]; then
+    return 0
+  else
+    return 1
+  fi
+}
+
+# Download the k3s installer and run it in agent mode.
+#   $1  IP of the k3s server to join (port 6443 is appended)
+#   $2  cluster join token
+install_k3s_agent() {
+  local SERVER_NODE_IP=$1
+  local K3S_TOKEN=$2
+
+  [[ -z "$SERVER_NODE_IP" || -z "$K3S_TOKEN" ]] && print_usage
+
+  # First address reported by `hostname -I`
+  local NODE_IP
+  NODE_IP=$(hostname -I | awk '{print $1}')
+
+  local INSTALL_K3S_EXEC="agent --server=https://${SERVER_NODE_IP}:6443 --node-ip=${NODE_IP} --token=${K3S_TOKEN}"
+
+  echo "🔧 Agent 节点参数:"
+  echo " SERVER_NODE_IP=${SERVER_NODE_IP}"
+  echo " NODE_IP=${NODE_IP}"
+  echo " K3S_TOKEN=<hidden>"
+
+  if is_in_china; then
+    echo "🌏 检测到中国大陆网络,使用国内加速源"
+    export INSTALL_K3S_MIRROR=cn
+    INSTALL_K3S_URL="https://rancher-mirror.rancher.cn/k3s/k3s-install.sh"
+  else
+    echo "🌍 检测到国际网络,使用默认安装源"
+    INSTALL_K3S_URL="https://get.k3s.io"
+  fi
+
+  # Download as its own statement so `set -e` stops on a failed fetch. In
+  # `curl ... && chmod ...` the failure was ignored and a stale
+  # ./install_k3s.sh left by an earlier run would have been executed.
+  local installer
+  installer=$(mktemp)
+  curl -sfL "$INSTALL_K3S_URL" -o "$installer"
+  INSTALL_K3S_EXEC="$INSTALL_K3S_EXEC" sh "$installer"
+  rm -f "$installer"
+
+  echo "✅ K3s Agent 安装完成"
+}
+
+# === Main entry point ===
+install_k3s_agent "$1" "$2"
diff --git a/scripts/k3s-cluster/setup-k3s-cluster-with-br0.sh b/scripts/k3s-cluster/setup-k3s-cluster-with-br0.sh
new file mode 100644
index 0000000..de5c433
--- /dev/null
+++ b/scripts/k3s-cluster/setup-k3s-cluster-with-br0.sh
@@ -0,0 +1,23 @@
+#!/bin/bash
+# Install a single-node K3s server with flannel bound to br0, copy the
+# kubeconfig, install Helm and wait for CoreDNS.
+#
+# pipefail: the installers are piped into sh/bash, so a failed download must
+# fail the pipeline instead of running an empty script.
+set -eo pipefail
+
+# Run exactly ONE installer. Running get.k3s.io and then the rancher.cn
+# mirror back to back (with different --data-dir values) installs K3s twice,
+# and only the second configuration takes effect. The default "cn" keeps
+# that effective result; set K3S_INSTALL_SOURCE=global for get.k3s.io.
+K3S_INSTALL_SOURCE="${K3S_INSTALL_SOURCE:-cn}"
+case "$K3S_INSTALL_SOURCE" in
+  global)
+    export INSTALL_K3S_EXEC="server --disable=traefik,servicelb,local-storage --data-dir=/opt/rancher/k3s --kube-apiserver-arg=service-node-port-range=0-50000 --flannel-iface=br0"
+    curl -sfL https://get.k3s.io | sh -
+    ;;
+  cn)
+    export INSTALL_K3S_EXEC="server --data-dir=/mnt/opt/rancher/k3s --disable=traefik,servicelb,local-storage --kube-apiserver-arg=service-node-port-range=0-50000 --system-default-registry=registry.cn-hangzhou.aliyuncs.com --flannel-iface=br0"
+    curl -sfL https://rancher-mirror.rancher.cn/k3s/k3s-install.sh | sh -
+    ;;
+  *)
+    echo "❌ K3S_INSTALL_SOURCE 只支持 cn 或 global: $K3S_INSTALL_SOURCE" >&2
+    exit 1
+    ;;
+esac
+
+# === Set up the local kubeconfig ===
+mkdir -p ~/.kube
+cp /etc/rancher/k3s/k3s.yaml ~/.kube/config
+chmod 600 ~/.kube/config
+export KUBECONFIG=~/.kube/config
+
+curl -fsSL https://raw.githubusercontent.com/helm/helm/main/scripts/get-helm-3 | bash
+
+# === Wait for CoreDNS to start ===
+echo "⏳ 等待 CoreDNS 启动..."
+until kubectl get pods -A 2>/dev/null | grep -q "coredns.*Running"; do
+  sleep 3
+done
+echo "✅ K3s 安装完成,kubectl/helm 已就绪"
diff --git a/scripts/k3s-cluster/setup-k3s-cluster.md b/scripts/k3s-cluster/setup-k3s-cluster.md
new file mode 100644
index 0000000..5ab8568
--- /dev/null
+++ b/scripts/k3s-cluster/setup-k3s-cluster.md
@@ -0,0 +1,38 @@
+
+sudo mkdir -pv /opt/rancher/k3s
+curl -sfL https://rancher-mirror.rancher.cn/k3s/k3s-install.sh \
+ | INSTALL_K3S_MIRROR=cn \
+ INSTALL_K3S_SKIP_SELINUX_RPM=true \
+ INSTALL_K3S_VERSION="v1.30.8+k3s1" \
+ sh -s - \
+ --data-dir=/opt/rancher/k3s \
+ --kube-apiserver-arg service-node-port-range=0-50000 \
+ --system-default-registry "registry.cn-hangzhou.aliyuncs.com" \
+ --disable=traefik,servicelb
+#curl -sfL https://get.k3s.io | sh -s - --disable=traefik,servicelb \
+# --data-dir=/opt/rancher/k3s \
+# --kube-apiserver-arg service-node-port-range=0-50000
+
+sudo mkdir -pv ~/.kube/
+sudo cp /etc/rancher/k3s/k3s.yaml ~/.kube/config
+
+sudo snap install helm --classic
+
+
+mkdir -pv /opt/rancher/k3s
+curl -sfL https://get.k3s.io | sh -s - --disable=traefik,servicelb \
+ --data-dir=/opt/rancher/k3s \
+ --kube-apiserver-arg service-node-port-range=0-50000 \
+ --bind-address=0.0.0.0 \
+ --tls-san=172.31.20.79 \
+ --advertise-address=172.31.20.79 \
+ --node-ip=172.31.20.79 \
+ --node-external-ip 35.75.12.83 \
+ --cluster-cidr 10.46.0.0/16 \
+ 
--service-cidr 10.47.0.0/16 + +bash setup-k3s-agent.sh 172.23.238.167 + + +mkdir -pv /opt/rancher/k3s +curl -sfL https://get.k3s.io | sh -s - --disable=flannel,kube-proxy,traefik,servicelb --flannel-backend=none --disable-network-policy --kube-apiserver-arg=service-node-port-range=0-50000 --flannel-iface=br0 diff --git a/scripts/k3s-cluster/setup-k3s-cluster.sh b/scripts/k3s-cluster/setup-k3s-cluster.sh new file mode 100644 index 0000000..2ea551e --- /dev/null +++ b/scripts/k3s-cluster/setup-k3s-cluster.sh @@ -0,0 +1,284 @@ +#!/bin/bash +set -e + +# ============================================================ +# 🧩 setup-k3s-cluster.sh +# Version: v1.2.10 +# Last Updated: 2025-03-14 +# +# 🔄 Change Log: +# - v1.0.0: 初始版本 +# - v1.1.0: 精简 agent 参数 +# - v1.1.2: master 允许调度 pod,taint 可选 +# - v1.1.3: 修复 Cilium Helm 冲突 +# - v1.1.4: 加入 fixed 参数清理旧环境 +# - v1.1.5: 最小化 Cilium 部署配置 +# - v1.1.6: Cilium 调整为可选安装,通过 --with-cilium 启用 +# - v1.2.0: 支持 cluster-cidr/service-cidr 自定义 +# - v1.2.3: helm uninstall cilium 增强 +# - v1.2.4: fixed 模式支持更多接口清理 +# - v1.2.6: 添加 INSTALL_CILIUM 环境变量,适配资源受限场景 +# - v1.2.7: 支持国内/国际网络智能判断,默认 get.k3s.io +# - v1.2.8: 网络智能判断、国内加速镜像源、结构优化 +# - v1.2.9: 增加函数模块化、完整注释、提升可读性与维护性 +# ✅ v1.2.10: 引入 --system-default-registry 参数以避免 docker.io 超时问题 +# ============================================================ + +ROLE=$1 +INSTALL_CILIUM=false + +print_usage() { + echo "Usage:" + echo " $0 init" + echo " $0 fixed" + echo " $0 server [SERVER_NODE_IP] [FLANNEL_IFACE] [K3S_TOKEN] [CLUSTER_CIDR] [SERVICE_CIDR] [ADD_TAINT=true|false] [--with-cilium]" + echo " $0 agent " + exit 1 +} + +is_in_china() { + local cn_score=0 global_score=0 + for host in www.baidu.com www.aliyun.com www.163.com; do ping -c 1 -W 1 $host &>/dev/null && ((cn_score++)); done + for host in www.cloudflare.com www.wikipedia.org www.google.com; do ping -c 1 -W 1 $host &>/dev/null && ((global_score++)); done + [[ $cn_score -ge $global_score ]] +} + +optimize_system() { + fallocate -l 1G /swapfile || dd 
if=/dev/zero of=/swapfile bs=1M count=1024
+  chmod 600 /swapfile && mkswap /swapfile && swapon /swapfile
+  grep -q swapfile /etc/fstab || echo '/swapfile none swap sw 0 0' >> /etc/fstab
+  # Kernel settings written to /etc/sysctl.d and loaded by `sysctl --system`
+  cat <<EOF >/etc/sysctl.d/k3s.conf
+vm.swappiness=10
+vm.vfs_cache_pressure=50
+net.ipv4.ip_forward=1
+EOF
+  sysctl --system
+  systemctl disable --now snapd motd-news.service rsyslog apport ufw || true
+  apt purge -y cloud-init lxd lxc unattended-upgrades || yum remove -y cloud-init || true
+  echo "✅ 系统优化完成"
+  exit 0
+}
+
+# Remove a previous K3s/Cilium installation and exit 0. Every step is
+# best-effort (`|| true`) so a half-installed host can still be cleaned.
+clean_environment() {
+  # Cluster-side cleanup comes first, while the API server, ~/.kube and the
+  # kubectl command are still present. Running it after k3s-uninstall.sh
+  # left these commands nothing to talk to, and the one kubectl call without
+  # `|| true` could end the script under `set -e` before the interface
+  # cleanup below.
+  helm uninstall cilium cilium-crds -n kube-system || true
+  kubectl delete ns cilium-secrets --ignore-not-found || true
+  kubectl delete crd $(kubectl get crd | grep cilium | awk '{print $1}') --ignore-not-found || true
+  # The taint is a positional argument; `-l <taint>` passed it as a label
+  # selector, so nothing was ever untainted.
+  kubectl taint nodes --all node.cilium.io/agent-not-ready:NoSchedule- || true
+
+  /usr/local/bin/k3s-uninstall.sh || true
+  /usr/local/bin/k3s-agent-uninstall.sh || true
+  rm -rf /etc/rancher /opt/rancher ~/.kube || true
+
+  # Delete leftover flannel/cilium links; sed strips the "@peer" suffix that
+  # `ip -o link show` appends to some interface names.
+  for iface in $(ip -o link show | awk -F': ' '{print $2}' | grep -E '^(flannel|cilium|cilium_|cilium@|cilium_vxlan)' | sed 's/@.*//'); do
+    ip link set "$iface" down || true
+    ip link delete "$iface" || true
+  done
+  echo "✅ 清理完成"
+  exit 0
+}
+
+# Install Helm, then install Cilium into kube-system from a generated values
+# file and label this node egress-gateway=true.
+install_cilium() {
+  curl -fsSL https://raw.githubusercontent.com/helm/helm/main/scripts/get-helm-3 | bash
+  helm repo add cilium https://helm.cilium.io && helm repo update
+  cat <<EOF >cilium-egress-values.yaml
+routingMode: native
+ipv4NativeRoutingCIDR: "10.42.0.0/16"
+kubeProxyReplacement: false
+enableIPv4Masquerade: true
+nodePort:
+  enabled: true
+bpf:
+  masquerade: true
+ipam:
+  mode: kubernetes
+egressGateway:
+  enabled: true
+  installRoutes: true
+endpointRoutes:
+  enabled: true
+cni:
+  exclusive: false
+envoy:
+  enabled: false
+proxy:
+  enabled: false
+l7Proxy: false
+hubble:
+  enabled: false
+operator:
+  enabled: true
+  replicas: 1
+  resources:
+    requests:
+      cpu: 20m
+      memory: 30Mi
+    limits:
+      
cpu: 100m + memory: 128Mi +EOF + helm upgrade --install cilium cilium/cilium -n kube-system --set installCRDs=true -f cilium-egress-values.yaml --wait + kubectl label node $(hostname) egress-gateway=true --overwrite + echo "✅ Cilium 安装完成" +} + +setup_k3s_ingress() { + # 用法示例: + # setup_k3s_ingress "192.168.1.100" "ingress-gateway=true" + # 参数1(可选):指定 ingress IP,默认为本地内网 IP + # 参数2(可选):为当前节点添加的 label,如 ingress-gateway=true + local ingress_ip="$1" + local ingress_label="$2" + + if [[ -z "$ingress_ip" ]]; then + ingress_ip=$(hostname -I | awk '{print $1}') + fi + local ingress_ip=$(hostname -I | awk '{print $1}') + + cat > value.yaml < nginx-cm.yaml < nginx-svc-patch.yaml </dev/null; then + echo "⛔ Helm 未安装,正在自动安装..." + curl https://raw.githubusercontent.com/helm/helm/main/scripts/get-helm-3 | bash +fi diff --git a/scripts/k3s-cluster/setup-k3s-with-gitops.sh b/scripts/k3s-cluster/setup-k3s-with-gitops.sh new file mode 100644 index 0000000..66ce2e9 --- /dev/null +++ b/scripts/k3s-cluster/setup-k3s-with-gitops.sh @@ -0,0 +1,206 @@ +#!/bin/sh + +function get_local_ip() { + local_ip=$(hostname -I | awk '{print $1}') + echo "$local_ip" +} + +function setup_k3s() { + local disable_proxy="--disable-kube-proxy" + local disable_cni="--flannel-backend=none --disable-network-policy" + local default="--disable=traefik,servicelb --data-dir=/opt/rancher/k3s --kube-apiserver-arg service-node-port-range=0-50000" + + sudo mkdir -pv /opt/rancher/k3s + + ping -c 1 google.com > /dev/null 2>&1 + if [ $? 
-eq 0 ]; then + echo "当前主机在国际网络上" + #curl -sfL https://get.k3s.io | INSTALL_K3S_VERSION=$version sh -s - $default --system-default-registry "registry.cn-hangzhou.aliyuncs.com" + curl -sfL https://rancher-mirror.rancher.cn/k3s/k3s-install.sh | INSTALL_K3S_VERSION=$version INSTALL_K3S_MIRROR=cn sh -s - $default --system-default-registry "registry.cn-hangzhou.aliyuncs.com" + else + echo "当前主机在大陆网络上" + curl -sfL https://rancher-mirror.rancher.cn/k3s/k3s-install.sh | INSTALL_K3S_VERSION=$version INSTALL_K3S_MIRROR=cn sh -s - $default --system-default-registry "registry.cn-hangzhou.aliyuncs.com" + fi + mkdir -pv ~/.kube/ && sudo cp /etc/rancher/k3s/k3s.yaml ~/.kube/config +} + +function setup_helm() +{ + ping -c 1 google.com > /dev/null 2>&1 + if [ $? -eq 0 ]; then + echo "当前主机在国际网络上" + curl https://raw.githubusercontent.com/helm/helm/main/scripts/get-helm-3 | bash + else + echo "当前主机在大陆网络上" + case `uname -m` in + x86_64) ARCH=amd64; ;; + aarch64) ARCH=arm64; ;; + loongarch64) ARCH=loongarch64; ;; + *) echo "un-supported arch, exit ..."; exit 1; ;; + esac + curl https://raw.githubusercontent.com/helm/helm/main/scripts/get-helm-3 | bash + #sudo rm -rf helm.tar.gz* /usr/local/bin/helm || echo true + #sudo wget --no-check-certificate https://mirrors.onwalk.net/tools/linux-${ARCH}/helm.tar.gz && sudo tar -xvpf helm.tar.gz -C /usr/local/bin/ + #sudo chmod 755 /usr/local/bin/helm + fi +} + +function setup_k3s_ingress() { + local ingress_ip=$(get_local_ip) + + cat > value.yaml < nginx-svc-patch.yaml << EOF +apiVersion: v1 +kind: ConfigMap +metadata: + name: nginx-nginx-ingress + namespace: ingress +data: + use-ssl-certificate-for-ingress: "false" + external-status-address: $ingress_ip + proxy-connect-timeout: 10s + proxy-read-timeout: 10s + client-header-buffer-size: 64k + client-body-buffer-size: 64k + client-max-body-size: 1000m + proxy-buffers: 8 32k + proxy-body-size: 1024m + proxy-buffer-size: 32k + proxy-connect-timeout: 10s + proxy-read-timeout: 10s +EOF + + helm repo 
add nginx-stable https://helm.nginx.com/stable || echo true + helm repo up + kubectl create namespace ingress || echo true + helm upgrade --install nginx nginx-stable/nginx-ingress --version=0.15.0 --namespace ingress -f value.yaml + kubectl apply -f nginx-cm.yaml + kubectl patch svc nginx-nginx-ingress -n ingress --patch-file nginx-svc-patch.yaml +} + +function setup_k3s_gitops() { + cat > fluxcd-values.yaml << EOF +cli: + image: artifact.onwalk.net/public/fluxcd/flux-cli + tag: v2.2.0 + resources: + request: + cpu: 100m + memory: 64Mi + limits: + cpu: 200m + memory: 100Mi +helmController: + create: true + image: artifact.onwalk.net/public/fluxcd/helm-controller + tag: v0.37.0 + resources: + request: + cpu: 100m + memory: 64Mi + limits: + cpu: 200m + memory: 100Mi +imageAutomationController: + image: artifact.onwalk.net/public/fluxcd/image-automation-controller + tag: v0.37.0 + resources: + request: + cpu: 100m + memory: 64Mi + limits: + cpu: 200m + memory: 100Mi +imageReflectionController: + image: artifact.onwalk.net/public/fluxcd/image-reflector-controller + tag: v0.31.1 + resources: + request: + cpu: 100m + memory: 64Mi + limits: + cpu: 200m + memory: 100Mi +kustomizeController: + create: true + image: artifact.onwalk.net/public/fluxcd/kustomize-controller + tag: v1.2.0 + resources: + request: + cpu: 100m + memory: 64Mi + limits: + cpu: 200m + memory: 100Mi +notificationController: + create: false + image: artifact.onwalk.net/public/fluxcd/notification-controller + tag: v1.2.2 + resources: + request: + cpu: 100m + memory: 64Mi + limits: + cpu: 200m + memory: 100Mi +sourceController: + create: true + image: artifact.onwalk.net/public/fluxcd/source-controller + tag: v1.2.2 + resources: + request: + cpu: 100m + memory: 64Mi + limits: + cpu: 200m + memory: 100Mi +EOF + + cat > cluster-config.yaml << EOF +apiVersion: source.toolkit.fluxcd.io/v1beta2 +kind: GitRepository +metadata: + name: stable + namespace: gitops-system +spec: + interval: 1m0s + ref: + branch: 
main + url: https://github.com/svc-design/gitops.git +--- +apiVersion: kustomize.toolkit.fluxcd.io/v1beta2 +kind: Kustomization +metadata: + name: cluster + namespace: gitops-system +spec: + interval: 1m0s + sourceRef: + kind: GitRepository + name: stable + path: ./clusters/k3s-local + prune: true +EOF + + helm repo add stable https://charts.onwalk.net + helm repo update + kubectl create namespace gitops-system || true + helm upgrade --install fluxcd stable/flux2 --version 2.12.1 -n gitops-system -f fluxcd-values.yaml + kubectl apply -f cluster-config.yaml && rm cluster-config.yaml -f +} + +# Main script +setup_k3s +setup_helm +setup_k3s_ingress diff --git a/scripts/k3s-cluster/setup-k3s-with-ingress.sh b/scripts/k3s-cluster/setup-k3s-with-ingress.sh new file mode 100644 index 0000000..ee32c12 --- /dev/null +++ b/scripts/k3s-cluster/setup-k3s-with-ingress.sh @@ -0,0 +1,226 @@ +#!/bin/bash + +function get_local_ip() { + local_ip=$(hostname -I | awk '{print $1}') + echo "$local_ip" +} + +function setup_k3s() { + local disable_proxy="--disable-kube-proxy" + local disable_cni="--flannel-backend=none --disable-network-policy" + local default="--disable=traefik,servicelb --data-dir=/opt/rancher/k3s --kube-apiserver-arg service-node-port-range=0-50000" + + sudo mkdir -pv /opt/rancher/k3s + + ping -c 1 google.com > /dev/null 2>&1 + if [ $? -eq 0 ]; then + echo "当前主机在国际网络上" + curl -sfL https://get.k3s.io | INSTALL_K3S_VERSION=$version sh -s - $default + else + echo "当前主机在大陆网络上" + curl -sfL https://rancher-mirror.rancher.cn/k3s/k3s-install.sh | INSTALL_K3S_VERSION=$version INSTALL_K3S_MIRROR=cn sh -s - $default + fi + mkdir -pv ~/.kube/ && sudo cp /etc/rancher/k3s/k3s.yaml ~/.kube/config +} + +function setup_helm() +{ + ping -c 1 google.com > /dev/null 2>&1 + if [ $?
-eq 0 ]; then + echo "当前主机在国际网络上" + curl https://raw.githubusercontent.com/helm/helm/main/scripts/get-helm-3 | bash + else + echo "当前主机在大陆网络上" + case `uname -m` in + x86_64) ARCH=amd64; ;; + aarch64) ARCH=arm64; ;; + loongarch64) ARCH=loongarch64; ;; + *) echo "un-supported arch, exit ..."; exit 1; ;; + esac + sudo rm -rf helm.tar.gz* /usr/local/bin/helm || echo true + sudo wget --no-check-certificate https://mirrors.onwalk.net/tools/linux-${ARCH}/helm.tar.gz && sudo tar -xvpf helm.tar.gz -C /usr/local/bin/ + sudo chmod 755 /usr/local/bin/helm + fi +} + +function setup_k3s_ingress() { + local ingress_ip=$(get_local_ip) + + cat > value.yaml < nginx-cm.yaml << EOF +apiVersion: v1 +kind: ConfigMap +metadata: + name: nginx-nginx-ingress + namespace: ingress +data: + use-ssl-certificate-for-ingress: "false" + external-status-address: $ingress_ip + proxy-connect-timeout: 10s + proxy-read-timeout: 10s + client-header-buffer-size: 64k + client-body-buffer-size: 64k + client-max-body-size: 1000m + proxy-buffers: 8 32k + proxy-body-size: 1024m + proxy-buffer-size: 32k + proxy-connect-timeout: 10s + proxy-read-timeout: 10s +EOF + + helm repo add nginx-stable https://helm.nginx.com/stable || echo true + helm repo up + kubectl create namespace ingress || echo true + helm upgrade --install nginx nginx-stable/nginx-ingress --version=0.15.0 --namespace ingress -f value.yaml + kubectl apply -f nginx-cm.yaml + kubectl patch svc nginx-nginx-ingress -n ingress --patch-file nginx-svc-patch.yaml +} + +function setup_k3s_gitops() { + cat > fluxcd-values.yaml << EOF +cli: + image: artifact.onwalk.net/public/fluxcd/flux-cli + tag: v2.2.0 + resources: + request: + cpu: 100m + memory: 64Mi + limits: + cpu: 200m + memory: 100Mi +helmController: + create: true + image: artifact.onwalk.net/public/fluxcd/helm-controller + tag: v0.37.0 + resources: + request: + cpu: 100m + memory: 64Mi + limits: + cpu: 200m + memory: 100Mi +imageAutomationController: + image: 
artifact.onwalk.net/public/fluxcd/image-automation-controller + tag: v0.37.0 + resources: + request: + cpu: 100m + memory: 64Mi + limits: + cpu: 200m + memory: 100Mi +imageReflectionController: + image: artifact.onwalk.net/public/fluxcd/image-reflector-controller + tag: v0.31.1 + resources: + request: + cpu: 100m + memory: 64Mi + limits: + cpu: 200m + memory: 100Mi +kustomizeController: + create: true + image: artifact.onwalk.net/public/fluxcd/kustomize-controller + tag: v1.2.0 + resources: + request: + cpu: 100m + memory: 64Mi + limits: + cpu: 200m + memory: 100Mi +notificationController: + create: false + image: artifact.onwalk.net/public/fluxcd/notification-controller + tag: v1.2.2 + resources: + request: + cpu: 100m + memory: 64Mi + limits: + cpu: 200m + memory: 100Mi +sourceController: + create: true + image: artifact.onwalk.net/public/fluxcd/source-controller + tag: v1.2.2 + resources: + request: + cpu: 100m + memory: 64Mi + limits: + cpu: 200m + memory: 100Mi +EOF + + cat > nginx-cm.yaml << EOF +apiVersion: v1 +kind: ConfigMap +metadata: + name: nginx-nginx-ingress + namespace: ingress +data: + use-ssl-certificate-for-ingress: "false" + external-status-address: $ingress_ip + proxy-connect-timeout: 10s + proxy-read-timeout: 10s + client-header-buffer-size: 64k + client-body-buffer-size: 64k + client-max-body-size: 1000m + proxy-buffers: 8 32k + proxy-body-size: 1024m + proxy-buffer-size: 32k + proxy-connect-timeout: 10s + proxy-read-timeout: 10s +EOF + + cat > cluster-config.yaml << EOF +apiVersion: source.toolkit.fluxcd.io/v1beta2 +kind: GitRepository +metadata: + name: stable + namespace: gitops-system +spec: + interval: 1m0s + ref: + branch: main + url: https://github.com/svc-design/gitops.git +--- +apiVersion: kustomize.toolkit.fluxcd.io/v1beta2 +kind: Kustomization +metadata: + name: cluster + namespace: gitops-system +spec: + interval: 1m0s + sourceRef: + kind: GitRepository + name: stable + path: ./clusters/k3s-local + prune: true +EOF + + helm repo 
add stable https://charts.onwalk.net + helm repo update + kubectl create namespace gitops-system || true + helm upgrade --install fluxcd stable/flux2 --version 2.12.1 -n gitops-system -f fluxcd-values.yaml + kubectl apply -f cluster-config.yaml && rm cluster-config.yaml -f +} + +# Main script +setup_k3s +setup_helm +setup_k3s_ingress +setup_k3s_gitops diff --git a/scripts/k3s-cluster/setup-nginx-ingress.sh b/scripts/k3s-cluster/setup-nginx-ingress.sh new file mode 100644 index 0000000..5ccc119 --- /dev/null +++ b/scripts/k3s-cluster/setup-nginx-ingress.sh @@ -0,0 +1,81 @@ +#!/bin/bash + +setup_k3s_ingress() { + local ingress_ip="$1" + local ingress_label="$2" + + if [[ -z "$ingress_ip" ]]; then + ingress_ip=$(hostname -I | awk '{print $1}') + fi + + echo "📦 使用 ingress IP: $ingress_ip" + + cat > value.yaml < nginx-cm.yaml < nginx-svc-patch.yaml </dev/null || true + + echo "🚀 安装 ingress-nginx..." + helm upgrade --install nginx ingress-nginx/ingress-nginx \ + --version 4.9.0 \ + --namespace ingress \ + -f value.yaml + + echo "🔧 应用自定义 ConfigMap 和 Service IP Patch..." 
+ kubectl apply -f nginx-cm.yaml + kubectl patch svc nginx-ingress-nginx-controller -n ingress --patch-file nginx-svc-patch.yaml + + if [[ -n "$ingress_label" ]]; then + echo "🏷️ 设置节点标签: $ingress_label" + kubectl label nodes --selector="kubernetes.io/hostname=$(hostname)" "$ingress_label" --overwrite || true + fi + + echo "✅ NGINX Ingress Controller 安装完成,IP: $ingress_ip" +} + +# 示例调用(你可以传入具体 IP) +setup_k3s_ingress 8.130.10.142 + diff --git a/scripts/kong-gateway/GatewayAPI-deepflow-example.yaml b/scripts/kong-gateway/GatewayAPI-deepflow-example.yaml new file mode 100644 index 0000000..fb6cd0a --- /dev/null +++ b/scripts/kong-gateway/GatewayAPI-deepflow-example.yaml @@ -0,0 +1,44 @@ +kubectl create secret tls onwalk-tls --cert=/etc/ssl/onwalk.net.pem --key=/etc/ssl/onwalk.net.key -n deepflow +echo " +apiVersion: gateway.networking.k8s.io/v1 +kind: Gateway +metadata: + name: deepflow-gateway + namespace: deepflow + annotations: + konghq.com/publish-service: kong/kong-gateway-proxy +spec: + gatewayClassName: kong + listeners: + - name: https + port: 443 + protocol: HTTPS + hostname: "deepflow-demo.onwalk.net" # 匹配的域名 + tls: + mode: Terminate + certificateRefs: + - name: onwalk-tls # 引用存放证书的 Secret + allowedRoutes: + namespaces: + from: All +--- +apiVersion: gateway.networking.k8s.io/v1 +kind: HTTPRoute +metadata: + name: deepflow-demo-route + namespace: deepflow +spec: + parentRefs: + - name: deepflow-gateway + namespace: deepflow + hostnames: + - deepflow-demo.onwalk.net # 匹配的域名 + rules: + - matches: + - path: + type: PathPrefix + value: / # 匹配所有路径请求 + backendRefs: + - name: front-end # 目标服务名 + port: 80 # 后端服务的端口 +" | kubectl apply -f - diff --git a/scripts/kong-gateway/GatewayAPI-example.yaml b/scripts/kong-gateway/GatewayAPI-example.yaml new file mode 100644 index 0000000..aef9ead --- /dev/null +++ b/scripts/kong-gateway/GatewayAPI-example.yaml @@ -0,0 +1,81 @@ +apiVersion: gateway.networking.k8s.io/v1 +kind: Gateway +metadata: + name: example-gateway + namespace: 
kong + annotations: + konghq.com/publish-service: kong/kong-gateway-proxy +spec: + gatewayClassName: kong + listeners: + - name: https + port: 443 + protocol: HTTPS + hostname: "*.onwalk.net" # ⭐ 通配符域名,匹配所有子域 + tls: + mode: Terminate + certificateRefs: + - name: onwalk-tls # 存放 *.onwalk.net 证书的 Secret + allowedRoutes: + namespaces: + from: All +--- +apiVersion: gateway.networking.k8s.io/v1 +kind: HTTPRoute +metadata: + name: ui-route + namespace: ai +spec: + parentRefs: + - name: example-gateway + namespace: kong + hostnames: + - open-webui.onwalk.net + rules: + - matches: + - path: + type: PathPrefix + value: / + backendRefs: + - name: open-webui + port: 80 +--- +apiVersion: gateway.networking.k8s.io/v1 +kind: HTTPRoute +metadata: + name: keycloak-route + namespace: keycloak +spec: + parentRefs: + - name: example-gateway + namespace: kong + hostnames: + - keycloak.onwalk.net + rules: + - matches: + - path: + type: PathPrefix + value: / + backendRefs: + - name: keycloak + port: 80 +--- +apiVersion: gateway.networking.k8s.io/v1 +kind: HTTPRoute +metadata: + name: argocd-route + namespace: argocd +spec: + parentRefs: + - name: example-gateway + namespace: kong # ⭐ 必须指定! 
+ hostnames: + - argocd.onwalk.net # ⭐ 注意要匹配实际访问域名 + rules: + - matches: + - path: + type: PathPrefix + value: / + backendRefs: + - name: argocd-server + port: 80 diff --git a/scripts/kong-gateway/GatewayAPI-http-example.yaml b/scripts/kong-gateway/GatewayAPI-http-example.yaml new file mode 100644 index 0000000..9c53c16 --- /dev/null +++ b/scripts/kong-gateway/GatewayAPI-http-example.yaml @@ -0,0 +1,81 @@ +kubectl create secret tls onwalk-tls --cert=/etc/ssl/onwalk.net.pem --key=/etc/ssl/onwalk.net.key +echo " +apiVersion: apps/v1 +kind: Deployment +metadata: + name: nginx + namespace: default +spec: + replicas: 1 # 可根据需要调整副本数 + selector: + matchLabels: + app: nginx + template: + metadata: + labels: + app: nginx + spec: + containers: + - name: nginx + image: nginx:latest # 使用最新的 Nginx 镜像 + ports: + - containerPort: 80 # Nginx 默认的 HTTPS 端口 +--- +apiVersion: v1 +kind: Service +metadata: + name: nginx-svc + namespace: default +spec: + selector: + app: nginx + ports: + - protocol: TCP + port: 80 # 公开的服务端口 + targetPort: 80 # 容器内部的端口 + type: ClusterIP # 可以根据需要选择 NodePort 或 LoadBalancer 类型 +--- +apiVersion: gateway.networking.k8s.io/v1 +kind: Gateway +metadata: + name: demo-gateway + namespace: default + annotations: + konghq.com/publish-service: kong/kong-gateway-proxy +spec: + gatewayClassName: kong + listeners: + - name: https + port: 443 + protocol: HTTPS + hostname: "demo.onwalk.net" # 匹配的域名 + tls: + mode: Terminate + certificateRefs: + - name: onwalk-tls # 引用存放证书的 Secret + allowedRoutes: + namespaces: + from: All +--- +apiVersion: gateway.networking.k8s.io/v1 +kind: HTTPRoute +metadata: + name: demo-route + namespace: default +spec: + parentRefs: + - name: demo-gateway + namespace: default + hostnames: + - demo.onwalk.net # 匹配的域名 + rules: + - matches: + - path: + type: PathPrefix + value: / # 匹配所有路径请求 + backendRefs: + - name: nginx-svc # 目标服务名 + port: 80 # 后端服务的端口 +" | kubectl apply -f - + +curl -ksv https://demo.onwalk.net/ --resolve demo.onwalk.net:443:172.30.0.10 
diff --git a/scripts/kong-gateway/deploy-kong-gateway.sh b/scripts/kong-gateway/deploy-kong-gateway.sh new file mode 100644 index 0000000..8feaf14 --- /dev/null +++ b/scripts/kong-gateway/deploy-kong-gateway.sh @@ -0,0 +1,77 @@ +kubectl apply -f https://github.com/kubernetes-sigs/gateway-api/releases/download/v1.1.0/standard-install.yaml + +helm repo add kong https://charts.konghq.com +helm repo update +cat > kong-values.yaml < ${dest_path}" + curl -sLo "$dest_path" "$src_url" + fi +} + +export_airgap_images() { + local arch=$1 + local out="${BASE_DIR}/images/k3s-airgap-images-${arch}.tar" + local ns="k8s.io" + + nerd() { + sudo nerdctl --namespace $ns --address /run/k3s/containerd/containerd.sock "$@" + } + + # ---- 核心镜像列表 ---- + local core_imgs=( + docker.io/rancher/mirrored-pause:3.6 + docker.io/rancher/mirrored-metrics-server:v0.6.3 + docker.io/rancher/mirrored-coredns-coredns:1.10.1 + docker.io/rancher/mirrored-prometheus-node-exporter:v1.3.1 + docker.io/rancher/mirrored-kube-state-metrics-kube-state-metrics:v2.12.0 + ) + + echo "[INFO] 拉取核心镜像…" + for img in "${core_imgs[@]}"; do + nerd pull "$img" + done + + echo "[INFO] 保存离线包 → $out" + mkdir -p "$(dirname "$out")" + nerd save -o "$out" "${core_imgs[@]}" + + echo "[OK] 完成:$out 已生成" +} + +######################################## +# 写 node‑exporter YAML → addons/node-exporter.yaml +######################################## +generate_node_exporter_yaml() { + local ADDON_DIR=${BASE_DIR}/addons + mkdir -p "$ADDON_DIR" + + cat > "${ADDON_DIR}/node-exporter.yaml" <<'EOF' +apiVersion: v1 +kind: ServiceAccount +metadata: + name: node-exporter + namespace: kube-system +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: {name: node-exporter} +rules: +- apiGroups: [""] + resources: ["nodes", "nodes/proxy", "services", "endpoints"] + verbs: ["get", "list", "watch"] +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: {name: node-exporter} +roleRef: {apiGroup: 
rbac.authorization.k8s.io, kind: ClusterRole, name: node-exporter} +subjects: +- kind: ServiceAccount + name: node-exporter + namespace: kube-system +--- +apiVersion: apps/v1 +kind: DaemonSet +metadata: + name: node-exporter + namespace: kube-system +spec: + selector: {matchLabels: {app: node-exporter}} + template: + metadata: {labels: {app: node-exporter}} + spec: + hostPID: true + hostNetwork: true + serviceAccountName: node-exporter + containers: + - name: node-exporter + image: docker.io/rancher/mirrored-prometheus-node-exporter:v1.3.1 + imagePullPolicy: IfNotPresent + args: + - "--path.procfs=/host/proc" + - "--path.sysfs=/host/sys" + - "--path.rootfs=/host/root" + securityContext: {privileged: true} + resources: + requests: {cpu: "50m", memory: "30Mi"} + volumeMounts: + - {name: proc, mountPath: /host/proc, readOnly: true} + - {name: sys, mountPath: /host/sys, readOnly: true} + - {name: rootfs, mountPath: /host/root, readOnly: true} + volumes: + - {name: proc, hostPath: {path: /proc}} + - {name: sys, hostPath: {path: /sys}} + - {name: rootfs, hostPath: {path: /}} +--- +apiVersion: v1 +kind: Service +metadata: + name: node-exporter + namespace: kube-system + labels: {app: node-exporter} +spec: + clusterIP: None + selector: {app: node-exporter} + ports: + - {name: metrics, port: 9100, targetPort: 9100} +EOF + echo "[OK] 生成 ${ADDON_DIR}/node-exporter.yaml" +} + +######################################## +# 写 kube‑state‑metrics YAML → addons/kube-state-metrics.yaml +######################################## +generate_kube_state_metrics_yaml() { + local ADDON_DIR=${BASE_DIR}/addons + mkdir -p "$ADDON_DIR" + + cat > "${ADDON_DIR}/kube-state-metrics.yaml" <<'EOF' +apiVersion: v1 +kind: ServiceAccount +metadata: + name: kube-state-metrics + namespace: kube-system +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: {name: kube-state-metrics} +rules: +- apiGroups: [""] + resources: + ["pods","nodes","namespaces","services","endpoints", + 
"persistentvolumes","persistentvolumeclaims", + "configmaps","secrets","limitranges","replicationcontrollers"] + verbs: ["get","list","watch"] +- apiGroups: ["apps"] + resources: ["statefulsets","daemonsets","deployments","replicasets"] + verbs: ["get","list","watch"] +- apiGroups: ["batch"] + resources: ["cronjobs","jobs"] + verbs: ["get","list","watch"] +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: {name: kube-state-metrics} +roleRef: {apiGroup: rbac.authorization.k8s.io, kind: ClusterRole, name: kube-state-metrics} +subjects: +- kind: ServiceAccount + name: kube-state-metrics + namespace: kube-system +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: kube-state-metrics + namespace: kube-system +spec: + replicas: 1 + selector: {matchLabels: {app: kube-state-metrics}} + template: + metadata: {labels: {app: kube-state-metrics}} + spec: + serviceAccountName: kube-state-metrics + containers: + - name: kube-state-metrics + image: docker.io/rancher/mirrored-kube-state-metrics-kube-state-metrics:v2.12.0 + imagePullPolicy: IfNotPresent + ports: + - {name: metrics, containerPort: 8080} + - {name: telemetry, containerPort: 8081} + resources: + requests: {cpu: "40m", memory: "60Mi"} +--- +apiVersion: v1 +kind: Service +metadata: + name: kube-state-metrics + namespace: kube-system + labels: {app: kube-state-metrics} +spec: + selector: {app: kube-state-metrics} + ports: + - {name: metrics, port: 8080, targetPort: 8080} + - {name: telemetry, port: 8081, targetPort: 8081} +EOF + echo "[OK] 生成 ${ADDON_DIR}/kube-state-metrics.yaml" +} + +for ARCH in "${ARCH_LIST[@]}"; do + echo -e "\n[INFO] 准备架构:${ARCH}" + + safe_copy "${K3S_URL_BASE}/k3s" "${BASE_DIR}/bin/k3s-${ARCH}" + chmod +x "${BASE_DIR}/bin/k3s-${ARCH}" + + safe_copy "https://dl.k8s.io/release/v1.29.1/bin/linux/${ARCH}/kubectl" "${BASE_DIR}/bin/kubectl-${ARCH}" + chmod +x "${BASE_DIR}/bin/kubectl-${ARCH}" + + TMP_HELM="/tmp/helm-${ARCH}.tgz" + safe_copy 
"https://get.helm.sh/helm-${HELM_VERSION}-linux-${ARCH}.tar.gz" "$TMP_HELM" + tar -xzf "$TMP_HELM" -C /tmp + mv "/tmp/linux-${ARCH}/helm" "${BASE_DIR}/bin/helm-${ARCH}" + chmod +x "${BASE_DIR}/bin/helm-${ARCH}" + + safe_copy "https://github.com/containerd/nerdctl/releases/download/v${NERDCTL_VERSION}/nerdctl-${NERDCTL_VERSION}-linux-${ARCH}.tar.gz" \ + "/tmp/nerdctl-${NERDCTL_VERSION}-linux-${ARCH}.tar.gz" + tar -xzf "/tmp/nerdctl-${NERDCTL_VERSION}-linux-${ARCH}.tar.gz" -C /tmp + cp "/tmp/nerdctl" "${BASE_DIR}/bin/nerdctl-${ARCH}" + chmod +x "${BASE_DIR}/bin/nerdctl-${ARCH}" + + safe_copy "https://github.com/containernetworking/plugins/releases/download/${CNI_VERSION}/cni-plugins-linux-${ARCH}-${CNI_VERSION}.tgz" \ + "${BASE_DIR}/cni-plugins/cni-plugins-linux-${ARCH}-${CNI_VERSION}.tgz" + + export_airgap_images "$ARCH" + + generate_node_exporter_yaml + generate_kube_state_metrics_yaml +done + +safe_copy "https://get.k3s.io" "${BASE_DIR}/install/k3s-official-install.sh" +chmod +x "${BASE_DIR}/install/k3s-official-install.sh" + +# 生成 install-server.sh +cat > "${BASE_DIR}/install-server.sh" <<'EOF' +#!/bin/bash +set -e + +ARCH=$(uname -m) +case "$ARCH" in + x86_64 | amd64) ARCH="amd64" ;; # Intel/AMD 64 位 + aarch64 | arm64) ARCH="arm64" ;; # ARM 64 位 + *) + echo "[ERROR] 不支持的架构:$ARCH" + exit 1 + ;; +esac + +# 路径定义 +BIN_DIR="./bin" +K3S_BIN="${BIN_DIR}/k3s-${ARCH}" +HELM_BIN="${BIN_DIR}/helm-${ARCH}" +KUBECTL_BIN="${BIN_DIR}/kubectl-${ARCH}" +NERDCTL_BIN="${BIN_DIR}/nerdctl-${ARCH}" + +echo "[INFO] 安装 CLI 工具(${ARCH})到 /usr/local/bin" + +install_bin() { + local src=$1 + local dst=$2 + echo " ↳ $dst" + sudo cp "$src" "$dst" + sudo chmod +x "$dst" +} + +install_bin "$K3S_BIN" /usr/local/bin/k3s +install_bin "$HELM_BIN" /usr/local/bin/helm +install_bin "$KUBECTL_BIN" /usr/local/bin/kubectl +install_bin "$NERDCTL_BIN" /usr/local/bin/nerdctl + +echo "[INFO] 执行官方离线安装脚本" +INSTALL_K3S_SKIP_DOWNLOAD=true \ +INSTALL_K3S_EXEC="server \ + --write-kubeconfig-mode 644 \ + 
--disable=traefik,servicelb,local-storage \ + --kube-apiserver-arg=service-node-port-range=0-50000" \ +bash "install/k3s-official-install.sh" + +echo "[INFO] 准备 airgap 镜像" +sudo nerdctl \ +--namespace k8s.io \ +--address /run/k3s/containerd/containerd.sock load -i images/k3s-airgap-images-${ARCH}.tar + +echo "[INFO] 等待 K3s 启动..." +sleep 5 + +echo "[INFO] 应用默认组件(如存在)" +mkdir -pv ~/.kube/ +cp -v /etc/rancher/k3s/k3s.yaml ~/.kube/config +kubectl apply -f addons/node-exporter.yaml || true +kubectl apply -f addons/kube-state-metrics.yaml || true + +echo "[SUCCESS] 离线 K3s 安装完成 ✅" +EOF + +chmod +x "${BASE_DIR}/install-server.sh" + +# Generate install-agent.sh +cat > "${BASE_DIR}/install-agent.sh" <<'EOF' +#!/bin/bash +set -e + +ARCH=$(uname -m) +case "$ARCH" in + x86_64 | amd64) ARCH="amd64" ;; + aarch64 | arm64) ARCH="arm64" ;; + *) + echo "[ERROR] 不支持的架构:$ARCH" + exit 1 + ;; +esac + +if [[ -z "$K3S_TOKEN" || -z "$K3S_URL" ]]; then + echo "[ERROR] 你必须设置环境变量 K3S_TOKEN 和 K3S_URL" + echo "例如:" + echo " export K3S_TOKEN=K10xxxxxxxx" + echo " export K3S_URL=https://:6443" + exit 1 +fi + +echo "[INFO] 安装 CLI 工具(${ARCH})到 /usr/local/bin" + +# 路径定义 +BIN_DIR="./bin" +K3S_BIN="${BIN_DIR}/k3s-${ARCH}" +NERDCTL_BIN="${BIN_DIR}/nerdctl-${ARCH}" + + +install_bin() { + local src=$1 + local dst=$2 + echo " ↳ $dst" + sudo cp "$src" "$dst" + sudo chmod +x "$dst" +} + +echo "[INFO] 安装 CLI 工具(${ARCH})到 /usr/local/bin" + +install_bin "$K3S_BIN" /usr/local/bin/k3s +install_bin "$NERDCTL_BIN" /usr/local/bin/nerdctl + +sudo chmod +x /usr/local/bin/k3s +sudo chmod +x /usr/local/bin/nerdctl + +echo "[INFO] 执行官方 agent 安装脚本(使用离线模式)" +INSTALL_K3S_SKIP_DOWNLOAD=true \ +INSTALL_K3S_EXEC="agent" \ +bash install/k3s-official-install.sh + +echo "[INFO] 准备 airgap 镜像" +sudo nerdctl \ +--namespace k8s.io \ +--address /run/k3s/containerd/containerd.sock load -i images/k3s-airgap-images-${ARCH}.tar + +echo "[SUCCESS] Agent 节点已完成离线安装 ✅" + +EOF + +chmod +x "${BASE_DIR}/install-agent.sh" +echo "[OK] 已生成 install-agent.sh
✅" + +cat > "${BASE_DIR}/README.md" <:6443 +export K3S_TOKEN=K10xxxxxxxx +bash ./install-agent.sh +\`\`\` + +### 3. 验证安装状态 + +\`\`\`bash +kubectl get nodes +kubectl get pods -A +\`\`\` + +--- + +## 🛠️ 使用 nerdctl 操作 K3s 内部 containerd + +\`\`\`bash +./bin/nerdctl-\$(uname -m) \\ + --namespace k8s.io \\ + --address /run/k3s/containerd/containerd.sock \\ + images +\`\`\` + +--- + +## 📂 目录结构示例 + +\`\`\` +${BASE_DIR}/ +├── bin/ +│ ├── k3s-(amd64/arm64) +│ ├── helm-(amd64/arm64) +│ ├── kubectl-(amd64/arm64) +│ └── nerdctl-(amd64/arm64) +├── images/ +│ └── k3s-airgap-images-amd64.tar +├── addons/ +│ ├── metrics-server.yaml +│ ├── node-exporter.yaml +│ └── kube-state-metrics.yaml +├── install-agent.sh +├── install-server.sh +├── README.md +\`\`\` + +--- +EOF + +echo -e "\n✅ [DONE] 离线安装包构建完成:${BASE_DIR}/" +tree "${BASE_DIR}" || ls -R "${BASE_DIR}" diff --git a/scripts/merge_csv.py b/scripts/merge_csv.py new file mode 100644 index 0000000..4b975ad --- /dev/null +++ b/scripts/merge_csv.py @@ -0,0 +1,30 @@ +import sys +import glob +import pandas as pd + +def merge_csv_files(src_pattern, dest_file): + # 获取匹配的源 CSV 文件列表 + csv_files = glob.glob(src_pattern) + + if not csv_files: + print(f"没有找到匹配的文件: {src_pattern}") + return + + print(f"找到以下文件: {csv_files}") + + # 使用 pandas 读取所有 CSV 文件并合并 + combined_df = pd.concat([pd.read_csv(file) for file in csv_files], ignore_index=True) + + # 将合并后的数据写入目标 CSV 文件 + combined_df.to_csv(dest_file, index=False) + print(f"合并完成,结果已保存到 {dest_file}") + +if __name__ == "__main__": + if len(sys.argv) != 3: + print("使用方法: python merge_csv.py <源文件模式> <目标文件>") + sys.exit(1) + + src_pattern = sys.argv[1] + dest_file = sys.argv[2] + + merge_csv_files(src_pattern, dest_file) diff --git a/scripts/merge_vars.py b/scripts/merge_vars.py new file mode 100644 index 0000000..65ab071 --- /dev/null +++ b/scripts/merge_vars.py @@ -0,0 +1,91 @@ + +import os +import sys +import yaml +import json +from secret.hcp import secret + +def check_env_vars(vars): + 
"""检查环境变量是否存在并且非空""" + for var in vars: + value = os.environ.get(var) + if value is None or value == "": + print(f"Error: Environment variable '{var}' is not set or is empty.") + sys.exit(1) + +def main(): + # 定义需要检查的环境变量 + required_vars = [ + "DOMAIN", + "CLUSTER_NAME", + "SUDO_PASSWORD", + "HCP_API_URL", + "HCP_CLIENT_ID", + "HCP_CLIENT_SECRET", + "GATEWAY_PUBLIC_CONFIG" + ] + + # 检查环境变量 + check_env_vars(required_vars) + + # 从环境变量获取输入 + domain = os.environ.get("DOMAIN") + cluster_name = os.environ.get("CLUSTER_NAME") + ansible_become_pass = os.environ.get("SUDO_PASSWORD") + hcp_api_url = os.environ.get("HCP_API_URL") + hcp_client_id = os.environ.get("HCP_CLIENT_ID") + hcp_client_secret = os.environ.get("HCP_CLIENT_SECRET") + gateway_public_config = os.environ.get("GATEWAY_PUBLIC_CONFIG") + + # 检查并去掉开头的 '$' + if gateway_public_config.startswith('$'): + gateway_public_config = gateway_public_config[1:] + + # 获取 HCP API 令牌 + api_token = secret.get_hcp_api_token(hcp_client_id, hcp_client_secret) + + # 获取密钥数据 + secret_data = secret.get_secret_data(hcp_api_url, api_token) + + # 将 gateway_public_config 转换为字典 + public_config_dict = yaml.safe_load(gateway_public_config) + + # 从密钥数据中提取 private_key + private_key_name = f"{public_config_dict.get('name', '')}_private_key" + private_key = secret.get_secret_value_by_name(secret_data, private_key_name) + + if private_key is None: + print(f"Error: Secret value for '{private_key_name}' not found.") + sys.exit(1) + + # 填充 private_key + public_config_dict['private_key'] = private_key + + # 填充 peers 部分的 public_key + for peer in public_config_dict.get('peers', []): + peer_name = peer.get('name', '') + public_key_name = f"{peer_name}_public_key" + public_key = secret.get_secret_value_by_name(secret_data, public_key_name) + + if public_key is None: + print(f"Error: Secret value for '{public_key_name}' not found.") + sys.exit(1) + + peer['public_key'] = public_key + + # 构建最终的配置字典 + final_config = { + "domain": domain, + "cluster_name": 
cluster_name, + "ansible_become_pass": ansible_become_pass, + "gateway": { + "public_config": public_config_dict + } + } + + # 输出为 JSON + with open("extra_vars.json", "w") as json_file: + json.dump(final_config, json_file, indent=2) + +if __name__ == "__main__": + main() diff --git a/scripts/network-config/ubuntu/init-wsl.sh b/scripts/network-config/ubuntu/init-wsl.sh new file mode 100644 index 0000000..8a18432 --- /dev/null +++ b/scripts/network-config/ubuntu/init-wsl.sh @@ -0,0 +1,40 @@ +#!/bin/bash + +set -e + +# ✅ 1. 安装 openssh-server +echo "🔧 安装 openssh-server..." +sudo apt update +sudo apt install -y openssh-server + +# ✅ 2. 配置 sshd 默认启动(适配 systemd) +echo "📦 启用 SSH 服务..." +sudo systemctl enable ssh +sudo systemctl start ssh + +# ✅ 3. 配置静态 IP(通过 systemd-networkd) +echo "🌐 配置静态 IP 地址 10.253.0.2..." +sudo mkdir -p /etc/systemd/network + +cat <@$WSLStaticIP" diff --git a/scripts/pipeline-library/vars/ansibleSteps.groovy b/scripts/pipeline-library/vars/ansibleSteps.groovy new file mode 100644 index 0000000..28359fd --- /dev/null +++ b/scripts/pipeline-library/vars/ansibleSteps.groovy @@ -0,0 +1,74 @@ +// pipeline-library/vars/ansibleSteps.groovy + +// 检出代码 +def checkoutCode() { + stage('Checkout repository and submodules') { + agent { + docker { image 'your-docker-image' } // 替换为您的 Docker 镜像 + } + steps { + checkout scm + } + } +} + +// 预先设置 +def preSetup(String sshPassword) { + stage('Pre Setup') { + agent { + docker { image 'your-docker-image' } // 替换为您的 Docker 镜像 + } + steps { + script { + sh "echo \"${sshPassword}\" > ~/.vault_pass.txt" + sh "echo 'ansible_password: \'xxxx\'' >> inventory/group_vars/all.yml" + sh "echo 'ansible_become_password: \'xxxx\'' >> inventory/group_vars/all.yml" + } + } + } +} + +// 部署 +def deploy(String sshUser, String instanceName, String installVersion) { + stage('Deploy Ignition Server') { + agent { + docker { image 'your-docker-image' } // 替换为您的 Docker 镜像 + } + steps { + script { + sh "export ANSIBLE_HOST_KEY_CHECKING=False" + sh 
"ansible-playbook -u ${sshUser} -i inventor.ini -kK playbooks/server.yml -l ${instanceName} -e 'ign_install_ver=${installVersion}' --vault-password-file .vault_pass.txt --diff" + } + } + } +} + +// 后续设置 +def postSetup() { + stage('Post Setup') { + agent { + docker { image 'your-docker-image' } // 替换为您的 Docker 镜像 + } + steps { + script { + sh "export ANSIBLE_HOST_KEY_CHECKING=False" + } + } + } +} + +// 检查 +def check() { + stage('Check') { + agent { + docker { image 'your-docker-image' } // 替换为您的 Docker 镜像 + } + steps { + script { + // Add your check logic here + } + } + } +} + +return this // 返回以便导出所有函数 diff --git a/scripts/pulp-installer.sh b/scripts/pulp-installer.sh new file mode 100644 index 0000000..465df22 --- /dev/null +++ b/scripts/pulp-installer.sh @@ -0,0 +1,120 @@ +#!/bin/bash +set -e + +echo "🚀 开始离线安装 Pulp Operator..." + +# 安装 nerdctl(如存在) +if [ -f nerdctl.tar.gz ]; then + echo "📦 解压 nerdctl..." + tar xzvf nerdctl.tar.gz -C /usr/local/bin/ +fi + +# 导入镜像 +echo "🚀 导入 pulp-operator 镜像..." + +IMAGES=( + "images/pulp-operator.tar" + "images/kube-rbac-proxy.tar" +) + +if command -v docker &>/dev/null && docker info &>/dev/null; then + for img in "${IMAGES[@]}"; do + docker load -i "$img" + done +elif [ -S /run/k3s/containerd/containerd.sock ]; then + export CONTAINERD_ADDRESS=/run/k3s/containerd/containerd.sock + for img in "${IMAGES[@]}"; do + nerdctl --namespace k8s.io load -i "$img" + done +elif [ -S /run/containerd/containerd.sock ]; then + export CONTAINERD_ADDRESS=/run/containerd/containerd.sock + for img in "${IMAGES[@]}"; do + nerdctl --namespace k8s.io load -i "$img" + done +else + echo "❌ 没有可用的容器运行时" + exit 1 +fi + +# 创建命名空间 +kubectl create namespace pulp || true + +# 安装 chart +echo "📦 安装本地 Helm Chart..." +helm upgrade --install pulp-operator ./charts/pulp-operator/ -n pulp + +# 等待 CRD 注册 +sleep 10 + +# 生成默认 CR yaml(可改为 values 覆盖渲染) +echo "📝 生成 CR manifests/pulp-cr.yaml..." 
+mkdir -p manifests +cat > manifests/pulp-cr.yaml <" | awk '{print $3}' | while read image_id; do + echo "Deleting image: $image_id" + sudo CONTAINERD_ADDRESS=$CONTAINERD_ADDRESS nerdctl --namespace k8s.io rmi "$image_id" +done + +echo "Cleanup complete." diff --git a/scripts/registry/push_images.sh b/scripts/registry/push_images.sh new file mode 100644 index 0000000..62562bd --- /dev/null +++ b/scripts/registry/push_images.sh @@ -0,0 +1,39 @@ +#!/bin/bash + +set +x + +# 设置容器和仓库地址 +CONTAINERD_ADDRESS="/run/k3s/containerd/containerd.sock" +LOCAL_REGISTRY="local-registry.onwalk.net:5000" +TARGET_REGISTRY="images.onwalk.net/private/deepflow-v6.5" + +# 设置输出文件 +input_file="all.tag.list" + +# 登录到目标 registry +echo "Logging in to $TARGET_REGISTRY..." +sudo CONTAINERD_ADDRESS=$CONTAINERD_ADDRESS nerdctl login $TARGET_REGISTRY + +# 读取 all.tag.list 并处理每个镜像 +while IFS= read -r line; do + # 如果行为空,跳过 + if [ -z "$line" ]; then + continue + fi + + # 替换 local-registry 地址为目标地址, 也删除 :5000 端口 + target_tag="${line//$LOCAL_REGISTRY/$TARGET_REGISTRY}" + + # 打标签并推送镜像 + echo "Tagging and Pushing $line -> $target_tag" + sudo CONTAINERD_ADDRESS=$CONTAINERD_ADDRESS nerdctl pull "$line" + sudo CONTAINERD_ADDRESS=$CONTAINERD_ADDRESS nerdctl tag "$line" "$target_tag" + sudo CONTAINERD_ADDRESS=$CONTAINERD_ADDRESS nerdctl push "$target_tag" + + # 清理本地镜像 + echo "Cleaning up local image: $line" + sudo CONTAINERD_ADDRESS=$CONTAINERD_ADDRESS nerdctl rmi "$line" + echo "Cleaning up local image: $target_tag" + sudo CONTAINERD_ADDRESS=$CONTAINERD_ADDRESS nerdctl rmi "$target_tag" +done < "$input_file" + diff --git a/scripts/registry/setup-nerdctl.sh b/scripts/registry/setup-nerdctl.sh new file mode 100644 index 0000000..749662b --- /dev/null +++ b/scripts/registry/setup-nerdctl.sh @@ -0,0 +1,17 @@ +#!/bin/bash + +wget https://github.com/containerd/nerdctl/releases/download/v2.0.2/nerdctl-2.0.2-linux-amd64.tar.gz + +sudo mkdir -pv /etc/nerdctl +sudo touch /etc/nerdctl/nerdctl.toml + +sudo cat > 
/etc/nerdctl/nerdctl.toml << EOF +debug = false +debug_full = false +address = "unix:///run/k3s/containerd/containerd.sock" +namespace = "k8s.io" +cni_path = "/var/lib/nerdctl/cni/bin" +cni_netconfpath = "/var/lib/nerdctl/cni/net.d" +EOF + +sudo CONTAINERD_ADDRESS=/run/k3s/containerd/containerd.sock nerdctl --namespace k8s.io ps diff --git a/scripts/registry/setup-registry.sh b/scripts/registry/setup-registry.sh new file mode 100644 index 0000000..070557d --- /dev/null +++ b/scripts/registry/setup-registry.sh @@ -0,0 +1,260 @@ +#!/bin/bash + +#https://github.com/containerd/nerdctl/releases/download/v2.0.2/nerdctl-2.0.2-linux-amd64.tar.gz +#https://github.com/containerd/nerdctl/releases/download/v2.0.2/nerdctl-full-2.0.2-linux-amd64.tar.gz +#wget https://github.com/containernetworking/plugins/releases/download/v1.6.2/cni-plugins-linux-amd64-v1.6.2.tgz + +#!/bin/bash +set -e + +# ============================================= +# ✅ 环境变量检查(可配置) +# ============================================= +: "${REGISTRY_DOMAIN:=kube.registry.local}" +: "${REGISTRY_PORT:=5000}" +: "${NERDCTL_VERSION:=v2.0.2}" +: "${CNI_VERSION:=v1.6.2}" +: "${CNI_DIR:=/opt/cni/bin}" +: "${CERT_DIR:=/opt/registry/certs}" +: "${CONFIG_DIR:=/opt/registry/config}" +: "${REGISTRY_DATA:=/var/lib/registry}" +: "${REGISTRY_YAML:=registry.yaml}" +: "${COMPOSE_YAML:=compose.yaml}" +: "${TAR_FILE:=registry.tar}" + +# ============================================= +# ✅ 自动检测 containerd.sock +# ============================================= +if [[ -S "/run/k3s/containerd/containerd.sock" ]]; then + export CONTAINERD_ADDRESS="/run/k3s/containerd/containerd.sock" +elif [[ -S "/run/containerd/containerd.sock" ]]; then + export CONTAINERD_ADDRESS="/run/containerd/containerd.sock" +elif [[ -S "/var/run/containerd/containerd.sock" ]]; then + export CONTAINERD_ADDRESS="/var/run/containerd/containerd.sock" +else + echo "❌ 未检测到有效的 containerd.sock,请确认 containerd 是否正常运行。" + exit 1 +fi + +export NERDCTL_NAMESPACE="k8s.io" + +# 
============================================= +echo "📦 准备 nerdctl 全功能版..." +if ! command -v nerdctl &>/dev/null; then # install the nerdctl "full" bundle only when nerdctl is not already on PATH + if [ ! -f /tmp/nerdctl-full.tgz ]; then # reuse an archive left by a previous run + echo "⬇️ 下载 nerdctl..." + wget -O /tmp/nerdctl-full.tgz \ + "https://github.com/containerd/nerdctl/releases/download/${NERDCTL_VERSION}/nerdctl-full-${NERDCTL_VERSION#v}-linux-amd64.tar.gz" + else + echo "📦 已存在 nerdctl-full.tgz,跳过下载" + fi + + echo "📦 解压 nerdctl 到 /usr/local..." + sudo tar -C /usr/local -xzf /tmp/nerdctl-full.tgz + echo "✅ nerdctl 安装完成: $(nerdctl --version)" +else + echo "✅ nerdctl 已存在: $(nerdctl --version)" +fi + +# ============================================= +echo "📦 安装 CNI 插件..." +if [ ! -f "${CNI_DIR}/bridge" ]; then # the bridge plugin is used as the "already installed" marker + if [ ! -f /tmp/cni.tgz ]; then # reuse an archive left by a previous run + echo "⬇️ 下载 CNI 插件..." + wget -O /tmp/cni.tgz \ + "https://github.com/containernetworking/plugins/releases/download/${CNI_VERSION}/cni-plugins-linux-amd64-${CNI_VERSION}.tgz" + else + echo "📦 已存在 cni.tgz,跳过下载" + fi + + sudo mkdir -p "${CNI_DIR}" + sudo tar -C "${CNI_DIR}" -xzf /tmp/cni.tgz + echo "✅ CNI 插件已安装到: ${CNI_DIR}" +else + echo "✅ CNI 插件已存在: ${CNI_DIR}/bridge" +fi + +# ============================================= +echo "📦 解压 SSL 证书..." + +if [ ! -f "ssl_certificates.tar.gz" ]; then # fetch the certificate bundle only when it is not in the working directory + echo "⬇️ 未找到 ssl_certificates.tar.gz,尝试从 GitHub 下载..." + wget -O ssl_certificates.tar.gz \ + "https://github.com/svc-design/ansible/releases/download/release-self-signed-cert_kube.registry.local/ssl_certificates.tar.gz" || { + echo "❌ 无法下载 ssl_certificates.tar.gz,终止执行" + exit 1 + } +fi +# Always unpack: a freshly downloaded archive must be extracted too, not only a pre-existing one. +if [ -f "ssl_certificates.tar.gz" ]; then + mkdir -p "$CERT_DIR" + tar -xvpf ssl_certificates.tar.gz -C "$CERT_DIR" + echo "✅ 证书已解压至: $CERT_DIR" +fi + +# ============================================= + +# ============ Generate registry-config ============ +echo "⚙️ 准备 registry 配置..." +sudo mkdir -pv "$CONFIG_DIR" +sudo mkdir -pv "$REGISTRY_DATA" +echo "📝 写入 registry-config.yaml..."
+sudo cat > "${CONFIG_DIR}/${REGISTRY_YAML}" < /dev/null +version: 0.1 +log: + fields: + service: registry +storage: + cache: + blobdescriptor: inmemory + filesystem: + rootdirectory: /var/lib/registry + delete: + enabled: true +http: + addr: :${REGISTRY_PORT} + headers: + X-Content-Type-Options: [nosniff] + tls: + certificate: /etc/docker/registry/domain.crt + key: /etc/docker/registry/domain.key +health: + storagedriver: + enabled: true + interval: 10s + threshold: 3 +EOF +echo "✅ registry.yaml 已创建" + +# ========== 生成 compose.yaml ========== +echo "🛠️ 生成 compose 配置..." +cat < /dev/null +services: + registry: + image: registry:latest + container_name: registry + restart: always + network_mode: host + volumes: + - /var/lib/registry:/var/lib/registry + - ${CONFIG_DIR}/registry.yaml:/etc/docker/registry/config.yml + - ${CERT_DIR}/kube.registry.local.cert:/etc/docker/registry/domain.crt + - ${CERT_DIR}/kube.registry.local.key:/etc/docker/registry/domain.key +EOF +echo "✅ compose.yaml 已创建" + +# ============================================= +echo "📦 导入本地 registry 镜像..." +if [ -f "/usr/local/deepflow/$TAR_FILE" ]; then + sudo CONTAINERD_ADDRESS="$CONTAINERD_ADDRESS" nerdctl --namespace $NERDCTL_NAMESPACE load -i "/usr/local/deepflow/$TAR_FILE" +else + echo "⚠️ 本地镜像文件不存在:/usr/local/deepflow/$TAR_FILE" +fi + +# ============================================= +echo "🔁 重启 registry 服务..." +sudo CONTAINERD_ADDRESS="$CONTAINERD_ADDRESS" nerdctl --namespace $NERDCTL_NAMESPACE compose -f "$CONFIG_DIR/compose.yaml" down || true +sudo CONTAINERD_ADDRESS="$CONTAINERD_ADDRESS" nerdctl --namespace $NERDCTL_NAMESPACE compose -f "$CONFIG_DIR/compose.yaml" up -d + +# ============================================= +echo "🔗 添加 hosts 映射..." +if ! 
grep -q "$REGISTRY_DOMAIN" /etc/hosts; then + echo "127.0.0.1 $REGISTRY_DOMAIN" | sudo tee -a /etc/hosts + echo "✅ /etc/hosts 已添加 $REGISTRY_DOMAIN" +else + echo "✅ hosts 中已存在 $REGISTRY_DOMAIN" +fi + +echo "✅ Registry 启动成功: https://$REGISTRY_DOMAIN:$REGISTRY_PORT" + +# ============================================= +echo "🔐 安装 CA 证书到系统信任目录..." + +CA_CERT="${CERT_DIR}/ca.cert" +if [ ! -f "$CA_CERT" ]; then + echo "❌ 未找到 CA 证书: $CA_CERT" +else + if grep -qi "ubuntu\|debian" /etc/os-release; then # pick the trust-store layout from the distro id + sudo cp "$CA_CERT" "/usr/local/share/ca-certificates/kube-registry-ca.crt" + sudo update-ca-certificates + echo "✅ 已导入 CA 到 Ubuntu/Debian 系统信任目录" + elif grep -qi "rhel\|centos\|rocky" /etc/os-release; then + sudo cp "$CA_CERT" "/etc/pki/ca-trust/source/anchors/kube-registry-ca.crt" + sudo update-ca-trust extract + echo "✅ 已导入 CA 到 RHEL/CentOS 系统信任目录" + else + echo "⚠️ 未知发行版,跳过系统 CA 导入" + fi +fi + +# ============================================= +echo "🐳 安装 CA 到容器运行时 (Docker/Containerd)..." + +# --- Docker CA --- +if command -v docker &>/dev/null; then + echo "🔧 配置 Docker..." + DOCKER_CA_DIR="/etc/docker/certs.d/${REGISTRY_DOMAIN}:${REGISTRY_PORT}" # Docker looks up certs.d/<host>:<port> for a registry on a non-default port + sudo mkdir -p "$DOCKER_CA_DIR" + sudo cp "$CA_CERT" "${DOCKER_CA_DIR}/ca.crt" + echo "✅ 已导入 CA 到 Docker: $DOCKER_CA_DIR" + sudo systemctl restart docker +fi + +# --- Containerd CA --- +if command -v containerd &>/dev/null || [ -S "$CONTAINERD_ADDRESS" ]; then # CONTAINERD_ADDRESS is the socket detected earlier in this script; CONTAINERD_SOCK was never set + echo "🔧 配置 Containerd..." + + # Alpine/K3s: /etc/containerd/certs.d + # cri-o/nerdctl: /etc/containerd/certs.d/<registry domain>/ca.crt + CONTAINERD_CA_DIR="/etc/containerd/certs.d/${REGISTRY_DOMAIN}" # follow REGISTRY_DOMAIN, same as the K3s section, instead of a hard-coded host + sudo mkdir -p "$CONTAINERD_CA_DIR" + sudo cp "$CA_CERT" "${CONTAINERD_CA_DIR}/ca.crt" + echo "✅ 已导入 CA 到 Containerd: $CONTAINERD_CA_DIR" + sudo systemctl restart containerd || echo "⚠️ containerd 重启失败,可能在 K3s 中不适用" +fi + + +# --- K3s CA --- +if [[ -S "/run/k3s/containerd/containerd.sock" ]]; then + echo "🔧 检测到 K3s 环境,准备导入 CA..."
+ + K3S_CA_DIR="/etc/containerd/certs.d/${REGISTRY_DOMAIN}" + sudo mkdir -p "$K3S_CA_DIR" + sudo cp "$CA_CERT" "${K3S_CA_DIR}/ca.crt" + + echo "✅ 已导入 CA 到 K3s containerd: $K3S_CA_DIR" + + echo "🔁 重启 k3s..." + sudo systemctl restart k3s || echo "⚠️ K3s 重启失败,请手动确认" +fi diff --git a/scripts/registry/show_images.sh b/scripts/registry/show_images.sh new file mode 100644 index 0000000..3b9c4a5 --- /dev/null +++ b/scripts/registry/show_images.sh @@ -0,0 +1,37 @@ +#!/bin/bash + +# 设置协议和 registry 地址(https:// 或 http://) +PROTOCOL="https://" +REGISTRY="local-registry.onwalk.net:5000" + +# 获取仓库列表 +repos=$(curl -s -X GET "$PROTOCOL$REGISTRY/v2/_catalog" | jq -r '.repositories[]') + +# 要隐藏的仓库列表 +hidden_repos=("") + +# 创建或清空输出文件 +output_file="all.tag.list" +> "$output_file" + +# 遍历每个仓库,获取对应的标签列表 +for repo in $repos; do + # 如果是隐藏的仓库,跳过 + if [[ " ${hidden_repos[@]} " =~ " ${repo} " ]]; then + continue + fi + + # 获取标签列表 + tags=$(curl -s -X GET "$PROTOCOL$REGISTRY/v2/$repo/tags/list" | jq -r '.tags[]') + + # 如果仓库有标签,则按格式输出到文件 + if [ -n "$tags" ]; then + for tag in $tags; do + # 输出格式:local-registry.onwalk.net:5000/repository:tag + echo "$REGISTRY/$repo:$tag" >> "$output_file" + done + fi +done + +# 排序并去重 +sort -u "$output_file" -o "$output_file" diff --git a/scripts/secret/README.md b/scripts/secret/README.md new file mode 100644 index 0000000..44fa3a2 --- /dev/null +++ b/scripts/secret/README.md @@ -0,0 +1,45 @@ +# Secret Management Script + +This script is designed to fetch and manage secrets from HCP Cloud Secrets. It retrieves secrets based on environment variables and writes the final configuration to a JSON file. + +# Prerequisites + +1. **Python 3**: Ensure Python 3 is installed on your system. +2. **Python Libraries**: This script requires the `requests`, `pyyaml`, and `secret` libraries. 
You can install these dependencies using pip: + +```bash +pip install requests pyyaml +``` + +# Environment Variables + +The script requires the following environment variables: + +- `HCP_API_URL`: The API URL for fetching secrets from HCP. +- `HCP_CLIENT_ID`: The client ID for HCP authentication. +- `HCP_CLIENT_SECRET`: The client secret for HCP authentication. + +# Usage + +Before running the script, set the required environment variables. +Ensure all of the variables listed above are exported. For example: + +```bash +export HCP_API_URL="https://api.cloud.hashicorp.com/secrets/..." +export HCP_CLIENT_ID="your_client_id" +export HCP_CLIENT_SECRET="your_client_secret" +``` + +# Functions + +## get_hcp_api_token(client_id, client_secret) +Obtains an HCP API token using the provided client ID and secret. + +## get_secret_data(api_url, api_token) +Fetches secret data from HCP Cloud using the provided API URL and token. + +## get_secret_value_by_name(secret_data, secret_name) +Extracts the value of a secret from the fetched secret data based on the provided name. Returns `None` if no secret with that name is present. + +# License +This script is licensed under the GPLv3 License. See the LICENSE file for more details.
diff --git a/scripts/secret/hcp/__init__.py b/scripts/secret/hcp/__init__.py new file mode 100644 index 0000000..87f0cc7 --- /dev/null +++ b/scripts/secret/hcp/__init__.py @@ -0,0 +1 @@ +from .secret import get_hcp_api_token, get_secret_data, get_secret_value_by_name diff --git a/scripts/secret/hcp/secret.py b/scripts/secret/hcp/secret.py new file mode 100644 index 0000000..0141be9 --- /dev/null +++ b/scripts/secret/hcp/secret.py @@ -0,0 +1,55 @@ +import requests + +def get_hcp_api_token(client_id, client_secret): + """Obtain the HCP API token using client credentials.""" + url = "https://auth.idp.hashicorp.com/oauth2/token" + headers = { + "Content-Type": "application/x-www-form-urlencoded" + } + data = { + "client_id": client_id, + "client_secret": client_secret, + "grant_type": "client_credentials", + "audience": "https://api.hashicorp.cloud" + } + + response = requests.post(url, headers=headers, data=data) + response.raise_for_status() # Raise an error for bad responses + return response.json().get("access_token") + +def get_secret_data(api_url, api_token): + """ + Fetch the secret data from HCP Cloud using the API URL and token. + + Parameters: + - api_url: The URL to fetch secret data from HCP Cloud. + - api_token: The API token for authentication. + + Returns: + - The JSON response containing the secret data. + """ + headers = { + "Authorization": f"Bearer {api_token}" + } + + response = requests.get(api_url, headers=headers) + response.raise_for_status() # Raise an error for bad responses + return response.json() + +def get_secret_value_by_name(secret_data, secret_name): + """ + Get the version value by the specified name from the fetched secret data. + + Parameters: + - secret_data: The JSON data containing secrets fetched from HCP Cloud. + - secret_name: The name of the secret to fetch the version value for. + + Returns: + - The value of the secret for the specified name. 
+ """ + secrets = secret_data.get('secrets', []) + for secret_info in secrets: + if secret_info.get('name') == secret_name: + return secret_info.get('version', {}).get('value') + + return None diff --git a/scripts/secret/setup.py b/scripts/secret/setup.py new file mode 100644 index 0000000..a6edd2f --- /dev/null +++ b/scripts/secret/setup.py @@ -0,0 +1,17 @@ +from setuptools import setup, find_packages + +setup( + name="hcp_secret", + version="0.1", + packages=find_packages(include=['secret', 'secret.hcp']), + install_requires=[ + "requests", + ], + tests_require=[ + "unittest", + ], + description="A library to fetch secrets from HCP Cloud", + author="Haitao Pan", + author_email="manbuzhe2008@gmail.com", + url="https://github.com/yourusername/hcp_secret", +) diff --git a/scripts/secret/tests/__init__.py b/scripts/secret/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/scripts/secret/tests/test_secret.py b/scripts/secret/tests/test_secret.py new file mode 100644 index 0000000..5fa84cd --- /dev/null +++ b/scripts/secret/tests/test_secret.py @@ -0,0 +1,29 @@ +import unittest +from hcp import get_hcp_api_token, get_secret_data, get_secret_value_by_name + +class TestHCPSecret(unittest.TestCase): + + def test_get_hcp_api_token(self): + # Mock the API response and test the token retrieval + pass # Add actual test logic here + + def test_get_secret_data(self): + # Mock the API response and test secret data fetching + pass # Add actual test logic here + + def test_get_secret_value_by_name(self): + secret_data = { + "secrets": [ + { + "name": "cn_gateway_private_key", + "version": { + "value": "test_value" + } + } + ] + } + value = get_secret_value_by_name(secret_data, "cn_gateway_private_key") + self.assertEqual(value, "test_value") + +if __name__ == "__main__": + unittest.main() diff --git a/scripts/setup-gitea.sh b/scripts/setup-gitea.sh new file mode 100644 index 0000000..3877d9b --- /dev/null +++ b/scripts/setup-gitea.sh @@ -0,0 +1,83 @@ +helm 
repo add gitea https://dl.gitea.com/charts +helm repo update +kubectl create ns gitea || true +cat > gitea-values.yaml < grafana-agent-values.yaml << EOF +global: + image: + registry: "images.onwalk.net/public" +agent: + mode: 'static' + configMap: + create: true + content: '' +logs: + enabled: false +traces: + enabled: false +EOF + +helm upgrade --install grafana-agent grafana/grafana-agent --namespace deepflow -f grafana-agent-values.yaml + +cat > grafana-agent-configmap.yaml << EOF +apiVersion: v1 +data: + config.yaml: |- + server: + log_level: info + log_format: logfmt + metrics: + global: + scrape_interval: 1m + configs: + - name: agent + scrape_configs: + - job_name: kube-state-metrics + static_configs: + - targets: ['10.43.155.169:8080'] + - job_name: node-metrics + static_configs: + - targets: ['10.43.68.133:9100'] + remote_write: + - url: http://deepflow-agent.deepflow.svc.cluster.local/api/v1/prometheus +kind: ConfigMap +metadata: + annotations: + meta.helm.sh/release-name: grafana-agent + meta.helm.sh/release-namespace: deepflow + labels: + app.kubernetes.io/instance: grafana-agent + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/name: grafana-agent + app.kubernetes.io/version: v0.42.0 + helm.sh/chart: grafana-agent-0.42.0 + name: grafana-agent + namespace: deepflow +EOF + +kubectl apply -f grafana-agent-configmap.yaml + +kubectl get pods -n deepflow diff --git a/scripts/setup-microservice-demo.sh b/scripts/setup-microservice-demo.sh new file mode 100644 index 0000000..4ea8af8 --- /dev/null +++ b/scripts/setup-microservice-demo.sh @@ -0,0 +1,11 @@ +#git clone https://github.com/aliyun/alibabacloud-microservice-demo.git +kubectl create ns microservice-demo || true +kubectl delete secret tls otel-demo-secret -n microservice-demo || true +kubectl create secret tls otel-demo-secret --key=/etc/ssl/onwalk.net.key --cert=/etc/ssl/onwalk.net.pem -n microservice-demo || true +cat > microservice-demo-config.yaml << EOF +image: + prefix: 
images.onwalk.net/public/microservice-demo/ + version: 1.0.0-SNAPSHOT +EOF +helm package alibabacloud-microservice-demo/helm-chart/ +helm upgrade --install microservice-demo /root/microservice-demo-0.1.0.tgz -n microservice-demo -f microservice-demo-config.yaml diff --git a/scripts/setup-open-telemetry-demo.sh b/scripts/setup-open-telemetry-demo.sh new file mode 100644 index 0000000..180a9ef --- /dev/null +++ b/scripts/setup-open-telemetry-demo.sh @@ -0,0 +1,116 @@ +helm repo add open-telemetry https://open-telemetry.github.io/opentelemetry-helm-charts +helm repo update +kubectl create ns otel || true +kubectl delete secret tls otel-demo-secret -n otel || true +kubectl create secret tls otel-demo-secret --key=/etc/ssl/onwalk.net.key --cert=/etc/ssl/onwalk.net.pem -n otel || true +cat > otel-demo-config.yaml << EOF +default: + image: + repository: images.onwalk.net/public/opentelemetry/demo + tag: "" + pullPolicy: IfNotPresent +components: + accountingService: + enabled: true + initContainers: + - name: wait-for-kafka + image: images.onwalk.net/public/base/busybox:latest + adService: + enabled: true + cartService: + enabled: true + initContainers: + - name: wait-for-valkey + image: images.onwalk.net/public/base/busybox:latest + checkoutService: + enabled: true + initContainers: + - name: wait-for-kafka + image: images.onwalk.net/public/base/busybox:latest + currencyService: + enabled: true + emailService: + enabled: true + frauddetectionService: + enabled: true + initContainers: + - name: wait-for-kafka + image: images.onwalk.net/public/base/busybox:latest + frontend: + enabled: true + frontendProxy: + enabled: true + ingress: + enabled: true + ingressClassName: nginx + hosts: + - host: otel-demo.onwalk.net + paths: + - path: / + pathType: Prefix + port: 8080 + - path: /jaeger/ui/ + pathType: Prefix + port: 8080 + - path: /grafana/ + pathType: Prefix + port: 8080 + - path: /loadgen/ + pathType: Prefix + port: 8080 + - path: /feature/ + pathType: Prefix + port: 8080 
+ tls: + - secretName: otel-demo-secret + hosts: + - otel-demo.onwalk.net + imageprovider: + enabled: true + loadgenerator: + enabled: true + paymentService: + enabled: true + productCatalogService: + enabled: true + quoteService: + enabled: true + recommendationService: + enabled: true + shippingService: + enabled: true + flagd: + enabled: false + imageOverride: + repository: "ghcr.io/open-feature/flagd" + tag: "v0.11.4" + initContainers: + - name: init-config + image: images.onwalk.net/public/base/busybox:latest + kafka: + enabled: true + valkey: + enabled: true + imageOverride: + repository: "images.onwalk.net/public/opentelemetry/valkey" + tag: "7.2-alpine" +grafana: + enabled: true + global: + imageRegistry: images.onwalk.net/public +prometheus: + enabled: true +jaeger: + enabled: true + allInOne: + image: + repository: "images.onwalk.net/public/jaegertracing/all-in-one" + tag: "1.53.0" +opentelemetry-collector: + enabled: true + image: + repository: "images.onwalk.net/public/opentelemetry/opentelemetry-collector-contrib" +opensearch: + enabled: false +EOF +helm upgrade --install otel-demo open-telemetry/opentelemetry-demo --version=0.33.3 -n otel -f otel-demo-config.yaml diff --git a/scripts/setup-vector.sh b/scripts/setup-vector.sh new file mode 100644 index 0000000..761f15d --- /dev/null +++ b/scripts/setup-vector.sh @@ -0,0 +1,101 @@ +helm repo add vector https://helm.vector.dev +helm repo update +cat << EOF > vector-values-custom.yaml +role: Agent +#nodeSelector: +# allow/vector: "false" + +# resources -- Set Vector resource requests and limits. +resources: + requests: + cpu: 200m + memory: 256Mi + limits: + cpu: 200m + memory: 256Mi +image: + repository: images.onwalk.net/public/timberio/vector + pullPolicy: Always + tag: "0.37.1-distroless-libc" +podLabels: + vector.dev/exclude: "true" + app: deepflow +# extraVolumes -- Additional Volumes to use with Vector Pods. 
+ # extraVolumes: + # - name: opt-log + # hostPath: + # path: "/opt/log/" +# extraVolumeMounts -- Additional Volume to mount into Vector Containers. + # extraVolumeMounts: + # - name: opt-log + # mountPath: "/opt/log/" + # readOnly: true +customConfig: + ## The configuration comes from https://vector.dev/docs/reference/configuration/global-options/#data_dir + data_dir: /vector-data-dir + api: + enabled: true + address: 127.0.0.1:8686 + playground: false + sources: + kubernetes_logs: + type: kubernetes_logs + namespace_annotation_fields: + namespace_labels: "" + node_annotation_fields: + node_labels: "" + pod_annotation_fields: + pod_annotations: "" + pod_labels: "" + + transforms: + remap_kubernetes_logs: + type: remap + inputs: + - kubernetes_logs + source: |- + # try to parse json + if is_string(.message) && is_json(string!(.message)) { + tags = parse_json(.message) ?? {} + .message = tags.message # FIXME: the log content key inside json + del(tags.message) + .json = tags + } + + if !exists(.level) { + if exists(.json) { + .level = .json.level + del(.json.level) + } else { + # match log levels surround by ``[]`` or ``<>`` with ignore case; the named group "level" is what level_tags.level reads below + level_tags = parse_regex(.message, r'[\[\\\<](?<level>(?i)INFOR?(MATION)?|WARN(ING)?|DEBUG?|ERROR?|TRACE|FATAL|CRIT(ICAL)?)[\]\\\>]') ?? {} + if !exists(level_tags.level) { + # match log levels surround by whitespace, required uppercase strictly in case mismatching + level_tags = parse_regex(.message, r'[\s](?<level>INFOR?(MATION)?|WARN(ING)?|DEBUG?|ERROR?|TRACE|FATAL|CRIT(ICAL)?)[\s]') ??
{} + } + if exists(level_tags.level) { + level_tags.level = upcase(string!(level_tags.level)) + .level = level_tags.level + } + } + } + + if !exists(._df_log_type) { + # default log type + ._df_log_type = "user" + } + + if !exists(.app_service) { + # FIXME: files 模块没有此字段,请通过日志内容注入应用名称 + .app_service = .kubernetes.container_name + } + sinks: + http: + encoding: + codec: json + inputs: + - remap_kubernetes_logs # NOTE: 注意这里数据源是 transform 模块的 key + type: http + uri: http://deepflow-agent.deepflow/api/v1/log +EOF +helm upgrade --install vector vector/vector --namespace deepflow --create-namespace -f vector-values-custom.yaml