feat(playbooks): add comprehensive vhosts roles and ops scripts

This commit is contained in:
Haitao Pan 2025-12-21 19:23:19 +08:00
parent 3344b1e530
commit 8a57639da8
688 changed files with 27226 additions and 32 deletions

12
alicloud_dns_record.yml Normal file
View File

@ -0,0 +1,12 @@
---
# Manage a single Alibaba Cloud DNS record through the
# vhosts/alicloud_dns_record role.
# `aliyun_ak` / `aliyun_sk` are not defined here and must be supplied
# externally (for example with --extra-vars or a vault file).
- name: Manage Alibaba Cloud DNS record
  hosts: global-homepage.svc.plus
  become: true
  vars:
    alicloud_dns_domain: "svc.plus"
    alicloud_dns_rr: "www"
    alicloud_dns_type: "A"
    # NOTE(review): 1.2.3.4 looks like a placeholder address — confirm before running.
    alicloud_dns_value: "1.2.3.4"
    alicloud_access_key: "{{ aliyun_ak }}"
    alicloud_secret_key: "{{ aliyun_sk }}"
  roles:
    - role: vhosts/alicloud_dns_record

16
alicloud_dns_sync.yml Normal file
View File

@ -0,0 +1,16 @@
---
# Runs the vhosts/alicloud_dns_sync role on the control node with the DNS
# records loaded from a vars file.
- hosts: localhost
  gather_facts: false
  # Load the DNS record definitions dynamically.
  vars_files:
    - vars/dns_records_svc_plus.yaml  # swap this file to target another environment
  # AK/SK can be overridden on the command line with --extra-vars; both fall
  # back to an empty string when aliyun_ak / aliyun_sk are not set.
  vars:
    alicloud_secret_key: "{{ aliyun_sk | default('') }}"
    alicloud_access_key: "{{ aliyun_ak | default('') }}"
  roles:
    - vhosts/alicloud_dns_sync

View File

@ -0,0 +1,7 @@
---
# Applies the `github` role from the control node: local connection, no fact
# gathering.
- name: Apply branch protection rules
  hosts: localhost
  gather_facts: false
  connection: local
  roles:
    - role: github

8
common Normal file
View File

@ -0,0 +1,8 @@
---
# Applies the vhosts/common role to every inventory host, connecting as
# `ubuntu` and escalating privileges.
- name: Init Linux OS Common setting
  hosts: all
  remote_user: ubuntu
  gather_facts: true
  become: true
  roles:
    - role: vhosts/common

View File

@ -0,0 +1,2 @@
# Dry run: print the would-be changes (--diff) without applying them (--check).
ansible-playbook --inventory inventory/js2_hosts.ini playbook.yml --extra-vars "ansible_ssh_user=ubuntu area=js2" --diff --check
# Real run: apply the changes and print the diffs.
ansible-playbook --inventory inventory/js2_hosts.ini playbook.yml --extra-vars "ansible_ssh_user=ubuntu area=js2" --diff

View File

@ -0,0 +1,8 @@
# Hosts of the js2 area, expressed as the range 10.200.11.1 .. 10.200.11.24.
[js2]
10.200.11.[1:24]

# Connection settings applied to every host.
[all:vars]
ansible_ssh_user=ubuntu
ansible_port=22
ansible_ssh_private_key_file=~/.ssh/id_rsa
ansible_host_key_checking=False

View File

@ -0,0 +1,10 @@
---
# Runs the deepflow_upgrade role on all hosts; `area` and `upgrade_zip_path`
# are passed to the role as play vars.
- name: DeepFlow Agent Upgrade for 区域节点
  hosts: all
  gather_facts: false
  become: true
  vars:
    upgrade_zip_path: ./DeepFlow-Agent-Upgrade-20250523.zip
    area: js2
  roles:
    - role: deepflow_upgrade

View File

@ -0,0 +1,16 @@
---
# Upgrade flow: push the archive to the host, unpack it under /tmp, then run
# the bundled update_agent.sh for the configured area.
# Expects `upgrade_zip_path` (path on the controller) and `area` to be defined.
- name: Sync upgrade package to remote using rsync
  synchronize:
    src: "{{ upgrade_zip_path }}"
    dest: /tmp/
    mode: push

- name: Unzip upgrade package
  unarchive:
    src: "/tmp/{{ upgrade_zip_path | basename }}"
    dest: /tmp/
    remote_src: true

- name: Execute upgrade script
  # argv hands `area` over as exactly one argument, so a value containing
  # whitespace can no longer be split into extra arguments.
  # NOTE(review): assumes the archive unpacks to /tmp/DeepFlow-Agent-Upgrade — confirm.
  command:
    argv:
      - bash
      - update_agent.sh
      - --area
      - "{{ area }}"
    chdir: /tmp/DeepFlow-Agent-Upgrade

5
deploy-docker-harbor.yml Normal file
View File

@ -0,0 +1,5 @@
---
# Applies the docker/harbor role to all hosts with privilege escalation.
- hosts: all
  become: true
  roles:
    - role: docker/harbor

View File

@ -0,0 +1,5 @@
---
# Applies the docker/keycloak role to all hosts with privilege escalation.
- hosts: all
  become: true
  roles:
    - role: docker/keycloak

View File

@ -0,0 +1,5 @@
---
# Applies the docker/OpenObserve role to all hosts with privilege escalation.
- name: setup OpenObserve
  become: true
  hosts: all
  roles:
    - role: docker/OpenObserve/

5
deploy_Tempo_docker.yaml Normal file
View File

@ -0,0 +1,5 @@
---
# Applies the docker/Tempo role to all hosts with privilege escalation.
- name: setup Tempo
  become: true
  hosts: all
  roles:
    - role: docker/Tempo/

View File

@ -0,0 +1,5 @@
---
# Applies the docker/VictoriaLogs role to all hosts with privilege escalation.
- name: setup VictoriaLogs
  become: true
  hosts: all
  roles:
    - role: docker/VictoriaLogs/

View File

@ -0,0 +1,5 @@
---
# Applies the docker/VictoriaMetrics role to all hosts with privilege escalation.
- name: setup VictoriaMetrics
  become: true
  hosts: all
  roles:
    - role: docker/VictoriaMetrics/

View File

@ -0,0 +1,50 @@
---
# Deploys the blackbox exporter on both homepage hosts. Each play passes the
# same `hosts` variable (site name plus the paths listed for it); the list is
# written once on the first play and reused on the second through a YAML alias.
- name: Deploy blackbox exporter
  hosts: global-homepage.svc.plus
  become: true
  vars:
    hosts: &blackbox_targets
      - name: 'www.svc.plus'
        path:
          - '/docs/'
          - '/download/'
          - '/login/'
          - '/logout/'
          - '/register/'
      - name: 'cn-homepage.svc.plus'
        path:
          - '/docs/'
          - '/download/'
          - '/login/'
          - '/logout/'
          - '/register/'
      - name: 'dl.svc.plus'
        path:
          - '/'
  roles:
    - role: roles/vhosts/common/
    - role: roles/vhosts/blackbox_exporter/

- name: Deploy blackbox exporter
  hosts: cn-homepage.svc.plus
  become: true
  vars:
    hosts: *blackbox_targets
  roles:
    - role: roles/vhosts/common/
    - role: roles/vhosts/blackbox_exporter/

7
deploy_deepflow_agent Normal file
View File

@ -0,0 +1,7 @@
---
# Applies the roles/vhosts/deepflow_agent role to all hosts with privilege
# escalation.
- name: Deploy or Upgrade DeepFlow Agent
  become: true
  hosts: all
  roles:
    - roles/vhosts/deepflow_agent

View File

@ -0,0 +1,15 @@
---
# Installs the common baseline plus node_exporter and process_exporter,
# first on cn-homepage and then on global-homepage.
- name: setup otel exporters
  hosts: cn-homepage.svc.plus
  become: true
  roles:
    - role: roles/vhosts/common/
    - role: roles/vhosts/node_exporter/
    - role: roles/vhosts/process_exporter/

- name: setup otel exporters
  hosts: global-homepage.svc.plus
  become: true
  roles:
    - role: roles/vhosts/common/
    - role: roles/vhosts/node_exporter/
    - role: roles/vhosts/process_exporter/

View File

@ -0,0 +1,11 @@
---
# Deploys Grafana through the vhosts/docker and docker/grafana roles.
# `domain` is not defined in this playbook and must be supplied externally
# (inventory or --extra-vars).
- name: setup grafana (docker)
  hosts: all
  become: true
  vars:
    grafana_domain: "{{ domain }}"
    grafana_workspace: /opt/grafana
    grafana_admin_user: admin
    # Take the admin password from the controller's GRAFANA_ADMIN_PASSWORD
    # environment variable instead of committing it. When the variable is
    # unset or empty the previous value is used, so existing invocations keep
    # working — set the variable for any real deployment.
    grafana_admin_password: "{{ lookup('env', 'GRAFANA_ADMIN_PASSWORD') | default('admin', true) }}"
  roles:
    - vhosts/docker/
    - docker/grafana/

72
deploy_monitor_server.yml Normal file
View File

@ -0,0 +1,72 @@
---
# Two plays against otel.svc.plus that share one set of variables:
#   1. the otel-collector role;
#   2. node_exporter, process_exporter, grafana and openobserve.
# Shared values are defined once on the first play (YAML anchors) and reused
# on the second (aliases).
# NOTE(review): `otlp_auth` embeds a Basic-auth credential in the playbook —
# consider moving it to a vault/secret store.
- name: setup otel exporters
  hosts: otel.svc.plus
  become: true
  vars:
    group: web
    otlp_endpoint: &otel_endpoint https://otel.svc.plus/api/default/
    otlp_auth: &otel_auth "Basic cm9vdEBleGFtcGxlLmNvbTpRN01wRjZBTzZFelRjRjdJ"
    # Static scrape targets on port 9100.
    otel_prometheus_node_static_configs: &otel_node_static_configs
      - targets:
          - '172.31.2.33:9100'
        labels:
          vendor: aws
          account: prod
          group: core
          name: tky-proxy.svc.plus
          iid: '172.31.2.33'
      - targets:
          - '167.179.72.223:9100'
        labels:
          vendor: aws
          account: prod
          group: web-system
          name: global-homepage.svc.plus
          iid: '167.179.72.223'
      - targets:
          - '47.120.61.35:9100'
        labels:
          vendor: alicloud
          account: prod
          group: web-system
          name: cn-homepage.svc.plus
          iid: '47.120.61.35'
    # Static scrape targets on port 9256.
    otel_prometheus_process_static_configs: &otel_process_static_configs
      - targets:
          - '172.31.2.33:9256'
        labels:
          vendor: aws
          account: prod
          group: core
          name: tky-proxy.svc.plus
          iid: '172.31.2.33'
      - targets:
          - '167.179.72.223:9256'
        labels:
          vendor: aws
          account: prod
          group: web-system
          name: global-homepage.svc.plus
          iid: '167.179.72.223'
      - targets:
          - '47.120.61.35:9256'
        labels:
          vendor: alicloud
          account: prod
          group: web-system
          name: cn-homepage.svc.plus
          iid: '47.120.61.35'
    exporters:
      endpoint: *otel_endpoint
  roles:
    - role: roles/vhosts/otel-collector/

- name: setup otel exporters
  hosts: otel.svc.plus
  become: true
  vars:
    group: web
    otlp_endpoint: *otel_endpoint
    otlp_auth: *otel_auth
    otel_prometheus_node_static_configs: *otel_node_static_configs
    otel_prometheus_process_static_configs: *otel_process_static_configs
    exporters:
      endpoint: *otel_endpoint
  roles:
    - role: roles/vhosts/node_exporter/
    - role: roles/vhosts/process_exporter/
    - role: roles/vhosts/grafana/
    - role: roles/vhosts/openobserve/

View File

@ -0,0 +1,11 @@
---
# Deploys neurapress through the vhosts/docker and docker/neurapress roles.
# `domain` is not defined here and must be supplied externally.
- name: setup neurapress
  hosts: all
  become: true
  vars:
    neurapress_image: neurapress:prod
    neurapress_workspace: /opt/neurapress
    neurapress_domain: "{{ domain }}"
    neurapress_certbot_email: manbuzhe2009@qq.com
  roles:
    - role: vhosts/docker/
    - role: docker/neurapress/

23
deploy_nginx_vhosts.yml Normal file
View File

@ -0,0 +1,23 @@
---
# Runs the roles/vhosts/nginx role on localhost with two virtual hosts
# (a static homepage and an artifact site) described by `vhosts`.
- name: Setup Nginx server
  hosts: localhost
  become: true
  vars:
    vhosts:
      # Static homepage.
      - name: cn-homepage.svc.plus
        type: homepage-static
        domain:
          - www.svc.plus
          - cn-homepage.svc.plus
        root: /data/update-server/dashboard
        ssl_certificate: /etc/ssl/svc.plus.pem
        ssl_certificate_key: /etc/ssl/svc.plus.rsa.key
      # Artifact site.
      - name: cn-artifact.svc.plus
        type: artifact
        domain:
          - artifact.svc.plus
          - cn-artifact.svc.plus
        root: /data/update-server
        ssl_certificate: /etc/ssl/svc.plus.pem
        ssl_certificate_key: /etc/ssl/svc.plus.rsa.key
  roles:
    - role: roles/vhosts/nginx/

37
deploy_nodejs_vhosts.yml Normal file
View File

@ -0,0 +1,37 @@
---
# Installs a Node.js runtime through the vhosts/nodejs role, then installs any
# extra global npm packages listed in `global_npm_packages`.
- name: Configure Node.js runtime for vhosts
  hosts: all
  gather_facts: true
  become: true
  vars:
    # Choose Node.js version
    # Examples: "20.x" (LTS), "18.x", "22.x", or specific version like "20.11.0"
    nodejs_version: "20.x"
    # Install Yarn package manager (default: true)
    # install_yarn: false
    # Add npm global bin to PATH (default: true)
    # add_npm_to_path: true
    # Custom npm prefix
    # npm_config_prefix: "/usr/local/lib/npm"
    # Additional packages to install globally (optional).
    # Scoped package names must be quoted: a plain YAML scalar cannot start
    # with "@".
    # global_npm_packages:
    #   - pm2
    #   - typescript
    #   - eslint
    #   - "@angular/cli"
  roles:
    - role: vhosts/nodejs
  post_tasks:
    - name: Install additional global npm packages
      npm:
        name: "{{ item }}"
        state: latest
        global: true
      # An undefined or empty list yields zero iterations, so no separate
      # `when` guard is required.
      loop: "{{ global_npm_packages | default([]) }}"

View File

@ -0,0 +1,48 @@
---
# Runs the roles/vhosts/OpenResty role on each homepage host with its own
# `vhosts` list (static homepage + artifact site).
- name: setup OpenResty server
  hosts: cn-homepage.svc.plus
  become: true
  vars:
    vhosts:
      - name: cn-homepage.svc.plus
        type: homepage-static
        domain:
          - www.svc.plus
          - cn-homepage.svc.plus
        root: /data/update-server/dashboard
        ssl_certificate: /etc/ssl/svc.plus.pem
        ssl_certificate_key: /etc/ssl/svc.plus.rsa.key
      - name: cn-artifact.svc.plus
        type: artifact
        domain:
          - artifact.svc.plus
          - cn-artifact.svc.plus
        root: /data/update-server
        ssl_certificate: /etc/ssl/svc.plus.pem
        ssl_certificate_key: /etc/ssl/svc.plus.rsa.key
  roles:
    - role: roles/vhosts/OpenResty/

- name: setup OpenResty server
  hosts: global-homepage.svc.plus
  become: true
  vars:
    vhosts:
      - name: global-homepage.svc.plus
        type: homepage-static
        domain:
          - www.svc.plus
          - global-homepage.svc.plus
        root: /data/update-server/dashboard
        ssl_certificate: /etc/ssl/svc.plus.pem
        ssl_certificate_key: /etc/ssl/svc.plus.rsa.key
      # Only this vhost sets `autoindex_paths`.
      - name: global-artifact.svc.plus
        type: artifact
        domain:
          - artifact.svc.plus
          - global-artifact.svc.plus
        root: /data/update-server
        autoindex_paths:
          - '/'
        ssl_certificate: /etc/ssl/svc.plus.pem
        ssl_certificate_key: /etc/ssl/svc.plus.rsa.key
  roles:
    - role: roles/vhosts/OpenResty/

5
deploy_otel_docker.yaml Normal file
View File

@ -0,0 +1,5 @@
---
# Applies the docker/otel role to all hosts with privilege escalation.
- name: setup otel
  become: true
  hosts: all
  roles:
    - role: docker/otel/

162
deploy_postgre_vhosts.yml Normal file
View File

@ -0,0 +1,162 @@
- name: Setup postgres server
hosts: cn-homepage.svc.plus
become: true
vars:
group: cn-homepage.svc.plus
repo_setup: true
apt_keyrings: &postgresql_common_keyrings
- name: postgresql
content: |
-----BEGIN PGP PUBLIC KEY BLOCK-----
Version: Hockeypuck 2.2
Comment: Hostname:
xsFNBE6XR8IBEACVdDKT2HEH1IyHzXkb4nIWAY7echjRxo7MTcj4vbXAyBKOfjja
UrBEJWHN6fjKJXOYWXHLIYg0hOGeW9qcSiaa1/rYIbOzjfGfhE4x0Y+NJHS1db0V
G6GUj3qXaeyqIJGS2z7m0Thy4Lgr/LpZlZ78Nf1fliSzBlMo1sV7PpP/7zUO+aA4
bKa8Rio3weMXQOZgclzgeSdqtwKnyKTQdXY5MkH1QXyFIk1nTfWwyqpJjHlgtwMi
c2cxjqG5nnV9rIYlTTjYG6RBglq0SmzF/raBnF4Lwjxq4qRqvRllBXdFu5+2pMfC
IZ10HPRdqDCTN60DUix+BTzBUT30NzaLhZbOMT5RvQtvTVgWpeIn20i2NrPWNCUh
hj490dKDLpK/v+A5/i8zPvN4c6MkDHi1FZfaoz3863dylUBR3Ip26oM0hHXf4/2U
A/oA4pCl2W0hc4aNtozjKHkVjRx5Q8/hVYu+39csFWxo6YSB/KgIEw+0W8DiTII3
RQj/OlD68ZDmGLyQPiJvaEtY9fDrcSpI0Esm0i4sjkNbuuh0Cvwwwqo5EF1zfkVj
Tqz2REYQGMJGc5LUbIpk5sMHo1HWV038TWxlDRwtOdzw08zQA6BeWe9FOokRPeR2
AqhyaJJwOZJodKZ76S+LDwFkTLzEKnYPCzkoRwLrEdNt1M7wQBThnC5z6wARAQAB
zRxQb3N0Z3JlU1FMIERlYmlhbiBSZXBvc2l0b3J5wsGOBBMBCAA4AhsDBQsJCAcD
BRUKCQgLBRYCAwEAAh4BAheAFiEEuXsK/KoaR/BE8kSgf8x9RqzMTPgFAlhtCD8A
CgkQf8x9RqzMTPgECxAAk8uL+dwveTv6eH21tIHcltt8U3Ofajdo+D/ayO53LiYO
xi27kdHD0zvFMUWXLGxQtWyeqqDRvDagfWglHucIcaLxoxNwL8+e+9hVFIEskQAY
kVToBCKMXTQDLarz8/J030Pmcv3ihbwB+jhnykMuyyNmht4kq0CNgnlcMCdVz0d3
z/09puryIHJrD+A8y3TD4RM74snQuwc9u5bsckvRtRJKbP3GX5JaFZAqUyZNRJRJ
Tn2OQRBhCpxhlZ2afkAPFIq2aVnEt/Ie6tmeRCzsW3lOxEH2K7MQSfSu/kRz7ELf
Cz3NJHj7rMzC+76Rhsas60t9CjmvMuGONEpctijDWONLCuch3Pdj6XpC+MVxpgBy
2VUdkunb48YhXNW0jgFGM/BFRj+dMQOUbY8PjJjsmVV0joDruWATQG/M4C7O8iU0
B7o6yVv4m8LDEN9CiR6r7H17m4xZseT3f+0QpMe7iQjz6XxTUFRQxXqzmNnloA1T
7VjwPqIIzkj/u0V8nICG/ktLzp1OsCFatWXh7LbU+hwYl6gsFH/mFDqVxJ3+DKQi
vyf1NatzEwl62foVjGUSpvh3ymtmtUQ4JUkNDsXiRBWczaiGSuzD9Qi0ONdkAX3b
ewqmN4TfE+XIpCPxxHXwGq9Rv1IFjOdCX0iG436GHyTLC1tTUIKF5xV4Y0+cXIPC
wX0EEwEIACcCGwMFCwkIBwMFFQoJCAsFFgIDAQACHgECF4AFAlLpFRkFCQ6EJy0A
CgkQf8x9RqzMTPhOZA//Zp0e25pcvle7cLc0YuFr9pBv2JIkLzPm83nkcwKmxaWa
yUIG4Sv6pH6hm8+S/CHQij/yFCX+o3ngMw2J9HBUvafZ4bnbI0RGJ70GsAwraQ0V
lkIfg7GUw3TzvoGYO42rZTru9S0K/6nFP6D1HUu+U+AsJONLeb6oypQgInfXQExP
ZyliUnHdipei4WR1YFW6sjSkZT/5C3J1wkAvPl5lvOVthI9Zs6bZlJLZwusKxU0U
M4Btgu1Sf3nnJcHmzisixwS9PMHE+AgPWIGSec/N27a0KmTTvImV6K6nEjXJey0K
2+EYJuIBsYUNorOGBwDFIhfRk9qGlpgt0KRyguV+AP5qvgry95IrYtrOuE7307Si
dEbSnvO5ezNemE7gT9Z1tM7IMPfmoKph4BfpNoH7aXiQh1Wo+ChdP92hZUtQrY2N
m13cmkxYjQ4ZgMWfYMC+DA/GooSgZM5i6hYqyyfAuUD9kwRN6BqTbuAUAp+hCWYe
N4D88sLYpFh3paDYNKJ+Gf7Yyi6gThcV956RUFDH3ys5Dk0vDL9NiWwdebWfRFbz
oRM3dyGP889aOyLzS3mh6nHzZrNGhW73kslSQek8tjKrB+56hXOnb4HaElTZGDvD
5wmrrhN94kbyGtz3cydIohvNO9d90+29h0eGEDYti7j7maHkBKUAwlcPvMg5m3bC
wX0EEwEIACcCGwMFCwkIBwMFFQoJCAsFFgIDAQACHgECF4AFAlEqbZUFCQg2wEEA
CgkQf8x9RqzMTPhFMQ//WxAfKMdpSIA9oIC/yPD/dJpY/+DyouOljpE6MucMy/Ar
BECjFTBwi/j9NYM4ynAk34IkhuNexc1i9/05f5RM6+riLCLgAOsADDbHD4miZzoS
xiVr6GQ3YXMbOGld9kV9Sy6mGNjcUov7iFcf5Hy5w3AjPfKuR9zXswyfzIU1YXOb
iiZT38l55pp/BSgvGVQsvbNjsff5CbEKXS7q3xW+WzN0QWF6YsfNVhFjRGj8hKtH
vwKcA02wwjLeLXVTm6915ZUKhZXUFc0vM4Pj4EgNswH8Ojw9AJaKWJIZmLyW+aP+
wpu6YwVCicxBY59CzBO2pPJDfKFQzUtrErk9irXeuCCLesDyirxJhv8o0JAvmnMA
KOLhNFUrSQ2m+3EnF7zhfz70gHW+EG8X8mL/EN3/dUM09j6TVrjtw43RLxBzwMDe
ariFF9yC+5bLtnGgxjsB9Ik6GV5v34/NEEGf1qBiAzFmDVFRZlrNDkq6gmpvGnA5
hUWNr+y0i01LjGyaLSWHYjgw2UEQOqcUtTFK9MNzbZze4mVaHMEz9/aMfX25R6qb
iNqCChveIm8mYr5Ds2zdZx+G5bAKdzX7nx2IUAxFQJEE94VLSp3npAaTWv3sHr7d
R8tSyUJ9poDwgw4W9BIcnAM7zvFYbLF5FNggg/26njHCCN70sHt8zGxKQINMc6TC
wX0EEwEIACcCGwMFCwkIBwMFFQoJCAsFFgIDAQACHgECF4AFAlB5KywFCQPDFt8A
CgkQf8x9RqzMTPhuCQ//QAjRSAOCQ02qmUAikT+mTB6baOAakkYq6uHbEO7qPZkv
4E/M+HPIJ4wdnBNeSQjfvdNcZBA/x0hr5EMcBneKKPDj4hJ0panOIRQmNSTThQw9
OU351gm3YQctAMPRUu1fTJAL/AuZUQf9ESmhyVtWNlH/56HBfYjE4iVeaRkkNLJy
X3vkWdJSMwC/LO3Lw/0M3R8itDsm74F8w4xOdSQ52nSRFRh7PunFtREl+QzQ3EA/
WB4AIj3VohIGkWDfPFCzV3cyZQiEnjAe9gG5pHsXHUWQsDFZ12t784JgkGyO5wT2
6pzTiuApWM3k/9V+o3HJSgH5hn7wuTi3TelEFwP1fNzI5iUUtZdtxbFOfWMnZAyp
EhaLmXNkg4zDkH44r0ss9fR0DAgUav1a25UnbOn4PgIEQy2fgHKHwRpCy20d6oCS
lmgyWsR40EPPYvtGq49A2aK6ibXmdvvFT+Ts8Z+q2SkFpoYFX20mR2nsF0fbt1lf
H65P64dukxeRGteWIeNakDD40bAAOH8+OaoTGVBJ2ACJfLVNM53PEoftavAwUYMr
R910qvwYfd/46rh46g1Frr9SFMKYE9uvIJIgDsQB3QBp71houU4H55M5GD8XURYs
+bfiQpJG1p7eB8e5jZx1SagNWc4XwL2FzQ9svrkbg1Y+359buUiP7T6QXX2zY+/C
RgQQEQgABgUCTpdI7gAKCRDFr3dKWFELWqaPAKD1TtT5c3sZz92Fj97KYmqbNQZP
+ACfSC6+hfvlj4GxmUjp1aepoVTo3wfCwVwEEAEIAAYFAk6XSQsACgkQTFprqxLS
p64F8Q//cCcutwrH50UoRFejg0EIZav6LUKejC6kpLeubbEtuaIH3r2zMblPGc4i
+eMQKo/PqyQrceRXeNNlqO6/exHozYi2meudxa6IudhwJIOn1MQykJbNMSC2sGUp
1W5M1N5EYgt4hy+qhlfnD66LR4G+9t5FscTJSy84SdiOuqgCOpQmPkVRm1HX5X1+
dmnzMOCk5LHHQuiacV0qeGO7JcBCVEIDr+uhU1H2u5GPFNHm5u15n25tOxVivb94
xg6NDjouECBH7cCVuW79YcExH/0X3/9G45rjdHlKPH1OIUJiiX47OTxdG3dAbB4Q
fnViRJhjehFscFvYWSqXo3pgWqUsEvv9qJac2ZEMSz9x2mj0ekWxuM6/hGWxJdB+
+985rIelPmc7VRAXOjIxWknrXnPCZAMlPlDLu6+vZ5BhFX0Be3y38f7GNCxFkJzl
hWZ4Cj3WojMj+0DaC1eKTj3rJ7OJlt9S9xnO7OOPEUTGyzgNIDAyCiu8F4huLPaT
ape6RupxOMHZeoCVlqx3ouWctelB2oNXcxxiQ/8y+21aHfD4n/CiIFwDvIQjl7dg
mT3u5Lr6yxuosR3QJx1P6rP5ZrDTP9khT30t+HZCbvs5Pq+v/9m6XDmi+NlU7Zuh
Ehy97tL3uBDgoL4b/5BpFL5U9nruPlQzGq1P9jj40dxAaDAX/WLCwFwEEAECAAYF
AlNObS8ACgkQak9cqaePZ1molQf/WYxinFiP38X2HDuzng+krVpQ/H8GMBvrq9i+
jpg2Q/Rhdd/BbLKeYlndcCWdXTLuh9L4Ey98tAxpHJX0pN1XRe/vrEeYHtaKo/M0
1beecsCp9V8WMmbc1SkXM6UG1jzWLN8xKN5mCJrVpD57RlGddxA/XyTqkCl8JhsP
TUtJavACNwzolLJozHIAB0OdRj8S+EvmBb5kcY/9+opaNq4k/uMHt38g2VoKZZIC
G4zXAWe6N/nlCCMhi5iLgf0IrBW5Eqo0pMqnsseB60WJ3WaHkpj73lzxsRq2kW8Z
7PKFGy+5bDXX8qEmtKOvhYtYyrwyJavU52pQeLOwY7chDrzhc8LBXAQQAQoABgUC
WK7LHAAKCRB/GCjHdaJGA/o5D/911ePhusgnrS1BFc+IMZEUijmgJhIQ3JY2Rs8o
pz66vTPlnoa+edOyaAWWQUM10NERCzw6VUo+Ss0IeHQfd+YlGsyakMGGVlzojXVq
NASFQqF9A4vuiVNGqoXlIOdo+RStRtvlj0U779CLUclIOpZGHs68dRsI3K2EmSzj
DDgOlq+SbmEEgSN542qtR7vAMBT+GOah9sVVWY+1+0jPOg4HttiT7yn5p1j9yi2v
DKRjHatGV3Q7sLf1oow+z4XHws6ZPsQZqBMaH5xbJuzHVNq4uNIAqSaWvpbmRMjq
dwfSV8LwJoszZIx09a1vnT103AITUhJxRr6kLbwZ4khSmGgol7vTKGdPd06kyln8
bKLzosHadoM/NQKvzRxao4VZxRvmuLuCIF+Quqbbb830gWDYxdGqvux2iOuiiDKa
lJ/o6ko77qyWsl7hA5L51OG07ZeeHOf38ReUkHcg9cmqdyPY1R0+5upWmcclN11i
qa/QWz5LvFKd4JWbl31rWtSXJJ0QOiSA5ZXjjkbZ08bKDyWl88P3l2bYrh2W+G+h
GiD0Lg9odUCr2m/Url5iiYdtImeTXMxXNQ/9JIzqPaOHgNUMqgbhqDdGqPXOoZPb
2tXx0AcQQa5mW5ve8dmHdCYCe8GLvW1PCuaD73vjhFSV/s7hoR2QW+p7UmgeYqd3
26cEacLAcwQQAQoAHRYhBCoy/gbahgGIctbIPoDB+7VZbdmbBQJbg5y2AAoJEIDB
+7VZbdmbh2IIAK7tjZGvX/axljW2YgcjqN4Dim/ukNa0rBs6m6N1o5msmPYzAxbk
qiwtr05T1v+L4HIE6RO8BvoLEttfij2Gf0V29yL+NOcYOxkqjM3mKNVdO5Oth9mG
fnHEAv/msg/PQ7x24qF2yPxeW0hMcVO24mVN0cQ1s+/D4hMSfE9prPaKhWDcLb/t
0J4lmekULACK4zwHZKKn9YMD3BGcQceJvqMtguNVnxEJZ9STqv6cxMBLpIvnQIHl
XifNno+VNzqo23NRIpVzImV1zlE2prW9+5o4ljELLerHGVSAAzvrIn8t1uo2gc8I
inHk+X7IEcpkMubJXFj6qwuv2TxcdLHdNFDCwXMEEAEKAB0WIQSTSHXCzDNDnepf
7whU7TuPprNXZwUCXOUwzQAKCRBU7TuPprNXZ/h+D/4/cxj/GReBRlWQc16vGVCa
4CAV5yWT2n2ZZvXNYf7Kpx5JD6PDdkLS+r3hlfASn2PeozNPk4Z5g3rqPWioxdML
H3LepPRUoIOnRaKTNko8tPhPuRvOxOEn4SKh6NKQNqc4P6XfCa+26MvNVPaYONQM
5ClaGRwNvBPfLkGIPOUD12nihb4z02u1sFZtOfX8P5nrhadfxjeNKVXZ4RvaJtFr
K5oFef+2DB+BkZULN+L5AY1MmTA/eDiYHS3m2WxnLZE251g8j0BZh/pO6DCSHxNM
AQMqrZW82o6BCItHJFiQvJ6cyoGmaVgYbMMCWtVmlROTm+6QsxNKR7WEymQ8gaDN
p9bPAFHa2MKGgqIUabj+DY32Wz+wNR9g08tl5X/YJO/MARs3LiY+Qy/iqrhp2r7o
1FdnFSewy08D92u0w0EDxz2u96vWcDzxr2s1iXbhkhDIw7UGrJwfUqQ0eOtGGuDB
vNQS2mGttkXTUgYS8t1oAS2qPPpxHj0RVLGU6yanJjJTRUfdvX2a/2vSP6nFN8oM
li0O1pCxbkSTSwX9ltjfzstdg1mj7/l55njcgSMtC4cU8gKz8JxFzhGr2VMp8FoN
QfhmOlCKZRD3apgKBf18GRt928w7avoGERyY//Z6KM1lzoTYzbBRRq2FEfCnEXVw
jYyiMunW5stvFrnOYK+AL8LA8wQQAQoAHRYhBOL4VIJcPEdF1+gNvuUHu/kqCA+W
BQJmWfFNAAoJEOUHu/kqCA+WODQL/juZhOTrLR8n4cKHCwm1MNmtRA1xd9mPtjpI
jXvn/16MDttapukAxXpjfo3sDsL8nAjla0t8WgdYx/MQywI396YZiaNF7nDAoNCD
wnaEP2i2g+vJRDPniR3+dNwZilITfEVwunHkwh9qCq+NgOPYSkqnShVY+EElIHjG
lrqfSeBgBF3kJi2tWjF+ECSr0bk7OHP4LbcksIFfjTq0U24BA63fcpP9ogomNAvn
SaFSumPET7PRX52OJm0JhbZjs97liXe8lkTjtgMWA+S8t62s/DZRSDC6WxPjZzmo
q8izUhr3hrw4kIQl5hD2AJ0sHHXo/b+ME+08qZMpkPFmWMj5YGtvDd6frqNhqW1e
6Q5pnzAJv91sjmYMHKZrGfA0vWY/NcCni0MLpFGVpNiwa+mP4DNNCadU7nt2AfFa
LnO5YBT9AbpoYtOrFh4DxrNo0Wss46+Nd4IBDdCofkb4BdlrP9kCCPmSaHxaau+i
pVHMEzodLsS7KmQt4c6gAQW0dwsbp8KmBBAWCgBOFiEEWN4UGzqiopPVV787DEOU
R/WIRFQFAmcsbe8Fgy7/WQADBQF4JoY8W14+XStbQC5da2FsaVwuc2hcLmNoc2hc
LXNcLmFudG9uPiQAAAoJEAxDlEf1iERUAoUA/iKXsf21IPCffbK/XOovLsAsX/oA
cQ5XYIhVsIvuJMSjAP97o/c5cJSFI511AMIh/DN4Yw7pe6YRvamUB8BlJlLdBA==
=BkpO
-----END PGP PUBLIC KEY BLOCK-----
repos: &postgresql_common_repos
- name: postgresql
uri: "http://apt.postgresql.org/pub/repos/apt"
suite: "{{ ansible_distribution_release }}-pgdg"
components: ["main"]
enabled: true
cleanup:
- /etc/apt/sources.list.d/pgdg.list
- /etc/apt/sources.list.d/pgdg.sources
postgresql_use_official_repo: false
roles:
- roles/vhosts/common/
- roles/vhosts/postgres/
# Second play of this file: same keyring and repository settings as the first
# play, reused through the YAML aliases defined there (so it must stay in the
# same YAML document — no `---` separator before it).
- name: Setup postgres server
  hosts: global-homepage.svc.plus
  become: true
  vars:
    group: global-homepage.svc.plus
    postgresql_use_official_repo: false
    repo_setup: true
    repos: *postgresql_common_repos
    apt_keyrings: *postgresql_common_keyrings
  roles:
    - role: roles/vhosts/common/
    - role: roles/vhosts/postgres/

View File

@ -0,0 +1,8 @@
---
# Deploys PostgreSQL on the hosts selected by `postgresql_target`
# (falls back to the `postgresql` pattern when it is not set).
- name: Deploy PostgreSQL on vhosts
  hosts: "{{ postgresql_target | default('postgresql') }}"
  become: true
  vars:
    # A variable must not reference itself in its own definition: the former
    # "{{ group | default(...) }}" is a recursive template and fails as soon
    # as this play var is evaluated. Callers who need another value can still
    # pass `-e group=...`, because extra vars take precedence over play vars.
    group: "{{ postgresql_target | default('postgresql') }}"
  roles:
    - roles/vhosts/common/
    - roles/vhosts/postgres/

10
deploy_redis_vhosts.yml Normal file
View File

@ -0,0 +1,10 @@
---
# Applies the roles/vhosts/Redis role, first on cn-homepage and then on
# global-homepage.
- name: Setup Redis server
  hosts: cn-homepage.svc.plus
  become: true
  roles:
    - role: roles/vhosts/Redis/

- name: Setup Redis server
  hosts: global-homepage.svc.plus
  become: true
  roles:
    - role: roles/vhosts/Redis/

View File

@ -0,0 +1,21 @@
---
# Installs common, prometheus, grafana and nginx on each homepage host;
# `group` is set to the host's own name in each play.
- name: setup tiny monitor server
  hosts: cn-homepage.svc.plus
  become: true
  vars:
    group: cn-homepage.svc.plus
  roles:
    - role: roles/vhosts/common/
    - role: roles/vhosts/prometheus/
    - role: roles/vhosts/grafana/
    - role: roles/vhosts/nginx/

- name: setup tiny monitor server
  hosts: global-homepage.svc.plus
  become: true
  vars:
    group: global-homepage.svc.plus
  roles:
    - role: roles/vhosts/common/
    - role: roles/vhosts/prometheus/
    - role: roles/vhosts/grafana/
    - role: roles/vhosts/nginx/

View File

@ -0,0 +1,55 @@
---
# Runs the roles/vhosts/otel-collector role on otel.svc.plus with the OTLP
# endpoint/auth and two static scrape-target lists (ports 9100 and 9256).
# NOTE(review): `otlp_auth` embeds a Basic-auth credential in the playbook —
# consider moving it to a vault/secret store.
- name: setup otel exporters
  hosts: otel.svc.plus
  become: true
  vars:
    group: web
    otlp_endpoint: https://otel.svc.plus/api/default/
    otlp_auth: "Basic cm9vdEBleGFtcGxlLmNvbTpRN01wRjZBTzZFelRjRjdJ"
    # Static scrape targets on port 9100.
    otel_prometheus_node_static_configs:
      - targets:
          - '172.31.2.33:9100'
        labels:
          vendor: aws
          account: prod
          group: core
          name: tky-proxy.svc.plus
          iid: '172.31.2.33'
      - targets:
          - '167.179.72.223:9100'
        labels:
          vendor: aws
          account: prod
          group: web-system
          name: global-homepage.svc.plus
          iid: '167.179.72.223'
      - targets:
          - '47.120.61.35:9100'
        labels:
          vendor: alicloud
          account: prod
          group: web-system
          name: cn-homepage.svc.plus
          iid: '47.120.61.35'
    # Static scrape targets on port 9256.
    otel_prometheus_process_static_configs:
      - targets:
          - '172.31.2.33:9256'
        labels:
          vendor: aws
          account: prod
          group: core
          name: tky-proxy.svc.plus
          iid: '172.31.2.33'
      - targets:
          - '167.179.72.223:9256'
        labels:
          vendor: aws
          account: prod
          group: web-system
          name: global-homepage.svc.plus
          iid: '167.179.72.223'
      - targets:
          - '47.120.61.35:9256'
        labels:
          vendor: alicloud
          account: prod
          group: web-system
          name: cn-homepage.svc.plus
          iid: '47.120.61.35'
    exporters:
      endpoint: https://otel.svc.plus/api/default/
  roles:
    - role: roles/vhosts/otel-collector/

View File

@ -0,0 +1,8 @@
---
# Installs the common baseline and the xcontrol_server role on cn-homepage.
- name: setup xcontrol server
  hosts: cn-homepage.svc.plus
  become: true
  vars:
    group: cn-homepage.svc.plus
  roles:
    - role: roles/vhosts/common/
    - role: roles/vhosts/xcontrol_server/

8
deploy_xcontrol_web.yml Normal file
View File

@ -0,0 +1,8 @@
---
# Applies the roles/vhosts/nodejs role to all hosts with `group` set to
# `mail`. The common role is currently disabled (commented out).
- name: setup xcontrol web
  become: true
  hosts: all
  vars:
    group: mail
  roles:
    # - roles/vhosts/common/
    - role: roles/vhosts/nodejs/

View File

@ -0,0 +1,12 @@
---
# Deploys ZITADEL through the vhosts/docker and docker/zitadel roles.
# `domain` is not defined here and must be supplied externally.
- name: setup zitadel
  hosts: all
  become: true
  vars:
    zitadel_target_host: auth.svc.plus
    zitadel_domain: "{{ domain }}"
    # Take the master key from the controller's ZITADEL_MASTERKEY environment
    # variable instead of committing it. When the variable is unset or empty
    # the previous placeholder is used, so existing invocations keep working —
    # set a real 32-character key for any real deployment.
    zitadel_masterkey: "{{ lookup('env', 'ZITADEL_MASTERKEY') | default('MasterkeyNeedsToHave32Characters', true) }}"
    zitadel_workspace: /opt/zitadel
  roles:
    # - vhosts/common/
    - vhosts/docker/
    - docker/zitadel/

View File

@ -0,0 +1 @@
# Example invocation: pass aliyun_ak / aliyun_sk to the playbook as extra vars.
# NOTE(review): values given via --extra-vars end up in shell history and the
# process list — consider a vault file for real credentials.
# NOTE(review): confirm batch_dns_sync.yml is the intended playbook name.
ansible-playbook batch_dns_sync.yml --extra-vars "aliyun_ak=XXXX aliyun_sk=YYYY"

15
gpu_k8s_init.yml Normal file
View File

@ -0,0 +1,15 @@
---
# Runs common, ssh-trust and gpu-k8s roles on all hosts with the cluster
# topology below.
- hosts: all
  become: true
  vars:
    # ops_host is an inventory hostname (not an address) so that delegation
    # picks up the correct connection variables.
    ops_host: 'k8s-1'
    masters:
      - 'k8s-1'
    nodes:
      - 'k8s-2'
      - 'k8s-3'
  roles:
    - role: roles/vhosts/common/
    - role: roles/vhosts/ssh-trust/
    - role: roles/vhosts/gpu-k8s/

13
gpu_k8s_reset.yml Normal file
View File

@ -0,0 +1,13 @@
---
# Runs the gpu-k8s-reset role on all hosts with the cluster topology below.
- hosts: all
  become: true
  vars:
    # ops_host is an inventory hostname (not an address) so that delegation
    # picks up the correct connection variables.
    ops_host: 'k8s-1'
    masters:
      - 'k8s-1'
    nodes:
      - 'k8s-2'
      - 'k8s-3'
  roles:
    - role: roles/vhosts/gpu-k8s-reset/

17
init-harbor-server Normal file
View File

@ -0,0 +1,17 @@
---
# Includes the `harbor` role on all hosts (connecting as root) with the
# namespace and TLS secret settings below.
- name: setup harbor
  hosts: all
  remote_user: root
  gather_facts: true
  become: true
  tasks:
    - vars:
        group: master
        namespace: harbor
        db_namespace: database
        update_secret: true
        tls:
          - secret_name: harbor-tls
            keyfile: /etc/ssl/onwalk.net.key
            certfile: /etc/ssl/onwalk.net.pem
      include_role:
        name: harbor

17
init_chaos_mesh Normal file
View File

@ -0,0 +1,17 @@
---
# Includes the `chaos-mesh` role on all hosts (connecting as root) with the
# domain, namespace and TLS secret settings below.
- name: setup chaos-mesh server
  hosts: all
  remote_user: root
  gather_facts: true
  become: true
  tasks:
    - vars:
        group: master
        domain: onwalk.net
        namespace: chaos-mesh
        update_secret: true
        tls:
          - secret_name: chaos-mesh-tls
            keyfile: /etc/ssl/onwalk.net.key
            certfile: /etc/ssl/onwalk.net.pem
      include_role:
        name: chaos-mesh

8
init_chartmuseum Normal file
View File

@ -0,0 +1,8 @@
---
# Applies the `chartmuseum` role to all hosts, connecting as `ubuntu`.
- name: deploy chartmuseum
  hosts: all
  remote_user: ubuntu
  gather_facts: true
  become: true
  roles:
    - role: chartmuseum

16
init_deepflow Normal file
View File

@ -0,0 +1,16 @@
---
# Includes the `deepflow` role on all hosts (connecting as root) with the
# namespace and TLS secret settings below.
- name: setup deepflow server
  hosts: all
  remote_user: root
  gather_facts: true
  become: true
  tasks:
    - vars:
        group: master
        update_secret: true
        namespace: monitoring
        tls:
          - secret_name: obs-tls
            keyfile: /etc/ssl/onwalk.net.key
            certfile: /etc/ssl/onwalk.net.pem
      include_role:
        name: deepflow

16
init_flagger-loadtester Normal file
View File

@ -0,0 +1,16 @@
---
# Includes the `flagger-loadtester` role on all hosts (connecting as root).
# The TLS file names are derived from the DOMAIN environment variable of the
# controller.
- name: setup flagger-loadtester server
  hosts: all
  user: root
  become: yes
  gather_facts: yes
  tasks:
    - include_role:
        name: flagger-loadtester
      vars:
        group: master
        update_secret: true
        namespace: loadtester
        tls:
          - secret_name: obs-tls
            # Ansible does not expand shell-style ${DOMAIN}; the literal text
            # would be used as the path. Read the environment variable
            # explicitly instead.
            # NOTE(review): if this file is pre-processed by envsubst before
            # use, DOMAIN must also be exported when ansible-playbook runs.
            keyfile: "/etc/ssl/{{ lookup('env', 'DOMAIN') }}.key"
            certfile: "/etc/ssl/{{ lookup('env', 'DOMAIN') }}.pem"

23
init_gitlab Normal file
View File

@ -0,0 +1,23 @@
---
# Includes the `gitlab` role on all hosts (connecting as root) with version,
# namespace, TLS and OIDC settings.
- name: setup gitlab
  hosts: all
  remote_user: root
  gather_facts: true
  become: true
  tasks:
    - vars:
        group: master
        gitlab_version: "7.0.4"
        namespace: gitlab
        db_namespace: database
        domain: onwalk.net
        auto_issuance: false
        update_secret: true
        tls:
          - secret_name: gitlab-tls
            keyfile: /etc/ssl/onwalk.net.key
            certfile: /etc/ssl/onwalk.net.pem
        gitlab_oidc_client_id: gitlab-oidc
        # NOTE(review): "isser" looks like a typo for "issuer"; the name is
        # kept because the role presumably reads this exact key — verify.
        gitlab_oidc_isser: "https://keycloak.onwalk.net/realms/cloud-sso"
        gitlab_oidc_redirect_uri: "https://gitlab.onwalk.net/users/auth/openid_connect/callback"
      include_role:
        name: gitlab

8
init_grafana_alloy Normal file
View File

@ -0,0 +1,8 @@
---
# Applies the `alloy` role to all hosts, connecting as `ubuntu`.
- name: deploy grafana alloy agent
  hosts: all
  remote_user: ubuntu
  gather_facts: true
  become: true
  roles:
    - role: alloy

8
init_harbor_server Normal file
View File

@ -0,0 +1,8 @@
---
# Applies the `harbor` role to all hosts, connecting as `ubuntu`.
- name: deploy harbor server
  hosts: all
  remote_user: ubuntu
  gather_facts: true
  become: true
  roles:
    - role: harbor

18
init_jenkins Normal file
View File

@ -0,0 +1,18 @@
---
# Includes the `jenkins` role on all hosts (connecting as root) with the
# domain, namespace and TLS secret settings below.
- name: setup jenkins server
  hosts: all
  remote_user: root
  gather_facts: true
  become: true
  tasks:
    - vars:
        group: master
        domain: onwalk.net
        namespace: jenkins
        update_secret: true
        db_namespace: database
        tls:
          - secret_name: jenkins-tls
            keyfile: /etc/ssl/onwalk.net.key
            certfile: /etc/ssl/onwalk.net.pem
      include_role:
        name: jenkins

8
init_k3s_cluster_agent Normal file
View File

@ -0,0 +1,8 @@
---
# Applies the `k3s-cluster-agent` role to all hosts, connecting as `ubuntu`.
- name: Initialize K3s Cluster Agent
  hosts: all
  remote_user: ubuntu
  gather_facts: true
  become: true
  roles:
    - role: k3s-cluster-agent

8
init_k3s_cluster_server Normal file
View File

@ -0,0 +1,8 @@
---
# Applies the `k3s-cluster-server` role to all hosts, connecting as `ubuntu`.
- name: Initialize K3s Cluster Server
  hosts: all
  remote_user: ubuntu
  gather_facts: true
  become: true
  roles:
    - role: k3s-cluster-server

27
init_k3s_cluster_std Normal file
View File

@ -0,0 +1,27 @@
---
# Includes three roles in order on all hosts (connecting as root):
# k3s-reset (with cluster_reset enabled), k3s, then k3s-addon.
- name: set artifact cluster with vhosts
  hosts: all
  remote_user: root
  gather_facts: true
  become: true
  tasks:
    - vars:
        group: master
        cluster_reset: "enable"
      include_role:
        name: k3s-reset

    - vars:
        group: master
        cni: default
        version: "v1.27.2+k3s1"
        pod_cidr: "10.10.0.0/16"
        svc_cidr: "172.16.0.0/16"
        enable_api_access: true
      include_role:
        name: k3s

    - vars:
        group: master
        ingress: nginx
        external_dns: enable
        cert_issuance: vault
      include_role:
        name: k3s-addon

View File

@ -0,0 +1,38 @@
---
# Includes four roles in order on all hosts (connecting as root):
# k3s-reset (with cluster_reset enabled), k3s, k3s-addon (ingress and
# external DNS disabled), then argo-server.
- name: set artifact cluster with vhosts
  hosts: all
  remote_user: root
  gather_facts: true
  become: true
  tasks:
    - vars:
        group: master
        cluster_reset: "enable"
      include_role:
        name: k3s-reset

    - vars:
        group: master
        cni: default
        version: "v1.27.2+k3s1"
        pod_cidr: "10.10.0.0/16"
        svc_cidr: "172.16.0.0/16"
        enable_api_access: true
      include_role:
        name: k3s

    - vars:
        group: master
        ingress: disable
        external_dns: disable
        cert_issuance: vault
      include_role:
        name: k3s-addon

    - vars:
        group: master
        namespace: argocd
        domain: onwalk.net
        update_secret: true
        tls:
          - secret_name: argocd-server-tls
            keyfile: /etc/ssl/onwalk.net.key
            certfile: /etc/ssl/onwalk.net.pem
      include_role:
        name: argo-server

13
init_observability-agent Normal file
View File

@ -0,0 +1,13 @@
---
# Includes the `observability-agent` role on all hosts (connecting as root)
# with the DeepFlow server address and cluster id below.
- name: setup observability agent
  hosts: all
  remote_user: root
  gather_facts: true
  become: true
  tasks:
    - vars:
        group: master
        namespace: monitoring
        deepflowserverip: '10.146.0.8'
        deepflowk8sclusterid: d-kqjofXyZbg
      include_role:
        name: observability-agent

29
init_observability-server Normal file
View File

@ -0,0 +1,29 @@
---
# Includes the `observability-server` role and then the `flagger-loadtester`
# role on all hosts (connecting as root); both use the svc.ink TLS files.
- name: setup observability server
  hosts: all
  remote_user: root
  gather_facts: true
  become: true
  tasks:
    - vars:
        group: master
        update_secret: true
        auto_issuance: false
        namespace: monitoring
        db_namespace: database
        tls:
          - secret_name: obs-tls
            keyfile: /etc/ssl/svc.ink.key
            certfile: /etc/ssl/svc.ink.pem
      include_role:
        name: observability-server

    - vars:
        group: master
        update_secret: true
        auto_issuance: false
        namespace: loadtester
        tls:
          - secret_name: obs-tls
            keyfile: /etc/ssl/svc.ink.key
            certfile: /etc/ssl/svc.ink.pem
      include_role:
        name: flagger-loadtester

18
init_openldap Normal file
View File

@ -0,0 +1,18 @@
---
# Includes the `openldap` role on all hosts (connecting as root) with the
# namespace, domain and TLS secret settings below.
- name: setup openldap
  hosts: all
  remote_user: root
  gather_facts: true
  become: true
  tasks:
    - vars:
        group: master
        namespace: itsm
        domain: onwalk.net
        update_secret: true
        auto_issuance: false
        tls:
          - secret_name: openldap-tls
            keyfile: /etc/ssl/onwalk.net.key
            certfile: /etc/ssl/onwalk.net.pem
      include_role:
        name: openldap

View File

@ -0,0 +1,13 @@
---
# Includes the `splunk-otel-collector` role on all hosts (connecting as root).
# NOTE(review): the HEC URL and token below look like placeholders — replace
# them before use.
- name: setup splunk otel collector
  hosts: all
  remote_user: root
  gather_facts: true
  become: true
  tasks:
    - vars:
        group: master
        namespace: default
        splunk_hec_url: 'https://xxxx.splunkcloud.com:8088/services/collector/event'
        splunk_hec_token: 'token-xxxxxx'
      include_role:
        name: splunk-otel-collector

10
init_telegraf Normal file
View File

@ -0,0 +1,10 @@
---
# Includes the `telegraf` role on all hosts (connecting as root) with
# `update_secret` enabled.
- name: Setup telegraf
  hosts: all
  remote_user: root
  gather_facts: true
  become: true
  tasks:
    - vars:
        update_secret: true
      include_role:
        name: telegraf

8
init_vault Normal file
View File

@ -0,0 +1,8 @@
---
# Applies the `vault` role to all hosts, connecting as `ubuntu`.
- name: deploy vault server
  hosts: all
  remote_user: ubuntu
  gather_facts: true
  become: true
  roles:
    - role: vault

7
init_vpn_gateway.yml Executable file
View File

@ -0,0 +1,7 @@
---
# Applies the `wireguard-gateway` role to the `vpn-gateway` hosts, connecting
# as `ubuntu`.
- hosts: vpn-gateway
  remote_user: ubuntu
  gather_facts: true
  become: true
  roles:
    - role: wireguard-gateway

32
inventory.ini Normal file
View File

@ -0,0 +1,32 @@
# Homepage web hosts.
[web]
cn-homepage.svc.plus ansible_host=47.120.61.35
global-homepage.svc.plus ansible_host=167.179.72.223

# DeepFlow agent hosts with per-host connection settings.
# NOTE(review): plaintext SSH passwords are stored here — if these are real
# credentials, move them to a vault.
[deepflow_agents]
192.168.1.101 ansible_user=root ansible_ssh_pass=pass101
192.168.1.102 ansible_user=admin ansible_ssh_pass=pass102
192.168.1.103 ansible_user=root ansible_ssh_pass=pass103 ansible_port=2222
192.168.1.104 ansible_user=ubuntu ansible_ssh_private_key_file=~/.ssh/id_rsa_ubuntu

[mail]
smtp.svc.plus ansible_host=45.130.167.90

[bootstrap]
auth.svc.plus ansible_host=34.92.122.119 ansible_user=root ansible_ssh_private_key_file=~/.ssh/id_rsa

[all:vars]
ansible_port=22
ansible_user=root
ansible_host_key_checking=False

# SSH key or password (choose one)
# ansible_ssh_private_key_file=~/.ssh/id_rsa
# ansible_ssh_pass=your_password

# DeepFlow agent configuration variables
controller_ips=["10.10.10.10", "10.10.10.11"]
vtap_group_id="g-P22vLIMdB6"

# Location of the DeepFlow agent installation package
agent_base_dir="deepflow-agent-for-linux"
agent_package_name="deepflow-agent-1.0-5407.systemd.x86_64.rpm"

7
keycloak_server Normal file
View File

@ -0,0 +1,7 @@
---
# Applies the `keycloak` role to all hosts, connecting as `ubuntu`.
- hosts: all
  remote_user: ubuntu
  gather_facts: true
  become: true
  roles:
    - role: keycloak

48
pre_setup.sh Normal file
View File

@ -0,0 +1,48 @@
#!/bin/bash
# Prepare a control machine for the playbooks:
#   1. verify that every required environment variable is non-empty;
#   2. install jq and ansible;
#   3. write the SSH private key to ~/.ssh/id_rsa;
#   4. generate hosts/inventory from the environment.

# Abort with a message when the variable whose NAME is passed as $1 is unset
# or empty (indirect expansion through ${!1}).
check_empty() {
  if [ -z "${!1}" ]; then
    echo "$1 is empty. Aborting."
    exit 1
  fi
}

# List of variables to check
variables=("DNS_AK" "DNS_SK" "OSS_AK" "OSS_SK" "ROOT_PASSWORD" "SMTP_PASSWORD" "GITLAB_OIDC_CLIENT_TOKEN" "HARBOR_OIDC_CLIENT_TOKEN" "SSH_USER" "SSH_HOST_IP" "SSH_HOST_DOMAIN" "SSH_PRIVATE_KEY")

# Loop through variables and check if each one is empty
for var in "${variables[@]}"; do
  check_empty "$var"
done

sudo apt install jq ansible -y

mkdir -pv ~/.ssh/
# A previous run leaves the key read-only (0400), which makes the redirection
# below fail for a non-root user; remove the old file first. The subshell's
# umask keeps the new file private from the moment it is created.
rm -f ~/.ssh/id_rsa
(
  umask 077
  cat > ~/.ssh/id_rsa << EOF
$SSH_PRIVATE_KEY
EOF
)
chmod 0400 ~/.ssh/id_rsa
# NOTE(review): this prints a digest of the private key to stdout (and so to
# any captured log) — consider printing the public fingerprint instead.
md5sum ~/.ssh/id_rsa

mkdir -pv hosts/
# The heredoc is unquoted on purpose: the variables are expanded while the
# inventory is written.
cat > hosts/inventory << EOF
[master]
$SSH_HOST_DOMAIN ansible_host=$SSH_HOST_IP
[all:vars]
ansible_port=22
ansible_ssh_user=$SSH_USER
ansible_ssh_private_key_file=~/.ssh/id_rsa
ansible_host_key_checking=False
ingress_ip=$SSH_HOST_IP
dns_ak=$DNS_AK
dns_sk=$DNS_SK
oss_ak=$OSS_AK
oss_sk=$OSS_SK
admin_password=$ROOT_PASSWORD
smtp_password=$SMTP_PASSWORD
gitlab_oidc_client_token=$GITLAB_OIDC_CLIENT_TOKEN
harbor_oidc_client_token=$HARBOR_OIDC_CLIENT_TOKEN
EOF

8
renew_nodes_ssl_certs Normal file
View File

@ -0,0 +1,8 @@
---
# Applies the `cert-manager` role to all hosts, connecting as `ubuntu`.
- name: renew nodes ssl certs
  hosts: all
  remote_user: ubuntu
  gather_facts: true
  become: true
  roles:
    - role: cert-manager

33
roles/README.md Normal file
View File

@ -0,0 +1,33 @@
# Playbook roles planning
This document clarifies what should live under `/playbooks/roles/` for host-level automation (Ansible) versus what should be delivered through Helm charts, and ensures we cover the five tiers across data platforms: data warehouse → big data → ML → DL → large models.
## Scope rules
- **Ansible roles**: host-coupled configuration that is not itself a cloud resource (GPU driver/runtime, OS tuning, user/SSH prep, rendering on-host config files, database bootstrapping, etc.).
- **Helm charts**: anything that runs as a Kubernetes workload (operators, clusters, services running in pods).
## Base roles shared across tiers (Ansible)
- GPU driver and CUDA stack installation.
- Docker/Containerd runtime setup.
- System parameter tuning (kernel limits, hugepages, network stack), plus user home/SSH layout.
- Database initialization tasks (e.g., bootstrap PostgreSQL/ClickHouse on hosts) and rendering templated configs such as `ClickHouse/users.xml`.
## Coverage by capability tier
| Tier | Host-focused roles (Ansible) | Kubernetes services (Helm) |
| --- | --- | --- |
| Data warehouse | ClickHouse host bootstrap & config render; PostgreSQL init where needed. | — |
| Big data | JVM/runtime, local disks, and OS tuning for data nodes. | Spark Operator; Flink Operator; Kafka/Redpanda; MinIO. |
| ML | GPU runtime base (drivers, container runtime), Python ML base image prep; user workspace/SSH. | Ray Cluster; MLflow; JupyterHub. |
| DL | Same GPU/system tuning plus inference node bootstrap (TensorRT/cuDNN as needed). | Triton Inference Server; LMDeploy (for deployment runtimes). |
| Large models | Secure SSH/user profiles and config templating for model storage/IO. | vLLM serving; model-specific Helm releases atop Ray/K8s. |
## Suggested role layout under `/playbooks/roles/`
- `common/` (new): shared tasks for system tuning, users/SSH, and package repos for GPU/runtime support.
- `gpu/`: install GPU drivers + CUDA toolkit.
- `container_runtime/`: install and configure Docker/Containerd with GPU runtime integration.
- `database_init/`: bootstrap on-host databases (e.g., PostgreSQL, ClickHouse), render config files (`users.xml`, etc.).
- `bigdata_node_prep/`: OS/disk tuning for Spark/Flink/Kafka/Redpanda/MinIO hosts.
- `ml_node_prep/`: Python/conda base, SSH workspace prep for ML workloads.
- `dl_inference_node/`: TensorRT/cuDNN dependencies and runtime checks for Triton/LMDeploy nodes.
Helm-delivered components should live under `playbooks/roles/charts/` or the repo's Helm release structure and include Spark/Flink Operators, Kafka/Redpanda/MinIO, Ray Cluster, Triton, vLLM/LMDeploy, MLflow, and JupyterHub.

View File

@ -0,0 +1,2 @@
dependencies:
- role: common

16
roles/charts/app/tasks/main.yml Executable file
View File

@ -0,0 +1,16 @@
# Render the demo application manifest into /tmp/app and apply it with kubectl.

# Use the file module instead of `shell: mkdir` so the task is idempotent and
# only reports "changed" when the directory is actually created.
- name: Prep DIR
  file:
    path: /tmp/app/
    state: directory

# `|| echo true` keeps the task green when the namespace already exists.
- name: Prep NameSpace
  shell: "kubectl create namespace default || echo true"

- name: Sync Deploy yaml
  template:
    src: "templates/{{ item }}"
    dest: "/tmp/app/{{ item }}"
    owner: root
    group: root
    # Quoted so YAML never reinterprets the octal mode as an integer.
    mode: "0644"
    force: true
    unsafe_writes: true
  with_items:
    - deploy-app.yaml

# Applied only from hosts that belong to the inventory group named by `group`.
- name: Setup App
  shell: "kubectl apply -f /tmp/app/{{ item }}"
  when: inventory_hostname in groups[group]
  with_items:
    - deploy-app.yaml

2
roles/charts/app/templates/.gitignore vendored Normal file
View File

@ -0,0 +1,2 @@
/clickhouse-keeper-k8s.iml
/.idea/

View File

@ -0,0 +1,18 @@
apiVersion: apps/v1
kind: Deployment
metadata:
name: app
spec:
replicas: 1
selector:
matchLabels:
app: demo
template:
metadata:
labels:
app: demo
spec:
containers:
- name: demo
image: {{ app_image }}:{{ app_tag }}
imagePullPolicy: Always

View File

@ -0,0 +1,100 @@
#!/bin/bash
# Install or upgrade Argo CD from the argo Helm repository, with a helmfile
# config-management-plugin sidecar attached to the repo-server, then wait for
# the argocd-server pod to become ready.

# Exit with status 1 when the value in $1 is empty; $2 is the human-readable
# name used in the error message.
check_not_empty() {
  if [[ -z $1 ]]; then
    echo "Error: $2 is empty. Please provide a value."
    exit 1
  fi
}

helm repo add argo https://argoproj.github.io/argo-helm
helm repo update

# Deploy Argo CD with Helm
#helm upgrade --install argocd argo/argo-cd -n argocd --create-namespace

# The heredoc delimiter is quoted ('EOF') on purpose: the plugin's generate
# script references $ARGOCD_APP_NAMESPACE, $ENV_NAME and $ARGOCD_ENV_ENV_NAME,
# which must reach values.yaml literally and be expanded inside the sidecar at
# render time. With an unquoted delimiter this shell would replace them with
# empty strings while writing the file.
cat <<'EOF' > values.yaml
global:
  domain: argocd.onwalk.net
server:
  service:
    type: ClusterIP
    servicePortHttp: 80
    servicePortHttps: 443
    servicePortHttpName: http
    servicePortHttpsName: https
  ingress:
    enabled: false
    ingressClassName: "nginx"
    hostname: argocd.onwalk.net
    annotations:
      nginx.ingress.kubernetes.io/force-ssl-redirect: "true"
      nginx.ingress.kubernetes.io/backend-protocol: "HTTP"
    tls: true
repoServer:
  extraContainers:
    - name: helmfile
      image: ghcr.io/helmfile/helmfile:v0.157.0
      # Entrypoint should be Argo CD lightweight CMP server i.e. argocd-cmp-server
      command: ["/var/run/argocd/argocd-cmp-server"]
      env:
        - name: HELM_CACHE_HOME
          value: /tmp/helm/cache
        - name: HELM_CONFIG_HOME
          value: /tmp/helm/config
        - name: HELMFILE_CACHE_HOME
          value: /tmp/helmfile/cache
        - name: HELMFILE_TEMPDIR
          value: /tmp/helmfile/tmp
      securityContext:
        runAsNonRoot: true
        runAsUser: 999
      volumeMounts:
        - mountPath: /var/run/argocd
          name: var-files
        - mountPath: /home/argocd/cmp-server/plugins
          name: plugins
        # Register helmfile plugin into sidecar
        - mountPath: /home/argocd/cmp-server/config/plugin.yaml
          subPath: helmfile.yaml
          name: argocd-cmp-cm
        # Starting with v2.4, do NOT mount the same tmp volume as the repo-server container. The filesystem separation helps mitigate path traversal attacks.
        - mountPath: /tmp
          name: helmfile-tmp
  volumes:
    - name: argocd-cmp-cm
      configMap:
        name: argocd-cmp-cm
    - name: helmfile-tmp
      emptyDir: {}
configs:
  cmp:
    create: true
    plugins:
      helmfile:
        allowConcurrency: true
        discover:
          fileName: helmfile.yaml
        generate:
          command:
            - bash
            - "-c"
            - |
              if [[ -v ENV_NAME ]]; then
                helmfile -n "$ARGOCD_APP_NAMESPACE" -e $ENV_NAME template --include-crds -q
              elif [[ -v ARGOCD_ENV_ENV_NAME ]]; then
                helmfile -n "$ARGOCD_APP_NAMESPACE" -e "$ARGOCD_ENV_ENV_NAME" template --include-crds -q
              else
                helmfile -n "$ARGOCD_APP_NAMESPACE" template --include-crds -q
              fi
        lockRepo: false
EOF

# --create-namespace makes a first-time install work when the `argocd`
# namespace does not exist yet; it is a no-op when it already does.
helm upgrade --install argocd argo/argo-cd -n argocd --create-namespace -f values.yaml

# Wait until Argo CD has fully started
echo "Waiting for Argo CD to be ready..."
kubectl wait --for=condition=ready pod -l app.kubernetes.io/name=argocd-server -n argocd --timeout=180s
echo "Argo CD deployment and configuration complete."

View File

@ -0,0 +1,2 @@
dependencies:
- role: cert-manager

View File

@ -0,0 +1,2 @@
- name: Set ArgoCD Contoller
script: files/setup-argocd.sh

View File

@ -0,0 +1,24 @@
#!/bin/bash
set -x
export domain=$1
export secret=$2
export namespace=$3
cat > values.yaml << EOF
chaosDaemon:
runtime: containerd
socketPath: /run/k3s/containerd/containerd.sock
dashboard:
create: true
ingress:
enabled: true
ingressClassName: "nginx"
hosts:
- name: chaos-mesh.$domain
tls: true
tlsSecret: $secret
EOF
helm repo add chaos-mesh https://charts.chaos-mesh.org
helm repo update
helm upgrade --install chaos-mesh chaos-mesh/chaos-mesh -n $namespace --create-namespace --version 2.6.3 -f values.yaml

View File

@ -0,0 +1,124 @@
# Jenkins Master 部署
# Jenkins Node IaC Runner 设置
1. 安装git terraform
## GitLab to trigger Jenkins
1. Gitlab https://gitlab.xxx.com/-/profile/personal_access_tokens
2. GitLab和Jenkins的集成可以让你在GitLab中的代码更新后自动触发Jenkins的构建任务。以下是配置GitLab插件和Jenkins以实现GitLab触发Jenkins的步骤
3. 在Jenkins中安装GitLab插件
首先你需要在Jenkins中安装GitLab插件。登录到Jenkins的管理界面然后转到“Manage Jenkins” > “Manage Plugins” > “Available”在搜索框中输入“GitLab”找到并安装“GitLab Plugin”。
4. 在Jenkins中配置GitLab连接
安装完插件后你需要配置GitLab的连接。转到“Manage Jenkins” > “Configure System”滚动到“GitLab”部分点击“Add GitLab Server” > “Server”输入你的GitLab服务器URL并生成并输入一个与你的GitLab账户相关联的API Token。
5. 在Jenkins中创建一个新的任务
创建一个新的任务并在源代码管理部分选择“Git”输入你的GitLab项目的URL。在构建触发器部分选择“Build when a change is pushed to GitLab”。
记录:GitLab webhook URL: https://jenkins.xxx.xxx/project/alicloud-oss-pipeline
6. 在GitLab中配置Webhook
在你的GitLab项目中转到“Settings” > “Integrations” -> 启用"Jenkins"
- 在URL中输入步骤5记录的 Webhook URL https://jenkins.xxx.xxx/project/alicloud-oss-pipeline
- 选择你想要触发Jenkins任务的事件例如当代码被推送时
- Project name: 输入项目名称
- Username: Jenkins 用户名
- Password: Jenkins 认证密码
- 保存更改, 测试设置返回状态200为配置正确
以上就是配置GitLab插件和Jenkins以实现GitLab触发Jenkins的步骤。在完成这些步骤后每当你的GitLab项目有更新时都会自动触发对应的Jenkins构建任务。
## 要将GitHub代码仓库与Jenkins关联起来您需要完成以下步骤
1 要在 GitHub 中启用 webhook 功能以触发 Jenkins 构建,请按照以下步骤操作:
2 进入 GitHub 仓库设置:在要设置 webhook 的 GitHub 仓库页面上点击右上角的“Settings”。
3 选择 Webhooks 选项在仓库设置页面的左侧菜单中选择“Webhooks”。
4 添加 Webhook在 Webhooks 页面的右上角点击“Add webhook”。
配置 Webhook
1. Payload URL输入 Jenkins 服务器的 webhook URL。格式应为 http://your-jenkins-server/github-webhook/。确保替换 your-jenkins-server 为您 Jenkins 服务器的实际地址。
2. Content type选择 application/json。
3. Secret可选如果需要额外的安全性可以输入一个秘密令牌。
4. SSL verification选择是否验证 SSL 证书。
5. Which events would you like to trigger this webhook?:选择触发 webhook 的事件。通常选择 Just the push event只有推送事件或 Let me select individual events让我选择单独的事件并选择适当的事件例如push、pull request 等)。
添加 Webhook点击页面底部的“Add webhook”按钮以保存配置。
完成以上步骤后,您的 GitHub 仓库就配置好了一个 webhook可以触发 Jenkins 构建。记得在 Jenkins 中设置相应的任务来响应这些 webhook。
安装Jenkins插件
确保您的Jenkins实例已经安装了“GitHub”和“GitHub Integration”插件。您可以在Jenkins管理界面的“插件管理”部分进行安装。
配置GitHub Webhook
在GitHub仓库的设置中找到“Webhooks”部分并添加一个新的Webhook。
将“Payload URL”设置为您的Jenkins服务器的URL通常是这样的格式http://<JENKINS_URL>/github-webhook/。
选择触发Webhook的事件通常是“Just the push event”或者“Send me everything”。
确保“Content type”设置为“application/json”。
点击“Add webhook”保存设置。
配置Jenkins Job
在Jenkins中创建一个新的构建任务或者配置现有的任务。
在“源码管理”部分选择“Git”并填写您的GitHub仓库的URL。
在“构建触发器”部分选择“GitHub hook trigger for GITScm polling”选项。这样每当GitHub仓库有新的推送事件时Jenkins就会自动触发构建。
测试配置:
推送一些改动到您的GitHub仓库检查是否触发了Jenkins构建。
在Jenkins的构建历史中查看构建是否成功执行。
通过完成以上步骤您的GitHub代码仓库就与Jenkins关联起来了可以实现自动触发构建的功能。
要在 Jenkins 中设置 GitHub 服务,您需要进行以下步骤:
安装 GitHub 插件:首先确保您的 Jenkins 实例已安装 GitHub 插件。如果尚未安装,请转到 Jenkins 的“插件管理”页面,在“可选插件”选项卡中搜索并安装 GitHub 插件。
配置 GitHub 服务器:在 Jenkins 管理界面中,转到“系统管理” > “系统设置”。
在系统设置页面中找到并点击“GitHub”部分。
点击“Add GitHub Server”添加一个新的 GitHub 服务器配置。
在配置页面中输入一个描述性的名称例如“GitHub”。
在 GitHub API URL 中输入 GitHub 的 API 地址。通常为 https://api.github.com。
如果您的 GitHub 仓库需要身份验证请在“凭据”部分选择一个已配置的凭据。如果尚未配置凭据请点击“Add”添加一个新的凭据选择类型为“Secret text”或“Username with password”然后输入您的 GitHub 用户名和密码或访问令牌。
完成配置后,点击“保存”保存 GitHub 服务器配置。
验证配置您可以在配置页面的底部点击“Test connection”来验证您的 GitHub 服务器配置是否正常工作。
保存设置:确保在完成配置后点击“保存”保存更改。
现在,您已成功配置了 Jenkins 的 GitHub 服务。您可以在 Jenkins 任务中使用这个配置来与 GitHub 仓库进行集成,例如触发构建、拉取代码等操作。
对于 Jenkins 中的 GitHub API URL (https://api.github.com) 的凭据设置,您可以使用 GitHub Personal Access Token。这个 Token 可以通过以下步骤生成:
在 GitHub 上登录您的账号。
点击页面右上角的头像选择“Settings”。
在左侧边栏中点击“Developer settings”。
在左侧边栏中点击“Personal access tokens”。
点击“Generate new token”。
输入一个描述性的名称,选择需要的权限(至少需要 repo 权限来访问仓库然后点击“Generate token”。
复制生成的 Token并保存到一个安全的地方。请注意这个 Token 只会显示一次,如果您丢失了,请重新生成一个新的 Token。
在 Jenkins 中使用这个 Token 作为 GitHub API URL (https://api.github.com) 的凭据时,您可以将 Token 添加为 Jenkins 的凭据:
进入 Jenkins 管理界面,转到“凭据” > “系统”。
在“系统”页面中点击“Global credentials (unrestricted)”。
在凭据页面中点击“Add credentials”。
在“Kind”下拉菜单中选择“Secret text”。
在“Secret”框中粘贴您在 GitHub 上生成的 Personal Access Token。
输入一个描述性的名称并点击“OK”保存凭据。
现在,您可以在 Jenkins 的配置中使用这个凭据来访问 GitHub API (https://api.github.com)。
确保 Docker 已安装:在 Jenkins 代理节点上确认 Docker 已正确安装并配置。您可以通过在终端中执行 docker --version 命令来检查 Docker 是否可用。
检查 Docker 环境:如果 Docker 已安装,请确保 Docker 服务正在运行。您可以使用 sudo systemctl status docker 命令检查 Docker 服务的状态。
确认 Jenkins 全局工具配置:在 Jenkins 管理界面中,转到“系统管理”->“全局工具配置”,确保 Docker 工具已正确配置。如果未配置,您可以添加一个 Docker 工具,并指定正确的安装路径。
重启 Jenkins 服务:在进行了上述更改后,尝试重启 Jenkins 服务,以确保新的配置生效。
尝试在终端中执行 Docker 命令:在 Jenkins 代理节点上打开终端,尝试手动执行一些 Docker 命令(如 docker pull),看看是否能够正常执行。
要设置 Jenkins Docker 流水线,你可以按照以下步骤进行操作:
前提条件
确保你的 Jenkins 实例已经安装了以下插件:
Docker Pipeline
Docker Commons

View File

@ -0,0 +1,2 @@
dependencies:
- role: secret-manger

View File

@ -0,0 +1,4 @@
# Install Chaos Mesh once per entry of `tls`, only from hosts that belong to
# the inventory group named by `group`.
- name: Setup chaos-mesh Server
  script: files/setup.sh {{ domain }} {{ item.secret_name }} {{ namespace }}
  # `when` is evaluated once per loop item, so it cannot guard the loop
  # expression itself. Defaulting to an empty list makes an undefined `tls`
  # skip the task instead of failing with "'tls' is undefined".
  loop: "{{ tls | default([]) }}"
  when: inventory_hostname in groups[group]

View File

@ -0,0 +1,37 @@
#!/bin/bash
domain=$1
namespace=$2
admin_password=$3
secret_name=$4
storage_type=$5
cat > values.yaml << EOF
env:
open:
STORAGE: local
DISABLE_API: false
AUTH_ANONYMOUS_GET: true
secret:
BASIC_AUTH_USER: admin
BASIC_AUTH_PASS: '$admin_password'
ingress:
enabled: true
hosts:
- name: charts.$domain
path: /
tls: true
tlsSecret: $secret_name
ingressClassName: nginx
persistence:
enabled: true
accessMode: ReadWriteOnce
size: 8Gi
path: /storage
storageClass: "local-path"
EOF
export KUBECONFIG=/etc/rancher/k3s/k3s.yaml
helm repo add chartmuseum https://chartmuseum.github.io/charts
helm repo update
helm upgrade --install chartmuseum chartmuseum/chartmuseum -f values.yaml -n $namespace

View File

@ -0,0 +1,2 @@
dependencies:
- role: secret-manger

View File

@ -0,0 +1,4 @@
# Install ChartMuseum once per entry of `tls`, only from hosts that belong to
# the inventory group named by `group`.
# Arguments passed to files/setup.sh, in order: domain, namespace,
# admin password, TLS secret name.
- name: Setup Chartmuseum Server
  # Every argument goes through the `quote` filter because the line is handed
  # to a shell: a password containing spaces or shell metacharacters would
  # otherwise be split or interpreted.
  script: >-
    files/setup.sh
    {{ domain | quote }}
    {{ namespace | quote }}
    {{ admin_password | quote }}
    {{ item.secret_name | quote }}
  loop: "{{ tls }}"
  when: inventory_hostname in groups[group]

View File

@ -0,0 +1,8 @@
group: master
namespace: harbor
storage_type: oss
update_secret: true
tls:
- secret_name: chartmuseum-tls
keyfile: /etc/ssl/onwalk.net.key
certfile: /etc/ssl/onwalk.net.pem

View File

@ -0,0 +1,2 @@
dependencies:
- role: secret-manger

View File

@ -0,0 +1,48 @@
# Deploy ClickHouse and the qryn gateway into the `monitoring` namespace:
# render the manifests under /tmp, apply them with kubectl, and hand the
# address of a ClickHouse pod to the qryn templates via the `ck_node_ip` fact.

- name: Prep DIR
  shell: "mkdir -pv /tmp/clickhouse-cluster/ && mkdir -pv /tmp/qryn"

# `|| echo true` keeps the task green when the namespace already exists.
- name: Prep NameSpace
  shell: "kubectl create namespace monitoring || echo true"

- name: sync clickhouse deploy yaml
  template:
    src: "templates/{{ item }}"
    dest: "/tmp/{{ item }}"
    owner: root
    group: root
    # Quoted so YAML never reinterprets the octal mode as an integer.
    mode: "0644"
    force: true
    unsafe_writes: true
  with_items:
    - clickhouse-cluster/clickhouse-config.yaml
    - clickhouse-cluster/clickhouse-service.yaml
    - clickhouse-cluster/clickhouse-user-config.yaml
    - clickhouse-cluster/clickhouse-statefulset.yml
    - postsetup.sh

- name: Setup ClickHouse Server
  shell: "cd /tmp/clickhouse-cluster && kubectl apply -f ."
  when: inventory_hostname in groups[group]

#- name: Post Setup ClickHouse Server
#  shell: "cd /tmp/ && sh postsetup.sh"
#  when: inventory_hostname in groups[group]

# Column 6 of `kubectl get pods -o wide` is the pod IP.
# `groups[group][0]` is a single hostname (a string), so it is compared with
# `==`; `in` would be a substring test and also match e.g. "node1" against
# "node10".
# NOTE(review): pods that have not been scheduled yet report "<none>" in this
# column — confirm whether a wait is needed before this task.
- name: get clickhouse node ip
  shell: " kubectl get pods -n monitoring -o wide | grep -E '^clickhouse-' | awk '{print $6}' "
  register: ck_node_ip_raw
  when: inventory_hostname == groups[group][0]

# Only the first host of the group ran the lookup; on every other host the
# registered result is a "skipped" record without `stdout_lines`, so the host
# check must come first (list conditions are evaluated in order).
- name: Check if ck_node_ip_raw is not empty
  fail:
    msg: "ck_node_ip_raw is empty, terminating the playbook."
  when:
    - inventory_hostname == groups[group][0]
    - ck_node_ip_raw.stdout_lines | length == 0

- name: set fact join command for ck_node_ip
  set_fact:
    ck_node_ip: "{{ ck_node_ip_raw.stdout_lines[0] }}"
  when: inventory_hostname == groups[group][0]

- name: sync qryn deploy yaml
  template:
    src: "templates/{{ item }}"
    dest: "/tmp/{{ item }}"
    owner: root
    group: root
    mode: "0644"
    force: true
    unsafe_writes: true
  with_items:
    - qryn/qryn-deployment.yaml
    - qryn/qryn-service.yaml
    - qryn/qryn-ingress.yaml

- name: Setup Qryn Server
  shell: "cd /tmp/qryn && kubectl apply -f ."
  when: inventory_hostname in groups[group]

View File

@ -0,0 +1,2 @@
/clickhouse-keeper-k8s.iml
/.idea/

View File

@ -0,0 +1,94 @@
apiVersion: v1
kind: ConfigMap
metadata:
name: clickhouse-config
namespace: monitoring
data:
keeper.xml: |
<?xml version="1.0"?>
<yandex>
<listen_host>0.0.0.0</listen_host>
<logger>
<level>trace</level>
<console>1</console>
</logger>
<openSSL>
<server>
<certificateFile remove="1"/>
<privateKeyFile remove="1"/>
</server>
</openSSL>
<keeper_server>
<tcp_port>2181</tcp_port>
<server_id from_env="CK_INDEX"/>
<log_storage_path>/var/lib/clickhouse/coordination/log</log_storage_path>
<snapshot_storage_path>/var/lib/clickhouse/coordination/snapshots</snapshot_storage_path>
<coordination_settings>
<operation_timeout_ms>10000</operation_timeout_ms>
<session_timeout_ms>30000</session_timeout_ms>
<raft_logs_level>trace</raft_logs_level>
<rotate_log_storage_interval>10000</rotate_log_storage_interval>
</coordination_settings>
<raft_configuration>
<server>
<id>0</id>
<hostname>clickhouse-0.clickhouse-service.monitoring</hostname>
<port>9444</port>
</server>
<server>
<id>1</id>
<hostname>clickhouse-1.clickhouse-service.monitoring</hostname>
<port>9444</port>
</server>
<server>
<id>2</id>
<hostname>clickhouse-2.clickhouse-service.monitoring</hostname>
<port>9444</port>
</server>
</raft_configuration>
</keeper_server>
<zookeeper>
<node>
<host>clickhouse-0.clickhouse-service.monitoring</host>
<port>2181</port>
</node>
<node>
<host>clickhouse-1.clickhouse-service.monitoring</host>
<port>2181</port>
</node>
<node>
<host>clickhouse-2.clickhouse-service.monitoring</host>
<port>2181</port>
</node>
</zookeeper>
</yandex>
cluster.xml: |
<?xml version="1.0"?>
<yandex>
<remote_servers>
<testcluster>
<shard>
<replica>
<host>clickhouse-0.clickhouse-service.monitoring</host>
<port>9000</port>
</replica>
</shard>
<shard>
<replica>
<host>clickhouse-1.clickhouse-service.monitoring</host>
<port>9000</port>
</replica>
</shard>
</testcluster>
</remote_servers>
</yandex>
macros.xml: |
<?xml version="1.0" ?>
<yandex>
<macros>
<cluster>testcluster</cluster>
<replica from_env="HOSTNAME"/>
<shard>1</shard>
</macros>
</yandex>

View File

@ -0,0 +1,18 @@
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
name: clickhouse
namespace: monitoring
spec:
ingressClassName: nginx
rules:
- host: clickhouse.{{ domain }}
http:
paths:
- backend:
service:
name: clickhouse-service
port:
number: 8123
path: /
pathType: Prefix

View File

@ -0,0 +1,23 @@
kind: Service
apiVersion: v1
metadata:
labels:
app: clickhouse
name: clickhouse-service
namespace: monitoring
spec:
ports:
- name: rest
port: 8123
- name: keeper
port: 2181
- name: replica-a
port: 9000
- name: replica-b
port: 9009
- name: raft
port: 9444
clusterIP: None
selector:
app: clickhouse

View File

@ -0,0 +1,103 @@
apiVersion: apps/v1
kind: StatefulSet
metadata:
name: clickhouse
namespace: monitoring
spec:
selector:
matchLabels:
app: clickhouse
serviceName: clickhouse-service
replicas: 3
podManagementPolicy: "Parallel"
# podManagementPolicy: OrderedReady
template:
metadata:
labels:
app: clickhouse
spec:
containers:
- name: clickhouse
image: clickhouse/clickhouse-server:22.4.5
imagePullPolicy: IfNotPresent
workingDir: /
command:
- /bin/bash
- -c
- |-
export CK_INDEX=${HOSTNAME##*-}
echo CK_INDEX=${CK_INDEX}
./entrypoint.sh
env:
- name: HOSTNAME
valueFrom:
fieldRef:
fieldPath: metadata.name
ports:
- name: rest
containerPort: 8123
- name: keeper
containerPort: 2181
- name: replica-a
containerPort: 9000
- name: replica-b
containerPort: 9009
- name: raft
containerPort: 9444
volumeMounts:
- name: clickhouse-config
mountPath: /etc/clickhouse-server/config.d/
- name: clickhouse-user-config
mountPath: /etc/clickhouse-server/users.d/
- name: clickhouse-meta
mountPath: /var/lib/clickhouse/coordination/
- name: clickhouse-data
mountPath: /var/lib/clickhouse/
volumes:
- name: clickhouse-config
configMap:
name: clickhouse-config
items:
- key: keeper.xml
path: keeper.xml
- key: cluster.xml
path: cluster.xml
- key: macros.xml
path: macros.xml
- name: clickhouse-user-config
configMap:
name: clickhouse-user-config
items:
- key: user.xml
path: user.xml
volumeClaimTemplates:
- apiVersion: v1
kind: PersistentVolumeClaim
metadata:
labels:
app.kubernetes.io/component: clickhouse
app.kubernetes.io/instance: clickhouse
app.kubernetes.io/name: clickhouse
name: clickhouse-meta
spec:
accessModes:
- ReadWriteOnce
resources:
requests:
storage: 1Gi
volumeMode: Filesystem
- apiVersion: v1
kind: PersistentVolumeClaim
metadata:
labels:
app.kubernetes.io/component: clickhouse
app.kubernetes.io/instance: clickhouse
app.kubernetes.io/name: clickhouse
name: clickhouse-data
spec:
accessModes:
- ReadWriteOnce
resources:
requests:
storage: 5Gi
volumeMode: Filesystem

View File

@ -0,0 +1,19 @@
apiVersion: v1
kind: ConfigMap
metadata:
name: clickhouse-user-config
namespace: monitoring
data:
user.xml: |
<?xml version="1.0"?>
<yandex>
<profiles>
<default>
<max_memory_usage>10000000000</max_memory_usage>
<max_distributed_depth>4000</max_distributed_depth>
<distributed_connections_pool_size>4096</distributed_connections_pool_size>
<max_distributed_connections>4096</max_distributed_connections>
<load_balancing>random</load_balancing>
</default>
</profiles>
</yandex>

View File

@ -0,0 +1,142 @@
apiVersion: v1
kind: ConfigMap
metadata:
name: otel-collector-config
namespace: default
data:
config.yaml: |
receivers:
loki:
use_incoming_timestamp: true
protocols:
http:
endpoint: 0.0.0.0:3100
grpc:
endpoint: 0.0.0.0:3200
syslog:
protocol: rfc5424
tcp:
listen_address: "0.0.0.0:5514"
fluentforward:
endpoint: 0.0.0.0:24224
splunk_hec:
endpoint: 0.0.0.0:8088
otlp:
protocols:
grpc:
endpoint: 0.0.0.0:4317
http:
endpoint: 0.0.0.0:4318
jaeger:
protocols:
grpc:
endpoint: 0.0.0.0:14250
thrift_http:
endpoint: 0.0.0.0:14268
zipkin:
endpoint: 0.0.0.0:9411
skywalking:
protocols:
grpc:
endpoint: 0.0.0.0:11800
http:
endpoint: 0.0.0.0:12800
prometheus:
config:
scrape_configs:
- job_name: 'otel-collector'
scrape_interval: 5s
static_configs:
- targets: ['exporter:8080']
influxdb:
endpoint: 0.0.0.0:8086
connectors:
servicegraph:
latency_histogram_buckets: [ 100us, 1ms, 2ms, 6ms, 10ms, 100ms, 250ms ]
dimensions: [ cluster, namespace ]
store:
ttl: 2s
max_items: 1000
cache_loop: 2m
store_expiration_loop: 2s
virtual_node_peer_attributes:
- db.name
- rpc.service
spanmetrics:
namespace: span.metrics
exemplars:
enabled: false
dimensions_cache_size: 1000
aggregation_temporality: 'AGGREGATION_TEMPORALITY_CUMULATIVE'
metrics_flush_interval: 30s
metrics_expiration: 5m
events:
enabled: false
processors:
batch:
send_batch_size: 10000
timeout: 5s
memory_limiter:
check_interval: 2s
limit_mib: 1800
spike_limit_mib: 500
resourcedetection/system:
detectors: ['system']
system:
hostname_sources: ['os']
resource:
attributes:
- key: service.name
value: "serviceName"
action: upsert
metricstransform:
transforms:
- include: calls_total
action: update
new_name: traces_spanmetrics_calls_total
- include: latency
action: update
new_name: traces_spanmetrics_latency
exporters:
qryn:
dsn: tcp://clickhouse-server:9000/qryn?username=default&password=*************
timeout: 10s
sending_queue:
queue_size: 100
retry_on_failure:
enabled: true
initial_interval: 5s
max_interval: 30s
max_elapsed_time: 300s
logs:
format: raw
otlp/spanmetrics:
endpoint: localhost:4317
tls:
insecure: true
extensions:
health_check:
pprof:
zpages:
service:
extensions: [pprof, zpages, health_check]
pipelines:
logs:
receivers: [fluentforward, otlp, loki, syslog, splunk_hec]
processors: [memory_limiter, resourcedetection/system, resource, batch]
exporters: [qryn]
traces:
receivers: [otlp, jaeger, zipkin, skywalking]
processors: [memory_limiter, resourcedetection/system, resource, batch]
exporters: [qryn, spanmetrics, servicegraph]
metrics:
receivers: [prometheus, influxdb, spanmetrics, servicegraph]
processors: [memory_limiter, resourcedetection/system, resource, batch]
exporters: [qryn]

View File

@ -0,0 +1,42 @@
# Runs a single replica of the qryn OpenTelemetry Collector, configured from
# the `otel-collector-config` ConfigMap.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: otel-collector
  namespace: default
  labels:
    app: otel-collector
spec:
  replicas: 1
  selector:
    matchLabels:
      app: otel-collector
  template:
    metadata:
      labels:
        app: otel-collector
    spec:
      containers:
        - name: otel-collector
          image: ghcr.io/metrico/qryn-otel-collector:latest
          volumeMounts:
            # `subPath` projects the single `config.yaml` key as a file, so
            # `mountPath` has to be the file path; mounting it at `/etc/otel`
            # would place a file where the directory is expected.
            # NOTE(review): assumes the image reads /etc/otel/config.yaml —
            # confirm against the image documentation.
            - name: config
              mountPath: /etc/otel/config.yaml
              subPath: config.yaml
          ports:
            - containerPort: 3100
            - containerPort: 3200
            - containerPort: 8088
            - containerPort: 5514
            - containerPort: 24224
            - containerPort: 4317
            - containerPort: 4318
            - containerPort: 14250
            - containerPort: 14268
            - containerPort: 9411
            - containerPort: 11800
            - containerPort: 12800
            - containerPort: 8086
      volumes:
        - name: config
          configMap:
            name: otel-collector-config

View File

@ -0,0 +1,19 @@
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
name: otel-collector-ingress
namespace: default
annotations:
nginx.ingress.kubernetes.io/rewrite-target: /
spec:
rules:
- host: your-domain.example.com
http:
paths:
- path: /api
pathType: Prefix
backend:
service:
name: otel-collector
port:
number: 3100

View File

@ -0,0 +1,48 @@
# Exposes the collector's receiver ports inside the cluster.
# Kubernetes requires every port of a multi-port Service to have a unique
# name; the API server rejects the object otherwise.
# NOTE(review): the names below follow the receivers usually bound to these
# ports in the collector configuration — adjust if the mapping differs.
apiVersion: v1
kind: Service
metadata:
  name: otel-collector
  namespace: default
spec:
  ports:
    - name: loki-http
      port: 3100
      targetPort: 3100
      protocol: TCP
    - name: loki-grpc
      port: 3200
      targetPort: 3200
      protocol: TCP
    - name: splunk-hec
      port: 8088
      targetPort: 8088
      protocol: TCP
    - name: syslog
      port: 5514
      targetPort: 5514
      protocol: TCP
    - name: fluentforward
      port: 24224
      targetPort: 24224
      protocol: TCP
    - name: otlp-grpc
      port: 4317
      targetPort: 4317
      protocol: TCP
    - name: otlp-http
      port: 4318
      targetPort: 4318
      protocol: TCP
    - name: jaeger-grpc
      port: 14250
      targetPort: 14250
      protocol: TCP
    - name: jaeger-thrift
      port: 14268
      targetPort: 14268
      protocol: TCP
    - name: zipkin
      port: 9411
      targetPort: 9411
      protocol: TCP
    - name: sw-grpc
      port: 11800
      targetPort: 11800
      protocol: TCP
    - name: sw-http
      port: 12800
      targetPort: 12800
      protocol: TCP
    - name: influxdb
      port: 8086
      targetPort: 8086
      protocol: TCP
  selector:
    app: otel-collector

View File

@ -0,0 +1,27 @@
#!/bin/bash
# Post-install bootstrap for ClickHouse: create the DeepFlow databases, create
# the `admin` and `deepflow` users, and grant `deepflow` read-only access to
# each database. Every statement runs through clickhouse-client as `admin`.
# The CREATE statements use IF NOT EXISTS so the script can be re-run safely.

# Check the ClickHouse version (currently disabled)
#clickhouse-client --version | grep -q "21.8"
#if [ $? -ne 0 ]; then
#echo "ClickHouse 的版本必须至少为 21.8"
#exit 1
#fi

# Create the databases
for db in deepflow_system event ext_metrics flow_log flow_metrics flow_tag profile; do
  clickhouse-client -u admin -p admin -q "CREATE DATABASE IF NOT EXISTS $db"
done

# Create the users
clickhouse-client -u admin -p admin -q "CREATE USER IF NOT EXISTS admin IDENTIFIED WITH PLAINTEXT_PASSWORD BY 'admin'"
clickhouse-client -u admin -p admin -q "CREATE USER IF NOT EXISTS deepflow IDENTIFIED WITH PLAINTEXT_PASSWORD BY 'deepflow'"

# Grant privileges. `*.*` means "all databases, all tables"; a bare `.` is not
# valid GRANT syntax. Inside double quotes the shell does not glob-expand it.
clickhouse-client -u admin -p admin -q "GRANT ALL ON *.* TO admin"
clickhouse-client -u admin -p admin -q "GRANT SELECT ON deepflow_system.* TO deepflow"
clickhouse-client -u admin -p admin -q "GRANT SELECT ON event.* TO deepflow"
clickhouse-client -u admin -p admin -q "GRANT SELECT ON ext_metrics.* TO deepflow"
clickhouse-client -u admin -p admin -q "GRANT SELECT ON flow_log.* TO deepflow"
clickhouse-client -u admin -p admin -q "GRANT SELECT ON flow_metrics.* TO deepflow"
clickhouse-client -u admin -p admin -q "GRANT SELECT ON flow_tag.* TO deepflow"
clickhouse-client -u admin -p admin -q "GRANT SELECT ON profile.* TO deepflow"

View File

@ -0,0 +1,36 @@
apiVersion: apps/v1
kind: Deployment
metadata:
name: qryn
namespace: monitoring
labels:
io.metrico.service: qryn
spec:
replicas: 2
selector:
matchLabels:
io.metrico.service: qryn
strategy: {}
template:
metadata:
annotations:
qryn.cmd: qryn.dev
creationTimestamp: null
labels:
io.metrico.service: qryn
spec:
containers:
- env:
- name: CLICKHOUSE_AUTH
value: "default"
- name: CLICKHOUSE_PORT
value: "8123"
- name: CLICKHOUSE_SERVER
value: "{{ hostvars[groups[group][0]].ck_node_ip }}"
image: qxip/qryn
name: qryn
ports:
- containerPort: 3100
resources: {}
restartPolicy: Always
status: {}

View File

@ -0,0 +1,24 @@
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
name: data-gateway
namespace: monitoring
annotations:
nginx.ingress.kubernetes.io/ssl-redirect: "true"
spec:
ingressClassName: nginx
rules:
- host: data-gateway.{{ domain }}
http:
paths:
- backend:
service:
name: qryn
port:
number: 3100
path: /
pathType: Prefix
tls:
- hosts:
- data-gateway.{{ domain }}
secretName: obs-tls

View File

@ -0,0 +1,15 @@
apiVersion: v1
kind: Service
metadata:
creationTimestamp: null
labels:
io.metrico.service: qryn
name: qryn
namespace: monitoring
spec:
ports:
- name: "3100"
port: 3100
targetPort: 3100
selector:
io.metrico.service: qryn

View File

@ -0,0 +1,12 @@
# 统计存储数据
select formatReadableSize(sum(rows)) as "每天写入行数", formatReadableSize(sum(bytes_on_disk)) as "每天落盘的字节", formatReadableSize(sum(data_uncompressed_bytes)) as "压缩前字节", sum(data_uncompressed_bytes)/sum(bytes_on_disk) as "压缩比", sum(rows)/86400 as "平均每秒写入的行数" from cluster(df_cluster, system.parts) where partition like '%2024-12-03%' limit 10;
可以在 Grafana 中再查询确认一下。流日志的统计:
select min(partition),max(partition),formatReadableSize(sum(rows)) as "每天写入行数", formatReadableSize(sum(bytes_on_disk)) as "每天落盘的字节", formatReadableSize(sum(data_uncompressed_bytes)) as "压缩前字节", sum(data_uncompressed_bytes)/sum(bytes_on_disk) as "压缩比", sum(rows)/86400 as "平均每秒写入的行数" from cluster(df_cluster, system.parts) where partition like '%2024-12-03%' and table='l4_flow_log_local' limit 10;
调用日志的统计:
select min(partition),max(partition),formatReadableSize(sum(rows)) as "每天写入行数", formatReadableSize(sum(bytes_on_disk)) as "每天落盘的字节", formatReadableSize(sum(data_uncompressed_bytes)) as "压缩前字节", sum(data_uncompressed_bytes)/sum(bytes_on_disk) as "压缩比", sum(rows)/86400 as "平均每秒写入的行数" from cluster(df_cluster, system.parts) where partition like '%2024-12-03%' and table='l7_flow_log_local' limit 10;

View File

@ -0,0 +1,7 @@
sudo apt-get install -y apt-transport-https ca-certificates curl gnupg
curl -fsSL 'https://packages.clickhouse.com/rpm/lts/repodata/repomd.xml.key' | sudo gpg --dearmor -o /usr/share/keyrings/clickhouse-keyring.gpg
echo "deb [signed-by=/usr/share/keyrings/clickhouse-keyring.gpg] https://packages.clickhouse.com/deb stable main" | sudo tee \
/etc/apt/sources.list.d/clickhouse.list
sudo apt-get update
sudo apt-get install -y clickhouse-client

View File

@ -0,0 +1,6 @@
#!/bin/bash
export namespace=$1
export MYSQL_ROOT_PASSWORD=$(kubectl get secret --namespace $namespace mysql -o jsonpath="{.data.mysql-root-password}" | base64 -d)
kubectl run mysql-client --rm --tty -i --restart='Never' --image docker.io/bitnami/mysql:8.0.32-debian-11-r14 --namespace $namespace --env MYSQL_ROOT_PASSWORD=$MYSQL_ROOT_PASSWORD --command -- bash -c "mysql -h mysql.database.svc.cluster.local -uroot -p$MYSQL_ROOT_PASSWORD -e 'create database IF NOT EXISTS jenkins;'"

View File

@ -0,0 +1,29 @@
#!/bin/bash
# Install or upgrade the DeepFlow Helm release using a generated values file
# (values-custom.yaml, written to the current directory).
#
# Positional arguments, exported below: $1 domain, $2 secret, $3 namespace.
# NOTE(review): none of the three is referenced later in this script — the
# Grafana host (grafana.onwalk.net), the TLS secret (obs-tls) and the release
# namespace (monitoring) are hard-coded. Confirm whether that is intended.

# Trace every command as it runs.
set -x
export domain=$1
export secret=$2
export namespace=$3
# Generate the Helm values overriding the chart defaults.
cat << EOF > values-custom.yaml
clickhouse:
enabled: true
server:
enabled: true
deepflow-agent:
enabled: true
grafana:
enabled: true
service:
ingress:
enabled: true
ingressClassName: nginx
hosts:
- grafana.onwalk.net
tls:
- secretName: obs-tls
hosts:
- grafana.onwalk.net
EOF
helm repo add deepflow https://deepflowio.github.io/deepflow
helm repo update deepflow # use `helm repo update` when helm < 3.7.0
# Chart version is pinned; the namespace is created on first install.
helm upgrade --install deepflow -n monitoring deepflow/deepflow --create-namespace --version 6.4.9 -f values-custom.yaml

View File

@ -0,0 +1,2 @@
dependencies:
- role: secret-manger

View File

@ -0,0 +1,19 @@
#- name: get mysql db password
# shell: 'kubectl get secret --namespace database mysql -o jsonpath="{.data.mysql-root-password}" | base64 -d'
# register: mysql_db_password_raw
# when: inventory_hostname in groups[group][0]
#
#- name: set fact join command
# set_fact:
# mysql_db_password : "{{ mysql_db_password_raw.stdout }}"
# when: inventory_hostname in groups[group][0]
#
#- name: DB Pre Setup for Jenkins Server
# script: files/pre-setup.sh {{ db_namespace }}
# when: inventory_hostname in groups[group]
# script: files/setup.sh {{ domain }} {{ item.secret_name }} {{ namespace }} {{ mysql_db_password }}
# Install DeepFlow once per entry of `tls`, only from hosts that belong to the
# inventory group named by `group`.
- name: Setup Deepflow Cluster
  script: files/setup.sh {{ domain }} {{ item.secret_name }} {{ namespace }}
  # `when` is evaluated once per loop item, so it cannot guard the loop
  # expression itself. Defaulting to an empty list makes an undefined `tls`
  # skip the task instead of failing with "'tls' is undefined".
  loop: "{{ tls | default([]) }}"
  when: inventory_hostname in groups[group]

View File

@ -0,0 +1,3 @@
# embedding-service (chart)
Placeholder role for managing the Helm chart release of embedding-service.

View File

@ -0,0 +1,5 @@
---
# TODO: implement Helm release tasks
- name: Placeholder task
debug:
msg: "Role placeholder. Implement Helm release tasks."

View File

@ -0,0 +1,3 @@
# feast (chart)
Placeholder role for managing the Helm chart release of feast.

Some files were not shown because too many files have changed in this diff Show More