fix(k3s): purge stuck external-dns release state

This commit is contained in:
Haitao Pan 2026-04-04 17:09:37 +08:00
parent 78bc356655
commit e8515003f3
12 changed files with 166 additions and 7 deletions

View File

@ -1,6 +1,6 @@
# acp_codex
Minimal Codex ACP deployment role.
Codex ACP deployment role with a public XWorkmate ACP Web endpoint.
Installs:
@ -9,5 +9,12 @@ Installs:
Exposes:
- `codex app-server --listen ws://127.0.0.1:9001`
- `https://acp-server-codex.svc.plus`
- raw Codex upstream: `codex app-server --listen ws://127.0.0.1:9001`
- public ACP Web server: `127.0.0.1:9010` via `xworkmate-go-core serve`
- public HTTPS endpoint: `https://acp-server-codex.svc.plus`
Notes:
- Caddy terminates TLS and proxies the public domain to the Go ACP server.
- The Go ACP server serves `/acp` and `/acp/rpc`.
- `ACP_ALLOWED_ORIGINS` defaults to `https://xworkmate.svc.plus,http://localhost:*,http://127.0.0.1:*`.

View File

@ -5,6 +5,19 @@ acp_codex_service_group: root
# Working directory and loopback listener of the raw Codex upstream.
acp_codex_workdir: "/root"
acp_codex_listen_host: "127.0.0.1"
acp_codex_listen_port: 9001

# XWorkmate Go ACP bridge: systemd unit name and installed binary path.
acp_codex_bridge_service_name: "xworkmate-codex-acp-bridge"
acp_codex_bridge_binary_path: "/usr/local/bin/xworkmate-go-core"

# Controller-side build: Go source directory, output directory and binary,
# and the GOOS/GOARCH pair the binary is compiled for.
acp_codex_bridge_local_source_dir: "{{ playbook_dir }}/../xworkmate/go/go_core"
acp_codex_bridge_local_build_dir: "{{ playbook_dir }}/.artifacts/acp_codex"
acp_codex_bridge_local_binary_path: "{{ acp_codex_bridge_local_build_dir }}/xworkmate-go-core"
acp_codex_bridge_build_goos: "linux"
acp_codex_bridge_build_goarch: "amd64"

# Loopback address the bridge listens on; Caddy proxies the public domain here.
acp_codex_bridge_listen_host: "127.0.0.1"
acp_codex_bridge_listen_port: 9010

# Origins joined with "," into ACP_ALLOWED_ORIGINS for the bridge service.
# The first entry is also used as the Origin header of the CORS preflight check.
acp_codex_bridge_allowed_origins:
  - "https://xworkmate.svc.plus"
  - "http://localhost:*"
  - "http://127.0.0.1:*"

# Public domain and Caddy configuration locations.
acp_codex_domain: "acp-server-codex.svc.plus"
acp_codex_caddyfile_path: "/etc/caddy/Caddyfile"
acp_codex_caddy_conf_dir: "/etc/caddy/conf.d"

View File

@ -8,3 +8,8 @@
ansible.builtin.service:
name: "{{ acp_codex_service_name }}"
state: restarted
# Handler: restart the bridge unit named by acp_codex_bridge_service_name.
- name: Restart codex acp bridge
  ansible.builtin.service:
    state: restarted
    name: "{{ acp_codex_bridge_service_name }}"

View File

@ -1,4 +1,32 @@
---
# Runs on the controller (delegated to localhost, no privilege escalation):
# creates the directory that will hold the locally built bridge binary.
- name: Ensure local Codex ACP build directory exists
  delegate_to: localhost
  become: false
  ansible.builtin.file:
    state: directory
    path: "{{ acp_codex_bridge_local_build_dir }}"
    mode: "0755"
# Compiles the bridge on the controller (delegated to localhost, unprivileged).
# GOOS/GOARCH select the target platform and CGO is disabled for the build.
- name: Build XWorkmate Go ACP server locally
  ansible.builtin.command:
    cmd: go build -o "{{ acp_codex_bridge_local_binary_path }}" .
    chdir: "{{ acp_codex_bridge_local_source_dir }}"
  environment:
    GOOS: "{{ acp_codex_bridge_build_goos }}"
    GOARCH: "{{ acp_codex_bridge_build_goarch }}"
    CGO_ENABLED: "0"
  delegate_to: localhost
  become: false
  # The build runs on every play, so without this the task would always be
  # reported as "changed". Whether the binary on the target actually changed
  # is decided by the task that uploads it, not by this build step.
  changed_when: false
# Copies the locally built binary to the target, root-owned and executable;
# a change notifies the bridge restart handler.
- name: Upload XWorkmate Go ACP server binary
  notify: Restart codex acp bridge
  ansible.builtin.copy:
    dest: "{{ acp_codex_bridge_binary_path }}"
    src: "{{ acp_codex_bridge_local_binary_path }}"
    mode: "0755"
    owner: root
    group: root
- name: Deploy Caddy main file
ansible.builtin.template:
src: Caddyfile.j2
@ -26,6 +54,15 @@
mode: "0644"
notify: Restart codex app server
# Renders the bridge systemd unit under /etc/systemd/system; a change notifies
# the bridge restart handler.
- name: Deploy XWorkmate Codex ACP bridge service
  notify: Restart codex acp bridge
  ansible.builtin.template:
    dest: "/etc/systemd/system/{{ acp_codex_bridge_service_name }}.service"
    src: xworkmate-codex-acp-bridge.service.j2
    mode: "0644"
    owner: root
    group: root
- name: Reload systemd manager configuration
ansible.builtin.systemd:
daemon_reload: true
@ -41,3 +78,9 @@
name: "{{ acp_codex_service_name }}"
enabled: true
state: started
# Enables the bridge unit at boot and starts it if it is not already running.
- name: Ensure XWorkmate Codex ACP bridge service is enabled and running
  ansible.builtin.systemd:
    state: started
    enabled: true
    name: "{{ acp_codex_bridge_service_name }}"

View File

@ -7,6 +7,10 @@
ansible.builtin.import_tasks: config.yml
tags: [acp_codex, acp_codex_config]
# Runs any notified handlers now, before the validation tasks that follow.
- name: Flush Codex ACP handlers before validation
  tags:
    - acp_codex
    - acp_codex_config
    - acp_codex_validate
  ansible.builtin.meta: flush_handlers
# Statically imports the validation tasks from validate.yml.
- name: Validate Codex ACP readiness
  tags:
    - acp_codex
    - acp_codex_validate
  ansible.builtin.import_tasks: validate.yml

View File

@ -8,16 +8,51 @@
register: acp_codex_ss
changed_when: false
# Sends a JSON-RPC `acp.capabilities` request to the bridge's /acp/rpc endpoint
# on its local listener and requires HTTP 200. The response body is kept in
# acp_codex_bridge_http for the validation summary.
- name: Validate local Codex ACP bridge HTTP endpoint
  ansible.builtin.uri:
    url: "http://{{ acp_codex_bridge_listen_host }}:{{ acp_codex_bridge_listen_port }}/acp/rpc"
    method: POST
    body_format: json
    body:
      jsonrpc: "2.0"
      id: 1
      method: acp.capabilities
      params: {}
    return_content: true
    status_code: 200
  register: acp_codex_bridge_http
  # The bridge may have been (re)started moments ago and is a Type=simple
  # unit, so systemd reports it started before the port is necessarily
  # accepting connections. Retry briefly instead of failing on the first
  # connection error.
  until: acp_codex_bridge_http is succeeded
  retries: 10
  delay: 2
# Sends an OPTIONS preflight to /acp/rpc using the first allowed origin and
# requires HTTP 204. The result is kept in acp_codex_bridge_preflight.
- name: Validate local Codex ACP bridge CORS preflight
  register: acp_codex_bridge_preflight
  ansible.builtin.uri:
    method: OPTIONS
    url: "http://{{ acp_codex_bridge_listen_host }}:{{ acp_codex_bridge_listen_port }}/acp/rpc"
    headers:
      Access-Control-Request-Method: POST
      Origin: "{{ acp_codex_bridge_allowed_origins[0] }}"
    status_code: 204
    return_content: true
# Captures `systemctl status` output for the Codex service. Informational
# only: the task never reports a change and never fails.
- name: Show Codex ACP status
  ansible.builtin.command:
    cmd: systemctl status "{{ acp_codex_service_name }}" --no-pager
  failed_when: false
  changed_when: false
  register: acp_codex_status
# Captures `systemctl status` output for the bridge service. Informational
# only: the task never reports a change and never fails.
- name: Show Codex ACP bridge status
  ansible.builtin.command:
    cmd: systemctl status "{{ acp_codex_bridge_service_name }}" --no-pager
  failed_when: false
  changed_when: false
  register: acp_codex_bridge_status
# Prints a summary of the validation results: domain, listener addresses,
# systemctl status output for both services, the socket listing, the bridge's
# capabilities response body, and the allow-origin value from the preflight.
# Each registered value falls back to 'N/A' when it is undefined.
- name: Show Codex ACP validation summary
ansible.builtin.debug:
msg:
- "Codex domain: {{ acp_codex_domain }}"
- "Listener: {{ acp_codex_listen_host }}:{{ acp_codex_listen_port }}"
- "Upstream listener: {{ acp_codex_listen_host }}:{{ acp_codex_listen_port }}"
- "Public ACP listener: {{ acp_codex_bridge_listen_host }}:{{ acp_codex_bridge_listen_port }}"
- "Service: {{ acp_codex_status.stdout | default('N/A') }}"
- "Bridge service: {{ acp_codex_bridge_status.stdout | default('N/A') }}"
- "Socket: {{ acp_codex_ss.stdout | default('N/A') }}"
- "Bridge capabilities HTTP: {{ acp_codex_bridge_http.content | default('N/A') }}"
- "Bridge preflight allow-origin: {{ acp_codex_bridge_preflight.access_control_allow_origin | default('N/A') }}"

View File

@ -1,3 +1,3 @@
# Site block for the public Codex ACP domain: requests are reverse-proxied to
# a local listener given as host:port.
{{ acp_codex_domain }} {
reverse_proxy {{ acp_codex_listen_host }}:{{ acp_codex_listen_port }}
reverse_proxy {{ acp_codex_bridge_listen_host }}:{{ acp_codex_bridge_listen_port }}
}

View File

@ -0,0 +1,20 @@
# systemd unit for the XWorkmate Codex ACP bridge server.
[Unit]
Description=XWorkmate Codex ACP bridge server
# Ordering only: start after the network is online and after the Codex service unit.
After=network-online.target {{ acp_codex_service_name }}.service
Wants=network-online.target
[Service]
Type=simple
# NOTE(review): the bridge is the process Caddy proxies the public domain to, and it runs as root here - confirm this is intended.
User=root
Group=root
WorkingDirectory={{ acp_codex_workdir }}
Environment=HOME={{ acp_codex_workdir }}
Environment=TERM=xterm-256color
# The listen address is passed twice with the same value: here as ACP_LISTEN_ADDR and below via --listen.
Environment=ACP_LISTEN_ADDR={{ acp_codex_bridge_listen_host }}:{{ acp_codex_bridge_listen_port }}
# Comma-separated list built from acp_codex_bridge_allowed_origins.
Environment=ACP_ALLOWED_ORIGINS={{ acp_codex_bridge_allowed_origins | join(',') }}
ExecStart={{ acp_codex_bridge_binary_path }} serve --listen {{ acp_codex_bridge_listen_host }}:{{ acp_codex_bridge_listen_port }}
# Restart on any exit, waiting 2 seconds between attempts.
Restart=always
RestartSec=2
[Install]
WantedBy=multi-user.target

View File

@ -1,6 +1,6 @@
# acp_opencode
Minimal OpenCode ACP deployment role.
OpenCode web endpoint deployment role.
Installs:
@ -10,3 +10,8 @@ Exposes:
- `opencode serve --hostname 127.0.0.1 --port 38992 --print-logs`
- `https://acp-server-opencode.svc.plus`
Notes:
- This role exposes the OpenCode web UI, not the XWorkmate ACP JSON-RPC endpoint.
- Validation checks assert an HTML response marker so the role does not get confused with the Codex ACP bridge role.

View File

@ -7,6 +7,8 @@ acp_opencode_workdir: /home/ubuntu/.opencode
# Loopback listener of the OpenCode server and the public domain for it.
acp_opencode_listen_host: "127.0.0.1"
acp_opencode_listen_port: 38992
acp_opencode_domain: "acp-server-opencode.svc.plus"

# Values the validation step expects from the local HTTP response: the
# content type, and a marker string looked up in the response body.
acp_opencode_expected_content_type: "text/html"
acp_opencode_expected_body_marker: "opencode-theme-id"

# Caddy configuration locations.
acp_opencode_caddyfile_path: "/etc/caddy/Caddyfile"
acp_opencode_caddy_conf_dir: "/etc/caddy/conf.d"
acp_opencode_caddy_fragment_path: "/etc/caddy/conf.d/acp-server-opencode.caddy"

View File

@ -8,6 +8,14 @@
register: acp_opencode_ss
changed_when: false
# Fetches "/" from the local OpenCode listener and fails unless the response
# is HTTP 200, its Content-Type contains acp_opencode_expected_content_type,
# and its body contains acp_opencode_expected_body_marker. The response is
# kept in acp_opencode_http for the validation summary.
- name: Validate OpenCode local HTTP endpoint
  ansible.builtin.uri:
    url: "http://{{ acp_opencode_listen_host }}:{{ acp_opencode_listen_port }}/"
    method: GET
    return_content: true
    status_code: 200
  register: acp_opencode_http
  # failed_when replaces the module's own failure decision, so the status
  # check is repeated here alongside the content-type and body-marker checks.
  failed_when: >-
    (acp_opencode_http.status | default(-1)) != 200
    or acp_opencode_expected_content_type not in (acp_opencode_http.content_type | default(''))
    or acp_opencode_expected_body_marker not in (acp_opencode_http.content | default(''))
- name: Show OpenCode ACP status
ansible.builtin.command: systemctl status "{{ acp_opencode_service_name }}" --no-pager
register: acp_opencode_status
@ -21,3 +29,5 @@
- "Listener: {{ acp_opencode_listen_host }}:{{ acp_opencode_listen_port }}"
- "Service: {{ acp_opencode_status.stdout | default('N/A') }}"
- "Socket: {{ acp_opencode_ss.stdout | default('N/A') }}"
- "HTTP content-type: {{ acp_opencode_http.content_type | default('N/A') }}"
- "HTTP body marker present: {{ acp_opencode_expected_body_marker in (acp_opencode_http.content | default('')) }}"

View File

@ -17,6 +17,21 @@
external-dns
release_name="{{ k3s_platform_values.components.externalDns.releaseName }}"
release_secret_prefix="sh.helm.release.v1.${release_name}.v"
# Deletes every Secret in the "platform" namespace whose name starts with
# ${release_secret_prefix}, i.e. the stored Helm release records for
# ${release_name}. Does nothing when no such Secret exists.
clear_stuck_release_state() {
  # `-o name` prints one "secret/<name>" per line. Object names cannot contain
  # "/", so a fixed-string match on "secret/<prefix>" can only hit at the
  # start of a line; -F also stops the dots in the prefix from acting as
  # regex wildcards.
  # `|| true` keeps grep's exit status 1 (no match) from failing the pipeline
  # if the surrounding script runs with `set -o pipefail`.
  kubectl -n platform get secret -o name \
    | { grep -F "secret/${release_secret_prefix}" || true; } \
    | xargs -r kubectl -n platform delete
}
# Pre-flight: when `helm status` succeeds for the release and its output
# (captured in /tmp/external-dns-status.log) matches either pattern below,
# purge the stored release state before attempting the upgrade.
# NOTE(review): the second pattern looks like an error message from
# install/upgrade rather than something `helm status` prints - confirm it can
# ever match here. Also confirm whether pending-upgrade / pending-rollback
# should be treated as stuck as well.
if helm status "$release_name" -n platform >/tmp/external-dns-status.log 2>&1 \
  && grep -q \
    -e "STATUS: pending-install" \
    -e "another operation (install/upgrade/rollback) is in progress" \
    /tmp/external-dns-status.log; then
  clear_stuck_release_state
fi
if ! helm upgrade --install "$release_name" "$chart_dir/external-dns" \
--namespace platform \
--create-namespace \
@ -25,7 +40,7 @@
--timeout 10m; then
if helm status "$release_name" -n platform >/tmp/external-dns-status.log 2>&1; then
if grep -q "another operation (install/upgrade/rollback) is in progress" /tmp/external-dns-status.log; then
helm uninstall "$release_name" -n platform --wait --timeout 5m || true
clear_stuck_release_state
helm upgrade --install "$release_name" "$chart_dir/external-dns" \
--namespace platform \
--create-namespace \