---
# 03-03 Traefik Dashboard + ACME (merged HelmChartConfig variant)
# Shares the same Traefik HelmChartConfig resource name (traefik) with 03-02.
# Do not alternate full verification runs of the two without coordination:
# whichever one is applied later overwrites the earlier one.
#
# Play 1 of 3: stage the 03-03 manifest on the k3s server, patch it and apply it.
# Inputs are read from the environment via the `env` lookup:
#   ACME_EMAIL       required; the play is gated (skipped) when it is empty
#   CF_API_TOKEN     optional; when set, the Cloudflare token Secret is ensured
#   ACME_CA_STAGING  optional; "1" un-comments the Let's Encrypt staging CA flag
- name: Deploy 03-03 Traefik Dashboard + ACME (HelmChartConfig)
  hosts: k3s_server
  become: true
  run_once: true
  vars:
    k3s_kubeconfig: /etc/rancher/k3s/k3s.yaml
    manifest_src: "{{ playbook_dir }}/../../files/03-03/traefik-dashboard-acme.yaml"
    manifest_dest: /tmp/traefik-dashboard-acme.yaml
    acme_email: "{{ lookup('env', 'ACME_EMAIL') | default('', true) }}"
    cf_api_token: "{{ lookup('env', 'CF_API_TOKEN') | default('', true) }}"
  tasks:
    # presumably gate-debug-end-play.yml prints the message and ends the play;
    # confirm against the verify_common role.
    - name: "Gate - skip apply when ACME_EMAIL missing"
      when: acme_email | trim == ""
      ansible.builtin.include_role:
        name: verify_common
        tasks_from: gate-debug-end-play.yml
      vars:
        verify_gate_message: "[GATE] skipped doc_id=03-03 reason=missing_env missing=ACME_EMAIL"

    # Stage a working copy on the target; the tasks below edit it in place.
    - name: Copy manifest
      ansible.builtin.copy:
        src: "{{ manifest_src }}"
        dest: "{{ manifest_dest }}"
        mode: "0644"

    - name: Replace ACME email placeholder
      ansible.builtin.replace:
        path: "{{ manifest_dest }}"
        regexp: "<YOUR_REAL_EMAIL>"
        replace: "{{ acme_email | trim }}"

    # Un-comments the staging caserver argument that ships commented out in the manifest.
    # NOTE(review): the leading whitespace of `replace` must match the indentation of
    # the surrounding argument list in the manifest; confirm against the manifest file.
    - name: Enable ACME staging CA when ACME_CA_STAGING=1
      when: (lookup('env', 'ACME_CA_STAGING') | default('0', true) | trim) == "1"
      ansible.builtin.replace:
        path: "{{ manifest_dest }}"
        regexp: '^\s*# - "--certificatesresolvers\.cloudflare\.acme\.caserver=https://acme-staging-v02\.api\.letsencrypt\.org/directory".*$'
        replace: ' - "--certificatesresolvers.cloudflare.acme.caserver=https://acme-staging-v02.api.letsencrypt.org/directory"'

    - name: Ensure Cloudflare API token Secret before Traefik ACME apply
      when: (cf_api_token | trim | length) > 0
      ansible.builtin.include_role:
        name: verify_common
        tasks_from: ensure-cloudflare-api-token-secret.yml
      vars:
        verify_cf_api_token: "{{ cf_api_token | trim }}"

    # No shell features are needed here, so run kubectl directly with an argv list:
    # the templated paths are passed as single arguments and never re-parsed by a shell.
    - name: Apply manifest
      ansible.builtin.command:
        argv:
          - kubectl
          - apply
          - "-f"
          - "{{ manifest_dest }}"
      environment:
        KUBECONFIG: "{{ k3s_kubeconfig }}"
      register: traefik_03_03_apply
      # kubectl prints one "<resource> created|configured|unchanged" line per object;
      # report a change only when at least one object was not "unchanged".
      changed_when: >-
        traefik_03_03_apply.stdout_lines
        | reject('search', ' unchanged')
        | list | length > 0
# Play 2 of 3: verify the Traefik rollout and probe the dashboard over HTTP.
# Inputs are read from the environment via the `env` lookup:
#   ACME_EMAIL                    required; the play is gated when it is empty
#   CF_API_TOKEN                  optional; when set, the token Secret is (re)ensured
#   TRAEFIK_DASHBOARD_VERIFY_URL  optional; when set, the dashboard is probed at that
#                                 URL from localhost, otherwise via a port-forward
#                                 to the Traefik pod on the k3s server
- name: Verify 03-03 Traefik Dashboard + ACME (rollout + dashboard http)
  hosts: k3s_server
  become: true
  run_once: true
  vars:
    k3s_kubeconfig: /etc/rancher/k3s/k3s.yaml
    acme_email: "{{ lookup('env', 'ACME_EMAIL') | default('', true) }}"
    cf_api_token: "{{ lookup('env', 'CF_API_TOKEN') | default('', true) }}"
    _traefik_dash_url_env: "{{ lookup('env', 'TRAEFIK_DASHBOARD_VERIFY_URL') | default('', true) | trim }}"
    # The k3s_server_ip fallback is only reached when the env URL is empty, and the
    # tasks in this play that read this variable run only when the env URL is set.
    traefik_dashboard_probe_url: "{{ _traefik_dash_url_env if (_traefik_dash_url_env | length > 0) else ('http://' ~ k3s_server_ip ~ '/dashboard/') }}"
  tasks:
    # presumably gate-debug-end-play.yml prints the message and ends the play;
    # confirm against the verify_common role.
    - name: "Gate - skip verify when ACME_EMAIL missing"
      when: acme_email | trim == ""
      ansible.builtin.include_role:
        name: verify_common
        tasks_from: gate-debug-end-play.yml
      vars:
        verify_gate_message: "[GATE] skipped doc_id=03-03 reason=missing_env missing=ACME_EMAIL"

    - name: Ensure Cloudflare token Secret from CF_API_TOKEN (real-pass)
      when: (cf_api_token | trim | length) > 0
      ansible.builtin.include_role:
        name: verify_common
        tasks_from: ensure-cloudflare-api-token-secret.yml
      vars:
        verify_cf_api_token: "{{ cf_api_token | trim }}"

    # Never fails by itself: the return code is inspected by the next two tasks.
    - name: Check cloudflare-api-token secret exists
      ansible.builtin.shell: |
        set -euo pipefail
        KUBECONFIG={{ k3s_kubeconfig }} kubectl -n kube-system get secret cloudflare-api-token
      args:
        executable: /bin/bash
      changed_when: false
      register: cloudflare_secret_check
      failed_when: false

    # Secret absent and no token supplied: treat as a missing dependency and gate.
    - name: Gate - no CF_API_TOKEN and secret missing
      when: cloudflare_secret_check.rc != 0 and (cf_api_token | trim | length) == 0
      ansible.builtin.include_role:
        name: verify_common
        tasks_from: gate-debug-end-play.yml
      vars:
        verify_gate_message: "[GATE] skipped doc_id=03-03 reason=missing_dependency missing=cloudflare-api-token skip_scope=traefik-dashboard-acme"

    # Secret absent although a token was supplied: the ensure step above did not work.
    - name: Fail when secret missing but CF_API_TOKEN was set
      when: cloudflare_secret_check.rc != 0 and (cf_api_token | trim | length) > 0
      ansible.builtin.fail:
        msg: "已设置 CF_API_TOKEN 但 cloudflare-api-token Secret 仍不可用,请检查 apiserver 权限与命名空间 kube-system"

    # Same approach as the 03-02 verify: when an old ReplicaSet is stuck pending
    # termination the rollout can hang forever; a scale reset unblocks it, at the
    # cost of briefly interrupting ingress. Best effort: the task never fails.
    - name: Unstick Traefik deployment via scale down/up (kube-system)
      ansible.builtin.shell: |
        set -euo pipefail
        export KUBECONFIG={{ k3s_kubeconfig }}
        echo "[OC-ASSERT] assertion=traefik_rollout_unblock phase=scale_reset doc_id=03-03"
        # Remember the desired replica count so the reset restores it instead of
        # forcing 1; fall back to 1 when it is unreadable, non-numeric or already 0.
        want=$(kubectl get deployment traefik -n kube-system -o jsonpath='{.spec.replicas}' 2>/dev/null || true)
        case "$want" in
          ''|0|*[!0-9]*) want=1 ;;
        esac
        # If anything below aborts under `set -e`, still scale back up so Traefik
        # is not left at 0 replicas (failed_when: false would hide the abort).
        restore_replicas() {
          kubectl scale deployment traefik -n kube-system --replicas="$want" || true
        }
        trap restore_replicas EXIT
        kubectl scale deployment traefik -n kube-system --replicas=0
        for _ in $(seq 1 90); do
          rep=$(kubectl get deployment traefik -n kube-system -o jsonpath='{.status.replicas}' 2>/dev/null || echo 1)
          # status.replicas is omitted once it reaches 0, so an empty result means
          # "scaled down"; only a failed kubectl call (-> 1) keeps the loop waiting.
          [ "${rep:-0}" = "0" ] && break
          sleep 2
        done
        # Force-delete whatever Traefik pods are still around, by label first ...
        for sel in "app.kubernetes.io/name=traefik" "app.kubernetes.io/instance=traefik"; do
          kubectl get pods -n kube-system -l "$sel" -o name 2>/dev/null | while read -r p; do
            [ -z "$p" ] && continue
            kubectl delete "$p" -n kube-system --grace-period=0 --force --ignore-not-found=true || true
          done
        done
        # ... then by pod-name pattern, to catch pods the selectors did not match.
        { kubectl get pods -n kube-system --no-headers -o custom-columns=:metadata.name 2>/dev/null | grep -E '^traefik-[0-9a-f]+-' || true; } | while read -r n; do
          [ -z "$n" ] && continue
          kubectl delete pod "$n" -n kube-system --grace-period=0 --force --ignore-not-found=true || true
        done
        trap - EXIT
        kubectl scale deployment traefik -n kube-system --replicas="$want"
        sleep 3
      args:
        executable: /bin/bash
      changed_when: true
      failed_when: false

    - name: Rollout status traefik (kube-system)
      ansible.builtin.include_role:
        name: verify_common
        tasks_from: kubectl-rollout-status.yml
      vars:
        verify_rollout_ref: deployment/traefik
        verify_rollout_namespace: kube-system
        verify_rollout_timeout_s: 600

    # `kubectl rollout status` also "succeeds" when spec.replicas is 0, so wait
    # explicitly until a Traefik pod is Ready.
    - name: Wait for traefik Pod Ready (kube-system)
      ansible.builtin.shell: |
        set -euo pipefail
        export KUBECONFIG={{ k3s_kubeconfig }}
        kubectl wait --for=condition=ready pod \
          -l app.kubernetes.io/name=traefik,app.kubernetes.io/instance=traefik-kube-system \
          -n kube-system --timeout=180s
      args:
        executable: /bin/bash
      changed_when: false

    # External-URL probe: runs on localhost without privilege escalation.
    - name: HTTP probe Traefik Dashboard via TRAEFIK_DASHBOARD_VERIFY_URL (control 机)
      when: _traefik_dash_url_env | length > 0
      ansible.builtin.uri:
        url: "{{ traefik_dashboard_probe_url }}"
        method: GET
        follow_redirects: all
        status_code: [200]
        timeout: 15
      register: traefik_03_03_dashboard_http
      changed_when: false
      delegate_to: localhost
      become: false

    - name: OC3 summary for Traefik Dashboard HTTP (external URL)
      when: _traefik_dash_url_env | length > 0
      ansible.builtin.debug:
        msg: "[OC-ASSERT] assertion=traefik_03_03_dashboard_http phase=http probe=uri status={{ traefik_03_03_dashboard_http.status | default('') }} url={{ traefik_dashboard_probe_url }}"

    # In-cluster probe: port-forward to the first matching Traefik pod and try the
    # candidate container ports in turn until /dashboard/ answers successfully.
    - name: HTTP probe Traefik Dashboard (port-forward traefik Pod,试 web 容器端口 8000/8080)
      when: _traefik_dash_url_env | length == 0
      ansible.builtin.shell: |
        set -euo pipefail
        export KUBECONFIG={{ k3s_kubeconfig }}
        POD=$(kubectl get pods -n kube-system \
          -l 'app.kubernetes.io/name=traefik,app.kubernetes.io/instance=traefik-kube-system' \
          -o jsonpath='{.items[0].metadata.name}' 2>/dev/null || true)
        test -n "$POD"
        local_port=$(shuf -i 32000-32767 -n 1)
        # Private log file instead of a fixed, predictable path under /tmp.
        pf_log=$(mktemp)
        pf_pid=""
        # One cleanup handler for every exit path: stop a live port-forward and
        # remove the log file.
        cleanup() {
          [ -n "$pf_pid" ] && kill "$pf_pid" 2>/dev/null || true
          rm -f "$pf_log"
        }
        trap cleanup EXIT
        ok=0
        for cport in 8000 8080 80; do
          kubectl port-forward -n kube-system "pod/$POD" "${local_port}:${cport}" >"$pf_log" 2>&1 &
          pf_pid=$!
          # Give the forwarder up to 20 s to announce that it is listening.
          for _ in $(seq 1 20); do
            grep -q "Forwarding from" "$pf_log" 2>/dev/null && break
            sleep 1
          done
          if curl -sfL --connect-timeout 3 --max-time 12 -o /dev/null "http://127.0.0.1:${local_port}/dashboard/" 2>/dev/null; then
            ok=1
            echo "[OC-ASSERT] assertion=traefik_03_03_dashboard_http phase=http probe=port_forward status=200 pod_port=${cport} local_port=${local_port}"
            break
          fi
          # This port did not answer: stop the forwarder before trying the next one.
          kill "$pf_pid" 2>/dev/null || true
          wait "$pf_pid" 2>/dev/null || true
          pf_pid=""
        done
        test "$ok" = "1"
      args:
        executable: /bin/bash
      changed_when: false
# Play 3 of 3: optionally delete what the 03-03 manifest created.
# Runs only when ACME_EMAIL is set and the VERIFY_TEARDOWN variable (default "1")
# equals "1". It deletes from the manifest copy at manifest_dest, so that file must
# still be present on the target.
- name: Teardown 03-03 Traefik Dashboard + ACME (optional)
  hosts: k3s_server
  become: true
  run_once: true
  vars:
    k3s_kubeconfig: /etc/rancher/k3s/k3s.yaml
    # VERIFY_TEARDOWN is read as an Ansible variable here, not through the env lookup.
    verify_teardown: "{{ (VERIFY_TEARDOWN | default('1')) | string }}"
    acme_email: "{{ lookup('env', 'ACME_EMAIL') | default('', true) }}"
    manifest_dest: /tmp/traefik-dashboard-acme.yaml
  tasks:
    # Same ACME_EMAIL gate as the other plays in this file.
    - name: Skip teardown when gated
      when: acme_email | trim == ""
      ansible.builtin.meta: end_play

    # No shell features are needed, so kubectl is invoked directly with an argv list.
    - name: Delete resources when VERIFY_TEARDOWN=1
      when: verify_teardown == "1"
      ansible.builtin.command:
        argv:
          - kubectl
          - delete
          - "-f"
          - "{{ manifest_dest }}"
          - "--ignore-not-found=true"
      environment:
        KUBECONFIG: "{{ k3s_kubeconfig }}"
      register: traefik_03_03_delete
      # With --ignore-not-found, kubectl prints a "... deleted" line only for
      # objects it actually removed.
      changed_when: "' deleted' in traefik_03_03_delete.stdout"