---
# Playbook for doc_id 03-02: Traefik ACME configuration on a k3s cluster.
#
# Four plays, each targeting the `k3s_server` group with `run_once: true`:
#   1. Deploy   - render and `kubectl apply` the Traefik ACME manifest.
#   2. Matrix   - opt-in deploy/cleanup of the nginx matrix TLS test workloads.
#   3. Verify   - check the Cloudflare token Secret and the Traefik rollout.
#   4. Teardown - optionally delete what play 1 applied.
#
# Inputs read from the controller environment:
#   ACME_EMAIL               required; plays 1, 3 and 4 are gated on it
#   CF_API_TOKEN             optional; when set, the Cloudflare Secret is ensured
#   ACME_CA_STAGING          "1" uncomments the Let's Encrypt staging CA argument
#   NGINX_MATRIX_TLS_ENABLE  true/1/yes enables play 2
# Inputs read as Ansible variables:
#   nginx_matrix_tls_enable  overrides NGINX_MATRIX_TLS_ENABLE when defined
#   mode                     "deploy" (default) or "cleanup" for play 2
#   VERIFY_TEARDOWN          "1" (default) makes play 4 delete the resources
#
# The task files included from the `verify_common` role are not part of this
# file; comments about them below are based on their names only.

- name: Deploy 03-02 Traefik ACME (HelmChartConfig)
  hosts: k3s_server
  become: true
  run_once: true
  vars:
    k3s_kubeconfig: /etc/rancher/k3s/k3s.yaml
    manifest_src: "{{ playbook_dir }}/../../files/03-02/traefik-acme.yaml"
    manifest_dest: /tmp/traefik-acme.yaml
    # default('', true) also maps an empty/undefined env value to ''.
    acme_email: "{{ lookup('env', 'ACME_EMAIL') | default('', true) }}"
    cf_api_token: "{{ lookup('env', 'CF_API_TOKEN') | default('', true) }}"
  tasks:
    # Presumably prints the gate message and ends the play - confirm in
    # verify_common/tasks/gate-debug-end-play.yml.
    - name: "Gate - skip apply when ACME_EMAIL missing"
      when: acme_email | trim == ""
      ansible.builtin.include_role:
        name: verify_common
        tasks_from: gate-debug-end-play.yml
      vars:
        verify_gate_message: "[GATE] skipped doc_id=03-02 reason=missing_env missing=ACME_EMAIL"

    - name: Copy manifest
      ansible.builtin.copy:
        src: "{{ manifest_src }}"
        dest: "{{ manifest_dest }}"
        mode: "0644"

    # NOTE(review): the pattern is an empty string as seen in this file. An
    # empty regular expression matches at every position, which would insert
    # the e-mail between all characters of the manifest. Confirm the real
    # placeholder token used in traefik-acme.yaml (it may have been lost from
    # this file) before running.
    - name: Replace ACME email placeholder
      ansible.builtin.replace:
        path: "{{ manifest_dest }}"
        regexp: ""
        replace: "{{ acme_email | trim }}"

    # Uncomments the staging caserver argument in the rendered manifest.
    # NOTE(review): the replacement's leading whitespace must match the
    # indentation of the surrounding list in the manifest - confirm.
    - name: Enable ACME staging CA when ACME_CA_STAGING=1
      when: (lookup('env', 'ACME_CA_STAGING') | default('0', true) | trim) == "1"
      ansible.builtin.replace:
        path: "{{ manifest_dest }}"
        regexp: '^\s*# - "--certificatesresolvers\.cloudflare\.acme\.caserver=https://acme-staging-v02\.api\.letsencrypt\.org/directory".*$'
        replace: ' - "--certificatesresolvers.cloudflare.acme.caserver=https://acme-staging-v02.api.letsencrypt.org/directory"'

    - name: Ensure Cloudflare API token Secret before Traefik ACME apply
      when: (cf_api_token | trim | length) > 0
      ansible.builtin.include_role:
        name: verify_common
        tasks_from: ensure-cloudflare-api-token-secret.yml
      vars:
        verify_cf_api_token: "{{ cf_api_token | trim }}"

    - name: Apply manifest
      ansible.builtin.shell: |
        set -e
        KUBECONFIG={{ k3s_kubeconfig | quote }} kubectl apply -f {{ manifest_dest | quote }}
      args:
        executable: /bin/bash
      changed_when: true

- name: Deploy or cleanup nginx matrix TLS (opt-in)
  hosts: k3s_server
  become: true
  run_once: true
  vars:
    k3s_kubeconfig: /etc/rancher/k3s/k3s.yaml
    # An explicitly defined `nginx_matrix_tls_enable` variable wins; otherwise
    # the env var is accepted when it is one of true/1/yes (case-insensitive).
    _nginx_matrix_tls_enable: "{{ nginx_matrix_tls_enable | default((lookup('env', 'NGINX_MATRIX_TLS_ENABLE') | default('', true) | trim | lower in ['true', '1', 'yes']) | bool) }}"
    nginx_matrix_tls_enabled: "{{ _nginx_matrix_tls_enable | bool }}"
    manifests_path: "{{ playbook_dir }}/../../files/03-02"
    # NOTE(review): not referenced by any task visible in this file; the curl
    # checks below hard-code the same four host names.
    tls_domains:
      - test01.jackadam.top
      - test02.jackadam.top
      - test03.jackadam.top
      - test04.jackadam.top
  pre_tasks:
    # Ends the whole play (deploy and cleanup alike) unless opted in.
    - name: Gate - skip nginx matrix TLS when nginx_matrix_tls_enable=false
      when: not nginx_matrix_tls_enabled
      block:
        - name: Report that the optional nginx matrix TLS play is skipped
          ansible.builtin.debug:
            msg: "[SKIP] optional doc_id=03-02 action=nginx-matrix-tls var=nginx_matrix_tls_enable"

        - name: End play when nginx matrix TLS is not enabled
          ansible.builtin.meta: end_play
  tasks:
    - name: Deploy nginx matrix TLS (mode=deploy)
      when: (mode | default('deploy')) == 'deploy'
      block:
        # The manifests live in the controller-side checkout, hence the
        # delegation to localhost without privilege escalation.
        - name: Ensure manifests path exists (controller repo path)
          ansible.builtin.stat:
            path: "{{ manifests_path }}"
          register: manifests_stat
          delegate_to: localhost
          become: false
          run_once: true

        - name: Fail if manifests not found
          ansible.builtin.fail:
            msg: "manifests 未找到: {{ manifests_path }},请从仓库根目录或 ansible 同级执行"
          when: not manifests_stat.stat.exists
          delegate_to: localhost
          become: false
          run_once: true

        - name: Ensure control-plane label on k3s_server nodes (for M1)
          ansible.builtin.shell: |
            KUBECONFIG={{ k3s_kubeconfig | quote }} kubectl label node {{ item }} node-role.kubernetes.io/control-plane= --overwrite
          loop: "{{ groups['k3s_server'] | default([]) }}"

        - name: Ensure worker label on k3s_worker nodes (for M3)
          ansible.builtin.shell: |
            KUBECONFIG={{ k3s_kubeconfig | quote }} kubectl label node {{ item }} node-role.kubernetes.io/worker= --overwrite
          loop: "{{ groups['k3s_worker'] | default([]) }}"

        # The trailing slash on src copies the directory's contents.
        # NOTE(review): manifests_path is the directory that also holds play
        # 1's traefik-acme.yaml (see manifest_src). If that file is copied
        # here, the recursive apply below would re-apply it without the e-mail
        # substitution - confirm this is intended.
        - name: Copy nginx matrix TLS manifests to server
          ansible.builtin.copy:
            src: "{{ manifests_path }}/"
            dest: /tmp/nginx-matrix-tls/
            mode: "0644"

        # No `set -e` here: each delete is attempted regardless of the
        # previous one, and only the last command's status reaches Ansible.
        - name: Delete non-TLS nginx matrix if present (deployments, ingress, ingressroute, middleware, configmaps)
          ansible.builtin.shell: |
            KUBECONFIG={{ k3s_kubeconfig | quote }} kubectl delete deployment,svc -n default nginx-m1 nginx-m2 nginx-m3 nginx-m4 --ignore-not-found=true
            KUBECONFIG={{ k3s_kubeconfig | quote }} kubectl delete ingress -n default nginx-m1 nginx-m3 --ignore-not-found=true
            KUBECONFIG={{ k3s_kubeconfig | quote }} kubectl delete ingressroute -n default nginx-m2 nginx-m4 --ignore-not-found=true
            KUBECONFIG={{ k3s_kubeconfig | quote }} kubectl delete middleware -n default stripprefix-m1 stripprefix-m2 stripprefix-m3 stripprefix-m4 --ignore-not-found=true
            KUBECONFIG={{ k3s_kubeconfig | quote }} kubectl delete configmap -n default nginx-m1-html nginx-m2-html nginx-m3-html nginx-m4-html --ignore-not-found=true
          register: del_non_tls
          changed_when: "'deleted' in del_non_tls.stdout"

        - name: kubectl apply nginx matrix TLS + HTTP-only
          ansible.builtin.shell: |
            KUBECONFIG={{ k3s_kubeconfig | quote }} kubectl apply -f /tmp/nginx-matrix-tls/ -R
          register: k8s_apply
          changed_when: "'configured' in k8s_apply.stdout or 'created' in k8s_apply.stdout"

        - name: Restart nginx deployments so pods pick up ConfigMap (M1~M4 标识)
          ansible.builtin.shell: |
            KUBECONFIG={{ k3s_kubeconfig | quote }} kubectl rollout restart deployment nginx-m1 nginx-m2 nginx-m3 nginx-m4 -n default
          changed_when: true

        # m1/m2 get a 60s rollout timeout, m3/m4 get 120s; any rollout
        # failure aborts the script (set -e) and fails the task.
        - name: Wait for nginx rollouts stable after ConfigMap restart
          ansible.builtin.shell: |
            set -euo pipefail
            KCFG={{ k3s_kubeconfig | quote }}
            export KUBECONFIG="$KCFG"
            for dep in nginx-m1 nginx-m2; do
              echo "[OC-ASSERT] assertion=nginx_matrix_tls_rollout deployment=${dep} timeout=60s"
              kubectl rollout status "deployment/$dep" -n default --timeout=60s
            done
            for dep in nginx-m3 nginx-m4; do
              echo "[OC-ASSERT] assertion=nginx_matrix_tls_rollout deployment=${dep} timeout=120s"
              kubectl rollout status "deployment/$dep" -n default --timeout=120s
            done
          args:
            executable: /bin/bash
          changed_when: false

        - name: Verify nginx matrix TLS resources
          ansible.builtin.shell: |
            KUBECONFIG={{ k3s_kubeconfig | quote }} kubectl get pod,svc,ing,ingressroute -n default -o wide
          register: verify_tls
          changed_when: false

        - name: ">>> nginx matrix TLS 资源"
          ansible.builtin.debug:
            msg: "{{ item }}"
          loop: "{{ verify_tls.stdout_lines }}"

        # Informational probe: curls every host's ansible_host for each of
        # the four domains (pinned with --resolve) and counts 200 responses.
        # failed_when is false, so the result never fails the play.
        - name: "HTTP curl 验证(HTTP-only:16 个目标,所有节点 × 4 域名)"
          ansible.builtin.shell: |
            bases="{{ groups['k3s_nodes'] | map('extract', hostvars) | map(attribute='ansible_host') | join(' ') }}"
            count=0
            ok=0
            echo "=== 16 个目标 (4 节点 × 4 域名) HTTP ==="
            echo "节点 M1(test01) M2(test02) M3(test03) M4(test04)"
            for base in $bases; do
              m1=$(curl -s -o /dev/null -w "%{http_code}" --connect-timeout 5 http://test01.jackadam.top/ --resolve "test01.jackadam.top:80:$base" 2>/dev/null) || m1="fail"
              m2=$(curl -s -o /dev/null -w "%{http_code}" --connect-timeout 5 http://test02.jackadam.top/ --resolve "test02.jackadam.top:80:$base" 2>/dev/null) || m2="fail"
              m3=$(curl -s -o /dev/null -w "%{http_code}" --connect-timeout 5 http://test03.jackadam.top/ --resolve "test03.jackadam.top:80:$base" 2>/dev/null) || m3="fail"
              m4=$(curl -s -o /dev/null -w "%{http_code}" --connect-timeout 5 http://test04.jackadam.top/ --resolve "test04.jackadam.top:80:$base" 2>/dev/null) || m4="fail"
              printf "%-12s %-14s %-14s %-14s %s\n" "$base" "$m1" "$m2" "$m3" "$m4"
              for c in $m1 $m2 $m3 $m4; do count=$((count+1)); [ "$c" = "200" ] && ok=$((ok+1)); done
            done
            echo "---"
            echo "共验证 $count 个目标,$ok 个返回 200"
          changed_when: false
          failed_when: false

        # Same probe over HTTPS on port 443; -k skips certificate
        # verification, so only reachability/status is checked.
        - name: "HTTPS curl 验证(16 个目标:所有节点 × 4 域名,所有节点均为入口点)"
          ansible.builtin.shell: |
            bases="{{ groups['k3s_nodes'] | map('extract', hostvars) | map(attribute='ansible_host') | join(' ') }}"
            count=0
            ok=0
            echo "=== 16 个目标 (4 节点 × 4 域名) HTTPS ==="
            echo "节点 M1(test01) M2(test02) M3(test03) M4(test04)"
            for base in $bases; do
              m1=$(curl -sk -o /dev/null -w "%{http_code}" --connect-timeout 5 https://test01.jackadam.top/ --resolve "test01.jackadam.top:443:$base" 2>/dev/null) || m1="fail"
              m2=$(curl -sk -o /dev/null -w "%{http_code}" --connect-timeout 5 https://test02.jackadam.top/ --resolve "test02.jackadam.top:443:$base" 2>/dev/null) || m2="fail"
              m3=$(curl -sk -o /dev/null -w "%{http_code}" --connect-timeout 5 https://test03.jackadam.top/ --resolve "test03.jackadam.top:443:$base" 2>/dev/null) || m3="fail"
              m4=$(curl -sk -o /dev/null -w "%{http_code}" --connect-timeout 5 https://test04.jackadam.top/ --resolve "test04.jackadam.top:443:$base" 2>/dev/null) || m4="fail"
              printf "%-12s %-14s %-14s %-14s %s\n" "$base" "$m1" "$m2" "$m3" "$m4"
              for c in $m1 $m2 $m3 $m4; do count=$((count+1)); [ "$c" = "200" ] && ok=$((ok+1)); done
            done
            echo "---"
            echo "共验证 $count 个目标,$ok 个返回 200"
          changed_when: false
          failed_when: false

    - name: Cleanup nginx matrix TLS (mode=cleanup)
      when: (mode | default('deploy')) == 'cleanup'
      block:
        - name: Delete nginx matrix TLS + HTTP-only resources (deployments, ingress, ingressroute, configmaps)
          ansible.builtin.shell: |
            KUBECONFIG={{ k3s_kubeconfig | quote }} kubectl delete deployment,svc -n default nginx-m1 nginx-m2 nginx-m3 nginx-m4 --ignore-not-found=true
            KUBECONFIG={{ k3s_kubeconfig | quote }} kubectl delete ingress -n default nginx-m1 nginx-m3 nginx-m1-http nginx-m3-http --ignore-not-found=true
            KUBECONFIG={{ k3s_kubeconfig | quote }} kubectl delete ingressroute -n default nginx-m2 nginx-m4 nginx-m2-http nginx-m4-http --ignore-not-found=true
            KUBECONFIG={{ k3s_kubeconfig | quote }} kubectl delete configmap -n default nginx-m1-html nginx-m2-html nginx-m3-html nginx-m4-html --ignore-not-found=true
          register: del_tls
          changed_when: "'deleted' in del_tls.stdout"

        - name: Remove copied nginx matrix TLS manifests directory
          ansible.builtin.file:
            path: /tmp/nginx-matrix-tls
            state: absent

- name: Verify 03-02 Traefik ACME (rollout + secret)
  hosts: k3s_server
  become: true
  run_once: true
  vars:
    k3s_kubeconfig: /etc/rancher/k3s/k3s.yaml
    acme_email: "{{ lookup('env', 'ACME_EMAIL') | default('', true) }}"
    cf_api_token: "{{ lookup('env', 'CF_API_TOKEN') | default('', true) }}"
  tasks:
    - name: "Gate - skip verify when ACME_EMAIL missing"
      when: acme_email | trim == ""
      ansible.builtin.include_role:
        name: verify_common
        tasks_from: gate-debug-end-play.yml
      vars:
        verify_gate_message: "[GATE] skipped doc_id=03-02 reason=missing_env missing=ACME_EMAIL"

    - name: Ensure Cloudflare token Secret from CF_API_TOKEN (real-pass)
      when: (cf_api_token | trim | length) > 0
      ansible.builtin.include_role:
        name: verify_common
        tasks_from: ensure-cloudflare-api-token-secret.yml
      vars:
        verify_cf_api_token: "{{ cf_api_token | trim }}"

    # failed_when is false on purpose: the return code is evaluated by the
    # two tasks that follow (gate vs. hard failure).
    - name: Check cloudflare-api-token secret exists
      ansible.builtin.shell: |
        set -euo pipefail
        KUBECONFIG={{ k3s_kubeconfig | quote }} kubectl -n kube-system get secret cloudflare-api-token
      args:
        executable: /bin/bash
      changed_when: false
      register: cloudflare_secret_check
      failed_when: false

    - name: Gate - no CF_API_TOKEN and secret missing
      when: cloudflare_secret_check.rc != 0 and (cf_api_token | trim | length) == 0
      ansible.builtin.include_role:
        name: verify_common
        tasks_from: gate-debug-end-play.yml
      vars:
        verify_gate_message: "[GATE] skipped doc_id=03-02 reason=missing_dependency missing=cloudflare-api-token skip_scope=traefik-acme"

    - name: Fail when secret missing but CF_API_TOKEN was set
      when: cloudflare_secret_check.rc != 0 and (cf_api_token | trim | length) > 0
      ansible.builtin.fail:
        msg: "已设置 CF_API_TOKEN 但 cloudflare-api-token Secret 仍不可用,请检查 apiserver 权限与命名空间 kube-system"

    # When Helm/ACME rolls out a new ReplicaSet, old Pods can stay in
    # "pending termination" for a long time and `rollout status` hangs forever.
    # Lab acceptance: scale to 0 -> clear Pods -> scale to 1 (a brief ingress
    # outage is acceptable).
    #
    # The task is best-effort (failed_when: false), so the script itself must
    # make sure Traefik is never left at 0 replicas: an EXIT trap restores the
    # replica count if anything aborts the script between scale-down and the
    # explicit scale-up.
    - name: Unstick Traefik deployment via scale down/up (kube-system)
      ansible.builtin.shell: |
        set -euo pipefail
        export KUBECONFIG={{ k3s_kubeconfig | quote }}
        echo "[OC-ASSERT] assertion=traefik_rollout_unblock phase=scale_reset"
        kubectl scale deployment traefik -n kube-system --replicas=0
        # From here on an early exit (set -e / pipefail) would leave the
        # deployment scaled to zero; restore it on any exit path.
        trap 'kubectl scale deployment traefik -n kube-system --replicas=1 || true' EXIT
        # Wait up to 90 x 2s for the deployment to report zero replicas.
        for i in $(seq 1 90); do
          # A failed kubectl call yields "1" so the loop keeps waiting.
          rep=$(kubectl get deployment traefik -n kube-system -o jsonpath='{.status.replicas}' 2>/dev/null || echo 1)
          # status.replicas is omitted from the object when it is zero, so a
          # successful but empty jsonpath result means "0 replicas".
          [ "${rep:-0}" = "0" ] && break
          sleep 2
        done
        # Force-delete leftover pods matched by either label selector.
        for sel in "app.kubernetes.io/name=traefik" "app.kubernetes.io/instance=traefik"; do
          kubectl get pods -n kube-system -l "$sel" -o name 2>/dev/null | while read -r p; do
            [ -z "$p" ] && continue
            kubectl delete "$p" -n kube-system --grace-period=0 --force --ignore-not-found=true || true
          done
        done
        # Fallback: match pods by the traefik-<hash>- name prefix.
        { kubectl get pods -n kube-system --no-headers -o custom-columns=:metadata.name 2>/dev/null | grep -E '^traefik-[0-9a-f]+-' || true; } | while read -r n; do
          [ -z "$n" ] && continue
          kubectl delete pod "$n" -n kube-system --grace-period=0 --force --ignore-not-found=true || true
        done
        kubectl scale deployment traefik -n kube-system --replicas=1
        # Scale-up succeeded; the safety net is no longer needed.
        trap - EXIT
        sleep 3
      args:
        executable: /bin/bash
      changed_when: true
      failed_when: false

    - name: Rollout status traefik (kube-system)
      ansible.builtin.include_role:
        name: verify_common
        tasks_from: kubectl-rollout-status.yml
      vars:
        verify_rollout_ref: deployment/traefik
        verify_rollout_namespace: kube-system
        verify_rollout_timeout_s: 600

- name: Teardown 03-02 Traefik ACME (optional)
  hosts: k3s_server
  become: true
  run_once: true
  vars:
    k3s_kubeconfig: /etc/rancher/k3s/k3s.yaml
    # NOTE(review): VERIFY_TEARDOWN is read as an Ansible variable (e.g. an
    # extra var), not from the environment like the other inputs - confirm
    # this is intended.
    verify_teardown: "{{ (VERIFY_TEARDOWN | default('1')) | string }}"
    acme_email: "{{ lookup('env', 'ACME_EMAIL') | default('', true) }}"
    manifest_dest: /tmp/traefik-acme.yaml
  tasks:
    # Mirrors the ACME_EMAIL gate of the deploy play: nothing was applied,
    # so there is nothing to delete.
    - name: Skip teardown when gated
      when: acme_email | trim == ""
      ansible.builtin.meta: end_play

    # Deletes whatever the rendered manifest left on the server describes.
    - name: Delete resources when VERIFY_TEARDOWN=1
      when: verify_teardown == "1"
      ansible.builtin.shell: |
        set -e
        KUBECONFIG={{ k3s_kubeconfig | quote }} kubectl delete -f {{ manifest_dest | quote }} --ignore-not-found=true
      args:
        executable: /bin/bash
      changed_when: true