Files
2026-03-29 09:08:01 +08:00

326 lines
16 KiB
YAML
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
---
# Play: stage the Traefik ACME manifest on the k3s server, substitute the
# operator's ACME e-mail into it and apply it with kubectl.
#
# Controller environment variables read by this play:
#   ACME_EMAIL       required; when empty the gate task below is included
#   CF_API_TOKEN     optional; when non-empty the Cloudflare token Secret helper runs
#   ACME_CA_STAGING  optional; "1" uncomments the Let's Encrypt staging caserver argument
- name: Deploy 03-02 Traefik ACME (HelmChartConfig)
  hosts: k3s_server
  become: true
  run_once: true
  vars:
    k3s_kubeconfig: /etc/rancher/k3s/k3s.yaml
    manifest_src: "{{ playbook_dir }}/../../files/03-02/traefik-acme.yaml"
    manifest_dest: /tmp/traefik-acme.yaml
    # default('', true) also maps an empty (not just undefined) lookup result to ''.
    acme_email: "{{ lookup('env', 'ACME_EMAIL') | default('', true) }}"
    cf_api_token: "{{ lookup('env', 'CF_API_TOKEN') | default('', true) }}"
  tasks:
    # The included tasks file is not visible here; presumably it prints
    # verify_gate_message and ends the play - confirm in role verify_common.
    - name: "Gate - skip apply when ACME_EMAIL missing"
      when: (acme_email | trim) == ""
      ansible.builtin.include_role:
        name: verify_common
        tasks_from: gate-debug-end-play.yml
      vars:
        verify_gate_message: "[GATE] skipped doc_id=03-02 reason=missing_env missing=ACME_EMAIL"

    - name: Copy manifest
      ansible.builtin.copy:
        src: "{{ manifest_src }}"
        dest: "{{ manifest_dest }}"
        mode: "0644"

    - name: Replace ACME email placeholder
      ansible.builtin.replace:
        path: "{{ manifest_dest }}"
        regexp: "<YOUR_REAL_EMAIL>"
        replace: "{{ acme_email | trim }}"

    # Turns the commented-out staging caserver argument into an active list item.
    # NOTE(review): the replacement hard-codes the indentation of the resulting
    # line; confirm it matches the list indentation used in traefik-acme.yaml.
    - name: Enable ACME staging CA when ACME_CA_STAGING=1
      when: (lookup('env', 'ACME_CA_STAGING') | default('0', true) | trim) == "1"
      ansible.builtin.replace:
        path: "{{ manifest_dest }}"
        regexp: '^\s*# - "--certificatesresolvers\.cloudflare\.acme\.caserver=https://acme-staging-v02\.api\.letsencrypt\.org/directory".*$'
        replace: ' - "--certificatesresolvers.cloudflare.acme.caserver=https://acme-staging-v02.api.letsencrypt.org/directory"'

    - name: Ensure Cloudflare API token Secret before Traefik ACME apply
      when: (cf_api_token | trim | length) > 0
      ansible.builtin.include_role:
        name: verify_common
        tasks_from: ensure-cloudflare-api-token-secret.yml
      vars:
        verify_cf_api_token: "{{ cf_api_token | trim }}"

    # Report "changed" only when kubectl actually created or reconfigured an
    # object, the same way the nginx matrix apply task in this playbook does.
    # Paths go through the quote filter so they stay a single shell word.
    - name: Apply manifest
      ansible.builtin.shell: |
        set -e
        KUBECONFIG={{ k3s_kubeconfig | quote }} kubectl apply -f {{ manifest_dest | quote }}
      args:
        executable: /bin/bash
      register: traefik_acme_apply
      changed_when: "'configured' in traefik_acme_apply.stdout or 'created' in traefik_acme_apply.stdout"
# Play: opt-in deployment (mode=deploy, the default) or removal (mode=cleanup)
# of the nginx-m1..nginx-m4 test matrix in the "default" namespace.
#
# Enabled by the Ansible variable nginx_matrix_tls_enable, falling back to the
# controller environment variable NGINX_MATRIX_TLS_ENABLE (true/1/yes).
- name: Deploy or cleanup nginx matrix TLS (opt-in)
  hosts: k3s_server
  become: true
  run_once: true
  vars:
    k3s_kubeconfig: /etc/rancher/k3s/k3s.yaml
    _nginx_matrix_tls_enable: "{{ nginx_matrix_tls_enable | default((lookup('env', 'NGINX_MATRIX_TLS_ENABLE') | default('', true) | trim | lower in ['true', '1', 'yes']) | bool) }}"
    nginx_matrix_tls_enabled: "{{ _nginx_matrix_tls_enable | bool }}"
    manifests_path: "{{ playbook_dir }}/../../files/03-02"
    # NOTE(review): no task visible in this play references tls_domains; the
    # curl checks below hard-code the same four hostnames.
    tls_domains:
      - test01.jackadam.top
      - test02.jackadam.top
      - test03.jackadam.top
      - test04.jackadam.top
  pre_tasks:
    - name: Gate - skip nginx matrix TLS when nginx_matrix_tls_enable=false
      when: not nginx_matrix_tls_enabled
      block:
        - name: Report that the optional nginx matrix TLS play is skipped
          ansible.builtin.debug:
            msg: "[SKIP] optional doc_id=03-02 action=nginx-matrix-tls var=nginx_matrix_tls_enable"
        - name: End play because nginx matrix TLS is disabled
          meta: end_play
  tasks:
    - name: Deploy nginx matrix TLS (mode=deploy)
      when: (mode | default('deploy')) == 'deploy'
      block:
        - name: Ensure manifests path exists (controller repo path)
          ansible.builtin.stat:
            path: "{{ manifests_path }}"
          register: manifests_stat
          delegate_to: localhost
          become: false
          run_once: true

        - name: Fail if manifests not found
          when: not manifests_stat.stat.exists
          ansible.builtin.fail:
            msg: "manifests 未找到: {{ manifests_path }},请从仓库根目录或 ansible 同级执行"
          delegate_to: localhost
          become: false
          run_once: true

        # kubectl prints "<node> not labeled" when the label already has the
        # requested value; only any other outcome is reported as a change.
        # Assumes inventory hostnames equal Kubernetes node names - confirm.
        - name: Ensure control-plane label on k3s_server nodes (for M1)
          ansible.builtin.shell: |
            KUBECONFIG={{ k3s_kubeconfig | quote }} kubectl label node {{ item | quote }} node-role.kubernetes.io/control-plane= --overwrite
          loop: "{{ groups['k3s_server'] | default([]) }}"
          register: label_control_plane
          changed_when: "'not labeled' not in label_control_plane.stdout"

        - name: Ensure worker label on k3s_worker nodes (for M3)
          ansible.builtin.shell: |
            KUBECONFIG={{ k3s_kubeconfig | quote }} kubectl label node {{ item | quote }} node-role.kubernetes.io/worker= --overwrite
          loop: "{{ groups['k3s_worker'] | default([]) }}"
          register: label_worker
          changed_when: "'not labeled' not in label_worker.stdout"

        - name: Copy nginx matrix TLS manifests to server
          ansible.builtin.copy:
            src: "{{ manifests_path }}/"
            dest: /tmp/nginx-matrix-tls/
            mode: "0644"

        # manifests_path is the directory that also holds traefik-acme.yaml, the
        # manifest whose <YOUR_REAL_EMAIL> placeholder the Traefik ACME deploy
        # play substitutes before applying. The recursive apply below would
        # re-apply that file with the raw placeholder, so drop the staged copy.
        - name: Exclude Traefik ACME manifest from the staged nginx matrix bundle
          ansible.builtin.file:
            path: /tmp/nginx-matrix-tls/traefik-acme.yaml
            state: absent

        # No "set -e": every delete is attempted even if an earlier one fails;
        # the task result is the exit status of the last command.
        - name: Delete non-TLS nginx matrix if present (deployments, ingress, ingressroute, middleware, configmaps)
          ansible.builtin.shell: |
            export KUBECONFIG={{ k3s_kubeconfig | quote }}
            kubectl delete deployment,svc -n default nginx-m1 nginx-m2 nginx-m3 nginx-m4 --ignore-not-found=true
            kubectl delete ingress -n default nginx-m1 nginx-m3 --ignore-not-found=true
            kubectl delete ingressroute -n default nginx-m2 nginx-m4 --ignore-not-found=true
            kubectl delete middleware -n default stripprefix-m1 stripprefix-m2 stripprefix-m3 stripprefix-m4 --ignore-not-found=true
            kubectl delete configmap -n default nginx-m1-html nginx-m2-html nginx-m3-html nginx-m4-html --ignore-not-found=true
          register: del_non_tls
          changed_when: "'deleted' in del_non_tls.stdout"

        - name: kubectl apply nginx matrix TLS + HTTP-only
          ansible.builtin.shell: KUBECONFIG={{ k3s_kubeconfig | quote }} kubectl apply -f /tmp/nginx-matrix-tls/ -R
          register: k8s_apply
          changed_when: "'configured' in k8s_apply.stdout or 'created' in k8s_apply.stdout"

        - name: Restart nginx deployments so pods pick up ConfigMap (M1M4 标识)
          ansible.builtin.shell: KUBECONFIG={{ k3s_kubeconfig | quote }} kubectl rollout restart deployment nginx-m1 nginx-m2 nginx-m3 nginx-m4 -n default
          changed_when: true

        # nginx-m1/m2 get 60s each, nginx-m3/m4 get 120s each; the first
        # rollout that times out fails the task (set -e).
        - name: Wait for nginx rollouts stable after ConfigMap restart
          ansible.builtin.shell: |
            set -euo pipefail
            export KUBECONFIG={{ k3s_kubeconfig | quote }}
            for dep in nginx-m1 nginx-m2; do
              echo "[OC-ASSERT] assertion=nginx_matrix_tls_rollout deployment=${dep} timeout=60s"
              kubectl rollout status "deployment/$dep" -n default --timeout=60s
            done
            for dep in nginx-m3 nginx-m4; do
              echo "[OC-ASSERT] assertion=nginx_matrix_tls_rollout deployment=${dep} timeout=120s"
              kubectl rollout status "deployment/$dep" -n default --timeout=120s
            done
          args:
            executable: /bin/bash
          changed_when: false

        - name: Verify nginx matrix TLS resources
          ansible.builtin.shell: KUBECONFIG={{ k3s_kubeconfig | quote }} kubectl get pod,svc,ing,ingressroute -n default -o wide
          register: verify_tls
          changed_when: false

        - name: ">>> nginx matrix TLS 资源"
          ansible.builtin.debug:
            msg: "{{ item }}"
          loop: "{{ verify_tls.stdout_lines }}"

        # Best-effort probe: each node address is tried for each hostname via
        # curl --resolve. The task never fails (failed_when: false); its output
        # is registered and printed by the next task so the result is visible.
        - name: "HTTP curl 验证HTTP-only16 个目标,所有节点 × 4 域名)"
          ansible.builtin.shell: |
            bases="{{ groups['k3s_nodes'] | map('extract', hostvars) | map(attribute='ansible_host') | join(' ') }}"
            count=0
            ok=0
            echo "=== 16 个目标 (4 节点 × 4 域名) HTTP ==="
            echo "节点 M1(test01) M2(test02) M3(test03) M4(test04)"
            for base in $bases; do
              m1=$(curl -s -o /dev/null -w "%{http_code}" --connect-timeout 5 http://test01.jackadam.top/ --resolve "test01.jackadam.top:80:$base" 2>/dev/null) || m1="fail"
              m2=$(curl -s -o /dev/null -w "%{http_code}" --connect-timeout 5 http://test02.jackadam.top/ --resolve "test02.jackadam.top:80:$base" 2>/dev/null) || m2="fail"
              m3=$(curl -s -o /dev/null -w "%{http_code}" --connect-timeout 5 http://test03.jackadam.top/ --resolve "test03.jackadam.top:80:$base" 2>/dev/null) || m3="fail"
              m4=$(curl -s -o /dev/null -w "%{http_code}" --connect-timeout 5 http://test04.jackadam.top/ --resolve "test04.jackadam.top:80:$base" 2>/dev/null) || m4="fail"
              printf "%-12s %-14s %-14s %-14s %s\n" "$base" "$m1" "$m2" "$m3" "$m4"
              for c in $m1 $m2 $m3 $m4; do count=$((count+1)); [ "$c" = "200" ] && ok=$((ok+1)); done
            done
            echo "---"
            echo "共验证 $count 个目标,$ok 个返回 200"
          register: verify_http_curl
          changed_when: false
          failed_when: false

        - name: ">>> nginx matrix HTTP curl"
          ansible.builtin.debug:
            msg: "{{ verify_http_curl.stdout_lines | default([]) }}"

        # Same probe over HTTPS on port 443; -k skips certificate verification.
        - name: "HTTPS curl 验证16 个目标:所有节点 × 4 域名,所有节点均为入口点)"
          ansible.builtin.shell: |
            bases="{{ groups['k3s_nodes'] | map('extract', hostvars) | map(attribute='ansible_host') | join(' ') }}"
            count=0
            ok=0
            echo "=== 16 个目标 (4 节点 × 4 域名) HTTPS ==="
            echo "节点 M1(test01) M2(test02) M3(test03) M4(test04)"
            for base in $bases; do
              m1=$(curl -sk -o /dev/null -w "%{http_code}" --connect-timeout 5 https://test01.jackadam.top/ --resolve "test01.jackadam.top:443:$base" 2>/dev/null) || m1="fail"
              m2=$(curl -sk -o /dev/null -w "%{http_code}" --connect-timeout 5 https://test02.jackadam.top/ --resolve "test02.jackadam.top:443:$base" 2>/dev/null) || m2="fail"
              m3=$(curl -sk -o /dev/null -w "%{http_code}" --connect-timeout 5 https://test03.jackadam.top/ --resolve "test03.jackadam.top:443:$base" 2>/dev/null) || m3="fail"
              m4=$(curl -sk -o /dev/null -w "%{http_code}" --connect-timeout 5 https://test04.jackadam.top/ --resolve "test04.jackadam.top:443:$base" 2>/dev/null) || m4="fail"
              printf "%-12s %-14s %-14s %-14s %s\n" "$base" "$m1" "$m2" "$m3" "$m4"
              for c in $m1 $m2 $m3 $m4; do count=$((count+1)); [ "$c" = "200" ] && ok=$((ok+1)); done
            done
            echo "---"
            echo "共验证 $count 个目标,$ok 个返回 200"
          register: verify_https_curl
          changed_when: false
          failed_when: false

        - name: ">>> nginx matrix HTTPS curl"
          ansible.builtin.debug:
            msg: "{{ verify_https_curl.stdout_lines | default([]) }}"

    - name: Cleanup nginx matrix TLS (mode=cleanup)
      when: (mode | default('deploy')) == 'cleanup'
      block:
        - name: Delete nginx matrix TLS + HTTP-only resources (deployments, ingress, ingressroute, configmaps)
          ansible.builtin.shell: |
            export KUBECONFIG={{ k3s_kubeconfig | quote }}
            kubectl delete deployment,svc -n default nginx-m1 nginx-m2 nginx-m3 nginx-m4 --ignore-not-found=true
            kubectl delete ingress -n default nginx-m1 nginx-m3 nginx-m1-http nginx-m3-http --ignore-not-found=true
            kubectl delete ingressroute -n default nginx-m2 nginx-m4 nginx-m2-http nginx-m4-http --ignore-not-found=true
            kubectl delete configmap -n default nginx-m1-html nginx-m2-html nginx-m3-html nginx-m4-html --ignore-not-found=true
          register: del_tls
          changed_when: "'deleted' in del_tls.stdout"

        - name: Remove copied nginx matrix TLS manifests directory
          ansible.builtin.file:
            path: /tmp/nginx-matrix-tls
            state: absent
# Play: verify the Traefik ACME deployment - make sure the cloudflare-api-token
# Secret is present in kube-system, reset the Traefik deployment by scaling it
# to zero and back to one, then wait for its rollout.
#
# Controller environment variables read by this play: ACME_EMAIL, CF_API_TOKEN.
- name: Verify 03-02 Traefik ACME (rollout + secret)
  hosts: k3s_server
  become: true
  run_once: true
  vars:
    k3s_kubeconfig: /etc/rancher/k3s/k3s.yaml
    acme_email: "{{ lookup('env', 'ACME_EMAIL') | default('', true) }}"
    cf_api_token: "{{ lookup('env', 'CF_API_TOKEN') | default('', true) }}"
  tasks:
    # The included tasks file is not visible here; presumably it prints
    # verify_gate_message and ends the play - confirm in role verify_common.
    - name: "Gate - skip verify when ACME_EMAIL missing"
      when: (acme_email | trim) == ""
      ansible.builtin.include_role:
        name: verify_common
        tasks_from: gate-debug-end-play.yml
      vars:
        verify_gate_message: "[GATE] skipped doc_id=03-02 reason=missing_env missing=ACME_EMAIL"

    - name: Ensure Cloudflare token Secret from CF_API_TOKEN (real-pass)
      when: (cf_api_token | trim | length) > 0
      ansible.builtin.include_role:
        name: verify_common
        tasks_from: ensure-cloudflare-api-token-secret.yml
      vars:
        verify_cf_api_token: "{{ cf_api_token | trim }}"

    # Probe only: never fails, the return code is evaluated by the next two tasks.
    - name: Check cloudflare-api-token secret exists
      ansible.builtin.shell: |
        set -euo pipefail
        KUBECONFIG={{ k3s_kubeconfig | quote }} kubectl -n kube-system get secret cloudflare-api-token
      args:
        executable: /bin/bash
      changed_when: false
      register: cloudflare_secret_check
      failed_when: false

    - name: Gate - no CF_API_TOKEN and secret missing
      when: cloudflare_secret_check.rc != 0 and (cf_api_token | trim | length) == 0
      ansible.builtin.include_role:
        name: verify_common
        tasks_from: gate-debug-end-play.yml
      vars:
        verify_gate_message: "[GATE] skipped doc_id=03-02 reason=missing_dependency missing=cloudflare-api-token skip_scope=traefik-acme"

    - name: Fail when secret missing but CF_API_TOKEN was set
      when: cloudflare_secret_check.rc != 0 and (cf_api_token | trim | length) > 0
      ansible.builtin.fail:
        msg: "已设置 CF_API_TOKEN 但 cloudflare-api-token Secret 仍不可用,请检查 apiserver 权限与命名空间 kube-system"

    # When Helm/ACME rolls out a new ReplicaSet, old Pods can stay in
    # "pending termination" for a long time and `rollout status` hangs forever.
    # Lab acceptance procedure: scale to 0 -> clear Pods -> scale to 1
    # (a short ingress outage is acceptable).
    #
    # The task is best-effort (failed_when: false); the rollout check that
    # follows is what decides success.
    - name: Unstick Traefik deployment via scale down/up (kube-system)
      ansible.builtin.shell: |
        set -euo pipefail
        export KUBECONFIG={{ k3s_kubeconfig | quote }}
        # If any step below aborts the script (set -e / pipefail), still put the
        # deployment back to one replica instead of leaving the ingress at zero.
        restore_traefik() { kubectl scale deployment traefik -n kube-system --replicas=1 || true; }
        trap restore_traefik EXIT
        echo "[OC-ASSERT] assertion=traefik_rollout_unblock phase=scale_reset"
        kubectl scale deployment traefik -n kube-system --replicas=0
        for i in $(seq 1 90); do
          # kubectl failure -> "1" (keep waiting). Empty output means
          # .status.replicas is absent, which is how a zero count is reported,
          # so an empty value is treated as 0.
          rep=$(kubectl get deployment traefik -n kube-system -o jsonpath='{.status.replicas}' 2>/dev/null || echo 1)
          [ "${rep:-0}" = "0" ] && break
          sleep 2
        done
        # Force-delete leftover Pods, first by label selector ...
        for sel in "app.kubernetes.io/name=traefik" "app.kubernetes.io/instance=traefik"; do
          kubectl get pods -n kube-system -l "$sel" -o name 2>/dev/null | while read -r p; do
            [ -z "$p" ] && continue
            kubectl delete "$p" -n kube-system --grace-period=0 --force --ignore-not-found=true || true
          done
        done
        # ... then by name pattern, for Pods the selectors did not match.
        { kubectl get pods -n kube-system --no-headers -o custom-columns=:metadata.name 2>/dev/null | grep -E '^traefik-[0-9a-f]+-' || true; } | while read -r n; do
          [ -z "$n" ] && continue
          kubectl delete pod "$n" -n kube-system --grace-period=0 --force --ignore-not-found=true || true
        done
        kubectl scale deployment traefik -n kube-system --replicas=1
        trap - EXIT
        sleep 3
      args:
        executable: /bin/bash
      changed_when: true
      failed_when: false

    - name: Rollout status traefik (kube-system)
      ansible.builtin.include_role:
        name: verify_common
        tasks_from: kubectl-rollout-status.yml
      vars:
        verify_rollout_ref: deployment/traefik
        verify_rollout_namespace: kube-system
        verify_rollout_timeout_s: 600
# Play: optionally delete the resources described by the staged Traefik ACME
# manifest (manifest_dest on the k3s server).
#
# Teardown runs when VERIFY_TEARDOWN resolves to "1". Resolution order:
#   1. Ansible variable VERIFY_TEARDOWN (e.g. -e VERIFY_TEARDOWN=0)
#   2. controller environment variable VERIFY_TEARDOWN
#   3. default "1"
# The play ends immediately when ACME_EMAIL is empty.
- name: Teardown 03-02 Traefik ACME (optional)
  hosts: k3s_server
  become: true
  run_once: true
  vars:
    k3s_kubeconfig: /etc/rancher/k3s/k3s.yaml
    # The environment fallback matches how the other switches in this playbook
    # (ACME_EMAIL, CF_API_TOKEN, ACME_CA_STAGING) are read; the Ansible variable
    # still takes precedence when it is defined.
    verify_teardown: "{{ (VERIFY_TEARDOWN | default(lookup('env', 'VERIFY_TEARDOWN') | default('1', true))) | string | trim }}"
    acme_email: "{{ lookup('env', 'ACME_EMAIL') | default('', true) }}"
    manifest_dest: /tmp/traefik-acme.yaml
  tasks:
    - name: Skip teardown when gated
      when: (acme_email | trim) == ""
      meta: end_play

    # "changed" is reported only when kubectl actually deleted something, as the
    # nginx matrix delete tasks in this playbook do.
    - name: Delete resources when VERIFY_TEARDOWN=1
      when: verify_teardown == "1"
      ansible.builtin.shell: |
        set -e
        KUBECONFIG={{ k3s_kubeconfig | quote }} kubectl delete -f {{ manifest_dest | quote }} --ignore-not-found=true
      args:
        executable: /bin/bash
      register: traefik_acme_delete
      changed_when: "'deleted' in traefik_acme_delete.stdout"