---
# Play: render the Traefik ACME manifest on one k3s server and apply it.
# Inputs come from the controller environment:
#   ACME_EMAIL       - required; the play is gated (skipped) when empty.
#   CF_API_TOKEN     - optional; when set, the Cloudflare token Secret is ensured first.
#   ACME_CA_STAGING  - optional; "1" un-comments the Let's Encrypt staging CA argument.
- name: Deploy 03-02 Traefik ACME (HelmChartConfig)
  hosts: k3s_server
  become: true
  run_once: true
  vars:
    k3s_kubeconfig: /etc/rancher/k3s/k3s.yaml
    manifest_src: "{{ playbook_dir }}/../../files/03-02/traefik-acme.yaml"
    manifest_dest: /tmp/traefik-acme.yaml
    # default('', true) also maps an empty/unset environment variable to ''.
    acme_email: "{{ lookup('env', 'ACME_EMAIL') | default('', true) }}"
    cf_api_token: "{{ lookup('env', 'CF_API_TOKEN') | default('', true) }}"
  tasks:
    - name: "Gate - skip apply when ACME_EMAIL missing"
      when: acme_email | trim == ""
      ansible.builtin.include_role:
        name: verify_common
        tasks_from: gate-debug-end-play.yml
      vars:
        verify_gate_message: "[GATE] skipped doc_id=03-02 reason=missing_env missing=ACME_EMAIL"

    - name: Copy manifest
      ansible.builtin.copy:
        src: "{{ manifest_src }}"
        dest: "{{ manifest_dest }}"
        mode: "0644"

    - name: Replace ACME email placeholder
      ansible.builtin.replace:
        path: "{{ manifest_dest }}"
        regexp: "<YOUR_REAL_EMAIL>"
        replace: "{{ acme_email | trim }}"

    # The commented-out argument is un-commented in place. The indentation in
    # front of '#' is captured and re-emitted so the resulting list item lines
    # up with its siblings regardless of how deep the manifest nests it;
    # [ \t]* (instead of \s*) keeps the match from swallowing preceding blank
    # lines.
    # NOTE(review): assumes '#' sits at the column where the list dash belongs
    # - confirm against files/03-02/traefik-acme.yaml.
    - name: Enable ACME staging CA when ACME_CA_STAGING=1
      when: (lookup('env', 'ACME_CA_STAGING') | default('0', true) | trim) == "1"
      ansible.builtin.replace:
        path: "{{ manifest_dest }}"
        regexp: '^([ \t]*)# - "--certificatesresolvers\.cloudflare\.acme\.caserver=https://acme-staging-v02\.api\.letsencrypt\.org/directory".*$'
        replace: '\1- "--certificatesresolvers.cloudflare.acme.caserver=https://acme-staging-v02.api.letsencrypt.org/directory"'

    - name: Ensure Cloudflare API token Secret before Traefik ACME apply
      when: (cf_api_token | trim | length) > 0
      ansible.builtin.include_role:
        name: verify_common
        tasks_from: ensure-cloudflare-api-token-secret.yml
      vars:
        verify_cf_api_token: "{{ cf_api_token | trim }}"

    # argv + environment avoids templating values into a shell string; a
    # non-zero kubectl exit still fails the task.
    - name: Apply manifest
      ansible.builtin.command:
        argv:
          - kubectl
          - apply
          - "-f"
          - "{{ manifest_dest }}"
      environment:
        KUBECONFIG: "{{ k3s_kubeconfig }}"
      register: traefik_acme_apply
      # Report a change only when kubectl says it created/configured something
      # (an "unchanged" apply stays green).
      changed_when: "'created' in traefik_acme_apply.stdout or 'configured' in traefik_acme_apply.stdout"

# Play: opt-in deployment (mode=deploy, the default) or removal (mode=cleanup)
# of the nginx M1-M4 test matrix behind Traefik, followed by HTTP/HTTPS smoke
# checks against every node in the k3s_nodes group.
- name: Deploy or cleanup nginx matrix TLS (opt-in)
  hosts: k3s_server
  become: true
  run_once: true
  vars:
    k3s_kubeconfig: /etc/rancher/k3s/k3s.yaml
    # The nginx_matrix_tls_enable variable wins when defined; otherwise the
    # NGINX_MATRIX_TLS_ENABLE environment variable counts as enabled when it is
    # true/1/yes (case-insensitive).
    _nginx_matrix_tls_enable: "{{ nginx_matrix_tls_enable | default((lookup('env', 'NGINX_MATRIX_TLS_ENABLE') | default('', true) | trim | lower in ['true', '1', 'yes']) | bool) }}"
    nginx_matrix_tls_enabled: "{{ _nginx_matrix_tls_enable | bool }}"
    manifests_path: "{{ playbook_dir }}/../../files/03-02"
    # NOTE(review): tls_domains is not referenced by any task in this play; the
    # curl checks below hard-code the same four host names.
    tls_domains:
      - test01.jackadam.top
      - test02.jackadam.top
      - test03.jackadam.top
      - test04.jackadam.top
  pre_tasks:
    - name: Gate - skip nginx matrix TLS when nginx_matrix_tls_enable=false
      when: not nginx_matrix_tls_enabled
      block:
        - name: Announce that the optional nginx matrix TLS step is skipped
          ansible.builtin.debug:
            msg: "[SKIP] optional doc_id=03-02 action=nginx-matrix-tls var=nginx_matrix_tls_enable"
        - name: End play when nginx matrix TLS is disabled
          meta: end_play
  tasks:
    - name: Deploy nginx matrix TLS (mode=deploy)
      when: (mode | default('deploy')) == 'deploy'
      block:
        - name: Ensure manifests path exists (controller repo path)
          ansible.builtin.stat:
            path: "{{ manifests_path }}"
          register: manifests_stat
          delegate_to: localhost
          become: false
          run_once: true

        - name: Fail if manifests not found
          ansible.builtin.fail:
            msg: "manifests 未找到: {{ manifests_path }},请从仓库根目录或 ansible 同级执行"
          when: not manifests_stat.stat.exists
          delegate_to: localhost
          become: false
          run_once: true

        # kubectl prints "node/<name> not labeled" when the label is already in
        # place, so only a real relabel is reported as a change.
        - name: Ensure control-plane label on k3s_server nodes (for M1)
          ansible.builtin.shell: |
            KUBECONFIG={{ k3s_kubeconfig }} kubectl label node {{ item }} node-role.kubernetes.io/control-plane= --overwrite
          loop: "{{ groups['k3s_server'] | default([]) }}"
          register: control_plane_label
          changed_when: "'not labeled' not in control_plane_label.stdout"

        - name: Ensure worker label on k3s_worker nodes (for M3)
          ansible.builtin.shell: |
            KUBECONFIG={{ k3s_kubeconfig }} kubectl label node {{ item }} node-role.kubernetes.io/worker= --overwrite
          loop: "{{ groups['k3s_worker'] | default([]) }}"
          register: worker_label
          changed_when: "'not labeled' not in worker_label.stdout"

        - name: Copy nginx matrix TLS manifests to server
          ansible.builtin.copy:
            src: "{{ manifests_path }}/"
            dest: /tmp/nginx-matrix-tls/
            mode: "0644"

        # manifests_path is the directory that also holds traefik-acme.yaml
        # (the Traefik ACME deploy play reads it from files/03-02). Left in the
        # staged copy, the recursive apply below would re-apply that template
        # with its <YOUR_REAL_EMAIL> placeholder un-substituted and overwrite
        # the rendered HelmChartConfig.
        # NOTE(review): confirm no nginx manifest depends on this file.
        - name: Exclude raw Traefik ACME template from the staged manifests
          ansible.builtin.file:
            path: /tmp/nginx-matrix-tls/traefik-acme.yaml
            state: absent

        # No 'set -e' here: only the exit status of the last kubectl call
        # decides task failure, as before.
        - name: Delete non-TLS nginx matrix if present (deployments, ingress, ingressroute, middleware, configmaps)
          ansible.builtin.shell: |
            KUBECONFIG={{ k3s_kubeconfig }} kubectl delete deployment,svc -n default nginx-m1 nginx-m2 nginx-m3 nginx-m4 --ignore-not-found=true
            KUBECONFIG={{ k3s_kubeconfig }} kubectl delete ingress -n default nginx-m1 nginx-m3 --ignore-not-found=true
            KUBECONFIG={{ k3s_kubeconfig }} kubectl delete ingressroute -n default nginx-m2 nginx-m4 --ignore-not-found=true
            KUBECONFIG={{ k3s_kubeconfig }} kubectl delete middleware -n default stripprefix-m1 stripprefix-m2 stripprefix-m3 stripprefix-m4 --ignore-not-found=true
            KUBECONFIG={{ k3s_kubeconfig }} kubectl delete configmap -n default nginx-m1-html nginx-m2-html nginx-m3-html nginx-m4-html --ignore-not-found=true
          register: del_non_tls
          changed_when: "'deleted' in del_non_tls.stdout"

        - name: kubectl apply nginx matrix TLS + HTTP-only
          ansible.builtin.shell: KUBECONFIG={{ k3s_kubeconfig }} kubectl apply -f /tmp/nginx-matrix-tls/ -R
          register: k8s_apply
          changed_when: "'configured' in k8s_apply.stdout or 'created' in k8s_apply.stdout"

        - name: Restart nginx deployments so pods pick up ConfigMap (M1~M4 标识)
          ansible.builtin.shell: KUBECONFIG={{ k3s_kubeconfig }} kubectl rollout restart deployment nginx-m1 nginx-m2 nginx-m3 nginx-m4 -n default
          changed_when: true

        # nginx-m1/m2 get a 60s rollout budget, nginx-m3/m4 get 120s.
        - name: Wait for nginx rollouts stable after ConfigMap restart
          ansible.builtin.shell: |
            set -euo pipefail
            KCFG={{ k3s_kubeconfig | quote }}
            export KUBECONFIG="$KCFG"
            for dep in nginx-m1 nginx-m2; do
              echo "[OC-ASSERT] assertion=nginx_matrix_tls_rollout deployment=${dep} timeout=60s"
              kubectl rollout status "deployment/$dep" -n default --timeout=60s
            done
            for dep in nginx-m3 nginx-m4; do
              echo "[OC-ASSERT] assertion=nginx_matrix_tls_rollout deployment=${dep} timeout=120s"
              kubectl rollout status "deployment/$dep" -n default --timeout=120s
            done
          args:
            executable: /bin/bash
          changed_when: false

        - name: Verify nginx matrix TLS resources
          ansible.builtin.shell: KUBECONFIG={{ k3s_kubeconfig }} kubectl get pod,svc,ing,ingressroute -n default -o wide
          register: verify_tls
          changed_when: false

        - name: ">>> nginx matrix TLS 资源"
          ansible.builtin.debug:
            msg: "{{ item }}"
          loop: "{{ verify_tls.stdout_lines }}"

        # Informational only (failed_when: false): each node address from the
        # k3s_nodes group is probed for all four host names via curl --resolve,
        # and the number of 200 responses is tallied.
        - name: HTTP curl 验证(HTTP-only:16 个目标,所有节点 × 4 域名)
          ansible.builtin.shell: |
            bases="{{ groups['k3s_nodes'] | map('extract', hostvars) | map(attribute='ansible_host') | join(' ') }}"
            count=0
            ok=0
            echo "=== 16 个目标 (4 节点 × 4 域名) HTTP ==="
            echo "节点 M1(test01) M2(test02) M3(test03) M4(test04)"
            for base in $bases; do
              m1=$(curl -s -o /dev/null -w "%{http_code}" --connect-timeout 5 http://test01.jackadam.top/ --resolve "test01.jackadam.top:80:$base" 2>/dev/null) || m1="fail"
              m2=$(curl -s -o /dev/null -w "%{http_code}" --connect-timeout 5 http://test02.jackadam.top/ --resolve "test02.jackadam.top:80:$base" 2>/dev/null) || m2="fail"
              m3=$(curl -s -o /dev/null -w "%{http_code}" --connect-timeout 5 http://test03.jackadam.top/ --resolve "test03.jackadam.top:80:$base" 2>/dev/null) || m3="fail"
              m4=$(curl -s -o /dev/null -w "%{http_code}" --connect-timeout 5 http://test04.jackadam.top/ --resolve "test04.jackadam.top:80:$base" 2>/dev/null) || m4="fail"
              printf "%-12s %-14s %-14s %-14s %s\n" "$base" "$m1" "$m2" "$m3" "$m4"
              for c in $m1 $m2 $m3 $m4; do count=$((count+1)); [ "$c" = "200" ] && ok=$((ok+1)); done
            done
            echo "---"
            echo "共验证 $count 个目标,$ok 个返回 200"
          register: http_curl_matrix
          changed_when: false
          failed_when: false

        # Without this the matrix is only visible when running with -v.
        - name: Show HTTP curl matrix
          ansible.builtin.debug:
            msg: "{{ http_curl_matrix.stdout_lines | default([]) }}"

        # Same probe over HTTPS; -k skips certificate verification.
        - name: HTTPS curl 验证(16 个目标:所有节点 × 4 域名,所有节点均为入口点)
          ansible.builtin.shell: |
            bases="{{ groups['k3s_nodes'] | map('extract', hostvars) | map(attribute='ansible_host') | join(' ') }}"
            count=0
            ok=0
            echo "=== 16 个目标 (4 节点 × 4 域名) HTTPS ==="
            echo "节点 M1(test01) M2(test02) M3(test03) M4(test04)"
            for base in $bases; do
              m1=$(curl -sk -o /dev/null -w "%{http_code}" --connect-timeout 5 https://test01.jackadam.top/ --resolve "test01.jackadam.top:443:$base" 2>/dev/null) || m1="fail"
              m2=$(curl -sk -o /dev/null -w "%{http_code}" --connect-timeout 5 https://test02.jackadam.top/ --resolve "test02.jackadam.top:443:$base" 2>/dev/null) || m2="fail"
              m3=$(curl -sk -o /dev/null -w "%{http_code}" --connect-timeout 5 https://test03.jackadam.top/ --resolve "test03.jackadam.top:443:$base" 2>/dev/null) || m3="fail"
              m4=$(curl -sk -o /dev/null -w "%{http_code}" --connect-timeout 5 https://test04.jackadam.top/ --resolve "test04.jackadam.top:443:$base" 2>/dev/null) || m4="fail"
              printf "%-12s %-14s %-14s %-14s %s\n" "$base" "$m1" "$m2" "$m3" "$m4"
              for c in $m1 $m2 $m3 $m4; do count=$((count+1)); [ "$c" = "200" ] && ok=$((ok+1)); done
            done
            echo "---"
            echo "共验证 $count 个目标,$ok 个返回 200"
          register: https_curl_matrix
          changed_when: false
          failed_when: false

        - name: Show HTTPS curl matrix
          ansible.builtin.debug:
            msg: "{{ https_curl_matrix.stdout_lines | default([]) }}"

    - name: Cleanup nginx matrix TLS (mode=cleanup)
      when: (mode | default('deploy')) == 'cleanup'
      block:
        - name: Delete nginx matrix TLS + HTTP-only resources (deployments, ingress, ingressroute, configmaps)
          ansible.builtin.shell: |
            KUBECONFIG={{ k3s_kubeconfig }} kubectl delete deployment,svc -n default nginx-m1 nginx-m2 nginx-m3 nginx-m4 --ignore-not-found=true
            KUBECONFIG={{ k3s_kubeconfig }} kubectl delete ingress -n default nginx-m1 nginx-m3 nginx-m1-http nginx-m3-http --ignore-not-found=true
            KUBECONFIG={{ k3s_kubeconfig }} kubectl delete ingressroute -n default nginx-m2 nginx-m4 nginx-m2-http nginx-m4-http --ignore-not-found=true
            KUBECONFIG={{ k3s_kubeconfig }} kubectl delete configmap -n default nginx-m1-html nginx-m2-html nginx-m3-html nginx-m4-html --ignore-not-found=true
          register: del_tls
          changed_when: "'deleted' in del_tls.stdout"

        - name: Remove copied nginx matrix TLS manifests directory
          ansible.builtin.file:
            path: /tmp/nginx-matrix-tls
            state: absent

# Play: verify the Traefik ACME deployment. It is gated on ACME_EMAIL, makes
# sure the cloudflare-api-token Secret is present in kube-system (creating it
# from CF_API_TOKEN when that is set), resets the Traefik deployment through a
# scale 0 -> 1 cycle, then waits for the rollout.
- name: Verify 03-02 Traefik ACME (rollout + secret)
  hosts: k3s_server
  become: true
  run_once: true
  vars:
    k3s_kubeconfig: /etc/rancher/k3s/k3s.yaml
    acme_email: "{{ lookup('env', 'ACME_EMAIL') | default('', true) }}"
    cf_api_token: "{{ lookup('env', 'CF_API_TOKEN') | default('', true) }}"
  tasks:
    - name: "Gate - skip verify when ACME_EMAIL missing"
      when: acme_email | trim == ""
      ansible.builtin.include_role:
        name: verify_common
        tasks_from: gate-debug-end-play.yml
      vars:
        verify_gate_message: "[GATE] skipped doc_id=03-02 reason=missing_env missing=ACME_EMAIL"

    - name: Ensure Cloudflare token Secret from CF_API_TOKEN (real-pass)
      when: (cf_api_token | trim | length) > 0
      ansible.builtin.include_role:
        name: verify_common
        tasks_from: ensure-cloudflare-api-token-secret.yml
      vars:
        verify_cf_api_token: "{{ cf_api_token | trim }}"

    # Probe only: the return code is inspected by the two tasks that follow.
    - name: Check cloudflare-api-token secret exists
      ansible.builtin.shell: |
        set -euo pipefail
        KUBECONFIG={{ k3s_kubeconfig }} kubectl -n kube-system get secret cloudflare-api-token
      args:
        executable: /bin/bash
      changed_when: false
      register: cloudflare_secret_check
      failed_when: false

    - name: Gate - no CF_API_TOKEN and secret missing
      when: cloudflare_secret_check.rc != 0 and (cf_api_token | trim | length) == 0
      ansible.builtin.include_role:
        name: verify_common
        tasks_from: gate-debug-end-play.yml
      vars:
        verify_gate_message: "[GATE] skipped doc_id=03-02 reason=missing_dependency missing=cloudflare-api-token skip_scope=traefik-acme"

    - name: Fail when secret missing but CF_API_TOKEN was set
      when: cloudflare_secret_check.rc != 0 and (cf_api_token | trim | length) > 0
      ansible.builtin.fail:
        msg: "已设置 CF_API_TOKEN 但 cloudflare-api-token Secret 仍不可用,请检查 apiserver 权限与命名空间 kube-system"

    # When Helm/ACME rolls out a new ReplicaSet, old Pods can stay in
    # "pending termination" for a long time and rollout status hangs forever.
    # Lab acceptance procedure: scale to 0 -> purge Pods -> scale to 1
    # (the ingress is briefly unavailable, which is acceptable here).
    - name: Unstick Traefik deployment via scale down/up (kube-system)
      ansible.builtin.shell: |
        set -euo pipefail
        export KUBECONFIG={{ k3s_kubeconfig }}
        # This task is best-effort (failed_when: false), so an aborted script
        # would otherwise go unnoticed and leave Traefik scaled to zero.
        trap 'kubectl scale deployment traefik -n kube-system --replicas=1 || true' EXIT
        echo "[OC-ASSERT] assertion=traefik_rollout_unblock phase=scale_reset"
        kubectl scale deployment traefik -n kube-system --replicas=0
        for i in $(seq 1 90); do
          # A failed kubectl call yields "1" (keep waiting). An empty result
          # means the API object carries no status.replicas field, which is
          # how a count of zero is reported, so empty is treated as 0.
          rep=$(kubectl get deployment traefik -n kube-system -o jsonpath='{.status.replicas}' 2>/dev/null || echo 1)
          [ "${rep:-0}" = "0" ] && break
          sleep 2
        done
        for sel in "app.kubernetes.io/name=traefik" "app.kubernetes.io/instance=traefik"; do
          kubectl get pods -n kube-system -l "$sel" -o name 2>/dev/null | while read -r p; do
            [ -z "$p" ] && continue
            kubectl delete "$p" -n kube-system --grace-period=0 --force --ignore-not-found=true || true
          done
        done
        # Catch leftover pods by name pattern in case the label selectors missed them.
        { kubectl get pods -n kube-system --no-headers -o custom-columns=:metadata.name 2>/dev/null | grep -E '^traefik-[0-9a-f]+-' || true; } | while read -r n; do
          [ -z "$n" ] && continue
          kubectl delete pod "$n" -n kube-system --grace-period=0 --force --ignore-not-found=true || true
        done
        kubectl scale deployment traefik -n kube-system --replicas=1
        # Normal path restored the replica count; the safety net is no longer needed.
        trap - EXIT
        sleep 3
      args:
        executable: /bin/bash
      changed_when: true
      failed_when: false

    - name: Rollout status traefik (kube-system)
      ansible.builtin.include_role:
        name: verify_common
        tasks_from: kubectl-rollout-status.yml
      vars:
        verify_rollout_ref: deployment/traefik
        verify_rollout_namespace: kube-system
        verify_rollout_timeout_s: 600

# Play: optional teardown. Ends immediately when ACME_EMAIL is empty;
# otherwise deletes whatever the rendered manifest at manifest_dest describes,
# provided verify_teardown evaluates to "1" (its default).
- name: Teardown 03-02 Traefik ACME (optional)
  hosts: k3s_server
  run_once: true
  become: true
  vars:
    manifest_dest: /tmp/traefik-acme.yaml
    k3s_kubeconfig: /etc/rancher/k3s/k3s.yaml
    acme_email: "{{ lookup('env', 'ACME_EMAIL') | default('', true) }}"
    # VERIFY_TEARDOWN is read as an Ansible variable here, not via an env lookup.
    verify_teardown: "{{ (VERIFY_TEARDOWN | default('1')) | string }}"
  tasks:
    - name: Skip teardown when gated
      meta: end_play
      when: acme_email | trim == ""

    - name: Delete resources when VERIFY_TEARDOWN=1
      ansible.builtin.shell:
        cmd: |
          set -e
          KUBECONFIG={{ k3s_kubeconfig }} kubectl delete -f {{ manifest_dest }} --ignore-not-found=true
        executable: /bin/bash
      when: verify_teardown == "1"
      changed_when: true
