#!/usr/bin/env bash

#######################################
# Tests a raw packet/hit counter without ever evaluating it arithmetically.
# `[[ x -gt 0 ]]` evaluates x as an arithmetic expression, so junk such as
# "N/A" raises an error and "a[$(cmd)]" would run cmd; leading zeros ("08")
# are rejected as invalid octal. This helper uses string matching only.
# Arguments:
#   $1 - raw counter value (surrounding whitespace is ignored)
#   $2 - expected state: "positive" or "zero"
# Returns:
#   0 if $1 is a decimal integer in the expected state, 1 otherwise
#   (a non-numeric value is neither positive nor zero).
#######################################
_diag_count_is() {
  local value="${1:-}"
  local expected="${2:-}"

  # Trim leading, then trailing, whitespace.
  value="${value#"${value%%[![:space:]]*}"}"
  value="${value%"${value##*[![:space:]]}"}"

  [[ "${value}" =~ ^[0-9]+$ ]] || return 1

  if [[ "${value}" =~ [1-9] ]]; then
    [[ "${expected}" == "positive" ]]
  else
    [[ "${expected}" == "zero" ]]
  fi
}

#######################################
# Prints the collected counters followed by a single "[结论]" verdict line.
# Globals (all read-only here, all optional; defaults shown in the echo lines):
#   TRAEFIK_CHAIN, REJECT_PKTS, NFLOG_PKTS, TRAEFIK_WEB_SVC_CHAIN,
#   TRAEFIK_WEB_SEP_CHAIN, WORKER_CNI_DNAT_CHAIN, NFT_DNAT_HIT,
#   RET_LOCAL_SYN_COUNT, RET_LOCAL_SYNACK_COUNT, RET_CNI0_SYN_COUNT,
#   RET_CNI0_SYNACK_COUNT, RET_WORKER_SYN_COUNT, RET_WORKER_SYNACK_COUNT,
#   POD_NETNS_SYN_COUNT, POD_NETNS_SYNACK_COUNT,
#   WORKER_CNI_HIT_BEFORE, WORKER_CNI_HIT_AFTER
# Outputs:
#   Summary and verdict on stdout. The line formats are matched by the
#   awk patterns in analyze_log_file, so keep them in sync.
#######################################
print_diag_summary() {
  echo
  echo "===== 自动判读(基于当前计数) ====="
  echo "- Traefik Pod FW 链: ${TRAEFIK_CHAIN:-N/A}"
  echo "- Traefik REJECT 命中: ${REJECT_PKTS:-0}"
  echo "- Traefik NFLOG 命中: ${NFLOG_PKTS:-0}"
  echo "- Service web 链: ${TRAEFIK_WEB_SVC_CHAIN:-N/A}"
  echo "- Service web endpoint 链: ${TRAEFIK_WEB_SEP_CHAIN:-N/A}"
  echo "- Worker CNI hostport链: ${WORKER_CNI_DNAT_CHAIN:-N/A}"
  echo "- nft 观测到 KUBE-EXT DNAT: ${NFT_DNAT_HIT:-no}"
  echo "- ylc61(any) SYN/SYN-ACK: ${RET_LOCAL_SYN_COUNT:-0}/${RET_LOCAL_SYNACK_COUNT:-0}"
  echo "- ylc61(cni0) SYN/SYN-ACK: ${RET_CNI0_SYN_COUNT:-0}/${RET_CNI0_SYNACK_COUNT:-0}"
  echo "- ylc62(ens18) SYN/SYN-ACK: ${RET_WORKER_SYN_COUNT:-0}/${RET_WORKER_SYNACK_COUNT:-0}"
  echo "- Traefik pod netns SYN/SYN-ACK: ${POD_NETNS_SYN_COUNT:-0}/${POD_NETNS_SYNACK_COUNT:-0}"

  # Verdict priority: REJECT hit > recovered > DNAT without SYN-ACK >
  # unchanged hostport counter > generic fallback.
  if _diag_count_is "${REJECT_PKTS:-0}" positive; then
    echo "[结论] Traefik Pod 防火墙链出现 REJECT 命中,优先检查 kube-system 下 Traefik 相关 Ingress NetworkPolicy。"
  elif _diag_count_is "${RET_WORKER_SYNACK_COUNT:-0}" positive \
    && _diag_count_is "${RET_LOCAL_SYNACK_COUNT:-0}" positive \
    && _diag_count_is "${RET_CNI0_SYNACK_COUNT:-0}" positive; then
    echo "[结论] 链路已恢复:ylc62/ylc61/cni0 均观测到 SYN-ACK,62:80 已可达 Traefik。"
  elif [[ "${NFT_DNAT_HIT:-no}" == "yes" ]] \
    && _diag_count_is "${RET_LOCAL_SYN_COUNT:-0}" positive \
    && _diag_count_is "${RET_LOCAL_SYNACK_COUNT:-0}" zero; then
    echo "[结论] 流量已经在 worker 被 KUBE-EXT/KUBE-SVC DNAT 到 Traefik(10.42.0.12:8000),但 ylc61 未观察到 SYN-ACK,优先排查 Traefik Pod/宿主转发回包路径。"
  elif [[ -n "${WORKER_CNI_HIT_AFTER:-}" && -n "${WORKER_CNI_HIT_BEFORE:-}" ]] \
    && [[ "${WORKER_CNI_HIT_AFTER:-}" == "${WORKER_CNI_HIT_BEFORE:-}" ]]; then
    # Both samples exist and are equal: the hostport DNAT counter did not move.
    echo "[结论] Worker CNI hostport DNAT 计数未增长。若 nft trace 显示走 KUBE-EXT/KUBE-SVC,这是正常路径提示,不构成故障根因。"
  else
    echo "[结论] 未观察到 Traefik REJECT 明确命中,优先检查回包链路(ylc61<->ylc62 flannel / ylc62 ens18 出口)。"
  fi
}

#######################################
# Offline analysis: scans a previously captured log for known markers and
# prints which ones were seen plus a single "[结论]" verdict line.
# Arguments:
#   $1 - path to the log file (must be an existing regular file)
# Outputs:
#   Findings and verdict on stdout; the "[ERR]" message also goes to stdout.
# Returns:
#   0 after printing a verdict, 1 if the path is missing or not a regular file.
#######################################
analyze_log_file() {
  # ${1:-} keeps the friendly error path working under `set -u`.
  local log_file="${1:-}"

  if [[ -z "${log_file}" || ! -f "${log_file}" ]]; then
    # NOTE(review): the hint after "--log" looks truncated (a placeholder may
    # have been lost); string left unchanged, confirm the intended text.
    echo "[ERR] analyze 模式需要有效日志文件: --log "
    return 1
  fi

  local has_worker_dnat="no"
  local has_firewalld_reject="no"
  local has_traefik_reject="no"
  local has_syn_no_synack="no"
  local has_synack_recovered="no"

  # The log is fed on stdin rather than passed as an operand: awk treats an
  # operand shaped like name=value (e.g. "run=1.log") as a variable
  # assignment and would read stdin instead of the file.
  if awk '/KUBE-EXT-.*KUBE-SVC|dnat to 10\.42\./ {hit=1} END{exit !hit}' \
    <"${log_file}"; then
    has_worker_dnat="yes"
  fi

  if awk '/filter_FORWARD_POLICIES.*admin-prohibited/ {hit=1} END{exit !hit}' \
    <"${log_file}"; then
    has_firewalld_reject="yes"
  fi

  if awk '/Traefik REJECT 命中: [1-9]/ {hit=1} END{exit !hit}' \
    <"${log_file}"; then
    has_traefik_reject="yes"
  fi

  # SYN seen on ylc61 with a SYN-ACK count of 0.
  if awk '/ylc61\(any\) SYN\/SYN-ACK: [1-9][0-9]*\/0/ {hit=1} END{exit !hit}' \
    <"${log_file}"; then
    has_syn_no_synack="yes"
  fi

  # Recovered only when BOTH the ylc61 and ylc62 lines show non-zero
  # SYN and SYN-ACK counts.
  if awk '/ylc61\(any\) SYN\/SYN-ACK: [1-9][0-9]*\/[1-9][0-9]*/ {a=1} /ylc62\(ens18\) SYN\/SYN-ACK: [1-9][0-9]*\/[1-9][0-9]*/ {b=1} END{exit !(a&&b)}' \
    <"${log_file}"; then
    has_synack_recovered="yes"
  fi

  echo "===== 日志离线判读 ====="
  echo "- 日志文件: ${log_file}"
  echo "- 观测到 worker DNAT: ${has_worker_dnat}"
  echo "- 观测到 firewalld forward reject: ${has_firewalld_reject}"
  echo "- 观测到 Traefik Pod REJECT 命中: ${has_traefik_reject}"
  echo "- 观测到 ylc61 SYN 无 SYN-ACK: ${has_syn_no_synack}"
  echo "- 观测到链路恢复(有 SYN-ACK): ${has_synack_recovered}"

  # Verdict priority: firewalld reject > recovered > DNAT without SYN-ACK >
  # Traefik reject > inconclusive.
  if [[ "${has_firewalld_reject}" == "yes" ]]; then
    echo "[结论] 高概率为 ylc61 firewalld FORWARD 策略阻断 flannel.1 -> cni0。"
  elif [[ "${has_synack_recovered}" == "yes" ]]; then
    echo "[结论] 链路已恢复,入口到 Traefik 回包路径正常。"
  elif [[ "${has_worker_dnat}" == "yes" && "${has_syn_no_synack}" == "yes" ]]; then
    echo "[结论] worker 入站与 DNAT 正常,需优先排查 ylc61 到 Traefik Pod 的转发/回包链路。"
  elif [[ "${has_traefik_reject}" == "yes" ]]; then
    echo "[结论] Traefik Pod NetworkPolicy 命中拒绝,优先检查 kube-system netpol。"
  else
    echo "[结论] 日志未出现单一确定根因,建议执行 run/capture 模式重新采样。"
  fi
}