#!/usr/bin/env bash
#
# Optional packet-capture / nft-trace helpers for the entry-path diagnostics.
#
# Each start_* function launches one or more time-limited background
# collectors (tcpdump, `nft monitor trace`, conntrack) whose output goes to a
# mktemp file; flush_worker_capture waits for them, prints what they collected
# and removes the temp files and the temporary nft tables.
#
# Names used here but not defined in this section (expected to be provided
# elsewhere in the script): DO_REMOTE, WORKER_HOST, SSH_OPTS (array),
# CLIENT_IP, LB_IP, TRAEFIK_IP, TRAEFIK_POD, say, count_tcpdump_flag.

# --- worker ens18 capture ----------------------------------------------------
CAPTURE_MODE="N"          # Y/y enables start_worker_capture
CAPTURE_SECONDS="12"      # duration passed to `timeout` on the worker
CAPTURE_MODE_ARG=""       # not read in this section — presumably set by CLI parsing
CAP_FILE_ENS18=""         # temp file receiving the worker tcpdump output
CAP_PID_ENS18=""          # PID of the background ssh running that tcpdump

# --- worker nft trace --------------------------------------------------------
NFT_TRACE_MODE="N"        # Y/y enables start_worker_nft_trace
NFT_TRACE_SECONDS="8"
NFT_TRACE_MODE_ARG=""     # not read in this section
NFT_FILE=""
NFT_PID=""
NFT_TRACE_TABLE="diag_k3s_entrypath"         # temporary table created on the worker
LOCAL_NFT_TRACE_TABLE="diag61_k3s_entrypath" # temporary table created on this host

# --- return-path trace -------------------------------------------------------
RETURN_TRACE_MODE="N"     # Y/y enables start_return_path_trace
RETURN_TRACE_SECONDS="10"
RETURN_TRACE_MODE_ARG=""  # not read in this section
RET_FILE_LOCAL_8000=""
RET_FILE_LOCAL_CNI0=""
RET_FILE_WORKER_ENS18=""
RET_FILE_WORKER_CONNTRACK=""
RET_PID_LOCAL_8000=""
RET_PID_LOCAL_CNI0=""
RET_PID_WORKER_ENS18=""
RET_PID_WORKER_CONNTRACK=""
RET_FILE_LOCAL_NFT_TRACE=""
RET_PID_LOCAL_NFT_TRACE=""

# --- results filled in by flush_worker_capture -------------------------------
NFT_DNAT_HIT="no"         # "yes" once the worker trace shows a DNAT to TRAEFIK_IP:8000
RET_LOCAL_SYN_COUNT=0
RET_LOCAL_SYNACK_COUNT=0
RET_CNI0_SYN_COUNT=0
RET_CNI0_SYNACK_COUNT=0
RET_WORKER_SYN_COUNT=0
RET_WORKER_SYNACK_COUNT=0

# --- Traefik pod network-namespace capture -----------------------------------
POD_NETNS_TRACE_MODE="N"  # Y/y enables start_pod_netns_trace
POD_NETNS_TRACE_MODE_ARG=""     # not read in this section
POD_NETNS_TRACE_SECONDS=""      # empty => fall back to RETURN_TRACE_SECONDS
POD_NETNS_TRACE_SECONDS_ARG=""  # not read in this section
POD_NETNS_PID=""
POD_NETNS_FILE=""
POD_NETNS_SYN_COUNT=0
POD_NETNS_SYNACK_COUNT=0

#######################################
# Start a time-limited tcpdump on the worker's ens18 over ssh.
# Globals:   CAPTURE_MODE, CAPTURE_SECONDS, DO_REMOTE, WORKER_HOST, SSH_OPTS,
#            CLIENT_IP (read); CAP_FILE_ENS18, CAP_PID_ENS18 (written)
# Outputs:   a warning on stdout when capture is enabled but remote checks
#            are not
# Returns:   0 (also when skipped)
#######################################
start_worker_capture() {
  if [[ ! "$CAPTURE_MODE" =~ ^[Yy]$ ]]; then
    return 0
  fi
  if [[ ! "$DO_REMOTE" =~ ^[Yy]$ ]] || [[ -z "${WORKER_HOST}" ]]; then
    echo "[WARN] 抓包模式已开启,但未启用远端检查或未提供 worker 主机,跳过抓包。"
    return 0
  fi

  CAP_FILE_ENS18="$(mktemp)"
  say "启动 worker 抓包(ens18, ${CAPTURE_SECONDS}s): host ${CLIENT_IP} and tcp port 80"
  # The command string is expanded locally and executed by the remote shell;
  # remote stderr is dropped there, so the file receives tcpdump's stdout plus
  # any message printed by the local ssh client.
  ssh "${SSH_OPTS[@]}" "${WORKER_HOST}" \
    "sudo timeout ${CAPTURE_SECONDS} tcpdump -ni ens18 'host ${CLIENT_IP} and tcp port 80' 2>/dev/null || true" \
    >"${CAP_FILE_ENS18}" 2>&1 &
  CAP_PID_ENS18="$!"
  # Give the remote collector a moment to come up before the caller continues.
  sleep 1
}

#######################################
# Install a temporary nftrace rule on the worker and start
# `nft monitor trace` there over ssh.
# Globals:   NFT_TRACE_MODE, NFT_TRACE_SECONDS, NFT_TRACE_TABLE, DO_REMOTE,
#            WORKER_HOST, SSH_OPTS, CLIENT_IP, LB_IP (read);
#            NFT_FILE, NFT_PID (written)
# Outputs:   a warning on stdout when tracing is enabled but remote checks
#            are not
# Returns:   0 (also when skipped)
#######################################
start_worker_nft_trace() {
  if [[ ! "$NFT_TRACE_MODE" =~ ^[Yy]$ ]]; then
    return 0
  fi
  if [[ ! "$DO_REMOTE" =~ ^[Yy]$ ]] || [[ -z "${WORKER_HOST}" ]]; then
    echo "[WARN] nft trace 已开启,但未启用远端检查或未提供 worker 主机,跳过 nft trace。"
    return 0
  fi

  # Best effort: each step tolerates failure so that a missing nft or an
  # already-existing table does not abort the diagnostics run.
  ssh "${SSH_OPTS[@]}" "${WORKER_HOST}" \
    "sudo nft add table inet ${NFT_TRACE_TABLE} 2>/dev/null || true; \
     sudo nft 'add chain inet ${NFT_TRACE_TABLE} prerouting { type filter hook prerouting priority -301; policy accept; }' 2>/dev/null || true; \
     sudo nft add rule inet ${NFT_TRACE_TABLE} prerouting iif \"ens18\" ip saddr ${CLIENT_IP} ip daddr ${LB_IP} tcp dport 80 meta nftrace set 1 2>/dev/null || true" \
    || true

  NFT_FILE="$(mktemp)"
  say "启动 worker nft trace(${NFT_TRACE_SECONDS}s)"
  ssh "${SSH_OPTS[@]}" "${WORKER_HOST}" \
    "sudo timeout ${NFT_TRACE_SECONDS} nft monitor trace 2>/dev/null || true" \
    >"${NFT_FILE}" 2>&1 &
  NFT_PID="$!"
  sleep 1
}

#######################################
# Start the return-path collectors: a local nft trace on the forward hook,
# local tcpdumps (any + cni0) for TRAEFIK_IP:8000, and on the worker a
# tcpdump on ens18 plus a conntrack event stream.
# Globals:   RETURN_TRACE_MODE, RETURN_TRACE_SECONDS, LOCAL_NFT_TRACE_TABLE,
#            DO_REMOTE, WORKER_HOST, SSH_OPTS, CLIENT_IP, TRAEFIK_IP (read);
#            RET_FILE_*, RET_PID_* (written)
# Outputs:   a warning on stdout when enabled but remote checks are not
# Returns:   0 (also when skipped)
#######################################
start_return_path_trace() {
  if [[ ! "$RETURN_TRACE_MODE" =~ ^[Yy]$ ]]; then
    return 0
  fi
  if [[ ! "$DO_REMOTE" =~ ^[Yy]$ ]] || [[ -z "${WORKER_HOST}" ]]; then
    echo "[WARN] 回包链路跟踪已开启,但未启用远端检查或未提供 worker 主机,跳过。"
    return 0
  fi

  RET_FILE_LOCAL_8000="$(mktemp)"
  RET_FILE_LOCAL_CNI0="$(mktemp)"
  RET_FILE_LOCAL_NFT_TRACE="$(mktemp)"
  RET_FILE_WORKER_ENS18="$(mktemp)"
  RET_FILE_WORKER_CONNTRACK="$(mktemp)"
  say "启动回包链路跟踪(${RETURN_TRACE_SECONDS}s)"

  # Temporary local table that flags matching forwarded packets for tracing.
  sudo nft add table inet "${LOCAL_NFT_TRACE_TABLE}" 2>/dev/null || true
  sudo nft "add chain inet ${LOCAL_NFT_TRACE_TABLE} forward { type filter hook forward priority -301; policy accept; }" 2>/dev/null || true
  sudo nft add rule inet "${LOCAL_NFT_TRACE_TABLE}" forward iif "flannel.1" ip daddr "${TRAEFIK_IP}" tcp dport 8000 meta nftrace set 1 2>/dev/null || true

  # Local collectors write stdout AND stderr to their file. (The original
  # carried an extra `2>/dev/null` that the trailing `2>&1` overrode; it had
  # no effect and was dropped.)
  sudo timeout "${RETURN_TRACE_SECONDS}" nft monitor trace \
    >"${RET_FILE_LOCAL_NFT_TRACE}" 2>&1 &
  RET_PID_LOCAL_NFT_TRACE="$!"

  sudo timeout "${RETURN_TRACE_SECONDS}" tcpdump -ni any "host ${TRAEFIK_IP} and tcp port 8000" \
    >"${RET_FILE_LOCAL_8000}" 2>&1 &
  RET_PID_LOCAL_8000="$!"

  sudo timeout "${RETURN_TRACE_SECONDS}" tcpdump -ni cni0 "host ${TRAEFIK_IP} and tcp port 8000" \
    >"${RET_FILE_LOCAL_CNI0}" 2>&1 &
  RET_PID_LOCAL_CNI0="$!"

  ssh "${SSH_OPTS[@]}" "${WORKER_HOST}" \
    "sudo timeout ${RETURN_TRACE_SECONDS} tcpdump -ni ens18 'host ${CLIENT_IP} and tcp' 2>/dev/null || true" \
    >"${RET_FILE_WORKER_ENS18}" 2>&1 &
  RET_PID_WORKER_ENS18="$!"

  ssh "${SSH_OPTS[@]}" "${WORKER_HOST}" \
    "if command -v conntrack >/dev/null 2>&1; then sudo timeout ${RETURN_TRACE_SECONDS} conntrack -E -p tcp 2>/dev/null || true; else echo 'conntrack: not found'; fi" \
    >"${RET_FILE_WORKER_CONNTRACK}" 2>&1 &
  RET_PID_WORKER_CONNTRACK="$!"
  sleep 1
}

#######################################
# Start a tcpdump for tcp port 8000 inside the network namespace of the
# traefik container on this host.
# Globals:   POD_NETNS_TRACE_MODE, POD_NETNS_TRACE_SECONDS,
#            RETURN_TRACE_SECONDS, TRAEFIK_POD (read);
#            POD_NETNS_FILE, POD_NETNS_PID (written)
# Outputs:   a warning on stdout when a required tool or the container
#            ID/PID cannot be found
# Returns:   0 (also when skipped)
#######################################
start_pod_netns_trace() {
  if [[ ! "${POD_NETNS_TRACE_MODE}" =~ ^[Yy]$ ]]; then
    return 0
  fi
  if ! command -v crictl >/dev/null 2>&1; then
    echo "[WARN] 未找到 crictl,跳过 pod netns 抓包。"
    return 0
  fi
  if ! command -v nsenter >/dev/null 2>&1; then
    echo "[WARN] 未找到 nsenter,跳过 pod netns 抓包。"
    return 0
  fi

  local sec="${POD_NETNS_TRACE_SECONDS:-$RETURN_TRACE_SECONDS}"
  local cid
  local pid
  local runtime_id=""

  # Preferred source: the container ID recorded in the pod status, with the
  # runtime scheme prefix stripped.
  runtime_id="$(sudo kubectl -n kube-system get pod "${TRAEFIK_POD}" -o jsonpath='{.status.containerStatuses[?(@.name=="traefik")].containerID}' 2>/dev/null || true)"
  runtime_id="${runtime_id#containerd://}"
  runtime_id="${runtime_id#cri-o://}"
  if [[ -n "${runtime_id}" ]]; then
    cid="${runtime_id}"
  else
    # Fallback: first container that crictl lists under the name "traefik".
    cid="$(sudo crictl ps --name traefik -q 2>/dev/null | awk 'NR==1{print; exit}' || true)"
  fi
  if [[ -z "${cid}" ]]; then
    echo "[WARN] 未解析到 traefik 容器ID,跳过 pod netns 抓包。"
    return 0
  fi

  # First `"pid": N` line of the inspect output, trailing comma removed.
  pid="$(sudo crictl inspect "${cid}" 2>/dev/null | awk -F': ' '/"pid":/ {gsub(/,/, "", $2); print $2; exit}' || true)"
  if [[ -z "${pid}" || ! "${pid}" =~ ^[0-9]+$ ]]; then
    echo "[WARN] 未解析到 traefik 容器 PID,跳过 pod netns 抓包。"
    return 0
  fi

  POD_NETNS_FILE="$(mktemp)"
  say "启动 Traefik Pod netns 抓包(${sec}s, pid=${pid})"
  # stdout and stderr both go to the file (a dead `2>/dev/null` that the
  # trailing `2>&1` overrode was dropped).
  sudo timeout "${sec}" nsenter -t "${pid}" -n tcpdump -ni any "tcp port 8000" \
    >"${POD_NETNS_FILE}" 2>&1 &
  POD_NETNS_PID="$!"
  sleep 1
}

# Wait for the background PID stored in the global named by $1 (if any),
# ignoring its exit status, then clear that global.
_diag_reap_pid() {
  local _ref="$1"
  local _val="${!_ref}"
  if [[ -n "${_val}" ]]; then
    wait "${_val}" || true
    printf -v "${_ref}" '%s' ''
  fi
}

# Succeed when the global named by $1 holds the path of an existing file.
_diag_file_ready() {
  local _path="${!1}"
  [[ -n "${_path}" && -f "${_path}" ]]
}

# Print a blank line and a "===== $2 =====" banner, dump the file whose path
# is stored in the global named by $1, delete it and clear that global.
_diag_print_and_remove() {
  local _ref="$1"
  local _title="$2"
  local _path="${!_ref}"
  echo
  echo "===== ${_title} ====="
  cat "${_path}" || true
  rm -f "${_path}" || true
  printf -v "${_ref}" '%s' ''
}

#######################################
# Wait for every collector started by the start_* functions, print the
# collected output, derive the SYN / SYN-ACK counters and NFT_DNAT_HIT, and
# remove the temp files and the temporary nft tables.
# Globals:   *_PID*, *_FILE* (read, then cleared); NFT_TRACE_MODE, DO_REMOTE,
#            WORKER_HOST, SSH_OPTS, NFT_TRACE_TABLE, LOCAL_NFT_TRACE_TABLE,
#            TRAEFIK_IP, CLIENT_IP (read); NFT_DNAT_HIT, RET_*_COUNT,
#            POD_NETNS_*_COUNT (written)
# Outputs:   one banner plus the raw collector output per capture, on stdout
# Returns:   0
#######################################
flush_worker_capture() {
  # --- worker ens18 capture ---
  _diag_reap_pid CAP_PID_ENS18
  if _diag_file_ready CAP_FILE_ENS18; then
    _diag_print_and_remove CAP_FILE_ENS18 "Worker 抓包结果(ens18)"
  fi

  # --- worker nft trace ---
  _diag_reap_pid NFT_PID
  if _diag_file_ready NFT_FILE; then
    # Fixed-string match: TRAEFIK_IP contains dots, which a regex would treat
    # as "any character". NFT_DNAT_HIT is only ever raised here, never reset.
    if grep -Fq -- "dnat to ${TRAEFIK_IP}:8000" "${NFT_FILE}" 2>/dev/null; then
      NFT_DNAT_HIT="yes"
    fi
    _diag_print_and_remove NFT_FILE "Worker nft trace 结果"
  fi
  # Remove the temporary table from the worker under the same conditions that
  # start_worker_nft_trace used to create it.
  if [[ "$NFT_TRACE_MODE" =~ ^[Yy]$ ]] && [[ "$DO_REMOTE" =~ ^[Yy]$ ]] && [[ -n "${WORKER_HOST}" ]]; then
    ssh "${SSH_OPTS[@]}" "${WORKER_HOST}" "sudo nft delete table inet ${NFT_TRACE_TABLE} 2>/dev/null || true" || true
  fi

  # --- return-path trace: wait for all collectors before printing any ---
  _diag_reap_pid RET_PID_LOCAL_8000
  _diag_reap_pid RET_PID_LOCAL_NFT_TRACE
  _diag_reap_pid RET_PID_LOCAL_CNI0
  _diag_reap_pid RET_PID_WORKER_ENS18
  _diag_reap_pid RET_PID_WORKER_CONNTRACK

  if _diag_file_ready RET_FILE_LOCAL_8000; then
    RET_LOCAL_SYN_COUNT="$(count_tcpdump_flag "${RET_FILE_LOCAL_8000}" "Flags [S]")"
    RET_LOCAL_SYNACK_COUNT="$(count_tcpdump_flag "${RET_FILE_LOCAL_8000}" "Flags [S.]")"
    _diag_print_and_remove RET_FILE_LOCAL_8000 "回包链路抓包(ylc61 any -> ${TRAEFIK_IP}:8000)"
  fi
  if _diag_file_ready RET_FILE_LOCAL_NFT_TRACE; then
    _diag_print_and_remove RET_FILE_LOCAL_NFT_TRACE "本机 nft trace 结果(ylc61 forward)"
  fi
  if _diag_file_ready RET_FILE_LOCAL_CNI0; then
    RET_CNI0_SYN_COUNT="$(count_tcpdump_flag "${RET_FILE_LOCAL_CNI0}" "Flags [S]")"
    RET_CNI0_SYNACK_COUNT="$(count_tcpdump_flag "${RET_FILE_LOCAL_CNI0}" "Flags [S.]")"
    _diag_print_and_remove RET_FILE_LOCAL_CNI0 "回包链路抓包(ylc61 cni0 -> ${TRAEFIK_IP}:8000)"
  fi
  if _diag_file_ready RET_FILE_WORKER_ENS18; then
    RET_WORKER_SYN_COUNT="$(count_tcpdump_flag "${RET_FILE_WORKER_ENS18}" "Flags [S]")"
    RET_WORKER_SYNACK_COUNT="$(count_tcpdump_flag "${RET_FILE_WORKER_ENS18}" "Flags [S.]")"
    _diag_print_and_remove RET_FILE_WORKER_ENS18 "回包链路抓包(ylc62 ens18 <-> ${CLIENT_IP})"
  fi
  if _diag_file_ready RET_FILE_WORKER_CONNTRACK; then
    _diag_print_and_remove RET_FILE_WORKER_CONNTRACK "回包链路 conntrack 事件(ylc62)"
  fi

  # Always attempted, even when the return-path trace never ran; a missing
  # table just makes the command fail quietly.
  sudo nft delete table inet "${LOCAL_NFT_TRACE_TABLE}" 2>/dev/null || true

  # --- Traefik pod netns capture ---
  _diag_reap_pid POD_NETNS_PID
  if _diag_file_ready POD_NETNS_FILE; then
    POD_NETNS_SYN_COUNT="$(count_tcpdump_flag "${POD_NETNS_FILE}" "Flags [S]")"
    POD_NETNS_SYNACK_COUNT="$(count_tcpdump_flag "${POD_NETNS_FILE}" "Flags [S.]")"
    _diag_print_and_remove POD_NETNS_FILE "Traefik Pod netns 抓包(ylc61)"
  fi
}