#!/usr/bin/env bash
#
# Entry point of the K3s traffic-path diagnostic tool.
#
# Subcommands (dispatched in main): run | preflight | capture | analyze.
# Helpers that are not defined in this file (init_defaults, usage, say,
# run_cmd, local_preflight_checks, prepare_runtime_context, ...) are expected
# to be provided by the lib/*.sh files sourced below.
set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
LIB_DIR="${SCRIPT_DIR}/lib"

# shellcheck source=lib/common.sh
source "${LIB_DIR}/common.sh"
# shellcheck source=lib/k8s_checks.sh
source "${LIB_DIR}/k8s_checks.sh"
# shellcheck source=lib/remote_checks.sh
source "${LIB_DIR}/remote_checks.sh"
# shellcheck source=lib/capture.sh
source "${LIB_DIR}/capture.sh"
# shellcheck source=lib/analyze.sh
source "${LIB_DIR}/analyze.sh"

#######################################
# Abort with a diagnostic when a value-taking option has no value after it.
# Without this guard, `shift 2` fails on a trailing option and `set -e`
# terminates the script silently.
# Arguments:
#   $1 - option name as typed by the user
#   $2 - number of positional arguments left, including the option itself
# Outputs:
#   Error line on stderr, followed by usage.
# Returns:
#   0 when a value is present; exits 1 otherwise.
#######################################
_parse_args_require_value() {
  local opt="$1"
  local remaining="$2"

  if ((remaining < 2)); then
    echo "[ERR] 参数 ${opt} 缺少取值" >&2
    usage
    exit 1
  fi
}

#######################################
# Parse the command line into global settings.
# Calls init_defaults first, then accepts an optional leading subcommand
# (run|preflight|capture|analyze) into COMMAND, then consumes options.
# Globals (written):
#   COMMAND, WORKER_HOST, CLIENT_HOST, CLIENT_IP, LB_IP, WORKER_SSH_KEY,
#   CLIENT_SSH_KEY, DO_REMOTE_ARG, CAPTURE_MODE_ARG, CAPTURE_SECONDS,
#   NFT_TRACE_MODE_ARG, NFT_TRACE_SECONDS, RETURN_TRACE_MODE_ARG,
#   RETURN_TRACE_SECONDS, POD_NETNS_TRACE_MODE_ARG,
#   POD_NETNS_TRACE_SECONDS_ARG, NON_INTERACTIVE, ANALYZE_LOG
# Arguments:
#   "$@" - the script's command-line arguments
# Returns:
#   0 on success; exits 0 after -h/--help; exits 1 on an unknown option or
#   an option that is missing its value.
#######################################
parse_args() {
  init_defaults

  # Optional subcommand; when absent, COMMAND keeps whatever init_defaults
  # assigned (NOTE(review): presumably a default such as "run" - confirm).
  if [[ $# -gt 0 ]]; then
    case "$1" in
      run | preflight | capture | analyze)
        COMMAND="$1"
        shift
        ;;
    esac
  fi

  while [[ $# -gt 0 ]]; do
    case "$1" in
      --worker-host)
        _parse_args_require_value "$1" "$#"
        WORKER_HOST="${2:-}"
        shift 2
        ;;
      --client-host)
        _parse_args_require_value "$1" "$#"
        CLIENT_HOST="${2:-}"
        shift 2
        ;;
      --client-ip)
        _parse_args_require_value "$1" "$#"
        CLIENT_IP="${2:-}"
        shift 2
        ;;
      --lb-ip)
        _parse_args_require_value "$1" "$#"
        LB_IP="${2:-}"
        shift 2
        ;;
      # --ssh-key is an alias for --worker-ssh-key.
      --worker-ssh-key | --ssh-key)
        _parse_args_require_value "$1" "$#"
        WORKER_SSH_KEY="${2:-}"
        shift 2
        ;;
      --client-ssh-key)
        _parse_args_require_value "$1" "$#"
        CLIENT_SSH_KEY="${2:-}"
        shift 2
        ;;
      --remote-check)
        _parse_args_require_value "$1" "$#"
        DO_REMOTE_ARG="${2:-}"
        shift 2
        ;;
      --capture-mode)
        _parse_args_require_value "$1" "$#"
        CAPTURE_MODE_ARG="${2:-}"
        shift 2
        ;;
      --capture-seconds)
        _parse_args_require_value "$1" "$#"
        # An explicitly empty value falls back to 12.
        CAPTURE_SECONDS="${2:-12}"
        shift 2
        ;;
      --nft-trace-mode)
        _parse_args_require_value "$1" "$#"
        NFT_TRACE_MODE_ARG="${2:-}"
        shift 2
        ;;
      --nft-trace-seconds)
        _parse_args_require_value "$1" "$#"
        # An explicitly empty value falls back to 8.
        NFT_TRACE_SECONDS="${2:-8}"
        shift 2
        ;;
      --return-trace-mode)
        _parse_args_require_value "$1" "$#"
        RETURN_TRACE_MODE_ARG="${2:-}"
        shift 2
        ;;
      --return-trace-seconds)
        _parse_args_require_value "$1" "$#"
        # An explicitly empty value falls back to 10.
        RETURN_TRACE_SECONDS="${2:-10}"
        shift 2
        ;;
      --pod-netns-trace-mode)
        _parse_args_require_value "$1" "$#"
        POD_NETNS_TRACE_MODE_ARG="${2:-}"
        shift 2
        ;;
      --pod-netns-trace-seconds)
        _parse_args_require_value "$1" "$#"
        POD_NETNS_TRACE_SECONDS_ARG="${2:-}"
        shift 2
        ;;
      --non-interactive)
        NON_INTERACTIVE="1"
        shift
        ;;
      --log)
        _parse_args_require_value "$1" "$#"
        ANALYZE_LOG="${2:-}"
        shift 2
        ;;
      -h | --help)
        usage
        exit 0
        ;;
      *)
        echo "[ERR] 未知参数: $1" >&2
        usage
        exit 1
        ;;
    esac
  done
}

#######################################
# "preflight" subcommand: run local_preflight_checks and
# prepare_runtime_context, then print the resolved connection settings.
# Globals (read):
#   WORKER_HOST, CLIENT_HOST, CLIENT_IP, LB_IP, WORKER_SSH_KEY, CLIENT_SSH_KEY
# Outputs:
#   key=value summary on stdout.
#######################################
cmd_preflight() {
  local_preflight_checks
  prepare_runtime_context

  echo "=== preflight ==="
  echo "[OK] 依赖检查通过"
  echo "worker_host=${WORKER_HOST}"
  echo "client_host=${CLIENT_HOST:-}"
  echo "client_ip=${CLIENT_IP}"
  echo "lb_ip=${LB_IP}"
  echo "worker_ssh_key=${WORKER_SSH_KEY:-}"
  echo "client_ssh_key=${CLIENT_SSH_KEY:-}"
}

#######################################
# "run" subcommand: full end-to-end check.
# Sequence: local preflight -> runtime context and log file -> local k8s state
# -> remote worker state -> start captures/traces -> trigger client probes
# (via SSH when CLIENT_HOST is set, otherwise wait for the operator unless
# non-interactive) -> flush captures -> post-checks -> summary.
# Globals (read):
#   LB_IP, CLIENT_IP, CLIENT_HOST, CLIENT_SSH_OPTS, NON_INTERACTIVE, LOG_FILE,
#   WORKER_SSH_KEY, CLIENT_SSH_KEY, TRAEFIK_CHAIN, POD_NETNS_SYN_COUNT,
#   POD_NETNS_SYNACK_COUNT
# Outputs:
#   Progress and results on stdout.
#######################################
cmd_run() {
  local_preflight_checks

  echo "K3s 全链路一键检查(入口 -> DNAT -> Service -> Endpoint -> NetPol -> 回包)"
  echo "建议在 server 节点执行(例如 ylc61)。"
  echo

  # Build the curl probe command that targets the IPv4 LB_IP.
  # NOTE(review): LB_IP is interpolated into a command string that is later
  # evaluated by a local `bash -lc` and by the remote shell over SSH; it must
  # come from a trusted operator.
  local CURL_HTTP CURL_DESC
  CURL_HTTP="curl -I --max-time 3 http://${LB_IP}:80"
  CURL_DESC="${CURL_HTTP}"

  prepare_runtime_context
  setup_log_file
  say "日志文件: $LOG_FILE"
  say "worker SSH key: ${WORKER_SSH_KEY:-}"
  say "client SSH key: ${CLIENT_SSH_KEY:-}"

  collect_local_k8s_state
  echo

  resolve_runtime_modes
  collect_remote_worker_state
  echo

  echo ">>> 请在第三方客户端(${CLIENT_IP})执行 3 次:${CURL_DESC}"

  # Captures and traces are started before any probe traffic is generated.
  start_worker_capture
  start_worker_nft_trace
  start_return_path_trace
  start_pod_netns_trace

  # ${CLIENT_HOST:-} keeps `set -u` from aborting when no client host is set,
  # matching how the variable is expanded elsewhere in this file.
  if [[ -n "${CLIENT_HOST:-}" ]]; then
    say "通过 SSH 自动触发客户端探测: ${CLIENT_HOST}"
    # `|| true` keeps the remote loop going when an individual probe fails.
    run_cmd "Client 自动探测(3次)" ssh "${CLIENT_SSH_OPTS[@]}" "${CLIENT_HOST}" \
      "for i in 1 2 3; do ${CURL_HTTP} || true; sleep 1; done"
  elif [[ "${NON_INTERACTIVE}" == "0" ]]; then
    read -r -p "完成后按回车继续采样..."
  else
    echo "[WARN] non-interactive 模式且未提供 --client-host:跳过等待直接采样,可能没有新流量。"
  fi

  flush_worker_capture
  post_remote_worker_state

  run_cmd "Traefik Pod FW 链复测" \
    sudo iptables -L "${TRAEFIK_CHAIN:-KUBE-ROUTER-FORWARD}" -n -v --line-numbers
  run_cmd "本机访问目标LB_IP:80(仅供参考,可能本机被kube-proxy劫持)" \
    bash -lc "${CURL_HTTP}"

  print_diag_summary
  echo
  echo "Traefik pod netns SYN/SYN-ACK: ${POD_NETNS_SYN_COUNT:-0}/${POD_NETNS_SYNACK_COUNT:-0}"
  echo
  echo "完成。完整日志: ${LOG_FILE}"
}

#######################################
# "capture" subcommand: force every remote/capture/trace mode argument to "y"
# and non-interactive mode on, then delegate to cmd_run. This runs after
# parse_args, so these assignments override the corresponding CLI options.
# Globals (written):
#   DO_REMOTE_ARG, CAPTURE_MODE_ARG, NFT_TRACE_MODE_ARG, RETURN_TRACE_MODE_ARG,
#   POD_NETNS_TRACE_MODE_ARG, NON_INTERACTIVE
#######################################
cmd_capture() {
  DO_REMOTE_ARG="y"
  CAPTURE_MODE_ARG="y"
  NFT_TRACE_MODE_ARG="y"
  RETURN_TRACE_MODE_ARG="y"
  POD_NETNS_TRACE_MODE_ARG="y"
  NON_INTERACTIVE="1"
  cmd_run
}

#######################################
# "analyze" subcommand: pass the log selected with --log to analyze_log_file.
# Globals (read):
#   ANALYZE_LOG
#######################################
cmd_analyze() {
  analyze_log_file "${ANALYZE_LOG}"
}

#######################################
# Parse arguments and dispatch to the selected subcommand.
# Arguments:
#   "$@" - the script's command-line arguments
# Returns:
#   Exits 1 (after printing usage) when COMMAND is not a known subcommand.
#######################################
main() {
  parse_args "$@"

  case "${COMMAND}" in
    run) cmd_run ;;
    preflight) cmd_preflight ;;
    capture) cmd_capture ;;
    analyze) cmd_analyze ;;
    *)
      echo "[ERR] 未知命令: ${COMMAND}" >&2
      usage
      exit 1
      ;;
  esac
}

main "$@"