Files
Deploy-Laboratory/scripts/diag/entrypath/entrypath.sh
2026-03-21 04:36:06 +08:00

145 lines
4.3 KiB
Bash
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#!/usr/bin/env bash
# Entry script: parses the command line and dispatches to one of the
# run / preflight / capture / analyze subcommands defined below.
# Strict mode: abort on command failure, on unset variables, and on a failure
# in any stage of a pipeline.
set -euo pipefail
# Absolute directory containing this script, so the libraries below are found
# regardless of the caller's working directory.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
LIB_DIR="${SCRIPT_DIR}/lib"
# Helper libraries. Functions used below but not defined in this file
# (init_defaults, usage, say, run_cmd, ...) presumably come from these — confirm.
source "${LIB_DIR}/common.sh"
source "${LIB_DIR}/k8s_checks.sh"
source "${LIB_DIR}/remote_checks.sh"
source "${LIB_DIR}/capture.sh"
source "${LIB_DIR}/analyze.sh"
#######################################
# Parse the command line into the global option variables.
# Globals written:
#   COMMAND (only when a leading subcommand is given), WORKER_HOST,
#   CLIENT_HOST, CLIENT_IP, LB_IP, WORKER_SSH_KEY, CLIENT_SSH_KEY,
#   DO_REMOTE_ARG, CAPTURE_MODE_ARG, CAPTURE_SECONDS, NFT_TRACE_MODE_ARG,
#   NFT_TRACE_SECONDS, RETURN_TRACE_MODE_ARG, RETURN_TRACE_SECONDS,
#   POD_NETNS_TRACE_MODE_ARG, POD_NETNS_TRACE_SECONDS_ARG, NON_INTERACTIVE,
#   ANALYZE_LOG
# Arguments:
#   $@ - optional subcommand (run|preflight|capture|analyze) followed by options
# Exits:
#   0 after printing usage for -h/--help
#   1 on an unknown option, or on a value-taking option given without a value
#######################################
parse_args() {
  # Defined outside this file; expected to seed the globals before parsing.
  init_defaults

  # Optional leading subcommand; anything else is left for the option loop.
  if [[ $# -gt 0 ]]; then
    case "$1" in
      run|preflight|capture|analyze)
        COMMAND="$1"
        shift
        ;;
    esac
  fi

  while [[ $# -gt 0 ]]; do
    case "$1" in
      --worker-host) WORKER_HOST="${2:-}" ;;
      --client-host) CLIENT_HOST="${2:-}" ;;
      --client-ip) CLIENT_IP="${2:-}" ;;
      --lb-ip) LB_IP="${2:-}" ;;
      --worker-ssh-key) WORKER_SSH_KEY="${2:-}" ;;
      # --ssh-key is an alias for --worker-ssh-key.
      --ssh-key) WORKER_SSH_KEY="${2:-}" ;;
      --client-ssh-key) CLIENT_SSH_KEY="${2:-}" ;;
      --remote-check) DO_REMOTE_ARG="${2:-}" ;;
      --capture-mode) CAPTURE_MODE_ARG="${2:-}" ;;
      # An empty value falls back to the built-in duration.
      --capture-seconds) CAPTURE_SECONDS="${2:-12}" ;;
      --nft-trace-mode) NFT_TRACE_MODE_ARG="${2:-}" ;;
      --nft-trace-seconds) NFT_TRACE_SECONDS="${2:-8}" ;;
      --return-trace-mode) RETURN_TRACE_MODE_ARG="${2:-}" ;;
      --return-trace-seconds) RETURN_TRACE_SECONDS="${2:-10}" ;;
      --pod-netns-trace-mode) POD_NETNS_TRACE_MODE_ARG="${2:-}" ;;
      --pod-netns-trace-seconds) POD_NETNS_TRACE_SECONDS_ARG="${2:-}" ;;
      --log) ANALYZE_LOG="${2:-}" ;;
      --non-interactive)
        # Flag without a value: consume one word and skip the arity check.
        NON_INTERACTIVE="1"
        shift
        continue
        ;;
      -h|--help)
        usage
        exit 0
        ;;
      *)
        echo "[ERR] 未知参数: $1"
        usage
        exit 1
        ;;
    esac

    # Every arm that reaches this point takes exactly one value. Without this
    # check a trailing option made `shift 2` fail: a silent exit under
    # `set -e`, or an endless loop without it.
    if [[ $# -lt 2 ]]; then
      echo "[ERR] 参数缺少取值: $1"
      usage
      exit 1
    fi
    shift 2
  done
}
#######################################
# `preflight` subcommand: run the local dependency checks, prepare the runtime
# context, then print the effective connection settings as key=value lines.
# Globals read: WORKER_HOST, CLIENT_HOST, CLIENT_IP, LB_IP, WORKER_SSH_KEY,
#               CLIENT_SSH_KEY
# Outputs: a short report on stdout
#######################################
cmd_preflight() {
  local_preflight_checks
  prepare_runtime_context

  printf '%s\n' "=== preflight ==="
  printf '%s\n' "[OK] 依赖检查通过"
  printf 'worker_host=%s\n' "${WORKER_HOST}"
  # Optional settings are shown with a placeholder when empty or unset.
  printf 'client_host=%s\n' "${CLIENT_HOST:-<none>}"
  printf 'client_ip=%s\n' "${CLIENT_IP}"
  printf 'lb_ip=%s\n' "${LB_IP}"
  printf 'worker_ssh_key=%s\n' "${WORKER_SSH_KEY:-<ssh默认>}"
  printf 'client_ssh_key=%s\n' "${CLIENT_SSH_KEY:-<ssh默认>}"
}
# `run` subcommand: the full diagnostic sequence. In order it runs the local
# preflight checks, prepares the runtime context and log file, calls the
# state-collection helpers, starts the capture/trace helpers, triggers (or
# waits for) client probes against LB_IP, then flushes the capture, re-checks
# state and prints the summary.
# Globals read: LB_IP, CLIENT_IP, CLIENT_HOST, CLIENT_SSH_OPTS, NON_INTERACTIVE,
#   WORKER_SSH_KEY, CLIENT_SSH_KEY, TRAEFIK_CHAIN, LOG_FILE,
#   POD_NETNS_SYN_COUNT, POD_NETNS_SYNACK_COUNT.
# All helper functions called here are defined outside this file (presumably
# in lib/*.sh — confirm); their exact behavior is not visible from here.
cmd_run() {
local_preflight_checks
echo "K3s 全链路一键检查(入口 -> DNAT -> Service -> Endpoint -> NetPol -> 回包)"
echo "建议在 server 节点执行(例如 ylc61。"
echo
# Build the curl probe command targeting the IPv4 LB_IP.
# CURL_HTTP is the command that gets executed; CURL_DESC is the text shown to
# the operator. Both currently hold the same string.
# NOTE(review): these are built from LB_IP before prepare_runtime_context runs;
# if that helper can fill in or change LB_IP, the probe would use the earlier
# value — confirm.
# NOTE(review): bash locals are visible to the functions called from here, so
# the helpers may read these two names — check before renaming.
local CURL_HTTP CURL_DESC
CURL_HTTP="curl -I --max-time 3 http://${LB_IP}:80"
CURL_DESC="curl -I --max-time 3 http://${LB_IP}:80"
prepare_runtime_context
setup_log_file
say "日志文件: $LOG_FILE"
say "worker SSH key: ${WORKER_SSH_KEY:-<ssh默认>}"
say "client SSH key: ${CLIENT_SSH_KEY:-<ssh默认>}"
collect_local_k8s_state
echo
resolve_runtime_modes
collect_remote_worker_state
echo
# The manual-probe instruction is printed unconditionally, even when the probe
# is triggered automatically over SSH below.
echo ">>> 请在第三方客户端(${CLIENT_IP})执行 3 次:${CURL_DESC}"
# The capture/trace helpers are started before any probe traffic is generated.
start_worker_capture
start_worker_nft_trace
start_return_path_trace
start_pod_netns_trace
# Three ways to get probe traffic: automatic over SSH, manual with a prompt,
# or none at all (non-interactive without a client host).
if [[ -n "${CLIENT_HOST}" ]]; then
say "通过 SSH 自动触发客户端探测: ${CLIENT_HOST}"
# The remote loop runs the probe three times, one second apart; `|| true`
# keeps the loop going when a single probe fails.
run_cmd "Client 自动探测3次" ssh "${CLIENT_SSH_OPTS[@]}" "${CLIENT_HOST}" \
"for i in 1 2 3; do ${CURL_HTTP} || true; sleep 1; done"
elif [[ "${NON_INTERACTIVE}" == "0" ]]; then
# Interactive: block until the operator presses Enter.
read -r -p "完成后按回车继续采样..."
else
echo "[WARN] non-interactive 模式且未提供 --client-host跳过等待直接采样可能没有新流量。"
fi
flush_worker_capture
post_remote_worker_state
# List the chain named by TRAEFIK_CHAIN, falling back to KUBE-ROUTER-FORWARD
# when it is unset or empty.
run_cmd "Traefik Pod FW 链复测" sudo iptables -L "${TRAEFIK_CHAIN:-KUBE-ROUTER-FORWARD}" -n -v --line-numbers
# Run the same probe from this host, through a login shell (bash -l).
run_cmd "本机访问目标LB_IP:80仅供参考可能本机被kube-proxy劫持" bash -lc "${CURL_HTTP}"
print_diag_summary
echo
# Counters default to 0 when unset or empty.
echo "Traefik pod netns SYN/SYN-ACK: ${POD_NETNS_SYN_COUNT:-0}/${POD_NETNS_SYNACK_COUNT:-0}"
echo
echo "完成。完整日志: ${LOG_FILE}"
}
#######################################
# `capture` subcommand: force the remote-check, capture and all three trace
# mode arguments to "y", switch to non-interactive mode, then delegate to
# cmd_run.
# Globals written: DO_REMOTE_ARG, CAPTURE_MODE_ARG, NFT_TRACE_MODE_ARG,
#                  RETURN_TRACE_MODE_ARG, POD_NETNS_TRACE_MODE_ARG,
#                  NON_INTERACTIVE
# Returns: the exit status of cmd_run
#######################################
cmd_capture() {
  local _capture_flag
  for _capture_flag in \
    DO_REMOTE_ARG \
    CAPTURE_MODE_ARG \
    NFT_TRACE_MODE_ARG \
    RETURN_TRACE_MODE_ARG \
    POD_NETNS_TRACE_MODE_ARG; do
    # Assign by name; equivalent to a plain `NAME="y"` for each entry.
    printf -v "${_capture_flag}" '%s' "y"
  done
  NON_INTERACTIVE="1"

  cmd_run
}
#######################################
# `analyze` subcommand: pass the log path selected with --log (ANALYZE_LOG)
# to analyze_log_file as its only argument.
# Globals read: ANALYZE_LOG
# Returns: the exit status of analyze_log_file
#######################################
cmd_analyze() {
  local _analyze_input="${ANALYZE_LOG}"

  analyze_log_file "${_analyze_input}"
}
#######################################
# Script entry point: parse the command line, then run the handler for the
# selected subcommand.
# Globals read: COMMAND (set by parse_args / init_defaults)
# Arguments: $@ - the script's command-line arguments
# Exits: 1 with an error and usage text when COMMAND is not a known subcommand
#######################################
main() {
  parse_args "$@"

  case "${COMMAND}" in
    run|preflight|capture|analyze)
      # Each known subcommand maps to the handler function cmd_<name>.
      "cmd_${COMMAND}"
      ;;
    *)
      echo "[ERR] 未知命令: ${COMMAND}"
      usage
      exit 1
      ;;
  esac
}
# Run the script: forward every command-line argument, unmodified, to main.
main "$@"