Files
Deploy-Laboratory/scripts/diag/entrypath/lib/k8s_checks.sh
2026-03-21 04:36:06 +08:00

96 lines
3.7 KiB
Bash
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#!/usr/bin/env bash
# Run require_cmd (defined elsewhere) once for every external command this
# diagnostic needs on the local machine, in a fixed order.
# Returns: the status of the last require_cmd invocation.
local_preflight_checks() {
  local required
  for required in bash sudo kubectl awk sed grep; do
    require_cmd "${required}"
  done
}
# Resolve the runtime settings (hosts, IPs, SSH keys, SSH option arrays).
# Globals read:    NON_INTERACTIVE, DEFAULT_WORKER_SSH_KEY, DEFAULT_CLIENT_SSH_KEY
# Globals updated: WORKER_HOST, CLIENT_IP, LB_IP, WORKER_SSH_KEY,
#                  CLIENT_SSH_KEY, SSH_OPTS, CLIENT_SSH_OPTS
# Returns: 0
prepare_runtime_context() {
  # When NON_INTERACTIVE is "0", each value is replaced by whatever
  # read_default (defined elsewhere) prints for the given prompt and the
  # current value.
  if [[ "${NON_INTERACTIVE}" == "0" ]]; then
    WORKER_HOST="$(read_default "Worker SSH 主机user@host留空跳过远端检查" "${WORKER_HOST}")"
    CLIENT_IP="$(read_default "第三方客户端 IP用于人工发流量" "${CLIENT_IP}")"
    LB_IP="$(read_default "待排查节点对外 IP如 ylc62" "${LB_IP}")"
  fi

  # Worker key: an explicitly configured key wins; otherwise use the default
  # key file when it exists.
  if [[ -z "${WORKER_SSH_KEY}" ]]; then
    if [[ -f "${DEFAULT_WORKER_SSH_KEY}" ]]; then
      WORKER_SSH_KEY="${DEFAULT_WORKER_SSH_KEY}"
    fi
  fi

  # Client key: explicit key, then the default client key file, then the
  # worker key as a last resort.
  if [[ -z "${CLIENT_SSH_KEY}" ]]; then
    if [[ -f "${DEFAULT_CLIENT_SSH_KEY}" ]]; then
      CLIENT_SSH_KEY="${DEFAULT_CLIENT_SSH_KEY}"
    elif [[ -n "${WORKER_SSH_KEY}" ]]; then
      CLIENT_SSH_KEY="${WORKER_SSH_KEY}"
    fi
  fi

  # The option arrays stay empty unless a key was selected above.
  SSH_OPTS=()
  CLIENT_SSH_OPTS=()
  [[ -z "${WORKER_SSH_KEY}" ]] \
    || SSH_OPTS=(-i "${WORKER_SSH_KEY}" -o IdentitiesOnly=yes)
  [[ -z "${CLIENT_SSH_KEY}" ]] \
    || CLIENT_SSH_OPTS=(-i "${CLIENT_SSH_KEY}" -o IdentitiesOnly=yes)
}
# Pick a log directory, create it, and mirror all further stdout/stderr of
# the current shell into a timestamped log file via tee.
# Globals written: LOG_DIR, LOG_FILE
setup_log_file() {
  local stamp

  # Effective uid 0 logs under /root; any other user logs under its own HOME.
  case "${EUID}" in
    0) LOG_DIR="/root/netpol-diag-logs" ;;
    *) LOG_DIR="${HOME}/netpol-diag-logs" ;;
  esac
  mkdir -p "${LOG_DIR}"

  stamp="$(date '+%Y%m%d-%H%M%S')"
  LOG_FILE="${LOG_DIR}/entrypath-${stamp}.log"

  # From here on stdout goes through tee (appending to LOG_FILE) and stderr
  # is merged into the same stream.
  exec 1> >(tee -a "${LOG_FILE}") 2>&1
}
#######################################
# Collect local Kubernetes and iptables state for the Traefik entry path.
# Globals read:    LB_IP
# Globals written: TRAEFIK_POD, TRAEFIK_IP, TRAEFIK_CHAIN, REJECT_PKTS,
#                  NFLOG_PKTS, TRAEFIK_WEB_SVC_CHAIN, TRAEFIK_WEB_SEP_CHAIN
# Helpers used (defined elsewhere): run_cmd (title followed by a command
#   argv), extract_pkts_for_target, extract_first_jump_target
# Outputs: a short Traefik summary on stdout, plus whatever run_cmd prints.
# Exits the script with status 1 when no Traefik pod IP can be resolved.
#######################################
collect_local_k8s_state() {
  local pod_info extra

  run_cmd "节点状态" sudo kubectl get nodes -o wide
  run_cmd "kube-system 关键组件" sh -c "sudo kubectl -n kube-system get pods -o wide | grep -E 'traefik|svclb|flannel|kube-proxy' || true"
  run_cmd "Traefik Service" sudo kubectl -n kube-system get svc traefik -o wide
  run_cmd "Traefik Service 关键字段" sh -c "sudo kubectl -n kube-system get svc traefik -o yaml | grep -E 'type:|externalTrafficPolicy|loadBalancerSourceRanges|svccontroller.k3s.cattle.io' || true"

  # Fetch name and IP in ONE query so both values describe the same pod.
  # Errors (e.g. no matching pod) are deliberately swallowed; the empty-IP
  # check below reports them.
  pod_info="$(sudo kubectl -n kube-system get pod -l app.kubernetes.io/name=traefik \
    -o jsonpath='{.items[0].metadata.name}{" "}{.items[0].status.podIP}' 2>/dev/null || true)"
  TRAEFIK_POD=""
  TRAEFIK_IP=""
  # Explicit IFS: the caller may have narrowed the global IFS.
  IFS=' ' read -r TRAEFIK_POD TRAEFIK_IP extra <<<"${pod_info}" || true

  if [[ -z "${TRAEFIK_IP}" ]]; then
    echo "[ERR] 无法解析 Traefik Pod IP终止。"
    exit 1
  fi

  # Find the per-pod firewall chain: the target (column 4 with
  # --line-numbers) of the first rule that has a field EXACTLY equal to the
  # pod IP. A regex match would also hit e.g. 10.42.0.50 for 10.42.0.5.
  # A missing chain is not fatal; TRAEFIK_CHAIN just stays empty.
  TRAEFIK_CHAIN="$(sudo iptables -L KUBE-ROUTER-FORWARD -n -v --line-numbers \
    | awk -v ip="${TRAEFIK_IP}" '{
        for (i = 1; i <= NF; i++) {
          if (($i "") == (ip "")) { print $4; exit }
        }
      }' || true)"

  echo
  echo "Traefik pod: ${TRAEFIK_POD}"
  echo "Traefik ip : ${TRAEFIK_IP}"
  echo "Traefik fw : ${TRAEFIK_CHAIN:-N/A}"

  if [[ -n "${TRAEFIK_CHAIN}" ]]; then
    run_cmd "Traefik Pod FW 链详情" sudo iptables -L "${TRAEFIK_CHAIN}" -n -v -x
    run_cmd "Traefik Pod FW 链规则" sudo iptables -S "${TRAEFIK_CHAIN}"
    REJECT_PKTS="$(extract_pkts_for_target "" "${TRAEFIK_CHAIN}" REJECT || echo 0)"
    NFLOG_PKTS="$(extract_pkts_for_target "" "${TRAEFIK_CHAIN}" NFLOG || echo 0)"
    # A helper that succeeds without printing anything still yields 0.
    REJECT_PKTS="${REJECT_PKTS:-0}"
    NFLOG_PKTS="${NFLOG_PKTS:-0}"
  else
    REJECT_PKTS=0
    NFLOG_PKTS=0
  fi

  # Service chain for traefik:web. Only KUBE-SVC-* jump targets are accepted
  # so that a KUBE-MARK-MASQ rule carrying the same comment is not mistaken
  # for the service chain.
  TRAEFIK_WEB_SVC_CHAIN="$(sudo iptables -t nat -S KUBE-SERVICES \
    | awk '/kube-system\/traefik:web cluster IP/ && /--dport 80/ {
        for (i = 1; i < NF; i++) {
          if ($i == "-j" && $(i + 1) ~ /^KUBE-SVC-/) { print $(i + 1); exit }
        }
      }' || true)"

  TRAEFIK_WEB_SEP_CHAIN=""
  if [[ -n "${TRAEFIK_WEB_SVC_CHAIN}" ]]; then
    run_cmd "Traefik web Service 链" sudo iptables -t nat -L "${TRAEFIK_WEB_SVC_CHAIN}" -n -v -x
    TRAEFIK_WEB_SEP_CHAIN="$(extract_first_jump_target nat "${TRAEFIK_WEB_SVC_CHAIN}" || true)"
  fi
  if [[ -n "${TRAEFIK_WEB_SEP_CHAIN}" ]]; then
    run_cmd "Traefik web Endpoint 链" sudo iptables -t nat -L "${TRAEFIK_WEB_SEP_CHAIN}" -n -v -x
  fi

  # LB_IP is handed to the inner shell as "$1" and matched as a fixed string,
  # so quotes or regex metacharacters in it cannot alter the command.
  # An empty LB_IP would match every line, so the check is skipped instead.
  if [[ -n "${LB_IP:-}" ]]; then
    run_cmd "KUBE-SERVICES 中目标LB_IP命中" sh -c \
      'sudo iptables -t nat -L KUBE-SERVICES -n -v --line-numbers | grep -F -- "$1" || true' \
      sh "${LB_IP}"
  else
    echo "[WARN] LB_IP 为空,跳过 KUBE-SERVICES 命中检查。"
  fi
}