基本框架
This commit is contained in:
419
scripts/diag/netpol/check-net.sh
Normal file
419
scripts/diag/netpol/check-net.sh
Normal file
@@ -0,0 +1,419 @@
|
||||
#!/usr/bin/env bash
#
# Interactive diagnostics for the path client -> Traefik -> Service -> Pod.
# Collects kubectl / iptables / conntrack state and prints a verdict based on
# HTTP probes. All output is mirrored into a timestamped log file.
set -euo pipefail

# Namespace and label selector used to locate the Traefik pods.
NS_TRAEFIK="kube-system"
APP_TRAEFIK_LABEL="app.kubernetes.io/name=traefik"

# Timeout handed to curl (-m) and wget (--timeout) for every probe.
TIMEOUT=3

# Window of Traefik logs to collect (kubectl logs --tail / --since).
LOG_TAIL=200
LOG_SINCE="20m"

# Filled in at runtime once the log directory is known.
LOG_DIR=""
LOG_FILE=""

# Probe outcomes; each stays "SKIP" unless the matching probe actually runs.
PROBE_CLIENT="SKIP"
PROBE_TRAEFIK_TO_SVC="SKIP"
PROBE_TRAEFIK_TO_POD="SKIP"
PROBE_TRAEFIK_DNS="SKIP"
# Print a section banner: an empty line followed by "=== <title> ===".
# Arguments: $1 - section title
print_title() {
  printf '\n=== %s ===\n' "$1"
}
# Run a command best-effort: its output is kept, its failure is ignored so
# that `set -e` does not abort the diagnostics run.
# Arguments: the command and its arguments
# Returns:   always 0
safe_run() {
  "$@" || true
}
# Abort the script unless an executable named $1 exists on PATH.
#
# `type -P` is used instead of `command -v` because this script defines shell
# functions called kubectl and iptables; `command -v` would report those
# wrappers as "found" even when the real binary is missing.
#
# Arguments: $1 - command name
# Outputs:   error message on stderr when the command is missing
# Exits:     1 when the command is missing
require_cmd() {
  local cmd_name="$1"
  if ! type -P "${cmd_name}" >/dev/null 2>&1; then
    echo "[ERR] 缺少命令: ${cmd_name}" >&2
    exit 1
  fi
}
# Absolute paths of the real binaries. They are resolved with `type -P`
# (disk files only) so the same-named wrapper functions can never be picked
# up. An empty value means the tool is not installed.
KUBECTL_PATH="$(type -P kubectl || true)"
IPTABLES_PATH="$(type -P iptables || true)"

# Non-empty when privileged commands must be prefixed with sudo.
USE_SUDO=""
# Verify prerequisites, decide whether sudo is needed and pick the log dir.
# Globals:  KUBECTL_PATH, IPTABLES_PATH, EUID, HOME (read)
#           USE_SUDO, LOG_DIR (written)
# Exits:    1 when a required tool is missing; a failing `sudo -v` aborts
#           the script when `set -e` is active
init_runtime() {
  local tool
  for tool in kubectl iptables awk grep curl; do
    require_cmd "${tool}"
  done

  # kubectl and iptables are shadowed by wrapper functions of the same name,
  # so a name lookup alone may succeed without a binary. Check the resolved
  # paths that the wrappers will actually execute.
  if [[ -z "${KUBECTL_PATH}" ]]; then
    echo "[ERR] 缺少命令: kubectl" >&2
    exit 1
  fi
  if [[ -z "${IPTABLES_PATH}" ]]; then
    echo "[ERR] 缺少命令: iptables" >&2
    exit 1
  fi

  if [[ "${EUID}" -ne 0 ]] && command -v sudo >/dev/null 2>&1; then
    # Try non-interactively first; ask for the password once if that fails.
    if ! sudo -n true 2>/dev/null; then
      echo "[INFO] 需要 sudo 权限以读取 iptables / kubectl 配置。"
      sudo -v
    fi
    USE_SUDO="1"
  fi

  # Non-root users log under HOME to avoid permission problems with /root.
  if [[ "${EUID}" -eq 0 ]]; then
    LOG_DIR="/root/netpol-diag-logs"
  else
    LOG_DIR="${HOME}/netpol-diag-logs"
  fi
}
# Central wrapper so the rest of the script never decides about sudo itself.
# Runs the real kubectl binary (KUBECTL_PATH), via sudo when USE_SUDO is set.
# Arguments: passed through to kubectl unchanged
# Returns:   exit status of kubectl (or of sudo)
kubectl() {
  if [[ -z "${USE_SUDO}" ]]; then
    "${KUBECTL_PATH}" "$@"
    return
  fi
  sudo "${KUBECTL_PATH}" "$@"
}
# Wrapper around the real iptables binary (IPTABLES_PATH); goes through sudo
# when USE_SUDO is set.
# Arguments: passed through to iptables unchanged
# Returns:   exit status of iptables (or of sudo)
iptables() {
  if [[ -z "${USE_SUDO}" ]]; then
    "${IPTABLES_PATH}" "$@"
    return
  fi
  sudo "${IPTABLES_PATH}" "$@"
}
# Fetch a URL with wget from inside the Traefik deployment.
#
# Globals:   NS_TRAEFIK, TIMEOUT (read)
# Arguments: $1 - URL to fetch
# Outputs:   the response body on success, wget/kubectl stderr on failure
#            (both on stdout)
# Returns:   0 when the fetch succeeded, 1 otherwise
probe_wget_from_traefik() {
  local url="$1"
  local body_file err_file rc=0

  # Private temp files instead of fixed names in /tmp: fixed names can be
  # pre-created by other users and collide between parallel runs.
  body_file="$(mktemp)" || return 1
  err_file="$(mktemp)" || {
    rm -f -- "${body_file}"
    return 1
  }

  # Options are placed before the URL so wget builds that do not permute
  # their arguments still apply the timeout.
  if kubectl exec -n "${NS_TRAEFIK}" deploy/traefik -- \
    wget -qO- --timeout="${TIMEOUT}" "${url}" \
    >"${body_file}" 2>"${err_file}"; then
    cat -- "${body_file}"
  else
    rc=1
    cat -- "${err_file}"
  fi

  rm -f -- "${body_file}" "${err_file}"
  return "${rc}"
}
# Ask the operator which backend to diagnose and where the entry point is.
#
# Empty answers select the documented defaults. End-of-input on stdin is
# treated like an empty answer, so a non-interactive run uses the defaults
# instead of being aborted by `set -e`.
#
# Globals written: CHOICE, NS_BACKEND, APP_NAME, APP_LABEL, SVC_NAME,
#                  PATH_PREFIX, POD_PORT, ENTRY_IP
select_scene() {
  echo "请选择诊断场景:"
  echo " 1) nginx-demo (/demo, 80)"
  echo " 2) nodejs-demo (/node, 3000)"
  echo " 3) 自定义"
  _scene_ask CHOICE "输入序号 [1/2/3](默认 2): " "2"

  case "${CHOICE}" in
    1)
      _scene_preset "nginx-demo" "/demo/" "80"
      ;;
    2)
      _scene_preset "nodejs-demo" "/node/" "3000"
      ;;
    3)
      _scene_ask NS_BACKEND "后端命名空间(默认 default): " "default"
      _scene_ask APP_NAME "应用名(Deployment/Service 名,示例 nodejs-demo): " "nodejs-demo"
      _scene_ask APP_LABEL "Pod 标签选择器(默认 app=<应用名>): " "app=${APP_NAME}"
      _scene_ask SVC_NAME "Service 名(默认与应用名一致): " "${APP_NAME}"
      _scene_ask PATH_PREFIX "入口路径前缀(默认 /): " "/"
      _scene_ask POD_PORT "后端 Pod 端口(默认 80): " "80"
      ;;
    *)
      echo "[WARN] 无效选择,使用 nodejs-demo 默认场景。"
      _scene_preset "nodejs-demo" "/node/" "3000"
      ;;
  esac

  _scene_ask ENTRY_IP "入口 IP(用于本机 curl,默认 192.168.2.61): " "192.168.2.61"
}

# Print prompt $2, read one line and store it (or default $3 when the line is
# empty) in the global variable named by $1.
_scene_ask() {
  local __scene_answer=""
  printf '%s' "$2"
  # `|| true`: read returns non-zero at EOF; fall back to the default then.
  read -r __scene_answer || true
  printf -v "$1" '%s' "${__scene_answer:-$3}"
}

# Fill the scene globals for a built-in demo app in the default namespace.
# Arguments: $1 - app/service name, $2 - path prefix, $3 - pod port
_scene_preset() {
  NS_BACKEND="default"
  APP_NAME="$1"
  APP_LABEL="app=$1"
  SVC_NAME="$1"
  PATH_PREFIX="$2"
  POD_PORT="$3"
}
# Bootstrap: check tools/privileges, ask for the scene, then mirror all
# further stdout/stderr into a timestamped log file.
init_runtime
select_scene

mkdir -p "${LOG_DIR}"
# APP_NAME can be free-form user input; replace anything that is not safe in
# a file name (for example "/") so the log path stays inside LOG_DIR.
LOG_FILE="${LOG_DIR}/diag-$(date '+%Y%m%d-%H%M%S')-${APP_NAME//[^A-Za-z0-9._-]/_}.log"
exec > >(tee -a "${LOG_FILE}") 2>&1
# Section 0: record the chosen scene and the environment of this run.
print_title "0. 诊断上下文"
echo "TIME: $(date '+%F %T %Z')"
echo "LOG_FILE=${LOG_FILE}"
echo "SCENE_APP=${APP_NAME}"
echo "SCENE_NS=${NS_BACKEND}"
echo "SCENE_LABEL=${APP_LABEL}"
echo "SCENE_SVC=${SVC_NAME}"
echo "SCENE_PATH=${PATH_PREFIX}"
echo "SCENE_POD_PORT=${POD_PORT}"
echo "ENTRY_IP=${ENTRY_IP}"
echo "HOSTNAME=$(hostname)"
# Newer kubectl releases no longer accept --short; fall back to the plain
# form so the version is still recorded instead of being silently dropped.
if ! kubectl version --short 2>/dev/null; then
  safe_run kubectl version
fi
# Section 1: cluster nodes, Traefik workload and kube-proxy baseline.
print_title "1. 集群与 Traefik 基线"
safe_run kubectl get nodes -o wide
safe_run kubectl get deploy -n "${NS_TRAEFIK}" traefik -o wide
safe_run kubectl get svc -n "${NS_TRAEFIK}" traefik -o wide
safe_run kubectl get pod -n "${NS_TRAEFIK}" -l "${APP_TRAEFIK_LABEL}" -o wide
kubectl get pods -n kube-system -o wide \
  | grep -E 'kube-router|flannel|traefik|svclb-traefik' || true

# Name and IP of the first pod matching the Traefik selector; empty when the
# lookup fails.
TRAEFIK_POD="$(kubectl get pod -n "${NS_TRAEFIK}" -l "${APP_TRAEFIK_LABEL}" -o jsonpath='{.items[0].metadata.name}' 2>/dev/null || true)"
TRAEFIK_IP="$(kubectl get pod -n "${NS_TRAEFIK}" -l "${APP_TRAEFIK_LABEL}" -o jsonpath='{.items[0].status.podIP}' 2>/dev/null || true)"

echo "--- 1.1 kube-proxy 基线 ---"
safe_run kubectl get pod -n kube-system -l k8s-app=kube-proxy -o wide
safe_run kubectl get configmap -n kube-system kube-proxy -o yaml

KPROXY_POD="$(kubectl get pod -n kube-system -l k8s-app=kube-proxy -o jsonpath='{.items[0].metadata.name}' 2>/dev/null || true)"
if [[ -n "${KPROXY_POD}" ]]; then
  echo "--- 1.2 kube-proxy 日志关键字(error|conntrack|iptables|ipvs|nft)---"
  kubectl logs -n kube-system "${KPROXY_POD}" --tail=200 \
    | grep -Ei "error|fail|conntrack|iptables|ipvs|nft|sync" || true
else
  echo "[WARN] 未找到 kube-proxy Pod(K3s 某些模式可忽略)"
fi
# Section 2: backend workload, Service, Endpoints and NetworkPolicy overview.
print_title "2. 业务资源采集"
safe_run kubectl get deploy -n "${NS_BACKEND}" "${APP_NAME}" -o wide
safe_run kubectl get svc -n "${NS_BACKEND}" "${SVC_NAME}" -o wide
safe_run kubectl get endpoints -n "${NS_BACKEND}" "${SVC_NAME}" -o wide
safe_run kubectl get endpointslice -n "${NS_BACKEND}" -l kubernetes.io/service-name="${SVC_NAME}" -o wide
safe_run kubectl get pod -n "${NS_BACKEND}" -l "${APP_LABEL}" -o wide
safe_run kubectl get pod -n "${NS_BACKEND}" -l "${APP_LABEL}" --show-labels
safe_run kubectl get ingress -n "${NS_BACKEND}"
safe_run kubectl get ingressroute -n "${NS_BACKEND}"
safe_run kubectl get networkpolicy -n "${NS_BACKEND}"
safe_run kubectl get networkpolicy -n "${NS_TRAEFIK}"
safe_run kubectl get ns "${NS_BACKEND}" "${NS_TRAEFIK}" --show-labels

BACKEND_POD_IP="$(kubectl get pod -n "${NS_BACKEND}" -l "${APP_LABEL}" -o jsonpath='{.items[0].status.podIP}' 2>/dev/null || true)"
SVC_IP="$(kubectl get svc -n "${NS_BACKEND}" "${SVC_NAME}" -o jsonpath='{.spec.clusterIP}' 2>/dev/null || true)"
# Count endpoint addresses as whitespace-separated fields. The `|| true` is
# required: with pipefail a failing kubectl (e.g. Service not found) would
# fail the assignment and `set -e` would abort the whole run.
EP_COUNT="$(
  kubectl get endpoints -n "${NS_BACKEND}" "${SVC_NAME}" \
    -o jsonpath='{.subsets[*].addresses[*].ip}' 2>/dev/null \
    | awk '{print NF}' || true
)"
EP_COUNT="${EP_COUNT:-0}"

echo "TRAEFIK_POD=${TRAEFIK_POD:-<none>}"
echo "TRAEFIK_IP=${TRAEFIK_IP:-<none>}"
echo "BACKEND_POD_IP=${BACKEND_POD_IP:-<none>}"
echo "SVC_IP=${SVC_IP:-<none>}"
echo "ENDPOINTS_COUNT=${EP_COUNT}"
echo "SERVICE_SELECTOR=$(kubectl get svc -n "${NS_BACKEND}" "${SVC_NAME}" -o jsonpath='{.spec.selector}' 2>/dev/null || echo '{}')"

echo "--- 2.1 EndpointSlice 条件(ready/serving/terminating/node)---"
kubectl get endpointslice -n "${NS_BACKEND}" -l kubernetes.io/service-name="${SVC_NAME}" \
  -o jsonpath='{range .items[*]}{"slice="}{.metadata.name}{"\n"}{range .endpoints[*]}{" addr="}{.addresses[0]}{" ready="}{.conditions.ready}{" serving="}{.conditions.serving}{" terminating="}{.conditions.terminating}{" node="}{.nodeName}{"\n"}{end}{end}' \
  || true
# Section 3: HTTP probes along the path client -> entry -> Service -> Pod.
print_title "3. 主链路连通性探测"
echo "--- 3.1 本机 -> 入口 (${ENTRY_IP}${PATH_PREFIX}) ---"
# Private temp files instead of fixed /tmp names (safe against pre-created
# files and parallel runs).
CLIENT_BODY_FILE="$(mktemp)"
CLIENT_ERR_FILE="$(mktemp)"
if CODE="$(curl -sS -m "${TIMEOUT}" -o "${CLIENT_BODY_FILE}" -w "%{http_code}" \
  "http://${ENTRY_IP}${PATH_PREFIX}" 2>"${CLIENT_ERR_FILE}")"; then
  echo "HTTP_CODE=${CODE}"
  echo "BODY_PREVIEW:"
  head -c 200 "${CLIENT_BODY_FILE}" || true
  echo
  PROBE_CLIENT="OK_${CODE}"
else
  cat "${CLIENT_ERR_FILE}"
  PROBE_CLIENT="FAIL"
fi
rm -f -- "${CLIENT_BODY_FILE}" "${CLIENT_ERR_FILE}"

# Probe the port the Service really exposes (first declared port). Fall back
# to 80 when it cannot be read, which matches the previous fixed value.
SVC_PORT="$(kubectl get svc -n "${NS_BACKEND}" "${SVC_NAME}" -o jsonpath='{.spec.ports[0].port}' 2>/dev/null || true)"
SVC_PORT="${SVC_PORT:-80}"

if [[ -n "${TRAEFIK_POD}" && -n "${SVC_IP}" ]]; then
  echo
  echo "--- 3.2 Traefik -> ServiceIP (${SVC_IP}:${SVC_PORT}) ---"
  if probe_wget_from_traefik "http://${SVC_IP}:${SVC_PORT}"; then
    PROBE_TRAEFIK_TO_SVC="OK"
  else
    PROBE_TRAEFIK_TO_SVC="FAIL"
  fi
else
  echo "[SKIP] 缺少 Traefik Pod 或 ServiceIP。"
fi

if [[ -n "${TRAEFIK_POD}" ]]; then
  echo
  echo "--- 3.3 Traefik -> Service DNS (${SVC_NAME}.${NS_BACKEND}.svc.cluster.local:${SVC_PORT}) ---"
  if probe_wget_from_traefik "http://${SVC_NAME}.${NS_BACKEND}.svc.cluster.local:${SVC_PORT}"; then
    PROBE_TRAEFIK_DNS="OK"
  else
    PROBE_TRAEFIK_DNS="FAIL"
  fi
else
  echo "[SKIP] 未找到 Traefik Pod。"
fi

if [[ -n "${TRAEFIK_POD}" && -n "${BACKEND_POD_IP}" ]]; then
  echo
  echo "--- 3.4 Traefik -> PodIP (${BACKEND_POD_IP}:${POD_PORT}) ---"
  if probe_wget_from_traefik "http://${BACKEND_POD_IP}:${POD_PORT}"; then
    PROBE_TRAEFIK_TO_POD="OK"
  else
    PROBE_TRAEFIK_TO_POD="FAIL"
  fi
else
  echo "[SKIP] 缺少 Traefik Pod 或后端 PodIP。"
fi
# Section 4: full YAML of routing objects, Service/Endpoints, NetworkPolicies
# and the events of both namespaces. Every command is best-effort.
print_title "4. 路由与配置详情"
echo "--- 4.1 Ingress ---"
safe_run kubectl get ingress -n "${NS_BACKEND}" -o yaml
echo "--- 4.2 IngressRoute ---"
safe_run kubectl get ingressroute -n "${NS_BACKEND}" -o yaml
echo "--- 4.3 Service / Endpoints ---"
safe_run kubectl get svc -n "${NS_BACKEND}" "${SVC_NAME}" -o yaml
safe_run kubectl get endpoints -n "${NS_BACKEND}" "${SVC_NAME}" -o yaml
safe_run kubectl describe svc -n "${NS_BACKEND}" "${SVC_NAME}"
echo "--- 4.4 相关 NetworkPolicy(kube-system + backend)---"
safe_run kubectl get networkpolicy -n "${NS_TRAEFIK}" -o yaml
safe_run kubectl get networkpolicy -n "${NS_BACKEND}" -o yaml
echo "--- 4.5 近期事件(backend + kube-system)---"
safe_run kubectl get events -n "${NS_BACKEND}" --sort-by=.lastTimestamp
safe_run kubectl get events -n kube-system --sort-by=.lastTimestamp
# Section 5: Traefik logs, raw and filtered.
print_title "5. Traefik 日志(最近 ${LOG_SINCE},最多 ${LOG_TAIL} 行)"
# Fetch the log window once and reuse it, so the filters below operate on
# exactly the lines printed here and the API is not queried three times.
TRAEFIK_LOGS="$(kubectl logs -n "${NS_TRAEFIK}" deploy/traefik --since="${LOG_SINCE}" --tail="${LOG_TAIL}" || true)"
if [[ -n "${TRAEFIK_LOGS}" ]]; then
  printf '%s\n' "${TRAEFIK_LOGS}"
fi
echo "--- 5.1 关键字过滤(404|502|503|router|service|middleware|upstream|${SVC_NAME}|${PATH_PREFIX}) ---"
grep -Ei "404|502|503|router|service|middleware|upstream|endpoint|${SVC_NAME}|${PATH_PREFIX}" <<<"${TRAEFIK_LOGS}" || true
echo "--- 5.2 Traefik 访问日志候选(status=404/502/503) ---"
grep -E "\" 404 |\" 502 |\" 503 " <<<"${TRAEFIK_LOGS}" || true
echo "--- 5.3 Traefik 上一次容器日志(若重启过) ---"
safe_run kubectl logs -n "${NS_TRAEFIK}" deploy/traefik --previous --tail=100
# Section 6 (part 1): host firewall state and the main filter/NAT chains.
# ${USE_SUDO:+sudo} expands to "sudo" only when USE_SUDO is set; it is left
# unquoted on purpose so it disappears entirely otherwise. This gives the
# host tools the same privileges as the iptables wrapper.
print_title "6. 防火墙与数据平面"
echo "--- 6.1 防火墙状态 ---"
# shellcheck disable=SC2086
if command -v firewall-cmd >/dev/null 2>&1; then
  safe_run ${USE_SUDO:+sudo} firewall-cmd --state
  safe_run ${USE_SUDO:+sudo} firewall-cmd --list-all
else
  echo "firewall-cmd: not found"
fi

# shellcheck disable=SC2086
if command -v ufw >/dev/null 2>&1; then
  safe_run ${USE_SUDO:+sudo} ufw status verbose
else
  echo "ufw: not found"
fi

echo "--- 6.2 FORWARD 与 KUBE-ROUTER-FORWARD ---"
safe_run iptables -L FORWARD -n -v --line-numbers
safe_run iptables -L KUBE-ROUTER-FORWARD -n -v --line-numbers
echo "--- 6.2.1 NAT 链(KUBE-SERVICES)---"
safe_run iptables -t nat -L KUBE-SERVICES -n -v --line-numbers
if [[ -n "${SVC_IP}" ]]; then
  echo "--- 6.2.2 NAT 链中 ServiceIP 相关规则 (${SVC_IP}) ---"
  # -F: the IP is a literal, not a regex; -w: do not match longer addresses
  # that merely start with it (10.43.0.1 vs 10.43.0.10).
  iptables -t nat -S | grep -Fw -- "${SVC_IP}" || true
fi
TRAEFIK_CHAIN=""
BACKEND_CHAIN=""

# Read iptables rule listings on stdin and print the first KUBE-POD-FW-*
# chain named on a rule line that mentions the given pod IP.
#
# The IP is compared as an exact field (optionally with a /32 suffix), not as
# a regex, so 10.42.0.1 does not match 10.42.0.15. The chain is searched in
# every field, so this works whether the target sits in column 2 (plain -L)
# or column 4 (-L -v). All input is consumed, which keeps the producer from
# being killed by SIGPIPE.
#
# Arguments: $1 - pod IP
# Outputs:   chain name on stdout, nothing when no rule matches
find_pod_fw_chain() {
  awk -v ip="$1" '
    found == "" {
      chain = ""
      hit = 0
      for (i = 1; i <= NF; i++) {
        if ($i == ip || $i == ip "/32") {
          hit = 1
        } else if (chain == "" && $i ~ /^KUBE-POD-FW-/) {
          chain = $i
        }
      }
      if (hit && chain != "") {
        found = chain
      }
    }
    END {
      if (found != "") {
        print found
      }
    }
  '
}

# `|| true`: when KUBE-ROUTER-FORWARD does not exist iptables fails, and with
# pipefail + `set -e` the assignment would otherwise abort the whole run.
if [[ -n "${TRAEFIK_IP}" ]]; then
  TRAEFIK_CHAIN="$(iptables -L KUBE-ROUTER-FORWARD -n --line-numbers 2>/dev/null | find_pod_fw_chain "${TRAEFIK_IP}" || true)"
fi
if [[ -n "${BACKEND_POD_IP}" ]]; then
  BACKEND_CHAIN="$(iptables -L KUBE-ROUTER-FORWARD -n --line-numbers 2>/dev/null | find_pod_fw_chain "${BACKEND_POD_IP}" || true)"
fi

echo "TRAEFIK_CHAIN=${TRAEFIK_CHAIN:-<not found>}"
echo "BACKEND_CHAIN=${BACKEND_CHAIN:-<not found>}"
# Section 6 (part 2): per-pod firewall chains, ipset names and conntrack.
if [[ -n "${TRAEFIK_CHAIN}" ]]; then
  echo "--- 6.3 Traefik Pod 链 ${TRAEFIK_CHAIN} ---"
  safe_run iptables -L "${TRAEFIK_CHAIN}" -n -v -x
  echo "--- 6.4 Traefik Pod 链规则定义 ---"
  safe_run iptables -S "${TRAEFIK_CHAIN}"
fi

if [[ -n "${BACKEND_CHAIN}" ]]; then
  echo "--- 6.5 Backend Pod 链 ${BACKEND_CHAIN} ---"
  safe_run iptables -L "${BACKEND_CHAIN}" -n -v -x
  echo "--- 6.6 Backend Pod 链规则定义 ---"
  safe_run iptables -S "${BACKEND_CHAIN}"
fi

# ${USE_SUDO:+sudo} expands to "sudo" only when USE_SUDO is set and is left
# unquoted on purpose; ipset and conntrack get the same privileges as the
# iptables wrapper.
echo "--- 6.7 ipset(KUBE-SRC/KUBE-DST)---"
# shellcheck disable=SC2086
if command -v ipset >/dev/null 2>&1; then
  ${USE_SUDO:+sudo} ipset list -n | grep -E '^KUBE-(SRC|DST)-' || true
else
  echo "ipset: not found"
fi

echo "--- 6.8 conntrack(容量与关键连接)---"
# shellcheck disable=SC2086
if command -v conntrack >/dev/null 2>&1; then
  safe_run ${USE_SUDO:+sudo} conntrack -S
  safe_run sysctl net.netfilter.nf_conntrack_count
  safe_run sysctl net.netfilter.nf_conntrack_max
  if [[ -n "${SVC_IP}" ]]; then
    echo "conntrack by service ip (${SVC_IP}):"
    ${USE_SUDO:+sudo} conntrack -L -d "${SVC_IP}" 2>/dev/null | head -n 100 || true
  fi
  if [[ -n "${BACKEND_POD_IP}" ]]; then
    echo "conntrack by backend pod ip (${BACKEND_POD_IP}):"
    ${USE_SUDO:+sudo} conntrack -L -d "${BACKEND_POD_IP}" 2>/dev/null | head -n 100 || true
  fi
else
  echo "conntrack: not found"
fi
# Section 7: turn the probe results into hints and print the next steps.
# Globals read: PROBE_CLIENT, PROBE_TRAEFIK_TO_SVC, PROBE_TRAEFIK_DNS,
#               PROBE_TRAEFIK_TO_POD, EP_COUNT, LOG_FILE
# Outputs:      the report on stdout; hints are independent of each other, so
#               several may be printed for one run.
print_diagnosis() {
  print_title "7. 自动判读(502/503/404)"
  echo "探测结果:"
  echo " CLIENT=${PROBE_CLIENT}"
  echo " TRAEFIK_TO_SVC=${PROBE_TRAEFIK_TO_SVC}"
  echo " TRAEFIK_DNS=${PROBE_TRAEFIK_DNS}"
  echo " TRAEFIK_TO_POD=${PROBE_TRAEFIK_TO_POD}"

  if [[ "${EP_COUNT}" == "0" ]]; then
    echo "- [高概率 503] Service 无可用 Endpoints。检查 Deployment 是否 Ready、selector 是否匹配。"
  fi

  # PROBE_CLIENT is "OK_<http code>" when the entry answered at all.
  if [[ "${PROBE_CLIENT}" == OK_404* ]]; then
    echo "- [高概率 404] 入口路由未命中。检查 Ingress/IngressRoute 的 path、host、middleware。"
  fi

  if [[ "${PROBE_CLIENT}" == OK_503* ]]; then
    echo "- [高概率 503] 入口已命中但后端不可用。优先看 Endpoints/EndpointSlice 条件与 Traefik 日志。"
  fi

  if [[ "${PROBE_TRAEFIK_TO_SVC}" == "FAIL" ]]; then
    echo "- [高概率 502/503] Traefik 到 Service 不通。优先检查 NetworkPolicy、kube-router 链、DNS 53 放行。"
  fi

  if [[ "${PROBE_TRAEFIK_DNS}" == "FAIL" && "${PROBE_TRAEFIK_TO_SVC}" == "FAIL" ]]; then
    echo "- [可能 DNS/服务发现问题] Traefik 到 Service DNS 与 ServiceIP 都失败。检查 CoreDNS、kube-system egress 53。"
  fi

  if [[ "${PROBE_TRAEFIK_TO_SVC}" == "OK" && "${PROBE_CLIENT}" == "FAIL" ]]; then
    echo "- [可能入口层问题] 集群内后端可达,但入口访问失败。检查控制节点防火墙、Traefik Service 暴露端口、外部路由。"
  fi

  if [[ "${PROBE_TRAEFIK_TO_SVC}" == "OK" && "${PROBE_TRAEFIK_TO_POD}" == "FAIL" ]]; then
    echo "- [已知行为候选] Service 可达但 PodIP 直连失败,常见于 kube-router 同节点桥接路径。"
  fi

  echo
  echo "下一步建议:"
  echo "1) 先修复 Endpoints=0 / 404 路由不匹配。"
  echo "2) 再看 Traefik -> Service 探测与 NetworkPolicy 命中。"
  echo "3) 最后结合 KUBE-ROUTER-FORWARD、Pod 链、ipset 判断是否为 kube-router 行为问题。"
  echo
  echo "日志已保存:${LOG_FILE}"
}

print_diagnosis
Reference in New Issue
Block a user