149 lines
4.3 KiB
Bash
149 lines
4.3 KiB
Bash
#!/bin/bash
# Interactive K3s recovery helper. Everything the script prints is mirrored
# into a per-run log file under LOG_DIR.

# Strict mode: stop on a failing command, on use of an unset variable, and on
# a failure in any stage of a pipeline.
set -o errexit -o nounset -o pipefail

# Directory that collects the recovery logs; created if it does not exist.
LOG_DIR="/root/netpol-diag-logs"
mkdir -p "${LOG_DIR}"

# One log file per run, named after the start time (YYYYmmdd-HHMMSS).
LOG_FILE="${LOG_DIR}/recovery-$(date '+%Y%m%d-%H%M%S').log"

# From this point on stdout and stderr are both piped through tee, which
# appends them to LOG_FILE and also forwards them to the original stdout.
exec > >(tee -a "${LOG_FILE}") 2>&1
|
||
|
||
# Print an informational message: all arguments joined by spaces, prefixed
# with "[INFO] ", followed by a newline, on stdout.
info() {
  printf '%s\n' "[INFO] $*"
}
|
||
# Print a warning message: all arguments joined by spaces, prefixed with
# "[WARN] ", followed by a newline, on stdout.
warn() {
  printf '%s\n' "[WARN] $*"
}
|
||
|
||
#######################################
# Ask a single yes/no style question on stdout and read one line from stdin.
# Arguments: $1 - question text shown before the fixed "(输入 YES 继续)" hint
# Outputs:   the prompt (without trailing newline) on stdout
# Returns:   0 only when the line read is exactly "YES", 1 otherwise
#######################################
confirm_once() {
  local question="$1"
  local reply=""

  printf '%s (输入 YES 继续): ' "${question}"
  read -r reply

  case "${reply}" in
    YES) return 0 ;;
    *) return 1 ;;
  esac
}
|
||
|
||
#######################################
# Two-step confirmation for high-risk actions. Both prompts are always shown
# and both answers are always read, even when the first answer is wrong.
# Arguments: none
# Outputs:   the two prompts (without trailing newlines) on stdout
# Returns:   0 only when the first line read is "YES" and the second is
#            "RESET"; 1 otherwise
#######################################
confirm_twice_high_risk() {
  local first_reply=""
  local second_reply=""

  printf '%s' "高风险操作,第一次确认:输入 YES 执行: "
  read -r first_reply
  printf '%s' "第二次确认:输入 RESET 执行: "
  read -r second_reply

  if [[ "${first_reply}" != "YES" ]]; then
    return 1
  fi
  [[ "${second_reply}" == "RESET" ]]
}
|
||
|
||
#######################################
# Delete the demo objects in the default and kube-system namespaces.
# Each deletion is best-effort: missing objects are skipped via
# --ignore-not-found and any kubectl failure is ignored.
# Arguments: none
# Outputs:   progress messages via info, plus whatever kubectl prints
# Returns:   status of the final info call
#######################################
cleanup_demo_resources() {
  # One entry per kubectl call: "<kind> <namespace> <name> [<name>...]".
  # Order matters: it is the order in which objects are removed.
  local -a targets=(
    "ingress default nginx-demo nodejs-demo"
    "ingressroute default nginx-demo nodejs-demo"
    "middleware default nginx-demo-stripprefix nodejs-demo-stripprefix"
    "service default nginx-demo nodejs-demo"
    "deployment default nginx-demo nodejs-demo"
    "networkpolicy default allow-traefik-to-nginx allow-traefik-to-nodejs"
    "networkpolicy kube-system allow-traefik-egress-to-services"
  )
  local target=""
  local -a fields=()

  info "清理 demo 资源(default + kube-system)"
  for target in "${targets[@]}"; do
    # Split the entry into kind, namespace and the list of object names.
    read -r -a fields <<<"${target}"
    kubectl delete "${fields[0]}" -n "${fields[1]}" "${fields[@]:2}" \
      --ignore-not-found || true
  done
  info "demo 资源清理完成"
}
|
||
|
||
#######################################
# Restart the traefik and coredns deployments and every kube-proxy DaemonSet
# (label k8s-app=kube-proxy) in kube-system, then wait up to 180s each for
# the traefik and coredns rollouts.
# Every kubectl call is best-effort: kubectl reports its own errors, but a
# failure never aborts the script.
# Arguments: none
# Outputs:   progress messages via info, plus whatever kubectl prints
# Returns:   0 (all kubectl failures are ignored)
#######################################
restart_key_components() {
  local ds=""

  info "重启关键组件"
  kubectl rollout restart deployment -n kube-system traefik || true
  kubectl rollout restart deployment -n kube-system coredns || true

  # The DaemonSet list is fed in through process substitution rather than a
  # pipeline. With `set -e` and `set -o pipefail` active, a failing
  # `kubectl get` at the head of a pipeline would terminate the whole script,
  # unlike every other (guarded) kubectl call in this function.
  while IFS= read -r ds; do
    [[ -n "${ds}" ]] || continue
    # stdin is detached so kubectl cannot swallow the remaining list entries.
    kubectl rollout restart -n kube-system "${ds}" </dev/null || true
  done < <(kubectl get ds -n kube-system -l k8s-app=kube-proxy -o name || true)

  info "等待关键组件状态"
  kubectl rollout status deployment/traefik -n kube-system --timeout=180s || true
  kubectl rollout status deployment/coredns -n kube-system --timeout=180s || true
}
|
||
|
||
#######################################
# Print suggested manual commands for backing up and cleaning kube-router
# iptables rules and KUBE-* ipsets. Nothing is executed; the text is emitted
# verbatim for the operator to run by hand.
# Arguments: none
# Outputs:   a warning via warn, then the guidance text on stdout
# Returns:   0
#######################################
network_rules_guidance() {
  warn "该步骤仅打印建议命令,不自动执行。"

  # Every line is a literal: the $(...) and pipes below are printed, not run.
  # shellcheck disable=SC2016
  printf '%s\n' \
    '建议在控制节点人工执行并逐条确认:' \
    '' \
    '# 1) 备份当前规则' \
    'iptables-save > /root/iptables-backup-$(date +%F-%H%M%S).txt' \
    '' \
    '# 2) 查看 KUBE-ROUTER 相关链(确认后再清理)' \
    'iptables-save | grep KUBE-ROUTER || true' \
    '' \
    '# 3) 若你明确要清理 kube-router 规则(高风险)' \
    '# iptables-save | grep -v KUBE-ROUTER | iptables-restore' \
    '' \
    '# 4) 查看并清理相关 ipset(高风险,按需逐个)' \
    "# ipset list -n | grep '^KUBE-'" \
    '# ipset destroy <set-name>' \
    ''
}
|
||
|
||
#######################################
# Print the manual K3s rebuild runbook (uninstall, clean-up, reinstall,
# redeploy, retest). The steps are only displayed, never executed.
# Arguments: none
# Outputs:   the runbook text on stdout
# Returns:   0
#######################################
print_rebuild_runbook() {
  # One array element per output line; empty elements are blank lines.
  local -a steps=(
    'K3s 重建步骤(只输出,不自动执行):'
    ''
    '1) 在 server 节点卸载:'
    '/usr/local/bin/k3s-uninstall.sh'
    ''
    '2) 在 agent 节点卸载:'
    '/usr/local/bin/k3s-agent-uninstall.sh'
    ''
    '3) 清理残留目录(确认后):'
    'rm -rf /etc/rancher /var/lib/rancher /var/lib/kubelet /etc/cni /opt/cni'
    ''
    '4) 重新安装 server(带你当前需要的参数)'
    '5) 重新 join agent'
    '6) 先部署 04-1 / 04-2 / 04-3,再到 04-4 / 04-5'
    '7) 最后用 /root/check-nodejs-netpol.sh 复测'
  )

  printf '%s\n' "${steps[@]}"
}
|
||
|
||
#######################################
# Render the main menu: a leading blank line, the title, one line per
# option, and finally the selection prompt without a trailing newline.
# Arguments: none
# Outputs:   the menu on stdout
# Returns:   0
#######################################
show_menu() {
  # One array element per menu line; the first (empty) one is the spacer.
  local -a menu_lines=(
    ""
    "===== K3s 恢复脚本(独立于诊断)====="
    "1) 仅清理 demo 资源(低风险)"
    "2) 清理 demo + 重启关键组件(中风险)"
    "3) 高风险网络规则清理(双重确认,默认仅打印建议)"
    "4) 输出完整重建步骤(不自动执行)"
    "0) 退出"
  )

  printf '%s\n' "${menu_lines[@]}"
  # The cursor stays on the prompt line so the answer is typed next to it.
  printf '%s' "请选择: "
}
|
||
|
||
#######################################
# Interactive entry point: show the menu in a loop and dispatch the chosen
# action until the operator selects 0 or stdin reaches end-of-file.
#   1 - clean demo resources (after one confirmation)
#   2 - clean demo resources and restart key components (one confirmation)
#   3 - print network-rule guidance (after the two-step confirmation)
#   4 - print the rebuild runbook
#   0 - exit
# Globals:   LOG_FILE (read)
# Arguments: none
# Outputs:   menu, prompts and action output on stdout
# Returns:   0 when the loop ends through option 0 or EOF
#######################################
main() {
  # Kept local so the menu answer does not leak into the global scope.
  local choice=""

  info "日志文件: ${LOG_FILE}"

  while true; do
    show_menu

    # read fails at end-of-file. Under `set -e` that would kill the script
    # without the exit message, and without `set -e` the loop would spin
    # forever on the invalid-option branch. Treat EOF as "0) exit", but still
    # honour a final line that lacks a trailing newline.
    if ! read -r choice && [[ -z "${choice}" ]]; then
      echo # finish the prompt line before the exit message
      choice="0"
    fi

    case "${choice}" in
      1)
        if confirm_once "确认执行“仅清理 demo 资源”吗?"; then
          cleanup_demo_resources
        else
          warn "已取消"
        fi
        ;;
      2)
        if confirm_once "确认执行“清理 demo + 重启关键组件”吗?"; then
          cleanup_demo_resources
          restart_key_components
        else
          warn "已取消"
        fi
        ;;
      3)
        if confirm_twice_high_risk; then
          network_rules_guidance
        else
          warn "高风险操作已取消"
        fi
        ;;
      4)
        print_rebuild_runbook
        ;;
      0)
        info "退出。日志已保存:${LOG_FILE}"
        break
        ;;
      *)
        warn "无效选项"
        ;;
    esac
  done
}
|
||
|
||
# Script entry point. main takes no parameters; the command-line arguments
# are forwarded only by convention and are ignored.
main "$@"
|