From be97836e0d4b8ba8f9a0d3914b0c53543801cac0 Mon Sep 17 00:00:00 2001 From: jack Date: Mon, 23 Mar 2026 19:18:55 +0800 Subject: [PATCH] =?UTF-8?q?chore:=20=E6=B8=85=E7=90=86=E8=B0=83=E8=AF=95?= =?UTF-8?q?=E8=84=9A=E6=9C=AC=E5=B9=B6=E6=94=B6=E6=95=9B=E5=88=B0=20Ansibl?= =?UTF-8?q?e=20=E6=B5=81=E7=A8=8B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 移除已废弃的调试/验证脚本与空目录,统一文档与脚本说明到 ansible-playbook 的部署方式,避免失效引用和误用路径。 Made-with: Cursor --- README.md | 4 +- .../README.md | 10 +- .../haproxy-http.cfg | 4 +- .../haproxy-https.cfg | 4 +- .../haproxy-no-check.cfg | 4 +- .../haproxy-proxy-http-tls.cfg | 4 +- .../haproxy-tls.cfg | 4 +- .../files/cloudflare-tunnel/cloudflared.yaml | 70 +-- .../gitlab/gitlab-ci-minimal.example.yml | 32 +- .../gitlab-ci-multi-arch-deploy.example.yml | 24 +- .../gitlab/gitlab-ci-runner-tags.example.yml | 24 +- ansible/files/homer/homer.yaml | 100 ++--- .../local-path-demo/local-path-pvc-demo.yaml | 73 +-- ansible/files/nfs-demo/nfs-pv-pvc-demo.yaml | 50 +-- .../nginx-matrix-tls/01-control-ingress.yaml | 206 ++++----- .../02-control-ingressroute.yaml | 174 ++++---- .../nginx-matrix-tls/03-worker-ingress.yaml | 198 ++++----- .../04-worker-ingressroute.yaml | 174 ++++---- .../nginx-matrix/01-control-ingress.yaml | 176 ++++---- .../nginx-matrix/02-control-ingressroute.yaml | 164 +++---- .../files/nginx-matrix/03-worker-ingress.yaml | 168 +++---- .../nginx-matrix/04-worker-ingressroute.yaml | 164 +++---- .../files/nodejs-demo/04-01-nodejs-demo.yaml | 100 ++--- .../files/nodejs-demo/04-02-nodejs-demo.yaml | 108 ++--- .../files/nodejs-demo/04-03-nodejs-demo.yaml | 134 +++--- .../files/nodejs-demo/04-04-nodejs-demo.yaml | 134 +++--- .../files/nodejs-demo/04-05-nodejs-demo.yaml | 148 +++---- .../files/nodejs-demo/04-06-nodejs-demo.yaml | 172 +++---- .../files/nodejs-demo/04-07-nodejs-demo.yaml | 176 ++++---- .../files/nodejs-demo/04-08-nodejs-demo.yaml | 202 ++++----- .../files/nodejs-demo/04-09-nodejs-demo.yaml 
| 236 +++++----- .../files/nodejs-demo/04-10-nodejs-demo.yaml | 238 +++++----- .../files/nodejs-demo/04-11-nodejs-demo.yaml | 248 +++++------ .../files/nodejs-demo/04-12-nodejs-demo.yaml | 256 +++++------ .../files/nodejs-demo/04-13-nodejs-demo.yaml | 294 ++++++------ .../nodejs-demo-secret.example.yaml | 14 +- ansible/files/onenav/onenav-proxy.yaml | 80 ++-- .../openclaw/openclaw-k3s-experimental.yaml | 140 +++--- ansible/files/openclaw/openclaw-proxy.yaml | 80 ++-- .../openlist/app-data-backup-cronjob.yaml | 52 +-- .../files/openlist/app-data-restore-job.yaml | 46 +- .../openlist/openlist-backup-cronjob.yaml | 52 +-- ansible/files/traefik-acme/traefik-acme.yaml | 54 +-- .../traefik-custom-ports.yaml | 44 +- .../traefik-dashboard-acme/tomcat-acme.yaml | 162 +++---- .../traefik-dashboard-acme.yaml | 123 ++--- .../traefik-dashboard/traefik-dashboard.yaml | 62 +-- docs/00-00-构建总览.md | 30 +- docs/00-01-k3s-基础概念.md | 6 +- docs/00-02-验证矩阵.md | 21 +- docs/00-03-未来规划与待补功能.md | 6 +- docs/00-04-部署环境说明.md | 6 +- docs/01-01-k3s-控制节点含traefik.md | 8 +- docs/01-02-k3s-工作节点.md | 8 +- docs/01-03-armv7-standalone-docker.md | 2 +- docs/01-04-cloudflare-tunnel.md | 44 -- ...-双控制节点ha.md => 01-04-双控制节点ha.md} | 4 +- ...服务安装.md => 01-05-armv7-nfs服务安装.md} | 2 +- ...le-实践.md => 01-06-节点初始化-ansible-实践.md} | 4 +- ...rt-haproxy.md => 01-07-openwrt-haproxy.md} | 20 +- docs/02-05-nginx-验证矩阵-一键部署.md | 2 +- docs/03-01-k3s-traefik-dashboard.md | 2 +- docs/03-02-k3s-traefik-acme.md | 4 +- docs/03-03-k3s-traefik-dashboard-acme.md | 4 +- docs/03-04-k3s-cloudflare-tunnel-配置接入.md | 207 ++++++++- docs/03-05-k3s-local-path-pvc.md | 152 ++++++- docs/03-06-k3s-使用nfs存储.md | 2 +- docs/03-08-k3s-ha-集群配置与切换.md | 4 +- docs/03-09-k3s-gitops-集群配置管理.md | 6 +- docs/04-01-k3s-nodejs-高级部署.md | 3 +- docs/06-01-k3s-networkpolicy-故障排查.md | 24 +- scripts/01-08-deploy-nginx-tls-via-ylc61.sh | 49 -- scripts/01-08-deploy-openwrt-haproxy.sh | 61 --- scripts/01-08-update-verify-matrix.py | 32 -- 
scripts/01-08-verify-haproxy-openwrt.sh | 11 - scripts/01-08-verify-haproxy.sh | 211 --------- scripts/02-verify-nginx-matrix-individual.sh | 106 ----- scripts/03-verify-traefik-dashboard-acme.sh | 57 --- scripts/README.md | 66 +-- .../cloudflare-delete-acme-challenge-dns.sh | 140 ++++++ scripts/diag/entrypath/README.md | 113 ----- scripts/diag/entrypath/entrypath.sh | 144 ------ scripts/diag/entrypath/lib/analyze.sh | 80 ---- scripts/diag/entrypath/lib/capture.sh | 286 ------------ scripts/diag/entrypath/lib/common.sh | 104 ----- scripts/diag/entrypath/lib/k8s_checks.sh | 95 ---- scripts/diag/entrypath/lib/remote_checks.sh | 59 --- .../setup-k3s-firewalld-interfaces.sh | 91 ---- scripts/diag/netpol/check-net.sh | 419 ------------------ scripts/diag/recovery/k3s-recovery-reset.sh | 148 ------- scripts/k3s-delete-lab-stacks.sh | 287 ++++++++++++ scripts/ssh/test-ssh.sh | 2 +- 92 files changed, 3463 insertions(+), 4855 deletions(-) rename ansible/files/{01-08-haproxy => 01-07-haproxy}/README.md (87%) rename ansible/files/{01-08-haproxy => 01-07-haproxy}/haproxy-http.cfg (87%) rename ansible/files/{01-08-haproxy => 01-07-haproxy}/haproxy-https.cfg (90%) rename ansible/files/{01-08-haproxy => 01-07-haproxy}/haproxy-no-check.cfg (87%) rename ansible/files/{01-08-haproxy => 01-07-haproxy}/haproxy-proxy-http-tls.cfg (88%) rename ansible/files/{01-08-haproxy => 01-07-haproxy}/haproxy-tls.cfg (87%) delete mode 100644 docs/01-04-cloudflare-tunnel.md rename docs/{01-05-双控制节点ha.md => 01-04-双控制节点ha.md} (96%) rename docs/{01-06-armv7-nfs服务安装.md => 01-05-armv7-nfs服务安装.md} (96%) rename docs/{01-07-节点初始化-ansible-实践.md => 01-06-节点初始化-ansible-实践.md} (98%) rename docs/{01-08-openwrt-haproxy.md => 01-07-openwrt-haproxy.md} (88%) delete mode 100644 scripts/01-08-deploy-nginx-tls-via-ylc61.sh delete mode 100644 scripts/01-08-deploy-openwrt-haproxy.sh delete mode 100644 scripts/01-08-update-verify-matrix.py delete mode 100644 scripts/01-08-verify-haproxy-openwrt.sh delete mode 100644 
scripts/01-08-verify-haproxy.sh delete mode 100644 scripts/02-verify-nginx-matrix-individual.sh delete mode 100644 scripts/03-verify-traefik-dashboard-acme.sh create mode 100644 scripts/cloudflare-delete-acme-challenge-dns.sh delete mode 100644 scripts/diag/entrypath/README.md delete mode 100644 scripts/diag/entrypath/entrypath.sh delete mode 100644 scripts/diag/entrypath/lib/analyze.sh delete mode 100644 scripts/diag/entrypath/lib/capture.sh delete mode 100644 scripts/diag/entrypath/lib/common.sh delete mode 100644 scripts/diag/entrypath/lib/k8s_checks.sh delete mode 100644 scripts/diag/entrypath/lib/remote_checks.sh delete mode 100644 scripts/diag/firewalld/setup-k3s-firewalld-interfaces.sh delete mode 100644 scripts/diag/netpol/check-net.sh delete mode 100644 scripts/diag/recovery/k3s-recovery-reset.sh create mode 100644 scripts/k3s-delete-lab-stacks.sh diff --git a/README.md b/README.md index 3ebc4e1..76bf40c 100644 --- a/README.md +++ b/README.md @@ -31,7 +31,7 @@ 打开 `docs/00-00-构建总览.md`,先把整体拓扑和机器分工看明白。 2. **装 K3s 集群(两种方式二选一)** - - **自动化**:按 `docs/01-07-节点初始化-ansible-实践.md` 用 Ansible 一键完成 61~64 初始化 + server/worker 安装(已验证)。 + - **自动化**:按 `docs/01-06-节点初始化-ansible-实践.md` 用 Ansible 一键完成 61~64 初始化 + server/worker 安装(已验证)。 - **手动**:先按 `docs/01-01-k3s-控制节点含traefik.md` 装控制节点 61,再按 `docs/01-02-k3s-工作节点.md` 加工作节点 62~64。 3. **确认节点 Ready** @@ -50,7 +50,7 @@ 如果你时间有限,先只做这 4 步,跑通再扩展: -1. **装集群**:用 Ansible 按 `docs/01-07-节点初始化-ansible-实践.md` 一键安装(推荐);或按 `docs/01-01` + `docs/01-02` 手动装控制节点(61)与工作节点(62) +1. **装集群**:用 Ansible 按 `docs/01-06-节点初始化-ansible-实践.md` 一键安装(推荐);或按 `docs/01-01` + `docs/01-02` 手动装控制节点(61)与工作节点(62) 2. 执行 `kubectl get nodes`,确认节点 Ready 3. 按 `docs/04-03-k3s-nginx-demo.md` 部署 nginx 示例并访问一次 4. 
若访问不通,按 `scripts/README.md` 先跑 firewalld 基线与入口链路诊断脚本 diff --git a/ansible/files/01-08-haproxy/README.md b/ansible/files/01-07-haproxy/README.md similarity index 87% rename from ansible/files/01-08-haproxy/README.md rename to ansible/files/01-07-haproxy/README.md index cd1021b..4d7a5ac 100644 --- a/ansible/files/01-08-haproxy/README.md +++ b/ansible/files/01-07-haproxy/README.md @@ -1,4 +1,4 @@ -# 01-08 HAProxy 配置 +# 01-07 HAProxy 配置 ## 核心目标 @@ -7,7 +7,7 @@ | 层次 | 含义 | 如何验证 | |------|------|----------| | **① 语法正确** | `haproxy -c -f ` 无致命错误 | 见下文「仅校验 cfg」或主验证脚本第 2 步 | -| **② 运行与后端** | 在 OpenWrt 上实际监听 18080/18443 时,经第三方主机 curl 可达 K3s/Traefik 后端 | `./scripts/01-08-verify-haproxy.sh`(完整流程,含 curl) | +| **② 运行与后端** | 在 OpenWrt 上实际监听 18080/18443 时,经第三方主机 curl 可达 K3s/Traefik 后端 | `./scripts/01-07-verify-haproxy.sh`(完整流程,含 curl) | 仓库内 **frontend 已统一为 `18080` / `18443`**(与 LuCI 的 80/443 分离);backend 仍指向各节点 **80/443**(Traefik 入口)。按环境修改 `192.168.2.61`~`192.168.2.64`。 @@ -16,7 +16,7 @@ 仅需确认 **① 语法**,在仓库根目录执行: ```bash -./scripts/01-08-verify-haproxy.sh --cfg-only +./scripts/01-07-verify-haproxy.sh --cfg-only ``` 会将本目录全部 `*.cfg` 拷到 OpenWrt 的 `/tmp/haproxy-verify/`,对每台文件执行 `haproxy -c`(与 OpenWrt 上安装的 HAProxy 版本一致)。 @@ -25,7 +25,7 @@ ## 文件一览 -| 文件 | 说明(对应 `docs/01-08-openwrt-haproxy.md`) | +| 文件 | 说明(对应 `docs/01-07-openwrt-haproxy.md`) | |------|-----------------------------------------------| | `haproxy-no-check.cfg` | §2 最简;§3.1 在其 `server` 行加 `check` | | `haproxy-http.cfg` | §3.2 HTTP 健康检查(明文 80 后端) | @@ -35,4 +35,4 @@ ## 与 Ansible / OpenWrt -可与 Ansible 共用(复制到 OpenWrt 或通过 playbook 下发)。一键把 **uhttpd 80/443 + HAProxy 18080/18443** 落到路由器见 `scripts/01-08-deploy-openwrt-haproxy.sh`。 +可与 Ansible 共用(复制到 OpenWrt 或通过 playbook 下发)。一键把 **uhttpd 80/443 + HAProxy 18080/18443** 落到路由器见 `scripts/01-07-deploy-openwrt-haproxy.sh`。 diff --git a/ansible/files/01-08-haproxy/haproxy-http.cfg b/ansible/files/01-07-haproxy/haproxy-http.cfg similarity index 87% rename from 
ansible/files/01-08-haproxy/haproxy-http.cfg rename to ansible/files/01-07-haproxy/haproxy-http.cfg index aad5190..e7fc0d3 100644 --- a/ansible/files/01-08-haproxy/haproxy-http.cfg +++ b/ansible/files/01-07-haproxy/haproxy-http.cfg @@ -1,6 +1,6 @@ -# 01-08 HAProxy - 3.2 HTTP 健康检查(80 明文) +# 01-07 HAProxy - 3.2 HTTP 健康检查(80 明文) # backend k3s_http 增加 option httpchk GET / -# 文档:docs/01-08-openwrt-haproxy.md 第 3.2 节 +# 文档:docs/01-07-openwrt-haproxy.md 第 3.2 节 global log /dev/log local0 maxconn 4096 diff --git a/ansible/files/01-08-haproxy/haproxy-https.cfg b/ansible/files/01-07-haproxy/haproxy-https.cfg similarity index 90% rename from ansible/files/01-08-haproxy/haproxy-https.cfg rename to ansible/files/01-07-haproxy/haproxy-https.cfg index 1fa2fbc..e854eb0 100644 --- a/ansible/files/01-08-haproxy/haproxy-https.cfg +++ b/ansible/files/01-07-haproxy/haproxy-https.cfg @@ -1,8 +1,8 @@ -# 01-08 HAProxy - 3.4 HTTPS 健康检查(443 应用层,HAProxy 终结 TLS,由 HAProxy 提供证书) +# 01-07 HAProxy - 3.4 HTTPS 健康检查(443 应用层,HAProxy 终结 TLS,由 HAProxy 提供证书) # frontend 需 bind *:443 ssl,backend mode http 连 K3s:443 做 HTTP over TLS 检查 # 将 your-ingress.example.com 改为实际 Host;将 /etc/ssl/haproxy.pem 改为实际证书路径 # 自签/内网 CA 用 verify none,生产建议 ca-file -# 文档:docs/01-08-openwrt-haproxy.md 第 3.4 节 +# 文档:docs/01-07-openwrt-haproxy.md 第 3.4 节 global log /dev/log local0 maxconn 4096 diff --git a/ansible/files/01-08-haproxy/haproxy-no-check.cfg b/ansible/files/01-07-haproxy/haproxy-no-check.cfg similarity index 87% rename from ansible/files/01-08-haproxy/haproxy-no-check.cfg rename to ansible/files/01-07-haproxy/haproxy-no-check.cfg index 932b1de..21fe808 100644 --- a/ansible/files/01-08-haproxy/haproxy-no-check.cfg +++ b/ansible/files/01-07-haproxy/haproxy-no-check.cfg @@ -1,5 +1,5 @@ -# 01-08 OpenWrt HAProxy 负载均衡 - 原生最简(无健康检查) -# 文档:docs/01-08-openwrt-haproxy.md 第 2 节 +# 01-07 OpenWrt HAProxy 负载均衡 - 原生最简(无健康检查) +# 文档:docs/01-07-openwrt-haproxy.md 第 2 节 # 将 192.168.2.61~64 按实际 K3s 节点 IP 修改 # 如需健康检查,见第 3 节对应 cfg global 
diff --git a/ansible/files/01-08-haproxy/haproxy-proxy-http-tls.cfg b/ansible/files/01-07-haproxy/haproxy-proxy-http-tls.cfg similarity index 88% rename from ansible/files/01-08-haproxy/haproxy-proxy-http-tls.cfg rename to ansible/files/01-07-haproxy/haproxy-proxy-http-tls.cfg index 7ffe988..872cc0d 100644 --- a/ansible/files/01-08-haproxy/haproxy-proxy-http-tls.cfg +++ b/ansible/files/01-07-haproxy/haproxy-proxy-http-tls.cfg @@ -1,6 +1,6 @@ -# 01-08 HAProxy - 健康检查升级(HTTP+TLS)+ PROXY Protocol +# 01-07 HAProxy - 健康检查升级(HTTP+TLS)+ PROXY Protocol # 组合:k3s_http 用 option httpchk,k3s_https 用 ssl-hello-chk,均带 send-proxy-v2 -# 文档:docs/01-08-openwrt-haproxy.md 第 5 节「健康检查与 PROXY 组合」 +# 文档:docs/01-07-openwrt-haproxy.md 第 5 节「健康检查与 PROXY 组合」 global log /dev/log local0 maxconn 4096 diff --git a/ansible/files/01-08-haproxy/haproxy-tls.cfg b/ansible/files/01-07-haproxy/haproxy-tls.cfg similarity index 87% rename from ansible/files/01-08-haproxy/haproxy-tls.cfg rename to ansible/files/01-07-haproxy/haproxy-tls.cfg index 7815799..4f29380 100644 --- a/ansible/files/01-08-haproxy/haproxy-tls.cfg +++ b/ansible/files/01-07-haproxy/haproxy-tls.cfg @@ -1,6 +1,6 @@ -# 01-08 HAProxy - 3.3 TLS 健康检查(443 握手,mode tcp) +# 01-07 HAProxy - 3.3 TLS 健康检查(443 握手,mode tcp) # backend k3s_https 增加 option ssl-hello-chk -# 文档:docs/01-08-openwrt-haproxy.md 第 3.3 节 +# 文档:docs/01-07-openwrt-haproxy.md 第 3.3 节 global log /dev/log local0 maxconn 4096 diff --git a/ansible/files/cloudflare-tunnel/cloudflared.yaml b/ansible/files/cloudflare-tunnel/cloudflared.yaml index 4b1c5af..dd9fed8 100644 --- a/ansible/files/cloudflare-tunnel/cloudflared.yaml +++ b/ansible/files/cloudflare-tunnel/cloudflared.yaml @@ -1,37 +1,37 @@ # docs/03-04-k3s-cloudflare-tunnel-配置接入.md — 替换 TUNNEL_TOKEN 后应用 -apiVersion: v1 -kind: Secret -metadata: - name: cloudflared-credentials - namespace: kube-system -type: Opaque -stringData: - TUNNEL_TOKEN: "" +apiVersion: v1 # Secret 使用的 Kubernetes API 版本 +kind: Secret # 资源类型:Secret(用于保存 
Cloudflare Tunnel token) +metadata: # 元信息(名称/命名空间等) + name: cloudflared-credentials # Secret 名称(Deployment 中会引用) + namespace: kube-system # Secret 所在命名空间 +type: Opaque # Secret 类型(普通自定义键值) +stringData: # 以字符串方式提供 Secret 数据(便于直接写明文) + TUNNEL_TOKEN: "" # Cloudflare Tunnel Token(用你真实的 token 替换) --- -apiVersion: apps/v1 -kind: Deployment -metadata: - name: cloudflared - namespace: kube-system -spec: - replicas: 1 - selector: - matchLabels: - app: cloudflared - template: - metadata: - labels: - app: cloudflared - spec: - containers: - - name: cloudflared - image: cloudflare/cloudflared:latest - args: - - tunnel - - run - env: - - name: TUNNEL_TOKEN - valueFrom: - secretKeyRef: - name: cloudflared-credentials - key: TUNNEL_TOKEN +apiVersion: apps/v1 # Deployment 使用的 API 版本 +kind: Deployment # 工作负载:Deployment(管理 Pod 副本) +metadata: # Deployment 元信息 + name: cloudflared # Deployment 名称 + namespace: kube-system # 部署到的命名空间 +spec: # Deployment 期望状态 + replicas: 1 # 副本数(Tunnel 通常只跑一个副本即可) + selector: # Deployment 选择器:匹配 template 的 Pod + matchLabels: # 必须与 template.metadata.labels 对齐 + app: cloudflared # 应用标签 + template: # Pod 模板 + metadata: # Pod 元信息 + labels: # Pod 标签 + app: cloudflared # 与 selector.matchLabels 相同 + spec: # Pod 规范 + containers: # 容器列表 + - name: cloudflared # 容器名 + image: cloudflare/cloudflared:latest # cloudflared 镜像 + args: # 容器启动参数 + - tunnel # 命令子参数:tunnel + - run # 命令子参数:run + env: # 环境变量 + - name: TUNNEL_TOKEN # 容器内使用的环境变量名 + valueFrom: # 从某个来源取值 + secretKeyRef: # 从 Secret 的 key 取值 + name: cloudflared-credentials # Secret 名称 + key: TUNNEL_TOKEN # Secret 中的 key diff --git a/ansible/files/gitlab/gitlab-ci-minimal.example.yml b/ansible/files/gitlab/gitlab-ci-minimal.example.yml index 44af5d5..f91a8ab 100644 --- a/ansible/files/gitlab/gitlab-ci-minimal.example.yml +++ b/ansible/files/gitlab/gitlab-ci-minimal.example.yml @@ -1,20 +1,20 @@ # docs/05-04-k3s-配置gitlab-cicd.md — 最小 .gitlab-ci.yml 示例 -stages: - - lint - - deploy +stages: # CI 阶段定义(按顺序执行) + - lint # 
代码/清单检查阶段 + - deploy # 部署阶段 -variables: - KUBECONFIG: "/builds/${CI_PROJECT_PATH}/kubeconfig" +variables: # 全局变量 + KUBECONFIG: "/builds/${CI_PROJECT_PATH}/kubeconfig" # Runner 内 kubeconfig 输出路径 -lint: - stage: lint - script: - - yamllint manifests || true +lint: # 任务名:lint + stage: lint # 归属 lint 阶段 + script: # 执行脚本 + - yamllint manifests || true # 允许 lint 失败不中断(示例用途) -deploy: - stage: deploy - script: - - echo "$KUBE_CONFIG_CONTENT" > "$KUBECONFIG" - - kubectl --kubeconfig="$KUBECONFIG" apply -f manifests/ - only: - - main +deploy: # 任务名:deploy + stage: deploy # 归属 deploy 阶段 + script: # 执行脚本 + - echo "$KUBE_CONFIG_CONTENT" > "$KUBECONFIG" # 将 CI 变量写入 kubeconfig 文件 + - kubectl --kubeconfig="$KUBECONFIG" apply -f manifests/ # 应用 manifests + only: # 触发条件(旧语法) + - main # 仅 main 分支触发 diff --git a/ansible/files/gitlab/gitlab-ci-multi-arch-deploy.example.yml b/ansible/files/gitlab/gitlab-ci-multi-arch-deploy.example.yml index 9553c03..cbdcc22 100644 --- a/ansible/files/gitlab/gitlab-ci-multi-arch-deploy.example.yml +++ b/ansible/files/gitlab/gitlab-ci-multi-arch-deploy.example.yml @@ -1,14 +1,14 @@ # docs/05-04-k3s-配置gitlab-cicd.md — 多架构 Runner tags 示例 -deploy_x86: - stage: deploy - tags: [x86] - script: - - echo "$KUBE_CONFIG_CONTENT" > "$KUBECONFIG" - - kubectl --kubeconfig="$KUBECONFIG" apply -f manifests/x86/ +deploy_x86: # 任务名:x86 架构部署 + stage: deploy # 所属阶段 + tags: [x86] # 仅匹配带 x86 tag 的 Runner + script: # 执行脚本 + - echo "$KUBE_CONFIG_CONTENT" > "$KUBECONFIG" # 写入 kubeconfig + - kubectl --kubeconfig="$KUBECONFIG" apply -f manifests/x86/ # 部署 x86 清单 -deploy_arm64: - stage: deploy - tags: [arm64] - script: - - echo "$KUBE_CONFIG_CONTENT" > "$KUBECONFIG" - - kubectl --kubeconfig="$KUBECONFIG" apply -f manifests/arm64/ +deploy_arm64: # 任务名:arm64 架构部署 + stage: deploy # 所属阶段 + tags: [arm64] # 仅匹配带 arm64 tag 的 Runner + script: # 执行脚本 + - echo "$KUBE_CONFIG_CONTENT" > "$KUBECONFIG" # 写入 kubeconfig + - kubectl --kubeconfig="$KUBECONFIG" apply -f manifests/arm64/ # 部署 arm64 
清单 diff --git a/ansible/files/gitlab/gitlab-ci-runner-tags.example.yml b/ansible/files/gitlab/gitlab-ci-runner-tags.example.yml index 1000a77..6ddfd38 100644 --- a/ansible/files/gitlab/gitlab-ci-runner-tags.example.yml +++ b/ansible/files/gitlab/gitlab-ci-runner-tags.example.yml @@ -1,15 +1,15 @@ # docs/05-03-k3s-安装gitlab-含runner.md — Runner tag 与 job 对应示例 -build_x86: - tags: [x86] - script: - - echo "build for x86" +build_x86: # 任务名:x86 构建 + tags: [x86] # 仅匹配 x86 Runner + script: # 执行脚本 + - echo "build for x86" # 示例输出 -build_arm64: - tags: [arm64] - script: - - echo "build for arm64" +build_arm64: # 任务名:arm64 构建 + tags: [arm64] # 仅匹配 arm64 Runner + script: # 执行脚本 + - echo "build for arm64" # 示例输出 -build_armv7: - tags: [armv7] - script: - - echo "build for armv7" +build_armv7: # 任务名:armv7 构建 + tags: [armv7] # 仅匹配 armv7 Runner + script: # 执行脚本 + - echo "build for armv7" # 示例输出 diff --git a/ansible/files/homer/homer.yaml b/ansible/files/homer/homer.yaml index 9cb4bcb..9e72d27 100644 --- a/ansible/files/homer/homer.yaml +++ b/ansible/files/homer/homer.yaml @@ -1,53 +1,53 @@ # docs/05-01-k3s-部署homer首页面板.md — 按需修改 host -apiVersion: apps/v1 -kind: Deployment -metadata: - name: homer - namespace: homer -spec: - replicas: 1 - selector: - matchLabels: - app: homer - template: - metadata: - labels: - app: homer - spec: - containers: - - name: homer - image: b4bz/homer:latest - ports: - - containerPort: 8080 +apiVersion: apps/v1 # Deployment 使用的 API 版本 +kind: Deployment # 工作负载:Deployment(管理 Pod 副本) +metadata: # 对该资源的标识信息 + name: homer # Deployment 名称 + namespace: homer # 所属命名空间(Pod 也会在该 NS 内) +spec: # Deployment 期望状态 + replicas: 1 # Pod 副本数:这里是 1(单副本更容易配合本地存储等) + selector: # Deployment 用于匹配管理 Pod 的条件 + matchLabels: # 标签匹配:必须与 template.metadata.labels 一致 + app: homer # 选择带有 app=homer 标签的 Pod + template: # Pod 模板:Deployment 会根据该模板创建/更新 Pod + metadata: # Pod 的元信息 + labels: # Pod 标签:用于 selector 匹配 Service/Deployment 等 + app: homer # Pod 上的标签 app=homer + spec: # Pod 规范 + 
containers: # 容器列表(本例只有一个容器) + - name: homer # 容器名称(日志/调试中会用到) + image: b4bz/homer:latest # homer 镜像 + ports: # 容器端口声明(供探测/生成文档等使用) + - containerPort: 8080 # 容器监听端口:homer 默认 8080 --- -apiVersion: v1 -kind: Service -metadata: - name: homer - namespace: homer -spec: - selector: - app: homer - ports: - - port: 80 - targetPort: 8080 +apiVersion: v1 # Service 使用的 API 版本 +kind: Service # 网络抽象:把一组 Pod 暴露为稳定的访问入口 +metadata: # Service 标识 + name: homer # Service 名称(Ingress/其他对象会引用) + namespace: homer # Service 所在命名空间 +spec: # Service 期望状态 + selector: # Service 通过标签选择要转发到的 Pod + app: homer # 选择 app=homer 的 Pod + ports: # Service 暴露端口列表 + - port: 80 # Service 端口:Ingress/集群访问时用它 + targetPort: 8080 # 转发目标端口:Pod 容器监听的端口 --- -apiVersion: networking.k8s.io/v1 -kind: Ingress -metadata: - name: homer - namespace: homer - annotations: - traefik.ingress.kubernetes.io/router.entrypoints: web -spec: - rules: - - host: home.example.com - http: - paths: - - path: / - pathType: Prefix - backend: - service: - name: homer - port: - number: 80 +apiVersion: networking.k8s.io/v1 # Ingress 使用的 API 版本 +kind: Ingress # 入口资源:对外暴露 HTTP/HTTPS 路由 +metadata: # Ingress 标识 + name: homer # Ingress 名称 + namespace: homer # Ingress 所在命名空间 + annotations: # 注解:用于 Traefik 等 Ingress Controller 的额外配置 + traefik.ingress.kubernetes.io/router.entrypoints: web # Traefik 路由使用的 entrypoint(HTTP) +spec: # Ingress 规则 + rules: # 主机/路径规则列表 + - host: home.example.com # 要匹配的域名(按需修改) + http: # HTTP 路由规则 + paths: # 路径匹配列表 + - path: / # 匹配根路径及其子路径 + pathType: Prefix # Prefix 表示前缀匹配 + backend: # 匹配到后端目标 + service: # 使用 Service 作为后端 + name: homer # 后端 Service 名称 + port: # 后端端口配置 + number: 80 # 后端 Service 端口 diff --git a/ansible/files/local-path-demo/local-path-pvc-demo.yaml b/ansible/files/local-path-demo/local-path-pvc-demo.yaml index 406df5c..07258bd 100644 --- a/ansible/files/local-path-demo/local-path-pvc-demo.yaml +++ b/ansible/files/local-path-demo/local-path-pvc-demo.yaml @@ -1,38 +1,39 @@ # docs/03-05-k3s-local-path-pvc.md 
-apiVersion: v1 -kind: PersistentVolumeClaim -metadata: - name: local-pvc-demo - namespace: default -spec: - accessModes: - - ReadWriteOnce - resources: - requests: - storage: 1Gi +apiVersion: v1 # PVC:Kubernetes 核心 API +kind: PersistentVolumeClaim # 持久卷声明(你“申请要用的存储”) +metadata: # PVC 的元信息(名称/命名空间/其它元数据) + name: local-pvc-demo # PVC 名称(Deployment 里会引用) + namespace: default # PVC 所在命名空间 +spec: # PVC 的期望状态(访问模式/存储类/容量请求) + accessModes: # 访问模式:RWO 表示同一时刻只能被一个节点上的一个 Pod 以读写方式挂载 + - ReadWriteOnce # 读写模式:单节点可读写 + storageClassName: local-path # 指定存储类:使用 K3s 的 local-path-provisioner(动态创建本地 PV) + resources: # 资源请求(本例只关心 storage 容量) + requests: # 容量配额请求(与 requests.storage 对应) + storage: 1Gi # 申请容量大小(K8s quantity:常见后缀 Ki/Mi/Gi/Ti/…,示例:512Mi、1024Mi、1Gi、1G;也可写字节值) --- -apiVersion: apps/v1 -kind: Deployment -metadata: - name: nginx-local-pvc-demo - namespace: default -spec: - replicas: 1 - selector: - matchLabels: - app: nginx-local-pvc-demo - template: - metadata: - labels: - app: nginx-local-pvc-demo - spec: - containers: - - name: nginx - image: nginx:alpine - volumeMounts: - - name: data - mountPath: /usr/share/nginx/html - volumes: - - name: data - persistentVolumeClaim: - claimName: local-pvc-demo +apiVersion: apps/v1 # Deployment:apps API +kind: Deployment # 部署器:管理 Pod 副本、滚动更新等 +metadata: # Deployment 的元信息 + name: nginx-local-pvc-demo # Deployment 名称 + namespace: default # 部署到的命名空间(Deployment 里引用 PVC 时也必须同 namespace) +spec: # Deployment 的期望状态(副本数、选择器、Pod 模板等) + replicas: 1 # Pod 副本数(用于验证持久化,保持单副本更直观) + selector: # Deployment 选择器:用于匹配/管理模板 Pod + matchLabels: # 标签匹配集合(必须与 template.metadata.labels 对上) + app: nginx-local-pvc-demo # Deployment 用该 label 选择/管理自己的 Pod + template: # Pod 模板(Deployment 用它创建/更新 Pod) + metadata: # Pod 元信息 + labels: # Pod 标签:必须与 selector.matchLabels 对齐 + app: nginx-local-pvc-demo # Pod 模板 label:必须与 selector.matchLabels 对上 + spec: # Pod 规范 + containers: # 容器列表 + - name: nginx # 容器名 + image: nginx:alpine # nginx 镜像 + volumeMounts: # 容器内挂载点(把卷挂到 
mountPath) + - name: data # 与下方 volumes[].name 对应:挂载哪个卷 + mountPath: /usr/share/nginx/html # 挂载点:写入此目录会落到 PVC/PV 上 + volumes: # Pod 内定义的卷列表 + - name: data # Pod 内的卷名(给 volumeMounts 用) + persistentVolumeClaim: # 使用 PVC 作为卷来源 + claimName: local-pvc-demo # 绑定到哪个 PVC(必须与上面 PVC metadata.name 且同 namespace) diff --git a/ansible/files/nfs-demo/nfs-pv-pvc-demo.yaml b/ansible/files/nfs-demo/nfs-pv-pvc-demo.yaml index 40be69c..00a9d5f 100644 --- a/ansible/files/nfs-demo/nfs-pv-pvc-demo.yaml +++ b/ansible/files/nfs-demo/nfs-pv-pvc-demo.yaml @@ -1,27 +1,27 @@ # docs/03-06-k3s-使用nfs存储.md — 按环境修改 server/path -apiVersion: v1 -kind: PersistentVolume -metadata: - name: nfs-pv-demo -spec: - capacity: - storage: 20Gi - accessModes: - - ReadWriteMany - persistentVolumeReclaimPolicy: Retain - nfs: - server: 192.168.2.22 - path: /data/nfs +apiVersion: v1 # PV/PVC 使用的核心 API 版本 +kind: PersistentVolume # 资源类型:持久卷(集群级) +metadata: # PV 元信息 + name: nfs-pv-demo # PV 名称 +spec: # PV 规格 + capacity: # 容量声明 + storage: 20Gi # PV 总容量 + accessModes: # 访问模式列表 + - ReadWriteMany # RWX:多节点可读写 + persistentVolumeReclaimPolicy: Retain # 回收策略:删除 PVC 后保留底层数据 + nfs: # 存储后端:NFS + server: 192.168.2.22 # NFS 服务器地址 + path: /data/nfs # NFS 导出目录 --- -apiVersion: v1 -kind: PersistentVolumeClaim -metadata: - name: nfs-pvc-demo - namespace: default -spec: - accessModes: - - ReadWriteMany - resources: - requests: - storage: 5Gi - volumeName: nfs-pv-demo +apiVersion: v1 # PVC 使用的核心 API 版本 +kind: PersistentVolumeClaim # 资源类型:持久卷声明 +metadata: # PVC 元信息 + name: nfs-pvc-demo # PVC 名称 + namespace: default # PVC 所在命名空间 +spec: # PVC 规格 + accessModes: # 访问模式要求 + - ReadWriteMany # 申请 RWX 访问模式 + resources: # 资源请求 + requests: # 配额请求 + storage: 5Gi # 申请容量 + volumeName: nfs-pv-demo # 显式绑定到指定 PV diff --git a/ansible/files/nginx-matrix-tls/01-control-ingress.yaml b/ansible/files/nginx-matrix-tls/01-control-ingress.yaml index 044392b..6688d7d 100644 --- a/ansible/files/nginx-matrix-tls/01-control-ingress.yaml +++ 
b/ansible/files/nginx-matrix-tls/01-control-ingress.yaml @@ -1,115 +1,115 @@ # 03-02 TLS: M1 控制节点 + Ingress,路径 /(根路径),域名 test01.jackadam.top # ConfigMap:首页 + default.conf(单文件 subPath 挂载,与 M2~M4 一致) --- -apiVersion: v1 -kind: ConfigMap -metadata: - name: nginx-m1-html - namespace: default -data: - index.html: | +apiVersion: v1 # ConfigMap 使用的 API 版本 +kind: ConfigMap # 配置资源类型:ConfigMap +metadata: # ConfigMap 标识信息 + name: nginx-m1-html # ConfigMap 名称 + namespace: default # 命名空间 +data: # ConfigMap 数据区 + index.html: | # HTML 内容:挂载到 nginx 网页目录(内部内容行不改动) M1

M1

控制节点 + Ingress

Backend: M1

- default.conf: | + default.conf: | # nginx 配置:通过 subPath 单文件挂载到 conf.d/default.conf(内部内容行不改动) server { listen 80 default_server; server_name _; root /usr/share/nginx/html; index index.html; location / { add_header X-Backend "M1"; try_files $uri $uri/ /index.html; } } --- -apiVersion: apps/v1 -kind: Deployment -metadata: - name: nginx-m1 - namespace: default - labels: - app: nginx-m1 - matrix: "03-02-m1" -spec: - replicas: 1 - selector: - matchLabels: - app: nginx-m1 - template: - metadata: - labels: - app: nginx-m1 - spec: - nodeSelector: - node-role.kubernetes.io/control-plane: "" - tolerations: - - key: node-role.kubernetes.io/control-plane - operator: Exists - effect: NoSchedule - volumes: - - name: html - configMap: - name: nginx-m1-html - containers: - - name: nginx - image: nginx:alpine - ports: - - containerPort: 80 - volumeMounts: - - name: html - mountPath: /usr/share/nginx/html/index.html - subPath: index.html - readOnly: true - - name: html - mountPath: /etc/nginx/conf.d/default.conf - subPath: default.conf - readOnly: true +apiVersion: apps/v1 # Deployment 使用的 API 版本 +kind: Deployment # 工作负载:Deployment +metadata: # Deployment 标识信息 + name: nginx-m1 # Deployment 名称 + namespace: default # 部署命名空间 + labels: # 标签 + app: nginx-m1 # 应用标签 + matrix: "03-02-m1" # 矩阵编号标签 +spec: # Deployment 期望状态 + replicas: 1 # 副本数:单副本 + selector: # 选择器 + matchLabels: # 标签匹配集合(用于选中模板 Pod) + app: nginx-m1 # 与 template.labels 对齐 + template: # Pod 模板 + metadata: # Pod 元信息 + labels: # Pod 标签 + app: nginx-m1 # Pod 标签 + spec: # Pod 规范 + nodeSelector: # 固定到 control-plane 节点 + node-role.kubernetes.io/control-plane: "" # 控制节点 selector + tolerations: # 容忍 NoSchedule 污点 + - key: node-role.kubernetes.io/control-plane # 污点 key + operator: Exists # 匹配存在 + effect: NoSchedule # 影响效果 + volumes: # 卷定义 + - name: html # 卷名 + configMap: # 来自 ConfigMap + name: nginx-m1-html # ConfigMap 名称 + containers: # 容器列表 + - name: nginx # 容器名 + image: nginx:alpine # 镜像 + ports: # 容器端口 + - containerPort: 80 # nginx 
HTTP 端口 + volumeMounts: # 挂载点 + - name: html # 引用卷 + mountPath: /usr/share/nginx/html/index.html # 网页首页 + subPath: index.html # ConfigMap 的 key + readOnly: true # 只读 + - name: html # 同卷第二个挂载点 + mountPath: /etc/nginx/conf.d/default.conf # nginx 配置 + subPath: default.conf # ConfigMap 的 key + readOnly: true # 只读 --- -apiVersion: v1 -kind: Service -metadata: - name: nginx-m1 - namespace: default -spec: - selector: - app: nginx-m1 - ports: - - port: 80 - targetPort: 80 +apiVersion: v1 # Service 使用的 API 版本 +kind: Service # 网络抽象:把 Pod 暴露为稳定入口 +metadata: # Service 标识 + name: nginx-m1 # Service 名称 + namespace: default # 命名空间 +spec: # Service 期望状态 + selector: # Service 选择器 + app: nginx-m1 # 选中后端 Pod + ports: # 端口映射 + - port: 80 # Service 端口 + targetPort: 80 # 转发目标端口 --- -apiVersion: networking.k8s.io/v1 -kind: Ingress -metadata: - name: nginx-m1 - namespace: default - annotations: - traefik.ingress.kubernetes.io/router.entrypoints: websecure - traefik.ingress.kubernetes.io/router.tls.certresolver: cloudflare -spec: - tls: - - hosts: - - test01.jackadam.top - rules: - - host: test01.jackadam.top - http: - paths: - - path: / - pathType: Prefix - backend: - service: - name: nginx-m1 - port: - number: 80 +apiVersion: networking.k8s.io/v1 # Ingress 使用的 API 版本 +kind: Ingress # 入口资源 +metadata: # Ingress 标识 + name: nginx-m1 # 名称 + namespace: default # 命名空间 + annotations: # Traefik 注解 + traefik.ingress.kubernetes.io/router.entrypoints: websecure # 使用 HTTPS entrypoint + traefik.ingress.kubernetes.io/router.tls.certresolver: cloudflare # ACME certresolver +spec: # Ingress 规则 + tls: # TLS 配置 + - hosts: # TLS hosts + - test01.jackadam.top # 域名 + rules: # HTTP 路由规则 + - host: test01.jackadam.top # 域名匹配 + http: # HTTP 路由 + paths: # 路径列表 + - path: / # 匹配根路径 + pathType: Prefix # 前缀匹配 + backend: # 后端目标 + service: # 后端是 Service + name: nginx-m1 # Service 名称 + port: # 后端端口 + number: 80 # 端口号 --- # 03-02 HTTP-only:M1 路由(仅 web,无 TLS),与 nginx-m1 共用 Service -apiVersion: networking.k8s.io/v1 -kind: 
Ingress -metadata: - name: nginx-m1-http - namespace: default - annotations: - traefik.ingress.kubernetes.io/router.entrypoints: web -spec: - rules: - - host: test01.jackadam.top - http: - paths: - - path: / - pathType: Prefix - backend: - service: - name: nginx-m1 - port: - number: 80 +apiVersion: networking.k8s.io/v1 # Ingress API 版本 +kind: Ingress # Ingress 资源 +metadata: # Ingress 标识 + name: nginx-m1-http # HTTP-only Ingress 名称 + namespace: default # 命名空间 + annotations: # Traefik 注解 + traefik.ingress.kubernetes.io/router.entrypoints: web # 使用 HTTP entrypoint +spec: # Ingress 规则 + rules: # 规则列表 + - host: test01.jackadam.top # 域名 + http: # HTTP 路由 + paths: # 路径列表 + - path: / # 根路径 + pathType: Prefix # 前缀匹配 + backend: # 后端目标 + service: # 后端是 Service + name: nginx-m1 # 共用 Service + port: # 后端端口 + number: 80 # 端口号 diff --git a/ansible/files/nginx-matrix-tls/02-control-ingressroute.yaml b/ansible/files/nginx-matrix-tls/02-control-ingressroute.yaml index d51234b..27cc38f 100644 --- a/ansible/files/nginx-matrix-tls/02-control-ingressroute.yaml +++ b/ansible/files/nginx-matrix-tls/02-control-ingressroute.yaml @@ -1,98 +1,98 @@ # 03-02 TLS: M2 控制节点 + IngressRoute,路径 /(根路径),域名 test02.jackadam.top --- -apiVersion: v1 -kind: ConfigMap -metadata: - name: nginx-m2-html - namespace: default -data: - index.html: | +apiVersion: v1 # ConfigMap 使用的 API 版本 +kind: ConfigMap # 配置资源类型:ConfigMap +metadata: # ConfigMap 标识信息 + name: nginx-m2-html # ConfigMap 名称 + namespace: default # 命名空间 +data: # ConfigMap 数据区 + index.html: | # HTML 内容:挂载到 nginx 网页目录(内部内容行不改动) M2

M2

控制节点 + IngressRoute

- default.conf: | + default.conf: | # nginx 配置:通过 subPath 挂载到 conf.d/default.conf(内部内容行不改动) server { listen 80; server_name localhost; root /usr/share/nginx/html; index index.html; location / { add_header X-Backend "M2"; try_files $uri $uri/ /index.html; } } --- -apiVersion: apps/v1 -kind: Deployment -metadata: - name: nginx-m2 - namespace: default - labels: - app: nginx-m2 - matrix: "03-02-m2" -spec: - replicas: 1 - selector: - matchLabels: - app: nginx-m2 - template: - metadata: - labels: - app: nginx-m2 - spec: - nodeSelector: - kubernetes.io/hostname: ylc61 - volumes: - - name: html - configMap: - name: nginx-m2-html - containers: - - name: nginx - image: nginx:alpine - ports: - - containerPort: 80 - volumeMounts: - - name: html - mountPath: /usr/share/nginx/html/index.html - subPath: index.html - readOnly: true - - name: html - mountPath: /etc/nginx/conf.d/default.conf - subPath: default.conf - readOnly: true +apiVersion: apps/v1 # Deployment 使用的 API 版本 +kind: Deployment # 工作负载:Deployment +metadata: # Deployment 标识信息 + name: nginx-m2 # Deployment 名称 + namespace: default # 部署命名空间 + labels: # 标签 + app: nginx-m2 # 应用标签 + matrix: "03-02-m2" # 矩阵编号标签 +spec: # Deployment 期望状态 + replicas: 1 # 副本数 + selector: # 选择器 + matchLabels: # 标签匹配集合(用于选中模板 Pod) + app: nginx-m2 # 必须与 template.labels 对齐 + template: # Pod 模板 + metadata: # Pod 元信息 + labels: # Pod 标签 + app: nginx-m2 # Pod 标签 + spec: # Pod 规范 + nodeSelector: # 固定到指定主机 + kubernetes.io/hostname: ylc61 # 控制节点主机名 + volumes: # 卷定义 + - name: html # 卷名 + configMap: # 卷来源 + name: nginx-m2-html # ConfigMap 名称 + containers: # 容器列表 + - name: nginx # 容器名 + image: nginx:alpine # nginx 镜像 + ports: # 容器端口 + - containerPort: 80 # HTTP 端口 + volumeMounts: # 容器挂载 + - name: html # 引用卷 + mountPath: /usr/share/nginx/html/index.html # 网页首页 + subPath: index.html # ConfigMap 的 key + readOnly: true # 只读 + - name: html # 仍引用同一卷 + mountPath: /etc/nginx/conf.d/default.conf # nginx 配置文件 + subPath: default.conf # ConfigMap 的 key + readOnly: true # 
只读 --- -apiVersion: v1 -kind: Service -metadata: - name: nginx-m2 - namespace: default -spec: - selector: - app: nginx-m2 - ports: - - port: 80 - targetPort: 80 +apiVersion: v1 # Service 使用的 API 版本 +kind: Service # 网络抽象:把 Pod 暴露为稳定入口 +metadata: # Service 标识 + name: nginx-m2 # Service 名称 + namespace: default # 命名空间 +spec: # Service 期望状态 + selector: # Service 选择器 + app: nginx-m2 # 选中后端 Pod + ports: # Service 端口 + - port: 80 # Service 端口 + targetPort: 80 # 转发端口 --- -apiVersion: traefik.io/v1alpha1 -kind: IngressRoute -metadata: - name: nginx-m2 - namespace: default -spec: - entryPoints: - - websecure - routes: - - match: Host(`test02.jackadam.top`) - kind: Rule - services: - - name: nginx-m2 - port: 80 - tls: - certResolver: cloudflare +apiVersion: traefik.io/v1alpha1 # IngressRoute API 版本 +kind: IngressRoute # Traefik 路由 CRD +metadata: # IngressRoute 标识 + name: nginx-m2 # 资源名称 + namespace: default # 命名空间 +spec: # 规则 + entryPoints: # 入口点 + - websecure # 使用 HTTPS entrypoint + routes: # 路由列表 + - match: Host(`test02.jackadam.top`) # 域名匹配 + kind: Rule # 规则类型 + services: # 后端服务 + - name: nginx-m2 # 后端 Service + port: 80 # 后端端口 + tls: # TLS 配置 + certResolver: cloudflare # 使用 cloudflare certResolver --- # 03-02 HTTP-only:M2 路由(仅 web,无 TLS),与 nginx-m2 共用 Service -apiVersion: traefik.io/v1alpha1 -kind: IngressRoute -metadata: - name: nginx-m2-http - namespace: default -spec: - entryPoints: - - web - routes: - - match: Host(`test02.jackadam.top`) - kind: Rule - services: - - name: nginx-m2 - port: 80 +apiVersion: traefik.io/v1alpha1 # IngressRoute API 版本 +kind: IngressRoute # Traefik 路由资源 +metadata: # 标识 + name: nginx-m2-http # 名称 + namespace: default # 命名空间 +spec: # 规则 + entryPoints: # 入口点列表 + - web # 使用 HTTP entrypoint + routes: # 路由列表 + - match: Host(`test02.jackadam.top`) # 域名匹配 + kind: Rule # 规则类型 + services: # 后端服务 + - name: nginx-m2 # 后端 Service + port: 80 # 端口 diff --git a/ansible/files/nginx-matrix-tls/03-worker-ingress.yaml 
b/ansible/files/nginx-matrix-tls/03-worker-ingress.yaml index df92644..86a4788 100644 --- a/ansible/files/nginx-matrix-tls/03-worker-ingress.yaml +++ b/ansible/files/nginx-matrix-tls/03-worker-ingress.yaml @@ -1,110 +1,110 @@ # 03-02 TLS: M3 工作节点 + Ingress,路径 /(根路径),域名 test03.jackadam.top --- -apiVersion: v1 -kind: ConfigMap -metadata: - name: nginx-m3-html - namespace: default -data: - index.html: | +apiVersion: v1 # ConfigMap 使用的 API 版本 +kind: ConfigMap # 配置资源类型:ConfigMap +metadata: # 资源标识 + name: nginx-m3-html # ConfigMap 名称 + namespace: default # 命名空间 +data: # ConfigMap 数据键值区 + index.html: | # HTML 内容:挂载到 nginx 网页目录(内部内容行不改动) M3

M3

工作节点 + Ingress

- default.conf: | + default.conf: | # nginx 配置:挂载到 conf.d/default.conf(内部内容行不改动) server { listen 80; server_name localhost; root /usr/share/nginx/html; index index.html; location / { add_header X-Backend "M3"; try_files $uri $uri/ /index.html; } } --- -apiVersion: apps/v1 -kind: Deployment -metadata: - name: nginx-m3 - namespace: default - labels: - app: nginx-m3 - matrix: "03-02-m3" -spec: - replicas: 1 - selector: - matchLabels: - app: nginx-m3 - template: - metadata: - labels: - app: nginx-m3 - spec: - nodeSelector: - node-role.kubernetes.io/worker: "" - volumes: - - name: html - configMap: - name: nginx-m3-html - containers: - - name: nginx - image: nginx:alpine - ports: - - containerPort: 80 - volumeMounts: - - name: html - mountPath: /usr/share/nginx/html/index.html - subPath: index.html - readOnly: true - - name: html - mountPath: /etc/nginx/conf.d/default.conf - subPath: default.conf - readOnly: true +apiVersion: apps/v1 # Deployment 使用的 API 版本 +kind: Deployment # 工作负载:Deployment +metadata: # Deployment 标识 + name: nginx-m3 # Deployment 名称 + namespace: default # 部署命名空间 + labels: # 标签(用于筛选/统计) + app: nginx-m3 # 应用标签 + matrix: "03-02-m3" # 矩阵编号标签 +spec: # Deployment 期望状态 + replicas: 1 # 副本数 + selector: # Deployment 选择器:匹配 Pod + matchLabels: # 标签匹配集合 + app: nginx-m3 # 选中 app 标签为 nginx-m3 的 Pod + template: # Pod 模板 + metadata: # Pod 元信息 + labels: # Pod 标签 + app: nginx-m3 # 与 selector.matchLabels 对齐 + spec: # Pod 规范 + nodeSelector: # 固定调度到 worker 节点 + node-role.kubernetes.io/worker: "" # worker 节点 selector(按你的集群约定) + volumes: # 卷定义 + - name: html # 卷名(给 volumeMounts 引用) + configMap: # 卷来源为 ConfigMap + name: nginx-m3-html # 引用的 ConfigMap 名称 + containers: # 容器列表 + - name: nginx # 容器名 + image: nginx:alpine # nginx 镜像 + ports: # 容器端口声明 + - containerPort: 80 # nginx HTTP 端口 + volumeMounts: # 挂载点(把配置映射到具体文件) + - name: html # 引用同一个卷 + mountPath: /usr/share/nginx/html/index.html # 网页首页文件路径 + subPath: index.html # 来自 ConfigMap 的 key + readOnly: true # 只读挂载 + - name: html 
# 同一卷再次挂载 + mountPath: /etc/nginx/conf.d/default.conf # nginx 配置文件路径 + subPath: default.conf # 来自 ConfigMap 的 key + readOnly: true # 只读挂载 --- -apiVersion: v1 -kind: Service -metadata: - name: nginx-m3 - namespace: default -spec: - selector: - app: nginx-m3 - ports: - - port: 80 - targetPort: 80 +apiVersion: v1 # Service 使用的 API 版本 +kind: Service # Service 资源 +metadata: # Service 标识 + name: nginx-m3 # Service 名称 + namespace: default # 命名空间 +spec: # Service 期望状态 + selector: # 通过标签选中后端 Pod + app: nginx-m3 # 选中 app 标签为 nginx-m3 的 Pod + ports: # 端口映射 + - port: 80 # Service 暴露端口 + targetPort: 80 # 转发到容器端口 --- -apiVersion: networking.k8s.io/v1 -kind: Ingress -metadata: - name: nginx-m3 - namespace: default - annotations: - traefik.ingress.kubernetes.io/router.entrypoints: websecure - traefik.ingress.kubernetes.io/router.tls.certresolver: cloudflare -spec: - tls: - - hosts: - - test03.jackadam.top - rules: - - host: test03.jackadam.top - http: - paths: - - path: / - pathType: Prefix - backend: - service: - name: nginx-m3 - port: - number: 80 +apiVersion: networking.k8s.io/v1 # Ingress 使用的 API 版本 +kind: Ingress # Ingress 资源 +metadata: # Ingress 标识 + name: nginx-m3 # Ingress 名称 + namespace: default # 命名空间 + annotations: # Traefik 路由注解 + traefik.ingress.kubernetes.io/router.entrypoints: websecure # 使用 HTTPS entrypoint + traefik.ingress.kubernetes.io/router.tls.certresolver: cloudflare # 证书解析器 +spec: # Ingress 规则 + tls: # TLS 配置 + - hosts: # TLS 证书适用的域名列表 + - test03.jackadam.top # 域名 + rules: # HTTP/HTTPS 路由规则列表 + - host: test03.jackadam.top # 匹配域名 + http: # HTTP 规则 + paths: # 路径匹配列表 + - path: / # 根路径 + pathType: Prefix # 前缀匹配 + backend: # 后端目标 + service: # 使用 Service + name: nginx-m3 # 后端 Service 名称 + port: # 后端端口 + number: 80 # 端口号 --- # 03-02 HTTP-only:M3 路由(仅 web,无 TLS),与 nginx-m3 共用 Service -apiVersion: networking.k8s.io/v1 -kind: Ingress -metadata: - name: nginx-m3-http - namespace: default - annotations: - traefik.ingress.kubernetes.io/router.entrypoints: web -spec: - 
rules: - - host: test03.jackadam.top - http: - paths: - - path: / - pathType: Prefix - backend: - service: - name: nginx-m3 - port: - number: 80 +apiVersion: networking.k8s.io/v1 # Ingress 使用的 API 版本 +kind: Ingress # Ingress 资源 +metadata: # Ingress 标识 + name: nginx-m3-http # HTTP-only Ingress 名称 + namespace: default # 命名空间 + annotations: # Traefik 路由注解 + traefik.ingress.kubernetes.io/router.entrypoints: web # 使用 HTTP entrypoint +spec: # Ingress 规则 + rules: # 规则列表 + - host: test03.jackadam.top # 域名 + http: # HTTP 规则 + paths: # 路径列表 + - path: / # 根路径 + pathType: Prefix # 前缀匹配 + backend: # 后端目标 + service: # 后端 Service + name: nginx-m3 # 后端 Service 名称 + port: # 后端端口 + number: 80 # 端口号 diff --git a/ansible/files/nginx-matrix-tls/04-worker-ingressroute.yaml b/ansible/files/nginx-matrix-tls/04-worker-ingressroute.yaml index fed357c..76bdd00 100644 --- a/ansible/files/nginx-matrix-tls/04-worker-ingressroute.yaml +++ b/ansible/files/nginx-matrix-tls/04-worker-ingressroute.yaml @@ -1,98 +1,98 @@ # 03-02 TLS: M4 工作节点 + IngressRoute,路径 /(根路径),域名 test04.jackadam.top --- -apiVersion: v1 -kind: ConfigMap -metadata: - name: nginx-m4-html - namespace: default -data: - index.html: | +apiVersion: v1 # ConfigMap 使用的 API 版本 +kind: ConfigMap # 配置资源类型:ConfigMap +metadata: # 标识信息 + name: nginx-m4-html # ConfigMap 名称 + namespace: default # 命名空间 +data: # ConfigMap 数据区 + index.html: | # HTML 内容:挂载到 nginx 网页目录(内部内容行不改动) M4

M4

工作节点 + IngressRoute

- default.conf: | + default.conf: | # nginx 配置:挂载到 conf.d/default.conf(内部内容行不改动) server { listen 80; server_name localhost; root /usr/share/nginx/html; index index.html; location / { add_header X-Backend "M4"; try_files $uri $uri/ /index.html; } } --- -apiVersion: apps/v1 -kind: Deployment -metadata: - name: nginx-m4 - namespace: default - labels: - app: nginx-m4 - matrix: "03-02-m4" -spec: - replicas: 1 - selector: - matchLabels: - app: nginx-m4 - template: - metadata: - labels: - app: nginx-m4 - spec: - nodeSelector: - kubernetes.io/hostname: ylc64 - volumes: - - name: html - configMap: - name: nginx-m4-html - containers: - - name: nginx - image: nginx:alpine - ports: - - containerPort: 80 - volumeMounts: - - name: html - mountPath: /usr/share/nginx/html/index.html - subPath: index.html - readOnly: true - - name: html - mountPath: /etc/nginx/conf.d/default.conf - subPath: default.conf - readOnly: true +apiVersion: apps/v1 # Deployment 使用的 API 版本 +kind: Deployment # 工作负载:Deployment +metadata: # Deployment 标识信息 + name: nginx-m4 # Deployment 名称 + namespace: default # 部署命名空间 + labels: # 额外标签 + app: nginx-m4 # 应用标签 + matrix: "03-02-m4" # 矩阵编号标签 +spec: # Deployment 期望状态 + replicas: 1 # 副本数 + selector: # Deployment 选择器 + matchLabels: # 必须与 template.metadata.labels 对齐 + app: nginx-m4 # 选中 app 标签为 nginx-m4 的 Pod + template: # Pod 模板 + metadata: # Pod 元信息 + labels: # Pod 标签 + app: nginx-m4 # Pod 标签 + spec: # Pod 规范 + nodeSelector: # 固定跑到指定工作节点(按实际 ylc64/主机名) + kubernetes.io/hostname: ylc64 # 目标节点主机名 + volumes: # 卷定义 + - name: html # 卷名(给 volumeMounts 引用) + configMap: # 卷来源为 ConfigMap + name: nginx-m4-html # 引用的 ConfigMap 名称 + containers: # 容器列表 + - name: nginx # 容器名 + image: nginx:alpine # nginx 镜像 + ports: # 容器端口 + - containerPort: 80 # nginx HTTP 端口 + volumeMounts: # 挂载点列表 + - name: html # 对应 volumes[].name + mountPath: /usr/share/nginx/html/index.html # 网页首页文件路径 + subPath: index.html # 从 ConfigMap 取的 key + readOnly: true # 只读挂载 + - name: html # 同一卷再次挂载 + mountPath: 
/etc/nginx/conf.d/default.conf # nginx 配置文件路径 + subPath: default.conf # 从 ConfigMap 取的 key + readOnly: true # 只读挂载 --- -apiVersion: v1 -kind: Service -metadata: - name: nginx-m4 - namespace: default -spec: - selector: - app: nginx-m4 - ports: - - port: 80 - targetPort: 80 +apiVersion: v1 # Service 使用的 API 版本 +kind: Service # 网络抽象:为 Pod 提供稳定访问入口 +metadata: # Service 标识 + name: nginx-m4 # Service 名称 + namespace: default # 命名空间 +spec: # Service 期望状态 + selector: # Service 选择器 + app: nginx-m4 # 选中 app 标签为 nginx-m4 的 Pod + ports: # 端口映射列表 + - port: 80 # Service 暴露端口 + targetPort: 80 # 转发到 Pod 容器端口 --- -apiVersion: traefik.io/v1alpha1 -kind: IngressRoute -metadata: - name: nginx-m4 - namespace: default -spec: - entryPoints: - - websecure - routes: - - match: Host(`test04.jackadam.top`) - kind: Rule - services: - - name: nginx-m4 - port: 80 - tls: - certResolver: cloudflare +apiVersion: traefik.io/v1alpha1 # IngressRoute API 版本 +kind: IngressRoute # Traefik 路由资源类型 +metadata: # 标识信息 + name: nginx-m4 # IngressRoute 名称 + namespace: default # 命名空间 +spec: # 规则与 TLS + entryPoints: # Traefik entrypoints + - websecure # HTTPS entrypoint + routes: # 路由列表 + - match: Host(`test04.jackadam.top`) # 域名匹配 + kind: Rule # 规则类型 + services: # 后端服务列表 + - name: nginx-m4 # 后端 Service 名称 + port: 80 # 后端端口 + tls: # TLS 配置 + certResolver: cloudflare # 使用 cloudflare 证书解析器 --- # 03-02 HTTP-only:M4 路由(仅 web,无 TLS),与 nginx-m4 共用 Service -apiVersion: traefik.io/v1alpha1 -kind: IngressRoute -metadata: - name: nginx-m4-http - namespace: default -spec: - entryPoints: - - web - routes: - - match: Host(`test04.jackadam.top`) - kind: Rule - services: - - name: nginx-m4 - port: 80 +apiVersion: traefik.io/v1alpha1 # IngressRoute API 版本 +kind: IngressRoute # Traefik 路由资源类型 +metadata: # 标识信息 + name: nginx-m4-http # HTTP-only 路由名 + namespace: default # 命名空间 +spec: # 规则 + entryPoints: # HTTP entrypoint + - web # web(HTTP) + routes: # 路由列表 + - match: Host(`test04.jackadam.top`) # 域名匹配 + kind: Rule # 规则类型 + 
services: # 后端服务 + - name: nginx-m4 # 后端 Service 名称 + port: 80 # 后端端口 diff --git a/ansible/files/nginx-matrix/01-control-ingress.yaml b/ansible/files/nginx-matrix/01-control-ingress.yaml index 3f708f1..0ae266b 100644 --- a/ansible/files/nginx-matrix/01-control-ingress.yaml +++ b/ansible/files/nginx-matrix/01-control-ingress.yaml @@ -2,99 +2,99 @@ # 路径 /demo-m1,随机一台控制节点(nodeSelector + toleration,控制节点常有 NoSchedule 污点) # ConfigMap:首页 + default.conf(单文件 subPath 挂载,与 M2~M4 一致,便于 nginx 后续扩展) --- -apiVersion: v1 -kind: ConfigMap -metadata: - name: nginx-m1-html - namespace: default -data: - index.html: | +apiVersion: v1 # ConfigMap 使用的 API 版本 +kind: ConfigMap # 配置资源类型:ConfigMap +metadata: # 对该 ConfigMap 的标识信息 + name: nginx-m1-html # ConfigMap 名称 + namespace: default # 命名空间 +data: # ConfigMap 数据键值区 + index.html: | # HTML 内容:会挂载到 nginx 的网页目录 M1

M1

控制节点 + Ingress

Backend: M1

- default.conf: | + default.conf: | # nginx 配置:通过 subPath 单文件挂载到 conf.d/default.conf server { listen 80 default_server; server_name _; root /usr/share/nginx/html; index index.html; location / { add_header X-Backend "M1"; try_files $uri $uri/ /index.html; } } --- -apiVersion: apps/v1 -kind: Deployment -metadata: - name: nginx-m1 - namespace: default - labels: - app: nginx-m1 - matrix: "02-05-m1" -spec: - replicas: 1 - selector: - matchLabels: - app: nginx-m1 - template: - metadata: - labels: - app: nginx-m1 - spec: - nodeSelector: - node-role.kubernetes.io/control-plane: "" - tolerations: - - key: node-role.kubernetes.io/control-plane - operator: Exists - effect: NoSchedule - volumes: - - name: html - configMap: - name: nginx-m1-html - containers: - - name: nginx - image: nginx:alpine - ports: - - containerPort: 80 - volumeMounts: - - name: html - mountPath: /usr/share/nginx/html/index.html - subPath: index.html - readOnly: true - - name: html - mountPath: /etc/nginx/conf.d/default.conf - subPath: default.conf - readOnly: true +apiVersion: apps/v1 # Deployment 使用的 API 版本 +kind: Deployment # 工作负载:Deployment +metadata: # Deployment 标识信息 + name: nginx-m1 # Deployment 名称 + namespace: default # 部署命名空间 + labels: # 额外标签(用于检索/筛选) + app: nginx-m1 # 应用标签 + matrix: "02-05-m1" # 矩阵编号标签(用于你后续调试/统计) +spec: # Deployment 期望状态 + replicas: 1 # 副本数:本例为 1(便于对应路径验证) + selector: # Deployment 用于选择 Pod 的条件 + matchLabels: # 标签匹配集合(用于选中模板 Pod) + app: nginx-m1 # 必须与 template.metadata.labels 对上 + template: # Pod 模板 + metadata: # Pod 的元信息 + labels: # Pod 标签 + app: nginx-m1 # Pod 标签 + spec: # Pod 规范 + nodeSelector: # 节点选择:固定跑在 control-plane 上 + node-role.kubernetes.io/control-plane: "" # 选择带 control-plane 角色标签的节点 + tolerations: # 容忍污点:让 Pod 能调度到 control-plane + - key: node-role.kubernetes.io/control-plane # 污点 key + operator: Exists # 存在即匹配 + effect: NoSchedule # 匹配 NoSchedule 污点效果 + volumes: # Pod 内卷定义 + - name: html # 卷名:给 volumeMounts 引用 + configMap: # 卷来源:ConfigMap + name: nginx-m1-html # 
引用的 ConfigMap 名称 + containers: # 容器列表 + - name: nginx # 容器名 + image: nginx:alpine # nginx 镜像 + ports: # 容器端口列表 + - containerPort: 80 # nginx HTTP 端口 + volumeMounts: # 容器内挂载点列表 + - name: html # 对应 volumes[].name + mountPath: /usr/share/nginx/html/index.html # 挂载到网页文件路径 + subPath: index.html # 从 ConfigMap 里选取单个 key + readOnly: true # 只读挂载(配置文件更安全) + - name: html # 第二处也使用同一个卷 + mountPath: /etc/nginx/conf.d/default.conf # nginx 配置文件路径 + subPath: default.conf # 从 ConfigMap 里选取对应 key + readOnly: true # 只读挂载 --- -apiVersion: v1 -kind: Service -metadata: - name: nginx-m1 - namespace: default -spec: - selector: - app: nginx-m1 - ports: - - port: 80 - targetPort: 80 +apiVersion: v1 # Service 使用的 API 版本 +kind: Service # 网络抽象:把 Pod 暴露成稳定访问入口 +metadata: # Service 标识 + name: nginx-m1 # Service 名称 + namespace: default # Service 所在命名空间 +spec: # Service 期望状态 + selector: # Service 按标签选择后端 Pod + app: nginx-m1 # 选择 nginx-m1 Pod + ports: # Service 端口映射 + - port: 80 # Service 端口 + targetPort: 80 # 转发到 Pod 的端口 --- -apiVersion: traefik.io/v1alpha1 -kind: Middleware -metadata: - name: stripprefix-m1 - namespace: default -spec: - stripPrefix: - prefixes: - - /demo-m1 +apiVersion: traefik.io/v1alpha1 # Traefik Middleware 使用的 API 版本 +kind: Middleware # 路由中间件:stripPrefix +metadata: # Middleware 标识 + name: stripprefix-m1 # Middleware 名称 + namespace: default # 命名空间 +spec: # Middleware 配置 + stripPrefix: # 去掉前缀 + prefixes: # 要剔除的前缀列表 + - /demo-m1 # 本矩阵的路径前缀 --- -apiVersion: networking.k8s.io/v1 -kind: Ingress -metadata: - name: nginx-m1 - namespace: default - annotations: - traefik.ingress.kubernetes.io/router.middlewares: default-stripprefix-m1@kubernetescrd -spec: - rules: - - http: - paths: - - path: /demo-m1 - pathType: Prefix - backend: - service: - name: nginx-m1 - port: - number: 80 +apiVersion: networking.k8s.io/v1 # Ingress 使用的 API 版本 +kind: Ingress # 入口资源:把路径转发到 Service +metadata: # Ingress 标识 + name: nginx-m1 # Ingress 名称 + namespace: default # 命名空间 + annotations: # Ingress 注解:Traefik 
用来绑定中间件 + traefik.ingress.kubernetes.io/router.middlewares: default-stripprefix-m1@kubernetescrd # 绑定 stripprefix-m1 +spec: # Ingress 规则 + rules: # 规则列表 + - http: # HTTP 规则 + paths: # 路径匹配列表 + - path: /demo-m1 # 匹配路径 + pathType: Prefix # 前缀匹配类型 + backend: # 后端目标 + service: # 后端 Service + name: nginx-m1 # Service 名 + port: # Service 端口 + number: 80 # 端口号 diff --git a/ansible/files/nginx-matrix/02-control-ingressroute.yaml b/ansible/files/nginx-matrix/02-control-ingressroute.yaml index 4067e79..0e3514d 100644 --- a/ansible/files/nginx-matrix/02-control-ingressroute.yaml +++ b/ansible/files/nginx-matrix/02-control-ingressroute.yaml @@ -2,93 +2,93 @@ # 路径 /demo-m2,指定一台控制节点(按实际 FQDN 修改 kubernetes.io/hostname) # ConfigMap:首页 + default.conf,X-Backend: M2 便于区分 --- -apiVersion: v1 -kind: ConfigMap -metadata: - name: nginx-m2-html - namespace: default -data: - index.html: | +apiVersion: v1 # ConfigMap 使用的 API 版本 +kind: ConfigMap # 配置资源类型:ConfigMap +metadata: # ConfigMap 标识信息 + name: nginx-m2-html # ConfigMap 名称 + namespace: default # 命名空间 +data: # ConfigMap 数据区 + index.html: | # HTML 内容:会挂载到 nginx 的网页目录 M2

M2

控制节点 + IngressRoute

- default.conf: | + default.conf: | # nginx 配置:通过 subPath 单文件挂载到 conf.d/default.conf server { listen 80; server_name localhost; root /usr/share/nginx/html; index index.html; location / { add_header X-Backend "M2"; try_files $uri $uri/ /index.html; } } --- -apiVersion: apps/v1 -kind: Deployment -metadata: - name: nginx-m2 - namespace: default - labels: - app: nginx-m2 - matrix: "02-05-m2" -spec: - replicas: 1 - selector: - matchLabels: - app: nginx-m2 - template: - metadata: - labels: - app: nginx-m2 - spec: - nodeSelector: - kubernetes.io/hostname: ylc61 - volumes: - - name: html - configMap: - name: nginx-m2-html - containers: - - name: nginx - image: nginx:alpine - ports: - - containerPort: 80 - volumeMounts: - - name: html - mountPath: /usr/share/nginx/html/index.html - subPath: index.html - readOnly: true - - name: html - mountPath: /etc/nginx/conf.d/default.conf - subPath: default.conf - readOnly: true +apiVersion: apps/v1 # Deployment 使用的 API 版本 +kind: Deployment # 工作负载:Deployment +metadata: # Deployment 标识信息 + name: nginx-m2 # Deployment 名称 + namespace: default # 部署命名空间 + labels: # 标签集合 + app: nginx-m2 # 应用标签 + matrix: "02-05-m2" # 矩阵编号标签 +spec: # Deployment 期望状态 + replicas: 1 # 副本数:单副本便于验证 + selector: # Deployment 选择 Pod + matchLabels: # 标签匹配集合(用于选中模板 Pod) + app: nginx-m2 # 必须与 template.metadata.labels 对上 + template: # Pod 模板 + metadata: # Pod 元信息 + labels: # Pod 标签 + app: nginx-m2 # Pod 标签 + spec: # Pod 规范 + nodeSelector: # 固定调度节点(按实际修改) + kubernetes.io/hostname: ylc61 # 目标节点主机名 + volumes: # 卷定义 + - name: html # 卷名 + configMap: # 卷来源为 ConfigMap + name: nginx-m2-html # 引用的 ConfigMap 名称 + containers: # 容器列表 + - name: nginx # 容器名 + image: nginx:alpine # nginx 镜像 + ports: # 容器端口声明 + - containerPort: 80 # nginx 监听端口 + volumeMounts: # 容器内挂载点 + - name: html # 对应 volumes[].name + mountPath: /usr/share/nginx/html/index.html # 挂到网页文件 + subPath: index.html # 使用 ConfigMap 的 index.html key + readOnly: true # 配置只读挂载 + - name: html # 第二处配置仍复用该卷 + mountPath: 
/etc/nginx/conf.d/default.conf # 挂到 nginx 配置文件 + subPath: default.conf # 使用 ConfigMap 的 default.conf key + readOnly: true # 只读挂载 --- -apiVersion: v1 -kind: Service -metadata: - name: nginx-m2 - namespace: default -spec: - selector: - app: nginx-m2 - ports: - - port: 80 - targetPort: 80 +apiVersion: v1 # Service 使用的 API 版本 +kind: Service # 网络抽象:为 Pod 提供稳定访问地址 +metadata: # Service 标识 + name: nginx-m2 # Service 名称 + namespace: default # 命名空间 +spec: # Service 期望状态 + selector: # 通过标签选择后端 Pod + app: nginx-m2 # 选择 app 标签为 nginx-m2 的 Pod + ports: # Service 端口映射 + - port: 80 # Service 暴露端口 + targetPort: 80 # 转发到 Pod 容器端口 --- -apiVersion: traefik.io/v1alpha1 -kind: Middleware -metadata: - name: stripprefix-m2 - namespace: default -spec: - stripPrefix: - prefixes: - - /demo-m2 +apiVersion: traefik.io/v1alpha1 # Traefik Middleware 使用的 API 版本 +kind: Middleware # 中间件类型:stripPrefix +metadata: # Middleware 标识 + name: stripprefix-m2 # Middleware 名称 + namespace: default # 命名空间 +spec: # 中间件配置 + stripPrefix: # 去掉路径前缀 + prefixes: # 需要剔除的前缀列表 + - /demo-m2 # 本矩阵的路径前缀 --- -apiVersion: traefik.io/v1alpha1 -kind: IngressRoute -metadata: - name: nginx-m2 - namespace: default -spec: - entryPoints: - - web - routes: - - match: PathPrefix(`/demo-m2`) - kind: Rule - middlewares: - - name: stripprefix-m2 - services: - - name: nginx-m2 - port: 80 +apiVersion: traefik.io/v1alpha1 # IngressRoute 的 API 版本 +kind: IngressRoute # 路由资源类型 +metadata: # IngressRoute 标识 + name: nginx-m2 # 路由名称 + namespace: default # 命名空间 +spec: # 路由规则 + entryPoints: # Traefik 入口点列表 + - web # 使用 web entrypoint + routes: # 路由列表 + - match: PathPrefix(`/demo-m2`) # 匹配 /demo-m2 前缀 + kind: Rule # 规则类型:Rule + middlewares: # 绑定中间件(去前缀) + - name: stripprefix-m2 # 使用 stripprefix-m2 + services: # 匹配后转发的服务 + - name: nginx-m2 # 后端 Service 名称 + port: 80 # 后端 Service 端口 diff --git a/ansible/files/nginx-matrix/03-worker-ingress.yaml b/ansible/files/nginx-matrix/03-worker-ingress.yaml index 29fd755..6c8a771 100644 --- 
a/ansible/files/nginx-matrix/03-worker-ingress.yaml +++ b/ansible/files/nginx-matrix/03-worker-ingress.yaml @@ -2,95 +2,95 @@ # 路径 /demo-m3,随机一台工作节点(nodeSelector: node-role.kubernetes.io/worker) # ConfigMap:首页 + default.conf,X-Backend: M3 便于区分 --- -apiVersion: v1 -kind: ConfigMap -metadata: - name: nginx-m3-html - namespace: default -data: - index.html: | +apiVersion: v1 # ConfigMap 使用的 API 版本 +kind: ConfigMap # 配置资源类型:ConfigMap +metadata: # 对该 ConfigMap 的标识信息 + name: nginx-m3-html # ConfigMap 名称 + namespace: default # 命名空间 +data: # ConfigMap 数据键值区 + index.html: | # HTML 内容:会挂载到 nginx 网页目录(内部内容行不改动) M3

M3

工作节点 + Ingress

- default.conf: | + default.conf: | # nginx 配置:通过 subPath 单文件挂载到 conf.d/default.conf(内部内容行不改动) server { listen 80; server_name localhost; root /usr/share/nginx/html; index index.html; location / { add_header X-Backend "M3"; try_files $uri $uri/ /index.html; } } --- -apiVersion: apps/v1 -kind: Deployment -metadata: - name: nginx-m3 - namespace: default - labels: - app: nginx-m3 - matrix: "02-05-m3" -spec: - replicas: 1 - selector: - matchLabels: - app: nginx-m3 - template: - metadata: - labels: - app: nginx-m3 - spec: - nodeSelector: - node-role.kubernetes.io/worker: "" - volumes: - - name: html - configMap: - name: nginx-m3-html - containers: - - name: nginx - image: nginx:alpine - ports: - - containerPort: 80 - volumeMounts: - - name: html - mountPath: /usr/share/nginx/html/index.html - subPath: index.html - readOnly: true - - name: html - mountPath: /etc/nginx/conf.d/default.conf - subPath: default.conf - readOnly: true +apiVersion: apps/v1 # Deployment 使用的 API 版本 +kind: Deployment # 工作负载:Deployment +metadata: # Deployment 标识信息 + name: nginx-m3 # Deployment 名称 + namespace: default # 部署命名空间 + labels: # 额外标签(用于筛选/统计) + app: nginx-m3 # 应用标签 + matrix: "02-05-m3" # 矩阵编号标签 +spec: # Deployment 期望状态 + replicas: 1 # 副本数:这里为 1 + selector: # Deployment 用于选择 Pod 的条件 + matchLabels: # 标签匹配集合(用于选中模板 Pod) + app: nginx-m3 # 必须与 template.metadata.labels 对上 + template: # Pod 模板 + metadata: # Pod 元信息 + labels: # Pod 标签 + app: nginx-m3 # Pod 标签 + spec: # Pod 规范 + nodeSelector: # 固定跑到 worker 节点 + node-role.kubernetes.io/worker: "" # worker 节点 selector + volumes: # 卷定义 + - name: html # 卷名(供 volumeMounts 引用) + configMap: # 卷来源:ConfigMap + name: nginx-m3-html # 引用的 ConfigMap 名称 + containers: # 容器列表 + - name: nginx # 容器名 + image: nginx:alpine # nginx 镜像 + ports: # 容器端口声明 + - containerPort: 80 # nginx HTTP 端口 + volumeMounts: # 容器内挂载点 + - name: html # 对应 volumes[].name + mountPath: /usr/share/nginx/html/index.html # 挂到网页首页 + subPath: index.html # 从 ConfigMap 取该 key + readOnly: true # 配置只读 + 
- name: html # 第二处仍引用同一个卷 + mountPath: /etc/nginx/conf.d/default.conf # 挂到 nginx 配置文件路径 + subPath: default.conf # 从 ConfigMap 取该 key + readOnly: true # 配置只读 --- -apiVersion: v1 -kind: Service -metadata: - name: nginx-m3 - namespace: default -spec: - selector: - app: nginx-m3 - ports: - - port: 80 - targetPort: 80 +apiVersion: v1 # Service 使用的 API 版本 +kind: Service # 网络抽象:把 Pod 暴露成稳定访问入口 +metadata: # Service 标识 + name: nginx-m3 # Service 名称 + namespace: default # 命名空间 +spec: # Service 期望状态 + selector: # Service 通过标签选中后端 Pod + app: nginx-m3 # 选择 app 标签 + ports: # Service 端口映射列表 + - port: 80 # Service 暴露端口 + targetPort: 80 # 转发到 Pod 的容器端口 --- -apiVersion: traefik.io/v1alpha1 -kind: Middleware -metadata: - name: stripprefix-m3 - namespace: default -spec: - stripPrefix: - prefixes: - - /demo-m3 +apiVersion: traefik.io/v1alpha1 # Traefik Middleware API 版本 +kind: Middleware # 中间件类型:stripPrefix +metadata: # Middleware 标识 + name: stripprefix-m3 # 名称 + namespace: default # 命名空间 +spec: # 中间件配置 + stripPrefix: # 去掉指定路径前缀 + prefixes: # 前缀列表 + - /demo-m3 # 本矩阵路径前缀 --- -apiVersion: networking.k8s.io/v1 -kind: Ingress -metadata: - name: nginx-m3 - namespace: default - annotations: - traefik.ingress.kubernetes.io/router.middlewares: default-stripprefix-m3@kubernetescrd -spec: - rules: - - http: - paths: - - path: /demo-m3 - pathType: Prefix - backend: - service: - name: nginx-m3 - port: - number: 80 +apiVersion: networking.k8s.io/v1 # Ingress 使用的 API 版本 +kind: Ingress # 入口资源:把路径转发到 Service +metadata: # Ingress 标识 + name: nginx-m3 # Ingress 名称 + namespace: default # 命名空间 + annotations: # Traefik 注解:绑定中间件 + traefik.ingress.kubernetes.io/router.middlewares: default-stripprefix-m3@kubernetescrd # 绑定 stripprefix-m3 中间件 +spec: # Ingress 规则 + rules: # 规则列表 + - http: # HTTP 规则 + paths: # 路径匹配列表 + - path: /demo-m3 # 匹配路径 + pathType: Prefix # 前缀匹配类型 + backend: # 后端目标 + service: # 后端是 Service + name: nginx-m3 # Service 名称 + port: # 后端端口 + number: 80 # 端口号 diff --git 
a/ansible/files/nginx-matrix/04-worker-ingressroute.yaml b/ansible/files/nginx-matrix/04-worker-ingressroute.yaml index 359efb6..e17d00f 100644 --- a/ansible/files/nginx-matrix/04-worker-ingressroute.yaml +++ b/ansible/files/nginx-matrix/04-worker-ingressroute.yaml @@ -2,93 +2,93 @@ # 路径 /demo-m4,指定一台工作节点(按实际 FQDN 修改 kubernetes.io/hostname) # ConfigMap:首页 + default.conf,X-Backend: M4 便于区分 --- -apiVersion: v1 -kind: ConfigMap -metadata: - name: nginx-m4-html - namespace: default -data: - index.html: | +apiVersion: v1 # ConfigMap 使用的 API 版本 +kind: ConfigMap # 配置资源类型:ConfigMap +metadata: # ConfigMap 标识信息 + name: nginx-m4-html # ConfigMap 名称 + namespace: default # 命名空间 +data: # ConfigMap 数据区 + index.html: | # HTML 内容:挂载到 nginx 网页目录(内部内容行不改动) M4

M4

工作节点 + IngressRoute

- default.conf: | + default.conf: | # nginx 配置:通过 subPath 挂载到 conf.d/default.conf(内部内容行不改动) server { listen 80; server_name localhost; root /usr/share/nginx/html; index index.html; location / { add_header X-Backend "M4"; try_files $uri $uri/ /index.html; } } --- -apiVersion: apps/v1 -kind: Deployment -metadata: - name: nginx-m4 - namespace: default - labels: - app: nginx-m4 - matrix: "02-05-m4" -spec: - replicas: 1 - selector: - matchLabels: - app: nginx-m4 - template: - metadata: - labels: - app: nginx-m4 - spec: - nodeSelector: - kubernetes.io/hostname: ylc64 - volumes: - - name: html - configMap: - name: nginx-m4-html - containers: - - name: nginx - image: nginx:alpine - ports: - - containerPort: 80 - volumeMounts: - - name: html - mountPath: /usr/share/nginx/html/index.html - subPath: index.html - readOnly: true - - name: html - mountPath: /etc/nginx/conf.d/default.conf - subPath: default.conf - readOnly: true +apiVersion: apps/v1 # Deployment 使用的 API 版本 +kind: Deployment # 工作负载:Deployment +metadata: # Deployment 标识信息 + name: nginx-m4 # Deployment 名称 + namespace: default # 部署命名空间 + labels: # 应用标签/矩阵标签 + app: nginx-m4 # 应用标签 + matrix: "02-05-m4" # 矩阵编号 +spec: # Deployment 期望状态 + replicas: 1 # 副本数 + selector: # Deployment 选择器 + matchLabels: # 标签匹配集合(用于选中模板 Pod) + app: nginx-m4 # 必须与 template.metadata.labels 对上 + template: # Pod 模板 + metadata: # Pod 元信息 + labels: # Pod 标签 + app: nginx-m4 # Pod 标签 + spec: # Pod 规范 + nodeSelector: # 固定运行的工作节点 + kubernetes.io/hostname: ylc64 # worker 节点主机名 + volumes: # 卷定义 + - name: html # 卷名 + configMap: # 卷来源 + name: nginx-m4-html # 引用的 ConfigMap 名称 + containers: # 容器列表 + - name: nginx # 容器名 + image: nginx:alpine # nginx 镜像 + ports: # 容器端口 + - containerPort: 80 # HTTP 端口 + volumeMounts: # 容器内挂载 + - name: html # 引用 volumes[].name + mountPath: /usr/share/nginx/html/index.html # 挂到首页文件 + subPath: index.html # 取 ConfigMap 的 index.html key + readOnly: true # 只读 + - name: html # 仍复用同一个卷 + mountPath: /etc/nginx/conf.d/default.conf # 挂到 
nginx 配置文件 + subPath: default.conf # 取 ConfigMap 的 default.conf key + readOnly: true # 只读 --- -apiVersion: v1 -kind: Service -metadata: - name: nginx-m4 - namespace: default -spec: - selector: - app: nginx-m4 - ports: - - port: 80 - targetPort: 80 +apiVersion: v1 # Service 使用的 API 版本 +kind: Service # 网络抽象:把 Pod 暴露为稳定入口 +metadata: # Service 标识 + name: nginx-m4 # Service 名称 + namespace: default # 命名空间 +spec: # Service 期望状态 + selector: # Service 选择器 + app: nginx-m4 # 选中后端 Pod + ports: # 端口映射列表 + - port: 80 # Service 端口 + targetPort: 80 # 转发到 Pod 容器端口 --- -apiVersion: traefik.io/v1alpha1 -kind: Middleware -metadata: - name: stripprefix-m4 - namespace: default -spec: - stripPrefix: - prefixes: - - /demo-m4 +apiVersion: traefik.io/v1alpha1 # Traefik Middleware API 版本 +kind: Middleware # 中间件:stripPrefix +metadata: # Middleware 标识 + name: stripprefix-m4 # 名称 + namespace: default # 命名空间 +spec: # 中间件配置 + stripPrefix: # 去除路径前缀 + prefixes: # 前缀列表 + - /demo-m4 # 本矩阵路径前缀 --- -apiVersion: traefik.io/v1alpha1 -kind: IngressRoute -metadata: - name: nginx-m4 - namespace: default -spec: - entryPoints: - - web - routes: - - match: PathPrefix(`/demo-m4`) - kind: Rule - middlewares: - - name: stripprefix-m4 - services: - - name: nginx-m4 - port: 80 +apiVersion: traefik.io/v1alpha1 # IngressRoute API 版本 +kind: IngressRoute # Traefik 路由 CRD +metadata: # IngressRoute 标识 + name: nginx-m4 # 路由名称 + namespace: default # 命名空间 +spec: # IngressRoute 规则 + entryPoints: # 入口点列表 + - web # web(HTTP) + routes: # 路由列表 + - match: PathPrefix(`/demo-m4`) # 匹配 /demo-m4 前缀 + kind: Rule # 规则类型 + middlewares: # 绑定中间件 + - name: stripprefix-m4 # 需要去前缀 + services: # 后端服务列表 + - name: nginx-m4 # Service 名称 + port: 80 # Service 端口 diff --git a/ansible/files/nodejs-demo/04-01-nodejs-demo.yaml b/ansible/files/nodejs-demo/04-01-nodejs-demo.yaml index 5deff0a..b5ed464 100644 --- a/ansible/files/nodejs-demo/04-01-nodejs-demo.yaml +++ b/ansible/files/nodejs-demo/04-01-nodejs-demo.yaml @@ -1,54 +1,54 @@ # 
对应文档:docs/04-01-k3s-nodejs-高级部署.md # 累积:基线(Deployment + Service + Ingress) -apiVersion: apps/v1 -kind: Deployment -metadata: - name: nodejs-demo - namespace: default -spec: - replicas: 1 - selector: - matchLabels: - app: nodejs-demo - template: - metadata: - labels: - app: nodejs-demo - spec: - containers: - - name: nodejs-demo - image: node:18-alpine - command: ["node", "-e", "require('http').createServer((req,res)=>res.end('Hello World from Node.js')).listen(3000)"] - ports: - - containerPort: 3000 +apiVersion: apps/v1 # Deployment API 版本 +kind: Deployment # 工作负载:Deployment +metadata: # Deployment 元信息 + name: nodejs-demo # Deployment 名称 + namespace: default # 命名空间 +spec: # Deployment 规格 + replicas: 1 # 副本数 + selector: # Deployment 选择器 + matchLabels: # 标签匹配集合 + app: nodejs-demo # 匹配 app=nodejs-demo 的 Pod + template: # Pod 模板 + metadata: # Pod 元信息 + labels: # Pod 标签 + app: nodejs-demo # 与 selector.matchLabels 对齐 + spec: # Pod 规格 + containers: # 容器列表 + - name: nodejs-demo # 容器名 + image: node:18-alpine # Node.js 镜像 + command: ["node", "-e", "require('http').createServer((req,res)=>res.end('Hello World from Node.js')).listen(3000)"] # 直接运行内联 Node.js HTTP 服务 + ports: # 容器端口 + - containerPort: 3000 # 应用监听端口 --- -apiVersion: v1 -kind: Service -metadata: - name: nodejs-demo - namespace: default -spec: - selector: - app: nodejs-demo - ports: - - port: 80 - targetPort: 3000 +apiVersion: v1 # Service API 版本 +kind: Service # Service 资源 +metadata: # Service 元信息 + name: nodejs-demo # Service 名称 + namespace: default # 命名空间 +spec: # Service 规格 + selector: # 选择后端 Pod + app: nodejs-demo # 选中 app=nodejs-demo + ports: # 端口映射 + - port: 80 # Service 暴露端口 + targetPort: 3000 # 转发到容器端口 --- -apiVersion: networking.k8s.io/v1 -kind: Ingress -metadata: - name: nodejs-demo - namespace: default - annotations: - traefik.ingress.kubernetes.io/router.entrypoints: web -spec: - rules: - - http: - paths: - - path: /node - pathType: Prefix - backend: - service: - name: nodejs-demo - port: - number: 80 
+apiVersion: networking.k8s.io/v1 # Ingress API 版本 +kind: Ingress # Ingress 资源 +metadata: # Ingress 元信息 + name: nodejs-demo # Ingress 名称 + namespace: default # 命名空间 + annotations: # Traefik 注解 + traefik.ingress.kubernetes.io/router.entrypoints: web # 使用 web(HTTP) 入口 +spec: # Ingress 规则 + rules: # 规则列表 + - http: # HTTP 路由 + paths: # 路径列表 + - path: /node # 匹配路径前缀 + pathType: Prefix # 前缀匹配 + backend: # 后端目标 + service: # 后端 Service + name: nodejs-demo # Service 名称 + port: # Service 端口 + number: 80 # 端口号 diff --git a/ansible/files/nodejs-demo/04-02-nodejs-demo.yaml b/ansible/files/nodejs-demo/04-02-nodejs-demo.yaml index 3050dfc..c2773b8 100644 --- a/ansible/files/nodejs-demo/04-02-nodejs-demo.yaml +++ b/ansible/files/nodejs-demo/04-02-nodejs-demo.yaml @@ -1,58 +1,58 @@ # 对应文档:docs/04-02-nodejs-镜像与运行命令.md # 累积:04-01 + 固定镜像 tag、imagePullPolicy、command/args -apiVersion: apps/v1 -kind: Deployment -metadata: - name: nodejs-demo - namespace: default -spec: - replicas: 1 - selector: - matchLabels: - app: nodejs-demo - template: - metadata: - labels: - app: nodejs-demo - spec: - containers: - - name: nodejs-demo - image: node:18.20-alpine - imagePullPolicy: IfNotPresent - command: ["node"] - args: - - "-e" - - "require('http').createServer((req,res)=>res.end('Hello from pinned image')).listen(3000)" - ports: - - containerPort: 3000 +apiVersion: apps/v1 # Deployment API 版本 +kind: Deployment # 工作负载:Deployment +metadata: # Deployment 元信息 + name: nodejs-demo # Deployment 名称 + namespace: default # 命名空间 +spec: # Deployment 规格 + replicas: 1 # 副本数 + selector: # Deployment 选择器 + matchLabels: # 标签匹配集合 + app: nodejs-demo # 匹配 app=nodejs-demo 的 Pod + template: # Pod 模板 + metadata: # Pod 元信息 + labels: # Pod 标签 + app: nodejs-demo # 与 selector.matchLabels 对齐 + spec: # Pod 规格 + containers: # 容器列表 + - name: nodejs-demo # 容器名 + image: node:18.20-alpine # 固定 tag 的 Node.js 镜像 + imagePullPolicy: IfNotPresent # 拉取策略:本地有则不重复拉取 + command: ["node"] # 主命令 + args: # 命令参数 + - "-e" # 执行内联脚本 + - 
"require('http').createServer((req,res)=>res.end('Hello from pinned image')).listen(3000)" # Node.js 内联服务逻辑 + ports: # 容器端口 + - containerPort: 3000 # 应用监听端口 --- -apiVersion: v1 -kind: Service -metadata: - name: nodejs-demo - namespace: default -spec: - selector: - app: nodejs-demo - ports: - - port: 80 - targetPort: 3000 +apiVersion: v1 # Service API 版本 +kind: Service # Service 资源 +metadata: # Service 元信息 + name: nodejs-demo # Service 名称 + namespace: default # 命名空间 +spec: # Service 规格 + selector: # 选择后端 Pod + app: nodejs-demo # 选中 app=nodejs-demo + ports: # 端口映射 + - port: 80 # Service 暴露端口 + targetPort: 3000 # 转发到容器端口 --- -apiVersion: networking.k8s.io/v1 -kind: Ingress -metadata: - name: nodejs-demo - namespace: default - annotations: - traefik.ingress.kubernetes.io/router.entrypoints: web -spec: - rules: - - http: - paths: - - path: /node - pathType: Prefix - backend: - service: - name: nodejs-demo - port: - number: 80 +apiVersion: networking.k8s.io/v1 # Ingress API 版本 +kind: Ingress # Ingress 资源 +metadata: # Ingress 元信息 + name: nodejs-demo # Ingress 名称 + namespace: default # 命名空间 + annotations: # Traefik 注解 + traefik.ingress.kubernetes.io/router.entrypoints: web # 使用 web(HTTP) 入口 +spec: # Ingress 规则 + rules: # 规则列表 + - http: # HTTP 路由 + paths: # 路径列表 + - path: /node # 匹配路径前缀 + pathType: Prefix # 前缀匹配 + backend: # 后端目标 + service: # 后端 Service + name: nodejs-demo # Service 名称 + port: # Service 端口 + number: 80 # 端口号 diff --git a/ansible/files/nodejs-demo/04-03-nodejs-demo.yaml b/ansible/files/nodejs-demo/04-03-nodejs-demo.yaml index 00f6c4a..9308aa6 100644 --- a/ansible/files/nodejs-demo/04-03-nodejs-demo.yaml +++ b/ansible/files/nodejs-demo/04-03-nodejs-demo.yaml @@ -1,75 +1,75 @@ # 对应文档:docs/04-03-nodejs-环境变量与配置注入.md # 累积:04-02 + ConfigMap + 通过 env 注入 APP_MSG(镜像仍用 18.20-alpine 与 04-02 一致) -apiVersion: v1 -kind: ConfigMap -metadata: - name: nodejs-demo-config - namespace: default -data: - APP_MSG: "Hello from ConfigMap" +apiVersion: v1 # ConfigMap API 版本 +kind: 
ConfigMap # 配置资源:ConfigMap +metadata: # ConfigMap 元信息 + name: nodejs-demo-config # ConfigMap 名称 + namespace: default # 命名空间 +data: # 配置键值 + APP_MSG: "Hello from ConfigMap" # 注入给应用的消息内容 --- -apiVersion: apps/v1 -kind: Deployment -metadata: - name: nodejs-demo - namespace: default -spec: - replicas: 1 - selector: - matchLabels: - app: nodejs-demo - template: - metadata: - labels: - app: nodejs-demo - spec: - containers: - - name: nodejs-demo - image: node:18.20-alpine - imagePullPolicy: IfNotPresent - env: - - name: APP_MSG - valueFrom: - configMapKeyRef: - name: nodejs-demo-config - key: APP_MSG - command: - - node - - "-e" - - | +apiVersion: apps/v1 # Deployment API 版本 +kind: Deployment # 工作负载:Deployment +metadata: # Deployment 元信息 + name: nodejs-demo # Deployment 名称 + namespace: default # 命名空间 +spec: # Deployment 规格 + replicas: 1 # 副本数 + selector: # Deployment 选择器 + matchLabels: # 标签匹配集合 + app: nodejs-demo # 匹配 app=nodejs-demo 的 Pod + template: # Pod 模板 + metadata: # Pod 元信息 + labels: # Pod 标签 + app: nodejs-demo # 与 selector.matchLabels 对齐 + spec: # Pod 规格 + containers: # 容器列表 + - name: nodejs-demo # 容器名 + image: node:18.20-alpine # Node.js 镜像 + imagePullPolicy: IfNotPresent # 拉取策略 + env: # 环境变量注入 + - name: APP_MSG # 环境变量名 + valueFrom: # 从资源引用取值 + configMapKeyRef: # 从 ConfigMap key 读取 + name: nodejs-demo-config # ConfigMap 名称 + key: APP_MSG # ConfigMap 键名 + command: # 启动命令 + - node # 运行 node + - "-e" # 执行内联脚本 + - | # 多行 JS 脚本(内部内容不改动) const http=require('http'); const msg=process.env.APP_MSG||'no env'; http.createServer((q,s)=>s.end(msg)).listen(3000); - ports: - - containerPort: 3000 + ports: # 容器端口 + - containerPort: 3000 # 应用监听端口 --- -apiVersion: v1 -kind: Service -metadata: - name: nodejs-demo - namespace: default -spec: - selector: - app: nodejs-demo - ports: - - port: 80 - targetPort: 3000 +apiVersion: v1 # Service API 版本 +kind: Service # Service 资源 +metadata: # Service 元信息 + name: nodejs-demo # Service 名称 + namespace: default # 命名空间 +spec: # Service 规格 + 
selector: # 选择后端 Pod + app: nodejs-demo # 选中 app=nodejs-demo + ports: # 端口映射 + - port: 80 # Service 暴露端口 + targetPort: 3000 # 转发到容器端口 --- -apiVersion: networking.k8s.io/v1 -kind: Ingress -metadata: - name: nodejs-demo - namespace: default - annotations: - traefik.ingress.kubernetes.io/router.entrypoints: web -spec: - rules: - - http: - paths: - - path: /node - pathType: Prefix - backend: - service: - name: nodejs-demo - port: - number: 80 +apiVersion: networking.k8s.io/v1 # Ingress API 版本 +kind: Ingress # Ingress 资源 +metadata: # Ingress 元信息 + name: nodejs-demo # Ingress 名称 + namespace: default # 命名空间 + annotations: # Traefik 注解 + traefik.ingress.kubernetes.io/router.entrypoints: web # 使用 web(HTTP) 入口 +spec: # Ingress 规则 + rules: # 规则列表 + - http: # HTTP 路由 + paths: # 路径列表 + - path: /node # 匹配路径前缀 + pathType: Prefix # 前缀匹配 + backend: # 后端目标 + service: # 后端 Service + name: nodejs-demo # Service 名称 + port: # Service 端口 + number: 80 # 端口号 diff --git a/ansible/files/nodejs-demo/04-04-nodejs-demo.yaml b/ansible/files/nodejs-demo/04-04-nodejs-demo.yaml index 85779af..3313608 100644 --- a/ansible/files/nodejs-demo/04-04-nodejs-demo.yaml +++ b/ansible/files/nodejs-demo/04-04-nodejs-demo.yaml @@ -1,75 +1,75 @@ # 对应文档:docs/04-04-nodejs-端口与Service.md # 累积:04-03 + 容器与进程改监听 8080,Service targetPort 对齐 -apiVersion: v1 -kind: ConfigMap -metadata: - name: nodejs-demo-config - namespace: default -data: - APP_MSG: "Hello from ConfigMap" +apiVersion: v1 # ConfigMap API 版本 +kind: ConfigMap # 配置资源:ConfigMap +metadata: # ConfigMap 元信息 + name: nodejs-demo-config # ConfigMap 名称 + namespace: default # 命名空间 +data: # 配置键值 + APP_MSG: "Hello from ConfigMap" # 注入给应用的消息内容 --- -apiVersion: apps/v1 -kind: Deployment -metadata: - name: nodejs-demo - namespace: default -spec: - replicas: 1 - selector: - matchLabels: - app: nodejs-demo - template: - metadata: - labels: - app: nodejs-demo - spec: - containers: - - name: nodejs-demo - image: node:18.20-alpine - imagePullPolicy: IfNotPresent - env: - - 
name: APP_MSG - valueFrom: - configMapKeyRef: - name: nodejs-demo-config - key: APP_MSG - command: - - node - - "-e" - - | +apiVersion: apps/v1 # Deployment API 版本 +kind: Deployment # 工作负载:Deployment +metadata: # Deployment 元信息 + name: nodejs-demo # Deployment 名称 + namespace: default # 命名空间 +spec: # Deployment 规格 + replicas: 1 # 副本数 + selector: # Deployment 选择器 + matchLabels: # 标签匹配集合 + app: nodejs-demo # 匹配 app=nodejs-demo 的 Pod + template: # Pod 模板 + metadata: # Pod 元信息 + labels: # Pod 标签 + app: nodejs-demo # 与 selector.matchLabels 对齐 + spec: # Pod 规格 + containers: # 容器列表 + - name: nodejs-demo # 容器名 + image: node:18.20-alpine # Node.js 镜像 + imagePullPolicy: IfNotPresent # 拉取策略 + env: # 环境变量注入 + - name: APP_MSG # 环境变量名 + valueFrom: # 从资源引用取值 + configMapKeyRef: # 从 ConfigMap key 读取 + name: nodejs-demo-config # ConfigMap 名称 + key: APP_MSG # ConfigMap 键名 + command: # 启动命令 + - node # 运行 node + - "-e" # 执行内联脚本 + - | # 多行 JS 脚本(内部内容不改动) const http=require('http'); const msg=process.env.APP_MSG||'no env'; http.createServer((q,s)=>s.end(msg)).listen(8080); - ports: - - containerPort: 8080 + ports: # 容器端口 + - containerPort: 8080 # 应用监听端口 --- -apiVersion: v1 -kind: Service -metadata: - name: nodejs-demo - namespace: default -spec: - selector: - app: nodejs-demo - ports: - - port: 80 - targetPort: 8080 +apiVersion: v1 # Service API 版本 +kind: Service # Service 资源 +metadata: # Service 元信息 + name: nodejs-demo # Service 名称 + namespace: default # 命名空间 +spec: # Service 规格 + selector: # 选择后端 Pod + app: nodejs-demo # 选中 app=nodejs-demo + ports: # 端口映射 + - port: 80 # Service 暴露端口 + targetPort: 8080 # 转发到容器端口 --- -apiVersion: networking.k8s.io/v1 -kind: Ingress -metadata: - name: nodejs-demo - namespace: default - annotations: - traefik.ingress.kubernetes.io/router.entrypoints: web -spec: - rules: - - http: - paths: - - path: /node - pathType: Prefix - backend: - service: - name: nodejs-demo - port: - number: 80 +apiVersion: networking.k8s.io/v1 # Ingress API 版本 +kind: Ingress # 
Ingress 资源 +metadata: # Ingress 元信息 + name: nodejs-demo # Ingress 名称 + namespace: default # 命名空间 + annotations: # Traefik 注解 + traefik.ingress.kubernetes.io/router.entrypoints: web # 使用 web(HTTP) 入口 +spec: # Ingress 规则 + rules: # 规则列表 + - http: # HTTP 路由 + paths: # 路径列表 + - path: /node # 匹配路径前缀 + pathType: Prefix # 前缀匹配 + backend: # 后端目标 + service: # 后端 Service + name: nodejs-demo # Service 名称 + port: # Service 端口 + number: 80 # 端口号 diff --git a/ansible/files/nodejs-demo/04-05-nodejs-demo.yaml b/ansible/files/nodejs-demo/04-05-nodejs-demo.yaml index cbec4a6..9ce0ea6 100644 --- a/ansible/files/nodejs-demo/04-05-nodejs-demo.yaml +++ b/ansible/files/nodejs-demo/04-05-nodejs-demo.yaml @@ -1,82 +1,82 @@ # 对应文档:docs/04-05-nodejs-资源请求与限制.md # 累积:04-04 + resources.requests/limits -apiVersion: v1 -kind: ConfigMap -metadata: - name: nodejs-demo-config - namespace: default -data: - APP_MSG: "Hello from ConfigMap" +apiVersion: v1 # ConfigMap API 版本 +kind: ConfigMap # 配置资源:ConfigMap +metadata: # ConfigMap 元信息 + name: nodejs-demo-config # ConfigMap 名称 + namespace: default # 命名空间 +data: # 配置键值 + APP_MSG: "Hello from ConfigMap" # 注入给应用的消息内容 --- -apiVersion: apps/v1 -kind: Deployment -metadata: - name: nodejs-demo - namespace: default -spec: - replicas: 1 - selector: - matchLabels: - app: nodejs-demo - template: - metadata: - labels: - app: nodejs-demo - spec: - containers: - - name: nodejs-demo - image: node:18.20-alpine - imagePullPolicy: IfNotPresent - env: - - name: APP_MSG - valueFrom: - configMapKeyRef: - name: nodejs-demo-config - key: APP_MSG - command: - - node - - "-e" - - | +apiVersion: apps/v1 # Deployment API 版本 +kind: Deployment # 工作负载:Deployment +metadata: # Deployment 元信息 + name: nodejs-demo # Deployment 名称 + namespace: default # 命名空间 +spec: # Deployment 规格 + replicas: 1 # 副本数 + selector: # Deployment 选择器 + matchLabels: # 标签匹配集合 + app: nodejs-demo # 匹配 app=nodejs-demo 的 Pod + template: # Pod 模板 + metadata: # Pod 元信息 + labels: # Pod 标签 + app: nodejs-demo # 与 
selector.matchLabels 对齐 + spec: # Pod 规格 + containers: # 容器列表 + - name: nodejs-demo # 容器名 + image: node:18.20-alpine # Node.js 镜像 + imagePullPolicy: IfNotPresent # 拉取策略 + env: # 环境变量注入 + - name: APP_MSG # 环境变量名 + valueFrom: # 从资源引用取值 + configMapKeyRef: # 从 ConfigMap key 读取 + name: nodejs-demo-config # ConfigMap 名称 + key: APP_MSG # ConfigMap 键名 + command: # 启动命令 + - node # 运行 node + - "-e" # 执行内联脚本 + - | # 多行 JS 脚本(内部内容不改动) const http=require('http'); const msg=process.env.APP_MSG||'no env'; http.createServer((q,s)=>s.end(msg)).listen(8080); - ports: - - containerPort: 8080 - resources: - requests: - cpu: "50m" - memory: "64Mi" - limits: - cpu: "500m" - memory: "256Mi" + ports: # 容器端口 + - containerPort: 8080 # 应用监听端口 + resources: # 资源请求与限制 + requests: # 最小资源请求 + cpu: "50m" # 请求 CPU + memory: "64Mi" # 请求内存 + limits: # 资源上限 + cpu: "500m" # CPU 限制 + memory: "256Mi" # 内存限制 --- -apiVersion: v1 -kind: Service -metadata: - name: nodejs-demo - namespace: default -spec: - selector: - app: nodejs-demo - ports: - - port: 80 - targetPort: 8080 +apiVersion: v1 # Service API 版本 +kind: Service # Service 资源 +metadata: # Service 元信息 + name: nodejs-demo # Service 名称 + namespace: default # 命名空间 +spec: # Service 规格 + selector: # 选择后端 Pod + app: nodejs-demo # 选中 app=nodejs-demo + ports: # 端口映射 + - port: 80 # Service 暴露端口 + targetPort: 8080 # 转发到容器端口 --- -apiVersion: networking.k8s.io/v1 -kind: Ingress -metadata: - name: nodejs-demo - namespace: default - annotations: - traefik.ingress.kubernetes.io/router.entrypoints: web -spec: - rules: - - http: - paths: - - path: /node - pathType: Prefix - backend: - service: - name: nodejs-demo - port: - number: 80 +apiVersion: networking.k8s.io/v1 # Ingress API 版本 +kind: Ingress # Ingress 资源 +metadata: # Ingress 元信息 + name: nodejs-demo # Ingress 名称 + namespace: default # 命名空间 + annotations: # Traefik 注解 + traefik.ingress.kubernetes.io/router.entrypoints: web # 使用 web(HTTP) 入口 +spec: # Ingress 规则 + rules: # 规则列表 + - http: # HTTP 路由 + paths: # 路径列表 + 
- path: /node # 匹配路径前缀 + pathType: Prefix # 前缀匹配 + backend: # 后端目标 + service: # 后端 Service + name: nodejs-demo # Service 名称 + port: # Service 端口 + number: 80 # 端口号 diff --git a/ansible/files/nodejs-demo/04-06-nodejs-demo.yaml b/ansible/files/nodejs-demo/04-06-nodejs-demo.yaml index 41cc8b7..2dbf2ad 100644 --- a/ansible/files/nodejs-demo/04-06-nodejs-demo.yaml +++ b/ansible/files/nodejs-demo/04-06-nodejs-demo.yaml @@ -1,94 +1,94 @@ # 对应文档:docs/04-06-nodejs-探针与健康检查.md # 累积:04-05 + livenessProbe/readinessProbe(端口 8080,路径 /) -apiVersion: v1 -kind: ConfigMap -metadata: - name: nodejs-demo-config - namespace: default -data: - APP_MSG: "Hello from ConfigMap" +apiVersion: v1 # ConfigMap API 版本 +kind: ConfigMap # 配置资源:ConfigMap +metadata: # ConfigMap 元信息 + name: nodejs-demo-config # ConfigMap 名称 + namespace: default # 命名空间 +data: # 配置键值 + APP_MSG: "Hello from ConfigMap" # 注入给应用的消息内容 --- -apiVersion: apps/v1 -kind: Deployment -metadata: - name: nodejs-demo - namespace: default -spec: - replicas: 1 - selector: - matchLabels: - app: nodejs-demo - template: - metadata: - labels: - app: nodejs-demo - spec: - containers: - - name: nodejs-demo - image: node:18.20-alpine - imagePullPolicy: IfNotPresent - env: - - name: APP_MSG - valueFrom: - configMapKeyRef: - name: nodejs-demo-config - key: APP_MSG - command: - - node - - "-e" - - | +apiVersion: apps/v1 # Deployment API 版本 +kind: Deployment # 工作负载:Deployment +metadata: # Deployment 元信息 + name: nodejs-demo # Deployment 名称 + namespace: default # 命名空间 +spec: # Deployment 规格 + replicas: 1 # 副本数 + selector: # Deployment 选择器 + matchLabels: # 标签匹配集合 + app: nodejs-demo # 匹配 app=nodejs-demo 的 Pod + template: # Pod 模板 + metadata: # Pod 元信息 + labels: # Pod 标签 + app: nodejs-demo # 与 selector.matchLabels 对齐 + spec: # Pod 规格 + containers: # 容器列表 + - name: nodejs-demo # 容器名 + image: node:18.20-alpine # Node.js 镜像 + imagePullPolicy: IfNotPresent # 拉取策略 + env: # 环境变量注入 + - name: APP_MSG # 环境变量名 + valueFrom: # 从资源引用取值 + configMapKeyRef: # 从 
ConfigMap key 读取 + name: nodejs-demo-config # ConfigMap 名称 + key: APP_MSG # ConfigMap 键名 + command: # 启动命令 + - node # 运行 node + - "-e" # 执行内联脚本 + - | # 多行 JS 脚本(内部内容不改动) const http=require('http'); const msg=process.env.APP_MSG||'no env'; http.createServer((q,s)=>s.end(msg)).listen(8080); - ports: - - containerPort: 8080 - resources: - requests: - cpu: "50m" - memory: "64Mi" - limits: - cpu: "500m" - memory: "256Mi" - livenessProbe: - httpGet: - path: / - port: 8080 - initialDelaySeconds: 3 - periodSeconds: 10 - readinessProbe: - httpGet: - path: / - port: 8080 - initialDelaySeconds: 2 - periodSeconds: 5 + ports: # 容器端口 + - containerPort: 8080 # 应用监听端口 + resources: # 资源请求与限制 + requests: # 最小资源请求 + cpu: "50m" # 请求 CPU + memory: "64Mi" # 请求内存 + limits: # 资源上限 + cpu: "500m" # CPU 限制 + memory: "256Mi" # 内存限制 + livenessProbe: # 存活探针(判断容器是否需要重启) + httpGet: # 通过 HTTP 探测 + path: / # 探测路径 + port: 8080 # 探测端口 + initialDelaySeconds: 3 # 启动后首次探测延迟 + periodSeconds: 10 # 探测周期 + readinessProbe: # 就绪探针(判断是否接收流量) + httpGet: # 通过 HTTP 探测 + path: / # 探测路径 + port: 8080 # 探测端口 + initialDelaySeconds: 2 # 启动后首次探测延迟 + periodSeconds: 5 # 探测周期 --- -apiVersion: v1 -kind: Service -metadata: - name: nodejs-demo - namespace: default -spec: - selector: - app: nodejs-demo - ports: - - port: 80 - targetPort: 8080 +apiVersion: v1 # Service API 版本 +kind: Service # Service 资源 +metadata: # Service 元信息 + name: nodejs-demo # Service 名称 + namespace: default # 命名空间 +spec: # Service 规格 + selector: # 选择后端 Pod + app: nodejs-demo # 选中 app=nodejs-demo + ports: # 端口映射 + - port: 80 # Service 暴露端口 + targetPort: 8080 # 转发到容器端口 --- -apiVersion: networking.k8s.io/v1 -kind: Ingress -metadata: - name: nodejs-demo - namespace: default - annotations: - traefik.ingress.kubernetes.io/router.entrypoints: web -spec: - rules: - - http: - paths: - - path: /node - pathType: Prefix - backend: - service: - name: nodejs-demo - port: - number: 80 +apiVersion: networking.k8s.io/v1 # Ingress API 版本 +kind: Ingress # Ingress 资源 
+metadata: # Ingress 元信息 + name: nodejs-demo # Ingress 名称 + namespace: default # 命名空间 + annotations: # Traefik 注解 + traefik.ingress.kubernetes.io/router.entrypoints: web # 使用 web(HTTP) 入口 +spec: # Ingress 规则 + rules: # 规则列表 + - http: # HTTP 路由 + paths: # 路径列表 + - path: /node # 匹配路径前缀 + pathType: Prefix # 前缀匹配 + backend: # 后端目标 + service: # 后端 Service + name: nodejs-demo # Service 名称 + port: # Service 端口 + number: 80 # 端口号 diff --git a/ansible/files/nodejs-demo/04-07-nodejs-demo.yaml b/ansible/files/nodejs-demo/04-07-nodejs-demo.yaml index 6f5d486..6a36304 100644 --- a/ansible/files/nodejs-demo/04-07-nodejs-demo.yaml +++ b/ansible/files/nodejs-demo/04-07-nodejs-demo.yaml @@ -1,96 +1,96 @@ # 对应文档:docs/04-07-nodejs-调度与亲和.md # 累积:04-06 + nodeSelector(默认 ylc62,请改为本集群节点短主机名) -apiVersion: v1 -kind: ConfigMap -metadata: - name: nodejs-demo-config - namespace: default -data: - APP_MSG: "Hello from ConfigMap" +apiVersion: v1 # ConfigMap API 版本 +kind: ConfigMap # 配置资源:ConfigMap +metadata: # ConfigMap 元信息 + name: nodejs-demo-config # ConfigMap 名称 + namespace: default # 命名空间 +data: # 配置键值 + APP_MSG: "Hello from ConfigMap" # 注入给应用的消息内容 --- -apiVersion: apps/v1 -kind: Deployment -metadata: - name: nodejs-demo - namespace: default -spec: - replicas: 1 - selector: - matchLabels: - app: nodejs-demo - template: - metadata: - labels: - app: nodejs-demo - spec: - nodeSelector: - kubernetes.io/hostname: ylc62 - containers: - - name: nodejs-demo - image: node:18.20-alpine - imagePullPolicy: IfNotPresent - env: - - name: APP_MSG - valueFrom: - configMapKeyRef: - name: nodejs-demo-config - key: APP_MSG - command: - - node - - "-e" - - | +apiVersion: apps/v1 # Deployment API 版本 +kind: Deployment # 工作负载:Deployment +metadata: # Deployment 元信息 + name: nodejs-demo # Deployment 名称 + namespace: default # 命名空间 +spec: # Deployment 规格 + replicas: 1 # 副本数 + selector: # Deployment 选择器 + matchLabels: # 标签匹配集合 + app: nodejs-demo # 匹配 app=nodejs-demo 的 Pod + template: # Pod 模板 + metadata: # Pod 元信息 + 
labels: # Pod 标签 + app: nodejs-demo # 与 selector.matchLabels 对齐 + spec: # Pod 规格 + nodeSelector: # 调度到指定节点 + kubernetes.io/hostname: ylc62 # 节点主机名(按实际修改) + containers: # 容器列表 + - name: nodejs-demo # 容器名 + image: node:18.20-alpine # Node.js 镜像 + imagePullPolicy: IfNotPresent # 拉取策略 + env: # 环境变量注入 + - name: APP_MSG # 环境变量名 + valueFrom: # 从资源引用取值 + configMapKeyRef: # 从 ConfigMap key 读取 + name: nodejs-demo-config # ConfigMap 名称 + key: APP_MSG # ConfigMap 键名 + command: # 启动命令 + - node # 运行 node + - "-e" # 执行内联脚本 + - | # 多行 JS 脚本(内部内容不改动) const http=require('http'); const msg=process.env.APP_MSG||'no env'; http.createServer((q,s)=>s.end(msg)).listen(8080); - ports: - - containerPort: 8080 - resources: - requests: - cpu: "50m" - memory: "64Mi" - limits: - cpu: "500m" - memory: "256Mi" - livenessProbe: - httpGet: - path: / - port: 8080 - initialDelaySeconds: 3 - periodSeconds: 10 - readinessProbe: - httpGet: - path: / - port: 8080 - initialDelaySeconds: 2 - periodSeconds: 5 + ports: # 容器端口 + - containerPort: 8080 # 应用监听端口 + resources: # 资源请求与限制 + requests: # 最小资源请求 + cpu: "50m" # 请求 CPU + memory: "64Mi" # 请求内存 + limits: # 资源上限 + cpu: "500m" # CPU 限制 + memory: "256Mi" # 内存限制 + livenessProbe: # 存活探针 + httpGet: # HTTP 探测 + path: / # 探测路径 + port: 8080 # 探测端口 + initialDelaySeconds: 3 # 启动后首次探测延迟 + periodSeconds: 10 # 探测周期 + readinessProbe: # 就绪探针 + httpGet: # HTTP 探测 + path: / # 探测路径 + port: 8080 # 探测端口 + initialDelaySeconds: 2 # 启动后首次探测延迟 + periodSeconds: 5 # 探测周期 --- -apiVersion: v1 -kind: Service -metadata: - name: nodejs-demo - namespace: default -spec: - selector: - app: nodejs-demo - ports: - - port: 80 - targetPort: 8080 +apiVersion: v1 # Service API 版本 +kind: Service # Service 资源 +metadata: # Service 元信息 + name: nodejs-demo # Service 名称 + namespace: default # 命名空间 +spec: # Service 规格 + selector: # 选择后端 Pod + app: nodejs-demo # 选中 app=nodejs-demo + ports: # 端口映射 + - port: 80 # Service 暴露端口 + targetPort: 8080 # 转发到容器端口 --- -apiVersion: networking.k8s.io/v1 -kind: Ingress 
-metadata: - name: nodejs-demo - namespace: default - annotations: - traefik.ingress.kubernetes.io/router.entrypoints: web -spec: - rules: - - http: - paths: - - path: /node - pathType: Prefix - backend: - service: - name: nodejs-demo - port: - number: 80 +apiVersion: networking.k8s.io/v1 # Ingress API 版本 +kind: Ingress # Ingress 资源 +metadata: # Ingress 元信息 + name: nodejs-demo # Ingress 名称 + namespace: default # 命名空间 + annotations: # Traefik 注解 + traefik.ingress.kubernetes.io/router.entrypoints: web # 使用 web(HTTP) 入口 +spec: # Ingress 规则 + rules: # 规则列表 + - http: # HTTP 路由 + paths: # 路径列表 + - path: /node # 匹配路径前缀 + pathType: Prefix # 前缀匹配 + backend: # 后端目标 + service: # 后端 Service + name: nodejs-demo # Service 名称 + port: # Service 端口 + number: 80 # 端口号 diff --git a/ansible/files/nodejs-demo/04-08-nodejs-demo.yaml b/ansible/files/nodejs-demo/04-08-nodejs-demo.yaml index 06fc25f..a327e25 100644 --- a/ansible/files/nodejs-demo/04-08-nodejs-demo.yaml +++ b/ansible/files/nodejs-demo/04-08-nodejs-demo.yaml @@ -1,109 +1,109 @@ # 对应文档:docs/04-08-nodejs-安全上下文.md # 累积:04-07 + pod securityContext.fsGroup、容器 securityContext、只读根、/tmp emptyDir -apiVersion: v1 -kind: ConfigMap -metadata: - name: nodejs-demo-config - namespace: default -data: - APP_MSG: "Hello from ConfigMap" +apiVersion: v1 # ConfigMap API 版本 +kind: ConfigMap # 配置资源:ConfigMap +metadata: # ConfigMap 元信息 + name: nodejs-demo-config # ConfigMap 名称 + namespace: default # 命名空间 +data: # 配置键值 + APP_MSG: "Hello from ConfigMap" # 注入给应用的消息内容 --- -apiVersion: apps/v1 -kind: Deployment -metadata: - name: nodejs-demo - namespace: default -spec: - replicas: 1 - selector: - matchLabels: - app: nodejs-demo - template: - metadata: - labels: - app: nodejs-demo - spec: - nodeSelector: - kubernetes.io/hostname: ylc62 - securityContext: - fsGroup: 1000 - containers: - - name: nodejs-demo - image: node:18.20-alpine - imagePullPolicy: IfNotPresent - securityContext: - allowPrivilegeEscalation: false - runAsNonRoot: true - runAsUser: 1000 
- readOnlyRootFilesystem: true - env: - - name: APP_MSG - valueFrom: - configMapKeyRef: - name: nodejs-demo-config - key: APP_MSG - command: - - node - - "-e" - - | +apiVersion: apps/v1 # Deployment API 版本 +kind: Deployment # 工作负载:Deployment +metadata: # Deployment 元信息 + name: nodejs-demo # Deployment 名称 + namespace: default # 命名空间 +spec: # Deployment 规格 + replicas: 1 # 副本数 + selector: # Deployment 选择器 + matchLabels: # 标签匹配集合 + app: nodejs-demo # 匹配 app=nodejs-demo 的 Pod + template: # Pod 模板 + metadata: # Pod 元信息 + labels: # Pod 标签 + app: nodejs-demo # 与 selector.matchLabels 对齐 + spec: # Pod 规格 + nodeSelector: # 调度到指定节点 + kubernetes.io/hostname: ylc62 # 节点主机名(按实际修改) + securityContext: # Pod 级安全上下文 + fsGroup: 1000 # 挂载卷文件组 ID + containers: # 容器列表 + - name: nodejs-demo # 容器名 + image: node:18.20-alpine # Node.js 镜像 + imagePullPolicy: IfNotPresent # 拉取策略 + securityContext: # 容器级安全上下文 + allowPrivilegeEscalation: false # 禁止提权 + runAsNonRoot: true # 强制非 root 运行 + runAsUser: 1000 # 运行用户 UID + readOnlyRootFilesystem: true # 根文件系统只读 + env: # 环境变量注入 + - name: APP_MSG # 环境变量名 + valueFrom: # 从资源引用取值 + configMapKeyRef: # 从 ConfigMap key 读取 + name: nodejs-demo-config # ConfigMap 名称 + key: APP_MSG # ConfigMap 键名 + command: # 启动命令 + - node # 运行 node + - "-e" # 执行内联脚本 + - | # 多行 JS 脚本(内部内容不改动) const http=require('http'); const msg=process.env.APP_MSG||'no env'; http.createServer((q,s)=>s.end(msg)).listen(8080); - ports: - - containerPort: 8080 - resources: - requests: - cpu: "50m" - memory: "64Mi" - limits: - cpu: "500m" - memory: "256Mi" - livenessProbe: - httpGet: - path: / - port: 8080 - initialDelaySeconds: 3 - periodSeconds: 10 - readinessProbe: - httpGet: - path: / - port: 8080 - initialDelaySeconds: 2 - periodSeconds: 5 - volumeMounts: - - name: tmp - mountPath: /tmp - volumes: - - name: tmp - emptyDir: {} + ports: # 容器端口 + - containerPort: 8080 # 应用监听端口 + resources: # 资源请求与限制 + requests: # 最小资源请求 + cpu: "50m" # 请求 CPU + memory: "64Mi" # 请求内存 + limits: # 资源上限 + cpu: "500m" # 
CPU 限制 + memory: "256Mi" # 内存限制 + livenessProbe: # 存活探针 + httpGet: # HTTP 探测 + path: / # 探测路径 + port: 8080 # 探测端口 + initialDelaySeconds: 3 # 启动后首次探测延迟 + periodSeconds: 10 # 探测周期 + readinessProbe: # 就绪探针 + httpGet: # HTTP 探测 + path: / # 探测路径 + port: 8080 # 探测端口 + initialDelaySeconds: 2 # 启动后首次探测延迟 + periodSeconds: 5 # 探测周期 + volumeMounts: # 卷挂载 + - name: tmp # 引用临时卷 + mountPath: /tmp # 容器内临时目录 + volumes: # 卷定义 + - name: tmp # 临时卷名称 + emptyDir: {} # 空目录卷(Pod 生命周期内) --- -apiVersion: v1 -kind: Service -metadata: - name: nodejs-demo - namespace: default -spec: - selector: - app: nodejs-demo - ports: - - port: 80 - targetPort: 8080 +apiVersion: v1 # Service API 版本 +kind: Service # Service 资源 +metadata: # Service 元信息 + name: nodejs-demo # Service 名称 + namespace: default # 命名空间 +spec: # Service 规格 + selector: # 选择后端 Pod + app: nodejs-demo # 选中 app=nodejs-demo + ports: # 端口映射 + - port: 80 # Service 暴露端口 + targetPort: 8080 # 转发到容器端口 --- -apiVersion: networking.k8s.io/v1 -kind: Ingress -metadata: - name: nodejs-demo - namespace: default - annotations: - traefik.ingress.kubernetes.io/router.entrypoints: web -spec: - rules: - - http: - paths: - - path: /node - pathType: Prefix - backend: - service: - name: nodejs-demo - port: - number: 80 +apiVersion: networking.k8s.io/v1 # Ingress API 版本 +kind: Ingress # Ingress 资源 +metadata: # Ingress 元信息 + name: nodejs-demo # Ingress 名称 + namespace: default # 命名空间 + annotations: # Traefik 注解 + traefik.ingress.kubernetes.io/router.entrypoints: web # 使用 web(HTTP) 入口 +spec: # Ingress 规则 + rules: # 规则列表 + - http: # HTTP 路由 + paths: # 路径列表 + - path: /node # 匹配路径前缀 + pathType: Prefix # 前缀匹配 + backend: # 后端目标 + service: # 后端 Service + name: nodejs-demo # Service 名称 + port: # Service 端口 + number: 80 # 端口号 diff --git a/ansible/files/nodejs-demo/04-09-nodejs-demo.yaml b/ansible/files/nodejs-demo/04-09-nodejs-demo.yaml index 69c5b63..5a99c3b 100644 --- a/ansible/files/nodejs-demo/04-09-nodejs-demo.yaml +++ 
b/ansible/files/nodejs-demo/04-09-nodejs-demo.yaml @@ -1,127 +1,127 @@ # 对应文档:docs/04-09-nodejs-存储与卷.md # 累积:04-08 + PVC nodejs-demo-data(默认 storageClassName: local-path,可按集群改为 longhorn 等)+ 挂载 /data -apiVersion: v1 -kind: PersistentVolumeClaim -metadata: - name: nodejs-demo-data - namespace: default -spec: - accessModes: - - ReadWriteOnce - storageClassName: local-path - resources: - requests: - storage: 1Gi +apiVersion: v1 # PVC API 版本 +kind: PersistentVolumeClaim # 持久卷声明 +metadata: # PVC 元信息 + name: nodejs-demo-data # PVC 名称 + namespace: default # 命名空间 +spec: # PVC 规格 + accessModes: # 访问模式 + - ReadWriteOnce # RWO:同一时间仅单节点挂载读写 + storageClassName: local-path # 存储类(按集群可改) + resources: # 资源请求 + requests: # 配额请求 + storage: 1Gi # 申请容量 --- -apiVersion: v1 -kind: ConfigMap -metadata: - name: nodejs-demo-config - namespace: default -data: - APP_MSG: "Hello from ConfigMap" +apiVersion: v1 # ConfigMap API 版本 +kind: ConfigMap # 配置资源 +metadata: # ConfigMap 元信息 + name: nodejs-demo-config # ConfigMap 名称 + namespace: default # 命名空间 +data: # 配置键值 + APP_MSG: "Hello from ConfigMap" # 示例消息内容 --- -apiVersion: apps/v1 -kind: Deployment -metadata: - name: nodejs-demo - namespace: default -spec: - replicas: 1 - selector: - matchLabels: - app: nodejs-demo - template: - metadata: - labels: - app: nodejs-demo - spec: - nodeSelector: - kubernetes.io/hostname: ylc62 - securityContext: - fsGroup: 1000 - containers: - - name: nodejs-demo - image: node:18.20-alpine - imagePullPolicy: IfNotPresent - securityContext: - allowPrivilegeEscalation: false - runAsNonRoot: true - runAsUser: 1000 - readOnlyRootFilesystem: true - env: - - name: APP_MSG - valueFrom: - configMapKeyRef: - name: nodejs-demo-config - key: APP_MSG - command: - - node - - "-e" - - | +apiVersion: apps/v1 # Deployment API 版本 +kind: Deployment # 工作负载:Deployment +metadata: # Deployment 元信息 + name: nodejs-demo # Deployment 名称 + namespace: default # 命名空间 +spec: # Deployment 规格 + replicas: 1 # 副本数 + selector: # Pod 选择器 + matchLabels: # 
标签匹配集合 + app: nodejs-demo # 匹配 app=nodejs-demo 的 Pod + template: # Pod 模板 + metadata: # Pod 元信息 + labels: # Pod 标签 + app: nodejs-demo # 与 selector.matchLabels 对齐 + spec: # Pod 规格 + nodeSelector: # 节点选择 + kubernetes.io/hostname: ylc62 # 固定到指定节点(按实际修改) + securityContext: # Pod 级安全上下文 + fsGroup: 1000 # 挂载卷文件组 ID + containers: # 容器列表 + - name: nodejs-demo # 容器名 + image: node:18.20-alpine # Node.js 镜像 + imagePullPolicy: IfNotPresent # 拉取策略 + securityContext: # 容器级安全上下文 + allowPrivilegeEscalation: false # 禁止提权 + runAsNonRoot: true # 非 root 运行 + runAsUser: 1000 # 运行用户 UID + readOnlyRootFilesystem: true # 根文件系统只读 + env: # 环境变量 + - name: APP_MSG # 环境变量名 + valueFrom: # 从引用源取值 + configMapKeyRef: # 从 ConfigMap key 读取 + name: nodejs-demo-config # ConfigMap 名称 + key: APP_MSG # ConfigMap 键名 + command: # 启动命令 + - node # 执行 node + - "-e" # 执行内联脚本 + - | # 多行 JS 脚本(内容保持原样) const http=require('http'); const msg=process.env.APP_MSG||'no env'; http.createServer((q,s)=>s.end(msg)).listen(8080); - ports: - - containerPort: 8080 - resources: - requests: - cpu: "50m" - memory: "64Mi" - limits: - cpu: "500m" - memory: "256Mi" - livenessProbe: - httpGet: - path: / - port: 8080 - initialDelaySeconds: 3 - periodSeconds: 10 - readinessProbe: - httpGet: - path: / - port: 8080 - initialDelaySeconds: 2 - periodSeconds: 5 - volumeMounts: - - name: tmp - mountPath: /tmp - - name: data - mountPath: /data - volumes: - - name: tmp - emptyDir: {} - - name: data - persistentVolumeClaim: - claimName: nodejs-demo-data + ports: # 容器端口 + - containerPort: 8080 # 监听端口 + resources: # 资源请求与限制 + requests: # 最小资源请求 + cpu: "50m" # 请求 CPU + memory: "64Mi" # 请求内存 + limits: # 资源上限 + cpu: "500m" # CPU 限制 + memory: "256Mi" # 内存限制 + livenessProbe: # 存活探针 + httpGet: # HTTP 探测 + path: / # 探测路径 + port: 8080 # 探测端口 + initialDelaySeconds: 3 # 初始延迟 + periodSeconds: 10 # 探测周期 + readinessProbe: # 就绪探针 + httpGet: # HTTP 探测 + path: / # 探测路径 + port: 8080 # 探测端口 + initialDelaySeconds: 2 # 初始延迟 + periodSeconds: 5 # 探测周期 + 
volumeMounts: # 卷挂载 + - name: tmp # 临时卷名称 + mountPath: /tmp # 容器内临时目录 + - name: data # 数据卷名称 + mountPath: /data # 容器内数据目录 + volumes: # 卷定义 + - name: tmp # 临时卷 + emptyDir: {} # 空目录卷 + - name: data # 数据卷 + persistentVolumeClaim: # 卷来源为 PVC + claimName: nodejs-demo-data # 绑定 PVC 名称 --- -apiVersion: v1 -kind: Service -metadata: - name: nodejs-demo - namespace: default -spec: - selector: - app: nodejs-demo - ports: - - port: 80 - targetPort: 8080 +apiVersion: v1 # Service API 版本 +kind: Service # Service 资源 +metadata: # Service 元信息 + name: nodejs-demo # Service 名称 + namespace: default # 命名空间 +spec: # Service 规格 + selector: # 选择后端 Pod + app: nodejs-demo # 选中 app=nodejs-demo + ports: # 端口映射 + - port: 80 # Service 暴露端口 + targetPort: 8080 # 转发到容器端口 --- -apiVersion: networking.k8s.io/v1 -kind: Ingress -metadata: - name: nodejs-demo - namespace: default - annotations: - traefik.ingress.kubernetes.io/router.entrypoints: web -spec: - rules: - - http: - paths: - - path: /node - pathType: Prefix - backend: - service: - name: nodejs-demo - port: - number: 80 +apiVersion: networking.k8s.io/v1 # Ingress API 版本 +kind: Ingress # Ingress 资源 +metadata: # Ingress 元信息 + name: nodejs-demo # Ingress 名称 + namespace: default # 命名空间 + annotations: # Traefik 注解 + traefik.ingress.kubernetes.io/router.entrypoints: web # 使用 web(HTTP) 入口 +spec: # Ingress 规则 + rules: # 规则列表 + - http: # HTTP 路由 + paths: # 路径列表 + - path: /node # 路径前缀 + pathType: Prefix # 前缀匹配 + backend: # 后端目标 + service: # 后端 Service + name: nodejs-demo # Service 名称 + port: # Service 端口 + number: 80 # 端口号 diff --git a/ansible/files/nodejs-demo/04-10-nodejs-demo.yaml b/ansible/files/nodejs-demo/04-10-nodejs-demo.yaml index f5b41e0..c5d4a62 100644 --- a/ansible/files/nodejs-demo/04-10-nodejs-demo.yaml +++ b/ansible/files/nodejs-demo/04-10-nodejs-demo.yaml @@ -1,128 +1,128 @@ # 对应文档:docs/04-10-nodejs-Ingress与Traefik.md # 累积:04-09 + Ingress 增加 host、path 改为 /api(访问需 Host: app.example.local) -apiVersion: v1 -kind: PersistentVolumeClaim 
-metadata: - name: nodejs-demo-data - namespace: default -spec: - accessModes: - - ReadWriteOnce - storageClassName: local-path - resources: - requests: - storage: 1Gi +apiVersion: v1 # PVC API 版本 +kind: PersistentVolumeClaim # 持久卷声明 +metadata: # PVC 元信息 + name: nodejs-demo-data # PVC 名称 + namespace: default # 命名空间 +spec: # PVC 规格 + accessModes: # 访问模式 + - ReadWriteOnce # RWO:同一时间仅单节点挂载读写 + storageClassName: local-path # 存储类 + resources: # 资源请求 + requests: # 配额请求 + storage: 1Gi # 申请容量 --- -apiVersion: v1 -kind: ConfigMap -metadata: - name: nodejs-demo-config - namespace: default -data: - APP_MSG: "Hello from ConfigMap" +apiVersion: v1 # ConfigMap API 版本 +kind: ConfigMap # 配置资源 +metadata: # ConfigMap 元信息 + name: nodejs-demo-config # ConfigMap 名称 + namespace: default # 命名空间 +data: # 配置键值 + APP_MSG: "Hello from ConfigMap" # 示例消息内容 --- -apiVersion: apps/v1 -kind: Deployment -metadata: - name: nodejs-demo - namespace: default -spec: - replicas: 1 - selector: - matchLabels: - app: nodejs-demo - template: - metadata: - labels: - app: nodejs-demo - spec: - nodeSelector: - kubernetes.io/hostname: ylc62 - securityContext: - fsGroup: 1000 - containers: - - name: nodejs-demo - image: node:18.20-alpine - imagePullPolicy: IfNotPresent - securityContext: - allowPrivilegeEscalation: false - runAsNonRoot: true - runAsUser: 1000 - readOnlyRootFilesystem: true - env: - - name: APP_MSG - valueFrom: - configMapKeyRef: - name: nodejs-demo-config - key: APP_MSG - command: - - node - - "-e" - - | +apiVersion: apps/v1 # Deployment API 版本 +kind: Deployment # 工作负载:Deployment +metadata: # Deployment 元信息 + name: nodejs-demo # Deployment 名称 + namespace: default # 命名空间 +spec: # Deployment 规格 + replicas: 1 # 副本数 + selector: # Pod 选择器 + matchLabels: # 标签匹配集合 + app: nodejs-demo # 匹配 app=nodejs-demo 的 Pod + template: # Pod 模板 + metadata: # Pod 元信息 + labels: # Pod 标签 + app: nodejs-demo # 与 selector.matchLabels 对齐 + spec: # Pod 规格 + nodeSelector: # 节点选择 + kubernetes.io/hostname: ylc62 # 固定到指定节点(按实际修改) 
+ securityContext: # Pod 级安全上下文 + fsGroup: 1000 # 挂载卷文件组 ID + containers: # 容器列表 + - name: nodejs-demo # 容器名 + image: node:18.20-alpine # Node.js 镜像 + imagePullPolicy: IfNotPresent # 拉取策略 + securityContext: # 容器级安全上下文 + allowPrivilegeEscalation: false # 禁止提权 + runAsNonRoot: true # 非 root 运行 + runAsUser: 1000 # 运行用户 UID + readOnlyRootFilesystem: true # 根文件系统只读 + env: # 环境变量 + - name: APP_MSG # 环境变量名 + valueFrom: # 从引用源取值 + configMapKeyRef: # 从 ConfigMap key 读取 + name: nodejs-demo-config # ConfigMap 名称 + key: APP_MSG # ConfigMap 键名 + command: # 启动命令 + - node # 执行 node + - "-e" # 执行内联脚本 + - | # 多行 JS 脚本(内容保持原样) const http=require('http'); const msg=process.env.APP_MSG||'no env'; http.createServer((q,s)=>s.end(msg)).listen(8080); - ports: - - containerPort: 8080 - resources: - requests: - cpu: "50m" - memory: "64Mi" - limits: - cpu: "500m" - memory: "256Mi" - livenessProbe: - httpGet: - path: / - port: 8080 - initialDelaySeconds: 3 - periodSeconds: 10 - readinessProbe: - httpGet: - path: / - port: 8080 - initialDelaySeconds: 2 - periodSeconds: 5 - volumeMounts: - - name: tmp - mountPath: /tmp - - name: data - mountPath: /data - volumes: - - name: tmp - emptyDir: {} - - name: data - persistentVolumeClaim: - claimName: nodejs-demo-data + ports: # 容器端口 + - containerPort: 8080 # 监听端口 + resources: # 资源请求与限制 + requests: # 最小资源请求 + cpu: "50m" # 请求 CPU + memory: "64Mi" # 请求内存 + limits: # 资源上限 + cpu: "500m" # CPU 限制 + memory: "256Mi" # 内存限制 + livenessProbe: # 存活探针 + httpGet: # HTTP 探测 + path: / # 探测路径 + port: 8080 # 探测端口 + initialDelaySeconds: 3 # 初始延迟 + periodSeconds: 10 # 探测周期 + readinessProbe: # 就绪探针 + httpGet: # HTTP 探测 + path: / # 探测路径 + port: 8080 # 探测端口 + initialDelaySeconds: 2 # 初始延迟 + periodSeconds: 5 # 探测周期 + volumeMounts: # 卷挂载 + - name: tmp # 临时卷名称 + mountPath: /tmp # 容器内临时目录 + - name: data # 数据卷名称 + mountPath: /data # 容器内数据目录 + volumes: # 卷定义 + - name: tmp # 临时卷 + emptyDir: {} # 空目录卷 + - name: data # 数据卷 + persistentVolumeClaim: # 卷来源为 PVC + claimName: 
nodejs-demo-data # 绑定 PVC 名称 --- -apiVersion: v1 -kind: Service -metadata: - name: nodejs-demo - namespace: default -spec: - selector: - app: nodejs-demo - ports: - - port: 80 - targetPort: 8080 +apiVersion: v1 # Service API 版本 +kind: Service # Service 资源 +metadata: # Service 元信息 + name: nodejs-demo # Service 名称 + namespace: default # 命名空间 +spec: # Service 规格 + selector: # 选择后端 Pod + app: nodejs-demo # 选中 app=nodejs-demo + ports: # 端口映射 + - port: 80 # Service 暴露端口 + targetPort: 8080 # 转发到容器端口 --- -apiVersion: networking.k8s.io/v1 -kind: Ingress -metadata: - name: nodejs-demo - namespace: default - annotations: - traefik.ingress.kubernetes.io/router.entrypoints: web -spec: - rules: - - host: app.example.local - http: - paths: - - path: /api - pathType: Prefix - backend: - service: - name: nodejs-demo - port: - number: 80 +apiVersion: networking.k8s.io/v1 # Ingress API 版本 +kind: Ingress # Ingress 资源 +metadata: # Ingress 元信息 + name: nodejs-demo # Ingress 名称 + namespace: default # 命名空间 + annotations: # Traefik 注解 + traefik.ingress.kubernetes.io/router.entrypoints: web # 使用 web(HTTP) 入口 +spec: # Ingress 规则 + rules: # 规则列表 + - host: app.example.local # 主机名匹配 + http: # HTTP 路由 + paths: # 路径列表 + - path: /api # 匹配 API 路径前缀 + pathType: Prefix # 前缀匹配 + backend: # 后端目标 + service: # 后端 Service + name: nodejs-demo # Service 名称 + port: # Service 端口 + number: 80 # 端口号 diff --git a/ansible/files/nodejs-demo/04-11-nodejs-demo.yaml b/ansible/files/nodejs-demo/04-11-nodejs-demo.yaml index 1f9e0b8..cd2d710 100644 --- a/ansible/files/nodejs-demo/04-11-nodejs-demo.yaml +++ b/ansible/files/nodejs-demo/04-11-nodejs-demo.yaml @@ -1,133 +1,133 @@ # 对应文档:docs/04-11-nodejs-副本与滚动发布.md # 累积:04-10 + replicas: 3 + RollingUpdate(maxSurge:1 maxUnavailable:0) -apiVersion: v1 -kind: PersistentVolumeClaim -metadata: - name: nodejs-demo-data - namespace: default -spec: - accessModes: - - ReadWriteOnce - storageClassName: local-path - resources: - requests: - storage: 1Gi +apiVersion: v1 # PVC API 版本 
+kind: PersistentVolumeClaim # 持久卷声明 +metadata: # PVC 元信息 + name: nodejs-demo-data # PVC 名称 + namespace: default # 命名空间 +spec: # PVC 规格 + accessModes: # 访问模式 + - ReadWriteOnce # RWO:同一时间仅单节点挂载读写 + storageClassName: local-path # 存储类 + resources: # 资源请求 + requests: # 配额请求 + storage: 1Gi # 申请容量 --- -apiVersion: v1 -kind: ConfigMap -metadata: - name: nodejs-demo-config - namespace: default -data: - APP_MSG: "Hello from ConfigMap" +apiVersion: v1 # ConfigMap API 版本 +kind: ConfigMap # 配置资源 +metadata: # ConfigMap 元信息 + name: nodejs-demo-config # ConfigMap 名称 + namespace: default # 命名空间 +data: # 配置键值 + APP_MSG: "Hello from ConfigMap" # 示例消息内容 --- -apiVersion: apps/v1 -kind: Deployment -metadata: - name: nodejs-demo - namespace: default -spec: - replicas: 3 - strategy: - type: RollingUpdate - rollingUpdate: - maxSurge: 1 - maxUnavailable: 0 - selector: - matchLabels: - app: nodejs-demo - template: - metadata: - labels: - app: nodejs-demo - spec: - nodeSelector: - kubernetes.io/hostname: ylc62 - securityContext: - fsGroup: 1000 - containers: - - name: nodejs-demo - image: node:18.20-alpine - imagePullPolicy: IfNotPresent - securityContext: - allowPrivilegeEscalation: false - runAsNonRoot: true - runAsUser: 1000 - readOnlyRootFilesystem: true - env: - - name: APP_MSG - valueFrom: - configMapKeyRef: - name: nodejs-demo-config - key: APP_MSG - command: - - node - - "-e" - - | +apiVersion: apps/v1 # Deployment API 版本 +kind: Deployment # 工作负载:Deployment +metadata: # Deployment 元信息 + name: nodejs-demo # Deployment 名称 + namespace: default # 命名空间 +spec: # Deployment 规格 + replicas: 3 # 副本数(高可用) + strategy: # 更新策略 + type: RollingUpdate # 滚动更新 + rollingUpdate: # 滚动更新参数 + maxSurge: 1 # 更新时最多额外增加 1 个 Pod + maxUnavailable: 0 # 更新时不可用 Pod 数为 0 + selector: # Pod 选择器 + matchLabels: # 标签匹配集合 + app: nodejs-demo # 匹配 app=nodejs-demo 的 Pod + template: # Pod 模板 + metadata: # Pod 元信息 + labels: # Pod 标签 + app: nodejs-demo # 与 selector.matchLabels 对齐 + spec: # Pod 规格 + nodeSelector: # 节点选择 + 
kubernetes.io/hostname: ylc62 # 固定到指定节点(按实际修改) + securityContext: # Pod 级安全上下文 + fsGroup: 1000 # 挂载卷文件组 ID + containers: # 容器列表 + - name: nodejs-demo # 容器名 + image: node:18.20-alpine # Node.js 镜像 + imagePullPolicy: IfNotPresent # 拉取策略 + securityContext: # 容器级安全上下文 + allowPrivilegeEscalation: false # 禁止提权 + runAsNonRoot: true # 非 root 运行 + runAsUser: 1000 # 运行用户 UID + readOnlyRootFilesystem: true # 根文件系统只读 + env: # 环境变量 + - name: APP_MSG # 环境变量名 + valueFrom: # 从引用源取值 + configMapKeyRef: # 从 ConfigMap key 读取 + name: nodejs-demo-config # ConfigMap 名称 + key: APP_MSG # ConfigMap 键名 + command: # 启动命令 + - node # 执行 node + - "-e" # 执行内联脚本 + - | # 多行 JS 脚本(内容保持原样) const http=require('http'); const msg=process.env.APP_MSG||'no env'; http.createServer((q,s)=>s.end(msg)).listen(8080); - ports: - - containerPort: 8080 - resources: - requests: - cpu: "50m" - memory: "64Mi" - limits: - cpu: "500m" - memory: "256Mi" - livenessProbe: - httpGet: - path: / - port: 8080 - initialDelaySeconds: 3 - periodSeconds: 10 - readinessProbe: - httpGet: - path: / - port: 8080 - initialDelaySeconds: 2 - periodSeconds: 5 - volumeMounts: - - name: tmp - mountPath: /tmp - - name: data - mountPath: /data - volumes: - - name: tmp - emptyDir: {} - - name: data - persistentVolumeClaim: - claimName: nodejs-demo-data + ports: # 容器端口 + - containerPort: 8080 # 监听端口 + resources: # 资源请求与限制 + requests: # 最小资源请求 + cpu: "50m" # 请求 CPU + memory: "64Mi" # 请求内存 + limits: # 资源上限 + cpu: "500m" # CPU 限制 + memory: "256Mi" # 内存限制 + livenessProbe: # 存活探针 + httpGet: # HTTP 探测 + path: / # 探测路径 + port: 8080 # 探测端口 + initialDelaySeconds: 3 # 初始延迟 + periodSeconds: 10 # 探测周期 + readinessProbe: # 就绪探针 + httpGet: # HTTP 探测 + path: / # 探测路径 + port: 8080 # 探测端口 + initialDelaySeconds: 2 # 初始延迟 + periodSeconds: 5 # 探测周期 + volumeMounts: # 卷挂载 + - name: tmp # 临时卷名称 + mountPath: /tmp # 容器内临时目录 + - name: data # 数据卷名称 + mountPath: /data # 容器内数据目录 + volumes: # 卷定义 + - name: tmp # 临时卷 + emptyDir: {} # 空目录卷 + - name: data # 数据卷 + 
persistentVolumeClaim: # 卷来源为 PVC + claimName: nodejs-demo-data # 绑定 PVC 名称 --- -apiVersion: v1 -kind: Service -metadata: - name: nodejs-demo - namespace: default -spec: - selector: - app: nodejs-demo - ports: - - port: 80 - targetPort: 8080 +apiVersion: v1 # Service API 版本 +kind: Service # Service 资源 +metadata: # Service 元信息 + name: nodejs-demo # Service 名称 + namespace: default # 命名空间 +spec: # Service 规格 + selector: # 选择后端 Pod + app: nodejs-demo # 选中 app=nodejs-demo + ports: # 端口映射 + - port: 80 # Service 暴露端口 + targetPort: 8080 # 转发到容器端口 --- -apiVersion: networking.k8s.io/v1 -kind: Ingress -metadata: - name: nodejs-demo - namespace: default - annotations: - traefik.ingress.kubernetes.io/router.entrypoints: web -spec: - rules: - - host: app.example.local - http: - paths: - - path: /api - pathType: Prefix - backend: - service: - name: nodejs-demo - port: - number: 80 +apiVersion: networking.k8s.io/v1 # Ingress API 版本 +kind: Ingress # Ingress 资源 +metadata: # Ingress 元信息 + name: nodejs-demo # Ingress 名称 + namespace: default # 命名空间 + annotations: # Traefik 注解 + traefik.ingress.kubernetes.io/router.entrypoints: web # 使用 web(HTTP) 入口 +spec: # Ingress 规则 + rules: # 规则列表 + - host: app.example.local # 主机名匹配 + http: # HTTP 路由 + paths: # 路径列表 + - path: /api # 匹配 API 路径前缀 + pathType: Prefix # 前缀匹配 + backend: # 后端目标 + service: # 后端 Service + name: nodejs-demo # Service 名称 + port: # Service 端口 + number: 80 # 端口号 diff --git a/ansible/files/nodejs-demo/04-12-nodejs-demo.yaml b/ansible/files/nodejs-demo/04-12-nodejs-demo.yaml index 3d0ce90..9f8d0d2 100644 --- a/ansible/files/nodejs-demo/04-12-nodejs-demo.yaml +++ b/ansible/files/nodejs-demo/04-12-nodejs-demo.yaml @@ -3,138 +3,138 @@ # 应用前请先创建 TLS Secret,例如: # kubectl create secret tls nodejs-demo-tls --cert=fullchain.pem --key=privkey.pem -n default # 证书 SAN 须覆盖 app.example.local(与 rules.host / tls.hosts 一致) -apiVersion: v1 -kind: PersistentVolumeClaim -metadata: - name: nodejs-demo-data - namespace: default -spec: - accessModes: - 
- ReadWriteOnce - storageClassName: local-path - resources: - requests: - storage: 1Gi +apiVersion: v1 # PVC API 版本 +kind: PersistentVolumeClaim # 持久卷声明 +metadata: # PVC 元信息 + name: nodejs-demo-data # PVC 名称 + namespace: default # 命名空间 +spec: # PVC 规格 + accessModes: # 访问模式 + - ReadWriteOnce # RWO:同一时间仅单节点挂载读写 + storageClassName: local-path # 存储类 + resources: # 资源请求 + requests: # 配额请求 + storage: 1Gi # 申请容量 --- -apiVersion: v1 -kind: ConfigMap -metadata: - name: nodejs-demo-config - namespace: default -data: - APP_MSG: "Hello from ConfigMap" +apiVersion: v1 # ConfigMap API 版本 +kind: ConfigMap # 配置资源 +metadata: # ConfigMap 元信息 + name: nodejs-demo-config # ConfigMap 名称 + namespace: default # 命名空间 +data: # 配置键值 + APP_MSG: "Hello from ConfigMap" # 示例消息内容 --- -apiVersion: apps/v1 -kind: Deployment -metadata: - name: nodejs-demo - namespace: default -spec: - replicas: 3 - strategy: - type: RollingUpdate - rollingUpdate: - maxSurge: 1 - maxUnavailable: 0 - selector: - matchLabels: - app: nodejs-demo - template: - metadata: - labels: - app: nodejs-demo - spec: - nodeSelector: - kubernetes.io/hostname: ylc62 - securityContext: - fsGroup: 1000 - containers: - - name: nodejs-demo - image: node:18.20-alpine - imagePullPolicy: IfNotPresent - securityContext: - allowPrivilegeEscalation: false - runAsNonRoot: true - runAsUser: 1000 - readOnlyRootFilesystem: true - env: - - name: APP_MSG - valueFrom: - configMapKeyRef: - name: nodejs-demo-config - key: APP_MSG - command: - - node - - "-e" - - | +apiVersion: apps/v1 # Deployment API 版本 +kind: Deployment # 工作负载:Deployment +metadata: # Deployment 元信息 + name: nodejs-demo # Deployment 名称 + namespace: default # 命名空间 +spec: # Deployment 规格 + replicas: 3 # 副本数 + strategy: # 更新策略 + type: RollingUpdate # 滚动更新 + rollingUpdate: # 滚动更新参数 + maxSurge: 1 # 更新时最多额外增加 1 个 Pod + maxUnavailable: 0 # 更新时不可用 Pod 数为 0 + selector: # Pod 选择器 + matchLabels: # 标签匹配集合 + app: nodejs-demo # 匹配 app=nodejs-demo 的 Pod + template: # Pod 模板 + metadata: # Pod 元信息 + 
labels: # Pod 标签 + app: nodejs-demo # 与 selector.matchLabels 对齐 + spec: # Pod 规格 + nodeSelector: # 节点选择 + kubernetes.io/hostname: ylc62 # 固定到指定节点(按实际修改) + securityContext: # Pod 级安全上下文 + fsGroup: 1000 # 挂载卷文件组 ID + containers: # 容器列表 + - name: nodejs-demo # 容器名 + image: node:18.20-alpine # Node.js 镜像 + imagePullPolicy: IfNotPresent # 拉取策略 + securityContext: # 容器级安全上下文 + allowPrivilegeEscalation: false # 禁止提权 + runAsNonRoot: true # 非 root 运行 + runAsUser: 1000 # 运行用户 UID + readOnlyRootFilesystem: true # 根文件系统只读 + env: # 环境变量 + - name: APP_MSG # 环境变量名 + valueFrom: # 从引用源取值 + configMapKeyRef: # 从 ConfigMap key 读取 + name: nodejs-demo-config # ConfigMap 名称 + key: APP_MSG # ConfigMap 键名 + command: # 启动命令 + - node # 执行 node + - "-e" # 执行内联脚本 + - | # 多行 JS 脚本(内容保持原样) const http=require('http'); const msg=process.env.APP_MSG||'no env'; http.createServer((q,s)=>s.end(msg)).listen(8080); - ports: - - containerPort: 8080 - resources: - requests: - cpu: "50m" - memory: "64Mi" - limits: - cpu: "500m" - memory: "256Mi" - livenessProbe: - httpGet: - path: / - port: 8080 - initialDelaySeconds: 3 - periodSeconds: 10 - readinessProbe: - httpGet: - path: / - port: 8080 - initialDelaySeconds: 2 - periodSeconds: 5 - volumeMounts: - - name: tmp - mountPath: /tmp - - name: data - mountPath: /data - volumes: - - name: tmp - emptyDir: {} - - name: data - persistentVolumeClaim: - claimName: nodejs-demo-data + ports: # 容器端口 + - containerPort: 8080 # 监听端口 + resources: # 资源请求与限制 + requests: # 最小资源请求 + cpu: "50m" # 请求 CPU + memory: "64Mi" # 请求内存 + limits: # 资源上限 + cpu: "500m" # CPU 限制 + memory: "256Mi" # 内存限制 + livenessProbe: # 存活探针 + httpGet: # HTTP 探测 + path: / # 探测路径 + port: 8080 # 探测端口 + initialDelaySeconds: 3 # 初始延迟 + periodSeconds: 10 # 探测周期 + readinessProbe: # 就绪探针 + httpGet: # HTTP 探测 + path: / # 探测路径 + port: 8080 # 探测端口 + initialDelaySeconds: 2 # 初始延迟 + periodSeconds: 5 # 探测周期 + volumeMounts: # 卷挂载 + - name: tmp # 临时卷名称 + mountPath: /tmp # 容器内临时目录 + - name: data # 数据卷名称 + mountPath: 
/data # 容器内数据目录 + volumes: # 卷定义 + - name: tmp # 临时卷 + emptyDir: {} # 空目录卷 + - name: data # 数据卷 + persistentVolumeClaim: # 卷来源为 PVC + claimName: nodejs-demo-data # 绑定 PVC 名称 --- -apiVersion: v1 -kind: Service -metadata: - name: nodejs-demo - namespace: default -spec: - selector: - app: nodejs-demo - ports: - - port: 80 - targetPort: 8080 +apiVersion: v1 # Service API 版本 +kind: Service # Service 资源 +metadata: # Service 元信息 + name: nodejs-demo # Service 名称 + namespace: default # 命名空间 +spec: # Service 规格 + selector: # 选择后端 Pod + app: nodejs-demo # 选中 app=nodejs-demo + ports: # 端口映射 + - port: 80 # Service 暴露端口 + targetPort: 8080 # 转发到容器端口 --- -apiVersion: networking.k8s.io/v1 -kind: Ingress -metadata: - name: nodejs-demo - namespace: default - annotations: - traefik.ingress.kubernetes.io/router.entrypoints: websecure -spec: - tls: - - hosts: - - app.example.local - secretName: nodejs-demo-tls - rules: - - host: app.example.local - http: - paths: - - path: /api - pathType: Prefix - backend: - service: - name: nodejs-demo - port: - number: 80 +apiVersion: networking.k8s.io/v1 # Ingress API 版本 +kind: Ingress # Ingress 资源 +metadata: # Ingress 元信息 + name: nodejs-demo # Ingress 名称 + namespace: default # 命名空间 + annotations: # Traefik 注解 + traefik.ingress.kubernetes.io/router.entrypoints: websecure # 使用 websecure(HTTPS) 入口 +spec: # Ingress 规则 + tls: # TLS 配置 + - hosts: # 证书覆盖域名 + - app.example.local # 域名 + secretName: nodejs-demo-tls # 引用的 TLS Secret 名称 + rules: # 路由规则列表 + - host: app.example.local # 主机名匹配 + http: # HTTP 路由 + paths: # 路径列表 + - path: /api # 匹配 API 路径前缀 + pathType: Prefix # 前缀匹配 + backend: # 后端目标 + service: # 后端 Service + name: nodejs-demo # Service 名称 + port: # Service 端口 + number: 80 # 端口号 diff --git a/ansible/files/nodejs-demo/04-13-nodejs-demo.yaml b/ansible/files/nodejs-demo/04-13-nodejs-demo.yaml index 7829df9..8f05d8c 100644 --- a/ansible/files/nodejs-demo/04-13-nodejs-demo.yaml +++ b/ansible/files/nodejs-demo/04-13-nodejs-demo.yaml @@ -1,157 +1,157 @@ # 
对应文档:docs/04-13-nodejs-HPA.md # 累积:04-12 + HorizontalPodAutoscaler(CPU 50%,min 1 max 5) -apiVersion: v1 -kind: PersistentVolumeClaim -metadata: - name: nodejs-demo-data - namespace: default -spec: - accessModes: - - ReadWriteOnce - storageClassName: local-path - resources: - requests: - storage: 1Gi +apiVersion: v1 # PVC API 版本 +kind: PersistentVolumeClaim # 持久卷声明 +metadata: # PVC 元信息 + name: nodejs-demo-data # PVC 名称 + namespace: default # 命名空间 +spec: # PVC 规格 + accessModes: # 访问模式 + - ReadWriteOnce # RWO:同一时间仅单节点挂载读写 + storageClassName: local-path # 存储类 + resources: # 资源请求 + requests: # 配额请求 + storage: 1Gi # 申请容量 --- -apiVersion: v1 -kind: ConfigMap -metadata: - name: nodejs-demo-config - namespace: default -data: - APP_MSG: "Hello from ConfigMap" +apiVersion: v1 # ConfigMap API 版本 +kind: ConfigMap # 配置资源 +metadata: # ConfigMap 元信息 + name: nodejs-demo-config # ConfigMap 名称 + namespace: default # 命名空间 +data: # 配置键值 + APP_MSG: "Hello from ConfigMap" # 示例消息内容 --- -apiVersion: apps/v1 -kind: Deployment -metadata: - name: nodejs-demo - namespace: default -spec: - replicas: 3 - strategy: - type: RollingUpdate - rollingUpdate: - maxSurge: 1 - maxUnavailable: 0 - selector: - matchLabels: - app: nodejs-demo - template: - metadata: - labels: - app: nodejs-demo - spec: - nodeSelector: - kubernetes.io/hostname: ylc62 - securityContext: - fsGroup: 1000 - containers: - - name: nodejs-demo - image: node:18.20-alpine - imagePullPolicy: IfNotPresent - securityContext: - allowPrivilegeEscalation: false - runAsNonRoot: true - runAsUser: 1000 - readOnlyRootFilesystem: true - env: - - name: APP_MSG - valueFrom: - configMapKeyRef: - name: nodejs-demo-config - key: APP_MSG - command: - - node - - "-e" - - | +apiVersion: apps/v1 # Deployment API 版本 +kind: Deployment # 工作负载:Deployment +metadata: # Deployment 元信息 + name: nodejs-demo # Deployment 名称 + namespace: default # 命名空间 +spec: # Deployment 规格 + replicas: 3 # 副本数 + strategy: # 更新策略 + type: RollingUpdate # 滚动更新 + rollingUpdate: # 
滚动更新参数 + maxSurge: 1 # 更新时最多额外增加 1 个 Pod + maxUnavailable: 0 # 更新时不可用 Pod 数为 0 + selector: # Pod 选择器 + matchLabels: # 标签匹配集合 + app: nodejs-demo # 匹配 app=nodejs-demo 的 Pod + template: # Pod 模板 + metadata: # Pod 元信息 + labels: # Pod 标签 + app: nodejs-demo # 与 selector.matchLabels 对齐 + spec: # Pod 规格 + nodeSelector: # 节点选择 + kubernetes.io/hostname: ylc62 # 固定到指定节点(按实际修改) + securityContext: # Pod 级安全上下文 + fsGroup: 1000 # 挂载卷文件组 ID + containers: # 容器列表 + - name: nodejs-demo # 容器名 + image: node:18.20-alpine # Node.js 镜像 + imagePullPolicy: IfNotPresent # 拉取策略 + securityContext: # 容器级安全上下文 + allowPrivilegeEscalation: false # 禁止提权 + runAsNonRoot: true # 非 root 运行 + runAsUser: 1000 # 运行用户 UID + readOnlyRootFilesystem: true # 根文件系统只读 + env: # 环境变量 + - name: APP_MSG # 环境变量名 + valueFrom: # 从引用源取值 + configMapKeyRef: # 从 ConfigMap key 读取 + name: nodejs-demo-config # ConfigMap 名称 + key: APP_MSG # ConfigMap 键名 + command: # 启动命令 + - node # 执行 node + - "-e" # 执行内联脚本 + - | # 多行 JS 脚本(内容保持原样) const http=require('http'); const msg=process.env.APP_MSG||'no env'; http.createServer((q,s)=>s.end(msg)).listen(8080); - ports: - - containerPort: 8080 - resources: - requests: - cpu: "50m" - memory: "64Mi" - limits: - cpu: "500m" - memory: "256Mi" - livenessProbe: - httpGet: - path: / - port: 8080 - initialDelaySeconds: 3 - periodSeconds: 10 - readinessProbe: - httpGet: - path: / - port: 8080 - initialDelaySeconds: 2 - periodSeconds: 5 - volumeMounts: - - name: tmp - mountPath: /tmp - - name: data - mountPath: /data - volumes: - - name: tmp - emptyDir: {} - - name: data - persistentVolumeClaim: - claimName: nodejs-demo-data + ports: # 容器端口 + - containerPort: 8080 # 监听端口 + resources: # 资源请求与限制 + requests: # 最小资源请求 + cpu: "50m" # 请求 CPU + memory: "64Mi" # 请求内存 + limits: # 资源上限 + cpu: "500m" # CPU 限制 + memory: "256Mi" # 内存限制 + livenessProbe: # 存活探针 + httpGet: # HTTP 探测 + path: / # 探测路径 + port: 8080 # 探测端口 + initialDelaySeconds: 3 # 初始延迟 + periodSeconds: 10 # 探测周期 + readinessProbe: # 就绪探针 + httpGet: # 
HTTP 探测 + path: / # 探测路径 + port: 8080 # 探测端口 + initialDelaySeconds: 2 # 初始延迟 + periodSeconds: 5 # 探测周期 + volumeMounts: # 卷挂载 + - name: tmp # 临时卷名称 + mountPath: /tmp # 容器内临时目录 + - name: data # 数据卷名称 + mountPath: /data # 容器内数据目录 + volumes: # 卷定义 + - name: tmp # 临时卷 + emptyDir: {} # 空目录卷 + - name: data # 数据卷 + persistentVolumeClaim: # 卷来源为 PVC + claimName: nodejs-demo-data # 绑定 PVC 名称 --- -apiVersion: v1 -kind: Service -metadata: - name: nodejs-demo - namespace: default -spec: - selector: - app: nodejs-demo - ports: - - port: 80 - targetPort: 8080 +apiVersion: v1 # Service API 版本 +kind: Service # Service 资源 +metadata: # Service 元信息 + name: nodejs-demo # Service 名称 + namespace: default # 命名空间 +spec: # Service 规格 + selector: # 选择后端 Pod + app: nodejs-demo # 选中 app=nodejs-demo + ports: # 端口映射 + - port: 80 # Service 暴露端口 + targetPort: 8080 # 转发到容器端口 --- -apiVersion: networking.k8s.io/v1 -kind: Ingress -metadata: - name: nodejs-demo - namespace: default - annotations: - traefik.ingress.kubernetes.io/router.entrypoints: websecure -spec: - tls: - - hosts: - - app.example.local - secretName: nodejs-demo-tls - rules: - - host: app.example.local - http: - paths: - - path: /api - pathType: Prefix - backend: - service: - name: nodejs-demo - port: - number: 80 +apiVersion: networking.k8s.io/v1 # Ingress API 版本 +kind: Ingress # Ingress 资源 +metadata: # Ingress 元信息 + name: nodejs-demo # Ingress 名称 + namespace: default # 命名空间 + annotations: # Traefik 注解 + traefik.ingress.kubernetes.io/router.entrypoints: websecure # 使用 websecure(HTTPS) 入口 +spec: # Ingress 规则 + tls: # TLS 配置 + - hosts: # 证书覆盖域名 + - app.example.local # 域名 + secretName: nodejs-demo-tls # 引用的 TLS Secret 名称 + rules: # 路由规则列表 + - host: app.example.local # 主机名匹配 + http: # HTTP 路由 + paths: # 路径列表 + - path: /api # 匹配 API 路径前缀 + pathType: Prefix # 前缀匹配 + backend: # 后端目标 + service: # 后端 Service + name: nodejs-demo # Service 名称 + port: # Service 端口 + number: 80 # 端口号 --- -apiVersion: autoscaling/v2 -kind: HorizontalPodAutoscaler 
-metadata: - name: nodejs-demo - namespace: default -spec: - scaleTargetRef: - apiVersion: apps/v1 - kind: Deployment - name: nodejs-demo - minReplicas: 1 - maxReplicas: 5 - metrics: - - type: Resource - resource: - name: cpu - target: - type: Utilization - averageUtilization: 50 +apiVersion: autoscaling/v2 # HPA API 版本 +kind: HorizontalPodAutoscaler # 水平自动扩缩容资源 +metadata: # HPA 元信息 + name: nodejs-demo # HPA 名称 + namespace: default # 命名空间 +spec: # HPA 规格 + scaleTargetRef: # 伸缩目标引用 + apiVersion: apps/v1 # 目标 API 版本 + kind: Deployment # 目标资源类型 + name: nodejs-demo # 目标 Deployment 名称 + minReplicas: 1 # 最小副本数 + maxReplicas: 5 # 最大副本数 + metrics: # 伸缩指标 + - type: Resource # 资源指标类型 + resource: # 资源指标配置 + name: cpu # 指标资源:CPU + target: # 目标值 + type: Utilization # 目标类型:利用率 + averageUtilization: 50 # 目标平均 CPU 利用率(%) diff --git a/ansible/files/nodejs-demo/nodejs-demo-secret.example.yaml b/ansible/files/nodejs-demo/nodejs-demo-secret.example.yaml index 79a41ec..0dfe049 100644 --- a/ansible/files/nodejs-demo/nodejs-demo-secret.example.yaml +++ b/ansible/files/nodejs-demo/nodejs-demo-secret.example.yaml @@ -1,8 +1,8 @@ # 示例:勿将真实密钥提交到公开仓库。对应 docs/04-03 Secret 示意。 -apiVersion: v1 -kind: Secret -metadata: - name: nodejs-demo-secret - namespace: default -stringData: - API_TOKEN: "replace-me" +apiVersion: v1 # Secret API 版本 +kind: Secret # 密钥资源 +metadata: # Secret 元信息 + name: nodejs-demo-secret # Secret 名称 + namespace: default # 命名空间 +stringData: # 明文键值(创建时会转换为 data) + API_TOKEN: "replace-me" # 示例 token(请替换,勿提交真实密钥) diff --git a/ansible/files/onenav/onenav-proxy.yaml b/ansible/files/onenav/onenav-proxy.yaml index 319d3e6..7f8611f 100644 --- a/ansible/files/onenav/onenav-proxy.yaml +++ b/ansible/files/onenav/onenav-proxy.yaml @@ -1,43 +1,43 @@ # docs/05-02-onenav首页面板.md — 修改 Endpoints IP 与 Ingress host -apiVersion: v1 -kind: Service -metadata: - name: onenav-external - namespace: default -spec: - ports: - - name: http - port: 80 - targetPort: 7070 +apiVersion: v1 # Service/Endpoints 
使用的核心 API 版本 +kind: Service # 资源类型:Service(为外部后端提供集群内服务名) +metadata: # Service 元信息 + name: onenav-external # Service 名称(与 Endpoints 同名以进行绑定) + namespace: default # 命名空间 +spec: # Service 规格 + ports: # 端口映射列表 + - name: http # 端口名称 + port: 80 # Service 对内暴露端口 + targetPort: 7070 # 目标端口(与 Endpoints 端口对应) --- -apiVersion: v1 -kind: Endpoints -metadata: - name: onenav-external - namespace: default -subsets: - - addresses: - - ip: 192.168.2.22 - ports: - - port: 7070 - name: http +apiVersion: v1 # Endpoints 使用的核心 API 版本 +kind: Endpoints # 资源类型:手工定义后端地址 +metadata: # Endpoints 元信息 + name: onenav-external # 必须与 Service 同名,Service 才会路由到这些地址 + namespace: default # 命名空间 +subsets: # 后端地址与端口集合 + - addresses: # 地址列表 + - ip: 192.168.2.22 # 实际后端主机 IP(按环境修改) + ports: # 端口列表 + - port: 7070 # 后端服务真实端口 + name: http # 端口名称(与 Service 端口名保持一致更清晰) --- -apiVersion: networking.k8s.io/v1 -kind: Ingress -metadata: - name: onenav - namespace: default - annotations: - traefik.ingress.kubernetes.io/router.entrypoints: web -spec: - rules: - - host: onenav.example.com - http: - paths: - - path: / - pathType: Prefix - backend: - service: - name: onenav-external - port: - number: 80 +apiVersion: networking.k8s.io/v1 # Ingress API 版本 +kind: Ingress # 资源类型:入口路由 +metadata: # Ingress 元信息 + name: onenav # Ingress 名称 + namespace: default # 命名空间 + annotations: # 控制器注解 + traefik.ingress.kubernetes.io/router.entrypoints: web # 使用 Traefik 的 web(HTTP) 入口 +spec: # Ingress 规则 + rules: # 规则列表 + - host: onenav.example.com # 访问域名(按环境修改) + http: # HTTP 规则 + paths: # 路径列表 + - path: / # 匹配根路径 + pathType: Prefix # 前缀匹配 + backend: # 后端目标 + service: # 转发到 Service + name: onenav-external # 后端 Service 名称 + port: # 后端端口 + number: 80 # Service 端口 diff --git a/ansible/files/openclaw/openclaw-k3s-experimental.yaml b/ansible/files/openclaw/openclaw-k3s-experimental.yaml index 37bc8fd..b8bc035 100644 --- a/ansible/files/openclaw/openclaw-k3s-experimental.yaml +++ b/ansible/files/openclaw/openclaw-k3s-experimental.yaml @@ -1,74 
+1,74 @@ # docs/05-08-openclaw-k3s-实验部署.md — 实验用;替换镜像与域名 -apiVersion: v1 -kind: Namespace -metadata: - name: openclaw +apiVersion: v1 # Namespace 使用的核心 API 版本 +kind: Namespace # 资源类型:命名空间 +metadata: # Namespace 元信息 + name: openclaw # 命名空间名称 --- -apiVersion: apps/v1 -kind: Deployment -metadata: - name: openclaw-gateway - namespace: openclaw -spec: - replicas: 1 - selector: - matchLabels: - app: openclaw-gateway - template: - metadata: - labels: - app: openclaw-gateway - spec: - containers: - - name: openclaw-gateway - image: registry.local/openclaw:local - imagePullPolicy: IfNotPresent - env: - - name: OPENCLAW_GATEWAY_MODE - value: "local" - ports: - - containerPort: 18789 - volumeMounts: - - name: config - mountPath: /home/node/.openclaw - - name: workspace - mountPath: /home/node/.openclaw/workspace - volumes: - - name: config - emptyDir: {} - - name: workspace - emptyDir: {} +apiVersion: apps/v1 # Deployment 使用的 API 版本 +kind: Deployment # 工作负载:Deployment +metadata: # Deployment 元信息 + name: openclaw-gateway # Deployment 名称 + namespace: openclaw # 所在命名空间 +spec: # Deployment 规格 + replicas: 1 # 副本数 + selector: # 选择器 + matchLabels: # 标签匹配 + app: openclaw-gateway # 匹配标签 app=openclaw-gateway + template: # Pod 模板 + metadata: # Pod 元信息 + labels: # Pod 标签 + app: openclaw-gateway # 与 selector 匹配 + spec: # Pod 规格 + containers: # 容器列表 + - name: openclaw-gateway # 容器名 + image: registry.local/openclaw:local # 容器镜像(按环境替换) + imagePullPolicy: IfNotPresent # 拉取策略:本地有镜像则不重复拉取 + env: # 环境变量 + - name: OPENCLAW_GATEWAY_MODE # 运行模式变量名 + value: "local" # 运行模式值 + ports: # 容器端口 + - containerPort: 18789 # 网关监听端口 + volumeMounts: # 卷挂载 + - name: config # 配置卷 + mountPath: /home/node/.openclaw # 配置目录挂载点 + - name: workspace # 工作目录卷 + mountPath: /home/node/.openclaw/workspace # 工作目录挂载点 + volumes: # 卷定义 + - name: config # 配置卷名 + emptyDir: {} # 临时卷(Pod 生命周期内) + - name: workspace # 工作目录卷名 + emptyDir: {} # 临时卷(Pod 生命周期内) --- -apiVersion: v1 -kind: Service -metadata: - name: openclaw-gateway - 
namespace: openclaw -spec: - selector: - app: openclaw-gateway - ports: - - port: 18789 - targetPort: 18789 - protocol: TCP - name: http +apiVersion: v1 # Service 使用的核心 API 版本 +kind: Service # Service 资源 +metadata: # Service 元信息 + name: openclaw-gateway # Service 名称 + namespace: openclaw # 命名空间 +spec: # Service 规格 + selector: # 后端 Pod 选择器 + app: openclaw-gateway # 选择 app=openclaw-gateway 的 Pod + ports: # 端口映射列表 + - port: 18789 # Service 暴露端口 + targetPort: 18789 # 转发到容器端口 + protocol: TCP # 协议 + name: http # 端口名 --- -apiVersion: networking.k8s.io/v1 -kind: Ingress -metadata: - name: openclaw-gateway - namespace: openclaw - annotations: - traefik.ingress.kubernetes.io/router.entrypoints: web -spec: - rules: - - host: openclaw-k3s.example.com - http: - paths: - - path: / - pathType: Prefix - backend: - service: - name: openclaw-gateway - port: - number: 18789 +apiVersion: networking.k8s.io/v1 # Ingress API 版本 +kind: Ingress # 入口路由资源 +metadata: # Ingress 元信息 + name: openclaw-gateway # Ingress 名称 + namespace: openclaw # 命名空间 + annotations: # 控制器注解 + traefik.ingress.kubernetes.io/router.entrypoints: web # 使用 Traefik web(HTTP) 入口 +spec: # Ingress 规则 + rules: # 规则列表 + - host: openclaw-k3s.example.com # 访问域名(按环境修改) + http: # HTTP 规则 + paths: # 路径列表 + - path: / # 根路径 + pathType: Prefix # 前缀匹配 + backend: # 后端目标 + service: # 后端 Service + name: openclaw-gateway # Service 名称 + port: # Service 端口 + number: 18789 # 端口号 diff --git a/ansible/files/openclaw/openclaw-proxy.yaml b/ansible/files/openclaw/openclaw-proxy.yaml index f7ffcf7..80f3582 100644 --- a/ansible/files/openclaw/openclaw-proxy.yaml +++ b/ansible/files/openclaw/openclaw-proxy.yaml @@ -1,43 +1,43 @@ # docs/05-07-openclaw应用部署.md — 修改 IP / host -apiVersion: v1 -kind: Service -metadata: - name: openclaw-external - namespace: default -spec: - ports: - - name: http - port: 80 - targetPort: 18789 +apiVersion: v1 # Service/Endpoints 使用的核心 API 版本 +kind: Service # 资源类型:Service(对外部后端提供集群内服务名) +metadata: # Service 元信息 + name: 
openclaw-external # Service 名称(需与 Endpoints 同名) + namespace: default # 命名空间 +spec: # Service 规格 + ports: # 端口映射列表 + - name: http # 端口名称 + port: 80 # Service 暴露端口 + targetPort: 18789 # 目标端口(与 Endpoints 端口对应) --- -apiVersion: v1 -kind: Endpoints -metadata: - name: openclaw-external - namespace: default -subsets: - - addresses: - - ip: 192.168.2.70 - ports: - - port: 18789 - name: http +apiVersion: v1 # Endpoints 使用的核心 API 版本 +kind: Endpoints # 资源类型:手工定义后端地址 +metadata: # Endpoints 元信息 + name: openclaw-external # 必须与 Service 同名才能绑定 + namespace: default # 命名空间 +subsets: # 后端地址与端口集合 + - addresses: # 地址列表 + - ip: 192.168.2.70 # 外部后端 IP(按环境修改) + ports: # 端口列表 + - port: 18789 # 外部后端真实端口 + name: http # 端口名 --- -apiVersion: networking.k8s.io/v1 -kind: Ingress -metadata: - name: openclaw - namespace: default - annotations: - traefik.ingress.kubernetes.io/router.entrypoints: web -spec: - rules: - - host: openclaw.example.com - http: - paths: - - path: / - pathType: Prefix - backend: - service: - name: openclaw-external - port: - number: 80 +apiVersion: networking.k8s.io/v1 # Ingress API 版本 +kind: Ingress # 入口资源:域名/路径转发 +metadata: # Ingress 元信息 + name: openclaw # Ingress 名称 + namespace: default # 命名空间 + annotations: # Traefik 注解 + traefik.ingress.kubernetes.io/router.entrypoints: web # 使用 web(HTTP) 入口 +spec: # Ingress 规则 + rules: # 规则列表 + - host: openclaw.example.com # 访问域名(按环境修改) + http: # HTTP 规则 + paths: # 路径列表 + - path: / # 根路径 + pathType: Prefix # 前缀匹配 + backend: # 后端目标 + service: # 后端 Service + name: openclaw-external # Service 名称 + port: # Service 端口 + number: 80 # 端口号 diff --git a/ansible/files/openlist/app-data-backup-cronjob.yaml b/ansible/files/openlist/app-data-backup-cronjob.yaml index b8b1596..3a0fdd2 100644 --- a/ansible/files/openlist/app-data-backup-cronjob.yaml +++ b/ansible/files/openlist/app-data-backup-cronjob.yaml @@ -1,27 +1,27 @@ # docs/06-03-k3s-自动备份与恢复-openlist-webdav.md — 替换镜像、hostPath、远端名 -apiVersion: batch/v1 -kind: CronJob -metadata: - name: 
app-data-backup - namespace: default -spec: - schedule: "0 3 * * *" - jobTemplate: - spec: - template: - spec: - containers: - - name: app-data-backup - image: your-registry/app-backup:latest - args: - - /bin/sh - - -c - - rclone sync /data openlist-webdav:backups/app-data - volumeMounts: - - name: app-data - mountPath: /data - volumes: - - name: app-data - hostPath: - path: /data/app - restartPolicy: OnFailure +apiVersion: batch/v1 # CronJob API 版本 +kind: CronJob # 定时任务资源 +metadata: # CronJob 元信息 + name: app-data-backup # CronJob 名称 + namespace: default # 命名空间 +spec: # CronJob 规格 + schedule: "0 3 * * *" # 执行计划:每天 03:00 + jobTemplate: # 每次触发创建的 Job 模板 + spec: # Job 规格 + template: # Pod 模板 + spec: # Pod 规格 + containers: # 容器列表 + - name: app-data-backup # 容器名 + image: your-registry/app-backup:latest # 备份镜像(按环境替换) + args: # 启动参数 + - /bin/sh # 使用 shell 执行 + - -c # shell 执行模式 + - rclone sync /data openlist-webdav:backups/app-data # 同步本地目录到远端备份 + volumeMounts: # 卷挂载 + - name: app-data # 引用卷名 + mountPath: /data # 容器内数据目录 + volumes: # 卷定义 + - name: app-data # 卷名 + hostPath: # 使用宿主机路径 + path: /data/app # 宿主机实际目录(按环境修改) + restartPolicy: OnFailure # 失败时重启 diff --git a/ansible/files/openlist/app-data-restore-job.yaml b/ansible/files/openlist/app-data-restore-job.yaml index 71d7a5f..231fde9 100644 --- a/ansible/files/openlist/app-data-restore-job.yaml +++ b/ansible/files/openlist/app-data-restore-job.yaml @@ -1,24 +1,24 @@ # docs/06-03-k3s-自动备份与恢复-openlist-webdav.md — 一次性恢复 Job -apiVersion: batch/v1 -kind: Job -metadata: - name: app-data-restore - namespace: default -spec: - template: - spec: - containers: - - name: app-data-restore - image: your-registry/app-backup:latest - args: - - /bin/sh - - -c - - rclone sync openlist-webdav:backups/app-data /data - volumeMounts: - - name: app-data - mountPath: /data - volumes: - - name: app-data - hostPath: - path: /data/app - restartPolicy: OnFailure +apiVersion: batch/v1 # Job 使用的 API 版本 +kind: Job # 一次性任务资源 +metadata: # Job 元信息 + 
name: app-data-restore # Job 名称 + namespace: default # 命名空间 +spec: # Job 规格 + template: # Pod 模板 + spec: # Pod 规格 + containers: # 容器列表 + - name: app-data-restore # 容器名 + image: your-registry/app-backup:latest # 恢复镜像(按环境替换) + args: # 启动参数 + - /bin/sh # 使用 shell 运行命令 + - -c # shell 执行模式 + - rclone sync openlist-webdav:backups/app-data /data # 从远端同步备份到本地目录 + volumeMounts: # 卷挂载 + - name: app-data # 引用 volumes[].name + mountPath: /data # 容器内恢复目标路径 + volumes: # 卷定义 + - name: app-data # 卷名 + hostPath: # 使用宿主机目录作为存储 + path: /data/app # 节点上的真实数据目录(按实际修改) + restartPolicy: OnFailure # 失败时重启,成功后结束 diff --git a/ansible/files/openlist/openlist-backup-cronjob.yaml b/ansible/files/openlist/openlist-backup-cronjob.yaml index 815807a..a3db40a 100644 --- a/ansible/files/openlist/openlist-backup-cronjob.yaml +++ b/ansible/files/openlist/openlist-backup-cronjob.yaml @@ -1,27 +1,27 @@ # docs/05-06-openlist挂载网盘与自动备份.md — 替换镜像与 PVC 名 -apiVersion: batch/v1 -kind: CronJob -metadata: - name: openlist-backup - namespace: default -spec: - schedule: "0 3 * * *" - jobTemplate: - spec: - template: - spec: - containers: - - name: openlist-backup - image: your-registry/openlist-backup:latest - args: - - /bin/sh - - -c - - /backup.sh - volumeMounts: - - name: backup-target - mountPath: /backup - volumes: - - name: backup-target - persistentVolumeClaim: - claimName: openlist-backup-pvc - restartPolicy: OnFailure +apiVersion: batch/v1 # CronJob 所属 API:batch/v1 +kind: CronJob # 定时任务:CronJob +metadata: # 资源元信息 + name: openlist-backup # CronJob 名称 + namespace: default # CronJob 所在命名空间 +spec: # CronJob 期望状态 + schedule: "0 3 * * *" # 任务调度:每天 03:00(cron 格式) + jobTemplate: # 每次触发生成的 Job 模板 + spec: # Job spec + template: # Job Pod 模板 + spec: # Pod spec + containers: # 容器列表 + - name: openlist-backup # 容器名 + image: your-registry/openlist-backup:latest # 镜像(需替换为你的镜像) + args: # 容器启动命令参数 + - /bin/sh # 使用 shell + - -c # shell 执行模式 + - /backup.sh # 运行脚本 + volumeMounts: # 挂载卷 + - name: backup-target # 引用 
volumes[].name + mountPath: /backup # 容器内挂载目录 + volumes: # 卷列表 + - name: backup-target # 卷名(与 volumeMounts 对齐) + persistentVolumeClaim: # 使用 PVC 作为存储来源 + claimName: openlist-backup-pvc # 绑定的 PVC 名称(需保证存在) + restartPolicy: OnFailure # Pod 失败后重启策略:仅失败时重启 diff --git a/ansible/files/traefik-acme/traefik-acme.yaml b/ansible/files/traefik-acme/traefik-acme.yaml index 60a3fcb..0ee8442 100644 --- a/ansible/files/traefik-acme/traefik-acme.yaml +++ b/ansible/files/traefik-acme/traefik-acme.yaml @@ -2,37 +2,39 @@ # 含:ACME(Cloudflare DNS-01)、ping 健康检查(websecure)、PROXY protocol trustedIPs # 使用前:替换 ,创建 cloudflare-api-token Secret,按实际修改 nodeSelector/trustedIPs # 部署:kubectl apply -f traefik-acme.yaml(或复制到 K3s manifests 目录) +# +# 推荐(Dashboard + ACME + local-path 一份清单):见 ../traefik-dashboard-acme/traefik-dashboard-acme.yaml --- -apiVersion: helm.cattle.io/v1 -kind: HelmChartConfig -metadata: - name: traefik - namespace: kube-system -spec: - valuesContent: |- - additionalArguments: - - "--log.level=INFO" - - "--certificatesresolvers.cloudflare.acme.dnschallenge.resolvers=1.1.1.1:53,1.0.0.1:53" - - "--certificatesresolvers.cloudflare.acme.email=" - - "--certificatesresolvers.cloudflare.acme.storage=/data/acme.json" +apiVersion: helm.cattle.io/v1 # HelmChartConfig 所在的 API 版本 +kind: HelmChartConfig # HelmChartConfig:给 K3s 自带 Helm chart 注入 values 的资源 +metadata: # 该对象的标识信息 + name: traefik # chart 对应的对象名称(通常与 Traefik chart name 一致) + namespace: kube-system # HelmChartConfig 的命名空间(Traefik 默认在 kube-system) +spec: # chart 注入配置的具体内容 + valuesContent: |- # 以“字符串形式的 YAML”注入到 Helm chart values(由 chart 解析) + additionalArguments: # 追加给 Traefik 的额外启动参数列表 + - "--log.level=INFO" # 日志级别:INFO + - "--certificatesresolvers.cloudflare.acme.dnschallenge.resolvers=1.1.1.1:53,1.0.0.1:53" # DNS resolver 列表 + - "--certificatesresolvers.cloudflare.acme.email=" # ACME 注册邮箱 + - "--certificatesresolvers.cloudflare.acme.storage=/data/acme.json" # ACME 存储(容器内路径) # - 
"--certificatesresolvers.cloudflare.acme.caserver=https://acme-staging-v02.api.letsencrypt.org/directory" # 测试用,上线前删除 - - "--certificatesresolvers.cloudflare.acme.dnschallenge.provider=cloudflare" - - "--certificatesresolvers.cloudflare.acme.dnschallenge.propagation.delayBeforeChecks=600" + - "--certificatesresolvers.cloudflare.acme.dnschallenge.provider=cloudflare" # 使用 Cloudflare 作为 DNS-01 provider + - "--certificatesresolvers.cloudflare.acme.dnschallenge.propagation.delayBeforeChecks=600" # DNS propagation 等待时间(秒) # 健康检查:GET /ping 在 443(HTTPS) 返回 200,供 HAProxy 对 443 做 option httpchk + ssl - - "--ping=true" - - "--ping.entryPoint=websecure" + - "--ping=true" # 开启 ping healthcheck + - "--ping.entryPoint=websecure" # ping 走 websecure(HTTPS) entrypoint # PROXY protocol:trustedIPs 需包含 HAProxy 所在 IP/网段 - - "--entrypoints.web.proxyProtocol.trustedIPs=192.168.2.0/24" - - "--entrypoints.websecure.proxyProtocol.trustedIPs=192.168.2.0/24" + - "--entrypoints.web.proxyProtocol.trustedIPs=192.168.2.0/24" # HTTP entrypoint 信任的代理网段 + - "--entrypoints.websecure.proxyProtocol.trustedIPs=192.168.2.0/24" # HTTPS entrypoint 信任的代理网段 - env: - - name: CF_DNS_API_TOKEN - valueFrom: - secretKeyRef: - name: cloudflare-api-token - key: api-token + env: # 环境变量注入(给 Traefik chart) + - name: CF_DNS_API_TOKEN # Cloudflare API Token 环境变量名 + valueFrom: # 从 Secret 中读取环境变量值 + secretKeyRef: # Secret 引用方式:按 key 取值 + name: cloudflare-api-token # Secret 名称(你创建的 Cloudflare Token Secret) + key: api-token # Secret 内对应的 key 名 - nodeSelector: - kubernetes.io/hostname: ylc61 + nodeSelector: # 把 Traefik Pod 固定到指定节点(配合 RWO 本地存储更安全) + kubernetes.io/hostname: ylc61 # 固定节点主机名(按你的实际节点修改) diff --git a/ansible/files/traefik-custom-ports/traefik-custom-ports.yaml b/ansible/files/traefik-custom-ports/traefik-custom-ports.yaml index ca092a9..9efbf70 100644 --- a/ansible/files/traefik-custom-ports/traefik-custom-ports.yaml +++ b/ansible/files/traefik-custom-ports/traefik-custom-ports.yaml @@ -1,25 +1,25 @@ --- 
-apiVersion: helm.cattle.io/v1 -kind: HelmChartConfig -metadata: - name: traefik - namespace: kube-system -spec: - valuesContent: |- - ports: - web: - expose: true - websecure: - expose: true +apiVersion: helm.cattle.io/v1 # HelmChartConfig 所在的 API 版本 +kind: HelmChartConfig # HelmChartConfig:用于给 Traefik chart 注入 values +metadata: # 资源标识 + name: traefik # chart 对应的 name(通常与 Traefik chart 一致) + namespace: kube-system # Traefik chart 运行的命名空间 +spec: # 注入 Helm chart 的具体配置 + valuesContent: |- # “字符串形式的 YAML”注入到 Helm values(由 chart 解析) + ports: # Traefik entrypoints/端口暴露配置 + web: # 内置 HTTP entrypoint + expose: true # 启用 web entrypoint 暴露 + websecure: # 内置 HTTPS entrypoint + expose: true # 启用 websecure entrypoint 暴露 # 自定义 HTTP 入口(示例 18080) - web18080: - port: 18080 - expose: - default: true - exposedPort: 18080 + web18080: # 自定义 HTTP entrypoint 名称(示例) + port: 18080 # entrypoint 监听端口(容器端) + expose: # 对外暴露配置 + default: true # 默认启用暴露 + exposedPort: 18080 # 对外映射端口 # 自定义 HTTPS 入口(示例 18443) - websecure18443: - port: 18443 - expose: - default: true - exposedPort: 18443 + websecure18443: # 自定义 HTTPS entrypoint 名称(示例) + port: 18443 # entrypoint 监听端口(容器端) + expose: # 对外暴露配置 + default: true # 默认启用暴露 + exposedPort: 18443 # 对外映射端口 diff --git a/ansible/files/traefik-dashboard-acme/tomcat-acme.yaml b/ansible/files/traefik-dashboard-acme/tomcat-acme.yaml index 69c14a3..1dbe9d0 100644 --- a/ansible/files/traefik-dashboard-acme/tomcat-acme.yaml +++ b/ansible/files/traefik-dashboard-acme/tomcat-acme.yaml @@ -1,94 +1,94 @@ # docs/03-03 第 5 节:Tomcat + test05.jackadam.top 验证 HTTPS(请按需改域名) --- -apiVersion: apps/v1 -kind: Deployment -metadata: - name: tomcat-test05 - namespace: default - labels: - app: tomcat-test05 -spec: - replicas: 1 - selector: - matchLabels: - app: tomcat-test05 - template: - metadata: - labels: - app: tomcat-test05 - spec: - containers: - - name: tomcat - image: tomcat:9.0 +apiVersion: apps/v1 # Deployment API 版本 +kind: Deployment # 工作负载:Deployment +metadata: # Deployment 
元信息 + name: tomcat-test05 # Deployment 名称 + namespace: default # 命名空间 + labels: # 标签 + app: tomcat-test05 # 应用标签 +spec: # Deployment 规格 + replicas: 1 # 副本数 + selector: # Deployment 选择器 + matchLabels: # 标签匹配集合 + app: tomcat-test05 # 与模板标签对齐 + template: # Pod 模板 + metadata: # Pod 元信息 + labels: # Pod 标签 + app: tomcat-test05 # 与 selector.matchLabels 对齐 + spec: # Pod 规格 + containers: # 容器列表 + - name: tomcat # 容器名 + image: tomcat:9.0 # Tomcat 镜像版本 # 官方镜像默认 webapps 在 webapps.dist;整目录复制到 webapps(与 Docker Compose cp -a webapps.dist/* webapps 等价) - command: - - sh - - -c - - | + command: # 启动命令(覆盖默认 ENTRYPOINT/CMD) + - sh # 使用 shell + - -c # shell 执行模式 + - | # 多行脚本(内部内容保持原样) set -e CATALINA_HOME=/usr/local/tomcat mkdir -p "${CATALINA_HOME}/webapps" cp -a "${CATALINA_HOME}/webapps.dist/." "${CATALINA_HOME}/webapps/" exec "${CATALINA_HOME}/bin/catalina.sh" run - ports: - - containerPort: 8080 + ports: # 容器端口 + - containerPort: 8080 # Tomcat HTTP 端口 --- -apiVersion: v1 -kind: Service -metadata: - name: tomcat-test05 - namespace: default -spec: - selector: - app: tomcat-test05 - ports: - - port: 8080 - targetPort: 8080 +apiVersion: v1 # Service API 版本 +kind: Service # Service 资源 +metadata: # Service 元信息 + name: tomcat-test05 # Service 名称 + namespace: default # 命名空间 +spec: # Service 规格 + selector: # 后端 Pod 选择器 + app: tomcat-test05 # 选中 app=tomcat-test05 的 Pod + ports: # 端口映射 + - port: 8080 # Service 暴露端口 + targetPort: 8080 # 转发到容器端口 --- # HTTPS(websecure) -apiVersion: networking.k8s.io/v1 -kind: Ingress -metadata: - name: tomcat-test05-acme - namespace: default - annotations: - traefik.ingress.kubernetes.io/router.entrypoints: websecure - traefik.ingress.kubernetes.io/router.tls.certresolver: cloudflare -spec: - ingressClassName: traefik - tls: - - hosts: - - test05.jackadam.top - rules: - - host: test05.jackadam.top - http: - paths: - - path: / - pathType: Prefix - backend: - service: - name: tomcat-test05 - port: - number: 8080 +apiVersion: networking.k8s.io/v1 # Ingress API 版本 
+kind: Ingress # Ingress 资源(HTTPS) +metadata: # Ingress 元信息 + name: tomcat-test05-acme # HTTPS Ingress 名称 + namespace: default # 命名空间 + annotations: # Traefik 注解 + traefik.ingress.kubernetes.io/router.entrypoints: websecure # 使用 HTTPS 入口 + traefik.ingress.kubernetes.io/router.tls.certresolver: cloudflare # 使用 Cloudflare certresolver +spec: # Ingress 规则 + ingressClassName: traefik # 指定 IngressClass + tls: # TLS 配置 + - hosts: # 证书覆盖域名 + - test05.jackadam.top # 域名 + rules: # 路由规则 + - host: test05.jackadam.top # 主机匹配 + http: # HTTP 路由定义 + paths: # 路径列表 + - path: / # 根路径 + pathType: Prefix # 前缀匹配 + backend: # 后端目标 + service: # 后端 Service + name: tomcat-test05 # Service 名称 + port: # Service 端口 + number: 8080 # 端口号 --- # HTTP(web,与 03-02 nginx-matrix-tls 一致:拆成两个 Ingress) -apiVersion: networking.k8s.io/v1 -kind: Ingress -metadata: - name: tomcat-test05-http - namespace: default - annotations: - traefik.ingress.kubernetes.io/router.entrypoints: web -spec: - ingressClassName: traefik - rules: - - host: test05.jackadam.top - http: - paths: - - path: / - pathType: Prefix - backend: - service: - name: tomcat-test05 - port: - number: 8080 +apiVersion: networking.k8s.io/v1 # Ingress API 版本 +kind: Ingress # Ingress 资源(HTTP) +metadata: # Ingress 元信息 + name: tomcat-test05-http # HTTP Ingress 名称 + namespace: default # 命名空间 + annotations: # Traefik 注解 + traefik.ingress.kubernetes.io/router.entrypoints: web # 使用 HTTP 入口 +spec: # Ingress 规则 + ingressClassName: traefik # 指定 IngressClass + rules: # 路由规则 + - host: test05.jackadam.top # 主机匹配 + http: # HTTP 路由定义 + paths: # 路径列表 + - path: / # 根路径 + pathType: Prefix # 前缀匹配 + backend: # 后端目标 + service: # 后端 Service + name: tomcat-test05 # Service 名称 + port: # Service 端口 + number: 8080 # 端口号 diff --git a/ansible/files/traefik-dashboard-acme/traefik-dashboard-acme.yaml b/ansible/files/traefik-dashboard-acme/traefik-dashboard-acme.yaml index c64bc62..9773a34 100644 --- a/ansible/files/traefik-dashboard-acme/traefik-dashboard-acme.yaml +++ 
b/ansible/files/traefik-dashboard-acme/traefik-dashboard-acme.yaml @@ -1,62 +1,83 @@ -# 03-03 Traefik Dashboard + ACME 合并配置(HelmChartConfig) -# 含:Dashboard、ACME(Cloudflare DNS-01)、ping、PROXY protocol(与 03-02 一致) -# 使用前:替换 ,创建 cloudflare-api-token Secret,按实际修改 nodeSelector/trustedIPs -# 部署:kubectl apply -f traefik-dashboard-acme.yaml +# 03-03 Traefik Dashboard + ACME(唯一清单,推荐) +# ============================================================================= +# 含:HelmChartConfig(local-path 持久化 /data + ACME Cloudflare DNS-01 + Dashboard) +# + IngressRoute(/dashboard、/api) +# acme.json 与 chart persistence 均落在 /data,Pod 重建后证书仍在;nodeSelector 须固定单节点(RWO) +# +# 部署:kubectl apply -f ansible/files/traefik-dashboard-acme/traefik-dashboard-acme.yaml +# 使用前:替换 、nodeSelector 主机名;Secret cloudflare-api-token 已存在(见 03-02) +# 全集群只能有一份 HelmChartConfig metadata.name=traefik +# +# --- 不要 Dashboard 时 --- +# 删除文末 IngressRoute 整段;并在 valuesContent 中删掉 ports(可选)、--api.dashboard、--api.insecure +# +# --- 临时不用持久化(不推荐)--- +# 将 persistence.enabled 改为 false 并删掉 persistence 下其余字段(证书可能随 Pod 丢失) +# ============================================================================= --- -apiVersion: helm.cattle.io/v1 -kind: HelmChartConfig -metadata: - name: traefik - namespace: kube-system -spec: - valuesContent: |- - ports: - web: - expose: true - websecure: - expose: true +apiVersion: helm.cattle.io/v1 # HelmChartConfig 所在的 API 版本 +kind: HelmChartConfig # HelmChartConfig:给 K3s/Helm 注入 values 的资源 +metadata: # 资源标识信息 + name: traefik # chart 对应的 name(需要与 Traefik chart/约定一致) + namespace: kube-system # Traefik 通常运行在 kube-system +spec: # 该资源要注入 chart 的配置 + valuesContent: |- # 以“字符串形式的 YAML”注入到 Helm chart values(由 chart 解析) + ports: # 暴露 entrypoints 给集群入口 + web: # HTTP entrypoint + expose: true # 允许暴露 web + websecure: # HTTPS entrypoint + expose: true # 允许暴露 websecure - additionalArguments: - - "--api.dashboard=true" - - "--api.insecure=true" + persistence: # chart 持久化配置:为 /data 挂载 PVC + enabled: true # 开启持久卷 + 
name: data # chart 创建/引用的卷名(PVC 等) + accessMode: ReadWriteOnce # RWO:同一时间只能在一个节点挂载 + size: 512Mi # 请求容量(local-path 会据此创建本地卷) + storageClass: local-path # 使用 K3s 的 local-path-provisioner + path: /data # 容器内挂载目录(与 acme.storage 一致) - - "--log.level=INFO" - - "--certificatesresolvers.cloudflare.acme.dnschallenge.resolvers=1.1.1.1:53,1.0.0.1:53" - - "--certificatesresolvers.cloudflare.acme.email=" - - "--certificatesresolvers.cloudflare.acme.storage=/data/acme.json" + additionalArguments: # 额外传给 Traefik 的 CLI 参数 + - "--api.dashboard=true" # 打开 dashboard 功能 + - "--api.insecure=true" # (k8s)允许 dashboard 在入口可用(注意安全) + + - "--log.level=INFO" # 日志级别 + - "--certificatesresolvers.cloudflare.acme.dnschallenge.resolvers=1.1.1.1:53,1.0.0.1:53" # DNS 解析器列表(用于 DNS-01) + - "--certificatesresolvers.cloudflare.acme.email=" # ACME 注册邮箱 + - "--certificatesresolvers.cloudflare.acme.storage=/data/acme.json" # 证书与账户存储(容器内 /data) # - "--certificatesresolvers.cloudflare.acme.caserver=https://acme-staging-v02.api.letsencrypt.org/directory" # 测试用,上线前删除 - - "--certificatesresolvers.cloudflare.acme.dnschallenge.provider=cloudflare" - - "--certificatesresolvers.cloudflare.acme.dnschallenge.propagation.delayBeforeChecks=600" + - "--certificatesresolvers.cloudflare.acme.dnschallenge.provider=cloudflare" # DNS-01 provider:cloudflare + - "--certificatesresolvers.cloudflare.acme.dnschallenge.propagation.delayBeforeChecks=600" # DNS-01 propagation 等待秒数 - - "--ping=true" - - "--ping.entryPoint=websecure" + - "--ping=true" # 开启 ping healthcheck + - "--ping.entryPoint=websecure" # ping 使用 websecure(HTTPS) entrypoint - - "--entrypoints.web.proxyProtocol.trustedIPs=192.168.2.0/24" - - "--entrypoints.websecure.proxyProtocol.trustedIPs=192.168.2.0/24" + - "--entrypoints.web.proxyProtocol.trustedIPs=192.168.2.0/24" # web entrypoint 信任的代理网段 + - "--entrypoints.websecure.proxyProtocol.trustedIPs=192.168.2.0/24" # websecure entrypoint 信任的代理网段 - env: - - name: CF_DNS_API_TOKEN - valueFrom: - secretKeyRef: - name: 
cloudflare-api-token - key: api-token + env: # 环境变量注入 + - name: CF_DNS_API_TOKEN # 供 Traefik 使用的 Cloudflare Token 环境变量名 + valueFrom: # 从 Secret 挂载 + secretKeyRef: # Secret 引用方式 + name: cloudflare-api-token # Secret 名 + key: api-token # Secret 内 key - nodeSelector: - kubernetes.io/hostname: ylc61 + nodeSelector: # 将 Traefik Pod 固定到指定节点(避免 local-path RWO 迁移导致丢数据) + kubernetes.io/hostname: ylc61 # 目标节点主机名 --- # 显式 IngressRoute(与 03-01 一致,确保 /dashboard 可达; Helm ingressRoute.dashboard 在 K3s chart 中未必生效) -apiVersion: traefik.io/v1alpha1 -kind: IngressRoute -metadata: - name: traefik-dashboard - namespace: kube-system -spec: - entryPoints: - - web - routes: - - match: PathPrefix(`/dashboard`) || PathPrefix(`/api`) - kind: Rule - services: - - name: api@internal - kind: TraefikService +apiVersion: traefik.io/v1alpha1 # IngressRoute API 版本 +kind: IngressRoute # Traefik 路由 CRD +metadata: # IngressRoute 元信息 + name: traefik-dashboard # 路由名称 + namespace: kube-system # 命名空间 +spec: # IngressRoute 规则 + entryPoints: # 入口点列表 + - web # 使用 web(HTTP) 入口 + routes: # 路由规则列表 + - match: PathPrefix(`/dashboard`) || PathPrefix(`/api`) # 匹配 Dashboard/API 路径前缀 + kind: Rule # 规则类型 + services: # 后端服务 + - name: api@internal # Traefik 内置 API 服务 + kind: TraefikService # 服务类型 + diff --git a/ansible/files/traefik-dashboard/traefik-dashboard.yaml b/ansible/files/traefik-dashboard/traefik-dashboard.yaml index f68775b..4e61a04 100644 --- a/ansible/files/traefik-dashboard/traefik-dashboard.yaml +++ b/ansible/files/traefik-dashboard/traefik-dashboard.yaml @@ -1,37 +1,37 @@ # 03-01 Traefik Dashboard(HelmChartConfig + IngressRoute) # 部署:kubectl apply -f traefik-dashboard.yaml(或复制到 K3s server/manifests/) --- -apiVersion: helm.cattle.io/v1 -kind: HelmChartConfig -metadata: - name: traefik - namespace: kube-system -spec: - valuesContent: |- - ports: - web: - expose: true - websecure: - expose: true - traefik: - expose: true +apiVersion: helm.cattle.io/v1 # HelmChartConfig 所在的 API 版本 +kind: HelmChartConfig # 
HelmChartConfig:给 K3s/Traefik chart 注入 values +metadata: # 该资源标识 + name: traefik # chart 对应的对象名 + namespace: kube-system # HelmChartConfig 所在命名空间 +spec: # Helm 注入配置 + valuesContent: |- # 以“字符串形式的 YAML”注入到 chart values(由 chart 解析) + ports: # entrypoint/端口暴露相关配置 + web: # HTTP entrypoint + expose: true # 启用 web 暴露 + websecure: # HTTPS entrypoint + expose: true # 启用 websecure 暴露 + traefik: #(chart 内部)traefik dashboard/管理端口暴露(按 chart 约定) + expose: true # 启用 traefik 管理端口暴露 - additionalArguments: - - "--api.dashboard=true" - - "--api.insecure=true" + additionalArguments: # 额外传给 Traefik 的 CLI 参数 + - "--api.dashboard=true" # 开启 dashboard 功能 + - "--api.insecure=true" # 允许 dashboard 在入口可用(不建议在公网直接放行) --- -apiVersion: traefik.io/v1alpha1 -kind: IngressRoute -metadata: - name: traefik-dashboard - namespace: kube-system -spec: - entryPoints: - - web - routes: - - match: PathPrefix(`/dashboard`) || PathPrefix(`/api`) - kind: Rule - services: - - name: api@internal - kind: TraefikService +apiVersion: traefik.io/v1alpha1 # IngressRoute 的 API 版本 +kind: IngressRoute # Traefik 的路由 CRD +metadata: # IngressRoute 标识 + name: traefik-dashboard # 资源名 + namespace: kube-system # IngressRoute 所在命名空间 +spec: # 路由规则 + entryPoints: # 进入的 entrypoint + - web # 使用 web(HTTP)入口 + routes: # 路由列表 + - match: PathPrefix(`/dashboard`) || PathPrefix(`/api`) # 匹配 /dashboard 或 /api 前缀 + kind: Rule # 匹配规则类型:Rule + services: # 匹配到后转发给的服务列表 + - name: api@internal # Traefik 内置 API 服务 + kind: TraefikService # 该服务的 CRD 类型 diff --git a/docs/00-00-构建总览.md b/docs/00-00-构建总览.md index f695e1e..d958cad 100644 --- a/docs/00-00-构建总览.md +++ b/docs/00-00-构建总览.md @@ -23,17 +23,16 @@ ## 推荐安装顺序 1. `00-01-k3s-基础概念.md` -2. `01-01-k3s-控制节点含traefik.md`(或直接用 `01-07-节点初始化-ansible-实践.md` 一键自动化) +2. `01-01-k3s-控制节点含traefik.md`(或直接用 `01-06-节点初始化-ansible-实践.md` 一键自动化) 3. `01-02-k3s-工作节点.md` 4. `01-03-armv7-standalone-docker.md` -5. `01-04-cloudflare-tunnel.md` -6. `01-08-openwrt-haproxy.md`(按需:网关负载均衡) -7. `04-03-k3s-nginx-demo.md` -8. 
`04-01-k3s-nodejs-高级部署.md` -9. `04-02-nodejs-镜像与运行命令.md` -10. `04-03-nodejs-环境变量与配置注入.md` -11. `04-04-nodejs-端口与Service.md` -12. `04-05-nodejs-资源请求与限制.md` +5. `01-07-openwrt-haproxy.md`(按需:网关负载均衡) +6. `04-03-k3s-nginx-demo.md` +7. `04-01-k3s-nodejs-高级部署.md` +8. `04-02-nodejs-镜像与运行命令.md` +9. `04-03-nodejs-环境变量与配置注入.md` +10. `04-04-nodejs-端口与Service.md` +11. `04-05-nodejs-资源请求与限制.md` 13. `04-06-nodejs-探针与健康检查.md` 14. `04-07-nodejs-调度与亲和.md` 15. `04-08-nodejs-安全上下文.md` @@ -51,7 +50,7 @@ 27. `03-05-k3s-local-path-pvc.md`(K3s 自带 local-path,单副本本地持久化) 28. `03-06-k3s-使用nfs存储.md`(按需:已有 NFS 时 PV/PVC) 29. `03-07-k3s-longhorn-持久化存储.md`(重状态、快照/备份,建议部署 GitLab 等前统一规划) -30. `03-08-k3s-ha-集群配置与切换.md`(按需:双控制节点 HA,配合 `01-05`) +30. `03-08-k3s-ha-集群配置与切换.md`(按需:双控制节点 HA,配合 `01-04`) 31. `03-09-k3s-gitops-集群配置管理.md`(框架草案:Argo CD / Flux) > 想确认这些步骤是否已经在真实环境验证,请查看 `00-02-验证矩阵.md`。 @@ -92,11 +91,10 @@ ## 专题导航 - `00-04-部署环境说明.md`(节点布局、IP、OS、K3s 版本等,便于对照与复现) -- `01-07-节点初始化-ansible-实践.md`(Ansible 一键安装 k3s 集群,已验证) -- `01-08-openwrt-haproxy.md`(按需:网关负载均衡) +- `01-06-节点初始化-ansible-实践.md`(Ansible 一键安装 k3s 集群,已验证) +- `01-07-openwrt-haproxy.md`(按需:网关负载均衡) - nginx 矩阵:`ansible/playbooks/nginx-matrix-deploy.yml`(02-05)、`ansible/playbooks/nginx-matrix-tls-deploy.yml`(03-02) -- `01-04-cloudflare-tunnel.md`(安装准备) -- `03-04-k3s-cloudflare-tunnel-配置接入.md`(集群接入) +- `03-04-k3s-cloudflare-tunnel-配置接入.md`(Cloudflare Tunnel 完整流程:Zero Trust + 集群接入) - `05-03-k3s-安装gitlab-含runner.md` - `05-04-k3s-配置gitlab-cicd.md` @@ -105,10 +103,10 @@ - `05-05-prometheus与grafana.md` - `05-07-openclaw应用部署.md` - `05-08-openclaw-k3s-实验部署.md` -- `01-06-armv7-nfs服务安装.md` +- `01-05-armv7-nfs服务安装.md` - `05-06-openlist挂载网盘与自动备份.md` - `06-02-运维小结.md` -- `01-05-双控制节点ha.md` +- `01-04-双控制节点ha.md` - `03-08-k3s-ha-集群配置与切换.md` - `03-09-k3s-gitops-集群配置管理.md`(框架草案) diff --git a/docs/00-01-k3s-基础概念.md b/docs/00-01-k3s-基础概念.md index 27970dd..65952b9 100644 --- a/docs/00-01-k3s-基础概念.md +++ b/docs/00-01-k3s-基础概念.md @@ -1,4 +1,4 @@ -# 00-01-k3s-基础概念 +# 
00-01-k3s-基础概念 > 入门速查:先把核心概念看明白,再去做安装与排障。 @@ -106,7 +106,7 @@ K3s 自带 **local-path-provisioner**:当你创建 PVC 且不指定 `storageCl - **工作机制**:PVC 被创建后,provisioner 会在 **Pod 被调度到的节点** 上,在其本地磁盘创建目录(默认在 `data-dir` 下的 `storage`,例如 `/var/lib/rancher/k3s/storage` 或 `/storage`),并为之创建 PV、与 PVC 绑定。 - **绑定到节点**:数据只存在于该节点的本地目录,**与该节点绑定**;Pod 被调度到另一节点时,会拿到新的空卷,旧节点上的数据不会自动迁移。 -- **适用场景**:单副本应用、缓存、日志等,能接受 Pod 漂移后数据丢失或需手动恢复。**多副本共享数据**应使用 NFS、CSI 等共享存储(见 `01-06`)。 +- **适用场景**:单副本应用、缓存、日志等,能接受 Pod 漂移后数据丢失或需手动恢复。**多副本共享数据**应使用 NFS、CSI 等共享存储(见 `01-05`)。 - **查看**:`kubectl get storageclass` 可见 `local-path`(通常为默认);`kubectl get pv,pvc` 可查看已创建的卷。 - **操作示例**:见 `03-05-k3s-local-path-pvc.md`。 @@ -114,7 +114,7 @@ K3s 自带 **local-path-provisioner**:当你创建 PVC 且不指定 `storageCl - **Pod 可以漂移,宿主机本地数据不会跟着漂移**:用 `hostPath` 把宿主机目录挂进容器时,数据只在这台机器上;Pod 被调度到另一台节点后,那台机器没有同样目录和数据,应用就会“丢数据”。 - **K3s 不会自动帮你搬本地数据**:调度器只管 Pod 放哪台节点,不会同步 `/var/lib/...` 或自建目录;所以“节点故障自动漂移”和“数据高可用”是两件事,要分别设计。 -- **常见做法**:重要数据用共享存储(NFS / 云盘 / CSI),通过 PV/PVC 给 Pod 用(参考 `01-06`、`03-07`);缓存、临时文件用本地目录(`emptyDir` 或 `hostPath`),接受节点挂了可丢;或靠备份/同步把本地目录定期同步到别处,再在新节点恢复。 +- **常见做法**:重要数据用共享存储(NFS / 云盘 / CSI),通过 PV/PVC 给 Pod 用(参考 `01-05`、`03-07`);缓存、临时文件用本地目录(`emptyDir` 或 `hostPath`),接受节点挂了可丢;或靠备份/同步把本地目录定期同步到别处,再在新节点恢复。 **用途**:搞清楚数据放哪、节点挂了会不会丢,才能设计备份和高可用,不踩坑。 diff --git a/docs/00-02-验证矩阵.md b/docs/00-02-验证矩阵.md index 1181ce7..d9d57aa 100644 --- a/docs/00-02-验证矩阵.md +++ b/docs/00-02-验证矩阵.md @@ -30,7 +30,7 @@ - `01-01-k3s-控制节点含traefik.md` - 状态:✅ 已验证 - 备注:Fedora 43 Server + K3s v1.34.5+k3s1,单控制节点 61,已按文档装机并确认 Traefik 入口 404 可达(2026-03-10 左右)。 -- `01-07-节点初始化-ansible-实践.md` +- `01-06-节点初始化-ansible-实践.md` - 状态:✅ 已验证 - 备注:Fedora + K3s,4 节点(ylc61~64),Ansible 一键完成初始化、server/agent 安装、firewalld 基线、Traefik 标签及验证输出(2026-03 左右)。 - `01-02-k3s-工作节点.md` @@ -39,12 +39,9 @@ - `01-03-armv7-standalone-docker.md` - 状态:❓ 未验证 - 备注:待在实际 armv7 设备上按文档安装 Docker 并跑一两个容器后更新。 -- `01-04-cloudflare-tunnel.md` - - 状态:⚠️ 部分验证 - - 备注:Cloudflare 控制台端(Tunnel/域名)已实践使用,需在新环境对完整安装准备流程再跑一遍。 -- 
`01-08-openwrt-haproxy.md` + `01-07-openwrt-haproxy.md` - 状态:✅ 已验证 - - 备注:ImmortalWrt + HAProxy 18080/18443;经 `scripts/01-08-verify-haproxy.sh`(ssh onecloud 第三方 curl)验证;cfg 语法、HTTP/HTTPS 后端正确;可选 `--deploy-matrix http|tls` 一键部署矩阵。 + - 备注:ImmortalWrt + HAProxy 18080/18443;经 `scripts/01-07-verify-haproxy.sh`(ssh onecloud 第三方 curl)验证;cfg 语法、HTTP/HTTPS 后端正确;可选 `--deploy-matrix http|tls` 一键部署矩阵。 --- @@ -83,10 +80,10 @@ - 备注:02-05 的升级版(TLS 矩阵 + ACME);2026-03 实机跑通。 - `03-03-k3s-traefik-dashboard-acme.md` - 状态:✅ 已验证 - - 备注:03-01 Dashboard 与 03-02 ACME 合并配置已核对;模板 `ansible/files/traefik-dashboard-acme/traefik-dashboard-acme.yaml` 正确。实机 apply 需确保 acme.json 持久化或集群 DNS 可达 Let's Encrypt;可经 `scripts/03-verify-traefik-dashboard-acme.sh` 验证。2026-03。 -- `03-04-k3s-cloudflare-tunnel-配置接入.md` - - 状态:⚠️ 部分验证 - - 备注:cloudflared 侧部署与 Tunnel 接入已在其他项目跑通过,本实验室集群的完整接入流程待实机验证。 + - 备注:03-01 Dashboard 与 03-02 ACME 合并配置已核对;模板 `ansible/files/traefik-dashboard-acme/traefik-dashboard-acme.yaml` 正确(已含 local-path persistence)。实机 apply 需确保集群 DNS 可达 Let's Encrypt;可经 `scripts/03-verify-traefik-dashboard-acme.sh` 验证。2026-03。 +- `03-04-k3s-cloudflare-tunnel-配置接入.md` + - 状态:✅ 已验证 + - 备注:本实验室集群完整流程(Zero Trust、Public Hostname、cloudflared Pod、`traefik.kube-system.svc.cluster.local:80`、Dashboard 子域 + `/dashboard/` 访问)已实机跑通(2026-03)。 - `03-05-k3s-local-path-pvc.md` - 状态:❓ 未验证 - 备注:K3s 自带 local-path-provisioner,PVC 本地持久化;待实机验证。 @@ -105,10 +102,10 @@ ### 可选:依赖文档 -- `01-05-双控制节点ha.md` +- `01-04-双控制节点ha.md` - 状态:❓ 未验证 - 备注:文档已拆分安装/配置流程,尚未在双控制节点 + 外部 LB 的完整场景下全链路验证。 -- `01-06-armv7-nfs服务安装.md` +- `01-05-armv7-nfs服务安装.md` - 状态:❓ 未验证 - 备注:NFS 安装命令已经过以往经验验证,本仓库对应 armv7 环境需再跑一遍确认导出与权限。 diff --git a/docs/00-03-未来规划与待补功能.md b/docs/00-03-未来规划与待补功能.md index fcabe3d..a27f00f 100644 --- a/docs/00-03-未来规划与待补功能.md +++ b/docs/00-03-未来规划与待补功能.md @@ -1,4 +1,4 @@ -# 00-03-未来规划与待补功能 +# 00-03-未来规划与待补功能 > 给未来的自己:这里不是“必须现在就做完”的清单,而是把你已经想到、但还没系统实现的能力先写下来,等有时间再一项项补。 @@ -34,7 +34,7 @@ - 节点初始化、K3s 配置和应用部署以“手工 + scripts/”为主。 - 
没有一套“从裸机/虚机到完整环境”的幂等自动化流程。 - **规划方向** - - **节点侧**:✅ 已完成 `01-07-节点初始化-ansible-实践.md`,Ansible 一键完成初始化 + k3s 安装 + firewalld 基线 + Traefik 标签(含 8472/udp、6443/tcp 端口开放)。 + - **节点侧**:✅ 已完成 `01-06-节点初始化-ansible-实践.md`,Ansible 一键完成初始化 + k3s 安装 + firewalld 基线 + Traefik 标签(含 8472/udp、6443/tcp 端口开放)。 - **集群侧**:引入 GitOps(Argo CD / Flux 二选一)管理: - K3s 核心配置与 CRD; - Ingress/IngressRoute、Traefik 配置; @@ -87,7 +87,7 @@ - 只向极少数管理设备开放; - 主要用途为 SSH、kubeconfig、底层网络排障。 - **建议文档** - - `01-08-wireguard-运维vpn-接入与实践.md` + - `01-07-wireguard-运维vpn-接入与实践.md` ## 7. 其他可选实验方向 diff --git a/docs/00-04-部署环境说明.md b/docs/00-04-部署环境说明.md index 92babd4..25bbb37 100644 --- a/docs/00-04-部署环境说明.md +++ b/docs/00-04-部署环境说明.md @@ -22,13 +22,13 @@ | ------- | ----------------- | --------------------------- | | OS | Fedora 43 Server (CoreOS) | 其他 RHEL 系 / Debian 系按文档说明适配 | | K3s | v1.34.5+k3s1 | 来自 get.k3s.io 默认 | -| Ansible | ansible-core 2.18 | 用于 `01-07` 自动化安装 | +| Ansible | ansible-core 2.18 | 用于 `01-06` 自动化安装 | ## 3. 网络与存储 - **网段**:192.168.2.0/24 -- **可选**:OpenWrt 网关(如 192.168.2.1)上配置 HAProxy 负载均衡,将 80/443 转发到 K3s 节点,见 `01-08-openwrt-haproxy.md` +- **可选**:OpenWrt 网关(如 192.168.2.1)上配置 HAProxy 负载均衡,将 80/443 转发到 K3s 节点,见 `01-07-openwrt-haproxy.md` - **数据盘方案**:`/storage`,server 与 worker 均使用 `--data-dir=/storage` - **token 路径**:`/storage/server/token` @@ -51,5 +51,5 @@ ## 6. 
验证时间 -- 2026-03:4 节点集群按 `01-07` 一次性安装成功,各节点 Traefik 入口 404 可达。 +- 2026-03:4 节点集群按 `01-06` 一次性安装成功,各节点 Traefik 入口 404 可达。 diff --git a/docs/01-01-k3s-控制节点含traefik.md b/docs/01-01-k3s-控制节点含traefik.md index eb99250..f2762bc 100644 --- a/docs/01-01-k3s-控制节点含traefik.md +++ b/docs/01-01-k3s-控制节点含traefik.md @@ -2,7 +2,7 @@ > 在控制节点安装 K3s Server,确认基础组件与 Traefik 可用。 > -> 若需一键自动化安装多节点集群,可直接用 `01-07-节点初始化-ansible-实践.md`。 +> 若需一键自动化安装多节点集群,可直接用 `01-06-节点初始化-ansible-实践.md`。 ## 前置条件 @@ -20,7 +20,7 @@ K3s 默认将数据(含 local-path 卷)放在 `--data-dir` 下。系统盘 | **方案一(默认)** | `/var/lib/rancher/k3s` | 系统盘空间充足 | | **方案二(数据盘)** | `/storage` | 系统盘小,数据盘单独挂载在 `/storage` | -> 自定义 `/storage` 仅解决单节点内系统盘/数据盘分离;节点或数据盘重建后数据不会自动迁移,高可用与备份见 `01-05`、`06-03`。 +> 自定义 `/storage` 仅解决单节点内系统盘/数据盘分离;节点或数据盘重建后数据不会自动迁移,高可用与备份见 `01-04`、`06-03`。 ## 操作步骤 @@ -42,7 +42,7 @@ curl -sfL https://get.k3s.io | sh - curl -sfL https://get.k3s.io | sh -s - server --data-dir=/storage ``` -- 使用方案二时,token 路径为 `/storage/server/token`(供 01-02 工作节点加入与 01-05 HA 使用)。 +- 使用方案二时,token 路径为 `/storage/server/token`(供 01-02 工作节点加入与 01-04 HA 使用)。 ## 配置 kubectl(供当前用户使用) @@ -169,7 +169,7 @@ forward . 
223.5.5.5 8.8.8.8 然后重启 CoreDNS:`kubectl -n kube-system rollout restart deploy/coredns` -> 若使用 Ansible 一键安装(`01-07`),playbook 已自动完成此配置,无需手动修改。 +> 若使用 Ansible 一键安装(`01-06`),playbook 已自动完成此配置,无需手动修改。 ## 下一步 diff --git a/docs/01-02-k3s-工作节点.md b/docs/01-02-k3s-工作节点.md index ae61601..13c9b00 100644 --- a/docs/01-02-k3s-工作节点.md +++ b/docs/01-02-k3s-工作节点.md @@ -3,7 +3,7 @@ > 本文已合并原 `01-02-k3s-工作节点.md`。 > 目标:完成工作节点加入 + Traefik 入口部署基线,并验证「**入口节点集合**的 `:80` 可达」。 > -> 若需一键自动化安装多节点集群,可直接用 `01-07-节点初始化-ansible-实践.md`。 +> 若需一键自动化安装多节点集群,可直接用 `01-06-节点初始化-ansible-实践.md`。 ## 前置条件 @@ -57,11 +57,7 @@ curl -sfL https://get.k3s.io | sh -s - agent \ 早期排障时,曾只在控制节点手工执行过少量临时放行命令即可恢复访问,那是因为当时入口 Pod 全在控制节点、所有回包都经由控制节点; 但在“Traefik 可跑在任意节点、部分节点被选为入口节点”的设计下,每个启用 firewalld 的 k3s 节点都必须持久放行本机 `flannel.1/cni0`,否则一旦入口 Pod 或业务 Pod 调度到该节点,就可能在该节点上重现同类故障。 -在**每台** k3s 节点上分别执行: - -```bash -./scripts/diag/firewalld/setup-k3s-firewalld-interfaces.sh -``` +在**每台** k3s 节点上分别执行(当前不再维护 diag 脚本,使用手动命令或 Ansible): ### 2.2 手动方式(不使用脚本) diff --git a/docs/01-03-armv7-standalone-docker.md b/docs/01-03-armv7-standalone-docker.md index b2abea6..af38ab7 100644 --- a/docs/01-03-armv7-standalone-docker.md +++ b/docs/01-03-armv7-standalone-docker.md @@ -28,4 +28,4 @@ docker ps ## 下一步 - `05-02-onenav首页面板.md` -- `01-06-armv7-nfs服务安装.md` +- `01-05-armv7-nfs服务安装.md` diff --git a/docs/01-04-cloudflare-tunnel.md b/docs/01-04-cloudflare-tunnel.md deleted file mode 100644 index ed89964..0000000 --- a/docs/01-04-cloudflare-tunnel.md +++ /dev/null @@ -1,44 +0,0 @@ -# 01-04-Cloudflare Tunnel - -> 本文只负责 Cloudflare Tunnel 的安装准备与云端侧创建。 -> K3s 侧 `cloudflared` 部署与验证见:`03-04-k3s-cloudflare-tunnel-配置接入.md`。 - ---- - -## 前置条件 - -- 控制节点已就绪:`01-01-k3s-控制节点含traefik.md` -- Traefik 已可用(单节点 K3s 也可使用 Cloudflare Tunnel) -- 域名已托管在 Cloudflare -- 已创建 Cloudflare Zero Trust 账号 - ---- - -## 云端创建 Tunnel - -1. 在 Cloudflare Zero Trust 创建一个 Tunnel -2. 记录 `Tunnel Token` 或凭据 JSON -3. 
规划域名映射,例如: - - `git.example.com` -> `http://traefik.kube-system.svc.cluster.local` - - `home.example.com` -> 同上 - -## 安装准备检查 - -- 确认已拿到 Tunnel Token 或凭据文件 -- 确认域名与子域映射规划完成 -- 确认目标入口指向 Traefik(后续在 K3s 中接入) - ---- - -## 注意事项 - -- 没有 token/凭据:回到 Zero Trust 页面重新生成 -- 子域规划混乱:先固定一张映射表再做集群接入 -- 需要部署到 K3s:转到 `03-04-k3s-cloudflare-tunnel-配置接入.md` - ---- - -## 下一步 - -- `03-04-k3s-cloudflare-tunnel-配置接入.md` - diff --git a/docs/01-05-双控制节点ha.md b/docs/01-04-双控制节点ha.md similarity index 96% rename from docs/01-05-双控制节点ha.md rename to docs/01-04-双控制节点ha.md index a12d29b..09c14e1 100644 --- a/docs/01-05-双控制节点ha.md +++ b/docs/01-04-双控制节点ha.md @@ -1,4 +1,4 @@ -# 01-05-双控制节点HA(安装与准备) +# 01-04-双控制节点HA(安装与准备) > 本文只讲双控制节点 HA 的安装前准备与基础环境搭建。 > 具体集群参数切换、server 加入与迁移步骤见 `03-08-k3s-ha-集群配置与切换.md`。 @@ -53,7 +53,7 @@ sudo k3s server \ 1. **确认 worker 节点健康**: - 已按 `01-02-k3s-工作节点.md` 正常加入集群; - 无关键 Pod 仅运行在该节点(可先用 `kubectl drain` 或手动迁移工作负载)。 -2. **在 `01-05` 阶段完成外部 datastore 与 LB 准备**: +2. **在 `01-04` 阶段完成外部 datastore 与 LB 准备**: - 不要立即改动现有 server/worker 的 systemd 配置,只确保 datastore/LB 均已就绪。 3. 
**在 `03-09` 中按步骤将该 worker 替换为 server**: - 停止该节点上的 `k3s-agent` 服务(或执行官方卸载脚本); diff --git a/docs/01-06-armv7-nfs服务安装.md b/docs/01-05-armv7-nfs服务安装.md similarity index 96% rename from docs/01-06-armv7-nfs服务安装.md rename to docs/01-05-armv7-nfs服务安装.md index 2794494..2c7e831 100644 --- a/docs/01-06-armv7-nfs服务安装.md +++ b/docs/01-05-armv7-nfs服务安装.md @@ -1,4 +1,4 @@ -# 01-06-armv7 NFS 服务安装 +# 01-05-armv7 NFS 服务安装 > 本文只讲 armv7 主机侧 NFS 服务安装与导出配置。 diff --git a/docs/01-07-节点初始化-ansible-实践.md b/docs/01-06-节点初始化-ansible-实践.md similarity index 98% rename from docs/01-07-节点初始化-ansible-实践.md rename to docs/01-06-节点初始化-ansible-实践.md index 0fc03a8..1c9347a 100644 --- a/docs/01-07-节点初始化-ansible-实践.md +++ b/docs/01-06-节点初始化-ansible-实践.md @@ -1,4 +1,4 @@ -# 01-07-节点初始化与 k3s 自动安装(Ansible 实践) +# 01-06-节点初始化与 k3s 自动安装(Ansible 实践) > 目标:给一组已经装好 OS、可以 SSH 的裸金属/虚机,**一键完成基础初始化 + 安装 k3s server/worker**,得到与 `01-01`、`01-02` 文档一致的集群(含 `/storage` 数据盘方案)。 > @@ -22,7 +22,7 @@ - **数据盘**:若使用 `/storage` 方案,需在每台节点上提前挂载数据盘并创建 `/storage`; - 不覆盖: - 从「完全裸铁 + 无系统」开始的 PXE 装机; - - 高级 HA(多 server + 外部 datastore)——仍按 `01-05`、`03-10` 执行。 + - 高级 HA(多 server + 外部 datastore)——仍按 `01-04`、`03-08` 执行。 ## 2. 目录结构 diff --git a/docs/01-08-openwrt-haproxy.md b/docs/01-07-openwrt-haproxy.md similarity index 88% rename from docs/01-08-openwrt-haproxy.md rename to docs/01-07-openwrt-haproxy.md index 60be850..a776f7a 100644 --- a/docs/01-08-openwrt-haproxy.md +++ b/docs/01-07-openwrt-haproxy.md @@ -1,11 +1,11 @@ -# 01-08 OpenWrt HAProxy 负载均衡 +# 01-07 OpenWrt HAProxy 负载均衡 > 在 OpenWrt 上安装并配置 HAProxy,将 80/443 流量转发到 K3s 集群节点(Traefik 入口),实现单一入口与负载均衡。 ## 前置条件 - OpenWrt 与 K3s 节点同网段(如 192.168.2.0/24),OpenWrt 通常为网关(如 192.168.2.1) -- 已完成 `01-02-k3s-工作节点.md` 或 `01-07`,Traefik 入口 80/443 已在各节点可达 +- 已完成 `01-02-k3s-工作节点.md` 或 `01-06`,Traefik 入口 80/443 已在各节点可达 ## 1. 
安装 HAProxy @@ -20,9 +20,9 @@ opkg install haproxy 编辑 `/etc/haproxy.cfg` 或包提供的配置路径(部分 OpenWrt 使用 `/etc/haproxy/haproxy.cfg`)。可在 `/etc/init.d/haproxy` 中查看实际配置文件路径。 -**配置目录说明与「cfg 是否正确」的验证层次**:见 `ansible/files/01-08-haproxy/README.md`(**仅语法**:`./scripts/01-08-verify-haproxy.sh --cfg-only`)。 +**配置目录说明与「cfg 是否正确」的验证层次**:见 `ansible/files/01-07-haproxy/README.md`(**仅语法**:`./scripts/01-07-verify-haproxy.sh --cfg-only`)。 -**无健康检查最简配置**:`ansible/files/01-08-haproxy/haproxy-no-check.cfg`(与 Ansible 共用,可复制到 OpenWrt 或通过 playbook 下发)。将 `192.168.2.61`~`192.168.2.64` 按实际 K3s 节点 IP 修改。如需健康检查见第 3 节;如需真实客户端 IP 见第 5 节 PROXY Protocol。 +**无健康检查最简配置**:`ansible/files/01-07-haproxy/haproxy-no-check.cfg`(与 Ansible 共用,可复制到 OpenWrt 或通过 playbook 下发)。将 `192.168.2.61`~`192.168.2.64` 按实际 K3s 节点 IP 修改。如需健康检查见第 3 节;如需真实客户端 IP 见第 5 节 PROXY Protocol。 ## 3. 健康检查 @@ -43,19 +43,19 @@ opkg install haproxy ### 3.2 HTTP(80 明文) -完整配置:`ansible/files/01-08-haproxy/haproxy-http.cfg`。`backend k3s_http` 开头加 `option httpchk GET /`,`k3s_https` 仍为 TCP 检查。 +完整配置:`ansible/files/01-07-haproxy/haproxy-http.cfg`。`backend k3s_http` 开头加 `option httpchk GET /`,`k3s_https` 仍为 TCP 检查。 ### 3.3 TLS(443 握手,`mode tcp`) -完整配置:`ansible/files/01-08-haproxy/haproxy-tls.cfg`。`backend k3s_https` 中加 `option ssl-hello-chk`,做 TLS 握手层检查。 +完整配置:`ansible/files/01-07-haproxy/haproxy-tls.cfg`。`backend k3s_https` 中加 `option ssl-hello-chk`,做 TLS 握手层检查。 ### 3.4 HTTPS(443 应用层,`mode http` + `ssl`) -完整配置:`ansible/files/01-08-haproxy/haproxy-https.cfg`。适用于 **HAProxy 在 443 终结 TLS(由 HAProxy 提供证书)** 的场景(frontend 需 `bind *:443 ssl crt ...`)。需与 Traefik 路由匹配的 `Host`;自签/内网 CA 用 `verify none`,生产建议 `ca-file`。若仍为 TCP 透传,用 3.3 即可。 +完整配置:`ansible/files/01-07-haproxy/haproxy-https.cfg`。适用于 **HAProxy 在 443 终结 TLS(由 HAProxy 提供证书)** 的场景(frontend 需 `bind *:443 ssl crt ...`)。需与 Traefik 路由匹配的 `Host`;自签/内网 CA 用 `verify none`,生产建议 `ca-file`。若仍为 TCP 透传,用 3.3 即可。 ## 4. 
启动与验证 -**一键部署**(uhttpd 80/443 + HAProxy 18080/18443):`./scripts/01-08-deploy-openwrt-haproxy.sh`。将 uhttpd 恢复监听 80/443(IPv4+IPv6),HAProxy 部署到 18080/18443,与 LuCI 共存。 +**一键部署**(uhttpd 80/443 + HAProxy 18080/18443):`./scripts/01-07-deploy-openwrt-haproxy.sh`。将 uhttpd 恢复监听 80/443(IPv4+IPv6),HAProxy 部署到 18080/18443,与 LuCI 共存。 ```bash /etc/init.d/haproxy enable @@ -64,13 +64,13 @@ opkg install haproxy 验证:从内网访问 `http://:18080/` 或 `http://:18080/demo-m1/`(家庭私网常用 18080/18443),应能到达 Traefik 与后端。 -**自动验证**:`./scripts/01-08-verify-haproxy-openwrt.sh` 或 `./scripts/01-08-verify-haproxy.sh`。经 **ssh onecloud** 作为第三方发起 curl,验证 `http://:18080` 与 `https://<域名>:18443`(HTTPS 需 `--https-hosts`)。不部署、不改端口;需 OpenWrt HAProxy 已按 18080/18443 配置。可选 `--deploy-matrix http` 或 `--deploy-matrix tls` 一键部署对应 nginx 矩阵后再验证。**验证 HTTPS 时**:可先执行 `./scripts/01-08-deploy-nginx-tls-via-ylc61.sh`,经 ssh ylc61 在控制节点上一键部署 nginx TLS 矩阵,再带 `--https-hosts 'test01.jackadam.top,...'` 验证。验证通过后默认更新 `docs/00-02-验证矩阵.md`(`--no-update-matrix` 跳过)。 +**自动验证**:`./scripts/01-07-verify-haproxy-openwrt.sh` 或 `./scripts/01-07-verify-haproxy.sh`。经 **ssh onecloud** 作为第三方发起 curl,验证 `http://:18080` 与 `https://<域名>:18443`(HTTPS 需 `--https-hosts`)。不部署、不改端口;需 OpenWrt HAProxy 已按 18080/18443 配置。可选 `--deploy-matrix http` 或 `--deploy-matrix tls` 一键部署对应 nginx 矩阵后再验证。**验证 HTTPS 时**:可先执行 `./scripts/01-07-deploy-nginx-tls-via-ylc61.sh`,经 ssh ylc61 在控制节点上一键部署 nginx TLS 矩阵,再带 `--https-hosts 'test01.jackadam.top,...'` 验证。验证通过后默认更新 `docs/00-02-验证矩阵.md`(`--no-update-matrix` 跳过)。 ## 5. 
PROXY Protocol(可选) 若 Traefik 需获取真实客户端 IP,可在 HAProxy 后端每个 `server` 行添加 `send-proxy-v2`,并在 Traefik 配置 `trustedIPs` 包含 OpenWrt 网段(见 `03-02-k3s-traefik-acme.md`)。 -**完整配置**:`ansible/files/01-08-haproxy/haproxy-proxy-http-tls.cfg`(HTTP 检查 + TLS 检查 + PROXY)。 +**完整配置**:`ansible/files/01-07-haproxy/haproxy-proxy-http-tls.cfg`(HTTP 检查 + TLS 检查 + PROXY)。 Traefik 端需启用 PROXY protocol 监听并信任 OpenWrt 的 IP,否则会报错。UCI 配置需参考 OpenWrt HAProxy 文档中的相应选项。 diff --git a/docs/02-05-nginx-验证矩阵-一键部署.md b/docs/02-05-nginx-验证矩阵-一键部署.md index 55f9121..300011a 100644 --- a/docs/02-05-nginx-验证矩阵-一键部署.md +++ b/docs/02-05-nginx-验证矩阵-一键部署.md @@ -43,7 +43,7 @@ kubectl get pod,svc,ing,ingressroute -n default -o wide ## 验证(用 IP 访问) -直接用入口节点 IP 访问(将 `192.168.2.61` 改为你的入口 IP;按 01-02/01-07 已配 LB 时任选节点 IP)。 +直接用入口节点 IP 访问(将 `192.168.2.61` 改为你的入口 IP;按 01-02/01-06 已配 LB 时任选节点 IP)。 ```bash for path in demo-m1 demo-m2 demo-m3 demo-m4; do diff --git a/docs/03-01-k3s-traefik-dashboard.md b/docs/03-01-k3s-traefik-dashboard.md index bf69915..61fb742 100644 --- a/docs/03-01-k3s-traefik-dashboard.md +++ b/docs/03-01-k3s-traefik-dashboard.md @@ -43,7 +43,7 @@ kubectl -n kube-system rollout status deploy/traefik 3. 
验证:一键对全部节点 IP 做 curl 测试(按实际环境修改 IP 列表): ```bash -# 已按 01-02 / 01-07 配置 K3s 默认 LB(Traefik 入口标签 + firewalld 基线),61~64 任一台 :80 均应返回 200/307 +# 已按 01-02 / 01-06 配置 K3s 默认 LB(Traefik 入口标签 + firewalld 基线),61~64 任一台 :80 均应返回 200/307 for ip in 192.168.2.61 192.168.2.62 192.168.2.63 192.168.2.64; do code=$(curl -s -o /dev/null -w "%{http_code}" --max-time 3 "http://${ip}/dashboard/" 2>/dev/null || echo "---") echo "${ip}: ${code}" diff --git a/docs/03-02-k3s-traefik-acme.md b/docs/03-02-k3s-traefik-acme.md index 0f0e3c8..b8740ab 100644 --- a/docs/03-02-k3s-traefik-acme.md +++ b/docs/03-02-k3s-traefik-acme.md @@ -21,7 +21,7 @@ - **Pod / 部署**:ACME 配置通过 `HelmChartConfig` 注入到 **同一个 Traefik Deployment**。**副本数为 chart 默认值 1**(即 `deployment.replicas` 未在 values 里写时默认为 1),所以只有 1 个 Traefik Pod;与 03-01 的 Traefik 是同一套 Deployment,只是 values 里多了 ACME 参数与 env。 - **配置存在哪里**:`HelmChartConfig` 存在 **etcd**(控制节点);K3s 的 chart 控制器据此更新 Traefik 的部署参数,Traefik 进程从 **Kubernetes API** 读取 Ingress/IngressRoute,无需多 Pod 间同步。 -- **ACME 存储(证书与账户)**:`acme.storage` 指向容器内 **`/data/acme.json`**。未配 hostPath 时,K3s 默认会为 Traefik 挂载卷到 `/data`(如 emptyDir 或默认持久卷),**仅当前这一个 Traefik Pod 可写**,Pod 重建后若卷不持久则需重新申请证书。若在 values 里配置了 **hostPath**(见本页可选配置),则 `/data` 对应宿主机目录,证书写在物理机路径,便于备份与复用;Traefik 仍为 1 个 Pod,不存在多副本间同步 acme.json 的问题。 +- **ACME 存储(证书与账户)**:`acme.storage` 指向容器内 **`/data/acme.json`**。未配 hostPath 时,K3s 默认会为 Traefik 挂载卷到 `/data`(如 emptyDir 或默认持久卷),**仅当前这一个 Traefik Pod 可写**,Pod 重建后若卷不持久则需重新申请证书。若在 values 里配置了 **hostPath**(见本页可选配置),则 `/data` 对应宿主机目录,证书写在物理机路径,便于备份与复用;Traefik 仍为 1 个 Pod,不存在多副本间同步 acme.json 的问题。**推荐**:Dashboard + ACME 场景直接用 **同一份** [`traefik-dashboard-acme.yaml`](../ansible/files/traefik-dashboard-acme/traefik-dashboard-acme.yaml)(已含 **`persistence`(local-path)+ ACME**),见 `03-05-k3s-local-path-pvc.md`。不要 Dashboard 时按该文件头注释删减。 - **第一次部署随机节点、重启后怎么办**:Traefik 未指定 nodeSelector 时,首次会**随机调度**到某一节点。若使用了 **hostPath**,证书只存在于该节点的磁盘上;**Pod 被调度到其他节点**(重启、驱逐、缩容再扩容)时,新节点上的同名 hostPath 
是另一块盘,**证书不会跟着走**,可能需重新申请。若希望重启或节点故障后仍保留证书,可:**① 把 Traefik 固定到某一节点**(在 HelmChartConfig 的 `deployment` 下配 `nodeSelector`,例如 `nodeSelector: { kubernetes.io/hostname: ylc61 }(节点名使用短主机名 ylc61~ylc64,便于配合 Cloudflare CDN)`),使 hostPath 始终落在同一台机;**② 或不用 hostPath**,依赖 K3s 默认持久卷(若为 local-path,则卷仍绑定某节点,Pod 重建到同节点可复用);**③ 或改用 NFS 等共享存储**挂到 `/data`,多节点可读同一证书(需自行在 values 里配 PVC/volume)。 --- @@ -90,7 +90,7 @@ kubectl -n kube-system get secret cloudflare-api-token \ > > **文件选择**:K3s 自带的 `traefik.yaml` 会被 K3s 覆盖,**不要修改**。所有自定义配置(ACME、nodeSelector、hostPath 以及其他扩展配置)都应写在 **`traefik-acme.yaml`** 这一份 HelmChartConfig 里,与默认 chart 合并生效。 -1. 在控制节点创建 `traefik-acme.yaml`,推荐放入 K3s manifests 目录(路径同 03-01)。**完整配置见 `ansible/files/traefik-acme/traefik-acme.yaml`**(与 Ansible 共用),复制后替换 `` 等占位符即可。 +1. 在控制节点创建 `traefik-acme.yaml`,推荐放入 K3s manifests 目录(路径同 03-01)。**完整配置见 `ansible/files/traefik-acme/traefik-acme.yaml`**(与 Ansible 共用),复制后替换 `` 等占位符即可。若走 **Dashboard + ACME** 且需 **证书落盘 local-path PVC**,直接用 [`traefik-dashboard-acme.yaml`](../ansible/files/traefik-dashboard-acme/traefik-dashboard-acme.yaml)(已内置 persistence,说明见 `03-05-k3s-local-path-pvc.md`)。**仅 ACME、无 Dashboard** 时仍可用本目录 [`traefik-acme.yaml`](../ansible/files/traefik-acme/traefik-acme.yaml),并自行按 `03-05` 在 Helm values 中增加 `persistence` 块(与 `/data/acme.json` 一致)。 > 将 `` 改为你的邮箱。`/data/acme.json` 为容器内路径;`caserver` 为测试服务器(staging),正式上线前删除该行即切回生产 CA。Traefik 在容器内监听 8000/8443,由 Service 和 svclb 映射到节点 80/443。 > diff --git a/docs/03-03-k3s-traefik-dashboard-acme.md b/docs/03-03-k3s-traefik-dashboard-acme.md index 751ecfe..a1126f8 100644 --- a/docs/03-03-k3s-traefik-dashboard-acme.md +++ b/docs/03-03-k3s-traefik-dashboard-acme.md @@ -20,7 +20,7 @@ kubectl -n kube-system create secret generic cloudflare-api-token \ > 说明:Traefik 的 `HelmChartConfig` 只能有一份,Dashboard 与 ACME 需合并在同一文件中。**ACME 配置基于 03-03 实机验证**(递归 DNS、propagation 等待、ping、PROXY protocol、nodeSelector)。 -创建 `traefik-dashboard-acme.yaml`,推荐放入 K3s manifests 目录(路径同 03-02)。**唯一真源**:[HelmChartConfig 完整 
YAML](../ansible/files/traefik-dashboard-acme/traefik-dashboard-acme.yaml),复制后替换 `` 等占位符;或在仓库根执行 `kubectl apply -f ansible/files/traefik-dashboard-acme/traefik-dashboard-acme.yaml`。 +创建 `traefik-dashboard-acme.yaml`,推荐放入 K3s manifests 目录(路径同 03-02)。**唯一真源**(已含 **`persistence`(local-path)+ ACME + Dashboard + IngressRoute**,证书落盘 `/data/acme.json`):[`traefik-dashboard-acme.yaml`](../ansible/files/traefik-dashboard-acme/traefik-dashboard-acme.yaml);复制后替换 `` 等占位符,或在仓库根执行 `kubectl apply -f ansible/files/traefik-dashboard-acme/traefik-dashboard-acme.yaml`。细节见 `03-05-k3s-local-path-pvc.md`。 > 将 `` 替换为你的邮箱。正式上线前删除 `caserver` 该行即切回生产 Let's Encrypt。**ACME 排障**(DNS 解析错误、证书解析器不存在等)见 `03-02-k3s-traefik-acme.md` 中「常见问题」与「排查」小节。 @@ -145,5 +145,5 @@ sudo rm -f /storage/server/manifests/traefik-dashboard-acme.yaml - `03-02-k3s-traefik-acme.md`:仅 ACME 不合并 Dashboard 时,或 TLS 矩阵(test01~test04)验证、排障详情 - `03-04-k3s-cloudflare-tunnel-配置接入.md`:若需 Cloudflare Tunnel 接入 -- `01-08-openwrt-haproxy.md`:如需调整外部端口/防火墙,参考 HAProxy 监听与转发(第 6 节) +- `01-07-openwrt-haproxy.md`:如需调整外部端口/防火墙,参考 HAProxy 监听与转发(第 6 节) diff --git a/docs/03-04-k3s-cloudflare-tunnel-配置接入.md b/docs/03-04-k3s-cloudflare-tunnel-配置接入.md index 57c5562..4cd7d9d 100644 --- a/docs/03-04-k3s-cloudflare-tunnel-配置接入.md +++ b/docs/03-04-k3s-cloudflare-tunnel-配置接入.md @@ -1,18 +1,53 @@ -# 03-05-k3s Cloudflare Tunnel 配置接入 +# 03-04-k3s Cloudflare Tunnel 配置接入 -> 本文只讲 K3s 侧如何接入 Cloudflare Tunnel(`cloudflared` 部署、验证、排查)。 +> 本文覆盖 Tunnel 完整流程:Zero Trust 云端创建、域名映射,以及将 `cloudflared` 安装到 K3s 并跑起 Pod,使 **Traefik 通过 Tunnel 对外提供服务**。 +> +> **状态:已验证**(2026-03,本仓库实验室 K3s 集群;详见 `00-02-验证矩阵.md`)。 + +--- + +## 访问链路(如何通过 Tunnel 访问 K3s 资源) + +**整体流程**:公网域名 → Cloudflare Edge → Tunnel → `cloudflared` Pod → **Traefik** → 根据 Host/Path 路由到具体 Service(如 Dashboard、GitLab、Homer 等)。 + +Traefik 是唯一入口。所有流量经 Tunnel 进入后,由 Traefik 的 IngressRoute/Ingress 按 `Host` 和 `Path` 分发到不同后端。**先保证 Traefik 内有对应路由**(如 Dashboard 的 IngressRoute),再在 Zero Trust 中把域名指到 Traefik,即可访问。 + +--- ## 前置条件 
-- 已完成 `01-04-cloudflare-tunnel.md` -- 已拿到 Tunnel Token 或凭据文件 -- Traefik 已可用(单节点/多节点均可) +- 控制节点已就绪:`01-01-k3s-控制节点含traefik.md` +- Traefik 已可用;若要通过 Tunnel 访问 Dashboard,需先部署 `03-01-k3s-traefik-dashboard.md` 或 `03-03-k3s-traefik-dashboard-acme.md` +- 域名已托管在 Cloudflare,且 Nameserver 已指向 Cloudflare +- 已创建 Cloudflare Zero Trust 账号 -## 操作步骤 +--- -1. 在 K3s 中创建保存 token/凭据的 Secret + Deployment。**唯一真源**:[`ansible/files/cloudflare-tunnel/cloudflared.yaml`](../ansible/files/cloudflare-tunnel/cloudflared.yaml)(替换 `TUNNEL_TOKEN` 占位符)。 +## 云端创建 Tunnel(Zero Trust 操作说明) -2. 部署 `cloudflared` 并确保重启后自动生效(按实际路径选择其一复制执行): +### 1. 创建 Tunnel + +1. 登录 [Cloudflare Zero Trust Dashboard](https://one.dash.cloudflare.com/) +2. 左侧导航:**Networks** → **Tunnels**(或 **Connectors** → **Cloudflare Tunnels**) +3. 点击 **Create a tunnel** +4. 选择 **Cloudflared** 作为 Connector 类型 +5. 输入 Tunnel 名称(如 `k3s-lab`),点击 **Save tunnel** + +### 2. 复制 Tunnel Token + +1. 在 Tunnel 创建成功后,会进入 **Install connector** 页面 +2. 选择操作系统(如 Linux) +3. 在安装命令中,找到形如 `cloudflared tunnel run --token <长串 Token>` 的 **Token** +4. **复制整个 Token**(点击复制图标,或手动选中),妥善保存 +5. 该 Token 将用于下方 K3s 中 `cloudflared` 部署 + +> 若已关闭页面:在 Tunnels 列表中点击该 Tunnel → **Configure** → **Install connector**,可重新查看/生成 Token。 + +### 3. 部署 cloudflared 到 K3s + +1. 从 **唯一真源** 复制清单:[`ansible/files/cloudflare-tunnel/cloudflared.yaml`](../ansible/files/cloudflare-tunnel/cloudflared.yaml) +2. 将 `TUNNEL_TOKEN` 占位符替换为前述 Zero Trust 中复制的 Token +3. 应用并等待 Pod 就绪(按实际 manifests 路径选择其一): ```bash # 默认路径 @@ -26,25 +61,170 @@ kubectl apply -f /storage/server/manifests/cloudflared.yaml kubectl -n kube-system rollout status deploy/cloudflared ``` -3. 将 `cloudflared.yaml` 放入上述 manifests 目录后,K3s 重启时会自动加载。 +4. 
将 `cloudflared.yaml` 放入上述 manifests 目录后,K3s 重启时会自动加载。 建议要点: - 使用官方 `cloudflared` 镜像 - Secret 不写死在明文 YAML - `cloudflared` 放在 `kube-system` 或专用 namespace +- Tunnel 指向的 URL 在 Zero Trust 中配置为 Traefik Service,无需在 `cloudflared.yaml` 内指定 -## 验证命令 +#### 集群内 Traefik 地址(Public Hostname 的 URL 填什么) + +Tunnel 后端应指向 **集群内的 Traefik 入口**,常用写法: + +| 写法 | 说明 | +|------|------| +| `traefik.kube-system.svc.cluster.local:80` | 见下「与哪份 YAML / 哪些字段对应」。**不要**手写 `http://`,Zero Trust 里选 HTTP 后只填主机与端口。 | +| `192.168.2.61` | 节点 IP(与 Traefik Service **EXTERNAL-IP** 之一、端口 **80** 等价)。Public Hostname 的 URL **只填到 IP/主机**,path 在浏览器访问时带上(见步骤 6)。 | + +**和仓库里哪份 YAML 的关系** + +- 本仓库的 [`cloudflared.yaml`](../ansible/files/cloudflare-tunnel/cloudflared.yaml) **只** 定义 `cloudflared` 的 Deployment/Secret,**不包含** Traefik Service;Tunnel 后端地址写的是 **集群里已存在的 Traefik Service**,不是 `cloudflared.yaml` 里的某一行。 +- Traefik 的 **Service** 由 K3s 内置 Traefik(HelmChart)安装时创建,资源名一般为 **`traefik`**,命名空间 **`kube-system`**。若你改过 chart 或 Service 名,以下 FQDN 与端口要以 **实际 `kubectl get svc` 输出** 为准。 + +**与 `kubectl get svc traefik -o yaml` 里哪些字段对应** + +集群 DNS 完整名规则:`<service>.<namespace>.svc.cluster.local:<port>`。 + +| 你填的片段 | 对应 YAML 路径(`kubectl -n kube-system get svc traefik -o yaml`) | +|------------|-------------------------------------------------------------------| +| `traefik` | `metadata.name` | +| `kube-system` | `metadata.namespace` | +| `:80` | `spec.ports` 里 **name 常为 `web`** 的 `port: 80`(HTTP 入口;若你环境 port 不是 80,Tunnel URL 里端口改成一致) | + +示例(节选,以你集群为准): + +```yaml +metadata: + name: traefik # → FQDN 第一段 + namespace: kube-system # → FQDN 第二段 +spec: + ports: + - name: web + port: 80 # → Tunnel URL 里冒号后的端口 + # ... 
+ type: LoadBalancer # EXTERNAL-IP 为节点 IP 列表时,也可用 IP:80 代替集群 DNS +``` + +**怎么用 kubectl 查(建议逐条执行)** + +```bash +# 1) 表格式:确认 NAME / PORT(S) / 集群 IP +kubectl -n kube-system get svc traefik -o wide + +# 2) 打印集群 DNS 主机名(无端口) +kubectl -n kube-system get svc traefik -o jsonpath='{.metadata.name}.{.metadata.namespace}.svc.cluster.local' +echo + +# 2b) 各端口名与端口(核对 HTTP 一般为 name=web、port=80) +kubectl -n kube-system get svc traefik -o jsonpath='{range .spec.ports[*]}{.name}={.port}{"\n"}{end}' + +# 3) 导出完整 YAML,人工对照 metadata / spec.ports +kubectl -n kube-system get svc traefik -o yaml +``` + +HTTP 入口一般为 **name=`web` 的 `port: 80`**;若你环境端口名不是 `web`,以第 1、2、3 条里 **实际 `port` 数字** 为准,Tunnel URL 中冒号后改为该数字。 + +`cloudflared` 与 Traefik 同集群时,**优先用** `traefik.kube-system.svc.cluster.local:80`,不依赖某台节点 IP 是否变更。 + +#### 临时验证(集群内 curl) + +官方 `cloudflared` 镜像多为 **distroless、无 `sh`**,不要用 `kubectl exec deploy/cloudflared -- sh`。 + +在 **`kube-system`** 起临时 Pod 探测 Traefik(与 Tunnel 后端同源): + +```bash +# 根路径(常见 404,无默认路由时正常) +kubectl run curl-test --rm -n kube-system --restart=Never \ + --image=curlimages/curl:latest -- \ + curl -sS -o /dev/null -w "HTTP %{http_code}\n" \ + http://traefik.kube-system.svc.cluster.local:80/ + +# Dashboard(已按 03-01/03-03 部署时期望 200) +kubectl run curl-test --rm -n kube-system --restart=Never \ + --image=curlimages/curl:latest -- \ + curl -sS -o /dev/null -w "HTTP %{http_code}\n" \ + http://traefik.kube-system.svc.cluster.local:80/dashboard/ +``` + +- **`/` → 404**:多数环境正常(未配置根路径路由)。 +- **`/dashboard/` → 200**:说明集群 DNS 与 Traefik 可达,Public Hostname 可填上述集群内地址。 + +### 4. 验证连接 ```bash kubectl -n kube-system get pods | grep cloudflared kubectl -n kube-system logs deploy/cloudflared --tail=100 ``` +确认 Pod 为 `Running`,日志中可见 `tunnel connected`。**只有 Connector 已连接后**,才能进行下一步域名配置。 + +### 5. 配置域名映射(Public Hostnames / Route tunnel) + +Zero Trust 向导顺序为:选择类型 → 命名 → **安装并运行 Connector** → **路由流量**。需等 Pod 跑起并显示已连接后,再配置 Public Hostnames。 + +1. 
在 Tunnel 配置页,切换到 **Public Hostnames**(已发布应用程序)标签,点击 **Add a public hostname** +2. 配置如下: + +| 字段 | 填写说明 | +|--------------------|------------------------------------------------------------------------| +| **Subdomain** | 子域名(如 `k3s`、`git`、`home`),或留空表示根域 | +| **Domain** | 下拉选择已托管在 Cloudflare 的域名(如 `jackadam.top`) | +| **Path** | 留空表示全路径;或填正则如 `^/blog` 做路径匹配 | +| **Service type** | 选择 **HTTP**(集群内 Traefik 为 HTTP,勿选 HTTPS) | +| **URL** | 仅填 `traefik.kube-system.svc.cluster.local:80`,**不要加 `http://`**(含义与核对见上文「集群内 Traefik 地址」) | + +> **重要**:URL 输入框会根据 Service type 自动加协议前缀。选 HTTP 时只需填 `traefik.kube-system.svc.cluster.local:80`;若手写 `http://` 会变成 `http://http://...`,导致「服务 URL 无效」。 + +3. 点击 **Save hostname**,按需重复添加其他子域,均指向同一内部地址。 + +**示例**:Subdomain `k3s` + Domain `jackadam.top` → 公网 `k3s.jackadam.top` 访问 Traefik;不同子域由 Traefik 的 IngressRoute 按 Host 分发。 + +### 6. 快速验证:以 Dashboard 为例 + +若已按 03-01 或 03-03 部署 Traefik Dashboard,按上述步骤 5 添加一条 Public Hostname。**Traefik 无需修改**。 + +> **Public Hostname 的 URL 只能写到「主机 + 端口」**,不能写成 `192.168.2.61/dashboard` 这类带 path 的地址;控制台也不支持在 URL 里做 path patch/转发。路径由浏览器访问时带上,由 Traefik 按路由匹配。 + +**Dashboard 专用子域**(示例) + +| 字段 | 填写值 | +|----------------|---------------------------------------------| +| Subdomain | `dashboard` | +| Domain | `jackadam.top`(或你的域名) | +| Path | 留空 | +| Service type | HTTP | +| URL | `traefik.kube-system.svc.cluster.local:80` 或 `192.168.2.61`(仅主机或集群 DNS,**勿**在 URL 里写 `/dashboard`) | + +将 `192.168.2.61` 换成你的 Traefik 入口节点 IP(与 `kubectl get svc traefik` 中 EXTERNAL-IP 之一一致即可)。 + +**访问时在域名后带上 path**:浏览器打开 **`https://dashboard.jackadam.top/dashboard/`**(路径 `/dashboard/` 由 Traefik 的 Dashboard IngressRoute 处理)。 + +--- + +**其他用法**:单域名 `k3s.jackadam.top`(URL 同样只填到 `traefik...:80` 或节点 IP),访问时带路径,如 `https://k3s.jackadam.top/dashboard/`;或为每个应用单独配子域。 + +--- + +## 架构说明 + +- **流量路径**:公网 → Cloudflare Edge → Tunnel → `cloudflared` Pod → **Traefik Service** → 各 IngressRoute 后端 +- **配置要点**:Public Hostname 的 URL 为 
`traefik.kube-system.svc.cluster.local:80`(Service type 选 HTTP),`cloudflared` 与 Traefik 同集群,可直接通过 Service 访问。 + +--- + ## 预期 -- 日志中可见 tunnel connected -- 访问域名可到达 Traefik 路由 +- Pod 为 `Running`,日志中可见 `tunnel connected` +- 配置域名后,访问公网域名可到达 Traefik 路由 + +## 注意事项 + +- 没有 token/凭据:回到 Zero Trust 页面重新生成 +- **顺序**:先跑起 Pod 并确认连接,再配置 Public Hostnames;否则「路由流量」步骤无法生效 +- URL 填写错误:Service type 选 HTTP,URL 只填 `traefik.kube-system.svc.cluster.local:80`,勿加 `http://` ## 失败排查 @@ -52,7 +232,10 @@ kubectl -n kube-system logs deploy/cloudflared --tail=100 - 返回 `404`:通常是 Traefik 路由未命中 - 返回 `502`:优先排查后端链路(`06-01-k3s-networkpolicy-故障排查.md`) +--- + ## 下一步 +- 其他应用(GitLab、Homer 等):在集群内创建 IngressRoute/Ingress 指定 Host 与后端,再在 Zero Trust 中添加对应子域的 Public Hostname 即可 - `05-03-k3s-安装gitlab-含runner.md` - `05-01-k3s-部署homer首页面板.md` diff --git a/docs/03-05-k3s-local-path-pvc.md b/docs/03-05-k3s-local-path-pvc.md index 03fd8be..54caa25 100644 --- a/docs/03-05-k3s-local-path-pvc.md +++ b/docs/03-05-k3s-local-path-pvc.md @@ -1,4 +1,4 @@ -# 03-06-k3s local-path PVC 本地持久化 +# 03-05-k3s local-path PVC 本地持久化 > K3s 自带的 **local-path-provisioner**:通过 PVC 自动创建本地 PersistentVolume,适用于单副本应用、缓存、日志等,无需 NFS 或 Longhorn。 @@ -7,42 +7,178 @@ | 方式 | 共享 | 适用场景 | |------|------|----------| | **local-path**(本页) | 否,单节点 | 单副本应用(Traefik acme.json、单机数据库等),Pod 固定调度到同一节点 | -| **NFS**(`03-08`) | 是,多节点读写 | 多副本共享目录、需跨节点访问 | -| **Longhorn**(`03-09`) | 块存储,CSI | 重状态系统、快照/备份、生产推荐 | +| **NFS**(`03-06-k3s-使用nfs存储.md`) | 是,多节点读写 | 多副本共享目录、需跨节点访问 | +| **Longhorn**(`03-07-k3s-longhorn-持久化存储.md`) | 块存储,CSI | 重状态系统、快照/备份、生产推荐 | ## 前置条件 - K3s 已安装(local-path-provisioner 默认启用) -- 无额外组件,`kubectl get storageclass` 可见 `local-path`(通常为默认) +- 无额外组件,`kubectl get storageclass` 可见 `local-path`(通常为 default),例如: + +```text +NAME PROVISIONER ... +local-path (default) rancher.io/local-path ... +``` ## 操作步骤 ### 1. 
清单(PVC + Deployment) -**唯一真源**:[`ansible/files/local-path-demo/local-path-pvc-demo.yaml`](../ansible/files/local-path-demo/local-path-pvc-demo.yaml)(含 PVC `local-pvc-demo` 与 `nginx-local-pvc-demo` Deployment;`storageClassName` 可省略,K3s 默认多为 `local-path`)。 +**唯一真源**:[`ansible/files/local-path-demo/local-path-pvc-demo.yaml`](../ansible/files/local-path-demo/local-path-pvc-demo.yaml)(含 PVC `local-pvc-demo` 与 `nginx-local-pvc-demo` Deployment;清单内已写 **`storageClassName: local-path`**,与 `kubectl get storageclass` 中名称一致即可)。 ### 2. 应用与验证 +在**本仓库根目录**执行(或把 `-f` 换成清单的绝对路径): + ```bash kubectl apply -f ansible/files/local-path-demo/local-path-pvc-demo.yaml +# 等 Pod 调度、PVC 绑定后再操作(local-path 多为 WaitForFirstConsumer,前几秒 Pending 正常) +kubectl rollout status deploy/nginx-local-pvc-demo --timeout=180s + kubectl get pv,pvc kubectl get pod -o wide kubectl exec deploy/nginx-local-pvc-demo -- sh -c 'echo hello > /usr/share/nginx/html/test.txt' kubectl delete pod -l app=nginx-local-pvc-demo +kubectl rollout status deploy/nginx-local-pvc-demo --timeout=180s kubectl exec deploy/nginx-local-pvc-demo -- cat /usr/share/nginx/html/test.txt # 应仍为 hello ``` +> **勿在 Pod 仍为 Pending 时 `exec`**,否则会报 `does not have a host assigned`。先 `kubectl get pod -o wide` 确认 **NODE** 有值且 **READY 1/1**。 + +### 3. 
Pending 排查(PVC / Pod 长时间不 Ready) + +```bash +kubectl describe pvc local-pvc-demo -n default +kubectl describe pod -l app=nginx-local-pvc-demo -n default +kubectl get events -n default --sort-by=.lastTimestamp | tail -30 +kubectl get pods -n kube-system | grep -i local-path +kubectl logs -n kube-system deploy/local-path-provisioner --tail=80 2>/dev/null || \ + kubectl logs -n kube-system -l app=local-path-provisioner --tail=80 +``` + +常见原因:**local-path-provisioner** 未就绪或报错、节点磁盘/权限、曾留下异常 PVC。可删除后重试: + +```bash +kubectl delete deploy/nginx-local-pvc-demo -n default --ignore-not-found +kubectl delete pvc local-pvc-demo -n default --ignore-not-found +# 等待 PV 回收后再 apply +kubectl apply -f ansible/files/local-path-demo/local-path-pvc-demo.yaml +kubectl rollout status deploy/nginx-local-pvc-demo --timeout=180s +``` + ## 注意事项 -- **绑定到节点**:PV 创建在 Pod 首次调度到的节点上,Pod 重建后仍会调度到该节点(provisioner 会打 nodeAffinity) +- **WaitForFirstConsumer**:PVC 在 Pod 未调度前可长期 **Pending**;PV 在 **Pod 首次成功调度到某节点后** 才创建,且会带 nodeAffinity,Pod 重建后仍倾向同一节点 - **单副本**:`ReadWriteOnce`,同一 PVC 只能被同一节点上的一个 Pod 挂载;多副本需 NFS 或 Longhorn - **数据路径**:默认在 K3s `--data-dir` 下的 `storage`,如 `/var/lib/rancher/k3s/storage` 或 `/storage` - **回收策略**:`Delete`,删除 PVC 时 PV 及本地目录会被清理 -## Traefik acme.json 示例 +## storageClass: local-path 与「本地路径」说明 -若希望 Traefik 的 ACME 证书走 local-path PVC,需在 HelmChartConfig 的 values 中为 Traefik 配置 volume 与 volumeMount(见 `03-02-k3s-traefik-acme.md` 可选配置)。多数场景下,配合 `nodeSelector` 固定 Traefik 到同一节点,再用 hostPath 或 local-path 均可;无 hostPath 时 K3s 默认会为 Traefik 挂 emptyDir 或默认卷。 +### PVC / StorageClass 里**不能**写宿主机目录 + +在清单里写 `storageClassName: local-path`(或 Traefik Helm `persistence.storageClass: local-path`)只表示:**交给 K3s 自带的 local-path-provisioner 在某一工作节点上自动创建本地目录并绑定 PV**。 + +- **不能**在 PVC 或 StorageClass 里指定「数据必须落在 `/mnt/mydata/xxx`」这类**宿主机绝对路径**。 +- 实际在节点磁盘上的目录,由 **provisioner 的全局配置 + 内部命名规则** 生成;通常位于 K3s `--data-dir` 下的 `storage` 子树(与上文「注意事项」一致)。 + +### Traefik `persistence.path: /data` 是**容器内**挂载点 + +在 
[`traefik-dashboard-acme.yaml`](../ansible/files/traefik-dashboard-acme/traefik-dashboard-acme.yaml) 中: + +| 字段 | 含义 | +|------|------| +| `persistence.path: /data` | Traefik **容器内**的挂载目录(Helm chart 把 PVC 挂到这里) | +| `acme.storage=/data/acme.json` | **容器内**证书文件路径,与上面挂载一致 | +| `storageClass: local-path` | 使用哪种**动态供给**方式,不等价于「宿主机路径」 | + +因此:**容器里永远是 `/data/...`**;宿主机上对应哪一块目录,要看该 PVC 绑定的 **PV**。 + +### 如何查看数据实际落在节点的哪个目录 + +PVC 绑定后,用 PV 反查(Traefik 示例在 `kube-system`): + +```bash +# 1) 找到 Traefik 使用的 PVC 名称(chart 创建的 claim 名因版本可能略有差异) +kubectl -n kube-system get pvc + +# 2) 从 PVC 的 Volume / Bound 信息得到 PV 名,再查看 PV(路径在 spec 或 describe 输出中) +kubectl -n kube-system describe pvc <你的-pvc-名> +kubectl describe pv <上一步看到的-pv-名> +kubectl get pv -o yaml # 在 spec 中查 path、hostPath、local、csi.volumeAttributes 等 +``` + +不同 K3s / provisioner 版本字段名可能略有差异,以 `describe` / `yaml` 实际输出为准。 + +### 需要指定「整盘根目录」时:改 local-path-provisioner 配置 + +若希望**某一类节点**上通过 `local-path` 创建的数据统一落在指定根路径下(仍由 provisioner 在根下自动分子目录),可编辑 **`kube-system`** 中的 ConfigMap **`local-path-config`**(键名多为 `config.json`),使用 **`nodePathMap`** 等为节点配置路径。 + +> 修改前建议 `kubectl -n kube-system get configmap local-path-config -o yaml` 备份;改错会导致新 PVC 无法创建。 + +示意(**仅为结构说明,请与集群内现有 JSON 合并修改,勿直接整段覆盖**): + +```json +{ + "nodePathMap": [ + { + "node": "DEFAULT_PATH_FOR_NON_LISTED_NODES", + "paths": ["/var/lib/rancher/k3s/storage"] + }, + { + "node": "你的节点主机名", + "paths": ["/data/k3s-local-path"] + } + ] +} +``` + +保存 ConfigMap 后,通常需 **重启** `local-path-provisioner` 相关负载使配置生效(以你集群实际 Deployment/DaemonSet 名为准): + +```bash +kubectl -n kube-system rollout restart deploy/local-path-provisioner 2>/dev/null || true +``` + +具体字段与默认值以当前 K3s 版本自带的 [rancher local-path-provisioner](https://github.com/rancher/local-path-provisioner) 文档为准。 + +### 与 hostPath 的区别 + +| 方式 | 说明 | +|------|------| +| **storageClass: local-path** | 动态 PV,路径由 provisioner 管理;适合一般工作负载与 Traefik ACME。 | +| **hostPath / 手写 PV** | 在清单里直接绑定节点上某一目录;需自行保证节点一致性与权限,与「local-path 
StorageClass」不是同一条配置路径。 | + +若 Traefik Helm chart 支持你也可使用其 **`persistence.hostPath`** 类选项(若版本提供),则属于 **显式 hostPath**,与仅写 `local-path` 的用法不同。 + +## Traefik ACME:证书固定到 local-path(推荐) + +ACME 存储路径在配置里已是 **`/data/acme.json`**(见 `03-02`、`03-03`)。K3s 自带 **Traefik Helm chart** 支持 **`persistence`**:开启后由 chart **自动创建 PVC**(`storageClass: local-path`),挂载到 **`/data`**,与 `acme.storage` 一致,**Pod 重建 / 滚动后证书仍在**。 + +**前提**:`nodeSelector` 必须把 Traefik **固定在同一节点**(与 local-path **ReadWriteOnce** 一致);若换节点,需迁卷或重新签发。 + +| 场景 | 唯一真源 | +|------|----------| +| **Dashboard + ACME + local-path(推荐)** | **仅此一份**:[`ansible/files/traefik-dashboard-acme/traefik-dashboard-acme.yaml`](../ansible/files/traefik-dashboard-acme/traefik-dashboard-acme.yaml)(**HelmChart persistence + ACME + Dashboard + IngressRoute**) | + +清单内已含:`persistence.enabled: true`、`storageClass: local-path`、`size: 512Mi`、`path: /data`,与 `acme.storage=/data/acme.json` 一致。部署前替换 **``**、`nodeSelector` 中的 **主机名**;并确保 **`cloudflare-api-token`** Secret 已存在(同 `03-02` / `03-03`)。**不需要 Dashboard** 时按该 YAML 文件头注释删减。 + +> **只能有一份** `HelmChartConfig`(`metadata.name: traefik`)。若曾用旧版无 persistence 的清单,请 **合并为** 上述 `traefik-dashboard-acme.yaml`(或 `traefik-acme.yaml` 并自行补全 persistence),避免多文件重复定义。 + +**应用与核对**(路径按你的 manifests 目录调整): + +```bash +kubectl apply -f ansible/files/traefik-dashboard-acme/traefik-dashboard-acme.yaml + +kubectl -n kube-system rollout status deploy/traefik --timeout=300s +kubectl -n kube-system get pvc | grep -i traefik +kubectl -n kube-system exec deploy/traefik -- ls -la /data/acme.json +kubectl -n kube-system logs deploy/traefik --tail=80 | grep -i acme || true +``` + +从 **emptyDir / 无持久卷** 迁到 PVC 时,若旧 Pod 里已有有效 `acme.json`,可先 `kubectl cp` 备份再切换;否则切换后会按新账户重新向 Let’s Encrypt 申请。 + +更多 ACME 排障见 `03-02-k3s-traefik-acme.md`、`03-03-k3s-traefik-dashboard-acme.md`。 ## 下一步 diff --git a/docs/03-06-k3s-使用nfs存储.md b/docs/03-06-k3s-使用nfs存储.md index 75d34fc..a31a340 100644 --- a/docs/03-06-k3s-使用nfs存储.md +++ 
b/docs/03-06-k3s-使用nfs存储.md @@ -4,7 +4,7 @@ ## 前置条件 -- 已完成 `01-06-armv7-nfs服务安装.md` +- 已完成 `01-05-armv7-nfs服务安装.md` - 可从 K3s 节点访问 NFS 服务器与导出目录 ## 操作步骤 diff --git a/docs/03-08-k3s-ha-集群配置与切换.md b/docs/03-08-k3s-ha-集群配置与切换.md index 080fd0f..10d3000 100644 --- a/docs/03-08-k3s-ha-集群配置与切换.md +++ b/docs/03-08-k3s-ha-集群配置与切换.md @@ -4,7 +4,7 @@ ## 前置条件 -- 已完成 `01-05-双控制节点ha.md` 安装准备 +- 已完成 `01-04-双控制节点ha.md` 安装准备 - 外部 datastore 与 `6443` LB 已可用 - 已确认可执行变更窗口 @@ -72,7 +72,7 @@ kubectl get pods -A ## 参考 -- `01-05-双控制节点ha.md` +- `01-04-双控制节点ha.md` - `01-01-k3s-控制节点含traefik.md` - `01-02-k3s-工作节点.md` diff --git a/docs/03-09-k3s-gitops-集群配置管理.md b/docs/03-09-k3s-gitops-集群配置管理.md index 6a695a0..c1f7e09 100644 --- a/docs/03-09-k3s-gitops-集群配置管理.md +++ b/docs/03-09-k3s-gitops-集群配置管理.md @@ -1,7 +1,7 @@ -# 03-09-k3s-gitops-集群配置管理(框架草案) +# 03-09-k3s-gitops-集群配置管理(框架草案) > 本文先给出 GitOps 管理 k3s 集群的大致框架,后续可以按需要再细化成完整实践。 -> 目标:在 `01-07` 自动装好 k3s 之后,由 GitOps 工具(Argo CD / Flux)自动把 Traefik、监控、应用等 YAML 下发到集群。 +> 目标:在 `01-06` 自动装好 k3s 之后,由 GitOps 工具(Argo CD / Flux)自动把 Traefik、监控、应用等 YAML 下发到集群。 ## 1. 选型与边界 @@ -43,7 +43,7 @@ homelab-k3s-gitops/ ## 4. 
与现有文档的衔接 -- `01-07-节点初始化-ansible-实践.md`:负责从「可 SSH 裸机」到「k3s 就绪」; +- `01-06-节点初始化-ansible-实践.md`:负责从「可 SSH 裸机」到「k3s 就绪」; - 本篇 `03-11`:负责从「k3s 就绪」到「配置由 Git 驱动下发」; - 其他 `02-**`、`04-**`、`05-**` 文档中的部署命令,可以逐步迁移为 GitOps 仓库中的 YAML/Kustomize/Helm 定义。 diff --git a/docs/04-01-k3s-nodejs-高级部署.md b/docs/04-01-k3s-nodejs-高级部署.md index 5465c5a..65ceeea 100644 --- a/docs/04-01-k3s-nodejs-高级部署.md +++ b/docs/04-01-k3s-nodejs-高级部署.md @@ -6,8 +6,9 @@ ## 前置条件 - 已完成 `01-02-k3s-工作节点.md` -- 已完成 `01-04-cloudflare-tunnel.md`(如需外网入口) - 已完成 `03-01-k3s-traefik-dashboard.md`(可选,便于观察路由) +- 已完成 `03-04-k3s-cloudflare-tunnel-配置接入.md`(如需外网入口) + ## 基础部署步骤 diff --git a/docs/06-01-k3s-networkpolicy-故障排查.md b/docs/06-01-k3s-networkpolicy-故障排查.md index a168d97..26292a8 100644 --- a/docs/06-01-k3s-networkpolicy-故障排查.md +++ b/docs/06-01-k3s-networkpolicy-故障排查.md @@ -7,7 +7,7 @@ - 已完成 `01-02-k3s-工作节点.md` - 已有至少一个后端服务(如 nginx/nodejs) -- 可执行 `kubectl` 与排障脚本 +- 可执行 `kubectl` ## 三条先验结论 @@ -30,16 +30,20 @@ curl -I --max-time 3 http://192.168.2.61:80 curl -I --max-time 3 http://192.168.2.62:80 ``` -### 2.3 快速检查脚本 +### 2.3 快速检查命令 ```bash -./scripts/diag/netpol/check-net.sh +kubectl get pod,svc,ing -A +kubectl get networkpolicy -A +kubectl -n kube-system get pods -l app=svclb-traefik -o wide ``` -### 2.4 全链路诊断 +### 2.4 全链路检查(手工) ```bash -./scripts/diag/entrypath/entrypath.sh run ... 
+# 从入口节点IP验证 HTTP 入口 +curl -I --max-time 3 http://192.168.2.61:80 +curl -I --max-time 3 http://192.168.2.62:80 ``` ## 关键策略建议 @@ -52,15 +56,7 @@ curl -I --max-time 3 http://192.168.2.62:80 ## 已验证修复(Fedora/FCOS) -### 4.1 脚本修复(推荐) - -```bash -./scripts/diag/firewalld/setup-k3s-firewalld-interfaces.sh -``` - -该脚本会把 `flannel.1` 与 `cni0` 纳入 `trusted` 并持久化。 - -### 4.2 手动修复(不使用脚本) +### 4.1 手动修复 ```bash # 1) 临时生效(当前运行时) diff --git a/scripts/01-08-deploy-nginx-tls-via-ylc61.sh b/scripts/01-08-deploy-nginx-tls-via-ylc61.sh deleted file mode 100644 index c128d74..0000000 --- a/scripts/01-08-deploy-nginx-tls-via-ylc61.sh +++ /dev/null @@ -1,49 +0,0 @@ -#!/usr/bin/env bash -# 经 ssh ylc61 在控制节点上一键部署 nginx TLS 矩阵(M1~M4,test01~04.jackadam.top) -# 用法:./scripts/01-08-deploy-nginx-tls-via-ylc61.sh -# 前置:本机可 ssh 到 ylc61;脚本会同步 ansible + SSH 密钥到 ylc61 后执行 playbook -set -euo pipefail - -ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)" -REMOTE_HOST="${REMOTE_HOST:-ylc61}" -REMOTE_USER="${REMOTE_USER:-root}" -REMOTE_REPO="${REMOTE_REPO:-/root/实验室建设}" -SSH_OPTS="-o BatchMode=yes -o ConnectTimeout=10" -SSH_KEY="${ROOT_DIR}/.ssh/id_ed25519_k3s_192.168.2.61" -[[ -f "$SSH_KEY" ]] && SSH_OPTS="$SSH_OPTS -i $SSH_KEY" -SSH_CMD="ssh $SSH_OPTS ${REMOTE_USER}@${REMOTE_HOST}" - -echo "=== 经 ${REMOTE_HOST} 部署 nginx TLS 矩阵 ===" - -# 1. 同步 SSH 密钥到 ylc61(ansible 连接各节点需此) -if [[ -d "${ROOT_DIR}/.ssh" ]]; then - echo "[1/3] 同步 SSH 密钥到 ${REMOTE_HOST}:~/.ssh/..." - $SSH_CMD "mkdir -p /root/.ssh && chmod 700 /root/.ssh" - for k in "${ROOT_DIR}"/.ssh/id_ed25519_k3s_192.168.2.61 "${ROOT_DIR}"/.ssh/id_ed25519_k3s_192.168.2.62 \ - "${ROOT_DIR}"/.ssh/id_ed25519_k3s_192.168.2.63 "${ROOT_DIR}"/.ssh/id_ed25519_k3s_192.168.2.64; do - [[ -f "$k" ]] || continue - scp -q $SSH_OPTS "$k" "${k}.pub" "${REMOTE_USER}@${REMOTE_HOST}:/root/.ssh/" 2>/dev/null || true - done - $SSH_CMD "chmod 600 /root/.ssh/id_ed25519_k3s_* 2>/dev/null || true" -fi - -# 2. 
同步 ansible 到远程 -if [[ -d "${ROOT_DIR}/ansible" ]]; then - echo "[2/3] 同步 ansible 到 ${REMOTE_HOST}:${REMOTE_REPO}..." - rsync -az -e "ssh $SSH_OPTS" --delete \ - --exclude='.git' \ - "${ROOT_DIR}/ansible/" \ - "${REMOTE_USER}@${REMOTE_HOST}:${REMOTE_REPO}/ansible/" 2>/dev/null || { - echo " [INFO] rsync 不可用,改用 scp..." - $SSH_CMD "mkdir -p ${REMOTE_REPO}/ansible" - scp -r $SSH_OPTS "${ROOT_DIR}/ansible/"* "${REMOTE_USER}@${REMOTE_HOST}:${REMOTE_REPO}/ansible/" - } -else - echo "[2/3] 假定 ${REMOTE_HOST} 上已有 ${REMOTE_REPO}" -fi - -echo "[3/3] 在 ${REMOTE_HOST} 上执行 ansible-playbook..." -$SSH_CMD "cd ${REMOTE_REPO} && ansible-playbook -i ansible/inventory.ini ansible/playbooks/nginx-matrix-tls-deploy.yml" - -echo "" -echo "[OK] nginx TLS 矩阵已部署。验证:./scripts/01-08-verify-haproxy-openwrt.sh --https-hosts 'test01.jackadam.top,test02.jackadam.top,test03.jackadam.top,test04.jackadam.top'" diff --git a/scripts/01-08-deploy-openwrt-haproxy.sh b/scripts/01-08-deploy-openwrt-haproxy.sh deleted file mode 100644 index db7c723..0000000 --- a/scripts/01-08-deploy-openwrt-haproxy.sh +++ /dev/null @@ -1,61 +0,0 @@ -#!/usr/bin/env bash -# OpenWrt:uhttpd 改回 80/443(IPv4+IPv6),HAProxy 部署到 18080/18443 -# 用法:./scripts/01-08-deploy-openwrt-haproxy.sh [haproxy-cfg-name] -# cfg-name 默认 haproxy-tls(可选 haproxy-no-check, haproxy-http, haproxy-tls, haproxy-proxy-http-tls) -set -euo pipefail - -ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)" -CFG_DIR="${ROOT_DIR}/ansible/files/01-08-haproxy" -SSH_OPENWRT="${SSH_OPENWRT:-openwrt}" -HAPROXY_CFG_NAME="${1:-haproxy-tls}" -HAPROXY_CFG_PATH="${HAPROXY_CFG_PATH:-/etc/haproxy.cfg}" - -echo "=== OpenWrt 部署:uhttpd 80/443 + HAProxy 18080/18443(${HAPROXY_CFG_NAME})===" - -# 1. uhttpd 恢复 80/443(IPv4 + IPv6) -echo "[1/4] 配置 uhttpd 监听 0.0.0.0:80、[::]:80、0.0.0.0:443、[::]:443..." 
-ssh "$SSH_OPENWRT" "bash -s" <<'UHTTPD' -set -e -# 清除旧 listen 并设置新的 -uci delete uhttpd.main.listen_http 2>/dev/null || true -uci delete uhttpd.main.listen_https 2>/dev/null || true -uci add_list uhttpd.main.listen_http='0.0.0.0:80' -uci add_list uhttpd.main.listen_http='[::]:80' -uci add_list uhttpd.main.listen_https='0.0.0.0:443' -uci add_list uhttpd.main.listen_https='[::]:443' -uci commit uhttpd -/etc/init.d/uhttpd restart -echo " uhttpd 已重启" -UHTTPD - -# 2. 停止 HAProxy(释放 80/443,避免与 uhttpd 冲突) -echo "[2/4] 停止 HAProxy..." -ssh "$SSH_OPENWRT" "/etc/init.d/haproxy stop 2>/dev/null || true" - -# 3. 拷贝 HAProxy cfg 并校验 -SRC_CFG="${CFG_DIR}/${HAPROXY_CFG_NAME}.cfg" -if [[ ! -f "$SRC_CFG" ]]; then - echo "[ERR] 配置文件不存在: $SRC_CFG" >&2 - exit 1 -fi - -echo "[3/4] 拷贝 ${HAPROXY_CFG_NAME}.cfg 到 ${SSH_OPENWRT}:${HAPROXY_CFG_PATH}..." -scp -q -O "$SRC_CFG" "${SSH_OPENWRT}:/tmp/haproxy-new.cfg" 2>/dev/null || { - scp -q "$SRC_CFG" "${SSH_OPENWRT}:/tmp/haproxy-new.cfg" -} - -ssh "$SSH_OPENWRT" "haproxy -c -f /tmp/haproxy-new.cfg" || { - echo "[ERR] HAProxy 配置语法校验失败" >&2 - exit 1 -} -ssh "$SSH_OPENWRT" "mv /tmp/haproxy-new.cfg ${HAPROXY_CFG_PATH}" - -# 4. 启动 HAProxy -echo "[4/4] 启动 HAProxy..." 
-ssh "$SSH_OPENWRT" "/etc/init.d/haproxy start" -ssh "$SSH_OPENWRT" "/etc/init.d/haproxy enable" - -echo "" -echo "[OK] 部署完成。验证:./scripts/01-08-verify-haproxy-openwrt.sh" -echo " - uhttpd: 80/443(IPv4+IPv6)" -echo " - HAProxy: 18080/18443" diff --git a/scripts/01-08-update-verify-matrix.py b/scripts/01-08-update-verify-matrix.py deleted file mode 100644 index d53bff5..0000000 --- a/scripts/01-08-update-verify-matrix.py +++ /dev/null @@ -1,32 +0,0 @@ -#!/usr/bin/env python3 -"""仅更新 docs/00-02-验证矩阵.md 中 01-08-openwrt-haproxy 条目(避免 sed 范围误伤)。""" -import re -import sys -from pathlib import Path - -def main() -> int: - root = Path(__file__).resolve().parent.parent - matrix = root / "docs" / "00-02-验证矩阵.md" - if len(sys.argv) > 1: - matrix = Path(sys.argv[1]) - today = sys.argv[2] if len(sys.argv) > 2 else __import__("datetime").date.today().isoformat() - - text = matrix.read_text(encoding="utf-8") - pattern = re.compile( - r"(- `01-08-openwrt-haproxy\.md`\s*\n\s+- )状态:[^\n]+(\s*\n\s+- )备注:[^\n]+", - re.MULTILINE, - ) - repl = ( - rf"\1状态:✅ 已验证\2备注:ImmortalWrt + HAProxy 18080/18443;经 `scripts/01-08-verify-haproxy.sh` " - rf"(ssh onecloud 第三方 curl)验证;cfg 语法、HTTP/HTTPS 后端正确;可选 `--deploy-matrix http|tls` 一键部署矩阵({today})。" - ) - new_text, n = pattern.subn(repl, text, count=1) - if n != 1: - print("[WARN] 未找到 01-08 条目或格式已变,跳过更新", file=sys.stderr) - return 1 - matrix.write_text(new_text, encoding="utf-8", newline="\n") - print(f"[OK] 已更新 {matrix}") - return 0 - -if __name__ == "__main__": - sys.exit(main()) diff --git a/scripts/01-08-verify-haproxy-openwrt.sh b/scripts/01-08-verify-haproxy-openwrt.sh deleted file mode 100644 index 148d6b5..0000000 --- a/scripts/01-08-verify-haproxy-openwrt.sh +++ /dev/null @@ -1,11 +0,0 @@ -#!/usr/bin/env bash -# 调用 01-08-verify-haproxy.sh,传入家庭私网默认参数(18080/18443、onecloud 第三方验证) -# 不部署、不改端口;需 OpenWrt HAProxy 已按 18080/18443 配置 -set -euo pipefail -ROOT="$(cd "$(dirname "$0")/.." 
&& pwd)" -exec "$ROOT/scripts/01-08-verify-haproxy.sh" \ - --verify-host onecloud \ - --openwrt-ip 192.168.2.1 \ - --http-port 18080 \ - --https-port 18443 \ - "$@" diff --git a/scripts/01-08-verify-haproxy.sh b/scripts/01-08-verify-haproxy.sh deleted file mode 100644 index 3b94d1f..0000000 --- a/scripts/01-08-verify-haproxy.sh +++ /dev/null @@ -1,211 +0,0 @@ -#!/usr/bin/env bash -# HAProxy 配置与后端验证(OpenWrt 18080/18443,第三方 onecloud curl) -# 核心:ansible/files/01-08-haproxy/*.cfg 语法正确;可选经 curl 验证运行时与后端 -set -euo pipefail - -ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)" -CFG_DIR="${ROOT_DIR}/ansible/files/01-08-haproxy" -MATRIX_FILE="${ROOT_DIR}/docs/00-02-验证矩阵.md" -SSH_OPENWRT="${SSH_OPENWRT:-openwrt}" -VERIFY_HOST="${VERIFY_HOST:-onecloud}" -OPENWRT_IP="${OPENWRT_IP:-192.168.2.1}" -HTTP_PORT="${HTTP_PORT:-18080}" -HTTPS_PORT="${HTTPS_PORT:-18443}" -DEPLOY_MATRIX="${DEPLOY_MATRIX:-none}" -HTTPS_HOSTS="" # 逗号分隔,如 test01.jackadam.top,test02.jackadam.top -UPDATE_MATRIX=1 -CFG_ONLY=0 # 仅 haproxy -c 校验本目录 cfg,不跑 curl - -usage() { - cat <<'EOF' -用法: - ./scripts/01-08-verify-haproxy.sh [选项] - -选项: - --cfg-only 仅校验 ansible/files/01-08-haproxy/*.cfg 语法(OpenWrt 上 haproxy -c),不跑 curl - --deploy-matrix 验证前一键部署矩阵(默认 none) - --verify-host curl 执行主机,SSH 目标(默认 onecloud) - --openwrt-ip OpenWrt/网关 IP(默认 192.168.2.1) - --http-port HAProxy HTTP 端口(默认 18080) - --https-port HAProxy HTTPS 端口(默认 18443) - --https-hosts HTTPS 校验域名,M1~M4 对应(缺省时不校验 HTTPS) - --update-matrix 验证通过后更新验证矩阵(默认启用) - --no-update-matrix 不更新验证矩阵 - -h, --help 显示帮助 - -前置:ssh openwrt 可用;完整验证还需 ssh onecloud;OpenWrt HAProxy 已按 18080/18443 配置(运行时验证) -EOF -} - -while [[ $# -gt 0 ]]; do - case "$1" in - --cfg-only) CFG_ONLY=1; shift ;; - --deploy-matrix) DEPLOY_MATRIX="${2:-none}"; shift 2 ;; - --verify-host) VERIFY_HOST="${2:-onecloud}"; shift 2 ;; - --openwrt-ip) OPENWRT_IP="${2:-192.168.2.1}"; shift 2 ;; - --http-port) HTTP_PORT="${2:-18080}"; shift 2 ;; - --https-port) HTTPS_PORT="${2:-18443}"; shift 2 ;; - 
--https-hosts) HTTPS_HOSTS="${2:-}"; shift 2 ;; - --update-matrix) UPDATE_MATRIX=1; shift ;; - --no-update-matrix) UPDATE_MATRIX=0; shift ;; - -h|--help) usage; exit 0 ;; - *) echo "[ERR] 未知参数: $1"; usage; exit 1 ;; - esac -done - -REMOTE_DIR="/tmp/haproxy-verify" - -if [[ $CFG_ONLY -eq 1 ]]; then - echo "=== HAProxy cfg 语法校验(${SSH_OPENWRT},ansible/files/01-08-haproxy/*.cfg)===" -else - echo "=== HAProxy 验证(${SSH_OPENWRT} → ${VERIFY_HOST} curl ${OPENWRT_IP}:${HTTP_PORT}/${HTTPS_PORT})===" -fi - -# 0. 按需部署矩阵(--cfg-only 时不部署) -if [[ $CFG_ONLY -eq 1 ]]; then - : -elif [[ "$DEPLOY_MATRIX" == "http" ]]; then - echo "[0] 部署 02-05 nginx 矩阵(http)..." - (cd "$ROOT_DIR" && ansible-playbook -i ansible/inventory.ini ansible/playbooks/nginx-matrix-deploy.yml) || { echo "[ERR] nginx-matrix-deploy 失败" >&2; exit 1; } - echo " [OK] HTTP 矩阵已部署" -elif [[ "$DEPLOY_MATRIX" == "tls" ]]; then - echo "[0] 部署 nginx 矩阵 TLS 版..." - (cd "$ROOT_DIR" && ansible-playbook -i ansible/inventory.ini ansible/playbooks/nginx-matrix-tls-deploy.yml) || { echo "[ERR] nginx-matrix-tls-deploy 失败" >&2; exit 1; } - echo " [OK] TLS 矩阵已部署" - [[ -z "$HTTPS_HOSTS" ]] && HTTPS_HOSTS="test01.jackadam.top,test02.jackadam.top,test03.jackadam.top,test04.jackadam.top" -fi - -if [[ ! -d "$CFG_DIR" ]]; then - echo "[ERR] cfg 目录不存在: $CFG_DIR" >&2 - exit 1 -fi - -# 1. 拷贝 cfg 到 OpenWrt(-O 强制旧 SCP 协议,兼容无 sftp-server 的 OpenWrt) -echo "[1/4] 拷贝 cfg 到 ${SSH_OPENWRT}:${REMOTE_DIR}..." -ssh "$SSH_OPENWRT" "mkdir -p ${REMOTE_DIR}" -scp -q -O "${CFG_DIR}"/*.cfg "${SSH_OPENWRT}:${REMOTE_DIR}/" 2>/dev/null || { - echo " [INFO] scp -O 不可用,改用 ssh 管道传输..." - for f in "${CFG_DIR}"/*.cfg; do - bn=$(basename "$f") - ssh "$SSH_OPENWRT" "cat > ${REMOTE_DIR}/${bn}" < "$f" - done -} - -# 2. 语法校验 -echo "[2/4] 校验 cfg 语法..." 
-SYNTAX_FAIL=0 -for cfg in haproxy-no-check haproxy-http haproxy-tls haproxy-proxy-http-tls; do - if ssh "$SSH_OPENWRT" "haproxy -c -f ${REMOTE_DIR}/${cfg}.cfg" 2>/dev/null; then - echo " [OK] ${cfg}.cfg" - else - echo " [FAIL] ${cfg}.cfg" >&2 - SYNTAX_FAIL=1 - fi -done -if ssh "$SSH_OPENWRT" "haproxy -c -f ${REMOTE_DIR}/haproxy-https.cfg" 2>/dev/null; then - echo " [OK] haproxy-https.cfg(语法;运行需 /etc/ssl/haproxy.pem)" -else - echo " [SKIP] haproxy-https.cfg(缺证书)" -fi - -if [[ $SYNTAX_FAIL -eq 1 ]]; then - echo "[ERR] 部分 cfg 语法校验失败" >&2 - exit 1 -fi - -if [[ $CFG_ONLY -eq 1 ]]; then - echo - echo "[PASS] 本目录 HAProxy cfg 语法校验通过(见 ansible/files/01-08-haproxy/README.md)" - exit 0 -fi - -# 3. SSH onecloud 执行 curl 验证 -echo "[3/4] 经 ${VERIFY_HOST} 验证 HTTP(${OPENWRT_IP}:${HTTP_PORT})..." - -# HTTP:TLS 矩阵(有 --https-hosts)按 Host 验证;否则 02-05 路径 /demo-m1~m4 -if [[ -n "$HTTPS_HOSTS" ]]; then - # TLS 矩阵:按 Host 验证,test01~test04 对应 M1~M4 - IFS=',' read -ra HOSTS <<< "$HTTPS_HOSTS" - HTTP_FAIL=0 - for i in "${!HOSTS[@]}"; do - host="${HOSTS[$i]}" - expect="M$((i+1))" - code=$(ssh "$VERIFY_HOST" "curl -s -o /dev/null -w '%{http_code}' --max-time 5 'http://${host}:${HTTP_PORT}/' --resolve '${host}:${HTTP_PORT}:${OPENWRT_IP}' 2>/dev/null" || echo "000") - body=$(ssh "$VERIFY_HOST" "curl -s --max-time 5 'http://${host}:${HTTP_PORT}/' --resolve '${host}:${HTTP_PORT}:${OPENWRT_IP}' 2>/dev/null" || echo "") - if [[ "$code" != "200" ]]; then - echo " [FAIL] http://${host}:${HTTP_PORT}/ 返回 ${code}" >&2 - HTTP_FAIL=1 - elif [[ "$body" != *"$expect"* ]]; then - echo " [FAIL] http://${host}:${HTTP_PORT}/ body 不含 ${expect}" >&2 - HTTP_FAIL=1 - else - echo " [OK] http://${host}:${HTTP_PORT}/ 200 含 ${expect}" - fi - done -else - # 02-05 路径型 - DEMO_PATHS=(demo-m1:M1 demo-m2:M2 demo-m3:M3 demo-m4:M4) - HTTP_FAIL=0 - for item in "${DEMO_PATHS[@]}"; do - path="${item%%:*}" - expect="${item##*:}" - code=$(ssh "$VERIFY_HOST" "curl -s -o /dev/null -w '%{http_code}' --max-time 5 
'http://${OPENWRT_IP}:${HTTP_PORT}/${path}/' 2>/dev/null" || echo "000") - body=$(ssh "$VERIFY_HOST" "curl -s --max-time 5 'http://${OPENWRT_IP}:${HTTP_PORT}/${path}/' 2>/dev/null" || echo "") - if [[ "$code" != "200" ]]; then - echo " [FAIL] /${path}/ 返回 ${code}" >&2 - HTTP_FAIL=1 - elif [[ "$body" != *"$expect"* ]]; then - echo " [FAIL] /${path}/ body 不含 ${expect}" >&2 - HTTP_FAIL=1 - else - echo " [OK] /${path}/ 200 含 ${expect}" - fi - done -fi - -if [[ $HTTP_FAIL -eq 1 ]]; then - echo "[ERR] HTTP 验证失败" >&2 - exit 1 -fi - -# 4. HTTPS 验证(需 --https-hosts,不带 -k 校验证书) -if [[ -n "$HTTPS_HOSTS" ]]; then - echo "[4/4] 经 ${VERIFY_HOST} 验证 HTTPS(域名 :${HTTPS_PORT},校验 ACME 证书)..." - IFS=',' read -ra HOSTS <<< "$HTTPS_HOSTS" - HTTPS_FAIL=0 - for i in "${!HOSTS[@]}"; do - host="${HOSTS[$i]}" - expect="M$((i+1))" - code=$(ssh "$VERIFY_HOST" "curl -s -o /dev/null -w '%{http_code}' --max-time 10 'https://${host}:${HTTPS_PORT}/' --resolve '${host}:${HTTPS_PORT}:${OPENWRT_IP}' 2>/dev/null" || echo "000") - body=$(ssh "$VERIFY_HOST" "curl -s --max-time 10 'https://${host}:${HTTPS_PORT}/' --resolve '${host}:${HTTPS_PORT}:${OPENWRT_IP}' 2>/dev/null" || echo "") - if [[ "$code" != "200" ]]; then - echo " [FAIL] https://${host}:${HTTPS_PORT}/ 返回 ${code}" >&2 - HTTPS_FAIL=1 - elif [[ "$body" != *"$expect"* ]]; then - echo " [FAIL] https://${host}:${HTTPS_PORT}/ body 不含 ${expect}" >&2 - HTTPS_FAIL=1 - else - echo " [OK] https://${host}:${HTTPS_PORT}/ 200 含 ${expect}" - fi - done - if [[ $HTTPS_FAIL -eq 1 ]]; then - echo "[ERR] HTTPS 验证失败" >&2 - exit 1 - fi -else - echo "[4/4] 跳过 HTTPS(未指定 --https-hosts)" -fi - -echo -echo "[PASS] HAProxy 验证通过" - -# 5. 可选:更新验证矩阵 -if [[ $UPDATE_MATRIX -eq 1 ]] && [[ -f "$MATRIX_FILE" ]]; then - TODAY=$(date +%Y-%m-%d) - echo "[INFO] 更新验证矩阵..." 
- if command -v python3 >/dev/null 2>&1; then - python3 "${ROOT_DIR}/scripts/01-08-update-verify-matrix.py" "$MATRIX_FILE" "$TODAY" || echo " [WARN] 验证矩阵未更新" - else - echo " [WARN] 未找到 python3,请手动更新 docs/00-02-验证矩阵.md" - fi -fi diff --git a/scripts/02-verify-nginx-matrix-individual.sh b/scripts/02-verify-nginx-matrix-individual.sh deleted file mode 100644 index d918aff..0000000 --- a/scripts/02-verify-nginx-matrix-individual.sh +++ /dev/null @@ -1,106 +0,0 @@ -#!/usr/bin/env bash -# 02 系列逐个验证:清理 → 逐个部署 02-01~02-04 → TLS 矩阵 → onecloud 验证 -# 用法:./scripts/02-verify-nginx-matrix-individual.sh -# 前置:ssh ylc61、ssh onecloud 可用;OpenWrt HAProxy 18080/18443 已部署 -set -euo pipefail - -ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)" -REMOTE_HOST="${REMOTE_HOST:-ylc61}" -REMOTE_USER="${REMOTE_USER:-root}" -REMOTE_REPO="${REMOTE_REPO:-/root/实验室建设}" -VERIFY_HOST="${VERIFY_HOST:-onecloud}" -OPENWRT_IP="${OPENWRT_IP:-192.168.2.1}" -HTTP_PORT="${HTTP_PORT:-18080}" -HTTPS_PORT="${HTTPS_PORT:-18443}" -KUBECONFIG="${KUBECONFIG:-/etc/rancher/k3s/k3s.yaml}" - -SSH_OPTS="-o BatchMode=yes -o ConnectTimeout=10" -SSH_KEY="${ROOT_DIR}/.ssh/id_ed25519_k3s_192.168.2.61" -[[ -f "$SSH_KEY" ]] && SSH_OPTS="$SSH_OPTS -i $SSH_KEY" -SSH_YLC="ssh $SSH_OPTS ${REMOTE_USER}@${REMOTE_HOST}" - -echo "=== 02 系列 nginx 矩阵逐个验证(${REMOTE_HOST} + ${VERIFY_HOST})===" - -# 1. 同步 SSH 密钥与 nginx-matrix 到 ylc61 -echo "[0] 同步 SSH 密钥与 ansible 到 ${REMOTE_HOST}..." 
-if [[ -d "${ROOT_DIR}/.ssh" ]]; then - $SSH_YLC "mkdir -p /root/.ssh && chmod 700 /root/.ssh" - for k in "${ROOT_DIR}"/.ssh/id_ed25519_k3s_192.168.2.61 "${ROOT_DIR}"/.ssh/id_ed25519_k3s_192.168.2.62 \ - "${ROOT_DIR}"/.ssh/id_ed25519_k3s_192.168.2.63 "${ROOT_DIR}"/.ssh/id_ed25519_k3s_192.168.2.64; do - [[ -f "$k" ]] || continue - scp -q $SSH_OPTS "$k" "${k}.pub" "${REMOTE_USER}@${REMOTE_HOST}:/root/.ssh/" 2>/dev/null || true - done - $SSH_YLC "chmod 600 /root/.ssh/id_ed25519_k3s_* 2>/dev/null || true" -fi - -$SSH_YLC "mkdir -p ${REMOTE_REPO}/ansible/files" -rsync -az -e "ssh $SSH_OPTS" --delete "${ROOT_DIR}/ansible/files/nginx-matrix/" \ - "${REMOTE_USER}@${REMOTE_HOST}:${REMOTE_REPO}/ansible/files/nginx-matrix/" 2>/dev/null || { - scp -r $SSH_OPTS "${ROOT_DIR}/ansible/files/nginx-matrix/"* \ - "${REMOTE_USER}@${REMOTE_HOST}:${REMOTE_REPO}/ansible/files/nginx-matrix/" -} - -# 2. 清理所有 nginx 相关资源 -echo "[1] 清理 nginx 矩阵(path-based + TLS)..." -$SSH_YLC "KUBECONFIG=${KUBECONFIG} kubectl delete deployment,svc -n default nginx-m1 nginx-m2 nginx-m3 nginx-m4 --ignore-not-found=true" -$SSH_YLC "KUBECONFIG=${KUBECONFIG} kubectl delete ingress -n default nginx-m1 nginx-m3 nginx-m1-http nginx-m3-http --ignore-not-found=true" -$SSH_YLC "KUBECONFIG=${KUBECONFIG} kubectl delete ingressroute -n default nginx-m2 nginx-m4 nginx-m2-http nginx-m4-http --ignore-not-found=true" -$SSH_YLC "KUBECONFIG=${KUBECONFIG} kubectl delete middleware -n default stripprefix-m1 stripprefix-m2 stripprefix-m3 stripprefix-m4 --ignore-not-found=true" -$SSH_YLC "KUBECONFIG=${KUBECONFIG} kubectl delete configmap -n default nginx-m1-html nginx-m2-html nginx-m3-html nginx-m4-html --ignore-not-found=true" -sleep 2 - -# 3. 
逐个部署 02-01~02-04 并验证 -MATRIX=( - "01-control-ingress.yaml:demo-m1:M1" - "02-control-ingressroute.yaml:demo-m2:M2" - "03-worker-ingress.yaml:demo-m3:M3" - "04-worker-ingressroute.yaml:demo-m4:M4" -) - -for item in "${MATRIX[@]}"; do - file="${item%%:*}" - rest="${item#*:}" - path="${rest%%:*}" - expect="${rest##*:}" - echo "[2] 部署 ${file}(${path} → ${expect})..." - $SSH_YLC "KUBECONFIG=${KUBECONFIG} kubectl apply -f ${REMOTE_REPO}/ansible/files/nginx-matrix/${file}" - $SSH_YLC "KUBECONFIG=${KUBECONFIG} kubectl wait --for=condition=ready pod -l app=nginx-m${expect#M} -n default --timeout=120s" - code=$(ssh $SSH_OPTS "$VERIFY_HOST" "curl -s -o /dev/null -w '%{http_code}' --max-time 10 'http://${OPENWRT_IP}:${HTTP_PORT}/${path}/' 2>/dev/null" || echo "000") - body=$(ssh $SSH_OPTS "$VERIFY_HOST" "curl -s --max-time 10 'http://${OPENWRT_IP}:${HTTP_PORT}/${path}/' 2>/dev/null" || echo "") - if [[ "$code" != "200" ]]; then - echo " [FAIL] /${path}/ 返回 ${code}" >&2 - exit 1 - fi - if [[ "$body" != *"$expect"* ]]; then - echo " [FAIL] /${path}/ body 不含 ${expect}" >&2 - exit 1 - fi - echo " [OK] /${path}/ 200 含 ${expect}" -done - -# 4. 部署 TLS 矩阵 -echo "[3] 部署 nginx TLS 矩阵..." -"${ROOT_DIR}/scripts/01-08-deploy-nginx-tls-via-ylc61.sh" - -# 5. 验证 HTTPS(test01~04) -echo "[4] 经 ${VERIFY_HOST} 验证 HTTPS(test01~04.jackadam.top:${HTTPS_PORT})..." 
-HTTPS_HOSTS="test01.jackadam.top,test02.jackadam.top,test03.jackadam.top,test04.jackadam.top" -IFS=',' read -ra HOSTS <<< "$HTTPS_HOSTS" -for i in "${!HOSTS[@]}"; do - host="${HOSTS[$i]}" - expect="M$((i+1))" - code=$(ssh $SSH_OPTS "$VERIFY_HOST" "curl -s -o /dev/null -w '%{http_code}' --max-time 10 'https://${host}:${HTTPS_PORT}/' --resolve '${host}:${HTTPS_PORT}:${OPENWRT_IP}' 2>/dev/null" || echo "000") - body=$(ssh $SSH_OPTS "$VERIFY_HOST" "curl -s --max-time 10 'https://${host}:${HTTPS_PORT}/' --resolve '${host}:${HTTPS_PORT}:${OPENWRT_IP}' 2>/dev/null" || echo "") - if [[ "$code" != "200" ]]; then - echo " [FAIL] https://${host}:${HTTPS_PORT}/ 返回 ${code}" >&2 - exit 1 - fi - if [[ "$body" != *"$expect"* ]]; then - echo " [FAIL] https://${host}:${HTTPS_PORT}/ body 不含 ${expect}" >&2 - exit 1 - fi - echo " [OK] https://${host}:${HTTPS_PORT}/ 200 含 ${expect}" -done - -echo "" -echo "[PASS] 02 系列 nginx 矩阵逐个验证通过(02-01~02-04 HTTP path + TLS domain)" diff --git a/scripts/03-verify-traefik-dashboard-acme.sh b/scripts/03-verify-traefik-dashboard-acme.sh deleted file mode 100644 index 7f3c23d..0000000 --- a/scripts/03-verify-traefik-dashboard-acme.sh +++ /dev/null @@ -1,57 +0,0 @@ -#!/usr/bin/env bash -# 03-03 Traefik Dashboard + ACME 合并配置验证 -# 用法:./scripts/03-verify-traefik-dashboard-acme.sh [--apply] -# 默认:仅核对模板与当前集群状态;加 --apply 时尝试应用 traefik-dashboard-acme 并验证(可能触发 Traefik 重启,新 Pod 需重新获取证书) -# 前置:03-02 ACME 已部署(含 cloudflare-api-token);ssh ylc61 可用 -set -euo pipefail - -ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." 
&& pwd)" -REMOTE_HOST="${REMOTE_HOST:-ylc61}" -REMOTE_USER="${REMOTE_USER:-root}" -CFG_SRC="${ROOT_DIR}/ansible/files/traefik-dashboard-acme/traefik-dashboard-acme.yaml" -ENTRY_IP="${ENTRY_IP:-192.168.2.61}" -OPENWRT_IP="${OPENWRT_IP:-192.168.2.1}" -HTTPS_PORT="${HTTPS_PORT:-18443}" -DO_APPLY=0 -[[ "${1:-}" == "--apply" ]] && DO_APPLY=1 - -SSH_OPTS="-o BatchMode=yes -o ConnectTimeout=10" -SSH_KEY="${ROOT_DIR}/.ssh/id_ed25519_k3s_192.168.2.61" -[[ -f "$SSH_KEY" ]] && SSH_OPTS="$SSH_OPTS -i $SSH_KEY" -SSH_CMD="ssh $SSH_OPTS ${REMOTE_USER}@${REMOTE_HOST}" -KUBECONFIG="/etc/rancher/k3s/k3s.yaml" - -echo "=== 03-03 Traefik Dashboard + ACME 验证 ===" - -# 1. 核对 traefik-dashboard-acme 模板包含 03-01 + 03-02 要素 -echo "[1/3] 核对模板(dashboard + ACME + ping + PROXY)..." -grep -q "api.dashboard=true" "$CFG_SRC" && grep -q "api.insecure=true" "$CFG_SRC" || { echo " [FAIL] 缺少 dashboard 参数"; exit 1; } -grep -q "certificatesresolvers.cloudflare" "$CFG_SRC" && grep -q "acme.dnschallenge" "$CFG_SRC" || { echo " [FAIL] 缺少 ACME 参数"; exit 1; } -grep -q "ping.entryPoint=websecure" "$CFG_SRC" && grep -q "proxyProtocol.trustedIPs" "$CFG_SRC" || { echo " [FAIL] 缺少 ping/PROXY 参数"; exit 1; } -grep -q "ingressRoute:" "$CFG_SRC" && grep -q "dashboard:" "$CFG_SRC" || true -echo " [OK] 模板包含 03-01 + 03-02 合并要素" - -# 2. 当前集群 ACME 状态 -echo "[2/3] 当前集群 ACME(test01.jackadam.top)..." -CODE=$(curl -sk -o /dev/null -w '%{http_code}' --max-time 10 "https://test01.jackadam.top/" --resolve "test01.jackadam.top:443:${ENTRY_IP}" 2>/dev/null || echo "000") -[[ "$CODE" != "200" ]] && CODE=$(curl -sk -o /dev/null -w '%{http_code}' --max-time 10 "https://test01.jackadam.top:${HTTPS_PORT}/" --resolve "test01.jackadam.top:${HTTPS_PORT}:${OPENWRT_IP}" 2>/dev/null || echo "000") -[[ "$CODE" == "200" ]] && echo " [OK] ACME TLS 200" || echo " [WARN] ACME 返回 ${CODE}" - -# 3. 可选 apply -if [[ $DO_APPLY -eq 1 ]]; then - echo "[3/3] 应用 traefik-dashboard-acme(会触发 Traefik 重启)..." 
- EMAIL=$($SSH_CMD "KUBECONFIG=${KUBECONFIG} kubectl get helmchartconfig traefik -n kube-system -o jsonpath='{.spec.valuesContent}' 2>/dev/null" | grep -oE 'acme\.email=[^[:space:]\"'"'"']+' | cut -d= -f2 | head -1) - [[ -z "$EMAIL" ]] && EMAIL="" - $SSH_CMD "mkdir -p /tmp/traefik-verify" - scp -q $SSH_OPTS "$CFG_SRC" "${REMOTE_USER}@${REMOTE_HOST}:/tmp/traefik-verify/traefik-dashboard-acme.yaml" - $SSH_CMD "sed -i 's||'"$EMAIL"'|g' /tmp/traefik-verify/traefik-dashboard-acme.yaml" - $SSH_CMD "KUBECONFIG=${KUBECONFIG} kubectl apply -f /tmp/traefik-verify/traefik-dashboard-acme.yaml" - $SSH_CMD "KUBECONFIG=${KUBECONFIG} kubectl -n kube-system rollout status deploy/traefik --timeout=180s" || echo " [WARN] rollout 超时,可检查 Pod 与 ACME 日志" - CODE=$(curl -s -o /dev/null -w '%{http_code}' --max-time 10 "http://${ENTRY_IP}/dashboard/" 2>/dev/null || echo "000") - [[ "$CODE" == "200" || "$CODE" == "307" ]] && echo " [OK] Dashboard 返回 ${CODE}" || echo " [WARN] Dashboard 返回 ${CODE}" -else - echo "[3/3] 跳过 apply(加 --apply 可尝试应用并验证 Dashboard)" -fi - -echo "" -echo "[PASS] 03-03 验证完成" diff --git a/scripts/README.md b/scripts/README.md index b0eb65e..5028005 100644 --- a/scripts/README.md +++ b/scripts/README.md @@ -1,71 +1,31 @@ # Scripts 总览 -本目录集中维护 K3s 排障与恢复脚本。统一约定:**在仓库根目录执行**,使用 `./scripts/...` 路径调用。 +本目录集中维护通用运维脚本。统一约定:**在仓库根目录执行**,使用 `./scripts/...` 路径调用。 ## 目录 -- `scripts/01-08-deploy-openwrt-haproxy.sh` - - 一键部署:uhttpd 改回 80/443(IPv4+IPv6),HAProxy 部署到 18080/18443(默认 haproxy-tls) -- `scripts/01-08-deploy-nginx-tls-via-ylc61.sh` - - 经 ssh ylc61 在控制节点上一键部署 nginx TLS 矩阵(M1~M4,test01~04);同步 ansible + SSH 密钥后执行 playbook -- `scripts/03-verify-traefik-dashboard-acme.sh` - - 03-03 配置验证:核对 traefik-dashboard-acme 模板合并 03-01+03-02 要素;检查当前 ACME;可选 `--apply` 尝试应用(会触发 Traefik 重启) -- `scripts/02-verify-nginx-matrix-individual.sh` - - 02 系列逐个验证:清理 → 逐个部署 02-01~02-04(path-based)→ TLS 矩阵 → onecloud 验证 HTTP path + HTTPS domain;验证通过后需手动更新 `docs/00-02-验证矩阵.md` -- 
`scripts/01-08-verify-haproxy-openwrt.sh` - - 家庭私网默认:调用主脚本,18080/18443、onecloud 第三方验证(见 `docs/01-08-openwrt-haproxy.md`) -- `scripts/01-08-verify-haproxy.sh` - - **核心**:校验 `ansible/files/01-08-haproxy/*.cfg` 在 OpenWrt 上 `haproxy -c` 通过;`--cfg-only` 仅做语法校验、不 curl。完整流程另经 ssh onecloud 验证 HTTP/HTTPS;可选 `--deploy-matrix http|tls`、`--https-hosts`;验证通过可更新验证矩阵 +- `scripts/k3s-delete-lab-stacks.sh` + - **按集群里实际资源**遍历删除:用 `kubectl get` 枚举各命名空间下的 Deployment/Service/Ingress/IngressRoute 等再 `kubectl delete`(**不读仓库 YAML 目录**);默认跳过 `kube-system` 等系统命名空间;`--preview` 只列资源;`--namespaces` 限定 NS;`--with-pvc` / `--with-configmaps` / `--with-secrets`(需 `jq`)按需打开 +- `scripts/cloudflare-delete-acme-challenge-dns.sh` + - 批量删除 Cloudflare 中 `_acme-challenge` 相关 DNS 记录(ACME 残留);环境变量或脚本内 `DEFAULT_*` 配置二选一,环境变量优先;`--dry-run` 仅列出不删除 - `scripts/ssh/setup-k3s-workers-ssh.sh` - - 为 Ansible 自动化准备 SSH:为所有 k3s 节点配置 jack + root 公钥及每节点私钥(配合 `docs/01-07-节点初始化-ansible-实践.md`) -- `scripts/diag/entrypath/entrypath.sh` - - K3s 入口到 Traefik 回包链路诊断主命令 -- `scripts/diag/netpol/check-net.sh` - - NetworkPolicy/连通性快速检查脚本(交互模式,自动输出日志) -- `scripts/diag/recovery/k3s-recovery-reset.sh` - - K3s 排障场景重置与恢复脚本 -- `scripts/diag/firewalld/setup-k3s-firewalld-interfaces.sh` - - 一键写入 `flannel.1` / `cni0` 到 firewalld `trusted`(含持久化) -- `scripts/diag/ssh/setup-ssh-keys.sh` - - 交互式生成并下发 SSH 排障密钥 -- `scripts/diag/ssh/test-ssh.sh` + - 为 Ansible 自动化准备 SSH:为所有 k3s 节点配置 jack + root 公钥及每节点私钥(配合 `docs/01-06-节点初始化-ansible-实践.md`) +- `scripts/ssh/test-ssh.sh` - 验证 worker/client SSH key 登录与 sudo 可用性 ## 从仓库根执行示例 ```bash -# 1) 初始化排障 SSH 密钥(可选) -./scripts/diag/ssh/setup-ssh-keys.sh +# 1) 初始化 worker SSH 密钥(可选) +./scripts/ssh/setup-k3s-workers-ssh.sh # 2) 验证 SSH(建议) -./scripts/diag/ssh/test-ssh.sh +./scripts/ssh/test-ssh.sh -# 3) 写入 firewalld 接口基线(推荐,Fedora/FCOS) -./scripts/diag/firewalld/setup-k3s-firewalld-interfaces.sh - -# 4) 快速检查(交互) -./scripts/diag/netpol/check-net.sh - -# 5) 执行完整入口链路诊断 -./scripts/diag/entrypath/entrypath.sh run \ - 
--worker-host root@192.168.2.62 \ - --client-host root@192.168.2.63 \ - --worker-ssh-key ~/.ssh/id_ed25519_k3s_diag_worker \ - --client-ssh-key ~/.ssh/id_ed25519_k3s_diag_client \ - --client-ip 192.168.2.63 \ - --lb-ip 192.168.2.62 \ - --remote-check y \ - --capture-mode y \ - --capture-seconds 15 \ - --nft-trace-mode y \ - --nft-trace-seconds 10 \ - --return-trace-mode y \ - --return-trace-seconds 12 \ - --pod-netns-trace-mode y \ - --pod-netns-trace-seconds 12 \ - --non-interactive +# 3) 执行批量部署/配置(示例) +cd ansible +ansible-playbook -i inventory.ini playbooks/k3s-init-and-install.yml ``` ## 说明文档 -- 入口链路诊断详见 `scripts/diag/entrypath/README.md` - 主文档入口详见 `docs/00-00-构建总览.md` diff --git a/scripts/cloudflare-delete-acme-challenge-dns.sh b/scripts/cloudflare-delete-acme-challenge-dns.sh new file mode 100644 index 0000000..74cc14c --- /dev/null +++ b/scripts/cloudflare-delete-acme-challenge-dns.sh @@ -0,0 +1,140 @@ +#!/usr/bin/env bash +# 批量删除 Cloudflare 中 _acme-challenge 相关的 DNS 记录 +# 用法:环境变量 或 脚本内配置 二选一,环境变量优先 +# CF_API_TOKEN=xxx ZONE_NAME=jackadam.top ./scripts/cloudflare-delete-acme-challenge-dns.sh [--dry-run] +# 或在下方配置中填写,直接运行 ./scripts/cloudflare-delete-acme-challenge-dns.sh [--dry-run] + +set -e + +# ---------- 脚本内配置(环境变量未设置时生效)---------- +# Cloudflare API Token:需 Zone → DNS → Read + Edit +# (勿将含真实 Token 的脚本提交到 Git) +DEFAULT_CF_API_TOKEN="" +# 区域:填 ZONE_NAME 或 ZONE_ID 其一 +DEFAULT_ZONE_NAME="jackadam.top" +DEFAULT_ZONE_ID="" +# ------------------------------------ + +# 环境变量优先于脚本配置 +CF_API_TOKEN="${CF_API_TOKEN:-$DEFAULT_CF_API_TOKEN}" +ZONE_NAME="${ZONE_NAME:-$DEFAULT_ZONE_NAME}" +ZONE_ID="${ZONE_ID:-$DEFAULT_ZONE_ID}" + +API_BASE="https://api.cloudflare.com/client/v4" +DRY_RUN=false +BATCH_SIZE=200 + +usage() { + echo "用法: $0 [--dry-run]" + echo " 方式一:环境变量 CF_API_TOKEN=xxx ZONE_NAME=jackadam.top $0" + echo " 方式二:脚本内配置 在 DEFAULT_* 变量中填写后直接运行" + echo " --dry-run 仅列出待删除记录,不执行删除" + exit 1 +} + +for arg in "$@"; do + case "$arg" in + --dry-run) DRY_RUN=true ;; + 
-h|--help) usage ;; + esac +done + +if [[ -z "${CF_API_TOKEN}" ]]; then + echo "[ERROR] 请设置 CF_API_TOKEN(环境变量或脚本内 DEFAULT_CF_API_TOKEN)" >&2 + usage +fi + +# 若未填 ZONE_ID,用 ZONE_NAME 查询 +if [[ -n "${ZONE_NAME}" && -z "${ZONE_ID}" ]]; then + echo "[INFO] 查询区域 ${ZONE_NAME} 的 ZONE_ID..." + resp=$(curl -s -X GET "${API_BASE}/zones?name=${ZONE_NAME}" \ + -H "Authorization: Bearer ${CF_API_TOKEN}" \ + -H "Content-Type: application/json") + if ! echo "$resp" | jq -e '.success == true' >/dev/null 2>&1; then + echo "[ERROR] 查询区域失败: $(echo "$resp" | jq -r '.errors[0].message // .')" >&2 + exit 1 + fi + ZONE_ID=$(echo "$resp" | jq -r '.result[0].id // empty') + if [[ -z "$ZONE_ID" ]]; then + echo "[ERROR] 未找到区域: ${ZONE_NAME}" >&2 + exit 1 + fi + echo "[INFO] ZONE_ID=${ZONE_ID}" +fi + +if [[ -z "${ZONE_ID}" ]]; then + echo "[ERROR] 请设置 ZONE_NAME 或 ZONE_ID(环境变量或脚本内 DEFAULT_ZONE_*)" >&2 + usage +fi + +# 分页获取所有 DNS 记录,筛选 _acme-challenge +echo "[INFO] 获取 DNS 记录列表..." +all_ids=() +page=1 +per_page=100 + +while true; do + resp=$(curl -s -X GET "${API_BASE}/zones/${ZONE_ID}/dns_records?per_page=${per_page}&page=${page}" \ + -H "Authorization: Bearer ${CF_API_TOKEN}" \ + -H "Content-Type: application/json") + + if ! 
echo "$resp" | jq -e '.success == true' >/dev/null 2>&1; then + echo "[ERROR] 获取记录失败: $(echo "$resp" | jq -r '.errors[0].message // .')" >&2 + exit 1 + fi + + # 筛选 name 包含 _acme-challenge 的记录 + ids=$(echo "$resp" | jq -r '.result[] | select(.name | contains("_acme-challenge")) | .id') + names=$(echo "$resp" | jq -r '.result[] | select(.name | contains("_acme-challenge")) | "\(.type) \(.name) -> \(.content)"') + while IFS= read -r id; do + [[ -n "$id" ]] && all_ids+=("$id") + done <<< "$ids" + + if [[ -n "$names" ]]; then + echo "$names" | while read -r line; do + [[ -n "$line" ]] && echo " - $line" + done + fi + + fetched=$(echo "$resp" | jq -r '.result | length') + [[ "$fetched" -lt "$per_page" ]] && break + ((page++)) || true +done + +count=${#all_ids[@]} +echo "[INFO] 共找到 ${count} 条 _acme-challenge 相关记录" + +if [[ $count -eq 0 ]]; then + echo "[INFO] 无需删除" + exit 0 +fi + +if [[ "$DRY_RUN" == "true" ]]; then + echo "[DRY-RUN] 未执行删除,以上 ${count} 条记录将在去掉 --dry-run 后被删除" + exit 0 +fi + +# 分批删除(使用 jq 构建 JSON 避免转义问题) +deleted=0 +for ((i=0; i<count; i+=BATCH_SIZE)); do + batch=("${all_ids[@]:i:BATCH_SIZE}") + body=$(printf '%s\n' "${batch[@]}" | jq -R . | jq -s 'map(select(length>0)) | {deletes: map({id: .})}') + + echo "[INFO] 删除第 $((i/BATCH_SIZE + 1)) 批,共 ${#batch[@]} 条..." + resp=$(curl -s -X POST "${API_BASE}/zones/${ZONE_ID}/dns_records/batch" \ + -H "Authorization: Bearer ${CF_API_TOKEN}" \ + -H "Content-Type: application/json" \ + -d "$body") + + if ! echo "$resp" | jq -e '.success == true' >/dev/null 2>&1; then + echo "[ERROR] 批量删除失败: $(echo "$resp" | jq -r '.errors[0].message // .')" >&2 + echo "$resp" | jq '.' 
>&2 + exit 1 + fi + deleted=$((deleted + ${#batch[@]})) + echo "[OK] 已删除 ${deleted}/${count} 条" + # 避免 API 限流 + [[ $((i + BATCH_SIZE)) -lt $count ]] && sleep 1 +done + +echo "[DONE] 共删除 ${deleted} 条 _acme-challenge 记录" diff --git a/scripts/diag/entrypath/README.md b/scripts/diag/entrypath/README.md deleted file mode 100644 index 570fcdf..0000000 --- a/scripts/diag/entrypath/README.md +++ /dev/null @@ -1,113 +0,0 @@ -# entrypath 诊断脚本说明 - -`entrypath.sh` 用于排查 `client -> worker:80 -> kube-proxy DNAT -> Traefik Pod` 全链路问题。 - -## 命令 - -```bash -./scripts/diag/entrypath/entrypath.sh [options] -``` - -- `run`:完整检查(默认) -- `preflight`:仅检查依赖与参数环境 -- `capture`:强制开启抓包/trace能力后执行 run -- `analyze --log `:离线分析日志 - -## 关键参数 - -- `--worker-host` / `--client-host` -- `--worker-ssh-key` / `--client-ssh-key` -- `--client-ip` / `--lb-ip` -- `--remote-check y|n` -- `--capture-mode y|n` -- `--nft-trace-mode y|n` -- `--return-trace-mode y|n` -- `--pod-netns-trace-mode y|n` -- `--non-interactive` - -## 日志 - -- root 运行:`/root/netpol-diag-logs/entrypath-*.log` -- 非 root:`~/netpol-diag-logs/entrypath-*.log` - -## 典型用法 - -### 1) 预检查 - -```bash -./scripts/diag/entrypath/entrypath.sh preflight --non-interactive -``` - -### 2) 全功能在线诊断(默认值示例) - -```bash -./scripts/diag/entrypath/entrypath.sh run \ - --worker-host root@192.168.2.62 \ - --client-host root@192.168.2.63 \ - --worker-ssh-key ~/.ssh/id_ed25519_k3s_diag_worker \ - --client-ssh-key ~/.ssh/id_ed25519_k3s_diag_client \ - --client-ip 192.168.2.63 \ - --lb-ip 192.168.2.62 \ - --remote-check y \ - --capture-mode y \ - --capture-seconds 15 \ - --nft-trace-mode y \ - --nft-trace-seconds 10 \ - --return-trace-mode y \ - --return-trace-seconds 12 \ - --pod-netns-trace-mode y \ - --pod-netns-trace-seconds 12 \ - --non-interactive -``` - -### 3) 离线日志判读 - -```bash -./scripts/diag/entrypath/entrypath.sh analyze \ - --log ~/netpol-diag-logs/entrypath-20260310-195812.log -``` - -## 常见陷阱与修复 - -### 1) `62:80` 不通,但 worker 已 DNAT 到 Traefik - -若日志同时出现: - -- 
`nft 观测到 KUBE-EXT DNAT: yes` -- `ylc61(any) SYN/SYN-ACK: N/0` -- `filter_FORWARD_POLICIES ... reject with icmpx admin-prohibited` - -通常是 `ylc61` 的 firewalld 转发策略阻断 `flannel.1 -> cni0`。 - -修复(推荐): - -```bash -sudo firewall-cmd --zone=trusted --add-interface=flannel.1 -sudo firewall-cmd --zone=trusted --add-interface=cni0 - -sudo firewall-cmd --permanent --zone=trusted --add-interface=flannel.1 -sudo firewall-cmd --permanent --zone=trusted --add-interface=cni0 -sudo firewall-cmd --reload -``` - -### 2) `Worker CNI hostport DNAT 计数未增长` 是否异常 - -不一定。若 nft trace 明确显示走的是 `KUBE-EXT -> KUBE-SVC -> KUBE-SEP`,则 CNI hostport 计数不增长属于正常路径差异,不应作为故障根因。 - -### 3) 成功判据 - -至少满足以下任一组: - -- 客户端对 `http://:80` 返回 `404/200/...`(非连接失败) -- 自动判读中: - - `ylc62(ens18) SYN/SYN-ACK` 为 `N/N` - - `ylc61(any) SYN/SYN-ACK` 为 `N/N` - - `ylc61(cni0) SYN/SYN-ACK` 为 `N/N` - -## 模块划分 - -- `lib/common.sh`:通用工具、参数默认值 -- `lib/k8s_checks.sh`:本地 K8s 基线采样 -- `lib/remote_checks.sh`:远端 worker 采样与复测 -- `lib/capture.sh`:tcpdump / nft / conntrack / pod netns -- `lib/analyze.sh`:实时/离线判读 diff --git a/scripts/diag/entrypath/entrypath.sh b/scripts/diag/entrypath/entrypath.sh deleted file mode 100644 index 084c927..0000000 --- a/scripts/diag/entrypath/entrypath.sh +++ /dev/null @@ -1,144 +0,0 @@ -#!/usr/bin/env bash -set -euo pipefail - -SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" -LIB_DIR="${SCRIPT_DIR}/lib" - -source "${LIB_DIR}/common.sh" -source "${LIB_DIR}/k8s_checks.sh" -source "${LIB_DIR}/remote_checks.sh" -source "${LIB_DIR}/capture.sh" -source "${LIB_DIR}/analyze.sh" - -parse_args() { - init_defaults - - if [[ $# -gt 0 ]]; then - case "$1" in - run|preflight|capture|analyze) - COMMAND="$1" - shift - ;; - esac - fi - - while [[ $# -gt 0 ]]; do - case "$1" in - --worker-host) WORKER_HOST="${2:-}"; shift 2 ;; - --client-host) CLIENT_HOST="${2:-}"; shift 2 ;; - --client-ip) CLIENT_IP="${2:-}"; shift 2 ;; - --lb-ip) LB_IP="${2:-}"; shift 2 ;; - --worker-ssh-key) WORKER_SSH_KEY="${2:-}"; shift 2 ;; - 
--ssh-key) WORKER_SSH_KEY="${2:-}"; shift 2 ;; - --client-ssh-key) CLIENT_SSH_KEY="${2:-}"; shift 2 ;; - --remote-check) DO_REMOTE_ARG="${2:-}"; shift 2 ;; - --capture-mode) CAPTURE_MODE_ARG="${2:-}"; shift 2 ;; - --capture-seconds) CAPTURE_SECONDS="${2:-12}"; shift 2 ;; - --nft-trace-mode) NFT_TRACE_MODE_ARG="${2:-}"; shift 2 ;; - --nft-trace-seconds) NFT_TRACE_SECONDS="${2:-8}"; shift 2 ;; - --return-trace-mode) RETURN_TRACE_MODE_ARG="${2:-}"; shift 2 ;; - --return-trace-seconds) RETURN_TRACE_SECONDS="${2:-10}"; shift 2 ;; - --pod-netns-trace-mode) POD_NETNS_TRACE_MODE_ARG="${2:-}"; shift 2 ;; - --pod-netns-trace-seconds) POD_NETNS_TRACE_SECONDS_ARG="${2:-}"; shift 2 ;; - --non-interactive) NON_INTERACTIVE="1"; shift ;; - --log) ANALYZE_LOG="${2:-}"; shift 2 ;; - -h|--help) usage; exit 0 ;; - *) - echo "[ERR] 未知参数: $1" - usage - exit 1 - ;; - esac - done -} - -cmd_preflight() { - local_preflight_checks - prepare_runtime_context - echo "=== preflight ===" - echo "[OK] 依赖检查通过" - echo "worker_host=${WORKER_HOST}" - echo "client_host=${CLIENT_HOST:-}" - echo "client_ip=${CLIENT_IP}" - echo "lb_ip=${LB_IP}" - echo "worker_ssh_key=${WORKER_SSH_KEY:-}" - echo "client_ssh_key=${CLIENT_SSH_KEY:-}" -} - -cmd_run() { - local_preflight_checks - echo "K3s 全链路一键检查(入口 -> DNAT -> Service -> Endpoint -> NetPol -> 回包)" - echo "建议在 server 节点执行(例如 ylc61)。" - echo - - # 构造针对 IPv4 LB_IP 的 curl 探测命令 - local CURL_HTTP CURL_DESC - CURL_HTTP="curl -I --max-time 3 http://${LB_IP}:80" - CURL_DESC="curl -I --max-time 3 http://${LB_IP}:80" - - prepare_runtime_context - setup_log_file - - say "日志文件: $LOG_FILE" - say "worker SSH key: ${WORKER_SSH_KEY:-}" - say "client SSH key: ${CLIENT_SSH_KEY:-}" - - collect_local_k8s_state - echo - resolve_runtime_modes - collect_remote_worker_state - - echo - echo ">>> 请在第三方客户端(${CLIENT_IP})执行 3 次:${CURL_DESC}" - start_worker_capture - start_worker_nft_trace - start_return_path_trace - start_pod_netns_trace - if [[ -n "${CLIENT_HOST}" ]]; then - say "通过 SSH 
自动触发客户端探测: ${CLIENT_HOST}" - run_cmd "Client 自动探测(3次)" ssh "${CLIENT_SSH_OPTS[@]}" "${CLIENT_HOST}" \ - "for i in 1 2 3; do ${CURL_HTTP} || true; sleep 1; done" - elif [[ "${NON_INTERACTIVE}" == "0" ]]; then - read -r -p "完成后按回车继续采样..." - else - echo "[WARN] non-interactive 模式且未提供 --client-host:跳过等待直接采样,可能没有新流量。" - fi - flush_worker_capture - - post_remote_worker_state - run_cmd "Traefik Pod FW 链复测" sudo iptables -L "${TRAEFIK_CHAIN:-KUBE-ROUTER-FORWARD}" -n -v --line-numbers - run_cmd "本机访问目标LB_IP:80(仅供参考,可能本机被kube-proxy劫持)" bash -lc "${CURL_HTTP}" - - print_diag_summary - echo - echo "Traefik pod netns SYN/SYN-ACK: ${POD_NETNS_SYN_COUNT:-0}/${POD_NETNS_SYNACK_COUNT:-0}" - echo - echo "完成。完整日志: ${LOG_FILE}" -} - -cmd_capture() { - DO_REMOTE_ARG="y" - CAPTURE_MODE_ARG="y" - NFT_TRACE_MODE_ARG="y" - RETURN_TRACE_MODE_ARG="y" - POD_NETNS_TRACE_MODE_ARG="y" - NON_INTERACTIVE="1" - cmd_run -} - -cmd_analyze() { - analyze_log_file "${ANALYZE_LOG}" -} - -main() { - parse_args "$@" - case "${COMMAND}" in - run) cmd_run ;; - preflight) cmd_preflight ;; - capture) cmd_capture ;; - analyze) cmd_analyze ;; - *) echo "[ERR] 未知命令: ${COMMAND}"; usage; exit 1 ;; - esac -} - -main "$@" diff --git a/scripts/diag/entrypath/lib/analyze.sh b/scripts/diag/entrypath/lib/analyze.sh deleted file mode 100644 index 1183a18..0000000 --- a/scripts/diag/entrypath/lib/analyze.sh +++ /dev/null @@ -1,80 +0,0 @@ -#!/usr/bin/env bash - -print_diag_summary() { - echo - echo "===== 自动判读(基于当前计数) =====" - echo "- Traefik Pod FW 链: ${TRAEFIK_CHAIN:-N/A}" - echo "- Traefik REJECT 命中: ${REJECT_PKTS:-0}" - echo "- Traefik NFLOG 命中: ${NFLOG_PKTS:-0}" - echo "- Service web 链: ${TRAEFIK_WEB_SVC_CHAIN:-N/A}" - echo "- Service web endpoint 链: ${TRAEFIK_WEB_SEP_CHAIN:-N/A}" - echo "- Worker CNI hostport链: ${WORKER_CNI_DNAT_CHAIN:-N/A}" - echo "- nft 观测到 KUBE-EXT DNAT: ${NFT_DNAT_HIT:-no}" - echo "- ylc61(any) SYN/SYN-ACK: ${RET_LOCAL_SYN_COUNT:-0}/${RET_LOCAL_SYNACK_COUNT:-0}" - echo "- ylc61(cni0) SYN/SYN-ACK: 
${RET_CNI0_SYN_COUNT:-0}/${RET_CNI0_SYNACK_COUNT:-0}" - echo "- ylc62(ens18) SYN/SYN-ACK: ${RET_WORKER_SYN_COUNT:-0}/${RET_WORKER_SYNACK_COUNT:-0}" - echo "- Traefik pod netns SYN/SYN-ACK: ${POD_NETNS_SYN_COUNT:-0}/${POD_NETNS_SYNACK_COUNT:-0}" - - if [[ "${REJECT_PKTS:-0}" =~ ^[0-9]+$ ]] && [[ "${REJECT_PKTS:-0}" -gt 0 ]]; then - echo "[结论] Traefik Pod 防火墙链出现 REJECT 命中,优先检查 kube-system 下 Traefik 相关 Ingress NetworkPolicy。" - elif [[ "${RET_WORKER_SYNACK_COUNT:-0}" -gt 0 ]] && [[ "${RET_LOCAL_SYNACK_COUNT:-0}" -gt 0 ]] && [[ "${RET_CNI0_SYNACK_COUNT:-0}" -gt 0 ]]; then - echo "[结论] 链路已恢复:ylc62/ylc61/cni0 均观测到 SYN-ACK,62:80 已可达 Traefik。" - elif [[ "${NFT_DNAT_HIT:-no}" == "yes" ]] && [[ "${RET_LOCAL_SYN_COUNT:-0}" -gt 0 ]] && [[ "${RET_LOCAL_SYNACK_COUNT:-0}" -eq 0 ]]; then - echo "[结论] 流量已经在 worker 被 KUBE-EXT/KUBE-SVC DNAT 到 Traefik(10.42.0.12:8000),但 ylc61 未观察到 SYN-ACK,优先排查 Traefik Pod/宿主转发回包路径。" - elif [[ -n "${WORKER_CNI_HIT_AFTER:-}" && -n "${WORKER_CNI_HIT_BEFORE:-}" ]] && \ - [[ "${WORKER_CNI_HIT_AFTER}" == "${WORKER_CNI_HIT_BEFORE}" ]]; then - echo "[结论] Worker CNI hostport DNAT 计数未增长。若 nft trace 显示走 KUBE-EXT/KUBE-SVC,这是正常路径提示,不构成故障根因。" - else - echo "[结论] 未观察到 Traefik REJECT 明确命中,优先检查回包链路(ylc61<->ylc62 flannel / ylc62 ens18 出口)。" - fi -} - -analyze_log_file() { - local log_file="$1" - if [[ -z "${log_file}" || ! 
-f "${log_file}" ]]; then - echo "[ERR] analyze 模式需要有效日志文件: --log " - return 1 - fi - - local has_worker_dnat="no" - local has_firewalld_reject="no" - local has_traefik_reject="no" - local has_syn_no_synack="no" - local has_synack_recovered="no" - - if awk '/KUBE-EXT-.*KUBE-SVC|dnat to 10\.42\./ {hit=1} END{exit !hit}' "${log_file}"; then - has_worker_dnat="yes" - fi - if awk '/filter_FORWARD_POLICIES.*admin-prohibited/ {hit=1} END{exit !hit}' "${log_file}"; then - has_firewalld_reject="yes" - fi - if awk '/Traefik REJECT 命中: [1-9]/ {hit=1} END{exit !hit}' "${log_file}"; then - has_traefik_reject="yes" - fi - if awk '/ylc61\(any\) SYN\/SYN-ACK: [1-9][0-9]*\/0/ {hit=1} END{exit !hit}' "${log_file}"; then - has_syn_no_synack="yes" - fi - if awk '/ylc61\(any\) SYN\/SYN-ACK: [1-9][0-9]*\/[1-9][0-9]*/ {a=1} /ylc62\(ens18\) SYN\/SYN-ACK: [1-9][0-9]*\/[1-9][0-9]*/ {b=1} END{exit !(a&&b)}' "${log_file}"; then - has_synack_recovered="yes" - fi - - echo "===== 日志离线判读 =====" - echo "- 日志文件: ${log_file}" - echo "- 观测到 worker DNAT: ${has_worker_dnat}" - echo "- 观测到 firewalld forward reject: ${has_firewalld_reject}" - echo "- 观测到 Traefik Pod REJECT 命中: ${has_traefik_reject}" - echo "- 观测到 ylc61 SYN 无 SYN-ACK: ${has_syn_no_synack}" - echo "- 观测到链路恢复(有 SYN-ACK): ${has_synack_recovered}" - - if [[ "${has_firewalld_reject}" == "yes" ]]; then - echo "[结论] 高概率为 ylc61 firewalld FORWARD 策略阻断 flannel.1 -> cni0。" - elif [[ "${has_synack_recovered}" == "yes" ]]; then - echo "[结论] 链路已恢复,入口到 Traefik 回包路径正常。" - elif [[ "${has_worker_dnat}" == "yes" && "${has_syn_no_synack}" == "yes" ]]; then - echo "[结论] worker 入站与 DNAT 正常,需优先排查 ylc61 到 Traefik Pod 的转发/回包链路。" - elif [[ "${has_traefik_reject}" == "yes" ]]; then - echo "[结论] Traefik Pod NetworkPolicy 命中拒绝,优先检查 kube-system netpol。" - else - echo "[结论] 日志未出现单一确定根因,建议执行 run/capture 模式重新采样。" - fi -} diff --git a/scripts/diag/entrypath/lib/capture.sh b/scripts/diag/entrypath/lib/capture.sh deleted file mode 100644 index 23c94fb..0000000 --- 
a/scripts/diag/entrypath/lib/capture.sh +++ /dev/null @@ -1,286 +0,0 @@ -#!/usr/bin/env bash - -CAPTURE_MODE="N" -CAPTURE_SECONDS="12" -CAPTURE_MODE_ARG="" -CAP_FILE_ENS18="" -CAP_PID_ENS18="" -NFT_TRACE_MODE="N" -NFT_TRACE_SECONDS="8" -NFT_TRACE_MODE_ARG="" -NFT_FILE="" -NFT_PID="" -NFT_TRACE_TABLE="diag_k3s_entrypath" -LOCAL_NFT_TRACE_TABLE="diag61_k3s_entrypath" -RETURN_TRACE_MODE="N" -RETURN_TRACE_SECONDS="10" -RETURN_TRACE_MODE_ARG="" -RET_FILE_LOCAL_8000="" -RET_FILE_LOCAL_CNI0="" -RET_FILE_WORKER_ENS18="" -RET_FILE_WORKER_CONNTRACK="" -RET_PID_LOCAL_8000="" -RET_PID_LOCAL_CNI0="" -RET_PID_WORKER_ENS18="" -RET_PID_WORKER_CONNTRACK="" -RET_FILE_LOCAL_NFT_TRACE="" -RET_PID_LOCAL_NFT_TRACE="" -NFT_DNAT_HIT="no" -RET_LOCAL_SYN_COUNT=0 -RET_LOCAL_SYNACK_COUNT=0 -RET_CNI0_SYN_COUNT=0 -RET_CNI0_SYNACK_COUNT=0 -RET_WORKER_SYN_COUNT=0 -RET_WORKER_SYNACK_COUNT=0 -POD_NETNS_TRACE_MODE="N" -POD_NETNS_TRACE_MODE_ARG="" -POD_NETNS_TRACE_SECONDS="" -POD_NETNS_TRACE_SECONDS_ARG="" -POD_NETNS_PID="" -POD_NETNS_FILE="" -POD_NETNS_SYN_COUNT=0 -POD_NETNS_SYNACK_COUNT=0 - -start_worker_capture() { - if [[ ! "$CAPTURE_MODE" =~ ^[Yy]$ ]]; then - return 0 - fi - if [[ ! "$DO_REMOTE" =~ ^[Yy]$ ]] || [[ -z "${WORKER_HOST}" ]]; then - echo "[WARN] 抓包模式已开启,但未启用远端检查或未提供 worker 主机,跳过抓包。" - return 0 - fi - - CAP_FILE_ENS18="$(mktemp)" - say "启动 worker 抓包(ens18, ${CAPTURE_SECONDS}s): host ${CLIENT_IP} and tcp port 80" - ssh "${SSH_OPTS[@]}" "${WORKER_HOST}" \ - "sudo timeout ${CAPTURE_SECONDS} tcpdump -ni ens18 'host ${CLIENT_IP} and tcp port 80' 2>/dev/null || true" \ - >"${CAP_FILE_ENS18}" 2>&1 & - CAP_PID_ENS18="$!" - sleep 1 -} - -start_worker_nft_trace() { - if [[ ! "$NFT_TRACE_MODE" =~ ^[Yy]$ ]]; then - return 0 - fi - if [[ ! 
"$DO_REMOTE" =~ ^[Yy]$ ]] || [[ -z "${WORKER_HOST}" ]]; then - echo "[WARN] nft trace 已开启,但未启用远端检查或未提供 worker 主机,跳过 nft trace。" - return 0 - fi - - ssh "${SSH_OPTS[@]}" "${WORKER_HOST}" \ - "sudo nft add table inet ${NFT_TRACE_TABLE} 2>/dev/null || true; \ - sudo nft 'add chain inet ${NFT_TRACE_TABLE} prerouting { type filter hook prerouting priority -301; policy accept; }' 2>/dev/null || true; \ - sudo nft add rule inet ${NFT_TRACE_TABLE} prerouting iif \"ens18\" ip saddr ${CLIENT_IP} ip daddr ${LB_IP} tcp dport 80 meta nftrace set 1 2>/dev/null || true" \ - || true - - NFT_FILE="$(mktemp)" - say "启动 worker nft trace(${NFT_TRACE_SECONDS}s)" - ssh "${SSH_OPTS[@]}" "${WORKER_HOST}" \ - "sudo timeout ${NFT_TRACE_SECONDS} nft monitor trace 2>/dev/null || true" \ - >"${NFT_FILE}" 2>&1 & - NFT_PID="$!" - sleep 1 -} - -start_return_path_trace() { - if [[ ! "$RETURN_TRACE_MODE" =~ ^[Yy]$ ]]; then - return 0 - fi - if [[ ! "$DO_REMOTE" =~ ^[Yy]$ ]] || [[ -z "${WORKER_HOST}" ]]; then - echo "[WARN] 回包链路跟踪已开启,但未启用远端检查或未提供 worker 主机,跳过。" - return 0 - fi - - RET_FILE_LOCAL_8000="$(mktemp)" - RET_FILE_LOCAL_CNI0="$(mktemp)" - RET_FILE_LOCAL_NFT_TRACE="$(mktemp)" - RET_FILE_WORKER_ENS18="$(mktemp)" - RET_FILE_WORKER_CONNTRACK="$(mktemp)" - - say "启动回包链路跟踪(${RETURN_TRACE_SECONDS}s)" - sudo nft add table inet "${LOCAL_NFT_TRACE_TABLE}" 2>/dev/null || true - sudo nft "add chain inet ${LOCAL_NFT_TRACE_TABLE} forward { type filter hook forward priority -301; policy accept; }" 2>/dev/null || true - sudo nft add rule inet "${LOCAL_NFT_TRACE_TABLE}" forward iif "flannel.1" ip daddr "${TRAEFIK_IP}" tcp dport 8000 meta nftrace set 1 2>/dev/null || true - sudo timeout "${RETURN_TRACE_SECONDS}" nft monitor trace 2>/dev/null \ - >"${RET_FILE_LOCAL_NFT_TRACE}" 2>&1 & - RET_PID_LOCAL_NFT_TRACE="$!" - - sudo timeout "${RETURN_TRACE_SECONDS}" tcpdump -ni any "host ${TRAEFIK_IP} and tcp port 8000" 2>/dev/null \ - >"${RET_FILE_LOCAL_8000}" 2>&1 & - RET_PID_LOCAL_8000="$!" 
- - sudo timeout "${RETURN_TRACE_SECONDS}" tcpdump -ni cni0 "host ${TRAEFIK_IP} and tcp port 8000" 2>/dev/null \ - >"${RET_FILE_LOCAL_CNI0}" 2>&1 & - RET_PID_LOCAL_CNI0="$!" - - ssh "${SSH_OPTS[@]}" "${WORKER_HOST}" \ - "sudo timeout ${RETURN_TRACE_SECONDS} tcpdump -ni ens18 'host ${CLIENT_IP} and tcp' 2>/dev/null || true" \ - >"${RET_FILE_WORKER_ENS18}" 2>&1 & - RET_PID_WORKER_ENS18="$!" - - ssh "${SSH_OPTS[@]}" "${WORKER_HOST}" \ - "if command -v conntrack >/dev/null 2>&1; then sudo timeout ${RETURN_TRACE_SECONDS} conntrack -E -p tcp 2>/dev/null || true; else echo 'conntrack: not found'; fi" \ - >"${RET_FILE_WORKER_CONNTRACK}" 2>&1 & - RET_PID_WORKER_CONNTRACK="$!" - - sleep 1 -} - -start_pod_netns_trace() { - if [[ ! "${POD_NETNS_TRACE_MODE}" =~ ^[Yy]$ ]]; then - return 0 - fi - if ! command -v crictl >/dev/null 2>&1; then - echo "[WARN] 未找到 crictl,跳过 pod netns 抓包。" - return 0 - fi - if ! command -v nsenter >/dev/null 2>&1; then - echo "[WARN] 未找到 nsenter,跳过 pod netns 抓包。" - return 0 - fi - - local sec="${POD_NETNS_TRACE_SECONDS:-$RETURN_TRACE_SECONDS}" - local cid - local pid - local runtime_id="" - - runtime_id="$(sudo kubectl -n kube-system get pod "${TRAEFIK_POD}" -o jsonpath='{.status.containerStatuses[?(@.name=="traefik")].containerID}' 2>/dev/null || true)" - runtime_id="${runtime_id#containerd://}" - runtime_id="${runtime_id#cri-o://}" - - if [[ -n "${runtime_id}" ]]; then - cid="${runtime_id}" - else - cid="$(sudo crictl ps --name traefik -q 2>/dev/null | awk 'NR==1{print; exit}' || true)" - fi - if [[ -z "${cid}" ]]; then - echo "[WARN] 未解析到 traefik 容器ID,跳过 pod netns 抓包。" - return 0 - fi - - pid="$(sudo crictl inspect "${cid}" 2>/dev/null | awk -F': ' '/"pid":/ {gsub(/,/, "", $2); print $2; exit}' || true)" - if [[ -z "${pid}" || ! 
"${pid}" =~ ^[0-9]+$ ]]; then - echo "[WARN] 未解析到 traefik 容器 PID,跳过 pod netns 抓包。" - return 0 - fi - - POD_NETNS_FILE="$(mktemp)" - say "启动 Traefik Pod netns 抓包(${sec}s, pid=${pid})" - sudo timeout "${sec}" nsenter -t "${pid}" -n tcpdump -ni any "tcp port 8000" 2>/dev/null \ - >"${POD_NETNS_FILE}" 2>&1 & - POD_NETNS_PID="$!" - sleep 1 -} - -flush_worker_capture() { - if [[ -n "${CAP_PID_ENS18}" ]]; then - wait "${CAP_PID_ENS18}" || true - CAP_PID_ENS18="" - fi - if [[ -n "${CAP_FILE_ENS18}" && -f "${CAP_FILE_ENS18}" ]]; then - echo - echo "===== Worker 抓包结果(ens18) =====" - cat "${CAP_FILE_ENS18}" || true - rm -f "${CAP_FILE_ENS18}" || true - CAP_FILE_ENS18="" - fi - - if [[ -n "${NFT_PID}" ]]; then - wait "${NFT_PID}" || true - NFT_PID="" - fi - if [[ -n "${NFT_FILE}" && -f "${NFT_FILE}" ]]; then - if grep -Eq "KUBE-SEP-.*dnat to ${TRAEFIK_IP}:8000|dnat to ${TRAEFIK_IP}:8000" "${NFT_FILE}" >/dev/null 2>&1; then - NFT_DNAT_HIT="yes" - fi - echo - echo "===== Worker nft trace 结果 =====" - cat "${NFT_FILE}" || true - rm -f "${NFT_FILE}" || true - NFT_FILE="" - fi - - if [[ "$NFT_TRACE_MODE" =~ ^[Yy]$ ]] && [[ "$DO_REMOTE" =~ ^[Yy]$ ]] && [[ -n "${WORKER_HOST}" ]]; then - ssh "${SSH_OPTS[@]}" "${WORKER_HOST}" "sudo nft delete table inet ${NFT_TRACE_TABLE} 2>/dev/null || true" || true - fi - - if [[ -n "${RET_PID_LOCAL_8000}" ]]; then - wait "${RET_PID_LOCAL_8000}" || true - RET_PID_LOCAL_8000="" - fi - if [[ -n "${RET_PID_LOCAL_NFT_TRACE}" ]]; then - wait "${RET_PID_LOCAL_NFT_TRACE}" || true - RET_PID_LOCAL_NFT_TRACE="" - fi - if [[ -n "${RET_PID_LOCAL_CNI0}" ]]; then - wait "${RET_PID_LOCAL_CNI0}" || true - RET_PID_LOCAL_CNI0="" - fi - if [[ -n "${RET_PID_WORKER_ENS18}" ]]; then - wait "${RET_PID_WORKER_ENS18}" || true - RET_PID_WORKER_ENS18="" - fi - if [[ -n "${RET_PID_WORKER_CONNTRACK}" ]]; then - wait "${RET_PID_WORKER_CONNTRACK}" || true - RET_PID_WORKER_CONNTRACK="" - fi - - if [[ -n "${RET_FILE_LOCAL_8000}" && -f "${RET_FILE_LOCAL_8000}" ]]; then - 
RET_LOCAL_SYN_COUNT="$(count_tcpdump_flag "${RET_FILE_LOCAL_8000}" "Flags [S]")" - RET_LOCAL_SYNACK_COUNT="$(count_tcpdump_flag "${RET_FILE_LOCAL_8000}" "Flags [S.]")" - echo - echo "===== 回包链路抓包(ylc61 any -> ${TRAEFIK_IP}:8000) =====" - cat "${RET_FILE_LOCAL_8000}" || true - rm -f "${RET_FILE_LOCAL_8000}" || true - RET_FILE_LOCAL_8000="" - fi - if [[ -n "${RET_FILE_LOCAL_NFT_TRACE}" && -f "${RET_FILE_LOCAL_NFT_TRACE}" ]]; then - echo - echo "===== 本机 nft trace 结果(ylc61 forward) =====" - cat "${RET_FILE_LOCAL_NFT_TRACE}" || true - rm -f "${RET_FILE_LOCAL_NFT_TRACE}" || true - RET_FILE_LOCAL_NFT_TRACE="" - fi - if [[ -n "${RET_FILE_LOCAL_CNI0}" && -f "${RET_FILE_LOCAL_CNI0}" ]]; then - RET_CNI0_SYN_COUNT="$(count_tcpdump_flag "${RET_FILE_LOCAL_CNI0}" "Flags [S]")" - RET_CNI0_SYNACK_COUNT="$(count_tcpdump_flag "${RET_FILE_LOCAL_CNI0}" "Flags [S.]")" - echo - echo "===== 回包链路抓包(ylc61 cni0 -> ${TRAEFIK_IP}:8000) =====" - cat "${RET_FILE_LOCAL_CNI0}" || true - rm -f "${RET_FILE_LOCAL_CNI0}" || true - RET_FILE_LOCAL_CNI0="" - fi - if [[ -n "${RET_FILE_WORKER_ENS18}" && -f "${RET_FILE_WORKER_ENS18}" ]]; then - RET_WORKER_SYN_COUNT="$(count_tcpdump_flag "${RET_FILE_WORKER_ENS18}" "Flags [S]")" - RET_WORKER_SYNACK_COUNT="$(count_tcpdump_flag "${RET_FILE_WORKER_ENS18}" "Flags [S.]")" - echo - echo "===== 回包链路抓包(ylc62 ens18 <-> ${CLIENT_IP}) =====" - cat "${RET_FILE_WORKER_ENS18}" || true - rm -f "${RET_FILE_WORKER_ENS18}" || true - RET_FILE_WORKER_ENS18="" - fi - if [[ -n "${RET_FILE_WORKER_CONNTRACK}" && -f "${RET_FILE_WORKER_CONNTRACK}" ]]; then - echo - echo "===== 回包链路 conntrack 事件(ylc62) =====" - cat "${RET_FILE_WORKER_CONNTRACK}" || true - rm -f "${RET_FILE_WORKER_CONNTRACK}" || true - RET_FILE_WORKER_CONNTRACK="" - fi - - sudo nft delete table inet "${LOCAL_NFT_TRACE_TABLE}" 2>/dev/null || true - - if [[ -n "${POD_NETNS_PID}" ]]; then - wait "${POD_NETNS_PID}" || true - POD_NETNS_PID="" - fi - if [[ -n "${POD_NETNS_FILE}" && -f "${POD_NETNS_FILE}" ]]; then - 
POD_NETNS_SYN_COUNT="$(count_tcpdump_flag "${POD_NETNS_FILE}" "Flags [S]")" - POD_NETNS_SYNACK_COUNT="$(count_tcpdump_flag "${POD_NETNS_FILE}" "Flags [S.]")" - echo - echo "===== Traefik Pod netns 抓包(ylc61) =====" - cat "${POD_NETNS_FILE}" || true - rm -f "${POD_NETNS_FILE}" || true - POD_NETNS_FILE="" - fi -} diff --git a/scripts/diag/entrypath/lib/common.sh b/scripts/diag/entrypath/lib/common.sh deleted file mode 100644 index 0c40edf..0000000 --- a/scripts/diag/entrypath/lib/common.sh +++ /dev/null @@ -1,104 +0,0 @@ -#!/usr/bin/env bash - -now() { date '+%Y-%m-%d %H:%M:%S'; } -say() { echo "[$(now)] $*"; } - -usage() { - cat <<'EOF' -用法: - entrypath.sh [选项] - entrypath.sh [选项] # 等价于 run - -命令: - run 完整检查(默认) - preflight 仅检查本地依赖与参数环境 - capture 强制开启所有抓包/trace能力后执行 run - analyze --log 离线分析日志文件 - -通用选项: - --worker-host 远端 worker SSH 主机(默认 jack@192.168.2.62) - --client-host 远端客户端 SSH 主机(可选,用于自动发起 curl) - --client-ip 第三方客户端 IP(默认 192.168.2.63) - --lb-ip 待排查 LB 节点 IP(默认 192.168.2.62) - --worker-ssh-key worker SSH 私钥路径(默认 ~/.ssh/id_ed25519_k3s_diag_worker) - --client-ssh-key 客户端 SSH 私钥路径(默认 ~/.ssh/id_ed25519_k3s_diag_client) - --ssh-key 兼容别名,等同 --worker-ssh-key - --remote-check 是否启用远端检查(默认 n,交互可覆盖) - --capture-mode 抓包模式(worker ens18,默认 n) - --capture-seconds 抓包持续秒数(默认 12) - --nft-trace-mode nft trace 模式(worker,默认 n) - --nft-trace-seconds nft trace 持续秒数(默认 8) - --return-trace-mode 回包链路跟踪(ylc61/ylc62,默认 n) - --return-trace-seconds 回包链路跟踪持续秒数(默认 10) - --pod-netns-trace-mode Traefik Pod netns 抓包(ylc61,默认 n) - --pod-netns-trace-seconds Traefik Pod netns 抓包持续秒数(默认同 return-trace-seconds) - --non-interactive 非交互模式(需配合上面参数) - --log 仅 analyze 子命令使用 - -h, --help 显示帮助 -EOF -} - -run_cmd() { - local desc="$1" - shift - echo - echo "===== ${desc} =====" - "$@" || true -} - -require_cmd() { - local c="$1" - if ! 
command -v "$c" >/dev/null 2>&1; then - echo "[ERR] missing command: $c" - exit 1 - fi -} - -read_default() { - local prompt="$1" - local def="$2" - local out - printf "%s [%s]: " "$prompt" "$def" >&2 - read -r out - echo "${out:-$def}" -} - -extract_pkts_for_target() { - local table="$1" - local chain="$2" - local target="$3" - sudo iptables ${table:+-t "$table"} -L "$chain" -n -v -x 2>/dev/null \ - | awk -v t="$target" '$3==t {print $1; exit}' -} - -extract_first_jump_target() { - local table="$1" - local chain="$2" - sudo iptables ${table:+-t "$table"} -S "$chain" 2>/dev/null \ - | awk '/-j KUBE-SEP-/{for(i=1;i<=NF;i++) if($i=="-j"){print $(i+1); exit}}' -} - -count_tcpdump_flag() { - local file="$1" - local flag="$2" - if [[ ! -f "$file" ]]; then - echo 0 - return 0 - fi - awk -v f="$flag" 'BEGIN{c=0} index($0,f){c++} END{print c}' "$file" -} - -init_defaults() { - COMMAND="run" - ANALYZE_LOG="" - WORKER_HOST="jack@192.168.2.62" - CLIENT_HOST="" - CLIENT_IP="192.168.2.63" - LB_IP="192.168.2.62" - WORKER_SSH_KEY="" - CLIENT_SSH_KEY="" - DEFAULT_WORKER_SSH_KEY="${HOME}/.ssh/id_ed25519_k3s_diag_worker" - DEFAULT_CLIENT_SSH_KEY="${HOME}/.ssh/id_ed25519_k3s_diag_client" - DO_REMOTE_ARG="" - NON_INTERACTIVE="0" -} diff --git a/scripts/diag/entrypath/lib/k8s_checks.sh b/scripts/diag/entrypath/lib/k8s_checks.sh deleted file mode 100644 index 40064a7..0000000 --- a/scripts/diag/entrypath/lib/k8s_checks.sh +++ /dev/null @@ -1,95 +0,0 @@ -#!/usr/bin/env bash - -local_preflight_checks() { - require_cmd bash - require_cmd sudo - require_cmd kubectl - require_cmd awk - require_cmd sed - require_cmd grep -} - -prepare_runtime_context() { - if [[ "${NON_INTERACTIVE}" == "0" ]]; then - WORKER_HOST="$(read_default "Worker SSH 主机(user@host,留空跳过远端检查)" "${WORKER_HOST}")" - CLIENT_IP="$(read_default "第三方客户端 IP(用于人工发流量)" "${CLIENT_IP}")" - LB_IP="$(read_default "待排查节点对外 IP(如 ylc62)" "${LB_IP}")" - fi - - if [[ -z "${WORKER_SSH_KEY}" && -f "${DEFAULT_WORKER_SSH_KEY}" ]]; then - 
WORKER_SSH_KEY="${DEFAULT_WORKER_SSH_KEY}" - fi - if [[ -z "${CLIENT_SSH_KEY}" && -f "${DEFAULT_CLIENT_SSH_KEY}" ]]; then - CLIENT_SSH_KEY="${DEFAULT_CLIENT_SSH_KEY}" - fi - if [[ -z "${CLIENT_SSH_KEY}" && -n "${WORKER_SSH_KEY}" ]]; then - CLIENT_SSH_KEY="${WORKER_SSH_KEY}" - fi - - SSH_OPTS=() - if [[ -n "${WORKER_SSH_KEY}" ]]; then - SSH_OPTS=(-i "${WORKER_SSH_KEY}" -o IdentitiesOnly=yes) - fi - - CLIENT_SSH_OPTS=() - if [[ -n "${CLIENT_SSH_KEY}" ]]; then - CLIENT_SSH_OPTS=(-i "${CLIENT_SSH_KEY}" -o IdentitiesOnly=yes) - fi -} - -setup_log_file() { - if [[ "${EUID}" -eq 0 ]]; then - LOG_DIR="/root/netpol-diag-logs" - else - LOG_DIR="${HOME}/netpol-diag-logs" - fi - mkdir -p "$LOG_DIR" - LOG_FILE="${LOG_DIR}/entrypath-$(date '+%Y%m%d-%H%M%S').log" - exec > >(tee -a "$LOG_FILE") 2>&1 -} - -collect_local_k8s_state() { - run_cmd "节点状态" sudo kubectl get nodes -o wide - run_cmd "kube-system 关键组件" sh -c "sudo kubectl -n kube-system get pods -o wide | grep -E 'traefik|svclb|flannel|kube-proxy' || true" - run_cmd "Traefik Service" sudo kubectl -n kube-system get svc traefik -o wide - run_cmd "Traefik Service 关键字段" sh -c "sudo kubectl -n kube-system get svc traefik -o yaml | grep -E 'type:|externalTrafficPolicy|loadBalancerSourceRanges|svccontroller.k3s.cattle.io' || true" - - TRAEFIK_POD="$(sudo kubectl -n kube-system get pod -l app.kubernetes.io/name=traefik -o jsonpath='{.items[0].metadata.name}' 2>/dev/null || true)" - TRAEFIK_IP="$(sudo kubectl -n kube-system get pod -l app.kubernetes.io/name=traefik -o jsonpath='{.items[0].status.podIP}' 2>/dev/null || true)" - - if [[ -z "${TRAEFIK_IP}" ]]; then - echo "[ERR] 无法解析 Traefik Pod IP,终止。" - exit 1 - fi - - TRAEFIK_CHAIN="$(sudo iptables -L KUBE-ROUTER-FORWARD -n -v --line-numbers \ - | awk -v ip="${TRAEFIK_IP}" '$0 ~ ip {print $4; exit}')" - - echo - echo "Traefik pod: ${TRAEFIK_POD}" - echo "Traefik ip : ${TRAEFIK_IP}" - echo "Traefik fw : ${TRAEFIK_CHAIN:-N/A}" - - if [[ -n "${TRAEFIK_CHAIN}" ]]; then - run_cmd 
"Traefik Pod FW 链详情" sudo iptables -L "$TRAEFIK_CHAIN" -n -v -x - run_cmd "Traefik Pod FW 链规则" sudo iptables -S "$TRAEFIK_CHAIN" - REJECT_PKTS="$(extract_pkts_for_target "" "$TRAEFIK_CHAIN" REJECT || echo 0)" - NFLOG_PKTS="$(extract_pkts_for_target "" "$TRAEFIK_CHAIN" NFLOG || echo 0)" - else - REJECT_PKTS=0 - NFLOG_PKTS=0 - fi - - TRAEFIK_WEB_SVC_CHAIN="$(sudo iptables -t nat -S KUBE-SERVICES \ - | awk '/kube-system\/traefik:web cluster IP/ && /--dport 80/ {for(i=1;i<=NF;i++) if($i=="-j"){print $(i+1); exit}}')" - TRAEFIK_WEB_SEP_CHAIN="" - if [[ -n "${TRAEFIK_WEB_SVC_CHAIN}" ]]; then - run_cmd "Traefik web Service 链" sudo iptables -t nat -L "$TRAEFIK_WEB_SVC_CHAIN" -n -v -x - TRAEFIK_WEB_SEP_CHAIN="$(extract_first_jump_target nat "$TRAEFIK_WEB_SVC_CHAIN" || true)" - fi - if [[ -n "${TRAEFIK_WEB_SEP_CHAIN}" ]]; then - run_cmd "Traefik web Endpoint 链" sudo iptables -t nat -L "$TRAEFIK_WEB_SEP_CHAIN" -n -v -x - fi - - run_cmd "KUBE-SERVICES 中目标LB_IP命中" sh -c "sudo iptables -t nat -L KUBE-SERVICES -n -v --line-numbers | grep '${LB_IP}' || true" -} diff --git a/scripts/diag/entrypath/lib/remote_checks.sh b/scripts/diag/entrypath/lib/remote_checks.sh deleted file mode 100644 index 3857042..0000000 --- a/scripts/diag/entrypath/lib/remote_checks.sh +++ /dev/null @@ -1,59 +0,0 @@ -#!/usr/bin/env bash - -WORKER_CNI_DNAT_CHAIN="" -WORKER_CNI_HIT_BEFORE="" -WORKER_CNI_HIT_AFTER="" - -resolve_runtime_modes() { - if [[ -n "${DO_REMOTE_ARG}" ]]; then - DO_REMOTE="${DO_REMOTE_ARG}" - else - if [[ "${NON_INTERACTIVE}" == "1" ]]; then - DO_REMOTE="N" - else - read -r -p "是否通过 SSH 拉取 worker 计数(需要可免交互 sudo)? 
[y/N]: " DO_REMOTE - DO_REMOTE="${DO_REMOTE:-N}" - fi - fi - - if [[ -n "${CAPTURE_MODE_ARG}" ]]; then - CAPTURE_MODE="${CAPTURE_MODE_ARG}" - fi - if [[ -n "${NFT_TRACE_MODE_ARG}" ]]; then - NFT_TRACE_MODE="${NFT_TRACE_MODE_ARG}" - fi - if [[ -n "${RETURN_TRACE_MODE_ARG}" ]]; then - RETURN_TRACE_MODE="${RETURN_TRACE_MODE_ARG}" - fi - if [[ -n "${POD_NETNS_TRACE_MODE_ARG}" ]]; then - POD_NETNS_TRACE_MODE="${POD_NETNS_TRACE_MODE_ARG}" - fi - if [[ -n "${POD_NETNS_TRACE_SECONDS_ARG}" ]]; then - POD_NETNS_TRACE_SECONDS="${POD_NETNS_TRACE_SECONDS_ARG}" - fi -} - -collect_remote_worker_state() { - if [[ ! "$DO_REMOTE" =~ ^[Yy]$ ]] || [[ -z "$WORKER_HOST" ]]; then - return 0 - fi - - say "开始远端检查: ${WORKER_HOST}" - run_cmd "Worker 基础网络状态" ssh "${SSH_OPTS[@]}" "$WORKER_HOST" "ip -br a; ip route" - run_cmd "Worker k3s-agent 状态" ssh "${SSH_OPTS[@]}" "$WORKER_HOST" "sudo systemctl is-active k3s-agent; sudo journalctl -u k3s-agent -n 40 --no-pager" - run_cmd "Worker PREROUTING 关键计数" ssh "${SSH_OPTS[@]}" "$WORKER_HOST" "sudo iptables -t nat -L PREROUTING -n -v --line-numbers | grep -E 'CNI-HOSTPORT-DNAT|KUBE-SERVICES|dpt:80' || true" - run_cmd "Worker CNI-HOSTPORT-DNAT" ssh "${SSH_OPTS[@]}" "$WORKER_HOST" "sudo iptables -t nat -L CNI-HOSTPORT-DNAT -n -v --line-numbers || true" - - WORKER_CNI_DNAT_CHAIN="$(ssh "${SSH_OPTS[@]}" "$WORKER_HOST" "sudo iptables -t nat -S CNI-HOSTPORT-DNAT 2>/dev/null | awk '/-j CNI-DN-/{for(i=1;i<=NF;i++) if(\$i==\"-j\"){print \$(i+1); exit}}'")" - if [[ -n "${WORKER_CNI_DNAT_CHAIN}" ]]; then - run_cmd "Worker 具体 CNI-DNAT 链" ssh "${SSH_OPTS[@]}" "$WORKER_HOST" "sudo iptables -t nat -L ${WORKER_CNI_DNAT_CHAIN} -n -v --line-numbers" - WORKER_CNI_HIT_BEFORE="$(ssh "${SSH_OPTS[@]}" "$WORKER_HOST" "sudo iptables -t nat -L ${WORKER_CNI_DNAT_CHAIN} -n -v -x | awk 'BEGIN{v=0} /DNAT/&&/dpt:80/{v=\$1} END{print v}'")" - fi -} - -post_remote_worker_state() { - if [[ "$DO_REMOTE" =~ ^[Yy]$ ]] && [[ -n "${WORKER_CNI_DNAT_CHAIN}" ]]; then - 
WORKER_CNI_HIT_AFTER="$(ssh "${SSH_OPTS[@]}" "$WORKER_HOST" "sudo iptables -t nat -L ${WORKER_CNI_DNAT_CHAIN} -n -v -x | awk 'BEGIN{v=0} /DNAT/&&/dpt:80/{v=\$1} END{print v}'")" - run_cmd "Worker CNI-DNAT 链复测" ssh "${SSH_OPTS[@]}" "$WORKER_HOST" "sudo iptables -t nat -L ${WORKER_CNI_DNAT_CHAIN} -n -v --line-numbers" - fi -} diff --git a/scripts/diag/firewalld/setup-k3s-firewalld-interfaces.sh b/scripts/diag/firewalld/setup-k3s-firewalld-interfaces.sh deleted file mode 100644 index dbbf39d..0000000 --- a/scripts/diag/firewalld/setup-k3s-firewalld-interfaces.sh +++ /dev/null @@ -1,91 +0,0 @@ -#!/usr/bin/env bash -set -euo pipefail - -TRUSTED_ZONE="trusted" -IFACES=("flannel.1" "cni0") -WAIT_SECONDS_DEFAULT=30 -WAIT_SECONDS="${WAIT_SECONDS_DEFAULT}" -NON_INTERACTIVE="0" - -usage() { - cat <<'EOF' -用法: - setup-k3s-firewalld-interfaces.sh [选项] - -选项: - --wait-seconds 等待接口出现的秒数(默认 30) - --non-interactive 非交互模式 - -h, --help 显示帮助 -EOF -} - -while [[ $# -gt 0 ]]; do - case "$1" in - --wait-seconds) WAIT_SECONDS="${2:-30}"; shift 2 ;; - --non-interactive) NON_INTERACTIVE="1"; shift ;; - -h|--help) usage; exit 0 ;; - *) echo "[ERR] 未知参数: $1"; usage; exit 1 ;; - esac -done - -require_cmd() { - local c="$1" - if ! command -v "$c" >/dev/null 2>&1; then - echo "[ERR] 缺少命令: $c" - exit 1 - fi -} - -require_cmd firewall-cmd -require_cmd ip - -if [[ $EUID -ne 0 ]]; then - SUDO="sudo" -else - SUDO="" -fi - -echo "=== K3s firewalld 接口基线配置 ===" -echo "目标 zone: ${TRUSTED_ZONE}" -echo "目标接口: ${IFACES[*]}" -echo "等待接口出现: ${WAIT_SECONDS}s" - -if [[ "${NON_INTERACTIVE}" == "0" ]]; then - read -r -p "继续执行?[Y/n]: " ans - ans="${ans:-Y}" - if [[ ! "${ans}" =~ ^[Yy]$ ]]; then - echo "已取消。" - exit 0 - fi -fi - -deadline=$((SECONDS + WAIT_SECONDS)) -for iface in "${IFACES[@]}"; do - while ! 
ip link show "${iface}" >/dev/null 2>&1; do - if (( SECONDS >= deadline )); then - echo "[ERR] 接口未出现: ${iface}(等待 ${WAIT_SECONDS}s 仍未出现)" - echo "请确认 k3s 已启动并生成 CNI 接口后重试。" - exit 1 - fi - sleep 1 - done -done - -for iface in "${IFACES[@]}"; do - echo "[RUN ] runtime add-interface ${iface} -> ${TRUSTED_ZONE}" - ${SUDO} firewall-cmd --zone="${TRUSTED_ZONE}" --add-interface="${iface}" >/dev/null - - echo "[RUN ] permanent add-interface ${iface} -> ${TRUSTED_ZONE}" - ${SUDO} firewall-cmd --permanent --zone="${TRUSTED_ZONE}" --add-interface="${iface}" >/dev/null -done - -echo "[RUN ] firewall-cmd --reload" -${SUDO} firewall-cmd --reload >/dev/null - -echo -echo "=== 验证输出 ===" -${SUDO} firewall-cmd --zone="${TRUSTED_ZONE}" --list-interfaces -${SUDO} firewall-cmd --get-active-zones - -echo -echo "[OK] 已完成 firewalld 接口基线配置。" diff --git a/scripts/diag/netpol/check-net.sh b/scripts/diag/netpol/check-net.sh deleted file mode 100644 index 2aefa5a..0000000 --- a/scripts/diag/netpol/check-net.sh +++ /dev/null @@ -1,419 +0,0 @@ -#!/usr/bin/env bash -set -euo pipefail - -NS_TRAEFIK="kube-system" -APP_TRAEFIK_LABEL="app.kubernetes.io/name=traefik" -TIMEOUT=3 -LOG_TAIL=200 -LOG_SINCE="20m" -LOG_DIR="" -LOG_FILE="" - -PROBE_CLIENT="SKIP" -PROBE_TRAEFIK_TO_SVC="SKIP" -PROBE_TRAEFIK_TO_POD="SKIP" -PROBE_TRAEFIK_DNS="SKIP" - -print_title() { - echo - echo "=== $1 ===" -} - -safe_run() { - "$@" || true -} - -require_cmd() { - local c="$1" - if ! command -v "$c" >/dev/null 2>&1; then - echo "[ERR] 缺少命令: $c" - exit 1 - fi -} - -KUBECTL_PATH="$(command -v kubectl || true)" -IPTABLES_PATH="$(command -v iptables || true)" -USE_SUDO="" - -init_runtime() { - require_cmd kubectl - require_cmd iptables - require_cmd awk - require_cmd grep - require_cmd curl - - if [[ "${EUID}" -ne 0 ]] && command -v sudo >/dev/null 2>&1; then - # 先尝试无交互,失败则提示一次密码 - if ! 
sudo -n true 2>/dev/null; then - echo "[INFO] 需要 sudo 权限以读取 iptables / kubectl 配置。" - sudo -v - fi - USE_SUDO="1" - fi - - # 非 root 默认写到 HOME,避免 /root 权限问题 - if [[ "${EUID}" -eq 0 ]]; then - LOG_DIR="/root/netpol-diag-logs" - else - LOG_DIR="${HOME}/netpol-diag-logs" - fi -} - -# 统一封装,避免脚本各处手工判断是否 sudo -kubectl() { - if [[ -n "${USE_SUDO}" ]]; then - sudo "${KUBECTL_PATH}" "$@" - else - "${KUBECTL_PATH}" "$@" - fi -} - -iptables() { - if [[ -n "${USE_SUDO}" ]]; then - sudo "${IPTABLES_PATH}" "$@" - else - "${IPTABLES_PATH}" "$@" - fi -} - -probe_wget_from_traefik() { - local url="$1" - if kubectl exec -n "${NS_TRAEFIK}" deploy/traefik -- wget -qO- "${url}" --timeout="${TIMEOUT}" >/tmp/netpol_probe.out 2>/tmp/netpol_probe.err; then - cat /tmp/netpol_probe.out - return 0 - fi - cat /tmp/netpol_probe.err - return 1 -} - -select_scene() { - echo "请选择诊断场景:" - echo " 1) nginx-demo (/demo, 80)" - echo " 2) nodejs-demo (/node, 3000)" - echo " 3) 自定义" - printf "输入序号 [1/2/3](默认 2): " - read -r CHOICE - CHOICE="${CHOICE:-2}" - - case "${CHOICE}" in - 1) - NS_BACKEND="default" - APP_NAME="nginx-demo" - APP_LABEL="app=nginx-demo" - SVC_NAME="nginx-demo" - PATH_PREFIX="/demo/" - POD_PORT="80" - ;; - 2) - NS_BACKEND="default" - APP_NAME="nodejs-demo" - APP_LABEL="app=nodejs-demo" - SVC_NAME="nodejs-demo" - PATH_PREFIX="/node/" - POD_PORT="3000" - ;; - 3) - printf "后端命名空间(默认 default): " - read -r NS_BACKEND - NS_BACKEND="${NS_BACKEND:-default}" - - printf "应用名(Deployment/Service 名,示例 nodejs-demo): " - read -r APP_NAME - APP_NAME="${APP_NAME:-nodejs-demo}" - - printf "Pod 标签选择器(默认 app=<应用名>): " - read -r APP_LABEL - APP_LABEL="${APP_LABEL:-app=${APP_NAME}}" - - printf "Service 名(默认与应用名一致): " - read -r SVC_NAME - SVC_NAME="${SVC_NAME:-${APP_NAME}}" - - printf "入口路径前缀(默认 /): " - read -r PATH_PREFIX - PATH_PREFIX="${PATH_PREFIX:-/}" - - printf "后端 Pod 端口(默认 80): " - read -r POD_PORT - POD_PORT="${POD_PORT:-80}" - ;; - *) - echo "[WARN] 无效选择,使用 nodejs-demo 默认场景。" - NS_BACKEND="default" 
- APP_NAME="nodejs-demo" - APP_LABEL="app=nodejs-demo" - SVC_NAME="nodejs-demo" - PATH_PREFIX="/node/" - POD_PORT="3000" - ;; - esac - - printf "入口 IP(用于本机 curl,默认 192.168.2.61): " - read -r ENTRY_IP - ENTRY_IP="${ENTRY_IP:-192.168.2.61}" -} - -init_runtime -select_scene - -mkdir -p "${LOG_DIR}" -LOG_FILE="${LOG_DIR}/diag-$(date '+%Y%m%d-%H%M%S')-${APP_NAME}.log" -exec > >(tee -a "${LOG_FILE}") 2>&1 - -print_title "0. 诊断上下文" -echo "TIME: $(date '+%F %T %Z')" -echo "LOG_FILE=${LOG_FILE}" -echo "SCENE_APP=${APP_NAME}" -echo "SCENE_NS=${NS_BACKEND}" -echo "SCENE_LABEL=${APP_LABEL}" -echo "SCENE_SVC=${SVC_NAME}" -echo "SCENE_PATH=${PATH_PREFIX}" -echo "SCENE_POD_PORT=${POD_PORT}" -echo "ENTRY_IP=${ENTRY_IP}" -echo "HOSTNAME=$(hostname)" -safe_run kubectl version --short - -print_title "1. 集群与 Traefik 基线" -safe_run kubectl get nodes -o wide -safe_run kubectl get deploy -n "${NS_TRAEFIK}" traefik -o wide -safe_run kubectl get svc -n "${NS_TRAEFIK}" traefik -o wide -safe_run kubectl get pod -n "${NS_TRAEFIK}" -l "${APP_TRAEFIK_LABEL}" -o wide -kubectl get pods -n kube-system -o wide | grep -E 'kube-router|flannel|traefik|svclb-traefik' || true - -TRAEFIK_POD="$(kubectl get pod -n "${NS_TRAEFIK}" -l "${APP_TRAEFIK_LABEL}" -o jsonpath='{.items[0].metadata.name}' 2>/dev/null || true)" -TRAEFIK_IP="$(kubectl get pod -n "${NS_TRAEFIK}" -l "${APP_TRAEFIK_LABEL}" -o jsonpath='{.items[0].status.podIP}' 2>/dev/null || true)" - -echo "--- 1.1 kube-proxy 基线 ---" -safe_run kubectl get pod -n kube-system -l k8s-app=kube-proxy -o wide -safe_run kubectl get configmap -n kube-system kube-proxy -o yaml - -KPROXY_POD="$(kubectl get pod -n kube-system -l k8s-app=kube-proxy -o jsonpath='{.items[0].metadata.name}' 2>/dev/null || true)" -if [[ -n "${KPROXY_POD}" ]]; then - echo "--- 1.2 kube-proxy 日志关键字(error|conntrack|iptables|ipvs|nft)---" - kubectl logs -n kube-system "${KPROXY_POD}" --tail=200 | grep -Ei "error|fail|conntrack|iptables|ipvs|nft|sync" || true -else - echo "[WARN] 未找到 
kube-proxy Pod(K3s 某些模式可忽略)" -fi - -print_title "2. 业务资源采集" -safe_run kubectl get deploy -n "${NS_BACKEND}" "${APP_NAME}" -o wide -safe_run kubectl get svc -n "${NS_BACKEND}" "${SVC_NAME}" -o wide -safe_run kubectl get endpoints -n "${NS_BACKEND}" "${SVC_NAME}" -o wide -safe_run kubectl get endpointslice -n "${NS_BACKEND}" -l kubernetes.io/service-name="${SVC_NAME}" -o wide -safe_run kubectl get pod -n "${NS_BACKEND}" -l "${APP_LABEL}" -o wide -safe_run kubectl get pod -n "${NS_BACKEND}" -l "${APP_LABEL}" --show-labels -safe_run kubectl get ingress -n "${NS_BACKEND}" -safe_run kubectl get ingressroute -n "${NS_BACKEND}" -safe_run kubectl get networkpolicy -n "${NS_BACKEND}" -safe_run kubectl get networkpolicy -n "${NS_TRAEFIK}" -safe_run kubectl get ns "${NS_BACKEND}" "${NS_TRAEFIK}" --show-labels - -BACKEND_POD_IP="$(kubectl get pod -n "${NS_BACKEND}" -l "${APP_LABEL}" -o jsonpath='{.items[0].status.podIP}' 2>/dev/null || true)" -SVC_IP="$(kubectl get svc -n "${NS_BACKEND}" "${SVC_NAME}" -o jsonpath='{.spec.clusterIP}' 2>/dev/null || true)" -EP_COUNT="$(kubectl get endpoints -n "${NS_BACKEND}" "${SVC_NAME}" -o jsonpath='{.subsets[*].addresses[*].ip}' 2>/dev/null | awk '{print NF}')" -EP_COUNT="${EP_COUNT:-0}" - -echo "TRAEFIK_POD=${TRAEFIK_POD:-}" -echo "TRAEFIK_IP=${TRAEFIK_IP:-}" -echo "BACKEND_POD_IP=${BACKEND_POD_IP:-}" -echo "SVC_IP=${SVC_IP:-}" -echo "ENDPOINTS_COUNT=${EP_COUNT}" -echo "SERVICE_SELECTOR=$(kubectl get svc -n "${NS_BACKEND}" "${SVC_NAME}" -o jsonpath='{.spec.selector}' 2>/dev/null || echo '{}')" - -echo "--- 2.1 EndpointSlice 条件(ready/serving/terminating/node)---" -kubectl get endpointslice -n "${NS_BACKEND}" -l kubernetes.io/service-name="${SVC_NAME}" \ - -o jsonpath='{range .items[*]}{"slice="}{.metadata.name}{"\n"}{range .endpoints[*]}{" addr="}{.addresses[0]}{" ready="}{.conditions.ready}{" serving="}{.conditions.serving}{" terminating="}{.conditions.terminating}{" node="}{.nodeName}{"\n"}{end}{end}' \ - || true - -print_title "3. 
主链路连通性探测" -echo "--- 3.1 本机 -> 入口 (${ENTRY_IP}${PATH_PREFIX}) ---" -if curl -sS -m "${TIMEOUT}" -o /tmp/netpol_client.out -w "%{http_code}" "http://${ENTRY_IP}${PATH_PREFIX}" >/tmp/netpol_client.code 2>/tmp/netpol_client.err; then - CODE="$(cat /tmp/netpol_client.code)" - echo "HTTP_CODE=${CODE}" - echo "BODY_PREVIEW:" - head -c 200 /tmp/netpol_client.out || true - echo - PROBE_CLIENT="OK_${CODE}" -else - cat /tmp/netpol_client.err - PROBE_CLIENT="FAIL" -fi - -if [[ -n "${TRAEFIK_POD}" && -n "${SVC_IP}" ]]; then - echo - echo "--- 3.2 Traefik -> ServiceIP (${SVC_IP}:80) ---" - if probe_wget_from_traefik "http://${SVC_IP}:80"; then - PROBE_TRAEFIK_TO_SVC="OK" - else - PROBE_TRAEFIK_TO_SVC="FAIL" - fi -else - echo "[SKIP] 缺少 Traefik Pod 或 ServiceIP。" -fi - -if [[ -n "${TRAEFIK_POD}" ]]; then - echo - echo "--- 3.3 Traefik -> Service DNS (${SVC_NAME}.${NS_BACKEND}.svc.cluster.local:80) ---" - if probe_wget_from_traefik "http://${SVC_NAME}.${NS_BACKEND}.svc.cluster.local:80"; then - PROBE_TRAEFIK_DNS="OK" - else - PROBE_TRAEFIK_DNS="FAIL" - fi -else - echo "[SKIP] 未找到 Traefik Pod。" -fi - -if [[ -n "${TRAEFIK_POD}" && -n "${BACKEND_POD_IP}" ]]; then - echo - echo "--- 3.4 Traefik -> PodIP (${BACKEND_POD_IP}:${POD_PORT}) ---" - if probe_wget_from_traefik "http://${BACKEND_POD_IP}:${POD_PORT}"; then - PROBE_TRAEFIK_TO_POD="OK" - else - PROBE_TRAEFIK_TO_POD="FAIL" - fi -else - echo "[SKIP] 缺少 Traefik Pod 或后端 PodIP。" -fi - -print_title "4. 
路由与配置详情" -echo "--- 4.1 Ingress ---" -safe_run kubectl get ingress -n "${NS_BACKEND}" -o yaml -echo "--- 4.2 IngressRoute ---" -safe_run kubectl get ingressroute -n "${NS_BACKEND}" -o yaml -echo "--- 4.3 Service / Endpoints ---" -safe_run kubectl get svc -n "${NS_BACKEND}" "${SVC_NAME}" -o yaml -safe_run kubectl get endpoints -n "${NS_BACKEND}" "${SVC_NAME}" -o yaml -safe_run kubectl describe svc -n "${NS_BACKEND}" "${SVC_NAME}" -echo "--- 4.4 相关 NetworkPolicy(kube-system + backend)---" -safe_run kubectl get networkpolicy -n "${NS_TRAEFIK}" -o yaml -safe_run kubectl get networkpolicy -n "${NS_BACKEND}" -o yaml -echo "--- 4.5 近期事件(backend + kube-system)---" -safe_run kubectl get events -n "${NS_BACKEND}" --sort-by=.lastTimestamp -safe_run kubectl get events -n kube-system --sort-by=.lastTimestamp - -print_title "5. Traefik 日志(最近 ${LOG_SINCE},最多 ${LOG_TAIL} 行)" -safe_run kubectl logs -n "${NS_TRAEFIK}" deploy/traefik --since="${LOG_SINCE}" --tail="${LOG_TAIL}" -echo "--- 5.1 关键字过滤(404|502|503|router|service|middleware|upstream|${SVC_NAME}|${PATH_PREFIX}) ---" -kubectl logs -n "${NS_TRAEFIK}" deploy/traefik --since="${LOG_SINCE}" --tail="${LOG_TAIL}" | grep -Ei "404|502|503|router|service|middleware|upstream|endpoint|${SVC_NAME}|${PATH_PREFIX}" || true -echo "--- 5.2 Traefik 访问日志候选(status=404/502/503) ---" -kubectl logs -n "${NS_TRAEFIK}" deploy/traefik --since="${LOG_SINCE}" --tail="${LOG_TAIL}" | grep -E "\" 404 |\" 502 |\" 503 " || true -echo "--- 5.3 Traefik 上一次容器日志(若重启过) ---" -safe_run kubectl logs -n "${NS_TRAEFIK}" deploy/traefik --previous --tail=100 - -print_title "6. 
防火墙与数据平面" -echo "--- 6.1 防火墙状态 ---" -if command -v firewall-cmd >/dev/null 2>&1; then - safe_run firewall-cmd --state - safe_run firewall-cmd --list-all -else - echo "firewall-cmd: not found" -fi - -if command -v ufw >/dev/null 2>&1; then - safe_run ufw status verbose -else - echo "ufw: not found" -fi - -echo "--- 6.2 FORWARD 与 KUBE-ROUTER-FORWARD ---" -safe_run iptables -L FORWARD -n -v --line-numbers -safe_run iptables -L KUBE-ROUTER-FORWARD -n -v --line-numbers -echo "--- 6.2.1 NAT 链(KUBE-SERVICES)---" -safe_run iptables -t nat -L KUBE-SERVICES -n -v --line-numbers -if [[ -n "${SVC_IP}" ]]; then - echo "--- 6.2.2 NAT 链中 ServiceIP 相关规则 (${SVC_IP}) ---" - iptables -t nat -S | grep "${SVC_IP}" || true -fi - -TRAEFIK_CHAIN="" -BACKEND_CHAIN="" -if [[ -n "${TRAEFIK_IP}" ]]; then - TRAEFIK_CHAIN="$(iptables -L KUBE-ROUTER-FORWARD -n --line-numbers 2>/dev/null | awk -v ip="${TRAEFIK_IP}" '$0 ~ ip && $4 ~ /^KUBE-POD-FW-/ {print $4; exit}')" -fi -if [[ -n "${BACKEND_POD_IP}" ]]; then - BACKEND_CHAIN="$(iptables -L KUBE-ROUTER-FORWARD -n --line-numbers 2>/dev/null | awk -v ip="${BACKEND_POD_IP}" '$0 ~ ip && $4 ~ /^KUBE-POD-FW-/ {print $4; exit}')" -fi - -echo "TRAEFIK_CHAIN=${TRAEFIK_CHAIN:-}" -echo "BACKEND_CHAIN=${BACKEND_CHAIN:-}" - -if [[ -n "${TRAEFIK_CHAIN}" ]]; then - echo "--- 6.3 Traefik Pod 链 ${TRAEFIK_CHAIN} ---" - safe_run iptables -L "${TRAEFIK_CHAIN}" -n -v -x - echo "--- 6.4 Traefik Pod 链规则定义 ---" - safe_run iptables -S "${TRAEFIK_CHAIN}" -fi - -if [[ -n "${BACKEND_CHAIN}" ]]; then - echo "--- 6.5 Backend Pod 链 ${BACKEND_CHAIN} ---" - safe_run iptables -L "${BACKEND_CHAIN}" -n -v -x - echo "--- 6.6 Backend Pod 链规则定义 ---" - safe_run iptables -S "${BACKEND_CHAIN}" -fi - -echo "--- 6.7 ipset(KUBE-SRC/KUBE-DST)---" -if command -v ipset >/dev/null 2>&1; then - ipset list -n | grep -E '^KUBE-(SRC|DST)-' || true -else - echo "ipset: not found" -fi - -echo "--- 6.8 conntrack(容量与关键连接)---" -if command -v conntrack >/dev/null 2>&1; then - safe_run conntrack -S - 
safe_run sysctl net.netfilter.nf_conntrack_count - safe_run sysctl net.netfilter.nf_conntrack_max - if [[ -n "${SVC_IP}" ]]; then - echo "conntrack by service ip (${SVC_IP}):" - conntrack -L -d "${SVC_IP}" 2>/dev/null | head -n 100 || true - fi - if [[ -n "${BACKEND_POD_IP}" ]]; then - echo "conntrack by backend pod ip (${BACKEND_POD_IP}):" - conntrack -L -d "${BACKEND_POD_IP}" 2>/dev/null | head -n 100 || true - fi -else - echo "conntrack: not found" -fi - -print_title "7. 自动判读(502/503/404)" -echo "探测结果:" -echo " CLIENT=${PROBE_CLIENT}" -echo " TRAEFIK_TO_SVC=${PROBE_TRAEFIK_TO_SVC}" -echo " TRAEFIK_DNS=${PROBE_TRAEFIK_DNS}" -echo " TRAEFIK_TO_POD=${PROBE_TRAEFIK_TO_POD}" - -if [[ "${EP_COUNT}" == "0" ]]; then - echo "- [高概率 503] Service 无可用 Endpoints。检查 Deployment 是否 Ready、selector 是否匹配。" -fi - -if [[ "${PROBE_CLIENT}" == OK_404* ]]; then - echo "- [高概率 404] 入口路由未命中。检查 Ingress/IngressRoute 的 path、host、middleware。" -fi - -if [[ "${PROBE_CLIENT}" == OK_503* ]]; then - echo "- [高概率 503] 入口已命中但后端不可用。优先看 Endpoints/EndpointSlice 条件与 Traefik 日志。" -fi - -if [[ "${PROBE_TRAEFIK_TO_SVC}" == "FAIL" ]]; then - echo "- [高概率 502/503] Traefik 到 Service 不通。优先检查 NetworkPolicy、kube-router 链、DNS 53 放行。" -fi - -if [[ "${PROBE_TRAEFIK_DNS}" == "FAIL" && "${PROBE_TRAEFIK_TO_SVC}" == "FAIL" ]]; then - echo "- [可能 DNS/服务发现问题] Traefik 到 Service DNS 与 ServiceIP 都失败。检查 CoreDNS、kube-system egress 53。" -fi - -if [[ "${PROBE_TRAEFIK_TO_SVC}" == "OK" && "${PROBE_CLIENT}" == "FAIL" ]]; then - echo "- [可能入口层问题] 集群内后端可达,但入口访问失败。检查控制节点防火墙、Traefik Service 暴露端口、外部路由。" -fi - -if [[ "${PROBE_TRAEFIK_TO_SVC}" == "OK" && "${PROBE_TRAEFIK_TO_POD}" == "FAIL" ]]; then - echo "- [已知行为候选] Service 可达但 PodIP 直连失败,常见于 kube-router 同节点桥接路径。" -fi - -echo -echo "下一步建议:" -echo "1) 先修复 Endpoints=0 / 404 路由不匹配。" -echo "2) 再看 Traefik -> Service 探测与 NetworkPolicy 命中。" -echo "3) 最后结合 KUBE-ROUTER-FORWARD、Pod 链、ipset 判断是否为 kube-router 行为问题。" -echo -echo "日志已保存:${LOG_FILE}" diff --git 
a/scripts/diag/recovery/k3s-recovery-reset.sh b/scripts/diag/recovery/k3s-recovery-reset.sh deleted file mode 100644 index 0853816..0000000 --- a/scripts/diag/recovery/k3s-recovery-reset.sh +++ /dev/null @@ -1,148 +0,0 @@ -#!/bin/bash -set -euo pipefail - -LOG_DIR="/root/netpol-diag-logs" -mkdir -p "${LOG_DIR}" -LOG_FILE="${LOG_DIR}/recovery-$(date '+%Y%m%d-%H%M%S').log" -exec > >(tee -a "${LOG_FILE}") 2>&1 - -info() { echo "[INFO] $*"; } -warn() { echo "[WARN] $*"; } - -confirm_once() { - local prompt="$1" - local answer="" - printf "%s (输入 YES 继续): " "${prompt}" - read -r answer - [[ "${answer}" == "YES" ]] -} - -confirm_twice_high_risk() { - local answer1="" - local answer2="" - printf "高风险操作,第一次确认:输入 YES 执行: " - read -r answer1 - printf "第二次确认:输入 RESET 执行: " - read -r answer2 - [[ "${answer1}" == "YES" && "${answer2}" == "RESET" ]] -} - -cleanup_demo_resources() { - info "清理 demo 资源(default + kube-system)" - kubectl delete ingress -n default nginx-demo nodejs-demo --ignore-not-found || true - kubectl delete ingressroute -n default nginx-demo nodejs-demo --ignore-not-found || true - kubectl delete middleware -n default nginx-demo-stripprefix nodejs-demo-stripprefix --ignore-not-found || true - kubectl delete service -n default nginx-demo nodejs-demo --ignore-not-found || true - kubectl delete deployment -n default nginx-demo nodejs-demo --ignore-not-found || true - kubectl delete networkpolicy -n default allow-traefik-to-nginx allow-traefik-to-nodejs --ignore-not-found || true - kubectl delete networkpolicy -n kube-system allow-traefik-egress-to-services --ignore-not-found || true - info "demo 资源清理完成" -} - -restart_key_components() { - info "重启关键组件" - kubectl rollout restart deployment -n kube-system traefik || true - kubectl rollout restart deployment -n kube-system coredns || true - kubectl get ds -n kube-system -l k8s-app=kube-proxy -o name | while read -r ds; do - kubectl rollout restart -n kube-system "${ds}" || true - done - info "等待关键组件状态" - kubectl 
rollout status deployment/traefik -n kube-system --timeout=180s || true - kubectl rollout status deployment/coredns -n kube-system --timeout=180s || true -} - -network_rules_guidance() { - warn "该步骤仅打印建议命令,不自动执行。" - cat <<'EOF' -建议在控制节点人工执行并逐条确认: - -# 1) 备份当前规则 -iptables-save > /root/iptables-backup-$(date +%F-%H%M%S).txt - -# 2) 查看 KUBE-ROUTER 相关链(确认后再清理) -iptables-save | grep KUBE-ROUTER || true - -# 3) 若你明确要清理 kube-router 规则(高风险) -# iptables-save | grep -v KUBE-ROUTER | iptables-restore - -# 4) 查看并清理相关 ipset(高风险,按需逐个) -# ipset list -n | grep '^KUBE-' -# ipset destroy - -EOF -} - -print_rebuild_runbook() { - cat <<'EOF' -K3s 重建步骤(只输出,不自动执行): - -1) 在 server 节点卸载: - /usr/local/bin/k3s-uninstall.sh - -2) 在 agent 节点卸载: - /usr/local/bin/k3s-agent-uninstall.sh - -3) 清理残留目录(确认后): - rm -rf /etc/rancher /var/lib/rancher /var/lib/kubelet /etc/cni /opt/cni - -4) 重新安装 server(带你当前需要的参数) -5) 重新 join agent -6) 先部署 04-1 / 04-2 / 04-3,再到 04-4 / 04-5 -7) 最后用 /root/check-nodejs-netpol.sh 复测 -EOF -} - -show_menu() { - echo - echo "===== K3s 恢复脚本(独立于诊断)=====" - echo "1) 仅清理 demo 资源(低风险)" - echo "2) 清理 demo + 重启关键组件(中风险)" - echo "3) 高风险网络规则清理(双重确认,默认仅打印建议)" - echo "4) 输出完整重建步骤(不自动执行)" - echo "0) 退出" - printf "请选择: " -} - -main() { - info "日志文件: ${LOG_FILE}" - while true; do - show_menu - read -r choice - case "${choice}" in - 1) - if confirm_once "确认执行“仅清理 demo 资源”吗?"; then - cleanup_demo_resources - else - warn "已取消" - fi - ;; - 2) - if confirm_once "确认执行“清理 demo + 重启关键组件”吗?"; then - cleanup_demo_resources - restart_key_components - else - warn "已取消" - fi - ;; - 3) - if confirm_twice_high_risk; then - network_rules_guidance - else - warn "高风险操作已取消" - fi - ;; - 4) - print_rebuild_runbook - ;; - 0) - info "退出。日志已保存:${LOG_FILE}" - break - ;; - *) - warn "无效选项" - ;; - esac - done -} - -main diff --git a/scripts/k3s-delete-lab-stacks.sh b/scripts/k3s-delete-lab-stacks.sh new file mode 100644 index 0000000..85ca289 --- /dev/null +++ b/scripts/k3s-delete-lab-stacks.sh @@ -0,0 +1,287 @@ 
+#!/usr/bin/env bash +# 按「集群里实际存在的资源」遍历删除(全部由 kubectl 发现,不读仓库 YAML 目录) +# 在任意目录执行均可;建议在仓库根:./scripts/k3s-delete-lab-stacks.sh [选项] +# +# 默认跳过系统命名空间:kube-system、kube-public、kube-node-lease +# 每个命名空间内会跳过 Service/kubernetes(API 内置 Service) +# 可选:PVC / ConfigMap / Secret(默认不删 Secret,避免误伤账号 token) +# +# 环境:kubectl 可用;export KUBECONFIG=... 按需设置 + +set -euo pipefail + +DRY_RUN=false +YES=false +PREVIEW_ONLY=false +INCLUDE_KUBE_SYSTEM=false +WITH_PVC=false +WITH_CONFIGMAPS=false +WITH_SECRETS=false +# 空 = 自动枚举「非系统」命名空间;非空 = 仅处理列出的 NS(逗号分隔) +NAMESPACES_ARG="" + +usage() { + cat <<'EOF' +用法: k3s-delete-lab-stacks.sh [选项] + +按 kubectl 当前集群中已部署的资源逐项删除(常见工作负载 + Ingress/IngressRoute 等)。 +不依赖本仓库 ansible/files 目录。 + +选项: + --preview 只列出将参与删除的命名空间及各资源(kubectl get),不执行删除 + --dry-run 删除时使用 kubectl 的 --dry-run=client(不落库,部分环境仍会做校验) + -y, --yes 跳过确认 + --namespaces NS[,NS...] + 只处理这些命名空间(仍受 --include-kube-system 与系统 NS 规则约束) + --include-kube-system + 也处理 kube-system(极危险,可能拆掉 Traefik/Coredns 等) + --with-pvc 删除 PersistentVolumeClaim(数据卷,默认不删) + --with-configmaps 删除 ConfigMap(会跳过 kube-root-ca.crt) + --with-secrets 删除 Secret(会跳过 default-token-* 及 type=kubernetes.io/service-account-token) + -h, --help 帮助 + +示例: + export KUBECONFIG=/etc/rancher/k3s/k3s.yaml + ./scripts/k3s-delete-lab-stacks.sh --preview + ./scripts/k3s-delete-lab-stacks.sh --namespaces default -y + ./scripts/k3s-delete-lab-stacks.sh --dry-run -y +EOF + exit 0 +} + +ARGS=("$@") +i=0 +while [[ $i -lt ${#ARGS[@]} ]]; do + case "${ARGS[$i]}" in + --dry-run) DRY_RUN=true ;; + -y|--yes) YES=true ;; + --preview) PREVIEW_ONLY=true ;; + --include-kube-system) INCLUDE_KUBE_SYSTEM=true ;; + --with-pvc) WITH_PVC=true ;; + --with-configmaps) WITH_CONFIGMAPS=true ;; + --with-secrets) WITH_SECRETS=true ;; + -h|--help) usage ;; + --namespaces) + i=$((i + 1)) + if [[ $i -ge ${#ARGS[@]} ]]; then echo "[ERROR] --namespaces 需要参数" >&2; exit 1; fi + NAMESPACES_ARG="${ARGS[$i]}" + ;; + *) + echo "[ERROR] 未知参数: ${ARGS[$i]},使用 -h 查看帮助" >&2 + exit 1 + 
;; + esac + i=$((i + 1)) +done + +if ! command -v kubectl &>/dev/null; then + echo "[ERROR] 未找到 kubectl" >&2 + exit 1 +fi + +KUBECTL_DELETE=(kubectl delete) +if [[ "${DRY_RUN}" == true ]]; then + KUBECTL_DELETE=(kubectl delete --dry-run=client) +fi + +# 系统命名空间:默认不扫(除非 --include-kube-system 且用户未用 --namespaces 限制) +SYSTEM_NS="kube-system kube-public kube-node-lease" + +is_system_ns() { + local n="$1" + for s in ${SYSTEM_NS}; do + [[ "${n}" == "${s}" ]] && return 0 + done + return 1 +} + +collect_namespaces() { + if [[ -n "${NAMESPACES_ARG}" ]]; then + IFS=',' read -r -a arr <<< "${NAMESPACES_ARG}" + for raw in "${arr[@]}"; do + n="${raw//[[:space:]]/}" + [[ -z "${n}" ]] && continue + if ! is_system_ns "${n}" || [[ "${INCLUDE_KUBE_SYSTEM}" == true ]]; then + echo "${n}" + else + echo "[WARN] 已忽略系统命名空间(加 --include-kube-system 可处理): ${n}" >&2 + fi + done + return + fi + + local all + all=$(kubectl get ns -o jsonpath='{range .items[*]}{.metadata.name}{"\n"}{end}') + while IFS= read -r n; do + [[ -z "${n}" ]] && continue + if is_system_ns "${n}" && [[ "${INCLUDE_KUBE_SYSTEM}" != true ]]; then + continue + fi + echo "${n}" + done <<< "${all}" +} + +# 若 kind 在当前集群不存在,静默跳过 +list_kind_in_ns() { + local ns="$1" + local kind="$2" + kubectl get "${kind}" -n "${ns}" -o name 2>/dev/null || true +} + +preview_kind_in_ns() { + local ns="$1" + local kind="$2" + if kubectl get "${kind}" -n "${ns}" &>/dev/null; then + echo "===== ${ns} / ${kind} =====" + kubectl get "${kind}" -n "${ns}" -o wide 2>/dev/null || true + fi +} + +# 删除某 kind 在 ns 下 kubectl -o name 列出的所有资源 +delete_kind_all_by_name() { + local ns="$1" + local kind="$2" + local line name + while IFS= read -r line; do + [[ -z "${line}" ]] && continue + # line 形如 deployment.apps/foo 或 pod/bar + name="${line##*/}" + echo "[DEL] ${line} -n ${ns}" + "${KUBECTL_DELETE[@]}" "${line}" -n "${ns}" --ignore-not-found || true + done < <(list_kind_in_ns "${ns}" "${kind}") +} + +delete_services_safe() { + local ns="$1" + local names + 
names=$(kubectl get svc -n "${ns}" -o jsonpath='{range .items[*]}{.metadata.name}{"\n"}{end}' 2>/dev/null || true) + while IFS= read -r n; do + [[ -z "${n}" ]] && continue + [[ "${n}" == "kubernetes" ]] && continue + echo "[DEL] service/${n} -n ${ns}" + "${KUBECTL_DELETE[@]}" "svc" "${n}" -n "${ns}" --ignore-not-found || true + done <<< "${names}" +} + +delete_configmaps_safe() { + local ns="$1" + local cm + while IFS= read -r cm; do + [[ -z "${cm}" ]] && continue + [[ "${cm}" == "kube-root-ca.crt" ]] && continue + echo "[DEL] configmap/${cm} -n ${ns}" + "${KUBECTL_DELETE[@]}" "configmap" "${cm}" -n "${ns}" --ignore-not-found || true + done < <(kubectl get configmap -n "${ns}" -o jsonpath='{range .items[*]}{.metadata.name}{"\n"}{end}' 2>/dev/null || true) +} + +delete_secrets_safe() { + local ns="$1" + if ! command -v jq &>/dev/null; then + echo "[WARN] 未安装 jq,无法安全过滤 Secret,已跳过 ${ns}" >&2 + return 0 + fi + local names + names=$(kubectl get secret -n "${ns}" -o json 2>/dev/null | jq -r ' + .items[]? 
+ | select(.type != "kubernetes.io/service-account-token") + | select(.metadata.name | test("^default-token-") | not) + | .metadata.name' 2>/dev/null || true) + while IFS= read -r sn; do + [[ -z "${sn}" ]] && continue + echo "[DEL] secret/${sn} -n ${ns}" + "${KUBECTL_DELETE[@]}" "secret" "${sn}" -n "${ns}" --ignore-not-found || true + done <<< "${names}" +} + +# Traefik / 扩展:不存在则 list 为空 +TRAEFIK_KINDS=(ingressroute middleware tlsoption traefikservice serverstransport) + +process_namespace() { + local ns="$1" + + if [[ "${PREVIEW_ONLY}" == true ]]; then + echo "######## 命名空间: ${ns} ########" + for k in cronjob job deployment statefulset daemonset ingress networkpolicy horizontalpodautoscaler hpa \ + "${TRAEFIK_KINDS[@]}" service pvc configmap secret; do + preview_kind_in_ns "${ns}" "${k}" || true + done + return + fi + + echo "######## 删除: ${ns} ########" + + # 1) 定时与任务 + delete_kind_all_by_name "${ns}" "cronjob" + delete_kind_all_by_name "${ns}" "job" + + # 2) 路由(Traefik CRD 可能未装,list 为空) + delete_kind_all_by_name "${ns}" "ingress" + for tk in "${TRAEFIK_KINDS[@]}"; do + delete_kind_all_by_name "${ns}" "${tk}" + done + + # 3) 工作负载 + delete_kind_all_by_name "${ns}" "deployment" + delete_kind_all_by_name "${ns}" "statefulset" + delete_kind_all_by_name "${ns}" "daemonset" + delete_kind_all_by_name "${ns}" "replicaset" + + # 4) 其它常见附属(不主动删 Pod:由上层负载级联回收,避免误伤系统静态 Pod) + delete_kind_all_by_name "${ns}" "networkpolicy" + delete_kind_all_by_name "${ns}" "horizontalpodautoscaler" + delete_kind_all_by_name "${ns}" "hpa" + + # 5) Service(保留 kubernetes) + delete_services_safe "${ns}" + + # 6) PVC + if [[ "${WITH_PVC}" == true ]]; then + delete_kind_all_by_name "${ns}" "persistentvolumeclaim" + fi + + if [[ "${WITH_CONFIGMAPS}" == true ]]; then + delete_configmaps_safe "${ns}" + fi + + if [[ "${WITH_SECRETS}" == true ]]; then + if command -v jq &>/dev/null; then + delete_secrets_safe "${ns}" + else + echo "[WARN] ${ns}: 未安装 jq,已跳过 --with-secrets" >&2 + fi + fi +} + +NS_LIST=() 
+while IFS= read -r _ns; do + [[ -z "${_ns}" ]] && continue + NS_LIST+=("${_ns}") +done < <(collect_namespaces | sort -u) + +if [[ ${#NS_LIST[@]} -eq 0 ]]; then + echo "[ERROR] 没有可处理的命名空间(检查 --namespaces / 集群连接)" >&2 + exit 1 +fi + +if [[ -n "${KUBECONFIG:-}" ]]; then + echo "[INFO] KUBECONFIG=${KUBECONFIG}" +else + echo "[INFO] KUBECONFIG 未设置,使用 kubectl 默认配置" +fi +echo "[INFO] 命名空间 (${#NS_LIST[@]}): ${NS_LIST[*]}" +echo "[INFO] include-kube-system=${INCLUDE_KUBE_SYSTEM} with-pvc=${WITH_PVC} with-configmaps=${WITH_CONFIGMAPS} with-secrets=${WITH_SECRETS} preview=${PREVIEW_ONLY} dry-run=${DRY_RUN}" + +if [[ "${PREVIEW_ONLY}" != true && "${YES}" != true && "${DRY_RUN}" != true ]]; then + echo "[WARN] 将按上述命名空间删除工作负载与路由等资源(见脚本内顺序)。输入 yes 继续:" + read -r confirm + if [[ "${confirm}" != "yes" ]]; then + echo "已取消" + exit 1 + fi +fi + +for ns in "${NS_LIST[@]}"; do + process_namespace "${ns}" +done + +echo "[DONE] 完成" diff --git a/scripts/ssh/test-ssh.sh b/scripts/ssh/test-ssh.sh index 0e91fa0..3ee70e3 100644 --- a/scripts/ssh/test-ssh.sh +++ b/scripts/ssh/test-ssh.sh @@ -148,7 +148,7 @@ done echo if [[ $rc -eq 0 ]]; then - echo "[PASS] SSH 测试通过,可用于 scripts/diag/entrypath/entrypath.sh 自动模式。" + echo "[PASS] SSH 测试通过,可用于 Ansible/运维自动化。" else echo "[FAIL] 存在失败项,请先修复 SSH/key/sudo 配置。" fi