From 8c437619620e73e8229e85bcb2812d2185b4b833 Mon Sep 17 00:00:00 2001 From: jack Date: Thu, 26 Mar 2026 07:01:14 +0800 Subject: [PATCH] =?UTF-8?q?feat:=20=E6=8C=89=20doc=5Fid=20=E9=87=8D?= =?UTF-8?q?=E7=BB=84=20ansible/files=20=E4=B8=8E=E9=AA=8C=E8=AF=81?= =?UTF-8?q?=E6=A1=86=E6=9E=B6?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - ansible/files 改为与文档 XX-YY 对齐的目录结构,更新相关 playbook 路径 - 新增 scripts/verify.sh 与 ansible/playbooks/verify/*.yml,移除单体 verify-matrix.yml - 补充 docs/00-02 矩阵状态、00-05 验证框架与流程、00-04 环境与 ylc65 工作机说明 - 增加 k3s 存储准备、Longhorn、local-path 等 playbook 与辅助脚本 Made-with: Cursor --- .gitignore | 2 + README.md | 3 +- ansible/files/00-01-k3s-基础概念/README.md | 9 + ansible/files/00-04-部署环境说明/README.md | 9 + .../01-01-k3s-控制节点含traefik/README.md | 13 + ansible/files/01-02-k3s-工作节点/README.md | 13 + .../01-03-armv7-standalone-docker/README.md | 9 + ansible/files/01-04-双控制节点ha/README.md | 9 + .../files/01-05-armv7-nfs服务安装/README.md | 9 + .../01-06-节点初始化-ansible-实践/README.md | 13 + ansible/files/02-00-nginx-系列说明/README.md | 12 + .../02-01-nginx-control-ingress/README.md | 15 + .../README.md | 15 + .../02-03-nginx-worker-ingress/README.md | 15 + .../02-04-nginx-worker-ingressroute/README.md | 15 + .../01-control-ingress.yaml | 1 + .../02-control-ingressroute.yaml | 1 + .../03-worker-ingress.yaml | 1 + .../04-worker-ingressroute.yaml | 1 + .../README.md | 1 + .../traefik-dashboard.yaml | 1 + .../01-control-ingress.yaml | 1 + .../02-control-ingressroute.yaml | 1 + .../03-worker-ingress.yaml | 1 + .../04-worker-ingressroute.yaml | 1 + .../traefik-acme.yaml | 1 + .../tomcat-acme.yaml | 1 + .../traefik-dashboard-acme.yaml | 74 ++++ .../cloudflared.yaml | 1 + .../local-path-config-lab.json | 9 + .../local-path-pvc-demo.yaml | 1 + .../nfs-pv-pvc-demo.yaml | 5 +- ansible/files/03-07-longhorn/values-lab.yaml | 14 + .../03-08-k3s-ha-集群配置与切换/README.md | 9 + .../03-09-k3s-gitops-集群配置管理/README.md | 9 + .../traefik-custom-ports.yaml | 0 
.../04-01-nodejs-demo.yaml | 1 + .../04-02-nodejs-demo.yaml | 1 + .../04-03-nodejs-demo.yaml | 1 + .../04-04-nodejs-demo.yaml | 1 + .../04-05-nodejs-demo.yaml | 1 + .../04-06-nodejs-demo.yaml | 1 + .../04-07-nodejs-demo.yaml | 1 + .../04-08-nodejs-demo.yaml | 1 + .../04-09-nodejs-demo.yaml | 1 + .../04-10-nodejs-demo.yaml | 1 + .../04-11-nodejs-demo.yaml | 1 + .../04-12-nodejs-demo.yaml | 1 + .../04-13-nodejs-demo.yaml | 1 + .../README.md | 5 +- .../nodejs-demo-secret.example.yaml | 1 + .../04-02-nodejs-镜像与运行命令/README.md | 13 + .../04-03-nodejs-环境变量与配置注入/README.md | 13 + .../04-04-nodejs-端口与Service/README.md | 13 + .../04-05-nodejs-资源请求与限制/README.md | 13 + .../04-06-nodejs-探针与健康检查/README.md | 13 + .../files/04-07-nodejs-调度与亲和/README.md | 13 + .../files/04-08-nodejs-安全上下文/README.md | 13 + ansible/files/04-09-nodejs-存储与卷/README.md | 13 + .../04-10-nodejs-Ingress与Traefik/README.md | 13 + .../04-11-nodejs-副本与滚动发布/README.md | 13 + .../files/04-12-nodejs-TLS与证书/README.md | 13 + ansible/files/04-13-nodejs-HPA/README.md | 13 + .../04-14-nodejs-GitOps与CI流水线/README.md | 9 + .../files/{homer => 05-01-homer}/homer.yaml | 43 +- .../onenav-proxy.yaml | 3 +- .../gitlab-ci-runner-tags.example.yml | 1 + .../{gitlab => 05-04-gitlab-cicd}/README.md | 3 +- .../gitlab-ci-minimal.example.yml | 1 + .../gitlab-ci-multi-arch-deploy.example.yml | 1 + .../files/05-05-prometheus与grafana/README.md | 9 + .../openlist-backup-cronjob.yaml | 1 + .../openclaw-proxy.yaml | 0 .../files/05-07-openclaw/openclaw-server.yml | 37 ++ .../openclaw-k3s-experimental.yaml | 0 .../openclaw-web.yml | 55 +++ .../README.md | 9 + ansible/files/06-02-运维小结/README.md | 9 + .../README.md | 12 + .../app-data-backup-cronjob.yaml | 1 + .../app-data-restore-job.yaml | 1 + .../traefik-dashboard-acme.yaml | 83 ---- ansible/group_vars/all.yml | 14 + .../playbooks/apply-local-path-config-lab.yml | 37 ++ ansible/playbooks/k3s-init-and-install.yml | 29 ++ ansible/playbooks/k3s-prepare-storage.yml | 106 +++++ 
ansible/playbooks/longhorn-install.yml | 251 +++++++++++ ansible/playbooks/nginx-matrix-deploy.yml | 6 +- ansible/playbooks/nginx-matrix-tls-deploy.yml | 4 +- ansible/playbooks/nodejs-demo-apply.yml | 4 +- ansible/playbooks/verify/00-01.yml | 10 + ansible/playbooks/verify/00-04.yml | 10 + ansible/playbooks/verify/01-01.yml | 24 + ansible/playbooks/verify/01-02.yml | 11 + ansible/playbooks/verify/01-03.yml | 10 + ansible/playbooks/verify/01-04.yml | 10 + ansible/playbooks/verify/01-05.yml | 10 + ansible/playbooks/verify/01-06.yml | 11 + ansible/playbooks/verify/01-07.yml | 10 + ansible/playbooks/verify/02-00.yml | 10 + ansible/playbooks/verify/02-01.yml | 77 ++++ ansible/playbooks/verify/02-02.yml | 81 ++++ ansible/playbooks/verify/02-03.yml | 77 ++++ ansible/playbooks/verify/02-04.yml | 77 ++++ ansible/playbooks/verify/02-05.yml | 65 +++ ansible/playbooks/verify/03-01.yml | 65 +++ ansible/playbooks/verify/03-02.yml | 98 +++++ ansible/playbooks/verify/03-03.yml | 10 + ansible/playbooks/verify/03-04.yml | 10 + ansible/playbooks/verify/03-05.yml | 66 +++ ansible/playbooks/verify/03-06.yml | 94 ++++ ansible/playbooks/verify/03-07.yml | 41 ++ ansible/playbooks/verify/03-08.yml | 10 + ansible/playbooks/verify/03-09.yml | 10 + ansible/playbooks/verify/03-10.yml | 10 + ansible/playbooks/verify/04-01.yml | 49 +++ ansible/playbooks/verify/04-02.yml | 10 + ansible/playbooks/verify/04-03.yml | 10 + ansible/playbooks/verify/04-04.yml | 10 + ansible/playbooks/verify/04-05.yml | 10 + ansible/playbooks/verify/04-06.yml | 10 + ansible/playbooks/verify/04-07.yml | 10 + ansible/playbooks/verify/04-08.yml | 10 + ansible/playbooks/verify/04-09.yml | 10 + ansible/playbooks/verify/04-10.yml | 10 + ansible/playbooks/verify/04-11.yml | 10 + ansible/playbooks/verify/04-12.yml | 10 + ansible/playbooks/verify/04-13.yml | 10 + ansible/playbooks/verify/04-14.yml | 10 + ansible/playbooks/verify/05-01.yml | 10 + ansible/playbooks/verify/05-02.yml | 10 + ansible/playbooks/verify/05-03.yml | 10 + 
ansible/playbooks/verify/05-04.yml | 10 + ansible/playbooks/verify/05-05.yml | 10 + ansible/playbooks/verify/05-06.yml | 10 + ansible/playbooks/verify/05-07.yml | 10 + ansible/playbooks/verify/05-08.yml | 10 + ansible/playbooks/verify/05-09.yml | 10 + ansible/playbooks/verify/06-01.yml | 10 + ansible/playbooks/verify/06-02.yml | 10 + ansible/playbooks/verify/06-03.yml | 10 + ansible/playbooks/verify/_noop-tasks.yml | 32 ++ docs/00-01-k3s-基础概念.md | 4 +- docs/00-02-验证矩阵.md | 134 +++--- docs/00-04-部署环境说明.md | 55 ++- docs/00-05-测试与验证框架.md | 196 +++++++++ docs/01-01-k3s-控制节点含traefik.md | 4 +- docs/01-02-k3s-工作节点.md | 4 +- docs/01-05-armv7-nfs服务安装.md | 241 ++++++++++- docs/01-06-节点初始化-ansible-实践.md | 51 ++- docs/01-07-openwrt-haproxy.md | 4 +- docs/02-00-nginx-系列说明.md | 2 +- docs/02-01-nginx-control-ingress.md | 6 +- docs/02-02-nginx-control-ingressroute.md | 6 +- docs/02-03-nginx-worker-ingress.md | 6 +- docs/02-04-nginx-worker-ingressroute.md | 6 +- docs/02-05-nginx-验证矩阵-一键部署.md | 18 +- docs/03-01-k3s-traefik-dashboard.md | 4 +- docs/03-02-k3s-traefik-acme.md | 20 +- docs/03-03-k3s-traefik-dashboard-acme.md | 6 +- docs/03-04-k3s-cloudflare-tunnel-配置接入.md | 4 +- docs/03-05-k3s-local-path-pvc.md | 20 +- docs/03-06-k3s-使用nfs存储.md | 168 ++++++- docs/03-07-k3s-longhorn-持久化存储.md | 409 +++++++++++++++++- docs/03-10-k3s-traefik-custom-ports.md | 53 +++ docs/04-01-k3s-nodejs-高级部署.md | 10 +- docs/04-02-nodejs-镜像与运行命令.md | 6 +- docs/04-03-nodejs-环境变量与配置注入.md | 10 +- docs/04-04-nodejs-端口与Service.md | 6 +- docs/04-05-nodejs-资源请求与限制.md | 6 +- docs/04-06-nodejs-探针与健康检查.md | 6 +- docs/04-07-nodejs-调度与亲和.md | 6 +- docs/04-08-nodejs-安全上下文.md | 6 +- docs/04-09-nodejs-存储与卷.md | 8 +- docs/04-10-nodejs-Ingress与Traefik.md | 6 +- docs/04-11-nodejs-副本与滚动发布.md | 4 +- docs/04-12-nodejs-TLS与证书.md | 4 +- docs/04-13-nodejs-HPA.md | 6 +- docs/04-14-nodejs-GitOps与CI流水线.md | 2 +- docs/05-01-k3s-部署homer首页面板.md | 64 ++- docs/05-02-onenav首页面板.md | 4 +- docs/05-03-k3s-安装gitlab-含runner.md | 2 +- 
docs/05-04-k3s-配置gitlab-cicd.md | 6 +- docs/05-06-openlist挂载网盘与自动备份.md | 4 +- docs/05-07-openclaw应用部署.md | 4 +- docs/05-08-openclaw-k3s-实验部署.md | 4 +- docs/05-09-openclaw-web-小游戏网页平台.md | 37 ++ ...6-03-k3s-自动备份与恢复-openlist-webdav.md | 8 +- scripts/.env.verify.example | 103 +++++ scripts/README.md | 39 ++ scripts/ssh/smoke-verify-matrix-on-ylc61.sh | 6 + scripts/verify.sh | 101 +++++ 192 files changed, 4006 insertions(+), 320 deletions(-) create mode 100644 ansible/files/00-01-k3s-基础概念/README.md create mode 100644 ansible/files/00-04-部署环境说明/README.md create mode 100644 ansible/files/01-01-k3s-控制节点含traefik/README.md create mode 100644 ansible/files/01-02-k3s-工作节点/README.md create mode 100644 ansible/files/01-03-armv7-standalone-docker/README.md create mode 100644 ansible/files/01-04-双控制节点ha/README.md create mode 100644 ansible/files/01-05-armv7-nfs服务安装/README.md create mode 100644 ansible/files/01-06-节点初始化-ansible-实践/README.md create mode 100644 ansible/files/02-00-nginx-系列说明/README.md create mode 100644 ansible/files/02-01-nginx-control-ingress/README.md create mode 100644 ansible/files/02-02-nginx-control-ingressroute/README.md create mode 100644 ansible/files/02-03-nginx-worker-ingress/README.md create mode 100644 ansible/files/02-04-nginx-worker-ingressroute/README.md rename ansible/files/{nginx-matrix => 02-05-nginx-matrix}/01-control-ingress.yaml (99%) rename ansible/files/{nginx-matrix => 02-05-nginx-matrix}/02-control-ingressroute.yaml (99%) rename ansible/files/{nginx-matrix => 02-05-nginx-matrix}/03-worker-ingress.yaml (99%) rename ansible/files/{nginx-matrix => 02-05-nginx-matrix}/04-worker-ingressroute.yaml (99%) rename ansible/files/{nginx-matrix => 02-05-nginx-matrix}/README.md (99%) rename ansible/files/{traefik-dashboard => 03-01-traefik-dashboard}/traefik-dashboard.yaml (99%) rename ansible/files/{nginx-matrix-tls => 03-02-nginx-matrix-tls}/01-control-ingress.yaml (99%) rename ansible/files/{nginx-matrix-tls => 
03-02-nginx-matrix-tls}/02-control-ingressroute.yaml (99%) rename ansible/files/{nginx-matrix-tls => 03-02-nginx-matrix-tls}/03-worker-ingress.yaml (99%) rename ansible/files/{nginx-matrix-tls => 03-02-nginx-matrix-tls}/04-worker-ingressroute.yaml (99%) rename ansible/files/{traefik-acme => 03-02-traefik-acme}/traefik-acme.yaml (99%) rename ansible/files/{traefik-dashboard-acme => 03-03-traefik-dashboard-acme}/tomcat-acme.yaml (99%) create mode 100644 ansible/files/03-03-traefik-dashboard-acme/traefik-dashboard-acme.yaml rename ansible/files/{cloudflare-tunnel => 03-04-cloudflare-tunnel}/cloudflared.yaml (99%) create mode 100644 ansible/files/03-05-local-path-config/local-path-config-lab.json rename ansible/files/{local-path-demo => 03-05-local-path-demo}/local-path-pvc-demo.yaml (99%) rename ansible/files/{nfs-demo => 03-06-nfs-demo}/nfs-pv-pvc-demo.yaml (83%) create mode 100644 ansible/files/03-07-longhorn/values-lab.yaml create mode 100644 ansible/files/03-08-k3s-ha-集群配置与切换/README.md create mode 100644 ansible/files/03-09-k3s-gitops-集群配置管理/README.md rename ansible/files/{traefik-custom-ports => 03-10-traefik-custom-ports}/traefik-custom-ports.yaml (100%) rename ansible/files/{nodejs-demo => 04-01-nodejs-demo}/04-01-nodejs-demo.yaml (99%) rename ansible/files/{nodejs-demo => 04-01-nodejs-demo}/04-02-nodejs-demo.yaml (99%) rename ansible/files/{nodejs-demo => 04-01-nodejs-demo}/04-03-nodejs-demo.yaml (99%) rename ansible/files/{nodejs-demo => 04-01-nodejs-demo}/04-04-nodejs-demo.yaml (99%) rename ansible/files/{nodejs-demo => 04-01-nodejs-demo}/04-05-nodejs-demo.yaml (99%) rename ansible/files/{nodejs-demo => 04-01-nodejs-demo}/04-06-nodejs-demo.yaml (99%) rename ansible/files/{nodejs-demo => 04-01-nodejs-demo}/04-07-nodejs-demo.yaml (99%) rename ansible/files/{nodejs-demo => 04-01-nodejs-demo}/04-08-nodejs-demo.yaml (99%) rename ansible/files/{nodejs-demo => 04-01-nodejs-demo}/04-09-nodejs-demo.yaml (99%) rename ansible/files/{nodejs-demo => 
04-01-nodejs-demo}/04-10-nodejs-demo.yaml (99%) rename ansible/files/{nodejs-demo => 04-01-nodejs-demo}/04-11-nodejs-demo.yaml (99%) rename ansible/files/{nodejs-demo => 04-01-nodejs-demo}/04-12-nodejs-demo.yaml (99%) rename ansible/files/{nodejs-demo => 04-01-nodejs-demo}/04-13-nodejs-demo.yaml (99%) rename ansible/files/{nodejs-demo => 04-01-nodejs-demo}/README.md (93%) rename ansible/files/{nodejs-demo => 04-01-nodejs-demo}/nodejs-demo-secret.example.yaml (99%) create mode 100644 ansible/files/04-02-nodejs-镜像与运行命令/README.md create mode 100644 ansible/files/04-03-nodejs-环境变量与配置注入/README.md create mode 100644 ansible/files/04-04-nodejs-端口与Service/README.md create mode 100644 ansible/files/04-05-nodejs-资源请求与限制/README.md create mode 100644 ansible/files/04-06-nodejs-探针与健康检查/README.md create mode 100644 ansible/files/04-07-nodejs-调度与亲和/README.md create mode 100644 ansible/files/04-08-nodejs-安全上下文/README.md create mode 100644 ansible/files/04-09-nodejs-存储与卷/README.md create mode 100644 ansible/files/04-10-nodejs-Ingress与Traefik/README.md create mode 100644 ansible/files/04-11-nodejs-副本与滚动发布/README.md create mode 100644 ansible/files/04-12-nodejs-TLS与证书/README.md create mode 100644 ansible/files/04-13-nodejs-HPA/README.md create mode 100644 ansible/files/04-14-nodejs-GitOps与CI流水线/README.md rename ansible/files/{homer => 05-01-homer}/homer.yaml (53%) rename ansible/files/{onenav => 05-02-onenav}/onenav-proxy.yaml (97%) rename ansible/files/{gitlab => 05-03-gitlab-runner}/gitlab-ci-runner-tags.example.yml (99%) rename ansible/files/{gitlab => 05-04-gitlab-cicd}/README.md (74%) rename ansible/files/{gitlab => 05-04-gitlab-cicd}/gitlab-ci-minimal.example.yml (99%) rename ansible/files/{gitlab => 05-04-gitlab-cicd}/gitlab-ci-multi-arch-deploy.example.yml (99%) create mode 100644 ansible/files/05-05-prometheus与grafana/README.md rename ansible/files/{openlist => 05-06-openlist}/openlist-backup-cronjob.yaml (99%) rename ansible/files/{openclaw => 
05-07-openclaw}/openclaw-proxy.yaml (100%) create mode 100644 ansible/files/05-07-openclaw/openclaw-server.yml rename ansible/files/{openclaw => 05-08-openclaw}/openclaw-k3s-experimental.yaml (100%) create mode 100644 ansible/files/05-09-openclaw-web-小游戏网页平台/openclaw-web.yml create mode 100644 ansible/files/06-01-k3s-networkpolicy-故障排查/README.md create mode 100644 ansible/files/06-02-运维小结/README.md create mode 100644 ansible/files/06-03-k3s-自动备份与恢复-openlist-webdav/README.md rename ansible/files/{openlist => 06-03-openlist-webdav}/app-data-backup-cronjob.yaml (99%) rename ansible/files/{openlist => 06-03-openlist-webdav}/app-data-restore-job.yaml (99%) delete mode 100644 ansible/files/traefik-dashboard-acme/traefik-dashboard-acme.yaml create mode 100644 ansible/playbooks/apply-local-path-config-lab.yml create mode 100644 ansible/playbooks/k3s-prepare-storage.yml create mode 100644 ansible/playbooks/longhorn-install.yml create mode 100644 ansible/playbooks/verify/00-01.yml create mode 100644 ansible/playbooks/verify/00-04.yml create mode 100644 ansible/playbooks/verify/01-01.yml create mode 100644 ansible/playbooks/verify/01-02.yml create mode 100644 ansible/playbooks/verify/01-03.yml create mode 100644 ansible/playbooks/verify/01-04.yml create mode 100644 ansible/playbooks/verify/01-05.yml create mode 100644 ansible/playbooks/verify/01-06.yml create mode 100644 ansible/playbooks/verify/01-07.yml create mode 100644 ansible/playbooks/verify/02-00.yml create mode 100644 ansible/playbooks/verify/02-01.yml create mode 100644 ansible/playbooks/verify/02-02.yml create mode 100644 ansible/playbooks/verify/02-03.yml create mode 100644 ansible/playbooks/verify/02-04.yml create mode 100644 ansible/playbooks/verify/02-05.yml create mode 100644 ansible/playbooks/verify/03-01.yml create mode 100644 ansible/playbooks/verify/03-02.yml create mode 100644 ansible/playbooks/verify/03-03.yml create mode 100644 ansible/playbooks/verify/03-04.yml create mode 100644 
ansible/playbooks/verify/03-05.yml create mode 100644 ansible/playbooks/verify/03-06.yml create mode 100644 ansible/playbooks/verify/03-07.yml create mode 100644 ansible/playbooks/verify/03-08.yml create mode 100644 ansible/playbooks/verify/03-09.yml create mode 100644 ansible/playbooks/verify/03-10.yml create mode 100644 ansible/playbooks/verify/04-01.yml create mode 100644 ansible/playbooks/verify/04-02.yml create mode 100644 ansible/playbooks/verify/04-03.yml create mode 100644 ansible/playbooks/verify/04-04.yml create mode 100644 ansible/playbooks/verify/04-05.yml create mode 100644 ansible/playbooks/verify/04-06.yml create mode 100644 ansible/playbooks/verify/04-07.yml create mode 100644 ansible/playbooks/verify/04-08.yml create mode 100644 ansible/playbooks/verify/04-09.yml create mode 100644 ansible/playbooks/verify/04-10.yml create mode 100644 ansible/playbooks/verify/04-11.yml create mode 100644 ansible/playbooks/verify/04-12.yml create mode 100644 ansible/playbooks/verify/04-13.yml create mode 100644 ansible/playbooks/verify/04-14.yml create mode 100644 ansible/playbooks/verify/05-01.yml create mode 100644 ansible/playbooks/verify/05-02.yml create mode 100644 ansible/playbooks/verify/05-03.yml create mode 100644 ansible/playbooks/verify/05-04.yml create mode 100644 ansible/playbooks/verify/05-05.yml create mode 100644 ansible/playbooks/verify/05-06.yml create mode 100644 ansible/playbooks/verify/05-07.yml create mode 100644 ansible/playbooks/verify/05-08.yml create mode 100644 ansible/playbooks/verify/05-09.yml create mode 100644 ansible/playbooks/verify/06-01.yml create mode 100644 ansible/playbooks/verify/06-02.yml create mode 100644 ansible/playbooks/verify/06-03.yml create mode 100644 ansible/playbooks/verify/_noop-tasks.yml create mode 100644 docs/00-05-测试与验证框架.md create mode 100644 docs/03-10-k3s-traefik-custom-ports.md create mode 100644 docs/05-09-openclaw-web-小游戏网页平台.md create mode 100644 scripts/.env.verify.example create mode 100644 
scripts/ssh/smoke-verify-matrix-on-ylc61.sh create mode 100644 scripts/verify.sh diff --git a/.gitignore b/.gitignore index c4f32ba..d95d6bd 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,7 @@ .cursor .ssh +# 本地填写的验证编排环境变量(从 scripts/.env.verify.example 复制) +scripts/.env.verify _bmad _bmad-output design-artifacts diff --git a/README.md b/README.md index 76bf40c..55ef727 100644 --- a/README.md +++ b/README.md @@ -11,13 +11,14 @@ - 部署环境说明:`docs/00-04-部署环境说明.md`(节点布局、IP、版本等) - 脚本主入口:`scripts/README.md` - 验证状态一览:`docs/00-02-验证矩阵.md` +- 测试与验证框架设计:`docs/00-05-测试与验证框架.md` 简单理解这三份入口的分工: - `README.md`:新手入口,看“要做什么、按什么顺序做”; - `00-00-构建总览.md`:文档导航,看“下一步该看哪一篇”; - `00-01-k3s-基础概念.md`:概念速查,看“不懂的 K3s/Traefik/NetworkPolicy 术语”; -- `00-02-验证矩阵.md`:状态面板,看“哪些文档已经在真实环境跑通过”。 +- `00-02-验证矩阵.md`:状态面板,看“哪些文档已经在真实环境跑通过”(当前以手工验证为准)。 目录约定很简单: diff --git a/ansible/files/00-01-k3s-基础概念/README.md b/ansible/files/00-01-k3s-基础概念/README.md new file mode 100644 index 0000000..74e8766 --- /dev/null +++ b/ansible/files/00-01-k3s-基础概念/README.md @@ -0,0 +1,9 @@ +# 00-01-k3s-基础概念(占位) + +对应文档:[`docs/00-01-k3s-基础概念.md`](../../../docs/00-01-k3s-基础概念.md) + +## 说明 + +- 本篇为概念性文档,**不提供可部署的 Kubernetes 清单**。 +- 验证方式:按文档理解与对照集群实际输出即可(无 `kubectl apply -f` 目标)。 + diff --git a/ansible/files/00-04-部署环境说明/README.md b/ansible/files/00-04-部署环境说明/README.md new file mode 100644 index 0000000..010e24c --- /dev/null +++ b/ansible/files/00-04-部署环境说明/README.md @@ -0,0 +1,9 @@ +# 00-04-部署环境说明(占位) + +对应文档:[`docs/00-04-部署环境说明.md`](../../../docs/00-04-部署环境说明.md) + +## 说明 + +- 本篇为环境说明文档,**不提供可部署的 Kubernetes 清单**。 +- 验证方式:按文档逐项核对你的实际环境信息(节点、磁盘挂载、版本等)。 + diff --git a/ansible/files/01-01-k3s-控制节点含traefik/README.md b/ansible/files/01-01-k3s-控制节点含traefik/README.md new file mode 100644 index 0000000..6bb640b --- /dev/null +++ b/ansible/files/01-01-k3s-控制节点含traefik/README.md @@ -0,0 +1,13 @@ +# 01-01-k3s-控制节点含traefik(占位) + +对应文档:[`docs/01-01-k3s-控制节点含traefik.md`](../../../docs/01-01-k3s-控制节点含traefik.md) + +## 说明 + +- 本篇主要是 **K3s 
安装与集群初始化**,核心部署逻辑在 Ansible playbook 中。 +- 本目录仅作为 doc_id 对齐占位;不单独维护 K8s manifests。 + +## 关联(参考) + +- Ansible:`ansible/playbooks/k3s-init-and-install.yml` + diff --git a/ansible/files/01-02-k3s-工作节点/README.md b/ansible/files/01-02-k3s-工作节点/README.md new file mode 100644 index 0000000..666c3ad --- /dev/null +++ b/ansible/files/01-02-k3s-工作节点/README.md @@ -0,0 +1,13 @@ +# 01-02-k3s-工作节点(占位) + +对应文档:[`docs/01-02-k3s-工作节点.md`](../../../docs/01-02-k3s-工作节点.md) + +## 说明 + +- 本篇主要是 **工作节点加入 K3s 集群** 与节点侧配置。 +- 本目录仅作为 doc_id 对齐占位;不单独维护 K8s manifests。 + +## 关联(参考) + +- Ansible:`ansible/playbooks/k3s-init-and-install.yml` + diff --git a/ansible/files/01-03-armv7-standalone-docker/README.md b/ansible/files/01-03-armv7-standalone-docker/README.md new file mode 100644 index 0000000..0c4cbb7 --- /dev/null +++ b/ansible/files/01-03-armv7-standalone-docker/README.md @@ -0,0 +1,9 @@ +# 01-03-armv7-standalone-docker(占位) + +对应文档:[`docs/01-03-armv7-standalone-docker.md`](../../../docs/01-03-armv7-standalone-docker.md) + +## 说明 + +- 本篇为 armv7 设备的 Docker 独立部署说明,**不提供 K3s/Kubernetes 清单**。 +- 本目录仅用于 doc_id 对齐占位。 + diff --git a/ansible/files/01-04-双控制节点ha/README.md b/ansible/files/01-04-双控制节点ha/README.md new file mode 100644 index 0000000..10987ef --- /dev/null +++ b/ansible/files/01-04-双控制节点ha/README.md @@ -0,0 +1,9 @@ +# 01-04-双控制节点ha(占位) + +对应文档:[`docs/01-04-双控制节点ha.md`](../../../docs/01-04-双控制节点ha.md) + +## 说明 + +- 本篇为 HA/双控制节点方案说明,部署更多依赖集群架构与外部 LB 配置。 +- 本目录仅用于 doc_id 对齐占位;不提供独立 K8s manifests。 + diff --git a/ansible/files/01-05-armv7-nfs服务安装/README.md b/ansible/files/01-05-armv7-nfs服务安装/README.md new file mode 100644 index 0000000..f6eac80 --- /dev/null +++ b/ansible/files/01-05-armv7-nfs服务安装/README.md @@ -0,0 +1,9 @@ +# 01-05-armv7-nfs服务安装(占位) + +对应文档:[`docs/01-05-armv7-nfs服务安装.md`](../../../docs/01-05-armv7-nfs服务安装.md) + +## 说明 + +- 本篇为 armv7 设备上 NFS 服务安装说明,**不提供 K3s/Kubernetes 清单**。 +- 本目录仅用于 doc_id 对齐占位。 + diff --git a/ansible/files/01-06-节点初始化-ansible-实践/README.md 
b/ansible/files/01-06-节点初始化-ansible-实践/README.md new file mode 100644 index 0000000..dac2a09 --- /dev/null +++ b/ansible/files/01-06-节点初始化-ansible-实践/README.md @@ -0,0 +1,13 @@ +# 01-06-节点初始化-ansible-实践(占位) + +对应文档:[`docs/01-06-节点初始化-ansible-实践.md`](../../../docs/01-06-节点初始化-ansible-实践.md) + +## 说明 + +- 本篇的“真源”是 Ansible playbooks(初始化、安装、验证)。 +- 本目录仅用于 doc_id 对齐占位;不单独维护 K8s manifests。 + +## 关联(参考) + +- Ansible:`ansible/playbooks/k3s-init-and-install.yml` + diff --git a/ansible/files/02-00-nginx-系列说明/README.md b/ansible/files/02-00-nginx-系列说明/README.md new file mode 100644 index 0000000..5656ee2 --- /dev/null +++ b/ansible/files/02-00-nginx-系列说明/README.md @@ -0,0 +1,12 @@ +# 02-00-nginx-系列说明(占位) + +对应文档:[`docs/02-00-nginx-系列说明.md`](../../../docs/02-00-nginx-系列说明.md) + +## 清单复用说明 + +本系列(02-01~02-04)的可部署清单统一收敛在: + +- `ansible/files/02-05-nginx-matrix/` + +本目录仅用于 doc_id 对齐占位。 + diff --git a/ansible/files/02-01-nginx-control-ingress/README.md b/ansible/files/02-01-nginx-control-ingress/README.md new file mode 100644 index 0000000..98603ff --- /dev/null +++ b/ansible/files/02-01-nginx-control-ingress/README.md @@ -0,0 +1,15 @@ +# 02-01-nginx-control-ingress(占位) + +对应文档:[`docs/02-01-nginx-control-ingress.md`](../../../docs/02-01-nginx-control-ingress.md) + +## 真源清单 + +- 复用清单目录:`ansible/files/02-05-nginx-matrix/` +- 对应文件:`01-control-ingress.yaml` + +应用示例: + +```bash +kubectl apply -f ansible/files/02-05-nginx-matrix/01-control-ingress.yaml +``` + diff --git a/ansible/files/02-02-nginx-control-ingressroute/README.md b/ansible/files/02-02-nginx-control-ingressroute/README.md new file mode 100644 index 0000000..c38fc43 --- /dev/null +++ b/ansible/files/02-02-nginx-control-ingressroute/README.md @@ -0,0 +1,15 @@ +# 02-02-nginx-control-ingressroute(占位) + +对应文档:[`docs/02-02-nginx-control-ingressroute.md`](../../../docs/02-02-nginx-control-ingressroute.md) + +## 真源清单 + +- 复用清单目录:`ansible/files/02-05-nginx-matrix/` +- 对应文件:`02-control-ingressroute.yaml` + +应用示例: + +```bash +kubectl apply -f 
ansible/files/02-05-nginx-matrix/02-control-ingressroute.yaml +``` + diff --git a/ansible/files/02-03-nginx-worker-ingress/README.md b/ansible/files/02-03-nginx-worker-ingress/README.md new file mode 100644 index 0000000..163230e --- /dev/null +++ b/ansible/files/02-03-nginx-worker-ingress/README.md @@ -0,0 +1,15 @@ +# 02-03-nginx-worker-ingress(占位) + +对应文档:[`docs/02-03-nginx-worker-ingress.md`](../../../docs/02-03-nginx-worker-ingress.md) + +## 真源清单 + +- 复用清单目录:`ansible/files/02-05-nginx-matrix/` +- 对应文件:`03-worker-ingress.yaml` + +应用示例: + +```bash +kubectl apply -f ansible/files/02-05-nginx-matrix/03-worker-ingress.yaml +``` + diff --git a/ansible/files/02-04-nginx-worker-ingressroute/README.md b/ansible/files/02-04-nginx-worker-ingressroute/README.md new file mode 100644 index 0000000..99f0ce2 --- /dev/null +++ b/ansible/files/02-04-nginx-worker-ingressroute/README.md @@ -0,0 +1,15 @@ +# 02-04-nginx-worker-ingressroute(占位) + +对应文档:[`docs/02-04-nginx-worker-ingressroute.md`](../../../docs/02-04-nginx-worker-ingressroute.md) + +## 真源清单 + +- 复用清单目录:`ansible/files/02-05-nginx-matrix/` +- 对应文件:`04-worker-ingressroute.yaml` + +应用示例: + +```bash +kubectl apply -f ansible/files/02-05-nginx-matrix/04-worker-ingressroute.yaml +``` + diff --git a/ansible/files/nginx-matrix/01-control-ingress.yaml b/ansible/files/02-05-nginx-matrix/01-control-ingress.yaml similarity index 99% rename from ansible/files/nginx-matrix/01-control-ingress.yaml rename to ansible/files/02-05-nginx-matrix/01-control-ingress.yaml index 0ae266b..55d9759 100644 --- a/ansible/files/nginx-matrix/01-control-ingress.yaml +++ b/ansible/files/02-05-nginx-matrix/01-control-ingress.yaml @@ -98,3 +98,4 @@ spec: # Ingress 规则 name: nginx-m1 # Service 名 port: # Service 端口 number: 80 # 端口号 + diff --git a/ansible/files/nginx-matrix/02-control-ingressroute.yaml b/ansible/files/02-05-nginx-matrix/02-control-ingressroute.yaml similarity index 99% rename from ansible/files/nginx-matrix/02-control-ingressroute.yaml rename to 
ansible/files/02-05-nginx-matrix/02-control-ingressroute.yaml index 0e3514d..a9f5cea 100644 --- a/ansible/files/nginx-matrix/02-control-ingressroute.yaml +++ b/ansible/files/02-05-nginx-matrix/02-control-ingressroute.yaml @@ -92,3 +92,4 @@ spec: # 路由规则 services: # 匹配后转发的服务 - name: nginx-m2 # 后端 Service 名称 port: 80 # 后端 Service 端口 + diff --git a/ansible/files/nginx-matrix/03-worker-ingress.yaml b/ansible/files/02-05-nginx-matrix/03-worker-ingress.yaml similarity index 99% rename from ansible/files/nginx-matrix/03-worker-ingress.yaml rename to ansible/files/02-05-nginx-matrix/03-worker-ingress.yaml index 6c8a771..a7b93da 100644 --- a/ansible/files/nginx-matrix/03-worker-ingress.yaml +++ b/ansible/files/02-05-nginx-matrix/03-worker-ingress.yaml @@ -94,3 +94,4 @@ spec: # Ingress 规则 name: nginx-m3 # Service 名称 port: # 后端端口 number: 80 # 端口号 + diff --git a/ansible/files/nginx-matrix/04-worker-ingressroute.yaml b/ansible/files/02-05-nginx-matrix/04-worker-ingressroute.yaml similarity index 99% rename from ansible/files/nginx-matrix/04-worker-ingressroute.yaml rename to ansible/files/02-05-nginx-matrix/04-worker-ingressroute.yaml index e17d00f..8a02c6d 100644 --- a/ansible/files/nginx-matrix/04-worker-ingressroute.yaml +++ b/ansible/files/02-05-nginx-matrix/04-worker-ingressroute.yaml @@ -92,3 +92,4 @@ spec: # IngressRoute 规则 services: # 后端服务列表 - name: nginx-m4 # Service 名称 port: 80 # Service 端口 + diff --git a/ansible/files/nginx-matrix/README.md b/ansible/files/02-05-nginx-matrix/README.md similarity index 99% rename from ansible/files/nginx-matrix/README.md rename to ansible/files/02-05-nginx-matrix/README.md index d1eedac..f36986c 100644 --- a/ansible/files/nginx-matrix/README.md +++ b/ansible/files/02-05-nginx-matrix/README.md @@ -10,3 +10,4 @@ | 04-worker-ingressroute.yaml | M4 工作+IngressRoute | /demo-m4 | nodeSelector=ylc64 | M4 默认指定 ylc64,M3 随机工作节点;按实际修改。 + diff --git a/ansible/files/traefik-dashboard/traefik-dashboard.yaml 
b/ansible/files/03-01-traefik-dashboard/traefik-dashboard.yaml similarity index 99% rename from ansible/files/traefik-dashboard/traefik-dashboard.yaml rename to ansible/files/03-01-traefik-dashboard/traefik-dashboard.yaml index 4e61a04..0280135 100644 --- a/ansible/files/traefik-dashboard/traefik-dashboard.yaml +++ b/ansible/files/03-01-traefik-dashboard/traefik-dashboard.yaml @@ -35,3 +35,4 @@ spec: # 路由规则 services: # 匹配到后转发给的服务列表 - name: api@internal # Traefik 内置 API 服务 kind: TraefikService # 该服务的 CRD 类型 + diff --git a/ansible/files/nginx-matrix-tls/01-control-ingress.yaml b/ansible/files/03-02-nginx-matrix-tls/01-control-ingress.yaml similarity index 99% rename from ansible/files/nginx-matrix-tls/01-control-ingress.yaml rename to ansible/files/03-02-nginx-matrix-tls/01-control-ingress.yaml index 6688d7d..19c86f2 100644 --- a/ansible/files/nginx-matrix-tls/01-control-ingress.yaml +++ b/ansible/files/03-02-nginx-matrix-tls/01-control-ingress.yaml @@ -113,3 +113,4 @@ spec: # Ingress 规则 name: nginx-m1 # 共用 Service port: # 后端端口 number: 80 # 端口号 + diff --git a/ansible/files/nginx-matrix-tls/02-control-ingressroute.yaml b/ansible/files/03-02-nginx-matrix-tls/02-control-ingressroute.yaml similarity index 99% rename from ansible/files/nginx-matrix-tls/02-control-ingressroute.yaml rename to ansible/files/03-02-nginx-matrix-tls/02-control-ingressroute.yaml index 27cc38f..dc1ef92 100644 --- a/ansible/files/nginx-matrix-tls/02-control-ingressroute.yaml +++ b/ansible/files/03-02-nginx-matrix-tls/02-control-ingressroute.yaml @@ -96,3 +96,4 @@ spec: # 规则 services: # 后端服务 - name: nginx-m2 # 后端 Service port: 80 # 端口 + diff --git a/ansible/files/nginx-matrix-tls/03-worker-ingress.yaml b/ansible/files/03-02-nginx-matrix-tls/03-worker-ingress.yaml similarity index 99% rename from ansible/files/nginx-matrix-tls/03-worker-ingress.yaml rename to ansible/files/03-02-nginx-matrix-tls/03-worker-ingress.yaml index 86a4788..0ab17af 100644 --- 
a/ansible/files/nginx-matrix-tls/03-worker-ingress.yaml +++ b/ansible/files/03-02-nginx-matrix-tls/03-worker-ingress.yaml @@ -108,3 +108,4 @@ spec: # Ingress 规则 name: nginx-m3 # 后端 Service 名称 port: # 后端端口 number: 80 # 端口号 + diff --git a/ansible/files/nginx-matrix-tls/04-worker-ingressroute.yaml b/ansible/files/03-02-nginx-matrix-tls/04-worker-ingressroute.yaml similarity index 99% rename from ansible/files/nginx-matrix-tls/04-worker-ingressroute.yaml rename to ansible/files/03-02-nginx-matrix-tls/04-worker-ingressroute.yaml index 76bdd00..fd86bf8 100644 --- a/ansible/files/nginx-matrix-tls/04-worker-ingressroute.yaml +++ b/ansible/files/03-02-nginx-matrix-tls/04-worker-ingressroute.yaml @@ -96,3 +96,4 @@ spec: # 规则 services: # 后端服务 - name: nginx-m4 # 后端 Service 名称 port: 80 # 后端端口 + diff --git a/ansible/files/traefik-acme/traefik-acme.yaml b/ansible/files/03-02-traefik-acme/traefik-acme.yaml similarity index 99% rename from ansible/files/traefik-acme/traefik-acme.yaml rename to ansible/files/03-02-traefik-acme/traefik-acme.yaml index 0ee8442..e0c10ed 100644 --- a/ansible/files/traefik-acme/traefik-acme.yaml +++ b/ansible/files/03-02-traefik-acme/traefik-acme.yaml @@ -38,3 +38,4 @@ spec: # chart 注入配置的具体内容 nodeSelector: # 把 Traefik Pod 固定到指定节点(配合 RWO 本地存储更安全) kubernetes.io/hostname: ylc61 # 固定节点主机名(按你的实际节点修改) + diff --git a/ansible/files/traefik-dashboard-acme/tomcat-acme.yaml b/ansible/files/03-03-traefik-dashboard-acme/tomcat-acme.yaml similarity index 99% rename from ansible/files/traefik-dashboard-acme/tomcat-acme.yaml rename to ansible/files/03-03-traefik-dashboard-acme/tomcat-acme.yaml index 1dbe9d0..5fff648 100644 --- a/ansible/files/traefik-dashboard-acme/tomcat-acme.yaml +++ b/ansible/files/03-03-traefik-dashboard-acme/tomcat-acme.yaml @@ -92,3 +92,4 @@ spec: # Ingress 规则 name: tomcat-test05 # Service 名称 port: # Service 端口 number: 8080 # 端口号 + diff --git a/ansible/files/03-03-traefik-dashboard-acme/traefik-dashboard-acme.yaml 
b/ansible/files/03-03-traefik-dashboard-acme/traefik-dashboard-acme.yaml new file mode 100644 index 0000000..35ef0eb --- /dev/null +++ b/ansible/files/03-03-traefik-dashboard-acme/traefik-dashboard-acme.yaml @@ -0,0 +1,74 @@ +# 03-03 Traefik Dashboard + ACME(合并版 HelmChartConfig) +# 说明:同一 chart 只能有一份 HelmChartConfig(name: traefik),所以 Dashboard 与 ACME 必须合并。 +# 使用前:替换 <YOUR_EMAIL>;创建 cloudflare-api-token Secret;按实际修改 nodeSelector/trustedIPs/hosts。 +--- +apiVersion: helm.cattle.io/v1 +kind: HelmChartConfig +metadata: + name: traefik + namespace: kube-system +spec: + valuesContent: |- + ports: + web: + expose: true + websecure: + expose: true + traefik: + expose: true + + additionalArguments: + # Dashboard + - "--api.dashboard=true" + - "--api.insecure=true" + + # ACME(Cloudflare DNS-01) + - "--certificatesresolvers.cloudflare.acme.dnschallenge.resolvers=1.1.1.1:53,1.0.0.1:53" + - "--certificatesresolvers.cloudflare.acme.email=<YOUR_EMAIL>" + - "--certificatesresolvers.cloudflare.acme.storage=/data/acme.json" + # - "--certificatesresolvers.cloudflare.acme.caserver=https://acme-staging-v02.api.letsencrypt.org/directory" + - "--certificatesresolvers.cloudflare.acme.dnschallenge.provider=cloudflare" + - "--certificatesresolvers.cloudflare.acme.dnschallenge.propagation.delayBeforeChecks=600" + + # 健康检查:/ping 走 443(给 HAProxy https httpchk 用) + - "--ping=true" + - "--ping.entryPoint=websecure" + + # PROXY protocol(HAProxy 前置时需要) + - "--entrypoints.web.proxyProtocol.trustedIPs=192.168.2.0/24" + - "--entrypoints.websecure.proxyProtocol.trustedIPs=192.168.2.0/24" + + env: + - name: CF_DNS_API_TOKEN + valueFrom: + secretKeyRef: + name: cloudflare-api-token + key: api-token + + nodeSelector: + kubernetes.io/hostname: ylc61 + + # persistence:将 /data 持久化(local-path PVC),保证 acme.json 落盘 + persistence: + enabled: true + name: data + accessMode: ReadWriteOnce + size: 128Mi + path: /data + +--- +apiVersion: traefik.io/v1alpha1 +kind: IngressRoute +metadata: + name: traefik-dashboard + namespace: kube-system 
+spec: + entryPoints: + - web + routes: + - match: PathPrefix(`/dashboard`) || PathPrefix(`/api`) + kind: Rule + services: + - name: api@internal + kind: TraefikService + diff --git a/ansible/files/cloudflare-tunnel/cloudflared.yaml b/ansible/files/03-04-cloudflare-tunnel/cloudflared.yaml similarity index 99% rename from ansible/files/cloudflare-tunnel/cloudflared.yaml rename to ansible/files/03-04-cloudflare-tunnel/cloudflared.yaml index dd9fed8..9ee9973 100644 --- a/ansible/files/cloudflare-tunnel/cloudflared.yaml +++ b/ansible/files/03-04-cloudflare-tunnel/cloudflared.yaml @@ -35,3 +35,4 @@ spec: # Deployment 期望状态 secretKeyRef: # 从 Secret 的 key 取值 name: cloudflared-credentials # Secret 名称 key: TUNNEL_TOKEN # Secret 中的 key + diff --git a/ansible/files/03-05-local-path-config/local-path-config-lab.json b/ansible/files/03-05-local-path-config/local-path-config-lab.json new file mode 100644 index 0000000..dbb03d8 --- /dev/null +++ b/ansible/files/03-05-local-path-config/local-path-config-lab.json @@ -0,0 +1,9 @@ +{ + "nodePathMap": [ + { + "node": "DEFAULT_PATH_FOR_NON_LISTED_NODES", + "paths": ["/storage/storage"] + } + ] +} + diff --git a/ansible/files/local-path-demo/local-path-pvc-demo.yaml b/ansible/files/03-05-local-path-demo/local-path-pvc-demo.yaml similarity index 99% rename from ansible/files/local-path-demo/local-path-pvc-demo.yaml rename to ansible/files/03-05-local-path-demo/local-path-pvc-demo.yaml index 84ff6f4..b6319f8 100644 --- a/ansible/files/local-path-demo/local-path-pvc-demo.yaml +++ b/ansible/files/03-05-local-path-demo/local-path-pvc-demo.yaml @@ -39,3 +39,4 @@ spec: # Deployment 的期望状态(副本数、选择器、Pod 模板等) - name: data # Pod 内的卷名(给 volumeMounts 用) persistentVolumeClaim: # 使用 PVC 作为卷来源 claimName: local-pvc-demo # 绑定到哪个 PVC(必须与上面 PVC metadata.name 且同 namespace) + diff --git a/ansible/files/nfs-demo/nfs-pv-pvc-demo.yaml b/ansible/files/03-06-nfs-demo/nfs-pv-pvc-demo.yaml similarity index 83% rename from ansible/files/nfs-demo/nfs-pv-pvc-demo.yaml 
rename to ansible/files/03-06-nfs-demo/nfs-pv-pvc-demo.yaml index 00a9d5f..1a0b1d4 100644 --- a/ansible/files/nfs-demo/nfs-pv-pvc-demo.yaml +++ b/ansible/files/03-06-nfs-demo/nfs-pv-pvc-demo.yaml @@ -10,8 +10,8 @@ spec: # PV 规格 - ReadWriteMany # RWX:多节点可读写 persistentVolumeReclaimPolicy: Retain # 回收策略:删除 PVC 后保留底层数据 nfs: # 存储后端:NFS - server: 192.168.2.22 # NFS 服务器地址 - path: /data/nfs # NFS 导出目录 + server: # NFS 服务器地址(示例:192.168.2.22;应用前必须替换) + path: # NFS 导出目录(示例:/sdcard;应用前必须替换) --- apiVersion: v1 # PVC 使用的核心 API 版本 kind: PersistentVolumeClaim # 资源类型:持久卷声明 @@ -25,3 +25,4 @@ spec: # PVC 规格 requests: # 配额请求 storage: 5Gi # 申请容量 volumeName: nfs-pv-demo # 显式绑定到指定 PV + diff --git a/ansible/files/03-07-longhorn/values-lab.yaml b/ansible/files/03-07-longhorn/values-lab.yaml new file mode 100644 index 0000000..4d2eef1 --- /dev/null +++ b/ansible/files/03-07-longhorn/values-lab.yaml @@ -0,0 +1,14 @@ +# Longhorn Helm values — 本仓库实验室(四节点 10G+32G,/storage 独立挂载) +# Chart:https://charts.longhorn.io 字段说明见官方 chart values.yaml(与 app 同版本 tag) +# 使用:helm upgrade --install longhorn longhorn/longhorn -n longhorn-system --create-namespace -f values-lab.yaml --version 1.7.2 + +defaultSettings: + defaultDataPath: /storage/longhorn + # 字符串形式,与 chart 一致;32G 数据盘实验环境先省空间,要演练 HA 可改为 "2" 或 "3" + defaultReplicaCount: "1" + +persistence: + defaultClass: true + defaultClassReplicaCount: 1 + defaultFsType: ext4 + diff --git a/ansible/files/03-08-k3s-ha-集群配置与切换/README.md b/ansible/files/03-08-k3s-ha-集群配置与切换/README.md new file mode 100644 index 0000000..2187f75 --- /dev/null +++ b/ansible/files/03-08-k3s-ha-集群配置与切换/README.md @@ -0,0 +1,9 @@ +# 03-08-k3s-ha-集群配置与切换(占位) + +对应文档:[`docs/03-08-k3s-ha-集群配置与切换.md`](../../../docs/03-08-k3s-ha-集群配置与切换.md) + +## 说明 + +- 本篇偏架构/流程与配置项梳理,具体落地会涉及多节点与外部组件(如 LB/DNS/证书)。 +- 本目录仅用于 doc_id 对齐占位;暂无独立可复用 manifests。 + diff --git a/ansible/files/03-09-k3s-gitops-集群配置管理/README.md b/ansible/files/03-09-k3s-gitops-集群配置管理/README.md new file mode 100644 index 0000000..dc5e4f3 --- 
/dev/null +++ b/ansible/files/03-09-k3s-gitops-集群配置管理/README.md @@ -0,0 +1,9 @@ +# 03-09-k3s-gitops-集群配置管理(占位) + +对应文档:[`docs/03-09-k3s-gitops-集群配置管理.md`](../../../docs/03-09-k3s-gitops-集群配置管理.md) + +## 说明 + +- 本篇为 GitOps 框架草案(Argo CD / Flux 等),最终 manifests 取决于选型与版本。 +- 本目录仅用于 doc_id 对齐占位;暂无固定清单。 + diff --git a/ansible/files/traefik-custom-ports/traefik-custom-ports.yaml b/ansible/files/03-10-traefik-custom-ports/traefik-custom-ports.yaml similarity index 100% rename from ansible/files/traefik-custom-ports/traefik-custom-ports.yaml rename to ansible/files/03-10-traefik-custom-ports/traefik-custom-ports.yaml diff --git a/ansible/files/nodejs-demo/04-01-nodejs-demo.yaml b/ansible/files/04-01-nodejs-demo/04-01-nodejs-demo.yaml similarity index 99% rename from ansible/files/nodejs-demo/04-01-nodejs-demo.yaml rename to ansible/files/04-01-nodejs-demo/04-01-nodejs-demo.yaml index b5ed464..aaa9b61 100644 --- a/ansible/files/nodejs-demo/04-01-nodejs-demo.yaml +++ b/ansible/files/04-01-nodejs-demo/04-01-nodejs-demo.yaml @@ -52,3 +52,4 @@ spec: # Ingress 规则 name: nodejs-demo # Service 名称 port: # Service 端口 number: 80 # 端口号 + diff --git a/ansible/files/nodejs-demo/04-02-nodejs-demo.yaml b/ansible/files/04-01-nodejs-demo/04-02-nodejs-demo.yaml similarity index 99% rename from ansible/files/nodejs-demo/04-02-nodejs-demo.yaml rename to ansible/files/04-01-nodejs-demo/04-02-nodejs-demo.yaml index c2773b8..8380f98 100644 --- a/ansible/files/nodejs-demo/04-02-nodejs-demo.yaml +++ b/ansible/files/04-01-nodejs-demo/04-02-nodejs-demo.yaml @@ -56,3 +56,4 @@ spec: # Ingress 规则 name: nodejs-demo # Service 名称 port: # Service 端口 number: 80 # 端口号 + diff --git a/ansible/files/nodejs-demo/04-03-nodejs-demo.yaml b/ansible/files/04-01-nodejs-demo/04-03-nodejs-demo.yaml similarity index 99% rename from ansible/files/nodejs-demo/04-03-nodejs-demo.yaml rename to ansible/files/04-01-nodejs-demo/04-03-nodejs-demo.yaml index 9308aa6..653dfd7 100644 --- a/ansible/files/nodejs-demo/04-03-nodejs-demo.yaml 
+++ b/ansible/files/04-01-nodejs-demo/04-03-nodejs-demo.yaml @@ -73,3 +73,4 @@ spec: # Ingress 规则 name: nodejs-demo # Service 名称 port: # Service 端口 number: 80 # 端口号 + diff --git a/ansible/files/nodejs-demo/04-04-nodejs-demo.yaml b/ansible/files/04-01-nodejs-demo/04-04-nodejs-demo.yaml similarity index 99% rename from ansible/files/nodejs-demo/04-04-nodejs-demo.yaml rename to ansible/files/04-01-nodejs-demo/04-04-nodejs-demo.yaml index 3313608..0c0d789 100644 --- a/ansible/files/nodejs-demo/04-04-nodejs-demo.yaml +++ b/ansible/files/04-01-nodejs-demo/04-04-nodejs-demo.yaml @@ -73,3 +73,4 @@ spec: # Ingress 规则 name: nodejs-demo # Service 名称 port: # Service 端口 number: 80 # 端口号 + diff --git a/ansible/files/nodejs-demo/04-05-nodejs-demo.yaml b/ansible/files/04-01-nodejs-demo/04-05-nodejs-demo.yaml similarity index 99% rename from ansible/files/nodejs-demo/04-05-nodejs-demo.yaml rename to ansible/files/04-01-nodejs-demo/04-05-nodejs-demo.yaml index 9ce0ea6..ca30089 100644 --- a/ansible/files/nodejs-demo/04-05-nodejs-demo.yaml +++ b/ansible/files/04-01-nodejs-demo/04-05-nodejs-demo.yaml @@ -80,3 +80,4 @@ spec: # Ingress 规则 name: nodejs-demo # Service 名称 port: # Service 端口 number: 80 # 端口号 + diff --git a/ansible/files/nodejs-demo/04-06-nodejs-demo.yaml b/ansible/files/04-01-nodejs-demo/04-06-nodejs-demo.yaml similarity index 99% rename from ansible/files/nodejs-demo/04-06-nodejs-demo.yaml rename to ansible/files/04-01-nodejs-demo/04-06-nodejs-demo.yaml index 2dbf2ad..2db35a0 100644 --- a/ansible/files/nodejs-demo/04-06-nodejs-demo.yaml +++ b/ansible/files/04-01-nodejs-demo/04-06-nodejs-demo.yaml @@ -92,3 +92,4 @@ spec: # Ingress 规则 name: nodejs-demo # Service 名称 port: # Service 端口 number: 80 # 端口号 + diff --git a/ansible/files/nodejs-demo/04-07-nodejs-demo.yaml b/ansible/files/04-01-nodejs-demo/04-07-nodejs-demo.yaml similarity index 99% rename from ansible/files/nodejs-demo/04-07-nodejs-demo.yaml rename to ansible/files/04-01-nodejs-demo/04-07-nodejs-demo.yaml index 
6a36304..b81be5b 100644 --- a/ansible/files/nodejs-demo/04-07-nodejs-demo.yaml +++ b/ansible/files/04-01-nodejs-demo/04-07-nodejs-demo.yaml @@ -94,3 +94,4 @@ spec: # Ingress 规则 name: nodejs-demo # Service 名称 port: # Service 端口 number: 80 # 端口号 + diff --git a/ansible/files/nodejs-demo/04-08-nodejs-demo.yaml b/ansible/files/04-01-nodejs-demo/04-08-nodejs-demo.yaml similarity index 99% rename from ansible/files/nodejs-demo/04-08-nodejs-demo.yaml rename to ansible/files/04-01-nodejs-demo/04-08-nodejs-demo.yaml index a327e25..361e2bf 100644 --- a/ansible/files/nodejs-demo/04-08-nodejs-demo.yaml +++ b/ansible/files/04-01-nodejs-demo/04-08-nodejs-demo.yaml @@ -107,3 +107,4 @@ spec: # Ingress 规则 name: nodejs-demo # Service 名称 port: # Service 端口 number: 80 # 端口号 + diff --git a/ansible/files/nodejs-demo/04-09-nodejs-demo.yaml b/ansible/files/04-01-nodejs-demo/04-09-nodejs-demo.yaml similarity index 99% rename from ansible/files/nodejs-demo/04-09-nodejs-demo.yaml rename to ansible/files/04-01-nodejs-demo/04-09-nodejs-demo.yaml index 5a99c3b..433b86c 100644 --- a/ansible/files/nodejs-demo/04-09-nodejs-demo.yaml +++ b/ansible/files/04-01-nodejs-demo/04-09-nodejs-demo.yaml @@ -125,3 +125,4 @@ spec: # Ingress 规则 name: nodejs-demo # Service 名称 port: # Service 端口 number: 80 # 端口号 + diff --git a/ansible/files/nodejs-demo/04-10-nodejs-demo.yaml b/ansible/files/04-01-nodejs-demo/04-10-nodejs-demo.yaml similarity index 99% rename from ansible/files/nodejs-demo/04-10-nodejs-demo.yaml rename to ansible/files/04-01-nodejs-demo/04-10-nodejs-demo.yaml index c5d4a62..085a5ef 100644 --- a/ansible/files/nodejs-demo/04-10-nodejs-demo.yaml +++ b/ansible/files/04-01-nodejs-demo/04-10-nodejs-demo.yaml @@ -126,3 +126,4 @@ spec: # Ingress 规则 name: nodejs-demo # Service 名称 port: # Service 端口 number: 80 # 端口号 + diff --git a/ansible/files/nodejs-demo/04-11-nodejs-demo.yaml b/ansible/files/04-01-nodejs-demo/04-11-nodejs-demo.yaml similarity index 99% rename from 
ansible/files/nodejs-demo/04-11-nodejs-demo.yaml rename to ansible/files/04-01-nodejs-demo/04-11-nodejs-demo.yaml index cd2d710..c4f84c1 100644 --- a/ansible/files/nodejs-demo/04-11-nodejs-demo.yaml +++ b/ansible/files/04-01-nodejs-demo/04-11-nodejs-demo.yaml @@ -131,3 +131,4 @@ spec: # Ingress 规则 name: nodejs-demo # Service 名称 port: # Service 端口 number: 80 # 端口号 + diff --git a/ansible/files/nodejs-demo/04-12-nodejs-demo.yaml b/ansible/files/04-01-nodejs-demo/04-12-nodejs-demo.yaml similarity index 99% rename from ansible/files/nodejs-demo/04-12-nodejs-demo.yaml rename to ansible/files/04-01-nodejs-demo/04-12-nodejs-demo.yaml index 9f8d0d2..bf40a44 100644 --- a/ansible/files/nodejs-demo/04-12-nodejs-demo.yaml +++ b/ansible/files/04-01-nodejs-demo/04-12-nodejs-demo.yaml @@ -138,3 +138,4 @@ spec: # Ingress 规则 name: nodejs-demo # Service 名称 port: # Service 端口 number: 80 # 端口号 + diff --git a/ansible/files/nodejs-demo/04-13-nodejs-demo.yaml b/ansible/files/04-01-nodejs-demo/04-13-nodejs-demo.yaml similarity index 99% rename from ansible/files/nodejs-demo/04-13-nodejs-demo.yaml rename to ansible/files/04-01-nodejs-demo/04-13-nodejs-demo.yaml index 8f05d8c..8ab244b 100644 --- a/ansible/files/nodejs-demo/04-13-nodejs-demo.yaml +++ b/ansible/files/04-01-nodejs-demo/04-13-nodejs-demo.yaml @@ -155,3 +155,4 @@ spec: # HPA 规格 target: # 目标值 type: Utilization # 目标类型:利用率 averageUtilization: 50 # 目标平均 CPU 利用率(%) + diff --git a/ansible/files/nodejs-demo/README.md b/ansible/files/04-01-nodejs-demo/README.md similarity index 93% rename from ansible/files/nodejs-demo/README.md rename to ansible/files/04-01-nodejs-demo/README.md index 3ffa799..4a81a9c 100644 --- a/ansible/files/nodejs-demo/README.md +++ b/ansible/files/04-01-nodejs-demo/README.md @@ -30,7 +30,7 @@ ```bash # 仓库根目录 -kubectl apply -f ansible/files/nodejs-demo/04-01-nodejs-demo.yaml +kubectl apply -f ansible/files/04-01-nodejs-demo/04-01-nodejs-demo.yaml ``` 或使用 Ansible:`ansible/playbooks/nodejs-demo-apply.yml`,变量 
`nodejs_demo_manifest` 指定文件名。 @@ -38,5 +38,6 @@ kubectl apply -f ansible/files/nodejs-demo/04-01-nodejs-demo.yaml ## dry-run ```bash -kubectl apply --dry-run=client -f ansible/files/nodejs-demo/04-01-nodejs-demo.yaml +kubectl apply --dry-run=client -f ansible/files/04-01-nodejs-demo/04-01-nodejs-demo.yaml ``` + diff --git a/ansible/files/nodejs-demo/nodejs-demo-secret.example.yaml b/ansible/files/04-01-nodejs-demo/nodejs-demo-secret.example.yaml similarity index 99% rename from ansible/files/nodejs-demo/nodejs-demo-secret.example.yaml rename to ansible/files/04-01-nodejs-demo/nodejs-demo-secret.example.yaml index 0dfe049..e4231bc 100644 --- a/ansible/files/nodejs-demo/nodejs-demo-secret.example.yaml +++ b/ansible/files/04-01-nodejs-demo/nodejs-demo-secret.example.yaml @@ -6,3 +6,4 @@ metadata: # Secret 元信息 namespace: default # 命名空间 stringData: # 明文键值(创建时会转换为 data) API_TOKEN: "replace-me" # 示例 token(请替换,勿提交真实密钥) + diff --git a/ansible/files/04-02-nodejs-镜像与运行命令/README.md b/ansible/files/04-02-nodejs-镜像与运行命令/README.md new file mode 100644 index 0000000..9cdba40 --- /dev/null +++ b/ansible/files/04-02-nodejs-镜像与运行命令/README.md @@ -0,0 +1,13 @@ +# 04-02-nodejs-镜像与运行命令(占位) + +对应文档:[`docs/04-02-nodejs-镜像与运行命令.md`](../../../docs/04-02-nodejs-镜像与运行命令.md) + +## 真源清单(复用 04-01 累积目录) + +- 真源目录:`ansible/files/04-01-nodejs-demo/` +- 对应累积清单:`04-02-nodejs-demo.yaml` + +```bash +kubectl apply -f ansible/files/04-01-nodejs-demo/04-02-nodejs-demo.yaml +``` + diff --git a/ansible/files/04-03-nodejs-环境变量与配置注入/README.md b/ansible/files/04-03-nodejs-环境变量与配置注入/README.md new file mode 100644 index 0000000..df44f17 --- /dev/null +++ b/ansible/files/04-03-nodejs-环境变量与配置注入/README.md @@ -0,0 +1,13 @@ +# 04-03-nodejs-环境变量与配置注入(占位) + +对应文档:[`docs/04-03-nodejs-环境变量与配置注入.md`](../../../docs/04-03-nodejs-环境变量与配置注入.md) + +## 真源清单(复用 04-01 累积目录) + +- 真源目录:`ansible/files/04-01-nodejs-demo/` +- 对应累积清单:`04-03-nodejs-demo.yaml` + +```bash +kubectl apply -f ansible/files/04-01-nodejs-demo/04-03-nodejs-demo.yaml 
+``` + diff --git a/ansible/files/04-04-nodejs-端口与Service/README.md b/ansible/files/04-04-nodejs-端口与Service/README.md new file mode 100644 index 0000000..6359c89 --- /dev/null +++ b/ansible/files/04-04-nodejs-端口与Service/README.md @@ -0,0 +1,13 @@ +# 04-04-nodejs-端口与Service(占位) + +对应文档:[`docs/04-04-nodejs-端口与Service.md`](../../../docs/04-04-nodejs-端口与Service.md) + +## 真源清单(复用 04-01 累积目录) + +- 真源目录:`ansible/files/04-01-nodejs-demo/` +- 对应累积清单:`04-04-nodejs-demo.yaml` + +```bash +kubectl apply -f ansible/files/04-01-nodejs-demo/04-04-nodejs-demo.yaml +``` + diff --git a/ansible/files/04-05-nodejs-资源请求与限制/README.md b/ansible/files/04-05-nodejs-资源请求与限制/README.md new file mode 100644 index 0000000..9bed620 --- /dev/null +++ b/ansible/files/04-05-nodejs-资源请求与限制/README.md @@ -0,0 +1,13 @@ +# 04-05-nodejs-资源请求与限制(占位) + +对应文档:[`docs/04-05-nodejs-资源请求与限制.md`](../../../docs/04-05-nodejs-资源请求与限制.md) + +## 真源清单(复用 04-01 累积目录) + +- 真源目录:`ansible/files/04-01-nodejs-demo/` +- 对应累积清单:`04-05-nodejs-demo.yaml` + +```bash +kubectl apply -f ansible/files/04-01-nodejs-demo/04-05-nodejs-demo.yaml +``` + diff --git a/ansible/files/04-06-nodejs-探针与健康检查/README.md b/ansible/files/04-06-nodejs-探针与健康检查/README.md new file mode 100644 index 0000000..f0ab96b --- /dev/null +++ b/ansible/files/04-06-nodejs-探针与健康检查/README.md @@ -0,0 +1,13 @@ +# 04-06-nodejs-探针与健康检查(占位) + +对应文档:[`docs/04-06-nodejs-探针与健康检查.md`](../../../docs/04-06-nodejs-探针与健康检查.md) + +## 真源清单(复用 04-01 累积目录) + +- 真源目录:`ansible/files/04-01-nodejs-demo/` +- 对应累积清单:`04-06-nodejs-demo.yaml` + +```bash +kubectl apply -f ansible/files/04-01-nodejs-demo/04-06-nodejs-demo.yaml +``` + diff --git a/ansible/files/04-07-nodejs-调度与亲和/README.md b/ansible/files/04-07-nodejs-调度与亲和/README.md new file mode 100644 index 0000000..f69696b --- /dev/null +++ b/ansible/files/04-07-nodejs-调度与亲和/README.md @@ -0,0 +1,13 @@ +# 04-07-nodejs-调度与亲和(占位) + +对应文档:[`docs/04-07-nodejs-调度与亲和.md`](../../../docs/04-07-nodejs-调度与亲和.md) + +## 真源清单(复用 04-01 累积目录) + +- 
真源目录:`ansible/files/04-01-nodejs-demo/` +- 对应累积清单:`04-07-nodejs-demo.yaml` + +```bash +kubectl apply -f ansible/files/04-01-nodejs-demo/04-07-nodejs-demo.yaml +``` + diff --git a/ansible/files/04-08-nodejs-安全上下文/README.md b/ansible/files/04-08-nodejs-安全上下文/README.md new file mode 100644 index 0000000..94a09c0 --- /dev/null +++ b/ansible/files/04-08-nodejs-安全上下文/README.md @@ -0,0 +1,13 @@ +# 04-08-nodejs-安全上下文(占位) + +对应文档:[`docs/04-08-nodejs-安全上下文.md`](../../../docs/04-08-nodejs-安全上下文.md) + +## 真源清单(复用 04-01 累积目录) + +- 真源目录:`ansible/files/04-01-nodejs-demo/` +- 对应累积清单:`04-08-nodejs-demo.yaml` + +```bash +kubectl apply -f ansible/files/04-01-nodejs-demo/04-08-nodejs-demo.yaml +``` + diff --git a/ansible/files/04-09-nodejs-存储与卷/README.md b/ansible/files/04-09-nodejs-存储与卷/README.md new file mode 100644 index 0000000..88e82ca --- /dev/null +++ b/ansible/files/04-09-nodejs-存储与卷/README.md @@ -0,0 +1,13 @@ +# 04-09-nodejs-存储与卷(占位) + +对应文档:[`docs/04-09-nodejs-存储与卷.md`](../../../docs/04-09-nodejs-存储与卷.md) + +## 真源清单(复用 04-01 累积目录) + +- 真源目录:`ansible/files/04-01-nodejs-demo/` +- 对应累积清单:`04-09-nodejs-demo.yaml` + +```bash +kubectl apply -f ansible/files/04-01-nodejs-demo/04-09-nodejs-demo.yaml +``` + diff --git a/ansible/files/04-10-nodejs-Ingress与Traefik/README.md b/ansible/files/04-10-nodejs-Ingress与Traefik/README.md new file mode 100644 index 0000000..4894b86 --- /dev/null +++ b/ansible/files/04-10-nodejs-Ingress与Traefik/README.md @@ -0,0 +1,13 @@ +# 04-10-nodejs-Ingress与Traefik(占位) + +对应文档:[`docs/04-10-nodejs-Ingress与Traefik.md`](../../../docs/04-10-nodejs-Ingress与Traefik.md) + +## 真源清单(复用 04-01 累积目录) + +- 真源目录:`ansible/files/04-01-nodejs-demo/` +- 对应累积清单:`04-10-nodejs-demo.yaml` + +```bash +kubectl apply -f ansible/files/04-01-nodejs-demo/04-10-nodejs-demo.yaml +``` + diff --git a/ansible/files/04-11-nodejs-副本与滚动发布/README.md b/ansible/files/04-11-nodejs-副本与滚动发布/README.md new file mode 100644 index 0000000..1239624 --- /dev/null +++ b/ansible/files/04-11-nodejs-副本与滚动发布/README.md @@ 
-0,0 +1,13 @@ +# 04-11-nodejs-副本与滚动发布(占位) + +对应文档:[`docs/04-11-nodejs-副本与滚动发布.md`](../../../docs/04-11-nodejs-副本与滚动发布.md) + +## 真源清单(复用 04-01 累积目录) + +- 真源目录:`ansible/files/04-01-nodejs-demo/` +- 对应累积清单:`04-11-nodejs-demo.yaml` + +```bash +kubectl apply -f ansible/files/04-01-nodejs-demo/04-11-nodejs-demo.yaml +``` + diff --git a/ansible/files/04-12-nodejs-TLS与证书/README.md b/ansible/files/04-12-nodejs-TLS与证书/README.md new file mode 100644 index 0000000..b8a7c35 --- /dev/null +++ b/ansible/files/04-12-nodejs-TLS与证书/README.md @@ -0,0 +1,13 @@ +# 04-12-nodejs-TLS与证书(占位) + +对应文档:[`docs/04-12-nodejs-TLS与证书.md`](../../../docs/04-12-nodejs-TLS与证书.md) + +## 真源清单(复用 04-01 累积目录) + +- 真源目录:`ansible/files/04-01-nodejs-demo/` +- 对应累积清单:`04-12-nodejs-demo.yaml` + +```bash +kubectl apply -f ansible/files/04-01-nodejs-demo/04-12-nodejs-demo.yaml +``` + diff --git a/ansible/files/04-13-nodejs-HPA/README.md b/ansible/files/04-13-nodejs-HPA/README.md new file mode 100644 index 0000000..ec89a09 --- /dev/null +++ b/ansible/files/04-13-nodejs-HPA/README.md @@ -0,0 +1,13 @@ +# 04-13-nodejs-HPA(占位) + +对应文档:[`docs/04-13-nodejs-HPA.md`](../../../docs/04-13-nodejs-HPA.md) + +## 真源清单(复用 04-01 累积目录) + +- 真源目录:`ansible/files/04-01-nodejs-demo/` +- 对应累积清单:`04-13-nodejs-demo.yaml` + +```bash +kubectl apply -f ansible/files/04-01-nodejs-demo/04-13-nodejs-demo.yaml +``` + diff --git a/ansible/files/04-14-nodejs-GitOps与CI流水线/README.md b/ansible/files/04-14-nodejs-GitOps与CI流水线/README.md new file mode 100644 index 0000000..69c663b --- /dev/null +++ b/ansible/files/04-14-nodejs-GitOps与CI流水线/README.md @@ -0,0 +1,9 @@ +# 04-14-nodejs-GitOps与CI流水线(占位) + +对应文档:[`docs/04-14-nodejs-GitOps与CI流水线.md`](../../../docs/04-14-nodejs-GitOps与CI流水线.md) + +## 说明 + +- 本篇为流程/方法论文档,通常不会提供一份固定可复用的 K8s 清单。 +- 如需参考示例清单,可从 `ansible/files/04-01-nodejs-demo/` 选择对应阶段的累积 YAML。 + diff --git a/ansible/files/homer/homer.yaml b/ansible/files/05-01-homer/homer.yaml similarity index 53% rename from ansible/files/homer/homer.yaml rename to 
ansible/files/05-01-homer/homer.yaml index 9e72d27..0e72dcd 100644 --- a/ansible/files/homer/homer.yaml +++ b/ansible/files/05-01-homer/homer.yaml @@ -1,4 +1,28 @@ -# docs/05-01-k3s-部署homer首页面板.md — 按需修改 host +# docs/05-01-k3s-部署homer首页面板.md — 按需修改 host、ConfigMap 内 config.yml +# Homer 官方镜像约定:自定义配置挂在容器内 /www/assets/config.yml(见 b4bz/homer 说明) +# 若不想用 ConfigMap:删除本文件最上方的 ConfigMap,并去掉 Deployment 里 env/volumes/volumeMounts 三段 +--- +apiVersion: v1 # ConfigMap:存放 Homer 的 config.yml 文本 +kind: ConfigMap # 非机密配置,适合放导航 YAML +metadata: # 元数据 + name: homer-config # 名称须与 Deployment 中 volume 引用一致 + namespace: homer # 与 Deployment 同命名空间 +data: # 键值:键名 config.yml 会映射为容器内文件名 + config.yml: | # Homer 主配置(修改导航只改这里,不必为每个链接单独写 K8s YAML) + --- + title: "实验室导航" # 页面主标题 + subtitle: "Homer" # 副标题 + theme: default # 主题:default / dark 等(见官方文档) + connectivityCheck: false # 是否探测链接可达(实验环境可先关) + columns: 3 # 桌面端列数 + services: # 分组与书签(在此集中维护) + - name: "示例分组" # 分组名 + icon: "fas fa-layer-group" # Font Awesome 图标类名 + items: # 该分组下的链接列表 + - name: "Homer 项目" # 卡片标题 + url: "https://github.com/bastienwirtz/homer" # 跳转地址 + target: "_blank" # 新标签页打开 +--- apiVersion: apps/v1 # Deployment 使用的 API 版本 kind: Deployment # 工作负载:Deployment(管理 Pod 副本) metadata: # 对该资源的标识信息 @@ -14,11 +38,25 @@ spec: # Deployment 期望状态 labels: # Pod 标签:用于 selector 匹配 Service/Deployment 等 app: homer # Pod 上的标签 app=homer spec: # Pod 规范 + volumes: # Pod 级卷:把 ConfigMap 挂进容器 + - name: homer-config # 卷名,供 volumeMounts 引用 + configMap: # 来自上方 homer-config + name: homer-config # ConfigMap 名称 + items: # 只挂载需要的键,文件名与键名一致 + - key: config.yml # ConfigMap.data 中的键 + path: config.yml # 在挂载目录下生成的文件名 containers: # 容器列表(本例只有一个容器) - name: homer # 容器名称(日志/调试中会用到) - image: b4bz/homer:latest # homer 镜像 + image: b4bz/homer:latest # Homer 官方镜像(Docker Hub 命名空间 b4bz) + env: # 环境变量 + - name: INIT_ASSETS # 启动时是否从镜像复制默认 assets + value: "0" # 使用 ConfigMap 提供 config.yml 时设为 0,避免覆盖自定义配置 ports: # 容器端口声明(供探测/生成文档等使用) - containerPort: 8080 # 容器监听端口:homer 默认 8080 + 
volumeMounts: # 把 config.yml 挂到 Homer 读取路径 + - name: homer-config # 对应 volumes[].name + mountPath: /www/assets/config.yml # 官方镜像中配置文件路径 + subPath: config.yml # 单文件挂载(不覆盖整个 /www/assets 目录) --- apiVersion: v1 # Service 使用的 API 版本 kind: Service # 网络抽象:把一组 Pod 暴露为稳定的访问入口 @@ -51,3 +89,4 @@ spec: # Ingress 规则 name: homer # 后端 Service 名称 port: # 后端端口配置 number: 80 # 后端 Service 端口 + diff --git a/ansible/files/onenav/onenav-proxy.yaml b/ansible/files/05-02-onenav/onenav-proxy.yaml similarity index 97% rename from ansible/files/onenav/onenav-proxy.yaml rename to ansible/files/05-02-onenav/onenav-proxy.yaml index 7f8611f..1e70df8 100644 --- a/ansible/files/onenav/onenav-proxy.yaml +++ b/ansible/files/05-02-onenav/onenav-proxy.yaml @@ -40,4 +40,5 @@ spec: # Ingress 规则 service: # 转发到 Service name: onenav-external # 后端 Service 名称 port: # 后端端口 - number: 80 # Service 端口 + number: 80 # 端口号 + diff --git a/ansible/files/gitlab/gitlab-ci-runner-tags.example.yml b/ansible/files/05-03-gitlab-runner/gitlab-ci-runner-tags.example.yml similarity index 99% rename from ansible/files/gitlab/gitlab-ci-runner-tags.example.yml rename to ansible/files/05-03-gitlab-runner/gitlab-ci-runner-tags.example.yml index 6ddfd38..b768224 100644 --- a/ansible/files/gitlab/gitlab-ci-runner-tags.example.yml +++ b/ansible/files/05-03-gitlab-runner/gitlab-ci-runner-tags.example.yml @@ -13,3 +13,4 @@ build_armv7: # 任务名:armv7 构建 tags: [armv7] # 仅匹配 armv7 Runner script: # 执行脚本 - echo "build for armv7" # 示例输出 + diff --git a/ansible/files/gitlab/README.md b/ansible/files/05-04-gitlab-cicd/README.md similarity index 74% rename from ansible/files/gitlab/README.md rename to ansible/files/05-04-gitlab-cicd/README.md index 08af83c..27531e3 100644 --- a/ansible/files/gitlab/README.md +++ b/ansible/files/05-04-gitlab-cicd/README.md @@ -4,6 +4,7 @@ |------|------| | `gitlab-ci-minimal.example.yml` | `docs/05-04-k3s-配置gitlab-cicd.md` | | `gitlab-ci-multi-arch-deploy.example.yml` | `docs/05-04-k3s-配置gitlab-cicd.md` | -| 
`gitlab-ci-runner-tags.example.yml` | `docs/05-03-k3s-安装gitlab-含runner.md` | +| `../05-03-gitlab-runner/gitlab-ci-runner-tags.example.yml` | `docs/05-03-k3s-安装gitlab-含runner.md` | 复制为 `.gitlab-ci.yml` 或 `include` 引用;变量与 Runner 以文档为准。 + diff --git a/ansible/files/gitlab/gitlab-ci-minimal.example.yml b/ansible/files/05-04-gitlab-cicd/gitlab-ci-minimal.example.yml similarity index 99% rename from ansible/files/gitlab/gitlab-ci-minimal.example.yml rename to ansible/files/05-04-gitlab-cicd/gitlab-ci-minimal.example.yml index f91a8ab..3264e63 100644 --- a/ansible/files/gitlab/gitlab-ci-minimal.example.yml +++ b/ansible/files/05-04-gitlab-cicd/gitlab-ci-minimal.example.yml @@ -18,3 +18,4 @@ deploy: # 任务名:deploy - kubectl --kubeconfig="$KUBECONFIG" apply -f manifests/ # 应用 manifests only: # 触发条件(旧语法) - main # 仅 main 分支触发 + diff --git a/ansible/files/gitlab/gitlab-ci-multi-arch-deploy.example.yml b/ansible/files/05-04-gitlab-cicd/gitlab-ci-multi-arch-deploy.example.yml similarity index 99% rename from ansible/files/gitlab/gitlab-ci-multi-arch-deploy.example.yml rename to ansible/files/05-04-gitlab-cicd/gitlab-ci-multi-arch-deploy.example.yml index cbdcc22..576ba0d 100644 --- a/ansible/files/gitlab/gitlab-ci-multi-arch-deploy.example.yml +++ b/ansible/files/05-04-gitlab-cicd/gitlab-ci-multi-arch-deploy.example.yml @@ -12,3 +12,4 @@ deploy_arm64: # 任务名:arm64 架构部署 script: # 执行脚本 - echo "$KUBE_CONFIG_CONTENT" > "$KUBECONFIG" # 写入 kubeconfig - kubectl --kubeconfig="$KUBECONFIG" apply -f manifests/arm64/ # 部署 arm64 清单 + diff --git a/ansible/files/05-05-prometheus与grafana/README.md b/ansible/files/05-05-prometheus与grafana/README.md new file mode 100644 index 0000000..e366f88 --- /dev/null +++ b/ansible/files/05-05-prometheus与grafana/README.md @@ -0,0 +1,9 @@ +# 05-05-prometheus与grafana(占位) + +对应文档:[`docs/05-05-prometheus与grafana.md`](../../../docs/05-05-prometheus与grafana.md) + +## 说明 + +- 监控栈通常通过 Helm Chart(如 kube-prometheus-stack)安装,清单会随版本变化。 +- 本目录仅用于 doc_id 对齐占位;后续若固化 
values/Chart 版本,可在此补齐 manifests/values。 + diff --git a/ansible/files/openlist/openlist-backup-cronjob.yaml b/ansible/files/05-06-openlist/openlist-backup-cronjob.yaml similarity index 99% rename from ansible/files/openlist/openlist-backup-cronjob.yaml rename to ansible/files/05-06-openlist/openlist-backup-cronjob.yaml index a3db40a..0df2132 100644 --- a/ansible/files/openlist/openlist-backup-cronjob.yaml +++ b/ansible/files/05-06-openlist/openlist-backup-cronjob.yaml @@ -25,3 +25,4 @@ spec: # CronJob 期望状态 persistentVolumeClaim: # 使用 PVC 作为存储来源 claimName: openlist-backup-pvc # 绑定的 PVC 名称(需保证存在) restartPolicy: OnFailure # Pod 失败后重启策略:仅失败时重启 + diff --git a/ansible/files/openclaw/openclaw-proxy.yaml b/ansible/files/05-07-openclaw/openclaw-proxy.yaml similarity index 100% rename from ansible/files/openclaw/openclaw-proxy.yaml rename to ansible/files/05-07-openclaw/openclaw-proxy.yaml diff --git a/ansible/files/05-07-openclaw/openclaw-server.yml b/ansible/files/05-07-openclaw/openclaw-server.yml new file mode 100644 index 0000000..4f96a72 --- /dev/null +++ b/ansible/files/05-07-openclaw/openclaw-server.yml @@ -0,0 +1,37 @@ +# docs/05-07-openclaw局域网联机.md — 按需修改 NodePort/镜像 +apiVersion: apps/v1 # Deployment API 版本 +kind: Deployment # 部署控制器 +metadata: # 元数据 + name: openclaw-server # Deployment 名称 + namespace: default # 命名空间 +spec: # 期望状态 + replicas: 1 # 副本数 + selector: # 选择器 + matchLabels: # 匹配 labels + app: openclaw-server # 标签值 + template: # Pod 模板 + metadata: # Pod 元信息 + labels: # Pod 标签 + app: openclaw-server # 标签值 + spec: # Pod 规格 + containers: # 容器 + - name: openclaw-server # 容器名 + image: ghcr.io/your/openclaw-server:latest # 镜像(按环境修改) + ports: # 容器端口 + - containerPort: 27015 # 示例端口(按应用实际修改) +--- +apiVersion: v1 # Service API 版本 +kind: Service # Service +metadata: # 元数据 + name: openclaw-server # Service 名称 + namespace: default # 命名空间 +spec: # 规格 + type: NodePort # NodePort 暴露到节点 + selector: # 选择后端 Pod + app: openclaw-server # 标签选择器 + ports: # 端口列表 + - name: game # 端口名 
+ port: 27015 # Service 端口 + targetPort: 27015 # Pod 端口 + nodePort: 32715 # NodePort(按需修改,需在范围内) + diff --git a/ansible/files/openclaw/openclaw-k3s-experimental.yaml b/ansible/files/05-08-openclaw/openclaw-k3s-experimental.yaml similarity index 100% rename from ansible/files/openclaw/openclaw-k3s-experimental.yaml rename to ansible/files/05-08-openclaw/openclaw-k3s-experimental.yaml diff --git a/ansible/files/05-09-openclaw-web-小游戏网页平台/openclaw-web.yml b/ansible/files/05-09-openclaw-web-小游戏网页平台/openclaw-web.yml new file mode 100644 index 0000000..f1f3932 --- /dev/null +++ b/ansible/files/05-09-openclaw-web-小游戏网页平台/openclaw-web.yml @@ -0,0 +1,55 @@ +# docs/05-09-openclaw-web-小游戏网页平台.md — 按需修改 Ingress host/镜像 +apiVersion: apps/v1 # Deployment API 版本 +kind: Deployment # 部署 +metadata: # 元信息 + name: openclaw-web # 名称 + namespace: default # 命名空间 +spec: # 规格 + replicas: 1 # 副本数 + selector: # 选择器 + matchLabels: # 匹配标签 + app: openclaw-web # 标签 + template: # Pod 模板 + metadata: # Pod 元信息 + labels: # Pod 标签 + app: openclaw-web # 标签 + spec: # Pod 规格 + containers: # 容器列表 + - name: openclaw-web # 容器名 + image: ghcr.io/your/openclaw-web:latest # 镜像(按环境修改) + ports: # 容器端口 + - containerPort: 80 # Web 端口 +--- +apiVersion: v1 # Service API 版本 +kind: Service # Service +metadata: # 元信息 + name: openclaw-web # Service 名称 + namespace: default # 命名空间 +spec: # 规格 + selector: # 选择后端 Pod + app: openclaw-web # 标签 + ports: # 端口 + - name: http # 名称 + port: 80 # Service 端口 + targetPort: 80 # Pod 端口 +--- +apiVersion: networking.k8s.io/v1 # Ingress API 版本 +kind: Ingress # Ingress +metadata: # 元信息 + name: openclaw-web # 名称 + namespace: default # 命名空间 + annotations: # 注解 + traefik.ingress.kubernetes.io/router.entrypoints: web # Traefik entrypoint +spec: # 规格 + rules: # 规则 + - host: openclaw.example.com # 域名(按环境修改) + http: # HTTP + paths: # 路径 + - path: / # 根路径 + pathType: Prefix # 前缀匹配 + backend: # 后端 + service: # Service + name: openclaw-web # 后端 service + port: # 端口 + number: 80 # 端口号 + diff --git 
a/ansible/files/06-01-k3s-networkpolicy-故障排查/README.md b/ansible/files/06-01-k3s-networkpolicy-故障排查/README.md new file mode 100644 index 0000000..3240e73 --- /dev/null +++ b/ansible/files/06-01-k3s-networkpolicy-故障排查/README.md @@ -0,0 +1,9 @@ +# 06-01-k3s-networkpolicy-故障排查(占位) + +对应文档:[`docs/06-01-k3s-networkpolicy-故障排查.md`](../../../docs/06-01-k3s-networkpolicy-故障排查.md) + +## 说明 + +- 本篇为排障手册/命令集合,**不提供固定可部署清单**。 +- 本目录仅用于 doc_id 对齐占位。 + diff --git a/ansible/files/06-02-运维小结/README.md b/ansible/files/06-02-运维小结/README.md new file mode 100644 index 0000000..22d4a72 --- /dev/null +++ b/ansible/files/06-02-运维小结/README.md @@ -0,0 +1,9 @@ +# 06-02-运维小结(占位) + +对应文档:[`docs/06-02-运维小结.md`](../../../docs/06-02-运维小结.md) + +## 说明 + +- 本篇为运维建议/巡检要点总结,通常不对应单一可部署清单。 +- 本目录仅用于 doc_id 对齐占位。 + diff --git a/ansible/files/06-03-k3s-自动备份与恢复-openlist-webdav/README.md b/ansible/files/06-03-k3s-自动备份与恢复-openlist-webdav/README.md new file mode 100644 index 0000000..881d2b9 --- /dev/null +++ b/ansible/files/06-03-k3s-自动备份与恢复-openlist-webdav/README.md @@ -0,0 +1,12 @@ +# 06-03-k3s-自动备份与恢复-openlist-webdav(对齐 README) + +对应文档:[`docs/06-03-k3s-自动备份与恢复-openlist-webdav.md`](../../../docs/06-03-k3s-自动备份与恢复-openlist-webdav.md) + +## 真源清单目录 + +本篇可部署清单当前收敛在: + +- `ansible/files/06-03-openlist-webdav/` + +说明:该目录名未镜像 docs 文件名;为满足“doc_id 目录对齐”口径,本目录仅作为桥接与入口。 + diff --git a/ansible/files/openlist/app-data-backup-cronjob.yaml b/ansible/files/06-03-openlist-webdav/app-data-backup-cronjob.yaml similarity index 99% rename from ansible/files/openlist/app-data-backup-cronjob.yaml rename to ansible/files/06-03-openlist-webdav/app-data-backup-cronjob.yaml index 3a0fdd2..c319ce5 100644 --- a/ansible/files/openlist/app-data-backup-cronjob.yaml +++ b/ansible/files/06-03-openlist-webdav/app-data-backup-cronjob.yaml @@ -25,3 +25,4 @@ spec: # CronJob 规格 hostPath: # 使用宿主机路径 path: /data/app # 宿主机实际目录(按环境修改) restartPolicy: OnFailure # 失败时重启 + diff --git a/ansible/files/openlist/app-data-restore-job.yaml 
b/ansible/files/06-03-openlist-webdav/app-data-restore-job.yaml similarity index 99% rename from ansible/files/openlist/app-data-restore-job.yaml rename to ansible/files/06-03-openlist-webdav/app-data-restore-job.yaml index 231fde9..3e7971a 100644 --- a/ansible/files/openlist/app-data-restore-job.yaml +++ b/ansible/files/06-03-openlist-webdav/app-data-restore-job.yaml @@ -22,3 +22,4 @@ spec: # Job 规格 hostPath: # 使用宿主机目录作为存储 path: /data/app # 节点上的真实数据目录(按实际修改) restartPolicy: OnFailure # 失败时重启,成功后结束 + diff --git a/ansible/files/traefik-dashboard-acme/traefik-dashboard-acme.yaml b/ansible/files/traefik-dashboard-acme/traefik-dashboard-acme.yaml deleted file mode 100644 index 9773a34..0000000 --- a/ansible/files/traefik-dashboard-acme/traefik-dashboard-acme.yaml +++ /dev/null @@ -1,83 +0,0 @@ -# 03-03 Traefik Dashboard + ACME(唯一清单,推荐) -# ============================================================================= -# 含:HelmChartConfig(local-path 持久化 /data + ACME Cloudflare DNS-01 + Dashboard) -# + IngressRoute(/dashboard、/api) -# acme.json 与 chart persistence 均落在 /data,Pod 重建后证书仍在;nodeSelector 须固定单节点(RWO) -# -# 部署:kubectl apply -f ansible/files/traefik-dashboard-acme/traefik-dashboard-acme.yaml -# 使用前:替换 、nodeSelector 主机名;Secret cloudflare-api-token 已存在(见 03-02) -# 全集群只能有一份 HelmChartConfig metadata.name=traefik -# -# --- 不要 Dashboard 时 --- -# 删除文末 IngressRoute 整段;并在 valuesContent 中删掉 ports(可选)、--api.dashboard、--api.insecure -# -# --- 临时不用持久化(不推荐)--- -# 将 persistence.enabled 改为 false 并删掉 persistence 下其余字段(证书可能随 Pod 丢失) -# ============================================================================= ---- -apiVersion: helm.cattle.io/v1 # HelmChartConfig 所在的 API 版本 -kind: HelmChartConfig # HelmChartConfig:给 K3s/Helm 注入 values 的资源 -metadata: # 资源标识信息 - name: traefik # chart 对应的 name(需要与 Traefik chart/约定一致) - namespace: kube-system # Traefik 通常运行在 kube-system -spec: # 该资源要注入 chart 的配置 - valuesContent: |- # 以“字符串形式的 YAML”注入到 Helm chart values(由 chart 解析) - ports: # 暴露 
entrypoints 给集群入口 - web: # HTTP entrypoint - expose: true # 允许暴露 web - websecure: # HTTPS entrypoint - expose: true # 允许暴露 websecure - - persistence: # chart 持久化配置:为 /data 挂载 PVC - enabled: true # 开启持久卷 - name: data # chart 创建/引用的卷名(PVC 等) - accessMode: ReadWriteOnce # RWO:同一时间只能在一个节点挂载 - size: 512Mi # 请求容量(local-path 会据此创建本地卷) - storageClass: local-path # 使用 K3s 的 local-path-provisioner - path: /data # 容器内挂载目录(与 acme.storage 一致) - - additionalArguments: # 额外传给 Traefik 的 CLI 参数 - - "--api.dashboard=true" # 打开 dashboard 功能 - - "--api.insecure=true" # (k8s)允许 dashboard 在入口可用(注意安全) - - - "--log.level=INFO" # 日志级别 - - "--certificatesresolvers.cloudflare.acme.dnschallenge.resolvers=1.1.1.1:53,1.0.0.1:53" # DNS 解析器列表(用于 DNS-01) - - "--certificatesresolvers.cloudflare.acme.email=" # ACME 注册邮箱 - - "--certificatesresolvers.cloudflare.acme.storage=/data/acme.json" # 证书与账户存储(容器内 /data) - # - "--certificatesresolvers.cloudflare.acme.caserver=https://acme-staging-v02.api.letsencrypt.org/directory" # 测试用,上线前删除 - - "--certificatesresolvers.cloudflare.acme.dnschallenge.provider=cloudflare" # DNS-01 provider:cloudflare - - "--certificatesresolvers.cloudflare.acme.dnschallenge.propagation.delayBeforeChecks=600" # DNS-01 propagation 等待秒数 - - - "--ping=true" # 开启 ping healthcheck - - "--ping.entryPoint=websecure" # ping 使用 websecure(HTTPS) entrypoint - - - "--entrypoints.web.proxyProtocol.trustedIPs=192.168.2.0/24" # web entrypoint 信任的代理网段 - - "--entrypoints.websecure.proxyProtocol.trustedIPs=192.168.2.0/24" # websecure entrypoint 信任的代理网段 - - env: # 环境变量注入 - - name: CF_DNS_API_TOKEN # 供 Traefik 使用的 Cloudflare Token 环境变量名 - valueFrom: # 从 Secret 挂载 - secretKeyRef: # Secret 引用方式 - name: cloudflare-api-token # Secret 名 - key: api-token # Secret 内 key - - nodeSelector: # 将 Traefik Pod 固定到指定节点(避免 local-path RWO 迁移导致丢数据) - kubernetes.io/hostname: ylc61 # 目标节点主机名 - ---- -# 显式 IngressRoute(与 03-01 一致,确保 /dashboard 可达; Helm ingressRoute.dashboard 在 K3s chart 中未必生效) -apiVersion: 
traefik.io/v1alpha1 # IngressRoute API 版本 -kind: IngressRoute # Traefik 路由 CRD -metadata: # IngressRoute 元信息 - name: traefik-dashboard # 路由名称 - namespace: kube-system # 命名空间 -spec: # IngressRoute 规则 - entryPoints: # 入口点列表 - - web # 使用 web(HTTP) 入口 - routes: # 路由规则列表 - - match: PathPrefix(`/dashboard`) || PathPrefix(`/api`) # 匹配 Dashboard/API 路径前缀 - kind: Rule # 规则类型 - services: # 后端服务 - - name: api@internal # Traefik 内置 API 服务 - kind: TraefikService # 服务类型 - diff --git a/ansible/group_vars/all.yml b/ansible/group_vars/all.yml index 5c49bc2..19613b5 100644 --- a/ansible/group_vars/all.yml +++ b/ansible/group_vars/all.yml @@ -9,6 +9,20 @@ k3s_version: "" # 为空表示用 get.k3s.io 默认最新 k3s_data_dir: "/storage" k3s_server_ip: "192.168.2.61" +# 安装 k3s 前校验:/storage 为挂载点且与 / 不同设备(实验室 10G+32G 建议 true;「目录式假 /storage」旧环境可 false) +k3s_verify_storage_mount: true + +# 可选:由 playbooks/k3s-prepare-storage.yml 对第二块整盘分区、格式化并挂载到 k3s_data_dir(会清空该盘,见 01-06) +k3s_prepare_storage: false +# k3s_data_disk_device: "/dev/vdb" +# NVMe 整盘一般为 /dev/nvme0n1,首分区为 /dev/nvme0n1p1,playbook 会按设备名自动加 1 或 p1 + +# Longhorn Helm(playbooks/longhorn-install.yml) +longhorn_chart_version: "1.7.2" +longhorn_install_node_packages: true +# 是否在 longhorn-install 末尾应用本仓库 local-path 实验室 ConfigMap +longhorn_apply_local_path_lab: false + # 可选:是否管理 /etc/hosts、firewalld 基线 k3s_manage_hosts: true k3s_manage_firewalld: true diff --git a/ansible/playbooks/apply-local-path-config-lab.yml b/ansible/playbooks/apply-local-path-config-lab.yml new file mode 100644 index 0000000..9f6b585 --- /dev/null +++ b/ansible/playbooks/apply-local-path-config-lab.yml @@ -0,0 +1,37 @@ +--- +# 仅应用本仓库 local-path 实验室 ConfigMap(不安装 Longhorn)。在 k3s_server 上执行。 +# 与 docs/03-05 中「方法一」一致,真源:ansible/files/03-05-local-path-config/local-path-config-lab.json + +- name: Apply local-path-config lab JSON + hosts: k3s_server + become: true + run_once: true + vars: + k3s_kubeconfig: /etc/rancher/k3s/k3s.yaml + local_path_json_src: "{{ playbook_dir 
}}/../files/03-05-local-path-config/local-path-config-lab.json" + local_path_json_dest: /root/local-path-config-lab.json + tasks: + - name: Copy local-path lab json + ansible.builtin.copy: + src: "{{ local_path_json_src }}" + dest: "{{ local_path_json_dest }}" + mode: "0644" + + - name: Apply local-path-config ConfigMap + ansible.builtin.shell: | + set -e + KUBECONFIG={{ k3s_kubeconfig }} kubectl -n kube-system create configmap local-path-config \ + --from-file=config.json={{ local_path_json_dest }} \ + --dry-run=client -o yaml | KUBECONFIG={{ k3s_kubeconfig }} kubectl apply -f - + args: + executable: /bin/bash + changed_when: true + + - name: Restart local-path-provisioner if present + ansible.builtin.shell: | + KUBECONFIG={{ k3s_kubeconfig }} kubectl -n kube-system rollout restart deploy/local-path-provisioner + args: + executable: /bin/bash + register: lp_restart + failed_when: false + changed_when: lp_restart.rc == 0 diff --git a/ansible/playbooks/k3s-init-and-install.yml b/ansible/playbooks/k3s-init-and-install.yml index 1cfef22..4397316 100644 --- a/ansible/playbooks/k3s-init-and-install.yml +++ b/ansible/playbooks/k3s-init-and-install.yml @@ -1,4 +1,33 @@ --- +- name: Verify k3s_data_dir (default /storage) is a separate mount (optional) + hosts: k3s_nodes + become: true + tasks: + - name: Check / and k3s_data_dir mount sources + when: k3s_verify_storage_mount | default(false) | bool # whole check is skipped unless the flag is true + block: + - name: Get mount source for / + ansible.builtin.command: findmnt -n -o SOURCE / + register: mnt_root + changed_when: false + + - name: Get mount source for k3s_data_dir + ansible.builtin.command: findmnt -n -o SOURCE {{ k3s_data_dir | default('/storage') }} + register: mnt_storage + changed_when: false + failed_when: false # do not fail here; rc is checked by the assert below + + - name: Assert k3s_data_dir is mounted on a different device than / + ansible.builtin.assert: + that: + - mnt_storage.rc == 0 + - (mnt_root.stdout | trim | length) > 0 + - (mnt_storage.stdout | trim | length) > 0 + - (mnt_root.stdout | trim) != (mnt_storage.stdout | trim) + fail_msg: >- + {{ k3s_data_dir | default('/storage') }} must be a 
mount point on a block device different from /. + See docs/00-04-部署环境说明.md and docs/01-06-节点初始化-ansible-实践.md + - name: Init base system hosts: k3s_nodes become: true diff --git a/ansible/playbooks/k3s-prepare-storage.yml b/ansible/playbooks/k3s-prepare-storage.yml new file mode 100644 index 0000000..34d1af3 --- /dev/null +++ b/ansible/playbooks/k3s-prepare-storage.yml @@ -0,0 +1,106 @@ +--- +# Optional: create a single ext4 partition on a blank data disk, add an fstab entry and mount it at k3s_data_dir (default /storage). +# Before enabling, set k3s_prepare_storage: true and k3s_data_disk_device (e.g. /dev/vdb) in group_vars/all.yml. +# This wipes the data on that disk. Skipped when k3s_data_dir is already a mount point. + +- name: Prepare data disk and mount to k3s_data_dir + hosts: k3s_nodes + become: true + tasks: + - name: Skip notice when storage prep disabled + ansible.builtin.debug: + msg: "k3s_prepare_storage is false — skipping (see group_vars/all.yml)" + when: not (k3s_prepare_storage | default(false) | bool) + + - name: Prepare block storage for k3s_data_dir + when: k3s_prepare_storage | default(false) | bool + block: + - name: Require k3s_data_disk_device when k3s_prepare_storage is true + ansible.builtin.assert: + that: + - k3s_data_disk_device is defined + - (k3s_data_disk_device | string | length) > 0 + fail_msg: "Set k3s_data_disk_device (e.g. 
/dev/vdb) in group_vars or host_vars" + + - name: Verify k3s_data_disk_device is a block device + ansible.builtin.command: test -b {{ k3s_data_disk_device }} + changed_when: false + + - name: Check whether k3s_data_dir is already a mountpoint + ansible.builtin.command: mountpoint -q {{ k3s_data_dir }} + register: mp_k3s + changed_when: false + failed_when: false + + - name: Skip when k3s_data_dir already mounted + ansible.builtin.debug: + msg: "{{ k3s_data_dir }} already mounted — skipping partitioning on {{ inventory_hostname }}" + when: mp_k3s.rc == 0 + + - name: Install partitioning and filesystem tools + ansible.builtin.package: + name: + - parted + - e2fsprogs + state: present + when: mp_k3s.rc != 0 + + - name: Compute first partition path (device name ending in a digit -> p1, else 1) + ansible.builtin.set_fact: # e.g. /dev/nvme0n1 -> /dev/nvme0n1p1, /dev/mmcblk0 -> /dev/mmcblk0p1, /dev/vdb -> /dev/vdb1 + k3s_data_partition: >- + {{ k3s_data_disk_device }}{{ 'p1' if (k3s_data_disk_device | regex_search('[0-9]$')) else '1' }} + when: mp_k3s.rc != 0 + + - name: Create GPT and single ext4 partition + ansible.builtin.command: >- + parted -s {{ k3s_data_disk_device }} mklabel gpt mkpart primary ext4 0% 100% + args: + creates: "{{ k3s_data_partition }}" + when: mp_k3s.rc != 0 + + - name: Wait for partition node in /dev + ansible.builtin.wait_for: + path: "{{ k3s_data_partition }}" + state: present + timeout: 60 + when: mp_k3s.rc != 0 + + - name: Detect existing filesystem on partition + ansible.builtin.command: blkid -s TYPE -o value {{ k3s_data_partition }} + register: fs_type + changed_when: false + failed_when: false + when: mp_k3s.rc != 0 + + - name: Create ext4 on partition + ansible.builtin.command: mkfs.ext4 -F {{ k3s_data_partition }} + when: + - mp_k3s.rc != 0 + - (fs_type.stdout | default('') | trim | length) == 0 + + - name: Read UUID of partition + ansible.builtin.command: blkid -s UUID -o value {{ k3s_data_partition }} + register: blk_uuid + changed_when: false + when: mp_k3s.rc != 0 + + - name: Ensure mount directory exists + ansible.builtin.file: +
path: "{{ k3s_data_dir }}" + state: directory + mode: "0755" + when: mp_k3s.rc != 0 + + - name: Add fstab entry for k3s_data_dir + ansible.builtin.lineinfile: + path: /etc/fstab + regexp: "^UUID={{ blk_uuid.stdout | trim }}\\s" + line: "UUID={{ blk_uuid.stdout | trim }} {{ k3s_data_dir }} ext4 defaults,nofail 0 2" + create: true + mode: "0644" + when: mp_k3s.rc != 0 + + - name: Mount all from fstab + ansible.builtin.command: mount -a + changed_when: true + when: mp_k3s.rc != 0 diff --git a/ansible/playbooks/longhorn-install.yml b/ansible/playbooks/longhorn-install.yml new file mode 100644 index 0000000..6f78085 --- /dev/null +++ b/ansible/playbooks/longhorn-install.yml @@ -0,0 +1,251 @@ +--- +# Helm 安装 Longhorn(与 docs/03-07 一致)。在控制节点执行,依赖 KUBECONFIG=/etc/rancher/k3s/k3s.yaml +# 变量:group_vars/all.yml 中 longhorn_chart_version、longhorn_install_node_packages、longhorn_apply_local_path_lab + +- name: Longhorn node packages (iSCSI, NFS client) + hosts: k3s_nodes + become: true + tasks: + - name: Install Longhorn OS dependencies + when: longhorn_install_node_packages | default(true) | bool + block: + - name: Install iscsi + nfs (dnf/yum) + ansible.builtin.package: + name: + - iscsi-initiator-utils + - nfs-utils + state: present + + - name: Enable iscsid + ansible.builtin.systemd: + name: iscsid + enabled: true + state: started + + - name: Ensure Longhorn data subdirectory exists on all nodes + ansible.builtin.file: + path: "{{ k3s_data_dir }}/longhorn" + state: directory + mode: "0700" + + - name: Pre-pull Longhorn images on all nodes (optional, avoid DockerHub EOF/ImagePullBackOff) + when: longhorn_prepull_images | default(true) | bool + ansible.builtin.shell: | + set -e + CTR="ctr --address /run/k3s/containerd/containerd.sock -n k8s.io" + + imgs=( + "docker.io/longhornio/longhorn-manager:v{{ longhorn_chart_version }}" + "docker.io/longhornio/longhorn-ui:v{{ longhorn_chart_version }}" + "docker.io/longhornio/longhorn-share-manager:v{{ longhorn_chart_version }}" + 
"docker.io/longhornio/longhorn-engine:v{{ longhorn_chart_version }}" + "docker.io/longhornio/longhorn-instance-manager:v{{ longhorn_chart_version }}" + "docker.io/longhornio/backing-image-manager:v{{ longhorn_chart_version }}" + "docker.io/longhornio/support-bundle-kit:v0.0.45" + ) + + for img in "${imgs[@]}"; do + ok=0 + for i in 1 2 3 4 5; do + echo "[pull] $img (try $i/5)" + if $CTR images pull "$img"; then + ok=1 + break + fi + sleep $((i * 3)) + done + if [ "$ok" -ne 1 ]; then + echo "[ERR] failed pulling $img after retries" + exit 1 + fi + done + args: + executable: /bin/bash + changed_when: true + +- name: Install Longhorn with Helm on first server + hosts: k3s_server + become: true + run_once: true + vars: + longhorn_values_src: "{{ playbook_dir }}/../files/03-07-longhorn/values-lab.yaml" + longhorn_values_dest: /root/longhorn-values-lab.yaml + k3s_kubeconfig: /etc/rancher/k3s/k3s.yaml + tasks: + - name: Install helm package (Fedora/RHEL family) + ansible.builtin.package: + name: helm + state: present + ignore_errors: true + register: helm_pkg + + - name: Hint if helm package install failed (install Helm 3 manually if needed) + ansible.builtin.debug: + msg: "dnf/yum 未装上 helm 时,请见 https://helm.sh/docs/intro/install/" + when: helm_pkg.failed | default(false) + + - name: Fail if helm binary still unavailable + ansible.builtin.command: which helm + register: helm_which + changed_when: false + failed_when: helm_which.rc != 0 + + - name: Copy lab values to server + ansible.builtin.copy: + src: "{{ longhorn_values_src }}" + dest: "{{ longhorn_values_dest }}" + mode: "0600" + + - name: Ensure longhorn-system namespace is not stuck Terminating (force finalize if needed) + ansible.builtin.shell: | + set -e + export KUBECONFIG={{ k3s_kubeconfig }} + ns="longhorn-system" + phase="$(kubectl get ns "$ns" -o jsonpath='{.status.phase}' 2>/dev/null || true)" + if [ "$phase" = "Terminating" ]; then + echo "[WARN] namespace $ns is Terminating; force finalize to unblock 
install" + kubectl get ns "$ns" -o json > /tmp/ns.json + python3 -c "import json; obj=json.load(open('/tmp/ns.json')); obj.setdefault('spec',{}); obj['spec']['finalizers']=[]; json.dump(obj, open('/tmp/ns-finalize.json','w'))" + kubectl replace --raw "/api/v1/namespaces/$ns/finalize" -f /tmp/ns-finalize.json >/dev/null + fi + args: + executable: /bin/bash + changed_when: true + failed_when: false + + - name: Ensure longhorn Helm repo + ansible.builtin.shell: | + set -e + if ! helm repo list 2>/dev/null | grep -q '^longhorn'; then + helm repo add longhorn https://charts.longhorn.io + fi + helm repo update + environment: + KUBECONFIG: "{{ k3s_kubeconfig }}" + args: + executable: /bin/bash + changed_when: true + + - name: Delete leftover longhorn PriorityClass (cluster-scoped) to avoid Helm ownership conflicts + ansible.builtin.shell: | + set -e + KUBECONFIG={{ k3s_kubeconfig }} kubectl delete priorityclass longhorn-critical --ignore-not-found=true + args: + executable: /bin/bash + changed_when: true + failed_when: false + + - name: Delete leftover Longhorn CRDs (cluster-scoped) to avoid Helm ownership conflicts + ansible.builtin.shell: | + set -e + export KUBECONFIG={{ k3s_kubeconfig }} + crd_list="$(kubectl get crd -o name 2>/dev/null | grep 'longhorn.io' || true)" + if [ -n "$crd_list" ]; then + echo "$crd_list" | while read -r crd; do + [ -z "$crd" ] && continue + timeout 20s kubectl delete "$crd" --ignore-not-found=true || true + done + fi + args: + executable: /bin/bash + changed_when: true + failed_when: false + + - name: Delete leftover Longhorn ClusterRole/ClusterRoleBinding (cluster-scoped) + ansible.builtin.shell: | + set -e + export KUBECONFIG={{ k3s_kubeconfig }} + + role_list="$(kubectl get clusterrole -o name 2>/dev/null | grep 'longhorn' || true)" + if [ -n "$role_list" ]; then + echo "$role_list" | while read -r role; do + [ -z "$role" ] && continue + timeout 20s kubectl delete "$role" --ignore-not-found=true || true + done + fi + + 
binding_list="$(kubectl get clusterrolebinding -o name 2>/dev/null | grep 'longhorn' || true)" + if [ -n "$binding_list" ]; then + echo "$binding_list" | while read -r binding; do + [ -z "$binding" ] && continue + timeout 20s kubectl delete "$binding" --ignore-not-found=true || true + done + fi + args: + executable: /bin/bash + changed_when: true + failed_when: false + + - name: Cleanup leftover Helm release records for Longhorn (default + longhorn-system) + ansible.builtin.shell: | + set -e + export KUBECONFIG={{ k3s_kubeconfig }} + + # Failed/interrupted installs can leave Helm release secrets (sh.helm.release.v1.longhorn.*) in default or longhorn-system, which later causes: + # - "cannot re-use a name that is still in use" + # - meta.helm.sh/release-namespace annotation conflicts on cluster-scoped resources (the grep below uses a single backslash: this is a YAML literal block, so backslashes reach grep unchanged) + for ns in longhorn-system default; do + if helm -n "$ns" list --all 2>/dev/null | grep -q '^longhorn'; then + # uninstall may hang (e.g. on the uninstall job / hook); bound it so it cannot block the whole automation run + timeout 120s helm -n "$ns" uninstall longhorn --no-hooks || true + fi + + sec_list="$(kubectl -n "$ns" get secret -o name 2>/dev/null | grep '^secret/sh\.helm\.release\.v1\.longhorn\.' 
|| true)" + if [ -n "$sec_list" ]; then + echo "$sec_list" | xargs -n1 kubectl -n "$ns" delete --ignore-not-found=true + fi + done + environment: + KUBECONFIG: "{{ k3s_kubeconfig }}" + args: + executable: /bin/bash + changed_when: true + failed_when: false + + - name: Helm upgrade/install Longhorn(失败兜底:install --replace) + ansible.builtin.shell: | + set -e + helm upgrade --install longhorn longhorn/longhorn --namespace longhorn-system --create-namespace -f {{ longhorn_values_dest }} --version {{ longhorn_chart_version }} --wait --timeout 15m || helm install --replace longhorn longhorn/longhorn --namespace longhorn-system --create-namespace -f {{ longhorn_values_dest }} --version {{ longhorn_chart_version }} --wait --timeout 15m + environment: + KUBECONFIG: "{{ k3s_kubeconfig }}" + args: + executable: /bin/bash + register: helm_longhorn + changed_when: true + +- name: Apply local-path-config lab defaults (optional) + hosts: k3s_server + become: true + run_once: true + vars: + k3s_kubeconfig: /etc/rancher/k3s/k3s.yaml + local_path_json_src: "{{ playbook_dir }}/../files/03-05-local-path-config/local-path-config-lab.json" + local_path_json_dest: /root/local-path-config-lab.json + tasks: + - name: Apply local-path-config lab defaults (optional) + when: longhorn_apply_local_path_lab | default(false) | bool + block: + - name: Copy local-path lab json + ansible.builtin.copy: + src: "{{ local_path_json_src }}" + dest: "{{ local_path_json_dest }}" + mode: "0644" + + - name: Apply local-path-config ConfigMap + ansible.builtin.shell: | + set -e + KUBECONFIG={{ k3s_kubeconfig }} kubectl -n kube-system create configmap local-path-config \ + --from-file=config.json={{ local_path_json_dest }} \ + --dry-run=client -o yaml | KUBECONFIG={{ k3s_kubeconfig }} kubectl apply -f - + args: + executable: /bin/bash + changed_when: true + + - name: Restart local-path-provisioner if present + ansible.builtin.shell: | + KUBECONFIG={{ k3s_kubeconfig }} kubectl -n kube-system rollout restart 
deploy/local-path-provisioner + args: + executable: /bin/bash + register: lp_restart + failed_when: false + changed_when: lp_restart.rc == 0 diff --git a/ansible/playbooks/nginx-matrix-deploy.yml b/ansible/playbooks/nginx-matrix-deploy.yml index 65e6b32..1c09aaf 100644 --- a/ansible/playbooks/nginx-matrix-deploy.yml +++ b/ansible/playbooks/nginx-matrix-deploy.yml @@ -3,7 +3,7 @@ # 对应文档:docs/02-05-nginx-验证矩阵-一键部署.md(02-01~02-04 分篇已整合) # # 说明:复制 manifests → kubectl apply → 等待 Pod 就绪 → 验证 Pod 节点分布 → curl 16 目标 -# manifests:ansible/files/nginx-matrix/,M1 control-plane / M2 ylc61 / M3 worker / M4 ylc64,按实际修改 02/04 hostname +# manifests:ansible/files/02-05-nginx-matrix/,M1 control-plane / M2 ylc61 / M3 worker / M4 ylc64,按实际修改 02/04 hostname # # 执行(在 ansible/ 目录下): # ansible-playbook -i inventory.ini playbooks/nginx-matrix-deploy.yml @@ -15,8 +15,8 @@ run_once: true vars: k3s_kubeconfig: /etc/rancher/k3s/k3s.yaml - # manifests 在 ansible/files/nginx-matrix/,与 playbook 同项目 - manifests_path: "{{ playbook_dir }}/../files/nginx-matrix" + # manifests 在 ansible/files/02-05-nginx-matrix/,与 playbook 同项目 + manifests_path: "{{ playbook_dir }}/../files/02-05-nginx-matrix" tasks: - name: Ensure manifests path exists ansible.builtin.stat: diff --git a/ansible/playbooks/nginx-matrix-tls-deploy.yml b/ansible/playbooks/nginx-matrix-tls-deploy.yml index 4917d2c..1797a5b 100644 --- a/ansible/playbooks/nginx-matrix-tls-deploy.yml +++ b/ansible/playbooks/nginx-matrix-tls-deploy.yml @@ -3,7 +3,7 @@ # 对应文档:docs/03-02-k3s-traefik-acme.md # # 说明:复制 TLS + HTTP-only manifests → 自动删除已存在的不含 TLS 的 nginx 矩阵(02-05)→ kubectl apply(含 TLS 与 HTTP-only 共 8 个路由)→ 等待 Pod 就绪 → HTTP-only / HTTPS curl 矩阵验证(test01~test04.jackadam.top) -# manifests:ansible/files/nginx-matrix-tls/,域名为 test01~test04.jackadam.top,M2/M4 hostname 按实际修改;Ingress/IngressRoute 中 TLS 路由仅绑定 websecure,HTTP-only 路由仅绑定 web +# manifests:ansible/files/03-02-nginx-matrix-tls/,域名为 test01~test04.jackadam.top,M2/M4 hostname 按实际修改;Ingress/IngressRoute 
中 TLS 路由仅绑定 websecure,HTTP-only 路由仅绑定 web # 前置:已按 03-02 配置 ACME(Secret + traefik-acme.yaml),且 test01~test04.jackadam.top 已解析到入口 IP # # 执行(在 ansible/ 目录下): @@ -18,7 +18,7 @@ vars: # mode 由 -e mode=cleanup 传入,未传时默认为 deploy(勿在 vars 中写 mode: "{{ mode | default('deploy') }}" 会递归) k3s_kubeconfig: /etc/rancher/k3s/k3s.yaml - manifests_path: "{{ playbook_dir }}/../files/nginx-matrix-tls" + manifests_path: "{{ playbook_dir }}/../files/03-02-nginx-matrix-tls" tls_domains: - test01.jackadam.top - test02.jackadam.top diff --git a/ansible/playbooks/nodejs-demo-apply.yml b/ansible/playbooks/nodejs-demo-apply.yml index b54a8a7..a5e37f9 100644 --- a/ansible/playbooks/nodejs-demo-apply.yml +++ b/ansible/playbooks/nodejs-demo-apply.yml @@ -1,5 +1,5 @@ --- -# 一键应用 Node.js demo 清单(与 docs/04-01~04-13 + ansible/files/nodejs-demo 对齐) +# 一键应用 Node.js demo 清单(与 docs/04-01~04-13 + ansible/files/04-01-nodejs-demo 对齐) # # 执行(在仓库根目录): # ansible-playbook -i ansible/inventory.ini ansible/playbooks/nodejs-demo-apply.yml \ @@ -13,7 +13,7 @@ vars: k3s_kubeconfig: /etc/rancher/k3s/k3s.yaml nodejs_demo_manifest: "04-01-nodejs-demo.yaml" - manifests_dir: "{{ playbook_dir }}/../files/nodejs-demo" + manifests_dir: "{{ playbook_dir }}/../files/04-01-nodejs-demo" tasks: - name: Ensure manifest file exists ansible.builtin.stat: diff --git a/ansible/playbooks/verify/00-01.yml b/ansible/playbooks/verify/00-01.yml new file mode 100644 index 0000000..15d87ff --- /dev/null +++ b/ansible/playbooks/verify/00-01.yml @@ -0,0 +1,10 @@ +- name: "00-01 noop verify" + hosts: localhost + gather_facts: false + vars: + repo_root: "{{ playbook_dir }}/../../.." 
+ doc_id: "00-01" + doc_filename: "00-01-k3s-基础概念.md" + tasks: + - ansible.builtin.import_tasks: "{{ playbook_dir }}/_noop-tasks.yml" + diff --git a/ansible/playbooks/verify/00-04.yml b/ansible/playbooks/verify/00-04.yml new file mode 100644 index 0000000..2644d28 --- /dev/null +++ b/ansible/playbooks/verify/00-04.yml @@ -0,0 +1,10 @@ +- name: "00-04 noop verify" + hosts: localhost + gather_facts: false + vars: + repo_root: "{{ playbook_dir }}/../../.." + doc_id: "00-04" + doc_filename: "00-04-部署环境说明.md" + tasks: + - ansible.builtin.import_tasks: "{{ playbook_dir }}/_noop-tasks.yml" + diff --git a/ansible/playbooks/verify/01-01.yml b/ansible/playbooks/verify/01-01.yml new file mode 100644 index 0000000..61328ef --- /dev/null +++ b/ansible/playbooks/verify/01-01.yml @@ -0,0 +1,24 @@ +- name: "01-01 k3s baseline verify (nodes + core deploys)" + hosts: k3s_server + become: true + run_once: true + vars: + k3s_kubeconfig: /etc/rancher/k3s/k3s.yaml + tasks: + - name: kubectl get nodes + ansible.builtin.shell: KUBECONFIG={{ k3s_kubeconfig }} kubectl get nodes -o wide + changed_when: false + + - name: kube-system pods summary + ansible.builtin.shell: KUBECONFIG={{ k3s_kubeconfig }} kubectl get pods -n kube-system -o wide + changed_when: false + + - name: Assert core components exist (coredns, traefik) + ansible.builtin.shell: | + set -e + KUBECONFIG={{ k3s_kubeconfig }} kubectl -n kube-system get deploy coredns + KUBECONFIG={{ k3s_kubeconfig }} kubectl -n kube-system get deploy traefik + args: + executable: /bin/bash + changed_when: false + diff --git a/ansible/playbooks/verify/01-02.yml b/ansible/playbooks/verify/01-02.yml new file mode 100644 index 0000000..ba1a0f5 --- /dev/null +++ b/ansible/playbooks/verify/01-02.yml @@ -0,0 +1,11 @@ +- name: "01-02 k3s baseline verify (nodes)" + hosts: k3s_server + become: true + run_once: true + vars: + k3s_kubeconfig: /etc/rancher/k3s/k3s.yaml + tasks: + - name: kubectl get nodes + ansible.builtin.shell: KUBECONFIG={{ k3s_kubeconfig 
}} kubectl get nodes -o wide + changed_when: false + diff --git a/ansible/playbooks/verify/01-03.yml b/ansible/playbooks/verify/01-03.yml new file mode 100644 index 0000000..92d39a4 --- /dev/null +++ b/ansible/playbooks/verify/01-03.yml @@ -0,0 +1,10 @@ +- name: "01-03 noop verify" + hosts: localhost + gather_facts: false + vars: + repo_root: "{{ playbook_dir }}/../../.." + doc_id: "01-03" + doc_filename: "01-03-armv7-standalone-docker.md" + tasks: + - ansible.builtin.import_tasks: "{{ playbook_dir }}/_noop-tasks.yml" + diff --git a/ansible/playbooks/verify/01-04.yml b/ansible/playbooks/verify/01-04.yml new file mode 100644 index 0000000..9668d3e --- /dev/null +++ b/ansible/playbooks/verify/01-04.yml @@ -0,0 +1,10 @@ +- name: "01-04 noop verify" + hosts: localhost + gather_facts: false + vars: + repo_root: "{{ playbook_dir }}/../../.." + doc_id: "01-04" + doc_filename: "01-04-双控制节点ha.md" + tasks: + - ansible.builtin.import_tasks: "{{ playbook_dir }}/_noop-tasks.yml" + diff --git a/ansible/playbooks/verify/01-05.yml b/ansible/playbooks/verify/01-05.yml new file mode 100644 index 0000000..dea0ecc --- /dev/null +++ b/ansible/playbooks/verify/01-05.yml @@ -0,0 +1,10 @@ +- name: "01-05 noop verify" + hosts: localhost + gather_facts: false + vars: + repo_root: "{{ playbook_dir }}/../../.." 
+ doc_id: "01-05" + doc_filename: "01-05-armv7-nfs服务安装.md" + tasks: + - ansible.builtin.import_tasks: "{{ playbook_dir }}/_noop-tasks.yml" + diff --git a/ansible/playbooks/verify/01-06.yml b/ansible/playbooks/verify/01-06.yml new file mode 100644 index 0000000..1d0c0bd --- /dev/null +++ b/ansible/playbooks/verify/01-06.yml @@ -0,0 +1,11 @@ +- name: "01-06 k3s baseline verify (kube-system pods)" + hosts: k3s_server + become: true + run_once: true + vars: + k3s_kubeconfig: /etc/rancher/k3s/k3s.yaml + tasks: + - name: kube-system pods summary + ansible.builtin.shell: KUBECONFIG={{ k3s_kubeconfig }} kubectl get pods -n kube-system -o wide + changed_when: false + diff --git a/ansible/playbooks/verify/01-07.yml b/ansible/playbooks/verify/01-07.yml new file mode 100644 index 0000000..d38a38d --- /dev/null +++ b/ansible/playbooks/verify/01-07.yml @@ -0,0 +1,10 @@ +- name: "01-07 noop verify" + hosts: localhost + gather_facts: false + vars: + repo_root: "{{ playbook_dir }}/../../.." + doc_id: "01-07" + doc_filename: "01-07-openwrt-haproxy.md" + tasks: + - ansible.builtin.import_tasks: "{{ playbook_dir }}/_noop-tasks.yml" + diff --git a/ansible/playbooks/verify/02-00.yml b/ansible/playbooks/verify/02-00.yml new file mode 100644 index 0000000..455cec9 --- /dev/null +++ b/ansible/playbooks/verify/02-00.yml @@ -0,0 +1,10 @@ +- name: "02-00 noop verify" + hosts: localhost + gather_facts: false + vars: + repo_root: "{{ playbook_dir }}/../../.." 
+ doc_id: "02-00" + doc_filename: "02-00-nginx-系列说明.md" + tasks: + - ansible.builtin.import_tasks: "{{ playbook_dir }}/_noop-tasks.yml" + diff --git a/ansible/playbooks/verify/02-01.yml b/ansible/playbooks/verify/02-01.yml new file mode 100644 index 0000000..ff51ce8 --- /dev/null +++ b/ansible/playbooks/verify/02-01.yml @@ -0,0 +1,77 @@ +- name: Deploy 02-01 nginx control + Ingress (M1) + hosts: k3s_server + become: true + run_once: true + vars: + k3s_kubeconfig: /etc/rancher/k3s/k3s.yaml + manifest_src: "{{ playbook_dir }}/../../files/02-05-nginx-matrix/01-control-ingress.yaml" + manifest_dest: /tmp/nginx-m1.yaml + tasks: + - name: Copy manifest + ansible.builtin.copy: + src: "{{ manifest_src }}" + dest: "{{ manifest_dest }}" + mode: "0644" + + - name: Apply manifest + ansible.builtin.shell: | + set -e + KUBECONFIG={{ k3s_kubeconfig }} kubectl apply -f {{ manifest_dest }} + args: + executable: /bin/bash + changed_when: true + +- name: Verify 02-01 nginx control + Ingress (M1) + hosts: k3s_server + become: true + run_once: true + vars: + k3s_kubeconfig: /etc/rancher/k3s/k3s.yaml + verify_entry_base: "{{ nginx_entry_base | default('http://' ~ k3s_server_ip) }}" + tasks: + - name: Rollout status nginx-m1 + ansible.builtin.shell: | + set -e + KUBECONFIG={{ k3s_kubeconfig }} kubectl rollout status deployment/nginx-m1 -n default --timeout=180s + args: + executable: /bin/bash + changed_when: false + + - name: HTTP check /demo-m1 (retry 503 for convergence) + ansible.builtin.shell: | + set -e + base="{{ verify_entry_base | trim | regex_replace('/+$','') }}" + url="$base/demo-m1/" + ok=0 + for i in 1 2 3 4 5 6 7 8 9 10; do + code=$(curl -s -o /dev/null -w "%{http_code}" --connect-timeout 3 --max-time 8 "$url" 2>/dev/null || echo "000") + echo "try $i: $url -> $code" + if [ "$code" = "200" ]; then ok=1; break; fi + sleep 2 + done + test "$ok" = "1" + backend=$(curl -sS -D - -o /dev/null --connect-timeout 3 --max-time 8 "$url" 2>/dev/null | awk -F': ' '/^X-Backend:/{print 
$2; exit}' | tr -d '\r') + echo "X-Backend=$backend" + test "$backend" = "M1" + args: + executable: /bin/bash + changed_when: false + +- name: Teardown 02-01 nginx control + Ingress (M1) + hosts: k3s_server + become: true + run_once: true + vars: + k3s_kubeconfig: /etc/rancher/k3s/k3s.yaml + verify_teardown: "{{ (VERIFY_TEARDOWN | default('1')) | string }}" + manifest_dest: /tmp/nginx-m1.yaml + tasks: + - name: Delete resources when VERIFY_TEARDOWN=1 + when: verify_teardown == "1" + ansible.builtin.shell: | + set -e + KUBECONFIG={{ k3s_kubeconfig }} kubectl delete -f {{ manifest_dest }} --ignore-not-found=true + args: + executable: /bin/bash + changed_when: true + diff --git a/ansible/playbooks/verify/02-02.yml b/ansible/playbooks/verify/02-02.yml new file mode 100644 index 0000000..12e4178 --- /dev/null +++ b/ansible/playbooks/verify/02-02.yml @@ -0,0 +1,81 @@ +--- +# 02-02-nginx-control-ingressroute.md +# nginx M2:控制节点 + IngressRoute,路径 /demo-m2 + +- name: Deploy 02-02 nginx control + IngressRoute (M2) + hosts: k3s_server + become: true + run_once: true + vars: + k3s_kubeconfig: /etc/rancher/k3s/k3s.yaml + manifest_src: "{{ playbook_dir }}/../../files/02-05-nginx-matrix/02-control-ingressroute.yaml" + manifest_dest: /tmp/nginx-m2.yaml + tasks: + - name: Copy manifest + ansible.builtin.copy: + src: "{{ manifest_src }}" + dest: "{{ manifest_dest }}" + mode: "0644" + + - name: Apply manifest + ansible.builtin.shell: | + set -e + KUBECONFIG={{ k3s_kubeconfig }} kubectl apply -f {{ manifest_dest }} + args: + executable: /bin/bash + changed_when: true + +- name: Verify 02-02 nginx control + IngressRoute (M2) + hosts: k3s_server + become: true + run_once: true + vars: + k3s_kubeconfig: /etc/rancher/k3s/k3s.yaml + verify_entry_base: "{{ nginx_entry_base | default('http://' ~ k3s_server_ip) }}" + tasks: + - name: Rollout status nginx-m2 + ansible.builtin.shell: | + set -e + KUBECONFIG={{ k3s_kubeconfig }} kubectl rollout status deployment/nginx-m2 -n default 
--timeout=180s + args: + executable: /bin/bash + changed_when: false + + - name: HTTP check /demo-m2 (retry 503 for convergence) + ansible.builtin.shell: | + set -e + base="{{ verify_entry_base | trim | regex_replace('/+$','') }}" + url="$base/demo-m2/" + ok=0 + for i in 1 2 3 4 5 6 7 8 9 10; do + code=$(curl -s -o /dev/null -w "%{http_code}" --connect-timeout 3 --max-time 8 "$url" 2>/dev/null || echo "000") + echo "try $i: $url -> $code" + if [ "$code" = "200" ]; then ok=1; break; fi + sleep 2 + done + test "$ok" = "1" + backend=$(curl -sS -D - -o /dev/null --connect-timeout 3 --max-time 8 "$url" 2>/dev/null | awk -F': ' '/^X-Backend:/{print $2; exit}' | tr -d '\r') + echo "X-Backend=$backend" + test "$backend" = "M2" + args: + executable: /bin/bash + changed_when: false + +- name: Teardown 02-02 nginx control + IngressRoute (M2) + hosts: k3s_server + become: true + run_once: true + vars: + k3s_kubeconfig: /etc/rancher/k3s/k3s.yaml + verify_teardown: "{{ (VERIFY_TEARDOWN | default('1')) | string }}" + manifest_dest: /tmp/nginx-m2.yaml + tasks: + - name: Delete resources when VERIFY_TEARDOWN=1 + when: verify_teardown == "1" + ansible.builtin.shell: | + set -e + KUBECONFIG={{ k3s_kubeconfig }} kubectl delete -f {{ manifest_dest }} --ignore-not-found=true + args: + executable: /bin/bash + changed_when: true + diff --git a/ansible/playbooks/verify/02-03.yml b/ansible/playbooks/verify/02-03.yml new file mode 100644 index 0000000..ca55b5b --- /dev/null +++ b/ansible/playbooks/verify/02-03.yml @@ -0,0 +1,77 @@ +- name: Deploy 02-03 nginx worker + Ingress (M3) + hosts: k3s_server + become: true + run_once: true + vars: + k3s_kubeconfig: /etc/rancher/k3s/k3s.yaml + manifest_src: "{{ playbook_dir }}/../../files/02-05-nginx-matrix/03-worker-ingress.yaml" + manifest_dest: /tmp/nginx-m3.yaml + tasks: + - name: Copy manifest + ansible.builtin.copy: + src: "{{ manifest_src }}" + dest: "{{ manifest_dest }}" + mode: "0644" + + - name: Apply manifest + ansible.builtin.shell: | + 
set -e + KUBECONFIG={{ k3s_kubeconfig }} kubectl apply -f {{ manifest_dest }} + args: + executable: /bin/bash + changed_when: true + +- name: Verify 02-03 nginx worker + Ingress (M3) + hosts: k3s_server + become: true + run_once: true + vars: + k3s_kubeconfig: /etc/rancher/k3s/k3s.yaml + verify_entry_base: "{{ nginx_entry_base | default('http://' ~ k3s_server_ip) }}" + tasks: + - name: Rollout status nginx-m3 + ansible.builtin.shell: | + set -e + KUBECONFIG={{ k3s_kubeconfig }} kubectl rollout status deployment/nginx-m3 -n default --timeout=180s + args: + executable: /bin/bash + changed_when: false + + - name: HTTP check /demo-m3 (retry 503 for convergence) + ansible.builtin.shell: | + set -e + base="{{ verify_entry_base | trim | regex_replace('/+$','') }}" + url="$base/demo-m3/" + ok=0 + for i in 1 2 3 4 5 6 7 8 9 10; do + code=$(curl -s -o /dev/null -w "%{http_code}" --connect-timeout 3 --max-time 8 "$url" 2>/dev/null || echo "000") + echo "try $i: $url -> $code" + if [ "$code" = "200" ]; then ok=1; break; fi + sleep 2 + done + test "$ok" = "1" + backend=$(curl -sS -D - -o /dev/null --connect-timeout 3 --max-time 8 "$url" 2>/dev/null | awk -F': ' '/^X-Backend:/{print $2; exit}' | tr -d '\r') + echo "X-Backend=$backend" + test "$backend" = "M3" + args: + executable: /bin/bash + changed_when: false + +- name: Teardown 02-03 nginx worker + Ingress (M3) + hosts: k3s_server + become: true + run_once: true + vars: + k3s_kubeconfig: /etc/rancher/k3s/k3s.yaml + verify_teardown: "{{ (VERIFY_TEARDOWN | default('1')) | string }}" + manifest_dest: /tmp/nginx-m3.yaml + tasks: + - name: Delete resources when VERIFY_TEARDOWN=1 + when: verify_teardown == "1" + ansible.builtin.shell: | + set -e + KUBECONFIG={{ k3s_kubeconfig }} kubectl delete -f {{ manifest_dest }} --ignore-not-found=true + args: + executable: /bin/bash + changed_when: true + diff --git a/ansible/playbooks/verify/02-04.yml b/ansible/playbooks/verify/02-04.yml new file mode 100644 index 0000000..3f5c179 --- 
/dev/null +++ b/ansible/playbooks/verify/02-04.yml @@ -0,0 +1,77 @@ +- name: Deploy 02-04 nginx worker + IngressRoute (M4) + hosts: k3s_server + become: true + run_once: true + vars: + k3s_kubeconfig: /etc/rancher/k3s/k3s.yaml + manifest_src: "{{ playbook_dir }}/../../files/02-05-nginx-matrix/04-worker-ingressroute.yaml" + manifest_dest: /tmp/nginx-m4.yaml + tasks: + - name: Copy manifest + ansible.builtin.copy: + src: "{{ manifest_src }}" + dest: "{{ manifest_dest }}" + mode: "0644" + + - name: Apply manifest + ansible.builtin.shell: | + set -e + KUBECONFIG={{ k3s_kubeconfig }} kubectl apply -f {{ manifest_dest }} + args: + executable: /bin/bash + changed_when: true + +- name: Verify 02-04 nginx worker + IngressRoute (M4) + hosts: k3s_server + become: true + run_once: true + vars: + k3s_kubeconfig: /etc/rancher/k3s/k3s.yaml + verify_entry_base: "{{ nginx_entry_base | default('http://' ~ k3s_server_ip) }}" + tasks: + - name: Rollout status nginx-m4 + ansible.builtin.shell: | + set -e + KUBECONFIG={{ k3s_kubeconfig }} kubectl rollout status deployment/nginx-m4 -n default --timeout=180s + args: + executable: /bin/bash + changed_when: false + + - name: HTTP check /demo-m4 (retry 503 for convergence) + ansible.builtin.shell: | + set -e + base="{{ verify_entry_base | trim | regex_replace('/+$','') }}" + url="$base/demo-m4/" + ok=0 + for i in 1 2 3 4 5 6 7 8 9 10; do + code=$(curl -s -o /dev/null -w "%{http_code}" --connect-timeout 3 --max-time 8 "$url" 2>/dev/null || echo "000") + echo "try $i: $url -> $code" + if [ "$code" = "200" ]; then ok=1; break; fi + sleep 2 + done + test "$ok" = "1" + backend=$(curl -sS -D - -o /dev/null --connect-timeout 3 --max-time 8 "$url" 2>/dev/null | awk -F': ' '/^X-Backend:/{print $2; exit}' | tr -d '\r') + echo "X-Backend=$backend" + test "$backend" = "M4" + args: + executable: /bin/bash + changed_when: false + +- name: Teardown 02-04 nginx worker + IngressRoute (M4) + hosts: k3s_server + become: true + run_once: true + vars: + 
k3s_kubeconfig: /etc/rancher/k3s/k3s.yaml + verify_teardown: "{{ (VERIFY_TEARDOWN | default('1')) | string }}" + manifest_dest: /tmp/nginx-m4.yaml + tasks: + - name: Delete resources when VERIFY_TEARDOWN=1 + when: verify_teardown == "1" + ansible.builtin.shell: | + set -e + KUBECONFIG={{ k3s_kubeconfig }} kubectl delete -f {{ manifest_dest }} --ignore-not-found=true + args: + executable: /bin/bash + changed_when: true + diff --git a/ansible/playbooks/verify/02-05.yml b/ansible/playbooks/verify/02-05.yml new file mode 100644 index 0000000..99afbb6 --- /dev/null +++ b/ansible/playbooks/verify/02-05.yml @@ -0,0 +1,65 @@ +- import_playbook: "{{ playbook_dir }}/../nginx-matrix-deploy.yml" + +- name: Verify 02-05 nginx matrix (HTTP paths) + hosts: k3s_server + become: true + run_once: true + vars: + k3s_kubeconfig: /etc/rancher/k3s/k3s.yaml + verify_entry_base: "{{ nginx_entry_base | default('http://' ~ k3s_server_ip) }}" + tasks: + - name: Verify M1~M4 deployments ready + ansible.builtin.shell: | + set -e + KUBECONFIG={{ k3s_kubeconfig }} kubectl rollout status deployment/nginx-m1 -n default --timeout=120s + KUBECONFIG={{ k3s_kubeconfig }} kubectl rollout status deployment/nginx-m2 -n default --timeout=120s + KUBECONFIG={{ k3s_kubeconfig }} kubectl rollout status deployment/nginx-m3 -n default --timeout=180s + KUBECONFIG={{ k3s_kubeconfig }} kubectl rollout status deployment/nginx-m4 -n default --timeout=180s + args: + executable: /bin/bash + changed_when: false + + - name: HTTP check 4 paths (expect 200 and X-Backend marker) + ansible.builtin.shell: | + set +e + base="{{ verify_entry_base | trim | regex_replace('/+$','') }}" + fail=0 + for id in 1 2 3 4; do + url="$base/demo-m$id/" + code=$(curl -s -o /dev/null -w "%{http_code}" --connect-timeout 3 --max-time 8 "$url" 2>/dev/null || echo "000") + echo "$url -> $code" + if [ "$code" != "200" ]; then + echo "$url -> unexpected http_code=$code" + fail=1 + continue + fi + backend=$(curl -sS -D - -o /dev/null 
--connect-timeout 3 --max-time 8 "$url" 2>/dev/null \ + | awk -F': ' '/^X-Backend:/{print $2; exit}' \ + | tr -d '\r' || true) + echo "$url -> X-Backend: ${backend:-}" + if [ "$backend" != "M$id" ]; then + fail=1 + fi + done + exit $fail + args: + executable: /bin/bash + changed_when: false + +- name: Teardown 02-05 nginx matrix (optional) + hosts: k3s_server + become: true + run_once: true + vars: + k3s_kubeconfig: /etc/rancher/k3s/k3s.yaml + verify_teardown: "{{ (VERIFY_TEARDOWN | default('1')) | string }}" + tasks: + - name: Delete nginx matrix resources when VERIFY_TEARDOWN=1 + when: verify_teardown == "1" + ansible.builtin.shell: | + set -e + KUBECONFIG={{ k3s_kubeconfig }} kubectl delete -f /tmp/nginx-matrix/ -R --ignore-not-found=true + args: + executable: /bin/bash + changed_when: true + diff --git a/ansible/playbooks/verify/03-01.yml b/ansible/playbooks/verify/03-01.yml new file mode 100644 index 0000000..761c95b --- /dev/null +++ b/ansible/playbooks/verify/03-01.yml @@ -0,0 +1,65 @@ +- name: Deploy 03-01 Traefik Dashboard + hosts: k3s_server + become: true + run_once: true + vars: + k3s_kubeconfig: /etc/rancher/k3s/k3s.yaml + manifest_src: "{{ playbook_dir }}/../../files/03-01-traefik-dashboard/traefik-dashboard.yaml" + manifest_dest: /tmp/traefik-dashboard.yaml + tasks: + - name: Copy manifest + ansible.builtin.copy: + src: "{{ manifest_src }}" + dest: "{{ manifest_dest }}" + mode: "0644" + + - name: Apply manifest + restart traefik + ansible.builtin.shell: | + set -e + KUBECONFIG={{ k3s_kubeconfig }} kubectl apply -f {{ manifest_dest }} + KUBECONFIG={{ k3s_kubeconfig }} kubectl -n kube-system rollout restart deploy/traefik || true + args: + executable: /bin/bash + changed_when: true + +- name: Verify 03-01 Traefik Dashboard + hosts: k3s_server + become: true + run_once: true + vars: + k3s_kubeconfig: /etc/rancher/k3s/k3s.yaml + tasks: + - name: Wait traefik rollout + ansible.builtin.shell: | + set -e + KUBECONFIG={{ k3s_kubeconfig }} kubectl -n 
kube-system rollout status deploy/traefik --timeout=180s + args: + executable: /bin/bash + changed_when: false + + - name: Assert traefik-dashboard IngressRoute exists + ansible.builtin.shell: | + set -e + KUBECONFIG={{ k3s_kubeconfig }} kubectl -n kube-system get ingressroute.traefik.io/traefik-dashboard + args: + executable: /bin/bash + changed_when: false + +- name: Teardown 03-01 Traefik Dashboard (optional) + hosts: k3s_server + become: true + run_once: true + vars: + k3s_kubeconfig: /etc/rancher/k3s/k3s.yaml + verify_teardown: "{{ (VERIFY_TEARDOWN | default('1')) | string }}" + manifest_dest: /tmp/traefik-dashboard.yaml + tasks: + - name: Delete resources when VERIFY_TEARDOWN=1 + when: verify_teardown == "1" + ansible.builtin.shell: | + set -e + KUBECONFIG={{ k3s_kubeconfig }} kubectl delete -f {{ manifest_dest }} --ignore-not-found=true + args: + executable: /bin/bash + changed_when: true + diff --git a/ansible/playbooks/verify/03-02.yml b/ansible/playbooks/verify/03-02.yml new file mode 100644 index 0000000..0115cf4 --- /dev/null +++ b/ansible/playbooks/verify/03-02.yml @@ -0,0 +1,98 @@ +- name: Deploy 03-02 Traefik ACME (gated) + hosts: k3s_server + become: true + run_once: true + vars: + k3s_kubeconfig: /etc/rancher/k3s/k3s.yaml + manifest_src: "{{ playbook_dir }}/../../files/03-02-traefik-acme/traefik-acme.yaml" + manifest_dest: /tmp/traefik-acme.yaml + acme_email: "{{ ACME_EMAIL | default('') }}" + tasks: + - name: "Gate - require ACME_EMAIL and cloudflare-api-token secret" + ansible.builtin.shell: | + set -e + test -n "{{ acme_email }}" + KUBECONFIG={{ k3s_kubeconfig }} kubectl -n kube-system get secret cloudflare-api-token >/dev/null + args: + executable: /bin/bash + register: acme_gate + changed_when: false + failed_when: false + + - name: Copy manifest + when: acme_gate.rc == 0 + ansible.builtin.copy: + src: "{{ manifest_src }}" + dest: "{{ manifest_dest }}" + mode: "0644" + + - name: Replace ACME email placeholder + when: acme_gate.rc == 0 + 
ansible.builtin.shell: | + set -e + sed -i "s//{{ acme_email | replace('/', '\\/') }}/g" {{ manifest_dest }} + args: + executable: /bin/bash + changed_when: true + + - name: Apply manifest + restart traefik + when: acme_gate.rc == 0 + ansible.builtin.shell: | + set -e + KUBECONFIG={{ k3s_kubeconfig }} kubectl apply -f {{ manifest_dest }} + KUBECONFIG={{ k3s_kubeconfig }} kubectl -n kube-system rollout restart deploy/traefik || true + args: + executable: /bin/bash + changed_when: true + +- name: Verify 03-02 Traefik ACME (gated) + hosts: k3s_server + become: true + run_once: true + vars: + k3s_kubeconfig: /etc/rancher/k3s/k3s.yaml + acme_email: "{{ ACME_EMAIL | default('') }}" + tasks: + - name: "Gate - require ACME_EMAIL and cloudflare-api-token secret" + ansible.builtin.shell: | + set -e + test -n "{{ acme_email }}" + KUBECONFIG={{ k3s_kubeconfig }} kubectl -n kube-system get secret cloudflare-api-token >/dev/null + args: + executable: /bin/bash + register: acme_gate + changed_when: false + failed_when: false + + - name: Wait traefik rollout + when: acme_gate.rc == 0 + ansible.builtin.shell: | + set -e + KUBECONFIG={{ k3s_kubeconfig }} kubectl -n kube-system rollout status deploy/traefik --timeout=180s + args: + executable: /bin/bash + changed_when: false + +- name: Teardown 03-02 Traefik ACME (optional) + hosts: k3s_server + become: true + run_once: true + vars: + k3s_kubeconfig: /etc/rancher/k3s/k3s.yaml + verify_teardown: "{{ (VERIFY_TEARDOWN | default('1')) | string }}" + manifest_dest: /tmp/traefik-acme.yaml + acme_email: "{{ ACME_EMAIL | default('') }}" + tasks: + - name: Delete resources when VERIFY_TEARDOWN=1 + when: verify_teardown == "1" + ansible.builtin.shell: | + set -e + # gated:只有在 deploy gate 通过且文件存在时才清理;否则跳过,避免 fail-fast。 + test -n "{{ acme_email }}" + test -f "{{ manifest_dest }}" + KUBECONFIG={{ k3s_kubeconfig }} kubectl delete -f {{ manifest_dest }} --ignore-not-found=true + args: + executable: /bin/bash + changed_when: true + failed_when: 
false + diff --git a/ansible/playbooks/verify/03-03.yml b/ansible/playbooks/verify/03-03.yml new file mode 100644 index 0000000..2a48166 --- /dev/null +++ b/ansible/playbooks/verify/03-03.yml @@ -0,0 +1,10 @@ +- name: "03-03 noop verify" + hosts: localhost + gather_facts: false + vars: + repo_root: "{{ playbook_dir }}/../../.." + doc_id: "03-03" + doc_filename: "03-03-k3s-traefik-dashboard-acme.md" + tasks: + - ansible.builtin.import_tasks: "{{ playbook_dir }}/_noop-tasks.yml" + diff --git a/ansible/playbooks/verify/03-04.yml b/ansible/playbooks/verify/03-04.yml new file mode 100644 index 0000000..733802d --- /dev/null +++ b/ansible/playbooks/verify/03-04.yml @@ -0,0 +1,10 @@ +- name: "03-04 noop verify" + hosts: localhost + gather_facts: false + vars: + repo_root: "{{ playbook_dir }}/../../.." + doc_id: "03-04" + doc_filename: "03-04-k3s-cloudflare-tunnel-配置接入.md" + tasks: + - ansible.builtin.import_tasks: "{{ playbook_dir }}/_noop-tasks.yml" + diff --git a/ansible/playbooks/verify/03-05.yml b/ansible/playbooks/verify/03-05.yml new file mode 100644 index 0000000..84465ab --- /dev/null +++ b/ansible/playbooks/verify/03-05.yml @@ -0,0 +1,66 @@ +- name: Deploy 03-05 local-path PVC demo + hosts: k3s_server + become: true + run_once: true + vars: + k3s_kubeconfig: /etc/rancher/k3s/k3s.yaml + manifest_src: "{{ playbook_dir }}/../../files/03-05-local-path-demo/local-path-pvc-demo.yaml" + manifest_dest: /tmp/local-path-pvc-demo.yaml + tasks: + - name: Copy manifest to server + ansible.builtin.copy: + src: "{{ manifest_src }}" + dest: "{{ manifest_dest }}" + mode: "0644" + + - name: kubectl apply + ansible.builtin.shell: | + set -e + KUBECONFIG={{ k3s_kubeconfig }} kubectl apply -f {{ manifest_dest }} + args: + executable: /bin/bash + changed_when: true + +- name: Verify 03-05 local-path PVC demo + hosts: k3s_server + become: true + run_once: true + vars: + k3s_kubeconfig: /etc/rancher/k3s/k3s.yaml + tasks: + - name: Wait nginx-local-pvc-demo deployment ready + 
ansible.builtin.shell: | + set -e + KUBECONFIG={{ k3s_kubeconfig }} kubectl rollout status deployment/nginx-local-pvc-demo -n default --timeout=180s + args: + executable: /bin/bash + changed_when: false + + - name: Assert PVC is Bound + ansible.builtin.shell: | + set -e + phase=$(KUBECONFIG={{ k3s_kubeconfig }} kubectl get pvc local-pvc-demo -n default -o jsonpath='{.status.phase}') + echo "pvc phase=$phase" + test "$phase" = "Bound" + args: + executable: /bin/bash + changed_when: false + +- name: Teardown 03-05 local-path PVC demo (optional) + hosts: k3s_server + become: true + run_once: true + vars: + k3s_kubeconfig: /etc/rancher/k3s/k3s.yaml + verify_teardown: "{{ (VERIFY_TEARDOWN | default('1')) | string }}" + manifest_dest: /tmp/local-path-pvc-demo.yaml + tasks: + - name: Delete resources when VERIFY_TEARDOWN=1 + when: verify_teardown == "1" + ansible.builtin.shell: | + set -e + KUBECONFIG={{ k3s_kubeconfig }} kubectl delete -f {{ manifest_dest }} --ignore-not-found=true + args: + executable: /bin/bash + changed_when: true + diff --git a/ansible/playbooks/verify/03-06.yml b/ansible/playbooks/verify/03-06.yml new file mode 100644 index 0000000..55e4b4a --- /dev/null +++ b/ansible/playbooks/verify/03-06.yml @@ -0,0 +1,94 @@ +- name: Deploy 03-06 NFS PV/PVC demo (gated) + hosts: k3s_server + become: true + run_once: true + vars: + k3s_kubeconfig: /etc/rancher/k3s/k3s.yaml + nfs_server_ip: "{{ NFS_SERVER_IP | default('') }}" + nfs_export_path: "{{ NFS_EXPORT_PATH | default('') }}" + manifest_src: "{{ playbook_dir }}/../../files/03-06-nfs-demo/nfs-pv-pvc-demo.yaml" + manifest_dest: /tmp/nfs-pv-pvc-demo.yaml + tasks: + - name: "Gate - require NFS_SERVER_IP and NFS_EXPORT_PATH" + ansible.builtin.shell: | + set -e + test -n "{{ nfs_server_ip }}" + test -n "{{ nfs_export_path }}" + args: + executable: /bin/bash + register: nfs_gate + changed_when: false + failed_when: false + + - name: Copy manifest + when: nfs_gate.rc == 0 + ansible.builtin.copy: + src: "{{ 
manifest_src }}" + dest: "{{ manifest_dest }}" + mode: "0644" + + - name: kubectl apply + when: nfs_gate.rc == 0 + ansible.builtin.shell: | + set -e + KUBECONFIG={{ k3s_kubeconfig }} kubectl apply -f {{ manifest_dest }} + args: + executable: /bin/bash + changed_when: true + +- name: Verify 03-06 NFS PV/PVC demo (gated) + hosts: k3s_server + become: true + run_once: true + vars: + k3s_kubeconfig: /etc/rancher/k3s/k3s.yaml + nfs_server_ip: "{{ NFS_SERVER_IP | default('') }}" + nfs_export_path: "{{ NFS_EXPORT_PATH | default('') }}" + tasks: + - name: "Gate - require NFS_SERVER_IP and NFS_EXPORT_PATH" + ansible.builtin.shell: | + set -e + test -n "{{ nfs_server_ip }}" + test -n "{{ nfs_export_path }}" + args: + executable: /bin/bash + register: nfs_gate + changed_when: false + failed_when: false + + - name: Assert PVC Bound + when: nfs_gate.rc == 0 + ansible.builtin.shell: | + set -e + phase=$(KUBECONFIG={{ k3s_kubeconfig }} kubectl -n default get pvc nfs-pvc-demo -o jsonpath='{.status.phase}') + echo "pvc phase=$phase" + test "$phase" = "Bound" + args: + executable: /bin/bash + changed_when: false + +- name: Teardown 03-06 NFS PV/PVC demo (optional) + hosts: k3s_server + become: true + run_once: true + vars: + k3s_kubeconfig: /etc/rancher/k3s/k3s.yaml + verify_teardown: "{{ (VERIFY_TEARDOWN | default('1')) | string }}" + manifest_dest: /tmp/nfs-pv-pvc-demo.yaml + nfs_server_ip: "{{ NFS_SERVER_IP | default('') }}" + nfs_export_path: "{{ NFS_EXPORT_PATH | default('') }}" + tasks: + - name: Delete resources when VERIFY_TEARDOWN=1 + when: verify_teardown == "1" + ansible.builtin.shell: | + set -e + # gated:只有在 deploy gate 通过且文件存在时才清理;否则跳过,避免 fail-fast。 + test -n "{{ nfs_server_ip }}" + test -n "{{ nfs_export_path }}" + test -f "{{ manifest_dest }}" + KUBECONFIG={{ k3s_kubeconfig }} kubectl delete -f {{ manifest_dest }} --ignore-not-found=true + args: + executable: /bin/bash + changed_when: true + failed_when: false + diff --git a/ansible/playbooks/verify/03-07.yml 
b/ansible/playbooks/verify/03-07.yml new file mode 100644 index 0000000..dbc6016 --- /dev/null +++ b/ansible/playbooks/verify/03-07.yml @@ -0,0 +1,41 @@ +- import_playbook: "{{ playbook_dir }}/../longhorn-install.yml" + +- name: Verify 03-07 Longhorn (namespace pods) + hosts: k3s_server + become: true + run_once: true + vars: + k3s_kubeconfig: /etc/rancher/k3s/k3s.yaml + longhorn_ns: "{{ longhorn_namespace | default('longhorn-system') }}" + tasks: + - name: Check longhorn pods + ansible.builtin.shell: | + set -e + KUBECONFIG={{ k3s_kubeconfig }} kubectl get pods -n {{ longhorn_ns }} -o wide + args: + executable: /bin/bash + changed_when: false + +- name: Teardown 03-07 Longhorn (optional) + hosts: k3s_server + become: true + run_once: true + vars: + k3s_kubeconfig: /etc/rancher/k3s/k3s.yaml + verify_teardown: "{{ (VERIFY_TEARDOWN | default('1')) | string }}" + longhorn_ns: "{{ longhorn_namespace | default('longhorn-system') }}" + tasks: + - name: Uninstall longhorn helm release when VERIFY_TEARDOWN=1 + when: verify_teardown == "1" + ansible.builtin.shell: | + set -e + export KUBECONFIG={{ k3s_kubeconfig }} + if helm -n {{ longhorn_ns }} list 2>/dev/null | grep -q longhorn; then + timeout 180s helm -n {{ longhorn_ns }} uninstall longhorn --no-hooks || true + fi + kubectl delete ns {{ longhorn_ns }} --ignore-not-found=true --wait=false || true + args: + executable: /bin/bash + changed_when: true + failed_when: false + diff --git a/ansible/playbooks/verify/03-08.yml b/ansible/playbooks/verify/03-08.yml new file mode 100644 index 0000000..24f7770 --- /dev/null +++ b/ansible/playbooks/verify/03-08.yml @@ -0,0 +1,10 @@ +- name: "03-08 noop verify" + hosts: localhost + gather_facts: false + vars: + repo_root: "{{ playbook_dir }}/../../.." 
+ doc_id: "03-08" + doc_filename: "03-08-k3s-ha-集群配置与切换.md" + tasks: + - ansible.builtin.import_tasks: "{{ playbook_dir }}/_noop-tasks.yml" + diff --git a/ansible/playbooks/verify/03-09.yml b/ansible/playbooks/verify/03-09.yml new file mode 100644 index 0000000..92e7316 --- /dev/null +++ b/ansible/playbooks/verify/03-09.yml @@ -0,0 +1,10 @@ +- name: "03-09 noop verify" + hosts: localhost + gather_facts: false + vars: + repo_root: "{{ playbook_dir }}/../../.." + doc_id: "03-09" + doc_filename: "03-09-k3s-gitops-集群配置管理.md" + tasks: + - ansible.builtin.import_tasks: "{{ playbook_dir }}/_noop-tasks.yml" + diff --git a/ansible/playbooks/verify/03-10.yml b/ansible/playbooks/verify/03-10.yml new file mode 100644 index 0000000..90f9628 --- /dev/null +++ b/ansible/playbooks/verify/03-10.yml @@ -0,0 +1,10 @@ +- name: "03-10 noop verify" + hosts: localhost + gather_facts: false + vars: + repo_root: "{{ playbook_dir }}/../../.." + doc_id: "03-10" + doc_filename: "03-10-k3s-traefik-custom-ports.md" + tasks: + - ansible.builtin.import_tasks: "{{ playbook_dir }}/_noop-tasks.yml" + diff --git a/ansible/playbooks/verify/04-01.yml b/ansible/playbooks/verify/04-01.yml new file mode 100644 index 0000000..554ac5e --- /dev/null +++ b/ansible/playbooks/verify/04-01.yml @@ -0,0 +1,49 @@ +- import_playbook: "{{ playbook_dir }}/../nodejs-demo-apply.yml" + +- name: Verify 04-01 nodejs demo (rollout + HTTP) + hosts: k3s_server + become: true + run_once: true + vars: + k3s_kubeconfig: /etc/rancher/k3s/k3s.yaml + verify_entry_base: "{{ nodejs_entry_base | default('http://' ~ k3s_server_ip) }}" + tasks: + - name: Rollout status nodejs-demo + ansible.builtin.shell: | + set -e + KUBECONFIG={{ k3s_kubeconfig }} kubectl rollout status deployment/nodejs-demo -n default --timeout=180s + args: + executable: /bin/bash + changed_when: false + + - name: HTTP check /node (expect 200 and Hello World) + ansible.builtin.shell: | + set -e + base="{{ verify_entry_base | trim | regex_replace('/+$','') }}" + 
url="$base/node" + code=$(curl -s -o /dev/null -w "%{http_code}" --connect-timeout 3 --max-time 8 "$url" 2>/dev/null || echo "000") + echo "$url -> $code" + test "$code" = "200" + body=$(curl -sS --connect-timeout 3 --max-time 8 "$url" 2>/dev/null || true) + echo "$body" | grep -q "Hello World from Node.js" + args: + executable: /bin/bash + changed_when: false + +- name: Teardown 04-01 nodejs demo (optional) + hosts: k3s_server + become: true + run_once: true + vars: + k3s_kubeconfig: /etc/rancher/k3s/k3s.yaml + verify_teardown: "{{ (VERIFY_TEARDOWN | default('1')) | string }}" + tasks: + - name: Delete nodejs-demo resources when VERIFY_TEARDOWN=1 + when: verify_teardown == "1" + ansible.builtin.shell: | + set -e + KUBECONFIG={{ k3s_kubeconfig }} kubectl delete deploy/nodejs-demo svc/nodejs-demo ing/nodejs-demo -n default --ignore-not-found=true + args: + executable: /bin/bash + changed_when: true + diff --git a/ansible/playbooks/verify/04-02.yml b/ansible/playbooks/verify/04-02.yml new file mode 100644 index 0000000..fc0123f --- /dev/null +++ b/ansible/playbooks/verify/04-02.yml @@ -0,0 +1,10 @@ +- name: "04-02 noop verify" + hosts: localhost + gather_facts: false + vars: + repo_root: "{{ playbook_dir }}/../../.." + doc_id: "04-02" + doc_filename: "04-02-nodejs-镜像与运行命令.md" + tasks: + - ansible.builtin.import_tasks: "{{ playbook_dir }}/_noop-tasks.yml" + diff --git a/ansible/playbooks/verify/04-03.yml b/ansible/playbooks/verify/04-03.yml new file mode 100644 index 0000000..7af2a77 --- /dev/null +++ b/ansible/playbooks/verify/04-03.yml @@ -0,0 +1,10 @@ +- name: "04-03 noop verify" + hosts: localhost + gather_facts: false + vars: + repo_root: "{{ playbook_dir }}/../../.." 
+ doc_id: "04-03" + doc_filename: "04-03-nodejs-环境变量与配置注入.md" + tasks: + - ansible.builtin.import_tasks: "{{ playbook_dir }}/_noop-tasks.yml" + diff --git a/ansible/playbooks/verify/04-04.yml b/ansible/playbooks/verify/04-04.yml new file mode 100644 index 0000000..a988d41 --- /dev/null +++ b/ansible/playbooks/verify/04-04.yml @@ -0,0 +1,10 @@ +- name: "04-04 noop verify" + hosts: localhost + gather_facts: false + vars: + repo_root: "{{ playbook_dir }}/../../.." + doc_id: "04-04" + doc_filename: "04-04-nodejs-端口与Service.md" + tasks: + - ansible.builtin.import_tasks: "{{ playbook_dir }}/_noop-tasks.yml" + diff --git a/ansible/playbooks/verify/04-05.yml b/ansible/playbooks/verify/04-05.yml new file mode 100644 index 0000000..7660e06 --- /dev/null +++ b/ansible/playbooks/verify/04-05.yml @@ -0,0 +1,10 @@ +- name: "04-05 noop verify" + hosts: localhost + gather_facts: false + vars: + repo_root: "{{ playbook_dir }}/../../.." + doc_id: "04-05" + doc_filename: "04-05-nodejs-资源请求与限制.md" + tasks: + - ansible.builtin.import_tasks: "{{ playbook_dir }}/_noop-tasks.yml" + diff --git a/ansible/playbooks/verify/04-06.yml b/ansible/playbooks/verify/04-06.yml new file mode 100644 index 0000000..6cad20d --- /dev/null +++ b/ansible/playbooks/verify/04-06.yml @@ -0,0 +1,10 @@ +- name: "04-06 noop verify" + hosts: localhost + gather_facts: false + vars: + repo_root: "{{ playbook_dir }}/../../.." + doc_id: "04-06" + doc_filename: "04-06-nodejs-探针与健康检查.md" + tasks: + - ansible.builtin.import_tasks: "{{ playbook_dir }}/_noop-tasks.yml" + diff --git a/ansible/playbooks/verify/04-07.yml b/ansible/playbooks/verify/04-07.yml new file mode 100644 index 0000000..6acff34 --- /dev/null +++ b/ansible/playbooks/verify/04-07.yml @@ -0,0 +1,10 @@ +- name: "04-07 noop verify" + hosts: localhost + gather_facts: false + vars: + repo_root: "{{ playbook_dir }}/../../.." 
+ doc_id: "04-07" + doc_filename: "04-07-nodejs-调度与亲和.md" + tasks: + - ansible.builtin.import_tasks: "{{ playbook_dir }}/_noop-tasks.yml" + diff --git a/ansible/playbooks/verify/04-08.yml b/ansible/playbooks/verify/04-08.yml new file mode 100644 index 0000000..41010ab --- /dev/null +++ b/ansible/playbooks/verify/04-08.yml @@ -0,0 +1,10 @@ +- name: "04-08 noop verify" + hosts: localhost + gather_facts: false + vars: + repo_root: "{{ playbook_dir }}/../../.." + doc_id: "04-08" + doc_filename: "04-08-nodejs-安全上下文.md" + tasks: + - ansible.builtin.import_tasks: "{{ playbook_dir }}/_noop-tasks.yml" + diff --git a/ansible/playbooks/verify/04-09.yml b/ansible/playbooks/verify/04-09.yml new file mode 100644 index 0000000..5e5148b --- /dev/null +++ b/ansible/playbooks/verify/04-09.yml @@ -0,0 +1,10 @@ +- name: "04-09 noop verify" + hosts: localhost + gather_facts: false + vars: + repo_root: "{{ playbook_dir }}/../../.." + doc_id: "04-09" + doc_filename: "04-09-nodejs-存储与卷.md" + tasks: + - ansible.builtin.import_tasks: "{{ playbook_dir }}/_noop-tasks.yml" + diff --git a/ansible/playbooks/verify/04-10.yml b/ansible/playbooks/verify/04-10.yml new file mode 100644 index 0000000..392d332 --- /dev/null +++ b/ansible/playbooks/verify/04-10.yml @@ -0,0 +1,10 @@ +- name: "04-10 noop verify" + hosts: localhost + gather_facts: false + vars: + repo_root: "{{ playbook_dir }}/../../.." + doc_id: "04-10" + doc_filename: "04-10-nodejs-Ingress与Traefik.md" + tasks: + - ansible.builtin.import_tasks: "{{ playbook_dir }}/_noop-tasks.yml" + diff --git a/ansible/playbooks/verify/04-11.yml b/ansible/playbooks/verify/04-11.yml new file mode 100644 index 0000000..cd56169 --- /dev/null +++ b/ansible/playbooks/verify/04-11.yml @@ -0,0 +1,10 @@ +- name: "04-11 noop verify" + hosts: localhost + gather_facts: false + vars: + repo_root: "{{ playbook_dir }}/../../.." 
+ doc_id: "04-11" + doc_filename: "04-11-nodejs-副本与滚动发布.md" + tasks: + - ansible.builtin.import_tasks: "{{ playbook_dir }}/_noop-tasks.yml" + diff --git a/ansible/playbooks/verify/04-12.yml b/ansible/playbooks/verify/04-12.yml new file mode 100644 index 0000000..8ab00e2 --- /dev/null +++ b/ansible/playbooks/verify/04-12.yml @@ -0,0 +1,10 @@ +- name: "04-12 noop verify" + hosts: localhost + gather_facts: false + vars: + repo_root: "{{ playbook_dir }}/../../.." + doc_id: "04-12" + doc_filename: "04-12-nodejs-TLS与证书.md" + tasks: + - ansible.builtin.import_tasks: "{{ playbook_dir }}/_noop-tasks.yml" + diff --git a/ansible/playbooks/verify/04-13.yml b/ansible/playbooks/verify/04-13.yml new file mode 100644 index 0000000..643ef9c --- /dev/null +++ b/ansible/playbooks/verify/04-13.yml @@ -0,0 +1,10 @@ +- name: "04-13 noop verify" + hosts: localhost + gather_facts: false + vars: + repo_root: "{{ playbook_dir }}/../../.." + doc_id: "04-13" + doc_filename: "04-13-nodejs-HPA.md" + tasks: + - ansible.builtin.import_tasks: "{{ playbook_dir }}/_noop-tasks.yml" + diff --git a/ansible/playbooks/verify/04-14.yml b/ansible/playbooks/verify/04-14.yml new file mode 100644 index 0000000..642fc26 --- /dev/null +++ b/ansible/playbooks/verify/04-14.yml @@ -0,0 +1,10 @@ +- name: "04-14 noop verify" + hosts: localhost + gather_facts: false + vars: + repo_root: "{{ playbook_dir }}/../../.." + doc_id: "04-14" + doc_filename: "04-14-nodejs-GitOps与CI流水线.md" + tasks: + - ansible.builtin.import_tasks: "{{ playbook_dir }}/_noop-tasks.yml" + diff --git a/ansible/playbooks/verify/05-01.yml b/ansible/playbooks/verify/05-01.yml new file mode 100644 index 0000000..f276930 --- /dev/null +++ b/ansible/playbooks/verify/05-01.yml @@ -0,0 +1,10 @@ +- name: "05-01 noop verify" + hosts: localhost + gather_facts: false + vars: + repo_root: "{{ playbook_dir }}/../../.." 
+ doc_id: "05-01" + doc_filename: "05-01-k3s-部署homer首页面板.md" + tasks: + - ansible.builtin.import_tasks: "{{ playbook_dir }}/_noop-tasks.yml" + diff --git a/ansible/playbooks/verify/05-02.yml b/ansible/playbooks/verify/05-02.yml new file mode 100644 index 0000000..ba0b74b --- /dev/null +++ b/ansible/playbooks/verify/05-02.yml @@ -0,0 +1,10 @@ +- name: "05-02 noop verify" + hosts: localhost + gather_facts: false + vars: + repo_root: "{{ playbook_dir }}/../../.." + doc_id: "05-02" + doc_filename: "05-02-onenav首页面板.md" + tasks: + - ansible.builtin.import_tasks: "{{ playbook_dir }}/_noop-tasks.yml" + diff --git a/ansible/playbooks/verify/05-03.yml b/ansible/playbooks/verify/05-03.yml new file mode 100644 index 0000000..512b725 --- /dev/null +++ b/ansible/playbooks/verify/05-03.yml @@ -0,0 +1,10 @@ +- name: "05-03 noop verify" + hosts: localhost + gather_facts: false + vars: + repo_root: "{{ playbook_dir }}/../../.." + doc_id: "05-03" + doc_filename: "05-03-k3s-安装gitlab-含runner.md" + tasks: + - ansible.builtin.import_tasks: "{{ playbook_dir }}/_noop-tasks.yml" + diff --git a/ansible/playbooks/verify/05-04.yml b/ansible/playbooks/verify/05-04.yml new file mode 100644 index 0000000..2141884 --- /dev/null +++ b/ansible/playbooks/verify/05-04.yml @@ -0,0 +1,10 @@ +- name: "05-04 noop verify" + hosts: localhost + gather_facts: false + vars: + repo_root: "{{ playbook_dir }}/../../.." + doc_id: "05-04" + doc_filename: "05-04-k3s-配置gitlab-cicd.md" + tasks: + - ansible.builtin.import_tasks: "{{ playbook_dir }}/_noop-tasks.yml" + diff --git a/ansible/playbooks/verify/05-05.yml b/ansible/playbooks/verify/05-05.yml new file mode 100644 index 0000000..6722153 --- /dev/null +++ b/ansible/playbooks/verify/05-05.yml @@ -0,0 +1,10 @@ +- name: "05-05 noop verify" + hosts: localhost + gather_facts: false + vars: + repo_root: "{{ playbook_dir }}/../../.." 
+ doc_id: "05-05" + doc_filename: "05-05-prometheus与grafana.md" + tasks: + - ansible.builtin.import_tasks: "{{ playbook_dir }}/_noop-tasks.yml" + diff --git a/ansible/playbooks/verify/05-06.yml b/ansible/playbooks/verify/05-06.yml new file mode 100644 index 0000000..45beede --- /dev/null +++ b/ansible/playbooks/verify/05-06.yml @@ -0,0 +1,10 @@ +- name: "05-06 noop verify" + hosts: localhost + gather_facts: false + vars: + repo_root: "{{ playbook_dir }}/../../.." + doc_id: "05-06" + doc_filename: "05-06-openlist挂载网盘与自动备份.md" + tasks: + - ansible.builtin.import_tasks: "{{ playbook_dir }}/_noop-tasks.yml" + diff --git a/ansible/playbooks/verify/05-07.yml b/ansible/playbooks/verify/05-07.yml new file mode 100644 index 0000000..42c277b --- /dev/null +++ b/ansible/playbooks/verify/05-07.yml @@ -0,0 +1,10 @@ +- name: "05-07 noop verify" + hosts: localhost + gather_facts: false + vars: + repo_root: "{{ playbook_dir }}/../../.." + doc_id: "05-07" + doc_filename: "05-07-openclaw应用部署.md" + tasks: + - ansible.builtin.import_tasks: "{{ playbook_dir }}/_noop-tasks.yml" + diff --git a/ansible/playbooks/verify/05-08.yml b/ansible/playbooks/verify/05-08.yml new file mode 100644 index 0000000..bacc73e --- /dev/null +++ b/ansible/playbooks/verify/05-08.yml @@ -0,0 +1,10 @@ +- name: "05-08 noop verify" + hosts: localhost + gather_facts: false + vars: + repo_root: "{{ playbook_dir }}/../../.." + doc_id: "05-08" + doc_filename: "05-08-openclaw-k3s-实验部署.md" + tasks: + - ansible.builtin.import_tasks: "{{ playbook_dir }}/_noop-tasks.yml" + diff --git a/ansible/playbooks/verify/05-09.yml b/ansible/playbooks/verify/05-09.yml new file mode 100644 index 0000000..1a1b15c --- /dev/null +++ b/ansible/playbooks/verify/05-09.yml @@ -0,0 +1,10 @@ +- name: "05-09 noop verify" + hosts: localhost + gather_facts: false + vars: + repo_root: "{{ playbook_dir }}/../../.." 
+ doc_id: "05-09" + doc_filename: "05-09-openclaw-web-小游戏网页平台.md" + tasks: + - ansible.builtin.import_tasks: "{{ playbook_dir }}/_noop-tasks.yml" + diff --git a/ansible/playbooks/verify/06-01.yml b/ansible/playbooks/verify/06-01.yml new file mode 100644 index 0000000..0ee5483 --- /dev/null +++ b/ansible/playbooks/verify/06-01.yml @@ -0,0 +1,10 @@ +- name: "06-01 noop verify" + hosts: localhost + gather_facts: false + vars: + repo_root: "{{ playbook_dir }}/../../.." + doc_id: "06-01" + doc_filename: "06-01-k3s-networkpolicy-故障排查.md" + tasks: + - ansible.builtin.import_tasks: "{{ playbook_dir }}/_noop-tasks.yml" + diff --git a/ansible/playbooks/verify/06-02.yml b/ansible/playbooks/verify/06-02.yml new file mode 100644 index 0000000..def2a6d --- /dev/null +++ b/ansible/playbooks/verify/06-02.yml @@ -0,0 +1,10 @@ +- name: "06-02 noop verify" + hosts: localhost + gather_facts: false + vars: + repo_root: "{{ playbook_dir }}/../../.." + doc_id: "06-02" + doc_filename: "06-02-运维小结.md" + tasks: + - ansible.builtin.import_tasks: "{{ playbook_dir }}/_noop-tasks.yml" + diff --git a/ansible/playbooks/verify/06-03.yml b/ansible/playbooks/verify/06-03.yml new file mode 100644 index 0000000..73f87e2 --- /dev/null +++ b/ansible/playbooks/verify/06-03.yml @@ -0,0 +1,10 @@ +- name: "06-03 noop verify" + hosts: localhost + gather_facts: false + vars: + repo_root: "{{ playbook_dir }}/../../.." 
+ doc_id: "06-03" + doc_filename: "06-03-k3s-自动备份与恢复-openlist-webdav.md" + tasks: + - ansible.builtin.import_tasks: "{{ playbook_dir }}/_noop-tasks.yml" + diff --git a/ansible/playbooks/verify/_noop-tasks.yml b/ansible/playbooks/verify/_noop-tasks.yml new file mode 100644 index 0000000..b50fb63 --- /dev/null +++ b/ansible/playbooks/verify/_noop-tasks.yml @@ -0,0 +1,32 @@ +- name: Assert docs file exists + ansible.builtin.stat: + path: "{{ repo_root }}/docs/{{ doc_filename }}" + register: _doc_stat + +- name: Fail when docs file missing + ansible.builtin.assert: + that: + - _doc_stat.stat.exists + fail_msg: "docs file missing: docs/{{ doc_filename }}" + +- name: Find matching ansible/files doc_id directory + ansible.builtin.find: + paths: "{{ repo_root }}/ansible/files" + file_type: directory + patterns: "{{ doc_id }}-*" + use_regex: false + register: _files_dirs + +- name: Fail when ansible/files doc_id directory missing + ansible.builtin.assert: + that: + - _files_dirs.matched | int >= 1 + fail_msg: "ansible/files missing doc_id directory: ansible/files/{{ doc_id }}-*" + +- name: Show noop verification summary + ansible.builtin.debug: + msg: + - "doc_id={{ doc_id }}" + - "doc={{ doc_filename }}" + - "files_dirs={{ _files_dirs.files | map(attribute='path') | list }}" + diff --git a/docs/00-01-k3s-基础概念.md b/docs/00-01-k3s-基础概念.md index 65952b9..6e77cf3 100644 --- a/docs/00-01-k3s-基础概念.md +++ b/docs/00-01-k3s-基础概念.md @@ -124,8 +124,8 @@ K3s 自带 **local-path-provisioner**:当你创建 PVC 且不指定 `storageCl - **用法**:用部署时的 YAML 删除,与 `apply` 一一对应;或按资源类型和名称逐个删除。 - **示例**: - `kubectl delete -f nginx-matrix.yaml`:删除该文件定义的所有资源 - - `kubectl delete -f ansible/files/nginx-matrix/ -R`:递归删除该目录下所有 manifest 定义的资源(02-05 矩阵) - - `kubectl delete -f ansible/files/nginx-matrix-tls/ -R`:删除 03-02 TLS 矩阵(或见该文档 / playbook `nginx-matrix-tls-deploy.yml -e mode=cleanup`) + - `kubectl delete -f ansible/files/02-05-nginx-matrix/ -R`:递归删除该目录下所有 manifest 定义的资源(02-05 矩阵) + - `kubectl delete -f 
ansible/files/03-02-nginx-matrix-tls/ -R`:删除 03-02 TLS 矩阵(或见该文档 / playbook `nginx-matrix-tls-deploy.yml -e mode=cleanup`) - `kubectl delete deployment nginx-m1 -n default`:按名称删除单个 Deployment - **用途**:清理测试应用、下线服务、重装部署前先删除旧资源。资源删除后对应 Pod 会被终止,数据(etcd 中记录)一并移除;若用了 PVC,PVC 本身通常需单独删除。 diff --git a/docs/00-02-验证矩阵.md b/docs/00-02-验证矩阵.md index 96e2639..abf9ff1 100644 --- a/docs/00-02-验证矩阵.md +++ b/docs/00-02-验证矩阵.md @@ -5,6 +5,10 @@ > **清单位置**:可部署的 Kubernetes YAML 以仓库 [`ansible/files/`](../ansible/files/) 为唯一真源(与 `docs/` 交叉引用);验证时请以该目录下文件为准。 > > 写文档的人、做实验的人,都以这里为准,不用在每篇文档里翻记录。 +> +> 本页当前以“待验证列表”为主:在你的实验环境中按每篇文档从头到尾走通一次,然后把状态从“未验证/部分验证”补成“已验证”。 +> +> 自动化验证入口:`scripts/verify.sh`(在控制节点仓库根执行;按本矩阵顺序逐个 `doc_id` 跑 `ansible/playbooks/verify/<doc_id>.yml`,缺 playbook 即失败)。分层说明见 [`docs/00-05-测试与验证框架.md`](../docs/00-05-测试与验证框架.md)。 ## 状态说明 @@ -17,6 +21,10 @@ - 真机按文档全部走完后,再把状态从“未验证/部分验证”改成“已验证”,并写清 **OS / K3s 版本 / 时间**。 - 以后如果对文档步骤做了较大调整,记得把这里对应条目先打回“未验证”或“部分验证”,等新流程再跑一遍。 +### 编排约定与文档 id + +- 下文每条 `docs/XX-YY-*.md` 的 **id 约定为文件名中的 `XX-YY`**,与 `scripts/verify.sh` 选用的 playbook 文件名一致;矩阵正文仍以人工结论为准,脚本结果写在各条「备注」里。 + --- ## 1. 
主线安装(01-*) @@ -26,135 +34,135 @@ - 备注:概念性文档,不涉及命令执行。 - `00-04-部署环境说明.md` - 状态:✅ 已验证 - - 备注:说明性文档,描述本仓库验证环境(ylc61~64、Fedora、K3s v1.34.5+k3s1、/storage 等),与当前实际部署一致。 + - 备注:说明性文档,描述本仓库验证环境(ylc61~64、Fedora、K3s v1.34.5+k3s1、每节点 **10G 系统盘 + 32G 数据盘挂载 `/storage`** 等),与当前实际部署对照调整;**2026-03-25** 记录的集群仍为四节点 Ready。 - `01-01-k3s-控制节点含traefik.md` - 状态:✅ 已验证 - - 备注:Fedora 43 Server + K3s v1.34.5+k3s1,单控制节点 61,已按文档装机并确认 Traefik 入口 404 可达(2026-03-10 左右)。 + - 备注:Fedora 43 Server + K3s v1.34.5+k3s1,单控制节点 61,Traefik 与节点入口 80/443 可达(404 为无路由时的正常表现);**2026-03-25** 与 `verify-g1-baseline` 复验一致。 - `01-06-节点初始化-ansible-实践.md` - 状态:✅ 已验证 - - 备注:Fedora + K3s,4 节点(ylc61~64),Ansible 一键完成初始化、server/agent 安装、firewalld 基线、Traefik 标签及验证输出(2026-03 左右)。 + - 备注:Fedora + K3s,4 节点(ylc61~64),Ansible `k3s-init-and-install.yml` 完成 server/agent、firewalld、CNI trusted、CoreDNS、Traefik 及 playbook 内置验证;**2026-03-25** 于 ylc61 复跑 `phase2-k3s`(`PLAY RECAP` 全节点 `failed=0`)。 - `01-02-k3s-工作节点.md` - 状态:✅ 已验证 - - 备注:在同一环境下成功加入工作节点 62,并通过 `kubectl get nodes` 看到双节点 Ready(2026-03-10 左右)。 + - 备注:ylc62~ylc64 工作节点加入同一集群,`kubectl get nodes` 四节点 Ready;K3s v1.34.5+k3s1;**2026-03-25** 与 `check-cluster` 复验(早期记录曾为双节点)。 - `01-03-armv7-standalone-docker.md` - 状态:❓ 未验证 - - 备注:待在实际 armv7 设备上按文档安装 Docker 并跑一两个容器后更新。 - `01-07-openwrt-haproxy.md` - - 状态:✅ 已验证 - - 备注:ImmortalWrt + HAProxy 18080/18443;经 `scripts/01-07-verify-haproxy.sh`(ssh onecloud 第三方 curl)验证;cfg 语法、HTTP/HTTPS 后端正确;可选 `--deploy-matrix http|tls` 一键部署矩阵。 + - 备注:待在实际 armv7 设备上按文档安装 Docker 并跑一两个容器后更新。**2026-03-25** ylc61 `verify.sh` noop(未在 armv7 设备执行文档步骤)。 +- `01-07-openwrt-haproxy.md` + - 状态:⚠️ 部分验证 + - 备注:ImmortalWrt + HAProxy(如 18080/18443)曾实机验证过;当前仓库未提供对应自动化脚本。**2026-03-25** ylc61 `verify.sh` noop;仍以 onecloud 等第三方机 curl 手工为准。 --- ## 2. 
简单部署nginx(02-*) - `02-00-nginx-系列说明.md` - - 状态:✅ 已验证(说明性文档) - - 备注:整理节点调度与 Ingress/IngressRoute 差异,与 02-01~02-04 一并验证。 + - 状态:⚠️ 部分验证(说明性文档) + - 备注:内容与 02-01~02-04 一致。**2026-03-25** ylc61 `verify.sh` noop;未重读全文,依赖历史核对。 - `02-01-nginx-control-ingress.md` - 状态:✅ 已验证 - - 备注:经 `scripts/02-verify-nginx-matrix-individual.sh` 在 ylc61 上逐个部署,onecloud curl 验证 HTTP(path /demo-m1)与 HTTPS(domain test01.jackadam.top:18443);2026-03。 + - 备注:**本仓库约定验收**:`scripts/verify.sh` → `ansible/playbooks/verify/02-01.yml`(M1:`ansible/files/02-05-nginx-matrix/01-control-ingress.yaml` apply → rollout → 入口 HTTP 校验 `X-Backend: M1` → teardown)。**2026-03-25** ylc61 四节点集群跑通。历史上另有 onecloud curl;四路径总览见 [`02-05`](02-05-nginx-验证矩阵-一键部署.md)。HTTPS 不在本篇,见 `03-02`。 - `02-02-nginx-control-ingressroute.md` - 状态:✅ 已验证 - - 备注:同上,path /demo-m2,IngressRoute 路由链路。 + - 备注:同上,playbook `verify/02-02.yml`,清单 `02-control-ingressroute.yaml`,`X-Backend: M2`。**2026-03-25** ylc61。 - `02-03-nginx-worker-ingress.md` - 状态:✅ 已验证 - - 备注:同上,path /demo-m3,工作节点 Ingress。 + - 备注:同上,`verify/02-03.yml`,`03-worker-ingress.yaml`,`X-Backend: M3`。**2026-03-25** ylc61。 - `02-04-nginx-worker-ingressroute.md` - 状态:✅ 已验证 - - 备注:同上,path /demo-m4,IngressRoute 变体。 + - 备注:同上,`verify/02-04.yml`,`04-worker-ingressroute.yaml`,`X-Backend: M4`。**2026-03-25** ylc61。 - `02-05-nginx-验证矩阵-一键部署.md` - - 状态:✅ 已验证(4 种组合 M1~M4 整合) - - 备注:HTTP-only(无域名学习);有域名时用 03-02 升级版。 + - 状态:✅ 已验证 + - 备注:**本仓库约定验收**:`verify.sh` 串跑 `02-01`~`02-04`(或等价 `ansible/playbooks/nginx-matrix-deploy.yml` 一次部署四路径)+ 各路径 HTTP `X-Backend` + teardown。**2026-03-25** ylc61 `run-all` 通过。TLS/域名与 `03-02` 衔接另用 `nginx-matrix-tls-deploy.yml` / `verify/03-02.yml` 等验。 --- ## 3. 
k3s 常用配置 -- `02-00-nginx-系列说明.md` - - 状态:✅ 已验证(说明性文档) - - 备注:整理节点调度与 Ingress/IngressRoute 差异(nodeSelector/labels/tolerations 通用排查思路),与 02-01~02-04 一并验证。 - `03-01-k3s-traefik-dashboard.md` - - 状态:✅ 已验证 - - 备注:在 61/62/63/64 环境各节点启用过 Dashboard 并确认能访问,日志正常。模板:`ansible/files/traefik-dashboard/traefik-dashboard.yaml`。 + - 状态:⚠️ 部分验证 + - 备注:模板见 `ansible/files/03-01-traefik-dashboard/`。**2026-03-25** 仅确认集群内 `traefik` Deployment 可用(`verify-g3`),未按文档重新 apply Dashboard Ingress/IngressRoute 并浏览器验收。 - `03-02-k3s-traefik-acme.md` - - 状态:✅ 已验证 - - 备注:02-05 的升级版(TLS 矩阵 + ACME);2026-03 实机跑通。 + - 状态:⚠️ 部分验证 + - 备注:历史上 TLS + ACME 曾跑通;**2026-03-25** 办公机 `ACME_EMAIL` 未配置,未复验 Let's Encrypt 签发;恢复 ✅ 需按文档 + 有效邮箱与 DNS。 - `03-03-k3s-traefik-dashboard-acme.md` - - 状态:✅ 已验证 - - 备注:03-01 Dashboard 与 03-02 ACME 合并配置已核对;模板 `ansible/files/traefik-dashboard-acme/traefik-dashboard-acme.yaml` 正确(已含 local-path persistence)。实机 apply 需确保集群 DNS 可达 Let's Encrypt;可经 `scripts/03-verify-traefik-dashboard-acme.sh` 验证。2026-03。 -- `03-04-k3s-cloudflare-tunnel-配置接入.md` - - 状态:✅ 已验证 - - 备注:本实验室集群完整流程(Zero Trust、Public Hostname、cloudflared Pod、`traefik.kube-system.svc.cluster.local:80`、Dashboard 子域 + `/dashboard/` 访问)已实机跑通(2026-03)。 + - 状态:⚠️ 部分验证 + - 备注:合并版 YAML 仍在 `ansible/files/03-03-traefik-dashboard-acme/`。**2026-03-25** ylc61 `verify.sh` noop(未实机 apply 合并栈)。 +- `03-04-k3s-cloudflare-tunnel-配置接入.md` + - 状态:⚠️ 部分验证 + - 备注:历史上实验室曾跑通 Tunnel + Traefik。**2026-03-25** ylc61 `verify.sh` noop;`CF_TUNNEL_TOKEN` 等未加载时亦不会自动复验隧道。 - `03-05-k3s-local-path-pvc.md` - - 状态:✅ 已验证 - - 备注:K3s 自带 local-path-provisioner,PVC 本地持久化;待实机验证。 + - 状态:⚠️ 部分验证 + - 备注:**2026-03-25** ylc61 `verify.sh`:demo 清单 apply → Deployment rollout → PVC `Bound` → teardown。此前 `verify-g1-baseline` 亦确认 `local-path` 就绪。 - `03-06-k3s-使用nfs存储.md` - 状态:❓ 未验证 - - 备注:待在实际 NFS 服务器 + K3s 集群上完成 PV/PVC + Pod 挂载验证。 + - 备注:待在实际 NFS 服务器 + K3s 集群上完成 PV/PVC + Pod 挂载验证。**2026-03-25** ylc61 `verify.sh`:因 `NFS_SERVER_IP` / `NFS_EXPORT_PATH` 未配齐 gate 跳过;teardown 
已对齐无文件不删。playbook 亦支持仓库 `03-06-nfs-demo` 清单路径。 - `03-07-k3s-longhorn-持久化存储.md` - - 状态:❓ 未验证 - - 备注:Longhorn 安装与 PVC 流程待在本环境实机验证。 + - 状态:⚠️ 部分验证 + - 备注:**2026-03-25** ylc61 `verify.sh`:`longhorn-install.yml` 安装 + `longhorn-system` Pod 列表明细 + teardown(Helm uninstall/删 ns 带超时)。**未**按文档完整跑 PVC 业务读写与灾备流程,故不设 ✅。 - `03-08-k3s-ha-集群配置与切换.md` - 状态:❓ 未验证 - - 备注:HA 场景步骤已整理,尚未在当前环境完成双 server + 切换演练。 + - 备注:HA 场景步骤已整理,尚未在当前环境完成双 server + 切换演练。**2026-03-25** `verify.sh` 仅为 noop(docs + `ansible/files` 目录存在性)。 - `03-09-k3s-gitops-集群配置管理.md` - 状态:❓ 未验证 - - 备注:框架草案,待选定 Argo CD 或 Flux 后细化。 + - 备注:框架草案,待选定 Argo CD 或 Flux 后细化。**2026-03-25** `verify.sh` 仅为 noop。 +- `03-10-k3s-traefik-custom-ports.md` + - 状态:❓ 未验证 + - 备注:需在实际环境应用 `HelmChartConfig` 并确认 Traefik Service/入口端口。**2026-03-25** `verify.sh` 仅为 noop(清单在 `ansible/files/03-10-traefik-custom-ports/`)。 ### 可选:依赖文档 - `01-04-双控制节点ha.md` - 状态:❓ 未验证 - - 备注:文档已拆分安装/配置流程,尚未在双控制节点 + 外部 LB 的完整场景下全链路验证。 + - 备注:文档已拆分安装/配置流程,尚未在双控制节点 + 外部 LB 的完整场景下全链路验证。**2026-03-25** ylc61 `verify.sh` noop。 - `01-05-armv7-nfs服务安装.md` - 状态:❓ 未验证 - - 备注:NFS 安装命令已经过以往经验验证,本仓库对应 armv7 环境需再跑一遍确认导出与权限。 + - 备注:NFS 安装命令已经过以往经验验证,本仓库对应 armv7 环境需再跑一遍确认导出与权限。**2026-03-25** ylc61 `verify.sh` noop(未在 armv7 实机执行文档步骤)。 --- ## 4. 
高级 Node.js(04-01~04-14) - `04-01-k3s-nodejs-高级部署.md` - - 状态:❓ 未验证 - - 备注:主入口;具体能力验证依赖 `04-02`~`04-14` 分项。 + - 状态:⚠️ 部分验证 + - 备注:主入口。**2026-03-25** ylc61 `verify.sh`:`04-01` 累积清单 apply → `/node` HTTP(Hello World)→ teardown。`04-02`~`04-14` 分项仍待按文档逐项实机。 - `04-02-nodejs-镜像与运行命令.md` - 状态:❓ 未验证 - - 备注:镜像 tag/`imagePullPolicy`/`command`/`args` 在实机拉取与启动验证。 + - 备注:镜像 tag/`imagePullPolicy`/`command`/`args` 在实机拉取与启动验证。**2026-03-25** `verify.sh` 仅为 noop(仓库结构)。 - `04-03-nodejs-环境变量与配置注入.md` - 状态:❓ 未验证 - 备注:ConfigMap/Secret 注入与 `printenv`/`curl` 结果一致。 - `04-04-nodejs-端口与Service.md` - 状态:❓ 未验证 - - 备注:`targetPort` 与进程监听一致;Endpoints 有地址。 + - 备注:`targetPort` 与进程监听一致;Endpoints 有地址。**2026-03-25** `verify.sh` noop。 - `04-05-nodejs-资源请求与限制.md` - 状态:❓ 未验证 - - 备注:`kubectl top` 与 OOM/节流行为符合预期。 + - 备注:`kubectl top` 与 OOM/节流行为符合预期。**2026-03-25** `verify.sh` noop。 - `04-06-nodejs-探针与健康检查.md` - 状态:❓ 未验证 - - 备注:readiness/liveness 与 Endpoint/重启行为验证。 + - 备注:readiness/liveness 与 Endpoint/重启行为验证。**2026-03-25** `verify.sh` noop。 - `04-07-nodejs-调度与亲和.md` - 状态:❓ 未验证 - - 备注:`nodeSelector`/亲和/容忍与节点标签实机一致。 + - 备注:`nodeSelector`/亲和/容忍与节点标签实机一致。**2026-03-25** `verify.sh` noop。 - `04-08-nodejs-安全上下文.md` - 状态:❓ 未验证 - - 备注:非 root/只读根等策略下应用仍可运行。 + - 备注:非 root/只读根等策略下应用仍可运行。**2026-03-25** `verify.sh` noop。 - `04-09-nodejs-存储与卷.md` - 状态:❓ 未验证 - - 备注:PVC/emptyDir 挂载与读写、配合 `03-05`/`03-07` 存储选型。 + - 备注:PVC/emptyDir 挂载与读写、配合 `03-05`/`03-07` 存储选型。**2026-03-25** `verify.sh` noop。 - `04-10-nodejs-Ingress与Traefik.md` - 状态:❓ 未验证 - - 备注:path/host/入口点注解与 Traefik 路由一致。 + - 备注:path/host/入口点注解与 Traefik 路由一致。**2026-03-25** `verify.sh` noop。 - `04-11-nodejs-副本与滚动发布.md` - 状态:❓ 未验证 - - 备注:多副本与 `rollout`/`undo` 实机验证。 + - 备注:多副本与 `rollout`/`undo` 实机验证。**2026-03-25** `verify.sh` noop。 - `04-12-nodejs-TLS与证书.md` - 状态:❓ 未验证 - - 备注:HTTPS 与 `03-02` ACME/Secret 配合验证证书与域名。 + - 备注:HTTPS 与 `03-02` ACME/Secret 配合验证证书与域名。**2026-03-25** `verify.sh` noop。 - `04-13-nodejs-HPA.md` - 状态:❓ 未验证 - - 备注:metrics-server 可用;压测触发扩缩。 + - 备注:metrics-server 
可用;压测触发扩缩。**2026-03-25** `verify.sh` noop。 - `04-14-nodejs-GitOps与CI流水线.md` - 状态:❓ 未验证 - - 备注:流程文档;按 `05-03`/`05-04`/`03-09` 任选一条链路实机跑通后更新。 + - 备注:流程文档;按 `05-03`/`05-04`/`03-09` 任选一条链路实机跑通后更新。**2026-03-25** `verify.sh` noop。 --- @@ -162,28 +170,31 @@ - `05-01-k3s-部署homer首页面板.md` - 状态:❓ 未验证 - - 备注:待在集群内按文档部署 Homer,并确认首页可访问。 + - 备注:待在集群内按文档部署 Homer,并确认首页可访问。**2026-03-25** `verify.sh` noop。 - `05-02-onenav首页面板.md` - 状态:❓ 未验证 - - 备注:包含 armv7 独立部署 + K3s 反向代理两个部分,需分别验证。 + - 备注:包含 armv7 独立部署 + K3s 反向代理两个部分,需分别验证。**2026-03-25** `verify.sh` noop。 - `05-03-k3s-安装gitlab-含runner.md` - 状态:❓ 未验证 - - 备注:待完成 GitLab + Runner 安装与基础流水线运行。 + - 备注:待完成 GitLab + Runner 安装与基础流水线运行。**2026-03-25** `verify.sh` noop。 - `05-04-k3s-配置gitlab-cicd.md` - 状态:❓ 未验证 - - 备注:需在真实仓库上跑通一次 K3s 部署流水线。 + - 备注:需在真实仓库上跑通一次 K3s 部署流水线。**2026-03-25** `verify.sh` noop。 - `05-05-prometheus与grafana.md` - 状态:❓ 未验证 - - 备注:待完成 kube-prometheus-stack 安装与 Dashboard 访问。 + - 备注:待完成 kube-prometheus-stack 安装与 Dashboard 访问。**2026-03-25** `verify.sh` noop。 - `05-06-openlist挂载网盘与自动备份.md` - 状态:❓ 未验证 - - 备注:待在实际网盘与备份目录上验证周期备份任务。 + - 备注:待在实际网盘与备份目录上验证周期备份任务。**2026-03-25** `verify.sh` noop。 - `05-07-openclaw应用部署.md` - 状态:❓ 未验证 - - 备注:待在 x86 主机用 Docker 部署 OpenClaw,并在 K3s 中完成静态转发验证。 + - 备注:待在 x86 主机用 Docker 部署 OpenClaw,并在 K3s 中完成静态转发验证。**2026-03-25** `verify.sh` noop。 - `05-08-openclaw-k3s-实验部署.md` - 状态:❓ 未验证 - - 备注:待在 K3s 内按实验文档直接部署 OpenClaw Gateway,并确认入口可访问。 + - 备注:待在 K3s 内按实验文档直接部署 OpenClaw Gateway,并确认入口可访问。**2026-03-25** `verify.sh` noop。 +- `05-09-openclaw-web-小游戏网页平台.md` + - 状态:❓ 未验证 + - 备注:前端示例清单在 `ansible/files/05-09-openclaw-web-小游戏网页平台/`;**2026-03-25** `verify.sh` noop(未 apply 镜像示例)。 --- @@ -194,7 +205,10 @@ - 备注:已在 Fedora 43 + K3s 环境排查并修复过“62:80 不通 / firewalld 拦截 flannel.1 <-> cni0”的问题,脚本与命令均来自实战过程。 - `06-02-运维小结.md` - 状态:❓ 未验证 - - 备注:运维建议为经验总结,后续可在日常巡检/备份流程固化后逐条打勾。 + - 备注:运维建议为经验总结,后续可在日常巡检/备份流程固化后逐条打勾。**2026-03-25** `verify.sh` noop。 +- `06-03-k3s-自动备份与恢复-openlist-webdav.md` + - 状态:❓ 未验证 + - 备注:按文档配置 WebDAV 备份与恢复 
Job/CronJob,并验证一次完整链路。**2026-03-25** `verify.sh` noop(清单真源见 `ansible/files/06-03-openlist-webdav/` 与桥接目录 `06-03-k3s-自动备份与恢复-openlist-webdav/`)。 --- diff --git a/docs/00-04-部署环境说明.md b/docs/00-04-部署环境说明.md index 25bbb37..847843d 100644 --- a/docs/00-04-部署环境说明.md +++ b/docs/00-04-部署环境说明.md @@ -4,16 +4,19 @@ ## 1. 节点与角色 -| 主机名 | IP | 角色 | 说明 | -| ----- | ------------ | ---------- | -------------------------- | -| ylc61 | 192.168.2.61 | k3s server | 控制节点,运行 API、etcd、Traefik 等 | -| ylc62 | 192.168.2.62 | k3s worker | 工作节点 | -| ylc63 | 192.168.2.63 | k3s worker | 工作节点 | -| ylc64 | 192.168.2.64 | k3s worker | 工作节点 | +| 主机名 | IP | 角色 | 说明 | +| -------- | ------------- | --------------- | --------------------------------------- | +| ylc61 | 192.168.2.61 | k3s server | 控制节点,运行 API、etcd、Traefik 等 | +| ylc62 | 192.168.2.62 | k3s worker | 工作节点 | +| ylc63 | 192.168.2.63 | k3s worker | 工作节点 | +| ylc64 | 192.168.2.64 | k3s worker | 工作节点 | +| ylc65 | 192.168.2.65 | Linux 工作机(非 K3s) | **不参与** `kubectl get nodes`;提供日常 **Linux/x86_64** 环境,用于克隆本仓库、跑 `ansible-playbook` / `scripts/verify.sh`、编辑与排障;通过 SSH 连 ylc61~ylc64 执行自动化,磁盘与 **§3.1 K3s 节点约定无关**(按本机实际即可) | +| openwrt | 192.168.2.1 | OpenWrt 主路由 | 局域网网关;可选 HAProxy 将 80/443 转发至 K3s,见 `01-07` | +| onecloud | 192.168.2.22 | ARM32(Armbian) | 非 K3s 集群节点;armv7 / NFS 等文档的实验或外部 curl 来源 | -- Kubernetes 中的节点名使用短主机名(如 `ylc61`~`ylc64`),与 inventory 中主机名一致,便于配合 Cloudflare CDN(若计算机 hostname 为 FQDN,本机解析会优先走本地导致无法访问)。 -- 控制机(运行 `ansible-playbook`)可任选一台,通常为 ylc61 或本机。 +- Kubernetes 中的节点名使用短主机名(**仅** `ylc61`~`ylc64` 四类 K3s 机器),与 inventory 中 `[k3s_server]` / `[k3s_worker]` 一致;`ylc65` **不是**集群成员。便于配合 Cloudflare CDN(若计算机 hostname 为 FQDN,本机解析会优先走本地导致无法访问)。 +- **控制机**(运行 `ansible-playbook`、`verify.sh`):推荐 **`ylc65`(Linux 工作机)** 或 ylc61;亦可在你的本机 Linux 上执行,只要装好 Ansible、能 SSH 到 inventory 中的节点。 ## 2. 软件版本(已验证) @@ -28,10 +31,32 @@ ## 3. 
网络与存储 - **网段**:192.168.2.0/24 -- **可选**:OpenWrt 网关(如 192.168.2.1)上配置 HAProxy 负载均衡,将 80/443 转发到 K3s 节点,见 `01-07-openwrt-haproxy.md` -- **数据盘方案**:`/storage`,server 与 worker 均使用 `--data-dir=/storage` +- **可选**:OpenWrt 主路由(上表 `openwrt`,192.168.2.1)上配置 HAProxy 负载均衡,将 80/443 转发到 K3s 节点,见 `01-07-openwrt-haproxy.md` + +### 3.1 磁盘规划(四台 K3s 节点统一:10G + 32G) + +本仓库验证环境约定:**每台 K3s 节点**(`ylc61`~`ylc64`,即 1 server + 3 worker)各用一块约 **10G 系统盘**(根文件系统)+ 一块约 **32G 数据盘**,与 Longhorn、K3s `--data-dir` 实验相匹配。**`ylc65` 为工作机,不适用本节 `/storage` 与 K3s 数据盘约定。** + +- **`/storage` 必须是独立挂载点**(数据盘格式化后挂载),**不能**只是根分区上的普通目录;否则镜像、local-path、Longhorn 仍会写满系统盘,控制节点易出现 **DiskPressure**。 +- **K3s**:server 与 worker 均使用 `--data-dir=/storage`(见 `ansible/group_vars/all.yml`)。 - **token 路径**:`/storage/server/token` +**自检(每台执行)**: + +```bash +findmnt -n -o SOURCE / /storage +lsblk -f +``` + +两行 `SOURCE` 应指向**不同**块设备(或不同 LV);若 `/storage` 未单独挂载,请先完成分区、格式化、`/etc/fstab` 再装 K3s(见 `01-06`)。 + +### 3.2 推荐自动化顺序 + +1. (可选)`ansible/playbooks/k3s-prepare-storage.yml`:声明 `k3s_data_disk_device` 并启用 `k3s_prepare_storage` 时,幂等准备 `/storage`。 +2. `ansible/playbooks/k3s-init-and-install.yml`:安装 K3s(可开启 `k3s_verify_storage_mount` 校验挂载)。 +3. (可选)`ansible/playbooks/longhorn-install.yml`:Helm 安装 Longhorn(`ansible/files/03-07-longhorn/values-lab.yaml`)。 +4. (可选)按 `03-05` 应用本仓库 **local-path** ConfigMap 真源(`ansible/files/03-05-local-path-config/local-path-config-lab.json`)。 + ## 4. 防火墙 - **firewalld**:启用 @@ -42,14 +67,16 @@ ## 5. 
Ansible 相关 -- **inventory**:`ansible/inventory.ini`,分组 `k3s_server`、`k3s_worker`、`k3s_nodes` +- **inventory**:`ansible/inventory.ini`,分组 `k3s_server`、`k3s_worker`、`k3s_nodes`(**勿**将 `ylc65` 列入 K3s 分组;工作机只作为 Ansible 控制端) - **变量**:`ansible/group_vars/all.yml`,含 `k3s_data_dir`、`k3s_server_ip`、`k3s_manage_`* 等 - **playbook(k3s)**:`ansible/playbooks/k3s-init-and-install.yml` -- **playbook(nginx 矩阵)**:`ansible/playbooks/nginx-matrix-deploy.yml`(manifests 在 `ansible/files/nginx-matrix/`,文档 `02-05`) -- **playbook(nginx TLS 矩阵)**:`ansible/playbooks/nginx-matrix-tls-deploy.yml`(manifests 在 `ansible/files/nginx-matrix-tls/`,文档 `03-02`(02-05 升级版)) +- **playbook(数据盘,可选)**:`ansible/playbooks/k3s-prepare-storage.yml` +- **playbook(Longhorn,可选)**:`ansible/playbooks/longhorn-install.yml`(Helm + `ansible/files/03-07-longhorn/values-lab.yaml`,文档 `03-07`) +- **playbook(nginx 矩阵)**:`ansible/playbooks/nginx-matrix-deploy.yml`(manifests 在 `ansible/files/02-05-nginx-matrix/`,文档 `02-05`) +- **playbook(nginx TLS 矩阵)**:`ansible/playbooks/nginx-matrix-tls-deploy.yml`(manifests 在 `ansible/files/03-02-nginx-matrix-tls/`,文档 `03-02`(02-05 升级版)) - **SSH**:root 连接,`scripts/ssh/setup-k3s-workers-ssh.sh` 预配密钥 ## 6. 验证时间 -- 2026-03:4 节点集群按 `01-06` 一次性安装成功,各节点 Traefik 入口 404 可达。 +- 2026-03:**4 节点**(1 server + 3 worker)集群按 `01-06` 一次性安装成功,各节点 Traefik 入口 404 可达。自动化与验证常在 **`ylc65` Linux 工作机**上执行,该主机不参与 K3s。 diff --git a/docs/00-05-测试与验证框架.md b/docs/00-05-测试与验证框架.md new file mode 100644 index 0000000..ae44522 --- /dev/null +++ b/docs/00-05-测试与验证框架.md @@ -0,0 +1,196 @@ +# 00-05-测试与验证框架(设计说明) + +> 本页是“测试与验证框架”的设计说明,并与仓库里已落地的 `scripts/verify.sh` + `ansible/playbooks/verify/` 对齐。 + +## 1. 为什么需要它 + +仓库里 `docs/00-02-验证矩阵.md` 目前扮演“待验证列表/状态记录”的角色,用来回答: + +- 这篇文档(`XX-YY`)是否已经在你的实验环境中从头到尾跑通? +- 如果没跑通,缺口在哪里? + +而“自动化执行”和“状态记录”是两件不同的事。测试框架需要把自动化执行能力,拆成可维护的小块,并通过统一的 id/索引把文档与用例关联起来。 + +## 2. 自动化验证流程(一般步骤) + +下面是一条**从操作者视角**的通用流水线;本仓库里对应关系已写在各步括号中。 + +1. 
**接入目标环境** + - 用 SSH 登录**控制节点**(或在本机配置好到控制节点的 Ansible `inventory`,由 Ansible 代你 SSH)。 + - 在仓库根(或文档约定目录)准备好代码:`git pull` / `scp` 同步等,与 [`docs/00-04-部署环境说明.md`](00-04-部署环境说明.md) 一致。 + - 按需加载验证环境变量:复制并填写 [`scripts/.env.verify.example`](../scripts/.env.verify.example) 为 `scripts/.env.verify`,执行前 `source`(`verify.sh` 会自动尝试加载)。 + +2. **环境与前置清理(按验证目标选择深度)** + - **基本检查**:`kubectl get nodes`、磁盘/内核版本、防火墙与文档是否一致;必要时对照 `00-04`。 + - **轻量清理(本仓库 `verify.sh` 的常态)**:默认不卸载整个 K3s;每个 `verify/XX-YY.yml` 在 **teardown** 阶段只删除**本篇** apply 过的资源(或 gate 未执行 apply 时跳过删除),避免污染下一用例。 + - **重度清理(重装/复现安装文档时)**:若你要从「空机」验证 `01-01` 等**整集群安装**流程,才需要按文档执行 `k3s-uninstall.sh`、删数据目录、清 iptables 等——这与日常「矩阵逐项验收」是**不同场景**,不要默认混进每一次 `run-all`。 + +3. **部署** + - **推荐(本仓库)**:用 Ansible playbook 部署——要么是正式安装/初始化类(如 `k3s-init-and-install.yml`),要么是验证用例里的 `kubectl apply` / `helm install` / `import_playbook`。 + - **文档中的 bash 一键命令**:仍可按 `docs/` 逐步执行;适合排障或 playbook 尚未覆盖的边角。自动化验收应尽量**收敛进** `ansible/playbooks/verify/*.yml`,避免「文档一套、手敲一套」长期分叉。 + +4. **按设计目标做断言** + - **集群侧**:`kubectl get` / `describe` / `logs`、`kubectl rollout status`、必要时看事件与 `Endpoints`。 + - **入口侧**:在控制节点或文档指定的入口上对 `Service`/`Ingress`/`IngressRoute` 做 `curl`(本仓库 nginx 矩阵等用响应头 `X-Backend` 或状态码区分路径)。 + - **Helm / 存储 / 网络**:按该篇文档的「预期」增查命令(如 `helm list`、`PVC Bound`、跨节点 curl)。 + - 依赖外部云账号、NFS、ACME 邮箱等时:未满足条件可用 **gate 跳过** apply,并在矩阵备注中写明「未配变量未验」。 + +5. **收尾与记录** + - 默认 **`VERIFY_TEARDOWN=1`**:验证通过后删除临时资源,减少对共享实验集群的干扰;调试时可设 `0` 保留现场。 + - 将结论写回 [`docs/00-02-验证矩阵.md`](00-02-验证矩阵.md)(状态与备注),必要时更新对应 `docs/XX-YY-*.md` 中的命令或版本说明。 + +6. 
**本仓库一键串联** + - 在控制节点仓库根执行:`./scripts/verify.sh run-all`(或 `run `),即按矩阵顺序重复「部署 → 断言 → teardown」的自动化版本;缺 playbook 会 **fail-fast**。 + +### 2.1 局限与约定补全(建议在文档与 `verify/XX-YY.yml` 中写死) + +下列能力 **不会** 由 `verify.sh` 自动推断;必须在对应 `docs/XX-YY-*.md` 里写清「谁执行、对哪里发请求、怎样算过」,并在 playbook 里逐项实现。**未写进 playbook 的步骤即视为未自动化覆盖**。 + +| 主题 | 建议约定 | +|------|----------| +| **多节点:在哪台机器 `curl`** | **默认**:在 inventory 的 **`k3s_server`(控制节点)** 上,对 **集群入口** 发 HTTP(如 `nginx_entry_base` / `http://<控制节点或 LB IP>`),与「从集群外经 NodePort/主机网络进 Traefik」一致。**例外**(必须显式写):要验 worker 仅内网、跨节点路径、或「必须从某台 agent 访问」时,在 playbook 里对指定 host 执行 `curl`(或 `delegate_to` / 专用 play),并在文档「验证命令」中写明 **执行主机与目标 URL**,避免隐含「任意节点等价」。 | +| **TLS / SNI** | 自签或跳过校验仅用于排障:`curl -k`。**验收**应优先:真实证书路径下用 `curl -v` 看证书链;或用 `curl --resolve <域名>:443:<入口IP> https://<域名>/...` 在 **无 DNS** 时模拟 SNI。需要时用 `openssl s_client -connect host:443 -servername <域名> ansible/playbooks/verify/.yml`;缺对应 playbook 则 **fail-fast**。 +3. **`ansible/playbooks/verify/.yml`**:单篇用例,通常拆成「部署 → 验证 → 清理」多个 **play**(默认 **`VERIFY_TEARDOWN=1`** 做 teardown)。 +4. **特例**:无集群动作的文档可走 **`verify/_noop-tasks.yml`**(仓库路径/文件存在性);依赖 NFS、ACME、Cloudflare 等外部条件的可用 **gate 跳过** apply,teardown 需避免「无清单仍删」类失败(各 playbook 已按此收敛)。 + +**真源**:可部署清单以 **`ansible/files/`** 为准;`docs/XX-YY-*.md` 与矩阵通过同一 **`doc_id`** 与 playbook 对齐。矩阵里的状态/备注仍建议 **手工** 维护(见 §7)。 + +## 4. 文档 id 与用例索引 + +约定: + +- `docs/XX-YY-*.md` 的文档 id 为 `XX-YY`(例如 `02-05`)。 +- 自动化用例文件名为 `verify/XX-YY.yml`,与 `doc_id` 一致即可(playbook 内不必再写 YAML 字段 `doc_id`,除非你想自检)。 +- 框架通过 `doc_id` 把“文档”映射到 `verify/.yml`,从而实现按篇自动执行(`verify.sh`)。 + +这样你在 `00-02` 里更新状态时,不需要关心脚本内部结构;只要 id 一致就能追溯。 + +## 5. 
用例数据模型(建议) + +建议把用例写成“按文档 id 编排的任务集合”。在本仓库里,**用例落在** `ansible/playbooks/verify/<doc_id>.yml`,不再使用单体 `verify-matrix.yml`(已移除,避免与拆分后的 playbook 双份维护)。 + +- 文档 id `XX-YY` → 文件 `verify/XX-YY.yml` +- 每个文件内一般拆为三段(多个 play 或顺序 tasks): + +示例(02-05):`./scripts/verify.sh run 02-05` 执行 `ansible/playbooks/verify/02-05.yml`(内部 `import_playbook` `nginx-matrix-deploy.yml`,再 HTTP 校验四路径,最后 teardown)。`02-01`~`02-04` 另有单路径 playbook,便于单独调试。 + +每个 `verify/XX-YY.yml` 的典型结构为三段: + +- **deploy**:`kubectl apply` / `helm install` +- **verify**:`kubectl rollout status` / `curl` / `assert` +- **teardown**:`kubectl delete` / `helm uninstall`(默认执行,可用 `VERIFY_TEARDOWN=0` 关闭) + +```yaml +doc_id: "02-05" +case_id: "nginx-matrix-all" +description: "02-05 四条路径均返回 200,并区分后端内容" + +apply: + paths: + - "ansible/files/02-05-nginx-matrix/" + strategy: "apply-r" + +wait: + namespace: "default" + deployments: + - "nginx-m1" + - "nginx-m2" + - "nginx-m3" + - "nginx-m4" + timeout: "180s" + +http_check: + entry_base: "http://<入口IP>" + paths: + - path: "/demo-m1/" + expect_status: 200 + expect_body_contains: "Backend: M1" + - path: "/demo-m2/" + expect_status: 200 + expect_body_contains: "Backend: M2" +``` + +## 6. 执行器(Executor)——两类(在 Ansible 中落地) + +测试框架的执行器应当清晰分成两类,对应两种“被测对象”(机器 vs 集群): + +### 6.1 Ansible 远程命令类(普通 Linux / 设备执行命令) + +- 目标:在普通 Linux/路由/设备上通过 `ssh` 执行命令,并做断言(例如 `exit_code`、`stdout_contains/regex`、`file_exists`)。 +- 不强制要求在这类执行器里做 HTTP/curl 校验;HTTP/服务可用性校验归到 K3s 类用例。 + +落地方式:Ansible 的 `command/shell` + `assert`(inventory 决定 SSH 目标与用户/密钥)。 + +### 6.2 Ansible K3s 集群类(部署 + 结果校验 + 可选清理) + +- 目标:对 K3s 集群做 `apply` / `wait` / `check` / `http_check`(可选),并支持可选 `teardown/delete`。 +- 执行位置分两种: + - 本机直接 `kubectl` + - 或 `kubectl` 需要通过 SSH 在控制节点执行(复用你现有的 `.env.verify` 变量语义) + +落地方式:Ansible 在控制节点(如 ylc61)执行 `kubectl` / `helm` / `curl`,多个 play 顺序执行。默认策略:验证完成后 **执行 teardown**(清理部署),可通过 `VERIFY_TEARDOWN=0` 关闭。 + +## 7. 
状态记录与写回策略 + +你已经在 `docs/00-02-验证矩阵.md` 里定义了状态含义(未验证/部分验证/已验证)。 + +建议未来的写回策略分两步: + +- 首先:仍然由你手工更新 `00-02`(减少自动化失败导致的误写) +- 之后:如果要自动写回,则需要明确“失败判定标准、覆盖范围、并发策略”,避免多个执行器同时写同一条状态。 + +## 8. 与旧自动化的关系 + +- `docs/00-02-验证矩阵.md` 不承担执行细节,只作待验证列表与状态记录 +- 自动化执行以 `scripts/verify.sh` 与 `ansible/playbooks/verify/*.yml` 为准;本页描述其约定与扩展方式 + +## 9. 可选扩展(未落地) + +当前「一篇文档 → 一个 `verify/XX-YY.yml`」在规模小时最简单:**入口仍是 `scripts/verify.sh`**,不必为了「架构感」提前建一堆目录。当出现下面任一情况时,再考虑本节里的拆法即可。 + +### 9.1 何时值得拆 + +- **重复**:多个 playbook 里出现相同的 `kubectl rollout status`、`curl` 重试、gate + teardown 模板,改一处要改十处。 +- **体积**:单个 `XX-YY.yml` 过长,难以一眼看清「本篇到底验了什么」。 +- **复用非 Ansible 逻辑**:例如要在本机做纯文本处理、复杂拼接,再交给 Ansible;这类少量逻辑可以放在 shell,大量仍建议用 Ansible(inventory、变量、幂等已有约定)。 + +### 9.2 Ansible 侧:角色与 `include_tasks`(优先) + +与正式部署 playbook 一样,验证逻辑**优先**留在 Ansible 生态里拆: + +- **`include_tasks`** 或 **`import_tasks`**:把「等 Deployment」「带重试的 HTTP 检查」「安全 delete」抽成 `ansible/playbooks/verify/_*.yml` 小文件,由各 `XX-YY.yml` 引用。 +- **Role**(例如 `ansible/roles/k3s_verify_http/`):当同一套「变量约定 + 多 task」在多篇文档复用时,用 role 比复制粘贴更清晰;`verify.sh` **不需要改**,仍只调用 `verify/XX-YY.yml`,由该文件 `roles:` 或 `import_role` 即可。 + +原则:**`verify.sh` 只认 `verify/<doc_id>.yml` 这一层文件名**;底下怎么组织 include/role 是内部重构。 + +### 9.3 `scripts/lib/`(次要) + +适合只放 **bash 层**的薄工具:解析矩阵、拼 `ansible-playbook` 参数、统一日志格式等。若把「怎么 curl」「怎么 kubectl wait」写进大量 shell,容易和 inventory、sudo、`KUBECONFIG` 两套路径打架,**一般不如 Ansible task**。 + +### 9.4 单独的 `tests/` 目录(与现框架并列) + +若将来引入 **非 Ansible** 的测试运行器(例如用编程语言写契约测试、或只跑静态检查),可以建 `tests/` 存放用例与配置,由 **CI 或另一条脚本** 调用;它与 `verify.sh` 是 **并列流水线**,而不是替换关系: + +- **矩阵验收 / 真机集群路径**:仍以 `verify.sh` + `verify/*.yml` 为准。 +- **PR 上的快速反馈**:可对 `ansible/files/**/*.yaml` 跑 yamllint、kubeconform、`kubectl apply --dry-run=server`(需集群凭据时再决定挂不挂 CI)。 + +这样不会出现「同一篇文档到底以哪套测试为准」的模糊地带:文档级约定验收看矩阵 + verify playbook;代码库卫生看 CI。 + +### 9.5 静态检查(再次强调) + +yamllint、ansible-lint、schema 校验等 **不放进 `verify.sh`** 亦可:在 GitHub Actions / 本地 pre-commit 里单独跑即可。与 §3 一致——与运行时验证 **并列**,互不嵌套。 + diff --git 
a/docs/01-01-k3s-控制节点含traefik.md 
b/docs/01-01-k3s-控制节点含traefik.md index f2762bc..6379f8c 100644 --- a/docs/01-01-k3s-控制节点含traefik.md +++ b/docs/01-01-k3s-控制节点含traefik.md @@ -9,11 +9,11 @@ - 控制节点系统已完成基础网络配置 - 可使用 `sudo`,并可访问公网或本地镜像源 - 节点时间已同步(NTP) -- **方案二(数据盘)**:若使用自定义存储目录,需先挂载数据盘并创建 `/storage`(如 10G 系统盘 + 128G 数据盘场景) +- **方案二(数据盘)**:若使用自定义存储目录,需先将**独立数据盘**挂载到 `/storage`(本仓库验证环境为 **约 10G 系统盘 + 约 32G 数据盘**,四节点同构;**控制节点也必须**有独立数据盘,否则 Longhorn / containerd 与系统争用根分区,易出现 DiskPressure)。 ## 存储方案说明 -K3s 默认将数据(含 local-path 卷)放在 `--data-dir` 下。系统盘较小时,可将数据目录放到数据盘(如 `/storage`),避免占满系统盘。 +K3s 默认将数据(含 local-path 卷)放在 `--data-dir` 下。系统盘较小时,应将数据目录放到**单独挂载**的数据盘(如 `/storage`),避免占满系统盘。 | 方案 | 数据目录 | 适用场景 | |------|----------|----------| diff --git a/docs/01-02-k3s-工作节点.md b/docs/01-02-k3s-工作节点.md index 13c9b00..290b778 100644 --- a/docs/01-02-k3s-工作节点.md +++ b/docs/01-02-k3s-工作节点.md @@ -10,11 +10,11 @@ - 已完成 `01-01-k3s-控制节点含traefik.md` - 已拿到 token:默认方案为 `/var/lib/rancher/k3s/server/token`;若控制节点采用**数据盘方案**则为 `/storage/server/token` - 控制节点可执行 `kubectl` -- **方案二(数据盘)**:若工作节点也使用数据盘,需先挂载数据盘并创建 `/storage` +- **方案二(数据盘)**:若工作节点也使用数据盘,需先将**独立数据盘**挂载到 `/storage`(与 `00-04`、`01-01` 一致:**四节点**均为 10G+32G 时,**每台**都应挂载 `/storage` 再装 agent)。 ## 工作节点加入集群(在工作节点执行) -与 01-01 存储方案一致:控制节点用默认则工作节点用方案一;控制节点用数据盘则建议工作节点也用方案二,便于统一路径。 +与 01-01 存储方案一致:控制节点用默认则工作节点用方案一;控制节点用数据盘则**工作节点也用方案二**,且 **`/storage` 须为独立挂载**(与根分区不同盘),便于统一路径并满足 Longhorn 实验前提。 ### 方案一:默认数据目录 diff --git a/docs/01-05-armv7-nfs服务安装.md b/docs/01-05-armv7-nfs服务安装.md index 2c7e831..3dbe39d 100644 --- a/docs/01-05-armv7-nfs服务安装.md +++ b/docs/01-05-armv7-nfs服务安装.md @@ -1,42 +1,261 @@ # 01-05-armv7 NFS 服务安装 -> 本文只讲 armv7 主机侧 NFS 服务安装与导出配置。 +> 本文只讲 armv7 主机侧 NFS 服务安装与导出配置,目标是把 **`/sdcard`** 作为 NFS 共享目录导出给 K3s 节点使用。 ## 前置条件 - 已完成 `01-03-armv7-standalone-docker.md` - armv7 与 K3s 节点网络互通 +- armv7 上存在挂载点 `/sdcard`(先用 `mount | grep /sdcard` 确认) ## 操作步骤 -1. 在 armv7 安装 NFS 服务(nfs-utils / nfs-kernel-server) -2. 创建导出目录(例如 `/data/nfs`) +1. 
在 armv7 安装 NFS 服务(`nfs-utils` 或 `nfs-kernel-server`) +2. 将 `/sdcard` 目录配置为导出目录 3. 配置 `/etc/exports` -4. 放行 NFS 端口并启用开机自启 +4. 使导出生效并启用开机自启 +5. (可选)配置防火墙 -示例(按发行版调整): +### 1) 安装 NFS 服务(按发行版二选一) ```bash -sudo mkdir -p /data/nfs -sudo chown -R nobody:nogroup /data/nfs -echo "/data/nfs 192.168.2.0/24(rw,sync,no_subtree_check,no_root_squash)" | sudo tee /etc/exports -sudo exportfs -rav +# RHEL/CentOS/Fedora +sudo dnf install -y nfs-utils sudo systemctl enable --now nfs-server + +# Debian/Ubuntu +# sudo apt update +# sudo apt install -y nfs-kernel-server +# sudo systemctl enable --now nfs-kernel-server ``` +### 2) 确认 `/sdcard` 可用 + +```bash +mount | grep /sdcard +ls -ld /sdcard +``` + +> 如果 `/sdcard` 是外置存储(SD 卡/U 盘),建议先确认它已在系统启动后自动挂载,再做 NFS 导出。 + +### 3) 配置 `/etc/exports` 导出 `/sdcard` + +示例(允许 `192.168.2.0/24` 网段读写): + +```bash +echo "/sdcard 192.168.2.0/24(rw,sync,no_subtree_check,no_root_squash)" | sudo tee /etc/exports +sudo exportfs -rav +``` + +参数说明(常用): + +- `rw`:允许读写 +- `sync`:同步写入,数据更稳妥 +- `no_subtree_check`:减少子目录检查开销 +- `no_root_squash`:客户端 root 保持 root 权限(仅在可信内网使用) + +### 4) 防火墙(可选,按环境) + +如果 armv7 启用了 firewalld: + +```bash +sudo firewall-cmd --add-service=nfs --permanent +sudo firewall-cmd --add-service=mountd --permanent +sudo firewall-cmd --add-service=rpc-bind --permanent +sudo firewall-cmd --reload +``` + +如果是 Debian/Ubuntu + UFW,请按实际策略放行 NFS 相关端口/服务。 + ## 验证命令 ```bash showmount -e localhost sudo exportfs -v -sudo systemctl status nfs-server --no-pager +sudo systemctl status nfs-server --no-pager || sudo systemctl status nfs-kernel-server --no-pager ``` ## 预期 -- `showmount -e` 可看到导出目录 +- `showmount -e` 可看到导出目录 `/sdcard` - NFS 服务为运行状态 +## 客户端快速验证(在任一 K3s 节点) + +```bash +sudo mkdir -p /mnt/nfs-sdcard-test +sudo mount -t nfs <armv7主机名或IP>:/sdcard /mnt/nfs-sdcard-test +df -h | grep nfs-sdcard-test +ls -la /mnt/nfs-sdcard-test +``` + +> 若内网 DNS/主机名不可解析,请直接使用 armv7 的内网 IP(例如 `192.168.2.22:/sdcard`)。 + +验证完成后可卸载: + +```bash +sudo umount /mnt/nfs-sdcard-test +``` + +## 
本次实机验证记录(onecloud -> ylc61) + +- 服务端(onecloud): + - `showmount -e localhost` 返回 `/sdcard 192.168.2.0/24` + - `exportfs -v` 显示 `/sdcard` 已按配置导出 +- 客户端(ylc61): + - `mount -t nfs 192.168.2.22:/sdcard /mnt/nfs-sdcard-test` 成功 + - 可写入测试文件 `.nfs_write_test` + - `umount /mnt/nfs-sdcard-test` 成功 + +## NFS 安全验证与加固 + +### 先明确:默认 NFS 没有“用户名/密码登录” + +当前这种 `/etc/exports` 用法(如 `192.168.2.0/24(...)`)主要基于: + +- 客户端来源 IP/网段控制 +- UID/GID 映射(`root_squash`/`all_squash`) + +它不是“账号密码”或“密钥登录”模型。若需要强身份认证与加密,应使用 NFSv4 + Kerberos(见下文)。 + +### 基础安全加固(内网实用版) + +先解释你关心的几个参数(核心差异): + +- `no_root_squash`:客户端 root 在服务端仍是 root(权限非常大,不推荐默认使用)。 +- `root_squash`:客户端 root 会被压缩为匿名用户(更安全,推荐默认)。 +- `all_squash`:不仅 root,连普通用户也统一映射为匿名用户(最“收敛”,但权限控制最粗)。 + +> 你说的“通常 NFS 只是数据目录,普通读写就够了”,这个判断是对的。 +> 对大多数家庭实验室,优先 `root_squash` 即可,不必一上来 `no_root_squash`。 + +#### 推荐配置 A(默认推荐:保留普通用户语义,仅压缩 root) + +```bash +echo "/sdcard 192.168.2.0/24(rw,sync,no_subtree_check,root_squash)" | sudo tee /etc/exports +sudo exportfs -rav +``` + +适用:一般数据目录读写场景,既降低 root 风险,又不过度收紧普通用户行为。 + +若切到 `root_squash` 后客户端出现 `Permission denied`,通常是导出目录权限还停留在 `root:root`。可在服务端先调整目录属主与权限(按你的风险接受度选择更细粒度权限): + +```bash +sudo chown nobody:nogroup /sdcard +sudo chmod 0777 /sdcard +``` + +> 说明:这是“保证先可写”的快速做法。更严格场景建议用更小权限(例如按业务 UID/GID 精细授权),不要长期依赖 `0777`。 + +**更小权限的实操示例(推荐)** + +> 多容器/多应用共享 NFS 时,不建议只改 `/sdcard` 根目录权限;更推荐“**根目录只做入口,每个应用单独子目录分权**”。 + +示例:应用 A 用 `UID/GID=1000`,应用 B 用 `UID/GID=1001`。 + +```bash +# 1) 根目录:保守权限(可遍历,不建议直接 0777) +sudo chown root:root /sdcard +sudo chmod 0755 /sdcard + +# 2) 应用A子目录 +sudo mkdir -p /sdcard/app-a +sudo chown 1000:1000 /sdcard/app-a +sudo chmod 0770 /sdcard/app-a + +# 3) 应用B子目录 +sudo mkdir -p /sdcard/app-b +sudo chown 1001:1001 /sdcard/app-b +sudo chmod 0770 /sdcard/app-b +``` + +如果你选择 `all_squash`,再配合匿名映射到同一业务账号: + +```bash +echo "/sdcard 192.168.2.0/24(rw,sync,no_subtree_check,root_squash,all_squash,anonuid=1000,anongid=1000)" | sudo tee /etc/exports +sudo exportfs -rav +``` + +验证(服务端): + +```bash +sudo exportfs -v 
+ls -ldn /sdcard +ls -ldn /sdcard/app-a /sdcard/app-b +``` + +你应能看到: + +- `exportfs -v` 中导出参数已是目标策略(如 `root_squash` / `all_squash,anonuid=1000`) +- `/sdcard` 根目录的属主与权限符合预期(如上例为 `0:0`,即 `root:root`,权限 `0755`) +- 各应用子目录的属主属组与业务 UID/GID 一致(如 `app-a` 为 `1000:1000`) + +在 K8s Pod 侧,建议与之对应: + +- 应用 A:`runAsUser/runAsGroup/fsGroup` 使用 `1000` +- 应用 B:`runAsUser/runAsGroup/fsGroup` 使用 `1001` + +这样比把 `/sdcard` 全盘 `0777` 更可控:权限边界在“每个业务子目录”,不同应用互不踩权限。 + +#### 推荐配置 B(更严格:所有用户都匿名) + +```bash +echo "/sdcard 192.168.2.0/24(rw,sync,no_subtree_check,root_squash,all_squash,anonuid=65534,anongid=65534)" | sudo tee /etc/exports +sudo exportfs -rav +``` + +“所有用户都匿名”的含义与影响: + +- 含义:客户端不论是 root 还是普通用户,服务端都按同一个匿名身份(通常 `nobody:nogroup`)处理。 +- 影响:权限简单统一,但会丢失“按用户区分权限”的能力;某些应用可能因属主/权限不匹配而报错。 +- 建议:仅在你明确要“统一身份写入”时启用。 + +#### 不推荐长期保留(除非明确必须) + +```bash +echo "/sdcard 192.168.2.0/24(rw,sync,no_subtree_check,no_root_squash)" | sudo tee /etc/exports +sudo exportfs -rav +``` + +`no_root_squash` 只建议临时排障或已做严格网络隔离时短期使用。 + +配套建议: + +- 防火墙只放行必要客户端 IP(不要整段网段) +- NFS 服务只暴露在可信内网/VLAN +- 客户端挂载可加 `nosuid,nodev,noexec`(按业务兼容性评估) + +### 安全验证命令(基础版) + +```bash +# 服务端:确认导出参数 +sudo exportfs -v + +# 客户端:确认挂载参数 +mount | grep nfs +``` + +核对点: + +- 采用推荐配置 A/B 时,`exportfs -v` 中不应出现 `no_root_squash` +- 挂载后普通读写符合预期 +- 若启用 `all_squash`,需额外确认应用对匿名 UID/GID 的权限兼容性 + +### 强认证路线(进阶):NFSv4 + Kerberos + +如果你需要“身份认证/完整性/加密”而不是仅靠 IP 白名单: + +- 使用 `sec=krb5`(认证) +- 或 `sec=krb5i`(认证 + 完整性) +- 或 `sec=krb5p`(认证 + 完整性 + 加密,安全最高) + +这条路线需要额外建设 KDC、principal、keytab、时钟同步,复杂度明显高于内网基础版;家庭实验室通常先做“基础加固版”,再按需要升级。 + +## 与 K3s 对接(入口) + +在 K3s 里使用 NFS,通常不需要先把 NFS 手工挂到每台节点主机。 +详细做法(Pod 直挂 / PV+PVC)请见:`03-06-k3s-使用nfs存储.md`。 + ## 下一步 - `03-06-k3s-使用nfs存储.md` diff --git a/docs/01-06-节点初始化-ansible-实践.md b/docs/01-06-节点初始化-ansible-实践.md index 1c9347a..a329812 100644 --- a/docs/01-06-节点初始化-ansible-实践.md +++ b/docs/01-06-节点初始化-ansible-实践.md @@ -19,11 +19,42 @@ - IP 规划、主机名已大致确定,例如: - `ylc61`:k3s server,IP `192.168.2.61` - `ylc62` ~ `ylc64`:k3s worker,IP `192.168.2.62` ~ `192.168.2.64` - - **数据盘**:若使用 
`/storage` 方案,需在每台节点上提前挂载数据盘并创建 `/storage`; + - **数据盘**:若使用 `/storage` 方案,每台节点须将**独立数据盘**挂载到 `/storage`(与 `/` 不同设备),详见 `00-04` 与下文「数据盘准备」。 - 不覆盖: - 从「完全裸铁 + 无系统」开始的 PXE 装机; - 高级 HA(多 server + 外部 datastore)——仍按 `01-04`、`03-10` 执行。 +### 1.1 数据盘准备(手工,或与自动化二选一) + +在运行 k3s 安装 playbook 之前,**每台**节点应满足:`mountpoint /storage` 为真,且 `findmnt -n -o SOURCE /` 与 `findmnt -n -o SOURCE /storage` **不相同**。 + +**手工示例**(第二块盘为 `/dev/vdb`,请按 `lsblk` 实际设备名修改;**误选设备会清空该盘**): + +```bash +sudo parted -s /dev/vdb mklabel gpt mkpart primary ext4 0% 100% +sudo mkfs.ext4 -F /dev/vdb1 +UUID=$(sudo blkid -s UUID -o value /dev/vdb1) +echo "UUID=$UUID /storage ext4 defaults,nofail 0 2" | sudo tee -a /etc/fstab +sudo mkdir -p /storage && sudo mount -a +``` + +XFS 用户将 `mkfs.ext4` / `fstab` 类型改为 `xfs` 即可(Longhorn 支持 ext4/XFS)。 + +**自动化(可选)**:在 `group_vars/all.yml` 中设置 `k3s_prepare_storage: true` 与 `k3s_data_disk_device: /dev/vdb`(四台盘符一致时一条即可;不一致则用 `host_vars/<主机名>.yml` 覆盖),然后执行: + +```bash +ansible-playbook -i inventory.ini playbooks/k3s-prepare-storage.yml +``` + +该 playbook 在 `/storage` 已是独立挂载时会跳过,避免重复执行。 + +### 1.2 推荐执行顺序(10G + 32G 四节点) + +1. (可选)`playbooks/k3s-prepare-storage.yml` +2. `playbooks/k3s-init-and-install.yml`(可在 `group_vars` 中设 `k3s_verify_storage_mount: true` 强制校验 `/` 与 `/storage` 不同源) +3. (可选)`playbooks/longhorn-install.yml`(Helm,见 `03-07`) +4. (可选)`playbooks/apply-local-path-config-lab.yml`,或 `longhorn_apply_local_path_lab: true` 随 Longhorn 一并应用(真源:`files/03-05-local-path-config/local-path-config-lab.json`,见 `03-05`) + ## 2. 目录结构 本仓库已有 `ansible/`: @@ -35,7 +66,13 @@ ansible/ group_vars/ all.yml playbooks/ + k3s-prepare-storage.yml # 可选:第二块盘分区、挂载 /storage k3s-init-and-install.yml # 标准 IPv4 安装 + longhorn-install.yml # 可选:Helm 安装 Longhorn + apply-local-path-config-lab.yml # 可选:仅应用 local-path 实验室 ConfigMap + files/ + 03-07-longhorn/values-lab.yaml # 实验室 Helm values + 03-05-local-path-config/local-path-config-lab.json ``` ## 3. 
示例 inventory @@ -77,6 +114,14 @@ k3s_worker 禁用时设 `k3s_manage_coredns: false`。 +**存储挂载校验**(推荐实验室开启): + +- `k3s_verify_storage_mount: true`:在 `k3s-init-and-install.yml` 安装 k3s **之前**,断言 `/storage` 为挂载点且与 `/` 不同块设备;失败时提示查阅 `00-04`。已有「目录式假 /storage」的旧环境可临时设为 `false`。 + +**数据盘自动化**(可选): + +- `k3s_prepare_storage: true` 且 `k3s_data_disk_device: /dev/vdb`:由 `k3s-prepare-storage.yml` 执行(见 §1.1)。 + ## 5. 执行流程概览 playbook 依次执行: @@ -113,8 +158,12 @@ playbook 依次执行: ```bash cd ansible +# (可选)先准备数据盘挂载 /storage +# ansible-playbook -i inventory.ini playbooks/k3s-prepare-storage.yml # 标准 IPv4 安装 ansible-playbook -i inventory.ini playbooks/k3s-init-and-install.yml +# (可选)Helm 安装 Longhorn +# ansible-playbook -i inventory.ini playbooks/longhorn-install.yml ``` 执行结束后,playbook 会输出: diff --git a/docs/01-07-openwrt-haproxy.md b/docs/01-07-openwrt-haproxy.md index a776f7a..7856cd4 100644 --- a/docs/01-07-openwrt-haproxy.md +++ b/docs/01-07-openwrt-haproxy.md @@ -64,7 +64,9 @@ opkg install haproxy 验证:从内网访问 `http://:18080/` 或 `http://:18080/demo-m1/`(家庭私网常用 18080/18443),应能到达 Traefik 与后端。 -**自动验证**:`./scripts/01-07-verify-haproxy-openwrt.sh` 或 `./scripts/01-07-verify-haproxy.sh`。经 **ssh onecloud** 作为第三方发起 curl,验证 `http://:18080` 与 `https://<域名>:18443`(HTTPS 需 `--https-hosts`)。不部署、不改端口;需 OpenWrt HAProxy 已按 18080/18443 配置。可选 `--deploy-matrix http` 或 `--deploy-matrix tls` 一键部署对应 nginx 矩阵后再验证。**验证 HTTPS 时**:可先执行 `./scripts/01-07-deploy-nginx-tls-via-ylc61.sh`,经 ssh ylc61 在控制节点上一键部署 nginx TLS 矩阵,再带 `--https-hosts 'test01.jackadam.top,...'` 验证。验证通过后默认更新 `docs/00-02-验证矩阵.md`(`--no-update-matrix` 跳过)。 +**验证**:经 **ssh onecloud**(或你可访问的第三方机器)发起 curl,验证 `http://<OpenWrt IP>:18080` 与 `https://<域名>:18443`(HTTPS 需正确设置 Host/SNI,例如 `curl --resolve <域名>:18443:<OpenWrt IP> https://<域名>:18443/`)。不部署、不改端口;需 OpenWrt HAProxy 已按 18080/18443 配置。 + +验证通过后,请**手工**在 `docs/00-02-验证矩阵.md` 补充状态与备注(当前仓库已下线“自动更新矩阵”的执行入口)。 ## 5. 
PROXY Protocol(可选) diff --git a/docs/02-00-nginx-系列说明.md b/docs/02-00-nginx-系列说明.md index 68534cb..1ebdb01 100644 --- a/docs/02-00-nginx-系列说明.md +++ b/docs/02-00-nginx-系列说明.md @@ -97,7 +97,7 @@ kubectl describe node <节点名> 通用删除建议使用 manifests 目录(一键清理同一个场景): ```bash -kubectl delete -f ansible/files/nginx-matrix/ -R +kubectl delete -f ansible/files/02-05-nginx-matrix/ -R ``` 或按具体文件删单个场景(见各分篇的 `## 删除` 小节)。 diff --git a/docs/02-01-nginx-control-ingress.md b/docs/02-01-nginx-control-ingress.md index 420d80a..ec94664 100644 --- a/docs/02-01-nginx-control-ingress.md +++ b/docs/02-01-nginx-control-ingress.md @@ -14,12 +14,12 @@ 2. 创建 Middleware + Ingress(`/demo-m1` -> nginx-m1:80) 3. 等待 Pod 与 Ingress 就绪 -示例 YAML 见 `ansible/files/nginx-matrix/01-control-ingress.yaml`。 +示例 YAML 见 `ansible/files/02-05-nginx-matrix/01-control-ingress.yaml`。 ## 部署命令 ```bash -kubectl apply -f ansible/files/nginx-matrix/01-control-ingress.yaml +kubectl apply -f ansible/files/02-05-nginx-matrix/01-control-ingress.yaml ``` ## 验证命令 @@ -37,7 +37,7 @@ curl -i --max-time 3 http://<入口节点IP>/demo-m1/ ## 删除 ```bash -kubectl delete -f ansible/files/nginx-matrix/01-control-ingress.yaml +kubectl delete -f ansible/files/02-05-nginx-matrix/01-control-ingress.yaml ``` ## 失败排查 diff --git a/docs/02-02-nginx-control-ingressroute.md b/docs/02-02-nginx-control-ingressroute.md index 2c76f57..98f6f60 100644 --- a/docs/02-02-nginx-control-ingressroute.md +++ b/docs/02-02-nginx-control-ingressroute.md @@ -14,12 +14,12 @@ 2. 创建 Middleware + IngressRoute(`PathPrefix(/demo-m2)`) 3. 
等待资源就绪 -示例 YAML 见 `ansible/files/nginx-matrix/02-control-ingressroute.yaml`。 +示例 YAML 见 `ansible/files/02-05-nginx-matrix/02-control-ingressroute.yaml`。 ## 部署命令 ```bash -kubectl apply -f ansible/files/nginx-matrix/02-control-ingressroute.yaml +kubectl apply -f ansible/files/02-05-nginx-matrix/02-control-ingressroute.yaml ``` ## 验证命令 @@ -38,7 +38,7 @@ curl -i --max-time 3 http://<入口节点IP>/demo-m2/ ## 删除 ```bash -kubectl delete -f ansible/files/nginx-matrix/02-control-ingressroute.yaml +kubectl delete -f ansible/files/02-05-nginx-matrix/02-control-ingressroute.yaml ``` ## 失败排查 diff --git a/docs/02-03-nginx-worker-ingress.md b/docs/02-03-nginx-worker-ingress.md index a24ea81..d71b42a 100644 --- a/docs/02-03-nginx-worker-ingress.md +++ b/docs/02-03-nginx-worker-ingress.md @@ -14,12 +14,12 @@ 2. 创建 Middleware + Ingress(`/demo-m3` -> nginx-m3:80) 3. 等待资源就绪 -示例 YAML 见 `ansible/files/nginx-matrix/03-worker-ingress.yaml`。 +示例 YAML 见 `ansible/files/02-05-nginx-matrix/03-worker-ingress.yaml`。 ## 部署命令 ```bash -kubectl apply -f ansible/files/nginx-matrix/03-worker-ingress.yaml +kubectl apply -f ansible/files/02-05-nginx-matrix/03-worker-ingress.yaml ``` ## 验证命令 @@ -37,7 +37,7 @@ curl -i --max-time 3 http://<入口节点IP>/demo-m3/ ## 删除 ```bash -kubectl delete -f ansible/files/nginx-matrix/03-worker-ingress.yaml +kubectl delete -f ansible/files/02-05-nginx-matrix/03-worker-ingress.yaml ``` ## 失败排查 diff --git a/docs/02-04-nginx-worker-ingressroute.md b/docs/02-04-nginx-worker-ingressroute.md index 71c38c2..80af58b 100644 --- a/docs/02-04-nginx-worker-ingressroute.md +++ b/docs/02-04-nginx-worker-ingressroute.md @@ -15,12 +15,12 @@ 2. 创建 Middleware + IngressRoute(`PathPrefix(/demo-m4)`) 3. 
等待资源就绪 -示例 YAML 见 `ansible/files/nginx-matrix/04-worker-ingressroute.yaml`。 +示例 YAML 见 `ansible/files/02-05-nginx-matrix/04-worker-ingressroute.yaml`。 ## 部署命令 ```bash -kubectl apply -f ansible/files/nginx-matrix/04-worker-ingressroute.yaml +kubectl apply -f ansible/files/02-05-nginx-matrix/04-worker-ingressroute.yaml ``` ## 验证命令 @@ -39,7 +39,7 @@ curl -i --max-time 3 http://<入口节点IP>/demo-m4/ ## 删除 ```bash -kubectl delete -f ansible/files/nginx-matrix/04-worker-ingressroute.yaml +kubectl delete -f ansible/files/02-05-nginx-matrix/04-worker-ingressroute.yaml ``` ## 失败排查 diff --git a/docs/02-05-nginx-验证矩阵-一键部署.md b/docs/02-05-nginx-验证矩阵-一键部署.md index 300011a..e8be19c 100644 --- a/docs/02-05-nginx-验证矩阵-一键部署.md +++ b/docs/02-05-nginx-验证矩阵-一键部署.md @@ -23,7 +23,7 @@ ## 完整配置(与 Ansible 共用) -配置位于 `ansible/files/nginx-matrix/`(4 个文件对应 M1~M4),文档与 Ansible 共用此目录: +配置位于 `ansible/files/02-05-nginx-matrix/`(4 个文件对应 M1~M4),文档与 Ansible 共用此目录: | 文件 | 场景 | 路径 | 节点 | |------|------|------|------| @@ -37,7 +37,7 @@ ## 部署 ```bash -kubectl apply -f ansible/files/nginx-matrix/ -R +kubectl apply -f ansible/files/02-05-nginx-matrix/ -R kubectl get pod,svc,ing,ingressroute -n default -o wide ``` @@ -110,10 +110,10 @@ Manifest 里四份写法一致,若只有 M1 仍显示默认页,多半是集 ```bash # 在仓库根目录执行时: -kubectl apply -f ansible/files/nginx-matrix/ -R +kubectl apply -f ansible/files/02-05-nginx-matrix/ -R # 若当前在 ansible/ 目录下,改用: -kubectl apply -f files/nginx-matrix/ -R +kubectl apply -f files/02-05-nginx-matrix/ -R ``` **M1 未生效时:删除部署再重新部署(推荐)** @@ -125,10 +125,10 @@ kubectl apply -f files/nginx-matrix/ -R kubectl delete deployment nginx-m1 -n default # 2. 重新部署 M1(在 ansible/ 目录下) -kubectl apply -f files/nginx-matrix/01-control-ingress.yaml +kubectl apply -f files/02-05-nginx-matrix/01-control-ingress.yaml # 若在仓库根目录: -# kubectl apply -f ansible/files/nginx-matrix/01-control-ingress.yaml +# kubectl apply -f ansible/files/02-05-nginx-matrix/01-control-ingress.yaml # 3. 
等 Pod Running 后验证 kubectl get pod -n default -l app=nginx-m1 @@ -143,7 +143,7 @@ kubectl exec -n default deployment/nginx-m1 -- cat /etc/nginx/conf.d/default.con 可使用 Ansible playbook 自动完成复制 manifests、apply、等待 Pod 就绪及 curl 验证: - **Playbook**:`ansible/playbooks/nginx-matrix-deploy.yml` -- **Manifests 位置**:`ansible/files/nginx-matrix/`(M1 control-plane / M2 M4 节点名 ylc61、ylc64,M3 worker;按实际修改 M2/M4 节点名) +- **Manifests 位置**:`ansible/files/02-05-nginx-matrix/`(M1 control-plane / M2 M4 节点名 ylc61、ylc64,M3 worker;按实际修改 M2/M4 节点名) - **执行(在 ansible/ 目录下)**: ```bash @@ -164,14 +164,14 @@ Playbook 会:拷贝 manifests 到控制节点 → **先删除全部 nginx 矩 **手动 kubectl apply 的**:用同一目录删除 ```bash -kubectl delete -f ansible/files/nginx-matrix/ -R +kubectl delete -f ansible/files/02-05-nginx-matrix/ -R ``` **Ansible playbook 部署的**:在仓库根或 ansible 同级的机器上,用 manifests 删除(需配置 KUBECONFIG) ```bash export KUBECONFIG=/etc/rancher/k3s/k3s.yaml # 或从控制节点拷贝 kubeconfig -kubectl delete -f ansible/files/nginx-matrix/ -R +kubectl delete -f ansible/files/02-05-nginx-matrix/ -R ``` 若控制节点上 `/tmp/nginx-matrix/` 仍存在,也可在控制节点执行: diff --git a/docs/03-01-k3s-traefik-dashboard.md b/docs/03-01-k3s-traefik-dashboard.md index 61fb742..d5f1f89 100644 --- a/docs/03-01-k3s-traefik-dashboard.md +++ b/docs/03-01-k3s-traefik-dashboard.md @@ -20,10 +20,10 @@ - **默认路径**:`/var/lib/rancher/k3s/server/manifests/traefik-dashboard.yaml` - **自定义 data-dir**(如 `--data-dir=/storage`):`/server/manifests/traefik-dashboard.yaml` - **唯一真源(勿与文档内联重复)**:[HelmChartConfig + IngressRoute 完整 YAML](../../ansible/files/traefik-dashboard/traefik-dashboard.yaml)。复制到上述 manifests 路径,或在仓库根执行: + **唯一真源(勿与文档内联重复)**:[HelmChartConfig + IngressRoute 完整 YAML](../ansible/files/03-01-traefik-dashboard/traefik-dashboard.yaml)。复制到上述 manifests 路径,或在仓库根执行: ```bash - kubectl apply -f ansible/files/traefik-dashboard/traefik-dashboard.yaml + kubectl apply -f ansible/files/03-01-traefik-dashboard/traefik-dashboard.yaml ``` 2. 
应用配置并等待 Traefik 重载(按实际路径选择其一复制执行): diff --git a/docs/03-02-k3s-traefik-acme.md b/docs/03-02-k3s-traefik-acme.md index b8740ab..30925f9 100644 --- a/docs/03-02-k3s-traefik-acme.md +++ b/docs/03-02-k3s-traefik-acme.md @@ -21,7 +21,7 @@ - **Pod / 部署**:ACME 配置通过 `HelmChartConfig` 注入到 **同一个 Traefik Deployment**。**副本数为 chart 默认值 1**(即 `deployment.replicas` 未在 values 里写时默认为 1),所以只有 1 个 Traefik Pod;与 03-01 的 Traefik 是同一套 Deployment,只是 values 里多了 ACME 参数与 env。 - **配置存在哪里**:`HelmChartConfig` 存在 **etcd**(控制节点);K3s 的 chart 控制器据此更新 Traefik 的部署参数,Traefik 进程从 **Kubernetes API** 读取 Ingress/IngressRoute,无需多 Pod 间同步。 -- **ACME 存储(证书与账户)**:`acme.storage` 指向容器内 **`/data/acme.json`**。未配 hostPath 时,K3s 默认会为 Traefik 挂载卷到 `/data`(如 emptyDir 或默认持久卷),**仅当前这一个 Traefik Pod 可写**,Pod 重建后若卷不持久则需重新申请证书。若在 values 里配置了 **hostPath**(见本页可选配置),则 `/data` 对应宿主机目录,证书写在物理机路径,便于备份与复用;Traefik 仍为 1 个 Pod,不存在多副本间同步 acme.json 的问题。**推荐**:Dashboard + ACME 场景直接用 **同一份** [`traefik-dashboard-acme.yaml`](../ansible/files/traefik-dashboard-acme/traefik-dashboard-acme.yaml)(已含 **`persistence`(local-path)+ ACME**),见 `03-05-k3s-local-path-pvc.md`。不要 Dashboard 时按该文件头注释删减。 +- **ACME 存储(证书与账户)**:`acme.storage` 指向容器内 **`/data/acme.json`**。未配 hostPath 时,K3s 默认会为 Traefik 挂载卷到 `/data`(如 emptyDir 或默认持久卷),**仅当前这一个 Traefik Pod 可写**,Pod 重建后若卷不持久则需重新申请证书。若在 values 里配置了 **hostPath**(见本页可选配置),则 `/data` 对应宿主机目录,证书写在物理机路径,便于备份与复用;Traefik 仍为 1 个 Pod,不存在多副本间同步 acme.json 的问题。**推荐**:Dashboard + ACME 场景直接用 **同一份** [`traefik-dashboard-acme.yaml`](../ansible/files/03-03-traefik-dashboard-acme/traefik-dashboard-acme.yaml)(已含 **`persistence`(local-path)+ ACME**),见 `03-05-k3s-local-path-pvc.md`。不要 Dashboard 时按该文件头注释删减。 - **第一次部署随机节点、重启后怎么办**:Traefik 未指定 nodeSelector 时,首次会**随机调度**到某一节点。若使用了 **hostPath**,证书只存在于该节点的磁盘上;**Pod 被调度到其他节点**(重启、驱逐、缩容再扩容)时,新节点上的同名 hostPath 是另一块盘,**证书不会跟着走**,可能需重新申请。若希望重启或节点故障后仍保留证书,可:**① 把 Traefik 固定到某一节点**(在 HelmChartConfig 的 `deployment` 下配 `nodeSelector`,例如 `nodeSelector: { kubernetes.io/hostname: ylc61 }(节点名使用短主机名 
ylc61~ylc64,便于配合 Cloudflare CDN)`),使 hostPath 始终落在同一台机;**② 或不用 hostPath**,依赖 K3s 默认持久卷(若为 local-path,则卷仍绑定某节点,Pod 重建到同节点可复用);**③ 或改用 NFS 等共享存储**挂到 `/data`,多节点可读同一证书(需自行在 values 里配 PVC/volume)。 --- @@ -90,7 +90,7 @@ kubectl -n kube-system get secret cloudflare-api-token \ > > **文件选择**:K3s 自带的 `traefik.yaml` 会被 K3s 覆盖,**不要修改**。所有自定义配置(ACME、nodeSelector、hostPath 以及其他扩展配置)都应写在 **`traefik-acme.yaml`** 这一份 HelmChartConfig 里,与默认 chart 合并生效。 -1. 在控制节点创建 `traefik-acme.yaml`,推荐放入 K3s manifests 目录(路径同 03-01)。**完整配置见 `ansible/files/traefik-acme/traefik-acme.yaml`**(与 Ansible 共用),复制后替换 `` 等占位符即可。若走 **Dashboard + ACME** 且需 **证书落盘 local-path PVC**,直接用 [`traefik-dashboard-acme.yaml`](../ansible/files/traefik-dashboard-acme/traefik-dashboard-acme.yaml)(已内置 persistence,说明见 `03-05-k3s-local-path-pvc.md`)。**仅 ACME、无 Dashboard** 时仍可用本目录 [`traefik-acme.yaml`](../ansible/files/traefik-acme/traefik-acme.yaml),并自行按 `03-05` 在 Helm values 中增加 `persistence` 块(与 `/data/acme.json` 一致)。 +1. 在控制节点创建 `traefik-acme.yaml`,推荐放入 K3s manifests 目录(路径同 03-01)。**完整配置见 `ansible/files/03-02-traefik-acme/traefik-acme.yaml`**(与 Ansible 共用),复制后替换 `` 等占位符即可。若走 **Dashboard + ACME** 且需 **证书落盘 local-path PVC**,直接用 [`traefik-dashboard-acme.yaml`](../ansible/files/03-03-traefik-dashboard-acme/traefik-dashboard-acme.yaml)(已内置 persistence,说明见 `03-05-k3s-local-path-pvc.md`)。**仅 ACME、无 Dashboard** 时仍可用本目录 [`traefik-acme.yaml`](../ansible/files/03-02-traefik-acme/traefik-acme.yaml),并自行按 `03-05` 在 Helm values 中增加 `persistence` 块(与 `/data/acme.json` 一致)。 > 将 `` 改为你的邮箱。`/data/acme.json` 为容器内路径;`caserver` 为测试服务器(staging),正式上线前删除该行即切回生产 CA。Traefik 在容器内监听 8000/8443,由 Service 和 svclb 映射到节点 80/443。 > @@ -185,7 +185,7 @@ kubectl -n kube-system exec -it "$POD" -- nslookup acme-v02.api.letsigncrypt.org ## TLS 矩阵清单(02-05 升级版) -> **唯一真源**:[`ansible/files/nginx-matrix-tls/`](../../ansible/files/nginx-matrix-tls/)(`01-control-ingress.yaml`~`04-worker-ingressroute.yaml`),与 
[`ansible/playbooks/nginx-matrix-tls-deploy.yml`](../../ansible/playbooks/nginx-matrix-tls-deploy.yml) 共用;**本文不再内联整份 YAML**。 +> **唯一真源**:[`ansible/files/03-02-nginx-matrix-tls/`](../../ansible/files/03-02-nginx-matrix-tls/)(`01-control-ingress.yaml`~`04-worker-ingressroute.yaml`),与 [`ansible/playbooks/nginx-matrix-tls-deploy.yml`](../../ansible/playbooks/nginx-matrix-tls-deploy.yml) 共用;**本文不再内联整份 YAML**。 **相对 02-05 的差异摘要**:基于域名根路径 `/`;TLS 仅绑 `websecure`;含 HTTP-only(仅 `web`)路由;与 02-05 的 `/demo-mx` 为两套资源;M2/M4 节点名与域名请在清单内编辑。 @@ -197,12 +197,12 @@ kubectl -n kube-system exec -it "$POD" -- nslookup acme-v02.api.letsigncrypt.org **方式一:使用仓库 YAML 目录(推荐与文档一致)** -1. 在仓库中编辑 [`ansible/files/nginx-matrix-tls/`](../../ansible/files/nginx-matrix-tls/) 内各文件(M2/M4 节点名、域名等)。 -2. 按 k3s 存储方案可将整个目录复制到控制节点 manifests,或直接在仓库根执行 `kubectl apply -f ansible/files/nginx-matrix-tls/ -R`(与 `01-01-k3s-控制节点含traefik.md` 存储路径说明一致)。 +1. 在仓库中编辑 [`ansible/files/03-02-nginx-matrix-tls/`](../../ansible/files/03-02-nginx-matrix-tls/) 内各文件(M2/M4 节点名、域名等)。 +2. 按 k3s 存储方案可将整个目录复制到控制节点 manifests,或直接在仓库根执行 `kubectl apply -f ansible/files/03-02-nginx-matrix-tls/ -R`(与 `01-01-k3s-控制节点含traefik.md` 存储路径说明一致)。 3. 
清理示例(路径与 apply 时一致): ```bash - kubectl delete -f ansible/files/nginx-matrix-tls/ -R --ignore-not-found=true + kubectl delete -f ansible/files/03-02-nginx-matrix-tls/ -R --ignore-not-found=true ``` 或沿用下文按资源名删除。 或按资源名删除(与路径无关): @@ -217,7 +217,7 @@ kubectl -n kube-system exec -it "$POD" -- nslookup acme-v02.api.letsigncrypt.org - 直接使用仓库中已合并好的 4 个文件(每个 Mx 含 TLS + HTTP-only),在**仓库根目录**执行: ```bash - kubectl apply -f ansible/files/nginx-matrix-tls/ -R + kubectl apply -f ansible/files/03-02-nginx-matrix-tls/ -R ``` 需保证当前环境已设置 KUBECONFIG 或 `kubectl` 已指向目标集群(例如在控制节点上或已配置远程 kubeconfig)。 - 一键部署/清理推荐用 Playbook(会先删 02-05 残留、再 apply、并做就绪与 curl 验证): @@ -258,7 +258,7 @@ done ```bash kubectl get ingress -n default nginx-m1 -o yaml | grep -A5 "tls:\|host:\|certresolver" ``` - 若无 `tls` / `host` / `certresolver`,说明当前是 02-05 的非 TLS Ingress,需执行 `kubectl apply -f ansible/files/nginx-matrix-tls/ -R`(或跑 Ansible playbook `nginx-matrix-tls-deploy.yml`)。 + 若无 `tls` / `host` / `certresolver`,说明当前是 02-05 的非 TLS Ingress,需执行 `kubectl apply -f ansible/files/03-02-nginx-matrix-tls/ -R`(或跑 Ansible playbook `nginx-matrix-tls-deploy.yml`)。 2. **看 Traefik 是否尝试/成功申请证书**: ```bash @@ -281,7 +281,7 @@ done 6. 
**日志出现 “service not found” / “kubernetes service not found: default/nginx-m2” / “middleware … does not exist”**:说明 Ingress/IngressRoute 已存在,但对应的 **Service 或 Middleware 缺失**(例如只 apply 了部分 TLS 矩阵,或先删后 apply 时 Traefik 在中间时刻读到不完整状态)。需**完整** apply TLS 矩阵,保证 M1~M4 的 Deployment、Service、Middleware、Ingress/IngressRoute 一起就绪: ```bash - kubectl apply -f ansible/files/nginx-matrix-tls/ -R + kubectl apply -f ansible/files/03-02-nginx-matrix-tls/ -R kubectl get svc,middleware -n default | grep -E "nginx-m|stripprefix" ``` 确认 nginx-m1~m4 的 Service 与 stripprefix-m1~m4 的 Middleware 均存在后,Traefik 会重新同步路由;证书仍需按上一步确保 ACME 配置生效。 @@ -312,7 +312,7 @@ done 可使用 Ansible 自动部署 / 清理 TLS 矩阵(test01~test04.jackadam.top)并做 HTTPS 验证: - **Playbook**:`ansible/playbooks/nginx-matrix-tls-deploy.yml` -- **Manifests**:`ansible/files/nginx-matrix-tls/`(M1~M4 带 TLS,域名为 test01~test04.jackadam.top;按实际修改 M2/M4 节点名 ylc61/ylc64) +- **Manifests**:`ansible/files/03-02-nginx-matrix-tls/`(M1~M4 带 TLS,域名为 test01~test04.jackadam.top;按实际修改 M2/M4 节点名 ylc61/ylc64) - **前置**:已按本页完成 ACME 配置,且 test01~test04.jackadam.top 已解析到入口 IP ```bash diff --git a/docs/03-03-k3s-traefik-dashboard-acme.md b/docs/03-03-k3s-traefik-dashboard-acme.md index a1126f8..6f2f683 100644 --- a/docs/03-03-k3s-traefik-dashboard-acme.md +++ b/docs/03-03-k3s-traefik-dashboard-acme.md @@ -20,7 +20,7 @@ kubectl -n kube-system create secret generic cloudflare-api-token \ > 说明:Traefik 的 `HelmChartConfig` 只能有一份,Dashboard 与 ACME 需合并在同一文件中。**ACME 配置基于 03-03 实机验证**(递归 DNS、propagation 等待、ping、PROXY protocol、nodeSelector)。 -创建 `traefik-dashboard-acme.yaml`,推荐放入 K3s manifests 目录(路径同 03-02)。**唯一真源**(已含 **`persistence`(local-path)+ ACME + Dashboard + IngressRoute**,证书落盘 `/data/acme.json`):[`traefik-dashboard-acme.yaml`](../ansible/files/traefik-dashboard-acme/traefik-dashboard-acme.yaml);复制后替换 `` 等占位符,或在仓库根执行 `kubectl apply -f ansible/files/traefik-dashboard-acme/traefik-dashboard-acme.yaml`。细节见 `03-05-k3s-local-path-pvc.md`。 +创建 `traefik-dashboard-acme.yaml`,推荐放入 
K3s manifests 目录(路径同 03-02)。**唯一真源**(已含 **`persistence`(local-path)+ ACME + Dashboard + IngressRoute**,证书落盘 `/data/acme.json`):[`traefik-dashboard-acme.yaml`](../ansible/files/03-03-traefik-dashboard-acme/traefik-dashboard-acme.yaml);复制后替换 `` 等占位符,或在仓库根执行 `kubectl apply -f ansible/files/03-03-traefik-dashboard-acme/traefik-dashboard-acme.yaml`。细节见 `03-05-k3s-local-path-pvc.md`。 > 将 `` 替换为你的邮箱。正式上线前删除 `caserver` 该行即切回生产 Let's Encrypt。**ACME 排障**(DNS 解析错误、证书解析器不存在等)见 `03-02-k3s-traefik-acme.md` 中「常见问题」与「排查」小节。 @@ -76,12 +76,12 @@ kubectl -n kube-system logs deploy/traefik --tail=100 | grep -i acme > 本节给出一个**完整、独立**的 Tomcat 示例:包含 Deployment + Service + Ingress(三段 YAML),域名为 `test05.jackadam.top`。前提是已经按本页前文配置并成功加载了 ACME(`traefik-acme.yaml` 或 `traefik-dashboard-acme.yaml`)。 -1. **唯一真源**:[`ansible/files/traefik-dashboard-acme/tomcat-acme.yaml`](../ansible/files/traefik-dashboard-acme/tomcat-acme.yaml)。将其中域名改成你实际解析到集群入口 IP 的 FQDN。 +1. **唯一真源**:[`ansible/files/03-03-traefik-dashboard-acme/tomcat-acme.yaml`](../ansible/files/03-03-traefik-dashboard-acme/tomcat-acme.yaml)。将其中域名改成你实际解析到集群入口 IP 的 FQDN。 2. 应用并查看 ACME 日志 + 访问验证: ```bash -kubectl apply -f ansible/files/traefik-dashboard-acme/tomcat-acme.yaml +kubectl apply -f ansible/files/03-03-traefik-dashboard-acme/tomcat-acme.yaml # 查看 ACME 相关日志(证书申请、签发情况) kubectl -n kube-system logs deploy/traefik --tail=200 | grep -i acme || true diff --git a/docs/03-04-k3s-cloudflare-tunnel-配置接入.md b/docs/03-04-k3s-cloudflare-tunnel-配置接入.md index 4cd7d9d..947f8d6 100644 --- a/docs/03-04-k3s-cloudflare-tunnel-配置接入.md +++ b/docs/03-04-k3s-cloudflare-tunnel-配置接入.md @@ -45,7 +45,7 @@ Traefik 是唯一入口。所有流量经 Tunnel 进入后,由 Traefik 的 Ing ### 3. 部署 cloudflared 到 K3s -1. 从 **唯一真源** 复制清单:[`ansible/files/cloudflare-tunnel/cloudflared.yaml`](../ansible/files/cloudflare-tunnel/cloudflared.yaml) +1. 从 **唯一真源** 复制清单:[`ansible/files/03-04-cloudflare-tunnel/cloudflared.yaml`](../ansible/files/03-04-cloudflare-tunnel/cloudflared.yaml) 2. 
将 `TUNNEL_TOKEN` 占位符替换为前述 Zero Trust 中复制的 Token 3. 应用并等待 Pod 就绪(按实际 manifests 路径选择其一): @@ -81,7 +81,7 @@ Tunnel 后端应指向 **集群内的 Traefik 入口**,常用写法: **和仓库里哪份 YAML 的关系** -- 本仓库的 [`cloudflared.yaml`](../ansible/files/cloudflare-tunnel/cloudflared.yaml) **只** 定义 `cloudflared` 的 Deployment/Secret,**不包含** Traefik Service;Tunnel 后端地址写的是 **集群里已存在的 Traefik Service**,不是 `cloudflared.yaml` 里的某一行。 +- 本仓库的 [`cloudflared.yaml`](../ansible/files/03-04-cloudflare-tunnel/cloudflared.yaml) **只** 定义 `cloudflared` 的 Deployment/Secret,**不包含** Traefik Service;Tunnel 后端地址写的是 **集群里已存在的 Traefik Service**,不是 `cloudflared.yaml` 里的某一行。 - Traefik 的 **Service** 由 K3s 内置 Traefik(HelmChart)安装时创建,资源名一般为 **`traefik`**,命名空间 **`kube-system`**。若你改过 chart 或 Service 名,以下 FQDN 与端口要以 **实际 `kubectl get svc` 输出** 为准。 **与 `kubectl get svc traefik -o yaml` 里哪些字段对应** diff --git a/docs/03-05-k3s-local-path-pvc.md b/docs/03-05-k3s-local-path-pvc.md index 3330e92..a0d386b 100644 --- a/docs/03-05-k3s-local-path-pvc.md +++ b/docs/03-05-k3s-local-path-pvc.md @@ -34,7 +34,12 @@ kubectl -n kube-system get configmap local-path-config -o yaml > /tmp/local-path kubectl -n kube-system edit configmap local-path-config ``` -配置结构示意(请与现有 JSON 合并,不要盲目整段覆盖): +**本仓库实验室真源**(四节点 **10G+32G**、K3s `--data-dir=/storage` 统一拓扑):[`ansible/files/03-05-local-path-config/local-path-config-lab.json`](../ansible/files/03-05-local-path-config/local-path-config-lab.json) —— 仅含 **`DEFAULT_PATH_FOR_NON_LISTED_NODES` → `/storage/storage`**。应用方式: + +- Ansible:`ansible-playbook -i inventory.ini playbooks/apply-local-path-config-lab.yml`,或在 `group_vars/all.yml` 设 `longhorn_apply_local_path_lab: true` 后执行 `longhorn-install.yml`(见 `01-06`、`03-07`)。 +- 手工:备份后编辑 ConfigMap,将 `config.json` 与真源 JSON 对齐,再 `rollout restart` provisioner。 + +配置结构示意(**四节点统一基路径**时只需 `DEFAULT` 一条;请与现有 JSON 合并,不要盲目整段覆盖): ```json { @@ -42,10 +47,6 @@ kubectl -n kube-system edit configmap local-path-config { "node": "DEFAULT_PATH_FOR_NON_LISTED_NODES", "paths": ["/storage/storage"] - }, 
- { - "node": "ylc61", - "paths": ["/data/k3s-local-path"] } ] } @@ -54,9 +55,8 @@ kubectl -n kube-system edit configmap local-path-config 说明: - `DEFAULT_PATH_FOR_NON_LISTED_NODES` 是兜底规则,不是实际节点名;凡是未单独列出的节点都走它。 -- `ylc61` 是单独覆盖规则;该节点会优先使用这里配置的基路径。 -- `paths` 是数组是因为支持“一个节点多个候选基路径”;示例里每个节点只写了一个路径。 -- 因此这段不是“一个节点两个路径”,而是“两个节点规则(默认 + 指定)”。 +- **仅当**集群里仍存在「部分节点系统盘小、部分节点有大盘」等**混合规格**时,才需要再增加按节点名的条目(例如为某主机名单独指定另一基路径);实验室四节点同拓扑时**不要**再为 `ylc61` 等单独写小盘路径,以免与 `00-04` 基线冲突。 +- `paths` 是数组是因为支持“一个节点多个候选基路径”;上例每个规则只写了一个路径。 ```bash # 3. 重启 provisioner 使配置生效 @@ -65,12 +65,12 @@ kubectl -n kube-system rollout restart deploy/local-path-provisioner 2>/dev/null ### 3) 用 demo 验证(PVC -> PV -> 节点 -> 落地目录) -Demo 清单:[`ansible/files/local-path-demo/local-path-pvc-demo.yaml`](../ansible/files/local-path-demo/local-path-pvc-demo.yaml) +Demo 清单:[`ansible/files/03-05-local-path-demo/local-path-pvc-demo.yaml`](../ansible/files/03-05-local-path-demo/local-path-pvc-demo.yaml) > 该 demo 已包含 `nodeSelector` 固定节点(示例为 `ylc61`),使用前请按你的节点主机名修改。 ```bash -kubectl apply -f ansible/files/local-path-demo/local-path-pvc-demo.yaml +kubectl apply -f ansible/files/03-05-local-path-demo/local-path-pvc-demo.yaml kubectl rollout status deploy/nginx-local-pvc-demo --timeout=180s ``` diff --git a/docs/03-06-k3s-使用nfs存储.md b/docs/03-06-k3s-使用nfs存储.md index a31a340..9adac41 100644 --- a/docs/03-06-k3s-使用nfs存储.md +++ b/docs/03-06-k3s-使用nfs存储.md @@ -1,4 +1,4 @@ -# 03-07-k3s 使用 NFS 存储 +# 03-06-k3s 使用 NFS 存储 > 本文只讲 K3s 集群侧如何使用已安装好的 NFS。 @@ -7,19 +7,167 @@ - 已完成 `01-05-armv7-nfs服务安装.md` - 可从 K3s 节点访问 NFS 服务器与导出目录 +## 方式对比(从简单到复杂) + +| 项 | 方式 1:Pod 直接挂 NFS | 方式 2:静态 NFS(PV + PVC) | 方式 3:动态 NFS(选装 provisioner) | +|---|---|---|---| +| 复杂度 | 低 | 中 | 高 | +| 最少 YAML 段数 | 1 段(Deployment/Pod) | 2 段(PV+PVC) | 通常 1 段 PVC(PV/目录动态生成) | +| 目录创建 | 需手工提前创建 | 需手工提前创建 | 通常由 provisioner 自动创建子目录 | +| 与应用耦合 | 高(写死 server/path) | 低(应用只引用 PVC) | 低(应用只引用 PVC) | +| 适用场景 | 临时验证、概念验证(先跑通) | 稳定运行、团队协作 | 大规模/多团队、追求自动化 | + ## 操作步骤 -1. 
创建 NFS 类型 `PersistentVolume` -2. 创建 `PersistentVolumeClaim` -3. 在业务 Pod 中挂载 PVC +1. 按复杂度选择一种方式 +2. 应用清单 +3. 验证读写 -**唯一真源**:[`ansible/files/nfs-demo/nfs-pv-pvc-demo.yaml`](../ansible/files/nfs-demo/nfs-pv-pvc-demo.yaml)(按你的 NFS `server` / `path` 修改)。 +> 结论:K3s 使用 NFS **不需要**先把 NFS 手工挂到每台节点主机;Pod/PVC 可直接挂远端 NFS。 + +### 方式 1:Pod 直接挂 NFS(最简单,快速验证) + +> 说明:`nfs.path` 指向的目录(或子目录)需要在 NFS 服务端**提前创建并设置好权限**;K3s 不会自动在 NFS 服务端创建该目录。 +> +> 例如在 onecloud 上预创建子目录: +> +> ```bash +> sudo mkdir -p /sdcard/app-a +> sudo chown 1000:1000 /sdcard/app-a +> sudo chmod 0770 /sdcard/app-a +> ``` + +```yaml +apiVersion: apps/v1 # Deployment 使用的 API 版本 +kind: Deployment # 资源类型:Deployment +metadata: # 资源元信息 + name: nfs-direct-demo # Deployment 名称 + namespace: default # 命名空间 +spec: # Deployment 规格 + replicas: 1 # 副本数 + selector: # Pod 选择器 + matchLabels: # 标签匹配条件 + app: nfs-direct-demo # 选择带 app=nfs-direct-demo 的 Pod + template: # Pod 模板 + metadata: # Pod 元信息 + labels: # Pod 标签 + app: nfs-direct-demo # Pod 标签值 + spec: # Pod 规格 + containers: # 容器列表 + - name: app # 容器名称 + image: nginx:alpine # 容器镜像 + volumeMounts: # 容器内挂载点 + - name: nfs-data # 引用下方 volumes 的卷名 + mountPath: /usr/share/nginx/html # 挂载到容器内目录 + volumes: # Pod 卷定义 + - name: nfs-data # 卷名 + nfs: # 直接使用 NFS 卷 + server: # NFS 服务器地址(应用前替换) + path: # NFS 导出目录或子目录(应用前替换) +``` + +### 方式 2:静态 NFS(PV + PVC,推荐) + +**唯一真源**:[`ansible/files/03-06-nfs-demo/nfs-pv-pvc-demo.yaml`](../ansible/files/03-06-nfs-demo/nfs-pv-pvc-demo.yaml)。 + +> 为减少硬编码,示例清单已改为占位符:``、``。 +> 应用前必须先替换(例如 `192.168.2.22`、`/sdcard`)。 + +> 静态 NFS 同样需要在服务端提前创建目录并设置权限;不会自动创建目录。 + +### 方式 3:动态 NFS(选装 provisioner) + +这是选装增强,不是 K3s 内置默认能力。常见组件是 `nfs-subdir-external-provisioner`。 + +### 3.1 安装 provisioner(Helm) + +> 前提:NFS 服务端已可用(例如 `192.168.2.22:/sdcard`),且 K3s 节点网络可达。 + +```bash +# 1) 添加 chart 仓库 +helm repo add nfs-subdir-external-provisioner https://kubernetes-sigs.github.io/nfs-subdir-external-provisioner/ +helm repo update + +# 2) 安装(建议放到 kube-system) +helm upgrade --install 
nfs-subdir-external-provisioner \ + nfs-subdir-external-provisioner/nfs-subdir-external-provisioner \ + -n kube-system --create-namespace \ + --set nfs.server=192.168.2.22 \ + --set nfs.path=/sdcard \ + --set storageClass.name=nfs-client \ + --set storageClass.defaultClass=false \ + --set storageClass.reclaimPolicy=Delete \ + --set storageClass.archiveOnDelete=true +``` + +参数说明(最常改): + +- `nfs.server`:NFS 服务器地址 +- `nfs.path`:NFS 导出根目录(provisioner 会在其下创建子目录) +- `storageClass.name`:动态供给使用的 StorageClass 名称 +- `archiveOnDelete`:删除 PVC 时是否归档目录(`true` 更稳妥,`false` 更干净) + +### 3.2 验证 provisioner 与 StorageClass + +```bash +kubectl -n kube-system get pod -l app.kubernetes.io/name=nfs-subdir-external-provisioner -o wide +kubectl get storageclass +``` + +### 3.3 用动态 PVC 验证自动建卷 + +创建一个最小 PVC(示例,保存为 `/tmp/nfs-dynamic-pvc-demo.yaml`): + +```yaml +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: nfs-dynamic-pvc-demo + namespace: default +spec: + accessModes: + - ReadWriteMany + storageClassName: nfs-client + resources: + requests: + storage: 5Gi +``` + +应用并验证: + +```bash +kubectl apply -f /tmp/nfs-dynamic-pvc-demo.yaml +kubectl get pvc nfs-dynamic-pvc-demo -n default +kubectl get pv | grep nfs-dynamic-pvc-demo +``` + +当 PVC `Bound` 后,通常可在 NFS 服务器的 `/sdcard` 下看到自动创建的子目录(命名规则由 provisioner 管理)。 + +### 3.4 本次实机验证记录(ylc61 + onecloud) + +- `nfs-subdir-external-provisioner` 安装成功并 `Running` +- `StorageClass nfs-client` 创建成功 +- 动态 PVC `nfs-dynamic-pvc-demo` 成功 `Bound` +- 自动创建 PV 成功,并在 NFS 服务端目录下生成子目录: + - `/sdcard/default-nfs-dynamic-pvc-demo-pvc-<uuid>` +- 删除临时 PVC 后,PV 按 `Delete` 回收完成 + +> 备注:验证中曾出现过镜像拉取 `EOF`(偶发网络抖动),重试后官方镜像 `registry.k8s.io/sig-storage/nfs-subdir-external-provisioner:v4.0.2` 已可正常拉取并启动。 + +### 3.5 何时选动态 NFS + +- 目录数量多、变更频繁 +- 希望减少手工维护 PV/PVC 对应关系 +- 团队里希望应用只声明 PVC,平台统一管理 StorageClass ## 验证命令(若 YAML 在 manifests 目录,按实际路径选择其一复制执行) ```bash # 仓库根直接应用 -kubectl apply -f ansible/files/nfs-demo/nfs-pv-pvc-demo.yaml +# 先替换 ansible/files/03-06-nfs-demo/nfs-pv-pvc-demo.yaml 里的占位符 +# -> 例如 192.168.2.22 +# -> 例如 
/sdcard +kubectl apply -f ansible/files/03-06-nfs-demo/nfs-pv-pvc-demo.yaml ``` ```bash @@ -36,6 +184,13 @@ kubectl get pv,pvc -A kubectl describe pv nfs-pv-demo ``` +```bash +# 对方式 1(Pod 直挂)可这样验证 +kubectl apply -f /tmp/nfs-direct-demo.yaml +kubectl rollout status deploy/nfs-direct-demo --timeout=180s +kubectl exec deploy/nfs-direct-demo -- sh -c 'echo nfs-direct-ok > /usr/share/nginx/html/nfs.txt && cat /usr/share/nginx/html/nfs.txt' +``` + ## 预期 - PV/PVC 状态为 `Bound` @@ -46,6 +201,7 @@ kubectl describe pv nfs-pv-demo - 检查 NFS 服务与导出目录权限 - 检查节点到 NFS 服务器网络 - 检查 `path` 与 `server` 配置是否正确 +- 若报 `Permission denied`,回到 `01-05` 的 `root_squash` 权限章节,确认导出目录与业务 UID/GID 对齐 ## 下一步 diff --git a/docs/03-07-k3s-longhorn-持久化存储.md b/docs/03-07-k3s-longhorn-持久化存储.md index 785fcf1..b20b651 100644 --- a/docs/03-07-k3s-longhorn-持久化存储.md +++ b/docs/03-07-k3s-longhorn-持久化存储.md @@ -1,6 +1,19 @@ -# 03-08-k3s Longhorn 持久化存储(单节点自用生产) +# 03-07-k3s Longhorn 持久化存储(4 节点实验环境) -> 适用:**没有 NFS**、希望在 K3s 中部署 GitLab 等“重状态”系统,并且能接受“单节点不做高可用、但要可重建/可备份”。 +> 本实验:**K3s 四节点集群**、**每节点 8 GiB 内存**(与下文「系统资源」建议档一致)、**磁盘基线为约 10G 系统盘 + 32G 数据盘**,**`/storage`** 必须挂在独立数据盘上(与 `/` 不同设备),详见 `00-04-部署环境说明.md`。**没有 NFS**,用 Longhorn 做集群内动态块存储;若后续要部署 GitLab 等重状态系统,可在此基础上接 PVC。副本数可按实验目标在「省空间」与「模拟高可用」之间取舍。 + +--- + +## 磁盘与 `/storage` 前提 + +Longhorn 与 K3s 的 `containerd`/镜像、local-path 都会大量占用 **`k3s_data_dir`(本仓库默认 `/storage`)**。若 `/storage` 只是根分区上的普通目录,控制节点易出现 **DiskPressure / Evicted**。 + +- **请先阅读**:[`00-04-部署环境说明.md`](00-04-部署环境说明.md)(四节点统一拓扑、自检命令、推荐 playbook 顺序)。 +- **自检**(每台节点):`mountpoint -q /storage && findmnt -n -o SOURCE /` 与 `findmnt -n -o SOURCE /storage` 输出须**不同**。 +- **Ansible**:`k3s-init-and-install.yml` 在 `k3s_verify_storage_mount: true`(`group_vars/all.yml` 默认)时会在安装前校验上述条件;可选先跑 `k3s-prepare-storage.yml` 准备第二块盘,见 `01-06-节点初始化-ansible-实践.md`。 +- Longhorn 数据目录建议为 **`/storage/longhorn`**(与 Helm `values-lab.yaml` 一致),勿与系统盘混用。 + +**容量与副本数**:每节点数据盘约 **32G** 时,`defaultReplicaCount` 为 **2 或 3** 会使同一份逻辑卷在集群内占用 
**多倍物理空间**(各副本落在不同节点上各占一份),且 Longhorn 元数据与系统组件仍有开销;实验环境可先用副本 **1**,要演练跨节点冗余再调高并预留磁盘。 --- @@ -8,19 +21,19 @@ - **容器文件系统**:Pod 重建即丢,基本不可用 - **hostPath 固定目录**:能落盘,但和调度强绑定,迁移/扩缩容/备份都更麻烦 -- **local-path PVC**(`03-07`):K3s 自带,单副本够用;无快照/备份,多副本需 NFS 或 Longhorn +- **local-path PVC**(见 `03-05-k3s-local-path-pvc.md`):K3s 自带,单副本够用;无快照/备份,多副本需 NFS 或 Longhorn - **Longhorn(CSI 块存储)**:对 K8s 来说是标准 PVC;即使你只设 **副本数=1**,也能获得: - 统一的 PVC 管理与回收策略 - 快照(snapshot) - 备份(backup target,可推到对象存储) -> 重要:单节点 + 副本=1 **不是高可用**。想要节点级容灾,需要多节点副本或备份到外部介质。 +> 重要:**副本数=1** 时,卷只在集群里有一份数据,**节点故障仍可能丢卷**。四节点集群里若要演练节点级容灾,把卷的副本数调到 2 或 3,并配合备份到外部介质。 --- ## 前置条件(CentOS) -在所有计划作为 Longhorn 存储节点的机器上安装依赖(单节点就只装这一台): +在**每一台**计划参与 Longhorn 的节点上安装依赖(本实验为 **4 节点**,通常四台都要装;若你明确只让部分节点承担存储,也至少要保证这些节点已装齐): ```bash sudo yum install -y iscsi-initiator-utils nfs-utils @@ -36,8 +49,170 @@ sudo chmod 700 /storage/longhorn --- +## 系统资源(CPU / 内存) + +Longhorn **没有**在文档里给一张适用于所有场景的「每节点必须 ≥X 核、≥Y GB 内存」的固定表。安装前置、磁盘与网络等见官方 [Installation requirements(v1.7.2)](https://longhorn.io/docs/archives/1.7.2/deploy/install/) 与 [Best practices - Minimum recommended hardware](https://longhorn.io/docs/archives/1.7.2/best-practices/#minimum-recommended-hardware)(其中对**磁盘** IOPS/吞吐、SSD 建议、节点间带宽等描述较多)。下面按「能规划容量」和「能对照官方默认值」两部分说明。 + +### CPU(与官方默认机制相关) + +- 使用默认的 **V1 数据引擎**时,全局设置 **[Guaranteed Instance Manager CPU](https://longhorn.io/docs/archives/1.7.2/references/settings/#guaranteed-instance-manager-cpu)** 默认值为 **12**:表示为每个 **Instance Manager** Pod 预留该节点 **可分配 CPU(allocatable)的 12%**,用于保障引擎与副本稳定(详见 [Best practices - V1 Data Engine](https://longhorn.io/docs/archives/1.7.2/best-practices/#v1-data-engine))。 +- 节点 **CPU 越小**,在其它负载不变时,Longhorn 与业务争抢余量的压力越大;卷多、重建/备份繁忙时更明显。 + +**实验环境规划(在 K3s 与系统开销之外,仍能跑少量业务)**: + +| 档位 | 每节点 CPU(约) | 说明 | +| --- | --- | --- | +| 建议 | **≥ 4 vCPU** | 与常见实验/小型业务负载较匹配 | +| 最低(仅验证组件) | **2 vCPU** | 可能频繁触顶、调度排队;与 Instance Manager 预留叠加后余量偏紧 | + +### 内存 + +- 除 **longhorn-manager、CSI、engine-image** 等常驻组件外,**Instance Manager** 
等会随卷、I/O、**副本数**、备份与重建任务变化;官方不以「每节点固定 GB」一条公式概括。 +- 若启用 **V2 数据引擎**,节点还需按官方说明预留 **Huge Pages** 等(见 [V2 前置条件](https://longhorn.io/docs/archives/1.7.2/v2-data-engine/prerequisites/)),与 V1 场景不同,勿与下表混用。 + +**实验环境规划(V1、常规用途)**: + +| 档位 | 每节点内存(约) | 说明 | +| --- | --- | --- | +| 建议 | **≥ 8 GiB** | 留给 OS、kubelet、系统 Pod、Longhorn、业务 | +| 偏低 | **4 GiB** | 仅适合极简演示;卷与业务稍多时 **OOM 风险**高 | + +**ARM64、整机只有 4 GiB 内存的设备**:Longhorn **支持** ARM64(见官方 [Best practices - Architecture](https://longhorn.io/docs/archives/1.7.2/best-practices/#architecture)),限制主要来自 **总内存与 CPU 余量**,不是「ARM 能不能装」。在 **4 GiB 整机**上还要跑 K3s 系统组件 + Longhorn DaemonSet/CSI/Instance Manager 时,**不建议**把该节点当作 Longhorn 的**常规存储节点**(更不适合再叠加重状态业务)。更稳妥的做法是:存储用 **`03-05-k3s-local-path-pvc.md`** 的 local-path 或明确路径的 hostPath;或仅在 **≥8 GiB** 的节点上跑 Longhorn,**4 GiB 的 ARM 板**尽量只做**计算节点**并在 Longhorn UI 中**禁用其磁盘/不参与副本**(若仍装 Longhorn 组件,至少减轻数据面压力)。 + +官方仓库中的「中等规模」**性能测试参考节点**为 **8 vCPU、32 GB RAM**(用于可复现压测,**不是**最低装机要求),见 [medium node spec](https://github.com/longhorn/longhorn/blob/master/scalability/reference-setup-performance-scalability-and-sizing-guidelines/public-cloud/medium-node-spec.md)。 + +### 部署后自检(推荐) + +已安装 **metrics-server** 时: + +```bash +kubectl top node +kubectl top pod -n longhorn-system +``` + +未安装时,至少查看节点 **Allocatable** 与 Longhorn Pod 分布: + +```bash +kubectl describe node +kubectl get pod -n longhorn-system -o wide +``` + +--- + +## SSH 部署试跑顺序(4×8 GiB) + +当前规格 **四台、各 8 GiB**,适合按本篇做一次完整试跑:`kubectl apply` 只在**控制节点**执行一次;**四台**都要完成 iSCSI/NFS 依赖与数据目录(见「前置条件」)。 + +与本仓库 **Ansible 清单**对应时,主机名为 **`ylc61`**(控制 / `k3s_server`)、**`ylc62`–`ylc64`**(工作节点),见 `ansible/inventory.ini`。下列命令里的 `Host` 请换成你 **SSH 已通** 的名字(或 `root@192.168.2.61` 等形式)。 + +### SSH 配置说明(本机能否直连「各节点」) + +- **`ylc61`(控制节点)**:常见做法是在本机 `~/.ssh/config` 里配置 `Host ylc61`,`IdentityFile` 指向**该节点专用私钥**(例如仓库内 `.ssh/id_ed25519_k3s_192.168.2.61`,与 `01-06` / 建链脚本一致)。配好后可 **`ssh ylc61`**,并在其上执行 **`kubectl`**(设好 `KUBECONFIG`),**不必**强求本机安装 kubectl 或直连 API Server。 +- 
**`ylc62`–`ylc64`(工作节点)**:`ansible/inventory.ini` 里为**每台**配置了**不同**的 `ansible_ssh_private_key_file`(如 `~/.ssh/id_ed25519_k3s_192.168.2.62` …)。若本机 `~/.ssh/config` **没有**对应 `Host ylc62` …,则 **`ssh ylc62` 会 `Permission denied`**(用错成控制节点密钥时尤其常见)。需要本机循环 SSH 四台时,请为 **62–64** 各写一段 `Host`,`IdentityFile` 与清单路径一致。 +- **只做 Longhorn 安装与排查时**:多数步骤只需 **`ssh ylc61` + `kubectl`**;只有要到**具体工作节点**执行 **`ctr` 预拉镜像**、看 **kubelet/containerd** 时,才必须能登录该节点(直连、串口、或 Ansible `-l ylc63` 等均可)。 + +`~/.ssh/config` 示例(路径按你机器上实际私钥位置修改): + +```sshconfig +Host ylc62 + HostName 192.168.2.62 + User root + IdentityFile ~/.ssh/id_ed25519_k3s_192.168.2.62 + +Host ylc63 + HostName 192.168.2.63 + User root + IdentityFile ~/.ssh/id_ed25519_k3s_192.168.2.63 + +Host ylc64 + HostName 192.168.2.64 + User root + IdentityFile ~/.ssh/id_ed25519_k3s_192.168.2.64 +``` + +### 1. 四台节点:依赖 + 目录(从办公机循环 SSH) + +```bash +for h in ylc61 ylc62 ylc63 ylc64; do + echo "=== $h ===" + ssh "$h" 'sudo bash -s' <<'REMOTE' +if command -v dnf >/dev/null 2>&1; then + dnf install -y iscsi-initiator-utils nfs-utils +else + yum install -y iscsi-initiator-utils nfs-utils +fi +systemctl enable --now iscsid +mkdir -p /storage/longhorn +chmod 700 /storage/longhorn +REMOTE +done +``` + +### 2. 
控制节点:安装 Longhorn(只做一次) + +**首选:Helm + 本仓库 `values-lab.yaml`**(与 K3s 常见实践一致,版本与实验室变量集中在 `ansible/group_vars/all.yml` 的 `longhorn_chart_version`)。 + +- **Ansible(推荐)**:在控制机执行(与 `01-06` 顺序一致): + +```bash +cd ansible +ansible-playbook -i inventory.ini playbooks/longhorn-install.yml +``` + +该 playbook 会在各节点安装 iSCSI/NFS 依赖、在控制节点安装 Helm(若 `dnf/yum` 无 `helm` 包则需按 [Helm 安装文档](https://helm.sh/docs/intro/install/)手工安装后重跑)、再 `helm upgrade --install`。values 真源:[`ansible/files/03-07-longhorn/values-lab.yaml`](../ansible/files/03-07-longhorn/values-lab.yaml)。可选:`longhorn_apply_local_path_lab: true` 时一并应用 `03-05` 中的 local-path 实验室 ConfigMap。 + +- **手工 Helm**(在 **`ylc61`** 或任意已配置 `KUBECONFIG` 的机器上;kubeconfig 见 `01-01-k3s-控制节点含traefik.md`): + +```bash +ssh ylc61 +export KUBECONFIG=/etc/rancher/k3s/k3s.yaml + +helm repo add longhorn https://charts.longhorn.io +helm repo update +# 将仓库内 ansible/files/03-07-longhorn/values-lab.yaml 拷到本机路径后: +helm upgrade --install longhorn longhorn/longhorn \ + --namespace longhorn-system --create-namespace \ + -f ./values-lab.yaml \ + --version 1.7.2 \ + --wait --timeout 15m +``` + +Helm 安装时 **`values-lab.yaml` 已包含** `defaultDataPath`、`defaultReplicaCount`、默认 StorageClass 等,**一般无需**再执行下文针对 `kubectl apply` 方式的 **`default-data-path` / StorageClass patch**。 + +**备选:官方清单 `kubectl apply`**(无 Helm 或需与旧文档对齐时使用): + +```bash +export KUBECONFIG=/etc/rancher/k3s/k3s.yaml +kubectl apply -f https://raw.githubusercontent.com/longhorn/longhorn/v1.7.2/deploy/longhorn.yaml +kubectl -n longhorn-system rollout status deploy/longhorn-ui --timeout=300s +``` + +然后按下文 **「安装 Longhorn」** 中 **kubectl 路径**补 **`default-data-path` 补丁**、**默认 StorageClass** 等。若经 Windows 再 SSH 导致 JSON patch 失败,用文中 **`python3` 补丁** 段落。 + +**Longhorn UI 访问**、**手动验证**与下文各节相同。 + +### 3. 
试跑成功判据(最短) + +- `kubectl -n longhorn-system get pod` 中关键 Pod 为 **Running**(四节点时 DaemonSet 应对齐节点数;若有 `ImagePullBackOff`,到**对应节点** `ctr` 预拉镜像,见「手动验证」)。 +- 能打开 UI(**port-forward** 或临时 **NodePort**),Node 页可见四节点。 +- 可选:下文「最小读写验证」PVC + Pod 写文件通过。 + +--- + ## 安装 Longhorn +**不是**在每台机器上各执行一遍安装:无论 **Helm** 还是 **`kubectl apply`**,**对整个集群只做一次**(在能访问 API 的机器上执行即可),Longhorn 的控制面、CSI、DaemonSet 等会由 Kubernetes 统一下发。 + +**每台节点**仍要做的是:前文「磁盘前提」「前置条件」里的 **OS 依赖**与 **`/storage/longhorn` 目录**(Ansible `longhorn-install.yml` 会创建)。若节点在集群内且未被 cordon,Longhorn 的 DaemonSet 往往也会在**各节点**拉起组件 Pod,因此各节点通常都要能 **拉取镜像**;只有「存数据」可以只在部分节点上开启(见下文「只让有大盘的节点承载数据」)。 + +### 首选:Helm + `values-lab.yaml` + +与上文 **「SSH 试跑顺序 §2」**一致:使用 **`ansible-playbook ... longhorn-install.yml`** 或手工 **`helm upgrade --install ... -f values-lab.yaml`**。Chart 仓库:`https://charts.longhorn.io`;values 字段以 [Longhorn Helm Chart](https://github.com/longhorn/longhorn/tree/master/chart) 当前版本为准。 + +### 备选:`kubectl apply` 官方清单 + +未使用 Helm 时: + ```bash kubectl apply -f https://raw.githubusercontent.com/longhorn/longhorn/v1.7.2/deploy/longhorn.yaml kubectl -n longhorn-system rollout status deploy/longhorn-ui @@ -50,6 +225,23 @@ kubectl -n longhorn-system patch settings.longhorn.io default-data-path \ --type=merge -p '{"value":"/storage/longhorn"}' ``` +若出现 `invalid character ':' in string escape code`(多经一层 SSH/脚本时 JSON 被错误转义),在**控制节点**用下面方式补丁(不依赖引号嵌套): + +```bash +python3 - <<'PY' +import subprocess, json +patch = json.dumps({"value": "/storage/longhorn"}) +subprocess.check_call([ + "kubectl", "-n", "longhorn-system", "patch", "settings.longhorn.io", "default-data-path", + "--type=merge", "-p", patch, +]) +subprocess.run([ + "kubectl", "-n", "longhorn-system", "get", "settings.longhorn.io", "default-data-path", + "-o", "jsonpath={.status.value}{'\n'}", +]) +PY +``` + 将 `longhorn` 设为默认 StorageClass(推荐): ```bash @@ -57,21 +249,123 @@ kubectl get storageclass kubectl patch storageclass longhorn -p 
'{"metadata":{"annotations":{"storageclass.kubernetes.io/is-default-class":"true"}}}' ``` +若 `patch storageclass` 同样因转义失败,可改用: + +```bash +python3 - <<'PY' +import subprocess, json +p = json.dumps({"metadata": {"annotations": {"storageclass.kubernetes.io/is-default-class": "true"}}}) +subprocess.check_call(["kubectl", "patch", "storageclass", "longhorn", "-p", p]) +PY +``` + --- -## 单节点“非 HA”建议配置 + +## Longhorn UI(部署与访问) + +### 是否需要单独部署 + +**不需要。** 官方 `longhorn.yaml` 里已包含 **Deployment `longhorn-ui`** 和 **Service `longhorn-frontend`**(命名空间 `longhorn-system`)。执行上文 `kubectl apply -f .../longhorn.yaml` 后,UI 会与其它组件一起创建;安装阶段里的 `kubectl ... rollout status deploy/longhorn-ui` 就是在等 UI 就绪。 + +若 UI 长期不 Ready,优先按节点查 **镜像拉取** 与 **Pod 事件**(见下文「手动验证」)。 + +### 部署与网络对象核验 + +```bash +kubectl -n longhorn-system get deploy longhorn-ui +kubectl -n longhorn-system get svc longhorn-frontend +kubectl -n longhorn-system get pod -l app=longhorn-ui -o wide +kubectl -n longhorn-system rollout status deploy/longhorn-ui --timeout=300s +``` + +`longhorn-frontend` 默认多为 **ClusterIP**,集群外浏览器**不能直接**访问 Service VIP,需用 **port-forward**、**NodePort** 或 **Ingress** 之一。 + +### 访问方式一:`kubectl port-forward`(实验环境推荐) + +在**已配置 kubeconfig、能访问 API** 的机器上执行(可以是办公机,也可以是某台 SSH 上去的控制节点): + +```bash +kubectl -n longhorn-system port-forward svc/longhorn-frontend 8080:80 +``` + +浏览器打开:**http://127.0.0.1:8080** + +**常见场景**: + +- 只在控制节点上有 `kubectl`:先 SSH 到控制节点执行上述命令,再用浏览器访问——若浏览器在本地电脑,需 **SSH 本地转发**,例如在本机执行 + `ssh -L 8080:127.0.0.1:8080 user@控制节点`, + 然后在控制节点上 `port-forward` 绑定 `127.0.0.1:8080`,本地浏览器仍访问 `http://127.0.0.1:8080`。 +- 需要把转发绑到非本机接口时(慎用,内网限定): + `kubectl -n longhorn-system port-forward --address 0.0.0.0 svc/longhorn-frontend 8080:80` + +**无图形界面时的快速验证**(`port-forward` 占用当前终端时,另开一个终端): + +```bash +curl -sI http://127.0.0.1:8080 | head -n 5 +``` + +能返回 `HTTP/1.1 200` 或 `302` 等即说明 UI 服务已响应(具体状态码随版本可能略有不同)。 + +### 访问方式二:临时改为 NodePort(仅实验内网) + +便于用「任意节点 IP + 端口」访问,**不要对公网暴露**。 + +```bash +kubectl -n longhorn-system 
patch svc longhorn-frontend -p '{"spec":{"type":"NodePort"}}' +kubectl -n longhorn-system get svc longhorn-frontend +``` + +在输出里查看 `PORT(S)` 一列的 `80:3xxxx/TCP`,用浏览器访问:**`http://` + 任意节点内网 IP + `:` + NodePort 端口**。 + +实验结束后若要改回 **ClusterIP**(与默认清单一致,便于继续只用 port-forward): + +```bash +kubectl -n longhorn-system patch svc longhorn-frontend -p '{"spec":{"type":"ClusterIP"}}' +``` + +一般由 apiserver 重新分配 Cluster IP;若与预期不符,从当前版本的 `longhorn.yaml` 中仅重新 `apply` **Service `longhorn-frontend`** 那一段即可。 + +### 访问方式三:Ingress + Traefik(可选,生产需认证与 TLS) + +K3s 默认带 Traefik 时,可为 `longhorn-frontend` 建 **Ingress**,用域名访问。**生产或跨网段**务必配 **HTTPS** 与 **身份认证**(Longhorn 支持多种认证方式,以[官方文档](https://longhorn.io/docs/)当前版本为准),实验环境若仅用内网域名也至少限制来源网段。 + +### 界面内验证要点 + +打开 UI 后确认: + +- **Node** 页能看到集群节点,状态与磁盘信息合理(与 `kubectl get nodes` 对照)。 +- **Volume** 在创建测试 PVC 后出现对应卷(可与下文「最小读写验证」联动)。 +- 需要改**默认副本数**、**节点磁盘是否可调度**时,在 **Setting** / **Node** 中操作(与下文「实验环境建议配置」一致)。 + +若页面无法加载或接口报错,查看 UI 与后端: + +```bash +kubectl -n longhorn-system logs deploy/longhorn-ui --tail=80 +kubectl -n longhorn-system describe pod -l app=longhorn-ui +``` + +--- + +## 实验环境建议配置(4 节点) + +### DaemonSet 与镜像 + +- `longhorn-manager`、`engine-image` 等会以 DaemonSet 形式跑在**多个节点**上;四节点时**每个节点**都应对应有 Pod 最终就绪(若某台 `ImagePullBackOff`,先在**该节点**用 `ctr` 预拉镜像,见下文「手动验证」)。 +- 数据路径与 `default-data-path` 是**按节点**生效的:四台都要能访问你设定的目录(例如每台都有 `/storage/longhorn`,或只在有盘的节点上建目录并在 UI 里禁用无盘节点)。 ### 副本数 -- 建议将 Longhorn 的 **默认副本数**设为 1(节省空间,也符合“非 HA”定位) -- 需要迁移卷/临时容灾时,可手动把某个卷副本数调到 2,待同步完成再调回 1 +- **默认副本数=1**:省磁盘、适合先跑通功能;四节点集群里卷仍可能只落在某一个节点上。在 **每节点约 32G 数据盘** 的实验室基线下,优先用 **1** 避免多副本占满盘。 +- **默认副本数=2 或 3**:利用多节点做副本,更接近生产里的冗余;**物理占用约为逻辑卷大小 × 副本数**(再加引擎与文件系统开销),32G 盘上调高前务必在 UI 或 `kubectl` 中确认各节点剩余空间。 +- 迁移卷或临时演练容灾时,可对单个卷调高副本数,待同步完成再调回。 ### 只让“有大盘”的节点承载数据 -如果你是多节点集群但只有少数节点有 `/storage` 大盘: +若 4 节点里只有部分机器有 `/storage` 或大盘: -- 只把这些节点加入 Longhorn 可调度存储(Longhorn UI 中将其它节点的 disk 设为不可调度) -- 或者给存储节点打标签,配合工作负载的 nodeSelector/affinity(让应用尽量靠近数据) +- 在 Longhorn UI 里把无盘或不想参与存储的节点的 disk
设为**不可调度**。 +- 或给存储节点打标签,配合工作负载的 nodeSelector/affinity(让应用尽量靠近数据)。 > 注意:副本=1 时,卷不会“随使用自动从小盘迁到大盘”,需要你手动迁移或从源头限制调度。 @@ -104,11 +398,102 @@ kubectl get pvc -A kubectl get pv ``` +### 手动验证(按顺序,便于排障) + +在 **K3s 节点**上(与 kubelet 使用同一套 containerd),可先手动拉镜像,区分「网络/仓库问题」与「Longhorn 配置问题」: + +**镜像与节点**:K3s 每台节点自带 containerd,**镜像按节点本地缓存**,不会在节点间自动同步。Pod 第一次调度到某节点时,该节点会自己去仓库拉取(联网正常则不必事先每台手动 `ctr pull`)。Longhorn 的 DaemonSet 在 **4 节点**上各跑一份时,**每台**都可能要拉同一批镜像;某台若 `ImagePullBackOff`,到**该台**上执行下面的 `ctr pull` 做预热或排障。 + +```bash +# 使用 k3s 的 containerd 命名空间 +CTR="sudo ctr --address /run/k3s/containerd/containerd.sock -n k8s.io" + +$CTR images pull docker.io/longhornio/longhorn-manager:v1.7.2 +$CTR images pull docker.io/longhornio/longhorn-ui:v1.7.2 +$CTR images pull docker.io/longhornio/longhorn-share-manager:v1.7.2 +$CTR images pull docker.io/longhornio/longhorn-engine:v1.7.2 +$CTR images pull docker.io/longhornio/longhorn-instance-manager:v1.7.2 +$CTR images pull docker.io/longhornio/backing-image-manager:v1.7.2 + +# 其余 longhornio/* 镜像以你安装的版本清单为准(避免 tag 与文档漂移): +# curl -sSL https://raw.githubusercontent.com/longhorn/longhorn/v1.7.2/deploy/longhorn.yaml | grep -E 'image:.*longhornio' | sort -u + +# 对照:非 Longhorn 镜像(判断是否为 Docker Hub 整体不通) +$CTR images pull docker.io/library/nginx:alpine +``` + +确认控制面与 DaemonSet(`longhorn-manager` 每节点一份;**4 节点**时应看到与节点数相关的 Pod 分布,`engine-image` 等会陆续就绪): + +```bash +kubectl -n longhorn-system get pod,deploy,ds -o wide +kubectl get nodes -o wide +kubectl -n longhorn-system rollout status deploy/longhorn-ui --timeout=300s +kubectl -n longhorn-system rollout status deploy/longhorn-driver-deployer --timeout=300s +``` + +确认默认数据路径与 StorageClass: + +```bash +kubectl -n longhorn-system get settings.longhorn.io default-data-path -o yaml +kubectl get storageclass +``` + +若 `longhorn` 与 `local-path` 同时带有 `storageclass.kubernetes.io/is-default-class: "true"`,未写 `storageClassName` 的 PVC 行为可能不符合预期;建议只保留一个默认类,或业务 PVC 显式写 `storageClassName: longhorn`。 + +**最小读写验证**(动态卷 + 
临时 Pod): +```bash +kubectl apply -f - <<'EOF' +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: longhorn-pvc-smoke + namespace: default +spec: + accessModes: + - ReadWriteOnce + storageClassName: longhorn + resources: + requests: + storage: 2Gi +--- +apiVersion: v1 +kind: Pod +metadata: + name: longhorn-smoke + namespace: default +spec: + containers: + - name: app + image: busybox:1.36 + command: ["sh", "-c", "echo ok-longhorn > /data/test.txt && sleep 3600"] + volumeMounts: + - name: data + mountPath: /data + volumes: + - name: data + persistentVolumeClaim: + claimName: longhorn-pvc-smoke +EOF + +kubectl wait --for=condition=Ready pod/longhorn-smoke -n default --timeout=180s +kubectl exec -n default longhorn-smoke -- cat /data/test.txt +``` + +清理冒烟资源: + +```bash +kubectl delete pod longhorn-smoke -n default --ignore-not-found +kubectl delete pvc longhorn-pvc-smoke -n default --ignore-not-found +``` + +Longhorn UI 的部署核对、port-forward、NodePort 与界面内验证要点见上文 **「Longhorn UI(部署与访问)」**。 + --- ## 下一步 - `03-05-k3s-local-path-pvc.md`:单副本、无快照需求时,用 K3s 自带 local-path 即可 -- 返回 `03-09-k3s-gitops-集群配置管理.md` 或进入业务部署(如 GitLab)章节 +- 继续阅读后续 GitOps / 业务部署(如 GitLab)章节 diff --git a/docs/03-10-k3s-traefik-custom-ports.md b/docs/03-10-k3s-traefik-custom-ports.md new file mode 100644 index 0000000..25e8d23 --- /dev/null +++ b/docs/03-10-k3s-traefik-custom-ports.md @@ -0,0 +1,53 @@ +# 03-10-k3s-traefik-custom-ports + +> 为 K3s 内置 Traefik 增加 **自定义 entrypoints**(额外暴露端口),用于多入口/旁路调试/特定设备转发等场景。 + +## 前置条件 + +- 已完成 `01-01-k3s-控制节点含traefik.md`,集群内 `kube-system` 命名空间的 Traefik 正常运行。 +- 了解 Traefik 的 entrypoints 概念(`web`/`websecure`)。 + +## 清单路径(唯一真源) + +| 项 | 路径 | +|----|------| +| Traefik 自定义端口(HelmChartConfig) | [`ansible/files/03-10-traefik-custom-ports/traefik-custom-ports.yaml`](../ansible/files/03-10-traefik-custom-ports/traefik-custom-ports.yaml) | +| 应用 | `kubectl apply -f ansible/files/03-10-traefik-custom-ports/traefik-custom-ports.yaml` | +| 删除 | `kubectl delete -f
ansible/files/03-10-traefik-custom-ports/traefik-custom-ports.yaml` | + +## 做了什么 + +该清单通过 `HelmChartConfig` 给 K3s 内置 Traefik 注入 values,新增两个示例入口: + +- `web18080`:HTTP 端口 `18080` +- `websecure18443`:HTTPS 端口 `18443` + +你可以按需要改 entrypoint 名称与端口号。 + +## 部署与验证 + +```bash +kubectl apply -f ansible/files/03-10-traefik-custom-ports/traefik-custom-ports.yaml + +# 等待 Traefik 重载(可能触发 rollout) +kubectl -n kube-system rollout status deploy/traefik --timeout=180s + +# 验证 Service 端口是否暴露(不同版本/安装方式字段略有差异,以输出为准) +kubectl -n kube-system get svc traefik -o wide +kubectl -n kube-system describe svc traefik | sed -n '/Ports:/,/Selector:/p' +``` + +预期:`traefik` Service 出现新增端口(如 `18080/TCP`、`18443/TCP`)或对应的暴露配置生效;若节点/上游负载均衡允许访问,则可在对应端口访问路由(路由规则仍由 Ingress/IngressRoute 决定)。 + +## 失败排查 + +- **端口没有出现**:确认清单应用在 `kube-system`,并且 `metadata.name: traefik` 匹配 K3s 内置 chart 名称;查看 Traefik Pod 日志与 Deployment 是否滚动更新。 +- **端口出现但访问不通**:检查节点防火墙/上游转发(如 OpenWrt/HAProxy)、以及 Traefik Service 类型(ClusterIP/NodePort/LoadBalancer)对外可达性。 +- **回滚**:执行删除命令后等待 Traefik rollout 结束。 + +## 相关文档 + +- `03-01-k3s-traefik-dashboard.md` +- `03-02-k3s-traefik-acme.md` +- `01-07-openwrt-haproxy.md` + diff --git a/docs/04-01-k3s-nodejs-高级部署.md b/docs/04-01-k3s-nodejs-高级部署.md index 65ceeea..3cd2f97 100644 --- a/docs/04-01-k3s-nodejs-高级部署.md +++ b/docs/04-01-k3s-nodejs-高级部署.md @@ -22,11 +22,11 @@ | 项 | 路径 / 命令 | |----|-------------| -| 清单文件 | [`ansible/files/nodejs-demo/04-01-nodejs-demo.yaml`](../ansible/files/nodejs-demo/04-01-nodejs-demo.yaml) | -| 手工应用 | `kubectl apply -f ansible/files/nodejs-demo/04-01-nodejs-demo.yaml` | +| 清单文件 | [`ansible/files/04-01-nodejs-demo/04-01-nodejs-demo.yaml`](../ansible/files/04-01-nodejs-demo/04-01-nodejs-demo.yaml) | +| 手工应用 | `kubectl apply -f ansible/files/04-01-nodejs-demo/04-01-nodejs-demo.yaml` | | Ansible | `ansible-playbook -i ansible/inventory.ini ansible/playbooks/nodejs-demo-apply.yml -e nodejs_demo_manifest=04-01-nodejs-demo.yaml` | -索引与累积说明见 
[`ansible/files/nodejs-demo/README.md`](../ansible/files/nodejs-demo/README.md)。 +索引与累积说明见 [`ansible/files/04-01-nodejs-demo/README.md`](../ansible/files/04-01-nodejs-demo/README.md)。 ### 相对上游 @@ -41,7 +41,7 @@ 应用方式: ```bash -kubectl apply -f ansible/files/nodejs-demo/04-01-nodejs-demo.yaml +kubectl apply -f ansible/files/04-01-nodejs-demo/04-01-nodejs-demo.yaml ``` ## 基础验证 @@ -61,7 +61,7 @@ curl -s --max-time 3 http://192.168.2.62/node/ ## 部署阶段扩展(分项导航) -在本文 `nodejs-demo` 基线上按主题增量实践(建议顺序大致由上到下)。**每篇分项均链接到 `ansible/files/nodejs-demo/` 下累积清单**,并附 **相对上一篇的变更表**;与 [`ansible/playbooks/nodejs-demo-apply.yml`](../ansible/playbooks/nodejs-demo-apply.yml) 共用。 +在本文 `nodejs-demo` 基线上按主题增量实践(建议顺序大致由上到下)。**每篇分项均链接到 `ansible/files/04-01-nodejs-demo/` 下累积清单**,并附 **相对上一篇的变更表**;与 [`ansible/playbooks/nodejs-demo-apply.yml`](../ansible/playbooks/nodejs-demo-apply.yml) 共用。 - `04-02-nodejs-镜像与运行命令.md`:镜像 tag、`imagePullPolicy`、`command`/`args` - `04-03-nodejs-环境变量与配置注入.md`:ConfigMap/Secret、`env`/`envFrom` diff --git a/docs/04-02-nodejs-镜像与运行命令.md b/docs/04-02-nodejs-镜像与运行命令.md index 1927c86..2a3c8a8 100644 --- a/docs/04-02-nodejs-镜像与运行命令.md +++ b/docs/04-02-nodejs-镜像与运行命令.md @@ -10,8 +10,8 @@ | 项 | 路径 / 命令 | |----|-------------| -| 本篇完整清单(累积至 04-02) | [`ansible/files/nodejs-demo/04-02-nodejs-demo.yaml`](../ansible/files/nodejs-demo/04-02-nodejs-demo.yaml) | -| 手工应用 | `kubectl apply -f ansible/files/nodejs-demo/04-02-nodejs-demo.yaml` | +| 本篇完整清单(累积至 04-02) | [`ansible/files/04-01-nodejs-demo/04-02-nodejs-demo.yaml`](../ansible/files/04-01-nodejs-demo/04-02-nodejs-demo.yaml) | +| 手工应用 | `kubectl apply -f ansible/files/04-01-nodejs-demo/04-02-nodejs-demo.yaml` | | Ansible | `ansible-playbook -i ansible/inventory.ini ansible/playbooks/nodejs-demo-apply.yml -e nodejs_demo_manifest=04-02-nodejs-demo.yaml` | 若你更喜欢命令行换镜像,文末也给了 **`kubectl set image`**,可不改仓库清单。 @@ -34,7 +34,7 @@ 应用: ```bash -kubectl apply -f ansible/files/nodejs-demo/04-02-nodejs-demo.yaml +kubectl apply -f 
ansible/files/04-01-nodejs-demo/04-02-nodejs-demo.yaml # 或仅打补丁(示意) kubectl set image deployment/nodejs-demo nodejs-demo=node:18.20-alpine -n default ``` diff --git a/docs/04-03-nodejs-环境变量与配置注入.md b/docs/04-03-nodejs-环境变量与配置注入.md index 3cf5a94..2ab4656 100644 --- a/docs/04-03-nodejs-环境变量与配置注入.md +++ b/docs/04-03-nodejs-环境变量与配置注入.md @@ -10,9 +10,9 @@ | 项 | 路径 / 命令 | |----|-------------| -| 本篇完整清单(累积至 04-03,含 ConfigMap + Deployment + Service + Ingress) | [`ansible/files/nodejs-demo/04-03-nodejs-demo.yaml`](../ansible/files/nodejs-demo/04-03-nodejs-demo.yaml) | -| Secret 示例(勿提交真密钥) | [`ansible/files/nodejs-demo/nodejs-demo-secret.example.yaml`](../ansible/files/nodejs-demo/nodejs-demo-secret.example.yaml) | -| 手工应用 | `kubectl apply -f ansible/files/nodejs-demo/04-03-nodejs-demo.yaml` | +| 本篇完整清单(累积至 04-03,含 ConfigMap + Deployment + Service + Ingress) | [`ansible/files/04-01-nodejs-demo/04-03-nodejs-demo.yaml`](../ansible/files/04-01-nodejs-demo/04-03-nodejs-demo.yaml) | +| Secret 示例(勿提交真密钥) | [`ansible/files/04-01-nodejs-demo/nodejs-demo-secret.example.yaml`](../ansible/files/04-01-nodejs-demo/nodejs-demo-secret.example.yaml) | +| 手工应用 | `kubectl apply -f ansible/files/04-01-nodejs-demo/04-03-nodejs-demo.yaml` | | Ansible | `ansible-playbook ... 
-e nodejs_demo_manifest=04-03-nodejs-demo.yaml` | ## 场景说明(白话) @@ -32,7 +32,7 @@ 应用: ```bash -kubectl apply -f ansible/files/nodejs-demo/04-03-nodejs-demo.yaml +kubectl apply -f ansible/files/04-01-nodejs-demo/04-03-nodejs-demo.yaml ``` ## 验证 @@ -45,7 +45,7 @@ curl -s --max-time 3 http://<节点IP>/node/ ## Secret 示例(仅示意) -**说明**:示例文件为 [`nodejs-demo-secret.example.yaml`](../ansible/files/nodejs-demo/nodejs-demo-secret.example.yaml);也可 `kubectl create secret generic ...`。在 Pod 中用 `env.valueFrom.secretKeyRef` 引用;验证 `printenv API_TOKEN`(注意日志勿打印密钥)。 +**说明**:示例文件为 [`nodejs-demo-secret.example.yaml`](../ansible/files/04-01-nodejs-demo/nodejs-demo-secret.example.yaml);也可 `kubectl create secret generic ...`。在 Pod 中用 `env.valueFrom.secretKeyRef` 引用;验证 `printenv API_TOKEN`(注意日志勿打印密钥)。 ## 删除 diff --git a/docs/04-04-nodejs-端口与Service.md b/docs/04-04-nodejs-端口与Service.md index 6ecaa12..a08a01f 100644 --- a/docs/04-04-nodejs-端口与Service.md +++ b/docs/04-04-nodejs-端口与Service.md @@ -10,8 +10,8 @@ | 项 | 路径 | |----|------| -| 本篇完整清单(累积至 04-04) | [`ansible/files/nodejs-demo/04-04-nodejs-demo.yaml`](../ansible/files/nodejs-demo/04-04-nodejs-demo.yaml) | -| 应用 | `kubectl apply -f ansible/files/nodejs-demo/04-04-nodejs-demo.yaml` | +| 本篇完整清单(累积至 04-04) | [`ansible/files/04-01-nodejs-demo/04-04-nodejs-demo.yaml`](../ansible/files/04-01-nodejs-demo/04-04-nodejs-demo.yaml) | +| 应用 | `kubectl apply -f ansible/files/04-01-nodejs-demo/04-04-nodejs-demo.yaml` | 自 **04-04** 起,累积清单中应用监听 **8080**(与 `04-01` 文档中的 3000 不同,便于与后续探针、分项对齐)。 @@ -45,7 +45,7 @@ ## 部署与验证 ```bash -kubectl apply -f ansible/files/nodejs-demo/04-04-nodejs-demo.yaml +kubectl apply -f ansible/files/04-01-nodejs-demo/04-04-nodejs-demo.yaml kubectl get svc nodejs-demo -n default -o wide kubectl get endpoints nodejs-demo -n default curl -s --max-time 3 http://<节点IP>/node/ diff --git a/docs/04-05-nodejs-资源请求与限制.md b/docs/04-05-nodejs-资源请求与限制.md index 7b7a10f..d120d6d 100644 --- a/docs/04-05-nodejs-资源请求与限制.md +++ 
b/docs/04-05-nodejs-资源请求与限制.md @@ -8,8 +8,8 @@ ## 清单路径(唯一真源) -| 本篇完整清单 | [`ansible/files/nodejs-demo/04-05-nodejs-demo.yaml`](../ansible/files/nodejs-demo/04-05-nodejs-demo.yaml) | -| 应用 | `kubectl apply -f ansible/files/nodejs-demo/04-05-nodejs-demo.yaml` | +| 本篇完整清单 | [`ansible/files/04-01-nodejs-demo/04-05-nodejs-demo.yaml`](../ansible/files/04-01-nodejs-demo/04-05-nodejs-demo.yaml) | +| 应用 | `kubectl apply -f ansible/files/04-01-nodejs-demo/04-05-nodejs-demo.yaml` | ## 场景说明(白话) @@ -26,7 +26,7 @@ ## 部署与验证 ```bash -kubectl apply -f ansible/files/nodejs-demo/04-05-nodejs-demo.yaml +kubectl apply -f ansible/files/04-01-nodejs-demo/04-05-nodejs-demo.yaml kubectl describe pod -l app=nodejs-demo -n default | grep -A5 "Limits\|Requests" ``` diff --git a/docs/04-06-nodejs-探针与健康检查.md b/docs/04-06-nodejs-探针与健康检查.md index fb9f0b2..5acad9d 100644 --- a/docs/04-06-nodejs-探针与健康检查.md +++ b/docs/04-06-nodejs-探针与健康检查.md @@ -8,8 +8,8 @@ ## 清单路径(唯一真源) -| 本篇完整清单 | [`ansible/files/nodejs-demo/04-06-nodejs-demo.yaml`](../ansible/files/nodejs-demo/04-06-nodejs-demo.yaml) | -| 应用 | `kubectl apply -f ansible/files/nodejs-demo/04-06-nodejs-demo.yaml` | +| 本篇完整清单 | [`ansible/files/04-01-nodejs-demo/04-06-nodejs-demo.yaml`](../ansible/files/04-01-nodejs-demo/04-06-nodejs-demo.yaml) | +| 应用 | `kubectl apply -f ansible/files/04-01-nodejs-demo/04-06-nodejs-demo.yaml` | 探针端口与累积清单一致,为 **8080**(自 `04-04` 起)。 @@ -34,7 +34,7 @@ Kubernetes 会**周期性访问**你指定的地址,判断容器该不该重 ## 部署与验证 ```bash -kubectl apply -f ansible/files/nodejs-demo/04-06-nodejs-demo.yaml +kubectl apply -f ansible/files/04-01-nodejs-demo/04-06-nodejs-demo.yaml kubectl describe pod -l app=nodejs-demo -n default | sed -n '/Liveness/,/Events/p' kubectl get endpoints nodejs-demo -n default ``` diff --git a/docs/04-07-nodejs-调度与亲和.md b/docs/04-07-nodejs-调度与亲和.md index 5270822..550806f 100644 --- a/docs/04-07-nodejs-调度与亲和.md +++ b/docs/04-07-nodejs-调度与亲和.md @@ -8,8 +8,8 @@ ## 清单路径(唯一真源) -| 本篇完整清单 | 
[`ansible/files/nodejs-demo/04-07-nodejs-demo.yaml`](../ansible/files/nodejs-demo/04-07-nodejs-demo.yaml) | -| 应用 | `kubectl apply -f ansible/files/nodejs-demo/04-07-nodejs-demo.yaml` | +| 本篇完整清单 | [`ansible/files/04-01-nodejs-demo/04-07-nodejs-demo.yaml`](../ansible/files/04-01-nodejs-demo/04-07-nodejs-demo.yaml) | +| 应用 | `kubectl apply -f ansible/files/04-01-nodejs-demo/04-07-nodejs-demo.yaml` | 清单中默认 `nodeSelector: kubernetes.io/hostname: ylc62`,请改为本集群节点名。 @@ -38,7 +38,7 @@ ## 部署与验证 ```bash -kubectl apply -f ansible/files/nodejs-demo/04-07-nodejs-demo.yaml +kubectl apply -f ansible/files/04-01-nodejs-demo/04-07-nodejs-demo.yaml kubectl get pod -l app=nodejs-demo -n default -o wide ``` diff --git a/docs/04-08-nodejs-安全上下文.md b/docs/04-08-nodejs-安全上下文.md index df64514..1ec764a 100644 --- a/docs/04-08-nodejs-安全上下文.md +++ b/docs/04-08-nodejs-安全上下文.md @@ -9,8 +9,8 @@ ## 清单路径(唯一真源) -| 本篇完整清单 | [`ansible/files/nodejs-demo/04-08-nodejs-demo.yaml`](../ansible/files/nodejs-demo/04-08-nodejs-demo.yaml) | -| 应用 | `kubectl apply -f ansible/files/nodejs-demo/04-08-nodejs-demo.yaml` | +| 本篇完整清单 | [`ansible/files/04-01-nodejs-demo/04-08-nodejs-demo.yaml`](../ansible/files/04-01-nodejs-demo/04-08-nodejs-demo.yaml) | +| 应用 | `kubectl apply -f ansible/files/04-01-nodejs-demo/04-08-nodejs-demo.yaml` | ## 场景说明(白话) @@ -31,7 +31,7 @@ ## 部署与验证 ```bash -kubectl apply -f ansible/files/nodejs-demo/04-08-nodejs-demo.yaml +kubectl apply -f ansible/files/04-01-nodejs-demo/04-08-nodejs-demo.yaml kubectl get pod -l app=nodejs-demo -n default kubectl exec deploy/nodejs-demo -n default -- id ``` diff --git a/docs/04-09-nodejs-存储与卷.md b/docs/04-09-nodejs-存储与卷.md index c94ecf2..01a64d6 100644 --- a/docs/04-09-nodejs-存储与卷.md +++ b/docs/04-09-nodejs-存储与卷.md @@ -9,8 +9,8 @@ ## 清单路径(唯一真源) -| 本篇完整清单(含 PVC + `/data` 挂载,默认 `storageClassName: local-path`) | [`ansible/files/nodejs-demo/04-09-nodejs-demo.yaml`](../ansible/files/nodejs-demo/04-09-nodejs-demo.yaml) | -| 应用 | `kubectl apply -f 
ansible/files/nodejs-demo/04-09-nodejs-demo.yaml` | +| 本篇完整清单(含 PVC + `/data` 挂载,默认 `storageClassName: local-path`) | [`ansible/files/04-01-nodejs-demo/04-09-nodejs-demo.yaml`](../ansible/files/04-01-nodejs-demo/04-09-nodejs-demo.yaml) | +| 应用 | `kubectl apply -f ansible/files/04-01-nodejs-demo/04-09-nodejs-demo.yaml` | emptyDir、仅 ConfigMap 卷等变体可在该清单基础上自行删减 PVC 与 `volumeMounts` 做实验。 @@ -28,12 +28,12 @@ emptyDir、仅 ConfigMap 卷等变体可在该清单基础上自行删减 PVC | `volumeMounts` | 仅 `/tmp` | 增加 `/data` | | `volumes` | 仅 `tmp` emptyDir | 增加 `persistentVolumeClaim` | -**emptyDir 缓存卷**、**ConfigMap 只读挂载** 的片段写法见 Kubernetes 文档;可在 [`04-09-nodejs-demo.yaml`](../ansible/files/nodejs-demo/04-09-nodejs-demo.yaml) 上自行合并实验。 +**emptyDir 缓存卷**、**ConfigMap 只读挂载** 的片段写法见 Kubernetes 文档;可在 [`04-09-nodejs-demo.yaml`](../ansible/files/04-01-nodejs-demo/04-09-nodejs-demo.yaml) 上自行合并实验。 ## 部署与验证 ```bash -kubectl apply -f ansible/files/nodejs-demo/04-09-nodejs-demo.yaml +kubectl apply -f ansible/files/04-01-nodejs-demo/04-09-nodejs-demo.yaml kubectl get pvc -n default kubectl exec deploy/nodejs-demo -n default -- df -h /data ``` diff --git a/docs/04-10-nodejs-Ingress与Traefik.md b/docs/04-10-nodejs-Ingress与Traefik.md index 7d2af83..89b54f2 100644 --- a/docs/04-10-nodejs-Ingress与Traefik.md +++ b/docs/04-10-nodejs-Ingress与Traefik.md @@ -8,8 +8,8 @@ ## 清单路径(唯一真源) -| 本篇完整清单(含 Ingress `host` + `/api`) | [`ansible/files/nodejs-demo/04-10-nodejs-demo.yaml`](../ansible/files/nodejs-demo/04-10-nodejs-demo.yaml) | -| 应用 | `kubectl apply -f ansible/files/nodejs-demo/04-10-nodejs-demo.yaml` | +| 本篇完整清单(含 Ingress `host` + `/api`) | [`ansible/files/04-01-nodejs-demo/04-10-nodejs-demo.yaml`](../ansible/files/04-01-nodejs-demo/04-10-nodejs-demo.yaml) | +| 应用 | `kubectl apply -f ansible/files/04-01-nodejs-demo/04-10-nodejs-demo.yaml` | `host` / `path` 可按环境修改清单;`curl` 用 IP 访问时需带 **`Host`** 头。 @@ -44,7 +44,7 @@ Traefik 原生 CRD 可做中间件、多规则组合等;集群需已安装对 ## 部署与验证 ```bash -kubectl apply -f 
ansible/files/nodejs-demo/04-10-nodejs-demo.yaml +kubectl apply -f ansible/files/04-01-nodejs-demo/04-10-nodejs-demo.yaml kubectl describe ing nodejs-demo -n default # --- 情况 A:仍是 04-01 的 Ingress(无 rules.host,path=/node)--- diff --git a/docs/04-11-nodejs-副本与滚动发布.md b/docs/04-11-nodejs-副本与滚动发布.md index 84dcd76..cb98f31 100644 --- a/docs/04-11-nodejs-副本与滚动发布.md +++ b/docs/04-11-nodejs-副本与滚动发布.md @@ -9,8 +9,8 @@ ## 清单路径(唯一真源) -| 本篇完整清单 | [`ansible/files/nodejs-demo/04-11-nodejs-demo.yaml`](../ansible/files/nodejs-demo/04-11-nodejs-demo.yaml) | -| 应用 | `kubectl apply -f ansible/files/nodejs-demo/04-11-nodejs-demo.yaml` | +| 本篇完整清单 | [`ansible/files/04-01-nodejs-demo/04-11-nodejs-demo.yaml`](../ansible/files/04-01-nodejs-demo/04-11-nodejs-demo.yaml) | +| 应用 | `kubectl apply -f ansible/files/04-01-nodejs-demo/04-11-nodejs-demo.yaml` | `replicas` 与 `strategy` 在 **Deployment.spec** 下,与 `selector` / `template` 同级。 diff --git a/docs/04-12-nodejs-TLS与证书.md b/docs/04-12-nodejs-TLS与证书.md index 755cbc7..96e9660 100644 --- a/docs/04-12-nodejs-TLS与证书.md +++ b/docs/04-12-nodejs-TLS与证书.md @@ -9,8 +9,8 @@ ## 清单路径(唯一真源) -| 本篇完整清单(Ingress 已切 **websecure** + `spec.tls`;**不含** Secret 内容) | [`ansible/files/nodejs-demo/04-12-nodejs-demo.yaml`](../ansible/files/nodejs-demo/04-12-nodejs-demo.yaml) | -| 应用 | 先创建 TLS Secret(见下),再 `kubectl apply -f ansible/files/nodejs-demo/04-12-nodejs-demo.yaml` | +| 本篇完整清单(Ingress 已切 **websecure** + `spec.tls`;**不含** Secret 内容) | [`ansible/files/04-01-nodejs-demo/04-12-nodejs-demo.yaml`](../ansible/files/04-01-nodejs-demo/04-12-nodejs-demo.yaml) | +| 应用 | 先创建 TLS Secret(见下),再 `kubectl apply -f ansible/files/04-01-nodejs-demo/04-12-nodejs-demo.yaml` | **证书 Secret**:使用命令创建(不提交私钥到 Git): diff --git a/docs/04-13-nodejs-HPA.md b/docs/04-13-nodejs-HPA.md index 471360b..35b7047 100644 --- a/docs/04-13-nodejs-HPA.md +++ b/docs/04-13-nodejs-HPA.md @@ -10,8 +10,8 @@ ## 清单路径(唯一真源) -| 本篇完整清单(含 Deployment/Service/Ingress/PVC/CM + **HPA**) | 
[`ansible/files/nodejs-demo/04-13-nodejs-demo.yaml`](../ansible/files/nodejs-demo/04-13-nodejs-demo.yaml) | -| 应用 | `kubectl apply -f ansible/files/nodejs-demo/04-13-nodejs-demo.yaml`(若用 `04-12`,需先有 TLS Secret) | +| 本篇完整清单(含 Deployment/Service/Ingress/PVC/CM + **HPA**) | [`ansible/files/04-01-nodejs-demo/04-13-nodejs-demo.yaml`](../ansible/files/04-01-nodejs-demo/04-13-nodejs-demo.yaml) | +| 应用 | `kubectl apply -f ansible/files/04-01-nodejs-demo/04-13-nodejs-demo.yaml`(若用 `04-12`,需先有 TLS Secret) | ## 场景说明(白话) @@ -27,7 +27,7 @@ ## 部署与验证 ```bash -kubectl apply -f ansible/files/nodejs-demo/04-13-nodejs-demo.yaml +kubectl apply -f ansible/files/04-01-nodejs-demo/04-13-nodejs-demo.yaml kubectl get hpa -n default kubectl describe hpa nodejs-demo -n default ``` diff --git a/docs/04-14-nodejs-GitOps与CI流水线.md b/docs/04-14-nodejs-GitOps与CI流水线.md index a093a14..a2131ee 100644 --- a/docs/04-14-nodejs-GitOps与CI流水线.md +++ b/docs/04-14-nodejs-GitOps与CI流水线.md @@ -10,7 +10,7 @@ ## 清单与仓库(唯一真源) - **本文无独立流水线 YAML**(GitLab CI、Argo CD、Flux 随版本变化大);流程见 **`05-04`**、**`03-09`**。 -- **应用清单真源**:[`ansible/files/nodejs-demo/`](../ansible/files/nodejs-demo/)(例如 `04-01-nodejs-demo.yaml`)。将 **该目录或单文件** 纳入 Git,由 CI 改 `image:` tag 或由 GitOps 同步到集群。 +- **应用清单真源**:[`ansible/files/04-01-nodejs-demo/`](../ansible/files/04-01-nodejs-demo/)(例如 `04-01-nodejs-demo.yaml`)。将 **该目录或单文件** 纳入 Git,由 CI 改 `image:` tag 或由 GitOps 同步到集群。 ## 场景说明(白话) diff --git a/docs/05-01-k3s-部署homer首页面板.md b/docs/05-01-k3s-部署homer首页面板.md index d41bf06..78e7c2b 100644 --- a/docs/05-01-k3s-部署homer首页面板.md +++ b/docs/05-01-k3s-部署homer首页面板.md @@ -4,6 +4,15 @@ --- +## Homer 相对「纯书签」的优势 + +- **不仅是跳转链接**:普通条目点击后打开 `url`;此外 Homer 支持多种**集成卡片类型**,由浏览器侧按配置去请求你填写的 **目标地址**,在首页上直接看到**简单、只读的实时反馈**(例如是否在线、几项系统指标),无需再开一个监控大盘。 +- **与目标服务的「轻量交互」**:在 `config.yml` 里为条目声明 `type` 后,Homer 会对对应 `url` 发起约定格式的 HTTP 请求(由该类型定义),把返回结果渲染成卡片上的小部件;**不是**任意网站都能自动对接,目标需实现该类型要求的接口(如 Glances 的 Web/API)。 +- **配置仍集中、易维护**:交互逻辑由 Homer 内置,你只维护 **YAML 里的 `url` / `type` / 
少量参数**;适合家庭实验室「一眼看状态、一点进服务」。 +- **与重型监控栈的关系**:这类交互偏**轻量展示**;若要做告警、长期存储、复杂查询,仍建议配合 Prometheus/Grafana 等,Homer 作为入口与概览即可。 + +--- + ## 部署思路 - Homer 作为普通 Web 应用运行在 K3s @@ -15,10 +24,61 @@ ```bash kubectl create ns homer -kubectl apply -f ansible/files/homer/homer.yaml +kubectl apply -f ansible/files/05-01-homer/homer.yaml ``` -**唯一真源**:[`ansible/files/homer/homer.yaml`](../ansible/files/homer/homer.yaml)(Deployment + Service + Ingress;按需改 `host`)。 +**唯一真源**:[`ansible/files/05-01-homer/homer.yaml`](../ansible/files/05-01-homer/homer.yaml)(ConfigMap + Deployment + Service + Ingress)。 + +### 自定义导航(config.yml) + +- 清单内 **ConfigMap `homer-config`** 的键 **`config.yml`** 即 Homer 主配置;**所有书签/分组只改这一段**,不必为每个链接单独写 Kubernetes YAML。 +- 镜像约定:该文件挂载到容器内 **`/www/assets/config.yml`**(与 [b4bz/homer](https://hub.docker.com/r/b4bz/homer/) 说明一致)。 +- Deployment 已设 **`INIT_ASSETS=0`**,避免启动脚本覆盖你提供的 `config.yml`。 +- 修改后重新应用并滚动 Pod 生效: + +```bash +kubectl apply -f ansible/files/05-01-homer/homer.yaml +kubectl -n homer rollout restart deploy/homer +``` + +若只想用镜像默认页:按 `homer.yaml` 文件头注释删除 ConfigMap,并去掉 Deployment 中的 `env` / `volumes` / `volumeMounts`。 + +### `type: Glances` 卡片(目标机 CPU / 内存等) + +在 `config.yml` 里可配置 **Glances** 类型条目,用于展示**某台机器**的系统指标(非普通超链接卡片)。 + +**示例(摘自 Homer 文档形态,按需改 `url`):** + +```yaml +- name: "System Metrics" + type: "Glances" + icon: "fa-solid fa-heart-pulse" + url: "https://glances.example.com" # 须指向 Glances 提供的 Web/API 基址 + stats: [cpu, mem] # 可选:load, cpu, mem, swap(均来自下方「目标」) +``` + +**在目标机用 Docker 跑 Glances(Web 模式)示例**(Homer 的 `url` 需指向该服务可访问地址,默认端口以镜像说明为准): + +```yaml +services: + glances: + image: nicolargo/glances:latest + container_name: glances + environment: + - TZ=Asia/Shanghai + - GLANCES_OPT=-w + ports: + - "61208:61208" + restart: unless-stopped +``` + +**要点:** + +- **`url` 指向谁,图上的 CPU / 内存 / swap / load 就是谁的数据**——即运行 **Glances** 并对外暴露接口的那台主机(或该进程的监听地址),**不是** Homer Pod 自己的指标,也不是任意 `https://my-service` 自动就能出数。 +- **`stats`**:从该 Glances 
**目标**上选取要展示的字段,与「目标」一一对应。 +- **多台机器**:每台通常需要 **各自的 Glances 实例**(不同主机、端口或反代路径),在 Homer 里 **多条目、多条 `url`**,一条对应一个目标。 +- **目标上没有 Glances**:不能靠 `type: Glances` 拉指标;可改为普通 `items` 链接,或在目标上部署 Glances 后再填 `url`。 +- **`url` 具体路径**(是否带 `/api/3` 等)取决于你所部署的 Glances 版本与暴露方式,以浏览器或 `curl` 能访问到的 Glances 接口为准。 --- diff --git a/docs/05-02-onenav首页面板.md b/docs/05-02-onenav首页面板.md index dc12a36..935e2d3 100644 --- a/docs/05-02-onenav首页面板.md +++ b/docs/05-02-onenav首页面板.md @@ -19,12 +19,12 @@ docker run -d --name onenav \ ## 在 K3s 做静态转发 -**唯一真源**:[`ansible/files/onenav/onenav-proxy.yaml`](../ansible/files/onenav/onenav-proxy.yaml)(修改 `Endpoints` IP 与 `Ingress` host)。 +**唯一真源**:[`ansible/files/05-02-onenav/onenav-proxy.yaml`](../ansible/files/05-02-onenav/onenav-proxy.yaml)(修改 `Endpoints` IP 与 `Ingress` host)。 应用方式: ```bash -kubectl apply -f ansible/files/onenav/onenav-proxy.yaml +kubectl apply -f ansible/files/05-02-onenav/onenav-proxy.yaml ``` --- diff --git a/docs/05-03-k3s-安装gitlab-含runner.md b/docs/05-03-k3s-安装gitlab-含runner.md index cc17c29..db70b5f 100644 --- a/docs/05-03-k3s-安装gitlab-含runner.md +++ b/docs/05-03-k3s-安装gitlab-含runner.md @@ -131,7 +131,7 @@ sudo gitlab-runner register \ --non-interactive ``` -在 `.gitlab-ci.yml` 中即可按需指定不同架构运行 Job,示例见 [`ansible/files/gitlab/gitlab-ci-runner-tags.example.yml`](../ansible/files/gitlab/gitlab-ci-runner-tags.example.yml)。 +在 `.gitlab-ci.yml` 中即可按需指定不同架构运行 Job,示例见 [`ansible/files/05-03-gitlab-runner/gitlab-ci-runner-tags.example.yml`](../ansible/files/05-03-gitlab-runner/gitlab-ci-runner-tags.example.yml)。 --- diff --git a/docs/05-04-k3s-配置gitlab-cicd.md b/docs/05-04-k3s-配置gitlab-cicd.md index 7be241c..f1da222 100644 --- a/docs/05-04-k3s-配置gitlab-cicd.md +++ b/docs/05-04-k3s-配置gitlab-cicd.md @@ -33,9 +33,9 @@ homelab-config/ **唯一真源(示例流水线)**: -- 最小:[`ansible/files/gitlab/gitlab-ci-minimal.example.yml`](../ansible/files/gitlab/gitlab-ci-minimal.example.yml) -- 多架构 
deploy:[`ansible/files/gitlab/gitlab-ci-multi-arch-deploy.example.yml`](../ansible/files/gitlab/gitlab-ci-multi-arch-deploy.example.yml) - +- 最小:[`ansible/files/05-04-gitlab-cicd/gitlab-ci-minimal.example.yml`](../ansible/files/05-04-gitlab-cicd/gitlab-ci-minimal.example.yml) +- 多架构 deploy:[`ansible/files/05-04-gitlab-cicd/gitlab-ci-multi-arch-deploy.example.yml`](../ansible/files/05-04-gitlab-cicd/gitlab-ci-multi-arch-deploy.example.yml) + 复制到仓库根为 `.gitlab-ci.yml`(或 `include` 引用),并配好 Runner 与 `KUBE_CONFIG_CONTENT` 等变量。 --- diff --git a/docs/05-06-openlist挂载网盘与自动备份.md b/docs/05-06-openlist挂载网盘与自动备份.md index 9848b24..82cc9a2 100644 --- a/docs/05-06-openlist挂载网盘与自动备份.md +++ b/docs/05-06-openlist挂载网盘与自动备份.md @@ -13,12 +13,12 @@ 2. 选择备份方式(`rclone` / cron 脚本 / GitLab CI) 3. 设置定时执行策略 -**唯一真源**:[`ansible/files/openlist/openlist-backup-cronjob.yaml`](../ansible/files/openlist/openlist-backup-cronjob.yaml)(CronJob 示例;替换镜像与 PVC)。 +**唯一真源**:[`ansible/files/05-06-openlist/openlist-backup-cronjob.yaml`](../ansible/files/05-06-openlist/openlist-backup-cronjob.yaml)(CronJob 示例;替换镜像与 PVC)。 应用方式: ```bash -kubectl apply -f ansible/files/openlist/openlist-backup-cronjob.yaml +kubectl apply -f ansible/files/05-06-openlist/openlist-backup-cronjob.yaml ``` ## 验证命令 diff --git a/docs/05-07-openclaw应用部署.md b/docs/05-07-openclaw应用部署.md index a18ea4f..936f647 100644 --- a/docs/05-07-openclaw应用部署.md +++ b/docs/05-07-openclaw应用部署.md @@ -43,12 +43,12 @@ docker compose run --rm openclaw-cli dashboard --no-open ## 在 K3s 中做静态转发 -**唯一真源**:[`ansible/files/openclaw/openclaw-proxy.yaml`](../ansible/files/openclaw/openclaw-proxy.yaml)(按实际 IP、端口、`host` 修改)。 +**唯一真源**:[`ansible/files/05-07-openclaw/openclaw-proxy.yaml`](../ansible/files/05-07-openclaw/openclaw-proxy.yaml)(按实际 IP、端口、`host` 修改)。 ## 部署命令 ```bash -kubectl apply -f ansible/files/openclaw/openclaw-proxy.yaml +kubectl apply -f ansible/files/05-07-openclaw/openclaw-proxy.yaml ``` ## 验证命令 diff --git a/docs/05-08-openclaw-k3s-实验部署.md 
b/docs/05-08-openclaw-k3s-实验部署.md index 47dfc93..42aae27 100644 --- a/docs/05-08-openclaw-k3s-实验部署.md +++ b/docs/05-08-openclaw-k3s-实验部署.md @@ -42,14 +42,14 @@ docker push registry.local/openclaw:local 下面是一个**高度简化的实验性 Deployment/Service/Ingress 示例**,其目标只是让你能在集群内/通过 Traefik 访问 OpenClaw Gateway 控制界面。 -**唯一真源**:[`ansible/files/openclaw/openclaw-k3s-experimental.yaml`](../ansible/files/openclaw/openclaw-k3s-experimental.yaml)(实验用;替换镜像与域名)。 +**唯一真源**:[`ansible/files/05-08-openclaw/openclaw-k3s-experimental.yaml`](../ansible/files/05-08-openclaw/openclaw-k3s-experimental.yaml)(实验用;替换镜像与域名)。 > 说明:示例使用 `emptyDir`;若要持久化请改为 PVC/hostPath。 应用部署: ```bash -kubectl apply -f ansible/files/openclaw/openclaw-k3s-experimental.yaml +kubectl apply -f ansible/files/05-08-openclaw/openclaw-k3s-experimental.yaml ``` --- diff --git a/docs/05-09-openclaw-web-小游戏网页平台.md b/docs/05-09-openclaw-web-小游戏网页平台.md new file mode 100644 index 0000000..e3ff9f6 --- /dev/null +++ b/docs/05-09-openclaw-web-小游戏网页平台.md @@ -0,0 +1,37 @@ +# 05-09-openclaw-web 小游戏网页平台 + +> 在 K3s 中部署一个简单的 OpenClaw Web 前端(示例 Deployment/Service/Ingress),用于演示“静态站点/前端应用通过 Traefik 暴露”这一类场景。 + +## 前置条件 + +- 已完成基础集群安装:`01-01`、`01-02` +- Traefik 入口可用(参考 `03-01-k3s-traefik-dashboard.md`) +- 你已准备好可拉取的前端镜像(示例使用 `ghcr.io/your/openclaw-web:latest`,请按环境替换) + +## 清单路径(唯一真源) + +| 项 | 路径 | +|----|------| +| 本篇清单 | [`ansible/files/05-09-openclaw-web-小游戏网页平台/openclaw-web.yml`](../ansible/files/05-09-openclaw-web-小游戏网页平台/openclaw-web.yml) | +| 应用 | `kubectl apply -f ansible/files/05-09-openclaw-web-小游戏网页平台/openclaw-web.yml` | +| 删除 | `kubectl delete -f ansible/files/05-09-openclaw-web-小游戏网页平台/openclaw-web.yml` | + +## 部署与验证 + +```bash +kubectl apply -f ansible/files/05-09-openclaw-web-小游戏网页平台/openclaw-web.yml +kubectl get deploy,svc,ing -n default | grep -i openclaw-web +curl -I --max-time 5 http://openclaw.example.com/ +``` + +预期: + +- `openclaw-web` Deployment 处于 `Available` +- Ingress 生效后,通过域名可访问到前端(`200` 或静态站点常见的重定向均可接受) + +## 失败排查 + +- 
**404**:域名/Host 未解析到集群入口 IP;或 Ingress `host` 不一致。 +- **502**:Service 无 Endpoints(Pod 未 Ready 或 selector 不匹配)。 +- **ImagePullBackOff**:镜像仓库不可达或未配置鉴权。 + diff --git a/docs/06-03-k3s-自动备份与恢复-openlist-webdav.md b/docs/06-03-k3s-自动备份与恢复-openlist-webdav.md index 3e0ad31..21718c1 100644 --- a/docs/06-03-k3s-自动备份与恢复-openlist-webdav.md +++ b/docs/06-03-k3s-自动备份与恢复-openlist-webdav.md @@ -53,12 +53,12 @@ rclone ls openlist-webdav: ### 2.2 使用 CronJob 定期备份(集群内) -如果你希望在 K3s 内部完成备份,可以将 `rclone` 封装到容器镜像中。**唯一真源(CronJob)**:[`ansible/files/openlist/app-data-backup-cronjob.yaml`](../ansible/files/openlist/app-data-backup-cronjob.yaml)。 +如果你希望在 K3s 内部完成备份,可以将 `rclone` 封装到容器镜像中。**唯一真源(CronJob)**:[`ansible/files/06-03-openlist-webdav/app-data-backup-cronjob.yaml`](../ansible/files/06-03-openlist-webdav/app-data-backup-cronjob.yaml)。 应用方式: ```bash -kubectl apply -f ansible/files/openlist/app-data-backup-cronjob.yaml +kubectl apply -f ansible/files/06-03-openlist-webdav/app-data-backup-cronjob.yaml ``` > 提示:如果你的应用使用的是 PVC,而不是 `hostPath`,则可以将 `volumes.hostPath` 改为 `persistentVolumeClaim`。 @@ -69,12 +69,12 @@ kubectl apply -f ansible/files/openlist/app-data-backup-cronjob.yaml ### 3.1 恢复 Job 示例 -当某个节点发生故障、你将应用调度到另一节点后,可以通过一次性 Job 拉回备份。**唯一真源(Job)**:[`ansible/files/openlist/app-data-restore-job.yaml`](../ansible/files/openlist/app-data-restore-job.yaml)。 +当某个节点发生故障、你将应用调度到另一节点后,可以通过一次性 Job 拉回备份。**唯一真源(Job)**:[`ansible/files/06-03-openlist-webdav/app-data-restore-job.yaml`](../ansible/files/06-03-openlist-webdav/app-data-restore-job.yaml)。 执行恢复: ```bash -kubectl apply -f ansible/files/openlist/app-data-restore-job.yaml +kubectl apply -f ansible/files/06-03-openlist-webdav/app-data-restore-job.yaml kubectl -n default logs -f job/app-data-restore ``` diff --git a/scripts/.env.verify.example b/scripts/.env.verify.example new file mode 100644 index 0000000..d877cdf --- /dev/null +++ b/scripts/.env.verify.example @@ -0,0 +1,103 @@ +# 验证矩阵 / 编排脚本用环境变量模板 +# 
--------------------------------------------------------------------------- +# 使用:复制为本目录下的 .env.verify(勿提交 Git),在仓库根执行: +# set -a && source scripts/.env.verify && set +a +# 或在 bash 中:source scripts/.env.verify +# 仅示例占位,请把 YOUR_* 换成真实值;密钥只留在本机 .env.verify。 +# --------------------------------------------------------------------------- + +# --- 集群与 kubectl --- +# 在控制节点本机跑 kubectl 时常用: +# export KUBECONFIG="${KUBECONFIG:-/etc/rancher/k3s/k3s.yaml}" +# 若在办公机通过 SSH 在远端执行 kubectl,可设为: +# export K3S_CONTROL_SSH="ssh -o BatchMode=yes ylc61" +# export REMOTE_KUBECONFIG="/etc/rancher/k3s/k3s.yaml" +# 在控制节点本机执行 kubectl 时,避免再次经 SSH 自环(可按需保留): +# export VERIFY_FORCE_LOCAL_KUBECTL=1 +# 经 SSH 在控制节点执行 kubectl 时:若 PATH 中无 kubectl,可设 REMOTE_KUBECTL="k3s kubectl" +# export REMOTE_KUBECTL="k3s kubectl" +# 控制节点短主机名(与 inventory 一致;编排里拼 URL/SSH 用) +export K3S_SERVER_HOSTNAME="${K3S_SERVER_HOSTNAME:-ylc61}" +# 与 ansible group_vars 一致,验证磁盘/文档 00-04 时引用 +export K3S_DATA_DIR="${K3S_DATA_DIR:-/storage}" + +# --- Ansible(安装/复验 k3s)--- +export ANSIBLE_INVENTORY="${ANSIBLE_INVENTORY:-$(pwd)/ansible/inventory.ini}" +# phase2(run-phase2-k3s-on-ylc61-as-jack.sh 或手工): +# 为 true 时先跑 k3s-prepare-storage.yml(须与 group_vars 一致) +export K3S_PREPARE_STORAGE="${K3S_PREPARE_STORAGE:-false}" +# phase2 建议在控制节点 ylc61 上执行(本机有 ansible-playbook);办公机无 ansible 时用 phase2-print 复制命令到 ylc61 + +# --- SSH 密钥命名(与 scripts/ssh/test-ssh.sh 默认一致;脚本内尚为硬编码路径)--- +# test-ssh 使用:$K3S_SSH_KEY_DIR/${K3S_SSH_KEY_PREFIX} +# 若你改用其他前缀,需同步改 test-ssh.sh 或仅用手工 ssh -i。 +export K3S_SSH_KEY_DIR="${K3S_SSH_KEY_DIR:-$HOME/.ssh}" +export K3S_SSH_KEY_PREFIX="${K3S_SSH_KEY_PREFIX:-id_ed25519_k3s_}" +# setup-k3s-workers-ssh.sh 首次登录的非 root 用户名(交互默认 jack) +export SSH_USER="${SSH_USER:-jack}" +# test-ssh.sh:连接超时秒数(脚本已支持环境变量 TIMEOUT_SEC) +export TIMEOUT_SEC="${TIMEOUT_SEC:-5}" + +# ---(旧自动验证已下线)--- +# 如果你要重新落地自动化验证,请参考 docs/00-05 的测试框架设计说明。 + +# --- SSH:第三方验证机 onecloud(不忽略:矩阵里多处依赖「集群外」curl/探测)--- +# 用途示例:02-xx nginx 矩阵从第三方访问 Ingress;01-07 经 onecloud 对 
OpenWrt:18080/18443 发 curl; +# 与 K3s 节点 SSH 无关,但必须能免交互登录(建议 BatchMode + 已知的 IdentityFile)。 +# 编排脚本应始终引用 ONECLOUD_SSH,不要用「本机直接 curl」代替,除非你明确改成本机。 +export ONECLOUD_SSH="${ONECLOUD_SSH:-ssh -o BatchMode=yes onecloud}" +# 若需显式密钥,可写完整一行,例如: +# export ONECLOUD_SSH="ssh -o BatchMode=yes -i ~/.ssh/id_ed25519_onecloud onecloud" + +# --- NFS(03-06):服务端与导出路径;若需在服务端执行清理命令可填 SSH --- +export NFS_SERVER_HOST="${NFS_SERVER_HOST:-YOUR_NFS_IP_OR_HOSTNAME}" +export NFS_EXPORT_PATH="${NFS_EXPORT_PATH:-/export/k3s}" +# export NFS_SSH="ssh -o BatchMode=yes root@${NFS_SERVER_HOST}" + +# --- Cloudflare(API / Tunnel;与 scripts/cloudflare-delete-acme-challenge-dns.sh 等一致)--- +# DNS 脚本使用:CF_API_TOKEN、ZONE_NAME 或 ZONE_ID +export CF_API_TOKEN="${CF_API_TOKEN:-}" +export ZONE_NAME="${ZONE_NAME:-jackadam.top}" +export ZONE_ID="${ZONE_ID:-}" +# Tunnel / Dashboard 等若需单独 token,按需增加(勿提交真实值): +# export CF_TUNNEL_TOKEN="" +# export CF_ACCOUNT_ID="" + +# --- ACME / Traefik(03-02、03-03):Let's Encrypt 注册邮箱 --- +# 与 HelmChartConfig / traefik-acme.yaml 中的邮箱一致;编排或 sed 替换时引用 ACME_EMAIL。 +export ACME_EMAIL="${ACME_EMAIL:-}" +# 文档中 traefik-acme 曾用 staging CA 调试;1=使用测试 CA(与 yaml 中 caserver 是否一致自行核对) +export ACME_CA_STAGING="${ACME_CA_STAGING:-0}" +export TRAEFIK_NAMESPACE="${TRAEFIK_NAMESPACE:-kube-system}" + +# --- TLS 验证域名(02-05 / 03-02 矩阵 curl、openssl s_client)--- +# 逗号分隔,与 ZONE_NAME 下实际 DNS 记录一致;勿提交敏感子域,如有需要可只写在本机 +export VERIFY_TLS_HOSTS="${VERIFY_TLS_HOSTS:-test01.jackadam.top,test02.jackadam.top,test03.jackadam.top,test04.jackadam.top}" + +# --- Longhorn(03-07 / ansible longhorn-install)--- +export LONGHORN_NAMESPACE="${LONGHORN_NAMESPACE:-longhorn-system}" + +# --- 可选跳过(仅下列项;ONECLOUD 与 ARMV7 实机路径不在此列——见下方)--- +export SKIP_HA="${SKIP_HA:-1}" +# 跑 01-03 / 01-05 时设为 0,并填写 ARMV7_*;编排脚本不得在无设备时假装通过 +export SKIP_ARMV7="${SKIP_ARMV7:-1}" +export SKIP_GITOPS="${SKIP_GITOPS:-1}" + +# --- armv7(01-03 Docker、01-05 NFS):不忽略;与 ONECLOUD 一样,编排须显式走 ARMV7_*,不得跳过 --- +# 跑 01-03 / 01-05 时:SKIP_ARMV7=0,并填写 ARMV7_SSH(可与 onecloud
同主机、或直连 arm;按你环境二选一)。 +# ARMV7_NFS_SSH 默认同 ARMV7_SSH;若 NFS 在另一台 arm 上再单独覆盖。 +export ARMV7_SSH="${ARMV7_SSH:-}" +export ARMV7_NFS_SSH="${ARMV7_NFS_SSH:-$ARMV7_SSH}" + +# --- OpenWrt / 01-07(与 K3s 四节点无关时单独用)--- +# export OPENWRT_SSH="ssh -o BatchMode=yes root@192.168.x.x" +# export OPENWRT_HAPROXY_HTTP_PORT="18080" +# export OPENWRT_HAPROXY_HTTPS_PORT="18443" +# 01-07 文档中第三方 curl 用 --https-hosts 时的主机列表(逗号分隔,与 VERIFY_TLS_HOSTS 可相同) +# export OPENWRT_VERIFY_HTTPS_HOSTS="test01.jackadam.top,..." + +# --- 与现有 scripts/*.sh 对照(未列出的脚本不在仓库内)--- +# cloudflare-delete-acme-challenge-dns.sh → CF_API_TOKEN, ZONE_NAME, ZONE_ID +# k3s-delete-lab-stacks.sh → KUBECONFIG +# ssh/test-ssh.sh → TIMEOUT_SEC;密钥路径当前固定为 $HOME/.ssh/id_ed25519_k3s_ +# ssh/setup-k3s-workers-ssh.sh → 交互 inventory + SSH_USER;可选一次性密码勿写入本文件 diff --git a/scripts/README.md b/scripts/README.md index 5028005..2dbfd2b 100644 --- a/scripts/README.md +++ b/scripts/README.md @@ -2,6 +2,45 @@ 本目录集中维护通用运维脚本。统一约定:**在仓库根目录执行**,使用 `./scripts/...` 路径调用。 +## 验证编排环境变量(可选) + +复制 [`scripts/.env.verify.example`](.env.verify.example) 为 `scripts/.env.verify` 并填写本机值;**勿提交** `scripts/.env.verify`(已在仓库 `.gitignore` 中忽略)。其中 **`ONECLOUD_SSH`**(默认 `ssh … onecloud`)用于矩阵里**集群外**第三方 curl 等验证,与 `SKIP_*` 无关;**`ARMV7_SSH` / `ARMV7_NFS_SSH`** 用于 `01-03` / `01-05` 实机,同样不属「可忽略」项,与 `SKIP_ARMV7=0` 配对使用。**`ACME_EMAIL`** 供 Traefik ACME(`03-02` / `03-03`);另有 **`VERIFY_TLS_HOSTS`**、`K3S_SERVER_HOSTNAME`、`TIMEOUT_SEC`(`test-ssh.sh` 已读)、`LONGHORN_NAMESPACE` 等,完整列表与「和现有 `scripts/*.sh` 的对照」见 [`.env.verify.example`](.env.verify.example) 文末注释。加载示例: + +```bash +set -a && source scripts/.env.verify && set +a +``` + +## 验证框架状态(自动化已移除) + +旧的验证矩阵编排与静态校验 CI 已下线;`docs/00-02-验证矩阵.md` 仍作为“待验证列表/状态记录”,当前以手工验证为准。 + +如果你要重新落地自动化验证框架,请以设计说明 [`docs/00-05-测试与验证框架.md`](../docs/00-05-测试与验证框架.md) 中的分层与 `doc_id` 映射约定为基线。 + +### 用 ylc61 跑 Ansible 装 K3s(推荐) + +如果办公机(Windows)没有 `ansible-playbook`,有两种方式: + +1. 
方式 A:把仓库同步到 `ylc61`,然后在 `ylc61` 上直接跑 playbook + +```bash +sudo dnf install -y ansible # Fedora;未装过时 +cd ~/实验室建设 # 按你实际路径;若在 /home/jack 则相应 cd +cd ansible +# 可选:第二块盘 → /storage 先准备 +# ansible-playbook -i inventory.ini playbooks/k3s-prepare-storage.yml +ansible-playbook -i inventory.ini playbooks/k3s-init-and-install.yml +``` + +2. 方式 B:从办公机一键触发(内部通过 SSH 在 `ylc61` 上以 `jack` 身份执行) + +```bash +# 可选:先准备 /storage +export K3S_PREPARE_STORAGE=false # 或 true +./scripts/ssh/run-phase2-k3s-on-ylc61-as-jack.sh +``` + +**密钥在 jack**:`inventory.ini` 中私钥路径会随执行用户变化。上述方式会确保在 `ylc61` 以 `jack` 身份运行,避免把私钥解析到 `/root/.ssh/`。 + ## 目录 - `scripts/k3s-delete-lab-stacks.sh` - **按集群里实际资源**遍历删除:用 `kubectl get` 枚举各命名空间下的 Deployment/Service/Ingress/IngressRoute 等再 `kubectl delete`(**不读仓库 YAML 目录**);默认跳过 `kube-system` 等系统命名空间;`--preview` 只列资源;`--namespaces` 限定 NS;`--with-pvc` / `--with-configmaps` / `--with-secrets`(需 `jq`)按需打开 diff --git a/scripts/ssh/smoke-verify-matrix-on-ylc61.sh b/scripts/ssh/smoke-verify-matrix-on-ylc61.sh new file mode 100644 index 0000000..816393f --- /dev/null +++ b/scripts/ssh/smoke-verify-matrix-on-ylc61.sh @@ -0,0 +1,6 @@ +#!/usr/bin/env bash +# Run from the office machine's Git Bash: smoke-run the sample doc_ids on ylc61 over ssh. The remote command is single-quoted, so its ${VAR:-default} expansions are evaluated on the remote side, not from the local environment. +set -euo pipefail + +exec ssh -o BatchMode=yes ylc61 'bash -lc "cd /home/jack && chmod +x scripts/verify.sh && export VERIFY_TEARDOWN=${VERIFY_TEARDOWN:-1} && export nginx_entry_base=${nginx_entry_base:-http://192.168.2.61} && export nodejs_entry_base=${nodejs_entry_base:-http://192.168.2.61} && ./scripts/verify.sh run 02-05 && ./scripts/verify.sh run 03-05 && ./scripts/verify.sh run 03-07 && ./scripts/verify.sh run 04-01"' + diff --git a/scripts/verify.sh b/scripts/verify.sh new file mode 100644 index 0000000..0a21400 --- /dev/null +++ b/scripts/verify.sh @@ -0,0 +1,101 @@ +#!/usr/bin/env bash +# Ansible verification entry point: uses docs/00-02-验证矩阵.md as the run list and executes the verify playbook (deploy→verify→teardown) for each doc_id. +# Recommended: run from the repository root on ylc61 (the control node). +set -euo pipefail + +ROOT="$(cd "$(dirname
"${BASH_SOURCE[0]}")/.." && pwd)" +MATRIX_MD="${ROOT}/docs/00-02-验证矩阵.md" + +load_env() { # Source scripts/.env.verify when it exists, auto-exporting every variable it sets (set -a). + if [[ -f "${ROOT}/scripts/.env.verify" ]]; then + set -a + # shellcheck disable=SC1091 + source "${ROOT}/scripts/.env.verify" + set +a + echo "[OK] 已加载 scripts/.env.verify" >&2 # status goes to stderr so that `list` prints only doc_ids on stdout + fi +} + +parse_doc_ids_from_matrix() { # Print each distinct doc_id (XX-YY) found in MATRIX_MD, one per line; exits 1 when the file is missing. + if [[ ! -f "${MATRIX_MD}" ]]; then + echo "[ERR] matrix 不存在:${MATRIX_MD}" >&2 + exit 1 + fi + # Extract back-quoted file names shaped like `XX-YY-*.md` (first match per line) and print their XX-YY prefix, in order of first appearance. + # shellcheck disable=SC2016 + awk ' + match($0, /`[0-9][0-9]-[0-9][0-9]-[^`]+\.md`/) { + s = substr($0, RSTART+1, RLENGTH-2); + id = substr(s, 1, 5); + if (!seen[id]++) print id; + } + ' "${MATRIX_MD}" +} + +usage() { # Print the command-line help text to stdout. + cat <<'EOF' +用法:scripts/verify.sh <命令> [...] + +命令: + list 从验证矩阵列出 doc_id + run <doc_id> 运行指定 doc_id(执行 ansible/playbooks/verify/<doc_id>.yml) + run-all 按验证矩阵顺序运行全部 doc_id(fail-fast) + +环境变量: + VERIFY_TEARDOWN=1 验证后清理(默认 1) + nginx_entry_base 例如 http://192.168.2.61(用于 02-05) + nodejs_entry_base 例如 http://192.168.2.61(用于 04-01) + +示例: + export nginx_entry_base=http://192.168.2.61 + export VERIFY_TEARDOWN=1 + ./scripts/verify.sh run 02-05 +EOF +} + +ansible_verify() { # Run ansible/playbooks/verify/${doc_id}.yml for the doc_id in $1; exits 1 when the inventory or the playbook file is missing. + local doc_id="$1" + local inv="${ANSIBLE_INVENTORY:-${ROOT}/ansible/inventory.ini}" + local pb_single="${ROOT}/ansible/playbooks/verify/${doc_id}.yml" + if [[ ! -f "$inv" ]]; then + echo "[ERR] inventory 不存在:$inv" >&2 + exit 1 + fi + if [[ !
-f "$pb_single" ]]; then + echo "[ERR] verify playbook 不存在(fail-fast):$pb_single" >&2 + exit 1 + fi + echo "[RUN] ansible-playbook -i $inv $pb_single" + ansible-playbook -i "$inv" "$pb_single" +} + +main() { # Entry point: load the optional env file, then dispatch on the sub-command in $1 (list, run DOC_ID, run-all). + load_env + local cmd="${1:-}" + case "$cmd" in + ""|-h|--help) usage ;; + list) + parse_doc_ids_from_matrix + ;; + run) + local doc_id="${2:?need doc_id like 02-05}" + ansible_verify "$doc_id" + ;; + run-all) + local id + while IFS= read -r -u 3 id; do # doc_ids arrive on fd 3 so ansible-playbook keeps the caller's stdin and cannot consume the remaining ids + echo "" + echo "########################################## $id" + ansible_verify "$id" + done 3< <(parse_doc_ids_from_matrix) + ;; + *) + echo "[ERR] unknown cmd: $cmd" >&2 + usage + exit 1 + ;; + esac +} + +main "$@" +