feat: 按 doc_id 重组 ansible/files 与验证框架
- ansible/files 改为与文档 XX-YY 对齐的目录结构,更新相关 playbook 路径 - 新增 scripts/verify.sh 与 ansible/playbooks/verify/*.yml,移除单体 verify-matrix.yml - 补充 docs/00-02 矩阵状态、00-05 验证框架与流程、00-04 环境与 ylc65 工作机说明 - 增加 k3s 存储准备、Longhorn、local-path 等 playbook 与辅助脚本 Made-with: Cursor
This commit is contained in:
103
scripts/.env.verify.example
Normal file
103
scripts/.env.verify.example
Normal file
@@ -0,0 +1,103 @@
|
||||
# Environment variable template for the verification matrix / orchestration scripts
# ---------------------------------------------------------------------------
# Usage: copy this file to .env.verify in this directory (do NOT commit it to Git),
# then from the repository root run:
#   set -a && source scripts/.env.verify && set +a
# or, in bash: source scripts/.env.verify
# Values below are placeholders only: replace every YOUR_* with a real value and keep
# secrets in your local .env.verify.
# ---------------------------------------------------------------------------

# --- Cluster and kubectl ---
# Commonly used when kubectl runs directly on the control node:
# export KUBECONFIG="${KUBECONFIG:-/etc/rancher/k3s/k3s.yaml}"
# When kubectl is executed on the remote side over SSH from the office machine:
# export K3S_CONTROL_SSH="ssh -o BatchMode=yes ylc61"
# export REMOTE_KUBECONFIG="/etc/rancher/k3s/k3s.yaml"
# When running kubectl on the control node itself, avoid looping back through SSH (keep if needed):
# export VERIFY_FORCE_LOCAL_KUBECTL=1
# When kubectl runs on the control node over SSH and is not in PATH, set REMOTE_KUBECTL="k3s kubectl"
# export REMOTE_KUBECTL="k3s kubectl"
# Short hostname of the control node (same as in the inventory; used to build URLs / SSH targets)
export K3S_SERVER_HOSTNAME="${K3S_SERVER_HOSTNAME:-ylc61}"
# Must match the ansible group_vars; referenced when verifying disks / doc 00-04
export K3S_DATA_DIR="${K3S_DATA_DIR:-/storage}"

# --- Ansible (install / re-verify k3s) ---
# Default to the inventory of the repository this file lives in. When BASH_SOURCE is
# available the repo root is derived from this file's own location, so the default does
# not depend on the caller's working directory; otherwise it falls back to $(pwd), which
# is only correct when the file is sourced from the repository root.
_verify_env_root="$(pwd)"
if [ -n "${BASH_SOURCE[0]:-}" ]; then
  _verify_env_root="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
fi
export ANSIBLE_INVENTORY="${ANSIBLE_INVENTORY:-${_verify_env_root}/ansible/inventory.ini}"
# Helper variable only; do not leak it into the caller's environment.
unset _verify_env_root
# phase2 (run-phase2-k3s-on-ylc61-as-jack.sh or manual):
# when true, run k3s-prepare-storage.yml first (must agree with group_vars)
export K3S_PREPARE_STORAGE="${K3S_PREPARE_STORAGE:-false}"
# phase2 is best run on the control node ylc61 (which has ansible-playbook); if the office
# machine has no ansible, use phase2-print and copy the commands to ylc61

# --- SSH key naming (matches the defaults of scripts/ssh/test-ssh.sh; paths are still hard-coded in that script) ---
# test-ssh uses: $K3S_SSH_KEY_DIR/${K3S_SSH_KEY_PREFIX}<inventory hostname>
# If you switch to another prefix, update test-ssh.sh as well, or use manual `ssh -i` only.
export K3S_SSH_KEY_DIR="${K3S_SSH_KEY_DIR:-$HOME/.ssh}"
export K3S_SSH_KEY_PREFIX="${K3S_SSH_KEY_PREFIX:-id_ed25519_k3s_}"
# Non-root user name for the first login in setup-k3s-workers-ssh.sh (interactive default: jack)
export SSH_USER="${SSH_USER:-jack}"
# test-ssh.sh: connection timeout in seconds (the script already honours TIMEOUT_SEC)
export TIMEOUT_SEC="${TIMEOUT_SEC:-5}"

# --- (the old automated verification has been retired) ---
# To bring automated verification back, follow the test framework design notes in docs/00-05.

# --- SSH: third-party verification host onecloud (NOT skippable: the matrix relies on curl/probes from outside the cluster) ---
# Example uses: the 02-xx nginx matrix reaches the Ingress from a third party; 01-07 curls
# OpenWrt:18080/18443 through onecloud.
# Unrelated to K3s node SSH, but non-interactive login must work (BatchMode plus a known IdentityFile is recommended).
# Orchestration scripts should always use ONECLOUD_SSH rather than a direct local curl, unless you
# deliberately switch to the local machine.
export ONECLOUD_SSH="${ONECLOUD_SSH:-ssh -o BatchMode=yes onecloud}"
# If an explicit key is needed, write the full command line, for example:
# export ONECLOUD_SSH="ssh -o BatchMode=yes -i ~/.ssh/id_ed25519_onecloud onecloud"

# --- NFS (03-06): server and export path; fill in the SSH command if cleanup must run on the server ---
export NFS_SERVER_HOST="${NFS_SERVER_HOST:-YOUR_NFS_IP_OR_HOSTNAME}"
export NFS_EXPORT_PATH="${NFS_EXPORT_PATH:-/export/k3s}"
# export NFS_SSH="ssh -o BatchMode=yes root@${NFS_SERVER_HOST}"

# --- Cloudflare (API / Tunnel; same names as scripts/cloudflare-delete-acme-challenge-dns.sh etc.) ---
# The DNS scripts use: CF_API_TOKEN, and ZONE_NAME or ZONE_ID
export CF_API_TOKEN="${CF_API_TOKEN:-}"
export ZONE_NAME="${ZONE_NAME:-jackadam.top}"
export ZONE_ID="${ZONE_ID:-}"
# Add separate tokens for Tunnel / Dashboard as needed (never commit real values):
# export CF_TUNNEL_TOKEN=""
# export CF_ACCOUNT_ID=""

# --- ACME / Traefik (03-02, 03-03): Let's Encrypt registration e-mail ---
# Must equal <YOUR_REAL_EMAIL> in HelmChartConfig / traefik-acme.yaml; reference ACME_EMAIL
# from orchestration or sed substitutions.
export ACME_EMAIL="${ACME_EMAIL:-}"
# The docs once used the staging CA for traefik-acme debugging; 1 = use the test CA
# (check yourself that it agrees with caserver in the yaml)
export ACME_CA_STAGING="${ACME_CA_STAGING:-0}"
export TRAEFIK_NAMESPACE="${TRAEFIK_NAMESPACE:-kube-system}"

# --- TLS verification hostnames (02-05 / 03-02 matrix: curl, openssl s_client) ---
# Comma-separated; must match the real DNS records under ZONE_NAME. Do not commit sensitive
# subdomains; list them only in your local copy if needed.
export VERIFY_TLS_HOSTS="${VERIFY_TLS_HOSTS:-test01.jackadam.top,test02.jackadam.top,test03.jackadam.top,test04.jackadam.top}"

# --- Longhorn (03-07 / ansible longhorn-install) ---
export LONGHORN_NAMESPACE="${LONGHORN_NAMESPACE:-longhorn-system}"

# --- Optional skips (only the items below; the ONECLOUD and ARMV7 real-device paths are not optional, see further down) ---
export SKIP_HA="${SKIP_HA:-1}"
# Set to 0 and fill in ARMV7_* when running 01-03 / 01-05; orchestration scripts must not
# pretend to pass when no device is available
export SKIP_ARMV7="${SKIP_ARMV7:-1}"
export SKIP_GITOPS="${SKIP_GITOPS:-1}"

# --- armv7 (01-03 Docker, 01-05 NFS): NOT skippable; like ONECLOUD, orchestration must go through ARMV7_* explicitly ---
# For 01-03 / 01-05: set SKIP_ARMV7=0 and fill in ARMV7_SSH (either the same host as onecloud
# or a direct connection to the arm box, depending on your environment).
# ARMV7_NFS_SSH defaults to ARMV7_SSH; override it only if NFS lives on a different arm host.
export ARMV7_SSH="${ARMV7_SSH:-}"
export ARMV7_NFS_SSH="${ARMV7_NFS_SSH:-$ARMV7_SSH}"

# --- OpenWrt / 01-07 (used on its own, unrelated to the four K3s nodes) ---
# export OPENWRT_SSH="ssh -o BatchMode=yes root@192.168.x.x"
# export OPENWRT_HAPROXY_HTTP_PORT="18080"
# export OPENWRT_HAPROXY_HTTPS_PORT="18443"
# Host list for the third-party curl with --https-hosts in doc 01-07 (comma-separated; may equal VERIFY_TLS_HOSTS)
# export OPENWRT_VERIFY_HTTPS_HOSTS="test01.jackadam.top,..."

# --- Cross-reference with the existing scripts/*.sh (scripts not listed here are not in the repo) ---
# cloudflare-delete-acme-challenge-dns.sh → CF_API_TOKEN, ZONE_NAME, ZONE_ID
# k3s-delete-lab-stacks.sh → KUBECONFIG
# ssh/test-ssh.sh → TIMEOUT_SEC; key path currently fixed to $HOME/.ssh/id_ed25519_k3s_<host>
# ssh/setup-k3s-workers-ssh.sh → interactive inventory + SSH_USER; never store the optional one-time password in this file
|
||||
@@ -2,6 +2,45 @@
|
||||
|
||||
本目录集中维护通用运维脚本。统一约定:**在仓库根目录执行**,使用 `./scripts/...` 路径调用。
|
||||
|
||||
## 验证编排环境变量(可选)
|
||||
|
||||
复制 [`scripts/.env.verify.example`](.env.verify.example) 为 `scripts/.env.verify` 并填写本机值;**勿提交** `scripts/.env.verify`(已在仓库 `.gitignore` 中忽略)。其中 **`ONECLOUD_SSH`**(默认 `ssh … onecloud`)用于矩阵里**集群外**第三方 curl 等验证,与 `SKIP_*` 无关;**`ARMV7_SSH` / `ARMV7_NFS_SSH`** 用于 `01-03` / `01-05` 实机,同样不属「可忽略」项,与 `SKIP_ARMV7=0` 配对使用。**`ACME_EMAIL`** 供 Traefik ACME(`03-02` / `03-03`);另有 **`VERIFY_TLS_HOSTS`**、`K3S_SERVER_HOSTNAME`、`TIMEOUT_SEC`(`test-ssh.sh` 已读)、`LONGHORN_NAMESPACE` 等,完整列表与「和现有 `scripts/*.sh` 的对照」见 [`.env.verify.example`](.env.verify.example) 文末注释。加载示例:
|
||||
|
||||
```bash
|
||||
set -a && source scripts/.env.verify && set +a
|
||||
```
|
||||
|
||||
## 验证框架状态(自动化已移除)
|
||||
|
||||
旧的验证矩阵编排与静态校验 CI 已下线;`docs/00-02-验证矩阵.md` 仍作为“待验证列表/状态记录”,当前以手工验证为准。
|
||||
|
||||
如果你要重新落地自动化验证框架,请以设计说明 [`docs/00-05-测试与验证框架.md`](../docs/00-05-测试与验证框架.md) 中的分层与 `doc_id` 映射约定为基线。
|
||||
|
||||
### 用 ylc61 跑 Ansible 装 K3s(推荐)
|
||||
|
||||
如果办公机(Windows)没有 `ansible-playbook`,有两种方式:
|
||||
|
||||
1. 方式 A:把仓库同步到 `ylc61`,然后在 `ylc61` 上直接跑 playbook
|
||||
|
||||
```bash
|
||||
sudo dnf install -y ansible # Fedora;未装过时
|
||||
cd ~/实验室建设 # 按你实际路径;若在 /home/jack 则相应 cd
|
||||
cd ansible
|
||||
# 可选:第二块盘 → /storage 先准备
|
||||
# ansible-playbook -i inventory.ini playbooks/k3s-prepare-storage.yml
|
||||
ansible-playbook -i inventory.ini playbooks/k3s-init-and-install.yml
|
||||
```
|
||||
|
||||
2. 方式 B:从办公机一键触发(内部通过 SSH 在 `ylc61` 上以 `jack` 身份执行)
|
||||
|
||||
```bash
|
||||
# 可选:先准备 /storage
|
||||
export K3S_PREPARE_STORAGE=false # 或 true
|
||||
./scripts/ssh/run-phase2-k3s-on-ylc61-as-jack.sh
|
||||
```
|
||||
|
||||
**密钥在 jack**:`inventory.ini` 中私钥路径会随执行用户变化。上述方式会确保在 `ylc61` 以 `jack` 身份运行,避免把私钥解析到 `/root/.ssh/`。
|
||||
|
||||
## 目录
|
||||
- `scripts/k3s-delete-lab-stacks.sh`
|
||||
- **按集群里实际资源**遍历删除:用 `kubectl get` 枚举各命名空间下的 Deployment/Service/Ingress/IngressRoute 等再 `kubectl delete`(**不读仓库 YAML 目录**);默认跳过 `kube-system` 等系统命名空间;`--preview` 只列资源;`--namespaces` 限定 NS;`--with-pvc` / `--with-configmaps` / `--with-secrets`(需 `jq`)按需打开
|
||||
|
||||
6
scripts/ssh/smoke-verify-matrix-on-ylc61.sh
Normal file
6
scripts/ssh/smoke-verify-matrix-on-ylc61.sh
Normal file
@@ -0,0 +1,6 @@
|
||||
#!/usr/bin/env bash
# Run from the office machine (Git Bash): smoke-run the sample doc_ids on ylc61 over ssh.
#
# Optional local environment, forwarded to the remote run when set and non-empty:
#   VERIFY_TEARDOWN     (remote default: 1)
#   nginx_entry_base    (remote default: http://192.168.2.61)
#   nodejs_entry_base   (remote default: http://192.168.2.61)
set -euo pipefail

#######################################
# Print an `export NAME=...` fragment for the remote script.
# A non-empty local value is forwarded shell-quoted; otherwise the fragment keeps a
# ${NAME:-default} expansion that is resolved on the remote side.
# Arguments: $1 - variable name, $2 - default value
# Outputs:   the fragment on stdout (no trailing newline)
#######################################
remote_export() {
  local name="$1"
  local fallback="$2"
  if [[ -n "${!name:-}" ]]; then
    printf 'export %s=%q' "$name" "${!name}"
  else
    # shellcheck disable=SC2016  # the expansion is meant to happen remotely
    printf 'export %s="${%s:-%s}"' "$name" "$name" "$fallback"
  fi
}

main() {
  local -a doc_ids=(02-05 03-05 03-07 04-01)
  local remote_script id

  remote_script="cd /home/jack && chmod +x scripts/verify.sh"
  remote_script+=" && $(remote_export VERIFY_TEARDOWN 1)"
  remote_script+=" && $(remote_export nginx_entry_base http://192.168.2.61)"
  remote_script+=" && $(remote_export nodejs_entry_base http://192.168.2.61)"
  for id in "${doc_ids[@]}"; do
    remote_script+=" && ./scripts/verify.sh run ${id}"
  done

  # Quote the whole script once more so the remote login shell hands it to `bash -lc`
  # as a single argument, without expanding anything itself.
  exec ssh -o BatchMode=yes ylc61 "bash -lc $(printf '%q' "${remote_script}")"
}

main "$@"
|
||||
|
||||
101
scripts/verify.sh
Normal file
101
scripts/verify.sh
Normal file
@@ -0,0 +1,101 @@
|
||||
#!/usr/bin/env bash
# Ansible verification entry point: uses docs/00-02-验证矩阵.md as the execution list and
# runs the verify playbook of each doc_id (deploy → verify → teardown).
# Recommended: run from the repository root on ylc61 (the control node).
set -euo pipefail

# Repository root, derived from this script's location (the parent of scripts/).
ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
# Verification matrix document that lists the doc_ids.
MATRIX_MD="${ROOT}/docs/00-02-验证矩阵.md"
|
||||
|
||||
#######################################
# Source scripts/.env.verify when it exists, exporting every variable it assigns.
# Globals:  ROOT (read)
# Outputs:  a confirmation line on stderr, so the stdout of commands such as `list`
#           carries nothing but their own output
# Returns:  0 when the file is absent or was sourced
#######################################
load_env() {
  if [[ -f "${ROOT}/scripts/.env.verify" ]]; then
    # allexport: plain assignments inside the env file become exported variables.
    set -a
    # shellcheck disable=SC1091
    source "${ROOT}/scripts/.env.verify"
    set +a
    echo "[OK] 已加载 scripts/.env.verify" >&2
  fi
}
|
||||
|
||||
#######################################
# List the doc_ids referenced by the verification matrix.
# Globals:  MATRIX_MD (read)
# Outputs:  one XX-YY id per line on stdout, in order of first appearance, de-duplicated
# Returns:  1 (with a message on stderr) when the matrix file does not exist
#######################################
parse_doc_ids_from_matrix() {
  if [[ ! -f "${MATRIX_MD}" ]]; then
    echo "[ERR] matrix 不存在:${MATRIX_MD}" >&2
    return 1
  fi
  # Take the first backtick-quoted file name of the form `XX-YY-*.md` on each line and
  # print its XX-YY prefix.
  # shellcheck disable=SC2016
  awk '
    match($0, /`[0-9][0-9]-[0-9][0-9]-[^`]+\.md`/) {
      # Drop the surrounding backticks, then keep the 5-character XX-YY prefix.
      s = substr($0, RSTART+1, RLENGTH-2);
      id = substr(s, 1, 5);
      if (!seen[id]++) print id;
    }
  ' "${MATRIX_MD}"
}
|
||||
|
||||
#######################################
# Print the command-line help text.
# Outputs: the help text on stdout
#######################################
usage() {
  cat <<'EOF'
用法:scripts/verify.sh <命令> [...]

命令:
  list           从验证矩阵列出 doc_id
  run <XX-YY>    运行指定 doc_id(执行 ansible/playbooks/verify/<XX-YY>.yml)
  run-all        按验证矩阵顺序运行全部 doc_id(fail-fast)
  -h, --help     显示本帮助

环境变量:
  VERIFY_TEARDOWN=1    验证后清理(默认 1)
  nginx_entry_base     例如 http://192.168.2.61(用于 02-05)
  nodejs_entry_base    例如 http://192.168.2.61(用于 04-01)

示例:
  export nginx_entry_base=http://192.168.2.61
  export VERIFY_TEARDOWN=1
  ./scripts/verify.sh run 02-05
EOF
}
|
||||
|
||||
#######################################
# Run the verify playbook that belongs to one doc_id.
# Globals:   ROOT (read), ANSIBLE_INVENTORY (read; defaults to ${ROOT}/ansible/inventory.ini)
# Arguments: $1 - doc_id such as 02-05; selects ansible/playbooks/verify/<doc_id>.yml
# Outputs:   the command about to run on stdout, errors on stderr
# Returns:   1 on an invalid doc_id, a missing inventory/playbook or a missing
#            ansible-playbook binary; otherwise the exit status of ansible-playbook
#######################################
ansible_verify() {
  local doc_id="$1"
  local inv="${ANSIBLE_INVENTORY:-${ROOT}/ansible/inventory.ini}"

  # A doc_id is a bare file stem; refusing "/" keeps the playbook path inside verify/.
  if [[ -z "$doc_id" || "$doc_id" == */* ]]; then
    echo "[ERR] doc_id 非法(应形如 02-05):$doc_id" >&2
    return 1
  fi

  local pb_single="${ROOT}/ansible/playbooks/verify/${doc_id}.yml"
  if [[ ! -f "$inv" ]]; then
    echo "[ERR] inventory 不存在:$inv" >&2
    return 1
  fi
  if [[ ! -f "$pb_single" ]]; then
    echo "[ERR] verify playbook 不存在(fail-fast):$pb_single" >&2
    return 1
  fi
  # Fail with a clear message instead of a bare "command not found".
  if ! command -v ansible-playbook >/dev/null 2>&1; then
    echo "[ERR] 未找到 ansible-playbook,请先安装 ansible" >&2
    return 1
  fi

  echo "[RUN] ansible-playbook -i $inv $pb_single"
  ansible-playbook -i "$inv" "$pb_single"
}
|
||||
|
||||
#######################################
# Command dispatcher.
# Arguments: $1 - command (list | run | run-all | -h | --help; empty prints the help)
#            $2 - doc_id, required by `run`
# Returns:   non-zero on an unknown command, an unreadable or empty matrix, or the
#            first failing verification
#######################################
main() {
  load_env
  local cmd="${1:-}"
  case "$cmd" in
    "" | -h | --help)
      usage
      ;;
    list)
      parse_doc_ids_from_matrix
      ;;
    run)
      local doc_id="${2:?need doc_id like 02-05}"
      ansible_verify "$doc_id"
      ;;
    run-all)
      local ids_raw id
      local -a ids=()
      # Collect the ids before running anything: a parser failure is then visible here
      # (it is lost when the loop reads from a process substitution), and the playbooks
      # do not share the loop's stdin.
      ids_raw="$(parse_doc_ids_from_matrix)" || return 1
      while IFS= read -r id; do
        if [[ -n "$id" ]]; then
          ids+=("$id")
        fi
      done <<<"$ids_raw"
      # An empty list means nothing would be verified; do not report that as success.
      if (( ${#ids[@]} == 0 )); then
        echo "[ERR] 验证矩阵中未找到任何 doc_id" >&2
        return 1
      fi
      for id in "${ids[@]}"; do
        echo ""
        echo "########################################## $id"
        # fail-fast: stop at the first doc_id whose verification fails.
        ansible_verify "$id" || return $?
      done
      ;;
    *)
      echo "[ERR] unknown cmd: $cmd" >&2
      usage >&2
      return 1
      ;;
  esac
}
|
||||
|
||||
main "$@"
|
||||
|
||||
Reference in New Issue
Block a user