日常更新

This commit is contained in:
2026-03-29 09:08:01 +08:00
parent 31709425e2
commit befdefd222
224 changed files with 7240 additions and 3297 deletions

56
ansible/bin/deploy-lab.sh Executable file
View File

@@ -0,0 +1,56 @@
#!/usr/bin/env bash
# Lab deployment entry point: maps a sub-command to a verify playbook and runs it via ansible-playbook.
# Strict mode: abort on command failure, on use of an unset variable, and on a failing pipeline stage.
set -euo pipefail
# Repository root, resolved as two directories above the directory that holds this script.
ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)"
# shellcheck disable=SC1091
source "${ROOT}/ansible/lib/lib-ansible-lab.sh"
# Not defined in this script; expected to come from the library sourced above.
# NOTE(review): presumably exports shared Ansible settings — confirm in lib-ansible-lab.sh.
ansible_lab_export_config
# Load optional overrides from ansible/env/.env.verify.
# While the file is sourced, allexport is on, so every variable it assigns is
# exported. A missing file is not an error and prints nothing.
# Globals: ROOT (read)
# Outputs: one "[OK]" line on stdout when the file was loaded
load_env() {
  local _lab_env_file="${ROOT}/ansible/env/.env.verify"

  # Nothing to load: succeed quietly.
  [[ -f "${_lab_env_file}" ]] || return 0

  set -o allexport
  # shellcheck disable=SC1090,SC1091
  source "${_lab_env_file}"
  set +o allexport
  echo "[OK] 已加载 ansible/env/.env.verify"
}
# Print the sub-command summary (two lines) to stdout.
usage() {
  printf '%s\n' \
    '用法ansible/bin/deploy-lab.sh <子命令>' \
    '子命令k3s | longhorn | nginx-matrix | nginx-matrix-tls'
}
# Run ansible-playbook against the lab inventory.
# Globals:   ROOT (read)
#            ANSIBLE_INVENTORY (read, optional; defaults to <ROOT>/ansible/inventory.ini)
#            DEPLOY_VERIFY_TEARDOWN (read, optional; defaults to 0)
# Arguments: playbook path plus any extra ansible-playbook arguments, forwarded unchanged
# Outputs:   a "[RUN]" line echoing the command on stdout; "[ERR]" diagnostics on stderr
# Exits:     1 (terminating the script) when the inventory file is missing,
#            ansible-playbook is not found, or the inventory key check fails
# Returns:   the status of ansible-playbook otherwise
ansible_wrap() {
  local inventory="${ANSIBLE_INVENTORY:-${ROOT}/ansible/inventory.ini}"
  local teardown="${DEPLOY_VERIFY_TEARDOWN:-0}"

  if [[ ! -f "${inventory}" ]]; then
    echo "[ERR] inventory 不存在:${inventory}" >&2
    exit 1
  fi
  if ! command -v ansible-playbook >/dev/null 2>&1; then
    echo "[ERR] 未找到 ansible-playbook" >&2
    exit 1
  fi
  # The check itself is responsible for reporting what is wrong.
  if ! ansible_lab_check_inventory_keys "${inventory}"; then
    exit 1
  fi

  echo "[RUN] ansible-playbook -i ${inventory} -e VERIFY_TEARDOWN=${teardown} $*"
  ansible-playbook -i "${inventory}" -e "VERIFY_TEARDOWN=${teardown}" "$@"
}
# Install k3s through the 01-05 verify playbook.
# When K3S_PREPARE_STORAGE is exactly "true", a storage-preparation pass of the
# same playbook is run first; any other value skips that pass.
# Globals: ROOT (read), K3S_PREPARE_STORAGE (read, optional)
cmd_k3s() {
  local playbook="${ROOT}/ansible/playbooks/verify/01-05.yml"
  local prepare_storage="${K3S_PREPARE_STORAGE:-false}"

  case "${prepare_storage}" in
    true)
      ansible_wrap "${playbook}" \
        -e 'k3s_do_prepare_storage=true' \
        -e 'k3s_prepare_storage=true'
      ;;
  esac

  ansible_wrap "${playbook}" -e 'k3s_do_install=true'
}
# Entry point: load optional env overrides, then dispatch on the first argument.
# Arguments: $1 - sub-command (k3s | longhorn | nginx-matrix | nginx-matrix-tls);
#                 an empty value, -h or --help prints the usage text
# Exits:     1 after printing an error and the usage text for an unknown sub-command
main() {
  load_env

  local subcommand="${1:-}"
  local verify_dir="${ROOT}/ansible/playbooks/verify"

  case "${subcommand}" in
    '' | -h | --help)
      usage
      ;;
    k3s)
      cmd_k3s
      ;;
    longhorn)
      ansible_wrap "${verify_dir}/03-07.yml"
      ;;
    nginx-matrix)
      ansible_wrap "${verify_dir}/02-05.yml"
      ;;
    nginx-matrix-tls)
      ansible_wrap "${verify_dir}/03-02.yml" -e 'nginx_matrix_tls_enable=true'
      ;;
    *)
      echo "[ERR] 未知子命令:${subcommand}" >&2
      usage
      exit 1
      ;;
  esac
}
# Script entry point: hand every command-line argument to main.
main "$@"

6
ansible/bin/scaffold-doc-id.sh Executable file
View File

@@ -0,0 +1,6 @@
#!/usr/bin/env bash
# Scaffold the minimal skeleton for an execution-domain doc_id
# (docs + ansible/files + verify playbook).
# Thin launcher: the work is done by ansible/tools/scaffold_doc_id.py, which
# receives every command-line argument unchanged.
set -euo pipefail
script_dir="$(dirname "${BASH_SOURCE[0]}")"
# Repository root is two levels above the directory holding this script.
repo_root="$(cd "${script_dir}/../.." && pwd)"
scaffold_tool="${repo_root}/ansible/tools/scaffold_doc_id.py"
# exec replaces this shell, so the Python exit status becomes the script's own.
exec python3 "${scaffold_tool}" "$@"

304
ansible/bin/verify.sh Executable file
View File

@@ -0,0 +1,304 @@
#!/usr/bin/env bash
# Verification entry point. ansible/playbooks/verify/<doc_id>.yml is the single
# source of truth for what gets executed:
#   - run <XX-YY>: run the verify playbook of one document
#   - run-all:     run every <doc_id>.yml present in the verify directory, in
#                  sorted order (execution domain only: XX>0 && YY>0)
#   - full:        preflight + run-all
set -euo pipefail
# Repository root, resolved as two directories above the directory that holds this script.
ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)"
# shellcheck disable=SC1091
source "${ROOT}/ansible/lib/lib-ansible-lab.sh"
ansible_lab_export_config
# VERIFY_TEARDOWN is deliberately NOT defaulted here. load_env (the first step
# of main) snapshots whether the caller set it and applies the precedence
#   caller environment > ansible/env/.env.verify > 1.
# Exporting a default at this point would make the variable look caller-set,
# so a value coming from .env.verify would always be discarded.
STATUS_DIR="${ROOT}/.status"
TEARDOWN_STATE_JSON="${STATUS_DIR}/verify-teardown-state.json"
# Prepare the environment for a verification run.
#   - ANSIBLE_CONFIG defaults to <ROOT>/ansible/ansible.cfg unless already set.
#   - ansible/env/.env.verify is sourced when present; everything it assigns is
#     exported (allexport is on while it is read).
#   - If VERIFY_TEARDOWN was set before the file was read, that value is
#     restored afterwards; otherwise the file's value, or 1, is used.
# Globals: ROOT (read); ANSIBLE_CONFIG, VERIFY_TEARDOWN (exported)
# Outputs: an "[OK]" or "[TIP]" line followed by an "[INFO]" line on stdout
load_env() {
  local _verify_env_file="${ROOT}/ansible/env/.env.verify"
  # The sentinel distinguishes "unset" from "set but empty".
  local _caller_teardown="${VERIFY_TEARDOWN-__unset__}"

  export ANSIBLE_CONFIG="${ANSIBLE_CONFIG:-${ROOT}/ansible/ansible.cfg}"

  if [[ ! -f "${_verify_env_file}" ]]; then
    echo "[TIP] 未发现 ansible/env/.env.verify按默认变量继续"
  else
    set -o allexport
    # shellcheck disable=SC1090,SC1091
    source "${_verify_env_file}"
    set +o allexport
    echo "[OK] 已加载 ansible/env/.env.verify"
  fi

  if [[ "${_caller_teardown}" == "__unset__" ]]; then
    VERIFY_TEARDOWN="${VERIFY_TEARDOWN:-1}"
  else
    VERIFY_TEARDOWN="${_caller_teardown}"
  fi
  export VERIFY_TEARDOWN

  echo "[INFO] ANSIBLE_CONFIG=${ANSIBLE_CONFIG}"
}
# Persist the teardown mode of the current run so a later run can detect
# leftovers from a "keep the scene" (VERIFY_TEARDOWN=0) run.
# The state file is a single-line JSON object:
#   {"updated_at":"<UTC timestamp>","verify_teardown":<value>}
# An integer VERIFY_TEARDOWN is written as a JSON number; any other value is
# written as a JSON string (backslashes and double quotes escaped) so the file
# always stays parseable.
# Globals: STATUS_DIR, TEARDOWN_STATE_JSON (read); VERIFY_TEARDOWN (read, default 1)
record_teardown_state() {
  local td="${VERIFY_TEARDOWN:-1}"
  local int_re='^(0|-?[1-9][0-9]*)$'
  local bs='\' dq='"'
  local ts td_json

  mkdir -p "${STATUS_DIR}"
  ts="$(date -u +"%Y-%m-%dT%H:%M:%SZ")"

  if [[ "${td}" =~ ${int_re} ]]; then
    td_json="${td}"
  else
    # Escape backslashes first so the quote escapes are not doubled afterwards.
    td_json="${td//"${bs}"/"${bs}${bs}"}"
    td_json="${td_json//"${dq}"/"${bs}${dq}"}"
    td_json="${dq}${td_json}${dq}"
  fi

  printf '{"updated_at":"%s","verify_teardown":%s}\n' "${ts}" "${td_json}" \
    > "${TEARDOWN_STATE_JSON}"
}
# Warn about teardown modes that can contaminate later runs.
#   - VERIFY_TEARDOWN=0: warn that resources are kept and may pollute a later
#     full/run-all.
#   - VERIFY_TEARDOWN=1 while the recorded state of the previous run says 0:
#     warn that leftovers of that run may still exist.
# Human-readable warnings go to stderr; the machine-readable "[OC]" lines go to
# stdout.
# Globals: VERIFY_TEARDOWN (read, default 1), TEARDOWN_STATE_JSON (read)
warn_teardown_mode() {
  local td="${VERIFY_TEARDOWN:-1}"

  if [[ "${td}" == "0" ]]; then
    echo "[WARN] VERIFY_TEARDOWN=0保留现场模式已启用可能污染后续 full/run-all" >&2
    echo "[TIP] 恢复建议:切回 VERIFY_TEARDOWN=1 并复跑主线;必要时手工清理残留命名空间/资源" >&2
    echo "[OC] doc_id=preflight result=verified phase=preflight assertion=teardown_mode verify_teardown=0"
  fi

  if [[ -f "${TEARDOWN_STATE_JSON}" && "${td}" == "1" ]]; then
    local last_td=""
    # Best effort: an unreadable or malformed state file (or a missing python3)
    # simply yields an empty value, i.e. no warning. The heredoc body must stay
    # at column 0 with real indentation, otherwise Python rejects it and the
    # warning below can never fire.
    last_td="$(TEARDOWN_STATE_JSON="${TEARDOWN_STATE_JSON}" python3 - <<'PY' 2>/dev/null || true
import json, os, pathlib

p = pathlib.Path(os.environ["TEARDOWN_STATE_JSON"])
try:
    d = json.loads(p.read_text(encoding="utf-8"))
    print(d.get("verify_teardown", ""))
except Exception:
    pass
PY
)"
    if [[ "${last_td}" == "0" ]]; then
      echo "[WARN] 检测到上次验证使用 VERIFY_TEARDOWN=0当前虽为 1但可能存在残留污染" >&2
      echo "[TIP] 建议VERIFY_TEARDOWN=1 ./ansible/bin/verify.sh full或 run-all以清理并回归" >&2
      echo "[OC] doc_id=preflight result=verified phase=preflight assertion=teardown_state last_verify_teardown=0 current_verify_teardown=1"
    fi
  fi
}
# Execution-domain doc_id: "XX-YY", two digits each, both between 01 and 99.
DOC_ID_EXEC_RE='^(0[1-9]|[1-9][0-9])-(0[1-9]|[1-9][0-9])$'

# Succeed when the argument is an execution-domain doc_id.
# Arguments: $1 - candidate doc_id
# Returns:   0 on a match, 1 otherwise
is_exec_doc_id() {
  local candidate="$1"
  if [[ "${candidate}" =~ ${DOC_ID_EXEC_RE} ]]; then
    return 0
  fi
  return 1
}
# Print, one per line and sorted, the execution-domain doc_ids that have a
# playbook <ROOT>/ansible/playbooks/verify/<doc_id>.yml.
# Arguments (all optional):
#   $1 - series: keep only ids starting with "<series>-"
#   $2 - id_regex: keep only ids the Python regex finds a match in (re.search)
#   $3 - exclude_noop: "1" drops playbooks whose text contains "noop verify"
#   $4 - require_teardown: "1" keeps only playbooks whose text mentions
#        VERIFY_TEARDOWN or verify_teardown
# Globals: ROOT (read)
# Returns: non-zero, with an "[ERR]" line on stderr, when the verify directory
#          is missing or the regex does not compile
list_doc_ids_from_verify_dir() {
  local series="${1:-}"
  local id_regex="${2:-}"
  local exclude_noop="${3:-0}"
  local require_teardown="${4:-0}"
  # The filters travel through the environment so no shell text is ever
  # interpolated into the Python source. The heredoc body must stay at column 0
  # with real indentation: Python is whitespace-sensitive.
  ROOT="${ROOT}" SERIES="${series}" ID_REGEX="${id_regex}" EXCLUDE_NOOP="${exclude_noop}" REQUIRE_TEARDOWN="${require_teardown}" python3 - <<'PY'
import os
import re
import sys
from pathlib import Path

root = Path(os.environ["ROOT"])
verify_dir = root / "ansible" / "playbooks" / "verify"
series = os.environ.get("SERIES", "").strip()
id_regex = os.environ.get("ID_REGEX", "").strip()
exclude_noop = os.environ.get("EXCLUDE_NOOP", "0") == "1"
require_teardown = os.environ.get("REQUIRE_TEARDOWN", "0") == "1"

if not verify_dir.is_dir():
    sys.exit(f"[ERR] verify 目录不存在:{verify_dir}")

pat = re.compile(r"^(?P<id>(0[1-9]|[1-9][0-9])-(0[1-9]|[1-9][0-9]))\.yml$")
try:
    id_pat = re.compile(id_regex) if id_regex else None
except re.error as exc:
    sys.exit(f"[ERR] --id-regex 无效:{id_regex} ({exc})")

ids = []
for p in verify_dir.iterdir():
    m = pat.match(p.name)
    if not m:
        continue
    doc_id = m.group("id")
    if series and not doc_id.startswith(f"{series}-"):
        continue
    if id_pat and not id_pat.search(doc_id):
        continue
    # Only read the playbook when a content-based filter was requested.
    if exclude_noop or require_teardown:
        content = p.read_text(encoding="utf-8", errors="ignore")
        if exclude_noop and "noop verify" in content:
            continue
        if require_teardown and ("VERIFY_TEARDOWN" not in content and "verify_teardown" not in content):
            continue
    ids.append(doc_id)

for x in sorted(set(ids)):
    print(x)
PY
}
# Control-side preflight: tool availability, inventory sanity, reachability of
# the k3s_server group and, optionally, a cluster-side kubectl check.
# Hard failures print an "[OC] ... result=failed" line and exit 2. Missing
# external dependencies (ACME, Cloudflare, NFS, third-party probe) do not fail
# the preflight; they are reported as "gated" and the function returns 0.
# Globals: ROOT, ANSIBLE_INVENTORY, VERIFY_TEARDOWN, VERIFY_PREFLIGHT_CLUSTER,
#          ACME_EMAIL, CF_API_TOKEN, NFS_SERVER_IP, NFS_EXPORT_PATH,
#          WORKSTATION_SSH, nginx_entry_base, nodejs_entry_base (all read)
# Note:    the three helpers below are defined when this function runs and are
#          global from then on.
run_preflight() {
  local inventory="${ANSIBLE_INVENTORY:-${ROOT}/ansible/inventory.ini}"

  # Emit an OC-like "failed" line for humans/tools (minimal; stdout is the
  # source of truth). $1 is the assertion name, the rest are key=value details.
  oc_failed() {
    local assertion="$1"
    shift || true
    echo "[OC] doc_id=preflight result=failed phase=preflight assertion=${assertion} $*"
  }

  # Emit the OC "gated" line: $1 = missing dependencies, $2 = affected scopes.
  oc_gated() {
    local missing="$1"
    local scope="$2"
    echo "[OC] doc_id=preflight result=gated phase=preflight assertion=dependency_check missing_dependency=${missing} skip_scope=\"${scope}\""
  }

  # Exit 2 unless command $1 can be resolved.
  need_cmd_or_fail() {
    local cmd="$1"
    command -v "${cmd}" >/dev/null 2>&1 && return 0
    echo "[ERR] 未找到命令:${cmd}" >&2
    oc_failed "missing_cmd" "missing_cmd=${cmd}"
    exit 2
  }

  local required_cmd
  for required_cmd in ansible-playbook ansible; do
    need_cmd_or_fail "${required_cmd}"
  done

  warn_teardown_mode
  record_teardown_state

  if [[ ! -f "${inventory}" ]]; then
    echo "[ERR] inventory 不存在:${inventory}" >&2
    oc_failed "missing_inventory" "inventory=${inventory}"
    exit 2
  fi
  if ! ansible_lab_check_inventory_keys "${inventory}"; then
    oc_failed "inventory_keys" "inventory=${inventory}"
    exit 2
  fi

  echo "[INFO] 变量边界inventory=${inventory} | group_vars=ansible/group_vars/all.yml | env=ansible/env/.env.verify"
  echo "[INFO] 关键变量VERIFY_TEARDOWN=${VERIFY_TEARDOWN:-1} nginx_entry_base=${nginx_entry_base:-<unset>} nodejs_entry_base=${nodejs_entry_base:-<unset>}"

  echo "[RUN] ansible k3s_server -m ping"
  if ! ansible k3s_server -i "${inventory}" -m ping; then
    echo "[ERR] ansible ping 失败k3s_server 不可达" >&2
    oc_failed "ansible_ping" "target_group=k3s_server"
    exit 2
  fi

  # Optional cluster-side check (still a fail-fast, control-side hard failure).
  if [[ "${VERIFY_PREFLIGHT_CLUSTER:-0}" == "1" ]] \
    && ! ansible k3s_server -i "${inventory}" -b -m ansible.builtin.shell -a \
      'KUBECONFIG=/etc/rancher/k3s/k3s.yaml kubectl get nodes'; then
    echo "[ERR] kubectl 集群检查失败VERIFY_PREFLIGHT_CLUSTER=1" >&2
    oc_failed "kubectl_get_nodes"
    exit 2
  fi

  # External dependencies: a missing one must not fail the preflight (EC2) but
  # has to be reported as explicitly gated. Only the dependent scopes are
  # gated; runtime verification can still proceed for other doc_ids.
  local -a missing_deps=()
  local -a gated_scopes=()

  [[ -n "${ACME_EMAIL:-}" ]] || {
    missing_deps+=("acme")
    gated_scopes+=("acme/tls issuance")
  }
  # Epic 4: Traefik ACME DNS-01 only needs CF_API_TOKEN (see the "ensure
  # secret" step in 03-02); ZONE_* is not hard-gated by the preflight.
  [[ -n "${CF_API_TOKEN:-}" ]] || {
    missing_deps+=("cloudflare")
    gated_scopes+=("cloudflare api token / acme dns01")
  }
  [[ -n "${NFS_SERVER_IP:-}" && -n "${NFS_EXPORT_PATH:-}" ]] || {
    missing_deps+=("nfs")
    gated_scopes+=("nfs pv/pvc")
  }
  [[ -n "${WORKSTATION_SSH:-}" ]] || {
    missing_deps+=("third_party_probe")
    gated_scopes+=("third-party probe (WORKSTATION_SSH e.g. jack@ylc65)")
  }

  if (( ${#missing_deps[@]} > 0 )); then
    local missing_csv scope_text
    missing_csv="$(IFS=,; echo "${missing_deps[*]}")"
    # NOTE(review): "${array[*]}" joins with the FIRST character of IFS only,
    # so scopes end up separated by ";" with no following space. Kept as is
    # because the value is emitted in the skip_scope field.
    scope_text="$(IFS='; '; echo "${gated_scopes[*]}")"
    echo "[GATE] preflight external deps missing: ${missing_csv} (scopes: ${scope_text})"
    oc_gated "${missing_csv}" "${scope_text}"
    echo "[OK] preflight 通过带门控gated"
    return 0
  fi

  echo "[OC] doc_id=preflight result=verified phase=preflight assertion=connectivity"
  echo "[OK] preflight 通过"
}
# Run ansible_verify for every doc_id selected by the filters, in listing order.
# Arguments (all optional, forwarded to list_doc_ids_from_verify_dir):
#   $1 - series, $2 - id_regex, $3 - exclude_noop (0/1), $4 - require_teardown (0/1)
# Returns: the listing's non-zero status when the doc_id listing fails (nothing
#          is run in that case, instead of silently treating it as "no work")
run_all_verify() {
  local series="${1:-}"
  local id_regex="${2:-}"
  local exclude_noop="${3:-0}"
  local require_teardown="${4:-0}"
  local id_lines id

  # Collect the ids up front so a failing listing is noticed.
  id_lines="$(list_doc_ids_from_verify_dir "$series" "$id_regex" "$exclude_noop" "$require_teardown")" || return

  # The ids are read from file descriptor 3, not stdin: a child that reads
  # stdin (ansible-playbook and the ssh processes it spawns can) would
  # otherwise swallow the remaining ids and end the loop after the first one.
  while IFS= read -r id <&3; do
    # An empty listing still yields one empty here-string line; skip it.
    [[ -n "$id" ]] || continue
    echo ""
    echo "########################################## $id"
    ansible_verify "$id"
  done 3<<<"${id_lines}"
}
# Print the command and filter summary (three lines) to stdout.
usage() {
  printf '%s\n' \
    '用法ansible/bin/verify.sh <命令> [...]' \
    '命令flow | preflight | full | list | run <XX-YY> | run-all' \
    '筛选参数:--series <XX> | --id-regex <regex> | --exclude-noop | --require-teardown'
}
# Print the four-step workflow overview (connect, clean up, deploy, assert) to
# stdout. The text is static; nothing is expanded in it.
print_flow() {
  local -a steps=(
    '1 接入目标环境 inventory + 仓库同步;可选 source ansible/env/.env.verify'
    '2 环境与前置清理 轻量:各 verify playbook 的 teardown'
    '3 部署 ./ansible/bin/deploy-lab.sh k3s|longhorn|nginx-matrix*'
    '4 断言 ./ansible/bin/verify.sh run <XX-YY> / run-all'
  )
  printf '%s\n' "${steps[@]}"
}
# Run the verify playbook of one doc_id and report the outcome as "[OC]" lines.
# Arguments: $1 - doc_id (execution domain: XX-YY with XX>0 and YY>0)
# Globals:   ROOT, ANSIBLE_INVENTORY, VERIFY_TEARDOWN (read)
# Outputs:   the playbook output (stdout and stderr merged) followed by one
#            "[OC]" result line: gated when the output contains "[GATE]",
#            verified (plus two "[OC-EVIDENCE]" lines) otherwise, failed when
#            the playbook pipeline fails
# Exits:     1 on an invalid doc_id, a missing playbook or a missing inventory
# Returns:   1 when the playbook pipeline fails, 0 otherwise
ansible_verify() {
  local doc_id="$1"

  if ! is_exec_doc_id "${doc_id}"; then
    echo "[ERR] 非执行域 doc_id${doc_id}(仅允许 XX>0 且 YY>0" >&2
    echo "[OC] doc_id=${doc_id} result=failed phase=verify assertion=invalid_doc_id"
    exit 1
  fi

  local inventory="${ANSIBLE_INVENTORY:-${ROOT}/ansible/inventory.ini}"
  local playbook="${ROOT}/ansible/playbooks/verify/${doc_id}.yml"

  if [[ ! -f "${playbook}" ]]; then
    echo "[ERR] verify playbook 不存在:${playbook}" >&2
    echo "[OC] doc_id=${doc_id} result=failed phase=verify assertion=missing_playbook"
    exit 1
  fi
  if [[ ! -f "${inventory}" ]]; then
    echo "[ERR] inventory 不存在:${inventory}" >&2
    echo "[OC] doc_id=${doc_id} result=failed phase=verify assertion=missing_inventory"
    exit 1
  fi

  local teardown="${VERIFY_TEARDOWN:-1}"
  local captured
  captured="$(mktemp)"

  echo "[RUN] ansible-playbook -i ${inventory} -e VERIFY_TEARDOWN=${teardown} ${playbook}"
  # The output is shown live and also captured so it can be scanned for the
  # gate marker afterwards.
  if ! ansible-playbook -i "${inventory}" -e "VERIFY_TEARDOWN=${teardown}" "${playbook}" 2>&1 | tee "${captured}"; then
    echo "[OC] doc_id=${doc_id} result=failed phase=verify assertion=playbook_failed"
    rm -f "${captured}"
    return 1
  fi

  if grep -q '\[GATE\]' "${captured}"; then
    echo "[OC] doc_id=${doc_id} result=gated phase=verify assertion=playbook_gated"
  else
    # OC1: stable parse fields. OC3: evidence points at sections of the playbook output.
    echo "[OC] doc_id=${doc_id} result=verified phase=verify assertion=playbook_success"
    echo "[OC-EVIDENCE] doc_id=${doc_id} kind=cluster summary=\"see kubectl/assert output in playbook logs\""
    echo "[OC-EVIDENCE] doc_id=${doc_id} kind=entry summary=\"see http/tls/assert output in playbook logs\""
  fi
  rm -f "${captured}"
}
# Entry point: load the environment, then dispatch on the first argument.
# Arguments: $1   - command: flow | preflight | full | list | run <XX-YY> | run-all
#                   (empty, -h or --help prints the usage text)
#            rest - for full/list/run-all, the filters --series <XX>,
#                   --id-regex <regex>, --exclude-noop, --require-teardown;
#                   for run, the doc_id
# Exits:     1 on an unknown command, an unknown filter, or a filter that is
#            missing its value
main() {
  load_env
  local cmd="${1:-}"
  shift || true
  local series=""
  local id_regex=""
  local exclude_noop=0
  local require_teardown=0

  # Parse filter options into the locals above (visible here through bash's
  # dynamic scoping).
  parse_filter_args() {
    while [[ $# -gt 0 ]]; do
      case "$1" in
        --series | --id-regex)
          # Without this guard `shift 2` fails when the option is the last
          # argument: a silent exit under `set -e`, an endless loop without it.
          if [[ $# -lt 2 ]]; then
            echo "[ERR] 参数缺少取值:$1" >&2
            exit 1
          fi
          if [[ "$1" == "--series" ]]; then
            series="$2"
          else
            id_regex="$2"
          fi
          shift 2
          ;;
        --exclude-noop)
          exclude_noop=1
          shift
          ;;
        --require-teardown)
          require_teardown=1
          shift
          ;;
        *)
          echo "[ERR] 未知参数:$1" >&2
          exit 1
          ;;
      esac
    done
  }

  case "$cmd" in
    "" | -h | --help)
      usage
      ;;
    flow)
      print_flow
      ;;
    preflight)
      run_preflight
      ;;
    full)
      parse_filter_args "$@"
      run_preflight
      run_all_verify "$series" "$id_regex" "$exclude_noop" "$require_teardown"
      ;;
    list)
      parse_filter_args "$@"
      list_doc_ids_from_verify_dir "$series" "$id_regex" "$exclude_noop" "$require_teardown"
      ;;
    run)
      local doc_id="${1:?need doc_id like 02-05}"
      ansible_verify "$doc_id"
      ;;
    run-all)
      parse_filter_args "$@"
      run_all_verify "$series" "$id_regex" "$exclude_noop" "$require_teardown"
      ;;
    *)
      echo "[ERR] unknown cmd: $cmd" >&2
      usage
      exit 1
      ;;
  esac
}
# Script entry point: hand every command-line argument to main.
main "$@"