Files
Deploy-Laboratory/ansible/playbooks/longhorn-install.yml
jack 8c43761962 feat: 按 doc_id 重组 ansible/files 与验证框架
- ansible/files 改为与文档 XX-YY 对齐的目录结构,更新相关 playbook 路径
- 新增 scripts/verify.sh 与 ansible/playbooks/verify/*.yml,移除单体 verify-matrix.yml
- 补充 docs/00-02 矩阵状态、00-05 验证框架与流程、00-04 环境与 ylc65 工作机说明
- 增加 k3s 存储准备、Longhorn、local-path 等 playbook 与辅助脚本

Made-with: Cursor
2026-03-26 07:01:14 +08:00

252 lines
9.7 KiB
YAML
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
---
# Helm 安装 Longhorn(与 docs/03-07 一致)。在控制节点执行,依赖 KUBECONFIG=/etc/rancher/k3s/k3s.yaml
# 变量:group_vars/all.yml 中 longhorn_chart_version、longhorn_install_node_packages、longhorn_apply_local_path_lab
- name: Longhorn node packages (iSCSI, NFS client)
  hosts: k3s_nodes
  become: true
  tasks:
    # OS-level prerequisites. The whole block is skipped when
    # longhorn_install_node_packages is false (it defaults to true).
    - name: Install Longhorn OS dependencies
      when: longhorn_install_node_packages | default(true) | bool
      block:
        - name: Install iscsi + nfs (dnf/yum)
          ansible.builtin.package:
            state: present
            name:
              - iscsi-initiator-utils
              - nfs-utils

        - name: Enable iscsid
          ansible.builtin.systemd:
            name: iscsid
            state: started
            enabled: true

    # k3s_data_dir is not defined in this playbook.
    # NOTE(review): presumably supplied by inventory/group_vars - confirm.
    - name: Ensure Longhorn data subdirectory exists on all nodes
      ansible.builtin.file:
        path: "{{ k3s_data_dir }}/longhorn"
        mode: "0700"
        state: directory

    # Pulls each image through the k3s containerd socket into the k8s.io
    # namespace. Every image gets up to 5 attempts, sleeping 3 * attempt
    # seconds after a failed one; the task fails as soon as one image cannot
    # be pulled. Skipped when longhorn_prepull_images is false (default true).
    # The task always reports "changed" (changed_when: true).
    # NOTE(review): the image tags are built as "v" + longhorn_chart_version,
    # which assumes chart and image versions match - confirm.
    - name: Pre-pull Longhorn images on all nodes (optional, avoid DockerHub EOF/ImagePullBackOff)
      when: longhorn_prepull_images | default(true) | bool
      ansible.builtin.shell: |
        set -e
        CTR="ctr --address /run/k3s/containerd/containerd.sock -n k8s.io"
        images=(
          "docker.io/longhornio/longhorn-manager:v{{ longhorn_chart_version }}"
          "docker.io/longhornio/longhorn-ui:v{{ longhorn_chart_version }}"
          "docker.io/longhornio/longhorn-share-manager:v{{ longhorn_chart_version }}"
          "docker.io/longhornio/longhorn-engine:v{{ longhorn_chart_version }}"
          "docker.io/longhornio/longhorn-instance-manager:v{{ longhorn_chart_version }}"
          "docker.io/longhornio/backing-image-manager:v{{ longhorn_chart_version }}"
          "docker.io/longhornio/support-bundle-kit:v0.0.45"
        )
        for image in "${images[@]}"; do
          pulled=0
          for attempt in 1 2 3 4 5; do
            echo "[pull] $image (try $attempt/5)"
            if $CTR images pull "$image"; then
              pulled=1
              break
            fi
            sleep $((attempt * 3))
          done
          if [ "$pulled" -ne 1 ]; then
            echo "[ERR] failed pulling $image after retries"
            exit 1
          fi
        done
      args:
        executable: /bin/bash
      changed_when: true
- name: Install Longhorn with Helm on first server
  hosts: k3s_server
  become: true
  run_once: true
  vars:
    longhorn_values_src: "{{ playbook_dir }}/../files/03-07-longhorn/values-lab.yaml"
    longhorn_values_dest: /root/longhorn-values-lab.yaml
    k3s_kubeconfig: /etc/rancher/k3s/k3s.yaml
  tasks:
    # Best-effort package install; a failure is tolerated here because the
    # "which helm" task below is the real gate.
    - name: Install helm package (Fedora/RHEL family)
      ansible.builtin.package:
        name: helm
        state: present
      ignore_errors: true
      register: helm_pkg

    - name: Hint if helm package install failed (install Helm 3 manually if needed)
      ansible.builtin.debug:
        msg: "dnf/yum 未装上 helm 时,请见 https://helm.sh/docs/intro/install/"
      when: helm_pkg.failed | default(false)

    - name: Fail if helm binary still unavailable
      ansible.builtin.command: which helm
      register: helm_which
      changed_when: false
      failed_when: helm_which.rc != 0

    - name: Copy lab values to server
      ansible.builtin.copy:
        src: "{{ longhorn_values_src }}"
        dest: "{{ longhorn_values_dest }}"
        mode: "0600"

    # If the namespace is stuck in Terminating, clear spec.finalizers through
    # the /finalize subresource. The JSON is streamed through a pipeline
    # instead of fixed /tmp/ns*.json paths, so root never writes to a
    # predictable temp file and nothing is left behind. Errors are
    # deliberately ignored (failed_when: false).
    - name: Ensure longhorn-system namespace is not stuck Terminating (force finalize if needed)
      ansible.builtin.shell: |
        set -eo pipefail
        export KUBECONFIG={{ k3s_kubeconfig }}
        ns="longhorn-system"
        phase="$(kubectl get ns "$ns" -o jsonpath='{.status.phase}' 2>/dev/null || true)"
        if [ "$phase" = "Terminating" ]; then
          echo "[WARN] namespace $ns is Terminating; force finalize to unblock install"
          kubectl get ns "$ns" -o json \
            | python3 -c "import json, sys; obj = json.load(sys.stdin); obj.setdefault('spec', {})['finalizers'] = []; json.dump(obj, sys.stdout)" \
            | kubectl replace --raw "/api/v1/namespaces/$ns/finalize" -f - >/dev/null
        fi
      args:
        executable: /bin/bash
      changed_when: true
      failed_when: false

    # Adds the chart repo only when no repo name starting with "longhorn" is
    # listed, then refreshes the repo index.
    - name: Ensure longhorn Helm repo
      ansible.builtin.shell: |
        set -e
        if ! helm repo list 2>/dev/null | grep -q '^longhorn'; then
          helm repo add longhorn https://charts.longhorn.io
        fi
        helm repo update
      environment:
        KUBECONFIG: "{{ k3s_kubeconfig }}"
      args:
        executable: /bin/bash
      changed_when: true

    - name: Delete leftover longhorn PriorityClass (cluster-scoped) to avoid Helm ownership conflicts
      ansible.builtin.shell: |
        set -e
        KUBECONFIG={{ k3s_kubeconfig }} kubectl delete priorityclass longhorn-critical --ignore-not-found=true
      args:
        executable: /bin/bash
      changed_when: true
      failed_when: false

    # Each delete is capped at 20s and its failure is ignored.
    # NOTE(review): deleting a CRD also deletes every custom resource of that
    # kind, so this wipes existing Longhorn objects on every run. Confirm this
    # playbook is only meant for disposable lab clusters.
    - name: Delete leftover Longhorn CRDs (cluster-scoped) to avoid Helm ownership conflicts
      ansible.builtin.shell: |
        set -e
        export KUBECONFIG={{ k3s_kubeconfig }}
        crd_list="$(kubectl get crd -o name 2>/dev/null | grep 'longhorn.io' || true)"
        if [ -n "$crd_list" ]; then
          echo "$crd_list" | while read -r crd; do
            [ -z "$crd" ] && continue
            timeout 20s kubectl delete "$crd" --ignore-not-found=true || true
          done
        fi
      args:
        executable: /bin/bash
      changed_when: true
      failed_when: false

    # Same pattern as the CRD cleanup, applied to every ClusterRole and
    # ClusterRoleBinding whose name contains "longhorn".
    - name: Delete leftover Longhorn ClusterRole/ClusterRoleBinding (cluster-scoped)
      ansible.builtin.shell: |
        set -e
        export KUBECONFIG={{ k3s_kubeconfig }}
        role_list="$(kubectl get clusterrole -o name 2>/dev/null | grep 'longhorn' || true)"
        if [ -n "$role_list" ]; then
          echo "$role_list" | while read -r role; do
            [ -z "$role" ] && continue
            timeout 20s kubectl delete "$role" --ignore-not-found=true || true
          done
        fi
        binding_list="$(kubectl get clusterrolebinding -o name 2>/dev/null | grep 'longhorn' || true)"
        if [ -n "$binding_list" ]; then
          echo "$binding_list" | while read -r binding; do
            [ -z "$binding" ] && continue
            timeout 20s kubectl delete "$binding" --ignore-not-found=true || true
          done
        fi
      args:
        executable: /bin/bash
      changed_when: true
      failed_when: false

    - name: Cleanup leftover Helm release records for Longhorn (default + longhorn-system)
      ansible.builtin.shell: |
        set -e
        export KUBECONFIG={{ k3s_kubeconfig }}
        # A failed or interrupted install can leave the release secret in
        # "default" or "longhorn-system", which later causes:
        # - "cannot re-use a name that is still in use"
        # - meta.helm.sh/release-namespace annotation conflicts on
        #   cluster-scoped resources
        for ns in longhorn-system default; do
          if helm -n "$ns" list --all 2>/dev/null | grep -q '^longhorn'; then
            # uninstall may hang (e.g. on the uninstall job / hooks); cap it
            # so it cannot block the whole automation run.
            timeout 120s helm -n "$ns" uninstall longhorn --no-hooks || true
          fi
          # This script is a YAML literal block and the pattern is
          # single-quoted, so a single backslash is what grep needs to match
          # a literal dot. A doubled backslash would match a literal
          # backslash and never hit sh.helm.release.v1.longhorn.* secrets.
          sec_list="$(kubectl -n "$ns" get secret -o name 2>/dev/null | grep '^secret/sh\.helm\.release\.v1\.longhorn\.' || true)"
          if [ -n "$sec_list" ]; then
            echo "$sec_list" | xargs -n1 kubectl -n "$ns" delete --ignore-not-found=true
          fi
        done
      environment:
        KUBECONFIG: "{{ k3s_kubeconfig }}"
      args:
        executable: /bin/bash
      changed_when: true
      failed_when: false

    # Tries "helm upgrade --install" first; if that fails, falls back to
    # "helm install --replace" with the same arguments. The shared arguments
    # are kept in one array so the two invocations cannot drift apart.
    - name: Helm upgrade/install Longhorn失败兜底install --replace
      ansible.builtin.shell: |
        set -e
        chart_args=(
          longhorn longhorn/longhorn
          --namespace longhorn-system --create-namespace
          -f {{ longhorn_values_dest }}
          --version {{ longhorn_chart_version }}
          --wait --timeout 15m
        )
        helm upgrade --install "${chart_args[@]}" || helm install --replace "${chart_args[@]}"
      environment:
        KUBECONFIG: "{{ k3s_kubeconfig }}"
      args:
        executable: /bin/bash
      register: helm_longhorn
      changed_when: true
- name: Apply local-path-config lab defaults (optional)
  hosts: k3s_server
  run_once: true
  become: true
  vars:
    local_path_json_src: "{{ playbook_dir }}/../files/03-05-local-path-config/local-path-config-lab.json"
    local_path_json_dest: /root/local-path-config-lab.json
    k3s_kubeconfig: /etc/rancher/k3s/k3s.yaml
  tasks:
    # Opt-in: runs only when longhorn_apply_local_path_lab is true
    # (it defaults to false).
    - name: Apply local-path-config lab defaults (optional)
      when: longhorn_apply_local_path_lab | default(false) | bool
      block:
        - name: Copy local-path lab json
          ansible.builtin.copy:
            dest: "{{ local_path_json_dest }}"
            src: "{{ local_path_json_src }}"
            mode: "0644"

        # Renders the ConfigMap client-side from the copied JSON and pipes it
        # to "kubectl apply", so an existing ConfigMap is updated in place.
        - name: Apply local-path-config ConfigMap
          changed_when: true
          args:
            executable: /bin/bash
          ansible.builtin.shell: |
            set -e
            KUBECONFIG={{ k3s_kubeconfig }} kubectl -n kube-system create configmap local-path-config \
              --from-file=config.json={{ local_path_json_dest }} \
              --dry-run=client -o yaml | KUBECONFIG={{ k3s_kubeconfig }} kubectl apply -f -

        # A non-zero exit (e.g. the deployment does not exist) is tolerated;
        # the task reports "changed" only when the restart command succeeded.
        - name: Restart local-path-provisioner if present
          register: lp_restart
          changed_when: lp_restart.rc == 0
          failed_when: false
          args:
            executable: /bin/bash
          ansible.builtin.shell: |
            KUBECONFIG={{ k3s_kubeconfig }} kubectl -n kube-system rollout restart deploy/local-path-provisioner