---
# Deployment: docs/00-05 §2 step 3 — roll out the stack with Helm; acceptance check: scripts/verify.sh run 03-07.
# Installs Longhorn with Helm (kept consistent with docs/03-07). Run from the control node;
# depends on KUBECONFIG=/etc/rancher/k3s/k3s.yaml
# Variables (group_vars/all.yml): longhorn_chart_version, longhorn_install_node_packages,
# longhorn_apply_local_path_lab

# Play 1: prepare every k3s node (OS packages, data directory, optional image pre-pull).
- name: Longhorn node packages (iSCSI, NFS client)
  hosts: k3s_nodes
  become: true
  tasks:
    # Skipped entirely when longhorn_install_node_packages is set to false (defaults to true).
    - name: Install Longhorn OS dependencies
      when: longhorn_install_node_packages | default(true) | bool
      block:
        - name: Install iscsi + nfs (dnf/yum)
          ansible.builtin.package:
            name:
              - iscsi-initiator-utils
              - nfs-utils
            state: present

        - name: Enable iscsid
          ansible.builtin.systemd:
            name: iscsid
            enabled: true
            state: started

    - name: Ensure Longhorn data subdirectory exists on all nodes
      ansible.builtin.file:
        path: "{{ k3s_data_dir }}/longhorn"
        state: directory
        mode: "0700"

    # Pulls each image through the k3s containerd socket with up to 5 attempts and a
    # growing sleep (3s, 6s, ...) between attempts; fails the task if any image cannot be pulled.
    # Skipped when longhorn_prepull_images is set to false (defaults to true).
    - name: Pre-pull Longhorn images on all nodes (optional, avoid DockerHub EOF/ImagePullBackOff)
      when: longhorn_prepull_images | default(true) | bool
      ansible.builtin.shell: |
        set -e
        CTR="ctr --address /run/k3s/containerd/containerd.sock -n k8s.io"
        imgs=(
          "docker.io/longhornio/longhorn-manager:v{{ longhorn_chart_version }}"
          "docker.io/longhornio/longhorn-ui:v{{ longhorn_chart_version }}"
          "docker.io/longhornio/longhorn-share-manager:v{{ longhorn_chart_version }}"
          "docker.io/longhornio/longhorn-engine:v{{ longhorn_chart_version }}"
          "docker.io/longhornio/longhorn-instance-manager:v{{ longhorn_chart_version }}"
          "docker.io/longhornio/backing-image-manager:v{{ longhorn_chart_version }}"
          "docker.io/longhornio/support-bundle-kit:v0.0.45"
        )
        for img in "${imgs[@]}"; do
          ok=0
          for i in 1 2 3 4 5; do
            echo "[pull] $img (try $i/5)"
            if $CTR images pull "$img"; then
              ok=1
              break
            fi
            sleep $((i * 3))
          done
          if [ "$ok" -ne 1 ]; then
            echo "[ERR] failed pulling $img after retries"
            exit 1
          fi
        done
      args:
        executable: /bin/bash
      changed_when: true

# Play 2: clean up leftovers of earlier installs, then install/upgrade the Longhorn chart.
# Runs once (run_once) against the k3s_server group.
- name: Install Longhorn with Helm on first server
  hosts: k3s_server
  become: true
  run_once: true
  vars:
    longhorn_values_src: "{{ playbook_dir }}/../files/03-07-longhorn/values-lab.yaml"
    longhorn_values_dest: /root/longhorn-values-lab.yaml
    k3s_kubeconfig: /etc/rancher/k3s/k3s.yaml
  tasks:
    # Package install is allowed to fail; the `which helm` task below is the real gate.
    - name: Install helm package (Fedora/RHEL family)
      ansible.builtin.package:
        name: helm
        state: present
      ignore_errors: true
      register: helm_pkg

    - name: Hint if helm package install failed (install Helm 3 manually if needed)
      ansible.builtin.debug:
        msg: "dnf/yum 未装上 helm 时,请见 https://helm.sh/docs/intro/install/"
      when: helm_pkg.failed | default(false)

    - name: Fail if helm binary still unavailable
      ansible.builtin.command: which helm
      register: helm_which
      changed_when: false
      failed_when: helm_which.rc != 0

    - name: Copy lab values to server
      ansible.builtin.copy:
        src: "{{ longhorn_values_src }}"
        dest: "{{ longhorn_values_dest }}"
        mode: "0600"

    # If the namespace reports phase Terminating, rewrite its spec.finalizers to []
    # through the /finalize subresource. Best-effort: failed_when is false.
    - name: Ensure longhorn-system namespace is not stuck Terminating (force finalize if needed)
      ansible.builtin.shell: |
        set -e
        export KUBECONFIG={{ k3s_kubeconfig }}
        ns="longhorn-system"
        phase="$(kubectl get ns "$ns" -o jsonpath='{.status.phase}' 2>/dev/null || true)"
        if [ "$phase" = "Terminating" ]; then
          echo "[WARN] namespace $ns is Terminating; force finalize to unblock install"
          kubectl get ns "$ns" -o json > /tmp/ns.json
          python3 -c "import json; obj=json.load(open('/tmp/ns.json')); obj.setdefault('spec',{}); obj['spec']['finalizers']=[]; json.dump(obj, open('/tmp/ns-finalize.json','w'))"
          kubectl replace --raw "/api/v1/namespaces/$ns/finalize" -f /tmp/ns-finalize.json >/dev/null
        fi
      args:
        executable: /bin/bash
      changed_when: true
      failed_when: false

    - name: Ensure longhorn Helm repo
      ansible.builtin.shell: |
        set -e
        if ! helm repo list 2>/dev/null | grep -q '^longhorn'; then
          helm repo add longhorn https://charts.longhorn.io
        fi
        helm repo update
      environment:
        KUBECONFIG: "{{ k3s_kubeconfig }}"
      args:
        executable: /bin/bash
      changed_when: true

    - name: Delete leftover longhorn PriorityClass (cluster-scoped) to avoid Helm ownership conflicts
      ansible.builtin.shell: |
        set -e
        KUBECONFIG={{ k3s_kubeconfig }} kubectl delete priorityclass longhorn-critical --ignore-not-found=true
      args:
        executable: /bin/bash
      changed_when: true
      failed_when: false

    # Deletes every CRD whose name contains "longhorn.io", each with a 20s timeout.
    # NOTE(review): this runs on every execution of the play, not only after a failed
    # install — presumably acceptable for a lab; confirm before using on a cluster with data.
    - name: Delete leftover Longhorn CRDs (cluster-scoped) to avoid Helm ownership conflicts
      ansible.builtin.shell: |
        set -e
        export KUBECONFIG={{ k3s_kubeconfig }}
        crd_list="$(kubectl get crd -o name 2>/dev/null | grep 'longhorn.io' || true)"
        if [ -n "$crd_list" ]; then
          echo "$crd_list" | while read -r crd; do
            [ -z "$crd" ] && continue
            timeout 20s kubectl delete "$crd" --ignore-not-found=true || true
          done
        fi
      args:
        executable: /bin/bash
      changed_when: true
      failed_when: false

    - name: Delete leftover Longhorn ClusterRole/ClusterRoleBinding (cluster-scoped)
      ansible.builtin.shell: |
        set -e
        export KUBECONFIG={{ k3s_kubeconfig }}
        role_list="$(kubectl get clusterrole -o name 2>/dev/null | grep 'longhorn' || true)"
        if [ -n "$role_list" ]; then
          echo "$role_list" | while read -r role; do
            [ -z "$role" ] && continue
            timeout 20s kubectl delete "$role" --ignore-not-found=true || true
          done
        fi
        binding_list="$(kubectl get clusterrolebinding -o name 2>/dev/null | grep 'longhorn' || true)"
        if [ -n "$binding_list" ]; then
          echo "$binding_list" | while read -r binding; do
            [ -z "$binding" ] && continue
            timeout 20s kubectl delete "$binding" --ignore-not-found=true || true
          done
        fi
      args:
        executable: /bin/bash
      changed_when: true
      failed_when: false

    # For both namespaces: uninstall a "longhorn" release if Helm lists one (120s cap,
    # hooks disabled), then delete any remaining sh.helm.release.v1.longhorn.* secrets.
    # The grep pattern uses a single backslash per dot: this is a literal block scalar
    # inside shell single quotes, so a doubled backslash would reach grep verbatim and
    # the pattern would never match a real release secret name.
    - name: Cleanup leftover Helm release records for Longhorn (default + longhorn-system)
      ansible.builtin.shell: |
        set -e
        export KUBECONFIG={{ k3s_kubeconfig }}
        # 有些失败/中断的安装会把 release secret 留在 default 或 longhorn-system,导致后续:
        # - "cannot re-use a name that is still in use"
        # - cluster-scoped 资源的 meta.helm.sh/release-namespace 注解冲突
        for ns in longhorn-system default; do
          if helm -n "$ns" list --all 2>/dev/null | grep -q '^longhorn'; then
            # uninstall 可能卡住(例如 uninstall job / hook),避免阻塞整个自动化流程
            timeout 120s helm -n "$ns" uninstall longhorn --no-hooks || true
          fi
          sec_list="$(kubectl -n "$ns" get secret -o name 2>/dev/null | grep '^secret/sh\.helm\.release\.v1\.longhorn\.' || true)"
          if [ -n "$sec_list" ]; then
            echo "$sec_list" | xargs -n1 kubectl -n "$ns" delete --ignore-not-found=true
          fi
        done
      environment:
        KUBECONFIG: "{{ k3s_kubeconfig }}"
      args:
        executable: /bin/bash
      changed_when: true
      failed_when: false

    # Primary path is `helm upgrade --install`; if that exits non-zero the same chart,
    # values and version are retried with `helm install --replace`.
    - name: Helm upgrade/install Longhorn(失败兜底:install --replace)
      ansible.builtin.shell: |
        set -e
        helm upgrade --install longhorn longhorn/longhorn \
          --namespace longhorn-system --create-namespace \
          -f {{ longhorn_values_dest }} \
          --version {{ longhorn_chart_version }} \
          --wait --timeout 15m \
        || helm install --replace longhorn longhorn/longhorn \
          --namespace longhorn-system --create-namespace \
          -f {{ longhorn_values_dest }} \
          --version {{ longhorn_chart_version }} \
          --wait --timeout 15m
      environment:
        KUBECONFIG: "{{ k3s_kubeconfig }}"
      args:
        executable: /bin/bash
      register: helm_longhorn
      changed_when: true

# Play 3: optionally replace the local-path-config ConfigMap with the lab JSON and
# restart the provisioner. Only runs when longhorn_apply_local_path_lab is true (default false).
- name: Apply local-path-config lab defaults (optional)
  hosts: k3s_server
  become: true
  run_once: true
  vars:
    k3s_kubeconfig: /etc/rancher/k3s/k3s.yaml
    local_path_json_src: "{{ playbook_dir }}/../files/03-05-local-path-config/local-path-config-lab.json"
    local_path_json_dest: /root/local-path-config-lab.json
  tasks:
    - name: Apply local-path-config lab defaults (optional)
      when: longhorn_apply_local_path_lab | default(false) | bool
      block:
        - name: Copy local-path lab json
          ansible.builtin.copy:
            src: "{{ local_path_json_src }}"
            dest: "{{ local_path_json_dest }}"
            mode: "0644"

        # pipefail: without it a failure of `kubectl create --dry-run` on the left side
        # of the pipe is hidden behind the exit status of `kubectl apply`.
        - name: Apply local-path-config ConfigMap
          ansible.builtin.shell: |
            set -e -o pipefail
            KUBECONFIG={{ k3s_kubeconfig }} kubectl -n kube-system create configmap local-path-config \
              --from-file=config.json={{ local_path_json_dest }} \
              --dry-run=client -o yaml | KUBECONFIG={{ k3s_kubeconfig }} kubectl apply -f -
          args:
            executable: /bin/bash
          changed_when: true

        # Best-effort: a non-zero exit (e.g. deployment absent) is not treated as failure,
        # and the task only reports "changed" when the restart command succeeded.
        - name: Restart local-path-provisioner if present
          ansible.builtin.shell: |
            KUBECONFIG={{ k3s_kubeconfig }} kubectl -n kube-system rollout restart deploy/local-path-provisioner
          args:
            executable: /bin/bash
          register: lp_restart
          failed_when: false
          changed_when: lp_restart.rc == 0