- 新增 deploy-lab.sh(k3s/longhorn/nginx 铺栈)与 ssh/run-phase2-k3s-on-ylc61-as-jack.sh - verify.sh:flow/preflight、VERIFY_TEARDOWN 默认、注释与 §2 对应 - 更新 smoke-verify、README、.env.verify.example、根 README 与主要 playbook 头注释 - k3s-delete-lab-stacks 标明重度清场语义 Made-with: Cursor
253 lines
9.8 KiB
YAML
253 lines
9.8 KiB
YAML
---
|
||
# 部署:docs/00-05 §2 步骤 3——Helm 铺栈;验收见 scripts/verify.sh run 03-07。
|
||
# Helm 安装 Longhorn(与 docs/03-07 一致)。在控制节点执行,依赖 KUBECONFIG=/etc/rancher/k3s/k3s.yaml
|
||
# Variables (group_vars/all.yml): longhorn_chart_version, longhorn_install_node_packages, longhorn_apply_local_path_lab; the plays below also read longhorn_prepull_images and k3s_data_dir.
|
||
|
||
# Play 1: prepare every host in k3s_nodes for Longhorn.
#   1. Install the iSCSI / NFS client packages and start iscsid
#      (skipped when longhorn_install_node_packages is false).
#   2. Create the Longhorn directory under k3s_data_dir.
#   3. Optionally pre-pull the Longhorn images through the k3s containerd
#      socket, retrying each image up to five times.
# NOTE(review): task nesting was reconstructed from a flattened listing; the
# data-directory task is assumed to sit outside the package block — confirm
# against the repository copy.
- name: Longhorn node packages (iSCSI, NFS client)
  hosts: k3s_nodes
  become: true
  tasks:
    - name: Install Longhorn OS dependencies
      when: longhorn_install_node_packages | default(true) | bool
      block:
        - name: Install iscsi + nfs (dnf/yum)
          ansible.builtin.package:
            state: present
            name: [iscsi-initiator-utils, nfs-utils]

        - name: Enable iscsid
          ansible.builtin.systemd:
            name: iscsid
            state: started
            enabled: true

    - name: Ensure Longhorn data subdirectory exists on all nodes
      ansible.builtin.file:
        state: directory
        path: "{{ k3s_data_dir }}/longhorn"
        mode: "0700"

    # The script always reports "changed"; it does not detect whether the
    # images were already present.
    - name: Pre-pull Longhorn images on all nodes (optional, avoid DockerHub EOF/ImagePullBackOff)
      when: longhorn_prepull_images | default(true) | bool
      changed_when: true
      args:
        executable: /bin/bash
      ansible.builtin.shell: |
        set -e
        CTR="ctr --address /run/k3s/containerd/containerd.sock -n k8s.io"

        imgs=(
          "docker.io/longhornio/longhorn-manager:v{{ longhorn_chart_version }}"
          "docker.io/longhornio/longhorn-ui:v{{ longhorn_chart_version }}"
          "docker.io/longhornio/longhorn-share-manager:v{{ longhorn_chart_version }}"
          "docker.io/longhornio/longhorn-engine:v{{ longhorn_chart_version }}"
          "docker.io/longhornio/longhorn-instance-manager:v{{ longhorn_chart_version }}"
          "docker.io/longhornio/backing-image-manager:v{{ longhorn_chart_version }}"
          "docker.io/longhornio/support-bundle-kit:v0.0.45"
        )

        for img in "${imgs[@]}"; do
          ok=0
          # Back off 3s, 6s, ... between attempts.
          for i in 1 2 3 4 5; do
            echo "[pull] $img (try $i/5)"
            if $CTR images pull "$img"; then
              ok=1
              break
            fi
            sleep $((i * 3))
          done
          if [ "$ok" -ne 1 ]; then
            echo "[ERR] failed pulling $img after retries"
            exit 1
          fi
        done
|
||
|
||
# Play 2: install Longhorn with Helm, run once against k3s_server.
# Flow: make sure a helm binary exists, copy the lab values file, clear
# leftovers from earlier failed installs (Terminating namespace, PriorityClass,
# CRDs, ClusterRole/ClusterRoleBinding, Helm release records), then run
# `helm upgrade --install` with `helm install --replace` as the fallback.
# The cleanup tasks are best effort: they set failed_when: false so a cleanup
# error never aborts the play.
- name: Install Longhorn with Helm on first server
  hosts: k3s_server
  become: true
  run_once: true
  vars:
    longhorn_values_src: "{{ playbook_dir }}/../files/03-07-longhorn/values-lab.yaml"
    longhorn_values_dest: /root/longhorn-values-lab.yaml
    k3s_kubeconfig: /etc/rancher/k3s/k3s.yaml
  tasks:
    # Package install may fail; the next two tasks print a hint and then
    # verify that a helm binary is actually on PATH.
    - name: Install helm package (Fedora/RHEL family)
      ansible.builtin.package:
        name: helm
        state: present
      ignore_errors: true
      register: helm_pkg

    - name: Hint if helm package install failed (install Helm 3 manually if needed)
      ansible.builtin.debug:
        msg: "dnf/yum 未装上 helm 时,请见 https://helm.sh/docs/intro/install/"
      when: helm_pkg.failed | default(false)

    - name: Fail if helm binary still unavailable
      ansible.builtin.command: which helm
      register: helm_which
      changed_when: false
      failed_when: helm_which.rc != 0

    - name: Copy lab values to server
      ansible.builtin.copy:
        src: "{{ longhorn_values_src }}"
        dest: "{{ longhorn_values_dest }}"
        mode: "0600"

    # Scratch files live in a private mktemp directory (removed on exit)
    # instead of fixed, predictable /tmp paths written as root.
    - name: Ensure longhorn-system namespace is not stuck Terminating (force finalize if needed)
      ansible.builtin.shell: |
        set -e
        export KUBECONFIG={{ k3s_kubeconfig }}
        ns="longhorn-system"
        phase="$(kubectl get ns "$ns" -o jsonpath='{.status.phase}' 2>/dev/null || true)"
        if [ "$phase" = "Terminating" ]; then
          echo "[WARN] namespace $ns is Terminating; force finalize to unblock install"
          workdir="$(mktemp -d)"
          trap 'rm -rf "$workdir"' EXIT
          kubectl get ns "$ns" -o json > "$workdir/ns.json"
          # Empty spec.finalizers and submit the object to the /finalize subresource.
          python3 -c "import json,sys; obj=json.load(open(sys.argv[1])); obj.setdefault('spec',{}); obj['spec']['finalizers']=[]; json.dump(obj, open(sys.argv[2],'w'))" "$workdir/ns.json" "$workdir/ns-finalize.json"
          kubectl replace --raw "/api/v1/namespaces/$ns/finalize" -f "$workdir/ns-finalize.json" >/dev/null
        fi
      args:
        executable: /bin/bash
      changed_when: true
      failed_when: false

    - name: Ensure longhorn Helm repo
      ansible.builtin.shell: |
        set -e
        if ! helm repo list 2>/dev/null | grep -q '^longhorn'; then
          helm repo add longhorn https://charts.longhorn.io
        fi
        helm repo update
      environment:
        KUBECONFIG: "{{ k3s_kubeconfig }}"
      args:
        executable: /bin/bash
      changed_when: true

    - name: Delete leftover longhorn PriorityClass (cluster-scoped) to avoid Helm ownership conflicts
      ansible.builtin.shell: |
        set -e
        KUBECONFIG={{ k3s_kubeconfig }} kubectl delete priorityclass longhorn-critical --ignore-not-found=true
      args:
        executable: /bin/bash
      changed_when: true
      failed_when: false

    # Each delete is capped at 20s so a CRD blocked on finalizers cannot
    # stall the play.
    - name: Delete leftover Longhorn CRDs (cluster-scoped) to avoid Helm ownership conflicts
      ansible.builtin.shell: |
        set -e
        export KUBECONFIG={{ k3s_kubeconfig }}
        crd_list="$(kubectl get crd -o name 2>/dev/null | grep 'longhorn.io' || true)"
        if [ -n "$crd_list" ]; then
          echo "$crd_list" | while read -r crd; do
            [ -z "$crd" ] && continue
            timeout 20s kubectl delete "$crd" --ignore-not-found=true || true
          done
        fi
      args:
        executable: /bin/bash
      changed_when: true
      failed_when: false

    - name: Delete leftover Longhorn ClusterRole/ClusterRoleBinding (cluster-scoped)
      ansible.builtin.shell: |
        set -e
        export KUBECONFIG={{ k3s_kubeconfig }}

        role_list="$(kubectl get clusterrole -o name 2>/dev/null | grep 'longhorn' || true)"
        if [ -n "$role_list" ]; then
          echo "$role_list" | while read -r role; do
            [ -z "$role" ] && continue
            timeout 20s kubectl delete "$role" --ignore-not-found=true || true
          done
        fi

        binding_list="$(kubectl get clusterrolebinding -o name 2>/dev/null | grep 'longhorn' || true)"
        if [ -n "$binding_list" ]; then
          echo "$binding_list" | while read -r binding; do
            [ -z "$binding" ] && continue
            timeout 20s kubectl delete "$binding" --ignore-not-found=true || true
          done
        fi
      args:
        executable: /bin/bash
      changed_when: true
      failed_when: false

    - name: Cleanup leftover Helm release records for Longhorn (default + longhorn-system)
      ansible.builtin.shell: |
        set -e
        export KUBECONFIG={{ k3s_kubeconfig }}

        # A failed or interrupted install can leave release secrets in
        # default or longhorn-system, which later causes:
        #   - "cannot re-use a name that is still in use"
        #   - meta.helm.sh/release-namespace annotation conflicts on
        #     cluster-scoped resources
        for ns in longhorn-system default; do
          if helm -n "$ns" list --all 2>/dev/null | grep -q '^longhorn'; then
            # uninstall can hang (e.g. on the uninstall job / hooks); cap it
            # so it cannot block the whole automation run.
            timeout 120s helm -n "$ns" uninstall longhorn --no-hooks || true
          fi

          # Single backslashes: this is a literal block inside shell single
          # quotes, so grep receives the pattern exactly as written. The
          # previous '\\.' form meant "backslash + any char" and never matched.
          sec_list="$(kubectl -n "$ns" get secret -o name 2>/dev/null | grep '^secret/sh\.helm\.release\.v1\.longhorn\.' || true)"
          if [ -n "$sec_list" ]; then
            echo "$sec_list" | xargs -n1 kubectl -n "$ns" delete --ignore-not-found=true
          fi
        done
      environment:
        KUBECONFIG: "{{ k3s_kubeconfig }}"
      args:
        executable: /bin/bash
      changed_when: true
      failed_when: false

    # If upgrade --install fails, fall back to install --replace with the
    # same chart, namespace, values and version.
    - name: Helm upgrade/install Longhorn(失败兜底:install --replace)
      ansible.builtin.shell: |
        set -e
        helm upgrade --install longhorn longhorn/longhorn \
          --namespace longhorn-system --create-namespace \
          -f "{{ longhorn_values_dest }}" \
          --version {{ longhorn_chart_version }} \
          --wait --timeout 15m \
        || helm install --replace longhorn longhorn/longhorn \
          --namespace longhorn-system --create-namespace \
          -f "{{ longhorn_values_dest }}" \
          --version {{ longhorn_chart_version }} \
          --wait --timeout 15m
      environment:
        KUBECONFIG: "{{ k3s_kubeconfig }}"
      args:
        executable: /bin/bash
      register: helm_longhorn
      changed_when: true
|
||
|
||
# Play 3: optionally replace the kube-system/local-path-config ConfigMap with
# the lab JSON and restart the provisioner. Runs once against k3s_server and
# does nothing unless longhorn_apply_local_path_lab is true (default false).
- name: Apply local-path-config lab defaults (optional)
  hosts: k3s_server
  run_once: true
  become: true
  vars:
    local_path_json_src: "{{ playbook_dir }}/../files/03-05-local-path-config/local-path-config-lab.json"
    local_path_json_dest: /root/local-path-config-lab.json
    k3s_kubeconfig: /etc/rancher/k3s/k3s.yaml
  tasks:
    - name: Apply local-path-config lab defaults (optional)
      when: longhorn_apply_local_path_lab | default(false) | bool
      block:
        - name: Copy local-path lab json
          ansible.builtin.copy:
            dest: "{{ local_path_json_dest }}"
            src: "{{ local_path_json_src }}"
            mode: "0644"

        # Render the ConfigMap client-side, then pipe it to `kubectl apply`.
        - name: Apply local-path-config ConfigMap
          changed_when: true
          args:
            executable: /bin/bash
          ansible.builtin.shell: |
            set -e
            KUBECONFIG={{ k3s_kubeconfig }} kubectl -n kube-system create configmap local-path-config \
              --from-file=config.json={{ local_path_json_dest }} \
              --dry-run=client -o yaml | KUBECONFIG={{ k3s_kubeconfig }} kubectl apply -f -

        # Never fails the play (failed_when: false); reported as changed
        # only when the rollout restart command exits 0.
        - name: Restart local-path-provisioner if present
          register: lp_restart
          changed_when: lp_restart.rc == 0
          failed_when: false
          args:
            executable: /bin/bash
          ansible.builtin.shell: |
            KUBECONFIG={{ k3s_kubeconfig }} kubectl -n kube-system rollout restart deploy/local-path-provisioner
|