Files
Deploy-Laboratory/ansible/playbooks/verify/03-07.yml
2026-03-27 16:58:41 +08:00

286 lines
10 KiB
YAML
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
---
# Play: prepare every host in `k3s_nodes` for Longhorn - OS packages, the
# iscsid service, the Longhorn data directory, and (optionally) pre-pulled
# container images.
# NOTE(review): the copy of this file that was reviewed had lost its
# indentation; which tasks sit inside `block:` is inferred - confirm.
- name: Longhorn node packages (iSCSI, NFS client)
  hosts: k3s_nodes
  become: true
  tasks:
    # Skipped as a unit when longhorn_install_node_packages is false.
    - name: Install Longhorn OS dependencies
      when: longhorn_install_node_packages | default(true) | bool
      block:
        - name: Install iscsi + nfs (dnf/yum)
          ansible.builtin.package:
            state: present
            name:
              - iscsi-initiator-utils
              - nfs-utils

        - name: Enable iscsid
          ansible.builtin.systemd:
            name: iscsid
            state: started
            enabled: true

    - name: Ensure Longhorn data subdirectory exists on all nodes
      ansible.builtin.file:
        state: directory
        path: "{{ k3s_data_dir }}/longhorn"
        mode: "0700"

    # Pulls each image through the k3s containerd socket, retrying up to five
    # times per image with a growing sleep (3s, 6s, ...) between attempts.
    # The script aborts with exit 1 as soon as one image exhausts its retries.
    # Always reported as changed.
    - name: Pre-pull Longhorn images on all nodes (optional, avoid DockerHub EOF/ImagePullBackOff)
      when: longhorn_prepull_images | default(true) | bool
      ansible.builtin.shell:
        executable: /bin/bash
        cmd: |
          set -e
          CTR="ctr --address /run/k3s/containerd/containerd.sock -n k8s.io"
          imgs=(
            "docker.io/longhornio/longhorn-manager:v{{ longhorn_chart_version }}"
            "docker.io/longhornio/longhorn-ui:v{{ longhorn_chart_version }}"
            "docker.io/longhornio/longhorn-share-manager:v{{ longhorn_chart_version }}"
            "docker.io/longhornio/longhorn-engine:v{{ longhorn_chart_version }}"
            "docker.io/longhornio/longhorn-instance-manager:v{{ longhorn_chart_version }}"
            "docker.io/longhornio/backing-image-manager:v{{ longhorn_chart_version }}"
            "docker.io/longhornio/support-bundle-kit:v0.0.45"
          )
          for img in "${imgs[@]}"; do
            ok=0
            for i in 1 2 3 4 5; do
              echo "[pull] $img (try $i/5)"
              if $CTR images pull "$img"; then
                ok=1
                break
              fi
              sleep $((i * 3))
            done
            if [ "$ok" -ne 1 ]; then
              echo "[ERR] failed pulling $img after retries"
              exit 1
            fi
          done
      changed_when: true
# Play: install Longhorn via Helm from the first k3s server.
# Order of work: make sure helm exists, copy the values file, clear leftovers
# from earlier installs (stuck namespace, PriorityClass, optional CRDs,
# ClusterRoles/Bindings, stale Helm release records), then install the chart.
#
# The shell scripts below live in `|` literal block scalars. YAML performs no
# escape processing there, so quotes must be written as plain `"` - a `\"`
# would reach bash as a literal quote character and corrupt every argument.
#
# NOTE: the cleanup tasks are best-effort (`failed_when: false`) and run on
# every invocation; an existing `longhorn` Helm release is uninstalled before
# the install task runs.
- name: Install Longhorn with Helm on first server
  hosts: k3s_server
  become: true
  run_once: true
  vars:
    longhorn_values_src: "{{ playbook_dir }}/../../files/03-07/values-lab.yaml"
    longhorn_values_dest: /root/longhorn-values-lab.yaml
    k3s_kubeconfig: /etc/rancher/k3s/k3s.yaml
  tasks:
    # A failed package install is tolerated here; the `which helm` task below
    # is the real gate.
    - name: Install helm package (Fedora/RHEL family)
      ansible.builtin.package:
        name: helm
        state: present
      ignore_errors: true
      register: helm_pkg

    - name: Hint if helm package install failed (install Helm 3 manually if needed)
      ansible.builtin.debug:
        msg: "dnf/yum 未装上 helm 时,请见 https://helm.sh/docs/intro/install/"
      when: helm_pkg.failed | default(false)

    - name: Fail if helm binary still unavailable
      ansible.builtin.command: which helm
      register: helm_which
      changed_when: false
      failed_when: helm_which.rc != 0

    - name: Copy lab values to server
      ansible.builtin.copy:
        src: "{{ longhorn_values_src }}"
        dest: "{{ longhorn_values_dest }}"
        mode: "0600"

    # If the namespace is in phase Terminating, empty spec.finalizers and PUT
    # the object to the namespace's /finalize subresource.
    - name: Ensure longhorn-system namespace is not stuck Terminating (force finalize if needed)
      ansible.builtin.shell: |
        set -e
        export KUBECONFIG={{ k3s_kubeconfig }}
        ns="longhorn-system"
        phase="$(kubectl get ns "$ns" -o jsonpath='{.status.phase}' 2>/dev/null || true)"
        if [ "$phase" = "Terminating" ]; then
          echo "[WARN] namespace $ns is Terminating; force finalize to unblock install"
          kubectl get ns "$ns" -o json > /tmp/ns.json
          python3 -c "import json; obj=json.load(open('/tmp/ns.json')); obj.setdefault('spec',{}); obj['spec']['finalizers']=[]; json.dump(obj, open('/tmp/ns-finalize.json','w'))"
          kubectl replace --raw "/api/v1/namespaces/$ns/finalize" -f /tmp/ns-finalize.json >/dev/null
        fi
      args:
        executable: /bin/bash
      changed_when: true
      failed_when: false

    - name: Ensure longhorn Helm repo
      ansible.builtin.shell: |
        set -e
        if ! helm repo list 2>/dev/null | grep -q '^longhorn'; then
          helm repo add longhorn https://charts.longhorn.io
        fi
        helm repo update
      environment:
        KUBECONFIG: "{{ k3s_kubeconfig }}"
      args:
        executable: /bin/bash
      changed_when: true

    - name: Delete leftover longhorn PriorityClass (cluster-scoped) to avoid Helm ownership conflicts
      ansible.builtin.shell: |
        set -e
        KUBECONFIG={{ k3s_kubeconfig }} kubectl delete priorityclass longhorn-critical --ignore-not-found=true
      args:
        executable: /bin/bash
      changed_when: true
      failed_when: false

    # Opt-in only: runs when longhorn_force_crd_reset is true. Each delete is
    # capped at 20s and its failure is ignored.
    - name: Delete leftover Longhorn CRDs (cluster-scoped, opt-in)
      when: longhorn_force_crd_reset | default(false) | bool
      ansible.builtin.shell: |
        set -e
        export KUBECONFIG={{ k3s_kubeconfig }}
        crd_list="$(kubectl get crd -o name 2>/dev/null | grep 'longhorn.io' || true)"
        if [ -n "$crd_list" ]; then
          echo "$crd_list" | while read -r crd; do
            [ -z "$crd" ] && continue
            timeout 20s kubectl delete "$crd" --ignore-not-found=true || true
          done
        fi
      args:
        executable: /bin/bash
      changed_when: true
      failed_when: false

    # Removes every ClusterRole and ClusterRoleBinding whose name contains
    # "longhorn"; each delete is capped at 20s and its failure is ignored.
    - name: Delete leftover Longhorn ClusterRole/ClusterRoleBinding (cluster-scoped)
      ansible.builtin.shell: |
        set -e
        export KUBECONFIG={{ k3s_kubeconfig }}
        role_list="$(kubectl get clusterrole -o name 2>/dev/null | grep 'longhorn' || true)"
        if [ -n "$role_list" ]; then
          echo "$role_list" | while read -r role; do
            [ -z "$role" ] && continue
            timeout 20s kubectl delete "$role" --ignore-not-found=true || true
          done
        fi
        binding_list="$(kubectl get clusterrolebinding -o name 2>/dev/null | grep 'longhorn' || true)"
        if [ -n "$binding_list" ]; then
          echo "$binding_list" | while read -r binding; do
            [ -z "$binding" ] && continue
            timeout 20s kubectl delete "$binding" --ignore-not-found=true || true
          done
        fi
      args:
        executable: /bin/bash
      changed_when: true
      failed_when: false

    # For both namespaces: uninstall a release whose name starts with
    # "longhorn" (hooks skipped, 120s cap), then delete any remaining
    # sh.helm.release.v1.longhorn.* secrets. The dots in the grep pattern are
    # escaped once (`\.`) so they match literal dots.
    - name: Cleanup leftover Helm release records for Longhorn (default + longhorn-system)
      ansible.builtin.shell: |
        set -e
        export KUBECONFIG={{ k3s_kubeconfig }}
        for ns in longhorn-system default; do
          if helm -n "$ns" list --all 2>/dev/null | grep -q '^longhorn'; then
            timeout 120s helm -n "$ns" uninstall longhorn --no-hooks || true
          fi
          sec_list="$(kubectl -n "$ns" get secret -o name 2>/dev/null | grep '^secret/sh\.helm\.release\.v1\.longhorn\.' || true)"
          if [ -n "$sec_list" ]; then
            echo "$sec_list" | xargs -n1 kubectl -n "$ns" delete --ignore-not-found=true
          fi
        done
      environment:
        KUBECONFIG: "{{ k3s_kubeconfig }}"
      args:
        executable: /bin/bash
      changed_when: true
      failed_when: false

    # Primary path is `helm upgrade --install`; if that exits non-zero the
    # same chart/version/values are retried with `helm install --replace`.
    - name: Helm upgrade/install Longhorn失败兜底install --replace
      ansible.builtin.shell: |
        set -e
        helm upgrade --install longhorn longhorn/longhorn \
          --namespace longhorn-system --create-namespace \
          -f {{ longhorn_values_dest }} \
          --version {{ longhorn_chart_version }} \
          --wait --timeout 15m \
        || helm install --replace longhorn longhorn/longhorn \
          --namespace longhorn-system --create-namespace \
          -f {{ longhorn_values_dest }} \
          --version {{ longhorn_chart_version }} \
          --wait --timeout 15m
      environment:
        KUBECONFIG: "{{ k3s_kubeconfig }}"
      args:
        executable: /bin/bash
      register: helm_longhorn
      changed_when: true
# Play: optionally replace the kube-system/local-path-config ConfigMap with
# the lab JSON and restart the provisioner. Nothing runs unless
# longhorn_apply_local_path_lab is true.
# NOTE(review): the copy of this file that was reviewed had lost its
# indentation; all three tasks are assumed to sit inside `block:` - confirm.
- name: Apply local-path-config lab defaults (optional)
  hosts: k3s_server
  become: true
  run_once: true
  vars:
    k3s_kubeconfig: /etc/rancher/k3s/k3s.yaml
    local_path_json_src: "{{ playbook_dir }}/../../files/03-05-local-path-config/local-path-config-lab.json"
    local_path_json_dest: /root/local-path-config-lab.json
  tasks:
    - name: Apply local-path-config lab defaults (optional)
      when: longhorn_apply_local_path_lab | default(false) | bool
      block:
        - name: Copy local-path lab json
          ansible.builtin.copy:
            src: "{{ local_path_json_src }}"
            dest: "{{ local_path_json_dest }}"
            mode: "0644"

        # Renders the ConfigMap client-side (--dry-run=client) and pipes the
        # YAML into `kubectl apply`.
        - name: Apply local-path-config ConfigMap
          ansible.builtin.shell:
            executable: /bin/bash
            cmd: |
              set -e
              KUBECONFIG={{ k3s_kubeconfig }} kubectl -n kube-system create configmap local-path-config \
                --from-file=config.json={{ local_path_json_dest }} \
                --dry-run=client -o yaml | KUBECONFIG={{ k3s_kubeconfig }} kubectl apply -f -
          changed_when: true

        # Never fails the play; reported as changed only when the rollout
        # restart command exits 0.
        - name: Restart local-path-provisioner if present
          ansible.builtin.shell:
            executable: /bin/bash
            cmd: |
              KUBECONFIG={{ k3s_kubeconfig }} kubectl -n kube-system rollout restart deploy/local-path-provisioner
          register: lp_restart
          changed_when: lp_restart.rc == 0
          failed_when: false
# Play: list the pods in the Longhorn namespace (longhorn_namespace, falling
# back to "longhorn-system"). The task fails only if kubectl itself exits
# non-zero; pod status is printed, not evaluated.
- name: Verify 03-07 Longhorn (namespace pods)
  hosts: k3s_server
  become: true
  run_once: true
  vars:
    k3s_kubeconfig: /etc/rancher/k3s/k3s.yaml
    longhorn_ns: "{{ longhorn_namespace | default('longhorn-system') }}"
  tasks:
    - name: Check longhorn pods
      ansible.builtin.shell:
        executable: /bin/bash
        cmd: |
          set -e
          KUBECONFIG={{ k3s_kubeconfig }} kubectl get pods -n {{ longhorn_ns }} -o wide
      changed_when: false
# Play: remove the Longhorn Helm release and its namespace.
# Runs when the VERIFY_TEARDOWN variable, coerced to a string, equals "1";
# the variable defaults to '1', so teardown happens unless it is overridden.
- name: Teardown 03-07 Longhorn (optional)
  hosts: k3s_server
  become: true
  run_once: true
  vars:
    k3s_kubeconfig: /etc/rancher/k3s/k3s.yaml
    verify_teardown: "{{ (VERIFY_TEARDOWN | default('1')) | string }}"
    longhorn_ns: "{{ longhorn_namespace | default('longhorn-system') }}"
  tasks:
    # Best-effort: the uninstall is capped at 180s with hooks skipped, the
    # namespace delete does not wait, and the task never fails the play.
    - name: Uninstall longhorn helm release when VERIFY_TEARDOWN=1
      when: verify_teardown == "1"
      ansible.builtin.shell:
        executable: /bin/bash
        cmd: |
          set -e
          export KUBECONFIG={{ k3s_kubeconfig }}
          if helm -n {{ longhorn_ns }} list 2>/dev/null | grep -q longhorn; then
            timeout 180s helm -n {{ longhorn_ns }} uninstall longhorn --no-hooks || true
          fi
          kubectl delete ns {{ longhorn_ns }} --ignore-not-found=true --wait=false || true
      changed_when: true
      failed_when: false