- 新增 deploy-lab.sh(k3s/longhorn/nginx 铺栈)与 ssh/run-phase2-k3s-on-ylc61-as-jack.sh - verify.sh:flow/preflight、VERIFY_TEARDOWN 默认、注释与 §2 对应 - 更新 smoke-verify、README、.env.verify.example、根 README 与主要 playbook 头注释 - k3s-delete-lab-stacks 标明重度清场语义 Made-with: Cursor
271 lines
11 KiB
YAML
271 lines
11 KiB
YAML
---
# 部署:docs/00-05 §2 步骤 3「正式安装类」——全集群 K3s + 节点准备(非 verify.sh 单条 teardown)。
# 前置:§2 步骤 1 接入(inventory/SSH);步骤 2 可选 scripts/deploy-lab.sh 在 K3S_PREPARE_STORAGE=true 时先跑 k3s-prepare-storage.yml。
# 入口:仓库根 ./scripts/deploy-lab.sh k3s,或 ansible-playbook -i ansible/inventory.ini ansible/playbooks/k3s-init-and-install.yml

# Optional pre-flight play: confirms that /storage is its own mount and is not
# backed by the same source device as the root filesystem.
- name: Verify /storage is a separate mount (optional)
  hosts: k3s_nodes
  become: true
  tasks:
    # The whole block is opt-in: it only runs when k3s_verify_storage_mount
    # evaluates to true (it defaults to false).
    - name: Check / and /storage mount sources
      block:
        # Source device of the root filesystem (read-only query).
        - name: Get mount source for /
          ansible.builtin.command:
            cmd: findmnt -n -o SOURCE /
          changed_when: false
          register: mnt_root

        # Same query for /storage. A non-zero exit is tolerated here so that
        # the assert below reports the problem with its own message.
        - name: Get mount source for /storage
          ansible.builtin.command:
            cmd: findmnt -n -o SOURCE /storage
          changed_when: false
          failed_when: false
          register: mnt_storage

        # Fails unless findmnt succeeded for /storage, both sources are
        # non-empty, and the two sources differ.
        - name: Assert /storage is mounted on a different device than /
          ansible.builtin.assert:
            fail_msg: "/storage must be a mount point on a block device different from /. See docs/00-04-部署环境说明.md and docs/01-06-节点初始化-ansible-实践.md"
            that:
              - mnt_storage.rc == 0
              - (mnt_root.stdout | trim | length) > 0
              - (mnt_storage.stdout | trim | length) > 0
              - (mnt_root.stdout | trim) != (mnt_storage.stdout | trim)
      when: k3s_verify_storage_mount | default(false) | bool
|
||
|
||
# Base node preparation: timezone, basic packages, /etc/hosts entries and the
# firewalld ports k3s needs. Runs on every host in k3s_nodes.
- name: Init base system
  hosts: k3s_nodes
  become: true
  tasks:
    # Probe firewalld once; the registered result gates the firewall tasks
    # below. A failing probe is tolerated (failed_when: false).
    - name: Check if firewalld is running
      ansible.builtin.command: firewall-cmd --state
      register: firewalld_state
      changed_when: false
      failed_when: false

    # Optional: set the system timezone from the `timezone` variable.
    # NOTE(review): this command runs (and reports "changed") on every run
    # whenever `timezone` is set.
    - name: Set timezone
      ansible.builtin.command: timedatectl set-timezone {{ timezone }}
      when: timezone is defined and timezone != ""

    # Basic tooling required for the k3s installation (curl, git).
    - name: Install basic packages
      ansible.builtin.package:
        name:
          - curl
          - git
        state: present

    # Optional: make every k3s node resolvable by its inventory name.
    # The inventory name is regex-escaped so that characters such as "." in
    # an FQDN only match themselves when looking for an existing line.
    - name: Ensure /etc/hosts has entries for all k3s nodes
      ansible.builtin.lineinfile:
        path: /etc/hosts
        regexp: '^\S+\s+{{ item | regex_escape }}\s*$'
        line: "{{ hostvars[item]['ansible_host'] }} {{ item }}"
        state: present
      loop: "{{ groups['k3s_nodes'] }}"
      when:
        - k3s_manage_hosts | default(true) | bool
        - hostvars[item]['ansible_host'] is defined

    # Ports required by k3s: 8472/udp (flannel VXLAN) on all nodes and
    # 6443/tcp (API) on the server only. They must be open before k3s is
    # installed, otherwise workers cannot connect and flannel cannot build
    # its overlay.
    #
    # firewall-cmd emits an ALREADY_ENABLED warning when the rule already
    # exists; that is used to report "changed" only when a rule was added.
    - name: Open flannel VXLAN port (8472/udp) on all k3s nodes
      ansible.builtin.command: firewall-cmd --permanent --add-port=8472/udp
      register: fw_open_vxlan
      changed_when: "'ALREADY_ENABLED' not in (fw_open_vxlan.stdout ~ fw_open_vxlan.stderr)"
      when:
        - k3s_manage_firewalld | default(true) | bool
        - firewalld_state.stdout | default('') == 'running'

    # The k3s API port is only opened on hosts in the k3s_server group.
    - name: Open k3s API port (6443/tcp) on server
      ansible.builtin.command: firewall-cmd --permanent --add-port=6443/tcp
      register: fw_open_api
      changed_when: "'ALREADY_ENABLED' not in (fw_open_api.stdout ~ fw_open_api.stderr)"
      when:
        - k3s_manage_firewalld | default(true) | bool
        - inventory_hostname in groups['k3s_server']
        - firewalld_state.stdout | default('') == 'running'

    # Load the permanent rules into the running firewall, but only when one
    # of the tasks above actually added a rule, so re-runs do not reload
    # firewalld needlessly.
    - name: Reload firewalld after opening k3s ports
      ansible.builtin.command: firewall-cmd --reload
      when:
        - k3s_manage_firewalld | default(true) | bool
        - firewalld_state.stdout | default('') == 'running'
        - fw_open_vxlan is changed or fw_open_api is changed
|
||
|
||
# Installs and starts the k3s server on the hosts in k3s_server.
- name: Install k3s server
  hosts: k3s_server
  become: true
  tasks:
    # Runs the upstream install script in server mode with a custom data
    # directory. Skipped when "<k3s_data_dir>/server" already exists.
    #
    # pipefail makes a failed download fail the task: without it the
    # pipeline's exit status is that of `sh`, which exits 0 on empty input.
    # pipefail needs bash, hence the explicit executable.
    - name: Download and install k3s server
      ansible.builtin.shell: |
        set -o pipefail
        curl -sfL https://get.k3s.io | INSTALL_K3S_EXEC="server --data-dir={{ k3s_data_dir }}" sh -
      args:
        executable: /bin/bash
        creates: "{{ k3s_data_dir }}/server"
|
||
|
||
# Joins every host in k3s_worker to the cluster as a k3s agent.
- name: Install k3s agent (workers)
  hosts: k3s_worker
  become: true
  serial: 1  # install one worker at a time to limit parallel download load
  tasks:
    # Read the cluster join token from the first server. The slurp result
    # holds the secret (base64-encoded), so it is kept out of the logs.
    - name: Read k3s token from first server
      ansible.builtin.slurp:
        src: "{{ k3s_data_dir }}/server/token"
      delegate_to: "{{ groups['k3s_server'][0] }}"
      run_once: true
      register: k3s_token_from_server
      no_log: true

    # Store the decoded token as a host fact for the install task below.
    - name: Set fact for k3s token on workers
      ansible.builtin.set_fact:
        k3s_token: "{{ k3s_token_from_server.content | b64decode | trim }}"
      no_log: true

    # Runs the upstream install script in agent mode. Skipped when
    # "<k3s_data_dir>/agent" already exists.
    #
    # - The server URL, token and exec arguments are passed through the task
    #   environment instead of being interpolated into the command line, so
    #   the token does not appear in the command text of the task result.
    # - pipefail (bash) makes a failed download fail the task instead of
    #   feeding empty input to `sh`.
    # - async/poll: allow up to 600 s, polling every 15 s.
    - name: Install k3s agent
      ansible.builtin.shell: |
        set -o pipefail
        curl -sfL https://get.k3s.io | sh -
      args:
        executable: /bin/bash
        creates: "{{ k3s_data_dir }}/agent"
      environment:
        K3S_URL: "https://{{ k3s_server_ip }}:6443"
        K3S_TOKEN: "{{ k3s_token }}"
        INSTALL_K3S_EXEC: "agent --data-dir={{ k3s_data_dir }}"
      async: 600
      poll: 15
|
||
|
||
# firewalld baseline for k3s: put the CNI interfaces flannel.1 and cni0 into
# the "trusted" zone on every node.
- name: Configure firewalld baseline for k3s (flannel.1 / cni0 -> trusted)
  hosts: k3s_nodes
  become: true
  tasks:
    # The whole block is skipped when k3s_manage_firewalld is false.
    - name: Trust k3s CNI interfaces in firewalld
      when: k3s_manage_firewalld | default(true) | bool
      block:
        # Probe firewalld; a failing probe is tolerated and simply causes
        # the remaining tasks of the block to be skipped.
        - name: Check if firewalld is available
          ansible.builtin.command: firewall-cmd --state
          register: firewalld_check
          changed_when: false
          failed_when: false

        # Poll once per second, up to 120 times, until both flannel.1 and
        # cni0 exist; fail if they never appear. The loop only reads link
        # state, so it never reports "changed".
        - name: Wait for CNI interfaces (flannel.1, cni0) to appear
          ansible.builtin.shell: |
            for i in $(seq 1 120); do
              ip link show flannel.1 >/dev/null 2>&1 && ip link show cni0 >/dev/null 2>&1 && exit 0
              sleep 1
            done
            exit 1
          changed_when: false
          when: firewalld_check.stdout | default('') == 'running'

        # Add each interface to the trusted zone, first in the runtime
        # configuration and then in the permanent one.
        # NOTE(review): the task's exit status is that of the second
        # command only; a failure of the runtime add is not detected.
        - name: Add flannel.1 and cni0 to firewalld trusted zone (runtime + permanent)
          ansible.builtin.shell: |
            firewall-cmd --zone=trusted --add-interface={{ item }}
            firewall-cmd --permanent --zone=trusted --add-interface={{ item }}
          loop:
            - flannel.1
            - cni0
          when: firewalld_check.stdout | default('') == 'running'

        # Reload firewalld after the interface changes.
        - name: Reload firewalld
          ansible.builtin.command: firewall-cmd --reload
          when: firewalld_check.stdout | default('') == 'running'
|
||
|
||
# Rewrites the CoreDNS "forward" directive so that it points at the servers in
# coredns_forward_servers instead of /etc/resolv.conf. Every task is skipped
# when k3s_manage_coredns is false.
- name: Configure CoreDNS (IPv4 upstream for ACME)
  hosts: k3s_server
  become: true
  run_once: true
  vars:
    k3s_kubeconfig: /etc/rancher/k3s/k3s.yaml
  tasks:
    # Block until the coredns deployment has rolled out (120 s timeout).
    # Read-only, so it never reports "changed".
    - name: Wait for CoreDNS deployment to be ready
      ansible.builtin.shell: |
        KUBECONFIG={{ k3s_kubeconfig }} kubectl rollout status deployment/coredns -n kube-system --timeout=120s
      changed_when: false
      when: k3s_manage_coredns | default(true) | bool

    # Dump the current Corefile into a temporary file on the server.
    - name: Extract CoreDNS Corefile from ConfigMap
      ansible.builtin.shell: |
        KUBECONFIG={{ k3s_kubeconfig }} kubectl get configmap coredns -n kube-system -o jsonpath='{.data.Corefile}' > /tmp/coredns-corefile.txt
      when: k3s_manage_coredns | default(true) | bool

    # Replace "forward . /etc/resolv.conf" with the configured upstreams.
    # The registered result tells the next task whether anything changed.
    - name: Patch Corefile forward to IPv4 (avoid IPv6 upstream in Pod network)
      ansible.builtin.replace:
        path: /tmp/coredns-corefile.txt
        regexp: 'forward \. /etc/resolv\.conf'
        replace: 'forward . {{ coredns_forward_servers }}'
      register: coredns_patched
      when: k3s_manage_coredns | default(true) | bool

    # Only runs when the Corefile was actually modified: re-create the
    # ConfigMap from the patched file, restart CoreDNS and wait for the
    # rollout (60 s timeout).
    # `set -euo pipefail` (bash) stops at the first failing command and
    # propagates failures inside the create | apply pipeline; without it
    # only the status of the final command decided the task result.
    - name: Apply patched CoreDNS ConfigMap and restart
      ansible.builtin.shell: |
        set -euo pipefail
        KUBECONFIG={{ k3s_kubeconfig }} kubectl create configmap coredns --from-file=Corefile=/tmp/coredns-corefile.txt -n kube-system --dry-run=client -o yaml | KUBECONFIG={{ k3s_kubeconfig }} kubectl apply -f -
        KUBECONFIG={{ k3s_kubeconfig }} kubectl rollout restart deployment/coredns -n kube-system
        KUBECONFIG={{ k3s_kubeconfig }} kubectl rollout status deployment/coredns -n kube-system --timeout=60s
      args:
        executable: /bin/bash
      when:
        - k3s_manage_coredns | default(true) | bool
        - coredns_patched is changed

    # Clean up the temporary Corefile.
    - name: Remove temp Corefile
      ansible.builtin.file:
        path: /tmp/coredns-corefile.txt
        state: absent
      when: k3s_manage_coredns | default(true) | bool
|
||
|
||
# Post-install verification: label nodes, then print Traefik/svclb pods, the
# node list and an HTTP/HTTPS reachability check for every node.
- name: 安装后验证 - traefik / nodes / curl
  hosts: k3s_server
  become: true
  run_once: true
  vars:
    k3s_kubeconfig: /etc/rancher/k3s/k3s.yaml
  tasks:
    # Label the server nodes as control-plane after installation (the 02-05
    # matrix M1 needs this label for scheduling). Node names are expected to
    # match the inventory short hostnames.
    - name: Label control-plane nodes (k3s 不默认打标,M1 需此标签)
      ansible.builtin.shell: |
        KUBECONFIG={{ k3s_kubeconfig }} kubectl label node {{ item }} node-role.kubernetes.io/control-plane= --overwrite
      loop: "{{ groups['k3s_server'] | default([]) }}"

    # Optional: label the worker nodes (needed by the 02-05 matrix M3).
    - name: 可选 - 为工作节点打 worker 标签(02-05 矩阵 M3 需要)
      ansible.builtin.shell: |
        KUBECONFIG={{ k3s_kubeconfig }} kubectl label node {{ item }} node-role.kubernetes.io/worker= --overwrite
      loop: "{{ groups['k3s_worker'] | default([]) }}"
      when: k3s_manage_role_labels | default(true) | bool

    # List the Traefik / svclb pods in kube-system (header line included).
    - name: kubectl get pods -n kube-system(traefik / svclb)
      ansible.builtin.shell: KUBECONFIG={{ k3s_kubeconfig }} kubectl get pods -n kube-system -o wide | grep -E 'NAME|traefik|svclb'
      register: verify_traefik
      changed_when: false

    # Print the pod list collected above, one line per loop item.
    - name: ">>> Traefik 相关 Pods"
      ansible.builtin.debug:
        msg: "{{ item }}"
      loop: "{{ verify_traefik.stdout_lines }}"

    # Query the nodes currently known to the cluster.
    - name: kubectl get nodes
      ansible.builtin.shell: KUBECONFIG={{ k3s_kubeconfig }} kubectl get nodes
      register: verify_nodes
      changed_when: false

    # Print the node list so status and roles can be checked.
    - name: ">>> kubectl get nodes"
      ansible.builtin.debug:
        msg: "{{ item }}"
      loop: "{{ verify_nodes.stdout_lines }}"

    # Probe port 80 and 443 of every node with curl and print the HTTP
    # status code, or "fail" when curl itself fails. Hosts without an
    # ansible_host value are left out of the list instead of aborting the
    # task with an undefined-variable error; this matches the guard used
    # when /etc/hosts entries are written.
    - name: curl 测试各节点 80/443 可达性
      ansible.builtin.shell: |
        for ip in {{ groups['k3s_nodes'] | map('extract', hostvars) | selectattr('ansible_host', 'defined') | map(attribute='ansible_host') | join(' ') }}; do
          c80=$(curl -sk -o /dev/null -w "%{http_code}" --connect-timeout 2 http://$ip 2>/dev/null) || c80="fail"
          c443=$(curl -sk -o /dev/null -w "%{http_code}" --connect-timeout 2 https://$ip 2>/dev/null) || c443="fail"
          echo "$ip: 80=$c80 443=$c443"
        done
      register: verify_curl
      changed_when: false

    # Print the curl results, one line per node.
    - name: ">>> curl 结果"
      ansible.builtin.debug:
        msg: "{{ item }}"
      loop: "{{ verify_curl.stdout_lines }}"
|