---
# k3s cluster bootstrap playbook.
#
# Plays, in order:
#   1. Optionally assert that /storage is mounted from a different source than /.
#   2. Base system setup (timezone, packages, /etc/hosts, firewalld ports).
#   3. Install the k3s server.
#   4. Install the k3s agents, one host at a time.
#   5. Put the flannel.1 / cni0 interfaces into the firewalld trusted zone.
#   6. Point the CoreDNS forwarder at explicit upstream servers.
#   7. Post-install verification (labels, pods, nodes, HTTP reachability).
#
# Variables read by this playbook: k3s_verify_storage_mount, timezone,
# k3s_manage_hosts, k3s_manage_firewalld, k3s_data_dir, k3s_server_ip,
# k3s_manage_coredns, coredns_forward_servers, k3s_manage_role_labels.
# Inventory groups used: k3s_nodes, k3s_server, k3s_worker.

- name: Verify /storage is a separate mount (optional)
  hosts: k3s_nodes
  become: true
  tasks:
    # Runs only when k3s_verify_storage_mount is set to a true value.
    - name: Check / and /storage mount sources
      when: k3s_verify_storage_mount | default(false) | bool
      block:
        - name: Get mount source for /
          ansible.builtin.command: findmnt -n -o SOURCE /
          register: mnt_root
          changed_when: false

        # failed_when is disabled so the assert below can report the problem
        # with a descriptive message instead of a bare command failure.
        - name: Get mount source for /storage
          ansible.builtin.command: findmnt -n -o SOURCE /storage
          register: mnt_storage
          changed_when: false
          failed_when: false

        - name: Assert /storage is mounted on a different device than /
          ansible.builtin.assert:
            that:
              - mnt_storage.rc == 0
              - (mnt_root.stdout | trim | length) > 0
              - (mnt_storage.stdout | trim | length) > 0
              - (mnt_root.stdout | trim) != (mnt_storage.stdout | trim)
            fail_msg: >-
              /storage must be a mount point on a block device different from /.
              See docs/00-04-部署环境说明.md and docs/01-06-节点初始化-ansible-实践.md

- name: Init base system
  hosts: k3s_nodes
  become: true
  tasks:
    # Record the firewalld state on this node; later tasks are conditional on
    # it. The command is allowed to fail (e.g. firewalld not running).
    - name: Check if firewalld is running
      ansible.builtin.command: firewall-cmd --state
      register: firewalld_state
      changed_when: false
      failed_when: false

    # Set the system timezone from the `timezone` variable (optional).
    # NOTE(review): as a plain command this reports "changed" on every run.
    - name: Set timezone
      ansible.builtin.command: timedatectl set-timezone {{ timezone }}
      when: timezone is defined and timezone != ""

    # Install the basic tools used by the k3s installation (curl, git).
    - name: Install basic packages
      ansible.builtin.package:
        name:
          - curl
          - git
        state: present

    # Ensure /etc/hosts resolves every k3s node name (optional).
    # The host name is regex-escaped so that dots in a name match literally
    # instead of acting as regex wildcards.
    - name: Ensure /etc/hosts has entries for all k3s nodes
      ansible.builtin.lineinfile:
        path: /etc/hosts
        regexp: '^\S+\s+{{ item | regex_escape }}\s*$'
        line: "{{ hostvars[item]['ansible_host'] }} {{ item }}"
        state: present
      loop: "{{ groups['k3s_nodes'] }}"
      when:
        - k3s_manage_hosts | default(true) | bool
        - hostvars[item]['ansible_host'] is defined

    # Ports required by k3s: 8472/udp (flannel VXLAN) on all nodes and
    # 6443/tcp (API) on the server only. They must be opened before k3s is
    # installed, otherwise workers cannot connect and flannel cannot build
    # its overlay.

    # Open 8472/udp for flannel VXLAN on every k3s node.
    - name: Open flannel VXLAN port (8472/udp) on all k3s nodes
      ansible.builtin.command: firewall-cmd --permanent --add-port=8472/udp
      when:
        - k3s_manage_firewalld | default(true) | bool
        - firewalld_state.stdout | default('') == 'running'

    # Open the k3s API port 6443/tcp on server nodes.
    - name: Open k3s API port (6443/tcp) on server
      ansible.builtin.command: firewall-cmd --permanent --add-port=6443/tcp
      when:
        - k3s_manage_firewalld | default(true) | bool
        - inventory_hostname in groups['k3s_server']
        - firewalld_state.stdout | default('') == 'running'

    # Reload firewalld so the permanent port rules take effect.
    - name: Reload firewalld after opening k3s ports
      ansible.builtin.command: firewall-cmd --reload
      when:
        - k3s_manage_firewalld | default(true) | bool
        - firewalld_state.stdout | default('') == 'running'

- name: Install k3s server
  hosts: k3s_server
  become: true
  tasks:
    # Download the installer and run it in server mode. Skipped when
    # "<k3s_data_dir>/server" already exists.
    # pipefail makes a failed download fail the task; without it the shell
    # only reports the exit status of `sh`, which succeeds on empty input.
    - name: Download and install k3s server
      ansible.builtin.shell: |
        set -o pipefail
        curl -sfL https://get.k3s.io | INSTALL_K3S_EXEC="server --data-dir={{ k3s_data_dir }}" sh -
      args:
        executable: /bin/bash
        creates: "{{ k3s_data_dir }}/server"

- name: Install k3s agent (workers)
  hosts: k3s_worker
  become: true
  serial: 1  # one host at a time, to reduce network load from parallel downloads
  tasks:
    # Read the cluster token from the first server node (once per batch).
    # no_log keeps the token out of task output.
    - name: Read k3s token from first server
      ansible.builtin.slurp:
        src: "{{ k3s_data_dir }}/server/token"
      delegate_to: "{{ groups['k3s_server'][0] }}"
      run_once: true
      register: k3s_token_from_server
      no_log: true

    # Store the decoded token as a fact for the install task below.
    - name: Set fact for k3s token on workers
      ansible.builtin.set_fact:
        k3s_token: "{{ k3s_token_from_server.content | b64decode | trim }}"
      no_log: true

    # Download the installer and run it in agent mode. Skipped when
    # "<k3s_data_dir>/agent" already exists.
    # K3S_URL / K3S_TOKEN are passed through the task environment rather than
    # the command line, so the token is not part of the command string that
    # the module returns; pipefail makes a failed download fail the task.
    - name: Install k3s agent
      ansible.builtin.shell: |
        set -o pipefail
        curl -sfL https://get.k3s.io | INSTALL_K3S_EXEC="agent --data-dir={{ k3s_data_dir }}" sh -
      args:
        executable: /bin/bash
        creates: "{{ k3s_data_dir }}/agent"
      environment:
        K3S_URL: "https://{{ k3s_server_ip }}:6443"
        K3S_TOKEN: "{{ k3s_token }}"
      async: 600
      poll: 15

- name: Configure firewalld baseline for k3s (flannel.1 / cni0 -> trusted)
  hosts: k3s_nodes
  become: true
  tasks:
    # firewalld baseline for k3s: add flannel.1 / cni0 to the trusted zone.
    # The whole block is skipped when k3s_manage_firewalld is false.
    - name: Trust k3s CNI interfaces in firewalld
      when: k3s_manage_firewalld | default(true) | bool
      block:
        # Check whether firewalld is usable on this node.
        - name: Check if firewalld is available
          ansible.builtin.command: firewall-cmd --state
          register: firewalld_check
          changed_when: false
          failed_when: false

        # Poll once per second, up to 120 times, until both interfaces exist;
        # the task fails if they have not appeared by then.
        - name: Wait for CNI interfaces (flannel.1, cni0) to appear
          ansible.builtin.shell: |
            for i in $(seq 1 120); do
              ip link show flannel.1 >/dev/null 2>&1 && ip link show cni0 >/dev/null 2>&1 && exit 0
              sleep 1
            done
            exit 1
          changed_when: false
          when: firewalld_check.stdout | default('') == 'running'

        # Add each interface to the trusted zone, runtime and permanent.
        - name: Add flannel.1 and cni0 to firewalld trusted zone (runtime + permanent)
          ansible.builtin.shell: |
            firewall-cmd --zone=trusted --add-interface={{ item }}
            firewall-cmd --permanent --zone=trusted --add-interface={{ item }}
          loop:
            - flannel.1
            - cni0
          when: firewalld_check.stdout | default('') == 'running'

        # Reload firewalld after the zone changes.
        - name: Reload firewalld
          ansible.builtin.command: firewall-cmd --reload
          when: firewalld_check.stdout | default('') == 'running'

- name: Configure CoreDNS (IPv4 upstream for ACME)
  hosts: k3s_server
  become: true
  run_once: true
  vars:
    k3s_kubeconfig: /etc/rancher/k3s/k3s.yaml
  tasks:
    # Every task in this play is skipped when k3s_manage_coredns is false.

    # Read-only wait, so it never reports "changed".
    - name: Wait for CoreDNS deployment to be ready
      ansible.builtin.shell: |
        KUBECONFIG={{ k3s_kubeconfig }} kubectl rollout status deployment/coredns -n kube-system --timeout=120s
      changed_when: false
      when: k3s_manage_coredns | default(true) | bool

    # Dump the current Corefile to a temporary file for patching.
    - name: Extract CoreDNS Corefile from ConfigMap
      ansible.builtin.shell: |
        KUBECONFIG={{ k3s_kubeconfig }} kubectl get configmap coredns -n kube-system -o jsonpath='{.data.Corefile}' > /tmp/coredns-corefile.txt
      when: k3s_manage_coredns | default(true) | bool

    # Replace "forward . /etc/resolv.conf" with the configured upstreams.
    # Registers "changed" only when the original forward line was present.
    - name: Patch Corefile forward to IPv4 (avoid IPv6 upstream in Pod network)
      ansible.builtin.replace:
        path: /tmp/coredns-corefile.txt
        regexp: 'forward \. /etc/resolv\.conf'
        replace: 'forward . {{ coredns_forward_servers }}'
      register: coredns_patched
      when: k3s_manage_coredns | default(true) | bool

    # Apply the patched Corefile and restart CoreDNS, only when the patch
    # step changed the file. `set -eo pipefail` stops at the first failing
    # command so a failed create/apply is not masked by the later commands.
    - name: Apply patched CoreDNS ConfigMap and restart
      ansible.builtin.shell: |
        set -eo pipefail
        KUBECONFIG={{ k3s_kubeconfig }} kubectl create configmap coredns --from-file=Corefile=/tmp/coredns-corefile.txt -n kube-system --dry-run=client -o yaml | KUBECONFIG={{ k3s_kubeconfig }} kubectl apply -f -
        KUBECONFIG={{ k3s_kubeconfig }} kubectl rollout restart deployment/coredns -n kube-system
        KUBECONFIG={{ k3s_kubeconfig }} kubectl rollout status deployment/coredns -n kube-system --timeout=60s
      args:
        executable: /bin/bash
      when:
        - k3s_manage_coredns | default(true) | bool
        - coredns_patched is changed

    - name: Remove temp Corefile
      ansible.builtin.file:
        path: /tmp/coredns-corefile.txt
        state: absent
      when: k3s_manage_coredns | default(true) | bool

- name: 安装后验证 - traefik / nodes / curl
  hosts: k3s_server
  become: true
  run_once: true
  vars:
    k3s_kubeconfig: /etc/rancher/k3s/k3s.yaml
  tasks:
    # After installation, label the control nodes with the control-plane role
    # (matrix M1 in 02-05 needs this label for scheduling). Node names are
    # expected to match the inventory short host names (ylc61~ylc64).
    - name: Label control-plane nodes (k3s 不默认打标,M1 需此标签)
      ansible.builtin.shell: |
        KUBECONFIG={{ k3s_kubeconfig }} kubectl label node {{ item }} node-role.kubernetes.io/control-plane= --overwrite
      loop: "{{ groups['k3s_server'] | default([]) }}"

    # Optional: label the worker nodes with the worker role (needed by
    # matrix M3 in 02-05).
    - name: 可选 - 为工作节点打 worker 标签(02-05 矩阵 M3 需要)
      ansible.builtin.shell: |
        KUBECONFIG={{ k3s_kubeconfig }} kubectl label node {{ item }} node-role.kubernetes.io/worker= --overwrite
      loop: "{{ groups['k3s_worker'] | default([]) }}"
      when: k3s_manage_role_labels | default(true) | bool

    # List the Traefik / svclb pods in the kube-system namespace.
    # pipefail surfaces a kubectl failure directly instead of via grep.
    - name: kubectl get pods -n kube-system(traefik / svclb)
      ansible.builtin.shell: |
        set -o pipefail
        KUBECONFIG={{ k3s_kubeconfig }} kubectl get pods -n kube-system -o wide | grep -E 'NAME|traefik|svclb'
      args:
        executable: /bin/bash
      register: verify_traefik
      changed_when: false

    # Print the pod list collected by the previous task.
    - name: ">>> Traefik 相关 Pods"
      ansible.builtin.debug:
        msg: "{{ item }}"
      loop: "{{ verify_traefik.stdout_lines }}"

    # Query the list of nodes in the cluster.
    - name: kubectl get nodes
      ansible.builtin.shell: KUBECONFIG={{ k3s_kubeconfig }} kubectl get nodes
      register: verify_nodes
      changed_when: false

    # Print the node list so node status and roles can be checked.
    - name: ">>> kubectl get nodes"
      ansible.builtin.debug:
        msg: "{{ item }}"
      loop: "{{ verify_nodes.stdout_lines }}"

    # Probe ports 80 and 443 on every node with curl and print the HTTP
    # status code, or "fail" when curl exits non-zero. Hosts without an
    # ansible_host value are skipped rather than breaking the template.
    - name: curl 测试各节点 80/443 可达性
      ansible.builtin.shell: |
        for ip in {{ groups['k3s_nodes'] | map('extract', hostvars) | selectattr('ansible_host', 'defined') | map(attribute='ansible_host') | join(' ') }}; do
          c80=$(curl -sk -o /dev/null -w "%{http_code}" --connect-timeout 2 http://$ip 2>/dev/null) || c80="fail"
          c443=$(curl -sk -o /dev/null -w "%{http_code}" --connect-timeout 2 https://$ip 2>/dev/null) || c443="fail"
          echo "$ip: 80=$c80 443=$c443"
        done
      register: verify_curl
      changed_when: false

    - name: ">>> curl 结果"
      ansible.builtin.debug:
        msg: "{{ item }}"
      loop: "{{ verify_curl.stdout_lines }}"