refactor: config/apps 目录重组、文档重构、架构图收窄
- 中央:config/(prometheus,alertmanager,grafana)、apps/(tile-cache,topology-editor)
- 边缘:config/(vmagent,blackbox,targets)、apps/(onvif-exporter)
- env: TRAEFIK_PROVIDER、prometheus/env.example 详细说明
- 文档:README/doc 重构,EDGE_CACHE 合并到 EDGE_AGENT_CONFIG
- targets.csv 更新流程说明,ARCHITECTURE 图收窄

Made-with: Cursor
This commit is contained in:
49
central-server/config/prometheus/alert_rules.yml
Normal file
49
central-server/config/prometheus/alert_rules.yml
Normal file
@@ -0,0 +1,49 @@
|
||||
groups:
# Alerts for targets scraped under the "onvif-devices" job and for the
# onvif_* device metrics.
- name: onvif_alerts
  rules:
  # The target's `up` series has been 0 for one minute.
  - alert: ONVIFDeviceDown
    expr: up{job="onvif-devices"} == 0
    for: 1m
    labels:
      severity: critical
    annotations:
      summary: 'ONVIF设备离线'
      description: 'ONVIF设备 {{ $labels.instance }} 已离线超过1分钟'

  # Reported device temperature has stayed above 70 for two minutes.
  # The description renders the value with a °C suffix.
  - alert: ONVIFDeviceHighTemperature
    expr: onvif_device_temperature > 70
    for: 2m
    labels:
      severity: warning
    annotations:
      summary: 'ONVIF设备温度过高'
      description: '设备 {{ $labels.instance }} 温度达到 {{ $value }}°C'

  # Storage usage has stayed above 90 percent for five minutes.
  - alert: ONVIFDeviceLowStorage
    expr: onvif_storage_usage_percent > 90
    for: 5m
    labels:
      severity: warning
    annotations:
      summary: 'ONVIF设备存储空间不足'
      description: '设备 {{ $labels.instance }} 存储使用率达到 {{ $value }}%'
|
||||
|
||||
# Alerts built on the probe_* metrics of the "network-ping" job.
- name: network_alerts
  rules:
  # The probe has reported failure (probe_success == 0) for two minutes.
  - alert: NetworkDeviceDown
    expr: probe_success{job="network-ping"} == 0
    for: 2m
    labels:
      severity: critical
    annotations:
      summary: '网络设备离线'
      description: '网络设备 {{ $labels.instance }} 无法ping通'

  # A probe has taken longer than one second for five minutes.
  # Only successful probes are considered: a probe that times out also
  # reports a long probe_duration_seconds, so without the probe_success
  # guard an offline device would raise this warning on top of
  # NetworkDeviceDown. Comparison operators bind tighter than `and`, so the
  # expression reads as (duration > 1) and (success == 1), matched on the
  # full label set shared by both series.
  - alert: HighNetworkLatency
    expr: >-
      probe_duration_seconds{job="network-ping"} > 1
      and probe_success{job="network-ping"} == 1
    for: 5m
    labels:
      severity: warning
    annotations:
      summary: '网络延迟过高'
      description: '设备 {{ $labels.instance }} 延迟达到 {{ $value }}秒'
|
||||
67
central-server/config/prometheus/prometheus.yml
Normal file
67
central-server/config/prometheus/prometheus.yml
Normal file
@@ -0,0 +1,67 @@
|
||||
# Prometheus 中央服务器配置(由模板生成)
# ============================================
# 说明:
# - 本文件由 prometheus.yml.template 经 envsubst 生成,占位符已替换为实际值
# - 部署时由 deploy.sh 从 .env 读取变量,用 envsubst 生成 prometheus.yml
# - 请勿直接编辑本文件,修改应在 prometheus.yml.template 或 .env 中进行
|
||||
#
|
||||
# 变量来源:central-server/.env(参考 env.example);本文件位于 config/prometheus/
|
||||
# 涉及变量:PROMETHEUS_SCRAPE_INTERVAL, PROMETHEUS_EVALUATION_INTERVAL,
|
||||
# PROMETHEUS_CLUSTER_NAME, VICTORIAMETRICS_PORT,
|
||||
# PROMETHEUS_REMOTE_WRITE_MAX_SAMPLES, PROMETHEUS_REMOTE_WRITE_CAPACITY,
|
||||
# PROMETHEUS_REMOTE_WRITE_MAX_SHARDS
|
||||
# ============================================
|
||||
|
||||
# Global defaults for scraping and rule evaluation.
global:
  scrape_interval: 15s
  evaluation_interval: 15s
  # Labels attached to series and alerts when this server talks to external
  # systems (remote write, Alertmanager).
  external_labels:
    cluster: central-monitoring
|
||||
|
||||
# Remote write: forward the metrics this central Prometheus scrapes from its
# local services to VictoriaMetrics.
# (Edge nodes do not pass through here; their vmagent remote-writes to
# VictoriaMetrics directly.)
remote_write:
  - url: 'http://victoria-metrics:8428/api/v1/write'
    queue_config:
      capacity: 20000
      max_shards: 10
      max_samples_per_send: 10000
|
||||
|
||||
# Scrape configuration: only the Docker containers on the central host
# itself (Prometheus / VictoriaMetrics / Alertmanager / Grafana).
scrape_configs:
  # The central Prometheus itself.
  - job_name: prometheus-central
    scrape_interval: 15s
    static_configs:
      - targets:
          - 'prometheus-central:9090'

  # VictoriaMetrics (it serves a /metrics endpoint).
  - job_name: victoria-metrics
    scrape_interval: 15s
    metrics_path: /metrics
    static_configs:
      - targets:
          - 'victoria-metrics:8428'

  # Alertmanager.
  - job_name: alertmanager
    scrape_interval: 15s
    static_configs:
      - targets:
          - 'alertmanager:9093'

  # Grafana (its metrics feature has to be enabled).
  - job_name: grafana
    scrape_interval: 15s
    metrics_path: /metrics
    static_configs:
      - targets:
          - 'grafana:3000'
|
||||
|
||||
# Alert rules: alert_rules.yml lives in the same directory as prometheus.yml.
# NOTE(review): the rules in alert_rules.yml select job="onvif-devices" and
# job="network-ping". Neither job appears under scrape_configs in this file,
# and the remote_write comment states that edge vmagents write straight to
# VictoriaMetrics. Confirm this Prometheus can actually see those series;
# if it cannot, the rules never fire and would need to be evaluated against
# VictoriaMetrics instead.
rule_files:
  - 'alert_rules.yml'
|
||||
|
||||
# Alertmanager: alert routing and silencing. Fired alerts are sent to the
# instance listed below.
alerting:
  alertmanagers:
    - static_configs:
        - targets: ['alertmanager:9093']
|
||||
67
central-server/config/prometheus/prometheus.yml.template
Normal file
67
central-server/config/prometheus/prometheus.yml.template
Normal file
@@ -0,0 +1,67 @@
|
||||
# Prometheus 中央服务器配置模板
|
||||
# ============================================
|
||||
# 说明:
|
||||
# - 本文件为配置模板,包含 ${变量名} 占位符
|
||||
# - 部署时由 deploy.sh 从 .env 读取变量,用 envsubst 生成 prometheus.yml
|
||||
# - 请勿直接编辑 prometheus.yml,修改应在此模板或 .env 中进行
|
||||
#
|
||||
# 变量来源:central-server/.env(参考 env.example);本文件位于 config/prometheus/
|
||||
# 涉及变量:PROMETHEUS_SCRAPE_INTERVAL, PROMETHEUS_EVALUATION_INTERVAL,
|
||||
# PROMETHEUS_CLUSTER_NAME, VICTORIAMETRICS_PORT,
|
||||
# PROMETHEUS_REMOTE_WRITE_MAX_SAMPLES, PROMETHEUS_REMOTE_WRITE_CAPACITY,
|
||||
# PROMETHEUS_REMOTE_WRITE_MAX_SHARDS
|
||||
# ============================================
|
||||
|
||||
# Global defaults for scraping and rule evaluation.
# The literal "s" suffix follows each substituted value, so both variables
# are expected to hold a bare number of seconds.
global:
  scrape_interval: '${PROMETHEUS_SCRAPE_INTERVAL}s'
  evaluation_interval: '${PROMETHEUS_EVALUATION_INTERVAL}s'
  # Labels attached to series and alerts when this server talks to external
  # systems (remote write, Alertmanager).
  external_labels:
    cluster: '${PROMETHEUS_CLUSTER_NAME}'
|
||||
|
||||
# Remote write: forward the metrics this central Prometheus scrapes from its
# local services to VictoriaMetrics.
# (Edge nodes do not pass through here; their vmagent remote-writes to
# VictoriaMetrics directly.)
remote_write:
  - url: 'http://victoria-metrics:${VICTORIAMETRICS_PORT}/api/v1/write'
    # The queue settings stay unquoted so the rendered values are YAML
    # integers rather than strings.
    queue_config:
      capacity: ${PROMETHEUS_REMOTE_WRITE_CAPACITY}
      max_shards: ${PROMETHEUS_REMOTE_WRITE_MAX_SHARDS}
      max_samples_per_send: ${PROMETHEUS_REMOTE_WRITE_MAX_SAMPLES}
|
||||
|
||||
# Scrape configuration: only the Docker containers on the central host
# itself (Prometheus / VictoriaMetrics / Alertmanager / Grafana).
scrape_configs:
  # The central Prometheus itself.
  - job_name: prometheus-central
    scrape_interval: '${PROMETHEUS_SCRAPE_INTERVAL}s'
    static_configs:
      - targets:
          - 'prometheus-central:9090'

  # VictoriaMetrics (it serves a /metrics endpoint).
  - job_name: victoria-metrics
    scrape_interval: '${PROMETHEUS_SCRAPE_INTERVAL}s'
    metrics_path: /metrics
    static_configs:
      - targets:
          - 'victoria-metrics:${VICTORIAMETRICS_PORT}'

  # Alertmanager.
  - job_name: alertmanager
    scrape_interval: '${PROMETHEUS_SCRAPE_INTERVAL}s'
    static_configs:
      - targets:
          - 'alertmanager:9093'

  # Grafana (its metrics feature has to be enabled).
  - job_name: grafana
    scrape_interval: '${PROMETHEUS_SCRAPE_INTERVAL}s'
    metrics_path: /metrics
    static_configs:
      - targets:
          - 'grafana:3000'
|
||||
|
||||
# Alert rules: alert_rules.yml lives in the same directory as prometheus.yml.
# NOTE(review): the rules in alert_rules.yml select job="onvif-devices" and
# job="network-ping". Neither job appears under scrape_configs in this
# template, and the remote_write comment states that edge vmagents write
# straight to VictoriaMetrics. Confirm this Prometheus can actually see
# those series; if it cannot, the rules never fire and would need to be
# evaluated against VictoriaMetrics instead.
rule_files:
  - 'alert_rules.yml'
|
||||
|
||||
# Alertmanager: alert routing and silencing. Fired alerts are sent to the
# instance listed below.
alerting:
  alertmanagers:
    - static_configs:
        - targets: ['alertmanager:9093']
|
||||
Reference in New Issue
Block a user