完善中央与边缘部署、远程写入与监控文档

- 增加中央与边缘完整配置和部署脚本
- 引入 VictoriaMetrics 数据源与 remote_write 故障排查说明
- 新增 edge-agent 配置脚本、ONVIF 自建 exporter 与 ping 监控示例

Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
Super User
2026-02-25 04:24:40 -05:00
parent 9e37f79a36
commit 95a09fd9d8
52 changed files with 5978 additions and 0 deletions

View File

@@ -0,0 +1,33 @@
# Blackbox exporter probe modules. Each key under `modules` is a module name
# that a scrape request selects with the `module` URL parameter.
modules:
  # ICMP echo (ping) probe that prefers IPv4 addresses.
  icmp:
    prober: icmp
    timeout: 5s
    icmp:
      preferred_ip_protocol: "ip4"
  # Plain TCP connect probe.
  tcp_connect:
    prober: tcp
    timeout: 5s
  # HTTP GET probe; only status code 200 is accepted.
  http_2xx:
    prober: http
    timeout: 5s
    http:
      valid_http_versions: ["HTTP/1.1", "HTTP/2.0"]
      valid_status_codes: [200]
      method: GET
  # HTTP POST probe sending an empty JSON object; only status code 200 is accepted.
  http_post_2xx:
    prober: http
    timeout: 5s
    http:
      valid_http_versions: ["HTTP/1.1", "HTTP/2.0"]
      valid_status_codes: [200]
      method: POST
      headers:
        Content-Type: application/json
      body: '{}'
  # TCP connect probe over TLS; certificate verification stays enabled.
  tcp_connect_tls:
    prober: tcp
    timeout: 5s
    tcp:
      tls: true
      tls_config:
        insecure_skip_verify: false

View File

@@ -0,0 +1,63 @@
#!/bin/bash
# Convert a legacy ONVIF device CSV into a Prometheus file_sd style JSON file.
#
# Usage: ./csv-to-json.sh [devices.csv] [onvif-targets.json]
#
# Input columns (the first line is treated as a header and skipped):
#   ip,device_type,model,location,username,password,onvif_port
#
# The JSON is written to the file named by the second argument. Progress
# messages and a preview go to stdout, so do not redirect stdout into the
# target file.
# NOTE(review): rows are split on every comma; quoted CSV fields containing
# commas are not supported.
set -e
set -o pipefail

CSV_FILE=${1:-"devices.csv"}
OUTPUT_FILE=${2:-"onvif-targets.json"}

# jq builds and validates the JSON, so it is a hard requirement.
if ! command -v jq &> /dev/null; then
  echo "❌ jq未安装请先安装jq:"
  echo " Ubuntu/Debian: sudo apt-get install jq"
  echo " CentOS/RHEL: sudo yum install jq"
  echo " Alpine: apk add jq"
  exit 1
fi

# The input CSV must exist.
if [ ! -f "$CSV_FILE" ]; then
  echo "❌ CSV文件 $CSV_FILE 不存在"
  exit 1
fi

echo "🔄 正在转换 $CSV_FILE -> $OUTPUT_FILE..."

# Build the whole document in memory first so that a failed conversion never
# truncates an existing output file.
#  - tr strips CRs so files saved with Windows line endings do not leak "\r"
#    into the last column.
#  - "|| [ -n "$ip" ]" keeps the final row when the file lacks a trailing
#    newline.
#  - jq --arg escapes quotes/backslashes in values (e.g. passwords), which
#    plain string interpolation would turn into invalid JSON.
json=$(
  tail -n +2 "$CSV_FILE" | tr -d '\r' |
    while IFS=',' read -r ip device_type model location username password onvif_port ||
      [ -n "$ip" ]; do
      # Skip blank rows instead of emitting an empty target.
      [ -n "$ip" ] || continue
      jq -n \
        --arg ip "$ip" \
        --arg device_type "$device_type" \
        --arg model "$model" \
        --arg location "$location" \
        --arg username "$username" \
        --arg password "$password" \
        --arg onvif_port "$onvif_port" \
        '{
          targets: [$ip],
          labels: ({
            device_type: $device_type,
            model: $model,
            location: $location,
            username: $username,
            password: $password
          }
          # onvif_port is only recorded when it differs from the default 80.
          + (if $onvif_port != "" and $onvif_port != "80"
             then {onvif_port: $onvif_port} else {} end))
        }' || exit 1
    done | jq -s '.'
)

# Overwrite in place (no rename): the file may be bind-mounted into a
# container, and a single-file bind mount keeps following the original inode.
printf '%s\n' "$json" > "$OUTPUT_FILE"

echo "✅ 转换完成!"
echo "📊 生成了 $(jq length "$OUTPUT_FILE") 个设备配置"
echo "📁 输出文件: $OUTPUT_FILE"
echo ""
echo "🔍 预览生成的JSON:"
jq . "$OUTPUT_FILE"

View File

@@ -0,0 +1,61 @@
#!/bin/bash
# Convert a legacy ping-target CSV into a Prometheus file_sd style JSON file.
#
# Usage: ./csv-to-ping-json.sh [ping-targets.csv] [ping-targets.json]
#
# Input columns (the first line is treated as a header and skipped):
#   ip,device,group,network
#
# The JSON is written to the file named by the second argument. Progress
# messages and a preview go to stdout, so do not redirect stdout into the
# target file.
# NOTE(review): rows are split on every comma; quoted CSV fields containing
# commas are not supported.
set -e
set -o pipefail

CSV_FILE=${1:-"ping-targets.csv"}
OUTPUT_FILE=${2:-"ping-targets.json"}

# jq builds and validates the JSON, so it is a hard requirement.
if ! command -v jq &> /dev/null; then
  echo "❌ jq未安装请先安装jq:"
  echo " Ubuntu/Debian: sudo apt-get install jq"
  echo " CentOS/RHEL: sudo yum install jq"
  echo " Alpine: apk add jq"
  exit 1
fi

# The input CSV must exist.
if [ ! -f "$CSV_FILE" ]; then
  echo "❌ CSV文件 $CSV_FILE 不存在"
  exit 1
fi

echo "🔄 正在转换 $CSV_FILE -> $OUTPUT_FILE..."

# Build the whole document in memory first so that a failed conversion never
# truncates an existing output file.
#  - tr strips CRs so files saved with Windows line endings do not leak "\r"
#    into the last column.
#  - "|| [ -n "$ip" ]" keeps the final row when the file lacks a trailing
#    newline.
#  - jq --arg escapes quotes/backslashes in values, which plain string
#    interpolation would turn into invalid JSON.
json=$(
  tail -n +2 "$CSV_FILE" | tr -d '\r' |
    while IFS=',' read -r ip device group network || [ -n "$ip" ]; do
      # Skip blank rows instead of emitting an empty target.
      [ -n "$ip" ] || continue
      jq -n \
        --arg ip "$ip" \
        --arg device "$device" \
        --arg group "$group" \
        --arg network "$network" \
        '{
          targets: [$ip],
          # group and network are optional labels; device is always present.
          labels: ({device: $device}
            + (if $group != "" then {group: $group} else {} end)
            + (if $network != "" then {network: $network} else {} end))
        }' || exit 1
    done | jq -s '.'
)

# Overwrite in place (no rename): the file may be bind-mounted into a
# container, and a single-file bind mount keeps following the original inode.
printf '%s\n' "$json" > "$OUTPUT_FILE"

echo "✅ 转换完成!"
echo "📊 生成了 $(jq length "$OUTPUT_FILE") 个Ping目标配置"
echo "📁 输出文件: $OUTPUT_FILE"
echo ""
echo "🔍 预览生成的JSON:"
jq . "$OUTPUT_FILE"

View File

@@ -0,0 +1,129 @@
#!/bin/bash
# Unified target conversion: generate onvif-targets.json and ping-targets.json
# (in the current directory) from a single targets CSV.
#
# Usage: ./csv-to-targets.sh [targets.csv]
#
# Row format:
#   type,ip,device,group,network,device_type,model,location,username,password,onvif_port
# Lines starting with '#' are comments. Only rows whose type is "onvif" or
# "ping" are converted; every other row (including the header row, whose type
# column is literally "type") is ignored.
# NOTE(review): rows are split on every comma; quoted CSV fields containing
# commas are not supported.
set -e
set -o pipefail

CSV_FILE=${1:-"targets.csv"}

# jq builds and validates the JSON, so it is a hard requirement.
if ! command -v jq &> /dev/null; then
  echo "❌ jq未安装请先安装jq:"
  echo " Ubuntu/Debian: sudo apt-get install jq"
  echo " CentOS/RHEL: sudo yum install jq"
  echo " Fedora: sudo dnf install jq"
  exit 1
fi

# The input CSV must exist.
if [ ! -f "$CSV_FILE" ]; then
  echo "❌ CSV文件 $CSV_FILE 不存在"
  exit 1
fi

echo "🔄 正在从 $CSV_FILE 生成配置文件..."
echo ""

# Scratch files that collect one JSON object per row; removed on any exit path.
ONVIF_TEMP=$(mktemp)
PING_TEMP=$(mktemp)
trap 'rm -f "$ONVIF_TEMP" "$PING_TEMP"' EXIT

# trim VALUE - print VALUE without leading/trailing whitespace.
# Pure parameter expansion is used instead of `xargs`, which treats quotes and
# backslashes specially and fails on values such as passwords containing them.
trim() {
  local value=$1
  value="${value#"${value%%[![:space:]]*}"}"
  value="${value%"${value##*[![:space:]]}"}"
  printf '%s' "$value"
}

# Comment lines are removed first. No line is dropped by position: the header
# is skipped by the type filter below, so a file that starts with comments (or
# has no header at all) is handled correctly.
# grep exits 1 when every line is a comment; that is not an error here.
{ grep -v '^[[:space:]]*#' "$CSV_FILE" || [ $? -eq 1 ]; } | tr -d '\r' |
  while IFS=',' read -r type ip device group network device_type model location username password onvif_port ||
    [ -n "$type" ]; do
    type=$(trim "$type")
    ip=$(trim "$ip")

    # Skip blank or incomplete rows.
    if [ -z "$type" ] || [ -z "$ip" ]; then
      continue
    fi

    if [ "$type" = "onvif" ]; then
      # ONVIF device row. jq --arg takes care of JSON escaping.
      jq -n \
        --arg ip "$ip" \
        --arg device_type "$(trim "$device_type")" \
        --arg model "$(trim "$model")" \
        --arg location "$(trim "$location")" \
        --arg username "$(trim "$username")" \
        --arg password "$(trim "$password")" \
        --arg onvif_port "$(trim "$onvif_port")" \
        '{
          targets: [$ip],
          labels: ({
            device_type: $device_type,
            model: $model,
            location: $location,
            username: $username,
            password: $password
          }
          # onvif_port is only recorded when it differs from the default 80.
          + (if $onvif_port != "" and $onvif_port != "80"
             then {onvif_port: $onvif_port} else {} end))
        }' >> "$ONVIF_TEMP" || exit 1
    elif [ "$type" = "ping" ]; then
      # Ping target row; group and network are optional labels.
      jq -n \
        --arg ip "$ip" \
        --arg device "$(trim "$device")" \
        --arg group "$(trim "$group")" \
        --arg network "$(trim "$network")" \
        '{
          targets: [$ip],
          labels: ({device: $device}
            + (if $group != "" then {group: $group} else {} end)
            + (if $network != "" then {network: $network} else {} end))
        }' >> "$PING_TEMP" || exit 1
    fi
  done

# Assemble the collected objects into JSON arrays (an empty array when no row
# of that type was found).
if [ -s "$ONVIF_TEMP" ]; then
  jq -s '.' "$ONVIF_TEMP" > onvif-targets.json
  ONVIF_COUNT=$(jq length onvif-targets.json)
  echo "✅ 生成 ONVIF 设备配置: $ONVIF_COUNT 个设备"
else
  echo "[]" > onvif-targets.json
  echo "⚠️  未找到 ONVIF 设备,生成空配置"
fi

if [ -s "$PING_TEMP" ]; then
  jq -s '.' "$PING_TEMP" > ping-targets.json
  PING_COUNT=$(jq length ping-targets.json)
  echo "✅ 生成 Ping 目标配置: $PING_COUNT 个目标"
else
  echo "[]" > ping-targets.json
  echo "⚠️  未找到 Ping 目标,生成空配置"
fi

echo ""
echo "✅ 配置文件生成完成!"
echo "📁 生成的文件:"
echo " - onvif-targets.json"
echo " - ping-targets.json"
echo ""

View File

@@ -0,0 +1,5 @@
ip,device_type,model,location,username,password,onvif_port
192.168.1.100,camera,HIKVISION_DS-2CD2342WD-I,front_door,admin,password1,80
192.168.1.101,camera,DAHUA_IPC-HFW1230S,back_yard,admin,password2,80
192.168.1.102,camera,UNIVIEW_IPC3120SR,living_room,admin,password3,8080
192.168.1.50,nvr,HIKVISION_DS-7608NI-I2,server_rack,admin,password4,80
1 ip device_type model location username password onvif_port
2 192.168.1.100 camera HIKVISION_DS-2CD2342WD-I front_door admin password1 80
3 192.168.1.101 camera DAHUA_IPC-HFW1230S back_yard admin password2 80
4 192.168.1.102 camera UNIVIEW_IPC3120SR living_room admin password3 8080
5 192.168.1.50 nvr HIKVISION_DS-7608NI-I2 server_rack admin password4 80

View File

@@ -0,0 +1,6 @@
ip,device,group,network
192.168.1.1,main_router,network,internal
192.168.1.100,front_camera,onvif_cameras,internal
192.168.1.101,back_camera,onvif_cameras,internal
192.168.1.102,living_camera,onvif_cameras,internal
8.8.8.8,google_dns,external,external
1 ip device group network
2 192.168.1.1 main_router network internal
3 192.168.1.100 front_camera onvif_cameras internal
4 192.168.1.101 back_camera onvif_cameras internal
5 192.168.1.102 living_camera onvif_cameras internal
6 8.8.8.8 google_dns external external

View File

@@ -0,0 +1,63 @@
#!/bin/bash
# Point the edge Prometheus remote_write URL at the central server.
#
# Usage: ./setup-remote-write.sh <central-host> [port]
# The host may be an IP address or a DNS name. Without arguments the script
# prompts for both values.
set -e

CENTRAL_HOST=${1:-"192.168.1.10"}
CENTRAL_PORT=${2:-"8428"}

# Keep the original cwd-relative location, but fall back to a path relative to
# this script so the script also works when started from another directory.
CONFIG_FILE="../prometheus-edge/prometheus.yml"
if [ ! -f "$CONFIG_FILE" ]; then
  SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
  if [ -f "$SCRIPT_DIR/../prometheus-edge/prometheus.yml" ]; then
    CONFIG_FILE="$SCRIPT_DIR/../prometheus-edge/prometheus.yml"
  fi
fi

echo "=== 设置Prometheus远程推送地址 ==="
echo ""

if [ -z "$1" ]; then
  echo "使用方法: $0 <中央服务器地址> [端口]"
  echo "示例: $0 192.168.1.10 8428"
  echo " $0 prometheus.company.com"
  echo " $0 prometheus.local 8428"
  echo ""
  read -r -p "请输入中央服务器地址 (IP或域名): " CENTRAL_HOST
  read -r -p "请输入端口 (默认8428): " CENTRAL_PORT_INPUT
  if [ -n "$CENTRAL_PORT_INPUT" ]; then
    CENTRAL_PORT=$CENTRAL_PORT_INPUT
  fi
fi

# Reject values that cannot form a valid URL authority before touching the file.
case "$CENTRAL_HOST" in
  "" | *[[:space:]/]*)
    echo "❌ 中央服务器地址无效: '$CENTRAL_HOST'"
    exit 1
    ;;
esac
case "$CENTRAL_PORT" in
  "" | *[!0-9]*)
    echo "❌ 端口无效: '$CENTRAL_PORT'"
    exit 1
    ;;
esac

echo "🔧 配置信息:"
echo " 中央服务器地址: $CENTRAL_HOST"
echo " 端口: $CENTRAL_PORT"
echo " 配置文件: $CONFIG_FILE"
echo ""

# The config file must exist.
if [ ! -f "$CONFIG_FILE" ]; then
  echo "❌ 配置文件 $CONFIG_FILE 不存在"
  exit 1
fi

# Back up the current config before editing it.
cp "$CONFIG_FILE" "${CONFIG_FILE}.backup.$(date +%Y%m%d_%H%M%S)"
echo "📋 已备份原配置文件"

# Rewrite scheme://host:port of the remote_write URL (the `url:` line ending
# in /api/v1/write). Matching the current value, whatever it is, makes the
# script work on a rendered config and on repeated runs, not only while the
# ${CENTRAL_SERVER_HOST}:${CENTRAL_SERVER_PORT} placeholder is still present.
NEW_BASE_URL="http://$CENTRAL_HOST:$CENTRAL_PORT"
# Escape characters that are special in a sed replacement (\, & and the | delimiter).
ESCAPED_BASE_URL=$(printf '%s' "$NEW_BASE_URL" | sed -e 's/[\\&|]/\\&/g')
sed -i -E "s|^([[:space:]]*-?[[:space:]]*url:[[:space:]]*)https?://[^/]+(/api/v1/write)|\1${ESCAPED_BASE_URL}\2|" "$CONFIG_FILE"

# Verify the edit instead of assuming it: sed exits 0 even when nothing matched.
if ! grep -qF -- "${NEW_BASE_URL}/api/v1/write" "$CONFIG_FILE"; then
  echo "❌ 未能在 $CONFIG_FILE 中找到 remote_write 的 url 配置,远程推送地址未更新"
  exit 1
fi
echo "✅ 远程推送地址已更新"
echo ""

# Show the resulting remote_write section (informational only).
echo "🔍 更新后的remote_write配置"
grep -A 5 "remote_write:" "$CONFIG_FILE" || true
echo ""
echo "🔄 重启Prometheus服务以应用新配置"
echo " docker-compose restart prometheus-edge"
echo ""
# The edge Prometheus is published on host port 9092 by docker-compose.yml.
echo "📊 检查远程写入状态:"
echo " curl http://localhost:9092/api/v1/status/config"
echo ""
echo "🔗 查看远程写入目标:"
echo " curl http://localhost:9092/api/v1/status/tsdb"

View File

@@ -0,0 +1,13 @@
# 统一监控目标配置文件
# 格式: type,ip,device,group,network,device_type,model,location,username,password,onvif_port
# type: onvif 或 ping
# 对于 onvif 类型,需要填写: ip,device_type,model,location,username,password,onvif_port
# 对于 ping 类型,需要填写: ip,device,group,network
type,ip,device,group,network,device_type,model,location,username,password,onvif_port
ping,192.168.2.1,main_router,network,internal,,,,,,
ping,8.8.8.8,google_dns,external,external,,,,,,
# ONVIF 设备示例(取消注释并填写实际信息)
# onvif,192.168.1.100,,,front_door,camera,HIKVISION_DS-2CD2342WD-I,front_door,admin,password1,80
# onvif,192.168.1.101,,,back_yard,camera,DAHUA_IPC-HFW1230S,back_yard,admin,password2,80
# onvif,192.168.1.102,,,living_room,camera,UNIVIEW_IPC3120SR,living_room,admin,password3,8080
# onvif,192.168.1.50,,,server_rack,nvr,HIKVISION_DS-7608NI-I2,server_rack,admin,password4,80
1 # 统一监控目标配置文件
2 # 格式: type,ip,device,group,network,device_type,model,location,username,password,onvif_port
3 # type: onvif 或 ping
4 # 对于 onvif 类型,需要填写: ip,device_type,model,location,username,password,onvif_port
5 # 对于 ping 类型,需要填写: ip,device,group,network
6 type,ip,device,group,network,device_type,model,location,username,password,onvif_port
7 ping,192.168.2.1,main_router,network,internal,,,,,,
8 ping,8.8.8.8,google_dns,external,external,,,,,,
9 # ONVIF 设备示例(取消注释并填写实际信息)
10 # onvif,192.168.1.100,,,front_door,camera,HIKVISION_DS-2CD2342WD-I,front_door,admin,password1,80
11 # onvif,192.168.1.101,,,back_yard,camera,DAHUA_IPC-HFW1230S,back_yard,admin,password2,80
12 # onvif,192.168.1.102,,,living_room,camera,UNIVIEW_IPC3120SR,living_room,admin,password3,8080
13 # onvif,192.168.1.50,,,server_rack,nvr,HIKVISION_DS-7608NI-I2,server_rack,admin,password4,80

View File

@@ -0,0 +1,90 @@
#!/bin/bash
# Check connectivity from this edge node to the central server:
# DNS resolution, ping, TCP port, HTTP and the VictoriaMetrics health endpoint.
#
# Usage: ./test-connection.sh <central-host> [port]
# Without arguments the script prompts for both values.
#
# Every check reports its own result; a failed check never aborts the
# remaining checks.
set -e

CENTRAL_HOST=${1:-"192.168.1.10"}
CENTRAL_PORT=${2:-"8428"}

echo "=== 测试中央服务器连接 ==="
echo ""

if [ -z "$1" ]; then
  echo "使用方法: $0 <中央服务器地址> [端口]"
  echo "示例: $0 192.168.1.10 8428"
  echo " $0 prometheus.company.com"
  echo " $0 prometheus.local 9090"
  echo ""
  read -r -p "请输入中央服务器地址 (IP或域名): " CENTRAL_HOST
  read -r -p "请输入端口 (默认8428): " CENTRAL_PORT_INPUT
  if [ -n "$CENTRAL_PORT_INPUT" ]; then
    CENTRAL_PORT=$CENTRAL_PORT_INPUT
  fi
fi

echo "🔧 测试配置:"
echo " 中央服务器地址: $CENTRAL_HOST"
echo " 端口: $CENTRAL_PORT"
echo ""

# DNS resolution. Run inside `if` so a lookup failure is reported instead of
# terminating the script through `set -e`.
echo "🌐 测试域名解析..."
if command -v nslookup &> /dev/null; then
  if ! nslookup "$CENTRAL_HOST"; then
    echo " ❌ 域名解析失败"
  fi
else
  echo " nslookup 不可用跳过DNS测试"
fi

# ICMP reachability.
echo ""
echo "📡 测试网络连通性..."
if ping -c 3 "$CENTRAL_HOST" > /dev/null 2>&1; then
  echo " ✅ Ping 成功"
else
  echo " ❌ Ping 失败"
fi

# TCP port reachability; -w bounds the wait so a filtered port cannot hang the script.
echo ""
echo "🔌 测试端口连通性..."
if command -v nc &> /dev/null; then
  if nc -z -w 5 "$CENTRAL_HOST" "$CENTRAL_PORT" 2> /dev/null; then
    echo " ✅ 端口 $CENTRAL_PORT 可访问"
  else
    echo " ❌ 端口 $CENTRAL_PORT 不可访问"
  fi
else
  echo " nc 不可用,跳过端口测试"
fi

# HTTP connectivity.
echo ""
echo "🌐 测试HTTP连接..."
HTTP_URL="http://$CENTRAL_HOST:$CENTRAL_PORT"
if command -v curl &> /dev/null; then
  if curl -s --connect-timeout 5 --max-time 10 "$HTTP_URL" > /dev/null 2>&1; then
    echo " ✅ HTTP连接成功: $HTTP_URL"
    # VictoriaMetrics health endpoint. -f turns HTTP error statuses into a
    # non-zero exit code; without it any HTTP response counted as success.
    if curl -sf --connect-timeout 5 --max-time 10 "$HTTP_URL/health" > /dev/null 2>&1; then
      echo " ✅ VictoriaMetrics API 可访问"
    else
      echo " ⚠️  VictoriaMetrics API 不可访问 (可能不是VictoriaMetrics服务)"
    fi
  else
    echo " ❌ HTTP连接失败: $HTTP_URL"
  fi
else
  echo " curl 不可用跳过HTTP测试"
fi

echo ""
echo "📋 测试完成!"
echo ""
echo "💡 如果连接失败,请检查:"
echo " 1. 网络连接是否正常"
echo " 2. 防火墙是否开放端口 $CENTRAL_PORT"
echo " 3. 中央服务器是否正在运行"
echo " 4. DNS解析是否正确"

View File

@@ -0,0 +1,68 @@
#!/bin/bash
# Regenerate every JSON target file from the CSV files next to this script.
# Usage: ./update-configs.sh
set -e

# run_converter SCRIPT [ARGS...] - make SCRIPT executable (best effort) and run it.
run_converter() {
  local script=$1
  shift
  chmod +x "$script" 2>/dev/null || true
  "./$script" "$@"
}

# convert_legacy CSV JSON SCRIPT START_MSG MISSING_MSG
# Convert one old-format CSV, or write an empty JSON array when it is absent.
convert_legacy() {
  local csv=$1 json=$2 script=$3 start_msg=$4 missing_msg=$5
  if [ -f "$csv" ]; then
    printf '%s\n' "$start_msg"
    run_converter "$script" "$csv" "$json"
  else
    printf '%s\n' "$missing_msg"
    printf '[]\n' > "$json"
  fi
}

printf '%s\n' "=== 更新Prometheus监控配置文件 ==="
printf '\n'

# jq must be available before anything is generated.
if ! command -v jq >/dev/null 2>&1; then
  printf '%s\n' \
    "❌ jq未安装请先安装jq:" \
    " Ubuntu/Debian: sudo apt-get install jq" \
    " CentOS/RHEL: sudo yum install jq" \
    " Alpine: apk add jq"
  exit 1
fi

# All CSV/JSON paths below are relative to the script's own directory.
cd "$(dirname "$0")"

printf '%s\n' "🔄 正在从CSV文件生成JSON配置..."

if [ ! -f "targets.csv" ]; then
  # No unified file: fall back to the two old-format CSV files.
  printf '%s\n' "⚠️  targets.csv 不存在,使用旧格式配置文件..."
  printf '\n'
  convert_legacy "devices.csv" "onvif-targets.json" "csv-to-json.sh" \
    "📱 生成ONVIF设备配置从 devices.csv..." \
    "⚠️ devices.csv 不存在跳过ONVIF设备配置生成"
  convert_legacy "ping-targets.csv" "ping-targets.json" "csv-to-ping-json.sh" \
    "🌐 生成Ping目标配置从 ping-targets.csv..." \
    "⚠️ ping-targets.csv 不存在跳过Ping目标配置生成"
else
  # The unified targets.csv takes precedence.
  printf '%s\n' "📋 使用统一配置文件 targets.csv..."
  run_converter "csv-to-targets.sh" "targets.csv"
fi

printf '\n'
printf '%s\n' "✅ 所有配置文件已更新!"
printf '\n'
printf '%s\n' "📋 生成的文件:"
ls -la *.json 2>/dev/null || printf '%s\n' " (无JSON文件生成)"
printf '\n'
printf '%s\n' \
  "🔄 配置热重载:" \
  " - Prometheus会在5分钟内自动检测并重载配置" \
  " - 无需重启Docker容器"
printf '\n'
printf '%s\n' \
  "⚡ 强制立即重载 (可选)" \
  " docker-compose restart prometheus-edge"
printf '\n'
printf '%s\n' "📝 编辑CSV文件后重新运行此脚本即可更新配置"

156
edge-agent/deploy.sh Normal file
View File

@@ -0,0 +1,156 @@
#!/bin/bash
# Deploy the distributed-Prometheus edge agent with Docker Compose.
# Intended for Linux hosts.
#
# Steps: check docker / compose / jq, regenerate the JSON target files from
# CSV, make sure .env exists, render prometheus.yml from its template, then
# (re)start the compose stack and print its status.
set -e

# All paths below (config/, prometheus-edge/, .env, the compose file) are
# relative to this script, so run from its directory regardless of the cwd.
cd "$(dirname "${BASH_SOURCE[0]}")"

echo "=== 分布式Prometheus边缘代理部署脚本 ==="
echo ""

# Docker is required.
if ! command -v docker &> /dev/null; then
  echo "❌ Docker未安装请先安装Docker"
  exit 1
fi

# Prefer Compose V2 (`docker compose`), fall back to V1 (`docker-compose`).
# COMPOSE is the argv used to run it; DOCKER_COMPOSE_CMD is the printable form.
COMPOSE=()
DOCKER_COMPOSE_CMD=""
if docker compose version &> /dev/null; then
  COMPOSE=(docker compose)
  DOCKER_COMPOSE_CMD="docker compose"
  echo "✅ 检测到 Docker Compose V2"
elif command -v docker-compose &> /dev/null; then
  COMPOSE=(docker-compose)
  DOCKER_COMPOSE_CMD="docker-compose"
  echo "✅ 检测到 Docker Compose V1"
else
  echo "❌ Docker Compose未安装请先安装Docker Compose"
  exit 1
fi
echo "✅ Docker环境检查通过"
echo ""

# jq is needed by the config generation scripts.
if ! command -v jq &> /dev/null; then
  echo "❌ jq未安装请先安装jq:"
  echo " Ubuntu/Debian: sudo apt-get install jq"
  echo " CentOS/RHEL: sudo yum install jq"
  echo " Alpine: apk add jq"
  exit 1
fi

# Regenerate the JSON target files. The unified config/targets.csv is the
# primary input (update-configs.sh prefers it); the legacy devices.csv is
# still honoured.
echo "🔄 检查并生成配置文件..."
if [ -f "config/targets.csv" ] || [ -f "config/devices.csv" ]; then
  echo "📱 从CSV生成ONVIF设备配置..."
  (
    cd config
    chmod +x ./*.sh
    ./update-configs.sh
  )
else
  echo "⚠️  config/targets.csv 与 config/devices.csv 均不存在,使用已有的JSON配置"
fi

if [ ! -f "config/onvif-targets.json" ]; then
  echo "❌ 配置文件 config/onvif-targets.json 不存在"
  exit 1
fi
if [ ! -f "config/ping-targets.json" ]; then
  echo "❌ 配置文件 config/ping-targets.json 不存在"
  exit 1
fi

# Create .env from the example on first run and let the user edit it.
if [ ! -f ".env" ]; then
  if [ -f "env.example" ]; then
    cp env.example .env
    echo "📝 已创建 .env 文件,请编辑其中的配置"
    echo " 特别是 CENTRAL_SERVER_HOST 和 CENTRAL_SERVER_PORT"
    echo ""
    read -r -p "按回车键继续,或 Ctrl+C 取消..."
  else
    echo "❌ env.example 文件不存在"
    exit 1
  fi
fi

# Render prometheus.yml from the template so remote_write points at the
# central server configured in .env.
if [ -f ".env" ]; then
  set -a
  # shellcheck disable=SC1091
  source .env
  set +a
  CENTRAL_SERVER_HOST=${CENTRAL_SERVER_HOST:-192.168.1.10}
  CENTRAL_SERVER_PORT=${CENTRAL_SERVER_PORT:-8428}
  if [ -f "prometheus-edge/prometheus.yml.template" ]; then
    # Check for envsubst first: redirecting a missing command's output would
    # truncate prometheus.yml and leave an empty config behind.
    if ! command -v envsubst &> /dev/null; then
      echo "❌ envsubst 未安装(通常由 gettext 软件包提供),无法生成 prometheus.yml"
      exit 1
    fi
    echo "📝 根据 .env 生成 prometheus.yml (中央: ${CENTRAL_SERVER_HOST}:${CENTRAL_SERVER_PORT})..."
    export CENTRAL_SERVER_HOST CENTRAL_SERVER_PORT
    # Render into a variable so a failed render never clobbers the old file.
    # shellcheck disable=SC2016
    rendered=$(envsubst '${CENTRAL_SERVER_HOST} ${CENTRAL_SERVER_PORT}' < prometheus-edge/prometheus.yml.template)
    printf '%s\n' "$rendered" > prometheus-edge/prometheus.yml
    echo "✅ prometheus.yml 已生成"
  fi
fi

# Checked after rendering, so a checkout that only ships the template works.
if [ ! -f "prometheus-edge/prometheus.yml" ]; then
  echo "❌ 配置文件 prometheus-edge/prometheus.yml 不存在"
  exit 1
fi
echo "✅ 配置文件检查通过"
echo ""

# Data directory.
mkdir -p prometheus-edge/data
echo "✅ 数据目录创建完成"
echo ""

# Stop a previous deployment; nothing running is not an error.
echo "🛑 停止现有服务..."
"${COMPOSE[@]}" down 2> /dev/null || true

# Pull images; a failed pull is tolerated because local images may exist.
echo "📥 拉取Docker镜像..."
if ! "${COMPOSE[@]}" pull; then
  echo ""
  echo "⚠️  镜像拉取失败,尝试继续启动(如果本地已有镜像)..."
  echo ""
fi

# Start the stack.
echo "🚀 启动服务..."
"${COMPOSE[@]}" up -d

# Give the containers a moment before reporting their state.
echo "⏳ 等待服务启动..."
sleep 10

echo ""
echo "📊 服务状态检查:"
"${COMPOSE[@]}" ps
echo ""
echo "📋 服务日志:"
"${COMPOSE[@]}" logs --tail=20
echo ""
echo "✅ 部署完成!"
echo ""
echo "🔗 访问地址:"
echo " - Prometheus UI: http://localhost:9092"
echo " - 目标状态: http://localhost:9092/targets"
echo ""
echo "📝 管理命令:"
echo " - 查看日志: $DOCKER_COMPOSE_CMD logs -f"
echo " - 重启服务: $DOCKER_COMPOSE_CMD restart"
echo " - 停止服务: $DOCKER_COMPOSE_CMD down"
echo ""
echo "🔄 配置更新:"
echo " - 编辑CSV: nano config/targets.csv"
echo " - 生成JSON: cd config && ./update-configs.sh"
echo " - 热重载: 等待5分钟自动重载或重启prometheus-edge"
echo ""
echo "⚠️  请确保:"
echo " 1. 已正确配置 .env 文件中的服务器地址"
echo " 2. 已更新 config/targets.csv 中的设备信息"
echo " 3. 网络连接正常可以访问ONVIF设备"

View File

@@ -0,0 +1,70 @@
services:
  # ========== Required on the edge ==========
  # 1. Edge Prometheus: scrapes targets and remote_writes to the central VictoriaMetrics
  prometheus-edge:
    image: prom/prometheus:latest
    container_name: prometheus-edge
    restart: unless-stopped
    environment:
      - CENTRAL_SERVER_HOST=${CENTRAL_SERVER_HOST:-192.168.1.10}
      - CENTRAL_SERVER_PORT=${CENTRAL_SERVER_PORT:-8428}
    volumes:
      - prometheus-edge-data:/prometheus
      - ./prometheus-edge/prometheus.yml:/etc/prometheus/prometheus.yml:ro
      - ./config/onvif-targets.json:/etc/prometheus/onvif-targets.json:ro # mount the static device list
      - ./config/ping-targets.json:/etc/prometheus/ping-targets.json:ro # mount the ping target list
    mem_limit: "256m"
    cpus: "2.0"
    ports:
      - "9092:9090" # published on 9092 to avoid clashing with the central server
    # For same-host deployments the container reaches the central service on the host via host.docker.internal
    extra_hosts:
      - "host.docker.internal:host-gateway"
    command:
      - '--config.file=/etc/prometheus/prometheus.yml'
      - '--storage.tsdb.retention.time=1h'
      - '--web.enable-lifecycle' # enable the config reload API
    networks:
      - monitoring_net
  # ========== Optional containers (enable as needed) ==========
  # 2. ONVIF Exporter (optional, built from this project)
  # Enable with: docker compose --profile onvif up -d --build (builds edge-agent/onvif-exporter and starts it)
  # Config file: config/onvif-targets.json (matches the onvif rows of targets.csv, generated by update-configs.sh)
  onvif-exporter:
    profiles:
      - onvif
    image: onvif-exporter:local
    build:
      context: ./onvif-exporter
      dockerfile: Dockerfile
    container_name: onvif-exporter
    restart: unless-stopped
    environment:
      - EXPORTER_PORT=9600
      - TARGETS_FILE=/config/targets.json
    volumes:
      - ./config/onvif-targets.json:/config/targets.json:ro
    mem_limit: "128m"
    cpus: "1.5"
    networks:
      - monitoring_net
  # 3. Blackbox Exporter (required: network ping probing); it has no profile, so it always starts
  blackbox-exporter:
    image: prom/blackbox-exporter:latest
    container_name: blackbox-exporter
    restart: unless-stopped
    volumes:
      - ./blackbox/config.yml:/etc/blackbox_exporter/config.yml:ro
    mem_limit: "64m"
    cpus: "0.5"
    networks:
      - monitoring_net
networks:
  monitoring_net:
    driver: bridge
volumes:
  prometheus-edge-data:

27
edge-agent/env.example Normal file
View File

@@ -0,0 +1,27 @@
# Central Prometheus server address (IP or domain name)
# For a same-host deployment use: host.docker.internal (the container reaches the host)
CENTRAL_SERVER_HOST=192.168.1.10
# Or use a domain name: CENTRAL_SERVER_HOST=prometheus.company.com
# Same host: CENTRAL_SERVER_HOST=host.docker.internal
# Central server port
CENTRAL_SERVER_PORT=8428
# Common ports:
# 8428 - VictoriaMetrics (recommended)
# 9090 - Prometheus
# 8080 - custom port
# Edge node identifier
EDGE_NODE_ID=workernode_1
# Remote write configuration
# The edge node pushes its data to VictoriaMetrics on the central server
# Format: http://<domain or IP>:<port>/api/v1/write
# Default port: 8428 (VictoriaMetrics)
# Note: ONVIF device passwords are now configured per device in config/devices.csv
# ONVIF Exporter image (only needed when using --profile onvif)
# No ready-made image exists in public registries; build your own or use a third-party image, see doc/ONVIF_ALTERNATIVES.md
# ONVIF_EXPORTER_IMAGE=localhost/onvif-exporter:local

View File

@@ -0,0 +1,12 @@
# Multi-stage build: the binary is compiled inside the image, so Go does not
# need to be installed on the host.
FROM golang:1.21-alpine AS builder
WORKDIR /app
COPY go.mod ./
COPY main.go ./
# `go mod tidy` resolves module dependencies at build time; the binary is
# built with cgo disabled and written to /onvif-exporter.
RUN go mod tidy && CGO_ENABLED=0 GOOS=linux go build -o /onvif-exporter .
# Runtime stage: only the compiled binary and CA certificates are added.
FROM alpine:3.19
RUN apk add --no-cache ca-certificates
COPY --from=builder /onvif-exporter /onvif-exporter
EXPOSE 9600
ENTRYPOINT ["/onvif-exporter"]

View File

@@ -0,0 +1,34 @@
# ONVIF Exporter(自建)
本目录为自建的 ONVIF 探测容器,供边缘节点可选使用。通过 ONVIF `GetDeviceInformation` 探测设备是否在线,并暴露 Prometheus 指标。
## 指标
- `onvif_device_up`:1=可达,0=不可达;标签:instance, location, model, device_type
- `onvif_probe_duration_seconds`:探测耗时(秒)
## 配置
- 从 **config/onvif-targets.json** 读取设备列表(与 `targets.csv` 中 onvif 行一致,由 `config/update-configs.sh` 生成)。
- 环境变量:`TARGETS_FILE`(默认 `/config/targets.json`)、`EXPORTER_PORT`(默认 9600)。
## 构建与运行
在边缘节点目录下启用 ONVIF 并构建、启动:
```bash
cd edge-agent
docker compose --profile onvif up -d --build
```
或仅构建镜像:
```bash
docker build -t onvif-exporter:local ./onvif-exporter
```
## 依赖
- Go 1.21(仅构建时需要,Dockerfile 内已包含)
- [github.com/use-go/onvif](https://github.com/use-go/onvif)(ONVIF 协议)
- [prometheus/client_golang](https://github.com/prometheus/client_golang)(指标暴露)

View File

@@ -0,0 +1,24 @@
module github.com/distributed-prometheus/onvif-exporter
go 1.21
require (
github.com/prometheus/client_golang v1.19.0
github.com/use-go/onvif v0.0.9
)
require (
github.com/beorn7/perks v1.0.1 // indirect
github.com/beevik/etree v1.1.0 // indirect
github.com/cespare/xxhash/v2 v2.2.0 // indirect
github.com/elgs/gostrgen v0.0.0-20161222160715-9d61ae07eeae // indirect
github.com/gofrs/uuid v3.2.0+incompatible // indirect
github.com/juju/errors v0.0.0-20220331221717-b38fca44723b // indirect
github.com/prometheus/client_model v0.6.0 // indirect
github.com/prometheus/common v0.50.0 // indirect
github.com/prometheus/procfs v0.13.0 // indirect
github.com/rs/zerolog v1.26.1 // indirect
golang.org/x/net v0.21.0 // indirect
golang.org/x/sys v0.17.0 // indirect
google.golang.org/protobuf v1.32.0 // indirect
)

View File

@@ -0,0 +1,122 @@
package main
import (
"encoding/json"
"log"
"net/http"
"os"
"strings"
"time"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promhttp"
"github.com/use-go/onvif"
"github.com/use-go/onvif/device"
)
const (
	// defaultTargetsFile is the targets JSON path used when TARGETS_FILE is unset or empty.
	defaultTargetsFile = "/config/targets.json"
	// defaultPort is the listen port used when EXPORTER_PORT is unset or empty.
	defaultPort = "9600"
)
// targetGroup is one entry of the targets JSON file: a list of target
// addresses plus the string labels attached to them.
type targetGroup struct {
	Targets []string          `json:"targets"`
	Labels  map[string]string `json:"labels"`
}
// main starts the ONVIF exporter.
//
// The targets file path comes from TARGETS_FILE and the listen port from
// EXPORTER_PORT, each falling back to its default constant when unset or
// empty. Devices are probed once at startup and then every 60 seconds; the
// results are served on /metrics, and /health always answers 200.
func main() {
	targetsFile := os.Getenv("TARGETS_FILE")
	if targetsFile == "" {
		targetsFile = defaultTargetsFile
	}
	port := os.Getenv("EXPORTER_PORT")
	if port == "" {
		port = defaultPort
	}

	labelNames := []string{"instance", "location", "model", "device_type"}
	up := prometheus.NewGaugeVec(prometheus.GaugeOpts{
		Name: "onvif_device_up",
		Help: "1 if ONVIF device is reachable, 0 otherwise",
	}, labelNames)
	duration := prometheus.NewGaugeVec(prometheus.GaugeOpts{
		Name: "onvif_probe_duration_seconds",
		Help: "ONVIF probe duration in seconds",
	}, labelNames)

	// A dedicated registry keeps the default Go/process collectors out of /metrics.
	reg := prometheus.NewRegistry()
	reg.MustRegister(up, duration)

	// Probe once right away so the first scrape already has data.
	probe(targetsFile, up, duration)

	// Re-probe on every tick. The loop waits for the first tick before
	// probing: the startup probe above already ran, and an immediate probe
	// here would run a second, concurrent pass over the same devices.
	go func() {
		ticker := time.NewTicker(60 * time.Second)
		defer ticker.Stop()
		for range ticker.C {
			probe(targetsFile, up, duration)
		}
	}()

	http.Handle("/metrics", promhttp.HandlerFor(reg, promhttp.HandlerOpts{EnableOpenMetrics: true}))
	http.HandleFunc("/health", func(w http.ResponseWriter, _ *http.Request) { w.WriteHeader(http.StatusOK) })
	log.Printf("ONVIF exporter listening on :%s", port)
	if err := http.ListenAndServe(":"+port, nil); err != nil {
		log.Fatal(err)
	}
}
// probe reads the targets file, probes every listed device over ONVIF and
// records the outcome in the up and duration gauges.
//
// The file is re-read on every call. A read or parse error is logged and the
// gauges are left untouched. Every address in a group's "targets" list is
// probed, not only the first one; all targets of a group share that group's
// labels. The ONVIF port comes from the "onvif_port" label and defaults to 80
// when the label is missing or blank.
//
// NOTE(review): series for devices that were removed from the file are never
// deleted from the gauges; they keep their last value.
func probe(targetsFile string, up, duration *prometheus.GaugeVec) {
	data, err := os.ReadFile(targetsFile)
	if err != nil {
		log.Printf("read targets file: %v", err)
		return
	}
	var groups []targetGroup
	if err := json.Unmarshal(data, &groups); err != nil {
		log.Printf("parse targets: %v", err)
		return
	}
	for _, g := range groups {
		// Trim before the emptiness check so a whitespace-only label falls
		// back to the default port instead of producing an empty one.
		port := strings.TrimSpace(g.Labels["onvif_port"])
		if port == "" {
			port = "80"
		}
		user := strings.TrimSpace(g.Labels["username"])
		pass := strings.TrimSpace(g.Labels["password"])
		location := strings.TrimSpace(g.Labels["location"])
		model := strings.TrimSpace(g.Labels["model"])
		deviceType := strings.TrimSpace(g.Labels["device_type"])

		for _, target := range g.Targets {
			ip := strings.TrimSpace(target)
			if ip == "" {
				continue
			}
			instance := ip + ":" + port
			labels := prometheus.Labels{
				"instance":    instance,
				"location":    location,
				"model":       model,
				"device_type": deviceType,
			}
			start := time.Now()
			err := probeONVIF(ip, port, user, pass)
			elapsed := time.Since(start).Seconds()
			duration.With(labels).Set(elapsed)
			if err != nil {
				log.Printf("onvif probe %s: %v", instance, err)
				up.With(labels).Set(0)
			} else {
				up.With(labels).Set(1)
			}
		}
	}
}
// probeONVIF connects to the ONVIF device at ip:port with the given
// credentials and issues a GetDeviceInformation call.
//
// It returns the error from creating the device or from the call, and nil
// when both succeed. The HTTP status of the response is not inspected.
func probeONVIF(ip, port, username, password string) error {
	xaddr := ip + ":" + port
	params := onvif.DeviceParams{Xaddr: xaddr, Username: username, Password: password}
	dev, err := onvif.NewDevice(params)
	if err != nil {
		return err
	}
	resp, err := dev.CallMethod(device.GetDeviceInformation{})
	// Always release the response body: the probe runs periodically for every
	// device, so an unclosed body would leak a connection on each call.
	if resp != nil && resp.Body != nil {
		resp.Body.Close()
	}
	return err
}

View File

@@ -0,0 +1,45 @@
# Edge Prometheus configuration: scrape local exporters and forward all
# samples to the central server through remote_write.
global:
  scrape_interval: 120s
  evaluation_interval: 120s
  external_labels:
    region: workernode_1 # unique name of this edge node

remote_write:
  - url: http://192.168.2.106:8428/api/v1/write
    queue_config:
      max_samples_per_send: 5000
      capacity: 5000
      max_shards: 5

scrape_configs:
  # ONVIF device state comes from the onvif-exporter container, which probes
  # every device itself and exposes all results on one /metrics endpoint.
  # onvif-targets.json must NOT be used as file_sd here: that would make
  # Prometheus scrape each camera's own /metrics directly and attach the
  # file's username/password entries to the targets as labels.
  - job_name: 'onvif-devices'
    scrape_interval: 120s
    metrics_path: /metrics
    static_configs:
      - targets: ['onvif-exporter:9600']

  # Ping probes through blackbox-exporter. Targets come only from file_sd;
  # the relabeling below moves each address into the `target` URL parameter
  # and points the scrape at the exporter. A static target for the exporter
  # itself would be relabeled into a bogus probe of "blackbox-exporter:9115".
  - job_name: 'network-ping'
    scrape_interval: 300s
    file_sd_configs:
      - files: ['/etc/prometheus/ping-targets.json']
        refresh_interval: 5m
    metrics_path: /probe
    params:
      module: [icmp]
    relabel_configs:
      - source_labels: [__address__]
        target_label: __param_target
      - source_labels: [__param_target]
        target_label: instance
      - target_label: __address__
        replacement: blackbox-exporter:9115

  # Self-monitoring of this Prometheus instance.
  - job_name: 'prometheus-edge'
    scrape_interval: 60s
    static_configs:
      - targets: ['localhost:9090']

View File

@@ -0,0 +1,45 @@
# Template for the edge Prometheus configuration. The ${CENTRAL_SERVER_HOST}
# and ${CENTRAL_SERVER_PORT} placeholders are substituted when the final
# prometheus.yml is rendered.
global:
  scrape_interval: 120s
  evaluation_interval: 120s
  external_labels:
    region: workernode_1 # unique name of this edge node

remote_write:
  - url: http://${CENTRAL_SERVER_HOST}:${CENTRAL_SERVER_PORT}/api/v1/write
    queue_config:
      max_samples_per_send: 5000
      capacity: 5000
      max_shards: 5

scrape_configs:
  # ONVIF device state comes from the onvif-exporter container, which probes
  # every device itself and exposes all results on one /metrics endpoint.
  # onvif-targets.json must NOT be used as file_sd here: that would make
  # Prometheus scrape each camera's own /metrics directly and attach the
  # file's username/password entries to the targets as labels.
  - job_name: 'onvif-devices'
    scrape_interval: 120s
    metrics_path: /metrics
    static_configs:
      - targets: ['onvif-exporter:9600']

  # Ping probes through blackbox-exporter. Targets come only from file_sd;
  # the relabeling below moves each address into the `target` URL parameter
  # and points the scrape at the exporter. A static target for the exporter
  # itself would be relabeled into a bogus probe of "blackbox-exporter:9115".
  - job_name: 'network-ping'
    scrape_interval: 300s
    file_sd_configs:
      - files: ['/etc/prometheus/ping-targets.json']
        refresh_interval: 5m
    metrics_path: /probe
    params:
      module: [icmp]
    relabel_configs:
      - source_labels: [__address__]
        target_label: __param_target
      - source_labels: [__param_target]
        target_label: instance
      - target_label: __address__
        replacement: blackbox-exporter:9115

  # Self-monitoring of this Prometheus instance.
  - job_name: 'prometheus-edge'
    scrape_interval: 60s
    static_configs:
      - targets: ['localhost:9090']

104
edge-agent/quick-setup.sh Normal file
View File

@@ -0,0 +1,104 @@
#!/bin/bash
# Quick setup for an edge node on this machine: create .env and a minimal
# config/targets.csv when they do not exist yet, generate the JSON target
# files, and optionally run deploy.sh.
set -e

# All paths below are relative to this script's directory.
cd "$(dirname "${BASH_SOURCE[0]}")"

echo "=== 边缘节点快速配置脚本 ==="
echo ""

# Detect the first local IP address. `hostname -I` is not available on every
# system, so fall back to host.docker.internal (mapped to the host gateway in
# docker-compose.yml) rather than writing an empty address into .env.
LOCAL_IP=$(hostname -I 2> /dev/null | awk '{print $1}')
if [ -z "$LOCAL_IP" ]; then
  LOCAL_IP="host.docker.internal"
  echo "⚠️  无法检测本机IP,使用 host.docker.internal"
fi
echo "📋 检测到本机IP: $LOCAL_IP"
echo ""

# env_value KEY - print the last value assigned to KEY in .env (empty if none).
env_value() {
  sed -n "s/^$1=//p" .env | tail -n 1
}

# 1. Create .env (never overwrite an existing one).
if [ ! -f ".env" ]; then
  echo "📝 创建 .env 配置文件..."
  cat > .env << EOF
# 中央服务器地址(本机)
CENTRAL_SERVER_HOST=${LOCAL_IP}
CENTRAL_SERVER_PORT=8428
# 边缘节点标识
EDGE_NODE_ID=workernode_1
EOF
  echo "✅ .env 文件已创建"
else
  echo "⚠️  .env 文件已存在,跳过创建"
  echo " 如需修改,请编辑 .env 文件"
fi
echo ""

# 2. Create a minimal test targets.csv. Like .env, an existing file is kept:
# overwriting it would silently discard the targets the user has configured.
TARGETS_CREATED=0
if [ ! -f "config/targets.csv" ]; then
  echo "📝 配置统一监控目标 targets.csv..."
  cat > config/targets.csv << 'EOF'
# 统一监控目标配置文件
# 格式: type,ip,device,group,network,device_type,model,location,username,password,onvif_port
# type: onvif 或 ping
type,ip,device,group,network,device_type,model,location,username,password,onvif_port
ping,8.8.8.8,google_dns,external,external,,,,,,
ping,1.1.1.1,cloudflare_dns,external,external,,,,,,
# ONVIF 设备示例(取消注释并填写实际信息)
# onvif,192.168.1.100,,,front_door,camera,HIKVISION_DS-2CD2342WD-I,front_door,admin,password1,80
EOF
  TARGETS_CREATED=1
  echo "✅ 统一监控目标已配置使用公共DNS进行测试"
else
  echo "⚠️  config/targets.csv 已存在,跳过创建(保留现有监控目标)"
fi
echo ""

# 3. Generate the JSON target files from the CSV.
echo "🔄 生成配置文件..."
(
  cd config
  chmod +x ./*.sh 2> /dev/null || true
  if [ -f "update-configs.sh" ]; then
    ./update-configs.sh
    echo "✅ 配置文件已生成"
  else
    echo "⚠️  update-configs.sh 不存在,跳过"
  fi
)
echo ""

# 4. Make sure both JSON files exist; fall back to empty target lists.
echo "🔍 检查配置文件..."
if [ ! -f "config/onvif-targets.json" ]; then
  echo "📝 创建空的 ONVIF 配置文件..."
  echo "[]" > config/onvif-targets.json
fi
if [ ! -f "config/ping-targets.json" ]; then
  echo "📝 创建空的 Ping 配置文件..."
  echo "[]" > config/ping-targets.json
fi
echo "✅ 配置文件检查通过"
echo ""

# 5. Summary. Values are read back from .env so that the summary is also
# correct when a pre-existing .env was kept.
SUMMARY_HOST=$(env_value CENTRAL_SERVER_HOST)
SUMMARY_PORT=$(env_value CENTRAL_SERVER_PORT)
SUMMARY_NODE=$(env_value EDGE_NODE_ID)
echo "📊 配置摘要:"
echo " - 中央服务器: ${SUMMARY_HOST:-$LOCAL_IP}:${SUMMARY_PORT:-8428}"
echo " - 边缘节点ID: ${SUMMARY_NODE:-workernode_1}"
echo " - 监控目标: 已配置(统一 targets.csv)"
if [ "$TARGETS_CREATED" -eq 1 ]; then
  echo " * Ping 目标: Google DNS, Cloudflare DNS"
  echo " * ONVIF 设备: 无(用于测试,可在 targets.csv 中添加)"
fi
echo ""

# 6. Optionally deploy right away.
read -r -p "是否立即部署边缘节点?(y/N): " -n 1
echo
if [[ $REPLY =~ ^[Yy]$ ]]; then
  echo ""
  echo "🚀 开始部署..."
  bash deploy.sh
else
  echo ""
  echo "✅ 配置完成!"
  echo ""
  echo "📝 下一步:"
  echo " 1. 检查 .env 文件配置"
  echo " 2. 编辑 config/targets.csv 添加监控目标ping 或 onvif"
  echo " 3. 运行: cd config && ./update-configs.sh 生成JSON配置"
  echo " 4. 运行: bash deploy.sh"
  echo ""
fi

38
edge-agent/run-edge-local.sh Executable file
View File

@@ -0,0 +1,38 @@
#!/bin/bash
# Same-host edge deployment: when the central server and the edge run on one
# machine, point .env at the host (host.docker.internal:8428) and start the
# edge via deploy.sh.
# Usage: ./run-edge-local.sh
set -e

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
cd "$SCRIPT_DIR"

echo "=== 本机同机部署边缘(中央与边缘在同一台机器)==="
echo ""

# Make sure .env exists. Written as if/else so that a failing `cp` is reported
# as a cp error rather than as a missing env.example.
if [ ! -f ".env" ]; then
  if [ ! -f "env.example" ]; then
    echo "❌ 缺少 env.example"
    exit 1
  fi
  cp env.example .env
fi

# set_env_var KEY VALUE - set KEY=VALUE in .env, replacing an existing
# assignment or appending a new line when the key is absent.
# (sed exits 0 even when nothing matched, so "sed || append" cannot detect a
# missing key; test for the key explicitly.)
set_env_var() {
  local key=$1 value=$2
  if grep -q "^${key}=" .env; then
    sed -i "s|^${key}=.*|${key}=${value}|" .env
  else
    # Start the appended assignment on its own line if the file does not end
    # with a newline.
    if [ -s .env ] && [ -n "$(tail -c 1 .env)" ]; then
      echo "" >> .env
    fi
    echo "${key}=${value}" >> .env
  fi
}

# Point at the central VictoriaMetrics on this host (containers reach the host
# through host.docker.internal). The match is anchored to a real assignment:
# env.example contains the same text inside a comment, which an unanchored
# grep would mistake for an already-configured value.
if ! grep -q '^CENTRAL_SERVER_HOST=host\.docker\.internal$' .env; then
  echo "📝 设置中央服务器为本机 (host.docker.internal:8428)..."
  set_env_var CENTRAL_SERVER_HOST host.docker.internal
  set_env_var CENTRAL_SERVER_PORT 8428
  echo "✅ 已写入 CENTRAL_SERVER_HOST=host.docker.internal, CENTRAL_SERVER_PORT=8428"
fi

echo ""
echo "请确保中央服务器已在本机运行central-server 已 deploy且 VictoriaMetrics 监听 8428。"
echo "按回车继续启动边缘,或 Ctrl+C 取消..."
read -r
bash deploy.sh
echo ""
echo "💡 本机部署完成后:"
echo " - 边缘 Prometheus UI: http://localhost:9092"
echo " - 在 Grafana 中选择数据源「VictoriaMetrics」可查看边缘上报的数据"