当前位置: 首页 > article >正文

监控告警+webhook一键部署

安装前请先按需修改脚本开头的变量(安装目录、版本号、Webhook 端口、飞书 Webhook 地址)。
安装完成后脚本会输出如下详细提示,之后只需追加或修改 Prometheus 配置即可。

[✓] Webhook服务安装完成
[*] 创建测试脚本...
[✓] 测试脚本已创建: /data/webhook/test-webhook.sh

===== 安装完成 =====
Prometheus访问地址: http://10.234.210.88:9090
Alertmanager访问地址: http://10.234.210.88:9093
Webhook服务地址: http://10.234.210.88:58888

重要提示:
1. 使用以下命令测试webhook是否正常工作:
   /data/webhook/test-webhook.sh
2. 使用以下命令重新加载Prometheus配置:
   curl -X POST http://localhost:9090/-/reload
3. 查看服务状态:
   systemctl status prometheus
   systemctl status alertmanager
   systemctl status prometheus-webhook
#!/bin/bash
#
# One-shot deployment script for a Prometheus monitoring stack (lite edition).
# Installs and configures Prometheus and Alertmanager automatically.
#
# Must be run as root. Adjust the variables below before running.

# Abort on command failure, on use of an unset variable, and when any stage of
# a pipeline fails.
set -euo pipefail

# ANSI colour escapes; they are rendered by `echo -e`.
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[0;33m'
NC='\033[0m' # No Color

# Install location and the release versions to download.
INSTALL_DIR="/data"
PROMETHEUS_VERSION="2.49.1"
ALERTMANAGER_VERSION="0.26.0"

# Webhook settings: the port the relay service listens on and the Lark bot hook
# URL it forwards alerts to.
# NOTE(review): the URL below looks like a redacted placeholder -- replace it
# with your own bot's hook address before running.
WEBHOOK_PORT="58888"
LARK_WEBHOOK_URL="https://open.larksuite.com/open-apis/bot/v2/hook/-0627-4bc8--"

# The script writes systemd unit files and manages services, so it refuses to
# run as anything but root.
if [[ "$(id -u)" != "0" ]]; then
    echo -e "${RED}此脚本必须以root用户身份运行${NC}" >&2
    exit 1
fi

echo -e "${GREEN}===== 开始部署Prometheus监控系统 =====${NC}"

# Create the install directory and make it the working directory; the release
# tarballs are downloaded into and unpacked from the current directory.
mkdir -p -- "${INSTALL_DIR}"
cd -- "${INSTALL_DIR}"

# Create the required directory layout.
echo -e "${YELLOW}[*] 创建目录结构...${NC}"
mkdir -p -- "${INSTALL_DIR}"/prometheus/{config,rules,data}
mkdir -p -- "${INSTALL_DIR}/alertmanager"

# Download, install and start Prometheus as a systemd service.
#
# The whole step is skipped when Prometheus is already present, either on PATH
# or at ${INSTALL_DIR}/prometheus/prometheus (where this function installs it).
# The install directory is not on PATH, so checking PATH alone never detects a
# previous run of this script, and a re-run would overwrite a prometheus.yml
# that was edited after installation.
#
# Globals:  INSTALL_DIR, PROMETHEUS_VERSION, GREEN, YELLOW, NC (read)
# Outputs:  progress messages on stdout
# Notes:    the release tarball is downloaded to, and unpacked in, the current
#           working directory.
install_prometheus() {
    local prom_dir="${INSTALL_DIR}/prometheus"
    local release="prometheus-${PROMETHEUS_VERSION}.linux-amd64"
    local tarball="${release}.tar.gz"

    # Already installed? (-f guards against matching a directory with -x.)
    if command -v prometheus &> /dev/null \
        || [[ -f "${prom_dir}/prometheus" && -x "${prom_dir}/prometheus" ]]; then
        echo -e "${GREEN}[✓] Prometheus已安装,跳过安装步骤${NC}"
        return
    fi

    echo -e "${YELLOW}[*] 下载并安装Prometheus...${NC}"

    # Download under a temporary name and rename on success, so an interrupted
    # download is never mistaken for a complete tarball on the next run.
    if [[ ! -f "${tarball}" ]]; then
        wget -O "${tarball}.part" \
            "https://github.com/prometheus/prometheus/releases/download/v${PROMETHEUS_VERSION}/${tarball}"
        mv -- "${tarball}.part" "${tarball}"
    fi

    # Unpack, then copy the binaries and console assets into place.
    tar -xzf "${tarball}"
    cp -- "${release}/prometheus" "${release}/promtool" "${prom_dir}/"
    cp -r -- "${release}/consoles" "${release}/console_libraries" "${prom_dir}/"

    # Write the Prometheus configuration (unquoted heredoc: ${INSTALL_DIR} is
    # expanded, the backticks are escaped to stay literal).
    cat > "${prom_dir}/prometheus.yml" <<EOF
# my global config
global:
  scrape_interval: 15s # Set the scrape interval to every 15 seconds. Default is every 1 minute.
  evaluation_interval: 15s # Evaluate rules every 15 seconds. The default is every 1 minute.
  # scrape_timeout is set to the global default (10s).

# Alertmanager configuration
alerting:
  alertmanagers:
    - static_configs:
        - targets:
           - localhost:9093
rule_files:
  - "${INSTALL_DIR}/prometheus/rules/*.yaml"

# A scrape configuration containing exactly one endpoint to scrape:
# Here it's Prometheus itself.
scrape_configs:
  # The job name is added as a label \`job=<job_name>\` to any timeseries scraped from this config.
  - job_name: "prometheus"
    # metrics_path defaults to '/metrics'
    # scheme defaults to 'http'.
    static_configs:
      - targets: ["localhost:9090"]
EOF

    # Write a sample alert rule (\$labels is escaped so Prometheus, not the
    # shell, expands it).
    mkdir -p -- "${prom_dir}/rules"
    cat > "${prom_dir}/rules/basic-alert.yaml" <<EOF
groups:
  - name: basic-alerts
    rules:
    - alert: InstanceDown
      expr: up == 0
      for: 0m
      labels:
        severity: critical
      annotations:
        title: 'Instance down'
        description: "实例 {{ \$labels.instance }} 已经宕机"
EOF

    # Write the systemd unit; --web.enable-lifecycle enables the HTTP reload
    # endpoint that the post-install hints refer to.
    cat > /etc/systemd/system/prometheus.service <<EOF
[Unit]
Description=Prometheus Service
Wants=network-online.target
After=network-online.target

[Service]
User=root
Group=root
Type=simple
ExecStart=${INSTALL_DIR}/prometheus/prometheus --config.file=${INSTALL_DIR}/prometheus/prometheus.yml --web.enable-lifecycle --storage.tsdb.path=${INSTALL_DIR}/prometheus/data
Restart=on-failure

[Install]
WantedBy=multi-user.target
EOF

    # Reload systemd, then enable and start Prometheus.
    systemctl daemon-reload
    systemctl enable prometheus
    systemctl start prometheus

    echo -e "${GREEN}[✓] Prometheus安装完成${NC}"
}

# Download, install and start Alertmanager as a systemd service.
#
# The whole step is skipped when Alertmanager is already present, either on
# PATH or at ${INSTALL_DIR}/alertmanager/alertmanager (where this function
# installs it). The install directory is not on PATH, so checking PATH alone
# never detects a previous run of this script, and a re-run would overwrite an
# alertmanager.yml that was edited after installation.
#
# Globals:  INSTALL_DIR, ALERTMANAGER_VERSION, WEBHOOK_PORT, GREEN, YELLOW,
#           NC (read)
# Outputs:  progress messages on stdout
# Notes:    the release tarball is downloaded to, and unpacked in, the current
#           working directory.
install_alertmanager() {
    local am_dir="${INSTALL_DIR}/alertmanager"
    local release="alertmanager-${ALERTMANAGER_VERSION}.linux-amd64"
    local tarball="${release}.tar.gz"

    # Already installed? (-f guards against matching a directory with -x.)
    if command -v alertmanager &> /dev/null \
        || [[ -f "${am_dir}/alertmanager" && -x "${am_dir}/alertmanager" ]]; then
        echo -e "${GREEN}[✓] Alertmanager已安装,跳过安装步骤${NC}"
        return
    fi

    echo -e "${YELLOW}[*] 下载并安装Alertmanager...${NC}"

    # Download under a temporary name and rename on success, so an interrupted
    # download is never mistaken for a complete tarball on the next run.
    if [[ ! -f "${tarball}" ]]; then
        wget -O "${tarball}.part" \
            "https://github.com/prometheus/alertmanager/releases/download/v${ALERTMANAGER_VERSION}/${tarball}"
        mv -- "${tarball}.part" "${tarball}"
    fi

    # Unpack, then copy the binaries into place.
    tar -xzf "${tarball}"
    cp -- "${release}/alertmanager" "${release}/amtool" "${am_dir}/"

    # Write the Alertmanager configuration: every alert is routed to the local
    # webhook relay on ${WEBHOOK_PORT}.
    cat > "${am_dir}/alertmanager.yml" <<EOF
global:
  resolve_timeout: 1m
route:
  receiver: 'webhook'
  group_by: ['alertname']
  group_wait: 10s
  group_interval: 10s
  repeat_interval: 30m
receivers:
  - name: 'webhook'
    webhook_configs:
    - url: 'http://127.0.0.1:${WEBHOOK_PORT}/send'
      send_resolved: true
inhibit_rules:
  - source_match:
      severity: 'critical'
    target_match:
      severity: 'warning'
    equal: ['alertname', 'instance']
EOF

    # Write the systemd unit.
    cat > /etc/systemd/system/alertmanager.service <<EOF
[Unit]
Description=Alertmanager Service
Wants=network-online.target
After=network-online.target

[Service]
User=root
Group=root
Type=simple
ExecStart=${INSTALL_DIR}/alertmanager/alertmanager --config.file=${INSTALL_DIR}/alertmanager/alertmanager.yml
Restart=on-failure

[Install]
WantedBy=multi-user.target
EOF

    # Reload systemd, then enable and start Alertmanager.
    systemctl daemon-reload
    systemctl enable alertmanager
    systemctl start alertmanager

    echo -e "${GREEN}[✓] Alertmanager安装完成${NC}"
}

# Install and start the lightweight webhook relay: a Flask app that receives
# Alertmanager notifications on /send and forwards each alert to Lark as an
# interactive card.
#
# Compared with a naive relay, the generated app
#   * passes a timeout to requests.post, so an unresponsive Lark endpoint
#     cannot block a request handler indefinitely;
#   * catches specific exception classes instead of using bare `except:`,
#     which would also swallow SystemExit and KeyboardInterrupt.
#
# Globals:  INSTALL_DIR, WEBHOOK_PORT, LARK_WEBHOOK_URL, GREEN, YELLOW, NC (read)
# Outputs:  progress messages on stdout
# Notes:    installs python3 / python3-pip through apt-get when they are
#           missing, and flask + requests through pip3.
install_webhook() {
    local webhook_dir="${INSTALL_DIR}/webhook"

    echo -e "${YELLOW}[*] 安装简单版Webhook服务...${NC}"

    # Make sure Python 3 is available.
    if ! command -v python3 &> /dev/null; then
        echo -e "${YELLOW}[*] Python3未安装,正在安装...${NC}"
        apt-get update
        apt-get install -y python3
    fi

    # pip3 is packaged separately, so check for it on its own.
    if ! command -v pip3 &> /dev/null; then
        echo -e "${YELLOW}[*] pip3未安装,正在安装...${NC}"
        apt-get update
        apt-get install -y python3-pip
    fi

    # Install the Python packages the relay needs.
    pip3 install flask requests

    # Create the webhook directory.
    mkdir -p -- "${webhook_dir}"

    # Generate the Flask app. The heredoc is unquoted so ${LARK_WEBHOOK_URL}
    # and ${WEBHOOK_PORT} are baked into the file at install time.
    cat > "${webhook_dir}/app.py" <<EOF
from flask import Flask, request, jsonify
import requests
import json
from datetime import datetime

app = Flask(__name__)

def send_to_lark(status, title, description, start_time, end_time="", severity="Unknown", instance="Unknown", alertname="Unknown"):
    """发送消息到飞书"""
    # 设置飞书webhook URL
    url = "${LARK_WEBHOOK_URL}"

    # 格式化时间
    try:
        # 将UTC时间转为本地时间
        start_dt = datetime.strptime(start_time, "%Y-%m-%dT%H:%M:%S.%fZ")
        start_time_fmt = start_dt.strftime("%Y-%m-%d %H:%M:%S")

        if end_time:
            end_dt = datetime.strptime(end_time, "%Y-%m-%dT%H:%M:%S.%fZ")
            end_time_fmt = end_dt.strftime("%Y-%m-%d %H:%M:%S")
        else:
            end_time_fmt = "未结束"
    except (ValueError, TypeError):
        start_time_fmt = start_time
        end_time_fmt = end_time if end_time else "未结束"

    # 设置消息颜色
    color = "red" if status == "firing" else "green"
    status_text = "🔥告警触发" if status == "firing" else "✅告警恢复"

    # 构建简单卡片消息
    card = {
        "msg_type": "interactive",
        "card": {
            "config": {"wide_screen_mode": True},
            "header": {
                "template": color,
                "title": {"content": f"{status_text}: {title}", "tag": "plain_text"}
            },
            "elements": [
                {
                    "tag": "div",
                    "text": {"tag": "lark_md", "content": f"**告警名称**: {alertname}"}
                },
                {"tag": "hr"},
                {
                    "tag": "div",
                    "fields": [
                        {
                            "is_short": True,
                            "text": {"tag": "lark_md", "content": f"**状态**: {status}"}
                        },
                        {
                            "is_short": True,
                            "text": {"tag": "lark_md", "content": f"**级别**: {severity}"}
                        }
                    ]
                },
                {
                    "tag": "div",
                    "fields": [
                        {
                            "is_short": True,
                            "text": {"tag": "lark_md", "content": f"**开始时间**: {start_time_fmt}"}
                        },
                        {
                            "is_short": True,
                            "text": {"tag": "lark_md", "content": f"**结束时间**: {end_time_fmt}"}
                        }
                    ]
                },
                {
                    "tag": "div",
                    "fields": [
                        {
                            "is_short": True,
                            "text": {"tag": "lark_md", "content": f"**实例**: {instance}"}
                        }
                    ]
                },
                {"tag": "hr"},
                {
                    "tag": "div",
                    "text": {"tag": "lark_md", "content": f"**详细信息**: {description}"}
                }
            ]
        }
    }

    # 发送请求
    headers = {'Content-Type': 'application/json'}
    try:
        response = requests.post(url, json=card, headers=headers, timeout=10)
        return response.json()
    except Exception as e:
        print(f"发送消息失败: {e}")
        return {"error": str(e)}

@app.route("/")
def hello_world():
    return "<p>Prometheus Alert Webhook Server</p>"

@app.route("/send", methods=['POST', 'GET'])
def send_msg():
    if request.method == 'GET':
        return "<p>请使用POST请求发送告警信息!</p>"

    try:
        # 获取请求数据
        try:
            data = request.json
        except Exception:
            return jsonify({"status": "error", "message": "无效的JSON数据"}), 400

        print("接收到的告警数据:")
        print(data)

        # 处理告警
        responses = []
        if "alerts" in data:
            for alert in data["alerts"]:
                # 提取基本信息
                status = alert.get("status", "Unknown")
                start_time = alert.get("startsAt", "Unknown")
                end_time = alert.get("endsAt", "")

                # 提取标签
                labels = alert.get("labels", {})
                alertname = labels.get("alertname", "Unknown")
                severity = labels.get("severity", "Unknown")
                instance = labels.get("instance", "Unknown")

                # 提取注释
                annotations = alert.get("annotations", {})
                title = annotations.get("title", alertname)
                description = annotations.get("description", "无详细信息")

                # 发送到飞书
                response = send_to_lark(
                    status,
                    title,
                    description,
                    start_time,
                    end_time,
                    severity,
                    instance,
                    alertname
                )

                responses.append(response)

        return jsonify({"status": "success", "responses": responses})

    except Exception as e:
        print(f"处理请求时出错: {e}")
        return jsonify({"status": "error", "message": str(e)}), 500

if __name__ == '__main__':
    app.run(host='0.0.0.0', port=${WEBHOOK_PORT})
EOF

    # Write the systemd unit for the relay.
    cat > /etc/systemd/system/prometheus-webhook.service <<EOF
[Unit]
Description=Prometheus Alert Webhook Service
Wants=network-online.target
After=network-online.target

[Service]
User=root
Group=root
Type=simple
ExecStart=/usr/bin/python3 ${INSTALL_DIR}/webhook/app.py
Restart=on-failure
WorkingDirectory=${INSTALL_DIR}/webhook

[Install]
WantedBy=multi-user.target
EOF

    # Reload systemd, then enable and start the webhook service.
    systemctl daemon-reload
    systemctl enable prometheus-webhook
    systemctl start prometheus-webhook

    echo -e "${GREEN}[✓] Webhook服务安装完成${NC}"
}

# Generate ${INSTALL_DIR}/webhook/test-webhook.sh, an executable helper that
# POSTs one sample "firing" alert to the local webhook relay.
#
# The target path is quoted throughout: an unquoted redirect target fails with
# "ambiguous redirect" when INSTALL_DIR contains whitespace. The webhook
# directory is created if it does not exist yet, so the function does not
# depend on another step having created it.
#
# Globals:  INSTALL_DIR, WEBHOOK_PORT, GREEN, YELLOW, NC (read)
# Outputs:  progress messages on stdout
create_test_script() {
    local webhook_dir="${INSTALL_DIR}/webhook"
    local script_path="${webhook_dir}/test-webhook.sh"

    echo -e "${YELLOW}[*] 创建测试脚本...${NC}"

    mkdir -p -- "${webhook_dir}"

    # Unquoted heredoc: ${WEBHOOK_PORT} is expanded now, while the escaped
    # \$(...) / \$VAR / \\ sequences stay literal so that they are evaluated
    # when the generated script runs.
    cat > "${script_path}" <<EOF
#!/bin/bash

# 设置webhook地址
WEBHOOK_URL="http://localhost:${WEBHOOK_PORT}/send"

# 当前时间(UTC格式)
CURRENT_TIME=\$(date -u +"%Y-%m-%dT%H:%M:%S.000Z")

# 模拟一个测试告警的JSON数据
curl -X POST \$WEBHOOK_URL \\
  -H "Content-Type: application/json" \\
  -d '{
  "alerts": [
    {
      "status": "firing",
      "labels": {
        "alertname": "测试告警",
        "severity": "critical",
        "instance": "test-server-01"
      },
      "annotations": {
        "title": "测试告警标题",
        "description": "这是一条测试告警,用于验证webhook是否正常工作"
      },
      "startsAt": "'\$CURRENT_TIME'",
      "endsAt": "",
      "generatorURL": "http://prometheus.example.com/graph",
      "fingerprint": "c1bb9a35f9844428"
    }
  ]
}'

echo "测试告警已发送,请检查飞书是否收到消息"
EOF

    chmod +x -- "${script_path}"
    echo -e "${GREEN}[✓] 测试脚本已创建: ${INSTALL_DIR}/webhook/test-webhook.sh${NC}"
}

# Print the post-install summary: the three service URLs followed by the
# suggested follow-up commands.
#
# The host part of each URL is the first address reported by `hostname -I`.
#
# Globals:  INSTALL_DIR, WEBHOOK_PORT, GREEN, YELLOW, NC (read)
# Outputs:  the summary on stdout
show_info() {
    local endpoint svc
    # "label|port" pairs, printed in this order.
    local -a endpoints=(
        "Prometheus访问地址|9090"
        "Alertmanager访问地址|9093"
        "Webhook服务地址|${WEBHOOK_PORT}"
    )
    local -a services=(prometheus alertmanager prometheus-webhook)

    # %b expands backslash escapes in the argument the same way `echo -e` does.
    printf '%b\n' "\n${GREEN}===== 安装完成 =====${NC}"
    for endpoint in "${endpoints[@]}"; do
        printf '%b\n' "${YELLOW}${endpoint%%|*}: http://$(hostname -I | awk '{print $1}'):${endpoint##*|}${NC}"
    done

    printf '%b\n' "\n${GREEN}重要提示:${NC}"
    printf '%b\n' "1. 使用以下命令测试webhook是否正常工作:"
    printf '%b\n' "   ${YELLOW}${INSTALL_DIR}/webhook/test-webhook.sh${NC}"
    printf '%b\n' "2. 使用以下命令重新加载Prometheus配置:"
    printf '%b\n' "   ${YELLOW}curl -X POST http://localhost:9090/-/reload${NC}"
    printf '%b\n' "3. 查看服务状态:"
    for svc in "${services[@]}"; do
        printf '%b\n' "   ${YELLOW}systemctl status ${svc}${NC}"
    done
}

# Entry point: run every deployment step, in order. Each step is a function
# defined elsewhere in this script.
main() {
    local step
    local -a steps=(
        install_prometheus
        install_alertmanager
        install_webhook
        create_test_script
        show_info
    )

    for step in "${steps[@]}"; do
        "${step}"
    done
}

# Run the main routine.
main

http://www.kler.cn/a/600801.html

相关文章:

  • PAT乙级1007
  • jvm中每个类的Class对象是唯一的吗
  • 万字C++STL——vector模拟实现
  • Linux中的基本开发工具(上)
  • 基于Spring Boot的党员学习交流平台的设计与实现(LW+源码+讲解)
  • 【微服务架构】SpringCloud(七):配置中心 Spring Cloud Config
  • OpenCV图像拼接(7)根据权重图对源图像进行归一化处理函数normalizeUsingWeightMap()
  • 洛谷 P1351 [NOIP 2014 提高组] 联合权值(树)
  • HTML5 canvas圆形泡泡动画背景特效
  • 最长连续子序列和的所含元素 -- Kadane算法拓展
  • R语言——字符串
  • 一文解读DeepSeek的安全风险、挑战与应对策略
  • C#基础学习(一)复杂数据类型之枚举
  • 【Linux】从开发到系统管理深入理解环境变量
  • RocketMQ 详细知识点总结
  • 文章记单词 | 第2篇(六级)
  • STM32/GD32主要学习内容
  • K8s的网络
  • Java高频面试之集合-18
  • 目录遍历漏洞复现