关于 Go:使用 Golang 1.20 极简开发 Prometheus 报警接口

35次阅读

共计 13389 个字符,预计需要花费 34 分钟才能阅读完成。

Prometheus

target

[root@vm11 prometheus]# cat prometheus.yml 
# Defaults applied to scraping and rule evaluation.
global:
  scrape_interval: 60s
  scrape_timeout: 15s
  evaluation_interval: 65s

# Alertmanager instance(s) that receive the alerts produced by the rules.
alerting:
  alertmanagers:
    - static_configs:
        - targets:
            - 'localhost:3xxx'

# Alerting rules are loaded from rule.yml.
rule_files:
  - 'rule.yml'

scrape_configs:
  - job_name: 'prometheus'
    static_configs:
      - targets:
          - '192.168.1x4.xx:3xx'
        # Extra target labels. They are carried on the alerts; the webhook
        # in main.go reads `appservice` and `city` when rendering messages.
        labels:
          appservice: 'prometheus'
          city: '苏州'
          instancehost: '192.1x.1x4.xx'


rule


[root@vm11 prometheus]# cat rule.yml 

# Host-level alerting rules.
#
# The webhook in main.go renders `annotations.description` for firing alerts
# and `annotations.summary` for resolved alerts. Only HostDown defines a
# summary, so resolved messages for the other rules have an empty body.
groups:
- name: Hosts.rules
  rules:
  # Scrape target is unreachable. `summary` is worded as the recovery text
  # because it is only shown in the resolved notification.
  - alert: HostDown
    expr: up{job=~"node-exporter|prometheus|grafana|alertmanager"} == 0
    for: 0m
    labels:
      severity: critical
    annotations:
      description: "主机:【{{$labels.instance}}】service is down-- Das ist ein Test."
      summary: "主机:【{{$labels.instance}}】service is resolved"

  # 5-minute load average above 10.
  - alert: HostCpuLoadAvage
    expr: sum(node_load5) by (instance) > 10
    for: 1m
    labels:
      severity: warning
    annotations:
      title: "5 分钟内 CPU 负载过高"
      description: "主机:【{{$labels.instance}}】5 分钟内 CPU 负载超过 10 (当前值:{{ $value }})"

  # Non-idle CPU share over the last 5 minutes above 80%.
  - alert: HostCpuUsage
    expr: (1-((sum(increase(node_cpu_seconds_total{mode="idle"}[5m])) by (instance))/ (sum(increase(node_cpu_seconds_total[5m])) by (instance))))*100 > 80
    for: 1m
    labels:
      severity: warning
    annotations:
      title: "CPU 使用率过高"
      description: "主机:【{{$labels.instance}}】5 分钟内 CPU 使用率超过 80% (当前值:{{ $value }})"

  # Memory in use (total minus buffers, cache and free) above 80%.
  - alert: HostMemoryUsage
    expr: (1-((node_memory_Buffers_bytes + node_memory_Cached_bytes + node_memory_MemFree_bytes)/node_memory_MemTotal_bytes))*100 > 80
    for: 1m
    labels:
      severity: warning
    annotations:
      title: "主机内存使用率超过 80%"
      description: "主机:【{{$labels.instance}}】内存使用率超过 80% (当前使用率:{{ $value }}%)"

  # Share of CPU time spent in iowait over the last 5 minutes above 10%.
  - alert: HostIOWait
    expr: ((sum(increase(node_cpu_seconds_total{mode="iowait"}[5m])) by (instance))/(sum(increase(node_cpu_seconds_total[5m])) by (instance)))*100 > 10
    for: 1m
    labels:
      severity: warning
    annotations:
      title: "磁盘负载过高"
      description: "主机:【{{$labels.instance}}】5 分钟内磁盘负载过高 (当前负载值:{{ $value }})"

  # ext4/xfs filesystem (excluding *tmp and *boot mountpoints) more than 70% full.
  - alert: HostFileSystemUsage
    expr: (1-(node_filesystem_free_bytes{fstype=~"ext4|xfs",mountpoint!~".*tmp|.*boot"}/node_filesystem_size_bytes{fstype=~"ext4|xfs",mountpoint!~".*tmp|.*boot"}))*100 > 70
    for: 1m
    labels:
      severity: warning
    annotations:
      title: "磁盘空间剩余不足"
      description: "主机:【{{$labels.instance}}】{{$labels.mountpoint}} 分区使用率超过 70%, 当前值使用率:{{$value}}%"

  # Swap usage above 80%.
  - alert: HostSwapIsFillingUp
    expr: (1 - (node_memory_SwapFree_bytes / node_memory_SwapTotal_bytes)) * 100 > 80
    for: 2m
    labels:
      severity: warning
    annotations:
      title: "主机 swap 分区不足"
      description: "主机:【{{$labels.instance}}】swap 分区使用超过 (>80%), 当前值使用率: {{$value}}%"

  # More than 1000 established TCP connections.
  - alert: HostNetworkConnection-ESTABLISHED
    expr: sum(node_netstat_Tcp_CurrEstab) by (instance) > 1000
    for: 5m
    labels:
      severity: warning
    annotations:
      title: "主机 ESTABLISHED 连接数过高"
      description: "主机:【{{$labels.instance}}】ESTABLISHED 连接数超过 1000, 当前 ESTABLISHED 连接数: {{$value}}"

  # More than 1000 sockets in TIME_WAIT.
  - alert: HostNetworkConnection-TIME_WAIT
    expr: sum(node_sockstat_TCP_tw) by (instance) > 1000
    for: 5m
    labels:
      severity: warning
    annotations:
      title: "主机 TIME_WAIT 连接数过高"
      description: "主机:【{{$labels.instance}}】TIME_WAIT 连接数超过 1000, 当前 TIME_WAIT 连接数: {{$value}}"

  # Inbound traffic on an ens* interface above 100 MiB/s (bytes / 1024 / 1024).
  - alert: HostUnusualNetworkThroughputIn
    expr: sum by (instance, device) (rate(node_network_receive_bytes_total{device=~"ens.*"}[2m])) / 1024 / 1024 > 100
    for: 5m
    labels:
      severity: warning
    annotations:
      title: "主机网卡入口流量过高"
      description: "主机:【{{$labels.instance}}】, 网卡: {{$labels.device}} 入口流量超过 (> 100 MB/s), 当前值: {{$value}}"

  # Outbound traffic on an ens* interface above 100 MiB/s.
  - alert: HostUnusualNetworkThroughputOut
    expr: sum by (instance, device) (rate(node_network_transmit_bytes_total{device=~"ens.*"}[2m])) / 1024 / 1024 > 100
    for: 5m
    labels:
      severity: warning
    annotations:
      title: "主机网卡出口流量过高"
      description: "主机:【{{$labels.instance}}】, 网卡: {{$labels.device}} 出口流量超过 (> 100 MB/s), 当前值: {{$value}}"

  # Read throughput on an sd* disk above 50 MiB/s.
  - alert: HostUnusualDiskReadRate
    expr: sum by (instance, device) (rate(node_disk_read_bytes_total{device=~"sd.*"}[2m])) / 1024 / 1024 > 50
    for: 5m
    labels:
      severity: warning
    annotations:
      title: "主机磁盘读取速率过高"
      description: "主机:【{{$labels.instance}}】, 磁盘: {{$labels.device}} 读取速度超过 (50 MB/s), 当前值: {{$value}}"

  # Write throughput on an sd* disk above 50 MiB/s.
  - alert: HostUnusualDiskWriteRate
    expr: sum by (instance, device) (rate(node_disk_written_bytes_total{device=~"sd.*"}[2m])) / 1024 / 1024 > 50
    for: 2m
    labels:
      severity: warning
    annotations:
      title: "主机磁盘写入速率过高"
      description: "主机:【{{$labels.instance}}】, 磁盘: {{$labels.device}} 写入速度超过 (50 MB/s), 当前值: {{$value}}"

  # Fewer than 10% of inodes free on an ext4/xfs filesystem.
  - alert: HostOutOfInodes
    expr: node_filesystem_files_free{fstype=~"ext4|xfs",mountpoint!~".*tmp|.*boot"} / node_filesystem_files{fstype=~"ext4|xfs",mountpoint!~".*tmp|.*boot"} * 100 < 10
    for: 2m
    labels:
      severity: warning
    annotations:
      title: "主机分区 Inode 节点不足"
      description: "主机:【{{$labels.instance}}】{{$labels.mountpoint}} 分区 inode 节点不足 (可用 inode 低于 10%, 当前值: {{ $value }}%)"

  # Average read latency above 0.1 s per completed read. The expression value
  # is in seconds, so it is rendered with humanizeDuration rather than
  # suffixed with a literal "ms".
  - alert: HostUnusualDiskReadLatency
    expr: rate(node_disk_read_time_seconds_total{device=~"sd.*"}[1m]) / rate(node_disk_reads_completed_total{device=~"sd.*"}[1m]) > 0.1 and rate(node_disk_reads_completed_total{device=~"sd.*"}[1m]) > 0
    for: 2m
    labels:
      severity: warning
    annotations:
      title: "主机磁盘 Read 延迟过高"
      description: "主机:【{{$labels.instance}}】, 磁盘: {{$labels.device}} Read 延迟过高 (read operations > 100ms), 当前延迟值: {{ $value | humanizeDuration }}"

  # Average write latency above 0.1 s per completed write (value in seconds).
  - alert: HostUnusualDiskWriteLatency
    expr: rate(node_disk_write_time_seconds_total{device=~"sd.*"}[1m]) / rate(node_disk_writes_completed_total{device=~"sd.*"}[1m]) > 0.1 and rate(node_disk_writes_completed_total{device=~"sd.*"}[1m]) > 0
    for: 2m
    labels:
      severity: warning
    annotations:
      title: "主机磁盘 Write 延迟过高"
      description: "主机:【{{$labels.instance}}】, 磁盘: {{$labels.device}} Write 延迟过高 (write operations > 100ms), 当前延迟值: {{ $value | humanizeDuration }}"

alertmanager

[root@vm11 alertmanager]# cat alertmanager.yml 
route:
  group_by: ['alertname']
  group_wait: 30s
  group_interval: 5m
  repeat_interval: 3600s
  # Fallback receiver for alerts that match none of the child routes
  # (e.g. severity "critical").
  receiver: 'Warning'
  routes:
    # Label values are case-sensitive and rule.yml emits lowercase severities
    # ("warning", "critical"), so an exact match on "Warning"/"Information"
    # never fires. Match case-insensitively so both spellings are routed.
    - receiver: 'Information'
      repeat_interval: 3600s
      match_re:
        severity: '(?i)information'

    - receiver: 'Warning'
      repeat_interval: 3600s
      match_re:
        severity: '(?i)warning'

receivers:
  - name: 'Warning'
    webhook_configs:
      - url: 'http://192.1x.1xx.xx:3xx/alertmanager/warning'
        send_resolved: true

  - name: 'Information'
    webhook_configs:
      - url: 'http://192.168.1x.1x:38xx/alertmanager/information'
        send_resolved: true

# A firing "critical" alert mutes "warning" alerts that carry the same
# alertname, dev and instance labels.
# NOTE(review): in rule.yml the only critical rule (HostDown) shares its
# alertname with no warning rule, so this rule currently never inhibits
# anything - confirm whether 'alertname' belongs in `equal`.
inhibit_rules:
  - source_match:
      severity: 'critical'
    target_match:
      severity: 'warning'
    equal: ['alertname', 'dev', 'instance']

go code

  • main.go
package main

import (
    "bytes"
    "encoding/json"
    "fmt"
    "github.com/gin-gonic/gin"
    "io"
    "io/ioutil"
    "net/http"
    "prometheus_alarm/alarmtool"
    "prometheus_alarm/prometheus"
    "strings"
)

// 要害
func JSONDecode(r io.Reader, obj interface{}) error {if err := json.NewDecoder(r).Decode(obj); err != nil {return err}
    return nil
}

// main starts the Alertmanager webhook receiver on 0.0.0.0:38060.
//
// Routes:
//   - GET  /health               liveness probe, always replies {"status": 200}
//   - POST /alertmanager/warning decodes an Alertmanager webhook payload and
//     forwards one WeCom markdown message per firing/resolved alert
func main() {
    gin.SetMode(gin.ReleaseMode)
    r := gin.Default()

    r.GET("/health", func(c *gin.Context) {
        // Reply with a small JSON document so callers can probe the service.
        c.JSON(http.StatusOK, gin.H{
            "status": 200,
        })
    })

    r.POST("/alertmanager/warning", func(c *gin.Context) {
        var reqInfo prometheus.AlertMsgRet

        data, err := c.GetRawData()
        if err != nil {
            // Without a body there is nothing to forward; do not claim success.
            fmt.Println(err.Error())
            c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()})
            return
        }
        fmt.Println("### data:", string(data))

        // GetRawData drained the request body; put the bytes back so the
        // body can be read again by the decoder below.
        c.Request.Body = ioutil.NopCloser(bytes.NewBuffer(data))
        if err := JSONDecode(c.Request.Body, &reqInfo); err != nil {
            fmt.Println("decode err:", err)
            c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()})
            return
        }

        // NOTE(review): the mention list and the robot webhook key are
        // hard-coded; consider loading them from configuration so the key
        // is not committed with the source.
        userList := []string{"18xxxxxxxxx"}
        url := "https://qyapi.weixin.qq.com/cgi-bin/webhook/send?key=axxxxxxxxx"

        for _, alert := range reqInfo.Alerts {
            var content string

            // strings.NewReplacer substitutes every placeholder in a single
            // pass, so a label value that happens to contain another
            // placeholder (e.g. "instance17") is not substituted again.
            switch alert.Status {
            case "firing":
                content = strings.NewReplacer(
                    "alertname17", alert.Labels.AlertName,
                    "appservice17", alert.Labels.AppService,
                    "startsAt17", alarmtool.TimeTransform(alert.StartsAt),
                    "city17", alert.Labels.City,
                    "instance17", alert.Labels.Instance,
                    "severity17", alert.Labels.Severity,
                    "description17", alert.Annotations.Description,
                ).Replace(prometheus.FiringTemp)
            case "resolved":
                content = strings.NewReplacer(
                    "alertname17", alert.Labels.AlertName,
                    "appservice17", alert.Labels.AppService,
                    "city17", alert.Labels.City,
                    "startsAt17", alarmtool.TimeTransform(alert.StartsAt),
                    "endsAt17", alarmtool.TimeTransform(alert.EndsAt),
                    "instance17", alert.Labels.Instance,
                    "severity17", alert.Labels.Severity,
                    "summary17", alert.Annotations.Summary,
                ).Replace(prometheus.ResolvedTemp)
            default:
                // Unknown status: nothing to send.
                continue
            }

            alarmtool.SendMsgChatGroupPerson(url, userList, content)
        }

        c.JSON(200, "success")
    })

    // Listen on all interfaces, port 38060. Run only returns on failure
    // (for example when the port is already in use).
    if err := r.Run("0.0.0.0:38060"); err != nil {
        fmt.Println("server exited:", err)
    }
}


  • prometheus/promethues.go

package prometheus


// AlertLabels is the label set decoded from an alert in the webhook payload.
// Only the labels listed here are decoded; any others are ignored.
type AlertLabels struct {
    AlertName    string `json:"alertname"`
    AppService   string `json:"appservice"`
    City         string `json:"city"`
    Instance     string `json:"instance"`
    InstanceHost string `json:"instancehost"`
    Job          string `json:"job"`
    Severity     string `json:"severity"`
}

// AlertAnnotations holds the annotations decoded from an alert.
type AlertAnnotations struct {
    Description string `json:"description"`
    Summary     string `json:"summary"`
}

// Alert is one entry of the payload's "alerts" array.
type Alert struct {
    Status       string           `json:"status"`
    Labels       AlertLabels      `json:"labels"`
    Annotations  AlertAnnotations `json:"annotations"`
    StartsAt     string           `json:"startsAt"`
    EndsAt       string           `json:"endsAt"`
    GeneratorURL string           `json:"generatorURL"`
    Fingerprint  string           `json:"fingerprint"`
}

// AlertMsgRet is the JSON body that Alertmanager posts to the webhook.
// The label and annotation shapes are shared between the per-alert entries
// and the "common*" fields, so they are declared once as named types.
type AlertMsgRet struct {
    Receiver    string  `json:"receiver"`
    Status      string  `json:"status"`
    Alerts      []Alert `json:"alerts"`
    GroupLabels struct {
        AlertName string `json:"alertname"`
    } `json:"groupLabels"`
    CommonLabels      AlertLabels      `json:"commonLabels"`
    CommonAnnotations AlertAnnotations `json:"commonAnnotations"`
    ExternalURL       string           `json:"externalURL"`
    Version           string           `json:"version"`
    GroupKey          string           `json:"groupKey"`
    TruncatedAlerts   int              `json:"truncatedAlerts"`
}

// Markdown message templates for the WeCom group robot. Tokens ending in
// "17" (alertname17, severity17, ...) are placeholders that main.go replaces
// with values taken from the alert before sending.
const (
    // FiringTemp is the body used for alerts whose status is "firing".
    FiringTemp = "#### [Prometheus 告警信息] \n> <font color=\"#FF0000\"> 告警名称 </font>:alertname17\n> <font color=\"#FF0000\"> 告警级别 </font>:severity17 \n> <font color=\"#FF0000\"> 应用名称 </font>:appservice17\n> <font color=\"#FF0000\"> 开始时间 </font>:startsAt17 \n> <font color=\"#FF0000\"> 主机地址 </font>:instance17 \n> <font color=\"#FF0000\">City</font>:city17 \n> <font color=\"#FF0000\"> 故障信息 </font>:\n** messages:description17 **"

    // ResolvedTemp is the body used for alerts whose status is "resolved".
    ResolvedTemp = "#### [Prometheus 恢复信息] \n> <font color=\"#00FF00\"> 告警名称 </font>:alertname17\n> <font color=\"#00FF00\"> 告警级别 </font>:severity17 \n> <font color=\"#00FF00\"> 应用名称 </font>:appservice17\n> <font color=\"#00FF00\"> 开始时间 </font>:startsAt17 \n> <font color=\"#00FF00\"> 恢复时间 </font>:endsAt17 \n> <font color=\"#00FF00\"> 主机地址 </font>:instance17 \n> <font color=\"#00FF00\">City</font>:city17 \n> <font color=\"#00FF00\"> 恢复信息 </font>:\n** messages:summary17 **"
)


  • json

    {
      "receiver": "Warning",
      "status": "resolved",
      "alerts": [{
          "status": "resolved",
          "labels": {
              "alertname": "HostDown",
              "appservice": "prometheus",
              "city": "苏州",
              "instance": "192.168.1xx.x1:3xx0",
              "instancehost": "192.1xx.1xx.xx",
              "job": "prometheus",
              "severity": "critical"
          },
          "annotations": {
              "description": "主机:【192.168.1xx.x:3xxx】has been down for more than 1 minute",
              "summary": "主机:【192.168.1xx.xx:3xxx】service is resolved"
          },
          "startsAt": "2023-07-06T01:16:20.991Z",
          "endsAt": "2023-07-06T01:29:20.991Z",
          "generatorURL": "http://vm11:39090/graph?g0.expr=up%7Bjob%3D~%22node-exporter%7Cprometheus%7Cgrafana%7Calertmanager%22%7D+%3D%3D+0\u0026g0.tab=1",
          "fingerprint": "1becfcf3e2ebd5f7"
      }],
      "groupLabels": {"alertname": "HostDown"},
      "commonLabels": {
          "alertname": "HostDown",
          "appservice": "prometheus",
          "city": "苏州",
          "instance": "192.168.1x.xx:3xx",
          "instancehost": "192.1x8.1x4.x",
          "job": "prometheus",
          "severity": "critical"
      },
      "commonAnnotations": {
          "description": "主机:【192.16x.xx.x1:3xxx】has been down for more than 1 minute",
          "summary": "主机:【192.168.1xx.xx:39xx】service is resolved"
      },
      "externalURL": "http://vm11:39xxx",
      "version": "4",
      "groupKey": "{}:{alertname=\"HostDown\"}",
      "truncatedAlerts": 0
    }
    
  • alarmtool/timetransform.go
package alarmtool

import (
    "fmt"
    "time"
)

// TimeTransform converts an RFC 3339 timestamp such as
// "2023-07-06T01:16:20.991Z" into "2006-01-02 15:04:05" format expressed in
// the Asia/Shanghai time zone (UTC+8).
//
// It returns the literal string "error inputTime" when inputTime cannot be
// parsed.
func TimeTransform(inputTime string) string {
    t, err := time.Parse(time.RFC3339Nano, inputTime)
    if err != nil {
        fmt.Println("Failed to parse input time:", err)
        return "error inputTime"
    }

    loc, err := time.LoadLocation("Asia/Shanghai")
    if err != nil {
        // The zone database can be missing (for example in a minimal
        // container image without tzdata). Fall back to a fixed UTC+8
        // offset instead of failing every conversion.
        fmt.Println("Failed to load time zone, using fixed UTC+8:", err)
        loc = time.FixedZone("CST", 8*60*60)
    }

    return t.In(loc).Format("2006-01-02 15:04:05")
}
  • alarmtool/wecome

package alarmtool

import (
    "bytes"
    "encoding/json"
    "fmt"
    "io/ioutil"
    "log"
    "net/http"
)

// ChatGroupJsonRes is the JSON request body for a markdown message sent to
// the group-robot webhook.
type ChatGroupJsonRes struct {
    MsgType  string `json:"msgtype"`
    Markdown struct {
        Content interface{} `json:"content"`
    } `json:"markdown"`
}

// T4 is the JSON reply returned by the webhook.
type T4 struct {
    Errcode int    `json:"errcode"`
    Errmsg  string `json:"errmsg"`
}

// SendMsgChatGroupPerson posts comment as a markdown message to webHook,
// appending one "<@id>" mention per entry of person.
//
// It returns the decoded webhook reply. When the message cannot be built,
// sent, or the reply cannot be read or decoded, the failure is logged and a
// T4 with Errcode -1 and the error text in Errmsg is returned, so a failed
// notification never terminates the calling process.
func SendMsgChatGroupPerson(webHook string, person []string, comment string) *T4 {
    // fail logs a local failure and converts it into a non-zero result.
    fail := func(stage string, err error) *T4 {
        log.Printf("send wecom message: %s: %v", stage, err)
        return &T4{Errcode: -1, Errmsg: stage + ": " + err.Error()}
    }

    var mentions string
    for _, p := range person {
        mentions += fmt.Sprintf("<@%s>", p)
    }

    var j ChatGroupJsonRes
    j.MsgType = "markdown"
    j.Markdown.Content = comment + "\n" + mentions

    data, err := json.Marshal(j)
    if err != nil {
        return fail("marshal message", err)
    }
    fmt.Println(string(data))

    req, err := http.NewRequest("POST", webHook, bytes.NewReader(data))
    if err != nil {
        return fail("build request", err)
    }
    req.Header.Add("Content-Type", "application/json")
    req.Header.Add("Accept", "application/json")

    // NOTE(review): this client has no timeout, so a stalled webhook blocks
    // the caller indefinitely; consider setting http.Client.Timeout.
    client := &http.Client{}
    resp, err := client.Do(req)
    if err != nil {
        return fail("send request", err)
    }
    defer resp.Body.Close()

    body, err := ioutil.ReadAll(resp.Body)
    if err != nil {
        return fail("read response", err)
    }
    fmt.Println("####send msg##", string(body))

    var result T4
    if err := json.Unmarshal(body, &result); err != nil {
        return fail("decode response", err)
    }
    return &result
}




  • Dockerfile
# ---- Build stage: compile a static Linux binary ----
FROM golang:1.19 AS builder

ENV HOME=/app
ENV CGO_ENABLED=0
ENV GOOS=linux
ENV GOPROXY=https://goproxy.cn,direct
ENV TIME_ZONE=Asia/Shanghai

WORKDIR /app
COPY . .
RUN go mod download

RUN go build -v -a -installsuffix cgo -o demo main.go

# ---- Runtime stage ----
FROM alpine:latest

# ca-certificates: needed for the HTTPS call to the WeCom webhook.
# tzdata: provides the zone database that time.LoadLocation("Asia/Shanghai")
# in alarmtool/timetransform.go reads; without it the lookup fails.
RUN apk --no-cache add ca-certificates tzdata

# Working directory for the binary.
WORKDIR /bin/

COPY --from=builder /app/demo .

ENTRYPOINT ["/bin/demo"]

test

# Build the image from the Dockerfile in the current directory.
docker build -t prometheusalarm:test1 .
# Run it detached, publishing port 38060 (the port main.go listens on).
docker run -d -p 38060:38060 prometheusalarm:test1

正文完
 0