$ cd /opt/monitor/config
$ vi prometheus.yml     # add the Prometheus configuration file
$ vi alertmanager.yml   # add the Alertmanager configuration; set up the sending and receiving mailboxes
$ vi node_down.yml      # add the Node Exporter config; define the alerting rules
# my global config
global:
  scrape_interval: 15s     # Set the scrape interval to every 15 seconds. Default is every 1 minute.
  evaluation_interval: 15s # Evaluate rules every 15 seconds. The default is every 1 minute.
  # scrape_timeout is set to the global default (10s).

# Load rules once and periodically evaluate them according to the global 'evaluation_interval'.
rule_files:
  - "node_down.yml"
  # - "first_rules.yml"
  # - "second_rules.yml"

# A scrape configuration containing exactly one endpoint to scrape:
# Here it's Prometheus itself.
scrape_configs:
  # The job name is added as a label `job=<job_name>` to any timeseries scraped from this config.
  - job_name: 'prometheus'
    static_configs:
      - targets: ['prometheus:9090']
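For Prometheus to forward firing alerts to Alertmanager, prometheus.yml also needs an alerting section pointing at the Alertmanager instance. A minimal sketch, assuming the Alertmanager container is reachable under the service name alertmanager on its default port 9093:

# Alertmanager configuration (assumes the service name "alertmanager" and the default port 9093)
alerting:
  alertmanagers:
  - static_configs:
    - targets: ['alertmanager:9093']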
groups:
- name: node_down
  rules:
  - alert: InstanceDown
    expr: up == 0
    for: 1m
    labels:
      user: test
    annotations:
      summary: "Instance {{ $labels.instance }} down"
      description: "{{ $labels.instance }} of job {{ $labels.job }} has been down for more than 1 minute."
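To turn firing alerts into email notifications, alertmanager.yml has to define the SMTP settings of the sending mailbox and a receiver for the incoming alerts. A minimal sketch; the SMTP host, credentials, and addresses are placeholders that must be replaced with real values:

global:
  smtp_smarthost: 'smtp.example.com:587'   # placeholder SMTP server and port
  smtp_from: 'sender@example.com'          # placeholder sending address
  smtp_auth_username: 'sender@example.com'
  smtp_auth_password: 'your-smtp-password'

route:
  group_by: ['alertname']
  group_wait: 10s
  group_interval: 1m
  repeat_interval: 1h
  receiver: 'email'

receivers:
- name: 'email'
  email_configs:
  - to: 'receiver@example.com'             # placeholder receiving address
    send_resolved: true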
Write the docker-compose file
$ cd /opt/docker-compose
$ vi docker-compose-monitor.yml
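A possible docker-compose-monitor.yml is sketched below; the image tags, published ports, and container config paths are assumptions and may differ from the actual setup. It mounts the three files created under /opt/monitor/config into the official Prometheus and Alertmanager images and adds a node-exporter service:

version: '3'

services:
  prometheus:
    image: prom/prometheus
    container_name: prometheus
    volumes:
      # mount the config and rule files written above (paths assumed)
      - /opt/monitor/config/prometheus.yml:/etc/prometheus/prometheus.yml
      - /opt/monitor/config/node_down.yml:/etc/prometheus/node_down.yml
    ports:
      - "9090:9090"

  alertmanager:
    image: prom/alertmanager
    container_name: alertmanager
    volumes:
      # config path inside the container depends on the image version (assumed here)
      - /opt/monitor/config/alertmanager.yml:/etc/alertmanager/alertmanager.yml
    ports:
      - "9093:9093"

  node-exporter:
    image: prom/node-exporter
    container_name: node-exporter
    ports:
      - "9100:9100"

With this layout Prometheus still only scrapes itself; to monitor the node, a job with the target node-exporter:9100 would also have to be added to scrape_configs in prometheus.yml.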