ok
This commit is contained in:
22
Monitoring/Prometheus-Grafana/prometheus/alert.rules
Normal file
22
Monitoring/Prometheus-Grafana/prometheus/alert.rules
Normal file
@ -0,0 +1,22 @@
|
||||
# Prometheus alerting rules (loaded through `rule_files` in prometheus.yml).
groups:
  - name: example
    rules:
      # Alert for any instance that is unreachable for >2 minutes.
      - alert: service_down
        expr: up == 0
        for: 2m
        labels:
          severity: page
        annotations:
          summary: "Instance {{ $labels.instance }} down"
          description: "{{ $labels.instance }} of job {{ $labels.job }} has been down for more than 2 minutes."

      # Alert for any instance whose node_load1 stays above 0.5 for >2 minutes.
      # NOTE(review): the 0.5 threshold is absolute, not normalized by CPU
      # count, yet it pages — confirm it suits the monitored hosts.
      - alert: high_load
        expr: node_load1 > 0.5
        for: 2m
        labels:
          severity: page
        annotations:
          summary: "Instance {{ $labels.instance }} under high load"
          description: "{{ $labels.instance }} of job {{ $labels.job }} is under high load."
|
89
Monitoring/Prometheus-Grafana/prometheus/prometheus.yml
Normal file
89
Monitoring/Prometheus-Grafana/prometheus/prometheus.yml
Normal file
@ -0,0 +1,89 @@
|
||||
# Global defaults applied to every scrape job unless the job overrides them.
global:
  scrape_interval: 15s  # By default, scrape targets every 15 seconds.
  evaluation_interval: 15s  # By default, evaluate rules every 15 seconds.
  # scrape_timeout is set to the global default (10s).

  # Attach these labels to any time series or alerts when communicating with
  # external systems (federation, remote storage, Alertmanager).
  external_labels:
    monitor: 'my-project'

# Load and evaluate rules in this file every 'evaluation_interval' seconds.
rule_files:
  - 'alert.rules'
  # - "first.rules"
  # - "second.rules"

# Alertmanager instance(s) that fired alerts are sent to.
alerting:
  alertmanagers:
    - scheme: http
      static_configs:
        - targets:
            - "alertmanager:9093"

# Scrape configurations: one job per monitored service (the application,
# Prometheus itself, cAdvisor, node-exporter, Traefik and Airflow).
scrape_configs:
  # The job name is added as a label `job=<job_name>` to any timeseries scraped from this config.
  # NOTE(review): host.docker.internal presumably resolves to the Docker host
  # from inside the Prometheus container — confirm on the target platform.
  - job_name: 'app'
    scrape_interval: 5s
    static_configs:
      - targets: ['host.docker.internal:8000']

  - job_name: 'prometheus'
    # Override the global default and scrape targets from this job every 5 seconds.
    scrape_interval: 5s
    static_configs:
      - targets: ['localhost:9090']

  - job_name: 'cadvisor'
    # Override the global default and scrape targets from this job every 5 seconds.
    scrape_interval: 5s
    # Targets are discovered via DNS A records for 'tasks.cadvisor'; the port
    # must be given explicitly because A records carry none.
    dns_sd_configs:
      - names:
          - 'tasks.cadvisor'
        type: 'A'
        port: 8080

    # Static alternative to the DNS discovery above (disabled):
    # static_configs:
    #   - targets: ['cadvisor:8080']

  - job_name: 'node-exporter'
    # Override the global default and scrape targets from this job every 5 seconds.
    scrape_interval: 5s
    dns_sd_configs:
      - names:
          - 'tasks.node-exporter'
        type: 'A'
        port: 9100

  # Disabled Pushgateway job, kept for reference:
  # - job_name: 'pushgateway'
  #   scrape_interval: 10s
  #   dns_sd_configs:
  #     - names:
  #         - 'tasks.pushgateway'
  #       type: 'A'
  #       port: 9091

  # Disabled static target list, kept for reference:
  # static_configs:
  #   - targets: ['node-exporter:9100']

  - job_name: 'traefik-app'
    scrape_interval: 5s
    static_configs:
      - targets: ['10.12.1.14:8181']

  - job_name: 'airflow'
    # Override the global default and scrape targets from this job every 5 seconds.
    scrape_interval: 5s
    static_configs:
      - targets:
          - '10.12.1.14:9102'
|
Reference in New Issue
Block a user