This commit is contained in:
2024-04-19 10:27:36 +02:00
parent fcb6bbe566
commit 35c96e715c
7852 changed files with 4815 additions and 8 deletions

View File

@ -0,0 +1,22 @@
groups:
- name: example
  rules:
  # Fires when a target has reported `up == 0` for 2 minutes straight.
  - alert: service_down
    expr: up == 0
    for: 2m
    labels:
      severity: page
    annotations:
      summary: 'Instance {{ $labels.instance }} down'
      description: >-
        {{ $labels.instance }} of job {{ $labels.job }} has been down for
        more than 2 minutes.
  # Fires when `node_load1` has stayed above 0.5 for 2 minutes straight.
  # NOTE(review): 0.5 looks like an absolute load value, not scaled by CPU
  # count - confirm this threshold is intended for a `page` severity alert.
  - alert: high_load
    expr: node_load1 > 0.5
    for: 2m
    labels:
      severity: page
    annotations:
      summary: 'Instance {{ $labels.instance }} under high load'
      description: >-
        {{ $labels.instance }} of job {{ $labels.job }} is under high load.

View File

@ -0,0 +1,89 @@
# Global defaults shared by all scrape jobs and rule evaluations.
global:
  # Scrape targets every 15 seconds unless a job sets its own interval.
  scrape_interval: 15s
  # Evaluate rules every 15 seconds.
  evaluation_interval: 15s
  # scrape_timeout is left unset, so the global default (10s) applies.

  # Labels attached to every time series or alert when communicating with
  # external systems (federation, remote storage, Alertmanager).
  external_labels:
    monitor: "my-project"
# Rule files to load; their rules are evaluated once per
# `evaluation_interval`.
rule_files:
- "alert.rules"
# - "first.rules"
# - "second.rules"
# Alertmanager endpoints, reached over plain HTTP.
alerting:
  alertmanagers:
  - scheme: http
    static_configs:
    - targets:
      - 'alertmanager:9093'
# Scrape jobs: every list entry below describes one group of endpoints to
# collect metrics from.
scrape_configs:
# The job name is added as a label `job=<job_name>` to any time series
# scraped from that job.
- job_name: 'app'
  # Scrape this job every 5 seconds.
  scrape_interval: 5s
  static_configs:
  - targets:
    - 'host.docker.internal:8000'
- job_name: 'prometheus'
  # Job-level interval: scrape every 5 seconds, overriding the global value.
  scrape_interval: 5s
  static_configs:
  - targets:
    - 'localhost:9090'
- job_name: 'cadvisor'
  # Job-level interval: scrape every 5 seconds, overriding the global value.
  scrape_interval: 5s
  # Targets come from the DNS A records of `tasks.cadvisor`, on port 8080.
  dns_sd_configs:
  - names:
    - 'tasks.cadvisor'
    type: 'A'
    port: 8080
  # Disabled static alternative to the DNS discovery above:
  # static_configs:
  # - targets: ['cadvisor:8080']
- job_name: 'node-exporter'
  # Job-level interval: scrape every 5 seconds, overriding the global value.
  scrape_interval: 5s
  # Targets come from the DNS A records of `tasks.node-exporter`, on
  # port 9100.
  dns_sd_configs:
  - names:
    - 'tasks.node-exporter'
    type: 'A'
    port: 9100
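# Disabled Pushgateway job; uncomment the lines below to enable it.
# - job_name: 'pushgateway'
#   scrape_interval: 10s
#   dns_sd_configs:
#   - names:
#     - 'tasks.pushgateway'
#     type: 'A'
#     port: 9091
# NOTE(review): the disabled static target below names node-exporter, so it
# appears to be a leftover alternative for that job rather than part of the
# pushgateway job - confirm before re-enabling.
#   static_configs:
#   - targets: ['node-exporter:9100']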
- job_name: 'traefik-app'
  # Scrape this job every 5 seconds.
  scrape_interval: 5s
  static_configs:
  - targets:
    - '10.12.1.14:8181'
- job_name: 'airflow'
  # Job-level interval: scrape every 5 seconds, overriding the global value.
  scrape_interval: 5s
  static_configs:
  - targets:
    - '10.12.1.14:9102'