update
This commit is contained in:
parent
a6b5e452fb
commit
d89afeb61b
10
alertmanager/config.yml
Normal file
10
alertmanager/config.yml
Normal file
@ -0,0 +1,10 @@
|
|||||||
|
route:
|
||||||
|
receiver: 'slack'
|
||||||
|
|
||||||
|
receivers:
|
||||||
|
- name: 'slack'
|
||||||
|
# slack_configs:
|
||||||
|
# - send_resolved: true
|
||||||
|
# username: '<username>'
|
||||||
|
# channel: '#<channel-name>'
|
||||||
|
# api_url: '<incoming-webhook-url>'
|
@ -1,6 +1,6 @@
|
|||||||
version: '3.8'
|
version: '3.8'
|
||||||
|
|
||||||
#### networks
|
#### NETWORKS
|
||||||
networks:
|
networks:
|
||||||
docker-traefik_front_network:
|
docker-traefik_front_network:
|
||||||
external: true
|
external: true
|
||||||
@ -8,12 +8,10 @@ networks:
|
|||||||
driver: bridge
|
driver: bridge
|
||||||
attachable: true
|
attachable: true
|
||||||
|
|
||||||
volumes:
|
#### SERVICES
|
||||||
prometheus_data: {}
|
|
||||||
grafana_data: {}
|
|
||||||
|
|
||||||
#### services
|
|
||||||
services:
|
services:
|
||||||
|
|
||||||
|
### prometheus
|
||||||
prometheus:
|
prometheus:
|
||||||
image: prom/prometheus
|
image: prom/prometheus
|
||||||
restart: always
|
restart: always
|
||||||
@ -38,18 +36,19 @@ services:
|
|||||||
labels:
|
labels:
|
||||||
- "traefik.enable=true"
|
- "traefik.enable=true"
|
||||||
- "traefik.docker.network=docker-traefik_front_network"
|
- "traefik.docker.network=docker-traefik_front_network"
|
||||||
# HTTP
|
## HTTP
|
||||||
- "traefik.http.routers.prometheus-http.rule=Host(`prometheus.10.0.4.29.traefik.me`)"
|
- "traefik.http.routers.prometheus-http.rule=Host(`prometheus.10.0.4.29.traefik.me`)"
|
||||||
- "traefik.http.routers.prometheus-http.entrypoints=http"
|
- "traefik.http.routers.prometheus-http.entrypoints=http"
|
||||||
# HTTPS
|
## HTTPS
|
||||||
- "traefik.http.routers.prometheus-https.rule=Host(`prometheus.10.0.4.29.traefik.me`)"
|
- "traefik.http.routers.prometheus-https.rule=Host(`prometheus.10.0.4.29.traefik.me`)"
|
||||||
- "traefik.http.routers.prometheus-https.entrypoints=https"
|
- "traefik.http.routers.prometheus-https.entrypoints=https"
|
||||||
- "traefik.http.routers.prometheus-https.tls=true"
|
- "traefik.http.routers.prometheus-https.tls=true"
|
||||||
- "traefik.http.routers.prometheus-https.service=prometheus-service"
|
- "traefik.http.routers.prometheus-https.service=prometheus-service"
|
||||||
# Middleware
|
## Middleware
|
||||||
# Service
|
## Service
|
||||||
- "traefik.http.services.prometheus-service.loadbalancer.server.port=9090"
|
- "traefik.http.services.prometheus-service.loadbalancer.server.port=9090"
|
||||||
|
|
||||||
|
### node-exporter
|
||||||
node-exporter:
|
node-exporter:
|
||||||
image: prom/node-exporter
|
image: prom/node-exporter
|
||||||
volumes:
|
volumes:
|
||||||
@ -69,6 +68,7 @@ services:
|
|||||||
deploy:
|
deploy:
|
||||||
mode: global
|
mode: global
|
||||||
|
|
||||||
|
### alertmanager
|
||||||
alertmanager:
|
alertmanager:
|
||||||
image: prom/alertmanager
|
image: prom/alertmanager
|
||||||
restart: always
|
restart: always
|
||||||
@ -82,6 +82,7 @@ services:
|
|||||||
- '--config.file=/etc/alertmanager/config.yml'
|
- '--config.file=/etc/alertmanager/config.yml'
|
||||||
- '--storage.path=/alertmanager'
|
- '--storage.path=/alertmanager'
|
||||||
|
|
||||||
|
### cadvisor
|
||||||
cadvisor:
|
cadvisor:
|
||||||
image: gcr.io/cadvisor/cadvisor
|
image: gcr.io/cadvisor/cadvisor
|
||||||
volumes:
|
volumes:
|
||||||
@ -97,6 +98,7 @@ services:
|
|||||||
deploy:
|
deploy:
|
||||||
mode: global
|
mode: global
|
||||||
|
|
||||||
|
### grafana
|
||||||
grafana:
|
grafana:
|
||||||
image: grafana/grafana
|
image: grafana/grafana
|
||||||
user: '472'
|
user: '472'
|
||||||
@ -118,14 +120,19 @@ services:
|
|||||||
labels:
|
labels:
|
||||||
- "traefik.enable=true"
|
- "traefik.enable=true"
|
||||||
- "traefik.docker.network=interne"
|
- "traefik.docker.network=interne"
|
||||||
# HTTP
|
## HTTP
|
||||||
- "traefik.http.routers.grafana-http.rule=Host(`grafana.10.0.4.29.traefik.me`)"
|
- "traefik.http.routers.grafana-http.rule=Host(`grafana.10.0.4.29.traefik.me`)"
|
||||||
- "traefik.http.routers.grafana-http.entrypoints=http"
|
- "traefik.http.routers.grafana-http.entrypoints=http"
|
||||||
# HTTPS
|
## HTTPS
|
||||||
- "traefik.http.routers.grafana-https.rule=Host(`grafana.10.0.4.29.traefik.me`)"
|
- "traefik.http.routers.grafana-https.rule=Host(`grafana.10.0.4.29.traefik.me`)"
|
||||||
- "traefik.http.routers.grafana-https.entrypoints=https"
|
- "traefik.http.routers.grafana-https.entrypoints=https"
|
||||||
- "traefik.http.routers.grafana-https.tls=true"
|
- "traefik.http.routers.grafana-https.tls=true"
|
||||||
- "traefik.http.routers.grafana-https.service=grafana-service"
|
- "traefik.http.routers.grafana-https.service=grafana-service"
|
||||||
# Middleware
|
## Middleware
|
||||||
# Service
|
## Service
|
||||||
- "traefik.http.services.grafana-service.loadbalancer.server.port=3000"
|
- "traefik.http.services.grafana-service.loadbalancer.server.port=3000"
|
||||||
|
|
||||||
|
#### VOLUMES
|
||||||
|
volumes:
|
||||||
|
prometheus_data: {}
|
||||||
|
grafana_data: {}
|
3
grafana/config.monitoring
Normal file
3
grafana/config.monitoring
Normal file
@ -0,0 +1,3 @@
|
|||||||
|
GF_SECURITY_ADMIN_USER=admin
|
||||||
|
GF_SECURITY_ADMIN_PASSWORD=foobar
|
||||||
|
GF_USERS_ALLOW_SIGN_UP=false
|
1388
grafana/provisioning/dashboards/authentik.json
Normal file
1388
grafana/provisioning/dashboards/authentik.json
Normal file
File diff suppressed because it is too large
Load Diff
11
grafana/provisioning/dashboards/dashboard.yml
Normal file
11
grafana/provisioning/dashboards/dashboard.yml
Normal file
@ -0,0 +1,11 @@
|
|||||||
|
apiVersion: 1
|
||||||
|
|
||||||
|
providers:
|
||||||
|
- name: 'Prometheus'
|
||||||
|
orgId: 1
|
||||||
|
folder: ''
|
||||||
|
type: file
|
||||||
|
disableDeletion: false
|
||||||
|
editable: true
|
||||||
|
options:
|
||||||
|
path: /etc/grafana/provisioning/dashboards
|
1605
grafana/provisioning/dashboards/traefik.json
Normal file
1605
grafana/provisioning/dashboards/traefik.json
Normal file
File diff suppressed because it is too large
Load Diff
50
grafana/provisioning/datasources/datasource.yml
Normal file
50
grafana/provisioning/datasources/datasource.yml
Normal file
@ -0,0 +1,50 @@
|
|||||||
|
# config file version
|
||||||
|
apiVersion: 1
|
||||||
|
|
||||||
|
# list of datasources that should be deleted from the database
|
||||||
|
deleteDatasources:
|
||||||
|
- name: Prometheus
|
||||||
|
orgId: 1
|
||||||
|
|
||||||
|
# list of datasources to insert/update depending
|
||||||
|
# what's available in the database
|
||||||
|
datasources:
|
||||||
|
# <string, required> name of the datasource. Required
|
||||||
|
- name: Prometheus
|
||||||
|
# <string, required> datasource type. Required
|
||||||
|
type: prometheus
|
||||||
|
# <string, required> access mode. direct or proxy. Required
|
||||||
|
access: proxy
|
||||||
|
# <int> org id. will default to orgId 1 if not specified
|
||||||
|
orgId: 1
|
||||||
|
# <string> url
|
||||||
|
url: http://prometheus:9090
|
||||||
|
# <string> database password, if used
|
||||||
|
password:
|
||||||
|
# <string> database user, if used
|
||||||
|
user:
|
||||||
|
# <string> database name, if used
|
||||||
|
database:
|
||||||
|
# <bool> enable/disable basic auth
|
||||||
|
basicAuth: false
|
||||||
|
# <string> basic auth username, if used
|
||||||
|
basicAuthUser:
|
||||||
|
# <string> basic auth password, if used
|
||||||
|
basicAuthPassword:
|
||||||
|
# <bool> enable/disable with credentials headers
|
||||||
|
withCredentials:
|
||||||
|
# <bool> mark as default datasource. Max one per org
|
||||||
|
isDefault: true
|
||||||
|
# <map> fields that will be converted to json and stored in json_data
|
||||||
|
jsonData:
|
||||||
|
graphiteVersion: "1.1"
|
||||||
|
tlsAuth: false
|
||||||
|
tlsAuthWithCACert: false
|
||||||
|
# <string> json object of data that will be encrypted.
|
||||||
|
secureJsonData:
|
||||||
|
tlsCACert: "..."
|
||||||
|
tlsClientCert: "..."
|
||||||
|
tlsClientKey: "..."
|
||||||
|
version: 1
|
||||||
|
# <bool> allow users to edit datasources from the UI.
|
||||||
|
editable: true
|
16
prometheus.yml
Normal file
16
prometheus.yml
Normal file
@ -0,0 +1,16 @@
|
|||||||
|
global:
|
||||||
|
scrape_interval: 15s
|
||||||
|
evaluation_interval: 15s
|
||||||
|
|
||||||
|
rule_files:
|
||||||
|
# - "first.rules"
|
||||||
|
# - "second.rules"
|
||||||
|
|
||||||
|
scrape_configs:
|
||||||
|
- job_name: prometheus
|
||||||
|
static_configs:
|
||||||
|
- targets: ['localhost:9090']
|
||||||
|
- job_name: app
|
||||||
|
scrape_interval: 5s
|
||||||
|
static_configs:
|
||||||
|
- targets: ['host.docker.internal:10088']
|
22
prometheus/alert.rules
Normal file
22
prometheus/alert.rules
Normal file
@ -0,0 +1,22 @@
|
|||||||
|
groups:
|
||||||
|
- name: example
|
||||||
|
rules:
|
||||||
|
|
||||||
|
# Alert for any instance that is unreachable for >2 minutes.
|
||||||
|
- alert: service_down
|
||||||
|
expr: up == 0
|
||||||
|
for: 2m
|
||||||
|
labels:
|
||||||
|
severity: page
|
||||||
|
annotations:
|
||||||
|
summary: "Instance {{ $labels.instance }} down"
|
||||||
|
description: "{{ $labels.instance }} of job {{ $labels.job }} has been down for more than 2 minutes."
|
||||||
|
|
||||||
|
- alert: high_load
|
||||||
|
expr: node_load1 > 0.5
|
||||||
|
for: 2m
|
||||||
|
labels:
|
||||||
|
severity: page
|
||||||
|
annotations:
|
||||||
|
summary: "Instance {{ $labels.instance }} under high load"
|
||||||
|
description: "{{ $labels.instance }} of job {{ $labels.job }} is under high load."
|
82
prometheus/prometheus.yml
Normal file
82
prometheus/prometheus.yml
Normal file
@ -0,0 +1,82 @@
|
|||||||
|
# my global config
|
||||||
|
global:
|
||||||
|
scrape_interval: 15s # By default, scrape targets every 15 seconds.
|
||||||
|
evaluation_interval: 15s # By default, evaluate rules every 15 seconds.
|
||||||
|
# scrape_timeout is set to the global default (10s).
|
||||||
|
|
||||||
|
# Attach these labels to any time series or alerts when communicating with
|
||||||
|
# external systems (federation, remote storage, Alertmanager).
|
||||||
|
external_labels:
|
||||||
|
monitor: 'my-project'
|
||||||
|
|
||||||
|
# Load and evaluate rules in this file every 'evaluation_interval' seconds.
|
||||||
|
rule_files:
|
||||||
|
- 'alert.rules'
|
||||||
|
# - "first.rules"
|
||||||
|
# - "second.rules"
|
||||||
|
|
||||||
|
# alert
|
||||||
|
alerting:
|
||||||
|
alertmanagers:
|
||||||
|
- scheme: http
|
||||||
|
static_configs:
|
||||||
|
- targets:
|
||||||
|
- "alertmanager:9093"
|
||||||
|
|
||||||
|
# Scrape configurations, one job per group of endpoints to scrape:
|
||||||
|
# the app, Prometheus itself, cAdvisor, node-exporter and Traefik.
|
||||||
|
scrape_configs:
|
||||||
|
# The job name is added as a label `job=<job_name>` to any timeseries scraped from this config.
|
||||||
|
- job_name: app
|
||||||
|
scrape_interval: 5s
|
||||||
|
static_configs:
|
||||||
|
- targets: ['host.docker.internal:8000']
|
||||||
|
|
||||||
|
- job_name: 'prometheus'
|
||||||
|
|
||||||
|
# Override the global default and scrape targets from this job every 5 seconds.
|
||||||
|
scrape_interval: 5s
|
||||||
|
|
||||||
|
static_configs:
|
||||||
|
- targets: ['localhost:9090']
|
||||||
|
|
||||||
|
- job_name: 'cadvisor'
|
||||||
|
|
||||||
|
# Override the global default and scrape targets from this job every 5 seconds.
|
||||||
|
scrape_interval: 5s
|
||||||
|
|
||||||
|
dns_sd_configs:
|
||||||
|
- names:
|
||||||
|
- 'tasks.cadvisor'
|
||||||
|
type: 'A'
|
||||||
|
port: 8080
|
||||||
|
|
||||||
|
# static_configs:
|
||||||
|
# - targets: ['cadvisor:8080']
|
||||||
|
|
||||||
|
- job_name: 'node-exporter'
|
||||||
|
|
||||||
|
# Override the global default and scrape targets from this job every 5 seconds.
|
||||||
|
scrape_interval: 5s
|
||||||
|
|
||||||
|
dns_sd_configs:
|
||||||
|
- names:
|
||||||
|
- 'tasks.node-exporter'
|
||||||
|
type: 'A'
|
||||||
|
port: 9100
|
||||||
|
|
||||||
|
# - job_name: 'pushgateway'
|
||||||
|
# scrape_interval: 10s
|
||||||
|
# dns_sd_configs:
|
||||||
|
# - names:
|
||||||
|
# - 'tasks.pushgateway'
|
||||||
|
# type: 'A'
|
||||||
|
# port: 9091
|
||||||
|
|
||||||
|
# static_configs:
|
||||||
|
# - targets: ['node-exporter:9100']
|
||||||
|
|
||||||
|
- job_name: 'traefik-app'
|
||||||
|
scrape_interval: 5s
|
||||||
|
static_configs:
|
||||||
|
- targets: ['10.12.1.14:8181']
|
Loading…
x
Reference in New Issue
Block a user