This commit is contained in:
2024-04-19 10:27:36 +02:00
parent fcb6bbe566
commit 35c96e715c
7852 changed files with 4815 additions and 8 deletions

View File

@ -0,0 +1,3 @@
Additional permission under GNU GPL version 3 section 7
If you modify this Program, or any covered work, by linking or combining it with [name of library] (or a modified version of that library), containing parts covered by the terms of [name of library's license], the licensors of this Program grant you additional permission to convey the resulting work. Corresponding Source for a non-source form of such a combination shall include the source code for the parts of [name of library] used as well as that of the covered work.

View File

@ -0,0 +1,30 @@
![Prometheus](./img/logo-Prometheus.png) ![Grafana](./img/logo-Grafana.png)
# Prometheus Grafana
# Installation
Pour utiliser Adminer tout seul
```bash
docker compose up -d
```
Pour utiliser Adminer avec Traefik
```bash
docker compose -f docker-compose-traefik.yml up -d
```
Pour utiliser Adminer avec Nginx
```bash
docker compose -f docker-compose-nginx.yml up -d
```
# Utilisation
## Accueil
# More info
- more information on the website [Tips-Of-Mine](https://www.tips-of-mine.fr/)
# Buy me a coffe
<a href='https://ko-fi.com/R5R2KNI3N' target='_blank'><img height='36' style='border:0px;height:36px;' src='https://storage.ko-fi.com/cdn/kofi4.png?v=3' border='0' alt='Buy Me a Coffee at ko-fi.com' /></a>

View File

@ -0,0 +1,10 @@
route:
receiver: 'slack'
receivers:
- name: 'slack'
# slack_configs:
# - send_resolved: true
# username: '<username>'
# channel: '#<channel-name>'
# api_url: '<incomming-webhook-url>'

View File

@ -0,0 +1,138 @@
version: '3.8'
#### NETWORKS
networks:
docker-traefik_front_network:
external: true
back_network:
driver: bridge
attachable: true
#### SERVICES
services:
### prometheus
prometheus:
image: prom/prometheus
restart: always
volumes:
- ./prometheus:/etc/prometheus/
- prometheus_data:/prometheus
command:
- '--config.file=/etc/prometheus/prometheus.yml'
- '--storage.tsdb.path=/prometheus'
- '--web.console.libraries=/usr/share/prometheus/console_libraries'
- '--web.console.templates=/usr/share/prometheus/consoles'
# ports:
# - 9090:9090
networks:
- docker-traefik_front_network
- back_network
links:
- cadvisor:cadvisor
- alertmanager:alertmanager
depends_on:
- cadvisor
labels:
- "traefik.enable=true"
- "traefik.docker.network=docker-traefik_front_network"
## HTTP
- "traefik.http.routers.prometheus-http.rule=Host(`prometheus.10.0.4.29.traefik.me`)"
- "traefik.http.routers.prometheus-http.entrypoints=http"
## HTTPS
- "traefik.http.routers.prometheus-https.rule=Host(`prometheus.10.0.4.29.traefik.me`)"
- "traefik.http.routers.prometheus-https.entrypoints=https"
- "traefik.http.routers.prometheus-https.tls=true"
- "traefik.http.routers.prometheus-https.service=prometheus-service"
## Middleware
## Service
- "traefik.http.services.prometheus-service.loadbalancer.server.port=9090"
### node-exporter
node-exporter:
image: prom/node-exporter
volumes:
- /proc:/host/proc:ro
- /sys:/host/sys:ro
- /:/rootfs:ro
command:
- '--path.procfs=/host/proc'
- '--path.sysfs=/host/sys'
- --collector.filesystem.ignored-mount-points
- '^/(sys|proc|dev|host|etc|rootfs/var/lib/docker/containers|rootfs/var/lib/docker/overlay2|rootfs/run/docker/netns|rootfs/var/lib/docker/aufs)($$|/)'
ports:
- 9100:9100
networks:
- back_network
restart: always
deploy:
mode: global
### alertmanager
alertmanager:
image: prom/alertmanager
restart: always
ports:
- 9093:9093
networks:
- back_network
volumes:
- ./alertmanager/:/etc/alertmanager/
command:
- '--config.file=/etc/alertmanager/config.yml'
- '--storage.path=/alertmanager'
### cadvisor
cadvisor:
image: gcr.io/cadvisor/cadvisor
volumes:
- /:/rootfs:ro
- /var/run:/var/run:rw
- /sys:/sys:ro
- /var/lib/docker/:/var/lib/docker:ro
ports:
- 8080:8080
networks:
- back_network
restart: always
deploy:
mode: global
### grafana
grafana:
image: grafana/grafana
user: '472'
restart: always
environment:
GF_INSTALL_PLUGINS: 'grafana-clock-panel,grafana-simple-json-datasource'
volumes:
- grafana_data:/var/lib/grafana
- ./grafana/provisioning/:/etc/grafana/provisioning/
env_file:
- ./grafana/config.monitoring
# ports:
# - 3000:3000
networks:
- docker-traefik_front_network
- back_network
depends_on:
- prometheus
labels:
- "traefik.enable=true"
- "traefik.docker.network=interne"
## HTTP
- "traefik.http.routers.grafana-http.rule=Host(`grafana.10.0.4.29.traefik.me`)"
- "traefik.http.routers.grafana-http.entrypoints=http"
## HTTPS
- "traefik.http.routers.grafana-https.rule=Host(`grafana.10.0.4.29.traefik.me`)"
- "traefik.http.routers.grafana-https.entrypoints=https"
- "traefik.http.routers.grafana-https.tls=true"
- "traefik.http.routers.grafana-https.service=grafana-service"
## Middleware
## Service
- "traefik.http.services.grafana-service.loadbalancer.server.port=3000"
#### VOLUMES
volumes:
prometheus_data: {}
grafana_data: {}

View File

@ -0,0 +1,3 @@
GF_SECURITY_ADMIN_USER=admin
GF_SECURITY_ADMIN_PASSWORD=foobar
GF_USERS_ALLOW_SIGN_UP=false

View File

@ -0,0 +1,581 @@
{
"annotations": {
"list": [
{
"builtIn": 1,
"datasource": {
"type": "datasource",
"uid": "grafana"
},
"enable": true,
"hide": true,
"iconColor": "rgba(0, 211, 255, 1)",
"name": "Annotations & Alerts",
"type": "dashboard"
}
]
},
"editable": true,
"fiscalYearStartMonth": 0,
"graphTooltip": 2,
"links": [],
"liveNow": false,
"panels": [
{
"collapsed": true,
"datasource": {
"type": "prometheus",
"uid": "P1809F7CD0C75ACF3"
},
"gridPos": {
"h": 1,
"w": 24,
"x": 0,
"y": 0
},
"id": 47,
"panels": [],
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "P1809F7CD0C75ACF3"
},
"refId": "A"
}
],
"title": "DAG stats",
"type": "row"
},
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": {
"type": "prometheus",
"uid": "P1809F7CD0C75ACF3"
},
"description": "Seconds taken for a DagRun to reach success state",
"fieldConfig": {
"defaults": {
"unit": "ms"
},
"overrides": []
},
"fill": 1,
"fillGradient": 0,
"gridPos": {
"h": 8,
"w": 24,
"x": 0,
"y": 1
},
"hiddenSeries": false,
"id": 25,
"legend": {
"avg": false,
"current": false,
"max": false,
"min": false,
"show": true,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"nullPointMode": "null",
"options": {
"alertThreshold": true
},
"percentage": false,
"pluginVersion": "10.0.3",
"pointradius": 2,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "P1809F7CD0C75ACF3"
},
"expr": "af_agg_dagrun_duration_success{dag_id=\"$dag_id\"} * 1000",
"format": "time_series",
"interval": "",
"legendFormat": "{{dag_id}} p{{quantile}}",
"refId": "A"
}
],
"thresholds": [],
"timeRegions": [],
"title": "Success DAG run duration",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"mode": "time",
"show": true,
"values": []
},
"yaxes": [
{
"format": "ms",
"logBase": 1,
"show": true
},
{
"format": "short",
"logBase": 1,
"show": true
}
],
"yaxis": {
"align": false
}
},
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": {
"type": "prometheus",
"uid": "P1809F7CD0C75ACF3"
},
"description": "Seconds taken for a DagRun to reach failed state",
"fieldConfig": {
"defaults": {
"unit": "s"
},
"overrides": []
},
"fill": 1,
"fillGradient": 0,
"gridPos": {
"h": 7,
"w": 24,
"x": 0,
"y": 9
},
"hiddenSeries": false,
"id": 49,
"legend": {
"avg": false,
"current": false,
"max": false,
"min": false,
"show": true,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"nullPointMode": "null",
"options": {
"alertThreshold": true
},
"percentage": false,
"pluginVersion": "10.0.3",
"pointradius": 2,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "P1809F7CD0C75ACF3"
},
"expr": "af_agg_dagrun_duration_failed{dag_id=\"$dag_id\"} * 1000",
"interval": "",
"legendFormat": "{{dag_id}} p{{quantile}}",
"refId": "A"
}
],
"thresholds": [],
"timeRegions": [],
"title": "Failed DAG run duration",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"mode": "time",
"show": true,
"values": []
},
"yaxes": [
{
"format": "s",
"logBase": 1,
"min": "0",
"show": true
},
{
"format": "short",
"logBase": 1,
"min": "0",
"show": true
}
],
"yaxis": {
"align": false
}
},
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": {
"type": "prometheus",
"uid": "P1809F7CD0C75ACF3"
},
"description": "Sum of seconds taken to finish a tasks aggregated by DAG",
"fieldConfig": {
"defaults": {
"unit": "s"
},
"overrides": []
},
"fill": 1,
"fillGradient": 0,
"gridPos": {
"h": 6,
"w": 24,
"x": 0,
"y": 16
},
"hiddenSeries": false,
"id": 51,
"legend": {
"alignAsTable": false,
"avg": false,
"current": false,
"max": false,
"min": false,
"show": true,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"nullPointMode": "null",
"options": {
"alertThreshold": true
},
"percentage": false,
"pluginVersion": "10.0.3",
"pointradius": 2,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "P1809F7CD0C75ACF3"
},
"expr": "sum by (dag_id, quantile) (af_agg_dag_task_duration{dag_id=\"$dag_id\"}) * 1000",
"format": "time_series",
"instant": false,
"interval": "",
"legendFormat": "{{dag_id}} p{{quantile}}",
"refId": "A"
}
],
"thresholds": [],
"timeRegions": [],
"title": "DAG run duration",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"mode": "time",
"show": true,
"values": []
},
"yaxes": [
{
"format": "s",
"logBase": 1,
"min": "0",
"show": true
},
{
"format": "short",
"logBase": 1,
"min": "0",
"show": true
}
],
"yaxis": {
"align": false
}
},
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": {
"type": "prometheus",
"uid": "P1809F7CD0C75ACF3"
},
"description": "Seconds taken to check DAG dependencies",
"fieldConfig": {
"defaults": {
"unit": "ms"
},
"overrides": []
},
"fill": 1,
"fillGradient": 0,
"gridPos": {
"h": 7,
"w": 24,
"x": 0,
"y": 22
},
"hiddenSeries": false,
"id": 19,
"legend": {
"avg": false,
"current": false,
"max": false,
"min": false,
"show": true,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"nullPointMode": "null",
"options": {
"alertThreshold": true
},
"percentage": false,
"pluginVersion": "10.0.3",
"pointradius": 2,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "P1809F7CD0C75ACF3"
},
"expr": "af_agg_dagrun_dependency_check{dag_id=\"$dag_id\"} * 1000",
"interval": "",
"legendFormat": "{{dag_id}} p{{quantile}}",
"refId": "A"
}
],
"thresholds": [],
"timeRegions": [],
"title": "DAG run dependency check time",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"mode": "time",
"show": true,
"values": []
},
"yaxes": [
{
"format": "ms",
"logBase": 1,
"min": "0",
"show": true
},
{
"format": "short",
"logBase": 1,
"min": "0",
"show": true
}
],
"yaxis": {
"align": false
}
},
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": {
"type": "prometheus",
"uid": "P1809F7CD0C75ACF3"
},
"description": "Seconds of delay between the scheduled DagRun start date and the actual DagRun start date",
"fieldConfig": {
"defaults": {
"custom": {},
"unit": "ms"
},
"overrides": []
},
"fill": 1,
"fillGradient": 0,
"gridPos": {
"h": 6,
"w": 24,
"x": 0,
"y": 29
},
"hiddenSeries": false,
"id": 27,
"legend": {
"avg": false,
"current": false,
"max": false,
"min": false,
"show": true,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"nullPointMode": "null",
"options": {
"alertThreshold": true
},
"percentage": false,
"pluginVersion": "7.3.5",
"pointradius": 2,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "P1809F7CD0C75ACF3"
},
"expr": "af_agg_dagrun_schedule_delay{dag_id=\"$dag_id\"} * 1000",
"interval": "",
"legendFormat": "{{dag_id}} p{{quantile}}",
"refId": "A"
}
],
"thresholds": [],
"timeRegions": [],
"title": "DAG run schedule delay",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"mode": "time",
"show": true,
"values": []
},
"yaxes": [
{
"format": "short",
"logBase": 1,
"min": "0",
"show": true
},
{
"format": "short",
"logBase": 1,
"min": "0",
"show": true
}
],
"yaxis": {
"align": false
}
}
],
"refresh": "5s",
"schemaVersion": 38,
"style": "dark",
"tags": [],
"templating": {
"list": [
{
"current": {
"selected": false,
"text": "latest_only",
"value": "latest_only"
},
"definition": "label_values(dag_id)",
"hide": 0,
"includeAll": false,
"label": "DAG ID",
"multi": false,
"name": "dag_id",
"options": [],
"query": {
"query": "label_values(dag_id)",
"refId": "PrometheusVariableQueryEditor-VariableQuery"
},
"refresh": 1,
"regex": "",
"skipUrlSync": false,
"sort": 0,
"tagValuesQuery": "",
"tagsQuery": "",
"type": "query",
"useTags": false
}
]
},
"time": {
"from": "now-1h",
"to": "now"
},
"timepicker": {
"refresh_intervals": [
"5s",
"10s",
"30s",
"1m",
"5m",
"15m",
"30m",
"1h",
"2h",
"1d"
]
},
"timezone": "",
"title": "Airflow DAG dashboard",
"uid": "qBWJRGPMz",
"version": 1,
"weekStart": ""
}

View File

@ -0,0 +1,646 @@
{
"__inputs": [
{
"name": "DS_PROMETHEUS",
"label": "Prometheus",
"description": "",
"type": "datasource",
"pluginId": "prometheus",
"pluginName": "Prometheus"
}
],
"__requires": [
{
"type": "grafana",
"id": "grafana",
"name": "Grafana",
"version": "7.1.5"
},
{
"type": "panel",
"id": "graph",
"name": "Graph",
"version": ""
},
{
"type": "datasource",
"id": "prometheus",
"name": "Prometheus",
"version": "1.0.0"
}
],
"annotations": {
"list": [
{
"builtIn": 1,
"datasource": "-- Grafana --",
"enable": true,
"hide": true,
"iconColor": "rgba(0, 211, 255, 1)",
"name": "Annotations & Alerts",
"type": "dashboard"
}
]
},
"editable": true,
"gnetId": null,
"graphTooltip": 0,
"id": null,
"links": [],
"panels": [
{
"aliasColors": {},
"bars": true,
"dashLength": 10,
"dashes": false,
"datasource": "${DS_PROMETHEUS}",
"fieldConfig": {
"defaults": {
"custom": {
"align": null
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
}
},
"overrides": []
},
"fill": 1,
"fillGradient": 0,
"gridPos": {
"h": 7,
"w": 24,
"x": 0,
"y": 0
},
"hiddenSeries": false,
"id": 6,
"legend": {
"avg": false,
"current": false,
"max": false,
"min": false,
"show": true,
"total": false,
"values": false
},
"lines": false,
"linewidth": 1,
"nullPointMode": "null",
"percentage": false,
"pluginVersion": "7.1.5",
"pointradius": 2,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
"expr": "increase(airflow_ti_successes[1m])",
"instant": false,
"interval": "",
"legendFormat": "Success",
"refId": "A"
},
{
"expr": "increase(airflow_ti_failures[1m])",
"interval": "",
"legendFormat": "Failure",
"refId": "B"
}
],
"thresholds": [],
"timeFrom": null,
"timeRegions": [],
"timeShift": null,
"title": "Task Instances Successes & Failures",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"$$hashKey": "object:753",
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
},
{
"$$hashKey": "object:754",
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
}
],
"yaxis": {
"align": false,
"alignLevel": null
}
},
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "${DS_PROMETHEUS}",
"fieldConfig": {
"defaults": {
"custom": {},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
}
},
"overrides": []
},
"fill": 1,
"fillGradient": 0,
"gridPos": {
"h": 8,
"w": 12,
"x": 0,
"y": 7
},
"hiddenSeries": false,
"id": 4,
"legend": {
"avg": false,
"current": false,
"max": false,
"min": false,
"show": false,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"nullPointMode": "null",
"percentage": false,
"pluginVersion": "7.1.5",
"pointradius": 2,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
"expr": "rate(airflow_scheduler_heartbeat[10m])*60",
"interval": "",
"legendFormat": "Heartbeats per Minute",
"refId": "A"
}
],
"thresholds": [],
"timeFrom": null,
"timeRegions": [],
"timeShift": null,
"title": "Scheduler Heartbeat",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"$$hashKey": "object:519",
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": "0",
"show": true
},
{
"$$hashKey": "object:520",
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
}
],
"yaxis": {
"align": false,
"alignLevel": null
}
},
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "${DS_PROMETHEUS}",
"fieldConfig": {
"defaults": {
"custom": {}
},
"overrides": []
},
"fill": 1,
"fillGradient": 0,
"gridPos": {
"h": 8,
"w": 12,
"x": 12,
"y": 7
},
"hiddenSeries": false,
"id": 10,
"legend": {
"avg": false,
"current": false,
"max": false,
"min": false,
"show": true,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"nullPointMode": "null",
"percentage": false,
"pluginVersion": "7.1.5",
"pointradius": 2,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
"expr": "airflow_scheduler_tasks_executable",
"interval": "",
"legendFormat": "Executable",
"refId": "A"
},
{
"expr": "airflow_scheduler_tasks_pending",
"interval": "",
"legendFormat": "Pending",
"refId": "B"
},
{
"expr": "airflow_scheduler_tasks_running",
"interval": "",
"legendFormat": "Running",
"refId": "C"
},
{
"expr": "airflow_scheduler_tasks_starving",
"interval": "",
"legendFormat": "Starving",
"refId": "D"
},
{
"expr": "airflow_scheduler_tasks_killed_externally",
"interval": "",
"legendFormat": "Killed Externally",
"refId": "E"
},
{
"expr": "airflow_scheduler_tasks_without_dagrun",
"interval": "",
"legendFormat": "Without DagRun",
"refId": "F"
}
],
"thresholds": [],
"timeFrom": null,
"timeRegions": [],
"timeShift": null,
"title": "Scheduler Tasks",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"$$hashKey": "object:629",
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
},
{
"$$hashKey": "object:630",
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
}
],
"yaxis": {
"align": false,
"alignLevel": null
}
},
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "${DS_PROMETHEUS}",
"fieldConfig": {
"defaults": {
"custom": {}
},
"overrides": []
},
"fill": 1,
"fillGradient": 0,
"gridPos": {
"h": 9,
"w": 12,
"x": 0,
"y": 15
},
"hiddenSeries": false,
"id": 2,
"legend": {
"avg": false,
"current": false,
"max": false,
"min": false,
"show": true,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"nullPointMode": "null",
"percentage": false,
"pluginVersion": "7.1.5",
"pointradius": 2,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
"expr": "airflow_executor_open_slots",
"interval": "",
"legendFormat": "Open Slots",
"refId": "A"
},
{
"expr": "airflow_executor_queued_tasks",
"interval": "",
"legendFormat": "Queued Tasks",
"refId": "B"
},
{
"expr": "airflow_executor_running_tasks",
"interval": "",
"legendFormat": "Running Tasks",
"refId": "C"
}
],
"thresholds": [],
"timeFrom": null,
"timeRegions": [],
"timeShift": null,
"title": "Executor",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"$$hashKey": "object:720",
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": "0",
"show": true
},
{
"$$hashKey": "object:721",
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
}
],
"yaxis": {
"align": false,
"alignLevel": null
}
},
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "${DS_PROMETHEUS}",
"decimals": null,
"fieldConfig": {
"defaults": {
"custom": {},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
}
},
"overrides": []
},
"fill": 1,
"fillGradient": 0,
"gridPos": {
"h": 8,
"w": 12,
"x": 12,
"y": 15
},
"hiddenSeries": false,
"id": 8,
"legend": {
"avg": false,
"current": false,
"max": false,
"min": false,
"show": false,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"nullPointMode": "null",
"percentage": false,
"pluginVersion": "7.1.5",
"pointradius": 2,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
"expr": "airflow_dagbag_size",
"interval": "",
"legendFormat": "DagBag Size",
"refId": "A"
}
],
"thresholds": [],
"timeFrom": null,
"timeRegions": [],
"timeShift": null,
"title": "DagBag Size",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"$$hashKey": "object:165",
"decimals": null,
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": "0",
"show": true
},
{
"$$hashKey": "object:166",
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
}
],
"yaxis": {
"align": false,
"alignLevel": null
}
}
],
"refresh": false,
"schemaVersion": 26,
"style": "dark",
"tags": [],
"templating": {
"list": []
},
"time": {
"from": "now-6h",
"to": "now"
},
"timepicker": {
"refresh_intervals": [
"5s",
"10s",
"30s",
"1m",
"5m",
"15m",
"30m",
"1h",
"2h",
"1d"
]
},
"timezone": "",
"title": "Airflow Dashboard",
"uid": "ZArfYGOGk",
"version": 39
}

View File

@ -0,0 +1,130 @@
{
"annotations": {
"list": [
{
"builtIn": 1,
"datasource": "-- Grafana --",
"enable": true,
"hide": true,
"iconColor": "rgba(0, 211, 255, 1)",
"name": "Annotations & Alerts",
"type": "dashboard"
}
]
},
"editable": true,
"gnetId": null,
"graphTooltip": 0,
"id": 9,
"links": [],
"panels": [
{
"datasource": "Prometheus",
"description": "Airflow task duration. Filtered out example tasks that come with Airflow.",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisLabel": "Duration",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 10,
"gradientMode": "none",
"hideFrom": {
"graph": false,
"legend": false,
"tooltip": false
},
"lineInterpolation": "linear",
"lineStyle": {
"fill": "solid"
},
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": true
},
"mappings": [
{
"from": "",
"id": 1,
"text": "",
"to": "",
"type": 1,
"value": ""
}
],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "short"
},
"overrides": []
},
"gridPos": {
"h": 19,
"w": 19,
"x": 0,
"y": 0
},
"id": 2,
"options": {
"graph": {},
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom"
},
"tooltipOptions": {
"mode": "single"
}
},
"pluginVersion": "7.5.4",
"targets": [
{
"exemplar": true,
"expr": "af_agg_dag_processing_last_run_seconds{dag_file =~ \"website_backups|website_security|git_backups\"}",
"format": "time_series",
"hide": false,
"instant": false,
"interval": "",
"legendFormat": "",
"refId": "A"
}
],
"title": "Airflow task durations",
"type": "timeseries"
}
],
"schemaVersion": 27,
"style": "dark",
"tags": [],
"templating": {
"list": []
},
"time": {
"from": "now-30m",
"to": "now"
},
"timepicker": {},
"timezone": "",
"title": "Nunez family Airflow tasks duration",
"uid": "OaTTYQrMk",
"version": 1
}

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,670 @@
{
"annotations": {
"list": [
{
"builtIn": 1,
"datasource": "-- Grafana --",
"enable": true,
"hide": true,
"iconColor": "rgba(0, 211, 255, 1)",
"name": "Annotations & Alerts",
"type": "dashboard"
}
]
},
"editable": true,
"gnetId": null,
"graphTooltip": 0,
"id": 23,
"iteration": 1612802909274,
"links": [],
"panels": [
{
"collapsed": true,
"datasource": null,
"gridPos": {
"h": 1,
"w": 24,
"x": 0,
"y": 0
},
"id": 47,
"panels": [],
"title": "DAG stats",
"type": "row"
},
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": null,
"description": "Seconds taken for a DagRun to reach success state",
"fieldConfig": {
"defaults": {
"custom": {
"align": null,
"filterable": false
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
}
]
},
"unit": "ms"
},
"overrides": []
},
"fill": 1,
"fillGradient": 0,
"gridPos": {
"h": 8,
"w": 24,
"x": 0,
"y": 1
},
"hiddenSeries": false,
"id": 25,
"legend": {
"avg": false,
"current": false,
"max": false,
"min": false,
"show": true,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"nullPointMode": "null",
"options": {
"alertThreshold": true
},
"percentage": false,
"pluginVersion": "7.3.5",
"pointradius": 2,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
"expr": "af_agg_dagrun_duration_success{dag_id=\"$dag_id\"} * 1000",
"format": "time_series",
"interval": "",
"legendFormat": "{{dag_id}} p{{quantile}}",
"refId": "A"
}
],
"thresholds": [],
"timeFrom": null,
"timeRegions": [],
"timeShift": null,
"title": "Success DAG run duration",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"format": "ms",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
}
],
"yaxis": {
"align": false,
"alignLevel": null
}
},
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": null,
"description": "Seconds taken for a DagRun to reach failed state",
"fieldConfig": {
"defaults": {
"custom": {},
"mappings": [],
"noValue": "0",
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
}
]
},
"unit": "s"
},
"overrides": []
},
"fill": 1,
"fillGradient": 0,
"gridPos": {
"h": 7,
"w": 24,
"x": 0,
"y": 9
},
"hiddenSeries": false,
"id": 49,
"legend": {
"avg": false,
"current": false,
"max": false,
"min": false,
"show": true,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"nullPointMode": "null",
"options": {
"alertThreshold": true
},
"percentage": false,
"pluginVersion": "7.3.5",
"pointradius": 2,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
"expr": "af_agg_dagrun_duration_failed{dag_id=\"$dag_id\"} * 1000",
"interval": "",
"legendFormat": "{{dag_id}} p{{quantile}}",
"refId": "A"
}
],
"thresholds": [],
"timeFrom": null,
"timeRegions": [],
"timeShift": null,
"title": "Failed DAG run duration",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": "0",
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": "0",
"show": true
}
],
"yaxis": {
"align": false,
"alignLevel": null
}
},
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": null,
"description": "Sum of seconds taken to finish a tasks aggregated by DAG",
"fieldConfig": {
"defaults": {
"custom": {},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
}
]
},
"unit": "s"
},
"overrides": []
},
"fill": 1,
"fillGradient": 0,
"gridPos": {
"h": 6,
"w": 24,
"x": 0,
"y": 16
},
"hiddenSeries": false,
"id": 51,
"legend": {
"alignAsTable": false,
"avg": false,
"current": false,
"max": false,
"min": false,
"show": true,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"nullPointMode": "null",
"options": {
"alertThreshold": true
},
"percentage": false,
"pluginVersion": "7.3.5",
"pointradius": 2,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
"expr": "sum by (dag_id, quantile) (af_agg_dag_task_duration{dag_id=\"$dag_id\"}) * 1000",
"format": "time_series",
"instant": false,
"interval": "",
"legendFormat": "{{dag_id}} p{{quantile}}",
"refId": "A"
}
],
"thresholds": [],
"timeFrom": null,
"timeRegions": [],
"timeShift": null,
"title": "DAG run duration",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"format": "s",
"label": null,
"logBase": 1,
"max": null,
"min": "0",
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": "0",
"show": true
}
],
"yaxis": {
"align": false,
"alignLevel": null
}
},
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": null,
"description": "Seconds taken to check DAG dependencies",
"fieldConfig": {
"defaults": {
"custom": {
"align": null,
"filterable": false
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "ms"
},
"overrides": []
},
"fill": 1,
"fillGradient": 0,
"gridPos": {
"h": 7,
"w": 24,
"x": 0,
"y": 22
},
"hiddenSeries": false,
"id": 19,
"legend": {
"avg": false,
"current": false,
"max": false,
"min": false,
"show": true,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"nullPointMode": "null",
"options": {
"alertThreshold": true
},
"percentage": false,
"pluginVersion": "7.3.5",
"pointradius": 2,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
"expr": "af_agg_dagrun_dependency_check{dag_id=\"$dag_id\"} * 1000",
"interval": "",
"legendFormat": "{{dag_id}} p{{quantile}}",
"refId": "A"
}
],
"thresholds": [],
"timeFrom": null,
"timeRegions": [],
"timeShift": null,
"title": "DAG run dependency check time",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"format": "ms",
"label": null,
"logBase": 1,
"max": null,
"min": "0",
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": "0",
"show": true
}
],
"yaxis": {
"align": false,
"alignLevel": null
}
},
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": null,
"description": "Seconds of delay between the scheduled DagRun start date and the actual DagRun start date",
"fieldConfig": {
"defaults": {
"custom": {},
"unit": "ms"
},
"overrides": []
},
"fill": 1,
"fillGradient": 0,
"gridPos": {
"h": 6,
"w": 24,
"x": 0,
"y": 29
},
"hiddenSeries": false,
"id": 27,
"legend": {
"avg": false,
"current": false,
"max": false,
"min": false,
"show": true,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"nullPointMode": "null",
"options": {
"alertThreshold": true
},
"percentage": false,
"pluginVersion": "7.3.5",
"pointradius": 2,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
"expr": "af_agg_dagrun_schedule_delay{dag_id=\"$dag_id\"} * 1000",
"interval": "",
"legendFormat": "{{dag_id}} p{{quantile}}",
"refId": "A"
}
],
"thresholds": [],
"timeFrom": null,
"timeRegions": [],
"timeShift": null,
"title": "DAG run schedule delay",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": "0",
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": "0",
"show": true
}
],
"yaxis": {
"align": false,
"alignLevel": null
}
}
],
"refresh": "5s",
"schemaVersion": 26,
"style": "dark",
"tags": [],
"templating": {
"list": [
{
"allValue": null,
"current": {
"selected": true,
"text": "java_dag",
"value": "java_dag"
},
"datasource": "Prometheus",
"definition": "label_values(dag_id)",
"error": null,
"hide": 0,
"includeAll": false,
"label": "DAG ID",
"multi": false,
"name": "dag_id",
"options": [
{
"selected": false,
"text": "dbnd_spark_dag",
"value": "dbnd_spark_dag"
},
{
"selected": false,
"text": "huge_dag",
"value": "huge_dag"
},
{
"selected": true,
"text": "java_dag",
"value": "java_dag"
},
{
"selected": false,
"text": "luigi_top_artists",
"value": "luigi_top_artists"
},
{
"selected": false,
"text": "process_data_spark",
"value": "process_data_spark"
},
{
"selected": false,
"text": "pyspark_dag",
"value": "pyspark_dag"
},
{
"selected": false,
"text": "spark_dag",
"value": "spark_dag"
}
],
"query": "label_values(dag_id)",
"refresh": 0,
"regex": "",
"skipUrlSync": false,
"sort": 0,
"tagValuesQuery": "",
"tags": [],
"tagsQuery": "",
"type": "query",
"useTags": false
}
]
},
"time": {
"from": "now-1h",
"to": "now"
},
"timepicker": {
"refresh_intervals": [
"5s",
"10s",
"30s",
"1m",
"5m",
"15m",
"30m",
"1h",
"2h",
"1d"
]
},
"timezone": "",
"title": "Airflow DAG dashboard",
"uid": "qBWJRGPMz",
"version": 4
}

View File

@ -0,0 +1,11 @@
apiVersion: 1
providers:
- name: 'Prometheus'
orgId: 1
folder: ''
type: file
disableDeletion: false
editable: true
options:
path: /etc/grafana/provisioning/dashboards

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,50 @@
# config file version
apiVersion: 1
# list of datasources that should be deleted from the database
deleteDatasources:
- name: Prometheus
orgId: 1
# list of datasources to insert/update depending
# whats available in the database
datasources:
# <string, required> name of the datasource. Required
- name: Prometheus
# <string, required> datasource type. Required
type: prometheus
# <string, required> access mode. direct or proxy. Required
access: proxy
# <int> org id. will default to orgId 1 if not specified
orgId: 1
# <string> url
url: http://prometheus:9090
# <string> database password, if used
password:
# <string> database user, if used
user:
# <string> database name, if used
database:
# <bool> enable/disable basic auth
basicAuth: false
# <string> basic auth username, if used
basicAuthUser:
# <string> basic auth password, if used
basicAuthPassword:
# <bool> enable/disable with credentials headers
withCredentials:
# <bool> mark as default datasource. Max one per org
isDefault: true
# <map> fields that will be converted to json and stored in json_data
jsonData:
graphiteVersion: "1.1"
tlsAuth: false
tlsAuthWithCACert: false
# <string> json object of data that will be encrypted.
secureJsonData:
tlsCACert: "..."
tlsClientCert: "..."
tlsClientKey: "..."
version: 1
# <bool> allow users to edit datasources from the UI.
editable: true

View File

@ -0,0 +1,16 @@
global:
scrape_interval: 15s
evaluation_interval: 15s
rule_files:
# - "first.rules"
# - "second.rules"
scrape_configs:
- job_name: prometheus
static_configs:
- targets: ['localhost:9090']
- job_name: app
scrape_interval: 5s
static_configs:
- targets: ['host.docker.internal:10088']

View File

@ -0,0 +1,22 @@
groups:
- name: example
rules:
# Alert for any instance that is unreachable for >2 minutes.
- alert: service_down
expr: up == 0
for: 2m
labels:
severity: page
annotations:
summary: "Instance {{ $labels.instance }} down"
description: "{{ $labels.instance }} of job {{ $labels.job }} has been down for more than 2 minutes."
- alert: high_load
expr: node_load1 > 0.5
for: 2m
labels:
severity: page
annotations:
summary: "Instance {{ $labels.instance }} under high load"
description: "{{ $labels.instance }} of job {{ $labels.job }} is under high load."

View File

@ -0,0 +1,89 @@
# my global config
global:
scrape_interval: 15s # By default, scrape targets every 15 seconds.
evaluation_interval: 15s # By default, scrape targets every 15 seconds.
# scrape_timeout is set to the global default (10s).
# Attach these labels to any time series or alerts when communicating with
# external systems (federation, remote storage, Alertmanager).
external_labels:
monitor: 'my-project'
# Load and evaluate rules in this file every 'evaluation_interval' seconds.
rule_files:
- 'alert.rules'
# - "first.rules"
# - "second.rules"
# alert
alerting:
alertmanagers:
- scheme: http
static_configs:
- targets:
- "alertmanager:9093"
# A scrape configuration containing exactly one endpoint to scrape:
# Here it's Prometheus itself.
scrape_configs:
# The job name is added as a label `job=<job_name>` to any timeseries scraped from this config.
- job_name: app
scrape_interval: 5s
static_configs:
- targets: ['host.docker.internal:8000']
- job_name: 'prometheus'
# Override the global default and scrape targets from this job every 5 seconds.
scrape_interval: 5s
static_configs:
- targets: ['localhost:9090']
- job_name: 'cadvisor'
# Override the global default and scrape targets from this job every 5 seconds.
scrape_interval: 5s
dns_sd_configs:
- names:
- 'tasks.cadvisor'
type: 'A'
port: 8080
# static_configs:
# - targets: ['cadvisor:8080']
- job_name: 'node-exporter'
# Override the global default and scrape targets from this job every 5 seconds.
scrape_interval: 5s
dns_sd_configs:
- names:
- 'tasks.node-exporter'
type: 'A'
port: 9100
# - job_name: 'pushgateway'
# scrape_interval: 10s
# dns_sd_configs:
# - names:
# - 'tasks.pushgateway'
# type: 'A'
# port: 9091
# static_configs:
# - targets: ['node-exporter:9100']
- job_name: 'traefik-app'
scrape_interval: 5s
static_configs:
- targets: ['10.12.1.14:8181']
- job_name: 'airflow'
# Override the global default and scrape targets from this job every 5 seconds.
scrape_interval: 5s
static_configs:
- targets:
- '10.12.1.14:9102'