From bfdbbe2a809238763a6c06a9f8063117c27d840d Mon Sep 17 00:00:00 2001 From: Florian Sesser <florian@private.storage> Date: Wed, 12 Oct 2022 14:23:02 +0000 Subject: [PATCH] Add dash and alert for textfile-collector metric freshness to meta-monitoring board Refs #129 --- .../grafana-dashboards/meta-monitoring.json | 165 +++++++++++++++--- 1 file changed, 144 insertions(+), 21 deletions(-) diff --git a/nixos/modules/monitoring/server/grafana-dashboards/meta-monitoring.json b/nixos/modules/monitoring/server/grafana-dashboards/meta-monitoring.json index f6636cb3..5cd8ca56 100644 --- a/nixos/modules/monitoring/server/grafana-dashboards/meta-monitoring.json +++ b/nixos/modules/monitoring/server/grafana-dashboards/meta-monitoring.json @@ -8,16 +8,22 @@ "hide": true, "iconColor": "rgba(0, 211, 255, 1)", "name": "Annotations & Alerts", + "target": { + "limit": 100, + "matchAny": false, + "tags": [], + "type": "dashboard" + }, "type": "dashboard" } ] }, "description": "Watching the watchers", "editable": true, - "gnetId": null, + "fiscalYearStartMonth": 0, "graphTooltip": 0, - "id": 22, "links": [], + "liveNow": false, "panels": [ { "alert": { @@ -64,10 +70,6 @@ "uid": "LocalPrometheus" }, "description": "Is Prometheus having problems scraping our instances? 
Should be zero.", - "fieldConfig": { - "defaults": {}, - "overrides": [] - }, "fill": 1, "fillGradient": 0, "gridPos": { @@ -94,7 +96,7 @@ "alertThreshold": false }, "percentage": false, - "pluginVersion": "7.5.10", + "pluginVersion": "8.4.7", "pointradius": 2, "points": false, "renderer": "flot", @@ -122,9 +124,7 @@ "visible": true } ], - "timeFrom": null, "timeRegions": [], - "timeShift": null, "title": "Scraping failures", "tooltip": { "shared": true, @@ -133,9 +133,7 @@ }, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", - "name": null, "show": true, "values": [] }, @@ -143,29 +141,153 @@ { "decimals": 0, "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": true }, { "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": true } ], "yaxis": { - "align": false, - "alignLevel": null + "align": false } + }, + { + "alert": { + "alertRuleTags": {}, + "conditions": [ + { + "evaluator": { + "params": [ + 600 + ], + "type": "gt" + }, + "operator": { + "type": "and" + }, + "query": { + "params": [ + "A", + "5m", + "now" + ] + }, + "reducer": { + "params": [], + "type": "last" + }, + "type": "query" + } + ], + "executionErrorState": "alerting", + "for": "5m", + "frequency": "1m", + "handler": 1, + "message": "A metrics text file is older than 10 minutes.", + "name": "Textcollector staleness alert", + "noDataState": "no_data", + "notifications": [] + }, + "description": "Node-Exporter's TextCollector reads in plain text files containing metrics every few minutes. 
Make sure we're not reporting stale text files as new data - Alert if any of the text files is not getting updated.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 0 + }, + "id": 8, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "LocalPrometheus" + }, + "exemplar": true, + "expr": "time() - node_textfile_mtime_seconds", + "interval": "", + "legendFormat": "{{instance}}/{{file}}", + "refId": "A" + } + ], + "thresholds": [ + { + "colorMode": "critical", + "op": "gt", + "value": 600, + "visible": true + } + ], + "title": "Textfile collector freshness", + "type": "timeseries" } ], "refresh": false, - "schemaVersion": 27, + "schemaVersion": 35, "style": "dark", "tags": [], "templating": { @@ -179,5 +301,6 @@ "timezone": "", "title": "Meta monitoring", "uid": "MetaMonitoring", - "version": 1 + "version": 1, + "weekStart": "" } -- GitLab