diff --git a/nixos/modules/monitoring/server/grafana-dashboards/backups.json b/nixos/modules/monitoring/server/grafana-dashboards/backups.json index 51c6ec60444f4f7bac0bf0753e9e0b8e1c3aa1e9..e2ed09b279fc34573323c6a5dd4360a48b2ab6ef 100644 --- a/nixos/modules/monitoring/server/grafana-dashboards/backups.json +++ b/nixos/modules/monitoring/server/grafana-dashboards/backups.json @@ -22,6 +22,7 @@ "editable": true, "fiscalYearStartMonth": 0, "graphTooltip": 0, + "id": 58, "links": [], "liveNow": false, "panels": [ @@ -74,6 +75,7 @@ "noDataState": "no_data", "notifications": [] }, + "datasource": "Prometheus", "description": "Daily backup job systemd timer unit state", "fieldConfig": { "defaults": { @@ -162,10 +164,7 @@ "pluginVersion": "8.4.7", "targets": [ { - "datasource": { - "type": "prometheus", - "uid": "000000001" - }, + "datasource": "Prometheus", "exemplar": true, "expr": "sum(node_systemd_unit_state{name=\"borgbackup-job-daily.timer\", state=~\"active\"})", "hide": false, @@ -174,10 +173,7 @@ "refId": "Active timers" }, { - "datasource": { - "type": "prometheus", - "uid": "000000001" - }, + "datasource": "Prometheus", "exemplar": true, "expr": "sum(node_systemd_unit_state{name=\"borgbackup-job-daily.service\", state=\"failed\"})", "hide": false, @@ -233,6 +229,7 @@ "noDataState": "no_data", "notifications": [] }, + "datasource": "Prometheus", "description": "Monthly check-repo systemd timer unit state", "fieldConfig": { "defaults": { @@ -318,10 +315,7 @@ "pluginVersion": "8.4.7", "targets": [ { - "datasource": { - "type": "prometheus", - "uid": "000000001" - }, + "datasource": "Prometheus", "exemplar": true, "expr": "sum(node_systemd_unit_state{name=\"borgbackup-check-repo.timer\", state=\"active\"})", "hide": false, @@ -330,10 +324,7 @@ "refId": "Active timers" }, { - "datasource": { - "type": "prometheus", - "uid": "000000001" - }, + "datasource": "Prometheus", "exemplar": true, "expr": "sum(node_systemd_unit_state{name=\"borgbackup-check-repo.service\", state=\"failed\"})", "hide": false, @@ -390,6 +381,7 @@ "noDataState": "no_data", "notifications": [] }, + "datasource": "Prometheus", "description": "This shows the last triggering of the borgbackup-job-daily.timer systemd unit.", "fieldConfig": { "defaults": { @@ -463,10 +455,7 @@ "pluginVersion": "8.3.5", "targets": [ { - "datasource": { - "type": "prometheus", - "uid": "000000001" - }, + "datasource": "Prometheus", "exemplar": false, "expr": "node_systemd_timer_last_trigger_seconds{name=\"borgbackup-job-daily.timer\"} - time()", "interval": "", @@ -523,6 +512,7 @@ "noDataState": "no_data", "notifications": [] }, + "datasource": "Prometheus", "description": "This shows the last triggering of the borgbackup-job-daily.timer systemd unit.", "fieldConfig": { "defaults": { @@ -598,10 +588,7 @@ "pluginVersion": "8.3.5", "targets": [ { - "datasource": { - "type": "prometheus", - "uid": "000000001" - }, + "datasource": "Prometheus", "exemplar": false, "expr": "node_systemd_timer_last_trigger_seconds{name=\"borgbackup-check-repo.timer\"} - time()", "interval": "", @@ -622,42 +609,7 @@ "type": "timeseries" }, { - "alert": { - "alertRuleTags": {}, - "conditions": [ - { - "evaluator": { - "params": [ - 0 - ], - "type": "gt" - }, - "operator": { - "type": "and" - }, - "query": { - "params": [ - "A", - "5m", - "now-3h" - ] - }, - "reducer": { - "params": [], - "type": "last" - }, - "type": "query" - } - ], - "executionErrorState": "alerting", - "for": "5m", - "frequency": "1m", - "handler": 1, - "message": "A backup job ran for more than three hours. This means it could run into the check-repo job start time, depending on its \"random\" job delay.", - "name": "Daily backup job run time alert", - "noDataState": "no_data", - "notifications": [] - }, + "datasource": "Prometheus", "description": "When was the unit active? With alerts", "fieldConfig": { "defaults": { @@ -733,10 +685,7 @@ }, "targets": [ { - "datasource": { - "type": "prometheus", - "uid": "000000001" - }, + "datasource": "Prometheus", "exemplar": true, "expr": "node_systemd_unit_state{name=\"borgbackup-job-daily.service\", state=\"active\"}", "interval": "", @@ -744,54 +693,12 @@ "refId": "A" } ], - "thresholds": [ - { - "colorMode": "critical", - "op": "gt", - "value": 0, - "visible": true - } - ], + "thresholds": [], "title": "Daily backup job run time", "type": "timeseries" }, { - "alert": { - "alertRuleTags": {}, - "conditions": [ - { - "evaluator": { - "params": [ - 0 - ], - "type": "gt" - }, - "operator": { - "type": "and" - }, - "query": { - "params": [ - "A", - "5m", - "now-6h" - ] - }, - "reducer": { - "params": [], - "type": "last" - }, - "type": "query" - } - ], - "executionErrorState": "alerting", - "for": "5m", - "frequency": "1m", - "handler": 1, - "message": "A borg check-repo job ran for more than six hours. This means it could collide with the daily backup job, depending on that job's \"random\" delay. If the backup set is large and this is expected to happen again, consider using borgbackup partial checks (--max-duration SECONDS parameter).", - "name": "Monthly check-repo run time alert", - "noDataState": "no_data", - "notifications": [] - }, + "datasource": "Prometheus", "description": "When was the unit active?", "fieldConfig": { "defaults": { @@ -867,10 +774,7 @@ }, "targets": [ { - "datasource": { - "type": "prometheus", - "uid": "000000001" - }, + "datasource": "Prometheus", "exemplar": true, "expr": "node_systemd_unit_state{name=\"borgbackup-check-repo.service\", state=\"active\"}", "interval": "", @@ -878,22 +782,12 @@ "refId": "A" } ], - "thresholds": [ - { - "colorMode": "critical", - "op": "gt", - "value": 0, - "visible": true - } - ], + "thresholds": [], "title": "Monthly check-repo run time", "type": "timeseries" }, { - "datasource": { - "type": "loki", - "uid": "000000002" - }, + "datasource": "Loki", "description": "The \"duration\" that borgbackup status reports.", "fieldConfig": { "defaults": { @@ -968,10 +862,7 @@ "pluginVersion": "8.4.7", "targets": [ { - "datasource": { - "type": "loki", - "uid": "000000002" - }, + "datasource": "Loki", "expr": "{unit=\"borgbackup-job-daily.service\"} |= \"duration\" | pattern \"<_>\\\"duration\\\": <duration>,\"", "legendFormat": "{{host}}", "queryType": "range", @@ -1009,10 +900,7 @@ "type": "barchart" }, { - "datasource": { - "type": "loki", - "uid": "000000002" - }, + "datasource": "Loki", "fieldConfig": { "defaults": { "color": { @@ -1081,20 +969,14 @@ "pluginVersion": "8.4.7", "targets": [ { - "datasource": { - "type": "loki", - "uid": "000000002" - }, + "datasource": "Loki", "expr": "{unit=\"borgbackup-job-daily.service\"} |= \"compressed_size\" | pattern \"<_>\\\"compressed_size\\\": <compressed_size>,\"", "hide": false, "legendFormat": "{{host}} archive", "refId": "This archive size in bytes" }, { - "datasource": { - "type": "loki", - "uid": "000000002" - }, + "datasource": "Loki", "expr": "{unit=\"borgbackup-job-daily.service\"} |= \"unique_csize\" | pattern \"<_>\\\"unique_csize\\\": <unique_csize>,\"", "hide": false, "legendFormat": "{{host}} all archives",