From 914b690cf59c28e5d22604fb85440ff27fb37b97 Mon Sep 17 00:00:00 2001
From: Florian Sesser <florian@private.storage>
Date: Tue, 22 Feb 2022 17:13:04 +0000
Subject: [PATCH] Grafana: Revamp network errors panel

- Also alert on "negative" (== receiving) errors
- Use 'rate' instead of the raw counters so the alert can clear once the situation normalizes
---
 .../resources-overview.json                   | 69 +++++++++++++++----
 1 file changed, 54 insertions(+), 15 deletions(-)

diff --git a/nixos/modules/monitoring/server/grafana-dashboards/resources-overview.json b/nixos/modules/monitoring/server/grafana-dashboards/resources-overview.json
index efe0246d..5001eb7d 100644
--- a/nixos/modules/monitoring/server/grafana-dashboards/resources-overview.json
+++ b/nixos/modules/monitoring/server/grafana-dashboards/resources-overview.json
@@ -715,9 +715,10 @@
           {
             "evaluator": {
               "params": [
-                10
+                -1,
+                1
               ],
-              "type": "gt"
+              "type": "outside_range"
             },
             "operator": {
               "type": "and"
@@ -738,9 +739,10 @@
           {
             "evaluator": {
               "params": [
-                10
+                -1,
+                1
               ],
-              "type": "gt"
+              "type": "outside_range"
             },
             "operator": {
               "type": "or"
@@ -761,9 +763,10 @@
           {
             "evaluator": {
               "params": [
-                10
+                -1,
+                1
               ],
-              "type": "gt"
+              "type": "outside_range"
             },
             "operator": {
               "type": "or"
@@ -784,9 +787,10 @@
           {
             "evaluator": {
               "params": [
-                10
+                -1,
+                1
               ],
-              "type": "gt"
+              "type": "outside_range"
             },
             "operator": {
               "type": "or"
@@ -809,6 +813,7 @@
         "for": "5m",
         "frequency": "1m",
         "handler": 1,
+        "message": "",
         "name": "Network errors alert",
         "noDataState": "no_data",
         "notifications": []
@@ -844,7 +849,7 @@
               "mode": "none"
             },
             "thresholdsStyle": {
-              "mode": "line+area"
+              "mode": "off"
             }
           },
           "links": [],
@@ -886,30 +891,64 @@
       "pluginVersion": "8.3.5",
       "targets": [
         {
-          "expr": "node_network_transmit_errs_total\n",
+          "datasource": {
+            "type": "prometheus",
+            "uid": "000000001"
+          },
+          "exemplar": true,
+          "expr": "rate(node_network_transmit_errs_total{device!=\"lo\"}[5m])\n",
           "interval": "",
           "legendFormat": "{{instance}} {{device}}",
           "refId": "A"
         },
         {
-          "expr": "node_network_transmit_drop_total",
+          "datasource": {
+            "type": "prometheus",
+            "uid": "000000001"
+          },
+          "exemplar": true,
+          "expr": "rate(node_network_transmit_drop_total{device!=\"lo\"}[5m])",
           "interval": "",
           "legendFormat": "{{instance}} {{device}}",
           "refId": "B"
         },
         {
-          "expr": "- node_network_receive_drop_total",
+          "datasource": {
+            "type": "prometheus",
+            "uid": "000000001"
+          },
+          "exemplar": true,
+          "expr": "- rate(node_network_receive_drop_total{device!=\"lo\"}[5m])",
           "interval": "",
           "legendFormat": "{{instance}} {{device}}",
           "refId": "C"
         },
         {
-          "expr": "- node_network_receive_errs_total",
+          "datasource": {
+            "type": "prometheus",
+            "uid": "000000001"
+          },
+          "exemplar": true,
+          "expr": "- rate(node_network_receive_errs_total{device!=\"lo\"}[5m])",
           "interval": "",
           "legendFormat": "{{instance}} {{device}}",
           "refId": "D"
         }
       ],
+      "thresholds": [
+        {
+          "colorMode": "critical",
+          "op": "lt",
+          "value": -1,
+          "visible": true
+        },
+        {
+          "colorMode": "critical",
+          "op": "gt",
+          "value": 1,
+          "visible": true
+        }
+      ],
       "title": "Network errors",
       "type": "timeseries"
     },
@@ -1002,7 +1041,7 @@
         "alertThreshold": true
       },
       "percentage": false,
-      "pluginVersion": "8.3.5",
+      "pluginVersion": "8.3.6",
       "pointradius": 2,
       "points": false,
       "renderer": "flot",
@@ -1375,7 +1414,7 @@
     "list": []
   },
   "time": {
-    "from": "now-24h",
+    "from": "now-2d",
     "to": "now"
   },
   "timepicker": {},
-- 
GitLab