From db935320c0a5e6457df6503cb439f4732f21e539 Mon Sep 17 00:00:00 2001
From: Florian Sesser <florian@privatestorage.io>
Date: Thu, 27 May 2021 18:35:58 +0000
Subject: [PATCH] Make resources dashboard work again

- Add node exporter to nodes
- Make the old nixpkgs 19.09 node exporter work (amend config)
- Fix a typo in metric names (`reveive` -> `receive`) and exclude monitoringvpn in network traffic graphs to prevent double counting
---
 morph/grid/local/grid.nix                     |  2 +-
 morph/lib/make-issuer.nix                     |  1 +
 morph/lib/make-monitoring.nix                 |  1 +
 morph/lib/make-testing.nix                    |  1 +
 nixos/modules/monitoring/exporters/node.nix   | 75 +++++++++++++++++++
 .../grafana-config/resources-overview.json    | 14 ++--
 6 files changed, 86 insertions(+), 8 deletions(-)
 create mode 100644 nixos/modules/monitoring/exporters/node.nix

diff --git a/morph/grid/local/grid.nix b/morph/grid/local/grid.nix
index b500c485..3f4f5243 100644
--- a/morph/grid/local/grid.nix
+++ b/morph/grid/local/grid.nix
@@ -38,7 +38,7 @@ import ../../lib/make-grid.nix {
       monitoringvpnIPv4 = "172.23.23.1";
       inherit vpnClientIPs;
       inherit sshUsers;
-      nodeExporterTargets = [ ];
+      nodeExporterTargets = [ "172.23.23.1" "172.23.23.11" "172.23.23.12" "172.23.23.13" ]; # TBD: derive automatically
       nginxExporterTargets = [ ];
       hardware = import ./virtual-hardware.nix ({ inherit publicIPv4; });
       stateVersion = "19.09";
diff --git a/morph/lib/make-issuer.nix b/morph/lib/make-issuer.nix
index 82b19484..039e4268 100644
--- a/morph/lib/make-issuer.nix
+++ b/morph/lib/make-issuer.nix
@@ -55,6 +55,7 @@
     hardware
     ../../nixos/modules/issuer.nix
     ../../nixos/modules/monitoring/vpn/client.nix
+    ../../nixos/modules/monitoring/exporters/node.nix
   ];
 
   services.private-storage.sshUsers = sshUsers;
diff --git a/morph/lib/make-monitoring.nix b/morph/lib/make-monitoring.nix
index 4a2dd83c..b8242446 100644
--- a/morph/lib/make-monitoring.nix
+++ b/morph/lib/make-monitoring.nix
@@ -52,6 +52,7 @@ rec {
     ../../nixos/modules/monitoring/vpn/server.nix
     ../../nixos/modules/monitoring/server/grafana.nix
     ../../nixos/modules/monitoring/server/prometheus.nix
+    ../../nixos/modules/monitoring/exporters/node.nix
     # Loki 0.3.0 from Nixpkgs 19.09 is too old and does not work:
     # ../../nixos/modules/monitoring/server/loki.nix
   ];
diff --git a/morph/lib/make-testing.nix b/morph/lib/make-testing.nix
index 7cd3c80a..1eb39ab5 100644
--- a/morph/lib/make-testing.nix
+++ b/morph/lib/make-testing.nix
@@ -48,6 +48,7 @@
     hardware
     ../../nixos/modules/private-storage.nix
     ../../nixos/modules/monitoring/vpn/client.nix
+    ../../nixos/modules/monitoring/exporters/node.nix
   ];
 
   services.private-storage =
diff --git a/nixos/modules/monitoring/exporters/node.nix b/nixos/modules/monitoring/exporters/node.nix
new file mode 100644
index 00000000..519e04d1
--- /dev/null
+++ b/nixos/modules/monitoring/exporters/node.nix
@@ -0,0 +1,75 @@
+# Prometheus common node exporter config
+#
+# Scope: Export platform data like CPU, memory, disk space etc. on TCP port
+#        9100, to be polled by the Prometheus server over the monitoring VPN
+# Usage: Import this to every server you want to include in the central
+#        monitoring system
+# See https://nixos.org/manual/nixos/stable/#module-services-prometheus-exporters
+
+{ config, lib, pkgs, ... }:
+
+with lib;
+
+let
+  # True when at least one entry of config.fileSystems has the given fsType.
+  mountsFileSystemType = fsType: {} != filterAttrs (n: v: v.fsType == fsType) config.fileSystems;
+
+in {
+  config.services.prometheus.exporters.node = {
+    enable = true;
+    # Restrict scraping to the VPN interface (assumed name: monitoringvpn).
+    openFirewall = true;
+    firewallFilter = "-i monitoringvpn -p tcp -m tcp --dport 9100";
+    port = 9100; # keep in sync with firewallFilter
+    # extraFlags = [ "--collector.disable-defaults" ]; # not in nixpkgs 19.09
+    # Thanks https://github.com/mayflower/nixexprs/blob/master/modules/monitoring/default.nix
+    enabledCollectors = [
+      "arp"
+      "bcache"
+      "conntrack"
+      "filefd"
+      "logind"
+      "netclass"
+      "netdev"
+      "netstat"
+      #"rapl" # not in nixpkgs 19.09
+      "sockstat"
+      #"softnet" # not in nixpkgs 19.09
+      "stat"
+      "systemd"
+      # "textfile"
+      # "textfile.directory /run/prometheus-node-exporter"
+      #"thermal_zone" # not in nixpkgs 19.09
+      "time"
+      #"udp_queues" # not in nixpkgs 19.09
+      "uname"
+      "vmstat"
+    ] ++ optionals (!config.boot.isContainer) [
+      "cpu"
+      "cpufreq"
+      "diskstats"
+      "edac"
+      "entropy"
+      "filesystem"
+      "hwmon"
+      "interrupts"
+      "ksmd"
+      "loadavg"
+      "meminfo"
+      "pressure"
+      "timex"
+    ] ++ (
+      optionals (config.services.nfs.server.enable) [ "nfsd" ]
+    ) ++ (
+      optionals ("" != config.boot.initrd.mdadmConf) [ "mdadm" ]
+    ) ++ (
+      optionals ({} != config.networking.bonds) [ "bonding" ]
+    ) ++ (
+      optionals (mountsFileSystemType "nfs") [ "nfs" ]
+    ) ++ (
+      optionals (mountsFileSystemType "xfs") [ "xfs" ]
+    ) ++ (
+      optionals (mountsFileSystemType "zfs" || elem "zfs" config.boot.supportedFilesystems) [ "zfs" ]
+    );
+  };
+}
diff --git a/nixos/modules/monitoring/server/grafana-config/resources-overview.json b/nixos/modules/monitoring/server/grafana-config/resources-overview.json
index 70519e5b..02db119b 100644
--- a/nixos/modules/monitoring/server/grafana-config/resources-overview.json
+++ b/nixos/modules/monitoring/server/grafana-config/resources-overview.json
@@ -494,14 +494,14 @@
       "steppedLine": false,
       "targets": [
         {
-          "expr": "max by (instance) (rate(node_network_transmit_bytes_total{device!=\"lo\"}[5m]) / node_network_speed_bytes)",
+          "expr": "max by (instance) (rate(node_network_transmit_bytes_total{device!~\"lo|monitoringvpn\"}[5m]) / node_network_speed_bytes)",
           "interval": "",
           "intervalFactor": 4,
           "legendFormat": "{{instance}} out",
           "refId": "A"
         },
         {
-          "expr": "- max by (instance) (rate(node_network_receive_bytes_total{device!=\"lo\"}[5m]) / node_network_speed_bytes)",
+          "expr": "- max by (instance) (rate(node_network_receive_bytes_total{device!~\"lo|monitoringvpn\"}[5m]) / node_network_speed_bytes)",
           "interval": "",
           "intervalFactor": 4,
           "legendFormat": "{{instance}} in",
@@ -602,28 +602,28 @@
       "steppedLine": false,
       "targets": [
         {
-          "expr": "rate(node_network_reveive_packets_total{device!=\"lo\"}[5m])",
+          "expr": "rate(node_network_receive_packets_total{device!~\"lo|monitoringvpn\"}[5m])",
           "interval": "",
           "intervalFactor": 4,
           "legendFormat": "{{instance}} {{device}}",
           "refId": "A"
         },
         {
-          "expr": "rate(node_network_reveive_errs_total{device!=\"lo\"}[5m])",
+          "expr": "rate(node_network_receive_errs_total{device!~\"lo|monitoringvpn\"}[5m])",
           "interval": "",
           "intervalFactor": 4,
           "legendFormat": "{{instance}} {{device}}",
           "refId": "B"
         },
         {
-          "expr": "- rate(node_network_transmit_packets_total{device!=\"lo\"}[5m])",
+          "expr": "- rate(node_network_transmit_packets_total{device!~\"lo|monitoringvpn\"}[5m])",
           "interval": "",
           "intervalFactor": 4,
           "legendFormat": "{{instance}} {{device}}",
           "refId": "C"
         },
         {
-          "expr": "- rate(node_network_transmit_errs_total{device!=\"lo\"}[5m])",
+          "expr": "- rate(node_network_transmit_errs_total{device!~\"lo|monitoringvpn\"}[5m])",
           "interval": "",
           "intervalFactor": 4,
           "legendFormat": "{{instance}} {{device}}",
@@ -1291,4 +1291,4 @@
   "title": "Resources overview",
   "uid": "ResSatUse",
   "version": 1
-}
\ No newline at end of file
+}
-- 
GitLab