From db935320c0a5e6457df6503cb439f4732f21e539 Mon Sep 17 00:00:00 2001
From: Florian Sesser <florian@privatestorage.io>
Date: Thu, 27 May 2021 18:35:58 +0000
Subject: [PATCH] Make resources dashboard work again

- Add node exporter to nodes
- Make the old nixpkgs 19.09 node exporter work (amend config)
- Fix a typo (`reveive` -> `receive`) in the network packet/error metric
  names and exclude monitoringvpn in network traffic graphs to prevent
  double counting
---
 morph/grid/local/grid.nix                     |  2 +-
 morph/lib/make-issuer.nix                     |  1 +
 morph/lib/make-monitoring.nix                 |  1 +
 morph/lib/make-testing.nix                    |  1 +
 nixos/modules/monitoring/exporters/node.nix   | 75 +++++++++++++++++++
 .../grafana-config/resources-overview.json    | 14 ++--
 6 files changed, 86 insertions(+), 8 deletions(-)
 create mode 100644 nixos/modules/monitoring/exporters/node.nix

diff --git a/morph/grid/local/grid.nix b/morph/grid/local/grid.nix
index b500c485..3f4f5243 100644
--- a/morph/grid/local/grid.nix
+++ b/morph/grid/local/grid.nix
@@ -38,7 +38,7 @@ import ../../lib/make-grid.nix {
     monitoringvpnIPv4 = "172.23.23.1";
     inherit vpnClientIPs;
     inherit sshUsers;
-    nodeExporterTargets = [ ];
+    nodeExporterTargets = [ "172.23.23.1" "172.23.23.11" "172.23.23.12" "172.23.23.13" ]; # TBD: derive automatically
     nginxExporterTargets = [ ];
     hardware = import ./virtual-hardware.nix ({ inherit publicIPv4; });
     stateVersion = "19.09";
diff --git a/morph/lib/make-issuer.nix b/morph/lib/make-issuer.nix
index 82b19484..039e4268 100644
--- a/morph/lib/make-issuer.nix
+++ b/morph/lib/make-issuer.nix
@@ -55,6 +55,7 @@
       hardware
       ../../nixos/modules/issuer.nix
       ../../nixos/modules/monitoring/vpn/client.nix
+      ../../nixos/modules/monitoring/exporters/node.nix
     ];
 
     services.private-storage.sshUsers = sshUsers;
diff --git a/morph/lib/make-monitoring.nix b/morph/lib/make-monitoring.nix
index 4a2dd83c..b8242446 100644
--- a/morph/lib/make-monitoring.nix
+++ b/morph/lib/make-monitoring.nix
@@ -52,6 +52,7 @@ rec {
       ../../nixos/modules/monitoring/vpn/server.nix
       ../../nixos/modules/monitoring/server/grafana.nix
       ../../nixos/modules/monitoring/server/prometheus.nix
+      ../../nixos/modules/monitoring/exporters/node.nix
       # Loki 0.3.0 from Nixpkgs 19.09 is too old and does not work:
       # ../../nixos/modules/monitoring/server/loki.nix
     ];
diff --git a/morph/lib/make-testing.nix b/morph/lib/make-testing.nix
index 7cd3c80a..1eb39ab5 100644
--- a/morph/lib/make-testing.nix
+++ b/morph/lib/make-testing.nix
@@ -48,6 +48,7 @@
       hardware
       ../../nixos/modules/private-storage.nix
       ../../nixos/modules/monitoring/vpn/client.nix
+      ../../nixos/modules/monitoring/exporters/node.nix
     ];
 
     services.private-storage =
diff --git a/nixos/modules/monitoring/exporters/node.nix b/nixos/modules/monitoring/exporters/node.nix
new file mode 100644
index 00000000..519e04d1
--- /dev/null
+++ b/nixos/modules/monitoring/exporters/node.nix
@@ -0,0 +1,75 @@
+# Prometheus common node exporter config
+#
+# Scope: Export platform data like CPU, memory, disk space etc. to be
+#        polled by Prometheus server
+# Usage: Import this to every server you want to include in the central
+#        monitoring system
+# See https://nixos.org/manual/nixos/stable/#module-services-prometheus-exporters
+
+{ config, lib, pkgs, ... }:
+
+with lib;
+
+let
+  # True when at least one entry in config.fileSystems has the given fsType.
+  mountsFileSystemType = fsType: {} != filterAttrs (n: v: v.fsType == fsType) config.fileSystems;
+
+in {
+  config.services.prometheus.exporters.node = {
+    enable = true;
+    # `openFirewall` already opens `port` (9100) in the firewall, so no
+    # separate networking.firewall.allowedTCPPorts entry is needed.
+    openFirewall = true;
+    port = 9100;
+    # extraFlags = [ "--collector.disable-defaults" ]; # not in nixpkgs 19.09
+    # Thanks https://github.com/mayflower/nixexprs/blob/master/modules/monitoring/default.nix
+    enabledCollectors = [
+      "arp"
+      "bcache"
+      "conntrack"
+      "filefd"
+      "logind"
+      "netclass"
+      "netdev"
+      "netstat"
+      #"rapl" # not in nixpkgs 19.09
+      "sockstat"
+      #"softnet" # not in nixpkgs 19.09
+      "stat"
+      "systemd"
+      # "textfile"
+      # "textfile.directory /run/prometheus-node-exporter"
+      #"thermal_zone" # not in nixpkgs 19.09
+      "time"
+      #"udp_queues" # not in nixpkgs 19.09
+      "uname"
+      "vmstat"
+    ] ++ optionals (!config.boot.isContainer) [
+      "cpu"
+      "cpufreq"
+      "diskstats"
+      "edac"
+      "entropy"
+      "filesystem"
+      "hwmon"
+      "interrupts"
+      "ksmd"
+      "loadavg"
+      "meminfo"
+      "pressure"
+      "timex"
+    ] ++ (
+      optionals (config.services.nfs.server.enable) [ "nfsd" ]
+    ) ++ (
+      optionals ("" != config.boot.initrd.mdadmConf) [ "mdadm" ]
+    ) ++ (
+      optionals ({} != config.networking.bonds) [ "bonding" ]
+    ) ++ (
+      optionals (mountsFileSystemType "nfs") [ "nfs" ]
+    ) ++ (
+      optionals (mountsFileSystemType "xfs") [ "xfs" ]
+    ) ++ (
+      optionals (mountsFileSystemType "zfs" || elem "zfs" config.boot.supportedFilesystems) [ "zfs" ]
+    );
+  };
+}
diff --git a/nixos/modules/monitoring/server/grafana-config/resources-overview.json b/nixos/modules/monitoring/server/grafana-config/resources-overview.json
index 70519e5b..02db119b 100644
--- a/nixos/modules/monitoring/server/grafana-config/resources-overview.json
+++ b/nixos/modules/monitoring/server/grafana-config/resources-overview.json
@@ -494,14 +494,14 @@
       "steppedLine": false,
       "targets": [
         {
-          "expr": "max by (instance) (rate(node_network_transmit_bytes_total{device!=\"lo\"}[5m]) / node_network_speed_bytes)",
+          "expr": "max by (instance) (rate(node_network_transmit_bytes_total{device!~\"lo|monitoringvpn\"}[5m]) / node_network_speed_bytes)",
           "interval": "",
           "intervalFactor": 4,
           "legendFormat": "{{instance}} out",
           "refId": "A"
         },
         {
-          "expr": "- max by (instance) (rate(node_network_receive_bytes_total{device!=\"lo\"}[5m]) / node_network_speed_bytes)",
+          "expr": "- max by (instance) (rate(node_network_receive_bytes_total{device!~\"lo|monitoringvpn\"}[5m]) / node_network_speed_bytes)",
           "interval": "",
           "intervalFactor": 4,
           "legendFormat": "{{instance}} in",
@@ -602,28 +602,28 @@
       "steppedLine": false,
       "targets": [
         {
-          "expr": "rate(node_network_reveive_packets_total{device!=\"lo\"}[5m])",
+          "expr": "rate(node_network_receive_packets_total{device!~\"lo|monitoringvpn\"}[5m])",
           "interval": "",
           "intervalFactor": 4,
           "legendFormat": "{{instance}} {{device}}",
           "refId": "A"
         },
         {
-          "expr": "rate(node_network_reveive_errs_total{device!=\"lo\"}[5m])",
+          "expr": "rate(node_network_receive_errs_total{device!~\"lo|monitoringvpn\"}[5m])",
           "interval": "",
           "intervalFactor": 4,
           "legendFormat": "{{instance}} {{device}}",
           "refId": "B"
         },
         {
-          "expr": "- rate(node_network_transmit_packets_total{device!=\"lo\"}[5m])",
+          "expr": "- rate(node_network_transmit_packets_total{device!~\"lo|monitoringvpn\"}[5m])",
           "interval": "",
           "intervalFactor": 4,
           "legendFormat": "{{instance}} {{device}}",
           "refId": "C"
         },
         {
-          "expr": "- rate(node_network_transmit_errs_total{device!=\"lo\"}[5m])",
+          "expr": "- rate(node_network_transmit_errs_total{device!~\"lo|monitoringvpn\"}[5m])",
           "interval": "",
           "intervalFactor": 4,
           "legendFormat": "{{instance}} {{device}}",
@@ -1291,4 +1291,4 @@
   "title": "Resources overview",
   "uid": "ResSatUse",
   "version": 1
-}
\ No newline at end of file
+}
-- 
GitLab