From f7dda51688fbee6f96273b992fe7f4d86088231a Mon Sep 17 00:00:00 2001
From: Florian Sesser <florian@private.storage>
Date: Fri, 15 Oct 2021 14:33:49 +0000
Subject: [PATCH] Monitoring: Add Prometheus Blackbox exporter

The Blackbox exporter can be used to check whether some services respond;
we'll use it for our HTTPS endpoints.
Especially handy is its check for TLS cert expiry.
---
 morph/grid/local/grid.nix                     |  9 ++++-
 morph/lib/customize-monitoring.nix            |  5 +++
 morph/lib/monitoring.nix                      |  1 +
 .../modules/monitoring/exporters/blackbox.nix | 40 +++++++++++++++++++
 .../modules/monitoring/server/prometheus.nix  | 31 ++++++++++++++
 5 files changed, 85 insertions(+), 1 deletion(-)
 create mode 100644 nixos/modules/monitoring/exporters/blackbox.nix

diff --git a/morph/grid/local/grid.nix b/morph/grid/local/grid.nix
index f9778760..5502b8fa 100644
--- a/morph/grid/local/grid.nix
+++ b/morph/grid/local/grid.nix
@@ -112,7 +112,10 @@ let
     imports = [
       gridlib.monitoring
       (gridlib.customize-monitoring {
-        inherit hostsMap vpnClientIPs nodeExporterTargets paymentExporterTargets;
+        inherit hostsMap vpnClientIPs
+                nodeExporterTargets
+                paymentExporterTargets
+                blackboxExporterHttpsTargets;
         inherit (grid-config) letsEncryptAdminEmail;
         googleOAuthClientID = grid-config.monitoringGoogleOAuthClientID;
         enableSlackAlert = false;
@@ -136,6 +139,10 @@ let
   vpnClientIPs = [ "172.23.23.11" "172.23.23.12" "172.23.23.13" ];
   nodeExporterTargets = [ "monitoring" "payments" "storage1" "storage2" ];
   paymentExporterTargets = [ "payments" ];
+  blackboxExporterHttpsTargets = [
+    # "https://private.storage/"
+    # "https://payments.private.storage/"
+  ];
 
 in {
   network = {
diff --git a/morph/lib/customize-monitoring.nix b/morph/lib/customize-monitoring.nix
index d9842692..ef89119f 100644
--- a/morph/lib/customize-monitoring.nix
+++ b/morph/lib/customize-monitoring.nix
@@ -28,6 +28,10 @@
   # which nodes to scrape PaymentServer metrics from.
 , paymentExporterTargets ? []
 
+  # A list of HTTPS servers (URLs, IP addresses or hostnames) as strings indicating
+  # which nodes the BlackboxExporter should scrape HTTP and TLS metrics from.
+, blackboxExporterHttpsTargets ? []
+
   # A string containing the GSuite OAuth2 ClientID to use to authenticate
   # logins to Grafana.
 , googleOAuthClientID
@@ -108,6 +112,7 @@ in {
     inherit nodeExporterTargets;
     inherit nginxExporterTargets;
     inherit paymentExporterTargets;
+    inherit blackboxExporterHttpsTargets;
   };
 
   services.private-storage.monitoring.grafana = {
diff --git a/morph/lib/monitoring.nix b/morph/lib/monitoring.nix
index bf92d104..89a328e8 100644
--- a/morph/lib/monitoring.nix
+++ b/morph/lib/monitoring.nix
@@ -25,6 +25,7 @@
     ../../nixos/modules/monitoring/server/grafana.nix
     ../../nixos/modules/monitoring/server/prometheus.nix
     ../../nixos/modules/monitoring/exporters/node.nix
+    ../../nixos/modules/monitoring/exporters/blackbox.nix
     # Loki 0.3.0 from Nixpkgs 19.09 is too old and does not work:
     # ../../nixos/modules/monitoring/server/loki.nix
   ];
diff --git a/nixos/modules/monitoring/exporters/blackbox.nix b/nixos/modules/monitoring/exporters/blackbox.nix
new file mode 100644
index 00000000..eef377c6
--- /dev/null
+++ b/nixos/modules/monitoring/exporters/blackbox.nix
@@ -0,0 +1,40 @@
+# Prometheus blackbox exporter config
+#
+# Scope: From the monitoring machine, ping (etc.) hosts to check whether
+#        they are reachable, their certs are still valid for a while, etc.
+#
+# Notes: Blackbox exporter is using the "Multi Target Exporter" pattern,
+#        see https://prometheus.io/docs/guides/multi-target-exporter/ .
+#
+# Usage: Import this on a monitoring server
+
+{ config, lib, pkgs, ... }:
+
+let
+
+in {
+  # The default limit of 1024 often is too small, see for example
+  # https://github.com/cloudalchemy/ansible-blackbox-exporter/issues/63
+  config.systemd.services.prometheus-blackbox-exporter.serviceConfig.LimitNOFILE = 65000;
+
+  config.services.prometheus.exporters.blackbox = {
+    enable = true;
+
+    configFile = pkgs.writeText "blackbox-exporter.yaml" (builtins.toJSON {
+      modules = {
+        https_2xx = {
+          prober = "http";
+          timeout = "5s";
+          http = {
+            fail_if_not_ssl = true;
+            # This prober is for IPv4 only.
+            preferred_ip_protocol = "ip4";
+            ip_protocol_fallback = false;
+          };
+        };
+      };
+    });
+
+  };
+}
+
diff --git a/nixos/modules/monitoring/server/prometheus.nix b/nixos/modules/monitoring/server/prometheus.nix
index 1f27f023..316cea89 100644
--- a/nixos/modules/monitoring/server/prometheus.nix
+++ b/nixos/modules/monitoring/server/prometheus.nix
@@ -31,6 +31,11 @@ in {
       example = lib.literalExample "[ node1 node2 ]";
       description = "List of nodes (hostnames or IPs) to scrape.";
     };
+    blackboxExporterHttpsTargets = lib.mkOption {
+      type = with lib.types; listOf str;
+      example = lib.literalExample ''[ "https://node1.com/" "https://node2.org/" ]'';
+      description = "List of https URLs to scrape.";
+    };
   };
 
   config = rec {
@@ -65,6 +70,32 @@ in {
           }];
           relabel_configs = [ dropPortNumber ];
         }
+        {
+          # The Blackbox exporter is using Prometheus' "Multi-Target Exporter Pattern",
+          # see https://prometheus.io/docs/guides/multi-target-exporter/
+          job_name = "blackboxExporterHttps";
+          static_configs = [{
+            targets = cfg.blackboxExporterHttpsTargets;
+          }];
+          metrics_path = "/probe";
+          params.module = [ "https_2xx" ];
+          relabel_configs = [
+            {
+              source_labels = [ "__address__" ];
+              target_label = "__param_target";
+            }
+            {
+              source_labels = [ "__param_target" ];
+              target_label = "instance";
+            }
+            {
+              source_labels = [];
+              target_label = "__address__";
+              # The blackbox exporter’s real hostname:port
+              replacement = "monitoring:9115";
+            }
+          ];
+        }
       ];
     };
   };
-- 
GitLab