diff --git a/morph/grid/local/grid.nix b/morph/grid/local/grid.nix index 55b7587c5bbfb6169b8cc1fda506a8d6c07d4dcd..bca902f20440e60e7d71162f273dd65d67317545 100644 --- a/morph/grid/local/grid.nix +++ b/morph/grid/local/grid.nix @@ -49,7 +49,7 @@ let gridlib.monitoring (gridlib.hardware-virtual ({ publicIPv4 = "192.168.67.24"; })) (gridlib.customize-monitoring { - inherit hostsMap vpnClientIPs nodeExporterTargets; + inherit hostsMap vpnClientIPs nodeExporterTargets paymentExporterTargets; inherit (config) domain publicKeyPath privateKeyPath; monitoringvpnIPv4 = "172.23.23.1"; stateVersion = "19.09"; @@ -66,6 +66,7 @@ let }; vpnClientIPs = [ "172.23.23.11" "172.23.23.12" "172.23.23.13" ]; nodeExporterTargets = [ "monitoring" "payments" "storage1" "storage2" ]; + paymentExporterTargets = [ "payments" ]; in { network = { diff --git a/morph/grid/production/grid.nix b/morph/grid/production/grid.nix index fb680338a08b0006e166b13066199d20f6836e44..1aa605615dc26c9394f4f7143f5f50975107cffe 100644 --- a/morph/grid/production/grid.nix +++ b/morph/grid/production/grid.nix @@ -28,7 +28,7 @@ let gridlib.monitoring gridlib.hardware-aws (gridlib.customize-monitoring { - inherit hostsMap vpnClientIPs nodeExporterTargets; + inherit hostsMap vpnClientIPs nodeExporterTargets paymentExporterTargets; inherit (config) domain publicKeyPath privateKeyPath; monitoringvpnIPv4 = "172.23.23.1"; stateVersion = "19.09"; @@ -103,6 +103,7 @@ let "storage004" "storage005" ]; + paymentExporterTargets = [ "payments" ]; in { network = { diff --git a/morph/grid/testing/grid.nix b/morph/grid/testing/grid.nix index 0cdfe5ae755c88baa128eddbafb14f1b19d6edbf..996b1fba0bd2c12c22b00f549aa26c8b8472653d 100644 --- a/morph/grid/testing/grid.nix +++ b/morph/grid/testing/grid.nix @@ -39,7 +39,7 @@ let gridlib.monitoring gridlib.hardware-aws (gridlib.customize-monitoring { - inherit hostsMap vpnClientIPs nodeExporterTargets; + inherit hostsMap vpnClientIPs nodeExporterTargets paymentExporterTargets; inherit (config) 
domain publicKeyPath privateKeyPath; monitoringvpnIPv4 = "172.23.23.1"; stateVersion = "19.09"; @@ -55,6 +55,7 @@ let }; vpnClientIPs = [ "172.23.23.11" "172.23.23.12" ]; nodeExporterTargets = [ "monitoring" "payments" "storage001" ]; + paymentExporterTargets = [ "payments" ]; in { network = { diff --git a/morph/lib/customize-monitoring.nix b/morph/lib/customize-monitoring.nix index f77d26bd817ebb556c1d22d01e290d2838ab9485..05fe45107e44c583c495ee55aeb9e351ba3871f1 100644 --- a/morph/lib/customize-monitoring.nix +++ b/morph/lib/customize-monitoring.nix @@ -26,6 +26,10 @@ # which nodes to scrape "nginxExporter" metrics from. , nginxExporterTargets ? [] + # A list of VPN clients (IP addresses or hostnames) as strings indicating + # which nodes to scrape PaymentServer metrics from. +, paymentExporterTargets ? [] + # A string giving the NixOS state version for the system. , stateVersion , ... @@ -52,6 +56,7 @@ services.private-storage.monitoring.prometheus = { inherit nodeExporterTargets; inherit nginxExporterTargets; + inherit paymentExporterTargets; }; system.stateVersion = stateVersion; diff --git a/nixos/modules/monitoring/server/grafana-config/services-overview.json b/nixos/modules/monitoring/server/grafana-config/services-overview.json new file mode 100644 index 0000000000000000000000000000000000000000..8ee7b130ce917183edfa26a7a53c1a8df1353303 --- /dev/null +++ b/nixos/modules/monitoring/server/grafana-config/services-overview.json @@ -0,0 +1,664 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": "-- Grafana --", + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "description": "RED: Requests-Errors-Duration for our services", + "editable": true, + "gnetId": null, + "graphTooltip": 0, + "id": 2, + "links": [], + "panels": [ + { + "collapsed": false, + "datasource": null, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 18, + 
"panels": [], + "title": "Payments v1/stripe/charge", + "type": "row" + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "description": "HTTPS responses per second", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 8, + "x": 0, + "y": 1 + }, + "id": 4, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "rate(http_responses_total{path=\"v1/stripe/charge\"}[5m])", + "instant": false, + "legendFormat": "{{status}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "v1/stripe/charge RPS", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "description": "", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 8, + "x": 8, + "y": 1 + }, + "id": 15, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + 
"percentage": true, + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum(http_responses_total{path=\"v1/stripe/charge\", status=\"4XX\"}) / sum(http_responses_total{path=\"v1/stripe/charge\"})", + "legendFormat": "Client error (4XX) rate", + "refId": "A" + }, + { + "expr": "sum(http_responses_total{path=\"v1/stripe/charge\", status=\"5XX\"}) / sum(http_responses_total{path=\"v1/stripe/charge\"})", + "legendFormat": "Server error (5XX) rate", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "v1/stripe/charge error rate", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "percentunit", + "label": null, + "logBase": 1, + "max": "100", + "min": "0", + "show": true + }, + { + "format": "percent", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "cacheTimeout": null, + "dashLength": 10, + "dashes": false, + "datasource": null, + "description": "Requests taking longer than 1 s, between 1 sec and 10 msec, and 10 msec and below", + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 8, + "x": 16, + "y": 1 + }, + "id": 12, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pluginVersion": "6.4.3", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine":
false, + "targets": [ + { + "expr": "http_request_duration_seconds_bucket{path=\"v1/stripe/charge\", le=\"0.01\"}", + "format": "time_series", + "hide": false, + "instant": false, + "intervalFactor": 1, + "legendFormat": "=< 0.01s", + "refId": "A" + }, + { + "expr": "http_request_duration_seconds_bucket{path=\"v1/stripe/charge\", le=\"1.0\"} - ignoring(le) http_request_duration_seconds_bucket{path=\"v1/stripe/charge\", le=\"0.01\"}", + "format": "time_series", + "hide": false, + "instant": false, + "intervalFactor": 1, + "legendFormat": "=< 1s", + "refId": "D" + }, + { + "expr": "http_request_duration_seconds_bucket{path=\"v1/stripe/charge\", le=\"+Inf\"} - ignoring(le) http_request_duration_seconds_bucket{path=\"v1/stripe/charge\", le=\"1.0\"}", + "format": "time_series", + "hide": false, + "instant": false, + "intervalFactor": 1, + "legendFormat": "> 1s", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "v1/stripe/charge durations", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "collapsed": false, + "datasource": null, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 8 + }, + "id": 11, + "panels": [], + "title": "Payments v1/redeem", + "type": "row" + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "description": "HTTPS responses per second", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 8, + "x": 0, + "y": 9 + }, + "id": 2, + "legend": { + "avg": false, + "current": 
false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "rate(http_responses_total{path=\"v1/redeem\"}[5m])", + "instant": false, + "intervalFactor": 1, + "legendFormat": "{{status}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "v1/redeem RPS", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 8, + "x": 8, + "y": 9 + }, + "id": 16, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": true, + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum(http_responses_total{path=\"v1/redeem\", status=\"4XX\"}) / sum(http_responses_total{path=\"v1/redeem\"})", + "legendFormat": "Client error (4XX) rate", + "refId": "A" + }, + { + "expr": 
"sum(http_responses_total{path=\"v1/redeem\", status=\"5XX\"}) / sum(http_responses_total{path=\"v1/redeem\"})", + "legendFormat": "Server error (5XX) rate", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "v1/redeem error rate", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "percentunit", + "label": null, + "logBase": 1, + "max": "100", + "min": "0", + "show": true + }, + { + "format": "percent", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "cacheTimeout": null, + "dashLength": 10, + "dashes": false, + "datasource": null, + "description": "Requests taking longer than 1 s, between 1 sec and 10 msec, and 10 msec and below", + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 8, + "x": 16, + "y": 9 + }, + "id": 13, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pluginVersion": "6.4.3", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "http_request_duration_seconds_bucket{path=\"v1/redeem\", le=\"0.01\"}", + "format": "time_series", + "hide": false, + "instant": false, + "intervalFactor": 1, + "legendFormat": "=< 0.01s", + "refId": "A" + }, + { + "expr": "http_request_duration_seconds_bucket{path=\"v1/redeem\", le=\"1.0\"} - ignoring(le) http_request_duration_seconds_bucket{path=\"v1/redeem\", 
le=\"0.01\"}", + "format": "time_series", + "hide": false, + "instant": false, + "intervalFactor": 1, + "legendFormat": "=< 1s", + "refId": "D" + }, + { + "expr": "http_request_duration_seconds_bucket{path=\"v1/redeem\", le=\"+Inf\"} - ignoring(le) http_request_duration_seconds_bucket{path=\"v1/redeem\", le=\"1.0\"}", + "format": "time_series", + "hide": false, + "instant": false, + "intervalFactor": 1, + "legendFormat": "> 1s", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "v1/redeem durations", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + } + ], + "refresh": "", + "schemaVersion": 20, + "style": "dark", + "tags": [], + "templating": { + "list": [] + }, + "time": { + "from": "now-3h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ] + }, + "timezone": "", + "title": "Services overview", + "uid": "JX3RVEk7k", + "version": 6 +} diff --git a/nixos/modules/monitoring/server/prometheus.nix b/nixos/modules/monitoring/server/prometheus.nix index 36c2ba6402559771dff8771b1369842e21f7ff7f..c92261ccad2ebdd8dd34dda9027e66962b345be9 100644 --- a/nixos/modules/monitoring/server/prometheus.nix +++ b/nixos/modules/monitoring/server/prometheus.nix @@ -26,6 +26,11 @@ in { example = lib.literalExample "[ node1 node2 ]"; description = "List of nodes (hostnames or IPs) to scrape."; }; + paymentExporterTargets = lib.mkOption { + type = with lib.types; listOf str; + example = 
lib.literalExample "[ node1 node2 ]"; + description = "List of nodes (hostnames or IPs) to scrape PaymentServer metrics from."; + }; }; config = rec { @@ -49,6 +54,15 @@ in { }]; relabel_configs = [ dropPortNumber ]; } + { + job_name = "payment-exporters"; + scheme = "https"; + tls_config.insecure_skip_verify = true; + static_configs = [{ + targets = cfg.paymentExporterTargets; + }]; + relabel_configs = [ dropPortNumber ]; + } ]; }; }; diff --git a/nixpkgs.json b/nixpkgs.json index 6b98d3d39cffa2eb1a6dfa7fb5f8c3bea50dfb60..e8a900d44fc2a08a09c4e1f6447da3a4a6a52fd1 100644 --- a/nixpkgs.json +++ b/nixpkgs.json @@ -1,4 +1,4 @@ { "name": "nixpkgs" -, "url": "https://github.com/PrivateStorageio/nixpkgs/archive/7e71ee63a67bd3e2c190abd982b541603f4f86b0.tar.gz" -, "sha256": "1yy89lc0p7hx7x4r2y5ll851mfn4a2lacj5c9v5w139zz17ky743" +, "url": "https://github.com/PrivateStorageio/nixpkgs/archive/788cc5806d46b89013ddd59db589b748bc20435e.tar.gz" +, "sha256": "1mjznn4i4524gl5aiapjpy2jzpac1fzp7jvnkamrh9090ndalhar" }