diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 50a346987f40a460f38037b3cbc0a8f3a6e568b2..7e7348ffeeca9e8e39a16adabd7ce9b3eed0418f 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -67,8 +67,9 @@ unit-tests: stage: "build" script: - | + set -x # GRID is set in one of the "instantiations" of this job template. - nix-shell --run "morph build --show-trace morph/grid/${GRID}/grid.nix" + nix-shell --pure --run "morph build --show-trace morph/grid/${GRID}/grid.nix" morph-build-localdev: @@ -111,7 +112,7 @@ system-tests: timeout: "3 hours" stage: "build" script: - - "nix-build --attr system-tests" + - "nix-shell --pure --run 'nix-build --attr system-tests'" # A template for a job that can update one of the grids. .update-grid: &UPDATE_GRID diff --git a/DEPLOYMENT-NOTES.rst b/DEPLOYMENT-NOTES.rst index 4337b43b8cf365ba37c50ced218cb49e42708a27..79a98dc5676e397287909470382eb5722ee647de 100644 --- a/DEPLOYMENT-NOTES.rst +++ b/DEPLOYMENT-NOTES.rst @@ -1,6 +1,20 @@ Deployment notes ================ +- 2023-06-19 + + ZKAPAuthorizer's Tahoe-LAFS plugin name changed from "privatestorageio-zkapauthz-v1" to "privatestorageio-zkapauthz-v2". + This causes Tahoe-LAFS to use a different filename to persist the plugin's Foolscap fURL. + To preserve the original fURL value (required) each storage node needs this command run before the deployment:: + + cp /var/db/tahoe-lafs/storage/private/storage-plugin.privatestorageio-zkapauthz-v{1,2}.furl + +- 2023-04-19 + + The team switched from Slack to Zulip. + For the monitoring notifications to reach Zulip, a webhook bot has to be created in Zulip and a secret URL has to be constructed as described in `https://zulip.com/integrations/doc/grafana`_ and added to the ``private_keys`` directory (See ``grid/local/private-keys/grafana-zulip-url`` for an example). + Find the secret URL for production at `https://my.1password.com/vaults/7flqasy5hhhmlbtp5qozd3j4ga/allitems/rb22ipb6gvokohzq2d2hhv6t6u`_. + - 2021-12-20 `https://whetstone.private.storage/privatestorage/privatestorageops/-/issues/399`_ requires moving the PaymentServer database on the ``payments`` host onto a new dedicated filesystem. diff --git a/README.rst b/README.rst index 46511c81f24136615f0513674f8091aad1201262..462bcb988d53f04357ed64d7d6f70f2e52ff4098 100644 --- a/README.rst +++ b/README.rst @@ -1,8 +1,3 @@ -Project Hosting Moved -===================== - -This project can now be found at https://whetstone.private.storage/privatestorage/PrivateStorageio - PrivateStorageio ================ @@ -20,5 +15,6 @@ The documentation can be built using this command:: $ nix-build docs.nix -The documentation is also built on and published by CI. +The documentation is also built on and published by CI: +Navigate to the `list of finished jobs <https://whetstone.private.storage/privatestorage/PrivateStorageio/-/jobs>`_ and download the artefact of the latest ``docs`` build. diff --git a/admin/create-payment-link.sh b/admin/create-payment-link.sh new file mode 100644 index 0000000000000000000000000000000000000000..d63ed9cb45c6c448dea497231654609f5f2c18e4 --- /dev/null +++ b/admin/create-payment-link.sh @@ -0,0 +1,45 @@ +#!/usr/bin/env bash + +set -euo pipefail + +KEY=$1 +shift + +DOMAIN=$1 +shift + +PRODUCT_ID=$( + curl https://api.stripe.com/v1/products \ + -u "${KEY}:" \ + -d "name=30 GB-months" \ + -d "description=30 GB-months of Private.Storage storage × time" \ + -d "statement_descriptor=PRIVATE STORAGE" \ + -d "url=https://${DOMAIN}/" | + jp --unquoted id + ) + +echo "Product: $PRODUCT_ID" + +PRICE_ID=$( + curl https://api.stripe.com/v1/prices \ + -u "${KEY}:" \ + -d "currency=USD" \ + -d "unit_amount=650" \ + -d "tax_behavior=exclusive" \ + -d "product=${PRODUCT_ID}" | + jp --unquoted id + ) + +echo "Price: $PRICE_ID" + +LINK_URL=$( + curl https://api.stripe.com/v1/payment_links \ + -u "${KEY}:" \ + -d "line_items[0][price]=${PRICE_ID}" \ + -d "line_items[0][quantity]=1" \ + -d "after_completion[type]"=redirect \ + -d "after_completion[redirect][url]"="https://${DOMAIN}/payment/success" | + jp --unquoted url + ) + +echo "Payment link: $LINK_URL" diff --git a/admin/create-webhook.sh b/admin/create-webhook.sh new file mode 100644 index 0000000000000000000000000000000000000000..eacc3308e4c1e29a02372a69868a79daf435657a --- /dev/null +++ b/admin/create-webhook.sh @@ -0,0 +1,15 @@ +#!/usr/bin/env bash + +set -euo pipefail + +KEY=$1 +shift + +DOMAIN=$1 +shift + +curl \ + https://api.stripe.com/v1/webhook_endpoints \ + -u "${KEY}:" \ + -d url="https://payments.${DOMAIN}/v1/stripe/webhook" \ + -d "enabled_events[]"="checkout.session.completed" diff --git a/docs/conf.py b/docs/conf.py index 747a90a8cc039e65fd01c3d598170c001599c1c8..f3347073a977bdc170c3e1639c5610bf96b790b0 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -61,7 +61,7 @@ master_doc = 'index' # # This is also used if you do content translation via gettext catalogs. # Usually you set "language" from the command line for these cases. -language = None +language = 'en' # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. diff --git a/docs/dev/README.rst b/docs/dev/README.rst index 0c688021dfa0aef8d4e10a6c6501dd5a6a5b6d23..c518c69bfc30025dbe297d3e62b12c5fe8ff9f64 100644 --- a/docs/dev/README.rst +++ b/docs/dev/README.rst @@ -24,7 +24,6 @@ The system tests are run using this command:: $ nix-build --attr system-tests -The system tests boot QEMU VMs which prevents them from running on CI at this time. The build requires > 10 GB of disk space, and the VMs might be timing out on slow or busy machines. If you run into timeouts, diff --git a/docs/ops/README.rst b/docs/ops/README.rst index 8026e1673b0dbed3708b1c4b0e7b600c049fabde..109b29d30eb60564c3305c6b97b0d19468dbd3a8 100644 --- a/docs/ops/README.rst +++ b/docs/ops/README.rst @@ -10,3 +10,4 @@ This contains documentation regarding running PrivateStorageio. monitoring generating-keys backup-recovery + stripe diff --git a/docs/ops/monitoring-architecture.drawio b/docs/ops/monitoring-architecture.drawio new file mode 100644 index 0000000000000000000000000000000000000000..a9788eeeffdf745a842d28e2cf41b63eb164252b --- /dev/null +++ b/docs/ops/monitoring-architecture.drawio @@ -0,0 +1,127 @@ +<mxfile host="app.diagrams.net" modified="2023-04-20T20:17:44.466Z" agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36" etag="8PyLTVr0G94q4Dna4Dsz" version="21.2.1" type="device"> + <diagram name="Page-1" id="aaaa8250-4180-3840-79b5-4cada1eebb92"> + <mxGraphModel dx="794" dy="476" grid="1" gridSize="10" guides="1" tooltips="1" connect="1" arrows="1" fold="1" page="1" pageScale="1" pageWidth="1920" pageHeight="1200" background="#ffffff" math="0" shadow="0"> + <root> + <mxCell id="0" /> + <mxCell id="1" parent="0" /> + <mxCell id="vhdg0YFc32S7_3H95Ew1-1" value="<span>Management VPN<br>(Wireshark, TINC...)</span>" style="ellipse;shape=cloud;whiteSpace=wrap;html=1;" parent="1" vertex="1"> + <mxGeometry x="780" y="720" width="450" height="110" as="geometry" /> + </mxCell> + <mxCell id="2mYkRctJDop23S32jJdh-2" value="" style="rounded=0;whiteSpace=wrap;html=1;" parent="1" vertex="1"> + <mxGeometry x="840" y="405.46" width="370" height="304.54" as="geometry" /> + </mxCell> + <mxCell id="TrmSFti5pUnXIGkjMKb6-3" value="Loki" style="verticalLabelPosition=bottom;html=1;verticalAlign=top;strokeWidth=1;align=center;outlineConnect=0;dashed=0;outlineConnect=0;shape=mxgraph.aws3d.application2;fillColor=#86E83A;strokeColor=#B0F373;aspect=fixed;" parent="1" vertex="1"> + <mxGeometry x="1116" y="592.9000000000001" width="62" height="53" as="geometry" /> + </mxCell> + <mxCell id="TrmSFti5pUnXIGkjMKb6-4" value="Prometheus" style="verticalLabelPosition=bottom;html=1;verticalAlign=top;strokeWidth=1;align=center;outlineConnect=0;dashed=0;outlineConnect=0;shape=mxgraph.aws3d.application;fillColor=#4286c5;strokeColor=#57A2D8;aspect=fixed;" parent="1" vertex="1"> + <mxGeometry x="866" y="585" width="62" height="68.8" as="geometry" /> + </mxCell> + <mxCell id="TrmSFti5pUnXIGkjMKb6-5" value="Grafana" style="verticalLabelPosition=bottom;html=1;verticalAlign=top;strokeWidth=1;align=center;outlineConnect=0;dashed=0;outlineConnect=0;shape=mxgraph.aws3d.ami2;aspect=fixed;fillColor=#FF9900;strokeColor=#ffffff;" parent="1" vertex="1"> + <mxGeometry x="996" y="425" width="74" height="50" as="geometry" /> + </mxCell> + <mxCell id="TrmSFti5pUnXIGkjMKb6-6" value="Node 1" style="verticalLabelPosition=bottom;html=1;verticalAlign=top;strokeWidth=1;align=center;outlineConnect=0;dashed=0;outlineConnect=0;shape=mxgraph.aws3d.worker;fillColor=#ECECEC;strokeColor=#5E5E5E;aspect=fixed;" parent="1" vertex="1"> + <mxGeometry x="779" y="845" width="74" height="50" as="geometry" /> + </mxCell> + <mxCell id="TrmSFti5pUnXIGkjMKb6-7" value="Operator" style="verticalLabelPosition=bottom;html=1;verticalAlign=top;strokeWidth=1;align=center;outlineConnect=0;dashed=0;outlineConnect=0;shape=mxgraph.aws3d.end_user;strokeColor=#9673a6;fillColor=#e1d5e7;aspect=fixed;" parent="1" vertex="1"> + <mxGeometry x="1276" y="305" width="49" height="100.46" as="geometry" /> + </mxCell> + <mxCell id="TrmSFti5pUnXIGkjMKb6-16" value="Node ..." style="verticalLabelPosition=bottom;html=1;verticalAlign=top;strokeWidth=1;align=center;outlineConnect=0;dashed=0;outlineConnect=0;shape=mxgraph.aws3d.worker;fillColor=#ECECEC;strokeColor=#5E5E5E;aspect=fixed;" parent="1" vertex="1"> + <mxGeometry x="902" y="845" width="74" height="50" as="geometry" /> + </mxCell> + <mxCell id="TrmSFti5pUnXIGkjMKb6-17" value="Node ..." style="verticalLabelPosition=bottom;html=1;verticalAlign=top;strokeWidth=1;align=center;outlineConnect=0;dashed=0;outlineConnect=0;shape=mxgraph.aws3d.worker;fillColor=#ECECEC;strokeColor=#5E5E5E;aspect=fixed;" parent="1" vertex="1"> + <mxGeometry x="1025" y="845" width="74" height="50" as="geometry" /> + </mxCell> + <mxCell id="TrmSFti5pUnXIGkjMKb6-18" value="Node N" style="verticalLabelPosition=bottom;html=1;verticalAlign=top;strokeWidth=1;align=center;outlineConnect=0;dashed=0;outlineConnect=0;shape=mxgraph.aws3d.worker;fillColor=#ECECEC;strokeColor=#5E5E5E;aspect=fixed;" parent="1" vertex="1"> + <mxGeometry x="1147.5" y="845" width="74" height="50" as="geometry" /> + </mxCell> + <mxCell id="TrmSFti5pUnXIGkjMKb6-45" value="" style="endArrow=classic;html=1;strokeColor=#6c8ebf;strokeWidth=1;fillColor=#dae8fc;" parent="1" edge="1"> + <mxGeometry x="806" y="695" width="50" height="50" as="geometry"> + <mxPoint x="894.6666666666666" y="695" as="sourcePoint" /> + <mxPoint x="936" y="835" as="targetPoint" /> + </mxGeometry> + </mxCell> + <mxCell id="TrmSFti5pUnXIGkjMKb6-46" value="" style="endArrow=classic;html=1;strokeColor=#6c8ebf;strokeWidth=1;fillColor=#dae8fc;" parent="1" edge="1"> + <mxGeometry x="806" y="695" width="50" height="50" as="geometry"> + <mxPoint x="894.6666666666666" y="695" as="sourcePoint" /> + <mxPoint x="1046" y="835" as="targetPoint" /> + </mxGeometry> + </mxCell> + <mxCell id="TrmSFti5pUnXIGkjMKb6-47" value="" style="endArrow=classic;html=1;strokeColor=#6c8ebf;strokeWidth=1;fillColor=#dae8fc;" parent="1" edge="1"> + <mxGeometry x="806" y="695" width="50" height="50" as="geometry"> + <mxPoint x="894.6666666666666" y="695" as="sourcePoint" /> + <mxPoint x="1166" y="835" as="targetPoint" /> + </mxGeometry> + </mxCell> + <mxCell id="TrmSFti5pUnXIGkjMKb6-50" value="" style="endArrow=classic;html=1;strokeColor=#82b366;strokeWidth=1;fillColor=#d5e8d4;" parent="1" edge="1"> + <mxGeometry x="818.6666666666667" y="695" width="63.33333333333333" height="75" as="geometry"> + <mxPoint x="846" y="835" as="sourcePoint" /> + <mxPoint x="1148" y="695" as="targetPoint" /> + </mxGeometry> + </mxCell> + <mxCell id="TrmSFti5pUnXIGkjMKb6-51" value="" style="endArrow=classic;html=1;strokeColor=#82b366;strokeWidth=1;fillColor=#d5e8d4;" parent="1" edge="1"> + <mxGeometry x="818.6666666666667" y="695" width="63.33333333333333" height="75" as="geometry"> + <mxPoint x="946" y="835" as="sourcePoint" /> + <mxPoint x="1148" y="695" as="targetPoint" /> + </mxGeometry> + </mxCell> + <mxCell id="TrmSFti5pUnXIGkjMKb6-52" value="" style="endArrow=classic;html=1;strokeColor=#82b366;strokeWidth=1;fillColor=#d5e8d4;" parent="1" edge="1"> + <mxGeometry x="818.6666666666667" y="695" width="63.33333333333333" height="75" as="geometry"> + <mxPoint x="1056" y="835" as="sourcePoint" /> + <mxPoint x="1148" y="695" as="targetPoint" /> + </mxGeometry> + </mxCell> + <mxCell id="TrmSFti5pUnXIGkjMKb6-53" value="" style="endArrow=classic;html=1;strokeColor=#82b366;strokeWidth=1;fillColor=#d5e8d4;" parent="1" edge="1"> + <mxGeometry x="818.6666666666667" y="695" width="63.33333333333333" height="75" as="geometry"> + <mxPoint x="1186" y="835" as="sourcePoint" /> + <mxPoint x="1148" y="695" as="targetPoint" /> + </mxGeometry> + </mxCell> + <mxCell id="TrmSFti5pUnXIGkjMKb6-57" value="" style="endArrow=classic;html=1;strokeColor=#d79b00;strokeWidth=1;fillColor=#ffe6cc;" parent="1" edge="1"> + <mxGeometry width="50" height="50" relative="1" as="geometry"> + <mxPoint x="988" y="495" as="sourcePoint" /> + <mxPoint x="928" y="565" as="targetPoint" /> + </mxGeometry> + </mxCell> + <mxCell id="TrmSFti5pUnXIGkjMKb6-58" value="" style="endArrow=classic;html=1;strokeColor=#d79b00;strokeWidth=1;fillColor=#ffe6cc;" parent="1" edge="1"> + <mxGeometry width="50" height="50" relative="1" as="geometry"> + <mxPoint x="1078" y="495" as="sourcePoint" /> + <mxPoint x="1136" y="565" as="targetPoint" /> + </mxGeometry> + </mxCell> + <mxCell id="TrmSFti5pUnXIGkjMKb6-61" value="View dashboards<br>in browser" style="shape=flexArrow;endArrow=classic;html=1;strokeColor=#9673a6;strokeWidth=1;fillColor=#e1d5e7;spacing=8;" parent="1" edge="1"> + <mxGeometry width="50" height="50" relative="1" as="geometry"> + <mxPoint x="1259.5" y="415" as="sourcePoint" /> + <mxPoint x="1109.5" y="445" as="targetPoint" /> + </mxGeometry> + </mxCell> + <mxCell id="TrmSFti5pUnXIGkjMKb6-63" value="<h1>Monitoring architecture&nbsp;</h1><p>Keep it simple, sunshine!<br><br></p><p></p><i>Grafana</i> retrieves metrics from <i>Prometheus</i> and logs from&nbsp;<i>Loki</i>,&nbsp;<span>shows dashboards (web) and does alerting (via eMail? Slack?)</span><br><p><i>Prometheus</i> stores metrics it pulls from various <i>Exporters</i> on nodes</p><p><i>Promtail</i> on nodes pushes logs to <i>Loki<br><br></i></p><p>We try to keep the system as simple as possible: All monitoring and alerting runs on a single machine.</p><h2>Changes</h2><div>v2: Add Github authentication to Grafana. Add management VPN.<br><br>v1: Initial version</div>" style="text;html=1;strokeColor=none;fillColor=none;spacing=5;spacingTop=-20;whiteSpace=wrap;overflow=hidden;rounded=0;shadow=0;comic=0;sketch=0;" parent="1" vertex="1"> + <mxGeometry x="596" y="315" width="164" height="545" as="geometry" /> + </mxCell> + <mxCell id="TrmSFti5pUnXIGkjMKb6-65" value="Send alerts" style="shape=flexArrow;endArrow=classic;html=1;strokeColor=#9673a6;strokeWidth=1;fillColor=#e1d5e7;" parent="1" edge="1"> + <mxGeometry width="50" height="50" relative="1" as="geometry"> + <mxPoint x="1086" y="405.46000000000004" as="sourcePoint" /> + <mxPoint x="1236" y="375.46000000000004" as="targetPoint" /> + </mxGeometry> + </mxCell> + <mxCell id="2mYkRctJDop23S32jJdh-3" value="Monitoring server" style="text;html=1;strokeColor=none;fillColor=none;align=left;verticalAlign=middle;whiteSpace=wrap;rounded=0;" parent="1" vertex="1"> + <mxGeometry x="845" y="411" width="120" height="20" as="geometry" /> + </mxCell> + <mxCell id="TrmSFti5pUnXIGkjMKb6-44" value="" style="endArrow=classic;html=1;strokeColor=#6c8ebf;strokeWidth=1;fillColor=#dae8fc;" parent="1" edge="1"> + <mxGeometry x="806" y="695" width="50" height="50" as="geometry"> + <mxPoint x="894.6666666666666" y="695" as="sourcePoint" /> + <mxPoint x="826" y="835" as="targetPoint" /> + </mxGeometry> + </mxCell> + <mxCell id="vhdg0YFc32S7_3H95Ew1-7" value="GitHub<br>OAuth2" style="ellipse;shape=cloud;whiteSpace=wrap;html=1;" parent="1" vertex="1"> + <mxGeometry x="984" y="305" width="98" height="60" as="geometry" /> + </mxCell> + <mxCell id="vhdg0YFc32S7_3H95Ew1-8" value="" style="endArrow=classic;html=1;strokeColor=#d79b00;strokeWidth=1;fillColor=#ffe6cc;" parent="1" edge="1"> + <mxGeometry width="50" height="50" relative="1" as="geometry"> + <mxPoint x="1032.76" y="417" as="sourcePoint" /> + <mxPoint x="1032.76" y="372" as="targetPoint" /> + </mxGeometry> + </mxCell> + </root> + </mxGraphModel> + </diagram> +</mxfile> diff --git a/docs/ops/monitoring-architecture.html b/docs/ops/monitoring-architecture.html new file mode 100644 index 0000000000000000000000000000000000000000..91a6000fb4e8f6791fb26e8a83bc393024e52ca8 --- /dev/null +++ b/docs/ops/monitoring-architecture.html @@ -0,0 +1,12 @@ +<!--[if IE]><meta http-equiv="X-UA-Compatible" content="IE=5,IE=9" ><![endif]--> +<!DOCTYPE html> +<html> +<head> +<title>monitoring-architecture.html</title> +<meta charset="utf-8"/> +</head> +<body> +<div class="mxgraph" style="max-width:100%;border:1px solid transparent;" data-mxgraph="{"highlight":"#0000ff","nav":true,"resize":true,"xml":"<mxfile host=\"app.diagrams.net\" modified=\"2023-04-20T20:19:05.428Z\" agent=\"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36\" etag=\"4ETb3iIJGh8a_GDWm07E\" version=\"21.2.1\" type=\"device\"><diagram name=\"Page-1\" id=\"aaaa8250-4180-3840-79b5-4cada1eebb92\"><mxGraphModel dx=\"794\" dy=\"476\" grid=\"1\" gridSize=\"10\" guides=\"1\" tooltips=\"1\" connect=\"1\" arrows=\"1\" fold=\"1\" page=\"1\" pageScale=\"1\" pageWidth=\"1920\" pageHeight=\"1200\" background=\"#ffffff\" math=\"0\" shadow=\"0\"><root><mxCell id=\"0\"/><mxCell id=\"1\" parent=\"0\"/><mxCell id=\"vhdg0YFc32S7_3H95Ew1-1\" value=\"&lt;span&gt;Management VPN&lt;br&gt;(WireGuard)&lt;/span&gt;\" style=\"ellipse;shape=cloud;whiteSpace=wrap;html=1;\" parent=\"1\" vertex=\"1\"><mxGeometry x=\"780\" y=\"720\" width=\"450\" height=\"110\" as=\"geometry\"/></mxCell><mxCell id=\"2mYkRctJDop23S32jJdh-2\" value=\"\" style=\"rounded=0;whiteSpace=wrap;html=1;\" parent=\"1\" vertex=\"1\"><mxGeometry x=\"840\" y=\"405.46\" width=\"370\" height=\"304.54\" as=\"geometry\"/></mxCell><mxCell id=\"TrmSFti5pUnXIGkjMKb6-3\" value=\"Loki\" style=\"verticalLabelPosition=bottom;html=1;verticalAlign=top;strokeWidth=1;align=center;outlineConnect=0;dashed=0;outlineConnect=0;shape=mxgraph.aws3d.application2;fillColor=#86E83A;strokeColor=#B0F373;aspect=fixed;\" parent=\"1\" vertex=\"1\"><mxGeometry x=\"1116\" y=\"592.9000000000001\" width=\"62\" height=\"53\" as=\"geometry\"/></mxCell><mxCell id=\"TrmSFti5pUnXIGkjMKb6-4\" value=\"Prometheus\" style=\"verticalLabelPosition=bottom;html=1;verticalAlign=top;strokeWidth=1;align=center;outlineConnect=0;dashed=0;outlineConnect=0;shape=mxgraph.aws3d.application;fillColor=#4286c5;strokeColor=#57A2D8;aspect=fixed;\" parent=\"1\" vertex=\"1\"><mxGeometry x=\"866\" y=\"585\" width=\"62\" height=\"68.8\" as=\"geometry\"/></mxCell><mxCell id=\"TrmSFti5pUnXIGkjMKb6-5\" value=\"Grafana\" style=\"verticalLabelPosition=bottom;html=1;verticalAlign=top;strokeWidth=1;align=center;outlineConnect=0;dashed=0;outlineConnect=0;shape=mxgraph.aws3d.ami2;aspect=fixed;fillColor=#FF9900;strokeColor=#ffffff;\" parent=\"1\" vertex=\"1\"><mxGeometry x=\"996\" y=\"425\" width=\"74\" height=\"50\" as=\"geometry\"/></mxCell><mxCell id=\"TrmSFti5pUnXIGkjMKb6-6\" value=\"Node 1\" style=\"verticalLabelPosition=bottom;html=1;verticalAlign=top;strokeWidth=1;align=center;outlineConnect=0;dashed=0;outlineConnect=0;shape=mxgraph.aws3d.worker;fillColor=#ECECEC;strokeColor=#5E5E5E;aspect=fixed;\" parent=\"1\" vertex=\"1\"><mxGeometry x=\"779\" y=\"845\" width=\"74\" height=\"50\" as=\"geometry\"/></mxCell><mxCell id=\"TrmSFti5pUnXIGkjMKb6-7\" value=\"Operator\" style=\"verticalLabelPosition=bottom;html=1;verticalAlign=top;strokeWidth=1;align=center;outlineConnect=0;dashed=0;outlineConnect=0;shape=mxgraph.aws3d.end_user;strokeColor=#9673a6;fillColor=#e1d5e7;aspect=fixed;\" parent=\"1\" vertex=\"1\"><mxGeometry x=\"1276\" y=\"305\" width=\"49\" height=\"100.46\" as=\"geometry\"/></mxCell><mxCell id=\"TrmSFti5pUnXIGkjMKb6-16\" value=\"Node ...\" style=\"verticalLabelPosition=bottom;html=1;verticalAlign=top;strokeWidth=1;align=center;outlineConnect=0;dashed=0;outlineConnect=0;shape=mxgraph.aws3d.worker;fillColor=#ECECEC;strokeColor=#5E5E5E;aspect=fixed;\" parent=\"1\" vertex=\"1\"><mxGeometry x=\"902\" y=\"845\" width=\"74\" height=\"50\" as=\"geometry\"/></mxCell><mxCell id=\"TrmSFti5pUnXIGkjMKb6-17\" value=\"Node ...\" style=\"verticalLabelPosition=bottom;html=1;verticalAlign=top;strokeWidth=1;align=center;outlineConnect=0;dashed=0;outlineConnect=0;shape=mxgraph.aws3d.worker;fillColor=#ECECEC;strokeColor=#5E5E5E;aspect=fixed;\" parent=\"1\" vertex=\"1\"><mxGeometry x=\"1025\" y=\"845\" width=\"74\" height=\"50\" as=\"geometry\"/></mxCell><mxCell id=\"TrmSFti5pUnXIGkjMKb6-18\" value=\"Node N\" style=\"verticalLabelPosition=bottom;html=1;verticalAlign=top;strokeWidth=1;align=center;outlineConnect=0;dashed=0;outlineConnect=0;shape=mxgraph.aws3d.worker;fillColor=#ECECEC;strokeColor=#5E5E5E;aspect=fixed;\" parent=\"1\" vertex=\"1\"><mxGeometry x=\"1147.5\" y=\"845\" width=\"74\" height=\"50\" as=\"geometry\"/></mxCell><mxCell id=\"TrmSFti5pUnXIGkjMKb6-45\" value=\"\" style=\"endArrow=classic;html=1;strokeColor=#6c8ebf;strokeWidth=1;fillColor=#dae8fc;\" parent=\"1\" edge=\"1\"><mxGeometry x=\"806\" y=\"695\" width=\"50\" height=\"50\" as=\"geometry\"><mxPoint x=\"894.6666666666666\" y=\"695\" as=\"sourcePoint\"/><mxPoint x=\"936\" y=\"835\" as=\"targetPoint\"/></mxGeometry></mxCell><mxCell id=\"TrmSFti5pUnXIGkjMKb6-46\" value=\"\" style=\"endArrow=classic;html=1;strokeColor=#6c8ebf;strokeWidth=1;fillColor=#dae8fc;\" parent=\"1\" edge=\"1\"><mxGeometry x=\"806\" y=\"695\" width=\"50\" height=\"50\" as=\"geometry\"><mxPoint x=\"894.6666666666666\" y=\"695\" as=\"sourcePoint\"/><mxPoint x=\"1046\" y=\"835\" as=\"targetPoint\"/></mxGeometry></mxCell><mxCell id=\"TrmSFti5pUnXIGkjMKb6-47\" value=\"\" style=\"endArrow=classic;html=1;strokeColor=#6c8ebf;strokeWidth=1;fillColor=#dae8fc;\" parent=\"1\" edge=\"1\"><mxGeometry x=\"806\" y=\"695\" width=\"50\" height=\"50\" as=\"geometry\"><mxPoint x=\"894.6666666666666\" y=\"695\" as=\"sourcePoint\"/><mxPoint x=\"1166\" y=\"835\" as=\"targetPoint\"/></mxGeometry></mxCell><mxCell id=\"TrmSFti5pUnXIGkjMKb6-50\" value=\"\" style=\"endArrow=classic;html=1;strokeColor=#82b366;strokeWidth=1;fillColor=#d5e8d4;\" parent=\"1\" edge=\"1\"><mxGeometry x=\"818.6666666666667\" y=\"695\" width=\"63.33333333333333\" height=\"75\" as=\"geometry\"><mxPoint x=\"846\" y=\"835\" as=\"sourcePoint\"/><mxPoint x=\"1148\" y=\"695\" as=\"targetPoint\"/></mxGeometry></mxCell><mxCell id=\"TrmSFti5pUnXIGkjMKb6-51\" value=\"\" style=\"endArrow=classic;html=1;strokeColor=#82b366;strokeWidth=1;fillColor=#d5e8d4;\" parent=\"1\" edge=\"1\"><mxGeometry x=\"818.6666666666667\" y=\"695\" width=\"63.33333333333333\" height=\"75\" as=\"geometry\"><mxPoint x=\"946\" y=\"835\" as=\"sourcePoint\"/><mxPoint x=\"1148\" y=\"695\" as=\"targetPoint\"/></mxGeometry></mxCell><mxCell id=\"TrmSFti5pUnXIGkjMKb6-52\" value=\"\" style=\"endArrow=classic;html=1;strokeColor=#82b366;strokeWidth=1;fillColor=#d5e8d4;\" parent=\"1\" edge=\"1\"><mxGeometry x=\"818.6666666666667\" y=\"695\" width=\"63.33333333333333\" height=\"75\" as=\"geometry\"><mxPoint x=\"1056\" y=\"835\" as=\"sourcePoint\"/><mxPoint x=\"1148\" y=\"695\" as=\"targetPoint\"/></mxGeometry></mxCell><mxCell id=\"TrmSFti5pUnXIGkjMKb6-53\" value=\"\" style=\"endArrow=classic;html=1;strokeColor=#82b366;strokeWidth=1;fillColor=#d5e8d4;\" parent=\"1\" edge=\"1\"><mxGeometry x=\"818.6666666666667\" y=\"695\" width=\"63.33333333333333\" height=\"75\" as=\"geometry\"><mxPoint x=\"1186\" y=\"835\" as=\"sourcePoint\"/><mxPoint x=\"1148\" y=\"695\" as=\"targetPoint\"/></mxGeometry></mxCell><mxCell id=\"TrmSFti5pUnXIGkjMKb6-57\" value=\"\" style=\"endArrow=classic;html=1;strokeColor=#d79b00;strokeWidth=1;fillColor=#ffe6cc;\" parent=\"1\" edge=\"1\"><mxGeometry width=\"50\" height=\"50\" relative=\"1\" as=\"geometry\"><mxPoint x=\"988\" y=\"495\" as=\"sourcePoint\"/><mxPoint x=\"928\" y=\"565\" as=\"targetPoint\"/></mxGeometry></mxCell><mxCell id=\"TrmSFti5pUnXIGkjMKb6-58\" value=\"\" style=\"endArrow=classic;html=1;strokeColor=#d79b00;strokeWidth=1;fillColor=#ffe6cc;\" parent=\"1\" edge=\"1\"><mxGeometry width=\"50\" height=\"50\" relative=\"1\" as=\"geometry\"><mxPoint x=\"1078\" y=\"495\" as=\"sourcePoint\"/><mxPoint x=\"1136\" y=\"565\" as=\"targetPoint\"/></mxGeometry></mxCell><mxCell id=\"TrmSFti5pUnXIGkjMKb6-61\" value=\"View dashboards&lt;br&gt;in browser\" style=\"shape=flexArrow;endArrow=classic;html=1;strokeColor=#9673a6;strokeWidth=1;fillColor=#e1d5e7;spacing=8;\" parent=\"1\" edge=\"1\"><mxGeometry width=\"50\" height=\"50\" relative=\"1\" as=\"geometry\"><mxPoint x=\"1259.5\" y=\"415\" as=\"sourcePoint\"/><mxPoint x=\"1109.5\" y=\"445\" as=\"targetPoint\"/></mxGeometry></mxCell><mxCell id=\"TrmSFti5pUnXIGkjMKb6-63\" value=\"&lt;h1&gt;Monitoring architecture&amp;nbsp;&lt;/h1&gt;&lt;p&gt;Keep it simple, sunshine!&lt;br&gt;&lt;br&gt;&lt;/p&gt;&lt;p&gt;&lt;/p&gt;&lt;i&gt;Grafana&lt;/i&gt; retrieves metrics from &lt;i&gt;Prometheus&lt;/i&gt; and logs from&amp;nbsp;&lt;i&gt;Loki&lt;/i&gt;,&amp;nbsp;&lt;span&gt;shows dashboards (in a web browser) and does alerting (via Zulip)&lt;/span&gt;&lt;br&gt;&lt;p&gt;&lt;i&gt;Prometheus&lt;/i&gt; stores metrics it pulls from various &lt;i&gt;Exporters&lt;/i&gt; on nodes&lt;/p&gt;&lt;p&gt;&lt;i&gt;Promtail&lt;/i&gt; on nodes pushes logs to &lt;i&gt;Loki&lt;br&gt;&lt;br&gt;&lt;/i&gt;&lt;/p&gt;&lt;p&gt;We try to keep the system as simple as possible: All monitoring and alerting runs on a single machine.&lt;/p&gt;&lt;h2&gt;Changes&lt;/h2&gt;&lt;div&gt;v3: Fix WireGuard/Wireshark braino, Update Auth (Google, not GitHub)&lt;/div&gt;&lt;div&gt;&lt;br&gt;&lt;/div&gt;&lt;div&gt;v2: Add Github authentication to Grafana. Add management VPN.&lt;br&gt;&lt;br&gt;v1: Initial version&lt;/div&gt;\" style=\"text;html=1;strokeColor=none;fillColor=none;spacing=5;spacingTop=-20;whiteSpace=wrap;overflow=hidden;rounded=0;shadow=0;comic=0;sketch=0;\" parent=\"1\" vertex=\"1\"><mxGeometry x=\"596\" y=\"315\" width=\"164\" height=\"605\" as=\"geometry\"/></mxCell><mxCell id=\"TrmSFti5pUnXIGkjMKb6-65\" value=\"Send alerts\" style=\"shape=flexArrow;endArrow=classic;html=1;strokeColor=#9673a6;strokeWidth=1;fillColor=#e1d5e7;\" parent=\"1\" edge=\"1\"><mxGeometry width=\"50\" height=\"50\" relative=\"1\" as=\"geometry\"><mxPoint x=\"1086\" y=\"405.46000000000004\" as=\"sourcePoint\"/><mxPoint x=\"1236\" y=\"375.46000000000004\" as=\"targetPoint\"/></mxGeometry></mxCell><UserObject label=\"Monitoring server\" link=\"https://monitoring.private.storage/\" id=\"2mYkRctJDop23S32jJdh-3\"><mxCell style=\"text;html=1;strokeColor=none;fillColor=none;align=left;verticalAlign=middle;whiteSpace=wrap;rounded=0;\" parent=\"1\" vertex=\"1\"><mxGeometry x=\"845\" y=\"411\" width=\"120\" height=\"20\" as=\"geometry\"/></mxCell></UserObject><mxCell id=\"TrmSFti5pUnXIGkjMKb6-44\" value=\"\" style=\"endArrow=classic;html=1;strokeColor=#6c8ebf;strokeWidth=1;fillColor=#dae8fc;\" parent=\"1\" edge=\"1\"><mxGeometry x=\"806\" y=\"695\" width=\"50\" height=\"50\" as=\"geometry\"><mxPoint x=\"894.6666666666666\" y=\"695\" as=\"sourcePoint\"/><mxPoint x=\"826\" y=\"835\" as=\"targetPoint\"/></mxGeometry></mxCell><mxCell id=\"vhdg0YFc32S7_3H95Ew1-7\" value=\"GSuite&lt;br&gt;OAuth2\" style=\"ellipse;shape=cloud;whiteSpace=wrap;html=1;\" parent=\"1\" vertex=\"1\"><mxGeometry x=\"984\" y=\"305\" width=\"98\" height=\"60\" as=\"geometry\"/></mxCell><mxCell id=\"vhdg0YFc32S7_3H95Ew1-8\" value=\"\" style=\"endArrow=classic;html=1;strokeColor=#d79b00;strokeWidth=1;fillColor=#ffe6cc;\" parent=\"1\" edge=\"1\"><mxGeometry width=\"50\" height=\"50\" relative=\"1\" as=\"geometry\"><mxPoint x=\"1032.76\" y=\"417\" as=\"sourcePoint\"/><mxPoint x=\"1032.76\" y=\"372\" as=\"targetPoint\"/></mxGeometry></mxCell></root></mxGraphModel></diagram></mxfile>","toolbar":"pages zoom layers lightbox","page":0}"></div> +<script type="text/javascript" src="https://app.diagrams.net/js/viewer-static.min.js"></script> +</body> +</html> diff --git a/docs/ops/monitoring.rst b/docs/ops/monitoring.rst index e30831ade4ef71a0abd101e383065f706f586b63..53bd914aed57762302f5f762e59bd65ef54ba8d1 100644 --- a/docs/ops/monitoring.rst +++ b/docs/ops/monitoring.rst @@ -17,6 +17,19 @@ Analyzing long-term trends How big is my database and how fast is it growing? How quickly is my daily-active user count growing? +Architecture +```````````` + +Below you find a diagram of the software and systems that comprise our monitoring and alerting infrastructure. +It has intentionally been kept simple, yet is already surprisingly complex (at least if you are new to the monitoring world). +The software stack is industry standard and chosen so it would be easy to find solutions to problems and people who can help out. + +Log in to `staging <https://monitoring.privatestorage-staging.com/>`_ and `production <https://monitoring.private.storage/>`_ Grafana via your GSuite Private.Storage session. + +.. raw:: html + :file: monitoring-architecture.html + + Introduction to our dashboards `````````````````````````````` diff --git a/docs/ops/stripe.rst b/docs/ops/stripe.rst new file mode 100644 index 0000000000000000000000000000000000000000..17a471590bcb2678635293489e17b5e189a3f848 --- /dev/null +++ b/docs/ops/stripe.rst @@ -0,0 +1,19 @@ +Stripe +====== + +We use Stripe for payment processing. +We have test-mode keys for use in staging and live-mode keys for use in production. + +There is "payment link" state in Stripe to facilitate the payment workflow. +This was created with ``admin/create-payment-link.sh`` +(once for test-mode and once for live-mode). + +The payment links can be found in PrivateStorageOps. +They are the values of the stage3 variables ``stripe_payment_link_staging`` and ``stripe_payment_link_production``. + +There is also "webhook" state in Stripe so that PaymentServer receives notification of payment. +This was created with ``admin/create-webhook.sh`` +(once for test-mode and once for live-mode). +The test-mode webhook is ``we_1LxwKnBHXBAMm9bPDJXJNcDN``. +The live-mode webhook is ``we_1LzioNA9OAm23rYOmAcp3V85``. +The webhook secrets can be found with the rest of the each grid's private keys in ``stripe.webhook-secret``. diff --git a/morph/grid/local/README.rst b/morph/grid/local/README.rst index 48f395cb82fc272481a61f0d1ab425ffbd20cd02..75bc685852a65673bbb1e572249d34eb5b482db4 100644 --- a/morph/grid/local/README.rst +++ b/morph/grid/local/README.rst @@ -8,14 +8,18 @@ Issues with networking that looked like guest misconfigurations vanished after c This requires `NixOS <https://nixos.org/>`_. Nix without the OS will not work. + Use the local development environment ````````````````````````````````````` -0. Add VirtualBox to your NixOs system configuration at ``/etc/nixos/configuration.nix``:: +0. Add to your NixOS system configuration at ``/etc/nixos/configuration.nix`` (and rebuild):: - virtualisation.virtualbox.host.enable = true; - # Save bytes and build time, optional but recommended: - virtualisation.virtualbox.host.headless = true; + # Enable libvirt - likely incompatible with virtualisation.virtualbox! + virtualisation.libvirtd.enable = true; + # Required for LibVirt + security.polkit.enable = true; + # Enable HW acceleration if (nested virtualisation is) available + #boot.kernelModules = [ "kvm-amd" "kvm-intel" ]; 1. Enter the morph local grid directory:: @@ -27,19 +31,27 @@ Use the local development environment 3. Build and start the VMs:: - VAGRANT_DEFAULT_PROVIDER=virtualbox vagrant up + vagrant up --provider=libvirt + + Optionally, to switch from QEMU to KVM virtualization, edit the virtual machine definition of all the machines and replace the "qemu" on the first line with "kvm":: + + sudo virsh list + sudo virsh edit <machine id> (once for every machine) + vagrant halt + vagrant up + 4. Then, add the Vagrant SSH configuration to your user's ``~/.ssh/config`` file:: install -d ~/.ssh ; vagrant ssh-config >> ~/.ssh/config - Latest Morph honors the ``SSH_CONFIG_FILE`` environment variable (`since 3f90aa88 (March 2020, v 1.5.0) <https://github.com/DBCDK/morph/commit/3f90aa885fac1c29fce9242452fa7c0c505744ef#diff-d155ad793bd62e6ea4c44ba985049ecb13a4f4f32f799791b2bce695a16c0101>`_), so in the future this should get a bit more convenient. + Latest Morph honors the ``SSH_CONFIG_FILE`` environment variable (`since 3f90aa88 (March 2020, v 1.5.0) <https://github.com/DBCDK/morph/commit/3f90aa885fac1c29fce9242452fa7c0c505744ef#diff-d155ad793bd62e6ea4c44ba985049ecb13a4f4f32f799791b2bce695a16c0101>`_), so in the future this should get a bit more convenient. -6. Create a ``public-keys/users.nix`` file with your SSH key (see ``public-keys/users.nix.example`` for the format) so you'll be able to log in after deploying the new configuration:: +5. Create a ``public-keys/users.nix`` file with your SSH key (see ``public-keys/users.nix.example`` for the format) so you'll be able to log in after deploying the new configuration:: $EDITOR public-keys/users.nix -7. Then, build and deploy our software to the Vagrant VMs:: +6. Then, build and deploy our software to the Vagrant VMs:: morph build grid.nix morph push grid.nix @@ -48,4 +60,4 @@ Use the local development environment vagrant up morph upload-secrets grid.nix - You should now be able to log in with the users and keys you set in your ``users.nix`` file. +You should now be able to log in with the users and keys you set in your ``users.nix`` file. diff --git a/morph/grid/local/Vagrantfile b/morph/grid/local/Vagrantfile index 64d4aec5aadc67e48c91cb0b8154b1107c23f1bb..911dd3f7570834060ed0879b738bb3ea2a61420d 100644 --- a/morph/grid/local/Vagrantfile +++ b/morph/grid/local/Vagrantfile @@ -1,29 +1,61 @@ # -*- mode: ruby -*- # vi: set ft=ruby : -# This Vagrantfile worked for Florian Sesser using Vagrant 2.2.16dev and -# the VirtualBox Hypervisor. Earlier Vagrant and LibVirt did not work. +# This Vagrantfile worked for Florian Sesser using Vagrant 2.2.19 and +# the LibVirt with QEmu Hypervisor. Earlier Vagrant and VirtualBox did worked too. + +# Get a dedicated LibVirt pool name or use default one +pool_name = ENV.has_key?('POOL_NAME') ? ENV['POOL_NAME'] : 'default' +# For instance, one could create such pool beforehand as follows: +# export POOL_NAME=morph_local_$(id -un) +# POOL_PATH="/path/to/your/storage" +# mkdir -p "${POOL_PATH}" +# sudo virsh pool-define-as ${POOL_NAME} --type dir --target "${POOL_PATH}" +# sudo virsh pool-autostart ${POOL_NAME} +# sudo virsh pool-start ${POOL_NAME} Vagrant.configure("2") do |config| # For a complete reference, please see the online documentation at # https://docs.vagrantup.com. - config.vm.define "payments.localdev" do |config| - config.vm.hostname = "payments" - config.vm.box = "esselius/nixos" - config.vm.box_version = "20.09" - config.vm.box_check_update = false + # Select the base image + config.vm.box = "esselius/nixos" + config.vm.box_version = "20.09" + config.vm.box_check_update = false + + # No need to sync the working dir. with the guest boxess + # Better use SFTP to transfer + config.vm.synced_folder ".", "/vagrant", disabled: true + + # Tune LibVirt/QEmu guests + config.vm.provider :libvirt do |domain| + # The default of one CPU should work + # Increase to speed up boot/push/deploy + # domain.cpus = 1 # To use the self-updating deployment system you need more memory. Giving # all of the VMs enough memory for this is rather taxing, though, and the # self-updating deployment system is not particularly useful for local # dev. But should you want to: # - # config.vm.provider "virtualbox" do |v| - # v.memory = 4096 - # end + # domain.memory = 4096 + # + # Meanwhile, 1024 was apparently the default with VirtualBox + domain.memory = 1024 + + # Using a specific pool may help to manage the disk space + domain.storage_pool_name = pool_name + domain.snapshot_pool_name = pool_name + + # No need of graphics - better use serial + domain.graphics_type = "none" + domain.video_type = "none" + end + + config.vm.define "payments.localdev" do |config| + config.vm.hostname = "payments" - # Assign a static IP address inside the VirtualBox host-only (Vagrant + # Assign a static IP address inside the box host-only (Vagrant # calls it "private") network. The address must be in the range # VirtualBox allows. # https://www.virtualbox.org/manual/ch06.html#network_hostonly says some @@ -37,31 +69,26 @@ Vagrant.configure("2") do |config| config.vm.define "storage1.localdev" do |config| config.vm.hostname = "storage1" - config.vm.box = "esselius/nixos" - config.vm.box_version = "20.09" - config.vm.box_check_update = false config.vm.network "private_network", ip: "192.168.56.22" end config.vm.define "storage2.localdev" do |config| config.vm.hostname = "storage2" - config.vm.box = "esselius/nixos" - config.vm.box_version = "20.09" - config.vm.box_check_update = false config.vm.network "private_network", ip: "192.168.56.23" end config.vm.define "monitoring.localdev" do |config| config.vm.hostname = "monitoring" - config.vm.box = "esselius/nixos" - config.vm.box_version = "20.09" - config.vm.box_check_update = false config.vm.network "private_network", ip: "192.168.56.24" end # To make the VMs assign the static IPs to the network interfaces we need a rebuild: - config.vm.provision "shell", inline: "echo '{nix.trustedUsers = [ \"@wheel\" \"root\" \"vagrant\" ];}' > /etc/nixos/custom-configuration.nix" + ## Rename to 'nix.settings.trusted-users' after 20.09 or so: + config.vm.provision "shell", + inline: "echo '{ nix.trustedUsers = [ \"@wheel\" \"root\" \"vagrant\" ]; boot.kernelParams = [ \"console=tty0\" \"console=ttyS0,115200\" ]; }' > /etc/nixos/custom-configuration.nix" config.vm.provision "shell", inline: "nixos-rebuild switch" + config.vm.provision "shell", inline: "systemctl stop firewall.service" + config.vm.provision "shell", inline: "systemctl start serial-getty@ttyS0.service" config.trigger.after :up do |trigger| trigger.info = "Hostname and IP address this host actually uses:" diff --git a/morph/grid/local/grid.nix b/morph/grid/local/grid.nix index 088d9e8c79422b82d638a42aeab5da1fcf14f536..0c9f3488eceeee0aee5308441712fe9bdb052ddc 100644 --- a/morph/grid/local/grid.nix +++ b/morph/grid/local/grid.nix @@ -122,7 +122,7 @@ let inherit paymentExporterTargets blackboxExporterHttpsTargets; inherit (grid-config) monitoringDomains; googleOAuthClientID = grid-config.monitoringGoogleOAuthClientID; - enableSlackAlert = false; + enableZulipAlert = false; }; system.stateVersion = "19.09"; }; diff --git a/morph/grid/local/private-keys/README.rst b/morph/grid/local/private-keys/README.rst index 176f0d54a9761281b273ab0bdeb710f219807ece..17976e1499e16adaafb982c0360e6ed32ea5c442 100644 --- a/morph/grid/local/private-keys/README.rst +++ b/morph/grid/local/private-keys/README.rst @@ -27,6 +27,13 @@ This file is read by Grafana's systemd service to set an environment variable wi The only line in the file should be the secret URL. Use the url from `this 1Password entry <https://privatestorage.1password.com/vaults/7flqasy5hhhmlbtp5qozd3j4ga/allitems/cgznskz2oix2tyx5xyntwaos5i>`_ or get a new secret URL for your Slack channel at https://www.slack.com/apps/A0F7XDUAZ. +grafana-zulip-url +----------------- + +This file should contain a single line with the secret Zulip alerting Webhook Bot URL. +The URLs for Staging and Production are both stored in 1Password. +See `https://zulip.com/integrations/doc/grafana`_ for documentation and ``grid/local/private-keys/grafana-zulip-url`` for an example. + stripe.secret ------------- diff --git a/morph/grid/local/private-keys/grafana-zulip-url b/morph/grid/local/private-keys/grafana-zulip-url new file mode 100644 index 0000000000000000000000000000000000000000..4b83d4f6301b8ede9a19ce2c2b7b89c492deddf6 --- /dev/null +++ b/morph/grid/local/private-keys/grafana-zulip-url @@ -0,0 +1 @@ +https://yourZulipDomain.zulipchat.com/api/v1/external/grafana?api_key=abcdefgh&stream=stream%20name&topic=your%20topic diff --git a/morph/grid/local/private-keys/stripe.webhook-secret b/morph/grid/local/private-keys/stripe.webhook-secret new file mode 100644 index 0000000000000000000000000000000000000000..a2f493f0cca4e60694b4d9177030da882b703f9e --- /dev/null +++ b/morph/grid/local/private-keys/stripe.webhook-secret @@ -0,0 +1 @@ +whsec_12121212121212121212121212121212121212 diff --git a/morph/grid/production/grid.nix b/morph/grid/production/grid.nix index 06fe07f8277bf81e26e2f9f735783614c117a7b3..4718e93fbe7e70e277349b9f9dab7dda35789c3d 100644 --- a/morph/grid/production/grid.nix +++ b/morph/grid/production/grid.nix @@ -54,7 +54,7 @@ let inherit paymentExporterTargets blackboxExporterHttpsTargets; inherit (grid-config) monitoringDomains; googleOAuthClientID = grid-config.monitoringGoogleOAuthClientID; - enableSlackAlert = true; + enableZulipAlert = true; }; system.stateVersion = "19.09"; }; @@ -62,8 +62,8 @@ let defineStorageNode = name: { vpnIP, stateVersion }: let - nodecfg = import "${./.}/${name}-config.nix"; - hardware ="${./.}/${name}-hardware.nix"; + nodecfg = import (./. + "/${name}-config.nix"); + hardware = (./. + "/${name}-hardware.nix"); in { imports = [ # Get some of the very lowest-level system configuration for this diff --git a/morph/grid/production/storage001-hardware.nix b/morph/grid/production/storage001-hardware.nix index b2ca97c1db1b9721b93f2662d6e8d34189d5a0ab..64f945ed0ea8f0220ce591d7d880189f9a18c59b 100644 --- a/morph/grid/production/storage001-hardware.nix +++ b/morph/grid/production/storage001-hardware.nix @@ -12,7 +12,7 @@ boot.initrd.kernelModules = [ ]; boot.kernelModules = [ "kvm-intel" ]; boot.extraModulePackages = [ ]; - boot.kernel.sysctl = { "vm.swappiness" = 0; }; + boot.kernel.sysctl = { "vm.swappiness" = 1; }; fileSystems."/" = { device = "/dev/disk/by-uuid/f72c1f46-6723-45bf-9ef7-92f31cc37589"; diff --git a/morph/grid/production/storage002-hardware.nix b/morph/grid/production/storage002-hardware.nix index 2f354ad29930f048f7eb20b54a1504ed87db85a1..69b6946098728f2f3e30d7b7570ae88167efbee4 100644 --- a/morph/grid/production/storage002-hardware.nix +++ b/morph/grid/production/storage002-hardware.nix @@ -12,7 +12,7 @@ boot.initrd.kernelModules = [ ]; boot.kernelModules = [ "kvm-intel" ]; boot.extraModulePackages = [ ]; - boot.kernel.sysctl = { "vm.swappiness" = 0; }; + boot.kernel.sysctl = { "vm.swappiness" = 1; }; fileSystems."/" = { device = "/dev/disk/by-uuid/0e92ada9-effb-42e2-a26a-9cdb529bcdc7"; diff --git a/morph/grid/production/storage003-hardware.nix b/morph/grid/production/storage003-hardware.nix index d8ffe5d59fb39ba4a9c6b1b73313f199a2ed980b..f4d68cac14e74d4d41c1af9c50d6e1df41b37aef 100644 --- a/morph/grid/production/storage003-hardware.nix +++ b/morph/grid/production/storage003-hardware.nix @@ -13,7 +13,7 @@ boot.kernelModules = [ "kvm-intel" ]; boot.extraModulePackages = [ ]; boot.supportedFilesystems = [ "zfs" ]; - boot.kernel.sysctl = { "vm.swappiness" = 0; }; + boot.kernel.sysctl = { "vm.swappiness" = 1; }; fileSystems."/" = { device = "/dev/disk/by-uuid/240fc1f6-cd55-48a3-ac80-5b3550a32ef5"; diff --git a/morph/grid/production/storage004-hardware.nix b/morph/grid/production/storage004-hardware.nix index 1fe78a76e813605d8e181d5a858062f77114ba38..6cc5ddc34a470cadd7dd5ddf9cb511f7988428c7 100644 --- a/morph/grid/production/storage004-hardware.nix +++ b/morph/grid/production/storage004-hardware.nix @@ -12,7 +12,7 @@ boot.initrd.kernelModules = [ ]; boot.kernelModules = [ "kvm-intel" ]; boot.extraModulePackages = [ ]; - boot.kernel.sysctl = { "vm.swappiness" = 0; }; + boot.kernel.sysctl = { "vm.swappiness" = 1; }; fileSystems."/" = { device = "/dev/disk/by-uuid/d628122e-05d9-4212-b6a5-4b9516d85dbe"; diff --git a/morph/grid/production/storage005-hardware.nix b/morph/grid/production/storage005-hardware.nix index e8f7b6391b4cb1c8d3e6059c1fd09512a0cc370b..45f4da4bd0160feb3dd27100dfff9306c49edf05 100644 --- a/morph/grid/production/storage005-hardware.nix +++ b/morph/grid/production/storage005-hardware.nix @@ -12,7 +12,7 @@ boot.initrd.kernelModules = [ ]; boot.kernelModules = [ "kvm-intel" ]; boot.extraModulePackages = [ ]; - boot.kernel.sysctl = { "vm.swappiness" = 0; }; + boot.kernel.sysctl = { "vm.swappiness" = 1; }; fileSystems."/" = { device = "/dev/disk/by-uuid/2653c6bb-396f-4911-b9ff-b68de8f9715d"; diff --git a/morph/grid/testing/grid.nix b/morph/grid/testing/grid.nix index c033da1279fa44800e994dc07df3f5febc97d60d..5f3ec05f520d09169e7a5627283b4a05b7fa87f2 100644 --- a/morph/grid/testing/grid.nix +++ b/morph/grid/testing/grid.nix @@ -70,7 +70,7 @@ let inherit paymentExporterTargets blackboxExporterHttpsTargets; inherit (grid-config) monitoringDomains; googleOAuthClientID = grid-config.monitoringGoogleOAuthClientID; - enableSlackAlert = true; + enableZulipAlert = true; }; system.stateVersion = "19.09"; }; diff --git a/morph/grid/testing/public-keys/users.nix b/morph/grid/testing/public-keys/users.nix index a43c3359eb0782635005213585b1400a1138bf2d..fb7ba902aa8ebcec4ff798c36af5215641f7e63b 100644 --- a/morph/grid/testing/public-keys/users.nix +++ b/morph/grid/testing/public-keys/users.nix @@ -1,9 +1,12 @@ let jcalderone = ["ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIN4GenAY/YLGuf1WoMXyyVa3S9i4JLQ0AG+pt7nvcLlQ exarkun@baryon"]; - flo = ["ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIHx7wJQNqKn8jOC4AxySRL2UxidNp7uIK9ad3pMb1ifF flo@fs-la"]; + flo = ["ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAII78HGtpjFxQo7wol85hqfoCqjdK9Nk7+82rwttyLHpe flo@la-staging"]; + bdonneaux = ["ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIGgpTXgxEqQPSl17NzJkAJgeDSFS1Ke/qjCuVMTZLlna benoit@leastauthority.com" "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIIZtWY7t8HVnaz6bluYsrAlzZC3MZtb8g0nO5L5fCQKR benoit@leastauthority.com"]; + shae = ["ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAICBODAgs7pGHfxkIZ8mZABUd1LlS9WhxGy0/6FvhlPYq shae@scannedinavian.com"]; + chris = ["ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAACAQDT3/sSNoJP3E17oFTYjHN+uOwTY1wVox9tff97iueIo4V88eAc/oXKETiwPkr33qbGwuoXKkxIXJVz8rnNtO6IjTm9qfgzPiRAUJjew8bunL+V7SbhQIv7nk1fyV/efaENElG8bdmzTEpgwGcEnyibqvHJSYX6W+dMCz3G1t/lv97if3ohZKENHuMC5hLfJbGHSGKFO5XdjEjeda9lDd9Ac8XyruaL7iqEefsC7GuUgNRn8V83vwuJMDAC2xXC2V11M65VkGs6WPAct2+llzTtYbsxjxVZXC4yU42eXJYfBZEcCTPtJsKJxQCqSgFOEUnOYiuS6p4Q7a97BfHJ9S9oOV8U/e7YeE4b9Q8TPNzvKTPBAsuKyLyNYekBDB7fOTFziuJy/L578EaDv2BxrsfyCQqtjLko6TIAUbbHvce8urWNvj7H+fNXaURLIQmSTOv/mMl+omkvbP3MNgSFdENpCZaHSTiDxjygf52xcinj6Ijf3uDvPY2UjIRrbWSNV4MYpZDfkqt9THY4QibmxhER/YGvY+0zfiYGqQpQMMbTUB9hhoO5AHnnhMszNG2V9i70VqWyEMsS+Sr1+gOVAPraLp/tqHaqZk7/c4DDpILjA+4davTL6lgaiewx8a0ZEPAKZCZkOMovZKwkVIjyMvfekUkf1cF+QigJPZzcWSWEjQ== cardno:000608671823"]; + meejah = ["ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIEfCDWivT0SCWoMyxUslX0upuhR4X3rNFh5rc/lCcBbe meejah@buyan" "ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABAQDkDNUL9OPvTNTijHovvLwdmgATvezS9tkToKrO6U9Gq17SBfFcb2a1nAADt9nmHtu3KExGqGrJeNkoMqGsbo+Y/BCgAz7yutL0PkoDQ4xRcl88kkk+4NtpWFhXelITIJopaNOW5E2qzkvt8FNXKnUfJpmJh+0v1wYseGKMSUncSYTb3vEViVj3DwgLgzQi/YxI/OrEKML0B+vA+n8t0XrqiHh5Ryathk5DFpss5P+0dfWC4PoJZuWbAdQsxqTm7fqmPrX7IfahZpvFHpru2OUICc2sxzoJI7//3bdXfFGkMh0cKG2pRIy2KSJ0IOnLiaACRHeIG2zcnKJLkx6Xbzfr mike@mantle"]; in { - "root" = jcalderone ++ flo; - inherit jcalderone; - inherit flo; + "root" = jcalderone ++ flo ++ bdonneaux ++ shae ++ chris ++ meejah; + inherit jcalderone flo bdonneaux shae chris meejah; } diff --git a/morph/lib/base.nix b/morph/lib/base.nix index 9105347d6f93c3549b6e2932806cc5a5abf627ba..6731aaad36e4a7cfe95b0e7a47b66a585366530f 100644 --- a/morph/lib/base.nix +++ b/morph/lib/base.nix @@ -59,9 +59,6 @@ # qualified domain name. deployment.targetHost = config.networking.fqdn; - # This is for desktop stuff - services.udisks2.enable = false; - services.private-storage.monitoring.exporters.promtail.enable = true; assertions = [ diff --git a/morph/lib/borgbackup.nix b/morph/lib/borgbackup.nix index 4b1d2620515b521da6411fa6ce3ec98b007c9b23..16df515caa5b8ed5e4bfe4eb051153abd8a80b9b 100644 --- a/morph/lib/borgbackup.nix +++ b/morph/lib/borgbackup.nix @@ -56,6 +56,11 @@ in { BORG_RSH = "ssh -i /run/keys/borgbackup/ssh-key -o StrictHostKeyChecking=accept-new"; }; + # Output statistics after uploading a backup set + extraCreateArgs = "--stats --json"; + # All logs in JSON to help Prometheus/Grafana + extraArgs = "--log-json"; + # Ciphertext doesn't compress well compression = "none"; @@ -75,7 +80,7 @@ in { BORG_RSH = "ssh -i /run/keys/borgbackup/ssh-key -o StrictHostKeyChecking=accept-new"; BORG_REPO = lib.fileContents "${publicKeyPath}/borgbackup/${config.networking.hostName}.repopath"; }; - script = ''${pkgs.borgbackup}/bin/borg check''; + script = ''${pkgs.borgbackup}/bin/borg check --verbose --log-json''; }; }; } diff --git a/morph/lib/hardware-vagrant.nix b/morph/lib/hardware-vagrant.nix index 6c41af4923861e89d144303d129d7babde494363..c13cef856552e43e1bdfcab8bffce487dd4c0887 100644 --- a/morph/lib/hardware-vagrant.nix +++ b/morph/lib/hardware-vagrant.nix @@ -15,18 +15,19 @@ }; config = { - virtualisation.virtualbox.guest.enable = true; + services.qemuGuest.enable = true; - boot.loader.grub.device = "/dev/sda"; + boot.loader.grub.device = "/dev/vda"; - boot.initrd.availableKernelModules = [ "ata_piix" "sd_mod" "sr_mod" ]; + boot.initrd.availableKernelModules = [ "ata_piix" "virtio_pci" "virtio_blk" "sd_mod" "sr_mod" ]; boot.kernel.sysctl = { "vm.swappiness" = 0; }; + boot.kernelParams = [ "console=tty0" "console=ttyS0,115200" ]; # remove the fsck that runs at startup. It will always fail to run, stopping # your boot until you press *. boot.initrd.checkJournalingFS = false; - networking.interfaces.enp0s8.ipv4.addresses = [{ + networking.interfaces.ens5.ipv4.addresses = [{ address = config.grid.publicIPv4; prefixLength = 24; }]; @@ -47,11 +48,11 @@ fileSystems."/storage" = { fsType = "tmpfs"; }; fileSystems."/" = - { device = "/dev/sda1"; + { device = "/dev/vda1"; fsType = "ext4"; }; # We want to push packages with morph without having to sign them - nix.trustedUsers = [ "@wheel" "root" "vagrant" ]; + nix.settings.trusted-users = [ "@wheel" "root" "vagrant" ]; }; } diff --git a/morph/lib/issuer-aws.nix b/morph/lib/issuer-aws.nix index 80495e2dc7bafbc9bfbbe174e1a2f75f66942dfe..7455a4375186ec4cf01947d4efc77d5ab9adceff 100644 --- a/morph/lib/issuer-aws.nix +++ b/morph/lib/issuer-aws.nix @@ -6,13 +6,19 @@ boot.loader.grub.device = lib.mkForce "/dev/nvme0n1"; ec2.hvm = true; - boot.kernel.sysctl = { "vm.swappiness" = 0; }; + boot.kernel.sysctl = { "vm.swappiness" = 1; }; swapDevices = [ { device = "/var/swapfile"; size = 4096; # megabytes randomEncryption = true; } ]; + # If we don't manually and explicitly early-load the loop module, crypt-swap + # setup fails with the not very helpful message: "loop device with autoclear + # flag is required" + # See https://unix.stackexchange.com/a/554500/81275 + boot.kernelModules = [ "loop" ]; + # Break the tie between AWS and morph for the hostname by forcing the # morph-supplied name. See also # <https://github.com/DBCDK/morph/issues/146>. @@ -35,5 +41,5 @@ # Turn on automatic optimization of nix store # https://nixos.wiki/wiki/Storage_optimization - nix.autoOptimiseStore = true; + nix.settings.auto-optimise-store = true; } diff --git a/morph/lib/issuer.nix b/morph/lib/issuer.nix index 66791142fe8f353934ae97d5ea34c1031afc0db1..dd5ca1ef78a5fd31511592df5037fdc97e776a0f 100644 --- a/morph/lib/issuer.nix +++ b/morph/lib/issuer.nix @@ -56,6 +56,14 @@ in { permissions = "0400"; action = ["sudo" "systemctl" "restart" "zkapissuer.service"]; }; + "stripe-webhook-secret-key" = { + destination = "/run/keys/stripe.webhook-secret-key"; + source = "${privateKeyPath}/stripe.webhook-secret"; + owner.user = "zkapissuer"; + owner.group = "zkapissuer"; + permissions = "0400"; + action = ["sudo" "systemctl" "restart" "zkapissuer.service"]; + }; }; }; services.private-storage-issuer = { @@ -63,6 +71,7 @@ in { tls = true; ristrettoSigningKeyPath = config.deployment.secrets.ristretto-signing-key.destination; stripeSecretKeyPath = config.deployment.secrets.stripe-secret-key.destination; + stripeWebhookSecretKeyPath = config.deployment.secrets.stripe-webhook-secret-key.destination; database = "SQLite3"; databasePath = "${config.fileSystems."zkapissuer-data".mountPoint}/vouchers.sqlite3"; inherit (config.grid) letsEncryptAdminEmail; diff --git a/morph/lib/monitoring.nix b/morph/lib/monitoring.nix index d2552ebedf026a07a92da783b1de410be9ceca38..a5f2575aaef5fca0cf15f5d125981f150a0f20a3 100644 --- a/morph/lib/monitoring.nix +++ b/morph/lib/monitoring.nix @@ -77,6 +77,15 @@ in { When true requires a grafana-slack-url file (see private-keys/README.rst). ''; }; + + enableZulipAlert = lib.mkOption { + type = lib.types.bool; + default = false; + description = '' + Whether to enable alerting via Zulip. + When true requires a grafana-zulip-url file (see private-keys/README.rst). + ''; + }; }; config = { @@ -138,6 +147,16 @@ in { action = ["sudo" "systemctl" "restart" "grafana.service"]; }; }) + (lib.mkIf cfg.enableZulipAlert { + "grafana-zulip-url" = { + source = "${privateKeyPath}/grafana-zulip-url"; + destination = "/run/keys/grafana-zulip-url"; + owner.user = config.systemd.services.grafana.serviceConfig.User; + owner.group = config.users.users.grafana.group; + permissions = "0400"; + action = ["sudo" "systemctl" "restart" "grafana.service"]; + }; + }) ]; networking.hosts = hostsMap; @@ -156,7 +175,7 @@ in { }; services.private-storage.monitoring.grafana = { - inherit (cfg) googleOAuthClientID enableSlackAlert ; + inherit (cfg) googleOAuthClientID enableSlackAlert enableZulipAlert; inherit letsEncryptAdminEmail; domains = cfg.monitoringDomains; }; diff --git a/nixos/lib/testing.nix b/nixos/lib/testing.nix index d89717a0a76f93bb6062ad63c6cfdbb91c12c746..6ed4b733bffad29fbc52a492e295bc5ee7dddbe2 100644 --- a/nixos/lib/testing.nix +++ b/nixos/lib/testing.nix @@ -14,7 +14,8 @@ '' # The driver runs pyflakes on this script before letting it # run... Convince pyflakes that there is a `test` name. - test = None + def test(): + pass with open("${testpath}") as testfile: exec(testfile.read(), globals()) # For simple types, JSON is compatible with Python syntax! diff --git a/nixos/modules/deployment.nix b/nixos/modules/deployment.nix index 41381ce5d33e62f4e569b87709d591f3586804df..17eb095f836d0ef75a23bad41a318b4a516eda2f 100755 --- a/nixos/modules/deployment.nix +++ b/nixos/modules/deployment.nix @@ -35,11 +35,11 @@ in { config = { # Configure the system to use our binary cache so that deployment updates # only require downloading pre-built software, not building it ourselves. - nix = { - binaryCachePublicKeys = [ + nix.settings = { + trusted-public-keys = [ "saxtons.private.storage:MplOcEH8G/6mRlhlKkbA8GdeFR3dhCFsSszrspE/ZwY=" ]; - binaryCaches = [ + substituters = [ "http://saxtons.private.storage" ]; }; diff --git a/nixos/modules/issuer.nix b/nixos/modules/issuer.nix index b032100f1f20f04f174aa8f03faf5bf22a97b226..5537850b4dfc72038d8ba75f92881dd4762b222c 100644 --- a/nixos/modules/issuer.nix +++ b/nixos/modules/issuer.nix @@ -62,6 +62,13 @@ in { and payment management. ''; }; + services.private-storage-issuer.stripeWebhookSecretKeyPath = lib.mkOption { + type = lib.types.path; + description = '' + The path to a file containing a Stripe "webhook" secret key to use for + charge and payment management. + ''; + }; services.private-storage-issuer.stripeEndpointDomain = lib.mkOption { type = lib.types.str; description = '' @@ -216,6 +223,7 @@ in { stripeArgs = "--stripe-key-path ${cfg.stripeSecretKeyPath} " + + "--stripe-webhook-key-path ${cfg.stripeWebhookSecretKeyPath} " + "--stripe-endpoint-domain ${cfg.stripeEndpointDomain} " + "--stripe-endpoint-scheme ${cfg.stripeEndpointScheme} " + "--stripe-endpoint-port ${toString cfg.stripeEndpointPort}"; @@ -246,7 +254,7 @@ in { ]; # NGINX reverse proxy - security.acme.email = cfg.letsEncryptAdminEmail; + security.acme.defaults.email = cfg.letsEncryptAdminEmail; security.acme.acceptTerms = true; services.nginx = { enable = true; diff --git a/nixos/modules/monitoring/exporters/node.nix b/nixos/modules/monitoring/exporters/node.nix index 407011069ec0cfdec129244b37a60edd09a57f2b..4a9e41b5c840852323aaf952859e79b4c878a870 100644 --- a/nixos/modules/monitoring/exporters/node.nix +++ b/nixos/modules/monitoring/exporters/node.nix @@ -69,7 +69,7 @@ in { ] ++ ( optionals (config.services.nfs.server.enable) [ "nfsd" ] ) ++ ( - optionals ("" != config.boot.initrd.mdadmConf) [ "mdadm" ] + optionals ("" != config.boot.initrd.services.swraid.mdadmConf) [ "mdadm" ] ) ++ ( optionals ({} != config.networking.bonds) [ "bonding" ] ) ++ ( diff --git a/nixos/modules/monitoring/exporters/tahoe.nix b/nixos/modules/monitoring/exporters/tahoe.nix index 0f24bb6c099725242647e4fa23fba20da644811d..6a2cf6a45d8280aee2f073ba00d4dc2dff0ad9fb 100644 --- a/nixos/modules/monitoring/exporters/tahoe.nix +++ b/nixos/modules/monitoring/exporters/tahoe.nix @@ -1,7 +1,7 @@ # Tahoe Prometheus metrics collector # -# Scope: Retrieves OpenMetrics from Tahoe and puts them -# where textfile collector can find them. +# Scope: Retrieve metrics from Tahoe and put them where Prometheus' +# node-exporter's textfile collector can find them. # # Usage: Import this to every server running Tahoe. # @@ -55,14 +55,22 @@ in { description = "Tahoe metrics gathering service"; after = [ "tahoe.storage.service" ]; startAt = cfg.interval; - path = [ pkgs.curl ]; + path = [ pkgs.coreutils pkgs.findutils pkgs.curl ]; restartIfChanged = false; # Save to a temp file and then move atomically so the # textfile collector won't read a partial file. # See https://github.com/prometheus/node_exporter#textfile-collector script = '' - curl --silent --show-error --fail-with-body --output "${cfg.outFile}.tmp" "${cfg.scrapeEndpoint}" + set -euo pipefail + + NUM_CORRUPTION_ADVISORIES=$(find /storage/corruption-advisories/ -type f | wc -l) + echo "tahoe_corruption_advisories_total $NUM_CORRUPTION_ADVISORIES" > "${cfg.outFile}.tmp" + + NUM_INCIDENT_REPORTS=$(find /var/db/tahoe-lafs/storage/logs/incidents/ -type f | wc -l) + echo "tahoe_incident_reports_total $NUM_INCIDENT_REPORTS" >> "${cfg.outFile}.tmp" + + curl --silent --show-error --fail-with-body "${cfg.scrapeEndpoint}" >> "${cfg.outFile}.tmp" mv "${cfg.outFile}.tmp" "${cfg.outFile}" ''; }; diff --git a/nixos/modules/monitoring/server/grafana-dashboards/backups.json b/nixos/modules/monitoring/server/grafana-dashboards/backups.json index 9c9889d3b1dfc37d97bcf0891f943778f9ba71a8..e2387e1f4413cab9372b30dde033d76052262785 100644 --- a/nixos/modules/monitoring/server/grafana-dashboards/backups.json +++ b/nixos/modules/monitoring/server/grafana-dashboards/backups.json @@ -22,11 +22,12 @@ "editable": true, "fiscalYearStartMonth": 0, "graphTooltip": 0, - "id": 39, + "id": 54, "links": [], "liveNow": false, "panels": [ { + "collapsed": false, "gridPos": { "h": 1, "w": 24, @@ -34,6 +35,7 @@ "y": 0 }, "id": 44, + "panels": [], "title": "Customer ciphertext backup to Borgbase.com", "type": "row" }, @@ -84,14 +86,14 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 0, + "fillOpacity": 60, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, - "lineInterpolation": "linear", + "lineInterpolation": "stepAfter", "lineWidth": 1, "pointSize": 5, "scaleDistribution": { @@ -114,15 +116,34 @@ { "color": "green", "value": null + }, + { + "color": "red", + "value": 80 } ] - }, - "unit": "short" + } }, - "overrides": [] + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Failed" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "semi-dark-red", + "mode": "fixed" + } + } + ] + } + ] }, "gridPos": { - "h": 7, + "h": 5, "w": 12, "x": 0, "y": 1 @@ -144,7 +165,7 @@ { "datasource": { "type": "prometheus", - "uid": "000000001" + "uid": "LocalPrometheus" }, "exemplar": true, "expr": "sum(node_systemd_unit_state{name=\"borgbackup-job-daily.timer\", state=~\"active\"})", @@ -156,7 +177,7 @@ { "datasource": { "type": "prometheus", - "uid": "000000001" + "uid": "LocalPrometheus" }, "exemplar": true, "expr": "sum(node_systemd_unit_state{name=\"borgbackup-job-daily.service\", state=\"failed\"})", @@ -224,14 +245,14 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 0, + "fillOpacity": 60, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, - "lineInterpolation": "linear", + "lineInterpolation": "stepAfter", "lineWidth": 1, "pointSize": 5, "scaleDistribution": { @@ -259,10 +280,26 @@ }, "unit": "short" }, - "overrides": [] + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Failed" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "semi-dark-red", + "mode": "fixed" + } + } + ] + } + ] }, "gridPos": { - "h": 7, + "h": 5, "w": 12, "x": 12, "y": 1 @@ -284,7 +321,7 @@ { "datasource": { "type": "prometheus", - "uid": "000000001" + "uid": "LocalPrometheus" }, "exemplar": true, "expr": "sum(node_systemd_unit_state{name=\"borgbackup-check-repo.timer\", state=\"active\"})", @@ -296,7 +333,7 @@ { "datasource": { "type": "prometheus", - "uid": "000000001" + "uid": "LocalPrometheus" }, "exemplar": true, "expr": "sum(node_systemd_unit_state{name=\"borgbackup-check-repo.service\", state=\"failed\"})", @@ -407,10 +444,10 @@ "overrides": [] }, "gridPos": { - "h": 7, + "h": 5, "w": 12, "x": 0, - "y": 8 + "y": 6 }, "id": 41, "options": { @@ -429,7 +466,7 @@ { "datasource": { "type": "prometheus", - "uid": "000000001" + "uid": "LocalPrometheus" }, "exemplar": false, "expr": "node_systemd_timer_last_trigger_seconds{name=\"borgbackup-job-daily.timer\"} - time()", @@ -542,10 +579,10 @@ "overrides": [] }, "gridPos": { - "h": 7, + "h": 5, "w": 12, "x": 12, - "y": 8 + "y": 6 }, "id": 42, "options": { @@ -564,7 +601,7 @@ { "datasource": { "type": "prometheus", - "uid": "000000001" + "uid": "LocalPrometheus" }, "exemplar": false, "expr": "node_systemd_timer_last_trigger_seconds{name=\"borgbackup-check-repo.timer\"} - time()", @@ -584,9 +621,520 @@ ], "title": "Monthly check-repo trigger", "type": "timeseries" + }, + { + "alert": { + "alertRuleTags": {}, + "conditions": [ + { + "evaluator": { + "params": [ + 9000 + ], + "type": "gt" + }, + "operator": { + "type": "and" + }, + "query": { + "params": [ + "JobRunTime", + "5m", + "now" + ] + }, + "reducer": { + "params": [], + "type": "last" + }, + "type": "query" + } + ], + "executionErrorState": "alerting", + "for": "5m", + "frequency": "1m", + "handler": 1, + "message": "A backup job ran for more than 2 ½ hours. After 3 hours it could run into the check-repo job start time, depending on its \"random\" job delay.", + "name": "Daily backup job run time alert", + "noDataState": "no_data", + "notifications": [] + }, + "description": "When was the systemd unit active?", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "left", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 60, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "stepAfter", + "lineStyle": { + "fill": "solid" + }, + "lineWidth": 0, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "#EAB839", + "value": 7200 + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 12, + "x": 0, + "y": 11 + }, + "id": 52, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "LocalPrometheus" + }, + "exemplar": true, + "expr": "abs((node_systemd_timer_last_trigger_seconds{name=\"borgbackup-job-daily.timer\"} - time())) * on (instance) node_systemd_unit_state{name=\"borgbackup-job-daily.service\", state=\"active\"}", + "interval": "", + "legendFormat": "{{instance}}", + "refId": "JobRunTime" + } + ], + "thresholds": [ + { + "colorMode": "critical", + "op": "gt", + "value": 9000, + "visible": true + } + ], + "title": "Daily backup job run time", + "type": "timeseries" + }, + { + "alert": { + "alertRuleTags": {}, + "conditions": [ + { + "evaluator": { + "params": [ + 18000 + ], + "type": "gt" + }, + "operator": { + "type": "and" + }, + "query": { + "params": [ + "JobRunTime", + "5m", + "now" + ] + }, + "reducer": { + "params": [], + "type": "last" + }, + "type": "query" + } + ], + "executionErrorState": "alerting", + "for": "5m", + "frequency": "1m", + "handler": 1, + "message": "A borg check-repo job ran for more than five hours. After six hours it could collide with the daily backup job, depending on that job's \"random\" delay. If the backup set is large and this is expected to happen again, consider using borgbackup partial checks (--max-duration SECONDS parameter).", + "name": "Monthly check-repo run time alert", + "noDataState": "no_data", + "notifications": [] + }, + "description": "When was the systemd unit active?", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "left", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 60, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "stepAfter", + "lineStyle": { + "fill": "solid" + }, + "lineWidth": 0, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "#EAB839", + "value": 15000 + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 12, + "x": 12, + "y": 11 + }, + "id": 53, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "LocalPrometheus" + }, + "exemplar": true, + "expr": "abs((node_systemd_timer_last_trigger_seconds{name=\"borgbackup-check-repo.timer\"} - time())) * on (instance) node_systemd_unit_state{name=\"borgbackup-check-repo.service\", state=\"active\"}", + "interval": "", + "legendFormat": "{{instance}}", + "refId": "JobRunTime" + } + ], + "thresholds": [ + { + "colorMode": "critical", + "op": "gt", + "value": 18000, + "visible": true + } + ], + "title": "Monthly check-repo run time", + "type": "timeseries" + }, + { + "datasource": { + "type": "loki", + "uid": "LocalLoki" + }, + "description": "The \"duration\" that borgbackup status reports.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "axisSoftMin": 0, + "fillOpacity": 60, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineWidth": 1, + "scaleDistribution": { + "type": "linear" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "#EAB839", + "value": 3600 + }, + { + "color": "red", + "value": 10800 + } + ] + }, + "unit": "dtdurations" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 16 + }, + "id": 49, + "options": { + "barRadius": 0, + "barWidth": 0.1, + "groupWidth": 0.7, + "legend": { + "calcs": [], + "displayMode": "hidden", + "placement": "bottom" + }, + "orientation": "auto", + "showValue": "auto", + "stacking": "none", + "tooltip": { + "mode": "single", + "sort": "none" + }, + "xField": "host", + "xTickLabelRotation": -45, + "xTickLabelSpacing": 0 + }, + "pluginVersion": "8.4.7", + "targets": [ + { + "datasource": { + "type": "loki", + "uid": "LocalLoki" + }, + "expr": "{unit=\"borgbackup-job-daily.service\"} |= \"duration\" | pattern \"<_>\\\"duration\\\": <duration>,\"", + "legendFormat": "{{host}}", + "queryType": "range", + "refId": "A" + } + ], + "title": "Daily backup job run time (as reported by borg)", + "transformations": [ + { + "id": "labelsToFields", + "options": { + "keepLabels": [ + "duration", + "host" + ] + } + }, + { + "id": "convertFieldType", + "options": { + "conversions": [ + { + "destinationType": "number", + "targetField": "duration" + } + ], + "fields": {} + } + }, + { + "id": "merge", + "options": {} + } + ], + "type": "barchart" + }, + { + "datasource": { + "type": "loki", + "uid": "LocalLoki" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "axisSoftMin": 0, + "fillOpacity": 80, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineWidth": 1, + "scaleDistribution": { + "type": "linear" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 16 + }, + "id": 55, + "options": { + "barRadius": 0, + "barWidth": 0.97, + "groupWidth": 0.7, + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "orientation": "auto", + "showValue": "auto", + "stacking": "none", + "tooltip": { + "mode": "single", + "sort": "none" + }, + "xTickLabelRotation": 0, + "xTickLabelSpacing": 0 + }, + "pluginVersion": "8.4.7", + "targets": [ + { + "datasource": { + "type": "loki", + "uid": "LocalLoki" + }, + "expr": "{unit=\"borgbackup-job-daily.service\"} |= \"compressed_size\" | pattern \"<_>\\\"compressed_size\\\": <compressed_size>,\"", + "hide": false, + "legendFormat": "{{host}} archive", + "refId": "This archive size in bytes" + }, + { + "datasource": { + "type": "loki", + "uid": "LocalLoki" + }, + "expr": "{unit=\"borgbackup-job-daily.service\"} |= \"unique_csize\" | pattern \"<_>\\\"unique_csize\\\": <unique_csize>,\"", + "hide": false, + "legendFormat": "{{host}} all archives", + "refId": "All archives deduplicated size" + } + ], + "title": "Backup set size", + "transformations": [ + { + "id": "labelsToFields", + "options": { + "keepLabels": [ + "host", + "unique_csize", + "compressed_size" + ] + } + }, + { + "id": "convertFieldType", + "options": { + "conversions": [ + { + "destinationType": "number", + "targetField": "unique_csize" + }, + { + "destinationType": "number", + "targetField": "compressed_size" + } + ], + "fields": {} + } + }, + { + "id": "merge", + "options": {} + } + ], + "type": "barchart" } ], - "refresh": "1m", + "refresh": "5m", "schemaVersion": 35, "style": "dark", "tags": [], diff --git a/nixos/modules/monitoring/server/grafana-dashboards/meta-monitoring.json b/nixos/modules/monitoring/server/grafana-dashboards/meta-monitoring.json index e116853aa2b899f97f67753cc6e90d7e99a28265..d280cd9b208d4cc5485fd72a3bd4f3d5e47468fd 100644 --- a/nixos/modules/monitoring/server/grafana-dashboards/meta-monitoring.json +++ b/nixos/modules/monitoring/server/grafana-dashboards/meta-monitoring.json @@ -8,16 +8,22 @@ "hide": true, "iconColor": "rgba(0, 211, 255, 1)", "name": "Annotations & Alerts", + "target": { + "limit": 100, + "matchAny": false, + "tags": [], + "type": "dashboard" + }, "type": "dashboard" } ] }, "description": "Watching the watchers", "editable": true, - "gnetId": null, + "fiscalYearStartMonth": 0, "graphTooltip": 0, - "id": 22, "links": [], + "liveNow": false, "panels": [ { "alert": { @@ -59,12 +65,11 @@ "bars": true, "dashLength": 10, "dashes": false, - "datasource": null, - "description": "Is Prometheus having problems scraping our instances? Should be zero.", - "fieldConfig": { - "defaults": {}, - "overrides": [] + "datasource": { + "type": "prometheus", + "uid": "LocalPrometheus" }, + "description": "Is Prometheus having problems scraping our instances? Should be zero.", "fill": 1, "fillGradient": 0, "gridPos": { @@ -91,7 +96,7 @@ "alertThreshold": false }, "percentage": false, - "pluginVersion": "7.5.10", + "pluginVersion": "8.4.7", "pointradius": 2, "points": false, "renderer": "flot", @@ -119,9 +124,7 @@ "visible": true } ], - "timeFrom": null, "timeRegions": [], - "timeShift": null, "title": "Scraping failures", "tooltip": { "shared": true, @@ -130,9 +133,7 @@ }, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", - "name": null, "show": true, "values": [] }, @@ -140,29 +141,153 @@ { "decimals": 0, "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": true }, { "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": true } ], "yaxis": { - "align": false, - "alignLevel": null + "align": false } + }, + { + "alert": { + "alertRuleTags": {}, + "conditions": [ + { + "evaluator": { + "params": [ + 600 + ], + "type": "gt" + }, + "operator": { + "type": "and" + }, + "query": { + "params": [ + "A", + "5m", + "now" + ] + }, + "reducer": { + "params": [], + "type": "last" + }, + "type": "query" + } + ], + "executionErrorState": "alerting", + "for": "5m", + "frequency": "1m", + "handler": 1, + "message": "A metrics text file is older than 10 minutes.", + "name": "Textcollector staleness alert", + "noDataState": "no_data", + "notifications": [] + }, + "description": "Node-Exporter's TextCollector reads in plain text files containing metrics every few minutes. Make sure we're not reporting stale text files as new data - Alert if any of the text files is not getting updated.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 0 + }, + "id": 8, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "LocalPrometheus" + }, + "exemplar": true, + "expr": "time() - node_textfile_mtime_seconds", + "interval": "", + "legendFormat": "{{instance}}/{{file}}", + "refId": "A" + } + ], + "thresholds": [ + { + "colorMode": "critical", + "op": "gt", + "value": 600, + "visible": true + } + ], + "title": "Textfile collector freshness", + "type": "timeseries" } ], "refresh": false, - "schemaVersion": 27, + "schemaVersion": 35, "style": "dark", "tags": [], "templating": { @@ -176,5 +301,6 @@ "timezone": "", "title": "Meta monitoring", "uid": "MetaMonitoring", - "version": 1 + "version": 1, + "weekStart": "" } diff --git a/nixos/modules/monitoring/server/grafana-dashboards/node-exporter-full.json b/nixos/modules/monitoring/server/grafana-dashboards/node-exporter-full.json new file mode 100644 index 0000000000000000000000000000000000000000..adb222cfd150ee7e12ee4aae7eb6c2e3a64abc05 --- /dev/null +++ b/nixos/modules/monitoring/server/grafana-dashboards/node-exporter-full.json @@ -0,0 +1,13610 @@ +{ + "annotations": { + "list": [ + { + "$$hashKey": "object:1058", + "builtIn": 1, + "datasource": "-- Grafana --", + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "target": { + "limit": 100, + "matchAny": false, + "tags": [], + "type": "dashboard" + }, + "type": "dashboard" + } + ] + }, + "description": "Displays a lot of data about one single host.", + "editable": true, + "fiscalYearStartMonth": 0, + "gnetId": 1860, + "graphTooltip": 0, + "id": 97, + "iteration": 1660754489147, + "links": [ + { + "icon": "external link", + "tags": [], + "title": "GitHub", + "type": "link", + "url": "https://github.com/rfrail3/grafana-dashboards" + }, + { + "icon": "external link", + "tags": [], + "title": "Grafana", + "type": "link", + "url": "https://grafana.com/grafana/dashboards/1860" + } + ], + "liveNow": false, + "panels": [ + { + "collapsed": false, + "datasource": { + "type": "prometheus", + "uid": "LocalPrometheus" + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 261, + "panels": [], + "title": "Quick CPU / Mem / Disk", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "LocalPrometheus" + }, + "description": "Busy state of all CPU cores together", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [ + { + "options": { + "match": "null", + "result": { + "text": "N/A" + } + }, + "type": "special" + } + ], + "max": 100, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "rgba(50, 172, 45, 0.97)", + "value": null + }, + { + "color": "rgba(237, 129, 40, 0.89)", + "value": 85 + }, + { + "color": "rgba(245, 54, 54, 0.9)", + "value": 95 + } + ] + }, + "unit": "percent" + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 3, + "x": 0, + "y": 1 + }, + "id": 20, + "links": [], + "options": { + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showThresholdLabels": false, + "showThresholdMarkers": true + }, + "pluginVersion": "8.4.7", + "targets": [ + { + "expr": "(((count(count(node_cpu_seconds_total{instance=\"$node\",job=\"$job\"}) by (cpu))) - avg(sum by (mode)(rate(node_cpu_seconds_total{mode='idle',instance=\"$node\",job=\"$job\"}[$__rate_interval])))) * 100) / count(count(node_cpu_seconds_total{instance=\"$node\",job=\"$job\"}) by (cpu))", + "hide": false, + "intervalFactor": 1, + "legendFormat": "", + "refId": "A", + "step": 240 + } + ], + "title": "CPU Busy", + "type": "gauge" + }, + { + "datasource": { + "type": "prometheus", + "uid": "LocalPrometheus" + }, + "description": "Busy state of all CPU cores together (5 min average)", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [ + { + "options": { + "match": "null", + "result": { + "text": "N/A" + } + }, + "type": "special" + } + ], + "max": 100, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "rgba(50, 172, 45, 0.97)", + "value": null + }, + { + "color": "rgba(237, 129, 40, 0.89)", + "value": 85 + }, + { + "color": "rgba(245, 54, 54, 0.9)", + "value": 95 + } + ] + }, + "unit": "percent" + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 3, + "x": 3, + "y": 1 + }, + "id": 155, + "links": [], + "options": { + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showThresholdLabels": false, + "showThresholdMarkers": true + }, + "pluginVersion": "8.4.7", + "targets": [ + { + "expr": "avg(node_load5{instance=\"$node\",job=\"$job\"}) / count(count(node_cpu_seconds_total{instance=\"$node\",job=\"$job\"}) by (cpu)) * 100", + "format": "time_series", + "hide": false, + "intervalFactor": 1, + "refId": "A", + "step": 240 + } + ], + "title": "Sys Load (5m avg)", + "type": "gauge" + }, + { + "datasource": { + "type": "prometheus", + "uid": "LocalPrometheus" + }, + "description": "Busy state of all CPU cores together (15 min average)", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [ + { + "options": { + "match": "null", + "result": { + "text": "N/A" + } + }, + "type": "special" + } + ], + "max": 100, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "rgba(50, 172, 45, 0.97)", + "value": null + }, + { + "color": "rgba(237, 129, 40, 0.89)", + "value": 85 + }, + { + "color": "rgba(245, 54, 54, 0.9)", + "value": 95 + } + ] + }, + "unit": "percent" + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 3, + "x": 6, + "y": 1 + }, + "id": 19, + "links": [], + "options": { + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showThresholdLabels": false, + "showThresholdMarkers": true + }, + "pluginVersion": "8.4.7", + "targets": [ + { + "expr": "avg(node_load15{instance=\"$node\",job=\"$job\"}) / count(count(node_cpu_seconds_total{instance=\"$node\",job=\"$job\"}) by (cpu)) * 100", + "hide": false, + "intervalFactor": 1, + "refId": "A", + "step": 240 + } + ], + "title": "Sys Load (15m avg)", + "type": "gauge" + }, + { + "datasource": { + "type": "prometheus", + "uid": "LocalPrometheus" + }, + "description": "Non available RAM memory", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "decimals": 0, + "mappings": [], + "max": 100, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "rgba(50, 172, 45, 0.97)", + "value": null + }, + { + "color": "rgba(237, 129, 40, 0.89)", + "value": 80 + }, + { + "color": "rgba(245, 54, 54, 0.9)", + "value": 90 + } + ] + }, + "unit": "percent" + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 3, + "x": 9, + "y": 1 + }, + "hideTimeOverride": false, + "id": 16, + "links": [], + "options": { + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showThresholdLabels": false, + "showThresholdMarkers": true + }, + "pluginVersion": "8.4.7", + "targets": [ + { + "expr": "((node_memory_MemTotal_bytes{instance=\"$node\",job=\"$job\"} - node_memory_MemFree_bytes{instance=\"$node\",job=\"$job\"}) / (node_memory_MemTotal_bytes{instance=\"$node\",job=\"$job\"} )) * 100", + "format": "time_series", + "hide": true, + "intervalFactor": 1, + "refId": "A", + "step": 240 + }, + { + "expr": "100 - ((node_memory_MemAvailable_bytes{instance=\"$node\",job=\"$job\"} * 100) / node_memory_MemTotal_bytes{instance=\"$node\",job=\"$job\"})", + "format": "time_series", + "hide": false, + "intervalFactor": 1, + "refId": "B", + "step": 240 + } + ], + "title": "RAM Used", + "type": "gauge" + }, + { + "datasource": { + "type": "prometheus", + "uid": "LocalPrometheus" + }, + "description": "Used Swap", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [ + { + "options": { + "match": "null", + "result": { + "text": "N/A" + } + }, + "type": "special" + } + ], + "max": 100, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "rgba(50, 172, 45, 0.97)", + "value": null + }, + { + "color": "rgba(237, 129, 40, 0.89)", + "value": 10 + }, + { + "color": "rgba(245, 54, 54, 0.9)", + "value": 25 + } + ] + }, + "unit": "percent" + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 3, + "x": 12, + "y": 1 + }, + "id": 21, + "links": [], + "options": { + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showThresholdLabels": false, + "showThresholdMarkers": true + }, + "pluginVersion": "8.4.7", + "targets": [ + { + "expr": "((node_memory_SwapTotal_bytes{instance=\"$node\",job=\"$job\"} - node_memory_SwapFree_bytes{instance=\"$node\",job=\"$job\"}) / (node_memory_SwapTotal_bytes{instance=\"$node\",job=\"$job\"} )) * 100", + "intervalFactor": 1, + "refId": "A", + "step": 240 + } + ], + "title": "SWAP Used", + "type": "gauge" + }, + { + "datasource": { + "type": "prometheus", + "uid": "LocalPrometheus" + }, + "description": "Used Root FS", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [ + { + "options": { + "match": "null", + "result": { + "text": "N/A" + } + }, + "type": "special" + } + ], + "max": 100, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "rgba(50, 172, 45, 0.97)", + "value": null + }, + { + "color": "rgba(237, 129, 40, 0.89)", + "value": 80 + }, + { + "color": "rgba(245, 54, 54, 0.9)", + "value": 90 + } + ] + }, + "unit": "percent" + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 3, + "x": 15, + "y": 1 + }, + "id": 154, + "links": [], + "options": { + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showThresholdLabels": false, + "showThresholdMarkers": true + }, + "pluginVersion": "8.4.7", + "targets": [ + { + "expr": "100 - ((node_filesystem_avail_bytes{instance=\"$node\",job=\"$job\",mountpoint=\"/\",fstype!=\"rootfs\"} * 100) / node_filesystem_size_bytes{instance=\"$node\",job=\"$job\",mountpoint=\"/\",fstype!=\"rootfs\"})", + "format": "time_series", + "intervalFactor": 1, + "refId": "A", + "step": 240 + } + ], + "title": "Root FS Used", + "type": "gauge" + }, + { + "datasource": { + "type": "prometheus", + "uid": "LocalPrometheus" + }, + "description": "Total number of CPU cores", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [ + { + "options": { + "match": "null", + "result": { + "text": "N/A" + } + }, + "type": "special" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 2, + "w": 2, + "x": 18, + "y": 1 + }, + "id": 14, + "links": [], + "maxDataPoints": 100, + "options": { + "colorMode": "none", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "pluginVersion": "8.4.7", + "targets": [ + { + "expr": "count(count(node_cpu_seconds_total{instance=\"$node\",job=\"$job\"}) by (cpu))", + "interval": "", + "intervalFactor": 1, + "legendFormat": "", + "refId": "A", + "step": 240 + } + ], + "title": "CPU Cores", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "LocalPrometheus" + }, + "description": "System uptime", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "decimals": 1, + "mappings": [ + { + "options": { + "match": "null", + "result": { + "text": "N/A" + } + }, + "type": "special" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 2, + "w": 4, + "x": 20, + "y": 1 + }, + "hideTimeOverride": true, + "id": 15, + "links": [], + "maxDataPoints": 100, + "options": { + "colorMode": "none", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "pluginVersion": "8.4.7", + "targets": [ + { + "expr": "node_time_seconds{instance=\"$node\",job=\"$job\"} - node_boot_time_seconds{instance=\"$node\",job=\"$job\"}", + "intervalFactor": 1, + "refId": "A", + "step": 240 + } + ], + "title": "Uptime", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "LocalPrometheus" + }, + "description": "Total RootFS", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "decimals": 0, + "mappings": [ + { + "options": { + "match": "null", + "result": { + "text": "N/A" + } + }, + "type": "special" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "rgba(50, 172, 45, 0.97)", + "value": null + }, + { + "color": "rgba(237, 129, 40, 0.89)", + "value": 70 + }, + { + "color": "rgba(245, 54, 54, 0.9)", + "value": 90 + } + ] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 2, + "w": 2, + "x": 18, + "y": 3 + }, + "id": 23, + "links": [], + "maxDataPoints": 100, + "options": { + "colorMode": "none", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "pluginVersion": "8.4.7", + "targets": [ + { + "expr": "node_filesystem_size_bytes{instance=\"$node\",job=\"$job\",mountpoint=\"/\",fstype!=\"rootfs\"}", + "format": "time_series", + "hide": false, + "intervalFactor": 1, + "refId": "A", + "step": 240 + } + ], + "title": "RootFS Total", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "LocalPrometheus" + }, + "description": "Total RAM", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "decimals": 0, + "mappings": [ + { + "options": { + "match": "null", + "result": { + "text": "N/A" + } + }, + "type": "special" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 2, + "w": 2, + "x": 20, + "y": 3 + }, + "id": 75, + "links": [], + "maxDataPoints": 100, + "options": { + "colorMode": "none", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "pluginVersion": "8.4.7", + "targets": [ + { + "expr": "node_memory_MemTotal_bytes{instance=\"$node\",job=\"$job\"}", + "intervalFactor": 1, + "refId": "A", + "step": 240 + } + ], + "title": "RAM Total", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "LocalPrometheus" + }, + "description": "Total SWAP", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "decimals": 0, + "mappings": [ + { + "options": { + "match": "null", + "result": { + "text": "N/A" + } + }, + "type": "special" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 2, + "w": 2, + "x": 22, + "y": 3 + }, + "id": 18, + "links": [], + "maxDataPoints": 100, + "options": { + "colorMode": "none", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "pluginVersion": "8.4.7", + "targets": [ + { + "expr": "node_memory_SwapTotal_bytes{instance=\"$node\",job=\"$job\"}", + "intervalFactor": 1, + "refId": "A", + "step": 240 + } + ], + "title": "SWAP Total", + "type": "stat" + }, + { + "collapsed": false, + "datasource": { + "type": "prometheus", + "uid": "LocalPrometheus" + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 5 + }, + "id": 263, + "panels": [], + "title": "Basic CPU / Mem / Net / Disk", + "type": "row" + }, + { + "aliasColors": { + "Busy": "#EAB839", + "Busy Iowait": "#890F02", + "Busy other": "#1F78C1", + "Idle": "#052B51", + "Idle - Waiting for something to happen": "#052B51", + "guest": "#9AC48A", + "idle": "#052B51", + "iowait": "#EAB839", + "irq": "#BF1B00", + "nice": "#C15C17", + "softirq": "#E24D42", + "steal": "#FCE2DE", + "system": "#508642", + "user": "#5195CE" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "decimals": 2, + "description": "Basic CPU info", + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 4, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 6 + }, + "hiddenSeries": false, + "id": 77, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": 250, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "maxPerRow": 6, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": true, + "pluginVersion": "8.4.7", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "Busy Iowait", + "color": "#890F02" + }, + { + "alias": "Idle", + "color": "#7EB26D" + }, + { + "alias": "Busy System", + "color": "#EAB839" + }, + { + "alias": "Busy User", + "color": "#0A437C" + }, + { + "alias": "Busy Other", + "color": "#6D1F62" + } + ], + "spaceLength": 10, + "stack": true, + "steppedLine": false, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "LocalPrometheus" + }, + "exemplar": true, + "expr": "sum by (instance)(rate(node_cpu_seconds_total{mode=\"system\",instance=\"$node\",job=\"$job\"}[$__rate_interval])) * 100", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "Busy System", + "refId": "A", + "step": 240 + }, + { + "datasource": { + "type": "prometheus", + "uid": "LocalPrometheus" + }, + "exemplar": true, + "expr": "sum by (instance)(rate(node_cpu_seconds_total{mode='user',instance=\"$node\",job=\"$job\"}[$__rate_interval])) * 100", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "Busy User", + "refId": "B", + "step": 240 + }, + { + "datasource": { + "type": "prometheus", + "uid": "LocalPrometheus" + }, + "exemplar": true, + "expr": "sum by (instance)(rate(node_cpu_seconds_total{mode='iowait',instance=\"$node\",job=\"$job\"}[$__rate_interval])) * 100", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "Busy Iowait", + "refId": "C", + "step": 240 + }, + { + "datasource": { + "type": "prometheus", + "uid": "LocalPrometheus" + }, + "expr": "sum by (instance)(rate(node_cpu_seconds_total{mode=~\".*irq\",instance=\"$node\",job=\"$job\"}[$__rate_interval])) * 100", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Busy IRQs", + "refId": "D", + "step": 240 + }, + { + "datasource": { + "type": "prometheus", + "uid": "LocalPrometheus" + }, + "expr": "sum (rate(node_cpu_seconds_total{mode!='idle',mode!='user',mode!='system',mode!='iowait',mode!='irq',mode!='softirq',instance=\"$node\",job=\"$job\"}[$__rate_interval])) * 100", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Busy Other", + "refId": "E", + "step": 240 + }, + { + "datasource": { + "type": "prometheus", + "uid": "LocalPrometheus" + }, + "expr": "sum by (mode)(rate(node_cpu_seconds_total{mode='idle',instance=\"$node\",job=\"$job\"}[$__rate_interval])) * 100", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Idle", + "refId": "F", + "step": 240 + } + ], + "thresholds": [], + "timeRegions": [], + "title": "CPU Basic", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:123", + "format": "short", + "label": "", + "logBase": 1, + "max": "100", + "min": "0", + "show": true + }, + { + "$$hashKey": "object:124", + "format": "short", + "logBase": 1, + "show": false + } + ], + "yaxis": { + "align": false + } + }, + { + "aliasColors": { + "Apps": "#629E51", + "Buffers": "#614D93", + "Cache": "#6D1F62", + "Cached": "#511749", + "Committed": "#508642", + "Free": "#0A437C", + "Hardware Corrupted - Amount of RAM that the kernel identified as corrupted / not working": "#CFFAFF", + "Inactive": "#584477", + "PageTables": "#0A50A1", + "Page_Tables": "#0A50A1", + "RAM_Free": "#E0F9D7", + "SWAP Used": "#BF1B00", + "Slab": "#806EB7", + "Slab_Cache": "#E0752D", + "Swap": "#BF1B00", + "Swap Used": "#BF1B00", + "Swap_Cache": "#C15C17", + "Swap_Free": "#2F575E", + "Unused": "#EAB839" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "type": "prometheus", + "uid": "LocalPrometheus" + }, + "decimals": 2, + "description": "Basic memory usage", + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 4, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 6 + }, + "hiddenSeries": false, + "id": 78, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": 350, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "maxPerRow": 6, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "8.4.7", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "RAM Total", + "color": "#E0F9D7", + "fill": 0, + "stack": false + }, + { + "alias": "RAM Cache + Buffer", + "color": "#052B51" + }, + { + "alias": "RAM Free", + "color": "#7EB26D" + }, + { + "alias": "Avaliable", + "color": "#DEDAF7", + "fill": 0, + "stack": false + } + ], + "spaceLength": 10, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "node_memory_MemTotal_bytes{instance=\"$node\",job=\"$job\"}", + "format": "time_series", + "hide": false, + "intervalFactor": 1, + "legendFormat": "RAM Total", + "refId": "A", + "step": 240 + }, + { + "expr": "node_memory_MemTotal_bytes{instance=\"$node\",job=\"$job\"} - node_memory_MemFree_bytes{instance=\"$node\",job=\"$job\"} - (node_memory_Cached_bytes{instance=\"$node\",job=\"$job\"} + node_memory_Buffers_bytes{instance=\"$node\",job=\"$job\"} + node_memory_SReclaimable_bytes{instance=\"$node\",job=\"$job\"})", + "format": "time_series", + "hide": false, + "intervalFactor": 1, + "legendFormat": "RAM Used", + "refId": "B", + "step": 240 + }, + { + "expr": "node_memory_Cached_bytes{instance=\"$node\",job=\"$job\"} + node_memory_Buffers_bytes{instance=\"$node\",job=\"$job\"} + node_memory_SReclaimable_bytes{instance=\"$node\",job=\"$job\"}", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "RAM Cache + Buffer", + "refId": "C", + "step": 240 + }, + { + "expr": "node_memory_MemFree_bytes{instance=\"$node\",job=\"$job\"}", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "RAM Free", + "refId": "D", + "step": 240 + }, + { + "expr": "(node_memory_SwapTotal_bytes{instance=\"$node\",job=\"$job\"} - node_memory_SwapFree_bytes{instance=\"$node\",job=\"$job\"})", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "SWAP Used", + "refId": "E", + "step": 240 + } + ], + "thresholds": [], + "timeRegions": [], + "title": "Memory Basic", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "label": "", + "logBase": 1, + "min": "0", + "show": true + }, + { + "format": "short", + "logBase": 1, + "show": false + } + ], + "yaxis": { + "align": false + } + }, + { + "aliasColors": { + "Recv_bytes_eth2": "#7EB26D", + "Recv_bytes_lo": "#0A50A1", + "Recv_drop_eth2": "#6ED0E0", + "Recv_drop_lo": "#E0F9D7", + "Recv_errs_eth2": "#BF1B00", + "Recv_errs_lo": "#CCA300", + "Trans_bytes_eth2": "#7EB26D", + "Trans_bytes_lo": "#0A50A1", + "Trans_drop_eth2": "#6ED0E0", + "Trans_drop_lo": "#E0F9D7", + "Trans_errs_eth2": "#BF1B00", + "Trans_errs_lo": "#CCA300", + "recv_bytes_lo": "#0A50A1", + "recv_drop_eth0": "#99440A", + "recv_drop_lo": "#967302", + "recv_errs_eth0": "#BF1B00", + "recv_errs_lo": "#890F02", + "trans_bytes_eth0": "#7EB26D", + "trans_bytes_lo": "#0A50A1", + "trans_drop_eth0": "#99440A", + "trans_drop_lo": "#967302", + "trans_errs_eth0": "#BF1B00", + "trans_errs_lo": "#890F02" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "type": "prometheus", + "uid": "LocalPrometheus" + }, + "description": "Basic network info per interface", + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 4, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 13 + }, + "hiddenSeries": false, + "id": 74, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "hideEmpty": false, + "hideZero": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sort": "current", + "sortDesc": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "8.4.7", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "/.*trans.*/", + "transform": "negative-Y" + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "rate(node_network_receive_bytes_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])*8", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "recv {{device}}", + "refId": "A", + "step": 240 + }, + { + "expr": "rate(node_network_transmit_bytes_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])*8", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "trans {{device}} ", + "refId": "B", + "step": 240 + } + ], + "thresholds": [], + "timeRegions": [], + "title": "Network Traffic Basic", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bps", + "logBase": 1, + "show": true + }, + { + "format": "pps", + "label": "", + "logBase": 1, + "show": false + } + ], + "yaxis": { + "align": false + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "type": "prometheus", + "uid": "LocalPrometheus" + }, + "decimals": 3, + "description": "Disk space used of all filesystems mounted", + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 4, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 13 + }, + "height": "", + "hiddenSeries": false, + "id": 152, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sort": "current", + "sortDesc": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "maxPerRow": 6, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "8.4.7", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "100 - ((node_filesystem_avail_bytes{instance=\"$node\",job=\"$job\",device!~'rootfs'} * 100) / node_filesystem_size_bytes{instance=\"$node\",job=\"$job\",device!~'rootfs'})", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{mountpoint}}", + "refId": "A", + "step": 240 + } + ], + "thresholds": [], + "timeRegions": [], + "title": "Disk Space Used Basic", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "percent", + "logBase": 1, + "max": "100", + "min": "0", + "show": true + }, + { + "format": "short", + "logBase": 1, + "show": true + } + ], + "yaxis": { + "align": false + } + }, + { + "collapsed": true, + "datasource": { + "type": "prometheus", + "uid": "LocalPrometheus" + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 20 + }, + "id": 265, + "panels": [ + { + "aliasColors": { + "Idle - Waiting for something to happen": "#052B51", + "guest": "#9AC48A", + "idle": "#052B51", + "iowait": "#EAB839", + "irq": "#BF1B00", + "nice": "#C15C17", + "softirq": "#E24D42", + "steal": "#FCE2DE", + "system": "#508642", + "user": "#5195CE" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "type": "prometheus", + "uid": "LocalPrometheus" + }, + "decimals": 2, + "description": "", + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 4, + "fillGradient": 0, + "gridPos": { + "h": 12, + "w": 12, + "x": 0, + "y": 21 + }, + "hiddenSeries": false, + "id": 3, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": true, + "min": true, + "rightSide": false, + "show": true, + "sideWidth": 250, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "maxPerRow": 6, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": true, + "pluginVersion": "8.4.7", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum by (mode)(rate(node_cpu_seconds_total{mode=\"system\",instance=\"$node\",job=\"$job\"}[$__rate_interval])) * 100", + "format": "time_series", + "interval": "10s", + "intervalFactor": 1, + "legendFormat": "System - Processes executing in kernel mode", + "refId": "A", + "step": 240 + }, + { + "expr": "sum by (mode)(rate(node_cpu_seconds_total{mode='user',instance=\"$node\",job=\"$job\"}[$__rate_interval])) * 100", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "User - Normal processes executing in user mode", + "refId": "B", + "step": 240 + }, + { + "expr": "sum by (mode)(rate(node_cpu_seconds_total{mode='nice',instance=\"$node\",job=\"$job\"}[$__rate_interval])) * 100", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Nice - Niced processes executing in user mode", + "refId": "C", + "step": 240 + }, + { + "expr": "sum by (mode)(rate(node_cpu_seconds_total{mode='idle',instance=\"$node\",job=\"$job\"}[$__rate_interval])) * 100", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Idle - Waiting for something to happen", + "refId": "D", + "step": 240 + }, + { + "expr": "sum by (mode)(rate(node_cpu_seconds_total{mode='iowait',instance=\"$node\",job=\"$job\"}[$__rate_interval])) * 100", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Iowait - Waiting for I/O to complete", + "refId": "E", + "step": 240 + }, + { + "expr": "sum by (mode)(rate(node_cpu_seconds_total{mode='irq',instance=\"$node\",job=\"$job\"}[$__rate_interval])) * 100", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Irq - Servicing interrupts", + "refId": "F", + "step": 240 + }, + { + "expr": "sum by (mode)(rate(node_cpu_seconds_total{mode='softirq',instance=\"$node\",job=\"$job\"}[$__rate_interval])) * 100", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Softirq - Servicing softirqs", + "refId": "G", + "step": 240 + }, + { + "expr": "sum by (mode)(rate(node_cpu_seconds_total{mode='steal',instance=\"$node\",job=\"$job\"}[$__rate_interval])) * 100", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Steal - Time spent in other operating systems when running in a virtualized environment", + "refId": "H", + "step": 240 + }, + { + "expr": "sum by (mode)(rate(node_cpu_seconds_total{mode='guest',instance=\"$node\",job=\"$job\"}[$__rate_interval])) * 100", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Guest - Time spent running a virtual CPU for a guest operating system", + "refId": "I", + "step": 240 + } + ], + "thresholds": [], + "timeRegions": [], + "title": "CPU", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": "percentage", + "logBase": 1, + "max": "100", + "min": "0", + "show": true + }, + { + "format": "short", + "logBase": 1, + "show": false + } + ], + "yaxis": { + "align": false + } + }, + { + "aliasColors": { + "Apps": "#629E51", + "Buffers": "#614D93", + "Cache": "#6D1F62", + "Cached": "#511749", + "Committed": "#508642", + "Free": "#0A437C", + "Hardware Corrupted - Amount of RAM that the kernel identified as corrupted / not working": "#CFFAFF", + "Inactive": "#584477", + "PageTables": "#0A50A1", + "Page_Tables": "#0A50A1", + "RAM_Free": "#E0F9D7", + "Slab": "#806EB7", + "Slab_Cache": "#E0752D", + "Swap": "#BF1B00", + "Swap - Swap memory usage": "#BF1B00", + "Swap_Cache": "#C15C17", + "Swap_Free": "#2F575E", + "Unused": "#EAB839", + "Unused - Free memory unassigned": "#052B51" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "type": "prometheus", + "uid": "LocalPrometheus" + }, + "decimals": 2, + "description": "", + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 4, + "fillGradient": 0, + "gridPos": { + "h": 12, + "w": 12, + "x": 12, + "y": 21 + }, + "hiddenSeries": false, + "id": 24, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": true, + "min": true, + "rightSide": false, + "show": true, + "sideWidth": 350, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "maxPerRow": 6, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "8.4.7", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "/.*Hardware Corrupted - *./", + "stack": false + } + ], + "spaceLength": 10, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "node_memory_MemTotal_bytes{instance=\"$node\",job=\"$job\"} - node_memory_MemFree_bytes{instance=\"$node\",job=\"$job\"} - node_memory_Buffers_bytes{instance=\"$node\",job=\"$job\"} - node_memory_Cached_bytes{instance=\"$node\",job=\"$job\"} - node_memory_Slab_bytes{instance=\"$node\",job=\"$job\"} - node_memory_PageTables_bytes{instance=\"$node\",job=\"$job\"} - node_memory_SwapCached_bytes{instance=\"$node\",job=\"$job\"}", + "format": "time_series", + "hide": false, + "intervalFactor": 1, + "legendFormat": "Apps - Memory used by user-space applications", + "refId": "A", + "step": 240 + }, + { + "expr": "node_memory_PageTables_bytes{instance=\"$node\",job=\"$job\"}", + "format": "time_series", + "hide": false, + "intervalFactor": 1, + "legendFormat": "PageTables - Memory used to map between virtual and physical memory addresses", + "refId": "B", + "step": 240 + }, + { + "expr": "node_memory_SwapCached_bytes{instance=\"$node\",job=\"$job\"}", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "SwapCache - Memory that keeps track of pages that have been fetched from swap but not yet been modified", + "refId": "C", + "step": 240 + }, + { + "expr": "node_memory_Slab_bytes{instance=\"$node\",job=\"$job\"}", + "format": "time_series", + "hide": false, + "intervalFactor": 1, + "legendFormat": "Slab - Memory used by the kernel to cache data structures for its own use (caches like inode, dentry, etc)", + "refId": "D", + "step": 240 + }, + { + "expr": "node_memory_Cached_bytes{instance=\"$node\",job=\"$job\"}", + "format": "time_series", + "hide": false, + "intervalFactor": 1, + "legendFormat": "Cache - Parked file data (file content) cache", + "refId": "E", + "step": 240 + }, + { + "expr": "node_memory_Buffers_bytes{instance=\"$node\",job=\"$job\"}", + "format": "time_series", + "hide": false, + "intervalFactor": 1, + "legendFormat": "Buffers - Block device (e.g. harddisk) cache", + "refId": "F", + "step": 240 + }, + { + "expr": "node_memory_MemFree_bytes{instance=\"$node\",job=\"$job\"}", + "format": "time_series", + "hide": false, + "intervalFactor": 1, + "legendFormat": "Unused - Free memory unassigned", + "refId": "G", + "step": 240 + }, + { + "expr": "(node_memory_SwapTotal_bytes{instance=\"$node\",job=\"$job\"} - node_memory_SwapFree_bytes{instance=\"$node\",job=\"$job\"})", + "format": "time_series", + "hide": false, + "intervalFactor": 1, + "legendFormat": "Swap - Swap space used", + "refId": "H", + "step": 240 + }, + { + "expr": "node_memory_HardwareCorrupted_bytes{instance=\"$node\",job=\"$job\"}", + "format": "time_series", + "hide": false, + "intervalFactor": 1, + "legendFormat": "Hardware Corrupted - Amount of RAM that the kernel identified as corrupted / not working", + "refId": "I", + "step": 240 + } + ], + "thresholds": [], + "timeRegions": [], + "title": "Memory Stack", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "label": "bytes", + "logBase": 1, + "min": "0", + "show": true + }, + { + "format": "short", + "logBase": 1, + "show": false + } + ], + "yaxis": { + "align": false + } + }, + { + "aliasColors": { + "receive_packets_eth0": "#7EB26D", + "receive_packets_lo": "#E24D42", + "transmit_packets_eth0": "#7EB26D", + "transmit_packets_lo": "#E24D42" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "type": "prometheus", + "uid": "LocalPrometheus" + }, + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 4, + "fillGradient": 0, + "gridPos": { + "h": 12, + "w": 12, + "x": 0, + "y": 33 + }, + "hiddenSeries": false, + "id": 84, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": true, + "min": true, + "rightSide": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "8.4.7", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "$$hashKey": "object:5871", + "alias": "/.*Trans.*/", + "transform": "negative-Y" + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "rate(node_network_receive_bytes_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])*8", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{device}} - Receive", + "refId": "A", + "step": 240 + }, + { + "expr": "rate(node_network_transmit_bytes_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])*8", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{device}} - Transmit", + "refId": "B", + "step": 240 + } + ], + "thresholds": [], + "timeRegions": [], + "title": "Network Traffic", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:5884", + "format": "bps", + "label": "bits out (-) / in (+)", + "logBase": 1, + "show": true + }, + { + "$$hashKey": "object:5885", + "format": "short", + "logBase": 1, + "show": false + } + ], + "yaxis": { + "align": false + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "type": "prometheus", + "uid": "LocalPrometheus" + }, + "decimals": 3, + "description": "", + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 4, + "fillGradient": 0, + "gridPos": { + "h": 12, + "w": 12, + "x": 12, + "y": 33 + }, + "height": "", + "hiddenSeries": false, + "id": 156, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": true, + "min": true, + "rightSide": false, + "show": true, + "sort": "current", + "sortDesc": false, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "maxPerRow": 6, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "8.4.7", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "node_filesystem_size_bytes{instance=\"$node\",job=\"$job\",device!~'rootfs'} - node_filesystem_avail_bytes{instance=\"$node\",job=\"$job\",device!~'rootfs'}", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{mountpoint}}", + "refId": "A", + "step": 240 + } + ], + "thresholds": [], + "timeRegions": [], + "title": "Disk Space Used", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "label": "bytes", + "logBase": 1, + "min": "0", + "show": true + }, + { + "format": "short", + "logBase": 1, + "show": false + } + ], + "yaxis": { + "align": false + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "type": "prometheus", + "uid": "LocalPrometheus" + }, + "description": "", + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 12, + "w": 12, + "x": 0, + "y": 45 + }, + "hiddenSeries": false, + "id": 229, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "hideZero": true, + "max": true, + "min": true, + "rightSide": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "maxPerRow": 6, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "8.4.7", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "/.*Read.*/", + "transform": "negative-Y" + }, + { + "alias": "/.*sda_.*/", + "color": "#7EB26D" + }, + { + "alias": "/.*sdb_.*/", + "color": "#EAB839" + }, + { + "alias": "/.*sdc_.*/", + "color": "#6ED0E0" + }, + { + "alias": "/.*sdd_.*/", + "color": "#EF843C" + }, + { + "alias": "/.*sde_.*/", + "color": "#E24D42" + }, + { + "alias": "/.*sda1.*/", + "color": "#584477" + }, + { + "alias": "/.*sda2_.*/", + "color": "#BA43A9" + }, + { + "alias": "/.*sda3_.*/", + "color": "#F4D598" + }, + { + "alias": "/.*sdb1.*/", + "color": "#0A50A1" + }, + { + "alias": "/.*sdb2.*/", + "color": "#BF1B00" + }, + { + "alias": "/.*sdb2.*/", + "color": "#BF1B00" + }, + { + "alias": "/.*sdb3.*/", + "color": "#E0752D" + }, + { + "alias": "/.*sdc1.*/", + "color": "#962D82" + }, + { + "alias": "/.*sdc2.*/", + "color": "#614D93" + }, + { + "alias": "/.*sdc3.*/", + "color": "#9AC48A" + }, + { + "alias": "/.*sdd1.*/", + "color": "#65C5DB" + }, + { + "alias": "/.*sdd2.*/", + "color": "#F9934E" + }, + { + "alias": "/.*sdd3.*/", + "color": "#EA6460" + }, + { + "alias": "/.*sde1.*/", + "color": "#E0F9D7" + }, + { + "alias": "/.*sdd2.*/", + "color": "#FCEACA" + }, + { + "alias": "/.*sde3.*/", + "color": "#F9E2D2" + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "rate(node_disk_reads_completed_total{instance=\"$node\",job=\"$job\",device=~\"$diskdevices\"}[$__rate_interval])", + "intervalFactor": 4, + "legendFormat": "{{device}} - Reads completed", + "refId": "A", + "step": 240 + }, + { + "expr": "rate(node_disk_writes_completed_total{instance=\"$node\",job=\"$job\",device=~\"$diskdevices\"}[$__rate_interval])", + "intervalFactor": 1, + "legendFormat": "{{device}} - Writes completed", + "refId": "B", + "step": 240 + } + ], + "thresholds": [], + "timeRegions": [], + "title": "Disk IOps", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "iops", + "label": "IO read (-) / write (+)", + "logBase": 1, + "show": true + }, + { + "format": "short", + "logBase": 1, + "show": false + } + ], + "yaxis": { + "align": false + } + }, + { + "aliasColors": { + "io time": "#890F02" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "type": "prometheus", + "uid": "LocalPrometheus" + }, + "decimals": 3, + "description": "", + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 4, + "fillGradient": 0, + "gridPos": { + "h": 12, + "w": 12, + "x": 12, + "y": 45 + }, + "hiddenSeries": false, + "id": 42, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": true, + "min": true, + "rightSide": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "maxPerRow": 6, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "8.4.7", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "/.*read*./", + "transform": "negative-Y" + }, + { + "alias": "/.*sda.*/", + "color": "#7EB26D" + }, + { + "alias": "/.*sdb.*/", + "color": "#EAB839" + }, + { + "alias": "/.*sdc.*/", + "color": "#6ED0E0" + }, + { + "alias": "/.*sdd.*/", + "color": "#EF843C" + }, + { + "alias": "/.*sde.*/", + "color": "#E24D42" + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "rate(node_disk_read_bytes_total{instance=\"$node\",job=\"$job\",device=~\"$diskdevices\"}[$__rate_interval])", + "format": "time_series", + "hide": false, + "intervalFactor": 1, + "legendFormat": "{{device}} - Successfully read bytes", + "refId": "A", + "step": 240 + }, + { + "expr": "rate(node_disk_written_bytes_total{instance=\"$node\",job=\"$job\",device=~\"$diskdevices\"}[$__rate_interval])", + "format": "time_series", + "hide": false, + "intervalFactor": 1, + "legendFormat": "{{device}} - Successfully written bytes", + "refId": "B", + "step": 240 + } + ], + "thresholds": [], + "timeRegions": [], + "title": "I/O Usage Read / Write", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": false, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:965", + "format": "Bps", + "label": "bytes read (-) / write (+)", + "logBase": 1, + "show": true + }, + { + "$$hashKey": "object:966", + "format": "ms", + "label": "", + "logBase": 1, + "show": true + } + ], + "yaxis": { + "align": false + } + }, + { + "aliasColors": { + "io time": "#890F02" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "type": "prometheus", + "uid": "LocalPrometheus" + }, + "decimals": 3, + "description": "", + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 4, + "fillGradient": 0, + "gridPos": { + "h": 12, + "w": 12, + "x": 0, + "y": 57 + }, + "hiddenSeries": false, + "id": 127, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": true, + "min": true, + "rightSide": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "maxPerRow": 6, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "8.4.7", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "rate(node_disk_io_time_seconds_total{instance=\"$node\",job=\"$job\",device=~\"$diskdevices\"} [$__rate_interval])", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{device}}", + "refId": "A", + "step": 240 + } + ], + "thresholds": [], + "timeRegions": [], + "title": "I/O Utilization", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": false, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:1041", + "format": "percentunit", + "label": "%util", + "logBase": 1, + "min": "0", + "show": true + }, + { + "$$hashKey": "object:1042", + "format": "s", + "label": "", + "logBase": 1, + "show": false + } + ], + "yaxis": { + "align": false + } + } + ], + "title": "CPU / Memory / Net / Disk", + "type": "row" + }, + { + "collapsed": true, + "datasource": { + "type": "prometheus", + "uid": "LocalPrometheus" + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 21 + }, + "id": 266, + "panels": [ + { + "aliasColors": { + "Apps": "#629E51", + "Buffers": "#614D93", + "Cache": "#6D1F62", + "Cached": "#511749", + "Committed": "#508642", + "Free": "#0A437C", + "Hardware Corrupted - Amount of RAM that the kernel identified as corrupted / not working": "#CFFAFF", + "Inactive": "#584477", + "PageTables": "#0A50A1", + "Page_Tables": "#0A50A1", + "RAM_Free": "#E0F9D7", + "Slab": "#806EB7", + "Slab_Cache": "#E0752D", + "Swap": "#BF1B00", + "Swap_Cache": "#C15C17", + "Swap_Free": "#2F575E", + "Unused": "#EAB839" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "type": "prometheus", + "uid": "LocalPrometheus" + }, + "decimals": 2, + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 10, + "w": 12, + "x": 0, + "y": 22 + }, + "hiddenSeries": false, + "id": 136, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": true, + "min": true, + "rightSide": false, + "show": true, + "sideWidth": 350, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "maxPerRow": 2, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "8.4.7", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "node_memory_Inactive_bytes{instance=\"$node\",job=\"$job\"}", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Inactive - Memory which has been less recently used. It is more eligible to be reclaimed for other purposes", + "refId": "A", + "step": 240 + }, + { + "expr": "node_memory_Active_bytes{instance=\"$node\",job=\"$job\"}", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Active - Memory that has been used more recently and usually not reclaimed unless absolutely necessary", + "refId": "B", + "step": 240 + } + ], + "thresholds": [], + "timeRegions": [], + "title": "Memory Active / Inactive", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "label": "bytes", + "logBase": 1, + "min": "0", + "show": true + }, + { + "format": "short", + "logBase": 1, + "show": false + } + ], + "yaxis": { + "align": false + } + }, + { + "aliasColors": { + "Apps": "#629E51", + "Buffers": "#614D93", + "Cache": "#6D1F62", + "Cached": "#511749", + "Committed": "#508642", + "Free": "#0A437C", + "Hardware Corrupted - Amount of RAM that the kernel identified as corrupted / not working": "#CFFAFF", + "Inactive": "#584477", + "PageTables": "#0A50A1", + "Page_Tables": "#0A50A1", + "RAM_Free": "#E0F9D7", + "Slab": "#806EB7", + "Slab_Cache": "#E0752D", + "Swap": "#BF1B00", + "Swap_Cache": "#C15C17", + "Swap_Free": "#2F575E", + "Unused": "#EAB839" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "type": "prometheus", + "uid": "LocalPrometheus" + }, + "decimals": 2, + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 10, + "w": 12, + "x": 12, + "y": 22 + }, + "hiddenSeries": false, + "id": 135, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": true, + "min": true, + "rightSide": false, + "show": true, + "sideWidth": 350, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "maxPerRow": 6, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "8.4.7", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "/.*Committed_AS - *./" + }, + { + "alias": "/.*CommitLimit - *./", + "color": "#BF1B00", + "fill": 0 + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "node_memory_Committed_AS_bytes{instance=\"$node\",job=\"$job\"}", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Committed_AS - Amount of memory presently allocated on the system", + "refId": "A", + "step": 240 + }, + { + "expr": "node_memory_CommitLimit_bytes{instance=\"$node\",job=\"$job\"}", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "CommitLimit - Amount of memory currently available to be allocated on the system", + "refId": "B", + "step": 240 + } + ], + "thresholds": [], + "timeRegions": [], + "title": "Memory Commited", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "label": "bytes", + "logBase": 1, + "min": "0", + "show": true + }, + { + "format": "short", + "logBase": 1, + "show": false + } + ], + "yaxis": { + "align": false + } + }, + { + "aliasColors": { + "Apps": "#629E51", + "Buffers": "#614D93", + "Cache": "#6D1F62", + "Cached": "#511749", + "Committed": "#508642", + "Free": "#0A437C", + "Hardware Corrupted - Amount of RAM that the kernel identified as corrupted / not working": "#CFFAFF", + "Inactive": "#584477", + "PageTables": "#0A50A1", + "Page_Tables": "#0A50A1", + "RAM_Free": "#E0F9D7", + "Slab": "#806EB7", + "Slab_Cache": "#E0752D", + "Swap": "#BF1B00", + "Swap_Cache": "#C15C17", + "Swap_Free": "#2F575E", + "Unused": "#EAB839" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "type": "prometheus", + "uid": "LocalPrometheus" + }, + "decimals": 2, + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 10, + "w": 12, + "x": 0, + "y": 32 + }, + "hiddenSeries": false, + "id": 191, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": true, + "min": true, + "rightSide": false, + "show": true, + "sideWidth": 350, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "maxPerRow": 6, + "nullPointMode": "null", + "percentage": false, + "pluginVersion": "8.4.7", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "node_memory_Inactive_file_bytes{instance=\"$node\",job=\"$job\"}", + "format": "time_series", + "hide": false, + "intervalFactor": 1, + "legendFormat": "Inactive_file - File-backed memory on inactive LRU list", + "refId": "A", + "step": 240 + }, + { + "expr": "node_memory_Inactive_anon_bytes{instance=\"$node\",job=\"$job\"}", + "format": "time_series", + "hide": false, + "intervalFactor": 1, + "legendFormat": "Inactive_anon - Anonymous and swap cache on inactive LRU list, including tmpfs (shmem)", + "refId": "B", + "step": 240 + }, + { + "expr": "node_memory_Active_file_bytes{instance=\"$node\",job=\"$job\"}", + "format": "time_series", + "hide": false, + "intervalFactor": 1, + "legendFormat": "Active_file - File-backed memory on active LRU list", + "refId": "C", + "step": 240 + }, + { + "expr": "node_memory_Active_anon_bytes{instance=\"$node\",job=\"$job\"}", + "format": "time_series", + "hide": false, + "intervalFactor": 1, + "legendFormat": "Active_anon - Anonymous and swap cache on active least-recently-used (LRU) list, including tmpfs", + "refId": "D", + "step": 240 + } + ], + "thresholds": [], + "timeRegions": [], + "title": "Memory Active / Inactive Detail", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "label": "bytes", + "logBase": 1, + "min": "0", + "show": true + }, + { + "format": "bytes", + "logBase": 1, + "show": true + } + ], + "yaxis": { + "align": false + } + }, + { + "aliasColors": { + "Active": "#99440A", + "Buffers": "#58140C", + "Cache": "#6D1F62", + "Cached": "#511749", + "Committed": "#508642", + "Dirty": "#6ED0E0", + "Free": "#B7DBAB", + "Inactive": "#EA6460", + "Mapped": "#052B51", + "PageTables": "#0A50A1", + "Page_Tables": "#0A50A1", + "Slab_Cache": "#EAB839", + "Swap": "#BF1B00", + "Swap_Cache": "#C15C17", + "Total": "#511749", + "Total RAM": "#052B51", + "Total RAM + Swap": "#052B51", + "Total Swap": "#614D93", + "VmallocUsed": "#EA6460" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "type": "prometheus", + "uid": "LocalPrometheus" + }, + "decimals": 2, + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 10, + "w": 12, + "x": 12, + "y": 32 + }, + "hiddenSeries": false, + "id": 130, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": true, + "min": true, + "rightSide": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "maxPerRow": 2, + "nullPointMode": "null", + "percentage": false, + "pluginVersion": "8.4.7", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "node_memory_Writeback_bytes{instance=\"$node\",job=\"$job\"}", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Writeback - Memory which is actively being written back to disk", + "refId": "A", + "step": 240 + }, + { + "expr": "node_memory_WritebackTmp_bytes{instance=\"$node\",job=\"$job\"}", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "WritebackTmp - Memory used by FUSE for temporary writeback buffers", + "refId": "B", + "step": 240 + }, + { + "expr": "node_memory_Dirty_bytes{instance=\"$node\",job=\"$job\"}", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Dirty - Memory which is waiting to get written back to the disk", + "refId": "C", + "step": 240 + } + ], + "thresholds": [], + "timeRegions": [], + "title": "Memory Writeback and Dirty", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "label": "bytes", + "logBase": 1, + "min": "0", + "show": true + }, + { + "format": "short", + "logBase": 1, + "show": false + } + ], + "yaxis": { + "align": false + } + }, + { + "aliasColors": { + "Apps": "#629E51", + "Buffers": "#614D93", + "Cache": "#6D1F62", + "Cached": "#511749", + "Committed": "#508642", + "Free": "#0A437C", + "Hardware Corrupted - Amount of RAM that the kernel identified as corrupted / not working": "#CFFAFF", + "Inactive": "#584477", + "PageTables": "#0A50A1", + "Page_Tables": "#0A50A1", + "RAM_Free": "#E0F9D7", + "Slab": "#806EB7", + "Slab_Cache": "#E0752D", + "Swap": "#BF1B00", + "Swap_Cache": "#C15C17", + "Swap_Free": "#2F575E", + "Unused": "#EAB839" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "type": "prometheus", + "uid": "LocalPrometheus" + }, + "decimals": 2, + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 10, + "w": 12, + "x": 0, + "y": 42 + }, + "hiddenSeries": false, + "id": 138, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": true, + "min": true, + "rightSide": false, + "show": true, + "sideWidth": 350, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "maxPerRow": 6, + "nullPointMode": "null", + "percentage": false, + "pluginVersion": "8.4.7", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "$$hashKey": "object:4131", + "alias": "ShmemHugePages - Memory used by shared memory (shmem) and tmpfs allocated with huge pages", + "fill": 0 + }, + { + "$$hashKey": "object:4138", + "alias": "ShmemHugePages - Memory used by shared memory (shmem) and tmpfs allocated with huge pages", + "fill": 0 + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "node_memory_Mapped_bytes{instance=\"$node\",job=\"$job\"}", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Mapped - Used memory in mapped pages files which have been mmaped, such as libraries", + "refId": "A", + "step": 240 + }, + { + "expr": "node_memory_Shmem_bytes{instance=\"$node\",job=\"$job\"}", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Shmem - Used shared memory (shared between several processes, thus including RAM disks)", + "refId": "B", + "step": 240 + }, + { + "expr": "node_memory_ShmemHugePages_bytes{instance=\"$node\",job=\"$job\"}", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "ShmemHugePages - Memory used by shared memory (shmem) and tmpfs allocated with huge pages", + "refId": "C", + "step": 240 + }, + { + "expr": "node_memory_ShmemPmdMapped_bytes{instance=\"$node\",job=\"$job\"}", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "ShmemPmdMapped - Ammount of shared (shmem/tmpfs) memory backed by huge pages", + "refId": "D", + "step": 240 + } + ], + "thresholds": [], + "timeRegions": [], + "title": "Memory Shared and Mapped", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:4106", + "format": "bytes", + "label": "bytes", + "logBase": 1, + "min": "0", + "show": true + }, + { + "$$hashKey": "object:4107", + "format": "short", + "logBase": 1, + "show": false + } + ], + "yaxis": { + "align": false + } + }, + { + "aliasColors": { + "Active": "#99440A", + "Buffers": "#58140C", + "Cache": "#6D1F62", + "Cached": "#511749", + "Committed": "#508642", + "Dirty": "#6ED0E0", + "Free": "#B7DBAB", + "Inactive": "#EA6460", + "Mapped": "#052B51", + "PageTables": "#0A50A1", + "Page_Tables": "#0A50A1", + "Slab_Cache": "#EAB839", + "Swap": "#BF1B00", + "Swap_Cache": "#C15C17", + "Total": "#511749", + "Total RAM": "#052B51", + "Total RAM + Swap": "#052B51", + "Total Swap": "#614D93", + "VmallocUsed": "#EA6460" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "type": "prometheus", + "uid": "LocalPrometheus" + }, + "decimals": 2, + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 10, + "w": 12, + "x": 12, + "y": 42 + }, + "hiddenSeries": false, + "id": 131, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": true, + "min": true, + "rightSide": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "maxPerRow": 2, + "nullPointMode": "null", + "percentage": false, + "pluginVersion": "8.4.7", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "node_memory_SUnreclaim_bytes{instance=\"$node\",job=\"$job\"}", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "SUnreclaim - Part of Slab, that cannot be reclaimed on memory pressure", + "refId": "A", + "step": 240 + }, + { + "expr": "node_memory_SReclaimable_bytes{instance=\"$node\",job=\"$job\"}", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "SReclaimable - Part of Slab, that might be reclaimed, such as caches", + "refId": "B", + "step": 240 + } + ], + "thresholds": [], + "timeRegions": [], + "title": "Memory Slab", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "label": "bytes", + "logBase": 1, + "min": "0", + "show": true + }, + { + "format": "short", + "logBase": 1, + "show": false + } + ], + "yaxis": { + "align": false + } + }, + { + "aliasColors": { + "Active": "#99440A", + "Buffers": "#58140C", + "Cache": "#6D1F62", + "Cached": "#511749", + "Committed": "#508642", + "Dirty": "#6ED0E0", + "Free": "#B7DBAB", + "Inactive": "#EA6460", + "Mapped": "#052B51", + "PageTables": "#0A50A1", + "Page_Tables": "#0A50A1", + "Slab_Cache": "#EAB839", + "Swap": "#BF1B00", + "Swap_Cache": "#C15C17", + "Total": "#511749", + "Total RAM": "#052B51", + "Total RAM + Swap": "#052B51", + "VmallocUsed": "#EA6460" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "type": "prometheus", + "uid": "LocalPrometheus" + }, + "decimals": 2, + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 10, + "w": 12, + "x": 0, + "y": 52 + }, + "hiddenSeries": false, + "id": 70, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": true, + "min": true, + "rightSide": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "maxPerRow": 6, + "nullPointMode": "null", + "percentage": false, + "pluginVersion": "8.4.7", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "node_memory_VmallocChunk_bytes{instance=\"$node\",job=\"$job\"}", + "format": "time_series", + "hide": false, + "intervalFactor": 1, + "legendFormat": "VmallocChunk - Largest contigious block of vmalloc area which is free", + "refId": "A", + "step": 240 + }, + { + "expr": "node_memory_VmallocTotal_bytes{instance=\"$node\",job=\"$job\"}", + "format": "time_series", + "hide": false, + "intervalFactor": 1, + "legendFormat": "VmallocTotal - Total size of vmalloc memory area", + "refId": "B", + "step": 240 + }, + { + "expr": "node_memory_VmallocUsed_bytes{instance=\"$node\",job=\"$job\"}", + "format": "time_series", + "hide": false, + "intervalFactor": 1, + "legendFormat": "VmallocUsed - Amount of vmalloc area which is used", + "refId": "C", + "step": 240 + } + ], + "thresholds": [], + "timeRegions": [], + "title": "Memory Vmalloc", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "label": "bytes", + "logBase": 1, + "min": "0", + "show": true + }, + { + "format": "short", + "logBase": 1, + "show": false + } + ], + "yaxis": { + "align": false + } + }, + { + "aliasColors": { + "Apps": "#629E51", + "Buffers": "#614D93", + "Cache": "#6D1F62", + "Cached": "#511749", + "Committed": "#508642", + "Free": "#0A437C", + "Hardware Corrupted - Amount of RAM that the kernel identified as corrupted / not working": "#CFFAFF", + "Inactive": "#584477", + "PageTables": "#0A50A1", + "Page_Tables": "#0A50A1", + "RAM_Free": "#E0F9D7", + "Slab": "#806EB7", + "Slab_Cache": "#E0752D", + "Swap": "#BF1B00", + "Swap_Cache": "#C15C17", + "Swap_Free": "#2F575E", + "Unused": "#EAB839" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "type": "prometheus", + "uid": "LocalPrometheus" + }, + "decimals": 2, + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 10, + "w": 12, + "x": 12, + "y": 52 + }, + "hiddenSeries": false, + "id": 159, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": true, + "min": true, + "rightSide": false, + "show": true, + "sideWidth": 350, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "maxPerRow": 6, + "nullPointMode": "null", + "percentage": false, + "pluginVersion": "8.4.7", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "node_memory_Bounce_bytes{instance=\"$node\",job=\"$job\"}", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Bounce - Memory used for block device bounce buffers", + "refId": "A", + "step": 240 + } + ], + "thresholds": [], + "timeRegions": [], + "title": "Memory Bounce", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "label": "bytes", + "logBase": 1, + "min": "0", + "show": true + }, + { + "format": "short", + "logBase": 1, + "show": false + } + ], + "yaxis": { + "align": false + } + }, + { + "aliasColors": { + "Active": "#99440A", + "Buffers": "#58140C", + "Cache": "#6D1F62", + "Cached": "#511749", + "Committed": "#508642", + "Dirty": "#6ED0E0", + "Free": "#B7DBAB", + "Inactive": "#EA6460", + "Mapped": "#052B51", + "PageTables": "#0A50A1", + "Page_Tables": "#0A50A1", + "Slab_Cache": "#EAB839", + "Swap": "#BF1B00", + "Swap_Cache": "#C15C17", + "Total": "#511749", + "Total RAM": "#052B51", + "Total RAM + Swap": "#052B51", + "VmallocUsed": "#EA6460" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "type": "prometheus", + "uid": "LocalPrometheus" + }, + "decimals": 2, + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 10, + "w": 12, + "x": 0, + "y": 62 + }, + "hiddenSeries": false, + "id": 129, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": true, + "min": true, + "rightSide": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "maxPerRow": 6, + "nullPointMode": "null", + "percentage": false, + "pluginVersion": "8.4.7", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "/.*Inactive *./", + "transform": "negative-Y" + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "node_memory_AnonHugePages_bytes{instance=\"$node\",job=\"$job\"}", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "AnonHugePages - Memory in anonymous huge pages", + "refId": "A", + "step": 240 + }, + { + "expr": "node_memory_AnonPages_bytes{instance=\"$node\",job=\"$job\"}", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "AnonPages - Memory in user pages not backed by files", + "refId": "B", + "step": 240 + } + ], + "thresholds": [], + "timeRegions": [], + "title": "Memory Anonymous", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "label": "bytes", + "logBase": 1, + "min": "0", + "show": true + }, + { + "format": "short", + "logBase": 1, + "show": false + } + ], + "yaxis": { + "align": false + } + }, + { + "aliasColors": { + "Apps": "#629E51", + "Buffers": "#614D93", + "Cache": "#6D1F62", + "Cached": "#511749", + "Committed": "#508642", + "Free": "#0A437C", + "Hardware Corrupted - Amount of RAM that the kernel identified as corrupted / not working": "#CFFAFF", + "Inactive": "#584477", + "PageTables": "#0A50A1", + "Page_Tables": "#0A50A1", + "RAM_Free": "#E0F9D7", + "Slab": "#806EB7", + "Slab_Cache": "#E0752D", + "Swap": "#BF1B00", + "Swap_Cache": "#C15C17", + "Swap_Free": "#2F575E", + "Unused": "#EAB839" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "type": "prometheus", + "uid": "LocalPrometheus" + }, + "decimals": 2, + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 10, + "w": 12, + "x": 12, + "y": 62 + }, + "hiddenSeries": false, + "id": 160, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": true, + "min": true, + "rightSide": false, + "show": true, + "sideWidth": 350, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "maxPerRow": 2, + "nullPointMode": "null", + "percentage": false, + "pluginVersion": "8.4.7", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "node_memory_KernelStack_bytes{instance=\"$node\",job=\"$job\"}", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "KernelStack - Kernel memory stack. This is not reclaimable", + "refId": "A", + "step": 240 + }, + { + "expr": "node_memory_Percpu_bytes{instance=\"$node\",job=\"$job\"}", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "PerCPU - Per CPU memory allocated dynamically by loadable modules", + "refId": "B", + "step": 240 + } + ], + "thresholds": [], + "timeRegions": [], + "title": "Memory Kernel / CPU", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "label": "bytes", + "logBase": 1, + "min": "0", + "show": true + }, + { + "format": "short", + "logBase": 1, + "show": false + } + ], + "yaxis": { + "align": false + } + }, + { + "aliasColors": { + "Active": "#99440A", + "Buffers": "#58140C", + "Cache": "#6D1F62", + "Cached": "#511749", + "Committed": "#508642", + "Dirty": "#6ED0E0", + "Free": "#B7DBAB", + "Inactive": "#EA6460", + "Mapped": "#052B51", + "PageTables": "#0A50A1", + "Page_Tables": "#0A50A1", + "Slab_Cache": "#EAB839", + "Swap": "#BF1B00", + "Swap_Cache": "#C15C17", + "Total": "#511749", + "Total RAM": "#806EB7", + "Total RAM + Swap": "#806EB7", + "VmallocUsed": "#EA6460" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "type": "prometheus", + "uid": "LocalPrometheus" + }, + "decimals": 2, + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 10, + "w": 12, + "x": 0, + "y": 72 + }, + "hiddenSeries": false, + "id": 140, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "max": true, + "min": true, + "rightSide": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "maxPerRow": 6, + "nullPointMode": "null", + "percentage": false, + "pluginVersion": "8.4.7", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "node_memory_HugePages_Free{instance=\"$node\",job=\"$job\"}", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "HugePages_Free - Huge pages in the pool that are not yet allocated", + "refId": "A", + "step": 240 + }, + { + "expr": "node_memory_HugePages_Rsvd{instance=\"$node\",job=\"$job\"}", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "HugePages_Rsvd - Huge pages for which a commitment to allocate from the pool has been made, but no allocation has yet been made", + "refId": "B", + "step": 240 + }, + { + "expr": "node_memory_HugePages_Surp{instance=\"$node\",job=\"$job\"}", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "HugePages_Surp - Huge pages in the pool above the value in /proc/sys/vm/nr_hugepages", + "refId": "C", + "step": 240 + } + ], + "thresholds": [], + "timeRegions": [], + "title": "Memory HugePages Counter", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": "pages", + "logBase": 1, + "min": "0", + "show": true + }, + { + "format": "short", + "label": "", + "logBase": 1, + "show": false + } + ], + "yaxis": { + "align": false + } + }, + { + "aliasColors": { + "Active": "#99440A", + "Buffers": "#58140C", + "Cache": "#6D1F62", + "Cached": "#511749", + "Committed": "#508642", + "Dirty": "#6ED0E0", + "Free": "#B7DBAB", + "Inactive": "#EA6460", + "Mapped": "#052B51", + "PageTables": "#0A50A1", + "Page_Tables": "#0A50A1", + "Slab_Cache": "#EAB839", + "Swap": "#BF1B00", + "Swap_Cache": "#C15C17", + "Total": "#511749", + "Total RAM": "#806EB7", + "Total RAM + Swap": "#806EB7", + "VmallocUsed": "#EA6460" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "type": "prometheus", + "uid": "LocalPrometheus" + }, + "decimals": 2, + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 10, + "w": 12, + "x": 12, + "y": 72 + }, + "hiddenSeries": false, + "id": 71, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "max": true, + "min": true, + "rightSide": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "maxPerRow": 2, + "nullPointMode": "null", + "percentage": false, + "pluginVersion": "8.4.7", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "node_memory_HugePages_Total{instance=\"$node\",job=\"$job\"}", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "HugePages - Total size of the pool of huge pages", + "refId": "A", + "step": 240 + }, + { + "expr": "node_memory_Hugepagesize_bytes{instance=\"$node\",job=\"$job\"}", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Hugepagesize - Huge Page size", + "refId": "B", + "step": 240 + } + ], + "thresholds": [], + "timeRegions": [], + "title": "Memory HugePages Size", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "label": "bytes", + "logBase": 1, + "min": "0", + "show": true + }, + { + "format": "short", + "label": "", + "logBase": 1, + "show": false + } + ], + "yaxis": { + "align": false + } + }, + { + "aliasColors": { + "Active": "#99440A", + "Buffers": "#58140C", + "Cache": "#6D1F62", + "Cached": "#511749", + "Committed": "#508642", + "Dirty": "#6ED0E0", + "Free": "#B7DBAB", + "Inactive": "#EA6460", + "Mapped": "#052B51", + "PageTables": "#0A50A1", + "Page_Tables": "#0A50A1", + "Slab_Cache": "#EAB839", + "Swap": "#BF1B00", + "Swap_Cache": "#C15C17", + "Total": "#511749", + "Total RAM": "#052B51", + "Total RAM + Swap": "#052B51", + "VmallocUsed": "#EA6460" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "type": "prometheus", + "uid": "LocalPrometheus" + }, + "decimals": 2, + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 10, + "w": 12, + "x": 0, + "y": 82 + }, + "hiddenSeries": false, + "id": 128, + "legend": { + "alignAsTable": true, + "avg": true, + "current": false, + "hideEmpty": false, + "hideZero": false, + "max": true, + "min": true, + "rightSide": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "maxPerRow": 6, + "nullPointMode": "null", + "percentage": false, + "pluginVersion": "8.4.7", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "node_memory_DirectMap1G_bytes{instance=\"$node\",job=\"$job\"}", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "DirectMap1G - Amount of pages mapped as this size", + "refId": "A", + "step": 240 + }, + { + "expr": "node_memory_DirectMap2M_bytes{instance=\"$node\",job=\"$job\"}", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "DirectMap2M - Amount of pages mapped as this size", + "refId": "B", + "step": 240 + }, + { + "expr": "node_memory_DirectMap4k_bytes{instance=\"$node\",job=\"$job\"}", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "DirectMap4K - Amount of pages mapped as this size", + "refId": "C", + "step": 240 + } + ], + "thresholds": [], + "timeRegions": [], + "title": "Memory DirectMap", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "label": "bytes", + "logBase": 1, + "min": "0", + "show": true + }, + { + "format": "short", + "logBase": 1, + "show": false + } + ], + "yaxis": { + "align": false + } + }, + { + "aliasColors": { + "Apps": "#629E51", + "Buffers": "#614D93", + "Cache": "#6D1F62", + "Cached": "#511749", + "Committed": "#508642", + "Free": "#0A437C", + "Hardware Corrupted - Amount of RAM that the kernel identified as corrupted / not working": "#CFFAFF", + "Inactive": "#584477", + "PageTables": "#0A50A1", + "Page_Tables": "#0A50A1", + "RAM_Free": "#E0F9D7", + "Slab": "#806EB7", + "Slab_Cache": "#E0752D", + "Swap": "#BF1B00", + "Swap_Cache": "#C15C17", + "Swap_Free": "#2F575E", + "Unused": "#EAB839" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "type": "prometheus", + "uid": "LocalPrometheus" + }, + "decimals": 2, + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 10, + "w": 12, + "x": 12, + "y": 82 + }, + "hiddenSeries": false, + "id": 137, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": true, + "min": true, + "rightSide": false, + "show": true, + "sideWidth": 350, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "maxPerRow": 6, + "nullPointMode": "null", + "percentage": false, + "pluginVersion": "8.4.7", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "node_memory_Unevictable_bytes{instance=\"$node\",job=\"$job\"}", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Unevictable - Amount of unevictable memory that can't be swapped out for a variety of reasons", + "refId": "A", + "step": 240 + }, + { + "expr": "node_memory_Mlocked_bytes{instance=\"$node\",job=\"$job\"}", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "MLocked - Size of pages locked to memory using the mlock() system call", + "refId": "B", + "step": 240 + } + ], + "thresholds": [], + "timeRegions": [], + "title": "Memory Unevictable and MLocked", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "label": "bytes", + "logBase": 1, + "min": "0", + "show": true + }, + { + "format": "short", + "logBase": 1, + "show": false + } + ], + "yaxis": { + "align": false + } + }, + { + "aliasColors": { + "Active": "#99440A", + "Buffers": "#58140C", + "Cache": "#6D1F62", + "Cached": "#511749", + "Committed": "#508642", + "Dirty": "#6ED0E0", + "Free": "#B7DBAB", + "Inactive": "#EA6460", + "Mapped": "#052B51", + "PageTables": "#0A50A1", + "Page_Tables": "#0A50A1", + "Slab_Cache": "#EAB839", + "Swap": "#BF1B00", + "Swap_Cache": "#C15C17", + "Total": "#511749", + "Total RAM": "#052B51", + "Total RAM + Swap": "#052B51", + "Total Swap": "#614D93", + "VmallocUsed": "#EA6460" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "type": "prometheus", + "uid": "LocalPrometheus" + }, + "decimals": 2, + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 10, + "w": 12, + "x": 0, + "y": 92 + }, + "hiddenSeries": false, + "id": 132, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": true, + "min": true, + "rightSide": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "maxPerRow": 6, + "nullPointMode": "null", + "percentage": false, + "pluginVersion": "8.4.7", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "node_memory_NFS_Unstable_bytes{instance=\"$node\",job=\"$job\"}", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "NFS Unstable - Memory in NFS pages sent to the server, but not yet commited to the storage", + "refId": "A", + "step": 240 + } + ], + "thresholds": [], + "timeRegions": [], + "title": "Memory NFS", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "label": "bytes", + "logBase": 1, + "min": "0", + "show": true + }, + { + "format": "short", + "logBase": 1, + "show": false + } + ], + "yaxis": { + "align": false + } + } + ], + "title": "Memory Meminfo", + "type": "row" + }, + { + "collapsed": true, + "datasource": { + "type": "prometheus", + "uid": "LocalPrometheus" + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 22 + }, + "id": 267, + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "type": "prometheus", + "uid": "LocalPrometheus" + }, + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 10, + "w": 12, + "x": 0, + "y": 23 + }, + "hiddenSeries": false, + "id": 176, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": true, + "min": true, + "rightSide": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "maxPerRow": 6, + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "/.*out/", + "transform": "negative-Y" + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "rate(node_vmstat_pgpgin{instance=\"$node\",job=\"$job\"}[$__rate_interval])", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Pagesin - Page in operations", + "refId": "A", + "step": 240 + }, + { + "expr": "rate(node_vmstat_pgpgout{instance=\"$node\",job=\"$job\"}[$__rate_interval])", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Pagesout - Page out operations", + "refId": "B", + "step": 240 + } + ], + "thresholds": [], + "timeRegions": [], + "title": "Memory Pages In / Out", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": "pages out (-) / in (+)", + "logBase": 1, + "show": true + }, + { + "format": "short", + "logBase": 1, + "show": false + } + ], + "yaxis": { + "align": false + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "type": "prometheus", + "uid": "LocalPrometheus" + }, + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 10, + "w": 12, + "x": 12, + "y": 23 + }, + "hiddenSeries": false, + "id": 22, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": true, + "min": true, + "rightSide": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "maxPerRow": 6, + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "/.*out/", + "transform": "negative-Y" + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "rate(node_vmstat_pswpin{instance=\"$node\",job=\"$job\"}[$__rate_interval])", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Pswpin - Pages swapped in", + "refId": "A", + "step": 240 + }, + { + "expr": "rate(node_vmstat_pswpout{instance=\"$node\",job=\"$job\"}[$__rate_interval])", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Pswpout - Pages swapped out", + "refId": "B", + "step": 240 + } + ], + "thresholds": [], + "timeRegions": [], + "title": "Memory Pages Swap In / Out", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": "pages out (-) / in (+)", + "logBase": 1, + "show": true + }, + { + "format": "short", + "logBase": 1, + "show": false + } + ], + "yaxis": { + "align": false + } + }, + { + "aliasColors": { + "Apps": "#629E51", + "Buffers": "#614D93", + "Cache": "#6D1F62", + "Cached": "#511749", + "Committed": "#508642", + "Free": "#0A437C", + "Hardware Corrupted - Amount of RAM that the kernel identified as corrupted / not working": "#CFFAFF", + "Inactive": "#584477", + "PageTables": "#0A50A1", + "Page_Tables": "#0A50A1", + "RAM_Free": "#E0F9D7", + "Slab": "#806EB7", + "Slab_Cache": "#E0752D", + "Swap": "#BF1B00", + "Swap_Cache": "#C15C17", + "Swap_Free": "#2F575E", + "Unused": "#EAB839" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "type": "prometheus", + "uid": "LocalPrometheus" + }, + "decimals": 2, + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 10, + "w": 12, + "x": 0, + "y": 33 + }, + "hiddenSeries": false, + "id": 175, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": true, + "min": true, + "rightSide": false, + "show": true, + "sideWidth": 350, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "maxPerRow": 6, + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "$$hashKey": "object:6118", + "alias": "Pgfault - Page major and minor fault operations", + "fill": 0, + "stack": false + } + ], + "spaceLength": 10, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "rate(node_vmstat_pgfault{instance=\"$node\",job=\"$job\"}[$__rate_interval])", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Pgfault - Page major and minor fault operations", + "refId": "A", + "step": 240 + }, + { + "expr": "rate(node_vmstat_pgmajfault{instance=\"$node\",job=\"$job\"}[$__rate_interval])", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Pgmajfault - Major page fault operations", + "refId": "B", + "step": 240 + }, + { + "expr": "rate(node_vmstat_pgfault{instance=\"$node\",job=\"$job\"}[$__rate_interval]) - rate(node_vmstat_pgmajfault{instance=\"$node\",job=\"$job\"}[$__rate_interval])", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Pgminfault - Minor page fault operations", + "refId": "C", + "step": 240 + } + ], + "thresholds": [], + "timeRegions": [], + "title": "Memory Page Faults", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:6133", + "format": "short", + "label": "faults", + "logBase": 1, + "min": "0", + "show": true + }, + { + "$$hashKey": "object:6134", + "format": "short", + "logBase": 1, + "show": false + } + ], + "yaxis": { + "align": false + } + }, + { + "aliasColors": { + "Active": "#99440A", + "Buffers": "#58140C", + "Cache": "#6D1F62", + "Cached": "#511749", + "Committed": "#508642", + "Dirty": "#6ED0E0", + "Free": "#B7DBAB", + "Inactive": "#EA6460", + "Mapped": "#052B51", + "PageTables": "#0A50A1", + "Page_Tables": "#0A50A1", + "Slab_Cache": "#EAB839", + "Swap": "#BF1B00", + "Swap_Cache": "#C15C17", + "Total": "#511749", + "Total RAM": "#052B51", + "Total RAM + Swap": "#052B51", + "Total Swap": "#614D93", + "VmallocUsed": "#EA6460" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "type": "prometheus", + "uid": "LocalPrometheus" + }, + "decimals": 2, + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 10, + "w": 12, + "x": 12, + "y": 33 + }, + "hiddenSeries": false, + "id": 307, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": true, + "min": true, + "rightSide": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "maxPerRow": 6, + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "rate(node_vmstat_oom_kill{instance=\"$node\",job=\"$job\"}[$__rate_interval])", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "oom killer invocations ", + "refId": "A", + "step": 240 + } + ], + "thresholds": [], + "timeRegions": [], + "title": "OOM Killer", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:5373", + "format": "short", + "label": "counter", + "logBase": 1, + "min": "0", + "show": true + }, + { + "$$hashKey": "object:5374", + "format": "short", + "logBase": 1, + "show": false + } + ], + "yaxis": { + "align": false + } + } + ], + "title": "Memory Vmstat", + "type": "row" + }, + { + "collapsed": true, + "datasource": { + "type": "prometheus", + "uid": "LocalPrometheus" + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 23 + }, + "id": 293, + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "type": "prometheus", + "uid": "LocalPrometheus" + }, + "description": "", + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 10, + "w": 12, + "x": 0, + "y": 24 + }, + "hiddenSeries": false, + "id": 260, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": true, + "min": true, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "8.4.7", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "/.*Variation*./", + "color": "#890F02" + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "node_timex_estimated_error_seconds{instance=\"$node\",job=\"$job\"}", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "Estimated error in seconds", + "refId": "A", + "step": 240 + }, + { + "expr": "node_timex_offset_seconds{instance=\"$node\",job=\"$job\"}", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "Time offset in between local system and reference clock", + "refId": "B", + "step": 240 + }, + { + "expr": "node_timex_maxerror_seconds{instance=\"$node\",job=\"$job\"}", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "Maximum error in seconds", + "refId": "C", + "step": 240 + } + ], + "thresholds": [], + "timeRegions": [], + "title": "Time Syncronized Drift", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "s", + "label": "seconds", + "logBase": 1, + "show": true + }, + { + "format": "short", + "label": "counter", + "logBase": 1, + "show": false + } + ], + "yaxis": { + "align": false + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "type": "prometheus", + "uid": "LocalPrometheus" + }, + "description": "", + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 10, + "w": 12, + "x": 12, + "y": 24 + }, + "hiddenSeries": false, + "id": 291, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": true, + "min": true, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "8.4.7", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "node_timex_loop_time_constant{instance=\"$node\",job=\"$job\"}", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "Phase-locked loop time adjust", + "refId": "A", + "step": 240 + } + ], + "thresholds": [], + "timeRegions": [], + "title": "Time PLL Adjust", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": "counter", + "logBase": 1, + "show": true + }, + { + "format": "short", + "logBase": 1, + "show": false + } + ], + "yaxis": { + "align": false + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "type": "prometheus", + "uid": "LocalPrometheus" + }, + "description": "", + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 10, + "w": 12, + "x": 0, + "y": 34 + }, + "hiddenSeries": false, + "id": 168, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": true, + "min": true, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pluginVersion": "8.4.7", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "/.*Variation*./", + "color": "#890F02" + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "node_timex_sync_status{instance=\"$node\",job=\"$job\"}", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "Is clock synchronized to a reliable server (1 = yes, 0 = no)", + "refId": "A", + "step": 240 + }, + { + "expr": "node_timex_frequency_adjustment_ratio{instance=\"$node\",job=\"$job\"}", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "Local clock frequency adjustment", + "refId": "B", + "step": 240 + } + ], + "thresholds": [], + "timeRegions": [], + "title": "Time Syncronized Status", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": "counter", + "logBase": 1, + "show": true + }, + { + "format": "short", + "logBase": 1, + "show": false + } + ], + "yaxis": { + "align": false + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "type": "prometheus", + "uid": "LocalPrometheus" + }, + "description": "", + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 10, + "w": 12, + "x": 12, + "y": 34 + }, + "hiddenSeries": false, + "id": 294, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": true, + "min": true, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pluginVersion": "8.4.7", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "node_timex_tick_seconds{instance=\"$node\",job=\"$job\"}", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "Seconds between clock ticks", + "refId": "A", + "step": 240 + }, + { + "expr": "node_timex_tai_offset_seconds{instance=\"$node\",job=\"$job\"}", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "International Atomic Time (TAI) offset", + "refId": "B", + "step": 240 + } + ], + "thresholds": [], + "timeRegions": [], + "title": "Time Misc", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "s", + "label": "seconds", + "logBase": 1, + "show": true + }, + { + "format": "short", + "logBase": 1, + "show": false + } + ], + "yaxis": { + "align": false + } + } + ], + "title": "System Timesync", + "type": "row" + }, + { + "collapsed": true, + "datasource": { + "type": "prometheus", + "uid": "LocalPrometheus" + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 24 + }, + "id": 312, + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "type": "prometheus", + "uid": "LocalPrometheus" + }, + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 10, + "w": 12, + "x": 0, + "y": 73 + }, + "hiddenSeries": false, + "id": 62, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": true, + "min": true, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "maxPerRow": 6, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "8.4.7", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "node_procs_blocked{instance=\"$node\",job=\"$job\"}", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Processes blocked waiting for I/O to complete", + "refId": "A", + "step": 240 + }, + { + "expr": "node_procs_running{instance=\"$node\",job=\"$job\"}", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Processes in runnable state", + "refId": "B", + "step": 240 + } + ], + "thresholds": [], + "timeRegions": [], + "title": "Processes Status", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:6500", + "format": "short", + "label": "counter", + "logBase": 1, + "min": "0", + "show": true + }, + { + "$$hashKey": "object:6501", + "format": "short", + "logBase": 1, + "show": false + } + ], + "yaxis": { + "align": false + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "type": "prometheus", + "uid": "LocalPrometheus" + }, + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 10, + "w": 12, + "x": 12, + "y": 73 + }, + "hiddenSeries": false, + "id": 315, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": true, + "min": true, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "maxPerRow": 6, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "8.4.7", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "node_processes_state{instance=\"$node\",job=\"$job\"}", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{ state }}", + "refId": "A", + "step": 240 + } + ], + "thresholds": [], + "timeRegions": [], + "title": "Processes State", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:6500", + "format": "short", + "label": "counter", + "logBase": 1, + "min": "0", + "show": true + }, + { + "$$hashKey": "object:6501", + "format": "short", + "logBase": 1, + "show": false + } + ], + "yaxis": { + "align": false + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "type": "prometheus", + "uid": "LocalPrometheus" + }, + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 10, + "w": 12, + "x": 0, + "y": 83 + }, + "hiddenSeries": false, + "id": 148, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": true, + "min": true, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "maxPerRow": 6, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "8.4.7", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "rate(node_forks_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", + "format": "time_series", + "hide": false, + "intervalFactor": 1, + "legendFormat": "Processes forks second", + "refId": "A", + "step": 240 + } + ], + "thresholds": [], + "timeRegions": [], + "title": "Processes Forks", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:6640", + "format": "short", + "label": "forks / sec", + "logBase": 1, + "min": "0", + "show": true + }, + { + "$$hashKey": "object:6641", + "format": "short", + "logBase": 1, + "show": false + } + ], + "yaxis": { + "align": false + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "type": "prometheus", + "uid": "LocalPrometheus" + }, + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 10, + "w": 12, + "x": 12, + "y": 83 + }, + "hiddenSeries": false, + "id": 149, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": true, + "min": true, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "8.4.7", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "/.*Max.*/", + "fill": 0 + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "rate(process_virtual_memory_bytes{instance=\"$node\",job=\"$job\"}[$__rate_interval])", + "hide": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "Processes virtual memory size in bytes", + "refId": "A", + "step": 240 + }, + { + "expr": "process_resident_memory_max_bytes{instance=\"$node\",job=\"$job\"}", + "hide": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "Maximum amount of virtual memory available in bytes", + "refId": "B", + "step": 240 + }, + { + "expr": "rate(process_virtual_memory_bytes{instance=\"$node\",job=\"$job\"}[$__rate_interval])", + "hide": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "Processes virtual memory size in bytes", + "refId": "C", + "step": 240 + }, + { + "expr": "rate(process_virtual_memory_max_bytes{instance=\"$node\",job=\"$job\"}[$__rate_interval])", + "hide": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "Maximum amount of virtual memory available in bytes", + "refId": "D", + "step": 240 + } + ], + "thresholds": [], + "timeRegions": [], + "title": "Processes Memory", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "decbytes", + "label": "bytes", + "logBase": 1, + "min": "0", + "show": true + }, + { + "format": "short", + "logBase": 1, + "show": false + } + ], + "yaxis": { + "align": false + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "type": "prometheus", + "uid": "LocalPrometheus" + }, + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 10, + "w": 12, + "x": 0, + "y": 93 + }, + "hiddenSeries": false, + "id": 313, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": true, + "min": true, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "maxPerRow": 6, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "8.4.7", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "$$hashKey": "object:709", + "alias": "PIDs limit", + "color": "#F2495C", + "fill": 0 + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "node_processes_pids{instance=\"$node\",job=\"$job\"}", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "Number of PIDs", + "refId": "A", + "step": 240 + }, + { + "expr": "node_processes_max_processes{instance=\"$node\",job=\"$job\"}", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "PIDs limit", + "refId": "B", + "step": 240 + } + ], + "thresholds": [], + "timeRegions": [], + "title": "PIDs Number and Limit", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:6500", + "format": "short", + "label": "counter", + "logBase": 1, + "min": "0", + "show": true + }, + { + "$$hashKey": "object:6501", + "format": "short", + "logBase": 1, + "show": false + } + ], + "yaxis": { + "align": false + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "type": "prometheus", + "uid": "LocalPrometheus" + }, + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 10, + "w": 12, + "x": 12, + "y": 93 + }, + "hiddenSeries": false, + "id": 305, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": true, + "min": true, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "maxPerRow": 6, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "8.4.7", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "$$hashKey": "object:4963", + "alias": "/.*waiting.*/", + "transform": "negative-Y" + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "rate(node_schedstat_running_seconds_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "CPU {{ cpu }} - seconds spent running a process", + "refId": "A", + "step": 240 + }, + { + "expr": "rate(node_schedstat_waiting_seconds_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "CPU {{ cpu }} - seconds spent by processing waiting for this CPU", + "refId": "B", + "step": 240 + } + ], + "thresholds": [], + "timeRegions": [], + "title": "Process schedule stats Running / Waiting", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:4860", + "format": "s", + "label": "seconds", + "logBase": 1, + "show": true + }, + { + "$$hashKey": "object:4861", + "format": "short", + "logBase": 1, + "show": false + } + ], + "yaxis": { + "align": false + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "type": "prometheus", + "uid": "LocalPrometheus" + }, + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 10, + "w": 12, + "x": 12, + "y": 103 + }, + "hiddenSeries": false, + "id": 314, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": true, + "min": true, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "maxPerRow": 6, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "8.4.7", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "$$hashKey": "object:709", + "alias": "Threads limit", + "color": "#F2495C", + "fill": 0 + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "node_processes_threads{instance=\"$node\",job=\"$job\"}", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "Allocated threads", + "refId": "A", + "step": 240 + }, + { + "expr": "node_processes_max_threads{instance=\"$node\",job=\"$job\"}", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "Threads limit", + "refId": "B", + "step": 240 + } + ], + "thresholds": [], + "timeRegions": [], + "title": "Threads Number and Limit", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:6500", + "format": "short", + "label": "counter", + "logBase": 1, + "min": "0", + "show": true + }, + { + "$$hashKey": "object:6501", + "format": "short", + "logBase": 1, + "show": false + } + ], + "yaxis": { + "align": false + } + } + ], + "title": "System Processes", + "type": "row" + }, + { + "collapsed": true, + "datasource": { + "type": "prometheus", + "uid": "LocalPrometheus" + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 25 + }, + "id": 269, + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "type": "prometheus", + "uid": "LocalPrometheus" + }, + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 10, + "w": 12, + "x": 0, + "y": 74 + }, + "hiddenSeries": false, + "id": 8, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": true, + "min": true, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "maxPerRow": 6, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "8.4.7", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "rate(node_context_switches_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Context switches", + "refId": "A", + "step": 240 + }, + { + "expr": "rate(node_intr_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", + "format": "time_series", + "hide": false, + "intervalFactor": 1, + "legendFormat": "Interrupts", + "refId": "B", + "step": 240 + } + ], + "thresholds": [], + "timeRegions": [], + "title": "Context Switches / Interrupts", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": "counter", + "logBase": 1, + "min": "0", + "show": true + }, + { + "format": "short", + "logBase": 1, + "show": false + } + ], + "yaxis": { + "align": false + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "type": "prometheus", + "uid": "LocalPrometheus" + }, + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 10, + "w": 12, + "x": 12, + "y": 74 + }, + "hiddenSeries": false, + "id": 7, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": true, + "min": true, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "maxPerRow": 6, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "8.4.7", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "node_load1{instance=\"$node\",job=\"$job\"}", + "format": "time_series", + "intervalFactor": 4, + "legendFormat": "Load 1m", + "refId": "A", + "step": 240 + }, + { + "expr": "node_load5{instance=\"$node\",job=\"$job\"}", + "format": "time_series", + "intervalFactor": 4, + "legendFormat": "Load 5m", + "refId": "B", + "step": 240 + }, + { + "expr": "node_load15{instance=\"$node\",job=\"$job\"}", + "format": "time_series", + "intervalFactor": 4, + "legendFormat": "Load 15m", + "refId": "C", + "step": 240 + } + ], + "thresholds": [], + "timeRegions": [], + "title": "System Load", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:6261", + "format": "short", + "label": "counter", + "logBase": 1, + "min": "0", + "show": true + }, + { + "$$hashKey": "object:6262", + "format": "short", + "logBase": 1, + "show": false + } + ], + "yaxis": { + "align": false + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "type": "prometheus", + "uid": "LocalPrometheus" + }, + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 10, + "w": 12, + "x": 0, + "y": 84 + }, + "hiddenSeries": false, + "id": 259, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": true, + "min": true, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "8.4.7", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "/.*Critical*./", + "color": "#E24D42", + "fill": 0 + }, + { + "alias": "/.*Max*./", + "color": "#EF843C", + "fill": 0 + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "rate(node_interrupts_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{ type }} - {{ info }}", + "refId": "A", + "step": 240 + } + ], + "thresholds": [], + "timeRegions": [], + "title": "Interrupts Detail", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": "counter", + "logBase": 1, + "min": "0", + "show": true + }, + { + "format": "short", + "logBase": 1, + "show": false + } + ], + "yaxis": { + "align": false + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "type": "prometheus", + "uid": "LocalPrometheus" + }, + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 10, + "w": 12, + "x": 12, + "y": 84 + }, + "hiddenSeries": false, + "id": 306, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": true, + "min": true, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "maxPerRow": 6, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "8.4.7", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "rate(node_schedstat_timeslices_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "CPU {{ cpu }}", + "refId": "A", + "step": 240 + } + ], + "thresholds": [], + "timeRegions": [], + "title": "Schedule timeslices executed by each cpu", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:4860", + "format": "short", + "label": "counter", + "logBase": 1, + "show": true + }, + { + "$$hashKey": "object:4861", + "format": "short", + "logBase": 1, + "show": false + } + ], + "yaxis": { + "align": false + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "type": "prometheus", + "uid": "LocalPrometheus" + }, + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 10, + "w": 12, + "x": 0, + "y": 94 + }, + "hiddenSeries": false, + "id": 151, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": true, + "min": true, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "maxPerRow": 6, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "8.4.7", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "node_entropy_available_bits{instance=\"$node\",job=\"$job\"}", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Entropy available to random number generators", + "refId": "A", + "step": 240 + } + ], + "thresholds": [], + "timeRegions": [], + "title": "Entropy", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:6568", + "format": "short", + "label": "counter", + "logBase": 1, + "min": "0", + "show": true + }, + { + "$$hashKey": "object:6569", + "format": "short", + "logBase": 1, + "show": false + } + ], + "yaxis": { + "align": false + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "type": "prometheus", + "uid": "LocalPrometheus" + }, + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 10, + "w": 12, + "x": 12, + "y": 94 + }, + "hiddenSeries": false, + "id": 308, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": true, + "min": true, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "maxPerRow": 6, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "8.4.7", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "rate(process_cpu_seconds_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "Time spent", + "refId": "A", + "step": 240 + } + ], + "thresholds": [], + "timeRegions": [], + "title": "CPU time spent in user and system contexts", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:4860", + "format": "s", + "label": "seconds", + "logBase": 1, + "show": true + }, + { + "$$hashKey": "object:4861", + "format": "short", + "logBase": 1, + "show": false + } + ], + "yaxis": { + "align": false + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "type": "prometheus", + "uid": "LocalPrometheus" + }, + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 10, + "w": 12, + "x": 0, + "y": 104 + }, + "hiddenSeries": false, + "id": 64, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": true, + "min": true, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "8.4.7", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "$$hashKey": "object:6323", + "alias": "/.*Max*./", + "color": "#890F02", + "fill": 0 + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "process_max_fds{instance=\"$node\",job=\"$job\"}", + "interval": "", + "intervalFactor": 1, + "legendFormat": "Maximum open file descriptors", + "refId": "A", + "step": 240 + }, + { + "expr": "process_open_fds{instance=\"$node\",job=\"$job\"}", + "interval": "", + "intervalFactor": 1, + "legendFormat": "Open file descriptors", + "refId": "B", + "step": 240 + } + ], + "thresholds": [], + "timeRegions": [], + "title": "File Descriptors", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:6338", + "format": "short", + "label": "counter", + "logBase": 1, + "min": "0", + "show": true + }, + { + "$$hashKey": "object:6339", + "format": "short", + "logBase": 1, + "show": false + } + ], + "yaxis": { + "align": false + } + } + ], + "title": "System Misc", + "type": "row" + }, + { + "collapsed": true, + "datasource": { + "type": "prometheus", + "uid": "LocalPrometheus" + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 26 + }, + "id": 304, + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "type": "prometheus", + "uid": "LocalPrometheus" + }, + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 10, + "w": 12, + "x": 0, + "y": 75 + }, + "hiddenSeries": false, + "id": 158, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": true, + "min": true, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "8.4.7", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "$$hashKey": "object:6726", + "alias": "/.*Critical*./", + "color": "#E24D42", + "fill": 0 + }, + { + "$$hashKey": "object:6727", + "alias": "/.*Max*./", + "color": "#EF843C", + "fill": 0 + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "node_hwmon_temp_celsius{instance=\"$node\",job=\"$job\"}", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{ chip }} {{ sensor }} temp", + "refId": "A", + "step": 240 + }, + { + "expr": "node_hwmon_temp_crit_alarm_celsius{instance=\"$node\",job=\"$job\"}", + "format": "time_series", + "hide": true, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{ chip }} {{ sensor }} Critical Alarm", + "refId": "B", + "step": 240 + }, + { + "expr": "node_hwmon_temp_crit_celsius{instance=\"$node\",job=\"$job\"}", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{ chip }} {{ sensor }} Critical", + "refId": "C", + "step": 240 + }, + { + "expr": "node_hwmon_temp_crit_hyst_celsius{instance=\"$node\",job=\"$job\"}", + "format": "time_series", + "hide": true, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{ chip }} {{ sensor }} Critical Historical", + "refId": "D", + "step": 240 + }, + { + "expr": "node_hwmon_temp_max_celsius{instance=\"$node\",job=\"$job\"}", + "format": "time_series", + "hide": true, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{ chip }} {{ sensor }} Max", + "refId": "E", + "step": 240 + } + ], + "thresholds": [], + "timeRegions": [], + "title": "Hardware temperature monitor", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:6750", + "format": "celsius", + "label": "temperature", + "logBase": 1, + "min": "0", + "show": true + }, + { + "$$hashKey": "object:6751", + "format": "short", + "logBase": 1, + "show": false + } + ], + "yaxis": { + "align": false + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "type": "prometheus", + "uid": "LocalPrometheus" + }, + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 10, + "w": 12, + "x": 12, + "y": 75 + }, + "hiddenSeries": false, + "id": 300, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": true, + "min": true, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "8.4.7", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "$$hashKey": "object:1655", + "alias": "/.*Max*./", + "color": "#EF843C", + "fill": 0 + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "node_cooling_device_cur_state{instance=\"$node\",job=\"$job\"}", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "Current {{ name }} in {{ type }}", + "refId": "A", + "step": 240 + }, + { + "expr": "node_cooling_device_max_state{instance=\"$node\",job=\"$job\"}", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "Max {{ name }} in {{ type }}", + "refId": "B", + "step": 240 + } + ], + "thresholds": [], + "timeRegions": [], + "title": "Throttle cooling device", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:1678", + "format": "short", + "label": "counter", + "logBase": 1, + "show": true + }, + { + "$$hashKey": "object:1679", + "format": "short", + "logBase": 1, + "show": false + } + ], + "yaxis": { + "align": false + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "type": "prometheus", + "uid": "LocalPrometheus" + }, + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 10, + "w": 12, + "x": 0, + "y": 85 + }, + "hiddenSeries": false, + "id": 302, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": true, + "min": true, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "8.4.7", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "node_power_supply_online{instance=\"$node\",job=\"$job\"}", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{ power_supply }} online", + "refId": "A", + "step": 240 + } + ], + "thresholds": [], + "timeRegions": [], + "title": "Power supply", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:1678", + "format": "short", + "label": "counter", + "logBase": 1, + "show": true + }, + { + "$$hashKey": "object:1679", + "format": "short", + "logBase": 1, + "show": false + } + ], + "yaxis": { + "align": false + } + } + ], + "title": "Hardware Misc", + "type": "row" + }, + { + "collapsed": true, + "datasource": { + "type": "prometheus", + "uid": "LocalPrometheus" + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 27 + }, + "id": 296, + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "type": "prometheus", + "uid": "LocalPrometheus" + }, + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 10, + "w": 12, + "x": 0, + "y": 48 + }, + "hiddenSeries": false, + "id": 297, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": true, + "min": true, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "8.4.7", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "rate(node_systemd_socket_accepted_connections_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{ name }} Connections", + "refId": "A", + "step": 240 + } + ], + "thresholds": [], + "timeRegions": [], + "title": "Systemd Sockets", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": "counter", + "logBase": 1, + "min": "0", + "show": true + }, + { + "format": "short", + "logBase": 1, + "show": false + } + ], + "yaxis": { + "align": false + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "type": "prometheus", + "uid": "LocalPrometheus" + }, + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 10, + "w": 12, + "x": 12, + "y": 48 + }, + "hiddenSeries": false, + "id": 298, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": true, + "min": true, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "8.4.7", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "Failed", + "color": "#F2495C" + }, + { + "alias": "Inactive", + "color": "#FF9830" + }, + { + "alias": "Active", + "color": "#73BF69" + }, + { + "alias": "Deactivating", + "color": "#FFCB7D" + }, + { + "alias": "Activating", + "color": "#C8F2C2" + } + ], + "spaceLength": 10, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "node_systemd_units{instance=\"$node\",job=\"$job\",state=\"activating\"}", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "Activating", + "refId": "A", + "step": 240 + }, + { + "expr": "node_systemd_units{instance=\"$node\",job=\"$job\",state=\"active\"}", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "Active", + "refId": "B", + "step": 240 + }, + { + "expr": "node_systemd_units{instance=\"$node\",job=\"$job\",state=\"deactivating\"}", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "Deactivating", + "refId": "C", + "step": 240 + }, + { + "expr": "node_systemd_units{instance=\"$node\",job=\"$job\",state=\"failed\"}", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "Failed", + "refId": "D", + "step": 240 + }, + { + "expr": "node_systemd_units{instance=\"$node\",job=\"$job\",state=\"inactive\"}", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "Inactive", + "refId": "E", + "step": 240 + } + ], + "thresholds": [], + "timeRegions": [], + "title": "Systemd Units State", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": "counter", + "logBase": 1, + "show": true + }, + { + "format": "short", + "logBase": 1, + "show": false + } + ], + "yaxis": { + "align": false + } + } + ], + "title": "Systemd", + "type": "row" + }, + { + "collapsed": true, + "datasource": { + "type": "prometheus", + "uid": "LocalPrometheus" + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 28 + }, + "id": 270, + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "type": "prometheus", + "uid": "LocalPrometheus" + }, + "description": "The number (after merges) of I/O requests completed per second for the device", + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 10, + "w": 12, + "x": 0, + "y": 29 + }, + "hiddenSeries": false, + "id": 9, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "hideZero": true, + "max": true, + "min": true, + "rightSide": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "maxPerRow": 6, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "8.4.7", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "$$hashKey": "object:2033", + "alias": "/.*Read.*/", + "transform": "negative-Y" + }, + { + "$$hashKey": "object:2034", + "alias": "/.*sda_.*/", + "color": "#7EB26D" + }, + { + "$$hashKey": "object:2035", + "alias": "/.*sdb_.*/", + "color": "#EAB839" + }, + { + "$$hashKey": "object:2036", + "alias": "/.*sdc_.*/", + "color": "#6ED0E0" + }, + { + "$$hashKey": "object:2037", + "alias": "/.*sdd_.*/", + "color": "#EF843C" + }, + { + "$$hashKey": "object:2038", + "alias": "/.*sde_.*/", + "color": "#E24D42" + }, + { + "$$hashKey": "object:2039", + "alias": "/.*sda1.*/", + "color": "#584477" + }, + { + "$$hashKey": "object:2040", + "alias": "/.*sda2_.*/", + "color": "#BA43A9" + }, + { + "$$hashKey": "object:2041", + "alias": "/.*sda3_.*/", + "color": "#F4D598" + }, + { + "$$hashKey": "object:2042", + "alias": "/.*sdb1.*/", + "color": "#0A50A1" + }, + { + "$$hashKey": "object:2043", + "alias": "/.*sdb2.*/", + "color": "#BF1B00" + }, + { + "$$hashKey": "object:2044", + "alias": "/.*sdb3.*/", + "color": "#E0752D" + }, + { + "$$hashKey": "object:2045", + "alias": "/.*sdc1.*/", + "color": "#962D82" + }, + { + "$$hashKey": "object:2046", + "alias": "/.*sdc2.*/", + "color": "#614D93" + }, + { + "$$hashKey": "object:2047", + "alias": "/.*sdc3.*/", + "color": "#9AC48A" + }, + { + "$$hashKey": "object:2048", + "alias": "/.*sdd1.*/", + "color": "#65C5DB" + }, + { + "$$hashKey": "object:2049", + "alias": "/.*sdd2.*/", + "color": "#F9934E" + }, + { + "$$hashKey": "object:2050", + "alias": "/.*sdd3.*/", + "color": "#EA6460" + }, + { + "$$hashKey": "object:2051", + "alias": "/.*sde1.*/", + "color": "#E0F9D7" + }, + { + "$$hashKey": "object:2052", + "alias": "/.*sdd2.*/", + "color": "#FCEACA" + }, + { + "$$hashKey": "object:2053", + "alias": "/.*sde3.*/", + "color": "#F9E2D2" + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "rate(node_disk_reads_completed_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", + "intervalFactor": 4, + "legendFormat": "{{device}} - Reads completed", + "refId": "A", + "step": 240 + }, + { + "expr": "rate(node_disk_writes_completed_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", + "intervalFactor": 1, + "legendFormat": "{{device}} - Writes completed", + "refId": "B", + "step": 240 + } + ], + "thresholds": [], + "timeRegions": [], + "title": "Disk IOps Completed", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:2186", + "format": "iops", + "label": "IO read (-) / write (+)", + "logBase": 1, + "show": true + }, + { + "$$hashKey": "object:2187", + "format": "short", + "logBase": 1, + "show": false + } + ], + "yaxis": { + "align": false + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "type": "prometheus", + "uid": "LocalPrometheus" + }, + "description": "The number of bytes read from or written to the device per second", + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 10, + "w": 12, + "x": 12, + "y": 29 + }, + "hiddenSeries": false, + "id": 33, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "hideZero": true, + "max": true, + "min": true, + "rightSide": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "maxPerRow": 6, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "8.4.7", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "/.*Read.*/", + "transform": "negative-Y" + }, + { + "alias": "/.*sda_.*/", + "color": "#7EB26D" + }, + { + "alias": "/.*sdb_.*/", + "color": "#EAB839" + }, + { + "alias": "/.*sdc_.*/", + "color": "#6ED0E0" + }, + { + "alias": "/.*sdd_.*/", + "color": "#EF843C" + }, + { + "alias": "/.*sde_.*/", + "color": "#E24D42" + }, + { + "alias": "/.*sda1.*/", + "color": "#584477" + }, + { + "alias": "/.*sda2_.*/", + "color": "#BA43A9" + }, + { + "alias": "/.*sda3_.*/", + "color": "#F4D598" + }, + { + "alias": "/.*sdb1.*/", + "color": "#0A50A1" + }, + { + "alias": "/.*sdb2.*/", + "color": "#BF1B00" + }, + { + "alias": "/.*sdb3.*/", + "color": "#E0752D" + }, + { + "alias": "/.*sdc1.*/", + "color": "#962D82" + }, + { + "alias": "/.*sdc2.*/", + "color": "#614D93" + }, + { + "alias": "/.*sdc3.*/", + "color": "#9AC48A" + }, + { + "alias": "/.*sdd1.*/", + "color": "#65C5DB" + }, + { + "alias": "/.*sdd2.*/", + "color": "#F9934E" + }, + { + "alias": "/.*sdd3.*/", + "color": "#EA6460" + }, + { + "alias": "/.*sde1.*/", + "color": "#E0F9D7" + }, + { + "alias": "/.*sdd2.*/", + "color": "#FCEACA" + }, + { + "alias": "/.*sde3.*/", + "color": "#F9E2D2" + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "rate(node_disk_read_bytes_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", + "format": "time_series", + "intervalFactor": 4, + "legendFormat": "{{device}} - Read bytes", + "refId": "A", + "step": 240 + }, + { + "expr": "rate(node_disk_written_bytes_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{device}} - Written bytes", + "refId": "B", + "step": 240 + } + ], + "thresholds": [], + "timeRegions": [], + "title": "Disk R/W Data", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:369", + "format": "Bps", + "label": "bytes read (-) / write (+)", + "logBase": 1, + "show": true + }, + { + "$$hashKey": "object:370", + "format": "short", + "logBase": 1, + "show": false + } + ], + "yaxis": { + "align": false + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "type": "prometheus", + "uid": "LocalPrometheus" + }, + "description": "The average time for requests issued to the device to be served. This includes the time spent by the requests in queue and the time spent servicing them.", + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 3, + "fillGradient": 0, + "gridPos": { + "h": 10, + "w": 12, + "x": 0, + "y": 39 + }, + "hiddenSeries": false, + "id": 37, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "hideZero": true, + "max": true, + "min": true, + "rightSide": false, + "show": true, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "maxPerRow": 6, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "8.4.7", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "/.*Read.*/", + "transform": "negative-Y" + }, + { + "alias": "/.*sda_.*/", + "color": "#7EB26D" + }, + { + "alias": "/.*sdb_.*/", + "color": "#EAB839" + }, + { + "alias": "/.*sdc_.*/", + "color": "#6ED0E0" + }, + { + "alias": "/.*sdd_.*/", + "color": "#EF843C" + }, + { + "alias": "/.*sde_.*/", + "color": "#E24D42" + }, + { + "alias": "/.*sda1.*/", + "color": "#584477" + }, + { + "alias": "/.*sda2_.*/", + "color": "#BA43A9" + }, + { + "alias": "/.*sda3_.*/", + "color": "#F4D598" + }, + { + "alias": "/.*sdb1.*/", + "color": "#0A50A1" + }, + { + "alias": "/.*sdb2.*/", + "color": "#BF1B00" + }, + { + "alias": "/.*sdb3.*/", + "color": "#E0752D" + }, + { + "alias": "/.*sdc1.*/", + "color": "#962D82" + }, + { + "alias": "/.*sdc2.*/", + "color": "#614D93" + }, + { + "alias": "/.*sdc3.*/", + "color": "#9AC48A" + }, + { + "alias": "/.*sdd1.*/", + "color": "#65C5DB" + }, + { + "alias": "/.*sdd2.*/", + "color": "#F9934E" + }, + { + "alias": "/.*sdd3.*/", + "color": "#EA6460" + }, + { + "alias": "/.*sde1.*/", + "color": "#E0F9D7" + }, + { + "alias": "/.*sdd2.*/", + "color": "#FCEACA" + }, + { + "alias": "/.*sde3.*/", + "color": "#F9E2D2" + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "rate(node_disk_read_time_seconds_total{instance=\"$node\",job=\"$job\"}[$__rate_interval]) / rate(node_disk_reads_completed_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", + "hide": false, + "interval": "", + "intervalFactor": 4, + "legendFormat": "{{device}} - Read wait time avg", + "refId": "A", + "step": 240 + }, + { + "expr": "rate(node_disk_write_time_seconds_total{instance=\"$node\",job=\"$job\"}[$__rate_interval]) / rate(node_disk_writes_completed_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", + "hide": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{device}} - Write wait time avg", + "refId": "B", + "step": 240 + } + ], + "thresholds": [], + "timeRegions": [], + "title": "Disk Average Wait Time", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:441", + "format": "s", + "label": "time. read (-) / write (+)", + "logBase": 1, + "show": true + }, + { + "$$hashKey": "object:442", + "format": "short", + "logBase": 1, + "show": false + } + ], + "yaxis": { + "align": false + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "type": "prometheus", + "uid": "LocalPrometheus" + }, + "description": "The average queue length of the requests that were issued to the device", + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 10, + "w": 12, + "x": 12, + "y": 39 + }, + "hiddenSeries": false, + "id": 35, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "hideZero": true, + "max": true, + "min": true, + "rightSide": false, + "show": true, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "maxPerRow": 6, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "8.4.7", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "/.*sda_.*/", + "color": "#7EB26D" + }, + { + "alias": "/.*sdb_.*/", + "color": "#EAB839" + }, + { + "alias": "/.*sdc_.*/", + "color": "#6ED0E0" + }, + { + "alias": "/.*sdd_.*/", + "color": "#EF843C" + }, + { + "alias": "/.*sde_.*/", + "color": "#E24D42" + }, + { + "alias": "/.*sda1.*/", + "color": "#584477" + }, + { + "alias": "/.*sda2_.*/", + "color": "#BA43A9" + }, + { + "alias": "/.*sda3_.*/", + "color": "#F4D598" + }, + { + "alias": "/.*sdb1.*/", + "color": "#0A50A1" + }, + { + "alias": "/.*sdb2.*/", + "color": "#BF1B00" + }, + { + "alias": "/.*sdb3.*/", + "color": "#E0752D" + }, + { + "alias": "/.*sdc1.*/", + "color": "#962D82" + }, + { + "alias": "/.*sdc2.*/", + "color": "#614D93" + }, + { + "alias": "/.*sdc3.*/", + "color": "#9AC48A" + }, + { + "alias": "/.*sdd1.*/", + "color": "#65C5DB" + }, + { + "alias": "/.*sdd2.*/", + "color": "#F9934E" + }, + { + "alias": "/.*sdd3.*/", + "color": "#EA6460" + }, + { + "alias": "/.*sde1.*/", + "color": "#E0F9D7" + }, + { + "alias": "/.*sdd2.*/", + "color": "#FCEACA" + }, + { + "alias": "/.*sde3.*/", + "color": "#F9E2D2" + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "rate(node_disk_io_time_weighted_seconds_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", + "interval": "", + "intervalFactor": 4, + "legendFormat": "{{device}}", + "refId": "A", + "step": 240 + } + ], + "thresholds": [], + "timeRegions": [], + "title": "Average Queue Size", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:513", + "format": "none", + "label": "aqu-sz", + "logBase": 1, + "min": "0", + "show": true + }, + { + "$$hashKey": "object:514", + "format": "short", + "logBase": 1, + "show": false + } + ], + "yaxis": { + "align": false + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "type": "prometheus", + "uid": "LocalPrometheus" + }, + "description": "The number of read and write requests merged per second that were queued to the device", + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 10, + "w": 12, + "x": 0, + "y": 49 + }, + "hiddenSeries": false, + "id": 133, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "hideZero": true, + "max": true, + "min": true, + "rightSide": false, + "show": true, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "maxPerRow": 6, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "8.4.7", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "/.*Read.*/", + "transform": "negative-Y" + }, + { + "alias": "/.*sda_.*/", + "color": "#7EB26D" + }, + { + "alias": "/.*sdb_.*/", + "color": "#EAB839" + }, + { + "alias": "/.*sdc_.*/", + "color": "#6ED0E0" + }, + { + "alias": "/.*sdd_.*/", + "color": "#EF843C" + }, + { + "alias": "/.*sde_.*/", + "color": "#E24D42" + }, + { + "alias": "/.*sda1.*/", + "color": "#584477" + }, + { + "alias": "/.*sda2_.*/", + "color": "#BA43A9" + }, + { + "alias": "/.*sda3_.*/", + "color": "#F4D598" + }, + { + "alias": "/.*sdb1.*/", + "color": "#0A50A1" + }, + { + "alias": "/.*sdb2.*/", + "color": "#BF1B00" + }, + { + "alias": "/.*sdb3.*/", + "color": "#E0752D" + }, + { + "alias": "/.*sdc1.*/", + "color": "#962D82" + }, + { + "alias": "/.*sdc2.*/", + "color": "#614D93" + }, + { + "alias": "/.*sdc3.*/", + "color": "#9AC48A" + }, + { + "alias": "/.*sdd1.*/", + "color": "#65C5DB" + }, + { + "alias": "/.*sdd2.*/", + "color": "#F9934E" + }, + { + "alias": "/.*sdd3.*/", + "color": "#EA6460" + }, + { + "alias": "/.*sde1.*/", + "color": "#E0F9D7" + }, + { + "alias": "/.*sdd2.*/", + "color": "#FCEACA" + }, + { + "alias": "/.*sde3.*/", + "color": "#F9E2D2" + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "rate(node_disk_reads_merged_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", + "intervalFactor": 1, + "legendFormat": "{{device}} - Read merged", + "refId": "A", + "step": 240 + }, + { + "expr": "rate(node_disk_writes_merged_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", + "intervalFactor": 1, + "legendFormat": "{{device}} - Write merged", + "refId": "B", + "step": 240 + } + ], + "thresholds": [], + "timeRegions": [], + "title": "Disk R/W Merged", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:585", + "format": "iops", + "label": "I/Os", + "logBase": 1, + "show": true + }, + { + "$$hashKey": "object:586", + "format": "short", + "logBase": 1, + "show": false + } + ], + "yaxis": { + "align": false + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "type": "prometheus", + "uid": "LocalPrometheus" + }, + "description": "Percentage of elapsed time during which I/O requests were issued to the device (bandwidth utilization for the device). Device saturation occurs when this value is close to 100% for devices serving requests serially. But for devices serving requests in parallel, such as RAID arrays and modern SSDs, this number does not reflect their performance limits.", + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 3, + "fillGradient": 0, + "gridPos": { + "h": 10, + "w": 12, + "x": 12, + "y": 49 + }, + "hiddenSeries": false, + "id": 36, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "hideZero": true, + "max": true, + "min": true, + "rightSide": false, + "show": true, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "maxPerRow": 6, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "8.4.7", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "/.*sda_.*/", + "color": "#7EB26D" + }, + { + "alias": "/.*sdb_.*/", + "color": "#EAB839" + }, + { + "alias": "/.*sdc_.*/", + "color": "#6ED0E0" + }, + { + "alias": "/.*sdd_.*/", + "color": "#EF843C" + }, + { + "alias": "/.*sde_.*/", + "color": "#E24D42" + }, + { + "alias": "/.*sda1.*/", + "color": "#584477" + }, + { + "alias": "/.*sda2_.*/", + "color": "#BA43A9" + }, + { + "alias": "/.*sda3_.*/", + "color": "#F4D598" + }, + { + "alias": "/.*sdb1.*/", + "color": "#0A50A1" + }, + { + "alias": "/.*sdb2.*/", + "color": "#BF1B00" + }, + { + "alias": "/.*sdb3.*/", + "color": "#E0752D" + }, + { + "alias": "/.*sdc1.*/", + "color": "#962D82" + }, + { + "alias": "/.*sdc2.*/", + "color": "#614D93" + }, + { + "alias": "/.*sdc3.*/", + "color": "#9AC48A" + }, + { + "alias": "/.*sdd1.*/", + "color": "#65C5DB" + }, + { + "alias": "/.*sdd2.*/", + "color": "#F9934E" + }, + { + "alias": "/.*sdd3.*/", + "color": "#EA6460" + }, + { + "alias": "/.*sde1.*/", + "color": "#E0F9D7" + }, + { + "alias": "/.*sdd2.*/", + "color": "#FCEACA" + }, + { + "alias": "/.*sde3.*/", + "color": "#F9E2D2" + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "rate(node_disk_io_time_seconds_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", + "interval": "", + "intervalFactor": 4, + "legendFormat": "{{device}} - IO", + "refId": "A", + "step": 240 + }, + { + "expr": "rate(node_disk_discard_time_seconds_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", + "interval": "", + "intervalFactor": 4, + "legendFormat": "{{device}} - discard", + "refId": "B", + "step": 240 + } + ], + "thresholds": [], + "timeRegions": [], + "title": "Time Spent Doing I/Os", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:657", + "format": "percentunit", + "label": "%util", + "logBase": 1, + "min": "0", + "show": true + }, + { + "$$hashKey": "object:658", + "format": "short", + "logBase": 1, + "show": false + } + ], + "yaxis": { + "align": false + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "type": "prometheus", + "uid": "LocalPrometheus" + }, + "description": "The number of outstanding requests at the instant the sample was taken. Incremented as requests are given to appropriate struct request_queue and decremented as they finish.", + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 10, + "w": 12, + "x": 0, + "y": 59 + }, + "hiddenSeries": false, + "id": 34, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "hideZero": true, + "max": true, + "min": true, + "rightSide": false, + "show": true, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "maxPerRow": 6, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "8.4.7", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "/.*sda_.*/", + "color": "#7EB26D" + }, + { + "alias": "/.*sdb_.*/", + "color": "#EAB839" + }, + { + "alias": "/.*sdc_.*/", + "color": "#6ED0E0" + }, + { + "alias": "/.*sdd_.*/", + "color": "#EF843C" + }, + { + "alias": "/.*sde_.*/", + "color": "#E24D42" + }, + { + "alias": "/.*sda1.*/", + "color": "#584477" + }, + { + "alias": "/.*sda2_.*/", + "color": "#BA43A9" + }, + { + "alias": "/.*sda3_.*/", + "color": "#F4D598" + }, + { + "alias": "/.*sdb1.*/", + "color": "#0A50A1" + }, + { + "alias": "/.*sdb2.*/", + "color": "#BF1B00" + }, + { + "alias": "/.*sdb3.*/", + "color": "#E0752D" + }, + { + "alias": "/.*sdc1.*/", + "color": "#962D82" + }, + { + "alias": "/.*sdc2.*/", + "color": "#614D93" + }, + { + "alias": "/.*sdc3.*/", + "color": "#9AC48A" + }, + { + "alias": "/.*sdd1.*/", + "color": "#65C5DB" + }, + { + "alias": "/.*sdd2.*/", + "color": "#F9934E" + }, + { + "alias": "/.*sdd3.*/", + "color": "#EA6460" + }, + { + "alias": "/.*sde1.*/", + "color": "#E0F9D7" + }, + { + "alias": "/.*sdd2.*/", + "color": "#FCEACA" + }, + { + "alias": "/.*sde3.*/", + "color": "#F9E2D2" + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "node_disk_io_now{instance=\"$node\",job=\"$job\"}", + "interval": "", + "intervalFactor": 4, + "legendFormat": "{{device}} - IO now", + "refId": "A", + "step": 240 + } + ], + "thresholds": [], + "timeRegions": [], + "title": "Instantaneous Queue Size", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:729", + "format": "none", + "label": "Outstanding req.", + "logBase": 1, + "min": "0", + "show": true + }, + { + "$$hashKey": "object:730", + "format": "short", + "logBase": 1, + "show": false + } + ], + "yaxis": { + "align": false + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "type": "prometheus", + "uid": "LocalPrometheus" + }, + "description": "", + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 10, + "w": 12, + "x": 12, + "y": 59 + }, + "hiddenSeries": false, + "id": 301, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "hideZero": true, + "max": true, + "min": true, + "rightSide": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "maxPerRow": 6, + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "8.4.7", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "$$hashKey": "object:2034", + "alias": "/.*sda_.*/", + "color": "#7EB26D" + }, + { + "$$hashKey": "object:2035", + "alias": "/.*sdb_.*/", + "color": "#EAB839" + }, + { + "$$hashKey": "object:2036", + "alias": "/.*sdc_.*/", + "color": "#6ED0E0" + }, + { + "$$hashKey": "object:2037", + "alias": "/.*sdd_.*/", + "color": "#EF843C" + }, + { + "$$hashKey": "object:2038", + "alias": "/.*sde_.*/", + "color": "#E24D42" + }, + { + "$$hashKey": "object:2039", + "alias": "/.*sda1.*/", + "color": "#584477" + }, + { + "$$hashKey": "object:2040", + "alias": "/.*sda2_.*/", + "color": "#BA43A9" + }, + { + "$$hashKey": "object:2041", + "alias": "/.*sda3_.*/", + "color": "#F4D598" + }, + { + "$$hashKey": "object:2042", + "alias": "/.*sdb1.*/", + "color": "#0A50A1" + }, + { + "$$hashKey": "object:2043", + "alias": "/.*sdb2.*/", + "color": "#BF1B00" + }, + { + "$$hashKey": "object:2044", + "alias": "/.*sdb3.*/", + "color": "#E0752D" + }, + { + "$$hashKey": "object:2045", + "alias": "/.*sdc1.*/", + "color": "#962D82" + }, + { + "$$hashKey": "object:2046", + "alias": "/.*sdc2.*/", + "color": "#614D93" + }, + { + "$$hashKey": "object:2047", + "alias": "/.*sdc3.*/", + "color": "#9AC48A" + }, + { + "$$hashKey": "object:2048", + "alias": "/.*sdd1.*/", + "color": "#65C5DB" + }, + { + "$$hashKey": "object:2049", + "alias": "/.*sdd2.*/", + "color": "#F9934E" + }, + { + "$$hashKey": "object:2050", + "alias": "/.*sdd3.*/", + "color": "#EA6460" + }, + { + "$$hashKey": "object:2051", + "alias": "/.*sde1.*/", + "color": "#E0F9D7" + }, + { + "$$hashKey": "object:2052", + "alias": "/.*sdd2.*/", + "color": "#FCEACA" + }, + { + "$$hashKey": "object:2053", + "alias": "/.*sde3.*/", + "color": "#F9E2D2" + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "rate(node_disk_discards_completed_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", + "interval": "", + "intervalFactor": 4, + "legendFormat": "{{device}} - Discards completed", + "refId": "A", + "step": 240 + }, + { + "expr": "rate(node_disk_discards_merged_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{device}} - Discards merged", + "refId": "B", + "step": 240 + } + ], + "thresholds": [], + "timeRegions": [], + "title": "Disk IOps Discards completed / merged", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:2186", + "format": "iops", + "label": "IOs", + "logBase": 1, + "show": true + }, + { + "$$hashKey": "object:2187", + "format": "short", + "logBase": 1, + "show": false + } + ], + "yaxis": { + "align": false + } + } + ], + "title": "Storage Disk", + "type": "row" + }, + { + "collapsed": true, + "datasource": { + "type": "prometheus", + "uid": "LocalPrometheus" + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 29 + }, + "id": 271, + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "type": "prometheus", + "uid": "LocalPrometheus" + }, + "decimals": 3, + "description": "", + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 10, + "w": 12, + "x": 0, + "y": 30 + }, + "hiddenSeries": false, + "id": 43, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": true, + "min": true, + "rightSide": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "maxPerRow": 6, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "8.4.7", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "node_filesystem_avail_bytes{instance=\"$node\",job=\"$job\",device!~'rootfs'}", + "format": "time_series", + "hide": false, + "intervalFactor": 1, + "legendFormat": "{{mountpoint}} - Available", + "metric": "", + "refId": "A", + "step": 240 + }, + { + "expr": "node_filesystem_free_bytes{instance=\"$node\",job=\"$job\",device!~'rootfs'}", + "format": "time_series", + "hide": true, + "intervalFactor": 1, + "legendFormat": "{{mountpoint}} - Free", + "refId": "B", + "step": 240 + }, + { + "expr": "node_filesystem_size_bytes{instance=\"$node\",job=\"$job\",device!~'rootfs'}", + "format": "time_series", + "hide": true, + "intervalFactor": 1, + "legendFormat": "{{mountpoint}} - Size", + "refId": "C", + "step": 240 + } + ], + "thresholds": [], + "timeRegions": [], + "title": "Filesystem space available", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:3826", + "format": "bytes", + "label": "bytes", + "logBase": 1, + "min": "0", + "show": true + }, + { + "$$hashKey": "object:3827", + "format": "short", + "logBase": 1, + "show": false + } + ], + "yaxis": { + "align": false + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "type": "prometheus", + "uid": "LocalPrometheus" + }, + "description": "", + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 10, + "w": 12, + "x": 12, + "y": 30 + }, + "hiddenSeries": false, + "id": 41, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "hideZero": true, + "max": true, + "min": true, + "rightSide": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "8.4.7", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "node_filesystem_files_free{instance=\"$node\",job=\"$job\",device!~'rootfs'}", + "format": "time_series", + "hide": false, + "intervalFactor": 1, + "legendFormat": "{{mountpoint}} - Free file nodes", + "refId": "A", + "step": 240 + } + ], + "thresholds": [], + "timeRegions": [], + "title": "File Nodes Free", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:3894", + "format": "short", + "label": "file nodes", + "logBase": 1, + "min": "0", + "show": true + }, + { + "$$hashKey": "object:3895", + "format": "short", + "logBase": 1, + "show": false + } + ], + "yaxis": { + "align": false + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "type": "prometheus", + "uid": "LocalPrometheus" + }, + "description": "", + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 10, + "w": 12, + "x": 0, + "y": 40 + }, + "hiddenSeries": false, + "id": 28, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": true, + "min": true, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "maxPerRow": 6, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "8.4.7", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "node_filefd_maximum{instance=\"$node\",job=\"$job\"}", + "format": "time_series", + "intervalFactor": 4, + "legendFormat": "Max open files", + "refId": "A", + "step": 240 + }, + { + "expr": "node_filefd_allocated{instance=\"$node\",job=\"$job\"}", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Open files", + "refId": "B", + "step": 240 + } + ], + "thresholds": [], + "timeRegions": [], + "title": "File Descriptor", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": "files", + "logBase": 1, + "min": "0", + "show": true + }, + { + "format": "short", + "logBase": 1, + "show": false + } + ], + "yaxis": { + "align": false + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "type": "prometheus", + "uid": "LocalPrometheus" + }, + "description": "", + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 10, + "w": 12, + "x": 12, + "y": 40 + }, + "hiddenSeries": false, + "id": 219, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "hideZero": true, + "max": true, + "min": true, + "rightSide": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "8.4.7", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "node_filesystem_files{instance=\"$node\",job=\"$job\",device!~'rootfs'}", + "format": "time_series", + "hide": false, + "intervalFactor": 1, + "legendFormat": "{{mountpoint}} - File nodes total", + "refId": "A", + "step": 240 + } + ], + "thresholds": [], + "timeRegions": [], + "title": "File Nodes Size", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": "file Nodes", + "logBase": 1, + "min": "0", + "show": true + }, + { + "format": "short", + "logBase": 1, + "show": false + } + ], + "yaxis": { + "align": false + } + }, + { + "aliasColors": { + "/ ReadOnly": "#890F02" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "type": "prometheus", + "uid": "LocalPrometheus" + }, + "description": "", + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 10, + "w": 12, + "x": 0, + "y": 50 + }, + "hiddenSeries": false, + "id": 44, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": true, + "min": true, + "rightSide": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "maxPerRow": 6, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "8.4.7", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "node_filesystem_readonly{instance=\"$node\",job=\"$job\",device!~'rootfs'}", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{mountpoint}} - ReadOnly", + "refId": "A", + "step": 240 + }, + { + "expr": "node_filesystem_device_error{instance=\"$node\",job=\"$job\",device!~'rootfs',fstype!~'tmpfs'}", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{mountpoint}} - Device error", + "refId": "B", + "step": 240 + } + ], + "thresholds": [], + "timeRegions": [], + "title": "Filesystem in ReadOnly / Error", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:3670", + "format": "short", + "label": "counter", + "logBase": 1, + "max": "1", + "min": "0", + "show": true + }, + { + "$$hashKey": "object:3671", + "format": "short", + "logBase": 1, + "show": false + } + ], + "yaxis": { + "align": false + } + } + ], + "title": "Storage Filesystem", + "type": "row" + }, + { + "collapsed": true, + "datasource": { + "type": "prometheus", + "uid": "LocalPrometheus" + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 30 + }, + "id": 272, + "panels": [ + { + "aliasColors": { + "receive_packets_eth0": "#7EB26D", + "receive_packets_lo": "#E24D42", + "transmit_packets_eth0": "#7EB26D", + "transmit_packets_lo": "#E24D42" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "type": "prometheus", + "uid": "LocalPrometheus" + }, + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 10, + "w": 12, + "x": 0, + "y": 30 + }, + "hiddenSeries": false, + "id": 60, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": true, + "min": true, + "rightSide": false, + "show": true, + "sideWidth": 300, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "/.*Trans.*/", + "transform": "negative-Y" + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "rate(node_network_receive_packets_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{device}} - Receive", + "refId": "A", + "step": 240 + }, + { + "expr": "rate(node_network_transmit_packets_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{device}} - Transmit", + "refId": "B", + "step": 240 + } + ], + "thresholds": [], + "timeRegions": [], + "title": "Network Traffic by Packets", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "pps", + "label": "packets out (-) / in (+)", + "logBase": 1, + "show": true + }, + { + "format": "short", + "logBase": 1, + "show": false + } + ], + "yaxis": { + "align": false + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "type": "prometheus", + "uid": "LocalPrometheus" + }, + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 10, + "w": 12, + "x": 12, + "y": 30 + }, + "hiddenSeries": false, + "id": 142, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "hideEmpty": false, + "hideZero": false, + "max": true, + "min": true, + "rightSide": false, + "show": true, + "sideWidth": 300, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "/.*Trans.*/", + "transform": "negative-Y" + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "rate(node_network_receive_errs_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{device}} - Receive errors", + "refId": "A", + "step": 240 + }, + { + "expr": "rate(node_network_transmit_errs_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{device}} - Rransmit errors", + "refId": "B", + "step": 240 + } + ], + "thresholds": [], + "timeRegions": [], + "title": "Network Traffic Errors", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "pps", + "label": "packets out (-) / in (+)", + "logBase": 1, + "show": true + }, + { + "format": "short", + "logBase": 1, + "show": false + } + ], + "yaxis": { + "align": false + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "type": "prometheus", + "uid": "LocalPrometheus" + }, + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 10, + "w": 12, + "x": 0, + "y": 40 + }, + "hiddenSeries": false, + "id": 143, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "hideEmpty": false, + "hideZero": false, + "max": true, + "min": true, + "rightSide": false, + "show": true, + "sideWidth": 300, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "/.*Trans.*/", + "transform": "negative-Y" + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "rate(node_network_receive_drop_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{device}} - Receive drop", + "refId": "A", + "step": 240 + }, + { + "expr": "rate(node_network_transmit_drop_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{device}} - Transmit drop", + "refId": "B", + "step": 240 + } + ], + "thresholds": [], + "timeRegions": [], + "title": "Network Traffic Drop", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "pps", + "label": "packets out (-) / in (+)", + "logBase": 1, + "show": true + }, + { + "format": "short", + "logBase": 1, + "show": false + } + ], + "yaxis": { + "align": false + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "type": "prometheus", + "uid": "LocalPrometheus" + }, + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 10, + "w": 12, + "x": 12, + "y": 40 + }, + "hiddenSeries": false, + "id": 141, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "hideEmpty": false, + "hideZero": false, + "max": true, + "min": true, + "rightSide": false, + "show": true, + "sideWidth": 300, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "/.*Trans.*/", + "transform": "negative-Y" + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "rate(node_network_receive_compressed_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{device}} - Receive compressed", + "refId": "A", + "step": 240 + }, + { + "expr": "rate(node_network_transmit_compressed_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{device}} - Transmit compressed", + "refId": "B", + "step": 240 + } + ], + "thresholds": [], + "timeRegions": [], + "title": "Network Traffic Compressed", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "pps", + "label": "packets out (-) / in (+)", + "logBase": 1, + "show": true + }, + { + "format": "short", + "logBase": 1, + "show": false + } + ], + "yaxis": { + "align": false + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "type": "prometheus", + "uid": "LocalPrometheus" + }, + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 10, + "w": 12, + "x": 0, + "y": 50 + }, + "hiddenSeries": false, + "id": 146, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "hideEmpty": false, + "hideZero": false, + "max": true, + "min": true, + "rightSide": false, + "show": true, + "sideWidth": 300, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "/.*Trans.*/", + "transform": "negative-Y" + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "rate(node_network_receive_multicast_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{device}} - Receive multicast", + "refId": "A", + "step": 240 + } + ], + "thresholds": [], + "timeRegions": [], + "title": "Network Traffic Multicast", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "pps", + "label": "packets out (-) / in (+)", + "logBase": 1, + "show": true + }, + { + "format": "short", + "logBase": 1, + "show": false + } + ], + "yaxis": { + "align": false + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "type": "prometheus", + "uid": "LocalPrometheus" + }, + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 10, + "w": 12, + "x": 12, + "y": 50 + }, + "hiddenSeries": false, + "id": 144, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "hideEmpty": false, + "hideZero": false, + "max": true, + "min": true, + "rightSide": false, + "show": true, + "sideWidth": 300, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "/.*Trans.*/", + "transform": "negative-Y" + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "rate(node_network_receive_fifo_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{device}} - Receive fifo", + "refId": "A", + "step": 240 + }, + { + "expr": "rate(node_network_transmit_fifo_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{device}} - Transmit fifo", + "refId": "B", + "step": 240 + } + ], + "thresholds": [], + "timeRegions": [], + "title": "Network Traffic Fifo", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "pps", + "label": "packets out (-) / in (+)", + "logBase": 1, + "show": true + }, + { + "format": "short", + "logBase": 1, + "show": false + } + ], + "yaxis": { + "align": false + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "type": "prometheus", + "uid": "LocalPrometheus" + }, + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 10, + "w": 12, + "x": 0, + "y": 60 + }, + "hiddenSeries": false, + "id": 145, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "hideEmpty": false, + "hideZero": false, + "max": true, + "min": true, + "rightSide": false, + "show": true, + "sideWidth": 300, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "$$hashKey": "object:576", + "alias": "/.*Trans.*/", + "transform": "negative-Y" + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "rate(node_network_receive_frame_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", + "format": "time_series", + "hide": false, + "intervalFactor": 1, + "legendFormat": "{{device}} - Receive frame", + "refId": "A", + "step": 240 + } + ], + "thresholds": [], + "timeRegions": [], + "title": "Network Traffic Frame", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:589", + "format": "pps", + "label": "packets out (-) / in (+)", + "logBase": 1, + "show": true + }, + { + "$$hashKey": "object:590", + "format": "short", + "logBase": 1, + "show": false + } + ], + "yaxis": { + "align": false + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "type": "prometheus", + "uid": "LocalPrometheus" + }, + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 10, + "w": 12, + "x": 12, + "y": 60 + }, + "hiddenSeries": false, + "id": 231, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "hideEmpty": false, + "hideZero": false, + "max": true, + "min": true, + "rightSide": false, + "show": true, + "sideWidth": 300, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "rate(node_network_transmit_carrier_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{device}} - Statistic transmit_carrier", + "refId": "A", + "step": 240 + } + ], + "thresholds": [], + "timeRegions": [], + "title": "Network Traffic Carrier", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": "counter", + "logBase": 1, + "show": true + }, + { + "format": "short", + "logBase": 1, + "show": false + } + ], + "yaxis": { + "align": false + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "type": "prometheus", + "uid": "LocalPrometheus" + }, + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 10, + "w": 12, + "x": 0, + "y": 70 + }, + "hiddenSeries": false, + "id": 232, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "hideEmpty": false, + "hideZero": false, + "max": true, + "min": true, + "rightSide": false, + "show": true, + "sideWidth": 300, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "/.*Trans.*/", + "transform": "negative-Y" + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "rate(node_network_transmit_colls_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{device}} - Transmit colls", + "refId": "A", + "step": 240 + } + ], + "thresholds": [], + "timeRegions": [], + "title": "Network Traffic Colls", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": "counter", + "logBase": 1, + "show": true + }, + { + "format": "short", + "logBase": 1, + "show": false + } + ], + "yaxis": { + "align": false + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "type": "prometheus", + "uid": "LocalPrometheus" + }, + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 10, + "w": 12, + "x": 12, + "y": 70 + }, + "hiddenSeries": false, + "id": 61, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": true, + "min": true, + "rightSide": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "$$hashKey": "object:663", + "alias": "NF conntrack limit", + "color": "#890F02", + "fill": 0 + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "node_nf_conntrack_entries{instance=\"$node\",job=\"$job\"}", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "NF conntrack entries", + "refId": "A", + "step": 240 + }, + { + "expr": "node_nf_conntrack_entries_limit{instance=\"$node\",job=\"$job\"}", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "NF conntrack limit", + "refId": "B", + "step": 240 + } + ], + "thresholds": [], + "timeRegions": [], + "title": "NF Contrack", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:678", + "format": "short", + "label": "entries", + "logBase": 1, + "min": "0", + "show": true + }, + { + "$$hashKey": "object:679", + "format": "short", + "logBase": 1, + "show": false + } + ], + "yaxis": { + "align": false + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "type": "prometheus", + "uid": "LocalPrometheus" + }, + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 10, + "w": 12, + "x": 0, + "y": 80 + }, + "hiddenSeries": false, + "id": 230, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": true, + "min": true, + "rightSide": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "node_arp_entries{instance=\"$node\",job=\"$job\"}", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{ device }} - ARP entries", + "refId": "A", + "step": 240 + } + ], + "thresholds": [], + "timeRegions": [], + "title": "ARP Entries", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": "Entries", + "logBase": 1, + "min": "0", + "show": true + }, + { + "format": "short", + "logBase": 1, + "show": false + } + ], + "yaxis": { + "align": false + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "type": "prometheus", + "uid": "LocalPrometheus" + }, + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 10, + "w": 12, + "x": 12, + "y": 80 + }, + "hiddenSeries": false, + "id": 288, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": true, + "min": true, + "rightSide": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 1, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "node_network_mtu_bytes{instance=\"$node\",job=\"$job\"}", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{ device }} - Bytes", + "refId": "A", + "step": 240 + } + ], + "thresholds": [], + "timeRegions": [], + "title": "MTU", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": 0, + "format": "bytes", + "label": "bytes", + "logBase": 1, + "min": "0", + "show": true + }, + { + "format": "short", + "logBase": 1, + "show": false + } + ], + "yaxis": { + "align": false + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "type": "prometheus", + "uid": "LocalPrometheus" + }, + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 10, + "w": 12, + "x": 0, + "y": 90 + }, + "hiddenSeries": false, + "id": 280, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": true, + "min": true, + "rightSide": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 1, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "node_network_speed_bytes{instance=\"$node\",job=\"$job\"}", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{ device }} - Speed", + "refId": "A", + "step": 240 + } + ], + "thresholds": [], + "timeRegions": [], + "title": "Speed", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": 0, + "format": "bytes", + "label": "bytes", + "logBase": 1, + "min": "0", + "show": true + }, + { + "format": "short", + "logBase": 1, + "show": false + } + ], + "yaxis": { + "align": false + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "type": "prometheus", + "uid": "LocalPrometheus" + }, + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 10, + "w": 12, + "x": 12, + "y": 90 + }, + "hiddenSeries": false, + "id": 289, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": true, + "min": true, + "rightSide": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 1, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "node_network_transmit_queue_length{instance=\"$node\",job=\"$job\"}", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{ device }} - Interface transmit queue length", + "refId": "A", + "step": 240 + } + ], + "thresholds": [], + "timeRegions": [], + "title": "Queue Length", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": 0, + "format": "none", + "label": "packets", + "logBase": 1, + "min": "0", + "show": true + }, + { + "format": "short", + "logBase": 1, + "show": false + } + ], + "yaxis": { + "align": false + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "type": "prometheus", + "uid": "LocalPrometheus" + }, + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 10, + "w": 12, + "x": 0, + "y": 100 + }, + "hiddenSeries": false, + "id": 290, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "hideEmpty": false, + "hideZero": false, + "max": true, + "min": true, + "rightSide": false, + "show": true, + "sideWidth": 300, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "$$hashKey": "object:232", + "alias": "/.*Dropped.*/", + "transform": "negative-Y" + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "rate(node_softnet_processed_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "CPU {{cpu}} - Processed", + "refId": "A", + "step": 240 + }, + { + "expr": "rate(node_softnet_dropped_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "CPU {{cpu}} - Dropped", + "refId": "B", + "step": 240 + } + ], + "thresholds": [], + "timeRegions": [], + "title": "Softnet Packets", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:207", + "format": "short", + "label": "packetes drop (-) / process (+)", + "logBase": 1, + "show": true + }, + { + "$$hashKey": "object:208", + "format": "short", + "logBase": 1, + "show": false + } + ], + "yaxis": { + "align": false + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "type": "prometheus", + "uid": "LocalPrometheus" + }, + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 10, + "w": 12, + "x": 12, + "y": 100 + }, + "hiddenSeries": false, + "id": 310, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "hideEmpty": false, + "hideZero": false, + "max": true, + "min": true, + "rightSide": false, + "show": true, + "sideWidth": 300, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "rate(node_softnet_times_squeezed_total{instance=\"$node\",job=\"$job\"}[$__rate_interval])", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "CPU {{cpu}} - Squeezed", + "refId": "A", + "step": 240 + } + ], + "thresholds": [], + "timeRegions": [], + "title": "Softnet Out of Quota", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:207", + "format": "short", + "label": "counter", + "logBase": 1, + "show": true + }, + { + "$$hashKey": "object:208", + "format": "short", + "logBase": 1, + "show": false + } + ], + "yaxis": { + "align": false + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "type": "prometheus", + "uid": "LocalPrometheus" + }, + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 10, + "w": 12, + "x": 0, + "y": 110 + }, + "hiddenSeries": false, + "id": 309, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "hideEmpty": false, + "hideZero": false, + "max": true, + "min": true, + "rightSide": false, + "show": true, + "sideWidth": 300, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "node_network_up{operstate=\"up\",instance=\"$node\",job=\"$job\"}", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{interface}} - Operational state UP", + "refId": "A", + "step": 240 + }, + { + "expr": "node_network_carrier{instance=\"$node\",job=\"$job\"}", + "format": "time_series", + "instant": false, + "legendFormat": "{{device}} - Physical link state", + "refId": "B" + } + ], + "thresholds": [], + "timeRegions": [], + "title": "Network Operational Status", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": "counter", + "logBase": 1, + "show": true + }, + { + "format": "short", + "logBase": 1, + "show": false + } + ], + "yaxis": { + "align": false + } + } + ], + "title": "Network Traffic", + "type": "row" + }, + { + "collapsed": true, + "datasource": { + "type": "prometheus", + "uid": "LocalPrometheus" + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 31 + }, + "id": 273, + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "type": "prometheus", + "uid": "LocalPrometheus" + }, + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 10, + "w": 12, + "x": 0, + "y": 80 + }, + "hiddenSeries": false, + "id": 63, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "hideEmpty": false, + "hideZero": false, + "max": true, + "min": true, + "rightSide": false, + "show": true, + "sideWidth": 300, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "8.4.7", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "node_sockstat_TCP_alloc{instance=\"$node\",job=\"$job\"}", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "TCP_alloc - Allocated sockets", + "refId": "A", + "step": 240 + }, + { + "expr": "node_sockstat_TCP_inuse{instance=\"$node\",job=\"$job\"}", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "TCP_inuse - Tcp sockets currently in use", + "refId": "B", + "step": 240 + }, + { + "expr": "node_sockstat_TCP_mem{instance=\"$node\",job=\"$job\"}", + "format": "time_series", + "hide": true, + "interval": "", + "intervalFactor": 1, + "legendFormat": "TCP_mem - Used memory for tcp", + "refId": "C", + "step": 240 + }, + { + "expr": "node_sockstat_TCP_orphan{instance=\"$node\",job=\"$job\"}", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "TCP_orphan - Orphan sockets", + "refId": "D", + "step": 240 + }, + { + "expr": "node_sockstat_TCP_tw{instance=\"$node\",job=\"$job\"}", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "TCP_tw - Sockets wating close", + "refId": "E", + "step": 240 + } + ], + "thresholds": [], + "timeRegions": [], + "title": "Sockstat TCP", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": "counter", + "logBase": 1, + "min": "0", + "show": true + }, + { + "format": "short", + "logBase": 1, + "show": false + } + ], + "yaxis": { + "align": false + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "type": "prometheus", + "uid": "LocalPrometheus" + }, + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 10, + "w": 12, + "x": 12, + "y": 80 + }, + "hiddenSeries": false, + "id": 124, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "hideEmpty": false, + "hideZero": false, + "max": true, + "min": true, + "rightSide": false, + "show": true, + "sideWidth": 300, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "8.4.7", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "node_sockstat_UDPLITE_inuse{instance=\"$node\",job=\"$job\"}", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "UDPLITE_inuse - Udplite sockets currently in use", + "refId": "A", + "step": 240 + }, + { + "expr": "node_sockstat_UDP_inuse{instance=\"$node\",job=\"$job\"}", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "UDP_inuse - Udp sockets currently in use", + "refId": "B", + "step": 240 + }, + { + "expr": "node_sockstat_UDP_mem{instance=\"$node\",job=\"$job\"}", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "UDP_mem - Used memory for udp", + "refId": "C", + "step": 240 + } + ], + "thresholds": [], + "timeRegions": [], + "title": "Sockstat UDP", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": "counter", + "logBase": 1, + "min": "0", + "show": true + }, + { + "format": "short", + "logBase": 1, + "show": false + } + ], + "yaxis": { + "align": false + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "type": "prometheus", + "uid": "LocalPrometheus" + }, + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 10, + "w": 12, + "x": 0, + "y": 90 + }, + "hiddenSeries": false, + "id": 125, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "hideEmpty": false, + "hideZero": false, + "max": true, + "min": true, + "rightSide": false, + "show": true, + "sideWidth": 300, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "8.4.7", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "node_sockstat_FRAG_inuse{instance=\"$node\",job=\"$job\"}", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "FRAG_inuse - Frag sockets currently in use", + "refId": "A", + "step": 240 + }, + { + "expr": "node_sockstat_RAW_inuse{instance=\"$node\",job=\"$job\"}", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "RAW_inuse - Raw sockets currently in use", + "refId": "C", + "step": 240 + } + ], + "thresholds": [], + "timeRegions": [], + "title": "Sockstat FRAG / RAW", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:1572", + "format": "short", + "label": "counter", + "logBase": 1, + "min": "0", + "show": true + }, + { + "$$hashKey": "object:1573", + "format": "short", + "logBase": 1, + "show": false + } + ], + "yaxis": { + "align": false + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "type": "prometheus", + "uid": "LocalPrometheus" + }, + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 10, + "w": 12, + "x": 12, + "y": 90 + }, + "hiddenSeries": false, + "id": 220, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "hideEmpty": false, + "hideZero": false, + "max": true, + "min": true, + "rightSide": false, + "show": true, + "sideWidth": 300, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "8.4.7", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "node_sockstat_TCP_mem_bytes{instance=\"$node\",job=\"$job\"}", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "mem_bytes - TCP sockets in that state", + "refId": "A", + "step": 240 + }, + { + "expr": "node_sockstat_UDP_mem_bytes{instance=\"$node\",job=\"$job\"}", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "mem_bytes - UDP sockets in that state", + "refId": "B", + "step": 240 + }, + { + "expr": "node_sockstat_FRAG_memory{instance=\"$node\",job=\"$job\"}", + "interval": "", + "intervalFactor": 1, + "legendFormat": "FRAG_memory - Used memory for frag", + "refId": "C" + } + ], + "thresholds": [], + "timeRegions": [], + "title": "Sockstat Memory Size", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "label": "bytes", + "logBase": 1, + "min": "0", + "show": true + }, + { + "format": "short", + "logBase": 1, + "show": false + } + ], + "yaxis": { + "align": false + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "type": "prometheus", + "uid": "LocalPrometheus" + }, + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 10, + "w": 12, + "x": 0, + "y": 100 + }, + "hiddenSeries": false, + "id": 126, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "hideEmpty": false, + "hideZero": false, + "max": true, + "min": true, + "rightSide": false, + "show": true, + "sideWidth": 300, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "8.4.7", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "node_sockstat_sockets_used{instance=\"$node\",job=\"$job\"}", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "Sockets_used - Sockets currently in use", + "refId": "A", + "step": 240 + } + ], + "thresholds": [], + "timeRegions": [], + "title": "Sockstat Used", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": "sockets", + "logBase": 1, + "min": "0", + "show": true + }, + { + "format": "short", + "logBase": 1, + "show": false + } + ], + "yaxis": { + "align": false + } + } + ], + "title": "Network Sockstat", + "type": "row" + }, + { + "collapsed": true, + "datasource": { + "type": "prometheus", + "uid": "LocalPrometheus" + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 32 + }, + "id": 274, + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "type": "prometheus", + "uid": "LocalPrometheus" + }, + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 10, + "w": 12, + "x": 0, + "y": 81 + }, + "height": "", + "hiddenSeries": false, + "id": 221, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "hideEmpty": false, + "hideZero": false, + "max": true, + "min": true, + "rightSide": false, + "show": true, + "sideWidth": 300, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "maxPerRow": 12, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "8.4.7", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "$$hashKey": "object:1876", + "alias": "/.*Out.*/", + "transform": "negative-Y" + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "rate(node_netstat_IpExt_InOctets{instance=\"$node\",job=\"$job\"}[$__rate_interval])", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "InOctets - Received octets", + "refId": "A", + "step": 240 + }, + { + "expr": "rate(node_netstat_IpExt_OutOctets{instance=\"$node\",job=\"$job\"}[$__rate_interval])", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "OutOctets - Sent octets", + "refId": "B", + "step": 240 + } + ], + "thresholds": [], + "timeRegions": [], + "title": "Netstat IP In / Out Octets", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:1889", + "format": "short", + "label": "octects out (-) / in (+)", + "logBase": 1, + "show": true + }, + { + "$$hashKey": "object:1890", + "format": "short", + "logBase": 1, + "show": false + } + ], + "yaxis": { + "align": false + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "type": "prometheus", + "uid": "LocalPrometheus" + }, + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 10, + "w": 12, + "x": 12, + "y": 81 + }, + "height": "", + "hiddenSeries": false, + "id": 81, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "hideEmpty": false, + "hideZero": false, + "max": true, + "min": true, + "rightSide": false, + "show": true, + "sideWidth": 300, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "8.4.7", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "rate(node_netstat_Ip_Forwarding{instance=\"$node\",job=\"$job\"}[$__rate_interval])", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "Forwarding - IP forwarding", + "refId": "A", + "step": 240 + } + ], + "thresholds": [], + "timeRegions": [], + "title": "Netstat IP Forwarding", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:1957", + "format": "short", + "label": "datagrams", + "logBase": 1, + "min": "0", + "show": true + }, + { + "$$hashKey": "object:1958", + "format": "short", + "logBase": 1, + "show": false + } + ], + "yaxis": { + "align": false + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "type": "prometheus", + "uid": "LocalPrometheus" + }, + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 10, + "w": 12, + "x": 0, + "y": 91 + }, + "height": "", + "hiddenSeries": false, + "id": 115, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "hideZero": false, + "max": true, + "min": true, + "rightSide": false, + "show": true, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "maxPerRow": 12, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "8.4.7", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "/.*Out.*/", + "transform": "negative-Y" + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "rate(node_netstat_Icmp_InMsgs{instance=\"$node\",job=\"$job\"}[$__rate_interval])", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "InMsgs - Messages which the entity received. Note that this counter includes all those counted by icmpInErrors", + "refId": "A", + "step": 240 + }, + { + "expr": "rate(node_netstat_Icmp_OutMsgs{instance=\"$node\",job=\"$job\"}[$__rate_interval])", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "OutMsgs - Messages which this entity attempted to send. Note that this counter includes all those counted by icmpOutErrors", + "refId": "B", + "step": 240 + } + ], + "thresholds": [], + "timeRegions": [], + "title": "ICMP In / Out", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": "messages out (-) / in (+)", + "logBase": 1, + "show": true + }, + { + "format": "short", + "logBase": 1, + "show": false + } + ], + "yaxis": { + "align": false + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "type": "prometheus", + "uid": "LocalPrometheus" + }, + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 10, + "w": 12, + "x": 12, + "y": 91 + }, + "height": "", + "hiddenSeries": false, + "id": 50, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "hideZero": false, + "max": true, + "min": true, + "rightSide": false, + "show": true, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "maxPerRow": 12, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "8.4.7", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "/.*Out.*/", + "transform": "negative-Y" + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "rate(node_netstat_Icmp_InErrors{instance=\"$node\",job=\"$job\"}[$__rate_interval])", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "InErrors - Messages which the entity received but determined as having ICMP-specific errors (bad ICMP checksums, bad length, etc.)", + "refId": "A", + "step": 240 + } + ], + "thresholds": [], + "timeRegions": [], + "title": "ICMP Errors", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": "messages out (-) / in (+)", + "logBase": 1, + "show": true + }, + { + "format": "short", + "logBase": 1, + "show": false + } + ], + "yaxis": { + "align": false + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "type": "prometheus", + "uid": "LocalPrometheus" + }, + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 10, + "w": 12, + "x": 0, + "y": 101 + }, + "height": "", + "hiddenSeries": false, + "id": 55, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "hideZero": false, + "max": true, + "min": true, + "rightSide": false, + "show": true, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "maxPerRow": 12, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "8.4.7", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "/.*Out.*/", + "transform": "negative-Y" + }, + { + "alias": "/.*Snd.*/", + "transform": "negative-Y" + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "rate(node_netstat_Udp_InDatagrams{instance=\"$node\",job=\"$job\"}[$__rate_interval])", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "InDatagrams - Datagrams received", + "refId": "A", + "step": 240 + }, + { + "expr": "rate(node_netstat_Udp_OutDatagrams{instance=\"$node\",job=\"$job\"}[$__rate_interval])", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "OutDatagrams - Datagrams sent", + "refId": "B", + "step": 240 + } + ], + "thresholds": [], + "timeRegions": [], + "title": "UDP In / Out", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": "datagrams out (-) / in (+)", + "logBase": 1, + "show": true + }, + { + "format": "short", + "logBase": 1, + "show": false + } + ], + "yaxis": { + "align": false + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "type": "prometheus", + "uid": "LocalPrometheus" + }, + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 10, + "w": 12, + "x": 12, + "y": 101 + }, + "height": "", + "hiddenSeries": false, + "id": 109, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "hideZero": false, + "max": true, + "min": true, + "rightSide": false, + "show": true, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "maxPerRow": 12, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "8.4.7", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "rate(node_netstat_Udp_InErrors{instance=\"$node\",job=\"$job\"}[$__rate_interval])", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "InErrors - UDP Datagrams that could not be delivered to an application", + "refId": "A", + "step": 240 + }, + { + "expr": "rate(node_netstat_Udp_NoPorts{instance=\"$node\",job=\"$job\"}[$__rate_interval])", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "NoPorts - UDP Datagrams received on a port with no listener", + "refId": "B", + "step": 240 + }, + { + "expr": "rate(node_netstat_UdpLite_InErrors{instance=\"$node\",job=\"$job\"}[$__rate_interval])", + "interval": "", + "legendFormat": "InErrors Lite - UDPLite Datagrams that could not be delivered to an application", + "refId": "C" + }, + { + "expr": "rate(node_netstat_Udp_RcvbufErrors{instance=\"$node\",job=\"$job\"}[$__rate_interval])", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "RcvbufErrors - UDP buffer errors received", + "refId": "D", + "step": 240 + }, + { + "expr": "rate(node_netstat_Udp_SndbufErrors{instance=\"$node\",job=\"$job\"}[$__rate_interval])", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "SndbufErrors - UDP buffer errors send", + "refId": "E", + "step": 240 + } + ], + "thresholds": [], + "timeRegions": [], + "title": "UDP Errors", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:4232", + "format": "short", + "label": "datagrams", + "logBase": 1, + "show": true + }, + { + "$$hashKey": "object:4233", + "format": "short", + "logBase": 1, + "show": false + } + ], + "yaxis": { + "align": false + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "type": "prometheus", + "uid": "LocalPrometheus" + }, + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 10, + "w": 12, + "x": 0, + "y": 111 + }, + "height": "", + "hiddenSeries": false, + "id": 299, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "hideZero": false, + "max": true, + "min": true, + "rightSide": false, + "show": true, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "maxPerRow": 12, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "8.4.7", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "/.*Out.*/", + "transform": "negative-Y" + }, + { + "alias": "/.*Snd.*/", + "transform": "negative-Y" + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "rate(node_netstat_Tcp_InSegs{instance=\"$node\",job=\"$job\"}[$__rate_interval])", + "format": "time_series", + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "InSegs - Segments received, including those received in error. This count includes segments received on currently established connections", + "refId": "A", + "step": 240 + }, + { + "expr": "rate(node_netstat_Tcp_OutSegs{instance=\"$node\",job=\"$job\"}[$__rate_interval])", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "OutSegs - Segments sent, including those on current connections but excluding those containing only retransmitted octets", + "refId": "B", + "step": 240 + } + ], + "thresholds": [], + "timeRegions": [], + "title": "TCP In / Out", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": "datagrams out (-) / in (+)", + "logBase": 1, + "show": true + }, + { + "format": "short", + "logBase": 1, + "show": false + } + ], + "yaxis": { + "align": false + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "type": "prometheus", + "uid": "LocalPrometheus" + }, + "description": "", + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 10, + "w": 12, + "x": 12, + "y": 111 + }, + "height": "", + "hiddenSeries": false, + "id": 104, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "hideEmpty": false, + "hideZero": false, + "max": true, + "min": true, + "rightSide": false, + "show": true, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "maxPerRow": 12, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "8.4.7", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "rate(node_netstat_TcpExt_ListenOverflows{instance=\"$node\",job=\"$job\"}[$__rate_interval])", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "ListenOverflows - Times the listen queue of a socket overflowed", + "refId": "A", + "step": 240 + }, + { + "expr": "rate(node_netstat_TcpExt_ListenDrops{instance=\"$node\",job=\"$job\"}[$__rate_interval])", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "ListenDrops - SYNs to LISTEN sockets ignored", + "refId": "B", + "step": 240 + }, + { + "expr": "rate(node_netstat_TcpExt_TCPSynRetrans{instance=\"$node\",job=\"$job\"}[$__rate_interval])", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "TCPSynRetrans - SYN-SYN/ACK retransmits to break down retransmissions in SYN, fast/timeout retransmits", + "refId": "C", + "step": 240 + }, + { + "expr": "rate(node_netstat_Tcp_RetransSegs{instance=\"$node\",job=\"$job\"}[$__rate_interval])", + "interval": "", + "legendFormat": "RetransSegs - Segments retransmitted - that is, the number of TCP segments transmitted containing one or more previously transmitted octets", + "refId": "D" + }, + { + "expr": "rate(node_netstat_Tcp_InErrs{instance=\"$node\",job=\"$job\"}[$__rate_interval])", + "interval": "", + "legendFormat": "InErrs - Segments received in error (e.g., bad TCP checksums)", + "refId": "E" + }, + { + "expr": "rate(node_netstat_Tcp_OutRsts{instance=\"$node\",job=\"$job\"}[$__rate_interval])", + "interval": "", + "legendFormat": "OutRsts - Segments sent with RST flag", + "refId": "F" + } + ], + "thresholds": [], + "timeRegions": [], + "title": "TCP Errors", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": "counter", + "logBase": 1, + "min": "0", + "show": true + }, + { + "format": "short", + "logBase": 1, + "show": false + } + ], + "yaxis": { + "align": false + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "type": "prometheus", + "uid": "LocalPrometheus" + }, + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 10, + "w": 12, + "x": 0, + "y": 121 + }, + "height": "", + "hiddenSeries": false, + "id": 85, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "hideZero": false, + "max": true, + "min": true, + "rightSide": false, + "show": true, + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "maxPerRow": 12, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "8.4.7", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "$$hashKey": "object:454", + "alias": "/.*MaxConn *./", + "color": "#890F02", + "fill": 0 + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "node_netstat_Tcp_CurrEstab{instance=\"$node\",job=\"$job\"}", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "CurrEstab - TCP connections for which the current state is either ESTABLISHED or CLOSE- WAIT", + "refId": "A", + "step": 240 + }, + { + "expr": "node_netstat_Tcp_MaxConn{instance=\"$node\",job=\"$job\"}", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "MaxConn - Limit on the total number of TCP connections the entity can support (Dinamic is \"-1\")", + "refId": "B", + "step": 240 + } + ], + "thresholds": [], + "timeRegions": [], + "title": "TCP Connections", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:469", + "format": "short", + "label": "connections", + "logBase": 1, + "min": "0", + "show": true + }, + { + "$$hashKey": "object:470", + "format": "short", + "logBase": 1, + "show": false + } + ], + "yaxis": { + "align": false + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "type": "prometheus", + "uid": "LocalPrometheus" + }, + "description": "", + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 10, + "w": 12, + "x": 12, + "y": 121 + }, + "height": "", + "hiddenSeries": false, + "id": 91, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "hideEmpty": false, + "hideZero": false, + "max": true, + "min": true, + "rightSide": false, + "show": true, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "maxPerRow": 12, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "8.4.7", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "/.*Sent.*/", + "transform": "negative-Y" + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "rate(node_netstat_TcpExt_SyncookiesFailed{instance=\"$node\",job=\"$job\"}[$__rate_interval])", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "SyncookiesFailed - Invalid SYN cookies received", + "refId": "A", + "step": 240 + }, + { + "expr": "rate(node_netstat_TcpExt_SyncookiesRecv{instance=\"$node\",job=\"$job\"}[$__rate_interval])", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "SyncookiesRecv - SYN cookies received", + "refId": "B", + "step": 240 + }, + { + "expr": "rate(node_netstat_TcpExt_SyncookiesSent{instance=\"$node\",job=\"$job\"}[$__rate_interval])", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "SyncookiesSent - SYN cookies sent", + "refId": "C", + "step": 240 + } + ], + "thresholds": [], + "timeRegions": [], + "title": "TCP SynCookie", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": "counter out (-) / in (+)", + "logBase": 1, + "show": true + }, + { + "format": "short", + "logBase": 1, + "show": false + } + ], + "yaxis": { + "align": false + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "type": "prometheus", + "uid": "LocalPrometheus" + }, + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 10, + "w": 12, + "x": 0, + "y": 131 + }, + "height": "", + "hiddenSeries": false, + "id": 82, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "hideZero": false, + "max": true, + "min": true, + "rightSide": false, + "show": true, + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "maxPerRow": 12, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "8.4.7", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "rate(node_netstat_Tcp_ActiveOpens{instance=\"$node\",job=\"$job\"}[$__rate_interval])", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "ActiveOpens - TCP connections that have made a direct transition to the SYN-SENT state from the CLOSED state", + "refId": "A", + "step": 240 + }, + { + "expr": "rate(node_netstat_Tcp_PassiveOpens{instance=\"$node\",job=\"$job\"}[$__rate_interval])", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "PassiveOpens - TCP connections that have made a direct transition to the SYN-RCVD state from the LISTEN state", + "refId": "B", + "step": 240 + } + ], + "thresholds": [], + "timeRegions": [], + "title": "TCP Direct Transition", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": "connections", + "logBase": 1, + "min": "0", + "show": true + }, + { + "format": "short", + "logBase": 1, + "show": false + } + ], + "yaxis": { + "align": false + } + } + ], + "title": "Network Netstat", + "type": "row" + }, + { + "collapsed": true, + "datasource": { + "type": "prometheus", + "uid": "LocalPrometheus" + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 33 + }, + "id": 279, + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "type": "prometheus", + "uid": "LocalPrometheus" + }, + "description": "", + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 10, + "w": 12, + "x": 0, + "y": 54 + }, + "hiddenSeries": false, + "id": 40, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": true, + "min": true, + "show": true, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "node_scrape_collector_duration_seconds{instance=\"$node\",job=\"$job\"}", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{collector}} - Scrape duration", + "refId": "A", + "step": 240 + } + ], + "thresholds": [], + "timeRegions": [], + "title": "Node Exporter Scrape Time", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "s", + "label": "seconds", + "logBase": 1, + "show": true + }, + { + "format": "short", + "logBase": 1, + "show": false + } + ], + "yaxis": { + "align": false + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "type": "prometheus", + "uid": "LocalPrometheus" + }, + "description": "", + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 10, + "w": 12, + "x": 12, + "y": 54 + }, + "hiddenSeries": false, + "id": 157, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": true, + "min": true, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "$$hashKey": "object:1969", + "alias": "/.*error.*/", + "color": "#F2495C", + "transform": "negative-Y" + } + ], + "spaceLength": 10, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "node_scrape_collector_success{instance=\"$node\",job=\"$job\"}", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{collector}} - Scrape success", + "refId": "A", + "step": 240 + }, + { + "expr": "node_textfile_scrape_error{instance=\"$node\",job=\"$job\"}", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{collector}} - Scrape textfile error (1 = true)", + "refId": "B", + "step": 240 + } + ], + "thresholds": [], + "timeRegions": [], + "title": "Node Exporter Scrape", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:1484", + "format": "short", + "label": "counter", + "logBase": 1, + "show": true + }, + { + "$$hashKey": "object:1485", + "format": "short", + "logBase": 1, + "show": false + } + ], + "yaxis": { + "align": false + } + } + ], + "title": "Node Exporter", + "type": "row" + } + ], + "refresh": "1m", + "schemaVersion": 35, + "style": "dark", + "tags": [], + "templating": { + "list": [ + { + "current": { + "selected": false, + "text": "default", + "value": "default" + }, + "hide": 0, + "includeAll": false, + "label": "datasource", + "multi": false, + "name": "DS_PROMETHEUS", + "options": [], + "query": "prometheus", + "queryValue": "", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "type": "datasource" + }, + { + "current": { + "selected": false, + "text": "node-exporters", + "value": "node-exporters" + }, + "datasource": { + "type": "prometheus", + "uid": "LocalPrometheus" + }, + "definition": "label_values(node_uname_info, job)", + "hide": 0, + "includeAll": false, + "label": "Job", + "multi": false, + "name": "job", + "options": [], + "query": { + "query": "label_values(node_uname_info, job)", + "refId": "StandardVariableQuery" + }, + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 1, + "tagValuesQuery": "", + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "current": { + "selected": true, + "text": "storage001", + "value": "storage001" + }, + "datasource": { + "type": "prometheus", + "uid": "LocalPrometheus" + }, + "definition": "label_values(node_uname_info{job=\"$job\"}, instance)", + "hide": 0, + "includeAll": false, + "label": "Host:", + "multi": false, + "name": "node", + "options": [], + "query": { + "query": "label_values(node_uname_info{job=\"$job\"}, instance)", + "refId": "StandardVariableQuery" + }, + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 1, + "tagValuesQuery": "", + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "current": { + "selected": false, + "text": "[a-z]+|nvme[0-9]+n[0-9]+|mmcblk[0-9]+", + "value": "[a-z]+|nvme[0-9]+n[0-9]+|mmcblk[0-9]+" + }, + "hide": 2, + "includeAll": false, + "multi": false, + "name": "diskdevices", + "options": [ + { + "selected": true, + "text": "[a-z]+|nvme[0-9]+n[0-9]+|mmcblk[0-9]+", + "value": "[a-z]+|nvme[0-9]+n[0-9]+|mmcblk[0-9]+" + } + ], + "query": "[a-z]+|nvme[0-9]+n[0-9]+|mmcblk[0-9]+", + "skipUrlSync": false, + "type": "custom" + } + ] + }, + "time": { + "from": "now-24h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "", + "title": "Node Exporter Full", + "uid": "node-exporter", + "version": 1, + "weekStart": "" +} diff --git a/nixos/modules/monitoring/server/grafana-dashboards/payments.json b/nixos/modules/monitoring/server/grafana-dashboards/payments.json index 331e623dd18024e58ce49eb4f435ca74e7ad55d8..ee721b78a8058880de41beb31f04cd442bbabeca 100644 --- a/nixos/modules/monitoring/server/grafana-dashboards/payments.json +++ b/nixos/modules/monitoring/server/grafana-dashboards/payments.json @@ -39,7 +39,7 @@ "type": "row" }, { - "description": "Our calls to the Stripe API: Attempted and successful credit card charges.", + "description": "Calls to our Stripe Webhook and the (obsolete) Charge endpoint.", "fieldConfig": { "defaults": { "color": { @@ -136,86 +136,154 @@ "placement": "bottom" }, "tooltip": { - "mode": "single" + "mode": "multi", + "sort": "none" } }, "pluginVersion": "8.3.4", "targets": [ { + "datasource": { + "type": "prometheus", + "uid": "LocalPrometheus" + }, + "exemplar": true, + "expr": "sum (http_responses_total{path=\"v1/stripe/webhook\"})", + "hide": false, + "interval": "", + "legendFormat": "Webhook attempts", + "refId": "Webhook attempts" + }, + { + "datasource": { + "type": "prometheus", + "uid": "LocalPrometheus" + }, + "exemplar": true, + "expr": "http_responses_total{path=\"v1/stripe/webhook\", status=\"2XX\"}", + "hide": false, + "interval": "", + "legendFormat": "Webhook successes", + "refId": "Webhook successes" + }, + { + "datasource": { + "type": "prometheus", + "uid": "LocalPrometheus" + }, "exemplar": true, "expr": "payment_processors_stripe_charge_attempts", "hide": false, "interval": "", - "legendFormat": "Attempts", - "refId": "B" + "legendFormat": "Charge attempts", + "refId": "Charge attempts" }, { + "datasource": { + "type": "prometheus", + "uid": "LocalPrometheus" + }, "exemplar": true, "expr": "payment_processors_stripe_charge_successes", + "hide": false, "interval": "", - "legendFormat": "Successes", - "refId": "C" + "legendFormat": "Charge successes", + "refId": "Charge successes" } ], - "title": "Stripe", + "title": "Stripe Webhook and (obsolete) Charge API", "type": "timeseries" }, { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, "description": "", "fieldConfig": { "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, "unit": "short" }, - "overrides": [] + "overrides": [ + { + "matcher": { + "id": "byType", + "options": "time" + }, + "properties": [ + { + "id": "custom.axisPlacement", + "value": "hidden" + } + ] + } + ] }, - "fill": 1, - "fillGradient": 0, "gridPos": { "h": 7, "w": 12, "x": 12, "y": 1 }, - "hiddenSeries": false, "id": 20, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "nullPointMode": "null", "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "8.3.5", - "pointradius": 2, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - { - "alias": "Issued signatures", - "yaxis": 2 + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "multi", + "sort": "none" } - ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, + }, + "pluginVersion": "8.4.7", "targets": [ { "datasource": { "type": "prometheus", - "uid": "000000001" + "uid": "LocalPrometheus" }, "exemplar": true, "expr": "payment_redemption_signatures_issued", @@ -226,7 +294,7 @@ { "datasource": { "type": "prometheus", - "uid": "000000001" + "uid": "LocalPrometheus" }, "exemplar": true, "expr": "payment_redemption_vouchers_redeemed", @@ -237,37 +305,415 @@ "refId": "B" } ], - "thresholds": [], - "timeRegions": [], "title": "Redemption", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" + "type": "timeseries" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 8 }, - "type": "graph", - "xaxis": { - "mode": "time", - "show": false, - "values": [] + "id": 32, + "panels": [], + "title": "HTTP v1/stripe/webhook", + "type": "row" + }, + { + "description": "HTTPS responses per second", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 8, + "x": 0, + "y": 9 }, - "yaxes": [ + "id": 33, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "8.3.4", + "targets": [ { - "$$hashKey": "object:408", - "format": "short", - "logBase": 1, - "show": true + "datasource": { + "type": "prometheus", + "uid": "LocalPrometheus" + }, + "exemplar": true, + "expr": "rate(http_responses_total{path=\"v1/stripe/webhook\"}[5m])", + "instant": false, + "interval": "", + "legendFormat": "{{status}}", + "refId": "A" + } + ], + "title": "Requests per second", + "type": "timeseries" + }, + { + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "max": 1, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "percentunit" }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 8, + "x": 8, + "y": 9 + }, + "id": 34, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "8.3.4", + "targets": [ { - "$$hashKey": "object:409", - "format": "short", - "logBase": 1, - "show": true + "datasource": { + "type": "prometheus", + "uid": "LocalPrometheus" + }, + "exemplar": true, + "expr": "sum(http_responses_total{path=\"v1/stripe/webhook\", status=\"4XX\"}) / sum(http_responses_total{path=\"v1/stripe/webhook\"})", + "interval": "", + "legendFormat": "Client error (4XX) rate", + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "LocalPrometheus" + }, + "exemplar": true, + "expr": "sum(http_responses_total{path=\"v1/stripe/charge\", status=\"5XX\"}) / sum(http_responses_total{path=\"v1/stripe/webhook\"})", + "interval": "", + "legendFormat": "Server error (5XX) rate", + "refId": "B" } ], - "yaxis": { - "align": false - } + "title": "Error rate", + "type": "timeseries" + }, + { + "description": "Request durations, stacked.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 20, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "=< 0.1s" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "blue", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "=< 1s" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "green", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "=< 5s" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "yellow", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "> 5s" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "orange", + "mode": "fixed" + } + } + ] + } + ] + }, + "gridPos": { + "h": 7, + "w": 8, + "x": 16, + "y": 9 + }, + "id": 13, + "links": [], + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "8.3.4", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "LocalPrometheus" + }, + "exemplar": true, + "expr": "http_request_duration_seconds_bucket{path=\"v1/stripe/webhook\", le=\"0.1\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "=< 0.1s", + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "LocalPrometheus" + }, + "exemplar": true, + "expr": "http_request_duration_seconds_bucket{path=\"v1/stripe/webhook\", le=\"1.0\"} - ignoring(le) http_request_duration_seconds_bucket{path=\"v1/stripe/webhook\", le=\"0.1\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "=< 1s", + "refId": "D" + }, + { + "datasource": { + "type": "prometheus", + "uid": "LocalPrometheus" + }, + "exemplar": true, + "expr": "http_request_duration_seconds_bucket{path=\"v1/stripe/webhook\", le=\"5.0\"} - ignoring(le) http_request_duration_seconds_bucket{path=\"v1/stripe/webhook\", le=\"1.0\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "=< 5s", + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "LocalPrometheus" + }, + "exemplar": true, + "expr": "http_request_duration_seconds_bucket{path=\"v1/stripe/webhook\", le=\"+Inf\"} - ignoring(le) http_request_duration_seconds_bucket{path=\"v1/stripe/webhook\", le=\"5.0\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "> 5s", + "refId": "C" + } + ], + "title": "Durations", + "type": "timeseries" }, { "collapsed": false, @@ -275,7 +721,7 @@ "h": 1, "w": 24, "x": 0, - "y": 8 + "y": 16 }, "id": 18, "panels": [], @@ -341,7 +787,7 @@ "h": 7, "w": 8, "x": 0, - "y": 9 + "y": 17 }, "id": 4, "options": { @@ -351,7 +797,8 @@ "placement": "bottom" }, "tooltip": { - "mode": "single" + "mode": "single", + "sort": "none" } }, "pluginVersion": "8.3.4", @@ -428,7 +875,7 @@ "h": 7, "w": 8, "x": 8, - "y": 9 + "y": 17 }, "id": 15, "options": { @@ -438,7 +885,8 @@ "placement": "bottom" }, "tooltip": { - "mode": "single" + "mode": "multi", + "sort": "none" } }, "pluginVersion": "8.3.4", @@ -581,7 +1029,7 @@ "h": 7, "w": 8, "x": 16, - "y": 9 + "y": 17 }, "id": 12, "links": [], @@ -592,7 +1040,8 @@ "placement": "bottom" }, "tooltip": { - "mode": "single" + "mode": "multi", + "sort": "none" } }, "pluginVersion": "8.3.4", @@ -651,7 +1100,7 @@ "h": 1, "w": 24, "x": 0, - "y": 16 + "y": 24 }, "id": 11, "panels": [], @@ -717,7 +1166,7 @@ "h": 7, "w": 8, "x": 0, - "y": 17 + "y": 25 }, "id": 2, "options": { @@ -727,7 +1176,8 @@ "placement": "bottom" }, "tooltip": { - "mode": "single" + "mode": "single", + "sort": "none" } }, "pluginVersion": "8.3.4", @@ -806,7 +1256,7 @@ "h": 7, "w": 8, "x": 8, - "y": 17 + "y": 25 }, "id": 16, "options": { @@ -816,7 +1266,8 @@ "placement": "bottom" }, "tooltip": { - "mode": "multi" + "mode": "multi", + "sort": "none" } }, "pluginVersion": "8.3.4", @@ -836,7 +1287,7 @@ "type": "timeseries" }, { - "description": "Request durations, stacked.", + "description": "Request durations, stacked", "fieldConfig": { "defaults": { "color": { @@ -955,9 +1406,9 @@ "h": 7, "w": 8, "x": 16, - "y": 17 + "y": 25 }, - "id": 13, + "id": 35, "links": [], "options": { "legend": { @@ -966,14 +1417,15 @@ "placement": "bottom" }, "tooltip": { - "mode": "single" + "mode": "single", + "sort": "none" } }, "pluginVersion": "8.3.4", "targets": [ { "exemplar": true, - "expr": "http_request_duration_seconds_bucket{path=\"v1/redeem\", le=\"0.1\"}", + "expr": "http_request_duration_seconds_bucket{path=\"v1/stripe/charge\", le=\"0.1\"}", "format": "time_series", "hide": false, "instant": false, @@ -984,7 +1436,7 @@ }, { "exemplar": true, - "expr": "http_request_duration_seconds_bucket{path=\"v1/redeem\", le=\"1.0\"} - ignoring(le) http_request_duration_seconds_bucket{path=\"v1/redeem\", le=\"0.1\"}", + "expr": "http_request_duration_seconds_bucket{path=\"v1/stripe/charge\", le=\"1.0\"} - ignoring(le) http_request_duration_seconds_bucket{path=\"v1/stripe/charge\", le=\"0.1\"}", "format": "time_series", "hide": false, "instant": false, @@ -995,7 +1447,7 @@ }, { "exemplar": true, - "expr": "http_request_duration_seconds_bucket{path=\"v1/redeem\", le=\"5.0\"} - ignoring(le) http_request_duration_seconds_bucket{path=\"v1/redeem\", le=\"1.0\"}", + "expr": "http_request_duration_seconds_bucket{path=\"v1/stripe/charge\", le=\"5.0\"} - ignoring(le) http_request_duration_seconds_bucket{path=\"v1/stripe/charge\", le=\"1.0\"}", "format": "time_series", "hide": false, "instant": false, @@ -1006,7 +1458,7 @@ }, { "exemplar": true, - "expr": "http_request_duration_seconds_bucket{path=\"v1/redeem\", le=\"+Inf\"} - ignoring(le) http_request_duration_seconds_bucket{path=\"v1/redeem\", le=\"5.0\"}", + "expr": "http_request_duration_seconds_bucket{path=\"v1/stripe/charge\", le=\"+Inf\"} - ignoring(le) http_request_duration_seconds_bucket{path=\"v1/stripe/charge\", le=\"5.0\"}", "format": "time_series", "hide": false, "instant": false, @@ -1025,7 +1477,7 @@ "h": 1, "w": 24, "x": 0, - "y": 24 + "y": 32 }, "id": 26, "panels": [], @@ -1035,7 +1487,7 @@ { "datasource": { "type": "loki", - "uid": "000000002" + "uid": "LocalLoki" }, "description": "Exercise in counting maybe interesting lines. This can be alerted on.", "fieldConfig": { @@ -1097,7 +1549,7 @@ "h": 8, "w": 12, "x": 0, - "y": 25 + "y": 33 }, "id": 28, "options": { @@ -1107,14 +1559,15 @@ "placement": "bottom" }, "tooltip": { - "mode": "single" + "mode": "single", + "sort": "none" } }, "targets": [ { "datasource": { "type": "loki", - "uid": "000000002" + "uid": "LocalLoki" }, "expr": "count_over_time({host=\"payments\",unit=\"zkapissuer.service\"} !~ \"200|/metrics|Accept\" [5m])", "refId": "A" @@ -1127,14 +1580,14 @@ { "datasource": { "type": "loki", - "uid": "000000002" + "uid": "LocalLoki" }, "description": "Exercise in filtering the payment server logs.", "gridPos": { "h": 8, "w": 12, "x": 12, - "y": 25 + "y": 33 }, "id": 30, "options": { @@ -1151,7 +1604,7 @@ { "datasource": { "type": "loki", - "uid": "000000002" + "uid": "LocalLoki" }, "expr": "{host=\"payments\",unit=\"zkapissuer.service\"} !~ \"200|/metrics|Accept\"", "refId": "A" @@ -1162,7 +1615,7 @@ } ], "refresh": "5m", - "schemaVersion": 34, + "schemaVersion": 35, "style": "dark", "tags": [], "templating": { diff --git a/nixos/modules/monitoring/server/grafana-dashboards/resources-overview.json b/nixos/modules/monitoring/server/grafana-dashboards/resources-overview.json index 077c43a39dfb075429f574b5221e6caf8d419360..98e0e1e1157f0e0e9277c4e0f6eff40c38f08f67 100644 --- a/nixos/modules/monitoring/server/grafana-dashboards/resources-overview.json +++ b/nixos/modules/monitoring/server/grafana-dashboards/resources-overview.json @@ -22,6 +22,7 @@ "editable": true, "fiscalYearStartMonth": 0, "graphTooltip": 1, + "id": 125, "links": [], "liveNow": false, "panels": [ @@ -115,7 +116,7 @@ "pluginVersion": "8.3.5", "targets": [ { - "expr": "1 - (max by (instance) (irate(node_cpu_seconds_total{mode=\"idle\"}[5m])))", + "expr": "1 - (min by (instance) (irate(node_cpu_seconds_total{mode=\"idle\"}[5m])))", "interval": "", "intervalFactor": 1, "legendFormat": "{{instance}}", @@ -154,7 +155,7 @@ } ], "executionErrorState": "alerting", - "for": "5m", + "for": "2h", "frequency": "1m", "handler": 1, "name": "15 min load average alert", @@ -274,7 +275,30 @@ }, "query": { "params": [ - "A", + "Hosts without ZFS", + "15m", + "now" + ] + }, + "reducer": { + "params": [], + "type": "avg" + }, + "type": "query" + }, + { + "evaluator": { + "params": [ + 0.8 + ], + "type": "gt" + }, + "operator": { + "type": "and" + }, + "query": { + "params": [ + "Hosts with ZFS", "15m", "now" ] @@ -294,6 +318,7 @@ "noDataState": "no_data", "notifications": [] }, + "datasource": {}, "description": "How much RAM is in use? Relative to available system memory.", "fieldConfig": { "defaults": { @@ -370,11 +395,39 @@ "pluginVersion": "8.3.5", "targets": [ { - "expr": "1 - node_memory_MemAvailable_bytes / node_memory_MemTotal_bytes", + "datasource": { + "type": "prometheus", + "uid": "LocalPrometheus" + }, + "exemplar": true, + "expr": "1 - node_memory_MemAvailable_bytes / node_memory_MemTotal_bytes unless node_zfs_arc_size", + "hide": false, + "instant": false, "interval": "", "intervalFactor": 4, "legendFormat": "{{instance}}", - "refId": "A" + "refId": "Hosts without ZFS" + }, + { + "datasource": { + "type": "prometheus", + "uid": "LocalPrometheus" + }, + "exemplar": true, + "expr": "1 - (node_memory_MemAvailable_bytes + node_zfs_arc_size) / node_memory_MemTotal_bytes", + "hide": false, + "instant": false, + "interval": "", + "legendFormat": "{{instance}}", + "refId": "Hosts with ZFS" + } + ], + "thresholds": [ + { + "colorMode": "critical", + "op": "gt", + "value": 0.8, + "visible": true } ], "title": "RAM used %", @@ -832,6 +885,10 @@ "noDataState": "no_data", "notifications": [] }, + "datasource": { + "type": "prometheus", + "uid": "LocalPrometheus" + }, "description": "Network errors, drops etc. Should all be 0.", "fieldConfig": { "defaults": { @@ -908,7 +965,7 @@ { "datasource": { "type": "prometheus", - "uid": "000000001" + "uid": "LocalPrometheus" }, "exemplar": true, "expr": "rate(node_network_transmit_errs_total{device!=\"lo\"}[5m])\n", @@ -919,7 +976,7 @@ { "datasource": { "type": "prometheus", - "uid": "000000001" + "uid": "LocalPrometheus" }, "exemplar": true, "expr": "rate(node_network_transmit_drop_total{device!=\"lo\"}[5m])", @@ -930,7 +987,7 @@ { "datasource": { "type": "prometheus", - "uid": "000000001" + "uid": "LocalPrometheus" }, "exemplar": true, "expr": "- rate(node_network_receive_drop_total{device!=\"lo\"}[5m])", @@ -941,7 +998,7 @@ { "datasource": { "type": "prometheus", - "uid": "000000001" + "uid": "LocalPrometheus" }, "exemplar": true, "expr": "- rate(node_network_receive_errs_total{device!=\"lo\"}[5m])", @@ -1142,6 +1199,10 @@ "noDataState": "no_data", "notifications": [] }, + "datasource": { + "type": "prometheus", + "uid": "LocalPrometheus" + }, "description": "How much user data do we store", "fieldConfig": { "defaults": { @@ -1221,7 +1282,7 @@ { "datasource": { "type": "prometheus", - "uid": "000000001" + "uid": "LocalPrometheus" }, "exemplar": true, "expr": "1 - (node_filesystem_avail_bytes{mountpoint=\"/storage\"} / node_filesystem_size_bytes{mountpoint=\"/storage\"})", @@ -1558,6 +1619,10 @@ "type": "row" }, { + "datasource": { + "type": "prometheus", + "uid": "LocalPrometheus" + }, "fieldConfig": { "defaults": { "color": { @@ -1630,7 +1695,7 @@ { "datasource": { "type": "prometheus", - "uid": "000000001" + "uid": "LocalPrometheus" }, "exemplar": true, "expr": "rate(node_pressure_cpu_waiting_seconds_total[5m])", @@ -1643,6 +1708,10 @@ "type": "timeseries" }, { + "datasource": { + "type": "prometheus", + "uid": "LocalPrometheus" + }, "fieldConfig": { "defaults": { "color": { @@ -1715,7 +1784,7 @@ { "datasource": { "type": "prometheus", - "uid": "000000001" + "uid": "LocalPrometheus" }, "exemplar": true, "expr": "rate(node_pressure_memory_waiting_seconds_total[5m])", @@ -1728,6 +1797,10 @@ "type": "timeseries" }, { + "datasource": { + "type": "prometheus", + "uid": "LocalPrometheus" + }, "fieldConfig": { "defaults": { "color": { @@ -1800,7 +1873,7 @@ { "datasource": { "type": "prometheus", - "uid": "000000001" + "uid": "LocalPrometheus" }, "exemplar": true, "expr": "rate(node_pressure_io_waiting_seconds_total[5m])", diff --git a/nixos/modules/monitoring/server/grafana-dashboards/tahoe-lafs.json b/nixos/modules/monitoring/server/grafana-dashboards/tahoe-lafs.json index 6d7e7014e12753ce791c0059630e009d4172544d..4fc78ea08c7615de835b3bfb1a3edbc3f1c6e51f 100644 --- a/nixos/modules/monitoring/server/grafana-dashboards/tahoe-lafs.json +++ b/nixos/modules/monitoring/server/grafana-dashboards/tahoe-lafs.json @@ -8,21 +8,27 @@ "hide": true, "iconColor": "rgba(0, 211, 255, 1)", "name": "Annotations & Alerts", + "target": { + "limit": 100, + "matchAny": false, + "tags": [], + "type": "dashboard" + }, "type": "dashboard" } ] }, "description": "", "editable": true, - "gnetId": null, + "fiscalYearStartMonth": 0, "graphTooltip": 0, - "id": 41, - "iteration": 1636742282779, + "id": 55, + "iteration": 1662661580921, "links": [], + "liveNow": false, "panels": [ { "collapsed": false, - "datasource": null, "gridPos": { "h": 1, "w": 24, @@ -39,7 +45,6 @@ "bars": false, "dashLength": 10, "dashes": false, - "datasource": null, "description": "These stats keep track of local disk latencies for storage-server operations. All values are in seconds. These are recorded by the storage server, starting from the time the request arrives (post-deserialization) and ending when the response begins serialization. As such, they are mostly useful for measuring disk speeds.", "fieldConfig": { "defaults": { @@ -73,7 +78,7 @@ "alertThreshold": true }, "percentage": false, - "pluginVersion": "7.5.11", + "pluginVersion": "8.4.7", "pointradius": 2, "points": false, "renderer": "flot", @@ -139,9 +144,7 @@ } ], "thresholds": [], - "timeFrom": null, "timeRegions": [], - "timeShift": null, "title": "Latency means", "tooltip": { "shared": true, @@ -150,9 +153,7 @@ }, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", - "name": null, "show": true, "values": [] }, @@ -160,25 +161,18 @@ { "$$hashKey": "object:1111", "format": "s", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": true }, { "$$hashKey": "object:1112", "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": true } ], "yaxis": { - "align": false, - "alignLevel": null + "align": false } }, { @@ -186,12 +180,7 @@ "bars": false, "dashLength": 10, "dashes": false, - "datasource": null, "description": "This counts inbound storage-server operations.", - "fieldConfig": { - "defaults": {}, - "overrides": [] - }, "fill": 1, "fillGradient": 0, "gridPos": { @@ -218,7 +207,7 @@ "alertThreshold": true }, "percentage": false, - "pluginVersion": "7.5.11", + "pluginVersion": "8.4.7", "pointradius": 2, "points": false, "renderer": "flot", @@ -308,9 +297,7 @@ } ], "thresholds": [], - "timeFrom": null, "timeRegions": [], - "timeShift": null, "title": "Counts/s", "tooltip": { "shared": true, @@ -319,9 +306,7 @@ }, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", - "name": null, "show": true, "values": [] }, @@ -329,25 +314,19 @@ { "$$hashKey": "object:2483", "format": "short", - "label": null, "logBase": 1, - "max": null, "min": "0", "show": true }, { "$$hashKey": "object:2484", "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": true } ], "yaxis": { - "align": false, - "alignLevel": null + "align": false } }, { @@ -355,11 +334,6 @@ "bars": false, "dashLength": 10, "dashes": false, - "datasource": null, - "fieldConfig": { - "defaults": {}, - "overrides": [] - }, "fill": 1, "fillGradient": 0, "gridPos": { @@ -388,7 +362,7 @@ "alertThreshold": true }, "percentage": false, - "pluginVersion": "7.5.11", + "pluginVersion": "8.4.7", "pointradius": 2, "points": false, "renderer": "flot", @@ -414,9 +388,7 @@ } ], "thresholds": [], - "timeFrom": null, "timeRegions": [], - "timeShift": null, "title": "Bytes/s", "tooltip": { "shared": true, @@ -425,9 +397,7 @@ }, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", - "name": null, "show": true, "values": [] }, @@ -435,30 +405,23 @@ { "$$hashKey": "object:2568", "format": "bytes", - "label": null, "logBase": 1, - "max": null, "min": "0", "show": true }, { "$$hashKey": "object:2569", "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": true } ], "yaxis": { - "align": false, - "alignLevel": null + "align": false } }, { "collapsed": false, - "datasource": null, "gridPos": { "h": 1, "w": 24, @@ -467,12 +430,10 @@ }, "id": 19, "panels": [], - "repeat": null, "title": "Latency Histograms", "type": "row" }, { - "datasource": null, "description": "These stats keep track of local disk latencies for storage-server operations. All values are in seconds. These are recorded by the storage server, starting from the time the request arrives (post-deserialization) and ending when the response begins serialization. As such, they are mostly useful for measuring disk speeds.", "fieldConfig": { "defaults": { @@ -517,16 +478,9 @@ "showUnfilled": true, "text": {} }, - "pluginVersion": "7.5.11", + "pluginVersion": "8.4.7", "repeat": "storageserverop", "repeatDirection": "h", - "scopedVars": { - "storageserverop": { - "selected": true, - "text": "allocate", - "value": "allocate" - } - }, "targets": [ { "exemplar": true, @@ -539,147 +493,8 @@ "title": "$storageserverop", "type": "bargauge" }, - { - "datasource": null, - "description": "These stats keep track of local disk latencies for storage-server operations. All values are in seconds. These are recorded by the storage server, starting from the time the request arrives (post-deserialization) and ending when the response begins serialization. As such, they are mostly useful for measuring disk speeds.", - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "s" - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 8, - "x": 8, - "y": 10 - }, - "id": 39, - "options": { - "displayMode": "gradient", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "showUnfilled": true, - "text": {} - }, - "pluginVersion": "7.5.11", - "repeatDirection": "h", - "repeatIteration": 1636742282779, - "repeatPanelId": 11, - "scopedVars": { - "storageserverop": { - "selected": true, - "text": "write", - "value": "write" - } - }, - "targets": [ - { - "exemplar": true, - "expr": "tahoe_stats_storage_server_latencies_$storageserverop", - "interval": "", - "legendFormat": "{{quantile}}", - "refId": "A" - } - ], - "title": "$storageserverop", - "type": "bargauge" - }, - { - "datasource": null, - "description": "These stats keep track of local disk latencies for storage-server operations. All values are in seconds. These are recorded by the storage server, starting from the time the request arrives (post-deserialization) and ending when the response begins serialization. As such, they are mostly useful for measuring disk speeds.", - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "s" - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 8, - "x": 16, - "y": 10 - }, - "id": 40, - "options": { - "displayMode": "gradient", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "showUnfilled": true, - "text": {} - }, - "pluginVersion": "7.5.11", - "repeatDirection": "h", - "repeatIteration": 1636742282779, - "repeatPanelId": 11, - "scopedVars": { - "storageserverop": { - "selected": true, - "text": "readv", - "value": "readv" - } - }, - "targets": [ - { - "exemplar": true, - "expr": "tahoe_stats_storage_server_latencies_$storageserverop", - "interval": "", - "legendFormat": "{{quantile}}", - "refId": "A" - } - ], - "title": "$storageserverop", - "type": "bargauge" - }, { "collapsed": false, - "datasource": null, "gridPos": { "h": 1, "w": 24, @@ -696,12 +511,7 @@ "bars": false, "dashLength": 10, "dashes": false, - "datasource": null, "description": "These all reflect disk-space usage policies and status.", - "fieldConfig": { - "defaults": {}, - "overrides": [] - }, "fill": 1, "fillGradient": 0, "gridPos": { @@ -728,7 +538,7 @@ "alertThreshold": true }, "percentage": false, - "pluginVersion": "7.5.11", + "pluginVersion": "8.4.7", "pointradius": 2, "points": false, "renderer": "flot", @@ -771,9 +581,7 @@ } ], "thresholds": [], - "timeFrom": null, "timeRegions": [], - "timeShift": null, "title": "Bytes free", "tooltip": { "shared": true, @@ -782,9 +590,7 @@ }, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", - "name": null, "show": true, "values": [] }, @@ -792,25 +598,18 @@ { "$$hashKey": "object:712", "format": "bytes", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": true }, { "$$hashKey": "object:713", "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": true } ], "yaxis": { - "align": false, - "alignLevel": null + "align": false } }, { @@ -818,12 +617,7 @@ "bars": false, "dashLength": 10, "dashes": false, - "datasource": null, "description": "These all reflect disk-space usage policies and status.", - "fieldConfig": { - "defaults": {}, - "overrides": [] - }, "fill": 1, "fillGradient": 0, "gridPos": { @@ -850,7 +644,7 @@ "alertThreshold": true }, "percentage": false, - "pluginVersion": "7.5.11", + "pluginVersion": "8.4.7", "pointradius": 2, "points": false, "renderer": "flot", @@ -876,9 +670,7 @@ } ], "thresholds": [], - "timeFrom": null, "timeRegions": [], - "timeShift": null, "title": "Bytes used", "tooltip": { "shared": true, @@ -887,9 +679,7 @@ }, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", - "name": null, "show": true, "values": [] }, @@ -897,25 +687,18 @@ { "$$hashKey": "object:712", "format": "bytes", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": true }, { "$$hashKey": "object:713", "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": true } ], "yaxis": { - "align": false, - "alignLevel": null + "align": false } }, { @@ -923,12 +706,7 @@ "bars": false, "dashLength": 10, "dashes": false, - "datasource": null, "description": "This counts the number of ‘buckets’ (i.e. unique storage-index values) currently managed by the storage server. It indicates roughly how many files are managed by the server.", - "fieldConfig": { - "defaults": {}, - "overrides": [] - }, "fill": 1, "fillGradient": 0, "gridPos": { @@ -955,7 +733,7 @@ "alertThreshold": true }, "percentage": false, - "pluginVersion": "7.5.11", + "pluginVersion": "8.4.7", "pointradius": 2, "points": false, "renderer": "flot", @@ -973,9 +751,7 @@ } ], "thresholds": [], - "timeFrom": null, "timeRegions": [], - "timeShift": null, "title": "Total bucket count", "tooltip": { "shared": true, @@ -984,9 +760,7 @@ }, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", - "name": null, "show": true, "values": [] }, @@ -994,30 +768,22 @@ { "$$hashKey": "object:797", "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": true }, { "$$hashKey": "object:798", "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": true } ], "yaxis": { - "align": false, - "alignLevel": null + "align": false } }, { "collapsed": false, - "datasource": null, "gridPos": { "h": 1, "w": 24, @@ -1034,12 +800,7 @@ "bars": false, "dashLength": 10, "dashes": false, - "datasource": null, "description": "Estimate of what percentage of system CPU time was consumed by the node process, over the given time interval. ", - "fieldConfig": { - "defaults": {}, - "overrides": [] - }, "fill": 1, "fillGradient": 0, "gridPos": { @@ -1066,7 +827,7 @@ "alertThreshold": true }, "percentage": false, - "pluginVersion": "7.5.11", + "pluginVersion": "8.4.7", "pointradius": 2, "points": false, "renderer": "flot", @@ -1102,9 +863,7 @@ } ], "thresholds": [], - "timeFrom": null, "timeRegions": [], - "timeShift": null, "title": "CPU monitor", "tooltip": { "shared": true, @@ -1113,18 +872,14 @@ }, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", - "name": null, "show": true, "values": [] }, "yaxes": [ { "$$hashKey": "object:62", - "decimals": null, "format": "percentunit", - "label": null, "logBase": 1, "max": "1", "min": "0", @@ -1133,16 +888,12 @@ { "$$hashKey": "object:63", "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": true } ], "yaxis": { - "align": false, - "alignLevel": null + "align": false } }, { @@ -1150,12 +901,7 @@ "bars": false, "dashLength": 10, "dashes": false, - "datasource": null, "description": "Estimate of total number of CPU seconds consumed by node since the process was started. Ticket #472 indicates that .total may sometimes be negative due to wraparound of the kernel’s counter.", - "fieldConfig": { - "defaults": {}, - "overrides": [] - }, "fill": 1, "fillGradient": 0, "gridPos": { @@ -1182,7 +928,7 @@ "alertThreshold": true }, "percentage": false, - "pluginVersion": "7.5.11", + "pluginVersion": "8.4.7", "pointradius": 2, "points": false, "renderer": "flot", @@ -1200,9 +946,7 @@ } ], "thresholds": [], - "timeFrom": null, "timeRegions": [], - "timeShift": null, "title": "CPU time total", "tooltip": { "shared": true, @@ -1211,36 +955,27 @@ }, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", - "name": null, "show": true, "values": [] }, "yaxes": [ { "$$hashKey": "object:62", - "decimals": null, "format": "s", - "label": null, "logBase": 1, - "max": null, "min": "0", "show": true }, { "$$hashKey": "object:63", "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": true } ], "yaxis": { - "align": false, - "alignLevel": null + "align": false } }, { @@ -1248,12 +983,7 @@ "bars": false, "dashLength": 10, "dashes": false, - "datasource": null, "description": "How many seconds since the node process was started.", - "fieldConfig": { - "defaults": {}, - "overrides": [] - }, "fill": 1, "fillGradient": 0, "gridPos": { @@ -1280,7 +1010,7 @@ "alertThreshold": true }, "percentage": false, - "pluginVersion": "7.5.11", + "pluginVersion": "8.4.7", "pointradius": 2, "points": false, "renderer": "flot", @@ -1298,9 +1028,7 @@ } ], "thresholds": [], - "timeFrom": null, "timeRegions": [], - "timeShift": null, "title": "Node uptime", "tooltip": { "shared": true, @@ -1309,9 +1037,7 @@ }, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", - "name": null, "show": true, "values": [] }, @@ -1319,44 +1045,490 @@ { "$$hashKey": "object:386", "format": "s", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": true }, { "$$hashKey": "object:387", "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": true } ], "yaxis": { - "align": false, - "alignLevel": null + "align": false } + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 36 + }, + "id": 42, + "panels": [], + "title": "Corruption Advisories", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "LocalPrometheus" + }, + "description": "File count of /storage/corruption-advisories/", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 60, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 37 + }, + "id": 44, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "LocalPrometheus" + }, + "exemplar": true, + "expr": "tahoe_corruption_advisories_total", + "interval": "", + "legendFormat": "{{instance}}", + "refId": "A" + } + ], + "title": "Corruption Advisory count", + "type": "timeseries" + }, + { + "alert": { + "alertRuleTags": {}, + "conditions": [ + { + "evaluator": { + "params": [ + 0 + ], + "type": "gt" + }, + "operator": { + "type": "and" + }, + "query": { + "params": [ + "A", + "5m", + "now" + ] + }, + "reducer": { + "params": [], + "type": "avg" + }, + "type": "query" + } + ], + "executionErrorState": "alerting", + "for": "5m", + "frequency": "1m", + "handler": 1, + "name": "Corruption Advisory rate alert", + "noDataState": "no_data", + "notifications": [] + }, + "datasource": { + "type": "prometheus", + "uid": "LocalPrometheus" + }, + "description": "Rate of new files in /storage/corruption-advisories/", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 60, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 37 + }, + "id": 46, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "LocalPrometheus" + }, + "exemplar": true, + "expr": "rate(tahoe_corruption_advisories_total[5m])", + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{instance}}", + "refId": "A" + } + ], + "thresholds": [ + { + "colorMode": "critical", + "op": "gt", + "value": 0, + "visible": true + } + ], + "title": "Corruption Advisory rate", + "type": "timeseries" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 45 + }, + "id": 50, + "panels": [], + "title": "Incident Reports", + "type": "row" + }, + { + "datasource": {}, + "description": "File count of /var/db/tahoe-lafs/storage/logs/incidents/", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 60, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 46 + }, + "id": 53, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "LocalPrometheus" + }, + "exemplar": true, + "expr": "tahoe_incident_reports_total", + "interval": "", + "legendFormat": "{{instance}}", + "refId": "A" + } + ], + "title": "Incident Reports count", + "type": "timeseries" + }, + { + "alert": { + "alertRuleTags": {}, + "conditions": [ + { + "evaluator": { + "params": [ + 0 + ], + "type": "gt" + }, + "operator": { + "type": "and" + }, + "query": { + "params": [ + "A", + "5m", + "now" + ] + }, + "reducer": { + "params": [], + "type": "avg" + }, + "type": "query" + } + ], + "executionErrorState": "alerting", + "for": "5m", + "frequency": "1m", + "handler": 1, + "name": "Incident Reports rate alert", + "noDataState": "no_data", + "notifications": [] + }, + "datasource": {}, + "description": "Rate of new files in /var/db/tahoe-lafs/storage/logs/incidents/", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 60, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 46 + }, + "id": 54, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "LocalPrometheus" + }, + "exemplar": true, + "expr": "rate(tahoe_incident_reports_total[5m])", + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{instance}}", + "refId": "A" + } + ], + "thresholds": [ + { + "colorMode": "critical", + "op": "gt", + "value": 0, + "visible": true + } + ], + "title": "Incident Reports rate", + "type": "timeseries" } ], - "schemaVersion": 27, + "schemaVersion": 35, "style": "dark", "tags": [], "templating": { "list": [ { - "allValue": null, "current": { "selected": false, "text": "storage1", "value": "storage1" }, - "datasource": null, "definition": "tahoe_stats_cpu_monitor_1min_avg", "description": "Which node (instamce) to show", - "error": null, "hide": 0, "includeAll": false, "label": "Node", @@ -1372,16 +1544,13 @@ "skipUrlSync": false, "sort": 0, "tagValuesQuery": "", - "tags": [], "tagsQuery": "", "type": "query", "useTags": false }, { - "allValue": null, "current": { "selected": true, - "tags": [], "text": [ "allocate", "write", @@ -1394,7 +1563,6 @@ ] }, "description": "Inbound storage-server operations ", - "error": null, "hide": 0, "includeAll": true, "label": "Detailed latencies for", @@ -1472,5 +1640,6 @@ "timezone": "", "title": "Tahoe-LAFS", "uid": "TahoeLAFS", - "version": 1 + "version": 1, + "weekStart": "" } diff --git a/nixos/modules/monitoring/server/grafana.nix b/nixos/modules/monitoring/server/grafana.nix index 0923885f86d9bcebc4d3df590c71fbcddd8d1df8..4da836c10975d56438fee3961912da20b4788c7b 100644 --- a/nixos/modules/monitoring/server/grafana.nix +++ b/nixos/modules/monitoring/server/grafana.nix @@ -7,18 +7,9 @@ let cfg = config.services.private-storage.monitoring.grafana; - grafanaAuth = if (cfg.googleOAuthClientID == "") then { - anonymous.enable = true; - } else { - google.enable = true; - # Grafana considers it "sign up" to let in a user it has - # never seen before. - google.allowSignUp = true; - google.clientSecretFile = cfg.googleOAuthClientSecretFile; - google.clientId = cfg.googleOAuthClientID; - }; in { + options.services.private-storage.monitoring.grafana = { domains = lib.mkOption { type = lib.types.listOf lib.types.str; @@ -77,6 +68,21 @@ in { Where to find the file that containts the slack URL. ''; }; + enableZulipAlert = lib.mkOption + { type = lib.types.bool; + default = false; + description = '' + Enables the Zulip alerter. Expects a file that contains + the secret Zulip Web Hook URL in grafanaZulipUrlFile (see below). + ''; + }; + grafanaZulipUrlFile = lib.mkOption + { type = lib.types.path; + default = /run/keys/grafana-zulip-url; + description = '' + Where to find the file that containts the Zulip URL. + ''; + }; }; config = @@ -90,54 +96,74 @@ in { services.grafana = { enable = true; - inherit domain; - port = 2342; - addr = "127.0.0.1"; - - # No phoning home - analytics.reporting.enable = false; - - # Force Grafana to believe it is reachable via https on the default port - # number because that's where the nginx that forwards traffic to it is - # listening. Grafana's own server listens on an internal address that - # doesn't matter to anyone except our nginx instance. - rootUrl = "https://%(domain)s/"; - - extraOptions = { - # Defend against DNS rebinding attacks. - SERVER_ENFORCE_DOMAIN = "true"; - # Same time zone for all users by default - DATE_FORMATS_DEFAULT_TIMEZONE = "UTC"; - }; - auth = { - anonymous.org_role = "Admin"; - anonymous.org_name = "Main Org."; - } // grafanaAuth; + settings = { + + server = { + domain = "${toString domain}"; + http_port = 2342; + http_addr = "127.0.0.1"; - # Give users that come through GSuite SSO the highest possible privileges: - users.autoAssignOrgRole = "Editor"; + # Defend against DNS rebinding attacks. + enforce_domain = true; - # Read the admin password from a file in our secrets folder: - security.adminPasswordFile = cfg.adminPasswordFile; + # Force Grafana to believe it is reachable via https on the default port + # number because that's where the nginx that forwards traffic to it is + # listening. Grafana's own server listens on an internal address that + # doesn't matter to anyone except our nginx instance. + root_url = "https://%(domain)s/"; + }; + + # No phoning home + analytics.reporting_enabled = false; + + # Same time zone for all users by default + date_formats.default_timezone = "UTC"; + + # The auth sections since NixOS 22.11 are named a bit funky with a dot in the name + # + # https://grafana.com/docs/grafana/latest/setup-grafana/configure-security/configure-authentication/grafana/#anonymous-authentication + # https://grafana.com/docs/grafana/latest/setup-grafana/configure-security/configure-authentication/google/ + "auth.anonymous" = lib.mkIf (cfg.googleOAuthClientID == "") { + enabled = true; + org_role = "Admin"; + org_name = "Main Org."; + }; + "auth.google" = lib.mkIf (cfg.googleOAuthClientID != "") { + enabled = true; + # Grafana considers it "sign up" to let in a user it has + # never seen before. + allow_sign_up = true; + client_secret = "$__file{${toString cfg.googleOAuthClientSecretFile}}"; + client_id = cfg.googleOAuthClientID; + }; + + # Give users that come through GSuite SSO the highest possible privileges: + users.auto_assign_org_role = "Editor"; + + # Read the admin password from a file in our secrets folder: + security.admin_password = "$__file{${toString cfg.adminPasswordFile}}"; + }; provision = { enable = true; # See https://grafana.com/docs/grafana/latest/administration/provisioning/#datasources - datasources = [{ + datasources.settings.datasources = [{ name = "Prometheus"; type = "prometheus"; + uid = "LocalPrometheus"; access = "proxy"; url = cfg.prometheusUrl; isDefault = true; } { name = "Loki"; type = "loki"; + uid = "LocalLoki"; access = "proxy"; url = cfg.lokiUrl; }]; # See https://grafana.com/docs/grafana/latest/administration/provisioning/#dashboards - dashboards = [{ + dashboards.settings.providers = [{ name = "provisioned"; options.path = ./grafana-dashboards; }]; @@ -157,12 +183,22 @@ in { # See https://grafana.com/docs/grafana/latest/administration/configuration/#file-provider url = "$__file{${toString cfg.grafanaSlackUrlFile}}"; }; + }]) ++ (lib.optionals (cfg.enableZulipAlert) [{ + # See https://zulip.com/integrations/doc/grafana + uid = "zulip-notifier-1"; + name = "Zulip"; + type = "webhook"; + is_default = true; + send_reminder = false; + settings = { + url = "$__file{${toString cfg.grafanaZulipUrlFile}}"; + }; }]); }; }; # nginx reverse proxy - security.acme.email = cfg.letsEncryptAdminEmail; + security.acme.defaults.email = cfg.letsEncryptAdminEmail; security.acme.acceptTerms = true; services.nginx = { enable = true; @@ -180,7 +216,7 @@ in { enableACME = true; forceSSL = true; locations."/" = { - proxyPass = "http://127.0.0.1:${toString config.services.grafana.port}"; + proxyPass = "http://127.0.0.1:${toString config.services.grafana.settings.server.http_port}"; proxyWebsockets = true; }; locations."/metrics" = { @@ -192,7 +228,7 @@ in { allow ::1; deny all; ''; - proxyPass = "http://127.0.0.1:${toString config.services.grafana.port}"; + proxyPass = "http://127.0.0.1:${toString config.services.grafana.settings.server.http_port}"; }; }; }; diff --git a/nixos/modules/monitoring/server/prometheus.nix b/nixos/modules/monitoring/server/prometheus.nix index 2a78dd3e797c0b28d14fc9e9e0858811ac86ef76..fac29c29ffaae507549f78826edd3e838ccb4e6c 100644 --- a/nixos/modules/monitoring/server/prometheus.nix +++ b/nixos/modules/monitoring/server/prometheus.nix @@ -13,6 +13,7 @@ let regex = "^(.*)(?:\\.monitoringvpn):\\d+$"; target_label = "instance"; }; + logRetention = toString(config.services.private-storage.monitoring.policy.logRetentionSeconds) + "s"; in { options.services.private-storage.monitoring.prometheus = { @@ -44,6 +45,7 @@ in { services.prometheus = { enable = true; # port = 9090; # Option only in recent (20.09?) nixpkgs, 9090 default + retentionTime = logRetention; scrapeConfigs = [ { job_name = "node-exporters"; diff --git a/nixos/modules/private-storage.nix b/nixos/modules/private-storage.nix index c620e2fb00fe3b24fc1f6b3c5defc12cddc30aa1..fd64d76025d3c265ffe6dce3cc8132bdac0c5685 100644 --- a/nixos/modules/private-storage.nix +++ b/nixos/modules/private-storage.nix @@ -67,6 +67,14 @@ in The port number on which to service storage clients. ''; }; + services.private-storage.publicReadOnlyStoragePort = lib.mkOption + { default = 8899; + type = lib.types.int; + example = 8099; + description = '' + The port number on which to service read-only storage clients. + ''; + }; services.private-storage.issuerRootURL = lib.mkOption { default = "https://issuer.${config.networking.domain}/"; type = lib.types.str; @@ -96,70 +104,96 @@ in # Define configuration based on values given for our options - starting with # the option that says whether this is even turned on. config = lib.mkIf cfg.enable - { services.tahoe.nodes."${storage-node-name}" = - { package = cfg.tahoe.package; - # Each attribute in this set corresponds to a section in the tahoe.cfg - # file. Attributes on those sets correspond to individual assignments - # in those sections. - # - # We just populate this according to policy/preference of Private - # Storage. - sections = - { client = if cfg.introducerFURL == null then {} else - { "introducer.furl" = cfg.introducerFURL; + { + # A read-only storage service. This allows read-only access for clients + # that use Great Black Swamp. There is no ZKAP/GBS integration yet so + # this is the most we can do at the moment. + services.tahoe.nodes."ro-${storage-node-name}" = + { package = cfg.tahoe.package; + sections = + { client = if cfg.introducerFURL == null then {} else + { "introducer.furl" = cfg.introducerFURL; + }; + node = + { nickname = "ro-${storage-node-name}"; + "tub.port" = "tcp:${toString cfg.publicReadOnlyStoragePort}"; + "tub.location" = "tcp:${cfg.publicAddress}:${toString cfg.publicReadOnlyStoragePort}"; + }; + storage = + { enabled = true; + storage_dir = "/storage"; + readonly = true; + force_foolscap = false; + }; + }; }; - node = - # XXX Should try to name that is unique across the grid. - { nickname = "${storage-node-name}"; - # We have the web port active because the CLI uses it. We may - # eventually turn this off, or at least have it off by default (with - # an option to turn it on). I don't know how much we'll use the CLI - # on the nodes. Maybe very little? Or maybe it will be part of a - # health check for the node... In any case, we tell it to bind to - # localhost so no one *else* can use it. And the principle of the - # web interface is that merely having access to it doesn't grant - # access to any data. It does grant access to storage capabilities - # but with our plugin configuration you still need ZKAPs to use - # those... - "web.port" = "tcp:3456:interface=127.0.0.1"; - # We have to tell Tahoe-LAFS where to listen for Foolscap - # connections for the storage protocol. We have to tell it twice. - # First, in the syntax which it uses to listen. - "tub.port" = "tcp:${toString cfg.publicStoragePort}"; - # Second, in the syntax it advertises to in the fURL. - "tub.location" = "tcp:${cfg.publicAddress}:${toString cfg.publicStoragePort}"; - }; - storage = - { enabled = true; - # Put the storage where we have a lot of space configured. - storage_dir = "/storage"; - # Turn on our plugin. - plugins = "privatestorageio-zkapauthz-v1"; - }; - "storageserver.plugins.privatestorageio-zkapauthz-v1" = - { "ristretto-issuer-root-url" = cfg.issuerRootURL; - "ristretto-signing-key-path" = cfg.ristrettoSigningKeyPath; - } // ( - if cfg.passValue == null - then {} - else { "pass-value" = (toString cfg.passValue); } - ); - }; - }; + # Tahoe nixos module brings along a single socket for the web api. + # That's for the other storage node though. Turn off the integration + # with this one. + systemd.services."tahoe.ro-storage".unitConfig.Requires = lib.mkForce []; - # Let traffic destined for the storage node's Foolscap server through. - networking.firewall.allowedTCPPorts = [ cfg.publicStoragePort ]; + services.tahoe.nodes."${storage-node-name}" = + { package = cfg.tahoe.package; + # Each attribute in this set corresponds to a section in the + # tahoe.cfg file. Attributes on those sets correspond to individual + # assignments in those sections. + # + # We just populate this according to policy/preference of Private + # Storage. + sections = + { client = if cfg.introducerFURL == null then {} else + { "introducer.furl" = cfg.introducerFURL; + }; + node = + # XXX Should try to name that is unique across the grid. + { nickname = "${storage-node-name}"; - systemd.tmpfiles.rules = - # Add a rule to prevent incident reports from accumulating indefinitely. - # See tmpfiles.d(5) for the syntax. - [ "d ${incidents-dir} 0755 root root ${max-incident-age} -" - ]; + # We have the web port active because the CLI uses it and + # because it exposes a metrics endpoint for our monitoring + # system. The actual port configuration lives in systemd so + # that it can order binding the socket correctly with other + # dependencies (which we can't reliably do with Tahoe + # without a bunch of other work). + "web.port" = "systemd:domain=INET:index=0"; - environment.systemPackages = [ - # Provide a useful tool for reporting about shares. - ourpkgs.leasereport - ]; + # We have to tell Tahoe-LAFS where to listen for Foolscap + # connections for the storage protocol. We have to tell it twice. + # First, in the syntax which it uses to listen. + "tub.port" = "tcp:${toString cfg.publicStoragePort}"; - }; + # Second, in the syntax it advertises to in the fURL. + "tub.location" = "tcp:${cfg.publicAddress}:${toString cfg.publicStoragePort}"; + }; + storage = + { enabled = true; + # Put the storage where we have a lot of space configured. + storage_dir = "/storage"; + # Turn on our plugin. + plugins = "privatestorageio-zkapauthz-v2"; + }; + "storageserver.plugins.privatestorageio-zkapauthz-v2" = + { "ristretto-issuer-root-url" = cfg.issuerRootURL; + "ristretto-signing-key-path" = cfg.ristrettoSigningKeyPath; + } // ( + if cfg.passValue == null + then {} + else { "pass-value" = (toString cfg.passValue); } + ); + }; + }; + + # Let traffic destined for the storage node's Foolscap server through. + networking.firewall.allowedTCPPorts = [ cfg.publicStoragePort cfg.publicReadOnlyStoragePort ]; + + systemd.tmpfiles.rules = + # Add a rule to prevent incident reports from accumulating indefinitely. + # See tmpfiles.d(5) for the syntax. + [ "d ${incidents-dir} 0755 root root ${max-incident-age} -" + ]; + + environment.systemPackages = [ + # Provide a useful tool for reporting about shares. + ourpkgs.leasereport + ]; + }; } diff --git a/nixos/modules/ssh.nix b/nixos/modules/ssh.nix index 8d5d5766ae3b30c4801b6ce200fa58c1460f6ca7..90fd34b002c607965038a574334fb0fc370d146c 100644 --- a/nixos/modules/ssh.nix +++ b/nixos/modules/ssh.nix @@ -25,11 +25,8 @@ services.openssh = { enable = true; - # We don't use SFTP for anything. No reason to expose it. - allowSFTP = false; - # We only allow key-based authentication. - challengeResponseAuthentication = false; + kbdInteractiveAuthentication = false; passwordAuthentication = false; extraConfig = '' diff --git a/nixos/modules/tahoe.nix b/nixos/modules/tahoe.nix index e4a83006e129e80a59ca9f8262acf74760b4fc98..b53435080d104bccfed5e7e3004f7891f14159bf 100644 --- a/nixos/modules/tahoe.nix +++ b/nixos/modules/tahoe.nix @@ -182,6 +182,17 @@ in # Open up the firewall. # networking.firewall.allowedTCPPorts = flip mapAttrsToList cfg.nodes # (node: settings: settings.tub.port); + + # Make systemd open a port for us: + # Systemd uses the socket name to link to the corresponding Service Unit. + systemd.sockets."tahoe.storage" = { + description = "Tahoe Web Server Socket"; + wantedBy = [ "sockets.target" ]; + socketConfig = { + ListenStream = "127.0.0.1:3456"; + }; + }; + systemd.services = flip mapAttrs' cfg.nodes (node: settings: let pidfile = "/run/tahoe.${lib.escapeShellArg node}.pid"; @@ -191,10 +202,18 @@ in eliotLog = "file:${nodedir}/logs/eliot.json,rotate_length=${toString (1024 * 1024 * 32)},max_rotated_files=32"; in nameValuePair "tahoe.${node}" { description = "Tahoe LAFS node ${node}"; + + # We are partially socket activated but only for the web API port. + # For the actual storage service port, we bind ourselves. So make + # sure we actually do start up early in case storage requests come wantedBy = [ "multi-user.target" ]; + path = [ settings.package ]; - restartTriggers = [ - config.environment.etc."tahoe-lafs/${node}.cfg".source ]; + + # We don't know how to re-read our configuration file at runtime + # so restart if it ever changes. + restartTriggers = [ config.environment.etc."tahoe-lafs/${node}.cfg".source ]; + serviceConfig = { Type = "simple"; PIDFile = pidfile; @@ -202,8 +221,12 @@ in # arguments to $(tahoe run). The node directory must come first, # and arguments which alter Twisted's behavior come afterwards. ExecStart = '' - ${settings.package}/bin/tahoe --eliot-destination ${eliotLog} run ${nodedir} -n -l- --pidfile=${pidfile} + ${settings.package}/bin/tahoe --eliot-destination ${eliotLog} run --allow-stdin-close ${nodedir} -n -l- --pidfile=${pidfile} ''; + + # Twisted wants non-blocking sockets: + NonBlocking = true; + # The rlimit on number of open files controls how many # connections a particular storage server can accept (factoring # in the number of non-connection files the server needs open - @@ -240,6 +263,14 @@ in # now. So it makes sense to have the limit be 2^15 right now. LimitNOFILE = 32768; }; + + unitConfig = { + # Our config doesn't know how to bind all of its sockets on its + # own so don't start without the systemd units that *do* know + # how to bind them. + Requires = [ "tahoe.${node}.socket" ]; + }; + preStart = let created = "${nodedir}.created"; diff --git a/nixos/modules/update-deployment b/nixos/modules/update-deployment index a0d233a63595a9f838f48243b14ef98fa79a240d..cd41a2363c699c7e551a33d73f1eb83996c1ca85 100755 --- a/nixos/modules/update-deployment +++ b/nixos/modules/update-deployment @@ -79,7 +79,11 @@ ssh -o StrictHostKeyChecking=no "$(hostname).$(domainname)" ":" # # So instead, import our nixpkgs which forces it to be instantiated in the # store, then ask for its path, then set NIX_PATH to that. -export NIX_PATH="nixpkgs=$(nix eval "(import ${CHECKOUT}/nixpkgs.nix { }).path")" + +# Two lines since 'export' masks 'set -e' +# See https://mywiki.wooledge.org/BashFAQ/105#line-204 +NIX_PATH="nixpkgs=$(nix --extra-experimental-features nix-command eval --impure --expr "(import ${CHECKOUT}/nixpkgs.nix { }).path")" +export NIX_PATH # Attempt to update just this host. Choose the morph grid definition matching # the grid we belong to and limit the morph deployment update to the host diff --git a/nixos/pkgs/privatestorage/default.nix b/nixos/pkgs/privatestorage/default.nix index 3bbbd3dbcf0b974e6e1997e20773cddbd9ea59c0..f2c7ddea1a2eb35259ca5dbbff3ccd86bdac3a04 100644 --- a/nixos/pkgs/privatestorage/default.nix +++ b/nixos/pkgs/privatestorage/default.nix @@ -2,7 +2,9 @@ let repo-data = lib.importJSON ./repo.json; repo = fetchFromGitHub (builtins.removeAttrs repo-data [ "branch" ]); - privatestorage = callPackage repo { python = "python39"; }; + zk = import repo; + # XXX package version choice here + zkapauthorizer = zk.outputs.packages.x86_64-linux.zkapauthorizer-python39-tahoe_dev; + python = zkapauthorizer.passthru.python; in - privatestorage.privatestorage - + python.withPackages (ps: [ zkapauthorizer ] ) diff --git a/nixos/pkgs/privatestorage/repo.json b/nixos/pkgs/privatestorage/repo.json index 4113e150a72c907dce4ee0d7345361eadb248041..a0aab4861c244c1b6663453c15a3e6a31f99ba0d 100644 --- a/nixos/pkgs/privatestorage/repo.json +++ b/nixos/pkgs/privatestorage/repo.json @@ -2,7 +2,7 @@ "owner": "PrivateStorageio", "branch": "main", "repo": "ZKAPAuthorizer", - "rev": "744a063ab76a677b259aa9022711113ffbab2545", + "rev": "fb89e91a6c7f595cd0b1c7aa7055cbd32c482180", "outputHashAlgo": "sha512", - "outputHash": "293j4469iy69d2hz3gwxwyj0flqb1cncl938s5w5jmfgbvkm1w0yfg1y06nx89zis1rvwqpcly3vxp94pz1dx28d74wiianqks11p54" + "outputHash": "3f44znykq8f7mcgdwdyhgf2dvnx7yydmlrjcr17mxfwya4jqmx8zb59mxkxvar0ahn639y2nq3bcqxdyipljfxilfi1cz21li908kkw" } \ No newline at end of file diff --git a/nixos/pkgs/zkapissuer/repo.json b/nixos/pkgs/zkapissuer/repo.json index 2963aafabef1503608195f7903fe202f11dd70a8..bdfc12089d9559492340898369770062d2cb3137 100644 --- a/nixos/pkgs/zkapissuer/repo.json +++ b/nixos/pkgs/zkapissuer/repo.json @@ -1,8 +1,8 @@ { "owner": "PrivateStorageio", "repo": "PaymentServer", - "rev": "b1bd39ffb7269e5334bf068263733472b27b38ef", + "rev": "a82ccda4ea604ef983a8bb73616b48d9a6ea16ac", "branch": "main", "outputHashAlgo": "sha512", - "outputHash": "1jncsdyz83xphairk4rx47wrlkdn7s48dk8l3mnimal4h4x7zb34lyx2anm1nhxm7s52jv28rv4mvggc2dhsa21dsqhga3qggcpfi2m" + "outputHash": "07iwsnfn1avkyl05592vx3n51dlwdd0asym0qb9857bh2xbgdmzl5pdx70dwrgvdad19fjnphqsga7x9b649q0l9xq13rln6j9aljl0" } \ No newline at end of file diff --git a/nixos/system-tests.nix b/nixos/system-tests.nix index 819b5c738eca08b95d3c85b14088a2bf6c000dbf..eafd712cc141bd3ba7c9f6f824d06cb950745ec0 100644 --- a/nixos/system-tests.nix +++ b/nixos/system-tests.nix @@ -6,6 +6,13 @@ let pkgs' = pkgs.extend (self: super: { ourpkgs = self.callPackage ./pkgs {}; }); in { private-storage = pkgs'.nixosTest ./tests/private-storage.nix; - spending = pkgs'.nixosTest ./tests/spending.nix; + + # The spending service is not deployed so it doesn't seem *necessary* to run + # its test suite here. The packaging still uses mach-nix which is + # incompatible with NixOS 22.11 so we can't actually load the ZKAP spending + # service derivation anymore. So ... disable the test suite. + # + # spending = pkgs'.nixosTest ./tests/spending.nix; + tahoe = pkgs'.nixosTest ./tests/tahoe.nix; } diff --git a/nixos/tests/exercise-storage.py b/nixos/tests/exercise-storage.py index e3a1d4d2ec7674042487cc0c6dabc670fcd6561d..a4e177b5aa9db7372a41214d2ab4afeef1d23c13 100755 --- a/nixos/tests/exercise-storage.py +++ b/nixos/tests/exercise-storage.py @@ -47,7 +47,12 @@ def block_until_connected(api_root): in servers if server["connection_status"].startswith("Connected to ") ) - if len(connected) >= 1: + # There is a read-only server and a read-write server! The easiest + # way to be sure we've connected to the read-write server is to wait + # until we're connected to both. Also, if we manage to connect to two + # servers this gives us some confidence that both the read-only and + # read-write servers are running. + if len(connected) >= 2: print( "Connected to a server:\n" "\t{nodeid}\n" @@ -85,10 +90,13 @@ def get_api_root(path): return hyperlink.URL.from_text(f.read().strip()) def tahoe_put(api_root, data, **kwargs): + uri = api_root.child(u"uri").to_uri() response = requests.put( - api_root.child(u"uri").to_uri(), + uri, BytesIO(data), + headers={"accept": "text/plain"}, ) + print(f"PUT {uri} responded:\n{response.text}\n") response.raise_for_status() return response.text diff --git a/nixos/tests/get-passes.py b/nixos/tests/get-passes.py index 63b59e5700e0fa0c659f04df8fa889a0501f125f..6f9263345521fa9e3977015231a06060f83bd912 100755 --- a/nixos/tests/get-passes.py +++ b/nixos/tests/get-passes.py @@ -12,30 +12,38 @@ from json import dumps from time import sleep def main(): - if len(argv) != 5: + if len(argv) == 4: + # If no issuer is given then we just won't make the charge request. + # This is useful for following the webhook-based workflow. + clientAPIRoot, clientAPITokenPath, voucher = argv[1:] + issuerAPIRoot = None + elif len(argv) == 5: + clientAPIRoot, clientAPITokenPath, issuerAPIRoot, voucher = argv[1:] + else: raise SystemExit( - "usage: %s <client api root> <client api token path> <issuer api root> <voucher>", + "usage: %s <client api root> <client api token path> [<issuer api root>] <voucher>", ) - clientAPIRoot, clientAPITokenPath, issuerAPIRoot, voucher = argv[1:] + if not clientAPIRoot.endswith("/"): clientAPIRoot += "/" - if not issuerAPIRoot.endswith("/"): + if issuerAPIRoot is not None and not issuerAPIRoot.endswith("/"): issuerAPIRoot += "/" - zkapauthz = clientAPIRoot + "storage-plugins/privatestorageio-zkapauthz-v1" + zkapauthz = clientAPIRoot + "storage-plugins/privatestorageio-zkapauthz-v2" with open(clientAPITokenPath) as p: clientAPIToken = p.read().strip() - # Submit a charge to the issuer (which is also the PaymentServer). - charge_response = post( - issuerAPIRoot + "v1/stripe/charge", - dumps(charge_json(voucher)), - headers={ - "content-type": "application/json", - }, - ) - charge_response.raise_for_status() + if issuerAPIRoot is not None: + # Submit a charge to the issuer (which is also the PaymentServer). + charge_response = post( + issuerAPIRoot + "v1/stripe/charge", + dumps(charge_json(voucher)), + headers={ + "content-type": "application/json", + }, + ) + charge_response.raise_for_status() # Tell the client to redeem the voucher. response = put( diff --git a/nixos/tests/private-storage.nix b/nixos/tests/private-storage.nix index e02f9c49550d7fa4994c3d6099eda1e9374a278c..d7a5aaafe9908b0f3a36c4cdfb041e371eef6bfe 100644 --- a/nixos/tests/private-storage.nix +++ b/nixos/tests/private-storage.nix @@ -1,4 +1,4 @@ -{ pkgs }: +{ pkgs, ... }: let ourpkgs = pkgs.callPackage ../pkgs { }; @@ -40,6 +40,9 @@ let in pkgs.writeText basename key; + stripeWebhookSecretKey = "whsec_e302402f2f4b5d8241fe494cd693464345bf28c4d7312516d6c1ce69cd0c1e1d"; + stripeWebhookSecretKeyPath = pkgs.writeText "stripe-webhook.secret" stripeWebhookSecretKey; + # Here are the preconstructed secrets which we can assign to the introducer. # This is a lot easier than having the introducer generate them and then # discovering and configuring the other nodes with them. @@ -63,6 +66,8 @@ let networking.dhcpcd.enable = false; }; in { + name = "private-storage"; + # https://nixos.org/nixos/manual/index.html#sec-nixos-tests # https://nixos.mayflower.consulting/blog/2019/07/11/leveraging-nixos-tests-in-your-project/ nodes = rec { @@ -122,7 +127,7 @@ in { letsEncryptAdminEmail = "user@example.invalid"; allowedChargeOrigins = [ "http://unused.invalid" ]; - inherit stripeSecretKeyPath; + inherit stripeSecretKeyPath stripeWebhookSecretKeyPath; stripeEndpointDomain = "api_stripe_com"; stripeEndpointScheme = "HTTP"; stripeEndpointPort = 80; @@ -159,7 +164,7 @@ in { testScript = ourpkgs.lib.testing.makeTestScript { testpath = ./test_privatestorage.py; kwargs = { - inherit sshPrivateKeyFile pemFile introducerPort introducerFURL issuerURL ristrettoPublicKey voucher tokenCount; + inherit sshPrivateKeyFile pemFile introducerPort introducerFURL issuerURL ristrettoPublicKey voucher tokenCount stripeWebhookSecretKey; # Supply some helper programs to help the tests stay a bit higher level. run_introducer = ./run-introducer.py; run_client = ./run-client.py; diff --git a/nixos/tests/run-client.py b/nixos/tests/run-client.py index 8d3d82720ec94b4b91e6af8791aadc58cd7ce2ad..403e47977ca675358dcdab6d0296ba006903b78c 100755 --- a/nixos/tests/run-client.py +++ b/nixos/tests/run-client.py @@ -29,12 +29,12 @@ def main(): with open("/tmp/client/tahoe.cfg") as cfg: config.read_file(cfg) - config.set(u"client", u"storage.plugins", u"privatestorageio-zkapauthz-v1") - config.add_section(u"storageclient.plugins.privatestorageio-zkapauthz-v1") - config.set(u"storageclient.plugins.privatestorageio-zkapauthz-v1", u"redeemer", u"ristretto") - config.set(u"storageclient.plugins.privatestorageio-zkapauthz-v1", u"ristretto-issuer-root-url", issuerURL) - config.set(u"storageclient.plugins.privatestorageio-zkapauthz-v1", u"allowed-public-keys", publicKey) - config.set(u"storageclient.plugins.privatestorageio-zkapauthz-v1", u"default-token-count", tokenCount) + config.set(u"client", u"storage.plugins", u"privatestorageio-zkapauthz-v2") + config.add_section(u"storageclient.plugins.privatestorageio-zkapauthz-v2") + config.set(u"storageclient.plugins.privatestorageio-zkapauthz-v2", u"redeemer", u"ristretto") + config.set(u"storageclient.plugins.privatestorageio-zkapauthz-v2", u"ristretto-issuer-root-url", issuerURL) + config.set(u"storageclient.plugins.privatestorageio-zkapauthz-v2", u"allowed-public-keys", publicKey) + config.set(u"storageclient.plugins.privatestorageio-zkapauthz-v2", u"default-token-count", tokenCount) with open("/tmp/client/tahoe.cfg", "wt") as cfg: config.write(cfg) @@ -43,7 +43,7 @@ def main(): "daemonize", "-o", "/tmp/stdout", "-e", "/tmp/stderr", - which("tahoe"), "run", "/tmp/client", + which("tahoe"), "run", "--allow-stdin-close", "/tmp/client", ]) def run(argv): diff --git a/nixos/tests/run-introducer.py b/nixos/tests/run-introducer.py index 33c3ec10369477e39c1461b3e59149e015f03ce9..9062c43243f3a5a672ae41d53ace636d7698843a 100755 --- a/nixos/tests/run-introducer.py +++ b/nixos/tests/run-introducer.py @@ -31,11 +31,11 @@ def main(): "daemonize", "-o", "/tmp/stdout", "-e", "/tmp/stderr", - which("tahoe"), "run", "/tmp/introducer", + which("tahoe"), "run", "--allow-stdin-close", "/tmp/introducer", ]) retry( - "waiting for open introducer port", + f"connect to introducer (port {introducerPort})", lambda: checkOpen(int(introducerPort)), ) diff --git a/nixos/tests/tahoe.nix b/nixos/tests/tahoe.nix index a007e65efd2d6bee8ab4adba9df3cb2901f53526..5b3c5c3827d11798f3656a4912de7d648883e624 100644 --- a/nixos/tests/tahoe.nix +++ b/nixos/tests/tahoe.nix @@ -1,8 +1,9 @@ -{ pkgs }: +{ pkgs, ... }: let ourpkgs = pkgs.callPackage ../pkgs { }; in { + name = "tahoe"; nodes = { storage = { config, pkgs, ourpkgs, ... }: { imports = [ diff --git a/nixos/tests/test_privatestorage.py b/nixos/tests/test_privatestorage.py index df2aeb9372042d9368736579090e70ab90097f43..724b99c3667074bc97af13ebe5de71dd58405f24 100644 --- a/nixos/tests/test_privatestorage.py +++ b/nixos/tests/test_privatestorage.py @@ -1,4 +1,8 @@ -def runOnNode(node, argv): +import hmac +from shlex import quote +from time import time + +def runOnNode(node, argvs): """ Run a shell command on one of the nodes. The first argument is the name of the node. The second is a list of the argv to run. @@ -6,12 +10,13 @@ def runOnNode(node, argv): The program's output is piped to systemd-cat and the python fragment evaluates to success if the command exits with a success status. """ - try: - node.succeed('set -eo pipefail; {} | systemd-cat'.format(" ".join(argv))) - except Exception as e: - code, output = node.execute('cat /tmp/stdout /tmp/stderr') - introducer.log(output) - raise + for argv in argvs: + try: + node.succeed('set -eo pipefail; {} | systemd-cat'.format(" ".join(map(quote, argv)))) + except Exception as e: + code, output = node.execute('cat /tmp/stdout /tmp/stderr') + node.log(output) + raise def ssh(username, sshPrivateKeyFile, hostname): """ @@ -19,10 +24,144 @@ def ssh(username, sshPrivateKeyFile, hostname): host. """ return [ - "cp", sshPrivateKeyFile, "/tmp/ssh_key", ";", - "chmod", "0400", "/tmp/ssh_key", ";", - "ssh", "-oStrictHostKeyChecking=no", "-i", "/tmp/ssh_key", - "{username}@{hostname}".format(username=username, hostname=hostname), ":", + ["cp", sshPrivateKeyFile, "/tmp/ssh_key"], + ["chmod", "0400", "/tmp/ssh_key"], + ["ssh", "-oStrictHostKeyChecking=no", "-i", "/tmp/ssh_key", + "{username}@{hostname}".format(username=username, hostname=hostname), ":"], + ] + +def checkout_session_completed(voucher: str) -> str: + """ + Return a request body string which represents the payment completed event + for the given voucher. + """ + return """\ +{ + "id": "evt_1LxcsdBHXBAMm9bPSq6UWAZe", + "object": "event", + "api_version": "2019-11-05", + "created": 1666903247, + "data": { + "object": { + "id": "cs_test_a1kWLWGoXZPa6ywyVnuib8DPA3BqXCWZX5UEjLfKh7gLjdZy2LD3F5mEp3", + "object": "checkout.session", + "after_expiration": null, + "allow_promotion_codes": null, + "amount_subtotal": 3000, + "amount_total": 3000, + "automatic_tax": { + "enabled": false, + "status": null + }, + "billing_address_collection": null, + "cancel_url": "https://httpbin.org/post", + "client_reference_id": "%(voucher)s", + "consent": null, + "consent_collection": null, + "created": 1666903243, + "currency": "usd", + "customer": "cus_Mh0u62xtelUehD", + "customer_creation": "always", + "customer_details": { + "address": { + "city": null, + "country": null, + "line1": null, + "line2": null, + "postal_code": null, + "state": null + }, + "email": "stripe@example.com", + "name": null, + "phone": null, + "tax_exempt": "none", + "tax_ids": [ + + ] + }, + "customer_email": null, + "display_items": [ + { + "amount": 1500, + "currency": "usd", + "custom": { + "description": "comfortable cotton t-shirt", + "images": null, + "name": "t-shirt" + }, + "quantity": 2, + "type": "custom" + } + ], + "expires_at": 1666989643, + "livemode": false, + "locale": null, + "metadata": { + }, + "mode": "payment", + "payment_intent": "pi_3LxcsZBHXBAMm9bP1daBGoPV", + "payment_link": null, + "payment_method_collection": "always", + "payment_method_options": { + }, + "payment_method_types": [ + "card" + ], + "payment_status": "paid", + "phone_number_collection": { + "enabled": false + }, + "recovered_from": null, + "setup_intent": null, + "shipping": null, + "shipping_address_collection": null, + "shipping_options": [ + + ], + "shipping_rate": null, + "status": "complete", + "submit_type": null, + "subscription": null, + "success_url": "https://httpbin.org/post", + "total_details": { + "amount_discount": 0, + "amount_shipping": 0, + "amount_tax": 0 + }, + "url": null + } + }, + "livemode": false, + "pending_webhooks": 2, + "request": { + "id": null, + "idempotency_key": null + }, + "type": "checkout.session.completed" +} +""" % dict(voucher=voucher) + +def stripe_signature(key: str, body: str) -> str: + """ + Construct a valid value for the ``Stripe-Signature`` header item. + """ + timestamp = int(time()) + v1 = hmac.new(key.encode("utf-8"), f"{timestamp}.{body}".encode("utf-8"), "sha256").hexdigest() + return f"t={timestamp},v1={v1}" + +def pay_for_voucher(url: str, webhook_secret, voucher: str) -> list[str]: + """ + Return a command to run to report to the issuer that payment for the given + voucher has been received. + """ + body = checkout_session_completed(voucher) + return [ + "curl", + "-X", "POST", + "--header", "content-type: application/json; charset=utf-8", + "--header", f"stripe-signature: {stripe_signature(webhook_secret, body)}", + "--data-binary", body, + url + "v1/stripe/webhook", ] def test( @@ -36,6 +175,7 @@ def test( introducerFURL, issuerURL, ristrettoPublicKey, + stripeWebhookSecretKey, voucher, tokenCount, ): @@ -69,8 +209,7 @@ def test( # Set up a Tahoe-LAFS introducer. introducer.copy_from_host(pemFile, '/tmp/node.pem') - - runOnNode(introducer, [run_introducer, "/tmp/node.pem", str(introducerPort), introducerFURL]) + runOnNode(introducer, [[run_introducer, "/tmp/node.pem", str(introducerPort), introducerFURL]]) # # Get a Tahoe-LAFS storage server up. @@ -96,10 +235,13 @@ def test( # Make sure the issuer is ready to accept connections. issuer.wait_for_open_port(80) + # Pretend to be Stripe and report that our voucher has been paid for. + runOnNode(issuer, [pay_for_voucher("http://localhost/", stripeWebhookSecretKey, voucher)]) + # # Storage appears to be working so try to get a client to speak with it. # - runOnNode(client, [run_client, "/tmp/client", introducerFURL, issuerURL, ristrettoPublicKey, str(tokenCount)]) + runOnNode(client, [[run_client, "/tmp/client", introducerFURL, issuerURL, ristrettoPublicKey, str(tokenCount)]]) client.wait_for_open_port(3456) # Make sure the fake Stripe API server is ready for requests. @@ -112,13 +254,12 @@ def test( # Get some ZKAPs from the issuer. try: - runOnNode(client, [ + runOnNode(client, [[ get_passes, "http://127.0.0.1:3456", "/tmp/client/private/api_auth_token", - issuerURL, voucher, - ]) + ]]) except: # Dump the fake Stripe API server logs, too, since the error may arise # from a PaymentServer/Stripe interaction. @@ -129,19 +270,19 @@ def test( raise # The client should be prepped now. Make it try to use some storage. - runOnNode(client, [exercise_storage, "/tmp/client"]) + runOnNode(client, [[exercise_storage, "/tmp/client"]]) # It should be possible to restart the storage service without the # storage node fURL changing. - furlfile = '/var/db/tahoe-lafs/storage/private/storage-plugin.privatestorageio-zkapauthz-v1.furl' + furlfile = '/var/db/tahoe-lafs/storage/private/storage-plugin.privatestorageio-zkapauthz-v2.furl' before = storage.execute('cat ' + furlfile) - runOnNode(storage, ["systemctl", "restart", "tahoe.storage"]) + runOnNode(storage, [["systemctl", "restart", "tahoe.storage"]]) after = storage.execute('cat ' + furlfile) if (before != after): raise Exception('fURL changes after storage node restart') # The client should actually still work, too. - runOnNode(client, [exercise_storage, "/tmp/client"]) + runOnNode(client, [[exercise_storage, "/tmp/client"]]) # The issuer metrics should be accessible from the monitoring network. issuer.execute('ifconfig lo:fauxvpn 172.23.23.2/24') diff --git a/nixpkgs-2105.nix b/nixpkgs-2105.nix deleted file mode 100644 index e33347a21c29186826256e60bea0122fc85322bd..0000000000000000000000000000000000000000 --- a/nixpkgs-2105.nix +++ /dev/null @@ -1,6 +0,0 @@ -# This actually imports nixos-21.11 but we need to keep this file around so that -# upgrades work, as the on-node deployment script expects this file in the checkout. -# See https://whetstone.private.storage/privatestorage/PrivateStorageio/-/merge_requests/222#note_18600 -# This file can be removed once all nodes have been updated to point to the new file. - -import ./nixpkgs.nix diff --git a/nixpkgs.json b/nixpkgs.json index 87445ee44c04baa66c0973d665194fa494481973..e6f18dacda7288ab93aaf785cb0e59cdf8ac9973 100644 --- a/nixpkgs.json +++ b/nixpkgs.json @@ -1,5 +1,5 @@ { "name": "source", - "url": "https://releases.nixos.org/nixos/21.11/nixos-21.11.337975.eabc3821918/nixexprs.tar.xz", - "sha256": "1fq3zz7qfavksdbqvicns7hg61q3hhbxs2ibm818gy629hwkvsvm" + "url": "https://releases.nixos.org/nixos/22.11/nixos-22.11.4773.ea4c80b39be4/nixexprs.tar.xz", + "sha256": "1j7h75a9hwkkm97jicky5rhvzkdwxsv5v46473rl6agvq2sj97y1" } \ No newline at end of file diff --git a/shell.nix b/shell.nix index b8be3a3a6088a987468329ad29919a6957313c6a..d23d71f2a08f548f9fd95c6a95490f2de7f8b60a 100644 --- a/shell.nix +++ b/shell.nix @@ -22,6 +22,8 @@ pkgs.mkShell { inputsFrom = [tools]; buildInputs = [ tools + pkgs.cacert + pkgs.nix pkgs.morph pkgs.jp ]; diff --git a/tools/update-nixpkgs b/tools/update-nixpkgs index 12752892fab880582857d43c5cc4632a41c96d0f..58dedd9c0894b074f41bfa7b2a1b7c3ec1812616 100755 --- a/tools/update-nixpkgs +++ b/tools/update-nixpkgs @@ -10,7 +10,7 @@ from ps_tools import get_url_hash # We pass this to builtins.fetchTarball which only supports sha256 HASH_TYPE = "sha256" -DEFAULT_CHANNEL = "nixos-21.11" +DEFAULT_CHANNEL = "nixos-22.11" CHANNEL_URL_TEMPLATE = "https://channels.nixos.org/{channel}/nixexprs.tar.xz" @@ -24,9 +24,8 @@ def get_nixos_channel_url(*, channel): the release. """ response = httpx.head( - CHANNEL_URL_TEMPLATE.format(channel=channel), allow_redirects=False + CHANNEL_URL_TEMPLATE.format(channel=channel), follow_redirects=False ) - response.raise_for_status() assert response.is_redirect return str(response.next_request.url)