Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.

Source

Select target project
No results found

Target

Select target project
  • privatestorage/PrivateStorageio
  • tomprince/PrivateStorageio
2 results
Show changes
Showing
with 31205 additions and 274 deletions
{
"annotations": {
"list": [
{
"builtIn": 1,
"datasource": {
"type": "datasource",
"uid": "grafana"
},
"enable": true,
"hide": true,
"iconColor": "rgba(0, 211, 255, 1)",
"name": "Annotations & Alerts",
"target": {
"limit": 100,
"matchAny": false,
"tags": [],
"type": "dashboard"
},
"type": "dashboard"
}
]
},
"description": "Watching the watchers",
"editable": true,
"fiscalYearStartMonth": 0,
"graphTooltip": 0,
"links": [],
"liveNow": false,
"panels": [
{
"alert": {
"alertRuleTags": {},
"conditions": [
{
"evaluator": {
"params": [
0
],
"type": "gt"
},
"operator": {
"type": "and"
},
"query": {
"params": [
"A",
"5m",
"now"
]
},
"reducer": {
"params": [],
"type": "count"
},
"type": "query"
}
],
"executionErrorState": "alerting",
"for": "10m",
"frequency": "1m",
"handler": 1,
"name": "Scraping down",
"noDataState": "ok",
"notifications": []
},
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"description": "Is Prometheus having problems scraping our instances? Should be zero.",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "bars",
"fillOpacity": 100,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "never",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "normal"
},
"thresholdsStyle": {
"mode": "line"
}
},
"decimals": 0,
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "transparent",
"value": null
},
{
"color": "red",
"value": 0
}
]
},
"unit": "short"
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 12,
"x": 0,
"y": 0
},
"id": 6,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "multi",
"sort": "none"
}
},
"pluginVersion": "10.4.6",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"exemplar": true,
"expr": "count by (job, instance) (up == 0)",
"hide": false,
"interval": "",
"legendFormat": "{{job}}/{{instance}}",
"refId": "A"
}
],
"title": "Scraping failures",
"type": "timeseries"
},
{
"alert": {
"alertRuleTags": {},
"conditions": [
{
"evaluator": {
"params": [
600
],
"type": "gt"
},
"operator": {
"type": "and"
},
"query": {
"params": [
"A",
"5m",
"now"
]
},
"reducer": {
"params": [],
"type": "last"
},
"type": "query"
}
],
"executionErrorState": "alerting",
"for": "5m",
"frequency": "1m",
"handler": 1,
"message": "A metrics text file is older than 10 minutes.",
"name": "Textcollector staleness alert",
"noDataState": "no_data",
"notifications": []
},
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"description": "Node-Exporter's TextCollector reads in plain text files containing metrics every few minutes. Make sure we're not reporting stale text files as new data - Alert if any of the text files is not getting updated.",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "s"
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 12,
"x": 12,
"y": 0
},
"id": 8,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"exemplar": true,
"expr": "time() - node_textfile_mtime_seconds",
"interval": "",
"legendFormat": "{{instance}}/{{file}}",
"refId": "A"
}
],
"thresholds": [
{
"colorMode": "critical",
"op": "gt",
"value": 600,
"visible": true
}
],
"title": "Textfile collector freshness",
"type": "timeseries"
}
],
"refresh": false,
"schemaVersion": 39,
"tags": [],
"templating": {
"list": []
},
"time": {
"from": "now-30m",
"to": "now"
},
"timepicker": {},
"timezone": "",
"title": "Meta monitoring",
"uid": "MetaMonitoring",
"version": 1,
"weekStart": ""
}
Source diff could not be displayed: it is too large. Options to address this: view the blob.
{
"annotations": {
"list": [
{
"builtIn": 1,
"datasource": "-- Grafana --",
"enable": true,
"hide": true,
"iconColor": "rgba(0, 211, 255, 1)",
"name": "Annotations & Alerts",
"target": {
"limit": 100,
"matchAny": false,
"tags": [],
"type": "dashboard"
},
"type": "dashboard"
}
]
},
"description": "PaymentServer and related metrics",
"editable": true,
"fiscalYearStartMonth": 0,
"graphTooltip": 2,
"links": [],
"liveNow": false,
"panels": [
{
"collapsed": false,
"gridPos": {
"h": 1,
"w": 24,
"x": 0,
"y": 0
},
"id": 24,
"panels": [],
"title": "Payments",
"type": "row"
},
{
"description": "Calls to our Stripe Webhook and the (obsolete) Charge endpoint.",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 10,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "never",
"spanNulls": true,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "short"
},
"overrides": [
{
"matcher": {
"id": "byName",
"options": "Attempts"
},
"properties": [
{
"id": "color",
"value": {
"fixedColor": "yellow",
"mode": "fixed"
}
}
]
},
{
"matcher": {
"id": "byName",
"options": "Successes"
},
"properties": [
{
"id": "color",
"value": {
"fixedColor": "green",
"mode": "fixed"
}
}
]
}
]
},
"gridPos": {
"h": 7,
"w": 12,
"x": 0,
"y": 1
},
"id": 22,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom"
},
"tooltip": {
"mode": "multi",
"sort": "none"
}
},
"pluginVersion": "8.3.4",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"exemplar": true,
"expr": "sum (http_responses_total{path=\"v1/stripe/webhook\"})",
"hide": false,
"interval": "",
"legendFormat": "Webhook attempts",
"refId": "Webhook attempts"
},
{
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"exemplar": true,
"expr": "http_responses_total{path=\"v1/stripe/webhook\", status=\"2XX\"}",
"hide": false,
"interval": "",
"legendFormat": "Webhook successes",
"refId": "Webhook successes"
},
{
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"exemplar": true,
"expr": "payment_processors_stripe_charge_attempts",
"hide": false,
"interval": "",
"legendFormat": "Charge attempts",
"refId": "Charge attempts"
},
{
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"exemplar": true,
"expr": "payment_processors_stripe_charge_successes",
"hide": false,
"interval": "",
"legendFormat": "Charge successes",
"refId": "Charge successes"
}
],
"title": "Stripe Webhook and (obsolete) Charge API",
"type": "timeseries"
},
{
"description": "",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 10,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "never",
"spanNulls": true,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "short"
},
"overrides": [
{
"matcher": {
"id": "byType",
"options": "time"
},
"properties": [
{
"id": "custom.axisPlacement",
"value": "hidden"
}
]
}
]
},
"gridPos": {
"h": 7,
"w": 12,
"x": 12,
"y": 1
},
"id": 20,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom"
},
"tooltip": {
"mode": "multi",
"sort": "none"
}
},
"pluginVersion": "8.4.7",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"exemplar": true,
"expr": "payment_redemption_signatures_issued",
"interval": "",
"legendFormat": "Issued signatures",
"refId": "A"
},
{
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"exemplar": true,
"expr": "payment_redemption_vouchers_redeemed",
"format": "time_series",
"hide": false,
"interval": "",
"legendFormat": "Redeemed vouchers",
"refId": "B"
}
],
"title": "Redemption",
"type": "timeseries"
},
{
"collapsed": false,
"gridPos": {
"h": 1,
"w": 24,
"x": 0,
"y": 8
},
"id": 32,
"panels": [],
"title": "HTTP v1/stripe/webhook",
"type": "row"
},
{
"description": "HTTPS responses per second",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 10,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "never",
"spanNulls": true,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"links": [],
"mappings": [],
"min": 0,
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "short"
},
"overrides": []
},
"gridPos": {
"h": 7,
"w": 8,
"x": 0,
"y": 9
},
"id": 33,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom"
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"pluginVersion": "8.3.4",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"exemplar": true,
"expr": "rate(http_responses_total{path=\"v1/stripe/webhook\"}[5m])",
"instant": false,
"interval": "",
"legendFormat": "{{status}}",
"refId": "A"
}
],
"title": "Requests per second",
"type": "timeseries"
},
{
"description": "",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 10,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "never",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "normal"
},
"thresholdsStyle": {
"mode": "off"
}
},
"links": [],
"mappings": [],
"max": 1,
"min": 0,
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "percentunit"
},
"overrides": []
},
"gridPos": {
"h": 7,
"w": 8,
"x": 8,
"y": 9
},
"id": 34,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom"
},
"tooltip": {
"mode": "multi",
"sort": "none"
}
},
"pluginVersion": "8.3.4",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"exemplar": true,
"expr": "sum(http_responses_total{path=\"v1/stripe/webhook\", status=\"4XX\"}) / sum(http_responses_total{path=\"v1/stripe/webhook\"})",
"interval": "",
"legendFormat": "Client error (4XX) rate",
"refId": "A"
},
{
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"exemplar": true,
"expr": "sum(http_responses_total{path=\"v1/stripe/webhook\", status=\"5XX\"}) / sum(http_responses_total{path=\"v1/stripe/webhook\"})",
"interval": "",
"legendFormat": "Server error (5XX) rate",
"refId": "B"
}
],
"title": "Error rate",
"type": "timeseries"
},
{
"description": "Request durations, stacked.",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 20,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "never",
"spanNulls": true,
"stacking": {
"group": "A",
"mode": "normal"
},
"thresholdsStyle": {
"mode": "off"
}
},
"links": [],
"mappings": [],
"min": 0,
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "short"
},
"overrides": [
{
"matcher": {
"id": "byName",
"options": "=< 0.1s"
},
"properties": [
{
"id": "color",
"value": {
"fixedColor": "blue",
"mode": "fixed"
}
}
]
},
{
"matcher": {
"id": "byName",
"options": "=< 1s"
},
"properties": [
{
"id": "color",
"value": {
"fixedColor": "green",
"mode": "fixed"
}
}
]
},
{
"matcher": {
"id": "byName",
"options": "=< 5s"
},
"properties": [
{
"id": "color",
"value": {
"fixedColor": "yellow",
"mode": "fixed"
}
}
]
},
{
"matcher": {
"id": "byName",
"options": "> 5s"
},
"properties": [
{
"id": "color",
"value": {
"fixedColor": "orange",
"mode": "fixed"
}
}
]
}
]
},
"gridPos": {
"h": 7,
"w": 8,
"x": 16,
"y": 9
},
"id": 13,
"links": [],
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom"
},
"tooltip": {
"mode": "multi",
"sort": "none"
}
},
"pluginVersion": "8.3.4",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"exemplar": true,
"expr": "http_request_duration_seconds_bucket{path=\"v1/stripe/webhook\", le=\"0.1\"}",
"format": "time_series",
"hide": false,
"instant": false,
"interval": "",
"intervalFactor": 1,
"legendFormat": "=< 0.1s",
"refId": "A"
},
{
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"exemplar": true,
"expr": "http_request_duration_seconds_bucket{path=\"v1/stripe/webhook\", le=\"1.0\"} - ignoring(le) http_request_duration_seconds_bucket{path=\"v1/stripe/webhook\", le=\"0.1\"}",
"format": "time_series",
"hide": false,
"instant": false,
"interval": "",
"intervalFactor": 1,
"legendFormat": "=< 1s",
"refId": "D"
},
{
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"exemplar": true,
"expr": "http_request_duration_seconds_bucket{path=\"v1/stripe/webhook\", le=\"5.0\"} - ignoring(le) http_request_duration_seconds_bucket{path=\"v1/stripe/webhook\", le=\"1.0\"}",
"format": "time_series",
"hide": false,
"instant": false,
"interval": "",
"intervalFactor": 1,
"legendFormat": "=< 5s",
"refId": "B"
},
{
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"exemplar": true,
"expr": "http_request_duration_seconds_bucket{path=\"v1/stripe/webhook\", le=\"+Inf\"} - ignoring(le) http_request_duration_seconds_bucket{path=\"v1/stripe/webhook\", le=\"5.0\"}",
"format": "time_series",
"hide": false,
"instant": false,
"interval": "",
"intervalFactor": 1,
"legendFormat": "> 5s",
"refId": "C"
}
],
"title": "Durations",
"type": "timeseries"
},
{
"collapsed": false,
"gridPos": {
"h": 1,
"w": 24,
"x": 0,
"y": 16
},
"id": 18,
"panels": [],
"title": "HTTP v1/stripe/charge",
"type": "row"
},
{
"description": "HTTPS responses per second",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 10,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "never",
"spanNulls": true,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"links": [],
"mappings": [],
"min": 0,
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "short"
},
"overrides": []
},
"gridPos": {
"h": 7,
"w": 8,
"x": 0,
"y": 17
},
"id": 4,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom"
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"pluginVersion": "8.3.4",
"targets": [
{
"exemplar": true,
"expr": "rate(http_responses_total{path=\"v1/stripe/charge\", instance=\"payments\"}[5m])",
"instant": false,
"interval": "",
"legendFormat": "{{status}}",
"refId": "A"
}
],
"title": "Requests per second",
"type": "timeseries"
},
{
"description": "",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 10,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "never",
"spanNulls": true,
"stacking": {
"group": "A",
"mode": "normal"
},
"thresholdsStyle": {
"mode": "off"
}
},
"links": [],
"mappings": [],
"max": 1,
"min": 0,
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "percentunit"
},
"overrides": []
},
"gridPos": {
"h": 7,
"w": 8,
"x": 8,
"y": 17
},
"id": 15,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom"
},
"tooltip": {
"mode": "multi",
"sort": "none"
}
},
"pluginVersion": "8.3.4",
"targets": [
{
"exemplar": true,
"expr": "sum(http_responses_total{path=\"v1/stripe/charge\", status=\"4XX\"}) / sum(http_responses_total{path=\"v1/stripe/charge\"})",
"interval": "",
"legendFormat": "Client error (4XX) rate",
"refId": "A"
},
{
"exemplar": true,
"expr": "sum(http_responses_total{path=\"v1/stripe/charge\", status=\"5XX\"}) / sum(http_responses_total{path=\"v1/stripe/charge\"})",
"interval": "",
"legendFormat": "Server error (5XX) rate",
"refId": "B"
}
],
"title": "Error rate",
"type": "timeseries"
},
{
"description": "Request durations, stacked",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 20,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "never",
"spanNulls": true,
"stacking": {
"group": "A",
"mode": "normal"
},
"thresholdsStyle": {
"mode": "off"
}
},
"links": [],
"mappings": [],
"min": 0,
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "short"
},
"overrides": [
{
"matcher": {
"id": "byName",
"options": "=< 0.1s"
},
"properties": [
{
"id": "color",
"value": {
"fixedColor": "blue",
"mode": "fixed"
}
}
]
},
{
"matcher": {
"id": "byName",
"options": "=< 1s"
},
"properties": [
{
"id": "color",
"value": {
"fixedColor": "green",
"mode": "fixed"
}
}
]
},
{
"matcher": {
"id": "byName",
"options": "=< 5s"
},
"properties": [
{
"id": "color",
"value": {
"fixedColor": "yellow",
"mode": "fixed"
}
}
]
},
{
"matcher": {
"id": "byName",
"options": "> 5s"
},
"properties": [
{
"id": "color",
"value": {
"fixedColor": "orange",
"mode": "fixed"
}
}
]
}
]
},
"gridPos": {
"h": 7,
"w": 8,
"x": 16,
"y": 17
},
"id": 12,
"links": [],
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom"
},
"tooltip": {
"mode": "multi",
"sort": "none"
}
},
"pluginVersion": "8.3.4",
"targets": [
{
"exemplar": true,
"expr": "http_request_duration_seconds_bucket{path=\"v1/stripe/charge\", le=\"0.1\"}",
"format": "time_series",
"hide": false,
"instant": false,
"interval": "",
"intervalFactor": 1,
"legendFormat": "=< 0.1s",
"refId": "A"
},
{
"exemplar": true,
"expr": "http_request_duration_seconds_bucket{path=\"v1/stripe/charge\", le=\"1.0\"} - ignoring(le) http_request_duration_seconds_bucket{path=\"v1/stripe/charge\", le=\"0.1\"}",
"format": "time_series",
"hide": false,
"instant": false,
"interval": "",
"intervalFactor": 1,
"legendFormat": "=< 1s",
"refId": "D"
},
{
"exemplar": true,
"expr": "http_request_duration_seconds_bucket{path=\"v1/stripe/charge\", le=\"5.0\"} - ignoring(le) http_request_duration_seconds_bucket{path=\"v1/stripe/charge\", le=\"1.0\"}",
"format": "time_series",
"hide": false,
"instant": false,
"interval": "",
"intervalFactor": 1,
"legendFormat": "=< 5s",
"refId": "B"
},
{
"exemplar": true,
"expr": "http_request_duration_seconds_bucket{path=\"v1/stripe/charge\", le=\"+Inf\"} - ignoring(le) http_request_duration_seconds_bucket{path=\"v1/stripe/charge\", le=\"5.0\"}",
"format": "time_series",
"hide": false,
"instant": false,
"interval": "",
"intervalFactor": 1,
"legendFormat": "> 5s",
"refId": "C"
}
],
"title": "Durations",
"type": "timeseries"
},
{
"collapsed": false,
"gridPos": {
"h": 1,
"w": 24,
"x": 0,
"y": 24
},
"id": 11,
"panels": [],
"title": "HTTP v1/redeem",
"type": "row"
},
{
"description": "HTTPS responses per second",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 10,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "never",
"spanNulls": true,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"links": [],
"mappings": [],
"min": 0,
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "short"
},
"overrides": []
},
"gridPos": {
"h": 7,
"w": 8,
"x": 0,
"y": 25
},
"id": 2,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom"
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"pluginVersion": "8.3.4",
"targets": [
{
"expr": "rate(http_responses_total{path=\"v1/redeem\"}[5m])",
"instant": false,
"intervalFactor": 1,
"legendFormat": "{{status}}",
"refId": "A"
}
],
"title": "Requests per second",
"type": "timeseries"
},
{
"description": "HTTP 4xx and 5xx errors",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 20,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"lineInterpolation": "linear",
"lineStyle": {
"fill": "solid"
},
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "never",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"links": [],
"mappings": [],
"max": 1,
"min": 0,
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "percentunit"
},
"overrides": []
},
"gridPos": {
"h": 7,
"w": 8,
"x": 8,
"y": 25
},
"id": 16,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom"
},
"tooltip": {
"mode": "multi",
"sort": "none"
}
},
"pluginVersion": "8.3.4",
"targets": [
{
"expr": "sum(http_responses_total{path=\"v1/redeem\", status=\"4XX\"}) / sum(http_responses_total{path=\"v1/redeem\"})",
"legendFormat": "Client error (4XX) rate",
"refId": "A"
},
{
"expr": "sum(http_responses_total{path=\"v1/redeem\", status=\"5XX\"}) / sum(http_responses_total{path=\"v1/redeem\"})",
"legendFormat": "Server error (5XX) rate",
"refId": "B"
}
],
"title": "Error rate",
"type": "timeseries"
},
{
"description": "Request durations, stacked",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 20,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "never",
"spanNulls": true,
"stacking": {
"group": "A",
"mode": "normal"
},
"thresholdsStyle": {
"mode": "off"
}
},
"links": [],
"mappings": [],
"min": 0,
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "short"
},
"overrides": [
{
"matcher": {
"id": "byName",
"options": "=< 0.1s"
},
"properties": [
{
"id": "color",
"value": {
"fixedColor": "blue",
"mode": "fixed"
}
}
]
},
{
"matcher": {
"id": "byName",
"options": "=< 1s"
},
"properties": [
{
"id": "color",
"value": {
"fixedColor": "green",
"mode": "fixed"
}
}
]
},
{
"matcher": {
"id": "byName",
"options": "=< 5s"
},
"properties": [
{
"id": "color",
"value": {
"fixedColor": "yellow",
"mode": "fixed"
}
}
]
},
{
"matcher": {
"id": "byName",
"options": "> 5s"
},
"properties": [
{
"id": "color",
"value": {
"fixedColor": "orange",
"mode": "fixed"
}
}
]
}
]
},
"gridPos": {
"h": 7,
"w": 8,
"x": 16,
"y": 25
},
"id": 35,
"links": [],
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom"
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"pluginVersion": "8.3.4",
"targets": [
{
"exemplar": true,
"expr": "http_request_duration_seconds_bucket{path=\"v1/redeem\", le=\"0.1\"}",
"format": "time_series",
"hide": false,
"instant": false,
"interval": "",
"intervalFactor": 1,
"legendFormat": "=< 0.1s",
"refId": "A"
},
{
"exemplar": true,
"expr": "http_request_duration_seconds_bucket{path=\"v1/redeem\", le=\"1.0\"} - ignoring(le) http_request_duration_seconds_bucket{path=\"v1/redeem\", le=\"0.1\"}",
"format": "time_series",
"hide": false,
"instant": false,
"interval": "",
"intervalFactor": 1,
"legendFormat": "=< 1s",
"refId": "D"
},
{
"exemplar": true,
"expr": "http_request_duration_seconds_bucket{path=\"v1/redeem\", le=\"5.0\"} - ignoring(le) http_request_duration_seconds_bucket{path=\"v1/redeem\", le=\"1.0\"}",
"format": "time_series",
"hide": false,
"instant": false,
"interval": "",
"intervalFactor": 1,
"legendFormat": "=< 5s",
"refId": "B"
},
{
"exemplar": true,
"expr": "http_request_duration_seconds_bucket{path=\"v1/redeem\", le=\"+Inf\"} - ignoring(le) http_request_duration_seconds_bucket{path=\"v1/redeem\", le=\"5.0\"}",
"format": "time_series",
"hide": false,
"instant": false,
"interval": "",
"intervalFactor": 1,
"legendFormat": "> 5s",
"refId": "C"
}
],
"title": "Durations",
"type": "timeseries"
},
{
"collapsed": false,
"gridPos": {
"h": 1,
"w": 24,
"x": 0,
"y": 32
},
"id": 26,
"panels": [],
"title": "Logs",
"type": "row"
},
{
"datasource": {
"type": "loki",
"uid": "LocalLoki"
},
"description": "Exercise in counting maybe interesting lines. This can be alerted on.",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 20,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"lineInterpolation": "linear",
"lineStyle": {
"fill": "solid"
},
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"min": 0,
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "short"
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 12,
"x": 0,
"y": 33
},
"id": 28,
"options": {
"legend": {
"calcs": [],
"displayMode": "hidden",
"placement": "bottom"
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"targets": [
{
"datasource": {
"type": "loki",
"uid": "LocalLoki"
},
"expr": "count_over_time({host=\"payments\",unit=\"zkapissuer.service\"} !~ \"200|/metrics|Accept\" [5m])",
"refId": "A"
}
],
"thresholds": [],
"title": "Number of maybe interesting log lines",
"type": "timeseries"
},
{
"datasource": {
"type": "loki",
"uid": "LocalLoki"
},
"description": "Exercise in filtering the payment server logs.",
"gridPos": {
"h": 8,
"w": 12,
"x": 12,
"y": 33
},
"id": 30,
"options": {
"dedupStrategy": "none",
"enableLogDetails": true,
"prettifyLogMessage": false,
"showCommonLabels": false,
"showLabels": false,
"showTime": true,
"sortOrder": "Descending",
"wrapLogMessage": false
},
"targets": [
{
"datasource": {
"type": "loki",
"uid": "LocalLoki"
},
"expr": "{host=\"payments\",unit=\"zkapissuer.service\"} !~ \"200|/metrics|Accept\"",
"refId": "A"
}
],
"title": "Maybe interesting lines",
"type": "logs"
}
],
"refresh": "5m",
"schemaVersion": 35,
"style": "dark",
"tags": [],
"templating": {
"list": []
},
"time": {
"from": "now-7d",
"to": "now"
},
"timepicker": {
"refresh_intervals": [
"5s",
"10s",
"30s",
"1m",
"5m",
"15m",
"30m",
"1h",
"2h",
"1d"
]
},
"timezone": "",
"title": "Payments",
"uid": "Payments",
"version": 1,
"weekStart": ""
}
......@@ -8,19 +8,26 @@
"hide": true,
"iconColor": "rgba(0, 211, 255, 1)",
"name": "Annotations & Alerts",
"target": {
"limit": 100,
"matchAny": false,
"tags": [],
"type": "dashboard"
},
"type": "dashboard"
}
]
},
"description": "USE: Usage, Saturation and Error rate for our resources",
"editable": true,
"gnetId": null,
"graphTooltip": 0,
"fiscalYearStartMonth": 0,
"graphTooltip": 1,
"id": 125,
"links": [],
"liveNow": false,
"panels": [
{
"collapsed": false,
"datasource": null,
"gridPos": {
"h": 1,
"w": 24,
......@@ -33,102 +40,91 @@
"type": "row"
},
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": null,
"description": "Some of our software runs in a single thread, so this shows max CPU per core (instead of averaged over all cores)",
"fieldConfig": {
"defaults": {
"custom": {}
"color": {
"mode": "palette-classic"
},
"custom": {
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "never",
"spanNulls": true,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"links": [],
"mappings": [],
"max": 1,
"min": 0,
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "percentunit"
},
"overrides": []
},
"fill": 0,
"fillGradient": 0,
"gridPos": {
"h": 7,
"w": 8,
"w": 6,
"x": 0,
"y": 1
},
"hiddenSeries": false,
"id": 28,
"legend": {
"avg": false,
"current": false,
"max": false,
"min": false,
"show": false,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"nullPointMode": "null",
"options": {
"alertThreshold": true,
"dataLinks": []
"legend": {
"calcs": [],
"displayMode": "hidden",
"placement": "bottom"
},
"tooltip": {
"mode": "multi",
"sort": "none"
}
},
"percentage": false,
"pluginVersion": "7.3.5",
"pointradius": 2,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"pluginVersion": "8.3.5",
"targets": [
{
"expr": "1 - (max by (instance) (irate(node_cpu_seconds_total{mode=\"idle\"}[5m])))",
"expr": "1 - (min by (instance) (irate(node_cpu_seconds_total{mode=\"idle\"}[5m])))",
"interval": "",
"intervalFactor": 1,
"legendFormat": "{{instance}}",
"refId": "A"
}
],
"thresholds": [],
"timeFrom": null,
"timeRegions": [],
"timeShift": null,
"title": "Max CPU % per core per node",
"tooltip": {
"shared": true,
"sort": 2,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"format": "percentunit",
"label": null,
"logBase": 1,
"max": "1",
"min": "0",
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
}
],
"yaxis": {
"align": false,
"alignLevel": null
}
"type": "timeseries"
},
{
"alert": {
......@@ -159,22 +155,64 @@
}
],
"executionErrorState": "alerting",
"for": "5m",
"for": "2h",
"frequency": "1m",
"handler": 1,
"name": "15 min load average alert",
"noDataState": "no_data",
"notifications": []
},
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": null,
"fieldConfig": {
"defaults": {
"custom": {},
"displayName": "${__field.labels.instance}"
"color": {
"mode": "palette-classic"
},
"custom": {
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "never",
"spanNulls": true,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "area"
}
},
"displayName": "${__field.labels.instance}",
"links": [],
"mappings": [],
"max": 1,
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "transparent",
"value": null
},
{
"color": "red",
"value": 1
}
]
},
"unit": "short"
},
"overrides": [
{
......@@ -190,41 +228,25 @@
}
]
},
"fill": 0,
"fillGradient": 0,
"gridPos": {
"h": 7,
"w": 8,
"x": 8,
"w": 6,
"x": 6,
"y": 1
},
"hiddenSeries": false,
"id": 6,
"legend": {
"avg": false,
"current": false,
"max": false,
"min": false,
"show": false,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"nullPointMode": "null",
"options": {
"alertThreshold": true,
"dataLinks": []
"legend": {
"calcs": [],
"displayMode": "hidden",
"placement": "bottom"
},
"tooltip": {
"mode": "multi",
"sort": "none"
}
},
"percentage": false,
"pluginVersion": "7.3.5",
"pointradius": 2,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"pluginVersion": "8.3.5",
"targets": [
{
"expr": "node_load15",
......@@ -234,55 +256,8 @@
"refId": "A"
}
],
"thresholds": [
{
"colorMode": "critical",
"fill": true,
"line": true,
"op": "gt",
"value": 1,
"yaxis": "left"
}
],
"timeFrom": null,
"timeRegions": [],
"timeShift": null,
"title": "15 min load average",
"tooltip": {
"shared": true,
"sort": 2,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"format": "short",
"label": null,
"logBase": 1,
"max": "1",
"min": null,
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
}
],
"yaxis": {
"align": false,
"alignLevel": null
}
"type": "timeseries"
},
{
"alert": {
......@@ -300,7 +275,30 @@
},
"query": {
"params": [
"A",
"Hosts without ZFS",
"15m",
"now"
]
},
"reducer": {
"params": [],
"type": "avg"
},
"type": "query"
},
{
"evaluator": {
"params": [
0.8
],
"type": "gt"
},
"operator": {
"type": "and"
},
"query": {
"params": [
"Hosts with ZFS",
"15m",
"now"
]
......@@ -320,117 +318,253 @@
"noDataState": "no_data",
"notifications": []
},
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": null,
"datasource": {},
"description": "How much RAM is in use? Relative to available system memory.",
"fieldConfig": {
"defaults": {
"custom": {}
"color": {
"mode": "palette-classic"
},
"custom": {
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "never",
"spanNulls": true,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "area"
}
},
"links": [],
"mappings": [],
"max": 1,
"min": 0,
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "transparent",
"value": null
},
{
"color": "red",
"value": 0.8
}
]
},
"unit": "percentunit"
},
"overrides": []
},
"fill": 0,
"fillGradient": 0,
"gridPos": {
"h": 7,
"w": 8,
"x": 16,
"w": 6,
"x": 12,
"y": 1
},
"hiddenSeries": false,
"id": 2,
"legend": {
"alignAsTable": false,
"avg": false,
"current": false,
"max": false,
"min": false,
"show": false,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"nullPointMode": "null",
"options": {
"alertThreshold": true,
"dataLinks": []
"legend": {
"calcs": [],
"displayMode": "hidden",
"placement": "bottom"
},
"tooltip": {
"mode": "multi",
"sort": "none"
}
},
"percentage": false,
"pluginVersion": "7.3.5",
"pointradius": 2,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"pluginVersion": "8.3.5",
"targets": [
{
"expr": "1 - (node_memory_MemFree_bytes + node_memory_Buffers_bytes + node_memory_Cached_bytes) / node_memory_MemTotal_bytes\r\n",
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"exemplar": true,
"expr": "1 - node_memory_MemAvailable_bytes / node_memory_MemTotal_bytes unless node_zfs_arc_size",
"hide": false,
"instant": false,
"interval": "",
"intervalFactor": 4,
"legendFormat": "{{instance}}",
"refId": "A"
"refId": "Hosts without ZFS"
},
{
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"exemplar": true,
"expr": "1 - (node_memory_MemAvailable_bytes + node_zfs_arc_size) / node_memory_MemTotal_bytes",
"hide": false,
"instant": false,
"interval": "",
"legendFormat": "{{instance}}",
"refId": "Hosts with ZFS"
}
],
"thresholds": [
{
"colorMode": "critical",
"fill": true,
"line": true,
"op": "gt",
"value": 0.8,
"yaxis": "left"
"visible": true
}
],
"timeFrom": null,
"timeRegions": [],
"timeShift": null,
"title": "RAM used %",
"tooltip": {
"shared": true,
"sort": 2,
"value_type": "individual"
"type": "timeseries"
},
{
"alert": {
"alertRuleTags": {},
"conditions": [
{
"evaluator": {
"params": [
0.1
],
"type": "gt"
},
"operator": {
"type": "and"
},
"query": {
"params": [
"A",
"5m",
"now"
]
},
"reducer": {
"params": [],
"type": "avg"
},
"type": "query"
}
],
"executionErrorState": "alerting",
"for": "5m",
"frequency": "1m",
"handler": 1,
"name": "Swap usage alert",
"noDataState": "no_data",
"notifications": []
},
"description": "How much Swap is in use? Relative to available swap.",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 10,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "never",
"spanNulls": true,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "area"
}
},
"mappings": [],
"max": 1,
"min": 0,
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "transparent",
"value": null
},
{
"color": "red",
"value": 0.1
}
]
},
"unit": "percentunit"
},
"overrides": []
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
"gridPos": {
"h": 7,
"w": 6,
"x": 18,
"y": 1
},
"yaxes": [
{
"decimals": null,
"format": "percentunit",
"label": null,
"logBase": 1,
"max": "1",
"min": "0",
"show": true
"id": 30,
"options": {
"legend": {
"calcs": [],
"displayMode": "hidden",
"placement": "bottom"
},
"tooltip": {
"mode": "multi",
"sort": "none"
}
},
"pluginVersion": "8.3.5",
"targets": [
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
"exemplar": true,
"expr": "1 - node_memory_SwapFree_bytes / node_memory_SwapTotal_bytes",
"interval": "",
"intervalFactor": 1,
"legendFormat": "{{instance}}",
"refId": "A"
}
],
"yaxis": {
"align": false,
"alignLevel": null
}
"thresholds": [
{
"colorMode": "critical",
"op": "gt",
"value": 0.1,
"visible": true
}
],
"title": "Swap used %",
"type": "timeseries"
},
{
"collapsed": false,
"datasource": null,
"gridPos": {
"h": 1,
"w": 24,
......@@ -443,230 +577,203 @@
"type": "row"
},
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": null,
"description": "Shows most saturated network link for every node. Baseline is the reported NIC link speed - that might not be the actual limit.",
"description": "Shows most utilized network link for every node. Bit/s.",
"fieldConfig": {
"defaults": {
"custom": {}
"color": {
"mode": "palette-classic"
},
"custom": {
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 10,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "never",
"spanNulls": true,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"links": [],
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "bps"
},
"overrides": []
},
"fill": 1,
"fillGradient": 0,
"gridPos": {
"h": 7,
"w": 8,
"x": 0,
"y": 9
},
"hiddenSeries": false,
"id": 12,
"legend": {
"avg": false,
"current": false,
"max": false,
"min": false,
"show": false,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"nullPointMode": "null",
"options": {
"alertThreshold": true,
"dataLinks": []
"legend": {
"calcs": [],
"displayMode": "hidden",
"placement": "bottom"
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"percentage": false,
"pluginVersion": "7.3.5",
"pointradius": 2,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"pluginVersion": "8.3.5",
"targets": [
{
"expr": "max by (instance) (rate(node_network_transmit_bytes_total{device!~\"lo|monitoringvpn\"}[5m]) / node_network_speed_bytes)",
"expr": "max by (instance) (rate(node_network_transmit_bytes_total{device!~\"lo|monitoringvpn\"}[5m]) * 8)",
"interval": "",
"intervalFactor": 4,
"legendFormat": "{{instance}} out",
"refId": "A"
},
{
"expr": "- max by (instance) (rate(node_network_receive_bytes_total{device!~\"lo|monitoringvpn\"}[5m]) / node_network_speed_bytes)",
"expr": "- max by (instance) (rate(node_network_receive_bytes_total{device!~\"lo|monitoringvpn\"}[5m]) * 8)",
"interval": "",
"intervalFactor": 4,
"legendFormat": "{{instance}} in",
"refId": "B"
}
],
"thresholds": [],
"timeFrom": null,
"timeRegions": [],
"timeShift": null,
"title": "Throughput %",
"tooltip": {
"shared": false,
"sort": 2,
"value_type": "individual"
},
"title": "Throughput",
"transformations": [],
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"decimals": null,
"format": "percentunit",
"label": null,
"logBase": 1,
"max": "1",
"min": "-1",
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
}
],
"yaxis": {
"align": false,
"alignLevel": null
}
"type": "timeseries"
},
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": null,
"description": "Packet and error count. Positive values mean transmit, negative receive.",
"fieldConfig": {
"defaults": {
"custom": {}
"color": {
"mode": "palette-classic"
},
"custom": {
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "never",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"links": [],
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "pps"
},
"overrides": []
},
"fill": 0,
"fillGradient": 0,
"gridPos": {
"h": 7,
"w": 8,
"x": 8,
"y": 9
},
"hiddenSeries": false,
"id": 26,
"legend": {
"avg": false,
"current": false,
"max": false,
"min": false,
"show": false,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"nullPointMode": "null as zero",
"options": {
"alertThreshold": true,
"dataLinks": []
"legend": {
"calcs": [],
"displayMode": "hidden",
"placement": "bottom"
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"percentage": false,
"pluginVersion": "7.3.5",
"pointradius": 2,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"pluginVersion": "8.3.5",
"targets": [
{
"expr": "- rate(node_network_receive_packets_total{device!~\"lo|monitoringvpn\"}[5m])",
"interval": "",
"intervalFactor": 4,
"legendFormat": "{{instance}} {{device}}",
"legendFormat": "{{instance}} in",
"refId": "A"
},
{
"expr": "- rate(node_network_receive_errs_total{device!~\"lo|monitoringvpn\"}[5m])",
"interval": "",
"intervalFactor": 4,
"legendFormat": "{{instance}} {{device}}",
"legendFormat": "{{instance}} in err",
"refId": "B"
},
{
"expr": "rate(node_network_transmit_packets_total{device!~\"lo|monitoringvpn\"}[5m])",
"interval": "",
"intervalFactor": 4,
"legendFormat": "{{instance}} {{device}}",
"legendFormat": "{{instance}} out",
"refId": "C"
},
{
"expr": "rate(node_network_transmit_errs_total{device!~\"lo|monitoringvpn\"}[5m])",
"interval": "",
"intervalFactor": 4,
"legendFormat": "{{instance}} {{device}}",
"legendFormat": "{{instance}} out err",
"refId": "D"
}
],
"thresholds": [],
"timeFrom": null,
"timeRegions": [],
"timeShift": null,
"title": "Network pkt/s",
"tooltip": {
"shared": false,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
}
],
"yaxis": {
"align": false,
"alignLevel": null
}
"type": "timeseries"
},
{
"alert": {
......@@ -675,9 +782,10 @@
{
"evaluator": {
"params": [
10
-1,
1
],
"type": "gt"
"type": "outside_range"
},
"operator": {
"type": "and"
......@@ -698,9 +806,10 @@
{
"evaluator": {
"params": [
10
-1,
1
],
"type": "gt"
"type": "outside_range"
},
"operator": {
"type": "or"
......@@ -721,9 +830,10 @@
{
"evaluator": {
"params": [
10
-1,
1
],
"type": "gt"
"type": "outside_range"
},
"operator": {
"type": "or"
......@@ -744,9 +854,10 @@
{
"evaluator": {
"params": [
10
-1,
1
],
"type": "gt"
"type": "outside_range"
},
"operator": {
"type": "or"
......@@ -769,78 +880,128 @@
"for": "5m",
"frequency": "1m",
"handler": 1,
"message": "",
"name": "Network errors alert",
"noDataState": "no_data",
"notifications": []
},
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": null,
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"description": "Network errors, drops etc. Should all be 0.",
"fieldConfig": {
"defaults": {
"custom": {}
"color": {
"mode": "palette-classic"
},
"custom": {
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 10,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "never",
"spanNulls": true,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"links": [],
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "transparent",
"value": null
},
{
"color": "red",
"value": 10
}
]
},
"unit": "short"
},
"overrides": []
},
"fill": 1,
"fillGradient": 0,
"gridPos": {
"h": 7,
"w": 8,
"x": 16,
"y": 9
},
"hiddenSeries": false,
"id": 10,
"legend": {
"avg": false,
"current": false,
"max": false,
"min": false,
"show": false,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"nullPointMode": "null",
"options": {
"alertThreshold": true,
"dataLinks": []
"legend": {
"calcs": [],
"displayMode": "hidden",
"placement": "bottom"
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"percentage": false,
"pluginVersion": "7.3.5",
"pointradius": 2,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"pluginVersion": "8.3.5",
"targets": [
{
"expr": "node_network_transmit_errs_total\n",
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"exemplar": true,
"expr": "rate(node_network_transmit_errs_total{device!=\"lo\"}[5m])\n",
"interval": "",
"legendFormat": "{{instance}} {{device}}",
"refId": "A"
},
{
"expr": "node_network_transmit_drop_total",
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"exemplar": true,
"expr": "rate(node_network_transmit_drop_total{device!=\"lo\"}[5m])",
"interval": "",
"legendFormat": "{{instance}} {{device}}",
"refId": "B"
},
{
"expr": "- node_network_receive_drop_total",
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"exemplar": true,
"expr": "- rate(node_network_receive_drop_total{device!=\"lo\"}[5m])",
"interval": "",
"legendFormat": "{{instance}} {{device}}",
"refId": "C"
},
{
"expr": "- node_network_receive_errs_total",
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"exemplar": true,
"expr": "- rate(node_network_receive_errs_total{device!=\"lo\"}[5m])",
"interval": "",
"legendFormat": "{{instance}} {{device}}",
"refId": "D"
......@@ -849,55 +1010,22 @@
"thresholds": [
{
"colorMode": "critical",
"fill": true,
"line": true,
"op": "lt",
"value": -1,
"visible": true
},
{
"colorMode": "critical",
"op": "gt",
"value": 10
"value": 1,
"visible": true
}
],
"timeFrom": null,
"timeRegions": [],
"timeShift": null,
"title": "Network errors",
"tooltip": {
"shared": false,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
}
],
"yaxis": {
"align": false,
"alignLevel": null
}
"type": "timeseries"
},
{
"collapsed": false,
"datasource": null,
"gridPos": {
"h": 1,
"w": 24,
......@@ -945,57 +1073,83 @@
"noDataState": "no_data",
"notifications": []
},
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": null,
"description": "Watch filesystems filling up. Shows only mounts over 10 % of available bytes used.",
"fieldConfig": {
"defaults": {
"custom": {},
"color": {
"mode": "palette-classic"
},
"custom": {
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "never",
"spanNulls": true,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "area"
}
},
"links": [],
"mappings": [],
"max": 1,
"min": 0,
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "transparent",
"value": null
},
{
"color": "red",
"value": 0.8
}
]
},
"unit": "percentunit"
},
"overrides": []
},
"fill": 0,
"fillGradient": 0,
"gridPos": {
"h": 7,
"w": 8,
"h": 6,
"w": 12,
"x": 0,
"y": 17
},
"hiddenSeries": false,
"id": 4,
"legend": {
"avg": false,
"current": false,
"max": false,
"min": false,
"show": false,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"nullPointMode": "null",
"options": {
"alertThreshold": true,
"dataLinks": []
"legend": {
"calcs": [],
"displayMode": "hidden",
"placement": "bottom"
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"percentage": false,
"pluginVersion": "7.3.5",
"pointradius": 2,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"pluginVersion": "8.4.5",
"targets": [
{
"expr": "1 - (node_filesystem_avail_bytes / node_filesystem_size_bytes) > 0.1",
"expr": "1 - (node_filesystem_avail_bytes{mountpoint!='/nix/store'} / node_filesystem_size_bytes{mountpoint!='/nix/store'}) > 0.1",
"format": "time_series",
"hide": false,
"instant": false,
......@@ -1005,105 +1159,222 @@
"refId": "A"
}
],
"thresholds": [
{
"colorMode": "critical",
"fill": true,
"line": true,
"op": "gt",
"value": 0.8,
"yaxis": "left"
}
],
"timeFrom": null,
"timeRegions": [],
"timeShift": null,
"title": "Storage usage %",
"tooltip": {
"shared": false,
"sort": 0,
"value_type": "individual"
},
"title": "Storage usage % per partition",
"transformations": [],
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
"type": "timeseries"
},
{
"alert": {
"alertRuleTags": {},
"conditions": [
{
"evaluator": {
"params": [
0.5
],
"type": "gt"
},
"operator": {
"type": "and"
},
"query": {
"params": [
"A",
"5m",
"now"
]
},
"reducer": {
"params": [],
"type": "avg"
},
"type": "query"
}
],
"executionErrorState": "alerting",
"for": "5m",
"frequency": "15m",
"handler": 1,
"name": "User ciphertext usage % per node alert",
"noDataState": "no_data",
"notifications": []
},
"yaxes": [
{
"format": "percentunit",
"label": null,
"logBase": 1,
"max": "1",
"min": "0",
"show": true
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"description": "How much user data do we store",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "area"
}
},
"mappings": [],
"max": 1,
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "yellow",
"value": 0.5
},
{
"color": "red",
"value": 0.8
}
]
},
"unit": "percentunit"
},
"overrides": []
},
"gridPos": {
"h": 6,
"w": 12,
"x": 12,
"y": 17
},
"id": 41,
"options": {
"legend": {
"calcs": [],
"displayMode": "hidden",
"placement": "bottom"
},
"tooltip": {
"mode": "multi",
"sort": "none"
}
},
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"exemplar": true,
"expr": "1 - (node_filesystem_avail_bytes{mountpoint=\"/storage\"} / node_filesystem_size_bytes{mountpoint=\"/storage\"})",
"interval": "",
"legendFormat": "{{instance}}",
"refId": "A"
}
],
"thresholds": [
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
"colorMode": "critical",
"op": "gt",
"value": 0.5,
"visible": true
}
],
"yaxis": {
"align": false,
"alignLevel": null
}
"title": "User ciphertext usage % per node",
"type": "timeseries"
},
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": null,
"description": "Input Output Operations per second. Positive values mean read, negative write.",
"fieldConfig": {
"defaults": {
"custom": {}
"color": {
"mode": "palette-classic"
},
"custom": {
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "never",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"links": [],
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "short"
},
"overrides": []
},
"fill": 0,
"fillGradient": 0,
"gridPos": {
"h": 7,
"w": 8,
"x": 8,
"y": 17
"x": 0,
"y": 23
},
"hiddenSeries": false,
"id": 14,
"legend": {
"avg": false,
"current": false,
"max": false,
"min": false,
"show": false,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"nullPointMode": "null as zero",
"options": {
"alertThreshold": true,
"dataLinks": []
"legend": {
"calcs": [],
"displayMode": "hidden",
"placement": "bottom"
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"percentage": false,
"pluginVersion": "7.3.5",
"pointradius": 2,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"pluginVersion": "8.3.5",
"targets": [
{
"expr": "rate(node_disk_reads_completed_total[5m]) > 0",
......@@ -1120,95 +1391,82 @@
"refId": "B"
}
],
"thresholds": [],
"timeFrom": null,
"timeRegions": [],
"timeShift": null,
"title": "IOPS",
"tooltip": {
"shared": false,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
}
],
"yaxis": {
"align": false,
"alignLevel": null
}
"type": "timeseries"
},
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": null,
"description": "Max average storage latency per node. Positive values mean read, negative write.",
"fieldConfig": {
"defaults": {
"custom": {}
"color": {
"mode": "palette-classic"
},
"custom": {
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "never",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"links": [],
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "s"
},
"overrides": []
},
"fill": 0,
"fillGradient": 0,
"gridPos": {
"h": 7,
"w": 8,
"x": 16,
"y": 17
"x": 8,
"y": 23
},
"hiddenSeries": false,
"id": 16,
"legend": {
"avg": false,
"current": false,
"max": false,
"min": false,
"show": false,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"nullPointMode": "null as zero",
"options": {
"alertThreshold": true,
"dataLinks": []
"legend": {
"calcs": [],
"displayMode": "hidden",
"placement": "bottom"
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"percentage": false,
"pluginVersion": "7.3.5",
"pointradius": 2,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"pluginVersion": "8.3.5",
"targets": [
{
"expr": "max by (instance, device) (rate(node_disk_read_time_seconds_total[5m]) / rate(node_disk_reads_completed_total[5m]))",
......@@ -1225,62 +1483,424 @@
"refId": "B"
}
],
"thresholds": [],
"timeFrom": null,
"timeRegions": [],
"timeShift": null,
"title": "Storage latency",
"tooltip": {
"shared": false,
"sort": 0,
"value_type": "individual"
"type": "timeseries"
},
{
"alert": {
"alertRuleTags": {},
"conditions": [
{
"evaluator": {
"params": [
0
],
"type": "gt"
},
"operator": {
"type": "and"
},
"query": {
"params": [
"A",
"5m",
"now"
]
},
"reducer": {
"params": [],
"type": "last"
},
"type": "query"
}
],
"executionErrorState": "alerting",
"for": "5m",
"frequency": "5m",
"handler": 1,
"name": "Degraded RAID alert",
"noDataState": "ok",
"notifications": []
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "bars",
"fillOpacity": 100,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "never",
"spanNulls": true,
"stacking": {
"group": "A",
"mode": "normal"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"min": 0,
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "transparent",
"value": null
},
{
"color": "red",
"value": 0
}
]
},
"unit": "short"
},
"overrides": []
},
"gridPos": {
"h": 7,
"w": 8,
"x": 16,
"y": 23
},
"id": 32,
"options": {
"legend": {
"calcs": [],
"displayMode": "hidden",
"placement": "bottom"
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"pluginVersion": "8.3.5",
"targets": [
{
"exemplar": true,
"expr": "megacli_drives{state=\"Degraded\"}",
"interval": "",
"legendFormat": "{{instance}}",
"refId": "A"
}
],
"title": "Degraded RAID arrays",
"type": "timeseries"
},
{
"collapsed": false,
"gridPos": {
"h": 1,
"w": 24,
"x": 0,
"y": 30
},
"id": 34,
"panels": [],
"title": "Pressure Stall Information",
"type": "row"
},
{
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
}
},
"overrides": []
},
"gridPos": {
"h": 7,
"w": 8,
"x": 0,
"y": 31
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
"id": 36,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom"
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"yaxes": [
"targets": [
{
"format": "s",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"exemplar": true,
"expr": "rate(node_pressure_cpu_waiting_seconds_total[5m])",
"interval": "",
"legendFormat": "{{instance}}",
"refId": "A"
}
],
"title": "CPU waiting",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
}
},
"overrides": []
},
"gridPos": {
"h": 7,
"w": 8,
"x": 8,
"y": 31
},
"id": 38,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom"
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"targets": [
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"exemplar": true,
"expr": "rate(node_pressure_memory_waiting_seconds_total[5m])",
"interval": "",
"legendFormat": "{{instance}}",
"refId": "A"
}
],
"title": "Memory waiting",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
}
},
"overrides": []
},
"gridPos": {
"h": 7,
"w": 8,
"x": 16,
"y": 31
},
"id": 37,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom"
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"exemplar": true,
"expr": "rate(node_pressure_io_waiting_seconds_total[5m])",
"interval": "",
"legendFormat": "{{instance}}",
"refId": "A"
}
],
"title": "I/O waiting",
"type": "timeseries"
}
],
"refresh": false,
"schemaVersion": 35,
"style": "dark",
"tags": [],
"templating": {
"list": []
},
"time": {
"from": "now-3h",
"to": "now"
},
"timepicker": {},
"timezone": "",
"title": "Resources overview",
"uid": "ResourcesOverview",
"version": 1,
"weekStart": ""
}
{
"annotations": {
"list": [
{
"builtIn": 1,
"datasource": {
"type": "datasource",
"uid": "grafana"
},
"enable": true,
"hide": true,
"iconColor": "rgba(0, 211, 255, 1)",
"name": "Annotations & Alerts",
"target": {
"limit": 100,
"matchAny": false,
"tags": [],
"type": "dashboard"
},
"type": "dashboard"
}
]
},
"description": "How are our user-facing services doing?",
"editable": true,
"fiscalYearStartMonth": 0,
"graphTooltip": 1,
"id": 116,
"links": [],
"liveNow": false,
"panels": [
{
"collapsed": false,
"gridPos": {
"h": 1,
"w": 24,
"x": 0,
"y": 0
},
"id": 39,
"panels": [],
"title": "Tahoe-LAFS Overview",
"type": "row"
},
{
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"description": "",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": true,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": 3600000,
"stacking": {
"group": "A",
"mode": "normal"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "reqps"
},
"overrides": []
},
"gridPos": {
"h": 7,
"w": 8,
"x": 0,
"y": 1
},
"id": 41,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "multi",
"sort": "none"
}
},
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"editorMode": "code",
"expr": "sum(irate(tahoe_counters_storage_server_allocate[$__range]))",
"instant": false,
"legendFormat": "allocate",
"range": true,
"refId": "allocate"
},
{
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"editorMode": "code",
"expr": "sum(irate(tahoe_counters_storage_server_writev[$__range]))",
"hide": false,
"instant": false,
"legendFormat": "writev",
"range": true,
"refId": "writev"
},
{
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"editorMode": "code",
"expr": "sum(irate(tahoe_counters_storage_server_write[$__range]))",
"hide": false,
"instant": false,
"legendFormat": "write",
"range": true,
"refId": "write"
},
{
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"editorMode": "code",
"expr": "sum(irate(tahoe_counters_storage_server_close[$__range]))",
"hide": false,
"instant": false,
"legendFormat": "close",
"range": true,
"refId": "close"
},
{
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"editorMode": "code",
"expr": "-sum(irate(tahoe_counters_storage_server_get[$__range]))",
"hide": false,
"instant": false,
"legendFormat": "get",
"range": true,
"refId": "get"
},
{
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"editorMode": "code",
"expr": "-sum(irate(tahoe_counters_storage_server_read[$__range]))",
"hide": false,
"instant": false,
"legendFormat": "read",
"range": true,
"refId": "read"
},
{
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"editorMode": "code",
"expr": "-sum(irate(tahoe_counters_storage_server_readv[$__range]))",
"hide": false,
"instant": false,
"legendFormat": "readv",
"range": true,
"refId": "readv"
}
],
"title": "Request rates",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": -1,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "normal"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "binBps"
},
"overrides": []
},
"gridPos": {
"h": 7,
"w": 8,
"x": 8,
"y": 1
},
"id": 42,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "multi",
"sort": "none"
}
},
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"editorMode": "code",
"expr": "sum by (instance)(irate(tahoe_counters_storage_server_bytes_added[$__range]))",
"instant": false,
"legendFormat": "__auto",
"range": true,
"refId": "Writing"
}
],
"title": "Bytes added",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"description": "Sum of all latency means.",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "s"
},
"overrides": []
},
"gridPos": {
"h": 7,
"w": 8,
"x": 16,
"y": 1
},
"id": 43,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"editorMode": "code",
"exemplar": false,
"expr": "(tahoe_stats_storage_server_latencies_allocate_mean or vector(0) +\r\ntahoe_stats_storage_server_latencies_close_mean or vector(0) +\r\ntahoe_stats_storage_server_latencies_get_mean or vector(0) +\r\ntahoe_stats_storage_server_latencies_read_mean or vector(0) +\r\ntahoe_stats_storage_server_latencies_readv_mean or vector(0) +\r\ntahoe_stats_storage_server_latencies_write_mean or vector(0) +\r\ntahoe_stats_storage_server_latencies_writev_mean or vector(0)) != 0\r\n",
"hide": false,
"instant": false,
"interval": "",
"legendFormat": "{{instance}}",
"range": true,
"refId": "A"
}
],
"title": "Latencies",
"type": "timeseries"
},
{
"collapsed": false,
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"gridPos": {
"h": 1,
"w": 24,
"x": 0,
"y": 8
},
"id": 32,
"panels": [],
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"refId": "A"
}
],
"title": "HTTPS endpoints",
"type": "row"
},
{
"alert": {
"alertRuleTags": {},
"conditions": [
{
"evaluator": {
"params": [
3.142
],
"type": "gt"
},
"operator": {
"type": "and"
},
"query": {
"params": [
"A",
"5m",
"now"
]
},
"reducer": {
"params": [],
"type": "avg"
},
"type": "query"
}
],
"executionErrorState": "alerting",
"for": "5m",
"frequency": "1m",
"handler": 1,
"name": "Response times alert",
"noDataState": "no_data",
"notifications": []
},
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"description": "",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 10,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "never",
"spanNulls": true,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "line+area"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "transparent",
"value": null
},
{
"color": "red",
"value": 3.142
}
]
},
"unit": "s"
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 8,
"x": 0,
"y": 9
},
"id": 36,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": false
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"pluginVersion": "8.3.5",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"exemplar": true,
"expr": "probe_duration_seconds",
"interval": "",
"legendFormat": "{{instance}}",
"refId": "A"
}
],
"title": "Response times",
"type": "timeseries"
},
{
"alert": {
"alertRuleTags": {},
"conditions": [
{
"evaluator": {
"params": [
0
],
"type": "gt"
},
"operator": {
"type": "and"
},
"query": {
"params": [
"A",
"5m",
"now"
]
},
"reducer": {
"params": [],
"type": "count"
},
"type": "query"
}
],
"executionErrorState": "alerting",
"for": "10m",
"frequency": "1m",
"handler": 1,
"name": "Probe fails alert",
"noDataState": "ok",
"notifications": []
},
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"description": "Counts, per instance, HTTP probes whose status code is not 200, 401 or 404. Likely causes are the service being down or its TLS certificate not being trusted.",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "bars",
"fillOpacity": 100,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "never",
"spanNulls": true,
"stacking": {
"group": "A",
"mode": "normal"
},
"thresholdsStyle": {
"mode": "line+area"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "transparent",
"value": null
},
{
"color": "red",
"value": 0
}
]
},
"unit": "short"
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 8,
"x": 8,
"y": 9
},
"id": 38,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": false
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"pluginVersion": "8.3.5",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"exemplar": true,
"expr": "count by (instance) (probe_http_status_code!=200 and probe_http_status_code!=401 and probe_http_status_code!=404)",
"interval": "",
"legendFormat": "{{instance}}",
"refId": "A"
}
],
"title": "Probe fails",
"type": "timeseries"
},
{
"alert": {
"alertRuleTags": {},
"conditions": [
{
"evaluator": {
"params": [
2419200
],
"type": "lt"
},
"operator": {
"type": "and"
},
"query": {
"params": [
"A",
"5m",
"now"
]
},
"reducer": {
"params": [],
"type": "avg"
},
"type": "query"
}
],
"executionErrorState": "alerting",
"for": "5m",
"frequency": "60m",
"handler": 1,
"message": "A TLS certificate is expiring within four weeks.",
"name": "TLS certificate expiry alert",
"noDataState": "no_data",
"notifications": []
},
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"description": "",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "never",
"spanNulls": true,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "line+area"
}
},
"mappings": [],
"min": 0,
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "red",
"value": null
},
{
"color": "transparent",
"value": 2419200
}
]
},
"unit": "s"
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 8,
"x": 16,
"y": 9
},
"id": 34,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": false
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"pluginVersion": "8.3.5",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"exemplar": true,
"expr": "probe_ssl_earliest_cert_expiry - time()",
"interval": "",
"intervalFactor": 1,
"legendFormat": "{{instance}}",
"refId": "A"
}
],
"title": "TLS certificate expiry",
"type": "timeseries"
}
],
"refresh": "auto",
"schemaVersion": 39,
"tags": [],
"templating": {
"list": []
},
"time": {
"from": "now-7d",
"to": "now"
},
"timepicker": {},
"timezone": "",
"title": "Services overview",
"uid": "ServicesOverview",
"version": 6,
"weekStart": ""
}
{
"annotations": {
"list": [
{
"builtIn": 1,
"datasource": {
"type": "datasource",
"uid": "grafana"
},
"enable": true,
"hide": true,
"iconColor": "rgba(0, 211, 255, 1)",
"name": "Annotations & Alerts",
"target": {
"limit": 100,
"matchAny": false,
"tags": [],
"type": "dashboard"
},
"type": "dashboard"
}
]
},
"description": "",
"editable": true,
"fiscalYearStartMonth": 0,
"graphTooltip": 0,
"links": [],
"liveNow": false,
"panels": [
{
"collapsed": false,
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"gridPos": {
"h": 1,
"w": 24,
"x": 0,
"y": 0
},
"id": 38,
"panels": [],
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"refId": "A"
}
],
"title": "Inbound operations",
"type": "row"
},
{
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"description": "These stats keep track of local disk latencies for storage-server operations. All values are in seconds. These are recorded by the storage server, starting from the time the request arrives (post-deserialization) and ending when the response begins serialization. As such, they are mostly useful for measuring disk speeds.",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 10,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "never",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "s"
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 8,
"x": 0,
"y": 1
},
"id": 13,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": false
},
"tooltip": {
"mode": "multi",
"sort": "none"
}
},
"pluginVersion": "10.4.6",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"exemplar": true,
"expr": "tahoe_stats_storage_server_latencies_allocate_mean{instance=\"$node\"}",
"interval": "",
"legendFormat": "allocate",
"refId": "allocate_mean"
},
{
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"exemplar": true,
"expr": "tahoe_stats_storage_server_latencies_close_mean{instance=\"$node\"}",
"hide": false,
"interval": "",
"legendFormat": "close",
"refId": "close_mean"
},
{
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"exemplar": true,
"expr": "tahoe_stats_storage_server_latencies_get_mean{instance=\"$node\"}",
"hide": false,
"interval": "",
"legendFormat": "get",
"refId": "get_mean"
},
{
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"exemplar": true,
"expr": "tahoe_stats_storage_server_latencies_read_mean{instance=\"$node\"}",
"hide": false,
"interval": "",
"legendFormat": "read",
"refId": "read_mean"
},
{
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"exemplar": true,
"expr": "tahoe_stats_storage_server_latencies_readv_mean{instance=\"$node\"}",
"hide": false,
"interval": "",
"legendFormat": "readv",
"refId": "readv_mean"
},
{
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"exemplar": true,
"expr": "tahoe_stats_storage_server_latencies_write_mean{instance=\"$node\"}",
"hide": false,
"interval": "",
"legendFormat": "write",
"refId": "write_mean"
},
{
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"exemplar": true,
"expr": "tahoe_stats_storage_server_latencies_writev_mean{instance=\"$node\"}",
"hide": false,
"interval": "",
"legendFormat": "writev",
"refId": "writev_mean"
}
],
"title": "Latency means",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"description": "This counts inbound storage-server operations.",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 10,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "never",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"min": 0,
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "short"
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 8,
"x": 8,
"y": 1
},
"id": 34,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": false
},
"tooltip": {
"mode": "multi",
"sort": "none"
}
},
"pluginVersion": "10.4.6",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"exemplar": true,
"expr": "rate(tahoe_counters_storage_server_allocate{instance=\"$node\"}[5m])",
"interval": "",
"legendFormat": "allocate",
"refId": "allocate"
},
{
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"exemplar": true,
"expr": "rate(tahoe_counters_storage_server_write{instance=\"$node\"}[5m])",
"hide": false,
"interval": "",
"legendFormat": "write",
"refId": "write"
},
{
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"exemplar": true,
"expr": "rate(tahoe_counters_storage_server_close{instance=\"$node\"}[5m])",
"hide": false,
"interval": "",
"legendFormat": "close",
"refId": "close"
},
{
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"exemplar": true,
"expr": "rate(tahoe_counters_storage_server_get{instance=\"$node\"}[5m])",
"hide": false,
"interval": "",
"legendFormat": "get",
"refId": "get"
},
{
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"exemplar": true,
"expr": "rate(tahoe_counters_storage_server_read{instance=\"$node\"}[5m])",
"hide": false,
"interval": "",
"legendFormat": "read",
"refId": "read"
},
{
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"exemplar": true,
"expr": "rate(tahoe_counters_storage_server_add-lease{instance=\"$node\"}[5m])",
"hide": true,
"interval": "",
"legendFormat": "add-lease",
"refId": "add-lease"
},
{
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"exemplar": true,
"expr": "rate(tahoe_counters_storage_server_renew{instance=\"$node\"}[5m])",
"hide": false,
"interval": "",
"legendFormat": "renew",
"refId": "renew"
},
{
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"exemplar": true,
"expr": "rate(tahoe_counters_storage_server_cancel{instance=\"$node\"}[5m])",
"hide": false,
"interval": "",
"legendFormat": "cancel",
"refId": "cancel"
},
{
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"exemplar": true,
"expr": "rate(tahoe_counters_storage_server_readv{instance=\"$node\"}[5m])",
"hide": false,
"interval": "",
"legendFormat": "readv",
"refId": "readv"
},
{
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"exemplar": true,
"expr": "rate(tahoe_counters_storage_server_writev{instance=\"$node\"}[5m])",
"hide": false,
"interval": "",
"legendFormat": "writev",
"refId": "writev"
}
],
"title": "Counts/s",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 10,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "never",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"min": 0,
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "bytes"
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 8,
"x": 16,
"y": 1
},
"id": 36,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": false
},
"tooltip": {
"mode": "multi",
"sort": "none"
}
},
"pluginVersion": "10.4.6",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"exemplar": true,
"expr": "rate(tahoe_counters_storage_server_bytes_added{instance=\"$node\"}[5m])",
"interval": "",
"legendFormat": "Added",
"refId": "A"
},
{
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"exemplar": true,
"expr": "rate(tahoe_counters_storage_server_bytes_freed{instance=\"$node\"}[5m])",
"hide": false,
"interval": "",
"legendFormat": "Freed",
"refId": "B"
}
],
"title": "Bytes/s",
"type": "timeseries"
},
{
"collapsed": false,
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"gridPos": {
"h": 1,
"w": 24,
"x": 0,
"y": 9
},
"id": 19,
"panels": [],
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"refId": "A"
}
],
"title": "Latency Histograms",
"type": "row"
},
{
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"description": "These stats keep track of local disk latencies for storage-server operations. All values are in seconds. These are recorded by the storage server, starting from the time the request arrives (post-deserialization) and ending when the response begins serialization. As such, they are mostly useful for measuring disk speeds.",
"fieldConfig": {
"defaults": {
"color": {
"mode": "thresholds"
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "s"
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 8,
"x": 0,
"y": 10
},
"id": 11,
"options": {
"displayMode": "gradient",
"maxVizHeight": 300,
"minVizHeight": 16,
"minVizWidth": 8,
"namePlacement": "auto",
"orientation": "auto",
"reduceOptions": {
"calcs": [
"lastNotNull"
],
"fields": "",
"values": false
},
"showUnfilled": true,
"sizing": "auto",
"text": {},
"valueMode": "color"
},
"pluginVersion": "10.4.6",
"repeat": "storageserverop",
"repeatDirection": "h",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"exemplar": true,
"expr": "tahoe_stats_storage_server_latencies_$storageserverop{instance=\"$node\"}",
"interval": "",
"legendFormat": "{{quantile}}",
"refId": "A"
}
],
"title": "$storageserverop",
"type": "bargauge"
},
{
"collapsed": false,
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"gridPos": {
"h": 1,
"w": 24,
"x": 0,
"y": 18
},
"id": 30,
"panels": [],
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"refId": "A"
}
],
"title": "Storage overview",
"type": "row"
},
{
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"description": "These all reflect disk-space usage policies and status.",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 10,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "never",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "bytes"
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 8,
"x": 0,
"y": 19
},
"id": 22,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "multi",
"sort": "none"
}
},
"pluginVersion": "10.4.6",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"exemplar": true,
"expr": "tahoe_stats_storage_server_disk_avail{instance=\"$node\"}",
"hide": false,
"interval": "",
"legendFormat": "disk_avail",
"refId": "disk_avail"
},
{
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"exemplar": true,
"expr": "tahoe_stats_storage_server_disk_free_for_nonroot{instance=\"$node\"}",
"hide": false,
"interval": "",
"legendFormat": "free_for_nonroot",
"refId": "disk_free_for_nonroot"
},
{
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"exemplar": true,
"expr": "tahoe_stats_storage_server_disk_free_for_root{instance=\"$node\"}",
"hide": false,
"interval": "",
"legendFormat": "free_for_root",
"refId": "disk_free_for_root"
},
{
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"exemplar": true,
"expr": "",
"hide": true,
"interval": "",
"legendFormat": "disk_total",
"refId": "disk_total"
}
],
"title": "Bytes free",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"description": "These all reflect disk-space usage policies and status.",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 10,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "never",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "bytes"
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 8,
"x": 8,
"y": 19
},
"id": 7,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "multi",
"sort": "none"
}
},
"pluginVersion": "10.4.6",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"exemplar": true,
"expr": "tahoe_stats_storage_server_allocated{instance=\"$node\"}",
"interval": "",
"legendFormat": "allocated",
"refId": "allocated"
},
{
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"exemplar": true,
"expr": "tahoe_stats_storage_server_disk_used{instance=\"$node\"}",
"hide": false,
"interval": "",
"legendFormat": "disk_used",
"refId": "disk_used"
}
],
"title": "Bytes used",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"description": "This counts the number of ‘buckets’ (i.e. unique storage-index values) currently managed by the storage server. It indicates roughly how many files are managed by the server.",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 10,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "never",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "short"
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 8,
"x": 16,
"y": 19
},
"id": 9,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": false
},
"tooltip": {
"mode": "multi",
"sort": "none"
}
},
"pluginVersion": "10.4.6",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"exemplar": true,
"expr": "tahoe_stats_storage_server_total_bucket_count{instance=\"$node\"}",
"interval": "",
"legendFormat": "total_bucket_count",
"refId": "A"
}
],
"title": "Total bucket count",
"type": "timeseries"
},
{
"collapsed": false,
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"gridPos": {
"h": 1,
"w": 24,
"x": 0,
"y": 27
},
"id": 15,
"panels": [],
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"refId": "A"
}
],
"title": "CPU",
"type": "row"
},
{
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"description": "Estimate of what percentage of system CPU time was consumed by the node process, over the given time interval.",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 10,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "never",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"max": 1,
"min": 0,
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "percentunit"
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 8,
"x": 0,
"y": 28
},
"id": 2,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "multi",
"sort": "none"
}
},
"pluginVersion": "10.4.6",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"exemplar": true,
"expr": "tahoe_stats_cpu_monitor_1min_avg{instance=\"$node\"}\n",
"interval": "",
"legendFormat": "1 min avg",
"refId": "A"
},
{
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"exemplar": true,
"expr": "tahoe_stats_cpu_monitor_5min_avg{instance=\"$node\"}\n",
"hide": false,
"interval": "",
"intervalFactor": 5,
"legendFormat": "5 mins avg",
"refId": "B"
},
{
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"exemplar": true,
"expr": "tahoe_stats_cpu_monitor_15min_avg{instance=\"$node\"}\n",
"hide": false,
"interval": "",
"intervalFactor": 1,
"legendFormat": "15 mins avg",
"refId": "C"
}
],
"title": "CPU monitor",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"description": "Estimate of total number of CPU seconds consumed by node since the process was started. Ticket #472 indicates that .total may sometimes be negative due to wraparound of the kernel’s counter.",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 10,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "never",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"min": 0,
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "s"
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 8,
"x": 8,
"y": 28
},
"id": 5,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "multi",
"sort": "none"
}
},
"pluginVersion": "10.4.6",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"exemplar": true,
"expr": "tahoe_stats_cpu_monitor_total{instance=\"$node\"}\n",
"interval": "",
"legendFormat": "Total CPU seconds",
"refId": "A"
}
],
"title": "CPU time total",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"description": "How many seconds since the node process was started.",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 10,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "never",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "s"
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 8,
"x": 16,
"y": 28
},
"id": 4,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "multi",
"sort": "none"
}
},
"pluginVersion": "10.4.6",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"exemplar": true,
"expr": "tahoe_stats_node_uptime{instance=\"$node\"}",
"interval": "",
"legendFormat": "{{instance}} uptime",
"refId": "A"
}
],
"title": "Node uptime",
"type": "timeseries"
},
{
"collapsed": false,
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"gridPos": {
"h": 1,
"w": 24,
"x": 0,
"y": 36
},
"id": 42,
"panels": [],
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"refId": "A"
}
],
"title": "Corruption Advisories",
"type": "row"
},
{
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"description": "File count of /storage/corruption-advisories/",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 60,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "normal"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"min": 0,
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
}
]
},
"unit": "short"
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 12,
"x": 0,
"y": 37
},
"id": 44,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "multi",
"sort": "none"
}
},
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"exemplar": true,
"expr": "tahoe_corruption_advisories_total",
"interval": "",
"legendFormat": "{{instance}}",
"refId": "A"
}
],
"title": "Corruption Advisory count",
"type": "timeseries"
},
{
"alert": {
"alertRuleTags": {},
"conditions": [
{
"evaluator": {
"params": [
0
],
"type": "gt"
},
"operator": {
"type": "and"
},
"query": {
"params": [
"A",
"5m",
"now"
]
},
"reducer": {
"params": [],
"type": "avg"
},
"type": "query"
}
],
"executionErrorState": "alerting",
"for": "5m",
"frequency": "1m",
"handler": 1,
"name": "Corruption Advisory rate alert",
"noDataState": "no_data",
"notifications": []
},
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"description": "Rate of new files in /storage/corruption-advisories/",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 60,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "normal"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
}
]
}
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 12,
"x": 12,
"y": 37
},
"id": 46,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "multi",
"sort": "none"
}
},
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"exemplar": true,
"expr": "rate(tahoe_corruption_advisories_total[5m])",
"interval": "",
"intervalFactor": 1,
"legendFormat": "{{instance}}",
"refId": "A"
}
],
"thresholds": [
{
"colorMode": "critical",
"op": "gt",
"value": 0,
"visible": true
}
],
"title": "Corruption Advisory rate",
"type": "timeseries"
},
{
"collapsed": false,
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"gridPos": {
"h": 1,
"w": 24,
"x": 0,
"y": 45
},
"id": 50,
"panels": [],
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"refId": "A"
}
],
"title": "Incident Reports",
"type": "row"
},
{
"datasource": {},
"description": "File count of /var/db/tahoe-lafs/storage/logs/incidents/",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 60,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "normal"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"min": 0,
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
}
]
},
"unit": "short"
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 12,
"x": 0,
"y": 46
},
"id": 53,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "multi",
"sort": "none"
}
},
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"exemplar": true,
"expr": "tahoe_incident_reports_total",
"interval": "",
"legendFormat": "{{instance}}",
"refId": "A"
}
],
"title": "Incident Reports count",
"type": "timeseries"
},
{
"alert": {
"alertRuleTags": {},
"conditions": [
{
"evaluator": {
"params": [
0
],
"type": "gt"
},
"operator": {
"type": "and"
},
"query": {
"params": [
"A",
"5m",
"now"
]
},
"reducer": {
"params": [],
"type": "avg"
},
"type": "query"
}
],
"executionErrorState": "alerting",
"for": "5m",
"frequency": "1m",
"handler": 1,
"name": "Incident Reports rate alert",
"noDataState": "no_data",
"notifications": []
},
"datasource": {},
"description": "Rate of new files in /var/db/tahoe-lafs/storage/logs/incidents/",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 60,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "normal"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
}
]
}
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 12,
"x": 12,
"y": 46
},
"id": 54,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "multi",
"sort": "none"
}
},
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"exemplar": true,
"expr": "rate(tahoe_incident_reports_total[5m])",
"interval": "",
"intervalFactor": 1,
"legendFormat": "{{instance}}",
"refId": "A"
}
],
"thresholds": [
{
"colorMode": "critical",
"op": "gt",
"value": 0,
"visible": true
}
],
"title": "Incident Reports rate",
"type": "timeseries"
}
],
"schemaVersion": 39,
"tags": [],
"templating": {
"list": [
{
"current": {
"selected": false,
"text": "storage001",
"value": "storage001"
},
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"definition": "tahoe_stats_cpu_monitor_1min_avg",
"description": "Which node (instance) to show",
"hide": 0,
"includeAll": false,
"label": "Node",
"multi": false,
"name": "node",
"options": [],
"query": {
"query": "tahoe_stats_cpu_monitor_1min_avg",
"refId": "StandardVariableQuery"
},
"refresh": 1,
"regex": "/.*instance=\"([^\"]*)\".*/",
"skipUrlSync": false,
"sort": 0,
"tagValuesQuery": "",
"tagsQuery": "",
"type": "query",
"useTags": false
},
{
"current": {
"selected": true,
"text": [
"allocate",
"write",
"readv"
],
"value": [
"allocate",
"write",
"readv"
]
},
"description": "Inbound storage-server operations ",
"hide": 0,
"includeAll": true,
"label": "Detailed latencies for",
"multi": true,
"name": "storageserverop",
"options": [
{
"selected": false,
"text": "All",
"value": "$__all"
},
{
"selected": true,
"text": "allocate",
"value": "allocate"
},
{
"selected": true,
"text": "write",
"value": "write"
},
{
"selected": false,
"text": "close",
"value": "close"
},
{
"selected": false,
"text": "get",
"value": "get"
},
{
"selected": false,
"text": "read",
"value": "read"
},
{
"selected": false,
"text": "add-lease",
"value": "add-lease"
},
{
"selected": false,
"text": "renew",
"value": "renew"
},
{
"selected": false,
"text": "cancel",
"value": "cancel"
},
{
"selected": true,
"text": "readv",
"value": "readv"
},
{
"selected": false,
"text": "writev",
"value": "writev"
}
],
"query": "allocate, write, close, get, read, add-lease, renew, cancel, readv, writev",
"queryValue": "",
"skipUrlSync": false,
"type": "custom"
}
]
},
"time": {
"from": "now-15m",
"to": "now"
},
"timepicker": {},
"timezone": "",
"title": "Tahoe-LAFS",
"uid": "TahoeLAFS",
"version": 1,
"weekStart": ""
}
......@@ -7,33 +7,24 @@
let
cfg = config.services.private-storage.monitoring.grafana;
grafanaAuth = if (cfg.googleOAuthClientID == "") then {
anonymous.enable = true;
} else {
google.enable = true;
# Grafana considers it "sign up" to let in a user it has
# never seen before.
google.allowSignUp = true;
google.clientSecretFile = cfg.googleOAuthClientSecretFile;
google.clientId = cfg.googleOAuthClientID;
};
in {
options.services.private-storage.monitoring.grafana = {
domain = lib.mkOption
{ type = lib.types.str;
example = lib.literalExample "grafana.grid.private.storage";
description = "The FQDN of the Grafana host";
domains = lib.mkOption
{ type = lib.types.listOf lib.types.str;
example = [ "grafana.grid.private.storage" ];
description = "The domain names at which the server is reachable.";
};
prometheusUrl = lib.mkOption
{ type = lib.types.str;
example = lib.literalExample "http://prometheus:9090/";
example = "http://prometheus:9090/";
default = "http://localhost:9090/";
description = "The URL of the Prometheus host to access";
};
lokiUrl = lib.mkOption
{ type = lib.types.str;
example = lib.literalExample "http://loki:3100/";
example = "http://loki:3100/";
default = "http://localhost:3100/";
description = "The URL of the Loki host to access";
};
......@@ -46,86 +37,169 @@ in {
};
googleOAuthClientID = lib.mkOption
{ type = lib.types.str;
example = lib.literalExample "grafana-staging-345678";
example = "grafana-staging-345678";
default = "replace-by-your-client-id-or-set-empty-string-for-anonymous-access";
description = "The GSuite OAuth2 SSO Client ID. Empty string turns SSO auth off and anonymous (free for all) access on.";
};
googleOAuthClientSecretFile = lib.mkOption
{ type = lib.types.path;
example = lib.literalExample "/var/secret/monitoring-gsuite-client-secret";
example = /var/secret/monitoring-gsuite-client-secret;
default = /run/keys/grafana-google-sso.secret;
description = "The path to the GSuite SSO secret file.";
};
adminPasswordFile = lib.mkOption
{ type = lib.types.path;
example = lib.literalExample "/var/secret/monitoring-admin-password";
example = "/var/secret/monitoring-admin-password";
default = /run/keys/grafana-admin.password;
description = "A file containing the password for the Grafana Admin account.";
};
enableSlackAlert = lib.mkOption
{ type = lib.types.bool;
default = false;
description = ''
Enables the slack alerter. Expects a file that contains
the secret Slack Web Hook URL in grafanaSlackUrlFile (see below).
'';
};
grafanaSlackUrlFile = lib.mkOption
{ type = lib.types.path;
default = /run/keys/grafana-slack-url;
description = ''
Where to find the file that contains the slack URL.
'';
};
enableZulipAlert = lib.mkOption
{ type = lib.types.bool;
default = false;
description = ''
Enables the Zulip alerter. Expects a file that contains
the secret Zulip Web Hook URL in grafanaZulipUrlFile (see below).
'';
};
grafanaZulipUrlFile = lib.mkOption
{ type = lib.types.path;
default = /run/keys/grafana-zulip-url;
description = ''
Where to find the file that contains the Zulip URL.
'';
};
};
config = {
config =
let
# We'll refer to this collection of domains by the first domain in the list.
domain = builtins.head cfg.domains;
in {
# Port 80 for ACME ssl retrieval only. 443 for nginx -> grafana.
networking.firewall.allowedTCPPorts = [ 80 443 ];
services.grafana = {
enable = true;
domain = cfg.domain;
port = 2342;
addr = "127.0.0.1";
# No phoning home
analytics.reporting.enable = false;
# Force Grafana to believe it is reachable via https on the default port
# number because that's where the nginx that forwards traffic to it is
# listening. Grafana's own server listens on an internal address that
# doesn't matter to anyone except our nginx instance.
rootUrl = "https://%(domain)s/";
extraOptions = {
# Defend against DNS rebinding attacks.
SERVER_ENFORCE_DOMAIN = "true";
# Same time zone for all users by default
DATE_FORMATS_DEFAULT_TIMEZONE = "UTC";
};
auth = {
anonymous.org_role = "Admin";
anonymous.org_name = "Main Org.";
} // grafanaAuth;
settings = {
# Give users that come through GSuite SSO the highest possible privileges:
users.autoAssignOrgRole = "Editor";
server = {
domain = "${toString domain}";
http_port = 2342;
http_addr = "127.0.0.1";
# Read the admin password from a file in our secrets folder:
security.adminPasswordFile = cfg.adminPasswordFile;
# Defend against DNS rebinding attacks.
enforce_domain = true;
# Force Grafana to believe it is reachable via https on the default port
# number because that's where the nginx that forwards traffic to it is
# listening. Grafana's own server listens on an internal address that
# doesn't matter to anyone except our nginx instance.
root_url = "https://%(domain)s/";
};
# No phoning home
analytics.reporting_enabled = false;
# Same time zone for all users by default
date_formats.default_timezone = "UTC";
# The auth sections since NixOS 22.11 are named a bit funky with a dot in the name
#
# https://grafana.com/docs/grafana/latest/setup-grafana/configure-security/configure-authentication/grafana/#anonymous-authentication
# https://grafana.com/docs/grafana/latest/setup-grafana/configure-security/configure-authentication/google/
"auth.anonymous" = lib.mkIf (cfg.googleOAuthClientID == "") {
enabled = true;
org_role = "Admin";
org_name = "Main Org.";
};
"auth.google" = lib.mkIf (cfg.googleOAuthClientID != "") {
enabled = true;
# Grafana considers it "sign up" to let in a user it has
# never seen before.
allow_sign_up = true;
client_secret = "$__file{${toString cfg.googleOAuthClientSecretFile}}";
client_id = cfg.googleOAuthClientID;
};
# Give users that come through GSuite SSO the highest possible privileges:
users.auto_assign_org_role = "Editor";
# Read the admin password from a file in our secrets folder:
security.admin_password = "$__file{${toString cfg.adminPasswordFile}}";
};
provision = {
enable = true;
# See https://grafana.com/docs/grafana/latest/administration/provisioning/#datasources
datasources = [{
datasources.settings.datasources = [{
name = "Prometheus";
type = "prometheus";
uid = "LocalPrometheus";
access = "proxy";
url = cfg.prometheusUrl;
isDefault = true;
} {
name = "Loki";
type = "loki";
uid = "LocalLoki";
access = "proxy";
url = cfg.lokiUrl;
}];
# See https://grafana.com/docs/grafana/latest/administration/provisioning/#dashboards
dashboards = [{
dashboards.settings.providers = [{
name = "provisioned";
options.path = ./grafana-config;
options.path = ./grafana-dashboards;
}];
# See https://grafana.com/docs/grafana/latest/alerting/set-up/provision-alerting-resources/file-provisioning/#provision-contact-points
alerting.contactPoints.settings.contactPoints =
[ ] ++ (lib.optionals (cfg.enableSlackAlert) [{
uid = "slack-notifier-1";
name = "Slack";
type = "slack";
is_default = true;
send_reminder = false;
settings = {
username = "${domain}";
uploadImage = true;
};
secure_settings = {
# `$__file{}` reads the value from the named file.
# See https://grafana.com/docs/grafana/latest/administration/configuration/#file-provider
url = "$__file{${toString cfg.grafanaSlackUrlFile}}";
};
}]) ++ (lib.optionals (cfg.enableZulipAlert) [{
# See https://zulip.com/integrations/doc/grafana
uid = "zulip-notifier-1";
name = "Zulip";
type = "webhook";
is_default = true;
send_reminder = false;
settings = {
url = "$__file{${toString cfg.grafanaZulipUrlFile}}";
};
}]);
};
};
# nginx reverse proxy
security.acme.email = cfg.letsEncryptAdminEmail;
security.acme.defaults.email = cfg.letsEncryptAdminEmail;
security.acme.acceptTerms = true;
services.nginx = {
enable = true;
......@@ -138,13 +212,25 @@ in {
# Only allow PFS-enabled ciphers with AES256:
sslCiphers = "AES256+EECDH:AES256+EDH:!aNULL";
virtualHosts.${config.services.grafana.domain} = {
virtualHosts."${domain}" = {
serverAliases = builtins.tail cfg.domains;
enableACME = true;
forceSSL = true;
locations."/" = {
proxyPass = "http://127.0.0.1:${toString config.services.grafana.port}";
proxyPass = "http://127.0.0.1:${toString config.services.grafana.settings.server.http_port}";
proxyWebsockets = true;
};
locations."/metrics" = {
# Only allow our monitoringvpn subnet
# And localhost since we're the monitoring server currently
extraConfig = ''
allow ${config.grid.monitoringvpnIPv4}/24;
allow 127.0.0.1;
allow ::1;
deny all;
'';
proxyPass = "http://127.0.0.1:${toString config.services.grafana.settings.server.http_port}";
};
};
};
......
# Loki Server
#
# Scope: Log aggregator
# Scope: Log ingester and aggregator to be run on the monitoring node
#
# See also:
# - The configuration is adapted from
# https://grafana.com/docs/loki/latest/configuration/examples/#complete-local-configyaml
#
{
config.networking.firewall.allowedTCPPorts = [ 3100 ];
{ config, ...}:
let
logRetention = toString(config.services.private-storage.monitoring.policy.logRetentionSeconds) + "s";
in {
config.networking.firewall.interfaces.monitoringvpn.allowedTCPPorts = [ 3100 ];
config.services.loki = {
enable = true;
......@@ -12,31 +21,39 @@
{
auth_enabled = false;
common = {
ring = {
kvstore = {
store = "inmemory";
};
};
instance_addr = "127.0.0.1";
replication_factor = 1;
path_prefix = "/var/lib/loki";
storage = {
filesystem = {
chunks_directory = "/var/lib/loki/chunks";
rules_directory = "/var/lib/loki/rules";
};
};
};
server = {
http_listen_port = 3100;
grpc_listen_port = 9095; # unused, but no option to turn it off.
grpc_listen_address = "127.0.0.1"; # unused, but no option to turn it off.
};
ingester = {
lifecycler = {
address = "0.0.0.0";
ring = {
kvstore = {
store = "inmemory";
};
replication_factor = 1;
};
final_sleep = "0s";
};
chunk_idle_period = "1h"; # Any chunk not receiving new logs in this time will be flushed
max_chunk_age = "1h"; # All chunks will be flushed when they hit this age, default is 1h
chunk_target_size = 1048576; # Loki will attempt to build chunks up to 1.5MB, flushing first if chunk_idle_period or max_chunk_age is reached first
chunk_retain_period = "30s"; # Must be greater than index read cache TTL if using an index cache (Default index read cache TTL is 5m)
max_transfer_retries = 0; # Chunk transfers disabled
chunk_target_size = 1536000; # As per https://grafana.com/docs/loki/v2.2.1/best-practices/
};
schema_config = {
configs = [{
from = "2020-10-24"; # TODO: Should this be "today"?
from = "2020-12-26";
store = "boltdb-shipper";
object_store = "filesystem";
schema = "v11";
......@@ -47,30 +64,13 @@
}];
};
storage_config = {
boltdb_shipper = {
active_index_directory = "/var/lib/loki/boltdb-shipper-active";
cache_location = "/var/lib/loki/boltdb-shipper-cache";
cache_ttl = "24h"; # Can be increased for faster performance over longer query periods, uses more disk space
shared_store = "filesystem";
};
filesystem = {
directory = "/var/lib/loki/chunks";
};
};
limits_config = {
reject_old_samples = true;
reject_old_samples_max_age = "168h";
};
chunk_store_config = {
max_look_back_period = "336h";
allow_structured_metadata = false;
};
table_manager = {
retention_deletes_enabled = true;
retention_period = "336h";
retention_period = logRetention;
};
};
};
......
......@@ -10,27 +10,33 @@ let
cfg = config.services.private-storage.monitoring.prometheus;
dropPortNumber = {
source_labels = [ "__address__" ];
regex = "^(.*):\\d+$";
regex = "^(.*)(?:\\.monitoringvpn):\\d+$";
target_label = "instance";
};
logRetention = toString(config.services.private-storage.monitoring.policy.logRetentionSeconds) + "s";
in {
options.services.private-storage.monitoring.prometheus = {
nodeExporterTargets = lib.mkOption {
type = with lib.types; listOf str;
example = lib.literalExample "[ node1 node2 ]";
example = [ "node1" "node2" ];
description = "List of nodes (hostnames or IPs) to scrape.";
};
nginxExporterTargets = lib.mkOption {
type = with lib.types; listOf str;
example = lib.literalExample "[ node1 node2 ]";
example = [ "node1" "node2" ];
description = "List of nodes (hostnames or IPs) to scrape.";
};
paymentExporterTargets = lib.mkOption {
type = with lib.types; listOf str;
example = lib.literalExample "[ node1 node2 ]";
example = [ "node1" "node2" ];
description = "List of nodes (hostnames or IPs) to scrape.";
};
blackboxExporterHttpsTargets = lib.mkOption {
type = with lib.types; listOf str;
example = [ "https://node1.com/" "https://node2.org/" ];
description = "List of https URLs to scrape.";
};
};
config = rec {
......@@ -39,6 +45,7 @@ in {
services.prometheus = {
enable = true;
# port = 9090; # Option only in recent (20.09?) nixpkgs, 9090 default
retentionTime = logRetention;
scrapeConfigs = [
{
job_name = "node-exporters";
......@@ -65,6 +72,32 @@ in {
}];
relabel_configs = [ dropPortNumber ];
}
{
# The Blackbox exporter is using Prometheus' "Multi-Target Exporter Pattern",
# see https://prometheus.io/docs/guides/multi-target-exporter/
job_name = "blackboxExporterHttps";
static_configs = [{
targets = cfg.blackboxExporterHttpsTargets;
}];
metrics_path = "/probe";
params.module = [ "https_2xx" ];
relabel_configs = [
{
source_labels = [ "__address__" ];
target_label = "__param_target";
}
{
source_labels = [ "__param_target" ];
target_label = "instance";
}
{
source_labels = [];
target_label = "__address__";
# The blackbox exporter’s real hostname:port
replacement = "monitoring:9115";
}
];
}
];
};
};
......
......@@ -8,7 +8,7 @@ in {
enable = lib.mkEnableOption "PrivateStorageio Monitoring VPN client service";
privateKeyFile = lib.mkOption {
type = lib.types.path;
example = lib.literalExample /run/keys/monitoringvpn/host.key;
example = /run/keys/monitoringvpn/host.key;
default = /run/keys/monitoringvpn/client.key;
description = ''
File with base64 private key generated by <command>wg genkey</command>.
......@@ -18,7 +18,7 @@ in {
};
presharedKeyFile = lib.mkOption {
type = lib.types.path;
example = lib.literalExample /run/keys/monitoringvpn/preshared.key;
example = /run/keys/monitoringvpn/preshared.key;
default = /run/keys/monitoringvpn/preshared.key;
description = ''
File with base64 preshared key generated by <command>wg genpsk</command>.
......@@ -26,7 +26,7 @@ in {
};
allowedIPs = lib.mkOption {
type = lib.types.listOf lib.types.str;
example = lib.literalExample [ "172.23.23.1/32" ];
example = [ "172.23.23.1/32" ];
default = [ "172.23.23.1/32" ];
description = ''
Limits which IPs this client receives data from.
......@@ -34,21 +34,21 @@ in {
};
ip = lib.mkOption {
type = lib.types.str;
example = lib.literalExample "172.23.23.11";
example = "172.23.23.11";
description = ''
The IP address of the interface.
'';
};
endpoint = lib.mkOption {
type = lib.types.str;
example = lib.literalExample "vpn.monitoring.private.storage:54321";
example = "vpn.monitoring.private.storage:54321";
description = ''
The address and port number of the server to establish the VPN with.
'';
};
endpointPublicKeyFile = lib.mkOption {
type = lib.types.path;
example = lib.literalExample ./monitoringvpn/server.pub;
example = ./monitoringvpn/server.pub;
description = ''
File with base64 public key generated by <command>cat private.key | wg pubkey > pubkey.pub</command>.
'';
......
......@@ -13,7 +13,7 @@ in {
enable = lib.mkEnableOption "PrivateStorageio Monitoring VPN server service";
privateKeyFile = lib.mkOption {
type = lib.types.path;
example = lib.literalExample /run/keys/monitoringvpn/server.key;
example = /run/keys/monitoringvpn/server.key;
default = /run/keys/monitoringvpn/server.key;
description = ''
File with base64 private key generated by <command>wg genkey</command>.
......@@ -21,7 +21,7 @@ in {
};
presharedKeyFile = lib.mkOption {
type = lib.types.path;
example = lib.literalExample /run/keys/monitoringvpn/preshared.key;
example = /run/keys/monitoringvpn/preshared.key;
default = /run/keys/monitoringvpn/preshared.key;
description = ''
File with base64 preshared key generated by <command>wg genpsk</command>.
......@@ -29,14 +29,14 @@ in {
};
ip = lib.mkOption {
type = lib.types.str;
example = lib.literalExample [ "172.23.23.23" ];
example = [ "172.23.23.23" ];
description = ''
The IP address of the interface.
'';
};
port = lib.mkOption {
type = lib.types.port;
example = lib.literalExample 54321;
example = 54321;
default = 51820;
description = ''
The UDP port to listen on.
......@@ -44,14 +44,14 @@ in {
};
vpnClientIPs = lib.mkOption {
type = lib.types.listOf lib.types.str;
example = lib.literalExample [ "172.23.23.23" "172.23.23.42" ];
example = [ "172.23.23.23" "172.23.23.42" ];
description = ''
The IP addresses to allow connections from.
'';
};
pubKeysPath = lib.mkOption {
type = lib.types.path;
example = lib.literalExample ./monitoringvpn;
example = ./monitoringvpn;
description = ''
The path to the directory that holds the public keys.
'';
......
# A NixOS module which exposes custom packages to other modules.
{ pkgs, ...}:
let
# Get our custom packages; either from the nixpkgs attribute added via an
# overlay in `morph/lib/default.nix`, or by importing them directly.
ourpkgs = pkgs.ourpkgs or (pkgs.callPackage ../pkgs {});
in {
config = {
# Expose `nixos/pkgs` as a new module argument `ourpkgs`.
_module.args.ourpkgs = ourpkgs;
# Also expose it as a config setting, for usage by tests,
# since the `_module` config is not exposed in the result.
passthru.ourpkgs = ourpkgs;
};
}
# A NixOS module which can instantiate a Tahoe-LAFS storage server in the
# preferred configuration for the Private Storage grid.
{ pkgs, lib, config, ... }:
{ pkgs, ourpkgs, lib, config, ... }:
let
# Grab the configuration for this module for convenient access below.
cfg = config.services.private-storage;
......@@ -8,9 +8,6 @@ let
# TODO: This path copied from tahoe.nix.
tahoe-base = "/var/db/tahoe-lafs";
# Our own nixpkgs fork:
ourpkgs = import ../../nixpkgs-ps.nix {};
# The full path to the directory where the storage server will write
# incident reports.
incidents-dir = "${tahoe-base}/${storage-node-name}/logs/incidents";
......@@ -20,7 +17,10 @@ let
#
# NOTE: This is promised by the service privacy policy. It *may not* be
# raised without following the process for updating the privacy policy.
max-incident-age = "29d";
# Fallback to 29d if "monitoring" attribute is not available (currently
# in the system tests)
max-incident-age = toString(cfg.monitoring.policy.logRetentionSeconds or
(29 * (24 * 60 * 60))) + "s";
fqdn = "${
assert config.networking.hostName != null; config.networking.hostName
......@@ -30,8 +30,6 @@ let
in
{
imports = [
# Give it a good SSH configuration.
./ssh.nix
# Load our tahoe-lafs module. It is configurable in the way I want it to
# be configurable.
./tahoe.nix
......@@ -42,7 +40,7 @@ in
services.private-storage.tahoe.package = lib.mkOption
{ default = ourpkgs.privatestorage;
type = lib.types.package;
example = lib.literalExample "pkgs.tahoelafs";
example = lib.literalExpression "pkgs.tahoelafs";
description = ''
The package to use for the Tahoe-LAFS daemon.
'';
......@@ -50,7 +48,7 @@ in
services.private-storage.publicAddress = lib.mkOption
{ default = "${fqdn}";
type = lib.types.str;
example = lib.literalExample "storage.example.invalid";
example = "storage.example.invalid";
description = ''
A publicly-visible address to use in Tahoe-LAFS advertisements for
this storage service.
......@@ -59,7 +57,7 @@ in
services.private-storage.introducerFURL = lib.mkOption
{ default = null;
type = lib.types.nullOr lib.types.str;
example = lib.literalExample "pb://<tubid>@<location hint>/<swissnum>";
example = "pb://<tubid>@<location hint>/<swissnum>";
description = ''
A Tahoe-LAFS introducer node fURL at which this storage node should announce itself.
'';
......@@ -67,22 +65,30 @@ in
services.private-storage.publicStoragePort = lib.mkOption
{ default = 8898;
type = lib.types.int;
example = lib.literalExample 8098;
example = 8098;
description = ''
The port number on which to service storage clients.
'';
};
services.private-storage.publicReadOnlyStoragePort = lib.mkOption
{ default = 8899;
type = lib.types.int;
example = 8099;
description = ''
The port number on which to service read-only storage clients.
'';
};
services.private-storage.issuerRootURL = lib.mkOption
{ default = "https://issuer.${config.networking.domain}/";
type = lib.types.str;
example = lib.literalExample "https://example.invalid/";
example = "https://example.invalid/";
description = ''
The URL of the Ristretto issuer service to announce.
'';
};
services.private-storage.ristrettoSigningKeyPath = lib.mkOption
{ type = lib.types.path;
example = lib.literalExample "/var/run/secrets/signing-key.private";
example = "/var/run/secrets/signing-key.private";
description = ''
The path to the Ristretto signing key for the service.
'';
......@@ -101,70 +107,96 @@ in
# Define configuration based on values given for our options - starting with
# the option that says whether this is even turned on.
config = lib.mkIf cfg.enable
{ services.tahoe.nodes."${storage-node-name}" =
{ package = cfg.tahoe.package;
# Each attribute in this set corresponds to a section in the tahoe.cfg
# file. Attributes on those sets correspond to individual assignments
# in those sections.
#
# We just populate this according to policy/preference of Private
# Storage.
sections =
{ client = if cfg.introducerFURL == null then {} else
{ "introducer.furl" = cfg.introducerFURL;
};
node =
# XXX Should try to name that is unique across the grid.
{ nickname = "${storage-node-name}";
# We have the web port active because the CLI uses it. We may
# eventually turn this off, or at least have it off by default (with
# an option to turn it on). I don't know how much we'll use the CLI
# on the nodes. Maybe very little? Or maybe it will be part of a
# health check for the node... In any case, we tell it to bind to
# localhost so no one *else* can use it. And the principle of the
# web interface is that merely having access to it doesn't grant
# access to any data. It does grant access to storage capabilities
# but with our plugin configuration you still need ZKAPs to use
# those...
"web.port" = "tcp:3456:interface=127.0.0.1";
# We have to tell Tahoe-LAFS where to listen for Foolscap
# connections for the storage protocol. We have to tell it twice.
# First, in the syntax which it uses to listen.
"tub.port" = "tcp:${toString cfg.publicStoragePort}";
# Second, in the syntax it advertises to in the fURL.
"tub.location" = "tcp:${cfg.publicAddress}:${toString cfg.publicStoragePort}";
};
storage =
{ enabled = true;
# Put the storage where we have a lot of space configured.
storage_dir = "/storage";
# Turn on our plugin.
plugins = "privatestorageio-zkapauthz-v1";
{
# A read-only storage service. This allows read-only access for clients
# that use Great Black Swamp. There is no ZKAP/GBS integration yet so
# this is the most we can do at the moment.
services.tahoe.nodes."ro-${storage-node-name}" =
{ package = cfg.tahoe.package;
sections =
{ client = if cfg.introducerFURL == null then {} else
{ "introducer.furl" = cfg.introducerFURL;
};
node =
{ nickname = "ro-${storage-node-name}";
"tub.port" = "tcp:${toString cfg.publicReadOnlyStoragePort}";
"tub.location" = "tcp:${cfg.publicAddress}:${toString cfg.publicReadOnlyStoragePort}";
};
storage =
{ enabled = true;
storage_dir = "/storage";
readonly = true;
force_foolscap = false;
};
};
};
"storageserver.plugins.privatestorageio-zkapauthz-v1" =
{ "ristretto-issuer-root-url" = cfg.issuerRootURL;
"ristretto-signing-key-path" = cfg.ristrettoSigningKeyPath;
} // (
if cfg.passValue == null
then {}
else { "pass-value" = (toString cfg.passValue); }
);
};
};
# Tahoe nixos module brings along a single socket for the web api.
# That's for the other storage node though. Turn off the integration
# with this one.
systemd.services."tahoe.ro-storage".unitConfig.Requires = lib.mkForce [];
# Let traffic destined for the storage node's Foolscap server through.
networking.firewall.allowedTCPPorts = [ cfg.publicStoragePort ];
services.tahoe.nodes."${storage-node-name}" =
{ package = cfg.tahoe.package;
# Each attribute in this set corresponds to a section in the
# tahoe.cfg file. Attributes on those sets correspond to individual
# assignments in those sections.
#
# We just populate this according to policy/preference of Private
# Storage.
sections =
{ client = if cfg.introducerFURL == null then {} else
{ "introducer.furl" = cfg.introducerFURL;
};
node =
# XXX Should try to pick a name that is unique across the grid.
{ nickname = "${storage-node-name}";
systemd.tmpfiles.rules =
# Add a rule to prevent incident reports from accumulating indefinitely.
# See tmpfiles.d(5) for the syntax.
[ "d ${incidents-dir} 0755 root root ${max-incident-age} -"
];
# We have the web port active because the CLI uses it and
# because it exposes a metrics endpoint for our monitoring
# system. The actual port configuration lives in systemd so
# that it can order binding the socket correctly with other
# dependencies (which we can't reliably do with Tahoe
# without a bunch of other work).
"web.port" = "systemd:domain=INET:index=0";
environment.systemPackages = [
# Provide a useful tool for reporting about shares.
ourpkgs.leasereport
];
# We have to tell Tahoe-LAFS where to listen for Foolscap
# connections for the storage protocol. We have to tell it twice.
# First, in the syntax which it uses to listen.
"tub.port" = "tcp:${toString cfg.publicStoragePort}";
};
# Second, in the syntax it advertises to in the fURL.
"tub.location" = "tcp:${cfg.publicAddress}:${toString cfg.publicStoragePort}";
};
storage =
{ enabled = true;
# Put the storage where we have a lot of space configured.
storage_dir = "/storage";
# Turn on our plugin.
plugins = "privatestorageio-zkapauthz-v2";
};
"storageserver.plugins.privatestorageio-zkapauthz-v2" =
{ "ristretto-issuer-root-url" = cfg.issuerRootURL;
"ristretto-signing-key-path" = cfg.ristrettoSigningKeyPath;
} // (
if cfg.passValue == null
then {}
else { "pass-value" = (toString cfg.passValue); }
);
};
};
# Let traffic destined for the storage node's Foolscap server through.
networking.firewall.allowedTCPPorts = [ cfg.publicStoragePort cfg.publicReadOnlyStoragePort ];
systemd.tmpfiles.rules =
# Add a rule to prevent incident reports from accumulating indefinitely.
# See tmpfiles.d(5) for the syntax.
[ "d ${incidents-dir} 0755 root root ${max-incident-age} -"
];
environment.systemPackages = [
# Provide a useful tool for reporting about shares.
ourpkgs.leasereport
];
};
}
# Provide secure defaults for systemd services.
#
# This file evaluates to a plain attribute set of systemd service settings
# (see systemd.exec(5)) that lock a service down as far as possible.
#
# Good reads:
#   https://gist.github.com/ageis/f5595e59b1cddb1513d1b425a323db04
#   https://docs.arbitrary.ch/security/systemd.html
#   https://www.freedesktop.org/software/systemd/man/systemd.exec.html
{
  DynamicUser = true;

  # This set of restrictions is mostly derived from
  # - running `systemd-analyze security zkap-spending-service.service`
  # - looking at the restrictions from the nixos nginx config.
  AmbientCapabilities = "";
  CapabilityBoundingSet = "";
  LockPersonality = true;
  MemoryDenyWriteExecute = true;
  NoNewPrivileges = true;
  PrivateDevices = true;
  PrivateMounts = true;
  PrivateNetwork = true;
  PrivateTmp = true;
  PrivateUsers = true;
  ProcSubset = "pid";
  ProtectClock = true;
  ProtectControlGroups = true;
  ProtectHome = true;
  ProtectHostname = true;
  ProtectKernelLogs = true;
  ProtectKernelModules = true;
  ProtectKernelTunables = true;
  ProtectProc = "invisible";
  ProtectSystem = "strict";
  RemoveIPC = true;
  RestrictAddressFamilies = "AF_UNIX";
  RestrictNamespaces = true;
  RestrictRealtime = true;
  RestrictSUIDSGID = true;
  SystemCallArchitectures = "native";

  # Entries starting with "~" are deny-lists; the others are allow-lists.
  # Since the first entry is an allow-list, it bounds the set of allowed
  # syscalls and the following entries restrict it further.
  SystemCallFilter = [
    # From systemd.exec(5), @system-service is "A reasonable set of
    # system calls used by common system [...]"
    "@system-service"
    # This is from the nginx config, except that `@ipc` is not removed,
    # since twisted uses a self-pipe.
    "~@cpu-emulation @debug @keyring @mount @obsolete @privileged @setuid"
  ];

  # The directive is spelled `UMask` (capital M).  systemd matches key names
  # case-sensitively, so the previous spelling `Umask` was ignored as an
  # unknown key and the service ran with the default umask.
  UMask = "0077";
}
# A NixOS module which can run a service tracking spending of ZKAPs.
#
# When enabled it sets up:
#   - a systemd socket unit listening on `unixSocket`,
#   - a systemd service running the spending service on that socket,
#   - an nginx virtual host for `domain` proxying to the socket, and
#   - firewall openings for ports 80 and 443.
{ lib, pkgs, config, ourpkgs, ... }:
let
  cfg = config.services.private-storage-spending;
in
{
  options = {
    services.private-storage-spending = {
      enable = lib.mkEnableOption "PrivateStorage Spending Service";
      package = lib.mkOption {
        default = ourpkgs.zkap-spending-service;
        type = lib.types.package;
        # A Nix expression, not a string value: mark it as such so the
        # generated documentation does not render it with quotes.
        example = lib.literalExpression "ourpkgs.zkap-spending-service";
        description = ''
          The package to use for the spending service.
        '';
      };
      unixSocket = lib.mkOption {
        default = "/run/zkap-spending-service/api.socket";
        type = lib.types.path;
        description = ''
          The unix socket that the spending service API listens on.
        '';
      };
    };
    services.private-storage-spending.domain = lib.mkOption {
      default = config.networking.fqdn;
      type = lib.types.str;
      # The option is a single string, so the example must be one too.
      example = "spending.example.com";
      description = ''
        The domain name at which the spending service is reachable.
      '';
    };
  };

  config =
    lib.mkIf cfg.enable {
      systemd.sockets.zkap-spending-service = {
        enable = true;
        wantedBy = [ "sockets.target" ];
        listenStreams = [ cfg.unixSocket ];
      };

      # Add a systemd service to run zkap-spending-service.
      systemd.services.zkap-spending-service = {
        enable = true;
        description = "ZKAP Spending Service";
        wantedBy = [ "multi-user.target" ];

        # Start from the shared restrictive defaults and override or extend
        # them with the settings specific to this service.
        serviceConfig = (import ./restricted-service.nix) // {
          NonBlocking = true;

          # It really shouldn't ever exit on its own!  If it does, it's a bug
          # we'll have to fix.  Restart it and hope it doesn't happen too much
          # before we can fix whatever the issue is.
          Restart = "always";
          Type = "simple";

          # Work around https://twistedmatrix.com/trac/ticket/10261
          # Create a runtime directory so that the service has permission
          # to change the mode on the socket.
          RuntimeDirectory = "zkap-spending-service";
        };

        script = let
          # Serve HTTP on the first socket passed in by systemd.
          httpArgs = "--http-endpoint systemd:domain=UNIX:index=0";
        in
          "exec ${cfg.package}/bin/${cfg.package.meta.mainProgram} run ${httpArgs}";
      };

      services.nginx = {
        enable = true;

        recommendedGzipSettings = true;
        recommendedOptimisation = true;
        recommendedProxySettings = true;
        recommendedTlsSettings = true;

        virtualHosts."${cfg.domain}" = {
          locations."/v1/" = {
            # Only forward requests beginning with /v1/ so
            # we pass less scanning spam on to our backend
            # Want a regex instead? try locations."~ /v\d+/"
            proxyPass = "http://unix:${cfg.unixSocket}";
          };
          locations."/metrics" = {
            proxyPass = "http://unix:${cfg.unixSocket}";
            # Only allow our monitoringvpn subnet
            extraConfig = ''
              allow 172.23.23.0/24;
              allow 127.0.0.1;
              allow ::1;
              deny all;
            '';
          };
          locations."/" = {
            # Return a 404 error for any paths not specified above.
            extraConfig = ''
              return 404;
            '';
          };
        };
      };

      # Open 80 and 443 for nginx
      networking.firewall.allowedTCPPorts = [
        80
        443
      ];
    };
}
......@@ -6,8 +6,8 @@
}: {
options = {
services.private-storage.sshUsers = lib.mkOption {
type = lib.types.attrsOf lib.types.str;
example = lib.literalExample { root = "ssh-ed25519 AAA..."; };
type = lib.types.attrsOf (lib.types.listOf lib.types.str);
example = { root = [ "ssh-ed25519 AAA..." ]; };
description = ''
Users to configure on the issuer server and the storage servers and
the SSH public keys to use to authenticate them.
......@@ -25,12 +25,9 @@
services.openssh = {
enable = true;
# We don't use SFTP for anything. No reason to expose it.
allowSFTP = false;
# We only allow key-based authentication.
challengeResponseAuthentication = false;
passwordAuthentication = false;
settings.KbdInteractiveAuthentication = false;
settings.PasswordAuthentication = false;
extraConfig = ''
# Possibly this is superfluous considering we don't allow
......@@ -44,9 +41,9 @@
};
users.users =
let makeUserConfig = username: sshPublicKey: {
let makeUserConfig = username: sshPublicKeys: {
isNormalUser = username != "root";
openssh.authorizedKeys.keys = [ sshPublicKey ];
openssh.authorizedKeys.keys = sshPublicKeys;
};
in builtins.mapAttrs makeUserConfig cfg.sshUsers;
};
......
......@@ -48,7 +48,7 @@ in
default = pkgs.tahoelafs;
defaultText = "pkgs.tahoelafs";
type = types.package;
example = literalExample "pkgs.tahoelafs";
example = "pkgs.tahoelafs";
description = ''
The package to use for the Tahoe LAFS daemon.
'';
......@@ -78,7 +78,7 @@ in
default = pkgs.tahoelafs;
defaultText = "pkgs.tahoelafs";
type = types.package;
example = literalExample "pkgs.tahoelafs";
example = "pkgs.tahoelafs";
description = ''
The package to use for the Tahoe LAFS daemon.
'';
......@@ -156,6 +156,10 @@ in
nameValuePair "tahoe.introducer-${node}" {
description = "Tahoe node user for introducer ${node}";
isSystemUser = true;
group = "tahoe.introducer-${node}";
});
users.groups = flip mapAttrs' cfg.introducers (node: _:
nameValuePair "tahoe.introducer-${node}" {
});
})
(mkIf (cfg.nodes != {}) {
......@@ -178,6 +182,17 @@ in
# Open up the firewall.
# networking.firewall.allowedTCPPorts = flip mapAttrsToList cfg.nodes
# (node: settings: settings.tub.port);
# Make systemd open a port for us:
# Systemd uses the socket name to link to the corresponding Service Unit.
systemd.sockets."tahoe.storage" = {
description = "Tahoe Web Server Socket";
wantedBy = [ "sockets.target" ];
socketConfig = {
ListenStream = "127.0.0.1:3456";
};
};
systemd.services = flip mapAttrs' cfg.nodes (node: settings:
let
pidfile = "/run/tahoe.${lib.escapeShellArg node}.pid";
......@@ -187,10 +202,18 @@ in
eliotLog = "file:${nodedir}/logs/eliot.json,rotate_length=${toString (1024 * 1024 * 32)},max_rotated_files=32";
in nameValuePair "tahoe.${node}" {
description = "Tahoe LAFS node ${node}";
# We are partially socket activated but only for the web API port.
# For the actual storage service port, we bind ourselves. So make
# sure we actually do start up early in case storage requests come in.
wantedBy = [ "multi-user.target" ];
path = [ settings.package ];
restartTriggers = [
config.environment.etc."tahoe-lafs/${node}.cfg".source ];
# We don't know how to re-read our configuration file at runtime
# so restart if it ever changes.
restartTriggers = [ config.environment.etc."tahoe-lafs/${node}.cfg".source ];
serviceConfig = {
Type = "simple";
PIDFile = pidfile;
......@@ -198,8 +221,12 @@ in
# arguments to $(tahoe run). The node directory must come first,
# and arguments which alter Twisted's behavior come afterwards.
ExecStart = ''
${settings.package}/bin/tahoe --eliot-destination ${eliotLog} run ${nodedir} -n -l- --pidfile=${pidfile}
${settings.package}/bin/tahoe --eliot-destination ${eliotLog} run --allow-stdin-close ${nodedir} -n -l- --pidfile=${pidfile}
'';
# Twisted wants non-blocking sockets:
NonBlocking = true;
# The rlimit on number of open files controls how many
# connections a particular storage server can accept (factoring
# in the number of non-connection files the server needs open -
......@@ -236,6 +263,14 @@ in
# now. So it makes sense to have the limit be 2^15 right now.
LimitNOFILE = 32768;
};
unitConfig = {
# Our config doesn't know how to bind all of its sockets on its
# own so don't start without the systemd units that *do* know
# how to bind them.
Requires = [ "tahoe.${node}.socket" ];
};
preStart =
let
created = "${nodedir}.created";
......@@ -287,6 +322,10 @@ in
nameValuePair "tahoe.${node}" {
description = "Tahoe node user for node ${node}";
isSystemUser = true;
group = "tahoe.${node}";
});
users.groups = flip mapAttrs' cfg.nodes (node: _:
nameValuePair "tahoe.${node}" {
});
})
];
......
# Test definition exercising how the `tahoe.storage` service handles its
# on-disk node state when the "created" marker file is missing.
#
# It defines one machine named `storage` and a test script that restarts the
# service twice, checking the contents of /var/db/tahoe-lafs after each
# restart.
# NOTE(review): `nodes` + `testScript` with `$storage->...` calls looks like
# the Perl-based NixOS VM test driver -- confirm against the test runner.
{ ... }: {
nodes = {
# The single machine under test; it only pulls in the tahoe module.
storage = { config, pkgs, ... }: {
imports = [
../tahoe.nix
];
# One Tahoe-LAFS node called "storage", with storage enabled and its ports
# pinned to fixed values (web API on 4000, tub on 4001) so the test script
# below can wait on port 4001.
services.tahoe.nodes.storage = {
package = pkgs.privatestorage;
sections = {
node = {
nickname = "storage";
"web.port" = "tcp:4000:interface=127.0.0.1";
"tub.port" = "tcp:4001";
"tub.location" = "tcp:127.0.0.1:4001";
};
storage = {
enabled = true;
};
};
};
};
};
# The script runs three phases:
#   1. Initial start, then remove `storage.created` and restart: expects a
#      fresh `storage` directory plus a `storage.1` backup holding the node
#      key material, and no `storage.2`.
#   2. Remove the marker again and also create a leftover
#      `storage.atomic/partial` directory before restarting; if the start
#      fails, the unit's journal is logged before the test dies.
#   3. After that restart: expects `storage.1` and `storage.2` backups, no
#      `storage.atomic`, no `storage/partial` and no `storage.3`.
testScript = ''
startAll;
# After the service starts, destroy the "created" marker to force it to
# re-create its internal state.
$storage->waitForOpenPort(4001);
$storage->succeed("systemctl stop tahoe.storage");
$storage->succeed("rm /var/db/tahoe-lafs/storage.created");
$storage->succeed("systemctl start tahoe.storage");
# After it starts up again, verify it has consistent internal state and a
# backup of the prior state.
$storage->waitForOpenPort(4001);
$storage->succeed("[ -e /var/db/tahoe-lafs/storage ]");
$storage->succeed("[ -e /var/db/tahoe-lafs/storage.created ]");
$storage->succeed("[ -e /var/db/tahoe-lafs/storage.1 ]");
$storage->succeed("[ -e /var/db/tahoe-lafs/storage.1/private/node.privkey ]");
$storage->succeed("[ -e /var/db/tahoe-lafs/storage.1/private/node.pem ]");
$storage->succeed("[ ! -e /var/db/tahoe-lafs/storage.2 ]");
# Stop it again, once again destroy the "created" marker, and this time also
# jam some partial state in the way that will need cleanup.
$storage->succeed("systemctl stop tahoe.storage");
$storage->succeed("rm /var/db/tahoe-lafs/storage.created");
$storage->succeed("mkdir -p /var/db/tahoe-lafs/storage.atomic/partial");
eval {
$storage->succeed("systemctl start tahoe.storage");
1;
} or do {
my ($x, $y) = $storage->execute("journalctl -u tahoe.storage");
$storage->log($y);
die $@;
};
# After it starts up again, verify it has consistent internal state and
# backups of the prior two states. It also has no copy of the inconsistent
# state because it could never have been used.
$storage->waitForOpenPort(4001);
$storage->succeed("[ -e /var/db/tahoe-lafs/storage ]");
$storage->succeed("[ -e /var/db/tahoe-lafs/storage.created ]");
$storage->succeed("[ -e /var/db/tahoe-lafs/storage.1 ]");
$storage->succeed("[ -e /var/db/tahoe-lafs/storage.2 ]");
$storage->succeed("[ -e /var/db/tahoe-lafs/storage.2/private/node.privkey ]");
$storage->succeed("[ -e /var/db/tahoe-lafs/storage.2/private/node.pem ]");
$storage->succeed("[ ! -e /var/db/tahoe-lafs/storage.atomic ]");
$storage->succeed("[ ! -e /var/db/tahoe-lafs/storage/partial ]");
$storage->succeed("[ ! -e /var/db/tahoe-lafs/storage.3 ]");
'';
}
......@@ -13,12 +13,12 @@ shift
# configuration that controls what value is actually passed when an update is
# triggered.
case "${GRIDNAME}" in
"local")
"local"|"testing")
BRANCH="develop"
;;
"testing")
BRANCH="staging"
"hro-cloud")
BRANCH="hro-cloud"
;;
"production")
......@@ -36,7 +36,7 @@ CHECKOUT="${HOME}/PrivateStorageio"
# This is the address of the git remote where we can get the latest
# PrivateStorageio.
REPO="https://whetstone.privatestorage.io/privatestorage/PrivateStorageio.git"
REPO="https://whetstone.private.storage/privatestorage/PrivateStorageio.git"
if [ -e "${CHECKOUT}" ]; then
# It exists already so just make sure it contains the latest changes from
......@@ -50,10 +50,10 @@ fi
# Get us to a pristine checkout of the right branch.
git -C "${CHECKOUT}" reset --hard "origin/${BRANCH}"
# If we happen to be on the local grid then fix the undefined key.
# If we happen to be on the local grid then add the required user.nix file
# containing ssh-keys.
if [ "${GRIDNAME}" = "local" ]; then
KEY="$(cat /etc/ssh/authorized_keys.d/vagrant)"
sed -i "s_undefined_\"${KEY}\"_" "${CHECKOUT}"/morph/grid/${GRIDNAME}/public-keys/users.nix
echo "import /etc/nixos/ssh-users.nix" > "${CHECKOUT}"/morph/grid/"${GRIDNAME}"/public-keys/users.nix
fi
# Compute a log message explaining what we're doing.
......@@ -76,14 +76,18 @@ EOF
ssh -o StrictHostKeyChecking=no "$(hostname).$(domainname)" ":"
# Set nixpkgs to our preferred version for the morph build. Annoyingly, we
# can't just use nixpkgs-2105.nix as our nixpkgs because some code (in morph,
# can't just use nixpkgs.nix as our nixpkgs because some code (in morph,
# at least) wants <nixpkgs> to be a fully-resolved path to a nixpkgs tree.
# For example, morph evaluated `import <nixpkgs/lib>` which would turn into
# something like `import nixpkgs-2105.nix/lib` which is nonsense.
# something like `import nixpkgs.nix/lib` which is nonsense.
#
# So instead, import our nixpkgs which forces it to be instantiated in the
# store, then ask for its path, then set NIX_PATH to that.
export NIX_PATH="nixpkgs=$(nix eval "(import ${CHECKOUT}/nixpkgs-2105.nix { }).path")"
# Two lines since 'export' masks 'set -e'
# See https://mywiki.wooledge.org/BashFAQ/105#line-204
NIX_PATH="nixpkgs=$(nix --extra-experimental-features nix-command eval --impure --expr "(import ${CHECKOUT}/nixpkgs.nix { }).path")"
export NIX_PATH
# Attempt to update just this host. Choose the morph grid definition matching
# the grid we belong to and limit the morph deployment update to the host
......
# The set of packages defined locally in this repository, one attribute per
# package.
# `gridlib.base` hands this set to modules as the `ourpkgs` module argument.
# To get at it directly, you can call this as::
#
#   pkgs.callPackage ./nixos/pkgs
{buildPlatform, hostPlatform, callPackage}:
let
  # Build the package expression at `path` without overriding any of its
  # arguments.
  build = path: callPackage path {};
in
{
  lib = build ../lib;

  leasereport = build ./leasereport;

  # `privatestorage` is a derivation with a good Tahoe+ZKAP environment
  # that is exposed by ZKAPAuthorizer.
  privatestorage = build ./privatestorage;

  zkap-spending-service = build ./zkap-spending-service;
  zkapissuer = build ./zkapissuer;
  megacli2prom = build ./megacli2prom;
}