Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.

Source

Select target project
No results found
Select Git revision

Target

Select target project
  • tomprince/PrivateStorageio
  • privatestorage/PrivateStorageio
2 results
Select Git revision
Show changes
Showing
with 3353 additions and 276 deletions
{
"annotations": {
"list": [
{
"builtIn": 1,
"datasource": {
"type": "datasource",
"uid": "grafana"
},
"enable": true,
"hide": true,
"iconColor": "rgba(0, 211, 255, 1)",
"name": "Annotations & Alerts",
"target": {
"limit": 100,
"matchAny": false,
"tags": [],
"type": "dashboard"
},
"type": "dashboard"
}
]
},
"description": "How are our user-facing services doing?",
"editable": true,
"fiscalYearStartMonth": 0,
"graphTooltip": 1,
"id": 116,
"links": [],
"liveNow": false,
"panels": [
{
"collapsed": false,
"gridPos": {
"h": 1,
"w": 24,
"x": 0,
"y": 0
},
"id": 39,
"panels": [],
"title": "Tahoe-LAFS Overview",
"type": "row"
},
{
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"description": "",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": true,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": 3600000,
"stacking": {
"group": "A",
"mode": "normal"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "reqps"
},
"overrides": []
},
"gridPos": {
"h": 7,
"w": 8,
"x": 0,
"y": 1
},
"id": 41,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "multi",
"sort": "none"
}
},
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"editorMode": "code",
"expr": "sum(irate(tahoe_counters_storage_server_allocate[$__range]))",
"instant": false,
"legendFormat": "allocate",
"range": true,
"refId": "allocate"
},
{
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"editorMode": "code",
"expr": "sum(irate(tahoe_counters_storage_server_writev[$__range]))",
"hide": false,
"instant": false,
"legendFormat": "writev",
"range": true,
"refId": "writev"
},
{
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"editorMode": "code",
"expr": "sum(irate(tahoe_counters_storage_server_write[$__range]))",
"hide": false,
"instant": false,
"legendFormat": "write",
"range": true,
"refId": "write"
},
{
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"editorMode": "code",
"expr": "sum(irate(tahoe_counters_storage_server_close[$__range]))",
"hide": false,
"instant": false,
"legendFormat": "close",
"range": true,
"refId": "close"
},
{
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"editorMode": "code",
"expr": "-sum(irate(tahoe_counters_storage_server_get[$__range]))",
"hide": false,
"instant": false,
"legendFormat": "get",
"range": true,
"refId": "get"
},
{
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"editorMode": "code",
"expr": "-sum(irate(tahoe_counters_storage_server_read[$__range]))",
"hide": false,
"instant": false,
"legendFormat": "read",
"range": true,
"refId": "read"
},
{
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"editorMode": "code",
"expr": "-sum(irate(tahoe_counters_storage_server_readv[$__range]))",
"hide": false,
"instant": false,
"legendFormat": "readv",
"range": true,
"refId": "readv"
}
],
"title": "Request rates",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": -1,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "normal"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "binBps"
},
"overrides": []
},
"gridPos": {
"h": 7,
"w": 8,
"x": 8,
"y": 1
},
"id": 42,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "multi",
"sort": "none"
}
},
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"editorMode": "code",
"expr": "sum by (instance)(irate(tahoe_counters_storage_server_bytes_added[$__range]))",
"instant": false,
"legendFormat": "__auto",
"range": true,
"refId": "Writing"
}
],
"title": "Bytes added",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"description": "Sum of all latency means.",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "s"
},
"overrides": []
},
"gridPos": {
"h": 7,
"w": 8,
"x": 16,
"y": 1
},
"id": 43,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"editorMode": "code",
"exemplar": false,
"expr": "(tahoe_stats_storage_server_latencies_allocate_mean or vector(0) +\r\ntahoe_stats_storage_server_latencies_close_mean or vector(0) +\r\ntahoe_stats_storage_server_latencies_get_mean or vector(0) +\r\ntahoe_stats_storage_server_latencies_read_mean or vector(0) +\r\ntahoe_stats_storage_server_latencies_readv_mean or vector(0) +\r\ntahoe_stats_storage_server_latencies_write_mean or vector(0) +\r\ntahoe_stats_storage_server_latencies_writev_mean or vector(0)) != 0\r\n",
"hide": false,
"instant": false,
"interval": "",
"legendFormat": "{{instance}}",
"range": true,
"refId": "A"
}
],
"title": "Latencies",
"type": "timeseries"
},
{
"collapsed": false,
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"gridPos": {
"h": 1,
"w": 24,
"x": 0,
"y": 8
},
"id": 32,
"panels": [],
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"refId": "A"
}
],
"title": "HTTPS endpoints",
"type": "row"
},
{
"alert": {
"alertRuleTags": {},
"conditions": [
{
"evaluator": {
"params": [
3.142
],
"type": "gt"
},
"operator": {
"type": "and"
},
"query": {
"params": [
"A",
"5m",
"now"
]
},
"reducer": {
"params": [],
"type": "avg"
},
"type": "query"
}
],
"executionErrorState": "alerting",
"for": "5m",
"frequency": "1m",
"handler": 1,
"name": "Response times alert",
"noDataState": "no_data",
"notifications": []
},
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"description": "",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 10,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "never",
"spanNulls": true,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "line+area"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "transparent",
"value": null
},
{
"color": "red",
"value": 3.142
}
]
},
"unit": "s"
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 8,
"x": 0,
"y": 9
},
"id": 36,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": false
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"pluginVersion": "8.3.5",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"exemplar": true,
"expr": "probe_duration_seconds",
"interval": "",
"legendFormat": "{{instance}}",
"refId": "A"
}
],
"title": "Response times",
"type": "timeseries"
},
{
"alert": {
"alertRuleTags": {},
"conditions": [
{
"evaluator": {
"params": [
0
],
"type": "gt"
},
"operator": {
"type": "and"
},
"query": {
"params": [
"A",
"5m",
"now"
]
},
"reducer": {
"params": [],
"type": "count"
},
"type": "query"
}
],
"executionErrorState": "alerting",
"for": "10m",
"frequency": "1m",
"handler": 1,
"name": "Probe fails alert",
"noDataState": "ok",
"notifications": []
},
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"description": "Shows all HTTP endpoints where probe_success == 0. This can have several causes; the most likely are that the service is down or that the TLS certificate is not trusted.",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "bars",
"fillOpacity": 100,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "never",
"spanNulls": true,
"stacking": {
"group": "A",
"mode": "normal"
},
"thresholdsStyle": {
"mode": "line+area"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "transparent",
"value": null
},
{
"color": "red",
"value": 0
}
]
},
"unit": "short"
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 8,
"x": 8,
"y": 9
},
"id": 38,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": false
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"pluginVersion": "8.3.5",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"exemplar": true,
"expr": "count by (instance) (probe_http_status_code!=200 and probe_http_status_code!=401 and probe_http_status_code!=404)",
"interval": "",
"legendFormat": "{{instance}}",
"refId": "A"
}
],
"title": "Probe fails",
"type": "timeseries"
},
{
"alert": {
"alertRuleTags": {},
"conditions": [
{
"evaluator": {
"params": [
2419200
],
"type": "lt"
},
"operator": {
"type": "and"
},
"query": {
"params": [
"A",
"5m",
"now"
]
},
"reducer": {
"params": [],
"type": "avg"
},
"type": "query"
}
],
"executionErrorState": "alerting",
"for": "5m",
"frequency": "60m",
"handler": 1,
"message": "A TLS certificate is expiring within four weeks.",
"name": "TLS certificate expiry alert",
"noDataState": "no_data",
"notifications": []
},
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"description": "",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "never",
"spanNulls": true,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "line+area"
}
},
"mappings": [],
"min": 0,
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "red",
"value": null
},
{
"color": "transparent",
"value": 2419200
}
]
},
"unit": "s"
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 8,
"x": 16,
"y": 9
},
"id": 34,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": false
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"pluginVersion": "8.3.5",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"exemplar": true,
"expr": "probe_ssl_earliest_cert_expiry - time()",
"interval": "",
"intervalFactor": 1,
"legendFormat": "{{instance}}",
"refId": "A"
}
],
"title": "TLS certificate expiry",
"type": "timeseries"
}
],
"refresh": "auto",
"schemaVersion": 39,
"tags": [],
"templating": {
"list": []
},
"time": {
"from": "now-7d",
"to": "now"
},
"timepicker": {},
"timezone": "",
"title": "Services overview",
"uid": "ServicesOverview",
"version": 6,
"weekStart": ""
}
{
"annotations": {
"list": [
{
"builtIn": 1,
"datasource": {
"type": "datasource",
"uid": "grafana"
},
"enable": true,
"hide": true,
"iconColor": "rgba(0, 211, 255, 1)",
"name": "Annotations & Alerts",
"target": {
"limit": 100,
"matchAny": false,
"tags": [],
"type": "dashboard"
},
"type": "dashboard"
}
]
},
"description": "",
"editable": true,
"fiscalYearStartMonth": 0,
"graphTooltip": 0,
"links": [],
"liveNow": false,
"panels": [
{
"collapsed": false,
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"gridPos": {
"h": 1,
"w": 24,
"x": 0,
"y": 0
},
"id": 38,
"panels": [],
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"refId": "A"
}
],
"title": "Inbound operations",
"type": "row"
},
{
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"description": "These stats keep track of local disk latencies for storage-server operations. All values are in seconds. These are recorded by the storage server, starting from the time the request arrives (post-deserialization) and ending when the response begins serialization. As such, they are mostly useful for measuring disk speeds.",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 10,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "never",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "s"
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 8,
"x": 0,
"y": 1
},
"id": 13,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": false
},
"tooltip": {
"mode": "multi",
"sort": "none"
}
},
"pluginVersion": "10.4.6",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"exemplar": true,
"expr": "tahoe_stats_storage_server_latencies_allocate_mean{instance=\"$node\"}",
"interval": "",
"legendFormat": "allocate",
"refId": "allocate_mean"
},
{
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"exemplar": true,
"expr": "tahoe_stats_storage_server_latencies_close_mean{instance=\"$node\"}",
"hide": false,
"interval": "",
"legendFormat": "close",
"refId": "close_mean"
},
{
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"exemplar": true,
"expr": "tahoe_stats_storage_server_latencies_get_mean{instance=\"$node\"}",
"hide": false,
"interval": "",
"legendFormat": "get",
"refId": "get_mean"
},
{
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"exemplar": true,
"expr": "tahoe_stats_storage_server_latencies_read_mean{instance=\"$node\"}",
"hide": false,
"interval": "",
"legendFormat": "read",
"refId": "read_mean"
},
{
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"exemplar": true,
"expr": "tahoe_stats_storage_server_latencies_readv_mean{instance=\"$node\"}",
"hide": false,
"interval": "",
"legendFormat": "readv",
"refId": "readv_mean"
},
{
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"exemplar": true,
"expr": "tahoe_stats_storage_server_latencies_write_mean{instance=\"$node\"}",
"hide": false,
"interval": "",
"legendFormat": "write",
"refId": "write_mean"
},
{
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"exemplar": true,
"expr": "tahoe_stats_storage_server_latencies_writev_mean{instance=\"$node\"}",
"hide": false,
"interval": "",
"legendFormat": "writev",
"refId": "writev_mean"
}
],
"title": "Latency means",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"description": "This counts inbound storage-server operations.",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 10,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "never",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"min": 0,
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "short"
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 8,
"x": 8,
"y": 1
},
"id": 34,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": false
},
"tooltip": {
"mode": "multi",
"sort": "none"
}
},
"pluginVersion": "10.4.6",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"exemplar": true,
"expr": "rate(tahoe_counters_storage_server_allocate{instance=\"$node\"}[5m])",
"interval": "",
"legendFormat": "allocate",
"refId": "allocate"
},
{
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"exemplar": true,
"expr": "rate(tahoe_counters_storage_server_write{instance=\"$node\"}[5m])",
"hide": false,
"interval": "",
"legendFormat": "write",
"refId": "write"
},
{
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"exemplar": true,
"expr": "rate(tahoe_counters_storage_server_close{instance=\"$node\"}[5m])",
"hide": false,
"interval": "",
"legendFormat": "close",
"refId": "close"
},
{
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"exemplar": true,
"expr": "rate(tahoe_counters_storage_server_get{instance=\"$node\"}[5m])",
"hide": false,
"interval": "",
"legendFormat": "get",
"refId": "get"
},
{
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"exemplar": true,
"expr": "rate(tahoe_counters_storage_server_read{instance=\"$node\"}[5m])",
"hide": false,
"interval": "",
"legendFormat": "read",
"refId": "read"
},
{
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"exemplar": true,
"expr": "rate(tahoe_counters_storage_server_add-lease{instance=\"$node\"}[5m])",
"hide": true,
"interval": "",
"legendFormat": "add-lease",
"refId": "add-lease"
},
{
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"exemplar": true,
"expr": "rate(tahoe_counters_storage_server_renew{instance=\"$node\"}[5m])",
"hide": false,
"interval": "",
"legendFormat": "renew",
"refId": "renew"
},
{
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"exemplar": true,
"expr": "rate(tahoe_counters_storage_server_cancel{instance=\"$node\"}[5m])",
"hide": false,
"interval": "",
"legendFormat": "cancel",
"refId": "cancel"
},
{
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"exemplar": true,
"expr": "rate(tahoe_counters_storage_server_readv{instance=\"$node\"}[5m])",
"hide": false,
"interval": "",
"legendFormat": "readv",
"refId": "readv"
},
{
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"exemplar": true,
"expr": "rate(tahoe_counters_storage_server_writev{instance=\"$node\"}[5m])",
"hide": false,
"interval": "",
"legendFormat": "writev",
"refId": "writev"
}
],
"title": "Counts/s",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 10,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "never",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"min": 0,
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "bytes"
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 8,
"x": 16,
"y": 1
},
"id": 36,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": false
},
"tooltip": {
"mode": "multi",
"sort": "none"
}
},
"pluginVersion": "10.4.6",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"exemplar": true,
"expr": "rate(tahoe_counters_storage_server_bytes_added{instance=\"$node\"}[5m])",
"interval": "",
"legendFormat": "Added",
"refId": "A"
},
{
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"exemplar": true,
"expr": "rate(tahoe_counters_storage_server_bytes_freed{instance=\"$node\"}[5m])",
"hide": false,
"interval": "",
"legendFormat": "Freed",
"refId": "B"
}
],
"title": "Bytes/s",
"type": "timeseries"
},
{
"collapsed": false,
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"gridPos": {
"h": 1,
"w": 24,
"x": 0,
"y": 9
},
"id": 19,
"panels": [],
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"refId": "A"
}
],
"title": "Latency Histograms",
"type": "row"
},
{
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"description": "These stats keep track of local disk latencies for storage-server operations. All values are in seconds. These are recorded by the storage server, starting from the time the request arrives (post-deserialization) and ending when the response begins serialization. As such, they are mostly useful for measuring disk speeds.",
"fieldConfig": {
"defaults": {
"color": {
"mode": "thresholds"
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "s"
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 8,
"x": 0,
"y": 10
},
"id": 11,
"options": {
"displayMode": "gradient",
"maxVizHeight": 300,
"minVizHeight": 16,
"minVizWidth": 8,
"namePlacement": "auto",
"orientation": "auto",
"reduceOptions": {
"calcs": [
"lastNotNull"
],
"fields": "",
"values": false
},
"showUnfilled": true,
"sizing": "auto",
"text": {},
"valueMode": "color"
},
"pluginVersion": "10.4.6",
"repeat": "storageserverop",
"repeatDirection": "h",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"exemplar": true,
"expr": "tahoe_stats_storage_server_latencies_$storageserverop{instance=\"$node\"}",
"interval": "",
"legendFormat": "{{quantile}}",
"refId": "A"
}
],
"title": "$storageserverop",
"type": "bargauge"
},
{
"collapsed": false,
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"gridPos": {
"h": 1,
"w": 24,
"x": 0,
"y": 18
},
"id": 30,
"panels": [],
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"refId": "A"
}
],
"title": "Storage overview",
"type": "row"
},
{
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"description": "These all reflect disk-space usage policies and status.",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 10,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "never",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "bytes"
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 8,
"x": 0,
"y": 19
},
"id": 22,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "multi",
"sort": "none"
}
},
"pluginVersion": "10.4.6",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"exemplar": true,
"expr": "tahoe_stats_storage_server_disk_avail{instance=\"$node\"}",
"hide": false,
"interval": "",
"legendFormat": "disk_avail",
"refId": "disk_avail"
},
{
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"exemplar": true,
"expr": "tahoe_stats_storage_server_disk_free_for_nonroot{instance=\"$node\"}",
"hide": false,
"interval": "",
"legendFormat": "free_for_nonroot",
"refId": "disk_free_for_nonroot"
},
{
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"exemplar": true,
"expr": "tahoe_stats_storage_server_disk_free_for_root{instance=\"$node\"}",
"hide": false,
"interval": "",
"legendFormat": "free_for_root",
"refId": "disk_free_for_root"
},
{
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"exemplar": true,
"expr": "",
"hide": true,
"interval": "",
"legendFormat": "disk_total",
"refId": "disk_total"
}
],
"title": "Bytes free",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"description": "These all reflect disk-space usage policies and status.",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 10,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "never",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "bytes"
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 8,
"x": 8,
"y": 19
},
"id": 7,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "multi",
"sort": "none"
}
},
"pluginVersion": "10.4.6",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"exemplar": true,
"expr": "tahoe_stats_storage_server_allocated{instance=\"$node\"}",
"interval": "",
"legendFormat": "allocated",
"refId": "allocated"
},
{
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"exemplar": true,
"expr": "tahoe_stats_storage_server_disk_used{instance=\"$node\"}",
"hide": false,
"interval": "",
"legendFormat": "disk_used",
"refId": "disk_used"
}
],
"title": "Bytes used",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"description": "This counts the number of ‘buckets’ (i.e. unique storage-index values) currently managed by the storage server. It indicates roughly how many files are managed by the server.",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 10,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "never",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "short"
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 8,
"x": 16,
"y": 19
},
"id": 9,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": false
},
"tooltip": {
"mode": "multi",
"sort": "none"
}
},
"pluginVersion": "10.4.6",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"exemplar": true,
"expr": "tahoe_stats_storage_server_total_bucket_count{instance=\"$node\"}",
"interval": "",
"legendFormat": "total_bucket_count",
"refId": "A"
}
],
"title": "Total bucket count",
"type": "timeseries"
},
{
"collapsed": false,
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"gridPos": {
"h": 1,
"w": 24,
"x": 0,
"y": 27
},
"id": 15,
"panels": [],
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"refId": "A"
}
],
"title": "CPU",
"type": "row"
},
{
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"description": "Estimate of what percentage of system CPU time was consumed by the node process, over the given time interval. ",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 10,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "never",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"max": 1,
"min": 0,
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "percentunit"
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 8,
"x": 0,
"y": 28
},
"id": 2,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "multi",
"sort": "none"
}
},
"pluginVersion": "10.4.6",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"exemplar": true,
"expr": "tahoe_stats_cpu_monitor_1min_avg{instance=\"$node\"}\n",
"interval": "",
"legendFormat": "1 min avg",
"refId": "A"
},
{
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"exemplar": true,
"expr": "tahoe_stats_cpu_monitor_5min_avg{instance=\"$node\"}\n",
"hide": false,
"interval": "",
"intervalFactor": 5,
"legendFormat": "5 mins avg",
"refId": "B"
},
{
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"exemplar": true,
"expr": "tahoe_stats_cpu_monitor_15min_avg{instance=\"$node\"}\n",
"hide": false,
"interval": "",
"intervalFactor": 1,
"legendFormat": "15 mins avg",
"refId": "C"
}
],
"title": "CPU monitor",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"description": "Estimate of total number of CPU seconds consumed by node since the process was started. Ticket #472 indicates that .total may sometimes be negative due to wraparound of the kernel’s counter.",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 10,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "never",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"min": 0,
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "s"
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 8,
"x": 8,
"y": 28
},
"id": 5,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "multi",
"sort": "none"
}
},
"pluginVersion": "10.4.6",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"exemplar": true,
"expr": "tahoe_stats_cpu_monitor_total{instance=\"$node\"}\n",
"interval": "",
"legendFormat": "Total CPU seconds",
"refId": "A"
}
],
"title": "CPU time total",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"description": "How many seconds since the node process was started.",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 10,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "never",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "s"
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 8,
"x": 16,
"y": 28
},
"id": 4,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "multi",
"sort": "none"
}
},
"pluginVersion": "10.4.6",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"exemplar": true,
"expr": "tahoe_stats_node_uptime{instance=\"$node\"}",
"interval": "",
"legendFormat": "{{instance}} uptime",
"refId": "A"
}
],
"title": "Node uptime",
"type": "timeseries"
},
{
"collapsed": false,
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"gridPos": {
"h": 1,
"w": 24,
"x": 0,
"y": 36
},
"id": 42,
"panels": [],
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"refId": "A"
}
],
"title": "Corruption Advisories",
"type": "row"
},
{
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"description": "File count of /storage/corruption-advisories/",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 60,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "normal"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"min": 0,
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
}
]
},
"unit": "short"
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 12,
"x": 0,
"y": 37
},
"id": 44,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "multi",
"sort": "none"
}
},
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"exemplar": true,
"expr": "tahoe_corruption_advisories_total",
"interval": "",
"legendFormat": "{{instance}}",
"refId": "A"
}
],
"title": "Corruption Advisory count",
"type": "timeseries"
},
{
"alert": {
"alertRuleTags": {},
"conditions": [
{
"evaluator": {
"params": [
0
],
"type": "gt"
},
"operator": {
"type": "and"
},
"query": {
"params": [
"A",
"5m",
"now"
]
},
"reducer": {
"params": [],
"type": "avg"
},
"type": "query"
}
],
"executionErrorState": "alerting",
"for": "5m",
"frequency": "1m",
"handler": 1,
"name": "Corruption Advisory rate alert",
"noDataState": "no_data",
"notifications": []
},
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"description": "Rate of new files in /storage/corruption-advisories/",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 60,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "normal"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
}
]
}
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 12,
"x": 12,
"y": 37
},
"id": 46,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "multi",
"sort": "none"
}
},
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"exemplar": true,
"expr": "rate(tahoe_corruption_advisories_total[5m])",
"interval": "",
"intervalFactor": 1,
"legendFormat": "{{instance}}",
"refId": "A"
}
],
"thresholds": [
{
"colorMode": "critical",
"op": "gt",
"value": 0,
"visible": true
}
],
"title": "Corruption Advisory rate",
"type": "timeseries"
},
{
"collapsed": false,
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"gridPos": {
"h": 1,
"w": 24,
"x": 0,
"y": 45
},
"id": 50,
"panels": [],
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"refId": "A"
}
],
"title": "Incident Reports",
"type": "row"
},
{
"datasource": {},
"description": "File count of /var/db/tahoe-lafs/storage/logs/incidents/",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 60,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "normal"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"min": 0,
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
}
]
},
"unit": "short"
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 12,
"x": 0,
"y": 46
},
"id": 53,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "multi",
"sort": "none"
}
},
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"exemplar": true,
"expr": "tahoe_incident_reports_total",
"interval": "",
"legendFormat": "{{instance}}",
"refId": "A"
}
],
"title": "Incident Reports count",
"type": "timeseries"
},
{
"alert": {
"alertRuleTags": {},
"conditions": [
{
"evaluator": {
"params": [
0
],
"type": "gt"
},
"operator": {
"type": "and"
},
"query": {
"params": [
"A",
"5m",
"now"
]
},
"reducer": {
"params": [],
"type": "avg"
},
"type": "query"
}
],
"executionErrorState": "alerting",
"for": "5m",
"frequency": "1m",
"handler": 1,
"name": "Incident Reports rate alert",
"noDataState": "no_data",
"notifications": []
},
"datasource": {},
"description": "Rate of new files in /var/db/tahoe-lafs/storage/logs/incidents/",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 60,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "normal"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
}
]
}
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 12,
"x": 12,
"y": 46
},
"id": 54,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "multi",
"sort": "none"
}
},
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"exemplar": true,
"expr": "rate(tahoe_incident_reports_total[5m])",
"interval": "",
"intervalFactor": 1,
"legendFormat": "{{instance}}",
"refId": "A"
}
],
"thresholds": [
{
"colorMode": "critical",
"op": "gt",
"value": 0,
"visible": true
}
],
"title": "Incident Reports rate",
"type": "timeseries"
}
],
"schemaVersion": 39,
"tags": [],
"templating": {
"list": [
{
"current": {
"selected": false,
"text": "storage001",
"value": "storage001"
},
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"definition": "tahoe_stats_cpu_monitor_1min_avg",
"description": "Which node (instance) to show",
"hide": 0,
"includeAll": false,
"label": "Node",
"multi": false,
"name": "node",
"options": [],
"query": {
"query": "tahoe_stats_cpu_monitor_1min_avg",
"refId": "StandardVariableQuery"
},
"refresh": 1,
"regex": "/.*instance=\"([^\"]*)\".*/",
"skipUrlSync": false,
"sort": 0,
"tagValuesQuery": "",
"tagsQuery": "",
"type": "query",
"useTags": false
},
{
"current": {
"selected": true,
"text": [
"allocate",
"write",
"readv"
],
"value": [
"allocate",
"write",
"readv"
]
},
"description": "Inbound storage-server operations ",
"hide": 0,
"includeAll": true,
"label": "Detailed latencies for",
"multi": true,
"name": "storageserverop",
"options": [
{
"selected": false,
"text": "All",
"value": "$__all"
},
{
"selected": true,
"text": "allocate",
"value": "allocate"
},
{
"selected": true,
"text": "write",
"value": "write"
},
{
"selected": false,
"text": "close",
"value": "close"
},
{
"selected": false,
"text": "get",
"value": "get"
},
{
"selected": false,
"text": "read",
"value": "read"
},
{
"selected": false,
"text": "add-lease",
"value": "add-lease"
},
{
"selected": false,
"text": "renew",
"value": "renew"
},
{
"selected": false,
"text": "cancel",
"value": "cancel"
},
{
"selected": true,
"text": "readv",
"value": "readv"
},
{
"selected": false,
"text": "writev",
"value": "writev"
}
],
"query": "allocate, write, close, get, read, add-lease, renew, cancel, readv, writev",
"queryValue": "",
"skipUrlSync": false,
"type": "custom"
}
]
},
"time": {
"from": "now-15m",
"to": "now"
},
"timepicker": {},
"timezone": "",
"title": "Tahoe-LAFS",
"uid": "TahoeLAFS",
"version": 1,
"weekStart": ""
}
......@@ -7,33 +7,24 @@
let
cfg = config.services.private-storage.monitoring.grafana;
grafanaAuth = if (cfg.googleOAuthClientID == "") then {
anonymous.enable = true;
} else {
google.enable = true;
# Grafana considers it "sign up" to let in a user it has
# never seen before.
google.allowSignUp = true;
google.clientSecretFile = cfg.googleOAuthClientSecretFile;
google.clientId = cfg.googleOAuthClientID;
};
in {
options.services.private-storage.monitoring.grafana = {
domain = lib.mkOption
{ type = lib.types.str;
example = lib.literalExample "grafana.grid.private.storage";
description = "The FQDN of the Grafana host";
domains = lib.mkOption
{ type = lib.types.listOf lib.types.str;
example = [ "grafana.grid.private.storage" ];
description = "The domain names at which the server is reachable.";
};
prometheusUrl = lib.mkOption
{ type = lib.types.str;
example = lib.literalExample "http://prometheus:9090/";
example = "http://prometheus:9090/";
default = "http://localhost:9090/";
description = "The URL of the Prometheus host to access";
};
lokiUrl = lib.mkOption
{ type = lib.types.str;
example = lib.literalExample "http://loki:3100/";
example = "http://loki:3100/";
default = "http://localhost:3100/";
description = "The URL of the Loki host to access";
};
......@@ -46,19 +37,19 @@ in {
};
googleOAuthClientID = lib.mkOption
{ type = lib.types.str;
example = lib.literalExample "grafana-staging-345678";
example = "grafana-staging-345678";
default = "replace-by-your-client-id-or-set-empty-string-for-anonymous-access";
description = "The GSuite OAuth2 SSO Client ID. Empty string turns SSO auth off and anonymous (free for all) access on.";
};
googleOAuthClientSecretFile = lib.mkOption
{ type = lib.types.path;
example = lib.literalExample "/var/secret/monitoring-gsuite-client-secret";
example = /var/secret/monitoring-gsuite-client-secret;
default = /run/keys/grafana-google-sso.secret;
description = "The path to the GSuite SSO secret file.";
};
adminPasswordFile = lib.mkOption
{ type = lib.types.path;
example = lib.literalExample "/var/secret/monitoring-admin-password";
example = "/var/secret/monitoring-admin-password";
default = /run/keys/grafana-admin.password;
description = "A file containing the password for the Grafana Admin account.";
};
......@@ -67,104 +58,148 @@ in {
default = false;
description = ''
Enables the slack alerter. Expects a file that contains
the definition of an environment variable named SLACKURL
pointing to the secret Slack Web Hook URL in
grafanaSlackUrlFile (see below).
the secret Slack Web Hook URL in grafanaSlackUrlFile (see below).
'';
};
grafanaSlackUrlFile = lib.mkOption
{ type = lib.types.path;
default = /run/keys/grafana-slack-url;
description = ''
Where to find the Grafana Systemd EnvironmentFile that
sets the secret SLACKURL environment variable.
Where to find the file that contains the Slack URL.
'';
};
enableZulipAlert = lib.mkOption
{ type = lib.types.bool;
default = false;
description = ''
Enables the Zulip alerter. Expects a file that contains
the secret Zulip Web Hook URL in grafanaZulipUrlFile (see below).
'';
};
grafanaZulipUrlFile = lib.mkOption
{ type = lib.types.path;
default = /run/keys/grafana-zulip-url;
description = ''
Where to find the file that contains the Zulip URL.
'';
};
};
config = {
config =
let
# We'll refer to this collection of domains by the first domain in the list.
domain = builtins.head cfg.domains;
in {
# Port 80 for ACME ssl retrieval only. 443 for nginx -> grafana.
networking.firewall.allowedTCPPorts = [ 80 443 ];
# We pass the secret Slack URL using an environment variable.
systemd.services.grafana.serviceConfig.EnvironmentFile =
if cfg.enableSlackAlert
then [ cfg.grafanaSlackUrlFile ]
else [ ];
services.grafana = {
enable = true;
domain = cfg.domain;
port = 2342;
addr = "127.0.0.1";
# No phoning home
analytics.reporting.enable = false;
settings = {
server = {
domain = "${toString domain}";
http_port = 2342;
http_addr = "127.0.0.1";
# Defend against DNS rebinding attacks.
enforce_domain = true;
# Force Grafana to believe it is reachable via https on the default port
# number because that's where the nginx that forwards traffic to it is
# listening. Grafana's own server listens on an internal address that
# doesn't matter to anyone except our nginx instance.
rootUrl = "https://%(domain)s/";
root_url = "https://%(domain)s/";
};
# No phoning home
analytics.reporting_enabled = false;
extraOptions = {
# Defend against DNS rebinding attacks.
SERVER_ENFORCE_DOMAIN = "true";
# Same time zone for all users by default
DATE_FORMATS_DEFAULT_TIMEZONE = "UTC";
};
date_formats.default_timezone = "UTC";
auth = {
anonymous.org_role = "Admin";
anonymous.org_name = "Main Org.";
} // grafanaAuth;
# The auth sections since NixOS 22.11 are named a bit funky with a dot in the name
#
# https://grafana.com/docs/grafana/latest/setup-grafana/configure-security/configure-authentication/grafana/#anonymous-authentication
# https://grafana.com/docs/grafana/latest/setup-grafana/configure-security/configure-authentication/google/
"auth.anonymous" = lib.mkIf (cfg.googleOAuthClientID == "") {
enabled = true;
org_role = "Admin";
org_name = "Main Org.";
};
"auth.google" = lib.mkIf (cfg.googleOAuthClientID != "") {
enabled = true;
# Grafana considers it "sign up" to let in a user it has
# never seen before.
allow_sign_up = true;
client_secret = "$__file{${toString cfg.googleOAuthClientSecretFile}}";
client_id = cfg.googleOAuthClientID;
};
# Give users that come through GSuite SSO the highest possible privileges:
users.autoAssignOrgRole = "Editor";
users.auto_assign_org_role = "Editor";
# Read the admin password from a file in our secrets folder:
security.adminPasswordFile = cfg.adminPasswordFile;
security.admin_password = "$__file{${toString cfg.adminPasswordFile}}";
};
provision = {
enable = true;
# See https://grafana.com/docs/grafana/latest/administration/provisioning/#datasources
datasources = [{
datasources.settings.datasources = [{
name = "Prometheus";
type = "prometheus";
uid = "LocalPrometheus";
access = "proxy";
url = cfg.prometheusUrl;
isDefault = true;
} {
name = "Loki";
type = "loki";
uid = "LocalLoki";
access = "proxy";
url = cfg.lokiUrl;
}];
# See https://grafana.com/docs/grafana/latest/administration/provisioning/#dashboards
dashboards = [{
dashboards.settings.providers = [{
name = "provisioned";
options.path = ./grafana-dashboards;
}];
# See https://grafana.com/docs/grafana/latest/administration/provisioning/#example-alert-notification-channels-config-file
notifiers = [ ] ++ (lib.optionals (cfg.enableSlackAlert) [{
# See https://grafana.com/docs/grafana/latest/alerting/set-up/provision-alerting-resources/file-provisioning/#provision-contact-points
alerting.contactPoints.settings.contactPoints =
[ ] ++ (lib.optionals (cfg.enableSlackAlert) [{
uid = "slack-notifier-1";
name = "Slack";
type = "slack";
is_default = true;
send_reminder = false;
settings = {
username = "${cfg.domain}";
username = "${domain}";
uploadImage = true;
};
secure_settings = {
url = "$SLACKURL";
# `$__file{}` reads the value from the named file.
# See https://grafana.com/docs/grafana/latest/administration/configuration/#file-provider
url = "$__file{${toString cfg.grafanaSlackUrlFile}}";
};
}]) ++ (lib.optionals (cfg.enableZulipAlert) [{
# See https://zulip.com/integrations/doc/grafana
uid = "zulip-notifier-1";
name = "Zulip";
type = "webhook";
is_default = true;
send_reminder = false;
settings = {
url = "$__file{${toString cfg.grafanaZulipUrlFile}}";
};
}]);
};
};
# nginx reverse proxy
security.acme.email = cfg.letsEncryptAdminEmail;
security.acme.defaults.email = cfg.letsEncryptAdminEmail;
security.acme.acceptTerms = true;
services.nginx = {
enable = true;
......@@ -177,13 +212,25 @@ in {
# Only allow PFS-enabled ciphers with AES256:
sslCiphers = "AES256+EECDH:AES256+EDH:!aNULL";
virtualHosts.${config.services.grafana.domain} = {
virtualHosts."${domain}" = {
serverAliases = builtins.tail cfg.domains;
enableACME = true;
forceSSL = true;
locations."/" = {
proxyPass = "http://127.0.0.1:${toString config.services.grafana.port}";
proxyPass = "http://127.0.0.1:${toString config.services.grafana.settings.server.http_port}";
proxyWebsockets = true;
};
locations."/metrics" = {
# Only allow our monitoringvpn subnet
# And localhost since we're the monitoring server currently
extraConfig = ''
allow ${config.grid.monitoringvpnIPv4}/24;
allow 127.0.0.1;
allow ::1;
deny all;
'';
proxyPass = "http://127.0.0.1:${toString config.services.grafana.settings.server.http_port}";
};
};
};
......
# Loki Server
#
# Scope: Log aggregator
# Scope: Log ingester and aggregator to be run on the monitoring node
#
# See also:
# - The configuration is adapted from
# https://grafana.com/docs/loki/latest/configuration/examples/#complete-local-configyaml
#
{
config.networking.firewall.allowedTCPPorts = [ 3100 ];
{ config, ...}:
let
logRetention = toString(config.services.private-storage.monitoring.policy.logRetentionSeconds) + "s";
in {
config.networking.firewall.interfaces.monitoringvpn.allowedTCPPorts = [ 3100 ];
config.services.loki = {
enable = true;
......@@ -12,31 +21,39 @@
{
auth_enabled = false;
server = {
http_listen_port = 3100;
};
ingester = {
lifecycler = {
address = "0.0.0.0";
common = {
ring = {
kvstore = {
store = "inmemory";
};
};
instance_addr = "127.0.0.1";
replication_factor = 1;
path_prefix = "/var/lib/loki";
storage = {
filesystem = {
chunks_directory = "/var/lib/loki/chunks";
rules_directory = "/var/lib/loki/rules";
};
};
};
server = {
http_listen_port = 3100;
grpc_listen_port = 9095; # unused, but no option to turn it off.
grpc_listen_address = "127.0.0.1"; # unused, but no option to turn it off.
};
ingester = {
lifecycler = {
final_sleep = "0s";
};
chunk_idle_period = "1h"; # Any chunk not receiving new logs in this time will be flushed
max_chunk_age = "1h"; # All chunks will be flushed when they hit this age, default is 1h
chunk_target_size = 1048576; # Loki will attempt to build chunks up to 1.5MB, flushing first if chunk_idle_period or max_chunk_age is reached first
chunk_retain_period = "30s"; # Must be greater than index read cache TTL if using an index cache (Default index read cache TTL is 5m)
max_transfer_retries = 0; # Chunk transfers disabled
chunk_target_size = 1536000; # As per https://grafana.com/docs/loki/v2.2.1/best-practices/
};
schema_config = {
configs = [{
from = "2020-10-24"; # TODO: Should this be "today"?
from = "2020-12-26";
store = "boltdb-shipper";
object_store = "filesystem";
schema = "v11";
......@@ -47,30 +64,19 @@
}];
};
storage_config = {
boltdb_shipper = {
active_index_directory = "/var/lib/loki/boltdb-shipper-active";
cache_location = "/var/lib/loki/boltdb-shipper-cache";
cache_ttl = "24h"; # Can be increased for faster performance over longer query periods, uses more disk space
shared_store = "filesystem";
};
filesystem = {
directory = "/var/lib/loki/chunks";
};
};
limits_config = {
reject_old_samples = true;
reject_old_samples_max_age = "168h";
};
chunk_store_config = {
max_look_back_period = "336h";
allow_structured_metadata = false;
};
table_manager = {
retention_deletes_enabled = true;
retention_period = "336h";
retention_period = logRetention;
};
compactor = {
retention_enabled = true;
delete_request_store = "filesystem";
working_directory = "/var/lib/loki/compactor";
};
};
};
......
......@@ -10,27 +10,33 @@ let
cfg = config.services.private-storage.monitoring.prometheus;
dropPortNumber = {
source_labels = [ "__address__" ];
regex = "^(.*):\\d+$";
regex = "^(.*)(?:\\.monitoringvpn):\\d+$";
target_label = "instance";
};
logRetention = toString(config.services.private-storage.monitoring.policy.logRetentionSeconds) + "s";
in {
options.services.private-storage.monitoring.prometheus = {
nodeExporterTargets = lib.mkOption {
type = with lib.types; listOf str;
example = lib.literalExample "[ node1 node2 ]";
example = [ "node1" "node2" ];
description = "List of nodes (hostnames or IPs) to scrape.";
};
nginxExporterTargets = lib.mkOption {
type = with lib.types; listOf str;
example = lib.literalExample "[ node1 node2 ]";
example = [ "node1" "node2" ];
description = "List of nodes (hostnames or IPs) to scrape.";
};
paymentExporterTargets = lib.mkOption {
type = with lib.types; listOf str;
example = lib.literalExample "[ node1 node2 ]";
example = [ "node1" "node2" ];
description = "List of nodes (hostnames or IPs) to scrape.";
};
blackboxExporterHttpsTargets = lib.mkOption {
type = with lib.types; listOf str;
example = [ "https://node1.com/" "https://node2.org/" ];
description = "List of https URLs to scrape.";
};
};
config = rec {
......@@ -39,6 +45,7 @@ in {
services.prometheus = {
enable = true;
# port = 9090; # Option only in recent (20.09?) nixpkgs, 9090 default
retentionTime = logRetention;
scrapeConfigs = [
{
job_name = "node-exporters";
......@@ -65,6 +72,32 @@ in {
}];
relabel_configs = [ dropPortNumber ];
}
{
# The Blackbox exporter is using Prometheus' "Multi-Target Exporter Pattern",
# see https://prometheus.io/docs/guides/multi-target-exporter/
job_name = "blackboxExporterHttps";
static_configs = [{
targets = cfg.blackboxExporterHttpsTargets;
}];
metrics_path = "/probe";
params.module = [ "https_2xx" ];
relabel_configs = [
{
source_labels = [ "__address__" ];
target_label = "__param_target";
}
{
source_labels = [ "__param_target" ];
target_label = "instance";
}
{
source_labels = [];
target_label = "__address__";
# The blackbox exporter’s real hostname:port
replacement = "monitoring:9115";
}
];
}
];
};
};
......
......@@ -8,7 +8,7 @@ in {
enable = lib.mkEnableOption "PrivateStorageio Monitoring VPN client service";
privateKeyFile = lib.mkOption {
type = lib.types.path;
example = lib.literalExample /run/keys/monitoringvpn/host.key;
example = /run/keys/monitoringvpn/host.key;
default = /run/keys/monitoringvpn/client.key;
description = ''
File with base64 private key generated by <command>wg genkey</command>.
......@@ -18,7 +18,7 @@ in {
};
presharedKeyFile = lib.mkOption {
type = lib.types.path;
example = lib.literalExample /run/keys/monitoringvpn/preshared.key;
example = /run/keys/monitoringvpn/preshared.key;
default = /run/keys/monitoringvpn/preshared.key;
description = ''
File with base64 preshared key generated by <command>wg genpsk</command>.
......@@ -26,7 +26,7 @@ in {
};
allowedIPs = lib.mkOption {
type = lib.types.listOf lib.types.str;
example = lib.literalExample [ "172.23.23.1/32" ];
example = [ "172.23.23.1/32" ];
default = [ "172.23.23.1/32" ];
description = ''
Limits which IPs this client receives data from.
......@@ -34,21 +34,21 @@ in {
};
ip = lib.mkOption {
type = lib.types.str;
example = lib.literalExample "172.23.23.11";
example = "172.23.23.11";
description = ''
The IP address of the interface.
'';
};
endpoint = lib.mkOption {
type = lib.types.str;
example = lib.literalExample "vpn.monitoring.private.storage:54321";
example = "vpn.monitoring.private.storage:54321";
description = ''
The address and port number of the server to establish the VPN with.
'';
};
endpointPublicKeyFile = lib.mkOption {
type = lib.types.path;
example = lib.literalExample ./monitoringvpn/server.pub;
example = ./monitoringvpn/server.pub;
description = ''
File with base64 public key generated by <command>cat private.key | wg pubkey > pubkey.pub</command>.
'';
......
......@@ -13,7 +13,7 @@ in {
enable = lib.mkEnableOption "PrivateStorageio Monitoring VPN server service";
privateKeyFile = lib.mkOption {
type = lib.types.path;
example = lib.literalExample /run/keys/monitoringvpn/server.key;
example = /run/keys/monitoringvpn/server.key;
default = /run/keys/monitoringvpn/server.key;
description = ''
File with base64 private key generated by <command>wg genkey</command>.
......@@ -21,7 +21,7 @@ in {
};
presharedKeyFile = lib.mkOption {
type = lib.types.path;
example = lib.literalExample /run/keys/monitoringvpn/preshared.key;
example = /run/keys/monitoringvpn/preshared.key;
default = /run/keys/monitoringvpn/preshared.key;
description = ''
File with base64 preshared key generated by <command>wg genpsk</command>.
......@@ -29,14 +29,14 @@ in {
};
ip = lib.mkOption {
type = lib.types.str;
example = lib.literalExample [ "172.23.23.23" ];
example = [ "172.23.23.23" ];
description = ''
The IP address of the interface.
'';
};
port = lib.mkOption {
type = lib.types.port;
example = lib.literalExample 54321;
example = 54321;
default = 51820;
description = ''
The UDP port to listen on.
......@@ -44,14 +44,14 @@ in {
};
vpnClientIPs = lib.mkOption {
type = lib.types.listOf lib.types.str;
example = lib.literalExample [ "172.23.23.23" "172.23.23.42" ];
example = [ "172.23.23.23" "172.23.23.42" ];
description = ''
The IP addresses to allow connections from.
'';
};
pubKeysPath = lib.mkOption {
type = lib.types.path;
example = lib.literalExample ./monitoringvpn;
example = ./monitoringvpn;
description = ''
The path to the directory that holds the public keys.
'';
......
# A NixOS module which exposes custom packages to other modules.
{ pkgs, ...}:
let
ourpkgs = pkgs.callPackage ../../nixos/pkgs {};
# Get our custom packages; either from the nixpkgs attribute added via an
# overlay in `morph/lib/default.nix`, or by importing them directly.
ourpkgs = pkgs.ourpkgs or (pkgs.callPackage ../pkgs {});
in {
config = {
# Expose `nixos/pkgs` as a new module argument `ourpkgs`.
......
......@@ -17,7 +17,10 @@ let
#
# NOTE: This is promised by the service privacy policy. It *may not* be
# raised without following the process for updating the privacy policy.
max-incident-age = "29d";
# Fallback to 29d if "monitoring" attribute is not available (currently
# in the system tests)
max-incident-age = toString(cfg.monitoring.policy.logRetentionSeconds or
(29 * (24 * 60 * 60))) + "s";
fqdn = "${
assert config.networking.hostName != null; config.networking.hostName
......@@ -37,7 +40,7 @@ in
services.private-storage.tahoe.package = lib.mkOption
{ default = ourpkgs.privatestorage;
type = lib.types.package;
example = lib.literalExample "pkgs.tahoelafs";
example = lib.literalExpression "pkgs.tahoelafs";
description = ''
The package to use for the Tahoe-LAFS daemon.
'';
......@@ -45,7 +48,7 @@ in
services.private-storage.publicAddress = lib.mkOption
{ default = "${fqdn}";
type = lib.types.str;
example = lib.literalExample "storage.example.invalid";
example = "storage.example.invalid";
description = ''
A publicly-visible address to use in Tahoe-LAFS advertisements for
this storage service.
......@@ -54,7 +57,7 @@ in
services.private-storage.introducerFURL = lib.mkOption
{ default = null;
type = lib.types.nullOr lib.types.str;
example = lib.literalExample "pb://<tubid>@<location hint>/<swissnum>";
example = "pb://<tubid>@<location hint>/<swissnum>";
description = ''
A Tahoe-LAFS introducer node fURL at which this storage node should announce itself.
'';
......@@ -62,22 +65,30 @@ in
services.private-storage.publicStoragePort = lib.mkOption
{ default = 8898;
type = lib.types.int;
example = lib.literalExample 8098;
example = 8098;
description = ''
The port number on which to service storage clients.
'';
};
services.private-storage.publicReadOnlyStoragePort = lib.mkOption
{ default = 8899;
type = lib.types.int;
example = 8099;
description = ''
The port number on which to service read-only storage clients.
'';
};
services.private-storage.issuerRootURL = lib.mkOption
{ default = "https://issuer.${config.networking.domain}/";
type = lib.types.str;
example = lib.literalExample "https://example.invalid/";
example = "https://example.invalid/";
description = ''
The URL of the Ristretto issuer service to announce.
'';
};
services.private-storage.ristrettoSigningKeyPath = lib.mkOption
{ type = lib.types.path;
example = lib.literalExample "/var/run/secrets/signing-key.private";
example = "/var/run/secrets/signing-key.private";
description = ''
The path to the Ristretto signing key for the service.
'';
......@@ -96,11 +107,39 @@ in
# Define configuration based on values given for our options - starting with
# the option that says whether this is even turned on.
config = lib.mkIf cfg.enable
{ services.tahoe.nodes."${storage-node-name}" =
{
# A read-only storage service. This allows read-only access for clients
# that use Great Black Swamp. There is no ZKAP/GBS integration yet so
# this is the most we can do at the moment.
services.tahoe.nodes."ro-${storage-node-name}" =
{ package = cfg.tahoe.package;
# Each attribute in this set corresponds to a section in the tahoe.cfg
# file. Attributes on those sets correspond to individual assignments
# in those sections.
sections =
{ client = if cfg.introducerFURL == null then {} else
{ "introducer.furl" = cfg.introducerFURL;
};
node =
{ nickname = "ro-${storage-node-name}";
"tub.port" = "tcp:${toString cfg.publicReadOnlyStoragePort}";
"tub.location" = "tcp:${cfg.publicAddress}:${toString cfg.publicReadOnlyStoragePort}";
};
storage =
{ enabled = true;
storage_dir = "/storage";
readonly = true;
force_foolscap = false;
};
};
};
# Tahoe nixos module brings along a single socket for the web api.
# That's for the other storage node though. Turn off the integration
# with this one.
systemd.services."tahoe.ro-storage".unitConfig.Requires = lib.mkForce [];
services.tahoe.nodes."${storage-node-name}" =
{ package = cfg.tahoe.package;
# Each attribute in this set corresponds to a section in the
# tahoe.cfg file. Attributes on those sets correspond to individual
# assignments in those sections.
#
# We just populate this according to policy/preference of Private
# Storage.
......@@ -111,21 +150,20 @@ in
node =
# XXX Should try to pick a name that is unique across the grid.
{ nickname = "${storage-node-name}";
# We have the web port active because the CLI uses it. We may
# eventually turn this off, or at least have it off by default (with
# an option to turn it on). I don't know how much we'll use the CLI
# on the nodes. Maybe very little? Or maybe it will be part of a
# health check for the node... In any case, we tell it to bind to
# localhost so no one *else* can use it. And the principle of the
# web interface is that merely having access to it doesn't grant
# access to any data. It does grant access to storage capabilities
# but with our plugin configuration you still need ZKAPs to use
# those...
"web.port" = "tcp:3456:interface=127.0.0.1";
# We have the web port active because the CLI uses it and
# because it exposes a metrics endpoint for our monitoring
# system. The actual port configuration lives in systemd so
# that it can order binding the socket correctly with other
# dependencies (which we can't reliably do with Tahoe
# without a bunch of other work).
"web.port" = "systemd:domain=INET:index=0";
# We have to tell Tahoe-LAFS where to listen for Foolscap
# connections for the storage protocol. We have to tell it twice.
# First, in the syntax which it uses to listen.
"tub.port" = "tcp:${toString cfg.publicStoragePort}";
# Second, in the syntax it advertises in the fURL.
"tub.location" = "tcp:${cfg.publicAddress}:${toString cfg.publicStoragePort}";
};
......@@ -134,9 +172,9 @@ in
# Put the storage where we have a lot of space configured.
storage_dir = "/storage";
# Turn on our plugin.
plugins = "privatestorageio-zkapauthz-v1";
plugins = "privatestorageio-zkapauthz-v2";
};
"storageserver.plugins.privatestorageio-zkapauthz-v1" =
"storageserver.plugins.privatestorageio-zkapauthz-v2" =
{ "ristretto-issuer-root-url" = cfg.issuerRootURL;
"ristretto-signing-key-path" = cfg.ristrettoSigningKeyPath;
} // (
......@@ -148,7 +186,7 @@ in
};
# Let traffic destined for the storage node's Foolscap server through.
networking.firewall.allowedTCPPorts = [ cfg.publicStoragePort ];
networking.firewall.allowedTCPPorts = [ cfg.publicStoragePort cfg.publicReadOnlyStoragePort ];
systemd.tmpfiles.rules =
# Add a rule to prevent incident reports from accumulating indefinitely.
......@@ -160,6 +198,5 @@ in
# Provide a useful tool for reporting about shares.
ourpkgs.leasereport
];
};
}
# Provide secure defaults for systemd services
#
# Good reads:
# https://gist.github.com/ageis/f5595e59b1cddb1513d1b425a323db04
# https://docs.arbitrary.ch/security/systemd.html
# https://www.freedesktop.org/software/systemd/man/systemd.exec.html
{
  # Shared systemd `serviceConfig` settings. This attribute set is meant to
  # be imported and merged (`//`) with a service's own settings, which then
  # take precedence over the values given here.
  DynamicUser = true;

  # This set of restrictions is mostly derived from
  # - running `systemd-analyze security zkap-spending-service.service`
  # - Looking at the restrictions from the nixos nginx config.
  AmbientCapabilities = "";
  CapabilityBoundingSet = "";
  LockPersonality = true;
  MemoryDenyWriteExecute = true;
  NoNewPrivileges = true;
  PrivateDevices = true;
  PrivateMounts = true;
  PrivateNetwork = true;
  PrivateTmp = true;
  PrivateUsers = true;
  ProcSubset = "pid";
  ProtectClock = true;
  ProtectControlGroups = true;
  ProtectHome = true;
  ProtectHostname = true;
  ProtectKernelLogs = true;
  ProtectKernelModules = true;
  ProtectKernelTunables = true;
  ProtectProc = "invisible";
  ProtectSystem = "strict";
  RemoveIPC = true;
  RestrictAddressFamilies = "AF_UNIX";
  RestrictNamespaces = true;
  RestrictRealtime = true;
  RestrictSUIDSGID = true;
  SystemCallArchitectures = "native";

  # Entries starting with "~" are deny-lists; the others are allow-lists.
  # Since the first entry is an allow-list, it bounds the set of allowed
  # syscalls and the further entries restrict it.
  SystemCallFilter = [
    # From systemd.exec(5), @system-service is "A reasonable set of
    # system calls used by common system [...]"
    "@system-service"
    # This is from the nginx config, except that `@ipc` is not removed,
    # since twisted uses a self-pipe.
    "~@cpu-emulation @debug @keyring @mount @obsolete @privileged @setuid"
  ];

  # The directive is spelled `UMask` in systemd.exec(5). Unit file keys are
  # matched case-sensitively, so a `Umask` key is reported as unknown and
  # ignored rather than applied.
  UMask = "0077";
}
# A NixOS module which can run a Ristretto-based issuer for PrivateStorage
# A NixOS module which can run a service tracking spending of ZKAPs.
# ZKAPs.
{ lib, pkgs, config, ourpkgs, ... }@args: let
cfg = config.services.private-storage-spending;
......@@ -10,7 +10,7 @@ in
package = lib.mkOption {
default = ourpkgs.zkap-spending-service;
type = lib.types.package;
example = lib.literalExample "ourpkgs.zkap-spending-service";
example = "ourpkgs.zkap-spending-service";
description = ''
The package to use for the spending service.
'';
......@@ -26,7 +26,7 @@ in
services.private-storage-spending.domain = lib.mkOption {
default = config.networking.fqdn;
type = lib.types.str;
example = lib.literalExample [ "spending.example.com" ];
example = [ "spending.example.com" ];
description = ''
The domain name at which the spending service is reachable.
'';
......@@ -40,70 +40,26 @@ in
wantedBy = [ "sockets.target" ];
listenStreams = [ cfg.unixSocket ];
};
# Add a systemd service to run zkap-spending-service.
systemd.services.zkap-spending-service = {
enable = true;
description = "ZKAP Spending Service";
wantedBy = [ "multi-user.target" ];
serviceConfig.NonBlocking = true;
serviceConfig = (import ./restricted-service.nix) // {
NonBlocking = true;
# It really shouldn't ever exit on its own! If it does, it's a bug
# we'll have to fix. Restart it and hope it doesn't happen too much
# before we can fix whatever the issue is.
serviceConfig.Restart = "always";
serviceConfig.Type = "simple";
# Use a unnamed user.
serviceConfig.DynamicUser = true;
Restart = "always";
Type = "simple";
serviceConfig = {
# Work around https://twistedmatrix.com/trac/ticket/10261
# Create a runtime directory so that the service has permission
# to change the mode on the socket.
RuntimeDirectory = "zkap-spending-service";
# This set of restrictions is mostly dervied from
# - running `systemd-analyze security zkap-spending-service.service
# - Looking at the restrictions from the nixos nginx config.
AmbientCapabilities = "";
CapabilityBoundingSet = "";
LockPersonality = true;
MemoryDenyWriteExecute = true;
NoNewPrivileges = true;
PrivateDevices = true;
PrivateMounts = true;
PrivateNetwork = true;
PrivateTmp = true;
PrivateUsers = true;
ProcSubset = "pid";
ProtectClock = true;
ProtectControlGroups = true;
ProtectHome = true;
ProtectHostname = true;
ProtectKernelLogs = true;
ProtectKernelModules = true;
ProtectKernelTunables = true;
ProtectProc = "invisible";
ProtectSystem = "strict";
RemoveIPC = true;
RestrictAddressFamilies = "AF_UNIX";
RestrictNamespaces = true;
RestrictRealtime = true;
RestrictSUIDSGID = true;
SystemCallArchitectures = "native";
# Lines starting with "~" are deny-list the others are allow-list
# Since the first line is allow, that bounds the set of allowed syscalls
# and the further lines restrict it.
SystemCallFilter = [
# From systemd.exec(5), @system-service is "A reasonable set of
# system calls used by common system [...]"
"@system-service"
# This is from the nginx config, except that `@ipc` is not removed,
# since twisted uses a self-pipe.
"~@cpu-emulation @debug @keyring @mount @obsolete @privileged @setuid"
];
Umask = "0077";
};
script = let
......@@ -127,6 +83,16 @@ in
# Want a regex instead? try locations."~ /v\d+/"
proxyPass = "http://unix:${cfg.unixSocket}";
};
locations."/metrics" = {
proxyPass = "http://unix:${cfg.unixSocket}";
# Only allow our monitoringvpn subnet
extraConfig = ''
allow 172.23.23.0/24;
allow 127.0.0.1;
allow ::1;
deny all;
'';
};
locations."/" = {
# Return a 404 error for any paths not specified above.
extraConfig = ''
......@@ -135,5 +101,11 @@ in
};
};
};
# Open 80 and 443 for nginx
networking.firewall.allowedTCPPorts = [
80
443
];
};
}
......@@ -6,8 +6,8 @@
}: {
options = {
services.private-storage.sshUsers = lib.mkOption {
type = lib.types.attrsOf lib.types.str;
example = lib.literalExample { root = "ssh-ed25519 AAA..."; };
type = lib.types.attrsOf (lib.types.listOf lib.types.str);
example = { root = "ssh-ed25519 AAA..."; };
description = ''
Users to configure on the issuer server and the storage servers and
the SSH public keys to use to authenticate them.
......@@ -25,12 +25,9 @@
services.openssh = {
enable = true;
# We don't use SFTP for anything. No reason to expose it.
allowSFTP = false;
# We only allow key-based authentication.
challengeResponseAuthentication = false;
passwordAuthentication = false;
settings.KbdInteractiveAuthentication = false;
settings.PasswordAuthentication = false;
extraConfig = ''
# Possibly this is superfluous considering we don't allow
......@@ -44,9 +41,9 @@
};
users.users =
let makeUserConfig = username: sshPublicKey: {
let makeUserConfig = username: sshPublicKeys: {
isNormalUser = username != "root";
openssh.authorizedKeys.keys = [ sshPublicKey ];
openssh.authorizedKeys.keys = sshPublicKeys;
};
in builtins.mapAttrs makeUserConfig cfg.sshUsers;
};
......
......@@ -48,7 +48,7 @@ in
default = pkgs.tahoelafs;
defaultText = "pkgs.tahoelafs";
type = types.package;
example = literalExample "pkgs.tahoelafs";
example = "pkgs.tahoelafs";
description = ''
The package to use for the Tahoe LAFS daemon.
'';
......@@ -78,7 +78,7 @@ in
default = pkgs.tahoelafs;
defaultText = "pkgs.tahoelafs";
type = types.package;
example = literalExample "pkgs.tahoelafs";
example = "pkgs.tahoelafs";
description = ''
The package to use for the Tahoe LAFS daemon.
'';
......@@ -156,6 +156,10 @@ in
nameValuePair "tahoe.introducer-${node}" {
description = "Tahoe node user for introducer ${node}";
isSystemUser = true;
group = "tahoe.introducer-${node}";
});
users.groups = flip mapAttrs' cfg.introducers (node: _:
nameValuePair "tahoe.introducer-${node}" {
});
})
(mkIf (cfg.nodes != {}) {
......@@ -178,6 +182,17 @@ in
# Open up the firewall.
# networking.firewall.allowedTCPPorts = flip mapAttrsToList cfg.nodes
# (node: settings: settings.tub.port);
# Make systemd open a port for us:
# Systemd uses the socket name to link to the corresponding Service Unit.
systemd.sockets."tahoe.storage" = {
description = "Tahoe Web Server Socket";
wantedBy = [ "sockets.target" ];
socketConfig = {
ListenStream = "127.0.0.1:3456";
};
};
systemd.services = flip mapAttrs' cfg.nodes (node: settings:
let
pidfile = "/run/tahoe.${lib.escapeShellArg node}.pid";
......@@ -187,10 +202,18 @@ in
eliotLog = "file:${nodedir}/logs/eliot.json,rotate_length=${toString (1024 * 1024 * 32)},max_rotated_files=32";
in nameValuePair "tahoe.${node}" {
description = "Tahoe LAFS node ${node}";
# We are partially socket activated but only for the web API port.
# For the actual storage service port, we bind ourselves. So make
# sure we actually do start up early in case storage requests come
# in.
wantedBy = [ "multi-user.target" ];
path = [ settings.package ];
restartTriggers = [
config.environment.etc."tahoe-lafs/${node}.cfg".source ];
# We don't know how to re-read our configuration file at runtime
# so restart if it ever changes.
restartTriggers = [ config.environment.etc."tahoe-lafs/${node}.cfg".source ];
serviceConfig = {
Type = "simple";
PIDFile = pidfile;
......@@ -198,8 +221,12 @@ in
# arguments to $(tahoe run). The node directory must come first,
# and arguments which alter Twisted's behavior come afterwards.
ExecStart = ''
${settings.package}/bin/tahoe --eliot-destination ${eliotLog} run ${nodedir} -n -l- --pidfile=${pidfile}
${settings.package}/bin/tahoe --eliot-destination ${eliotLog} run --allow-stdin-close ${nodedir} -n -l- --pidfile=${pidfile}
'';
# Twisted wants non-blocking sockets:
NonBlocking = true;
# The rlimit on number of open files controls how many
# connections a particular storage server can accept (factoring
# in the number of non-connection files the server needs open -
......@@ -236,6 +263,14 @@ in
# now. So it makes sense to have the limit be 2^15 right now.
LimitNOFILE = 32768;
};
unitConfig = {
# Our config doesn't know how to bind all of its sockets on its
# own so don't start without the systemd units that *do* know
# how to bind them.
Requires = [ "tahoe.${node}.socket" ];
};
preStart =
let
created = "${nodedir}.created";
......@@ -287,6 +322,10 @@ in
nameValuePair "tahoe.${node}" {
description = "Tahoe node user for node ${node}";
isSystemUser = true;
group = "tahoe.${node}";
});
users.groups = flip mapAttrs' cfg.nodes (node: _:
nameValuePair "tahoe.${node}" {
});
})
];
......
......@@ -32,7 +32,7 @@ CHECKOUT="${HOME}/PrivateStorageio"
# This is the address of the git remote where we can get the latest
# PrivateStorageio.
REPO="https://whetstone.privatestorage.io/privatestorage/PrivateStorageio.git"
REPO="https://whetstone.private.storage/privatestorage/PrivateStorageio.git"
if [ -e "${CHECKOUT}" ]; then
# It exists already so just make sure it contains the latest changes from
......@@ -72,14 +72,18 @@ EOF
ssh -o StrictHostKeyChecking=no "$(hostname).$(domainname)" ":"
# Set nixpkgs to our preferred version for the morph build. Annoyingly, we
# can't just use nixpkgs-2105.nix as our nixpkgs because some code (in morph,
# can't just use nixpkgs.nix as our nixpkgs because some code (in morph,
# at least) wants <nixpkgs> to be a fully-resolved path to a nixpkgs tree.
# For example, morph evaluated `import <nixpkgs/lib>` which would turn into
# something like `import nixpkgs-2105.nix/lib` which is nonsense.
# something like `import nixpkgs.nix/lib` which is nonsense.
#
# So instead, import our nixpkgs which forces it to be instantiated in the
# store, then ask for its path, then set NIX_PATH to that.
export NIX_PATH="nixpkgs=$(nix eval "(import ${CHECKOUT}/nixpkgs-2105.nix { }).path")"
# Two lines since 'export' masks 'set -e'
# See https://mywiki.wooledge.org/BashFAQ/105#line-204
NIX_PATH="nixpkgs=$(nix --extra-experimental-features nix-command eval --impure --expr "(import ${CHECKOUT}/nixpkgs.nix { }).path")"
export NIX_PATH
# Attempt to update just this host. Choose the morph grid definition matching
# the grid we belong to and limit the morph deployment update to the host
......
......@@ -4,22 +4,14 @@
#
# pkgs.callPackage ./nixos/pkgs
{buildPlatform, hostPlatform, callPackage}:
let
# Our own nixpkgs fork:
ourpkgs = import ../../nixpkgs-ps.nix {
# Ensure that the fork is configured for the same system
# as we were called with.
localSystem = buildPlatform;
crossSystem = hostPlatform;
# Ensure that configuration of the system where this runs
# doesn't leak into what we build.
# See https://github.com/NixOS/nixpkgs/issues/62513
config = {};
overlays = [];
};
in
{
zkapissuer = callPackage ./zkapissuer {};
lib = callPackage ../lib {};
leasereport = callPackage ./leasereport {};
# `privatestorage` is a derivation with a good Tahoe+ZKAP environment
# that is exposed by ZKAPAuthorizer.
privatestorage = callPackage ./privatestorage {};
zkap-spending-service = callPackage ./zkap-spending-service {};
inherit (ourpkgs) privatestorage leasereport;
zkapissuer = callPackage ./zkapissuer {};
megacli2prom = callPackage ./megacli2prom {};
}
{ callPackage, fetchFromGitLab, lib }:
let
  # Pinned upstream revision, read from the JSON file next to this one.
  pin = lib.importJSON ./repo.json;

  # The "branch" key is dropped before the pin is handed to the fetcher.
  src = fetchFromGitLab (removeAttrs pin [ "branch" ]);

  # The upstream repository's `nix` directory provides a `LeaseReport`
  # attribute.
  inherit (import "${src}/nix") LeaseReport;
in
  # Select the LeaseReport executable component.
  LeaseReport.components.exes.LeaseReport
{
"owner": "privatestorage",
"repo": "LeaseReport",
"branch": "main",
"domain": "whetstone.private.storage",
"rev": "25174533c782f5e5f17aa1fa4d29e2adbdf96a08",
"outputHashAlgo": "sha512",
"outputHash": "0h3yzmcizxkz2dl54b8xzbkdb1bvnqiyp8xrhjgzi59y3iq3ggss9i5cy2mbmy467ri8llnax2p2paykv29lw6c8d8zihw1qq5gv46v"
}
\ No newline at end of file
{ pkgs ? import <nixpkgs> {} }:
let
  inherit (pkgs) fetchFromGitHub lib megacli python3 stdenv;

  # Pinned upstream revision, read from the JSON file next to this one.
  pin = lib.importJSON ./repo.json;
in
  stdenv.mkDerivation {
    name = "megacli2prom";

    # The "branch" key is dropped before the pin is handed to the fetcher.
    src = fetchFromGitHub (removeAttrs pin [ "branch" ]);

    buildInputs = [ python3 megacli ];

    # Copy the single script into $out/bin under its command name and mark
    # it executable.
    installPhase = ''
      mkdir -p $out/bin
      cp ./megacli2prom.py $out/bin/megacli2prom
      chmod +x $out/bin/megacli2prom
    '';
  }
{
"owner": "PrivateStorageio",
"repo": "megacli2prom",
"branch": "main",
"rev": "2872bf3526c6074e21ddf9bd684355c928bf1626",
"outputHashAlgo": "sha512",
"outputHash": "1yvyz6lngsx7dv5nb89gb8akzj3hmiihjcg8ya3xcbdvv39qydd8l416k3b3w08pz87gwvxd44bhni5kljqdj60ixyhf1mcvmm0rffw"
}
\ No newline at end of file
{ fetchFromGitHub, callPackage, lib }:
let
  # Pinned upstream revision, read from the JSON file next to this one.
  pin = lib.importJSON ./repo.json;

  # The "branch" key is dropped before the pin is handed to the fetcher.
  src = fetchFromGitHub (removeAttrs pin [ "branch" ]);

  # XXX package version choice here
  zkapauthorizer =
    (import src).outputs.packages.x86_64-linux.zkapauthorizer-python39-tahoe_dev;
in
  # Build a Python environment, from the interpreter the package exposes via
  # `passthru.python`, that contains just that package.
  zkapauthorizer.passthru.python.withPackages (_: [ zkapauthorizer ])