{ "annotations": { "list": [ { "builtIn": 1, "datasource": "-- Grafana --", "enable": true, "hide": true, "iconColor": "rgba(0, 211, 255, 1)", "name": "Annotations & Alerts", "target": { "limit": 100, "matchAny": false, "tags": [], "type": "dashboard" }, "type": "dashboard" } ] }, "description": "USE: Utilization, Saturation and Errors for our resources", "editable": true, "fiscalYearStartMonth": 0, "graphTooltip": 1, "links": [], "liveNow": false, "panels": [ { "collapsed": false, "gridPos": { "h": 1, "w": 24, "x": 0, "y": 0 }, "id": 22, "panels": [], "title": "CPU & Memory", "type": "row" }, { "description": "Some of our software runs in a single thread, so this shows max CPU per core (instead of averaged over all cores)", "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "never", "spanNulls": true, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "off" } }, "links": [], "mappings": [], "max": 1, "min": 0, "thresholds": { "mode": "absolute", "steps": [ { "color": "green", "value": null }, { "color": "red", "value": 80 } ] }, "unit": "percentunit" }, "overrides": [] }, "gridPos": { "h": 7, "w": 6, "x": 0, "y": 1 }, "id": 28, "options": { "legend": { "calcs": [], "displayMode": "hidden", "placement": "bottom" }, "tooltip": { "mode": "multi" } }, "pluginVersion": "8.3.5", "targets": [ { "expr": "1 - (max by (instance) (irate(node_cpu_seconds_total{mode=\"idle\"}[5m])))", "interval": "", "intervalFactor": 1, "legendFormat": "{{instance}}", "refId": "A" } ], "title": "Max CPU % per core per node", "type": "timeseries" }, { "alert": { "alertRuleTags": {}, "conditions": [ { "evaluator": { "params": [ 1 ], 
"type": "gt" }, "operator": { "type": "and" }, "query": { "params": [ "A", "5m", "now" ] }, "reducer": { "params": [], "type": "avg" }, "type": "query" } ], "executionErrorState": "alerting", "for": "5m", "frequency": "1m", "handler": 1, "name": "15 min load average alert", "noDataState": "no_data", "notifications": [] }, "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "never", "spanNulls": true, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "line+area" } }, "displayName": "${__field.labels.instance}", "links": [], "mappings": [], "max": 1, "thresholds": { "mode": "absolute", "steps": [ { "color": "transparent", "value": null }, { "color": "red", "value": 1 } ] }, "unit": "short" }, "overrides": [ { "matcher": { "id": "byName", "options": "node_load15{instance=\"grafana:9100\", job=\"node-exporters\"}" }, "properties": [ { "id": "links" } ] } ] }, "gridPos": { "h": 7, "w": 6, "x": 6, "y": 1 }, "id": 6, "options": { "legend": { "calcs": [], "displayMode": "hidden", "placement": "bottom" }, "tooltip": { "mode": "multi" } }, "pluginVersion": "8.3.5", "targets": [ { "expr": "node_load15", "interval": "", "intervalFactor": 1, "legendFormat": "{{instance}}", "refId": "A" } ], "title": "15 min load average", "type": "timeseries" }, { "alert": { "alertRuleTags": {}, "conditions": [ { "evaluator": { "params": [ 0.8 ], "type": "gt" }, "operator": { "type": "and" }, "query": { "params": [ "A", "15m", "now" ] }, "reducer": { "params": [], "type": "avg" }, "type": "query" } ], "executionErrorState": "alerting", "for": "5m", "frequency": "1m", "handler": 1, "name": "RAM filling up", "noDataState": "no_data", 
"notifications": [] }, "description": "How much RAM is in use? Relative to available system memory.", "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "never", "spanNulls": true, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "line+area" } }, "links": [], "mappings": [], "max": 1, "min": 0, "thresholds": { "mode": "absolute", "steps": [ { "color": "transparent", "value": null }, { "color": "red", "value": 0.8 } ] }, "unit": "percentunit" }, "overrides": [] }, "gridPos": { "h": 7, "w": 6, "x": 12, "y": 1 }, "id": 2, "options": { "legend": { "calcs": [], "displayMode": "hidden", "placement": "bottom" }, "tooltip": { "mode": "multi" } }, "pluginVersion": "8.3.5", "targets": [ { "expr": "1 - (node_memory_MemFree_bytes + node_memory_Buffers_bytes + node_memory_Cached_bytes) / node_memory_MemTotal_bytes\r\n", "interval": "", "intervalFactor": 4, "legendFormat": "{{instance}}", "refId": "A" } ], "title": "RAM used %", "type": "timeseries" }, { "alert": { "alertRuleTags": {}, "conditions": [ { "evaluator": { "params": [ 0.1 ], "type": "gt" }, "operator": { "type": "and" }, "query": { "params": [ "A", "5m", "now" ] }, "reducer": { "params": [], "type": "avg" }, "type": "query" } ], "executionErrorState": "alerting", "for": "5m", "frequency": "1m", "handler": 1, "name": "Swap usage alert", "noDataState": "no_data", "notifications": [] }, "description": "How much Swap is in use? 
Relative to available swap.", "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", "fillOpacity": 10, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "never", "spanNulls": true, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "line+area" } }, "mappings": [], "max": 1, "min": 0, "thresholds": { "mode": "absolute", "steps": [ { "color": "transparent", "value": null }, { "color": "red", "value": 0.1 } ] }, "unit": "percentunit" }, "overrides": [] }, "gridPos": { "h": 7, "w": 6, "x": 18, "y": 1 }, "id": 30, "options": { "legend": { "calcs": [], "displayMode": "hidden", "placement": "bottom" }, "tooltip": { "mode": "multi" } }, "pluginVersion": "8.3.5", "targets": [ { "exemplar": true, "expr": "1 - node_memory_SwapFree_bytes / node_memory_SwapTotal_bytes", "interval": "", "intervalFactor": 1, "legendFormat": "{{instance}}", "refId": "A" } ], "title": "Swap used %", "type": "timeseries" }, { "collapsed": false, "gridPos": { "h": 1, "w": 24, "x": 0, "y": 8 }, "id": 20, "panels": [], "title": "Network", "type": "row" }, { "description": "Shows most saturated network link for every node. 
Bit/s.", "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", "fillOpacity": 10, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "never", "spanNulls": true, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "off" } }, "links": [], "mappings": [], "thresholds": { "mode": "absolute", "steps": [ { "color": "green", "value": null }, { "color": "red", "value": 80 } ] }, "unit": "bps" }, "overrides": [] }, "gridPos": { "h": 7, "w": 8, "x": 0, "y": 9 }, "id": 12, "options": { "legend": { "calcs": [], "displayMode": "hidden", "placement": "bottom" }, "tooltip": { "mode": "single" } }, "pluginVersion": "8.3.5", "targets": [ { "expr": "max by (instance) (rate(node_network_transmit_bytes_total{device!~\"lo|monitoringvpn\"}[5m]) * 8)", "interval": "", "intervalFactor": 4, "legendFormat": "{{instance}} out", "refId": "A" }, { "expr": "- max by (instance) (rate(node_network_receive_bytes_total{device!~\"lo|monitoringvpn\"}[5m]) * 8)", "interval": "", "intervalFactor": 4, "legendFormat": "{{instance}} in", "refId": "B" } ], "title": "Throughput", "transformations": [], "type": "timeseries" }, { "description": "Packet and error count. 
Positive values mean transmit, negative receive.", "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "never", "spanNulls": false, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "off" } }, "links": [], "mappings": [], "thresholds": { "mode": "absolute", "steps": [ { "color": "green", "value": null }, { "color": "red", "value": 80 } ] }, "unit": "pps" }, "overrides": [] }, "gridPos": { "h": 7, "w": 8, "x": 8, "y": 9 }, "id": 26, "options": { "legend": { "calcs": [], "displayMode": "hidden", "placement": "bottom" }, "tooltip": { "mode": "single" } }, "pluginVersion": "8.3.5", "targets": [ { "expr": "- rate(node_network_receive_packets_total{device!~\"lo|monitoringvpn\"}[5m])", "interval": "", "intervalFactor": 4, "legendFormat": "{{instance}} in", "refId": "A" }, { "expr": "- rate(node_network_receive_errs_total{device!~\"lo|monitoringvpn\"}[5m])", "interval": "", "intervalFactor": 4, "legendFormat": "{{instance}} in err", "refId": "B" }, { "expr": "rate(node_network_transmit_packets_total{device!~\"lo|monitoringvpn\"}[5m])", "interval": "", "intervalFactor": 4, "legendFormat": "{{instance}} out", "refId": "C" }, { "expr": "rate(node_network_transmit_errs_total{device!~\"lo|monitoringvpn\"}[5m])", "interval": "", "intervalFactor": 4, "legendFormat": "{{instance}} out err", "refId": "D" } ], "title": "Network pkt/s", "type": "timeseries" }, { "alert": { "alertRuleTags": {}, "conditions": [ { "evaluator": { "params": [ -1, 1 ], "type": "outside_range" }, "operator": { "type": "and" }, "query": { "params": [ "A", "5m", "now" ] }, "reducer": { "params": [], "type": "avg" }, "type": "query" }, { "evaluator": { 
"params": [ -1, 1 ], "type": "outside_range" }, "operator": { "type": "or" }, "query": { "params": [ "B", "5m", "now" ] }, "reducer": { "params": [], "type": "avg" }, "type": "query" }, { "evaluator": { "params": [ -1, 1 ], "type": "outside_range" }, "operator": { "type": "or" }, "query": { "params": [ "C", "5m", "now" ] }, "reducer": { "params": [], "type": "avg" }, "type": "query" }, { "evaluator": { "params": [ -1, 1 ], "type": "outside_range" }, "operator": { "type": "or" }, "query": { "params": [ "D", "5m", "now" ] }, "reducer": { "params": [], "type": "avg" }, "type": "query" } ], "executionErrorState": "alerting", "for": "5m", "frequency": "1m", "handler": 1, "message": "", "name": "Network errors alert", "noDataState": "no_data", "notifications": [] }, "description": "Network errors, drops etc. Should all be 0.", "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", "fillOpacity": 10, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "never", "spanNulls": true, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "off" } }, "links": [], "mappings": [], "thresholds": { "mode": "absolute", "steps": [ { "color": "transparent", "value": null }, { "color": "red", "value": 10 } ] }, "unit": "short" }, "overrides": [] }, "gridPos": { "h": 7, "w": 8, "x": 16, "y": 9 }, "id": 10, "options": { "legend": { "calcs": [], "displayMode": "hidden", "placement": "bottom" }, "tooltip": { "mode": "single" } }, "pluginVersion": "8.3.5", "targets": [ { "datasource": { "type": "prometheus", "uid": "000000001" }, "exemplar": true, "expr": "rate(node_network_transmit_errs_total{device!=\"lo\"}[5m])\n", "interval": "", "legendFormat": "{{instance}} {{device}}", "refId": "A" }, { "datasource": { 
"type": "prometheus", "uid": "000000001" }, "exemplar": true, "expr": "rate(node_network_transmit_drop_total{device!=\"lo\"}[5m])", "interval": "", "legendFormat": "{{instance}} {{device}}", "refId": "B" }, { "datasource": { "type": "prometheus", "uid": "000000001" }, "exemplar": true, "expr": "- rate(node_network_receive_drop_total{device!=\"lo\"}[5m])", "interval": "", "legendFormat": "{{instance}} {{device}}", "refId": "C" }, { "datasource": { "type": "prometheus", "uid": "000000001" }, "exemplar": true, "expr": "- rate(node_network_receive_errs_total{device!=\"lo\"}[5m])", "interval": "", "legendFormat": "{{instance}} {{device}}", "refId": "D" } ], "thresholds": [ { "colorMode": "critical", "op": "lt", "value": -1, "visible": true }, { "colorMode": "critical", "op": "gt", "value": 1, "visible": true } ], "title": "Network errors", "type": "timeseries" }, { "collapsed": false, "gridPos": { "h": 1, "w": 24, "x": 0, "y": 16 }, "id": 18, "panels": [], "title": "Storage", "type": "row" }, { "alert": { "alertRuleTags": {}, "conditions": [ { "evaluator": { "params": [ 0.8 ], "type": "gt" }, "operator": { "type": "and" }, "query": { "params": [ "A", "5m", "now" ] }, "reducer": { "params": [], "type": "avg" }, "type": "query" } ], "executionErrorState": "alerting", "for": "5m", "frequency": "1m", "handler": 1, "name": "Filesystem usage % alert", "noDataState": "no_data", "notifications": [] }, "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, "description": "Watch filesystems filling up. 
Shows only mounts with more than 10 % of their space used.", "fieldConfig": { "defaults": { "links": [], "unit": "percentunit" }, "overrides": [] }, "fill": 0, "fillGradient": 0, "gridPos": { "h": 7, "w": 6, "x": 0, "y": 17 }, "hiddenSeries": false, "id": 4, "legend": { "avg": false, "current": false, "hideEmpty": false, "hideZero": false, "max": false, "min": false, "show": false, "total": false, "values": false }, "lines": true, "linewidth": 1, "nullPointMode": "null", "options": { "alertThreshold": true }, "percentage": false, "pluginVersion": "8.3.6", "pointradius": 2, "points": false, "renderer": "flot", "seriesOverrides": [], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { "expr": "1 - (node_filesystem_avail_bytes / node_filesystem_size_bytes) > 0.1", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 2, "legendFormat": "{{instance}} {{mountpoint}} ", "refId": "A" } ], "thresholds": [ { "colorMode": "critical", "fill": true, "line": true, "op": "gt", "value": 0.8, "visible": true } ], "timeRegions": [], "title": "Storage usage %", "tooltip": { "shared": false, "sort": 2, "value_type": "individual" }, "transformations": [], "type": "graph", "xaxis": { "mode": "time", "show": true, "values": [] }, "yaxes": [ { "$$hashKey": "object:131", "format": "percentunit", "logBase": 1, "max": "1", "min": "0", "show": true }, { "$$hashKey": "object:132", "format": "short", "logBase": 1, "show": true } ], "yaxis": { "align": false } }, { "description": "Input Output Operations per second. 
Positive values mean read, negative write.", "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "never", "spanNulls": false, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "off" } }, "links": [], "mappings": [], "thresholds": { "mode": "absolute", "steps": [ { "color": "green", "value": null }, { "color": "red", "value": 80 } ] }, "unit": "short" }, "overrides": [] }, "gridPos": { "h": 7, "w": 6, "x": 6, "y": 17 }, "id": 14, "options": { "legend": { "calcs": [], "displayMode": "hidden", "placement": "bottom" }, "tooltip": { "mode": "single" } }, "pluginVersion": "8.3.5", "targets": [ { "expr": "rate(node_disk_reads_completed_total[5m]) > 0", "interval": "", "intervalFactor": 4, "legendFormat": "{{instance}} R {{device}}", "refId": "A" }, { "expr": "- (rate(node_disk_writes_completed_total[5m]) > 0)", "interval": "", "intervalFactor": 4, "legendFormat": "{{instance}} W {{device}}", "refId": "B" } ], "title": "IOPS", "type": "timeseries" }, { "description": "Max average storage latency per node. 
Positive values mean read, negative write.", "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "never", "spanNulls": false, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "off" } }, "links": [], "mappings": [], "thresholds": { "mode": "absolute", "steps": [ { "color": "green", "value": null }, { "color": "red", "value": 80 } ] }, "unit": "s" }, "overrides": [] }, "gridPos": { "h": 7, "w": 6, "x": 12, "y": 17 }, "id": 16, "options": { "legend": { "calcs": [], "displayMode": "hidden", "placement": "bottom" }, "tooltip": { "mode": "single" } }, "pluginVersion": "8.3.5", "targets": [ { "expr": "max by (instance, device) (rate(node_disk_read_time_seconds_total[5m]) / rate(node_disk_reads_completed_total[5m]))", "interval": "", "intervalFactor": 4, "legendFormat": "{{instance}} R {{device}}", "refId": "A" }, { "expr": "- max by (instance, device) (rate(node_disk_write_time_seconds_total[5m]) / rate(node_disk_writes_completed_total[5m]))", "interval": "", "intervalFactor": 4, "legendFormat": "{{instance}} W {{device}}", "refId": "B" } ], "title": "Storage latency", "type": "timeseries" }, { "alert": { "alertRuleTags": {}, "conditions": [ { "evaluator": { "params": [ 0 ], "type": "gt" }, "operator": { "type": "and" }, "query": { "params": [ "A", "5m", "now" ] }, "reducer": { "params": [], "type": "count" }, "type": "query" } ], "executionErrorState": "alerting", "for": "5m", "frequency": "5m", "handler": 1, "name": "Degraded RAID alert", "noDataState": "ok", "notifications": [] }, "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { "axisLabel": "", "axisPlacement": "auto", 
"barAlignment": 0, "drawStyle": "bars", "fillOpacity": 100, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "never", "spanNulls": true, "stacking": { "group": "A", "mode": "normal" }, "thresholdsStyle": { "mode": "line" } }, "mappings": [], "min": 0, "thresholds": { "mode": "absolute", "steps": [ { "color": "transparent", "value": null }, { "color": "red", "value": 0 } ] }, "unit": "short" }, "overrides": [] }, "gridPos": { "h": 7, "w": 6, "x": 18, "y": 17 }, "id": 32, "options": { "legend": { "calcs": [], "displayMode": "hidden", "placement": "bottom" }, "tooltip": { "mode": "single" } }, "pluginVersion": "8.3.5", "targets": [ { "exemplar": true, "expr": "megacli_drives{state=\"Degraded\"}", "interval": "", "legendFormat": "{{instance}}", "refId": "A" } ], "title": "Degraded RAID arrays", "type": "timeseries" } ], "refresh": "5s", "schemaVersion": 34, "style": "dark", "tags": [], "templating": { "list": [] }, "time": { "from": "now-2d", "to": "now" }, "timepicker": {}, "timezone": "", "title": "Resources overview", "uid": "ResourcesOverview", "version": 1, "weekStart": "" }