Skip to content
Snippets Groups Projects
resources-overview.json 44.1 KiB
Newer Older
{
  "annotations": {
    "list": [
      {
        "builtIn": 1,
        "datasource": "-- Grafana --",
        "enable": true,
        "hide": true,
        "iconColor": "rgba(0, 211, 255, 1)",
        "name": "Annotations & Alerts",
        "target": {
          "limit": 100,
          "matchAny": false,
          "tags": [],
          "type": "dashboard"
        },
        "type": "dashboard"
      }
    ]
  },
  "description": "USE: Usage, Saturation and Error rate for our resources",
  "editable": true,
  "fiscalYearStartMonth": 0,
  "graphTooltip": 1,
  "panels": [
    {
      "collapsed": false,
      "gridPos": {
        "h": 1,
        "w": 24,
        "x": 0,
        "y": 0
      },
      "id": 22,
      "panels": [],
      "title": "CPU & Memory",
      "type": "row"
    },
    {
      "description": "Some of our software runs in a single thread, so this shows max CPU per core (instead of averaged over all cores)",
      "fieldConfig": {
        "defaults": {
          "color": {
            "mode": "palette-classic"
          },
          "custom": {
            "axisLabel": "",
            "axisPlacement": "auto",
            "barAlignment": 0,
            "drawStyle": "line",
            "fillOpacity": 0,
            "gradientMode": "none",
            "hideFrom": {
              "legend": false,
              "tooltip": false,
              "viz": false
            },
            "lineInterpolation": "linear",
            "lineWidth": 1,
            "pointSize": 5,
            "scaleDistribution": {
              "type": "linear"
            },
            "showPoints": "never",
            "spanNulls": true,
            "stacking": {
              "group": "A",
              "mode": "none"
            },
            "thresholdsStyle": {
              "mode": "off"
            }
          },
          "links": [],
          "mappings": [],
          "max": 1,
          "min": 0,
          "thresholds": {
            "mode": "absolute",
            "steps": [
              {
                "color": "green",
                "value": null
              },
              {
                "color": "red",
                "value": 80
              }
            ]
          },
          "unit": "percentunit"
        },
        "overrides": []
      },
      "gridPos": {
        "h": 7,
        "x": 0,
        "y": 1
      },
      "id": 28,
      "options": {
        "legend": {
          "calcs": [],
          "displayMode": "hidden",
          "placement": "bottom"
        },
        "tooltip": {
          "mode": "multi",
          "sort": "none"
      "targets": [
        {
          "expr": "1 - (max by (instance) (irate(node_cpu_seconds_total{mode=\"idle\"}[5m])))",
          "interval": "",
          "intervalFactor": 1,
          "legendFormat": "{{instance}}",
          "refId": "A"
        }
      ],
      "title": "Max CPU % per core per node",
    },
    {
      "alert": {
        "alertRuleTags": {},
        "conditions": [
          {
            "evaluator": {
              "params": [
                1
              ],
              "type": "gt"
            },
            "operator": {
              "type": "and"
            },
            "query": {
              "params": [
                "A",
                "5m",
                "now"
              ]
            },
            "reducer": {
              "params": [],
              "type": "avg"
            },
            "type": "query"
          }
        ],
        "executionErrorState": "alerting",
        "for": "5m",
        "frequency": "1m",
        "handler": 1,
        "name": "15 min load average alert",
        "noDataState": "no_data",
        "notifications": []
      },
      "fieldConfig": {
        "defaults": {
          "color": {
            "mode": "palette-classic"
          },
          "custom": {
            "axisLabel": "",
            "axisPlacement": "auto",
            "barAlignment": 0,
            "drawStyle": "line",
            "fillOpacity": 0,
            "gradientMode": "none",
            "hideFrom": {
              "legend": false,
              "tooltip": false,
              "viz": false
            },
            "lineInterpolation": "linear",
            "lineWidth": 1,
            "pointSize": 5,
            "scaleDistribution": {
              "type": "linear"
            },
            "showPoints": "never",
            "spanNulls": true,
            "stacking": {
              "group": "A",
              "mode": "none"
            },
            "thresholdsStyle": {
              "mode": "area"
          "displayName": "${__field.labels.instance}",
          "links": [],
          "mappings": [],
          "max": 1,
          "thresholds": {
            "mode": "absolute",
            "steps": [
              {
                "color": "transparent",
                "value": null
              },
              {
                "color": "red",
                "value": 1
              }
            ]
          },
          "unit": "short"
        },
        "overrides": [
          {
            "matcher": {
              "id": "byName",
              "options": "node_load15{instance=\"grafana:9100\", job=\"node-exporters\"}"
            },
            "properties": [
              {
                "id": "links"
              }
            ]
          }
        ]
      },
      "gridPos": {
        "h": 7,
        "legend": {
          "calcs": [],
          "displayMode": "hidden",
          "placement": "bottom"
        },
        "tooltip": {
          "mode": "multi",
          "sort": "none"
      "targets": [
        {
          "expr": "node_load15",
          "interval": "",
          "intervalFactor": 1,
          "legendFormat": "{{instance}}",
          "refId": "A"
        }
      ],
      "title": "15 min load average",
    },
    {
      "alert": {
        "alertRuleTags": {},
        "conditions": [
          {
            "evaluator": {
              "params": [
                0.8
              ],
              "type": "gt"
            },
            "operator": {
              "type": "and"
            },
            "query": {
              "params": [
                "A",
                "15m",
                "now"
              ]
            },
            "reducer": {
              "params": [],
              "type": "avg"
            },
            "type": "query"
          }
        ],
        "executionErrorState": "alerting",
        "for": "5m",
        "frequency": "1m",
        "handler": 1,
        "name": "RAM filling up",
        "noDataState": "no_data",
        "notifications": []
      },
      "description": "How much RAM is in use? Relative to available system memory.",
      "fieldConfig": {
        "defaults": {
          "color": {
            "mode": "palette-classic"
          },
          "custom": {
            "axisLabel": "",
            "axisPlacement": "auto",
            "barAlignment": 0,
            "drawStyle": "line",
            "fillOpacity": 0,
            "gradientMode": "none",
            "hideFrom": {
              "legend": false,
              "tooltip": false,
              "viz": false
            },
            "lineInterpolation": "linear",
            "lineWidth": 1,
            "pointSize": 5,
            "scaleDistribution": {
              "type": "linear"
            },
            "showPoints": "never",
            "spanNulls": true,
            "stacking": {
              "group": "A",
              "mode": "none"
            },
            "thresholdsStyle": {
              "mode": "area"
            }
          },
          "links": [],
          "mappings": [],
          "max": 1,
          "min": 0,
          "thresholds": {
            "mode": "absolute",
            "steps": [
              {
                "color": "transparent",
                "value": null
              },
              {
                "color": "red",
                "value": 0.8
              }
            ]
          },
          "unit": "percentunit"
        },
        "overrides": []
      },
      "gridPos": {
        "h": 7,
        "legend": {
          "calcs": [],
          "displayMode": "hidden",
          "placement": "bottom"
        },
        "tooltip": {
          "mode": "multi",
          "sort": "none"
          "datasource": {
            "type": "prometheus",
            "uid": "000000001"
          },
          "exemplar": true,
          "expr": "1 - node_memory_MemAvailable_bytes / node_memory_MemTotal_bytes unless node_zfs_arc_size",
          "hide": false,
          "instant": false,
          "interval": "",
          "intervalFactor": 4,
          "legendFormat": "{{instance}}",
          "refId": "Hosts without ZFS"
        },
        {
          "datasource": {
            "type": "prometheus",
            "uid": "000000001"
          },
          "exemplar": true,
          "expr": "1 - (node_memory_MemAvailable_bytes + node_zfs_arc_size) / node_memory_MemTotal_bytes",
          "hide": false,
          "instant": false,
          "interval": "",
          "legendFormat": "{{instance}}",
          "refId": "Hosts with ZFS"
    {
      "alert": {
        "alertRuleTags": {},
        "conditions": [
          {
            "evaluator": {
              "params": [
                0.1
              ],
              "type": "gt"
            },
            "operator": {
              "type": "and"
            },
            "query": {
              "params": [
                "A",
                "5m",
                "now"
              ]
            },
            "reducer": {
              "params": [],
              "type": "avg"
            },
            "type": "query"
          }
        ],
        "executionErrorState": "alerting",
        "for": "5m",
        "frequency": "1m",
        "handler": 1,
        "name": "Swap usage alert",
        "noDataState": "no_data",
        "notifications": []
      },
      "description": "How much Swap is in use? Relative to available swap.",
      "fieldConfig": {
        "defaults": {
          "color": {
            "mode": "palette-classic"
          },
          "custom": {
            "axisLabel": "",
            "axisPlacement": "auto",
            "barAlignment": 0,
            "drawStyle": "line",
            "fillOpacity": 10,
            "gradientMode": "none",
            "hideFrom": {
              "legend": false,
              "tooltip": false,
              "viz": false
            },
            "lineInterpolation": "linear",
            "lineWidth": 1,
            "pointSize": 5,
            "scaleDistribution": {
              "type": "linear"
            },
            "showPoints": "never",
            "spanNulls": true,
            "stacking": {
              "group": "A",
              "mode": "none"
            },
            "thresholdsStyle": {
              "mode": "area"
            }
          },
          "mappings": [],
          "max": 1,
          "min": 0,
          "thresholds": {
            "mode": "absolute",
            "steps": [
              {
                "color": "transparent",
                "value": null
              },
              {
                "color": "red",
                "value": 0.1
              }
            ]
          },
          "unit": "percentunit"
        },
        "overrides": []
      },
      "gridPos": {
        "h": 7,
        "w": 6,
        "x": 18,
        "y": 1
      },
      "id": 30,
      "options": {
        "legend": {
          "calcs": [],
          "displayMode": "hidden",
          "placement": "bottom"
        },
        "tooltip": {
          "mode": "multi",
          "sort": "none"
      "targets": [
        {
          "exemplar": true,
          "expr": "1 - node_memory_SwapFree_bytes / node_memory_SwapTotal_bytes",
          "interval": "",
          "intervalFactor": 1,
          "legendFormat": "{{instance}}",
          "refId": "A"
        }
      ],
      "thresholds": [
        {
          "colorMode": "critical",
          "op": "gt",
          "value": 0.1,
          "visible": true
        }
      ],
      "title": "Swap used %",
    {
      "collapsed": false,
      "gridPos": {
        "h": 1,
        "w": 24,
        "x": 0,
        "y": 8
      },
      "id": 20,
      "panels": [],
      "title": "Network",
      "type": "row"
    },
    {
      "description": "Shows most saturated network link for every node. Bit/s.",
      "fieldConfig": {
        "defaults": {
          "color": {
            "mode": "palette-classic"
          },
          "custom": {
            "axisLabel": "",
            "axisPlacement": "auto",
            "barAlignment": 0,
            "drawStyle": "line",
            "fillOpacity": 10,
            "gradientMode": "none",
            "hideFrom": {
              "legend": false,
              "tooltip": false,
              "viz": false
            },
            "lineInterpolation": "linear",
            "lineWidth": 1,
            "pointSize": 5,
            "scaleDistribution": {
              "type": "linear"
            },
            "showPoints": "never",
            "spanNulls": true,
            "stacking": {
              "group": "A",
              "mode": "none"
            },
            "thresholdsStyle": {
              "mode": "off"
            }
          },
          "links": [],
          "mappings": [],
          "thresholds": {
            "mode": "absolute",
            "steps": [
              {
                "color": "green",
                "value": null
              },
              {
                "color": "red",
                "value": 80
              }
            ]
          },
          "unit": "bps"
        },
        "overrides": []
      },
      "gridPos": {
        "h": 7,
        "w": 8,
        "x": 0,
        "y": 9
      },
      "id": 12,
      "options": {
        "legend": {
          "calcs": [],
          "displayMode": "hidden",
          "placement": "bottom"
        },
        "tooltip": {
          "mode": "single",
          "sort": "none"
          "expr": "max by (instance) (rate(node_network_transmit_bytes_total{device!~\"lo|monitoringvpn\"}[5m]) * 8)",
          "interval": "",
          "intervalFactor": 4,
          "legendFormat": "{{instance}} out",
          "refId": "A"
        },
        {
          "expr": "- max by (instance) (rate(node_network_receive_bytes_total{device!~\"lo|monitoringvpn\"}[5m]) * 8)",
          "interval": "",
          "intervalFactor": 4,
          "legendFormat": "{{instance}} in",
          "refId": "B"
        }
      ],
      "title": "Throughput",
      "transformations": [],
      "description": "Packet and error count. Positive values mean transmit, negative receive.",
      "fieldConfig": {
        "defaults": {
          "color": {
            "mode": "palette-classic"
          },
          "custom": {
            "axisLabel": "",
            "axisPlacement": "auto",
            "barAlignment": 0,
            "drawStyle": "line",
            "fillOpacity": 0,
            "gradientMode": "none",
            "hideFrom": {
              "legend": false,
              "tooltip": false,
              "viz": false
            },
            "lineInterpolation": "linear",
            "lineWidth": 1,
            "pointSize": 5,
            "scaleDistribution": {
              "type": "linear"
            },
            "showPoints": "never",
            "spanNulls": false,
            "stacking": {
              "group": "A",
              "mode": "none"
            },
            "thresholdsStyle": {
              "mode": "off"
            }
          },
          "links": [],
          "mappings": [],
          "thresholds": {
            "mode": "absolute",
            "steps": [
              {
                "color": "green",
                "value": null
              },
              {
                "color": "red",
                "value": 80
              }
            ]
          },
          "unit": "pps"
        },
        "overrides": []
      },
      "gridPos": {
        "h": 7,
        "w": 8,
        "x": 8,
        "y": 9
      },
      "id": 26,
      "options": {
        "legend": {
          "calcs": [],
          "displayMode": "hidden",
          "placement": "bottom"
        },
        "tooltip": {
          "mode": "single",
          "sort": "none"
          "expr": "- rate(node_network_receive_packets_total{device!~\"lo|monitoringvpn\"}[5m])",
          "interval": "",
          "intervalFactor": 4,
          "legendFormat": "{{instance}} in",
          "expr": "- rate(node_network_receive_errs_total{device!~\"lo|monitoringvpn\"}[5m])",
          "interval": "",
          "intervalFactor": 4,
          "legendFormat": "{{instance}} in err",
          "expr": "rate(node_network_transmit_packets_total{device!~\"lo|monitoringvpn\"}[5m])",
          "interval": "",
          "intervalFactor": 4,
          "legendFormat": "{{instance}} out",
          "expr": "rate(node_network_transmit_errs_total{device!~\"lo|monitoringvpn\"}[5m])",
          "interval": "",
          "intervalFactor": 4,
          "legendFormat": "{{instance}} out err",
          "refId": "D"
        }
      ],
      "title": "Network pkt/s",
    },
    {
      "alert": {
        "alertRuleTags": {},
        "conditions": [
          {
            "evaluator": {
              "params": [
              "type": "outside_range"
            },
            "operator": {
              "type": "and"
            },
            "query": {
              "params": [
                "A",
                "5m",
                "now"
              ]
            },
            "reducer": {
              "params": [],
              "type": "avg"
            },
            "type": "query"
          },
          {
            "evaluator": {
              "params": [
              "type": "outside_range"
            },
            "operator": {
              "type": "or"
            },
            "query": {
              "params": [
                "B",
                "5m",
                "now"
              ]
            },
            "reducer": {
              "params": [],
              "type": "avg"
            },
            "type": "query"
          },
          {
            "evaluator": {
              "params": [
              "type": "outside_range"
            },
            "operator": {
              "type": "or"
            },
            "query": {
              "params": [
                "C",
                "5m",
                "now"
              ]
            },
            "reducer": {
              "params": [],
              "type": "avg"
            },
            "type": "query"
          },
          {
            "evaluator": {
              "params": [
              "type": "outside_range"
            },
            "operator": {
              "type": "or"
            },
            "query": {
              "params": [
                "D",
                "5m",
                "now"
              ]
            },
            "reducer": {
              "params": [],
              "type": "avg"
            },
            "type": "query"
          }
        ],
        "executionErrorState": "alerting",
        "for": "5m",
        "frequency": "1m",
        "handler": 1,
        "message": "",
        "name": "Network errors alert",
        "noDataState": "no_data",
        "notifications": []
      },
      "description": "Network errors, drops etc. Should all be 0.",
      "fieldConfig": {
        "defaults": {
          "color": {
            "mode": "palette-classic"
          },
          "custom": {
            "axisLabel": "",
            "axisPlacement": "auto",
            "barAlignment": 0,
            "drawStyle": "line",
            "fillOpacity": 10,
            "gradientMode": "none",
            "hideFrom": {
              "legend": false,
              "tooltip": false,
              "viz": false
            },
            "lineInterpolation": "linear",
            "lineWidth": 1,
            "pointSize": 5,
            "scaleDistribution": {
              "type": "linear"
            },
            "showPoints": "never",
            "spanNulls": true,
            "stacking": {
              "group": "A",
              "mode": "none"
            },
            "thresholdsStyle": {
              "mode": "off"
            }
          },
          "links": [],
          "mappings": [],
          "thresholds": {
            "mode": "absolute",
            "steps": [
              {
                "color": "transparent",
                "value": null
              },
              {
                "color": "red",
                "value": 10
              }
            ]
          },
          "unit": "short"
        "legend": {
          "calcs": [],
          "displayMode": "hidden",
          "placement": "bottom"
        },
        "tooltip": {
          "mode": "single",
          "sort": "none"
          "datasource": {
            "type": "prometheus",
            "uid": "000000001"
          },
          "exemplar": true,
          "expr": "rate(node_network_transmit_errs_total{device!=\"lo\"}[5m])\n",
          "legendFormat": "{{instance}} {{device}}",
          "datasource": {
            "type": "prometheus",
            "uid": "000000001"
          },
          "exemplar": true,
          "expr": "rate(node_network_transmit_drop_total{device!=\"lo\"}[5m])",
          "legendFormat": "{{instance}} {{device}}",
          "datasource": {
            "type": "prometheus",
            "uid": "000000001"
          },
          "exemplar": true,
          "expr": "- rate(node_network_receive_drop_total{device!=\"lo\"}[5m])",
          "legendFormat": "{{instance}} {{device}}",
          "datasource": {
            "type": "prometheus",
            "uid": "000000001"
          },
          "exemplar": true,
          "expr": "- rate(node_network_receive_errs_total{device!=\"lo\"}[5m])",
          "legendFormat": "{{instance}} {{device}}",
      "thresholds": [
        {
          "colorMode": "critical",
          "op": "lt",
          "value": -1,
          "visible": true
        },
        {
          "colorMode": "critical",
          "op": "gt",
          "value": 1,
          "visible": true
        }
      ],
      "title": "Network errors",
    },
    {
      "collapsed": false,
      "gridPos": {
        "h": 1,
        "w": 24,
        "x": 0,
        "y": 16
      },
      "id": 18,
      "panels": [],
      "title": "Storage",