Skip to content
Snippets Groups Projects
resources-overview.json 44.1 KiB
Newer Older
  • Learn to ignore specific revisions
  • {
      "annotations": {
        "list": [
          {
            "builtIn": 1,
            "datasource": "-- Grafana --",
            "enable": true,
            "hide": true,
            "iconColor": "rgba(0, 211, 255, 1)",
            "name": "Annotations & Alerts",
    
            "target": {
              "limit": 100,
              "matchAny": false,
              "tags": [],
              "type": "dashboard"
            },
    
            "type": "dashboard"
          }
        ]
      },
      "description": "USE: Utilization, Saturation and Errors for our resources",
      "editable": true,
    
      "fiscalYearStartMonth": 0,
      "graphTooltip": 1,
    
      "panels": [
        {
          "collapsed": false,
          "gridPos": {
            "h": 1,
            "w": 24,
            "x": 0,
            "y": 0
          },
          "id": 22,
          "panels": [],
          "title": "CPU & Memory",
          "type": "row"
        },
        {
          "description": "Some of our software runs in a single thread, so this shows max CPU per core (instead of averaged over all cores)",
          "fieldConfig": {
            "defaults": {
    
              "color": {
                "mode": "palette-classic"
              },
              "custom": {
                "axisLabel": "",
                "axisPlacement": "auto",
                "barAlignment": 0,
                "drawStyle": "line",
                "fillOpacity": 0,
                "gradientMode": "none",
                "hideFrom": {
                  "legend": false,
                  "tooltip": false,
                  "viz": false
                },
                "lineInterpolation": "linear",
                "lineWidth": 1,
                "pointSize": 5,
                "scaleDistribution": {
                  "type": "linear"
                },
                "showPoints": "never",
                "spanNulls": true,
                "stacking": {
                  "group": "A",
                  "mode": "none"
                },
                "thresholdsStyle": {
                  "mode": "off"
                }
              },
              "links": [],
              "mappings": [],
              "max": 1,
              "min": 0,
              "thresholds": {
                "mode": "absolute",
                "steps": [
                  {
                    "color": "green",
                    "value": null
                  },
                  {
                    "color": "red",
                    "value": 0.8
                  }
                ]
              },
              "unit": "percentunit"
    
            },
            "overrides": []
          },
          "gridPos": {
            "h": 7,
    
            "x": 0,
            "y": 1
          },
          "id": 28,
          "options": {
    
            "legend": {
              "calcs": [],
              "displayMode": "hidden",
              "placement": "bottom"
            },
            "tooltip": {
    
              "mode": "multi",
              "sort": "none"
    
          "targets": [
            {
              "expr": "1 - (max by (instance) (irate(node_cpu_seconds_total{mode=\"idle\"}[5m])))",
              "interval": "",
              "intervalFactor": 1,
              "legendFormat": "{{instance}}",
              "refId": "A"
            }
          ],
          "title": "Max CPU % per core per node",
    
        },
        {
          "alert": {
            "alertRuleTags": {},
            "conditions": [
              {
                "evaluator": {
                  "params": [
                    1
                  ],
                  "type": "gt"
                },
                "operator": {
                  "type": "and"
                },
                "query": {
                  "params": [
                    "A",
                    "5m",
                    "now"
                  ]
                },
                "reducer": {
                  "params": [],
                  "type": "avg"
                },
                "type": "query"
              }
            ],
            "executionErrorState": "alerting",
            "for": "5m",
            "frequency": "1m",
            "handler": 1,
            "name": "15 min load average alert",
            "noDataState": "no_data",
            "notifications": []
          },
          "fieldConfig": {
            "defaults": {
    
              "color": {
                "mode": "palette-classic"
              },
              "custom": {
                "axisLabel": "",
                "axisPlacement": "auto",
                "barAlignment": 0,
                "drawStyle": "line",
                "fillOpacity": 0,
                "gradientMode": "none",
                "hideFrom": {
                  "legend": false,
                  "tooltip": false,
                  "viz": false
                },
                "lineInterpolation": "linear",
                "lineWidth": 1,
                "pointSize": 5,
                "scaleDistribution": {
                  "type": "linear"
                },
                "showPoints": "never",
                "spanNulls": true,
                "stacking": {
                  "group": "A",
                  "mode": "none"
                },
                "thresholdsStyle": {
    
                  "mode": "area"
    
              "displayName": "${__field.labels.instance}",
    
              "links": [],
              "mappings": [],
              "max": 1,
              "thresholds": {
                "mode": "absolute",
                "steps": [
                  {
                    "color": "transparent",
                    "value": null
                  },
                  {
                    "color": "red",
                    "value": 1
                  }
                ]
              },
              "unit": "short"
    
            },
            "overrides": [
              {
                "matcher": {
                  "id": "byName",
                  "options": "node_load15{instance=\"grafana:9100\", job=\"node-exporters\"}"
                },
                "properties": [
                  {
                    "id": "links"
                  }
                ]
              }
            ]
          },
          "gridPos": {
            "h": 7,
    
            "legend": {
              "calcs": [],
              "displayMode": "hidden",
              "placement": "bottom"
            },
            "tooltip": {
    
              "mode": "multi",
              "sort": "none"
    
          "targets": [
            {
              "expr": "node_load15",
              "interval": "",
              "intervalFactor": 1,
    
              "legendFormat": "{{instance}}",
    
              "refId": "A"
            }
          ],
          "title": "15 min load average",
    
        },
        {
          "alert": {
            "alertRuleTags": {},
            "conditions": [
              {
                "evaluator": {
                  "params": [
                    0.8
                  ],
                  "type": "gt"
                },
                "operator": {
                  "type": "and"
                },
                "query": {
                  "params": [
                    "Hosts without ZFS",
                    "15m",
                    "now"
                  ]
                },
                "reducer": {
                  "params": [],
                  "type": "avg"
                },
                "type": "query"
              },
              {
                "evaluator": {
                  "params": [
                    0.8
                  ],
                  "type": "gt"
                },
                "operator": {
                  "type": "or"
                },
                "query": {
                  "params": [
                    "Hosts with ZFS",
                    "15m",
                    "now"
                  ]
                },
                "reducer": {
                  "params": [],
                  "type": "avg"
                },
                "type": "query"
              }
            ],
            "executionErrorState": "alerting",
            "for": "5m",
            "frequency": "1m",
            "handler": 1,
            "name": "RAM filling up",
            "noDataState": "no_data",
            "notifications": []
          },
          "description": "How much RAM is in use? Relative to available system memory.",
          "fieldConfig": {
            "defaults": {
    
              "color": {
                "mode": "palette-classic"
              },
              "custom": {
                "axisLabel": "",
                "axisPlacement": "auto",
                "barAlignment": 0,
                "drawStyle": "line",
                "fillOpacity": 0,
                "gradientMode": "none",
                "hideFrom": {
                  "legend": false,
                  "tooltip": false,
                  "viz": false
                },
                "lineInterpolation": "linear",
                "lineWidth": 1,
                "pointSize": 5,
                "scaleDistribution": {
                  "type": "linear"
                },
                "showPoints": "never",
                "spanNulls": true,
                "stacking": {
                  "group": "A",
                  "mode": "none"
                },
                "thresholdsStyle": {
    
                  "mode": "area"
    
                }
              },
              "links": [],
              "mappings": [],
              "max": 1,
              "min": 0,
              "thresholds": {
                "mode": "absolute",
                "steps": [
                  {
                    "color": "transparent",
                    "value": null
                  },
                  {
                    "color": "red",
                    "value": 0.8
                  }
                ]
              },
              "unit": "percentunit"
    
            },
            "overrides": []
          },
          "gridPos": {
            "h": 7,
    
            "legend": {
              "calcs": [],
              "displayMode": "hidden",
              "placement": "bottom"
            },
            "tooltip": {
    
              "mode": "multi",
              "sort": "none"
    
              "datasource": {
                "type": "prometheus",
                "uid": "000000001"
              },
              "exemplar": true,
    
              "expr": "1 - node_memory_MemAvailable_bytes / node_memory_MemTotal_bytes unless node_zfs_arc_size",
              "hide": false,
              "instant": false,
    
              "interval": "",
              "intervalFactor": 4,
              "legendFormat": "{{instance}}",
    
              "refId": "Hosts without ZFS"
            },
            {
              "datasource": {
                "type": "prometheus",
                "uid": "000000001"
              },
              "exemplar": true,
              "expr": "1 - (node_memory_MemAvailable_bytes + node_zfs_arc_size) / node_memory_MemTotal_bytes",
              "hide": false,
              "instant": false,
              "interval": "",
              "legendFormat": "{{instance}}",
              "refId": "Hosts with ZFS"
    
        {
          "alert": {
            "alertRuleTags": {},
            "conditions": [
              {
                "evaluator": {
                  "params": [
                    0.1
                  ],
                  "type": "gt"
                },
                "operator": {
                  "type": "and"
                },
                "query": {
                  "params": [
                    "A",
                    "5m",
                    "now"
                  ]
                },
                "reducer": {
                  "params": [],
                  "type": "avg"
                },
                "type": "query"
              }
            ],
            "executionErrorState": "alerting",
            "for": "5m",
            "frequency": "1m",
            "handler": 1,
            "name": "Swap usage alert",
            "noDataState": "no_data",
            "notifications": []
          },
          "description": "How much Swap is in use? Relative to available swap.",
          "fieldConfig": {
    
            "defaults": {
              "color": {
                "mode": "palette-classic"
              },
              "custom": {
                "axisLabel": "",
                "axisPlacement": "auto",
                "barAlignment": 0,
                "drawStyle": "line",
                "fillOpacity": 10,
                "gradientMode": "none",
                "hideFrom": {
                  "legend": false,
                  "tooltip": false,
                  "viz": false
                },
                "lineInterpolation": "linear",
                "lineWidth": 1,
                "pointSize": 5,
                "scaleDistribution": {
                  "type": "linear"
                },
                "showPoints": "never",
                "spanNulls": true,
                "stacking": {
                  "group": "A",
                  "mode": "none"
                },
                "thresholdsStyle": {
    
                  "mode": "area"
    
                }
              },
              "mappings": [],
              "max": 1,
              "min": 0,
              "thresholds": {
                "mode": "absolute",
                "steps": [
                  {
                    "color": "transparent",
                    "value": null
                  },
                  {
                    "color": "red",
                    "value": 0.1
                  }
                ]
              },
              "unit": "percentunit"
            },
    
            "overrides": []
          },
          "gridPos": {
            "h": 7,
            "w": 6,
            "x": 18,
            "y": 1
          },
          "id": 30,
          "options": {
    
            "legend": {
              "calcs": [],
              "displayMode": "hidden",
              "placement": "bottom"
            },
            "tooltip": {
    
              "mode": "multi",
              "sort": "none"
    
          "targets": [
            {
              "exemplar": true,
              "expr": "1 - node_memory_SwapFree_bytes / node_memory_SwapTotal_bytes",
              "interval": "",
              "intervalFactor": 1,
              "legendFormat": "{{instance}}",
              "refId": "A"
            }
          ],
    
          "thresholds": [
            {
              "colorMode": "critical",
              "op": "gt",
              "value": 0.1,
              "visible": true
            }
          ],
    
          "title": "Swap used %",
    
        {
          "collapsed": false,
          "gridPos": {
            "h": 1,
            "w": 24,
            "x": 0,
            "y": 8
          },
          "id": 20,
          "panels": [],
          "title": "Network",
          "type": "row"
        },
        {
    
          "description": "Shows most saturated network link for every node. Bit/s.",
    
          "fieldConfig": {
            "defaults": {
    
              "color": {
                "mode": "palette-classic"
              },
              "custom": {
                "axisLabel": "",
                "axisPlacement": "auto",
                "barAlignment": 0,
                "drawStyle": "line",
                "fillOpacity": 10,
                "gradientMode": "none",
                "hideFrom": {
                  "legend": false,
                  "tooltip": false,
                  "viz": false
                },
                "lineInterpolation": "linear",
                "lineWidth": 1,
                "pointSize": 5,
                "scaleDistribution": {
                  "type": "linear"
                },
                "showPoints": "never",
                "spanNulls": true,
                "stacking": {
                  "group": "A",
                  "mode": "none"
                },
                "thresholdsStyle": {
                  "mode": "off"
                }
              },
              "links": [],
              "mappings": [],
              "thresholds": {
                "mode": "absolute",
                "steps": [
                  {
                    "color": "green",
                    "value": null
                  },
                  {
                    "color": "red",
                    "value": 80
                  }
                ]
              },
              "unit": "bps"
    
            },
            "overrides": []
          },
          "gridPos": {
            "h": 7,
            "w": 8,
            "x": 0,
            "y": 9
          },
          "id": 12,
          "options": {
    
            "legend": {
              "calcs": [],
              "displayMode": "hidden",
              "placement": "bottom"
            },
            "tooltip": {
    
              "mode": "single",
              "sort": "none"
    
              "expr": "max by (instance) (rate(node_network_transmit_bytes_total{device!~\"lo|monitoringvpn\"}[5m]) * 8)",
    
              "interval": "",
              "intervalFactor": 4,
              "legendFormat": "{{instance}} out",
              "refId": "A"
            },
            {
    
              "expr": "- max by (instance) (rate(node_network_receive_bytes_total{device!~\"lo|monitoringvpn\"}[5m]) * 8)",
    
              "interval": "",
              "intervalFactor": 4,
              "legendFormat": "{{instance}} in",
              "refId": "B"
            }
          ],
    
          "title": "Throughput",
    
          "transformations": [],
    
          "description": "Packet and error rates (per second). Positive values mean transmit, negative values mean receive.",
    
          "fieldConfig": {
            "defaults": {
    
              "color": {
                "mode": "palette-classic"
              },
              "custom": {
                "axisLabel": "",
                "axisPlacement": "auto",
                "barAlignment": 0,
                "drawStyle": "line",
                "fillOpacity": 0,
                "gradientMode": "none",
                "hideFrom": {
                  "legend": false,
                  "tooltip": false,
                  "viz": false
                },
                "lineInterpolation": "linear",
                "lineWidth": 1,
                "pointSize": 5,
                "scaleDistribution": {
                  "type": "linear"
                },
                "showPoints": "never",
                "spanNulls": false,
                "stacking": {
                  "group": "A",
                  "mode": "none"
                },
                "thresholdsStyle": {
                  "mode": "off"
                }
              },
              "links": [],
              "mappings": [],
              "thresholds": {
                "mode": "absolute",
                "steps": [
                  {
                    "color": "green",
                    "value": null
                  },
                  {
                    "color": "red",
                    "value": 80
                  }
                ]
              },
              "unit": "pps"
    
            },
            "overrides": []
          },
          "gridPos": {
            "h": 7,
            "w": 8,
            "x": 8,
            "y": 9
          },
          "id": 26,
          "options": {
    
            "legend": {
              "calcs": [],
              "displayMode": "hidden",
              "placement": "bottom"
            },
            "tooltip": {
    
              "mode": "single",
              "sort": "none"
    
              "expr": "- rate(node_network_receive_packets_total{device!~\"lo|monitoringvpn\"}[5m])",
    
              "interval": "",
              "intervalFactor": 4,
    
              "legendFormat": "{{instance}} in",
    
              "expr": "- rate(node_network_receive_errs_total{device!~\"lo|monitoringvpn\"}[5m])",
    
              "interval": "",
              "intervalFactor": 4,
    
              "legendFormat": "{{instance}} in err",
    
              "expr": "rate(node_network_transmit_packets_total{device!~\"lo|monitoringvpn\"}[5m])",
    
              "interval": "",
              "intervalFactor": 4,
    
              "legendFormat": "{{instance}} out",
    
              "expr": "rate(node_network_transmit_errs_total{device!~\"lo|monitoringvpn\"}[5m])",
    
              "interval": "",
              "intervalFactor": 4,
    
              "legendFormat": "{{instance}} out err",
    
              "refId": "D"
            }
          ],
          "title": "Network pkt/s",
    
        },
        {
          "alert": {
            "alertRuleTags": {},
            "conditions": [
              {
                "evaluator": {
                  "params": [
    
                  "type": "outside_range"
    
                },
                "operator": {
                  "type": "and"
                },
                "query": {
                  "params": [
                    "A",
                    "5m",
                    "now"
                  ]
                },
                "reducer": {
                  "params": [],
                  "type": "avg"
                },
                "type": "query"
              },
              {
                "evaluator": {
                  "params": [
    
                  "type": "outside_range"
    
                },
                "operator": {
                  "type": "or"
                },
                "query": {
                  "params": [
                    "B",
                    "5m",
                    "now"
                  ]
                },
                "reducer": {
                  "params": [],
                  "type": "avg"
                },
                "type": "query"
              },
              {
                "evaluator": {
                  "params": [
    
                  "type": "outside_range"
    
                },
                "operator": {
                  "type": "or"
                },
                "query": {
                  "params": [
                    "C",
                    "5m",
                    "now"
                  ]
                },
                "reducer": {
                  "params": [],
                  "type": "avg"
                },
                "type": "query"
              },
              {
                "evaluator": {
                  "params": [
    
                  "type": "outside_range"
    
                },
                "operator": {
                  "type": "or"
                },
                "query": {
                  "params": [
                    "D",
                    "5m",
                    "now"
                  ]
                },
                "reducer": {
                  "params": [],
                  "type": "avg"
                },
                "type": "query"
              }
            ],
            "executionErrorState": "alerting",
            "for": "5m",
            "frequency": "1m",
            "handler": 1,
    
            "message": "",
    
            "name": "Network errors alert",
            "noDataState": "no_data",
            "notifications": []
          },
          "description": "Network errors, drops etc. Should all be 0.",
          "fieldConfig": {
            "defaults": {
    
              "color": {
                "mode": "palette-classic"
              },
              "custom": {
                "axisLabel": "",
                "axisPlacement": "auto",
                "barAlignment": 0,
                "drawStyle": "line",
                "fillOpacity": 10,
                "gradientMode": "none",
                "hideFrom": {
                  "legend": false,
                  "tooltip": false,
                  "viz": false
                },
                "lineInterpolation": "linear",
                "lineWidth": 1,
                "pointSize": 5,
                "scaleDistribution": {
                  "type": "linear"
                },
                "showPoints": "never",
                "spanNulls": true,
                "stacking": {
                  "group": "A",
                  "mode": "none"
                },
                "thresholdsStyle": {
    
                  "mode": "off"
    
                }
              },
              "links": [],
              "mappings": [],
              "thresholds": {
                "mode": "absolute",
                "steps": [
                  {
                    "color": "transparent",
                    "value": null
                  },
                  {
                    "color": "red",
                    "value": 10
                  }
                ]
              },
              "unit": "short"
    
            "legend": {
              "calcs": [],
              "displayMode": "hidden",
              "placement": "bottom"
            },
            "tooltip": {
    
              "mode": "single",
              "sort": "none"
    
              "datasource": {
                "type": "prometheus",
                "uid": "000000001"
              },
              "exemplar": true,
              "expr": "rate(node_network_transmit_errs_total{device!=\"lo\"}[5m])\n",
    
              "legendFormat": "{{instance}} {{device}}",
    
              "datasource": {
                "type": "prometheus",
                "uid": "000000001"
              },
              "exemplar": true,
              "expr": "rate(node_network_transmit_drop_total{device!=\"lo\"}[5m])",
    
              "legendFormat": "{{instance}} {{device}}",
    
              "datasource": {
                "type": "prometheus",
                "uid": "000000001"
              },
              "exemplar": true,
              "expr": "- rate(node_network_receive_drop_total{device!=\"lo\"}[5m])",
    
              "legendFormat": "{{instance}} {{device}}",
    
              "datasource": {
                "type": "prometheus",
                "uid": "000000001"
              },
              "exemplar": true,
              "expr": "- rate(node_network_receive_errs_total{device!=\"lo\"}[5m])",
    
              "legendFormat": "{{instance}} {{device}}",
    
          "thresholds": [
            {
              "colorMode": "critical",
              "op": "lt",
              "value": -1,
              "visible": true
            },
            {
              "colorMode": "critical",
              "op": "gt",
              "value": 1,
              "visible": true
            }
          ],
    
          "title": "Network errors",
    
        },
        {
          "collapsed": false,
          "gridPos": {
            "h": 1,
            "w": 24,
            "x": 0,
            "y": 16
          },
          "id": 18,
          "panels": [],
          "title": "Storage",