Skip to content
Snippets Groups Projects
backups.json 30.5 KiB
Newer Older
{
  "annotations": {
    "list": [
      {
        "builtIn": 1,
        "datasource": {
          "type": "datasource",
          "uid": "grafana"
        },
        "enable": true,
        "hide": true,
        "iconColor": "rgba(0, 211, 255, 1)",
        "name": "Annotations & Alerts",
        "target": {
          "limit": 100,
          "matchAny": false,
          "tags": [],
          "type": "dashboard"
        },
        "type": "dashboard"
      }
    ]
  },
  "description": "Is our data safe?",
  "editable": true,
  "fiscalYearStartMonth": 0,
  "graphTooltip": 0,
  "links": [],
  "liveNow": false,
  "panels": [
    {
      "datasource": {
        "type": "prometheus",
        "uid": "LocalPrometheus"
      },
      "gridPos": {
        "h": 1,
        "w": 24,
        "x": 0,
        "y": 0
      },
      "id": 44,
      "targets": [
        {
          "datasource": {
            "type": "prometheus",
            "uid": "LocalPrometheus"
          },
          "refId": "A"
        }
      ],
      "title": "Customer ciphertext backup to Borgbase.com",
      "type": "row"
    },
    {
      "alert": {
        "alertRuleTags": {},
        "conditions": [
          {
            "evaluator": {
              "params": [
                0
              ],
              "type": "gt"
            },
            "operator": {
              "type": "and"
            },
            "query": {
              "params": [
                "Failed jobs",
                "5m",
                "now"
              ]
            },
            "reducer": {
              "params": [],
              "type": "last"
            },
            "type": "query"
          }
        ],
        "executionErrorState": "alerting",
        "for": "5m",
        "frequency": "1m",
        "handler": 1,
        "name": "Daily backup jobs state alert",
        "noDataState": "no_data",
        "notifications": []
      },
      "datasource": {
        "type": "prometheus",
        "uid": "LocalPrometheus"
      },
      "description": "Daily backup job systemd timer unit state",
      "fieldConfig": {
        "defaults": {
          "color": {
            "mode": "palette-classic"
          },
          "custom": {
            "axisBorderShow": false,
            "axisCenteredZero": false,
            "axisColorMode": "text",
            "axisLabel": "",
            "axisPlacement": "auto",
            "barAlignment": 0,
            "drawStyle": "line",
            "fillOpacity": 60,
            "gradientMode": "none",
            "hideFrom": {
              "legend": false,
              "tooltip": false,
              "viz": false
            },
            "lineWidth": 1,
            "pointSize": 5,
            "scaleDistribution": {
              "type": "linear"
            },
            "showPoints": "auto",
            "spanNulls": false,
            "stacking": {
              "group": "A",
              "mode": "none"
            },
            "thresholdsStyle": {
              "mode": "off"
            }
          },
          "mappings": [],
          "thresholds": {
            "mode": "absolute",
            "steps": [
              {
                "color": "green",
                "value": null
              },
              {
                "color": "red",
                "value": 80
        "overrides": [
          {
            "matcher": {
              "id": "byName",
              "options": "Failed"
            },
            "properties": [
              {
                "id": "color",
                "value": {
                  "fixedColor": "semi-dark-red",
                  "mode": "fixed"
                }
              }
            ]
          }
        ]
        "w": 12,
        "x": 0,
        "y": 1
      },
      "id": 46,
      "options": {
        "legend": {
          "calcs": [],
          "displayMode": "list",
          "placement": "bottom",
          "showLegend": true
        },
        "tooltip": {
          "mode": "multi",
          "sort": "none"
        }
      },
      "pluginVersion": "8.4.7",
      "targets": [
        {
          "datasource": {
            "type": "prometheus",
          },
          "exemplar": true,
          "expr": "sum(node_systemd_unit_state{name=\"borgbackup-job-daily.timer\", state=~\"active\"})",
          "hide": false,
          "interval": "",
          "legendFormat": "Active",
          "refId": "Active timers"
        },
        {
          "datasource": {
            "type": "prometheus",
          },
          "exemplar": true,
          "expr": "sum(node_systemd_unit_state{name=\"borgbackup-job-daily.service\", state=\"failed\"})",
          "hide": false,
          "interval": "",
          "legendFormat": "Failed",
          "refId": "Failed jobs"
        }
      ],
      "thresholds": [
        {
          "colorMode": "critical",
          "op": "gt",
          "value": 0,
          "visible": true
        }
      ],
      "title": "Daily backup timer state",
      "type": "timeseries"
    },
    {
      "alert": {
        "alertRuleTags": {},
        "conditions": [
          {
            "evaluator": {
              "params": [
                0
              ],
              "type": "gt"
            },
            "operator": {
              "type": "and"
            },
            "query": {
              "params": [
                "Failed jobs",
                "5m",
                "now"
              ]
            },
            "reducer": {
              "params": [],
              "type": "last"
            },
            "type": "query"
          }
        ],
        "executionErrorState": "alerting",
        "for": "5m",
        "frequency": "1m",
        "handler": 1,
        "name": "Monthly check-repo timer state alert",
        "noDataState": "no_data",
        "notifications": []
      },
      "datasource": {
        "type": "prometheus",
        "uid": "LocalPrometheus"
      },
      "description": "Monthly check-repo systemd timer unit state",
      "fieldConfig": {
        "defaults": {
          "color": {
            "mode": "palette-classic"
          },
          "custom": {
            "axisBorderShow": false,
            "axisCenteredZero": false,
            "axisColorMode": "text",
            "axisLabel": "",
            "axisPlacement": "auto",
            "barAlignment": 0,
            "drawStyle": "line",
            "fillOpacity": 60,
            "gradientMode": "none",
            "hideFrom": {
              "legend": false,
              "tooltip": false,
              "viz": false
            },
            "lineWidth": 1,
            "pointSize": 5,
            "scaleDistribution": {
              "type": "linear"
            },
            "showPoints": "auto",
            "spanNulls": false,
            "stacking": {
              "group": "A",
              "mode": "none"
            },
            "thresholdsStyle": {
              "mode": "off"
            }
          },
          "mappings": [],
          "thresholds": {
            "mode": "absolute",
            "steps": [
              {
                "color": "green",
                "value": null
              }
            ]
          },
          "unit": "short"
        },
        "overrides": [
          {
            "matcher": {
              "id": "byName",
              "options": "Failed"
            },
            "properties": [
              {
                "id": "color",
                "value": {
                  "fixedColor": "semi-dark-red",
                  "mode": "fixed"
                }
              }
            ]
          }
        ]
        "w": 12,
        "x": 12,
        "y": 1
      },
      "id": 47,
      "options": {
        "legend": {
          "calcs": [],
          "displayMode": "list",
          "placement": "bottom",
          "showLegend": true
        },
        "tooltip": {
          "mode": "multi",
          "sort": "none"
        }
      },
      "pluginVersion": "8.4.7",
      "targets": [
        {
          "datasource": {
            "type": "prometheus",
          },
          "exemplar": true,
          "expr": "sum(node_systemd_unit_state{name=\"borgbackup-check-repo.timer\", state=\"active\"})",
          "hide": false,
          "interval": "",
          "legendFormat": "Active",
          "refId": "Active timers"
        },
        {
          "datasource": {
            "type": "prometheus",
          },
          "exemplar": true,
          "expr": "sum(node_systemd_unit_state{name=\"borgbackup-check-repo.service\", state=\"failed\"})",
          "hide": false,
          "interval": "",
          "legendFormat": "Failed",
          "refId": "Failed jobs"
        }
      ],
      "thresholds": [
        {
          "colorMode": "critical",
          "op": "gt",
          "value": 0,
          "visible": true
        }
      ],
      "title": "Monthly check-repo timer state",
      "type": "timeseries"
    },
    {
      "alert": {
        "alertRuleTags": {},
        "conditions": [
          {
            "evaluator": {
              "params": [
                -90000
              ],
              "type": "lt"
            },
            "operator": {
              "type": "and"
            },
            "query": {
              "params": [
                "A",
                "5m",
                "now"
              ]
            },
            "reducer": {
              "params": [],
              "type": "avg"
            },
            "type": "query"
          }
        ],
        "executionErrorState": "keep_state",
        "for": "5m",
        "frequency": "1m",
        "handler": 1,
        "message": "Borgbase.com daily backup job trigger.",
        "name": "Daily backup to Borgbase Trigger alert",
        "noDataState": "no_data",
        "notifications": []
      },
      "datasource": {
        "type": "prometheus",
        "uid": "LocalPrometheus"
      },
      "description": "This shows the last triggering of the borgbackup-job-daily.timer systemd unit.",
      "fieldConfig": {
        "defaults": {
          "color": {
            "mode": "palette-classic"
          },
          "custom": {
            "axisBorderShow": false,
            "axisCenteredZero": false,
            "axisColorMode": "text",
            "axisLabel": "",
            "axisPlacement": "auto",
            "barAlignment": 0,
            "drawStyle": "line",
            "fillOpacity": 0,
            "gradientMode": "none",
            "hideFrom": {
              "legend": false,
              "tooltip": false,
              "viz": false
            },
            "lineInterpolation": "linear",
            "lineWidth": 1,
            "pointSize": 5,
            "scaleDistribution": {
              "type": "linear"
            },
            "showPoints": "never",
            "spanNulls": true,
            "stacking": {
              "group": "A",
              "mode": "none"
            },
            "thresholdsStyle": {
              "mode": "area"
            }
          },
          "mappings": [],
          "thresholds": {
            "mode": "absolute",
            "steps": [
              {
                "color": "red",
                "value": null
              },
              {
                "color": "semi-dark-green",
                "value": -90000
              }
            ]
          },
          "unit": "s"
        },
        "overrides": []
      },
      "gridPos": {
      },
      "id": 41,
      "options": {
        "legend": {
          "calcs": [],
          "displayMode": "list",
          "placement": "bottom",
          "showLegend": false
        },
        "tooltip": {
          "mode": "multi",
          "sort": "asc"
        }
      },
      "pluginVersion": "8.3.5",
      "targets": [
        {
          "datasource": {
            "type": "prometheus",
          },
          "exemplar": false,
          "expr": "node_systemd_timer_last_trigger_seconds{name=\"borgbackup-job-daily.timer\"} - time()",
          "interval": "",
          "intervalFactor": 1,
          "legendFormat": "{{instance}}",
          "refId": "A"
        }
      ],
      "thresholds": [
        {
          "colorMode": "critical",
          "op": "lt",
          "value": -90000,
          "visible": true
        }
      ],
      "title": "Daily backup job trigger",
      "type": "timeseries"
    },
    {
      "alert": {
        "alertRuleTags": {},
        "conditions": [
          {
            "evaluator": {
              "params": [
                -2764800
              ],
              "type": "lt"
            },
            "operator": {
              "type": "and"
            },
            "query": {
              "params": [
                "A",
                "5m",
                "now"
              ]
            },
            "reducer": {
              "params": [],
              "type": "avg"
            },
            "type": "query"
          }
        ],
        "executionErrorState": "keep_state",
        "for": "5m",
        "frequency": "1m",
        "handler": 1,
        "message": "Borgbase.com monthly check-repo trigger.",
        "name": "Monthly check of Borgbase backup Trigger alert",
        "noDataState": "no_data",
        "notifications": []
      },
      "datasource": {
        "type": "prometheus",
        "uid": "LocalPrometheus"
      },
      "description": "This shows the last triggering of the borgbackup-job-daily.timer systemd unit.",
      "fieldConfig": {
        "defaults": {
          "color": {
            "mode": "palette-classic"
          },
          "custom": {
            "axisBorderShow": false,
            "axisCenteredZero": false,
            "axisColorMode": "text",
            "axisLabel": "",
            "axisPlacement": "auto",
            "barAlignment": 0,
            "drawStyle": "line",
            "fillOpacity": 0,
            "gradientMode": "none",
            "hideFrom": {
              "legend": false,
              "tooltip": false,
              "viz": false
            },
            "lineInterpolation": "linear",
            "lineWidth": 1,
            "pointSize": 5,
            "scaleDistribution": {
              "type": "linear"
            },
            "showPoints": "never",
            "spanNulls": true,
            "stacking": {
              "group": "A",
              "mode": "none"
            },
            "thresholdsStyle": {
              "mode": "area"
            }
          },
          "mappings": [],
          "max": 0,
          "min": -2864800,
          "thresholds": {
            "mode": "absolute",
            "steps": [
              {
                "color": "red",
                "value": null
              },
              {
                "color": "semi-dark-green",
                "value": -2764800
              }
            ]
          },
          "unit": "s"
        },
        "overrides": []
      },
      "gridPos": {
      },
      "id": 42,
      "options": {
        "legend": {
          "calcs": [],
          "displayMode": "list",
          "placement": "bottom",
          "showLegend": false
        },
        "tooltip": {
          "mode": "single",
          "sort": "none"
        }
      },
      "pluginVersion": "8.3.5",
      "targets": [
        {
          "datasource": {
            "type": "prometheus",
          },
          "exemplar": false,
          "expr": "node_systemd_timer_last_trigger_seconds{name=\"borgbackup-check-repo.timer\"} - time()",
          "interval": "",
          "intervalFactor": 4,
          "legendFormat": "{{instance}}",
          "refId": "A"
        }
      ],
      "thresholds": [
        {
          "colorMode": "critical",
          "op": "lt",
          "value": -2764800,
          "visible": true
        }
      ],
      "title": "Monthly check-repo trigger",
      "type": "timeseries"
    {
      "alert": {
        "alertRuleTags": {},
        "conditions": [
          {
            "evaluator": {
              "params": [
              ],
              "type": "gt"
            },
            "operator": {
              "type": "and"
            },
            "query": {
              "params": [
              ]
            },
            "reducer": {
              "params": [],
              "type": "last"
            },
            "type": "query"
          }
        ],
        "executionErrorState": "alerting",
        "for": "5m",
        "frequency": "1m",
        "handler": 1,
        "message": "A backup job ran for more than 2 ½ hours. After 3 hours it could run into the check-repo job start time, depending on its \"random\" job delay.",
        "name": "Daily backup job run time alert",
        "noDataState": "no_data",
        "notifications": []
      },
      "datasource": {
        "type": "prometheus",
        "uid": "LocalPrometheus"
      },
      "description": "When was the systemd unit active?",
      "fieldConfig": {
        "defaults": {
          "color": {
            "mode": "palette-classic"
          },
          "custom": {
            "axisBorderShow": false,
            "axisCenteredZero": false,
            "axisColorMode": "text",
            "axisLabel": "",
            "axisPlacement": "left",
            "barAlignment": 0,
            "drawStyle": "line",
            "fillOpacity": 60,
            "gradientMode": "none",
            "hideFrom": {
              "legend": false,
              "tooltip": false,
              "viz": false
            },
            "lineInterpolation": "stepAfter",
            "lineStyle": {
              "fill": "solid"
            },
            "lineWidth": 0,
            "pointSize": 5,
            "scaleDistribution": {
              "type": "linear"
            },
            "showPoints": "never",
            "spanNulls": false,
            "stacking": {
              "group": "A",
              "mode": "none"
            },
            "thresholdsStyle": {
              "mode": "off"
            }
          },
          "mappings": [],
          "thresholds": {
            "mode": "absolute",
            "steps": [
              {
                "color": "green",
                "value": null
      },
      "id": 52,
      "options": {
        "legend": {
          "calcs": [],
          "displayMode": "list",
          "placement": "bottom",
          "showLegend": true
        },
        "tooltip": {
          "mode": "single",
          "sort": "none"
        }
      },
      "targets": [
        {
          "datasource": {
            "type": "prometheus",
          "expr": "abs((node_systemd_timer_last_trigger_seconds{name=\"borgbackup-job-daily.timer\"} - time())) * on (instance) node_systemd_unit_state{name=\"borgbackup-job-daily.service\", state=\"active\"}",
          "interval": "",
          "legendFormat": "{{instance}}",
        }
      ],
      "thresholds": [
        {
          "colorMode": "critical",
          "op": "gt",
          "visible": true
        }
      ],
      "title": "Daily backup job run time",
      "type": "timeseries"
    },
    {
      "alert": {
        "alertRuleTags": {},
        "conditions": [
          {
            "evaluator": {
              "params": [
              ],
              "type": "gt"
            },
            "operator": {
              "type": "and"
            },
            "query": {
              "params": [
              ]
            },
            "reducer": {
              "params": [],
              "type": "last"
            },
            "type": "query"
          }
        ],
        "executionErrorState": "alerting",
        "for": "5m",
        "frequency": "1m",
        "handler": 1,
        "message": "A borg check-repo job ran for more than five hours.  After six hours it could collide with the daily backup job, depending on that job's \"random\" delay. If the backup set is large and this is expected to happen again, consider using borgbackup partial checks (--max-duration SECONDS parameter).",
        "name": "Monthly check-repo run time alert",
        "noDataState": "no_data",
        "notifications": []
      },
      "datasource": {
        "type": "prometheus",
        "uid": "LocalPrometheus"
      },
      "description": "When was the systemd unit active?",
      "fieldConfig": {
        "defaults": {
          "color": {
            "mode": "palette-classic"
          },
          "custom": {
            "axisBorderShow": false,
            "axisCenteredZero": false,
            "axisColorMode": "text",
            "axisLabel": "",
            "axisPlacement": "left",
            "barAlignment": 0,
            "drawStyle": "line",
            "fillOpacity": 60,
            "gradientMode": "none",
            "hideFrom": {
              "legend": false,
              "tooltip": false,
              "viz": false
            },
            "lineInterpolation": "stepAfter",
            "lineStyle": {
              "fill": "solid"
            },
            "lineWidth": 0,
            "pointSize": 5,
            "scaleDistribution": {
              "type": "linear"
            },
            "showPoints": "never",
            "spanNulls": false,
            "stacking": {
              "group": "A",
              "mode": "none"
            },
            "thresholdsStyle": {
              "mode": "off"
            }
          },
          "mappings": [],
          "thresholds": {
            "mode": "absolute",
            "steps": [
              {
                "color": "green",
                "value": null
      },
      "id": 53,
      "options": {
        "legend": {
          "calcs": [],
          "displayMode": "list",
          "placement": "bottom",
          "showLegend": true
        },
        "tooltip": {
          "mode": "single",
          "sort": "none"
        }
      },
      "targets": [
        {
          "datasource": {
            "type": "prometheus",
          "expr": "abs((node_systemd_timer_last_trigger_seconds{name=\"borgbackup-check-repo.timer\"} - time())) * on (instance) node_systemd_unit_state{name=\"borgbackup-check-repo.service\", state=\"active\"}",
          "interval": "",
          "legendFormat": "{{instance}}",
        }
      ],
      "thresholds": [
        {
          "colorMode": "critical",
          "op": "gt",
          "visible": true
        }
      ],
      "title": "Monthly check-repo run time",
      "type": "timeseries"
    },
    {
      "datasource": {
        "type": "loki",
      "description": "The \"duration\" that borgbackup status reports.",
      "fieldConfig": {
        "defaults": {
          "color": {
            "mode": "palette-classic"
          },
          "custom": {
            "axisBorderShow": false,
            "axisCenteredZero": false,
            "axisColorMode": "text",
            "axisLabel": "",
            "axisPlacement": "auto",
            "gradientMode": "none",
            "hideFrom": {
              "legend": false,
              "tooltip": false,
              "viz": false
            },
            "lineWidth": 1,
            "scaleDistribution": {
              "type": "linear"
            }
          },
          "mappings": [],
          "thresholds": {
            "mode": "absolute",
            "steps": [
              {
                "color": "green",