Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.

Source

Select target project
No results found
Select Git revision

Target

Select target project
  • tomprince/PrivateStorageio
  • privatestorage/PrivateStorageio
2 results
Select Git revision
Show changes
Showing
with 28756 additions and 1397 deletions
# Similar to ``issuer.nix`` but for a "storage"-type system. Holes are filled
# by ``customize-storage.nix``.
{ config, ...} :
# This contains all of the NixOS system configuration necessary to specify an
# "storage"-type system.
{ lib, config, ...} :
let
inherit (config.grid) publicKeyPath privateKeyPath;
inherit (config.grid) privateKeyPath;
in {
# Any extra NixOS modules to load on this server.
imports = [
./monitoringvpn-client.nix
./borgbackup.nix
];
options.grid.storage = {
passValue = lib.mkOption {
type = lib.types.int;
description = ''
An integer giving the value of a single pass in byte×months.
'';
};
publicStoragePort = lib.mkOption {
type = lib.types.port;
description = ''
An integer giving the port number to include in Tahoe storage service
advertisements and on which to listen for storage connections.
'';
};
};
config = {
deployment = {
secrets = {
"ristretto-signing-key" = {
......@@ -17,33 +41,13 @@ in {
# extract it from the tahoe-lafs nixos module somehow?
action = ["sudo" "systemctl" "restart" "tahoe.storage.service"];
};
"monitoringvpn-secret-key" = {
destination = "/run/keys/monitoringvpn/client.key";
owner.user = "root";
owner.group = "root";
permissions = "0400";
action = ["sudo" "systemctl" "restart" "wireguard-monitoringvpn.service"];
};
"monitoringvpn-preshared-key" = {
destination = "/run/keys/monitoringvpn/preshared.key";
owner.user = "root";
owner.group = "root";
permissions = "0400";
action = ["sudo" "systemctl" "restart" "wireguard-monitoringvpn.service"];
};
};
};
# Any extra NixOS modules to load on this server.
imports = [
# Bring in our module for configuring the Tahoe-LAFS service and other
# Private Storage-specific things.
../../nixos/modules/private-storage.nix
# Connect to the monitoringvpn.
../../nixos/modules/monitoring/vpn/client.nix
# Expose base system metrics over the monitoringvpn.
../../nixos/modules/monitoring/exporters/node.nix
];
services.private-storage.monitoring.exporters.node.enable = true;
services.private-storage.monitoring.exporters.tahoe.enable = true;
services.private-storage.borgbackup.enable = lib.mkDefault true;
# Turn on the Private Storage (Tahoe-LAFS) service.
services.private-storage = {
......@@ -51,5 +55,7 @@ in {
enable = true;
# Give it the Ristretto signing key to support authorization.
ristrettoSigningKeyPath = config.deployment.secrets.ristretto-signing-key.destination;
inherit (config.grid.storage) passValue publicStoragePort;
};
};
}
{ callPackage }:
{
/* A library of tools useful for writing tests with Nix.
*/
testing = callPackage ./testing.nix { };
}
# Thank you: https://gist.github.com/petabyteboy/558ffddb9aeb24e1eab2d5d6d021b5d7
with import <nixpkgs/lib>;
rec {
# FIXME: add case for negative numbers
pow = base: exponent: if exponent == 0 then 1 else fold (
x: y: y * base
) base (
range 2 exponent
);
fromHexString = hex: foldl (
x: y: 16 * x + (
(
listToAttrs (
map (
x: nameValuePair (
toString x
) x
) (
range 0 9
)
) // {
"a" = 10;
"b" = 11;
"c" = 12;
"d" = 13;
"e" = 14;
"f" = 15;
}
).${y}
)
) 0 (
stringToCharacters (
removePrefix "0x" (
hex
)
)
);
ipv4 = rec {
decode = address: foldl (
x: y: 256 * x + y
) 0 (
map toInt (
splitString "." address
)
);
encode = num: concatStringsSep "." (
map (
x: toString (mod (num / x) 256)
) (
reverseList (
genList (
x: pow 2 (x * 8)
) 4
)
)
);
netmask = prefixLength: (
foldl (
x: y: 2 * x + 1
) 0 (
range 1 prefixLength
)
) * (
pow 2 (
32 - prefixLength
)
);
reverseZone = net: (
concatStringsSep "." (
reverseList (
splitString "." net
)
)
) + ".in-addr.arpa";
eachAddress = net: prefixLength: genList (
x: decode (
x + (
decode net
)
)
) (
pow 2 (
32 - prefixLength
)
);
networkOf = address: prefixLength: encode (
bitAnd (
decode address
) (
netmask prefixLength
)
);
isInNetwork = net: address: networkOf address == net;
/* nixos-specific stuff */
findOwnAddress = config: net: head (
filter (
isInNetwork net
) (
configuredAddresses config
)
);
configuredAddresses = config: concatLists (
mapAttrsToList (
name: iface: iface.ipv4.addresses
) config.networking.interfaces
);
};
ipv6 = rec {
expand = address: (
replaceStrings ["::"] [(
concatStringsSep "0" (
genList (x: ":") (
9 - (count (x: x == ":") (stringToCharacters address))
)
)
)] address
) + (
if hasSuffix "::" address then
"0"
else
""
);
decode = address: map fromHexString (
splitString ":" (
expand address
)
);
encode = address: toLower (
concatStringsSep ":" (
map toHexString address
)
);
netmask = prefixLength: map (
x: if prefixLength > x + 16 then
(pow 2 16) - 1
else if prefixLength < x then
0
else
(
foldl (
x: y: 2 * x + 1
) 0 (
range 1 (prefixLength - x)
)
) * (
pow 2 (
16 - (prefixLength - x)
)
)
) (
genList (
x: x * 16
) 8
);
reverseZone = net: (
concatStringsSep "." (
concatLists (
reverseList (
map (
x: stringToCharacters (fixedWidthString 4 "0" x)
) (
splitString ":" (
expand net
)
)
)
)
)
) + ".ip6.arpa";
networkOf = address: prefixLength: encode (
zipListsWith bitAnd (
decode address
) (
netmask prefixLength
)
);
isInNetwork = net: address: networkOf address == (expand net);
/* nixos-specific stuff */
findOwnAddress = config: net: head (
filter (
isInNetwork net
) (
configuredAddresses config
)
);
configuredAddresses = config: concatLists (
mapAttrsToList (
name: iface: iface.ipv6.addresses
) config.networking.interfaces
);
};
}
{ ...}:
{
/* Returns a string that runs tests from the Python code at the given path.
The Python code is loaded using *execfile* and the *test* global it
defines is called with the given keyword arguments.
Type: makeTestScript :: Path -> AttrSet -> String
Example:
testScript = (makeTestScript ./test_foo.py { x = "y"; });
*/
makeTestScript = { testpath, kwargs ? {} }:
''
# The driver runs pyflakes on this script before letting it
# run... Convince pyflakes that there is a `test` name.
def test():
pass
with open("${testpath}") as testfile:
exec(testfile.read(), globals())
# For simple types, JSON is compatible with Python syntax!
test(**${builtins.toJSON kwargs})
'';
}
......@@ -34,40 +34,41 @@ let
options = {
hostId = lib.mkOption
{ type = lib.types.str;
example = lib.literalExample "abcdefab";
example = "abcdefab";
description = "The 32-bit host ID of the machine, formatted as 8 hexadecimal characters.";
};
interface = lib.mkOption
{ type = lib.types.str;
example = lib.literalExample "eno0";
example = "eno0";
description = "The name of the network interface on which to configure a static address.";
};
publicIPv4 = lib.mkOption
{ type = lib.types.str;
example = lib.literalExample "192.0.2.0";
example = "192.0.2.0";
description = "The IPv4 address to statically assign to `interface`.";
};
prefixLength = lib.mkOption
{ type = lib.types.int;
example = lib.literalExample 24;
example = 24;
description = "The statically configured network's prefix length.";
};
gateway = lib.mkOption
{ type = lib.types.str;
example = lib.literalExample "192.0.2.1";
example = "192.0.2.1";
description = "The statically configured address of the network gateway.";
};
gatewayInterface = lib.mkOption
{ type = lib.types.str;
example = lib.literalExample "eno0";
example = "eno0";
description = "The name of the network interface for the default route.";
default = cfg.interface;
};
grubDeviceID = lib.mkOption
{ type = lib.types.str;
example = lib.literalExample "wwn-0x5000c500936410b9";
example = "wwn-0x5000c500936410b9";
description = "The ID of the disk on which to install grub.";
default = "nodev";
};
};
in {
......@@ -102,10 +103,11 @@ in {
# harder to deploy in the bootstrap environment.
config =
{ boot.loader.grub.enable = true;
boot.loader.grub.version = 2;
boot.loader.grub.device = "/dev/disk/by-id/${cfg.grubDeviceID}";
boot.loader.grub.device = if cfg.grubDeviceID == "nodev" then "nodev" else "/dev/disk/by-id/${cfg.grubDeviceID}";
boot.loader.timeout = 10;
# NixOS likes to fill up boot partitions with (by default) 100 old kernels.
# Keep a (for us) more reasonable number around.
boot.loader.grub.configurationLimit = 8;
networking.firewall.enable = false;
networking.hostId = cfg.hostId;
......
These are mostly modelled on upstream nixos modules.
They are generally fairly configurable (they don't tend to hard-code paths, they can be enabled or disabled).
They don't know anything about morph (e.g. ``deployment.secrets``) or how the different grids are configured (e.g. ``grid.publicKeyPath``).
Each module here tends to define one service (or group of related services) or feature.
Eventually, all of these will be imported automatically and controlled by ``services.private-storage.*.enabled`` options.
{
# Load modules that are sometimes universally useful and other times useful
# only for a specific service. Where functionality is not universally
# useful, it needs to be enabled by a node's configuration. By loading more
# modules (and therefore defining more options) than is strictly necessary
# for any single node the logic for supplying conditional configuration
# elsewhere is much simplified. For example, a Vagrant module can
# unconditionally set up a filesystem for PaymentServer. If PaymentServer
# is running on that node then it will get a Vagrant-appropriate
# configuration. If PaymentServer hasn't been enabled then the
# configuration will just be ignored.
imports = [
./packages.nix
./issuer.nix
./private-storage.nix
./monitoring/policy.nix
./monitoring/vpn/client.nix
./monitoring/exporters/node.nix
./monitoring/exporters/tahoe.nix
./monitoring/exporters/promtail.nix
];
}
......@@ -16,7 +16,7 @@ in {
options = {
services.private-storage.deployment.authorizedKey = lib.mkOption {
type = lib.types.str;
example = lib.literalExample ''
example = ''
ssh-ed25519 AAAAC3N...
'';
description = ''
......@@ -25,7 +25,7 @@ in {
};
services.private-storage.deployment.gridName = lib.mkOption {
type = lib.types.str;
example = lib.literalExample "staging";
example = "staging";
description = ''
The name of the grid configuration to use to update this deployment.
'';
......@@ -35,11 +35,11 @@ in {
config = {
# Configure the system to use our binary cache so that deployment updates
# only require downloading pre-built software, not building it ourselves.
nix = {
binaryCachePublicKeys = [
nix.settings = {
trusted-public-keys = [
"saxtons.private.storage:MplOcEH8G/6mRlhlKkbA8GdeFR3dhCFsSszrspE/ZwY="
];
binaryCaches = [
substituters = [
"http://saxtons.private.storage"
];
};
......
......@@ -8,14 +8,14 @@ in {
services.private-storage-issuer.package = lib.mkOption {
default = ourpkgs.zkapissuer;
type = lib.types.package;
example = lib.literalExample "pkgs.zkapissuer.components.exes.\"PaymentServer-exe\"";
example = lib.literalExpression "pkgs.zkapissuer.components.exes.\"PaymentServer-exe\"";
description = ''
The package to use for the ZKAP issuer.
'';
};
services.private-storage-issuer.domains = lib.mkOption {
type = lib.types.listOf lib.types.str;
example = lib.literalExample [ "payments.example.com" ];
example = [ "payments.example.com" ];
description = ''
The domain names at which the issuer is reachable.
'';
......@@ -32,12 +32,21 @@ in {
services.private-storage-issuer.issuer = lib.mkOption {
default = "Ristretto";
type = lib.types.enum [ "Trivial" "Ristretto" ];
example = lib.literalExample "Trivial";
example = "Trivial";
description = ''
The issuer algorithm to use. Either Trivial for a fake no-crypto
algorithm or Ristretto for Ristretto-flavored PrivacyPass.
'';
};
services.private-storage-issuer.tokensPerVoucher = lib.mkOption {
default = null;
type = lib.types.nullOr lib.types.int;
example = 50000;
description = ''
If not null, a value to pass to PaymentServer for
``--tokens-per-voucher``.
'';
};
services.private-storage-issuer.ristrettoSigningKeyPath = lib.mkOption {
default = null;
type = lib.types.path;
......@@ -53,6 +62,13 @@ in {
and payment management.
'';
};
services.private-storage-issuer.stripeWebhookSecretKeyPath = lib.mkOption {
type = lib.types.path;
description = ''
The path to a file containing a Stripe "webhook" secret key to use for
charge and payment management.
'';
};
services.private-storage-issuer.stripeEndpointDomain = lib.mkOption {
type = lib.types.str;
description = ''
......@@ -81,6 +97,15 @@ in {
The kind of voucher database to use.
'';
};
services.private-storage-issuer.databaseFileSystem = lib.mkOption {
# Logically, the type is the type of an entry in fileSystems - but we'll
# just let the type system enforce that when we pass the value on to
# fileSystems.
description = ''
Configuration for a filesystem to mount which will hold the issuer's
internal state database.
'';
};
services.private-storage-issuer.databasePath = lib.mkOption {
default = null;
type = lib.types.str;
......@@ -111,11 +136,26 @@ in {
# We'll refer to this collection of domains by the first domain in the
# list.
domain = builtins.head cfg.domains;
certServiceName = "acme-${domain}";
# Payment server internal http port (arbitrary, non-priviledged):
internalHttpPort = "1061";
# The "-vN" suffix indicates that this Nth incompatible version of on
# disk state as managed by this deployment system. This does not have
# anything to do with what's inside the PaymentServer-managed state.
# Instead it's about things like the type of filesystem used or options
# having to do with the backing volume behind the filesystem. In
# general I expect that to get from "-vN" to "-v(N+1)" some manual
# upgrade steps will be required.
stateDirectory = "zkapissuer-v2";
in lib.mkIf cfg.enable {
# Make sure the voucher database filesystem is mounted.
fileSystems = {
"zkapissuer-data" = cfg.databaseFileSystem // {
mountPoint = "/var/lib/${stateDirectory}";
};
};
# Add a systemd service to run PaymentServer.
systemd.services.zkapissuer = {
enable = true;
......@@ -138,15 +178,30 @@ in {
# Make systemd create a User/Group owned directory for PaymentServer
# state. According to the docs at
# https://www.freedesktop.org/software/systemd/man/systemd.exec.html#RuntimeDirectory=
# "The specified directory names must be relative" ... this
# makes systemd create /var/lib/zkapissuer/ for us:
serviceConfig.StateDirectory = "zkapissuer";
# "The specified directory names must be relative" ... this makes
# systemd create this directory in /var/lib/ for us.
serviceConfig.StateDirectory = stateDirectory;
serviceConfig.StateDirectoryMode = "0750";
unitConfig.AssertPathExists = [
# Bail if there is still an old (root-owned) DB file on this system.
# If you hit this, and this /var/db/ file is indeed current, move it to
# /var/lib/zkapissuer/vouchers.sqlite3 and chown it to zkapissuer:zkapissuer.
unitConfig.AssertPathExists = "!/var/db/vouchers.sqlite3";
# If you hit this, and this /var/db/ file is indeed current, move it
# to /var/lib/zkapissuer/vouchers.sqlite3 and chown it to
# zkapissuer:zkapissuer.
"!/var/db/vouchers.sqlite3"
# Similarly, bail if the newer path you were just told to create --
# /var/lib/zkapissuer/vouchers.sqlite3 -- exists. It needs to be
# moved /var/lib/zkapissuer-v2 where a dedicated filesystem has been
# created for it.
"!/var/lib/zkapissuer/vouchers.sqlite3"
];
# Only start if the dedicated vouchers database filesystem is mounted so
# that we know we're going to find our vouchers database there (or that
# we will create it in the right place).
unitConfig.Requires = ["local-fs.target"];
unitConfig.After = ["local-fs.target"];
script =
let
......@@ -168,11 +223,14 @@ in {
stripeArgs =
"--stripe-key-path ${cfg.stripeSecretKeyPath} " +
"--stripe-webhook-key-path ${cfg.stripeWebhookSecretKeyPath} " +
"--stripe-endpoint-domain ${cfg.stripeEndpointDomain} " +
"--stripe-endpoint-scheme ${cfg.stripeEndpointScheme} " +
"--stripe-endpoint-port ${toString cfg.stripeEndpointPort}";
redemptionConfig = lib.optionalString (cfg.tokensPerVoucher != null) "--tokens-per-voucher ${builtins.toString cfg.tokensPerVoucher}";
in
"${cfg.package.exePath} ${originArgs} ${issuerArgs} ${databaseArgs} ${httpArgs} ${stripeArgs}";
"${cfg.package.exePath} ${originArgs} ${issuerArgs} ${databaseArgs} ${httpArgs} ${stripeArgs} ${redemptionConfig}";
};
# PaymentServer runs as this user and group by default
......@@ -189,14 +247,14 @@ in {
extraGroups = [ "keys" ];
};
# Open 80 and 443 for the certbot HTTP server and the PaymentServer HTTPS server.
# Open 80 and 443 for nginx
networking.firewall.allowedTCPPorts = [
80
443
];
# NGINX reverse proxy
security.acme.email = cfg.letsEncryptAdminEmail;
security.acme.defaults.email = cfg.letsEncryptAdminEmail;
security.acme.acceptTerms = true;
services.nginx = {
enable = true;
......@@ -215,6 +273,17 @@ in {
# we pass less scanning spam on to our backend
# Want a regex instead? try locations."~ /v\d+/"
proxyPass = "http://127.0.0.1:${internalHttpPort}";
# The redemption endpoint can intentionally delay its response for
# up to 600 seconds for a cheap kind of server-push when payment
# completes. Let that timeout control how long the connection stays
# open. PaymentServer does not accept configuration for that
# duration so we also hard-code it here.
#
# http://nginx.org/en/docs/http/ngx_http_proxy_module.html#proxy_read_timeout
extraConfig = ''
proxy_read_timeout 660;
'';
};
locations."/metrics" = {
# Only allow our monitoringvpn subnet
......
# Prometheus Blackbox exporter configuration
#
# Scope: From the monitoring machine, ping (etc.) hosts to check whether
# they are reachable, certs still are valid for a while, etc.
#
# Notes: The Blackbox exporter is using the "Multi Target Exporter" pattern,
# see https://prometheus.io/docs/guides/multi-target-exporter/ .
#
# Usage: Import this on a monitoring server
{ config, lib, pkgs, ... }: {
config.services.prometheus.exporters.blackbox = {
enable = true;
configFile = pkgs.writeText "blackbox-exporter.yaml" (builtins.toJSON {
modules = {
https_2xx = {
prober = "http";
timeout = "5s";
http = {
fail_if_not_ssl = true;
# This prober is for IPv4 only.
preferred_ip_protocol = "ip4";
ip_protocol_fallback = false;
};
};
};
});
};
}
# MegaCli to Prometheus text format exporter
#
# Scope: Gets data from MegaRAID compatible storage controllers and mogrifies
# to Prometheus text format, saves to a temp file, to later be scraped
# by the node exporter.
#
# Usage: Import this to every server with a MegaRAID card that you want to
# include in the central monitoring system
#
# See https://nixos.org/manual/nixos/stable/#module-services-prometheus-exporters
{ config, options, lib, ourpkgs, pkgs, ... }:
let
cfg = config.services.private-storage.monitoring.exporters.megacli2prom;
in {
options.services.private-storage.monitoring.exporters.megacli2prom = {
enable = lib.mkEnableOption "MegaCli2Prom metrics gathering service";
outFile = lib.mkOption {
type = lib.types.str;
description = "Where to store the temporary file for node exporter to scrape?";
default = "/run/prometheus-node-exporter/megacli.prom";
};
interval = lib.mkOption {
type = lib.types.str;
description = ''
How often to do it?
See https://www.freedesktop.org/software/systemd/man/systemd.time.html#Calendar%20Events
'';
# Every five minutes.
default = "*:0/5";
};
};
config =
lib.mkIf cfg.enable {
environment.systemPackages = [ ourpkgs.megacli2prom pkgs.megacli ];
systemd.services.megacli2prom = {
enable = true;
description = "MegaCli2Prom metrics gathering service";
startAt = cfg.interval;
path = [ pkgs.megacli ];
# Save to a temp file and then move atomically so the
# textfile collector won't read a partial file.
# See https://github.com/prometheus/node_exporter#textfile-collector
script = ''
"${ourpkgs.megacli2prom}/bin/megacli2prom" > "${cfg.outFile}.tmp"
mv "${cfg.outFile}.tmp" "${cfg.outFile}"
'';
};
};
}
......@@ -6,15 +6,25 @@
# monitoring system
# See https://nixos.org/manual/nixos/stable/#module-services-prometheus-exporters
{ config, lib, pkgs, ... }:
{ config, lib, pkgs, options, ... }:
with lib;
let
cfg = config.services.private-storage.monitoring.exporters.node;
mountsFileSystemType = fsType: {} != filterAttrs (n: v: v.fsType == fsType) config.fileSystems;
in {
config.services.prometheus.exporters.node = {
options.services.private-storage.monitoring.exporters.node = {
enable = lib.mkEnableOption "Base system metrics collection";
textfiles-directory = lib.mkOption {
type = lib.types.str;
description = "Directory used by the textfiles collector.";
default = "/run/prometheus-node-exporter";
};
};
config.services.prometheus.exporters.node = lib.mkIf cfg.enable {
enable = true;
openFirewall = true;
firewallFilter = "-i monitoringvpn -p tcp -m tcp --dport 9100";
......@@ -22,7 +32,7 @@ in {
# extraFlags = [ "--collector.disable-defaults" ]; # not in nixpkgs 19.09
# Thanks https://github.com/mayflower/nixexprs/blob/master/modules/monitoring/default.nix
enabledCollectors = [
"arp"
# "arp" # is broken in 1.7.0 (2024-02-07)
"bcache"
"conntrack"
"filefd"
......@@ -30,16 +40,16 @@ in {
"netclass"
"netdev"
"netstat"
#"rapl" # not in nixpkgs 19.09
"rapl"
"sockstat"
#"softnet" # not in nixpkgs 19.09
"softnet"
"stat"
"systemd"
# "textfile"
# "textfile.directory /run/prometheus-node-exporter"
#"thermal_zone" # not in nixpkgs 19.09
"textfile"
"textfile.directory ${cfg.textfiles-directory}"
"thermal_zone"
"time"
#"udp_queues" # not in nixpkgs 19.09
"udp_queues"
"uname"
"vmstat"
] ++ optionals (!config.boot.isContainer) [
......@@ -59,7 +69,7 @@ in {
] ++ (
optionals (config.services.nfs.server.enable) [ "nfsd" ]
) ++ (
optionals ("" != config.boot.initrd.mdadmConf) [ "mdadm" ]
optionals ("" != config.boot.swraid.mdadmConf) [ "mdadm" ]
) ++ (
optionals ({} != config.networking.bonds) [ "bonding" ]
) ++ (
......@@ -67,7 +77,7 @@ in {
) ++ (
optionals (mountsFileSystemType "xfs") [ "xfs" ]
) ++ (
optionals (mountsFileSystemType "zfs" || elem "zfs" config.boot.supportedFilesystems) [ "zfs" ]
optionals (mountsFileSystemType "zfs" || config.boot.supportedFilesystems.zfs or false) [ "zfs" ]
);
};
}
......
# Promtail log forwarder configuration
#
# Scope: Tail logs on the local system and send them to Loki
#
# Description: This is not strictly an "exporter" like the Prometheus
# exporters, but it is very similar in what it is doing -
# preparing local data and sending it off to a TSDB.
{ config, options, lib, ... }:
let
cfg = config.services.private-storage.monitoring.exporters.promtail;
hostName = config.networking.hostName;
logRetention = toString(config.services.private-storage.monitoring.policy.logRetentionSeconds) + "s";
in {
options.services.private-storage.monitoring.exporters.promtail = {
enable = lib.mkEnableOption "Promtail log exporter service";
lokiUrl = lib.mkOption {
type = lib.types.str;
description = ''
The server URL that logs should be pushed to.
'';
# Resolving names is hard, let's have breakfast
# If you are curious why there's a plain IP address in here, read all of
# https://whetstone.private.storage/privatestorage/PrivateStorageio/-/merge_requests/251
# https://whetstone.private.storage/privatestorage/PrivateStorageio/-/merge_requests/257
# https://whetstone.private.storage/privatestorage/PrivateStorageio/-/merge_requests/258
default = "http://172.23.23.1:3100/loki/api/v1/push";
};
};
config = lib.mkIf cfg.enable {
services.promtail.enable = true;
networking.firewall.interfaces.monitoringvpn.allowedTCPPorts = [ 9080 ];
services.journald.extraConfig = ''
# This tells journald it can discard log files that contain only log
# entries older than...
MaxRetentionSec=${logRetention}
# This tells journald to start a new log file once a day. Together with
# the MaxRetentionSec setting, this means that entries are kept for
# up to a full day longer than MaxRetentionSec.
#
# https://www.freedesktop.org/software/systemd/man/journald.conf.html
# for further details about these options.
#
MaxFileSec=1day
# This asks journald to not use more than 500M of disk space. Due to
# journald's characteristics this might only be a week of logs, but that
# should be okay since we ship all logs to a central server that keeps
# them for a while longer.
SystemMaxUse=500M
'';
services.promtail.configuration = {
server = {
http_listen_port = 9080; # Using /metrics for health check
grpc_listen_address = "127.0.0.1"; # unused, but no option to turn it off.
grpc_listen_port = 9094; # unused, but no option to turn it off.
};
clients = [{
url = cfg.lokiUrl;
}];
scrape_configs = [{
job_name = "systemd-journal";
journal = {
labels = {
job = "systemd-journal";
host = hostName;
};
};
# The journal has many internal labels, that by default will
# be dropped because of their "__" prefix. To keep them, rename them.
# https://grafana.com/docs/loki/latest/clients/promtail/scraping/#journal-scraping-linux-only
# https://www.freedesktop.org/software/systemd/man/systemd.journal-fields.html
relabel_configs = [{
source_labels = [ "__journal__systemd_unit" ];
target_label = "unit";
}];
}];
};
};
}
# Tahoe Prometheus metrics collector
#
# Scope: Retrieve metrics from Tahoe and put them where Prometheus'
# node-exporter's textfile collector can find them.
#
# Usage: Import this to every server running Tahoe.
#
# See https://nixos.org/manual/nixos/stable/#module-services-prometheus-exporters
{ config, options, lib, pkgs, ... }:
let
cfg = config.services.private-storage.monitoring.exporters.tahoe;
inherit (config.services.private-storage.monitoring.exporters.node) textfiles-directory;
in {
options.services.private-storage.monitoring.exporters.tahoe = {
enable = lib.mkEnableOption "Tahoe OpenMetrics collecting service";
scrapeEndpoint = lib.mkOption {
type = lib.types.str;
description = "Where to get our metrics from?";
default = "http://localhost:3456/statistics?t=openmetrics";
};
outFile = lib.mkOption {
type = lib.types.str;
description = "Where to store the temporary file for node exporter to scrape?";
default = "${textfiles-directory}/tahoe.prom";
};
interval = lib.mkOption {
type = lib.types.str;
description = ''
How often to do it?
See https://www.freedesktop.org/software/systemd/man/systemd.time.html#Calendar%20Events
'';
# Every five minutes.
default = "*:0/5";
};
};
config =
lib.mkIf cfg.enable {
assertions = [
{
assertion = config.services.private-storage.monitoring.exporters.node.enable;
message = ''
services.private-storage.monitoring.tahoe requires services.private-storage.monitoring.exporters.node to provide the textfile prometheus collector.
'';
}
];
environment.systemPackages = [ pkgs.curl ];
systemd.services.tahoe-metrics-collector = {
enable = true;
description = "Tahoe metrics gathering service";
after = [ "tahoe.storage.service" ];
startAt = cfg.interval;
path = [ pkgs.coreutils pkgs.findutils pkgs.curl ];
restartIfChanged = false;
# Save to a temp file and then move atomically so the
# textfile collector won't read a partial file.
# See https://github.com/prometheus/node_exporter#textfile-collector
script = ''
set -euo pipefail
NUM_CORRUPTION_ADVISORIES=$(find /storage/corruption-advisories/ -type f | wc -l)
echo "tahoe_corruption_advisories_total $NUM_CORRUPTION_ADVISORIES" > "${cfg.outFile}.tmp"
NUM_INCIDENT_REPORTS=$(find /var/db/tahoe-lafs/storage/logs/incidents/ -type f | wc -l)
echo "tahoe_incident_reports_total $NUM_INCIDENT_REPORTS" >> "${cfg.outFile}.tmp"
curl --silent --show-error --fail-with-body "${cfg.scrapeEndpoint}" >> "${cfg.outFile}.tmp"
mv "${cfg.outFile}.tmp" "${cfg.outFile}"
'';
};
systemd.timers.tahoe-metrics-collector = {
after = [ "tahoe.storage.service" ];
};
};
}
# Codify our log data retention policy
#
# A maximum retention of 30 days conforms to the published log retention policy,
# see https://private.storage/privacy-policy/ .
{ options, lib, ... }: {
options.services.private-storage.monitoring.policy = {
logRetentionSeconds = lib.mkOption {
type = lib.types.int;
description = "How long do we retain logs (seconds)";
default = 29 * (24 * 60 * 60); # 29 days, to accomodate for the journald log rotation (1 day).
};
};
}
{
"annotations": {
"list": [
{
"builtIn": 1,
"datasource": {
"type": "datasource",
"uid": "grafana"
},
"enable": true,
"hide": true,
"iconColor": "rgba(0, 211, 255, 1)",
"name": "Annotations & Alerts",
"target": {
"limit": 100,
"matchAny": false,
"tags": [],
"type": "dashboard"
},
"type": "dashboard"
}
]
},
"description": "Is our data safe?",
"editable": true,
"fiscalYearStartMonth": 0,
"graphTooltip": 0,
"links": [],
"liveNow": false,
"panels": [
{
"collapsed": false,
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"gridPos": {
"h": 1,
"w": 24,
"x": 0,
"y": 0
},
"id": 44,
"panels": [],
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"refId": "A"
}
],
"title": "Customer ciphertext backup to Borgbase.com",
"type": "row"
},
{
"alert": {
"alertRuleTags": {},
"conditions": [
{
"evaluator": {
"params": [
0
],
"type": "gt"
},
"operator": {
"type": "and"
},
"query": {
"params": [
"Failed jobs",
"5m",
"now"
]
},
"reducer": {
"params": [],
"type": "last"
},
"type": "query"
}
],
"executionErrorState": "alerting",
"for": "5m",
"frequency": "1m",
"handler": 1,
"name": "Daily backup jobs state alert",
"noDataState": "no_data",
"notifications": []
},
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"description": "Daily backup job systemd timer unit state",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 60,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "stepAfter",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
}
},
"overrides": [
{
"matcher": {
"id": "byName",
"options": "Failed"
},
"properties": [
{
"id": "color",
"value": {
"fixedColor": "semi-dark-red",
"mode": "fixed"
}
}
]
}
]
},
"gridPos": {
"h": 5,
"w": 12,
"x": 0,
"y": 1
},
"id": 46,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "multi",
"sort": "none"
}
},
"pluginVersion": "8.4.7",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"exemplar": true,
"expr": "sum(node_systemd_unit_state{name=\"borgbackup-job-daily.timer\", state=~\"active\"})",
"hide": false,
"interval": "",
"legendFormat": "Active",
"refId": "Active timers"
},
{
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"exemplar": true,
"expr": "sum(node_systemd_unit_state{name=\"borgbackup-job-daily.service\", state=\"failed\"})",
"hide": false,
"interval": "",
"legendFormat": "Failed",
"refId": "Failed jobs"
}
],
"thresholds": [
{
"colorMode": "critical",
"op": "gt",
"value": 0,
"visible": true
}
],
"title": "Daily backup timer state",
"type": "timeseries"
},
{
"alert": {
"alertRuleTags": {},
"conditions": [
{
"evaluator": {
"params": [
0
],
"type": "gt"
},
"operator": {
"type": "and"
},
"query": {
"params": [
"Failed jobs",
"5m",
"now"
]
},
"reducer": {
"params": [],
"type": "last"
},
"type": "query"
}
],
"executionErrorState": "alerting",
"for": "5m",
"frequency": "1m",
"handler": 1,
"name": "Monthly check-repo timer state alert",
"noDataState": "no_data",
"notifications": []
},
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"description": "Monthly check-repo systemd timer unit state",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 60,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "stepAfter",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
}
]
},
"unit": "short"
},
"overrides": [
{
"matcher": {
"id": "byName",
"options": "Failed"
},
"properties": [
{
"id": "color",
"value": {
"fixedColor": "semi-dark-red",
"mode": "fixed"
}
}
]
}
]
},
"gridPos": {
"h": 5,
"w": 12,
"x": 12,
"y": 1
},
"id": 47,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "multi",
"sort": "none"
}
},
"pluginVersion": "8.4.7",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"exemplar": true,
"expr": "sum(node_systemd_unit_state{name=\"borgbackup-check-repo.timer\", state=\"active\"})",
"hide": false,
"interval": "",
"legendFormat": "Active",
"refId": "Active timers"
},
{
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"exemplar": true,
"expr": "sum(node_systemd_unit_state{name=\"borgbackup-check-repo.service\", state=\"failed\"})",
"hide": false,
"interval": "",
"legendFormat": "Failed",
"refId": "Failed jobs"
}
],
"thresholds": [
{
"colorMode": "critical",
"op": "gt",
"value": 0,
"visible": true
}
],
"title": "Monthly check-repo timer state",
"type": "timeseries"
},
{
"alert": {
"alertRuleTags": {},
"conditions": [
{
"evaluator": {
"params": [
-90000
],
"type": "lt"
},
"operator": {
"type": "and"
},
"query": {
"params": [
"A",
"5m",
"now"
]
},
"reducer": {
"params": [],
"type": "avg"
},
"type": "query"
}
],
"executionErrorState": "keep_state",
"for": "5m",
"frequency": "1m",
"handler": 1,
"message": "Borgbase.com daily backup job trigger.",
"name": "Daily backup to Borgbase Trigger alert",
"noDataState": "no_data",
"notifications": []
},
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"description": "This shows the last triggering of the borgbackup-job-daily.timer systemd unit.",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "never",
"spanNulls": true,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "area"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "red",
"value": null
},
{
"color": "semi-dark-green",
"value": -90000
}
]
},
"unit": "s"
},
"overrides": []
},
"gridPos": {
"h": 5,
"w": 12,
"x": 0,
"y": 6
},
"id": 41,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": false
},
"tooltip": {
"mode": "multi",
"sort": "asc"
}
},
"pluginVersion": "8.3.5",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"exemplar": false,
"expr": "node_systemd_timer_last_trigger_seconds{name=\"borgbackup-job-daily.timer\"} - time()",
"interval": "",
"intervalFactor": 1,
"legendFormat": "{{instance}}",
"refId": "A"
}
],
"thresholds": [
{
"colorMode": "critical",
"op": "lt",
"value": -90000,
"visible": true
}
],
"title": "Daily backup job trigger",
"type": "timeseries"
},
{
"alert": {
"alertRuleTags": {},
"conditions": [
{
"evaluator": {
"params": [
-2764800
],
"type": "lt"
},
"operator": {
"type": "and"
},
"query": {
"params": [
"A"
]
},
"reducer": {
"type": "last"
}
},
{
"evaluator": {
"params": [
-30758400
],
"type": "gt"
},
"operator": {
"type": "and"
},
"query": {
"params": [
"A"
]
},
"reducer": {
"params": [],
"type": "last"
},
"type": "query"
}
],
"executionErrorState": "keep_state",
"for": "5m",
"frequency": "1m",
"handler": 1,
"message": "Borgbase.com monthly check-repo trigger.",
"name": "Monthly check of Borgbase backup Trigger alert",
"noDataState": "no_data",
"notifications": []
},
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"description": "This shows the last triggering of the borgbackup-job-daily.timer systemd unit.",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "never",
"spanNulls": true,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "area"
}
},
"mappings": [],
"max": 0,
"min": -2864800,
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "red",
"value": null
},
{
"color": "semi-dark-green",
"value": -2764800
}
]
},
"unit": "s"
},
"overrides": []
},
"gridPos": {
"h": 5,
"w": 12,
"x": 12,
"y": 6
},
"id": 42,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": false
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"pluginVersion": "8.3.5",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"exemplar": false,
"expr": "node_systemd_timer_last_trigger_seconds{name=\"borgbackup-check-repo.timer\"} - time()",
"interval": "",
"intervalFactor": 4,
"legendFormat": "{{instance}}",
"refId": "A"
}
],
"thresholds": [
{
"colorMode": "critical",
"op": "lt",
"value": -2764800,
"visible": true
}
],
"title": "Monthly check-repo trigger",
"type": "timeseries"
},
{
"alert": {
"alertRuleTags": {},
"conditions": [
{
"evaluator": {
"params": [
9000
],
"type": "gt"
},
"operator": {
"type": "and"
},
"query": {
"params": [
"JobRunTime",
"5m",
"now"
]
},
"reducer": {
"params": [],
"type": "last"
},
"type": "query"
}
],
"executionErrorState": "alerting",
"for": "5m",
"frequency": "1m",
"handler": 1,
"message": "A backup job ran for more than 2 ½ hours. After 3 hours it could run into the check-repo job start time, depending on its \"random\" job delay.",
"name": "Daily backup job run time alert",
"noDataState": "no_data",
"notifications": []
},
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"description": "When was the systemd unit active?",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "left",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 60,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "stepAfter",
"lineStyle": {
"fill": "solid"
},
"lineWidth": 0,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "never",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "#EAB839",
"value": 7200
}
]
},
"unit": "s"
},
"overrides": []
},
"gridPos": {
"h": 5,
"w": 12,
"x": 0,
"y": 11
},
"id": 52,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"exemplar": true,
"expr": "abs((node_systemd_timer_last_trigger_seconds{name=\"borgbackup-job-daily.timer\"} - time())) * on (instance) node_systemd_unit_state{name=\"borgbackup-job-daily.service\", state=\"active\"}",
"interval": "",
"legendFormat": "{{instance}}",
"refId": "JobRunTime"
}
],
"thresholds": [
{
"colorMode": "critical",
"op": "gt",
"value": 9000,
"visible": true
}
],
"title": "Daily backup job run time",
"type": "timeseries"
},
{
"alert": {
"alertRuleTags": {},
"conditions": [
{
"evaluator": {
"params": [
18000
],
"type": "gt"
},
"operator": {
"type": "and"
},
"query": {
"params": [
"JobRunTime",
"5m",
"now"
]
},
"reducer": {
"params": [],
"type": "last"
},
"type": "query"
}
],
"executionErrorState": "alerting",
"for": "5m",
"frequency": "1m",
"handler": 1,
"message": "A borg check-repo job ran for more than five hours. After six hours it could collide with the daily backup job, depending on that job's \"random\" delay. If the backup set is large and this is expected to happen again, consider using borgbackup partial checks (--max-duration SECONDS parameter).",
"name": "Monthly check-repo run time alert",
"noDataState": "no_data",
"notifications": []
},
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"description": "When was the systemd unit active?",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "left",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 60,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "stepAfter",
"lineStyle": {
"fill": "solid"
},
"lineWidth": 0,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "never",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "#EAB839",
"value": 15000
}
]
},
"unit": "s"
},
"overrides": []
},
"gridPos": {
"h": 5,
"w": 12,
"x": 12,
"y": 11
},
"id": 53,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"exemplar": true,
"expr": "abs((node_systemd_timer_last_trigger_seconds{name=\"borgbackup-check-repo.timer\"} - time())) * on (instance) node_systemd_unit_state{name=\"borgbackup-check-repo.service\", state=\"active\"}",
"interval": "",
"legendFormat": "{{instance}}",
"refId": "JobRunTime"
}
],
"thresholds": [
{
"colorMode": "critical",
"op": "gt",
"value": 18000,
"visible": true
}
],
"title": "Monthly check-repo run time",
"type": "timeseries"
},
{
"datasource": {
"type": "loki",
"uid": "LocalLoki"
},
"description": "The \"duration\" that borgbackup status reports.",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"axisSoftMin": 0,
"fillOpacity": 60,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"lineWidth": 1,
"scaleDistribution": {
"type": "linear"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "#EAB839",
"value": 3600
},
{
"color": "red",
"value": 10800
}
]
},
"unit": "dtdurations"
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 12,
"x": 0,
"y": 16
},
"id": 49,
"options": {
"barRadius": 0,
"barWidth": 0.1,
"fullHighlight": false,
"groupWidth": 0.7,
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": false
},
"orientation": "auto",
"showValue": "auto",
"stacking": "none",
"tooltip": {
"mode": "single",
"sort": "none"
},
"xField": "host",
"xTickLabelRotation": -45,
"xTickLabelSpacing": 0
},
"pluginVersion": "8.4.7",
"targets": [
{
"datasource": {
"type": "loki",
"uid": "LocalLoki"
},
"expr": "{unit=\"borgbackup-job-daily.service\"} |= \"duration\" | pattern \"<_>\\\"duration\\\": <duration>,\"",
"legendFormat": "{{host}}",
"queryType": "range",
"refId": "A"
}
],
"title": "Daily backup job run time (as reported by borg)",
"transformations": [
{
"id": "extractFields",
"options": {
"source": "labels"
}
},
{
"id": "convertFieldType",
"options": {
"conversions": [
{
"destinationType": "number",
"targetField": "duration"
}
],
"fields": {}
}
}
],
"type": "barchart"
},
{
"datasource": {
"type": "loki",
"uid": "LocalLoki"
},
"description": "\"compressed_size\" is size of last archive, \"unique_csize\" is deduplicated size of all archives.",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"axisSoftMin": 0,
"fillOpacity": 80,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"lineWidth": 1,
"scaleDistribution": {
"type": "linear"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
}
]
},
"unit": "bytes"
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 12,
"x": 12,
"y": 16
},
"id": 55,
"options": {
"barRadius": 0,
"barWidth": 0.97,
"fullHighlight": false,
"groupWidth": 0.7,
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": false
},
"orientation": "vertical",
"showValue": "auto",
"stacking": "none",
"tooltip": {
"mode": "single",
"sort": "none"
},
"xField": "host",
"xTickLabelRotation": -45,
"xTickLabelSpacing": 0
},
"pluginVersion": "8.4.7",
"targets": [
{
"datasource": {
"type": "loki",
"uid": "LocalLoki"
},
"editorMode": "code",
"expr": "{unit=\"borgbackup-job-daily.service\"} |= \"compressed_size\" | pattern \"<_>\\\"compressed_size\\\": <compressed_size>,\"",
"hide": false,
"legendFormat": "{{host}} archive",
"queryType": "range",
"refId": "This archive size in bytes"
},
{
"datasource": {
"type": "loki",
"uid": "LocalLoki"
},
"editorMode": "code",
"expr": "{unit=\"borgbackup-job-daily.service\"} |= \"unique_csize\" | pattern \"<_>\\\"unique_csize\\\": <unique_csize>,\"",
"hide": false,
"legendFormat": "{{host}} all archives",
"queryType": "range",
"refId": "All archives deduplicated size"
}
],
"title": "Backup set size",
"transformations": [
{
"id": "extractFields",
"options": {
"source": "labels"
}
},
{
"id": "convertFieldType",
"options": {
"conversions": [
{
"destinationType": "number",
"targetField": "unique_csize"
},
{
"destinationType": "number",
"targetField": "compressed_size"
}
],
"fields": {}
}
}
],
"type": "barchart"
}
],
"refresh": "5m",
"schemaVersion": 38,
"tags": [],
"templating": {
"list": []
},
"time": {
"from": "now-7d",
"to": "now"
},
"timepicker": {},
"timezone": "",
"title": "Backups",
"uid": "backups",
"version": 1,
"weekStart": ""
}
{
"annotations": {
"list": [
{
"builtIn": 1,
"datasource": {
"type": "datasource",
"uid": "grafana"
},
"enable": true,
"hide": true,
"iconColor": "rgba(0, 211, 255, 1)",
"name": "Annotations & Alerts",
"target": {
"limit": 100,
"matchAny": false,
"tags": [],
"type": "dashboard"
},
"type": "dashboard"
}
]
},
"description": "Watching the watchers",
"editable": true,
"fiscalYearStartMonth": 0,
"graphTooltip": 0,
"links": [],
"liveNow": false,
"panels": [
{
"alert": {
"alertRuleTags": {},
"conditions": [
{
"evaluator": {
"params": [
0
],
"type": "gt"
},
"operator": {
"type": "and"
},
"query": {
"params": [
"A",
"5m",
"now"
]
},
"reducer": {
"params": [],
"type": "count"
},
"type": "query"
}
],
"executionErrorState": "alerting",
"for": "10m",
"frequency": "1m",
"handler": 1,
"name": "Scraping down",
"noDataState": "ok",
"notifications": []
},
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"description": "Is Prometheus having problems scraping our instances? Should be zero.",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "bars",
"fillOpacity": 100,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "never",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "normal"
},
"thresholdsStyle": {
"mode": "line"
}
},
"decimals": 0,
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "transparent",
"value": null
},
{
"color": "red",
"value": 0
}
]
},
"unit": "short"
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 12,
"x": 0,
"y": 0
},
"id": 6,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "multi",
"sort": "none"
}
},
"pluginVersion": "10.4.6",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"exemplar": true,
"expr": "count by (job, instance) (up == 0)",
"hide": false,
"interval": "",
"legendFormat": "{{job}}/{{instance}}",
"refId": "A"
}
],
"title": "Scraping failures",
"type": "timeseries"
},
{
"alert": {
"alertRuleTags": {},
"conditions": [
{
"evaluator": {
"params": [
600
],
"type": "gt"
},
"operator": {
"type": "and"
},
"query": {
"params": [
"A",
"5m",
"now"
]
},
"reducer": {
"params": [],
"type": "last"
},
"type": "query"
}
],
"executionErrorState": "alerting",
"for": "5m",
"frequency": "1m",
"handler": 1,
"message": "A metrics text file is older than 10 minutes.",
"name": "Textcollector staleness alert",
"noDataState": "no_data",
"notifications": []
},
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"description": "Node-Exporter's TextCollector reads in plain text files containing metrics every few minutes. Make sure we're not reporting stale text files as new data - Alert if any of the text files is not getting updated.",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "s"
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 12,
"x": 12,
"y": 0
},
"id": 8,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"exemplar": true,
"expr": "time() - node_textfile_mtime_seconds",
"interval": "",
"legendFormat": "{{instance}}/{{file}}",
"refId": "A"
}
],
"thresholds": [
{
"colorMode": "critical",
"op": "gt",
"value": 600,
"visible": true
}
],
"title": "Textfile collector freshness",
"type": "timeseries"
}
],
"refresh": false,
"schemaVersion": 39,
"tags": [],
"templating": {
"list": []
},
"time": {
"from": "now-30m",
"to": "now"
},
"timepicker": {},
"timezone": "",
"title": "Meta monitoring",
"uid": "MetaMonitoring",
"version": 1,
"weekStart": ""
}
Source diff could not be displayed: it is too large. Options to address this: view the blob.
......@@ -8,19 +8,25 @@
"hide": true,
"iconColor": "rgba(0, 211, 255, 1)",
"name": "Annotations & Alerts",
"target": {
"limit": 100,
"matchAny": false,
"tags": [],
"type": "dashboard"
},
"type": "dashboard"
}
]
},
"description": "PaymentServer and related metrics",
"editable": true,
"gnetId": null,
"graphTooltip": 0,
"fiscalYearStartMonth": 0,
"graphTooltip": 2,
"links": [],
"liveNow": false,
"panels": [
{
"collapsed": false,
"datasource": null,
"gridPos": {
"h": 1,
"w": 24,
......@@ -33,173 +39,252 @@
"type": "row"
},
{
"aliasColors": {
"Attempts": "yellow",
"Successes": "green"
},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": null,
"description": "Our calls to the Stripe API: Attempted and successful credit card charges.",
"description": "Calls to our Stripe Webhook and the (obsolete) Charge endpoint.",
"fieldConfig": {
"defaults": {},
"overrides": []
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 10,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "never",
"spanNulls": true,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "short"
},
"overrides": [
{
"matcher": {
"id": "byName",
"options": "Attempts"
},
"properties": [
{
"id": "color",
"value": {
"fixedColor": "yellow",
"mode": "fixed"
}
}
]
},
{
"matcher": {
"id": "byName",
"options": "Successes"
},
"properties": [
{
"id": "color",
"value": {
"fixedColor": "green",
"mode": "fixed"
}
}
]
}
]
},
"fill": 1,
"fillGradient": 0,
"gridPos": {
"h": 7,
"w": 12,
"x": 0,
"y": 1
},
"hiddenSeries": false,
"id": 22,
"legend": {
"avg": false,
"current": false,
"max": false,
"min": false,
"show": true,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"nullPointMode": "null",
"options": {
"alertThreshold": true
},
"percentage": false,
"pluginVersion": "7.5.7",
"pointradius": 2,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom"
},
"tooltip": {
"mode": "multi",
"sort": "none"
}
},
"pluginVersion": "8.3.4",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"exemplar": true,
"expr": "sum (http_responses_total{path=\"v1/stripe/webhook\"})",
"hide": false,
"interval": "",
"legendFormat": "Webhook attempts",
"refId": "Webhook attempts"
},
{
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"exemplar": true,
"expr": "http_responses_total{path=\"v1/stripe/webhook\", status=\"2XX\"}",
"hide": false,
"interval": "",
"legendFormat": "Webhook successes",
"refId": "Webhook successes"
},
{
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"exemplar": true,
"expr": "payment_processors_stripe_charge_attempts",
"hide": false,
"interval": "",
"legendFormat": "Attempts",
"refId": "B"
"legendFormat": "Charge attempts",
"refId": "Charge attempts"
},
{
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"exemplar": true,
"expr": "payment_processors_stripe_charge_successes",
"hide": false,
"interval": "",
"legendFormat": "Successes",
"refId": "C"
"legendFormat": "Charge successes",
"refId": "Charge successes"
}
],
"thresholds": [],
"timeFrom": null,
"timeRegions": [],
"timeShift": null,
"title": "Stripe",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"$$hashKey": "object:350",
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
},
{
"$$hashKey": "object:351",
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
"title": "Stripe Webhook and (obsolete) Charge API",
"type": "timeseries"
},
{
"description": "",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 10,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "never",
"spanNulls": true,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
],
"yaxis": {
"align": false,
"alignLevel": null
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "short"
},
"overrides": [
{
"aliasColors": {
"Redeemed vouchers": "yellow"
"matcher": {
"id": "byType",
"options": "time"
},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": null,
"description": "",
"fieldConfig": {
"defaults": {},
"overrides": []
"properties": [
{
"id": "custom.axisPlacement",
"value": "hidden"
}
]
}
]
},
"fill": 1,
"fillGradient": 0,
"gridPos": {
"h": 7,
"w": 12,
"x": 12,
"y": 1
},
"hiddenSeries": false,
"id": 20,
"legend": {
"avg": false,
"current": false,
"max": false,
"min": false,
"show": true,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"nullPointMode": "null",
"options": {
"alertThreshold": true
},
"percentage": false,
"pluginVersion": "7.5.7",
"pointradius": 2,
"points": false,
"renderer": "flot",
"seriesOverrides": [
{
"$$hashKey": "object:223",
"alias": "Redeemed vouchers",
"yaxis": 1
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom"
},
{
"$$hashKey": "object:230",
"alias": "Issued signatures",
"yaxis": 2
"tooltip": {
"mode": "multi",
"sort": "none"
}
],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
},
"pluginVersion": "8.4.7",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"exemplar": true,
"expr": "payment_redemption_signatures_issued",
"interval": "",
......@@ -207,6 +292,10 @@
"refId": "A"
},
{
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"exemplar": true,
"expr": "payment_redemption_vouchers_redeemed",
"format": "time_series",
......@@ -216,58 +305,423 @@
"refId": "B"
}
],
"thresholds": [],
"timeFrom": null,
"timeRegions": [],
"timeShift": null,
"title": "Redemption",
"type": "timeseries"
},
{
"collapsed": false,
"gridPos": {
"h": 1,
"w": 24,
"x": 0,
"y": 8
},
"id": 32,
"panels": [],
"title": "HTTP v1/stripe/webhook",
"type": "row"
},
{
"description": "HTTPS responses per second",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 10,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "never",
"spanNulls": true,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"links": [],
"mappings": [],
"min": 0,
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "short"
},
"overrides": []
},
"gridPos": {
"h": 7,
"w": 8,
"x": 0,
"y": 9
},
"id": 33,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom"
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"pluginVersion": "8.3.4",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"exemplar": true,
"expr": "rate(http_responses_total{path=\"v1/stripe/webhook\"}[5m])",
"instant": false,
"interval": "",
"legendFormat": "{{status}}",
"refId": "A"
}
],
"title": "Requests per second",
"type": "timeseries"
},
{
"description": "",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 10,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "never",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "normal"
},
"thresholdsStyle": {
"mode": "off"
}
},
"links": [],
"mappings": [],
"max": 1,
"min": 0,
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "percentunit"
},
"overrides": []
},
"gridPos": {
"h": 7,
"w": 8,
"x": 8,
"y": 9
},
"id": 34,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom"
},
"tooltip": {
"mode": "multi",
"sort": "none"
}
},
"pluginVersion": "8.3.4",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"exemplar": true,
"expr": "sum(http_responses_total{path=\"v1/stripe/webhook\", status=\"4XX\"}) / sum(http_responses_total{path=\"v1/stripe/webhook\"})",
"interval": "",
"legendFormat": "Client error (4XX) rate",
"refId": "A"
},
{
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"exemplar": true,
"expr": "sum(http_responses_total{path=\"v1/stripe/charge\", status=\"5XX\"}) / sum(http_responses_total{path=\"v1/stripe/webhook\"})",
"interval": "",
"legendFormat": "Server error (5XX) rate",
"refId": "B"
}
],
"title": "Error rate",
"type": "timeseries"
},
{
"description": "Request durations, stacked.",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 20,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "never",
"spanNulls": true,
"stacking": {
"group": "A",
"mode": "normal"
},
"thresholdsStyle": {
"mode": "off"
}
},
"links": [],
"mappings": [],
"min": 0,
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "short"
},
"overrides": [
{
"matcher": {
"id": "byName",
"options": "=< 0.1s"
},
"properties": [
{
"id": "color",
"value": {
"fixedColor": "blue",
"mode": "fixed"
}
}
]
},
{
"matcher": {
"id": "byName",
"options": "=< 1s"
},
"properties": [
{
"id": "color",
"value": {
"fixedColor": "green",
"mode": "fixed"
}
}
]
},
{
"matcher": {
"id": "byName",
"options": "=< 5s"
},
"properties": [
{
"id": "color",
"value": {
"fixedColor": "yellow",
"mode": "fixed"
}
}
]
},
{
"matcher": {
"id": "byName",
"options": "> 5s"
},
"properties": [
{
"id": "color",
"value": {
"fixedColor": "orange",
"mode": "fixed"
}
}
]
}
]
},
"gridPos": {
"h": 7,
"w": 8,
"x": 16,
"y": 9
},
"id": 13,
"links": [],
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom"
},
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"$$hashKey": "object:285",
"format": "short",
"label": "",
"logBase": 1,
"max": null,
"min": "0",
"show": true
},
{
"$$hashKey": "object:286",
"decimals": null,
"format": "short",
"label": "",
"logBase": 1,
"max": null,
"min": "0",
"show": true
"mode": "multi",
"sort": "none"
}
},
"pluginVersion": "8.3.4",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"exemplar": true,
"expr": "http_request_duration_seconds_bucket{path=\"v1/stripe/webhook\", le=\"0.1\"}",
"format": "time_series",
"hide": false,
"instant": false,
"interval": "",
"intervalFactor": 1,
"legendFormat": "=< 0.1s",
"refId": "A"
},
{
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"exemplar": true,
"expr": "http_request_duration_seconds_bucket{path=\"v1/stripe/webhook\", le=\"1.0\"} - ignoring(le) http_request_duration_seconds_bucket{path=\"v1/stripe/webhook\", le=\"0.1\"}",
"format": "time_series",
"hide": false,
"instant": false,
"interval": "",
"intervalFactor": 1,
"legendFormat": "=< 1s",
"refId": "D"
},
{
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"exemplar": true,
"expr": "http_request_duration_seconds_bucket{path=\"v1/stripe/webhook\", le=\"5.0\"} - ignoring(le) http_request_duration_seconds_bucket{path=\"v1/stripe/webhook\", le=\"1.0\"}",
"format": "time_series",
"hide": false,
"instant": false,
"interval": "",
"intervalFactor": 1,
"legendFormat": "=< 5s",
"refId": "B"
},
{
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"exemplar": true,
"expr": "http_request_duration_seconds_bucket{path=\"v1/stripe/webhook\", le=\"+Inf\"} - ignoring(le) http_request_duration_seconds_bucket{path=\"v1/stripe/webhook\", le=\"5.0\"}",
"format": "time_series",
"hide": false,
"instant": false,
"interval": "",
"intervalFactor": 1,
"legendFormat": "> 5s",
"refId": "C"
}
],
"yaxis": {
"align": false,
"alignLevel": null
}
"title": "Durations",
"type": "timeseries"
},
{
"collapsed": false,
"datasource": null,
"gridPos": {
"h": 1,
"w": 24,
"x": 0,
"y": 8
"y": 16
},
"id": 18,
"panels": [],
......@@ -275,52 +729,79 @@
"type": "row"
},
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": null,
"description": "HTTPS responses per second",
"fieldConfig": {
"defaults": {
"links": []
"color": {
"mode": "palette-classic"
},
"custom": {
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 10,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "never",
"spanNulls": true,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"links": [],
"mappings": [],
"min": 0,
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "short"
},
"overrides": []
},
"fill": 1,
"fillGradient": 0,
"gridPos": {
"h": 7,
"w": 8,
"x": 0,
"y": 9
"y": 17
},
"hiddenSeries": false,
"id": 4,
"legend": {
"avg": false,
"current": false,
"max": false,
"min": false,
"show": true,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"nullPointMode": "null",
"options": {
"alertThreshold": true
},
"percentage": false,
"pluginVersion": "7.5.7",
"pointradius": 2,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom"
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"pluginVersion": "8.3.4",
"targets": [
{
"exemplar": true,
......@@ -331,96 +812,84 @@
"refId": "A"
}
],
"thresholds": [],
"timeFrom": null,
"timeRegions": [],
"timeShift": null,
"title": "Requests per second",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"$$hashKey": "object:452",
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": "0",
"show": true
},
{
"$$hashKey": "object:453",
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
}
],
"yaxis": {
"align": false,
"alignLevel": null
}
"type": "timeseries"
},
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": null,
"description": "",
"fieldConfig": {
"defaults": {
"links": []
"color": {
"mode": "palette-classic"
},
"custom": {
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 10,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "never",
"spanNulls": true,
"stacking": {
"group": "A",
"mode": "normal"
},
"thresholdsStyle": {
"mode": "off"
}
},
"links": [],
"mappings": [],
"max": 1,
"min": 0,
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "percentunit"
},
"overrides": []
},
"fill": 1,
"fillGradient": 0,
"gridPos": {
"h": 7,
"w": 8,
"x": 8,
"y": 9
"y": 17
},
"hiddenSeries": false,
"id": 15,
"legend": {
"avg": false,
"current": false,
"max": false,
"min": false,
"show": true,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"nullPointMode": "null",
"options": {
"alertThreshold": true
},
"percentage": false,
"pluginVersion": "7.5.7",
"pointradius": 2,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"stack": true,
"steppedLine": false,
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom"
},
"tooltip": {
"mode": "multi",
"sort": "none"
}
},
"pluginVersion": "8.3.4",
"targets": [
{
"exemplar": true,
......@@ -437,103 +906,145 @@
"refId": "B"
}
],
"thresholds": [],
"timeFrom": null,
"timeRegions": [],
"timeShift": null,
"title": "Error rate",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"$$hashKey": "object:576",
"format": "percentunit",
"label": null,
"logBase": 1,
"max": "1",
"min": "0",
"show": true
},
{
"$$hashKey": "object:577",
"format": "percent",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": false
}
],
"yaxis": {
"align": false,
"alignLevel": null
}
"type": "timeseries"
},
{
"aliasColors": {
"=< 0.1s": "blue",
"=< 1s": "green",
"=< 5s": "yellow",
"> 5s": "orange"
},
"bars": false,
"cacheTimeout": null,
"dashLength": 10,
"dashes": false,
"datasource": null,
"description": "Request durations, stacked",
"fieldConfig": {
"defaults": {
"links": []
"color": {
"mode": "palette-classic"
},
"custom": {
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 20,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "never",
"spanNulls": true,
"stacking": {
"group": "A",
"mode": "normal"
},
"thresholdsStyle": {
"mode": "off"
}
},
"overrides": []
"links": [],
"mappings": [],
"min": 0,
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "short"
},
"overrides": [
{
"matcher": {
"id": "byName",
"options": "=< 0.1s"
},
"properties": [
{
"id": "color",
"value": {
"fixedColor": "blue",
"mode": "fixed"
}
}
]
},
{
"matcher": {
"id": "byName",
"options": "=< 1s"
},
"properties": [
{
"id": "color",
"value": {
"fixedColor": "green",
"mode": "fixed"
}
}
]
},
{
"matcher": {
"id": "byName",
"options": "=< 5s"
},
"properties": [
{
"id": "color",
"value": {
"fixedColor": "yellow",
"mode": "fixed"
}
}
]
},
{
"matcher": {
"id": "byName",
"options": "> 5s"
},
"properties": [
{
"id": "color",
"value": {
"fixedColor": "orange",
"mode": "fixed"
}
}
]
}
]
},
"fill": 2,
"fillGradient": 0,
"gridPos": {
"h": 7,
"w": 8,
"x": 16,
"y": 9
"y": 17
},
"hiddenSeries": false,
"id": 12,
"legend": {
"avg": false,
"current": false,
"max": false,
"min": false,
"show": true,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"links": [],
"nullPointMode": "null",
"options": {
"alertThreshold": true
},
"percentage": false,
"pluginVersion": "7.5.7",
"pointradius": 2,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"stack": true,
"steppedLine": false,
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom"
},
"tooltip": {
"mode": "multi",
"sort": "none"
}
},
"pluginVersion": "8.3.4",
"targets": [
{
"exemplar": true,
......@@ -580,57 +1091,16 @@
"refId": "C"
}
],
"thresholds": [],
"timeFrom": null,
"timeRegions": [],
"timeShift": null,
"title": "Durations",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"$$hashKey": "object:625",
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": "0",
"show": true
},
{
"$$hashKey": "object:626",
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
}
],
"yaxis": {
"align": false,
"alignLevel": null
}
"type": "timeseries"
},
{
"collapsed": false,
"datasource": null,
"gridPos": {
"h": 1,
"w": 24,
"x": 0,
"y": 16
"y": 24
},
"id": 11,
"panels": [],
......@@ -638,52 +1108,79 @@
"type": "row"
},
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": null,
"description": "HTTPS responses per second",
"fieldConfig": {
"defaults": {
"links": []
"color": {
"mode": "palette-classic"
},
"custom": {
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 10,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "never",
"spanNulls": true,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"links": [],
"mappings": [],
"min": 0,
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "short"
},
"overrides": []
},
"fill": 1,
"fillGradient": 0,
"gridPos": {
"h": 7,
"w": 8,
"x": 0,
"y": 17
"y": 25
},
"hiddenSeries": false,
"id": 2,
"legend": {
"avg": false,
"current": false,
"max": false,
"min": false,
"show": true,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"nullPointMode": "null",
"options": {
"alertThreshold": true
},
"percentage": false,
"pluginVersion": "7.5.7",
"pointradius": 2,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom"
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"pluginVersion": "8.3.4",
"targets": [
{
"expr": "rate(http_responses_total{path=\"v1/redeem\"}[5m])",
......@@ -693,95 +1190,87 @@
"refId": "A"
}
],
"thresholds": [],
"timeFrom": null,
"timeRegions": [],
"timeShift": null,
"title": "Requests per second",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"$$hashKey": "object:751",
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": "0",
"show": true
},
{
"$$hashKey": "object:752",
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
}
],
"yaxis": {
"align": false,
"alignLevel": null
}
"type": "timeseries"
},
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": null,
"description": "HTTP 4xx and 5xx errors",
"fieldConfig": {
"defaults": {
"links": []
"color": {
"mode": "palette-classic"
},
"custom": {
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 20,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"lineInterpolation": "linear",
"lineStyle": {
"fill": "solid"
},
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "never",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"links": [],
"mappings": [],
"max": 1,
"min": 0,
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "percentunit"
},
"overrides": []
},
"fill": 1,
"fillGradient": 0,
"gridPos": {
"h": 7,
"w": 8,
"x": 8,
"y": 17
"y": 25
},
"hiddenSeries": false,
"id": 16,
"legend": {
"avg": false,
"current": false,
"max": false,
"min": false,
"show": true,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"nullPointMode": "null",
"options": {
"alertThreshold": true
},
"percentage": false,
"pluginVersion": "7.5.7",
"pointradius": 2,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"stack": true,
"steppedLine": false,
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom"
},
"tooltip": {
"mode": "multi",
"sort": "none"
}
},
"pluginVersion": "8.3.4",
"targets": [
{
"expr": "sum(http_responses_total{path=\"v1/redeem\", status=\"4XX\"}) / sum(http_responses_total{path=\"v1/redeem\"})",
......@@ -794,107 +1283,149 @@
"refId": "B"
}
],
"thresholds": [],
"timeFrom": null,
"timeRegions": [],
"timeShift": null,
"title": "Error rate",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"$$hashKey": "object:804",
"format": "percentunit",
"label": null,
"logBase": 1,
"max": "1",
"min": "0",
"show": true
},
{
"$$hashKey": "object:805",
"format": "percent",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": false
"type": "timeseries"
},
{
"description": "Request durations, stacked",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 20,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "never",
"spanNulls": true,
"stacking": {
"group": "A",
"mode": "normal"
},
"thresholdsStyle": {
"mode": "off"
}
],
"yaxis": {
"align": false,
"alignLevel": null
},
"links": [],
"mappings": [],
"min": 0,
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "short"
},
"overrides": [
{
"aliasColors": {
"=< 0.1s": "blue",
"=< 1s": "green",
"=< 5s": "yellow",
"> 5s": "orange"
"matcher": {
"id": "byName",
"options": "=< 0.1s"
},
"bars": false,
"cacheTimeout": null,
"dashLength": 10,
"dashes": false,
"datasource": null,
"description": "Request durations, stacked.",
"fieldConfig": {
"defaults": {
"links": []
"properties": [
{
"id": "color",
"value": {
"fixedColor": "blue",
"mode": "fixed"
}
}
]
},
"overrides": []
{
"matcher": {
"id": "byName",
"options": "=< 1s"
},
"properties": [
{
"id": "color",
"value": {
"fixedColor": "green",
"mode": "fixed"
}
}
]
},
{
"matcher": {
"id": "byName",
"options": "=< 5s"
},
"properties": [
{
"id": "color",
"value": {
"fixedColor": "yellow",
"mode": "fixed"
}
}
]
},
{
"matcher": {
"id": "byName",
"options": "> 5s"
},
"properties": [
{
"id": "color",
"value": {
"fixedColor": "orange",
"mode": "fixed"
}
}
]
}
]
},
"fill": 2,
"fillGradient": 0,
"gridPos": {
"h": 7,
"w": 8,
"x": 16,
"y": 17
"y": 25
},
"hiddenSeries": false,
"id": 13,
"legend": {
"avg": false,
"current": false,
"max": false,
"min": false,
"show": true,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"id": 35,
"links": [],
"nullPointMode": "null",
"options": {
"alertThreshold": true
},
"percentage": false,
"pluginVersion": "7.5.7",
"pointradius": 2,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"stack": true,
"steppedLine": false,
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom"
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"pluginVersion": "8.3.4",
"targets": [
{
"exemplar": true,
"expr": "http_request_duration_seconds_bucket{path=\"v1/redeem\", le=\"0.1\"}",
"expr": "http_request_duration_seconds_bucket{path=\"v1/stripe/charge\", le=\"0.1\"}",
"format": "time_series",
"hide": false,
"instant": false,
......@@ -905,7 +1436,7 @@
},
{
"exemplar": true,
"expr": "http_request_duration_seconds_bucket{path=\"v1/redeem\", le=\"1.0\"} - ignoring(le) http_request_duration_seconds_bucket{path=\"v1/redeem\", le=\"0.1\"}",
"expr": "http_request_duration_seconds_bucket{path=\"v1/stripe/charge\", le=\"1.0\"} - ignoring(le) http_request_duration_seconds_bucket{path=\"v1/stripe/charge\", le=\"0.1\"}",
"format": "time_series",
"hide": false,
"instant": false,
......@@ -916,7 +1447,7 @@
},
{
"exemplar": true,
"expr": "http_request_duration_seconds_bucket{path=\"v1/redeem\", le=\"5.0\"} - ignoring(le) http_request_duration_seconds_bucket{path=\"v1/redeem\", le=\"1.0\"}",
"expr": "http_request_duration_seconds_bucket{path=\"v1/stripe/charge\", le=\"5.0\"} - ignoring(le) http_request_duration_seconds_bucket{path=\"v1/stripe/charge\", le=\"1.0\"}",
"format": "time_series",
"hide": false,
"instant": false,
......@@ -927,7 +1458,7 @@
},
{
"exemplar": true,
"expr": "http_request_duration_seconds_bucket{path=\"v1/redeem\", le=\"+Inf\"} - ignoring(le) http_request_duration_seconds_bucket{path=\"v1/redeem\", le=\"5.0\"}",
"expr": "http_request_duration_seconds_bucket{path=\"v1/stripe/charge\", le=\"+Inf\"} - ignoring(le) http_request_duration_seconds_bucket{path=\"v1/stripe/charge\", le=\"5.0\"}",
"format": "time_series",
"hide": false,
"instant": false,
......@@ -937,59 +1468,161 @@
"refId": "C"
}
],
"thresholds": [],
"timeFrom": null,
"timeRegions": [],
"timeShift": null,
"title": "Durations",
"type": "timeseries"
},
{
"collapsed": false,
"gridPos": {
"h": 1,
"w": 24,
"x": 0,
"y": 32
},
"id": 26,
"panels": [],
"title": "Logs",
"type": "row"
},
{
"datasource": {
"type": "loki",
"uid": "LocalLoki"
},
"description": "Exercise in counting maybe interesting lines. This can be alerted on.",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 20,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"lineInterpolation": "linear",
"lineStyle": {
"fill": "solid"
},
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"min": 0,
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "short"
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 12,
"x": 0,
"y": 33
},
"id": 28,
"options": {
"legend": {
"calcs": [],
"displayMode": "hidden",
"placement": "bottom"
},
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"$$hashKey": "object:853",
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": "0",
"show": true
},
{
"$$hashKey": "object:854",
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
"mode": "single",
"sort": "none"
}
},
"targets": [
{
"datasource": {
"type": "loki",
"uid": "LocalLoki"
},
"expr": "count_over_time({host=\"payments\",unit=\"zkapissuer.service\"} !~ \"200|/metrics|Accept\" [5m])",
"refId": "A"
}
],
"yaxis": {
"align": false,
"alignLevel": null
"thresholds": [],
"title": "Number of maybe interesting log lines",
"type": "timeseries"
},
{
"datasource": {
"type": "loki",
"uid": "LocalLoki"
},
"description": "Exercise in filtering the payment server logs.",
"gridPos": {
"h": 8,
"w": 12,
"x": 12,
"y": 33
},
"id": 30,
"options": {
"dedupStrategy": "none",
"enableLogDetails": true,
"prettifyLogMessage": false,
"showCommonLabels": false,
"showLabels": false,
"showTime": true,
"sortOrder": "Descending",
"wrapLogMessage": false
},
"targets": [
{
"datasource": {
"type": "loki",
"uid": "LocalLoki"
},
"expr": "{host=\"payments\",unit=\"zkapissuer.service\"} !~ \"200|/metrics|Accept\"",
"refId": "A"
}
],
"title": "Maybe interesting lines",
"type": "logs"
}
],
"refresh": "5m",
"schemaVersion": 27,
"schemaVersion": 35,
"style": "dark",
"tags": [],
"templating": {
"list": []
},
"time": {
"from": "now-3h",
"from": "now-7d",
"to": "now"
},
"timepicker": {
......@@ -1009,5 +1642,6 @@
"timezone": "",
"title": "Payments",
"uid": "Payments",
"version": 1
"version": 1,
"weekStart": ""
}
......@@ -8,19 +8,26 @@
"hide": true,
"iconColor": "rgba(0, 211, 255, 1)",
"name": "Annotations & Alerts",
"target": {
"limit": 100,
"matchAny": false,
"tags": [],
"type": "dashboard"
},
"type": "dashboard"
}
]
},
"description": "USE: Usage, Saturation and Error rate for our resources",
"editable": true,
"gnetId": null,
"graphTooltip": 0,
"fiscalYearStartMonth": 0,
"graphTooltip": 1,
"id": 125,
"links": [],
"liveNow": false,
"panels": [
{
"collapsed": false,
"datasource": null,
"gridPos": {
"h": 1,
"w": 24,
......@@ -33,101 +40,91 @@
"type": "row"
},
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": null,
"description": "Some of our software runs in a single thread, so this shows max CPU per core (instead of averaged over all cores)",
"fieldConfig": {
"defaults": {
"links": []
"color": {
"mode": "palette-classic"
},
"custom": {
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "never",
"spanNulls": true,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"links": [],
"mappings": [],
"max": 1,
"min": 0,
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "percentunit"
},
"overrides": []
},
"fill": 0,
"fillGradient": 0,
"gridPos": {
"h": 7,
"w": 8,
"w": 6,
"x": 0,
"y": 1
},
"hiddenSeries": false,
"id": 28,
"legend": {
"avg": false,
"current": false,
"max": false,
"min": false,
"show": false,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"nullPointMode": "null",
"options": {
"alertThreshold": true
},
"percentage": false,
"pluginVersion": "7.5.10",
"pointradius": 2,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"legend": {
"calcs": [],
"displayMode": "hidden",
"placement": "bottom"
},
"tooltip": {
"mode": "multi",
"sort": "none"
}
},
"pluginVersion": "8.3.5",
"targets": [
{
"expr": "1 - (max by (instance) (irate(node_cpu_seconds_total{mode=\"idle\"}[5m])))",
"expr": "1 - (min by (instance) (irate(node_cpu_seconds_total{mode=\"idle\"}[5m])))",
"interval": "",
"intervalFactor": 1,
"legendFormat": "{{instance}}",
"refId": "A"
}
],
"thresholds": [],
"timeFrom": null,
"timeRegions": [],
"timeShift": null,
"title": "Max CPU % per core per node",
"tooltip": {
"shared": true,
"sort": 2,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"format": "percentunit",
"label": null,
"logBase": 1,
"max": "1",
"min": "0",
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
}
],
"yaxis": {
"align": false,
"alignLevel": null
}
"type": "timeseries"
},
{
"alert": {
......@@ -158,22 +155,64 @@
}
],
"executionErrorState": "alerting",
"for": "5m",
"for": "2h",
"frequency": "1m",
"handler": 1,
"name": "15 min load average alert",
"noDataState": "no_data",
"notifications": []
},
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": null,
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "never",
"spanNulls": true,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "area"
}
},
"displayName": "${__field.labels.instance}",
"links": []
"links": [],
"mappings": [],
"max": 1,
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "transparent",
"value": null
},
{
"color": "red",
"value": 1
}
]
},
"unit": "short"
},
"overrides": [
{
......@@ -189,40 +228,25 @@
}
]
},
"fill": 0,
"fillGradient": 0,
"gridPos": {
"h": 7,
"w": 8,
"x": 8,
"w": 6,
"x": 6,
"y": 1
},
"hiddenSeries": false,
"id": 6,
"legend": {
"avg": false,
"current": false,
"max": false,
"min": false,
"show": false,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"nullPointMode": "null",
"options": {
"alertThreshold": true
},
"percentage": false,
"pluginVersion": "7.5.10",
"pointradius": 2,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"legend": {
"calcs": [],
"displayMode": "hidden",
"placement": "bottom"
},
"tooltip": {
"mode": "multi",
"sort": "none"
}
},
"pluginVersion": "8.3.5",
"targets": [
{
"expr": "node_load15",
......@@ -232,55 +256,8 @@
"refId": "A"
}
],
"thresholds": [
{
"colorMode": "critical",
"fill": true,
"line": true,
"op": "gt",
"value": 1,
"visible": true
}
],
"timeFrom": null,
"timeRegions": [],
"timeShift": null,
"title": "15 min load average",
"tooltip": {
"shared": true,
"sort": 2,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"format": "short",
"label": null,
"logBase": 1,
"max": "1",
"min": null,
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
}
],
"yaxis": {
"align": false,
"alignLevel": null
}
"type": "timeseries"
},
{
"alert": {
......@@ -298,7 +275,30 @@
},
"query": {
"params": [
"A",
"Hosts without ZFS",
"15m",
"now"
]
},
"reducer": {
"params": [],
"type": "avg"
},
"type": "query"
},
{
"evaluator": {
"params": [
0.8
],
"type": "gt"
},
"operator": {
"type": "and"
},
"query": {
"params": [
"Hosts with ZFS",
"15m",
"now"
]
......@@ -318,116 +318,253 @@
"noDataState": "no_data",
"notifications": []
},
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": null,
"datasource": {},
"description": "How much RAM is in use? Relative to available system memory.",
"fieldConfig": {
"defaults": {
"links": []
"color": {
"mode": "palette-classic"
},
"custom": {
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "never",
"spanNulls": true,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "area"
}
},
"links": [],
"mappings": [],
"max": 1,
"min": 0,
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "transparent",
"value": null
},
{
"color": "red",
"value": 0.8
}
]
},
"unit": "percentunit"
},
"overrides": []
},
"fill": 0,
"fillGradient": 0,
"gridPos": {
"h": 7,
"w": 8,
"x": 16,
"w": 6,
"x": 12,
"y": 1
},
"hiddenSeries": false,
"id": 2,
"legend": {
"alignAsTable": false,
"avg": false,
"current": false,
"max": false,
"min": false,
"show": false,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"nullPointMode": "null",
"options": {
"alertThreshold": true
},
"percentage": false,
"pluginVersion": "7.5.10",
"pointradius": 2,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"legend": {
"calcs": [],
"displayMode": "hidden",
"placement": "bottom"
},
"tooltip": {
"mode": "multi",
"sort": "none"
}
},
"pluginVersion": "8.3.5",
"targets": [
{
"expr": "1 - (node_memory_MemFree_bytes + node_memory_Buffers_bytes + node_memory_Cached_bytes) / node_memory_MemTotal_bytes\r\n",
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"exemplar": true,
"expr": "1 - node_memory_MemAvailable_bytes / node_memory_MemTotal_bytes unless node_zfs_arc_size",
"hide": false,
"instant": false,
"interval": "",
"intervalFactor": 4,
"legendFormat": "{{instance}}",
"refId": "A"
"refId": "Hosts without ZFS"
},
{
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"exemplar": true,
"expr": "1 - (node_memory_MemAvailable_bytes + node_zfs_arc_size) / node_memory_MemTotal_bytes",
"hide": false,
"instant": false,
"interval": "",
"legendFormat": "{{instance}}",
"refId": "Hosts with ZFS"
}
],
"thresholds": [
{
"colorMode": "critical",
"fill": true,
"line": true,
"op": "gt",
"value": 0.8,
"visible": true
}
],
"timeFrom": null,
"timeRegions": [],
"timeShift": null,
"title": "RAM used %",
"type": "timeseries"
},
{
"alert": {
"alertRuleTags": {},
"conditions": [
{
"evaluator": {
"params": [
0.1
],
"type": "gt"
},
"operator": {
"type": "and"
},
"query": {
"params": [
"A",
"5m",
"now"
]
},
"reducer": {
"params": [],
"type": "avg"
},
"type": "query"
}
],
"executionErrorState": "alerting",
"for": "5m",
"frequency": "1m",
"handler": 1,
"name": "Swap usage alert",
"noDataState": "no_data",
"notifications": []
},
"description": "How much Swap is in use? Relative to available swap.",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 10,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "never",
"spanNulls": true,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "area"
}
},
"mappings": [],
"max": 1,
"min": 0,
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "transparent",
"value": null
},
{
"color": "red",
"value": 0.1
}
]
},
"unit": "percentunit"
},
"overrides": []
},
"gridPos": {
"h": 7,
"w": 6,
"x": 18,
"y": 1
},
"id": 30,
"options": {
"legend": {
"calcs": [],
"displayMode": "hidden",
"placement": "bottom"
},
"tooltip": {
"shared": true,
"sort": 2,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"decimals": null,
"format": "percentunit",
"label": null,
"logBase": 1,
"max": "1",
"min": "0",
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
"mode": "multi",
"sort": "none"
}
},
"pluginVersion": "8.3.5",
"targets": [
{
"exemplar": true,
"expr": "1 - node_memory_SwapFree_bytes / node_memory_SwapTotal_bytes",
"interval": "",
"intervalFactor": 1,
"legendFormat": "{{instance}}",
"refId": "A"
}
],
"yaxis": {
"align": false,
"alignLevel": null
"thresholds": [
{
"colorMode": "critical",
"op": "gt",
"value": 0.1,
"visible": true
}
],
"title": "Swap used %",
"type": "timeseries"
},
{
"collapsed": false,
"datasource": null,
"gridPos": {
"h": 1,
"w": 24,
......@@ -440,52 +577,78 @@
"type": "row"
},
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": null,
"description": "Shows most saturated network link for every node. Bit/s.",
"description": "Shows most utilized network link for every node. Bit/s.",
"fieldConfig": {
"defaults": {
"links": []
"color": {
"mode": "palette-classic"
},
"custom": {
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 10,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "never",
"spanNulls": true,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"links": [],
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "bps"
},
"overrides": []
},
"fill": 1,
"fillGradient": 0,
"gridPos": {
"h": 7,
"w": 8,
"x": 0,
"y": 9
},
"hiddenSeries": false,
"id": 12,
"legend": {
"avg": false,
"current": false,
"max": false,
"min": false,
"show": false,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"nullPointMode": "null",
"options": {
"alertThreshold": true
},
"percentage": false,
"pluginVersion": "7.5.10",
"pointradius": 2,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"legend": {
"calcs": [],
"displayMode": "hidden",
"placement": "bottom"
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"pluginVersion": "8.3.5",
"targets": [
{
"expr": "max by (instance) (rate(node_network_transmit_bytes_total{device!~\"lo|monitoringvpn\"}[5m]) * 8)",
......@@ -502,98 +665,83 @@
"refId": "B"
}
],
"thresholds": [],
"timeFrom": null,
"timeRegions": [],
"timeShift": null,
"title": "Throughput",
"tooltip": {
"shared": false,
"sort": 2,
"value_type": "individual"
},
"transformations": [],
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"$$hashKey": "object:226",
"decimals": null,
"format": "bps",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
},
{
"$$hashKey": "object:227",
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
}
],
"yaxis": {
"align": false,
"alignLevel": null
}
"type": "timeseries"
},
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": null,
"description": "Packet and error count. Positive values mean transmit, negative receive.",
"fieldConfig": {
"defaults": {
"links": []
"color": {
"mode": "palette-classic"
},
"custom": {
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "never",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"links": [],
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "pps"
},
"overrides": []
},
"fill": 0,
"fillGradient": 0,
"gridPos": {
"h": 7,
"w": 8,
"x": 8,
"y": 9
},
"hiddenSeries": false,
"id": 26,
"legend": {
"avg": false,
"current": false,
"max": false,
"min": false,
"show": false,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"nullPointMode": "null as zero",
"options": {
"alertThreshold": true
},
"percentage": false,
"pluginVersion": "7.5.10",
"pointradius": 2,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"legend": {
"calcs": [],
"displayMode": "hidden",
"placement": "bottom"
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"pluginVersion": "8.3.5",
"targets": [
{
"expr": "- rate(node_network_receive_packets_total{device!~\"lo|monitoringvpn\"}[5m])",
......@@ -624,46 +772,8 @@
"refId": "D"
}
],
"thresholds": [],
"timeFrom": null,
"timeRegions": [],
"timeShift": null,
"title": "Network pkt/s",
"tooltip": {
"shared": false,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"format": "pps",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
}
],
"yaxis": {
"align": false,
"alignLevel": null
}
"type": "timeseries"
},
{
"alert": {
......@@ -672,9 +782,10 @@
{
"evaluator": {
"params": [
10
-1,
1
],
"type": "gt"
"type": "outside_range"
},
"operator": {
"type": "and"
......@@ -695,9 +806,10 @@
{
"evaluator": {
"params": [
10
-1,
1
],
"type": "gt"
"type": "outside_range"
},
"operator": {
"type": "or"
......@@ -718,9 +830,10 @@
{
"evaluator": {
"params": [
10
-1,
1
],
"type": "gt"
"type": "outside_range"
},
"operator": {
"type": "or"
......@@ -741,9 +854,10 @@
{
"evaluator": {
"params": [
10
-1,
1
],
"type": "gt"
"type": "outside_range"
},
"operator": {
"type": "or"
......@@ -766,77 +880,128 @@
"for": "5m",
"frequency": "1m",
"handler": 1,
"message": "",
"name": "Network errors alert",
"noDataState": "no_data",
"notifications": []
},
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": null,
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"description": "Network errors, drops etc. Should all be 0.",
"fieldConfig": {
"defaults": {
"links": []
"color": {
"mode": "palette-classic"
},
"custom": {
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 10,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "never",
"spanNulls": true,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"links": [],
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "transparent",
"value": null
},
{
"color": "red",
"value": 10
}
]
},
"unit": "short"
},
"overrides": []
},
"fill": 1,
"fillGradient": 0,
"gridPos": {
"h": 7,
"w": 8,
"x": 16,
"y": 9
},
"hiddenSeries": false,
"id": 10,
"legend": {
"avg": false,
"current": false,
"max": false,
"min": false,
"show": false,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"nullPointMode": "null",
"options": {
"alertThreshold": true
},
"percentage": false,
"pluginVersion": "7.5.10",
"pointradius": 2,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"legend": {
"calcs": [],
"displayMode": "hidden",
"placement": "bottom"
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"pluginVersion": "8.3.5",
"targets": [
{
"expr": "node_network_transmit_errs_total\n",
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"exemplar": true,
"expr": "rate(node_network_transmit_errs_total{device!=\"lo\"}[5m])\n",
"interval": "",
"legendFormat": "{{instance}} {{device}}",
"refId": "A"
},
{
"expr": "node_network_transmit_drop_total",
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"exemplar": true,
"expr": "rate(node_network_transmit_drop_total{device!=\"lo\"}[5m])",
"interval": "",
"legendFormat": "{{instance}} {{device}}",
"refId": "B"
},
{
"expr": "- node_network_receive_drop_total",
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"exemplar": true,
"expr": "- rate(node_network_receive_drop_total{device!=\"lo\"}[5m])",
"interval": "",
"legendFormat": "{{instance}} {{device}}",
"refId": "C"
},
{
"expr": "- node_network_receive_errs_total",
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"exemplar": true,
"expr": "- rate(node_network_receive_errs_total{device!=\"lo\"}[5m])",
"interval": "",
"legendFormat": "{{instance}} {{device}}",
"refId": "D"
......@@ -845,56 +1010,22 @@
"thresholds": [
{
"colorMode": "critical",
"fill": true,
"line": true,
"op": "lt",
"value": -1,
"visible": true
},
{
"colorMode": "critical",
"op": "gt",
"value": 10,
"value": 1,
"visible": true
}
],
"timeFrom": null,
"timeRegions": [],
"timeShift": null,
"title": "Network errors",
"tooltip": {
"shared": false,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
}
],
"yaxis": {
"align": false,
"alignLevel": null
}
"type": "timeseries"
},
{
"collapsed": false,
"datasource": null,
"gridPos": {
"h": 1,
"w": 24,
......@@ -942,56 +1073,83 @@
"noDataState": "no_data",
"notifications": []
},
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": null,
"description": "Watch filesystems filling up. Shows only mounts over 10 % of available bytes used.",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "never",
"spanNulls": true,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "area"
}
},
"links": [],
"unit": "percentunit"
"mappings": [],
"max": 1,
"min": 0,
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "transparent",
"value": null
},
{
"color": "red",
"value": 0.8
}
]
},
"unit": "percentunit"
},
"overrides": []
},
"fill": 0,
"fillGradient": 0,
"gridPos": {
"h": 7,
"w": 8,
"h": 6,
"w": 12,
"x": 0,
"y": 17
},
"hiddenSeries": false,
"id": 4,
"legend": {
"avg": false,
"current": false,
"max": false,
"min": false,
"show": false,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"nullPointMode": "null",
"options": {
"alertThreshold": true
},
"percentage": false,
"pluginVersion": "7.5.10",
"pointradius": 2,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"legend": {
"calcs": [],
"displayMode": "hidden",
"placement": "bottom"
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"pluginVersion": "8.4.5",
"targets": [
{
"expr": "1 - (node_filesystem_avail_bytes / node_filesystem_size_bytes) > 0.1",
"expr": "1 - (node_filesystem_avail_bytes{mountpoint!='/nix/store'} / node_filesystem_size_bytes{mountpoint!='/nix/store'}) > 0.1",
"format": "time_series",
"hide": false,
"instant": false,
......@@ -1001,106 +1159,222 @@
"refId": "A"
}
],
"thresholds": [
"title": "Storage usage % per partition",
"transformations": [],
"type": "timeseries"
},
{
"colorMode": "critical",
"fill": true,
"line": true,
"op": "gt",
"value": 0.8,
"visible": true
"alert": {
"alertRuleTags": {},
"conditions": [
{
"evaluator": {
"params": [
0.5
],
"type": "gt"
},
"operator": {
"type": "and"
},
"query": {
"params": [
"A",
"5m",
"now"
]
},
"reducer": {
"params": [],
"type": "avg"
},
"type": "query"
}
],
"timeFrom": null,
"timeRegions": [],
"timeShift": null,
"title": "Storage usage %",
"executionErrorState": "alerting",
"for": "5m",
"frequency": "15m",
"handler": 1,
"name": "User ciphertext usage % per node alert",
"noDataState": "no_data",
"notifications": []
},
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"description": "How much user data do we store",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "area"
}
},
"mappings": [],
"max": 1,
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "yellow",
"value": 0.5
},
{
"color": "red",
"value": 0.8
}
]
},
"unit": "percentunit"
},
"overrides": []
},
"gridPos": {
"h": 6,
"w": 12,
"x": 12,
"y": 17
},
"id": 41,
"options": {
"legend": {
"calcs": [],
"displayMode": "hidden",
"placement": "bottom"
},
"tooltip": {
"shared": false,
"sort": 0,
"value_type": "individual"
"mode": "multi",
"sort": "none"
}
},
"transformations": [],
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"$$hashKey": "object:131",
"format": "percentunit",
"label": null,
"logBase": 1,
"max": "1",
"min": "0",
"show": true
},
{
"$$hashKey": "object:132",
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"exemplar": true,
"expr": "1 - (node_filesystem_avail_bytes{mountpoint=\"/storage\"} / node_filesystem_size_bytes{mountpoint=\"/storage\"})",
"interval": "",
"legendFormat": "{{instance}}",
"refId": "A"
}
],
"yaxis": {
"align": false,
"alignLevel": null
"thresholds": [
{
"colorMode": "critical",
"op": "gt",
"value": 0.5,
"visible": true
}
],
"title": "User ciphertext usage % per node",
"type": "timeseries"
},
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": null,
"description": "Input Output Operations per second. Positive values mean read, negative write.",
"fieldConfig": {
"defaults": {
"links": []
"color": {
"mode": "palette-classic"
},
"custom": {
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "never",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"links": [],
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "short"
},
"overrides": []
},
"fill": 0,
"fillGradient": 0,
"gridPos": {
"h": 7,
"w": 8,
"x": 8,
"y": 17
"x": 0,
"y": 23
},
"hiddenSeries": false,
"id": 14,
"legend": {
"avg": false,
"current": false,
"max": false,
"min": false,
"show": false,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"nullPointMode": "null as zero",
"options": {
"alertThreshold": true
},
"percentage": false,
"pluginVersion": "7.5.10",
"pointradius": 2,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"legend": {
"calcs": [],
"displayMode": "hidden",
"placement": "bottom"
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"pluginVersion": "8.3.5",
"targets": [
{
"expr": "rate(node_disk_reads_completed_total[5m]) > 0",
......@@ -1117,94 +1391,82 @@
"refId": "B"
}
],
"thresholds": [],
"timeFrom": null,
"timeRegions": [],
"timeShift": null,
"title": "IOPS",
"tooltip": {
"shared": false,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
}
],
"yaxis": {
"align": false,
"alignLevel": null
}
"type": "timeseries"
},
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": null,
"description": "Max average storage latency per node. Positive values mean read, negative write.",
"fieldConfig": {
"defaults": {
"links": []
"color": {
"mode": "palette-classic"
},
"custom": {
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "never",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"links": [],
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "s"
},
"overrides": []
},
"fill": 0,
"fillGradient": 0,
"gridPos": {
"h": 7,
"w": 8,
"x": 16,
"y": 17
"x": 8,
"y": 23
},
"hiddenSeries": false,
"id": 16,
"legend": {
"avg": false,
"current": false,
"max": false,
"min": false,
"show": false,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"nullPointMode": "null as zero",
"options": {
"alertThreshold": true
},
"percentage": false,
"pluginVersion": "7.5.10",
"pointradius": 2,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"legend": {
"calcs": [],
"displayMode": "hidden",
"placement": "bottom"
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"pluginVersion": "8.3.5",
"targets": [
{
"expr": "max by (instance, device) (rate(node_disk_read_time_seconds_total[5m]) / rate(node_disk_reads_completed_total[5m]))",
......@@ -1221,50 +1483,411 @@
"refId": "B"
}
],
"thresholds": [],
"timeFrom": null,
"timeRegions": [],
"timeShift": null,
"title": "Storage latency",
"type": "timeseries"
},
{
"alert": {
"alertRuleTags": {},
"conditions": [
{
"evaluator": {
"params": [
0
],
"type": "gt"
},
"operator": {
"type": "and"
},
"query": {
"params": [
"A",
"5m",
"now"
]
},
"reducer": {
"params": [],
"type": "last"
},
"type": "query"
}
],
"executionErrorState": "alerting",
"for": "5m",
"frequency": "5m",
"handler": 1,
"name": "Degraded RAID alert",
"noDataState": "ok",
"notifications": []
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "bars",
"fillOpacity": 100,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "never",
"spanNulls": true,
"stacking": {
"group": "A",
"mode": "normal"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"min": 0,
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "transparent",
"value": null
},
{
"color": "red",
"value": 0
}
]
},
"unit": "short"
},
"overrides": []
},
"gridPos": {
"h": 7,
"w": 8,
"x": 16,
"y": 23
},
"id": 32,
"options": {
"legend": {
"calcs": [],
"displayMode": "hidden",
"placement": "bottom"
},
"tooltip": {
"shared": false,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"format": "s",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
"mode": "single",
"sort": "none"
}
},
"pluginVersion": "8.3.5",
"targets": [
{
"exemplar": true,
"expr": "megacli_drives{state=\"Degraded\"}",
"interval": "",
"legendFormat": "{{instance}}",
"refId": "A"
}
],
"yaxis": {
"align": false,
"alignLevel": null
"title": "Degraded RAID arrays",
"type": "timeseries"
},
{
"collapsed": false,
"gridPos": {
"h": 1,
"w": 24,
"x": 0,
"y": 30
},
"id": 34,
"panels": [],
"title": "Pressure Stall Information",
"type": "row"
},
{
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
}
},
"overrides": []
},
"gridPos": {
"h": 7,
"w": 8,
"x": 0,
"y": 31
},
"id": 36,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom"
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"exemplar": true,
"expr": "rate(node_pressure_cpu_waiting_seconds_total[5m])",
"interval": "",
"legendFormat": "{{instance}}",
"refId": "A"
}
],
"title": "CPU waiting",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
}
},
"overrides": []
},
"gridPos": {
"h": 7,
"w": 8,
"x": 8,
"y": 31
},
"id": 38,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom"
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"exemplar": true,
"expr": "rate(node_pressure_memory_waiting_seconds_total[5m])",
"interval": "",
"legendFormat": "{{instance}}",
"refId": "A"
}
],
"title": "Memory waiting",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
}
},
"overrides": []
},
"gridPos": {
"h": 7,
"w": 8,
"x": 16,
"y": 31
},
"id": 37,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom"
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"exemplar": true,
"expr": "rate(node_pressure_io_waiting_seconds_total[5m])",
"interval": "",
"legendFormat": "{{instance}}",
"refId": "A"
}
],
"title": "I/O waiting",
"type": "timeseries"
}
],
"refresh": "30s",
"schemaVersion": 27,
"refresh": false,
"schemaVersion": 35,
"style": "dark",
"tags": [],
"templating": {
......@@ -1278,5 +1901,6 @@
"timezone": "",
"title": "Resources overview",
"uid": "ResourcesOverview",
"version": 1
"version": 1,
"weekStart": ""
}