Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.

Source

Select target project
No results found

Target

Select target project
  • privatestorage/PrivateStorageio
  • tomprince/PrivateStorageio
2 results
Show changes
Showing
with 2243 additions and 1263 deletions
# Similar to ``issuer.nix`` but for a "monitoring"-type system. Holes are
# filled by ``customize-monitoring.nix``.
{
deployment = {
secrets = {
"monitoringvpn-private-key" = {
# This contains all of the NixOS system configuration necessary to specify a
# "monitoring"-type system.
{ lib, config, nodes, ...}:
let
cfg = config.grid.monitoring;
inherit (config.grid) publicKeyPath privateKeyPath monitoringvpnIPv4 letsEncryptAdminEmail;
# This collects information about monitored hosts from their configuration for use below.
monitoringHosts = lib.mapAttrsToList (name: node: rec {
inherit name;
vpnIPv4 = node.config.grid.monitoringvpnIPv4;
vpnHostName = "${name}.monitoringvpn";
hostNames = [name vpnHostName];
}) nodes;
# A set mapping VPN IP addresses as strings to lists of hostnames as
# strings. The system's ``/etc/hosts`` will be populated with this
# information. Apart from helping with normal forward resolution, this
# *also* gives us reverse resolution from the VPN IPs to hostnames which
# allows Grafana to show us hostnames instead of VPN IP addresses.
hostsMap = lib.listToAttrs (map (node: lib.nameValuePair node.vpnIPv4 node.hostNames) monitoringHosts);
# A list of VPN IP addresses as strings indicating which clients will be
# allowed onto the VPN.
vpnClientIPs = lib.remove monitoringvpnIPv4 (map (node: node.vpnIPv4) monitoringHosts);
# A list of VPN clients (IP addresses or hostnames) as strings indicating
# which nodes to scrape "nodeExporter" metrics from.
nodeExporterTargets = map (node: node.vpnHostName) monitoringHosts;
in {
imports = [
../../nixos/modules/monitoring/vpn/server.nix
../../nixos/modules/monitoring/server/grafana.nix
../../nixos/modules/monitoring/server/prometheus.nix
../../nixos/modules/monitoring/server/loki.nix
../../nixos/modules/monitoring/exporters/blackbox.nix
];
options.grid.monitoring = {
paymentExporterTargets = lib.mkOption {
type = lib.types.listOf lib.types.str;
description = ''
A list of VPN clients (IP addresses or hostnames) as strings indicating
which nodes to scrape PaymentServer metrics from.
'';
};
blackboxExporterHttpsTargets = lib.mkOption {
type = lib.types.listOf lib.types.str;
description = ''
A list of HTTPS servers (URLs, IP addresses or hostnames) as strings indicating
which nodes the BlackboxExporter should scrape HTTP and TLS metrics from.
'';
};
monitoringDomains = lib.mkOption {
type = lib.types.listOf lib.types.str;
description = ''
A list of strings giving the domain names that point at this monitoring
system. These will all be included in Let's Encrypt certificate.
'';
};
googleOAuthClientID = lib.mkOption {
type = lib.types.str;
default = "";
description = ''
A string containing the GSuite OAuth2 ClientID to use to authenticate
logins to Grafana.
'';
};
enableSlackAlert = lib.mkOption {
type = lib.types.bool;
default = false;
description = ''
Whether to enable alerting via Slack.
When true requires a grafana-slack-url file (see private-keys/README.rst).
'';
};
enableZulipAlert = lib.mkOption {
type = lib.types.bool;
default = false;
description = ''
Whether to enable alerting via Zulip.
When true requires a grafana-zulip-url file (see private-keys/README.rst).
'';
};
};
config = {
assertions = [
{
assertion = let
vpnIPs = (map (node: node.vpnIPv4) monitoringHosts);
in vpnIPs == lib.unique vpnIPs;
message = ''
Duplicate grid.monitoringvpnIPv4 values specified for different nodes.
'';
}
];
deployment.secrets = lib.mkMerge [
{
"monitoringvpn-private-key" = {
destination = "/run/keys/monitoringvpn/server.key";
source = "${privateKeyPath}/monitoringvpn/server.key";
owner.user = "root";
owner.group = "root";
permissions = "0400";
action = ["sudo" "systemctl" "restart" "wireguard-monitoringvpn.service"];
};
"monitoringvpn-preshared-key" = {
};
"monitoringvpn-preshared-key" = {
destination = "/run/keys/monitoringvpn/preshared.key";
source = "${privateKeyPath}/monitoringvpn/preshared.key";
owner.user = "root";
owner.group = "root";
permissions = "0400";
action = ["sudo" "systemctl" "restart" "wireguard-monitoringvpn.service"];
};
};
"grafana-admin-password" = {
source = "${privateKeyPath}/grafana-admin.password";
destination = "/run/keys/grafana-admin.password";
owner.user = config.systemd.services.grafana.serviceConfig.User;
owner.group = config.users.users.grafana.group;
permissions = "0400";
action = ["sudo" "systemctl" "restart" "grafana.service"];
};
}
(lib.mkIf (cfg.googleOAuthClientID != "") {
"grafana-google-sso-secret" = {
source = "${privateKeyPath}/grafana-google-sso.secret";
destination = "/run/keys/grafana-google-sso.secret";
owner.user = config.systemd.services.grafana.serviceConfig.User;
owner.group = config.users.users.grafana.group;
permissions = "0400";
action = ["sudo" "systemctl" "restart" "grafana.service"];
};
})
(lib.mkIf cfg.enableSlackAlert {
"grafana-slack-url" = {
source = "${privateKeyPath}/grafana-slack-url";
destination = "/run/keys/grafana-slack-url";
owner.user = config.systemd.services.grafana.serviceConfig.User;
owner.group = config.users.users.grafana.group;
permissions = "0400";
action = ["sudo" "systemctl" "restart" "grafana.service"];
};
})
(lib.mkIf cfg.enableZulipAlert {
"grafana-zulip-url" = {
source = "${privateKeyPath}/grafana-zulip-url";
destination = "/run/keys/grafana-zulip-url";
owner.user = config.systemd.services.grafana.serviceConfig.User;
owner.group = config.users.users.grafana.group;
permissions = "0400";
action = ["sudo" "systemctl" "restart" "grafana.service"];
};
})
];
networking.hosts = hostsMap;
services.private-storage.monitoring.vpn.server = {
enable = true;
ip = monitoringvpnIPv4;
inherit vpnClientIPs;
pubKeysPath = "${publicKeyPath}/monitoringvpn";
};
};
imports = [
# Give it a good SSH configuration.
../../nixos/modules/ssh.nix
# Allow us to remotely trigger updates to this system.
../../nixos/modules/deployment.nix
services.private-storage.monitoring.prometheus = {
inherit nodeExporterTargets;
inherit (cfg) paymentExporterTargets blackboxExporterHttpsTargets;
nginxExporterTargets = [];
};
../../nixos/modules/monitoring/vpn/server.nix
../../nixos/modules/monitoring/server/grafana.nix
../../nixos/modules/monitoring/server/prometheus.nix
../../nixos/modules/monitoring/exporters/node.nix
# Loki 0.3.0 from Nixpkgs 19.09 is too old and does not work:
# ../../nixos/modules/monitoring/server/loki.nix
];
services.private-storage.monitoring.grafana = {
inherit (cfg) googleOAuthClientID enableSlackAlert enableZulipAlert;
inherit letsEncryptAdminEmail;
domains = cfg.monitoringDomains;
};
services.private-storage.monitoring.exporters.node.enable = true;
};
}
# Grid-level configuration for a node that joins the monitoring VPN as a
# client: it declares the two key files the client needs as deployment
# secrets and switches the VPN client service on.
{ lib, config, ...}:
let
  inherit (config.grid) publicKeyPath privateKeyPath monitoringvpnEndpoint monitoringvpnIPv4;

  # Build a deployment secret that is owned by root, readable by root only,
  # and whose associated action is the command
  # `sudo systemctl restart wireguard-monitoringvpn.service`.
  # NOTE(review): presumably the deployment tool runs `action` after placing
  # the key -- confirm against the deployment tooling.
  rootOnlyVpnKey = { source, destination }: {
    inherit source destination;
    owner = {
      user = "root";
      group = "root";
    };
    permissions = "0400";
    action = ["sudo" "systemctl" "restart" "wireguard-monitoringvpn.service"];
  };
in {
  # The client's own private key is looked up by this node's VPN address.
  config.deployment.secrets."monitoringvpn-secret-key" = rootOnlyVpnKey {
    source = "${privateKeyPath}/monitoringvpn/${monitoringvpnIPv4}.key";
    destination = "/run/keys/monitoringvpn/client.key";
  };

  # The preshared key comes from a single fixed file name.
  config.deployment.secrets."monitoringvpn-preshared-key" = rootOnlyVpnKey {
    source = "${privateKeyPath}/monitoringvpn/preshared.key";
    destination = "/run/keys/monitoringvpn/preshared.key";
  };

  # Enable the VPN client using this node's address and the grid's endpoint.
  config.services.private-storage.monitoring.vpn.client = {
    enable = true;
    ip = monitoringvpnIPv4;
    endpoint = monitoringvpnEndpoint;
    endpointPublicKeyFile = "${publicKeyPath}/monitoringvpn/server.pub";
  };
}
# Similar to ``issuer.nix`` but for a "storage"-type system. Holes are filled
# by ``customize-storage.nix``.
{ config, ...} :
{
deployment = {
secrets = {
"ristretto-signing-key" = {
destination = "/run/keys/ristretto.signing-key";
owner.user = "root";
owner.group = "root";
permissions = "0400";
# Service name here matches the name defined by our tahoe-lafs nixos
# module. It would be nice to not have to hard-code it here. Can we
# extract it from the tahoe-lafs nixos module somehow?
action = ["sudo" "systemctl" "restart" "tahoe.storage.service"];
};
"monitoringvpn-secret-key" = {
destination = "/run/keys/monitoringvpn/client.key";
owner.user = "root";
owner.group = "root";
permissions = "0400";
action = ["sudo" "systemctl" "restart" "wireguard-monitoringvpn.service"];
};
"monitoringvpn-preshared-key" = {
destination = "/run/keys/monitoringvpn/preshared.key";
owner.user = "root";
owner.group = "root";
permissions = "0400";
action = ["sudo" "systemctl" "restart" "wireguard-monitoringvpn.service"];
};
};
};
# This contains all of the NixOS system configuration necessary to specify a
# "storage"-type system.
{ lib, config, ...} :
let
inherit (config.grid) privateKeyPath;
in {
# Any extra NixOS modules to load on this server.
imports = [
# Allow us to remotely trigger updates to this system.
../../nixos/modules/deployment.nix
# Bring in our module for configuring the Tahoe-LAFS service and other
# Private Storage-specific things.
../../nixos/modules/private-storage.nix
# Connect to the monitoringvpn.
../../nixos/modules/monitoring/vpn/client.nix
# Expose base system metrics over the monitoringvpn.
../../nixos/modules/monitoring/exporters/node.nix
./monitoringvpn-client.nix
./borgbackup.nix
];
# Turn on the Private Storage (Tahoe-LAFS) service.
services.private-storage = {
# Yep. Turn it on.
enable = true;
# Give it the Ristretto signing key to support authorization.
ristrettoSigningKeyPath = config.deployment.secrets.ristretto-signing-key.destination;
options.grid.storage = {
passValue = lib.mkOption {
type = lib.types.int;
description = ''
An integer giving the value of a single pass in byte×months.
'';
};
publicStoragePort = lib.mkOption {
type = lib.types.port;
description = ''
An integer giving the port number to include in Tahoe storage service
advertisements and on which to listen for storage connections.
'';
};
};
config = {
deployment = {
secrets = {
"ristretto-signing-key" = {
destination = "/run/keys/ristretto.signing-key";
source = "${privateKeyPath}/ristretto.signing-key";
owner.user = "root";
owner.group = "root";
permissions = "0400";
# Service name here matches the name defined by our tahoe-lafs nixos
# module. It would be nice to not have to hard-code it here. Can we
# extract it from the tahoe-lafs nixos module somehow?
action = ["sudo" "systemctl" "restart" "tahoe.storage.service"];
};
};
};
services.private-storage.monitoring.exporters.node.enable = true;
services.private-storage.monitoring.exporters.tahoe.enable = true;
services.private-storage.borgbackup.enable = lib.mkDefault true;
# Turn on the Private Storage (Tahoe-LAFS) service.
services.private-storage = {
# Yep. Turn it on.
enable = true;
# Give it the Ristretto signing key to support authorization.
ristrettoSigningKeyPath = config.deployment.secrets.ristretto-signing-key.destination;
inherit (config.grid.storage) passValue publicStoragePort;
};
};
}
# Minimal configuration that vagrant depends on
{ config, pkgs, lib, ... }:
let
  # Vagrant ships with a well-known, insecure shared key pair.  We do not put
  # that key in the users' authorizedKeys attribute because it is meant to be
  # removed on first boot and replaced with a random one.  Instead this script
  # creates ~/.ssh with the right permissions and installs the temporary key,
  # but only when no ~/.ssh/authorized_keys exists yet.
  install-vagrant-ssh-key = pkgs.writeScriptBin "install-vagrant-ssh-key" ''
    #!${pkgs.runtimeShell}
    if [ ! -e ~/.ssh/authorized_keys ]; then
    mkdir -m 0700 -p ~/.ssh
    echo "ssh-rsa AAAAB3NzaC1yc2EAAAABIwAAAQEA6NF8iallvQVp22WDkTkyrtvp9eWW6A8YVr+kz4TjGYe7gHzIw+niNltGEFHzD8+v1I2YJ6oXevct1YeS0o9HZyN1Q9qgCgzUFtdOKLv6IedplqoPkcmF0aYet2PkEDo3MlTBckFXPITAMzF8dJSIFo9D8HfdOV0IAdx4O7PtixWKn5y2hMNG0zQPyUecp4pzC6kivAIhyfHilFR61RGL+GPXQ2MWZWFYbAGjyiYJnAmCP3NOTd0jMZEnDkbUvxhMmBYSdETk1rRgm+R4LOzFUGaHqHDLKLX+FIPKcF96hrucXzcWyLbIbEgE98OHlnVYCzRdK8jlqm8tehUc9c9WhQ== vagrant insecure public key" >> ~/.ssh/authorized_keys
    chmod 0600 ~/.ssh/authorized_keys
    fi
  '';

  # True when the kernel version reported by pkgs.linuxPackages is older than
  # 5.6, in which case the separate Wireguard module package is added below.
  needsWireguardModulePackage =
    lib.versionOlder pkgs.linuxPackages.kernel.version "5.6";
in
{
  # Wireguard kernel module for Kernels < 5.6
  boot = lib.mkIf needsWireguardModulePackage {
    extraModulePackages = [ config.boot.kernelPackages.wireguard ];
  };

  # Services to enable:
  services = {
    # The OpenSSH daemon.
    openssh.enable = true;
    # DBus.
    dbus.enable = true;
    # timesyncd, replacing ntpd.
    timesyncd.enable = true;
  };

  # Packages for Vagrant
  environment.systemPackages = [
    pkgs.findutils
    pkgs.gnumake
    pkgs.iputils
    pkgs.jq
    pkgs.nettools
    pkgs.netcat
    pkgs.nfs-utils
    pkgs.rsync
  ];

  users = {
    users.root = { password = "vagrant"; };

    # A "vagrant" group & user; password-less sudo comes from the "wheel"
    # membership together with the sudo settings further down.
    groups.vagrant = {
      name = "vagrant";
      members = [ "vagrant" ];
    };
    extraUsers.vagrant = {
      isNormalUser = true;
      createHome = true;
      home = "/home/vagrant";
      useDefaultShell = true;
      password = "vagrant";
      group = "vagrant";
      extraGroups = [ "users" "wheel" ];
    };
  };

  # Run the key installation script as the vagrant user as part of
  # multi-user.target.
  systemd.services.install-vagrant-ssh-key = {
    description = "Vagrant SSH key install (if needed)";
    wantedBy = [ "multi-user.target" ];
    wants = [ "fs.target" ];
    after = [ "fs.target" ];
    serviceConfig = {
      User = "vagrant";
      ExecStart = "${install-vagrant-ssh-key}/bin/install-vagrant-ssh-key";
      # So it won't be (needlessly) restarted:
      RemainAfterExit = true;
    };
  };

  security.sudo = {
    wheelNeedsPassword = false;
    extraConfig =
      ''
        Defaults:root,%wheel env_keep+=LOCALE_ARCHIVE
        Defaults:root,%wheel env_keep+=NIX_PATH
        Defaults:root,%wheel env_keep+=TERMINFO_DIRS
        Defaults env_keep+=SSH_AUTH_SOCK
        Defaults lecture = never
        root ALL=(ALL) SETENV: ALL
        %wheel ALL=(ALL) NOPASSWD: ALL, SETENV: ALL
      '';
  };
}
# Entry point of our Nix helper library collection.  The single argument,
# callPackage, is used to instantiate each member library.
{ callPackage }:
let
  # A library of tools useful for writing tests with Nix (./testing.nix).
  testing = callPackage ./testing.nix { };
in
{
  inherit testing;
}
# Thank you: https://gist.github.com/petabyteboy/558ffddb9aeb24e1eab2d5d6d021b5d7
with import <nixpkgs/lib>;
rec {
# FIXME: add case for negative numbers
pow = base: exponent: if exponent == 0 then 1 else fold (
x: y: y * base
) base (
range 2 exponent
);
fromHexString = hex: foldl (
x: y: 16 * x + (
(
listToAttrs (
map (
x: nameValuePair (
toString x
) x
) (
range 0 9
)
) // {
"a" = 10;
"b" = 11;
"c" = 12;
"d" = 13;
"e" = 14;
"f" = 15;
}
).${y}
)
) 0 (
stringToCharacters (
removePrefix "0x" (
hex
)
)
);
ipv4 = rec {
decode = address: foldl (
x: y: 256 * x + y
) 0 (
map toInt (
splitString "." address
)
);
encode = num: concatStringsSep "." (
map (
x: toString (mod (num / x) 256)
) (
reverseList (
genList (
x: pow 2 (x * 8)
) 4
)
)
);
netmask = prefixLength: (
foldl (
x: y: 2 * x + 1
) 0 (
range 1 prefixLength
)
) * (
pow 2 (
32 - prefixLength
)
);
reverseZone = net: (
concatStringsSep "." (
reverseList (
splitString "." net
)
)
) + ".in-addr.arpa";
eachAddress = net: prefixLength: genList (
x: decode (
x + (
decode net
)
)
) (
pow 2 (
32 - prefixLength
)
);
networkOf = address: prefixLength: encode (
bitAnd (
decode address
) (
netmask prefixLength
)
);
isInNetwork = net: address: networkOf address == net;
/* nixos-specific stuff */
findOwnAddress = config: net: head (
filter (
isInNetwork net
) (
configuredAddresses config
)
);
configuredAddresses = config: concatLists (
mapAttrsToList (
name: iface: iface.ipv4.addresses
) config.networking.interfaces
);
};
ipv6 = rec {
expand = address: (
replaceStrings ["::"] [(
concatStringsSep "0" (
genList (x: ":") (
9 - (count (x: x == ":") (stringToCharacters address))
)
)
)] address
) + (
if hasSuffix "::" address then
"0"
else
""
);
decode = address: map fromHexString (
splitString ":" (
expand address
)
);
encode = address: toLower (
concatStringsSep ":" (
map toHexString address
)
);
netmask = prefixLength: map (
x: if prefixLength > x + 16 then
(pow 2 16) - 1
else if prefixLength < x then
0
else
(
foldl (
x: y: 2 * x + 1
) 0 (
range 1 (prefixLength - x)
)
) * (
pow 2 (
16 - (prefixLength - x)
)
)
) (
genList (
x: x * 16
) 8
);
reverseZone = net: (
concatStringsSep "." (
concatLists (
reverseList (
map (
x: stringToCharacters (fixedWidthString 4 "0" x)
) (
splitString ":" (
expand net
)
)
)
)
)
) + ".ip6.arpa";
networkOf = address: prefixLength: encode (
zipListsWith bitAnd (
decode address
) (
netmask prefixLength
)
);
isInNetwork = net: address: networkOf address == (expand net);
/* nixos-specific stuff */
findOwnAddress = config: net: head (
filter (
isInNetwork net
) (
configuredAddresses config
)
);
configuredAddresses = config: concatLists (
mapAttrsToList (
name: iface: iface.ipv6.addresses
) config.networking.interfaces
);
};
}
{ ...}:
{
  /* Returns a string holding a Python script that runs tests from the Python
     code at the given path.

     The generated script reads the file at *testpath*, runs it with *exec*
     in the script's own global namespace, and then calls the *test* global
     the file is expected to define, passing *kwargs* as keyword arguments.

     *kwargs* may hold any value that builtins.toJSON can serialise,
     including booleans and null: the JSON text is decoded by Python's json
     module instead of being pasted in as Python source, where JSON's
     true/false/null would be undefined names.

     Type: makeTestScript :: { testpath :: Path, kwargs :: AttrSet } -> String

     Example:
       testScript = makeTestScript { testpath = ./test_foo.py; kwargs = { x = "y"; }; };
  */
  makeTestScript = { testpath, kwargs ? {} }:
    ''
      import json as _json

      # The driver runs pyflakes on this script before letting it
      # run... Convince pyflakes that there is a `test` name.
      def test():
          pass

      with open("${testpath}") as testfile:
          exec(testfile.read(), globals())

      # The keyword arguments travel as a JSON document inside a string
      # literal and are decoded here, so every JSON value maps to the
      # corresponding Python value.
      test(**_json.loads(${builtins.toJSON (builtins.toJSON kwargs)}))
    '';
}
......@@ -34,40 +34,41 @@ let
options = {
hostId = lib.mkOption
{ type = lib.types.str;
example = lib.literalExample "abcdefab";
example = "abcdefab";
description = "The 32-bit host ID of the machine, formatted as 8 hexadecimal characters.";
};
interface = lib.mkOption
{ type = lib.types.str;
example = lib.literalExample "eno0";
example = "eno0";
description = "The name of the network interface on which to configure a static address.";
};
publicIPv4 = lib.mkOption
{ type = lib.types.str;
example = lib.literalExample "192.0.2.0";
example = "192.0.2.0";
description = "The IPv4 address to statically assign to `interface`.";
};
prefixLength = lib.mkOption
{ type = lib.types.int;
example = lib.literalExample 24;
example = 24;
description = "The statically configured network's prefix length.";
};
gateway = lib.mkOption
{ type = lib.types.str;
example = lib.literalExample "192.0.2.1";
example = "192.0.2.1";
description = "The statically configured address of the network gateway.";
};
gatewayInterface = lib.mkOption
{ type = lib.types.str;
example = lib.literalExample "eno0";
example = "eno0";
description = "The name of the network interface for the default route.";
default = cfg.interface;
};
grubDeviceID = lib.mkOption
{ type = lib.types.str;
example = lib.literalExample "wwn-0x5000c500936410b9";
example = "wwn-0x5000c500936410b9";
description = "The ID of the disk on which to install grub.";
default = "nodev";
};
};
in {
......@@ -102,10 +103,11 @@ in {
# harder to deploy in the bootstrap environment.
config =
{ boot.loader.grub.enable = true;
boot.loader.grub.version = 2;
boot.loader.grub.device = "/dev/disk/by-id/${cfg.grubDeviceID}";
boot.loader.grub.device = if cfg.grubDeviceID == "nodev" then "nodev" else "/dev/disk/by-id/${cfg.grubDeviceID}";
boot.loader.timeout = 10;
# NixOS likes to fill up boot partitions with (by default) 100 old kernels.
# Keep a (for us) more reasonable number around.
boot.loader.grub.configurationLimit = 8;
networking.firewall.enable = false;
networking.hostId = cfg.hostId;
......
These are mostly modelled on upstream nixos modules.
They are generally fairly configurable (they don't tend to hard-code paths, they can be enabled or disabled).
They don't know anything about morph (e.g. ``deployment.secrets``) or how the different grids are configured (e.g. ``grid.publicKeyPath``).
Each module here tends to define one service (or group of related services) or feature.
Eventually, all of these will be imported automatically and controlled by ``services.private-storage.*.enable`` options.
let
  # Packages, the issuer and the storage service itself.
  coreModules = [
    ./packages.nix
    ./issuer.nix
    ./private-storage.nix
  ];

  # Monitoring policy, VPN client and metrics/log exporters.
  monitoringModules = [
    ./monitoring/policy.nix
    ./monitoring/vpn/client.nix
    ./monitoring/exporters/node.nix
    ./monitoring/exporters/tahoe.nix
    ./monitoring/exporters/promtail.nix
  ];
in
{
  # Some of these modules are useful on every node, others only for one
  # particular service.  Anything that is not universally useful has to be
  # switched on by the node's own configuration.
  #
  # Loading more modules (and so defining more options) than any single node
  # strictly needs keeps conditional configuration elsewhere simple.  For
  # example, a Vagrant module can unconditionally set up a filesystem for
  # PaymentServer: on a node running PaymentServer it takes effect, and on a
  # node where PaymentServer has not been enabled it is simply ignored.
  imports = coreModules ++ monitoringModules;
}
......@@ -16,7 +16,7 @@ in {
options = {
services.private-storage.deployment.authorizedKey = lib.mkOption {
type = lib.types.str;
example = lib.literalExample ''
example = ''
ssh-ed25519 AAAAC3N...
'';
description = ''
......@@ -25,7 +25,7 @@ in {
};
services.private-storage.deployment.gridName = lib.mkOption {
type = lib.types.str;
example = lib.literalExample "staging";
example = "staging";
description = ''
The name of the grid configuration to use to update this deployment.
'';
......@@ -35,11 +35,11 @@ in {
config = {
# Configure the system to use our binary cache so that deployment updates
# only require downloading pre-built software, not building it ourselves.
nix = {
binaryCachePublicKeys = [
nix.settings = {
trusted-public-keys = [
"saxtons.private.storage:MplOcEH8G/6mRlhlKkbA8GdeFR3dhCFsSszrspE/ZwY="
];
binaryCaches = [
substituters = [
"http://saxtons.private.storage"
];
};
......
# A NixOS module which can run a Ristretto-based issuer for PrivateStorage
# ZKAPs.
{ lib, pkgs, config, ... }: let
{ lib, pkgs, ourpkgs, config, ... }: let
cfg = config.services.private-storage-issuer;
# Our own nixpkgs fork:
ourpkgs = import ../../nixpkgs-ps.nix {};
in {
imports = [
# Give it a good SSH configuration.
../../nixos/modules/ssh.nix
];
options = {
services.private-storage-issuer.enable = lib.mkEnableOption "PrivateStorage ZKAP Issuer Service";
services.private-storage-issuer.package = lib.mkOption {
default = ourpkgs.zkapissuer.components.exes."PaymentServer-exe";
default = ourpkgs.zkapissuer;
type = lib.types.package;
example = lib.literalExample "pkgs.zkapissuer.components.exes.\"PaymentServer-exe\"";
example = lib.literalExpression "pkgs.zkapissuer.components.exes.\"PaymentServer-exe\"";
description = ''
The package to use for the ZKAP issuer.
'';
};
services.private-storage-issuer.domains = lib.mkOption {
type = lib.types.listOf lib.types.str;
example = lib.literalExample [ "payments.example.com" ];
example = [ "payments.example.com" ];
description = ''
The domain names at which the issuer is reachable.
'';
......@@ -39,12 +32,21 @@ in {
services.private-storage-issuer.issuer = lib.mkOption {
default = "Ristretto";
type = lib.types.enum [ "Trivial" "Ristretto" ];
example = lib.literalExample "Trivial";
example = "Trivial";
description = ''
The issuer algorithm to use. Either Trivial for a fake no-crypto
algorithm or Ristretto for Ristretto-flavored PrivacyPass.
'';
};
services.private-storage-issuer.tokensPerVoucher = lib.mkOption {
default = null;
type = lib.types.nullOr lib.types.int;
example = 50000;
description = ''
If not null, a value to pass to PaymentServer for
``--tokens-per-voucher``.
'';
};
services.private-storage-issuer.ristrettoSigningKeyPath = lib.mkOption {
default = null;
type = lib.types.path;
......@@ -60,6 +62,13 @@ in {
and payment management.
'';
};
services.private-storage-issuer.stripeWebhookSecretKeyPath = lib.mkOption {
type = lib.types.path;
description = ''
The path to a file containing a Stripe "webhook" secret key to use for
charge and payment management.
'';
};
services.private-storage-issuer.stripeEndpointDomain = lib.mkOption {
type = lib.types.str;
description = ''
......@@ -88,6 +97,15 @@ in {
The kind of voucher database to use.
'';
};
services.private-storage-issuer.databaseFileSystem = lib.mkOption {
# Logically, the type is the type of an entry in fileSystems - but we'll
# just let the type system enforce that when we pass the value on to
# fileSystems.
description = ''
Configuration for a filesystem to mount which will hold the issuer's
internal state database.
'';
};
services.private-storage-issuer.databasePath = lib.mkOption {
default = null;
type = lib.types.str;
......@@ -115,37 +133,76 @@ in {
config =
let
certroot = "/var/lib/letsencrypt/live";
# We'll refer to this collection of domains by the first domain in the
# list.
domain = builtins.head cfg.domains;
certServiceName = "cert-${domain}";
# Payment server internal http port (arbitrary, non-privileged):
internalHttpPort = "1061";
# The "-vN" suffix indicates that this is the Nth incompatible version of the
# on-disk state as managed by this deployment system. This does not have
# anything to do with what's inside the PaymentServer-managed state.
# Instead it's about things like the type of filesystem used or options
# having to do with the backing volume behind the filesystem. In
# general I expect that to get from "-vN" to "-v(N+1)" some manual
# upgrade steps will be required.
stateDirectory = "zkapissuer-v2";
in lib.mkIf cfg.enable {
# Make sure the voucher database filesystem is mounted.
fileSystems = {
"zkapissuer-data" = cfg.databaseFileSystem // {
mountPoint = "/var/lib/${stateDirectory}";
};
};
# Add a systemd service to run PaymentServer.
systemd.services.zkapissuer = {
enable = true;
description = "ZKAP Issuer";
wantedBy = [ "multi-user.target" ];
# Make sure we have a certificate the first time, if we are running over
# TLS and require a certificate.
requires = lib.optional cfg.tls "${certServiceName}.service";
after = [
# Make sure there is a network so we can bind to all of the
# interfaces.
"network.target"
] ++
# Make sure we run after the certificate is issued, if we are running
# over TLS and require a certificate.
lib.optional cfg.tls "${certServiceName}.service";
# It really shouldn't ever exit on its own! If it does, it's a bug
# we'll have to fix. Restart it and hope it doesn't happen too much
# before we can fix whatever the issue is.
serviceConfig.Restart = "always";
serviceConfig.Type = "simple";
# Run w/o privileges
serviceConfig = {
DynamicUser = false;
User = "zkapissuer";
Group = "zkapissuer";
};
# Make systemd create a User/Group owned directory for PaymentServer
# state. According to the docs at
# https://www.freedesktop.org/software/systemd/man/systemd.exec.html#RuntimeDirectory=
# "The specified directory names must be relative" ... this makes
# systemd create this directory in /var/lib/ for us.
serviceConfig.StateDirectory = stateDirectory;
serviceConfig.StateDirectoryMode = "0750";
unitConfig.AssertPathExists = [
# Bail if there is still an old (root-owned) DB file on this system.
# If you hit this, and this /var/db/ file is indeed current, move it
# to /var/lib/zkapissuer/vouchers.sqlite3 and chown it to
# zkapissuer:zkapissuer.
"!/var/db/vouchers.sqlite3"
# Similarly, bail if the newer path you were just told to create --
# /var/lib/zkapissuer/vouchers.sqlite3 -- exists. It needs to be
# moved to /var/lib/zkapissuer-v2 where a dedicated filesystem has been
# created for it.
"!/var/lib/zkapissuer/vouchers.sqlite3"
];
# Only start if the dedicated vouchers database filesystem is mounted so
# that we know we're going to find our vouchers database there (or that
# we will create it in the right place).
unitConfig.Requires = ["local-fs.target"];
unitConfig.After = ["local-fs.target"];
script =
let
# Compute the right command line arguments to pass to it. The
......@@ -158,16 +215,7 @@ in {
if cfg.database == "Memory"
then "--database Memory"
else "--database SQLite3 --database-path ${cfg.databasePath}";
httpsArgs =
if cfg.tls
then
"--https-port 443 " +
"--https-certificate-path ${certroot}/${domain}/cert.pem " +
"--https-certificate-chain-path ${certroot}/${domain}/chain.pem " +
"--https-key-path ${certroot}/${domain}/privkey.pem"
else
# Only for automated testing.
"--http-port 80";
httpArgs = "--http-port ${internalHttpPort}";
prefixOption = s: "--cors-origin=" + s;
originStrings = map prefixOption cfg.allowedChargeOrigins;
......@@ -175,43 +223,84 @@ in {
stripeArgs =
"--stripe-key-path ${cfg.stripeSecretKeyPath} " +
"--stripe-webhook-key-path ${cfg.stripeWebhookSecretKeyPath} " +
"--stripe-endpoint-domain ${cfg.stripeEndpointDomain} " +
"--stripe-endpoint-scheme ${cfg.stripeEndpointScheme} " +
"--stripe-endpoint-port ${toString cfg.stripeEndpointPort}";
redemptionConfig = lib.optionalString (cfg.tokensPerVoucher != null) "--tokens-per-voucher ${builtins.toString cfg.tokensPerVoucher}";
in
"${cfg.package}/bin/PaymentServer-exe ${originArgs} ${issuerArgs} ${databaseArgs} ${httpsArgs} ${stripeArgs}";
"${cfg.package.exePath} ${originArgs} ${issuerArgs} ${databaseArgs} ${httpArgs} ${stripeArgs} ${redemptionConfig}";
};
# Certificate renewal. A short-lived service meant to be repeatedly
# activated to request a new certificate be issued, if the current one is
# close to expiring.
systemd.services.${certServiceName} = {
enable = cfg.tls;
description = "Certificate ${domain}";
# Activate this unit periodically so that certbot can determine if the
# certificate expiration time is close enough to warrant a renewal
# request.
startAt = "weekly";
serviceConfig = {
ExecStart =
let
configArgs = "--config-dir /var/lib/letsencrypt --work-dir /var/run/letsencrypt --logs-dir /var/run/log/letsencrypt";
in
pkgs.writeScript "cert-${domain}-start.sh" ''
#!${pkgs.runtimeShell} -e
# Register if necessary.
${pkgs.certbot}/bin/certbot register ${configArgs} --non-interactive --agree-tos -m ${cfg.letsEncryptAdminEmail} || true
# Obtain the certificate.
${pkgs.certbot}/bin/certbot certonly ${configArgs} --non-interactive --standalone --expand --domains ${builtins.concatStringsSep "," cfg.domains}
'';
};
# PaymentServer runs as this user and group by default
# Mind the comments in nixpkgs/nixos/modules/misc/ids.nix: "When adding a uid,
# make sure it doesn't match an existing gid. And don't use uids above 399!"
ids.uids.zkapissuer = 397;
ids.gids.zkapissuer = 397;
users.extraGroups.zkapissuer.gid = config.ids.gids.zkapissuer;
users.extraUsers.zkapissuer = {
uid = config.ids.uids.zkapissuer;
isNormalUser = false;
group = "zkapissuer";
# Let PaymentServer read from keys, if necessary.
extraGroups = [ "keys" ];
};
# Open 80 and 443 for the certbot HTTP server and the PaymentServer HTTPS server.
# Open 80 and 443 for nginx
networking.firewall.allowedTCPPorts = [
80
443
];
# NGINX reverse proxy
security.acme.defaults.email = cfg.letsEncryptAdminEmail;
security.acme.acceptTerms = true;
services.nginx = {
enable = true;
recommendedGzipSettings = true;
recommendedOptimisation = true;
recommendedProxySettings = true;
recommendedTlsSettings = true;
virtualHosts."${domain}" = {
serverAliases = builtins.tail cfg.domains;
enableACME = cfg.tls;
forceSSL = cfg.tls;
locations."/v1/" = {
# Only forward requests beginning with /v1/ so
# we pass less scanning spam on to our backend
# Want a regex instead? try locations."~ /v\d+/"
proxyPass = "http://127.0.0.1:${internalHttpPort}";
# The redemption endpoint can intentionally delay its response for
# up to 600 seconds for a cheap kind of server-push when payment
# completes. Let that timeout control how long the connection stays
# open. PaymentServer does not accept configuration for that
# duration so we also hard-code it here.
#
# http://nginx.org/en/docs/http/ngx_http_proxy_module.html#proxy_read_timeout
extraConfig = ''
proxy_read_timeout 660;
'';
};
locations."/metrics" = {
# Only allow our monitoringvpn subnet
extraConfig = ''
allow 172.23.23.0/24;
deny all;
'';
proxyPass = "http://127.0.0.1:${internalHttpPort}";
};
locations."/" = {
# Return a 404 error for any paths not specified above.
extraConfig = ''
return 404;
'';
};
};
};
};
}
# Prometheus Blackbox exporter configuration
#
# Scope: Probe hosts from the monitoring machine to learn whether they are
#        reachable, whether their certificates remain valid, and so on.
#
# Notes: The Blackbox exporter follows the "Multi Target Exporter" pattern,
#        see https://prometheus.io/docs/guides/multi-target-exporter/ .
#
# Usage: Import this on a monitoring server

{ config, lib, pkgs, ... }:
let
  # The one probe module we define: an HTTP probe with a five second timeout
  # that fails when the connection is not SSL.
  httpsProbe = {
    prober = "http";
    timeout = "5s";
    http = {
      fail_if_not_ssl = true;
      # This prober is for IPv4 only.
      preferred_ip_protocol = "ip4";
      ip_protocol_fallback = false;
    };
  };

  # Complete exporter configuration; it is serialised as JSON into the
  # file named blackbox-exporter.yaml below.
  exporterConfig = {
    modules.https_2xx = httpsProbe;
  };
in {
  config.services.prometheus.exporters.blackbox = {
    enable = true;
    configFile =
      pkgs.writeText "blackbox-exporter.yaml" (builtins.toJSON exporterConfig);
  };
}
# MegaCli to Prometheus text format exporter
#
# Scope: Reads data from MegaRAID compatible storage controllers, converts it
#        to the Prometheus text format and stores it in a file that the node
#        exporter scrapes later on.
#
# Usage: Import this on every server with a MegaRAID card that should show up
#        in the central monitoring system
#
# See https://nixos.org/manual/nixos/stable/#module-services-prometheus-exporters

{ config, options, lib, ourpkgs, pkgs, ... }:
let
  inherit (lib) mkEnableOption mkIf mkOption types;

  cfg = config.services.private-storage.monitoring.exporters.megacli2prom;
in {
  options.services.private-storage.monitoring.exporters.megacli2prom = {
    enable = mkEnableOption "MegaCli2Prom metrics gathering service";

    outFile = mkOption {
      default = "/run/prometheus-node-exporter/megacli.prom";
      type = types.str;
      description = "Where to store the temporary file for node exporter to scrape?";
    };

    interval = mkOption {
      # Every five minutes.
      default = "*:0/5";
      type = types.str;
      description = ''
        How often to do it?
        See https://www.freedesktop.org/software/systemd/man/systemd.time.html#Calendar%20Events
      '';
    };
  };

  config = mkIf cfg.enable {
    environment.systemPackages = [ ourpkgs.megacli2prom pkgs.megacli ];

    systemd.services.megacli2prom = {
      enable = true;
      description = "MegaCli2Prom metrics gathering service";
      path = [ pkgs.megacli ];
      startAt = cfg.interval;
      # Write to a temporary file first and rename it afterwards, so that the
      # textfile collector never reads a partially written file.
      # See https://github.com/prometheus/node_exporter#textfile-collector
      script = ''
        "${ourpkgs.megacli2prom}/bin/megacli2prom" > "${cfg.outFile}.tmp"
        mv "${cfg.outFile}.tmp" "${cfg.outFile}"
      '';
    };
  };
}
......@@ -6,15 +6,25 @@
# monitoring system
# See https://nixos.org/manual/nixos/stable/#module-services-prometheus-exporters
{ config, lib, pkgs, ... }:
{ config, lib, pkgs, options, ... }:
with lib;
let
cfg = config.services.private-storage.monitoring.exporters.node;
mountsFileSystemType = fsType: {} != filterAttrs (n: v: v.fsType == fsType) config.fileSystems;
in {
config.services.prometheus.exporters.node = {
options.services.private-storage.monitoring.exporters.node = {
enable = lib.mkEnableOption "Base system metrics collection";
textfiles-directory = lib.mkOption {
type = lib.types.str;
description = "Directory used by the textfiles collector.";
default = "/run/prometheus-node-exporter";
};
};
config.services.prometheus.exporters.node = lib.mkIf cfg.enable {
enable = true;
openFirewall = true;
firewallFilter = "-i monitoringvpn -p tcp -m tcp --dport 9100";
......@@ -22,7 +32,7 @@ in {
# extraFlags = [ "--collector.disable-defaults" ]; # not in nixpkgs 19.09
# Thanks https://github.com/mayflower/nixexprs/blob/master/modules/monitoring/default.nix
enabledCollectors = [
"arp"
# "arp" # is broken in 1.7.0 (2024-02-07)
"bcache"
"conntrack"
"filefd"
......@@ -30,16 +40,16 @@ in {
"netclass"
"netdev"
"netstat"
#"rapl" # not in nixpkgs 19.09
"rapl"
"sockstat"
#"softnet" # not in nixpkgs 19.09
"softnet"
"stat"
"systemd"
# "textfile"
# "textfile.directory /run/prometheus-node-exporter"
#"thermal_zone" # not in nixpkgs 19.09
"textfile"
"textfile.directory ${cfg.textfiles-directory}"
"thermal_zone"
"time"
#"udp_queues" # not in nixpkgs 19.09
"udp_queues"
"uname"
"vmstat"
] ++ optionals (!config.boot.isContainer) [
......@@ -59,7 +69,7 @@ in {
] ++ (
optionals (config.services.nfs.server.enable) [ "nfsd" ]
) ++ (
optionals ("" != config.boot.initrd.mdadmConf) [ "mdadm" ]
optionals ("" != config.boot.swraid.mdadmConf) [ "mdadm" ]
) ++ (
optionals ({} != config.networking.bonds) [ "bonding" ]
) ++ (
......@@ -67,7 +77,7 @@ in {
) ++ (
optionals (mountsFileSystemType "xfs") [ "xfs" ]
) ++ (
optionals (mountsFileSystemType "zfs" || elem "zfs" config.boot.supportedFilesystems) [ "zfs" ]
optionals (mountsFileSystemType "zfs" || config.boot.supportedFilesystems.zfs or false) [ "zfs" ]
);
};
}
......
# Promtail log forwarder configuration
#
# Scope: Tail logs on the local system and send them to Loki
#
# Description: This is not strictly an "exporter" like the Prometheus
# exporters, but it is very similar in what it is doing -
# preparing local data and sending it off to a TSDB.
{ config, options, lib, ... }:
let
  inherit (lib) mkEnableOption mkIf mkOption types;

  monitoring = config.services.private-storage.monitoring;
  promtailCfg = monitoring.exporters.promtail;

  # The retention policy in seconds, rendered as "<seconds>s" for journald.
  retentionSpan = "${toString monitoring.policy.logRetentionSeconds}s";

  # Port of promtail's HTTP server; it is also opened on the monitoringvpn
  # interface below.
  httpPort = 9080;

  journaldSettings = ''
    # This tells journald it can discard log files that contain only log
    # entries older than...
    MaxRetentionSec=${retentionSpan}
    # This tells journald to start a new log file once a day. Together with
    # the MaxRetentionSec setting, this means that entries are kept for
    # up to a full day longer than MaxRetentionSec.
    #
    # https://www.freedesktop.org/software/systemd/man/journald.conf.html
    # for further details about these options.
    #
    MaxFileSec=1day
    # This asks journald to not use more than 500M of disk space. Due to
    # journald's characteristics this might only be a week of logs, but that
    # should be okay since we ship all logs to a central server that keeps
    # them for a while longer.
    SystemMaxUse=500M
  '';

  # Scrape job reading the systemd journal, labelled with this host's name.
  journalScrape = {
    job_name = "systemd-journal";
    journal.labels = {
      job = "systemd-journal";
      host = config.networking.hostName;
    };
    # The journal has many internal labels, that by default will
    # be dropped because of their "__" prefix. To keep them, rename them.
    # https://grafana.com/docs/loki/latest/clients/promtail/scraping/#journal-scraping-linux-only
    # https://www.freedesktop.org/software/systemd/man/systemd.journal-fields.html
    relabel_configs = [{
      source_labels = [ "__journal__systemd_unit" ];
      target_label = "unit";
    }];
  };
in {
  options.services.private-storage.monitoring.exporters.promtail = {
    enable = mkEnableOption "Promtail log exporter service";

    lokiUrl = mkOption {
      type = types.str;
      description = ''
        The server URL that logs should be pushed to.
      '';
      # Resolving names is hard, let's have breakfast
      # If you are curious why there's a plain IP address in here, read all of
      # https://whetstone.private.storage/privatestorage/PrivateStorageio/-/merge_requests/251
      # https://whetstone.private.storage/privatestorage/PrivateStorageio/-/merge_requests/257
      # https://whetstone.private.storage/privatestorage/PrivateStorageio/-/merge_requests/258
      default = "http://172.23.23.1:3100/loki/api/v1/push";
    };
  };

  config = mkIf promtailCfg.enable {
    networking.firewall.interfaces.monitoringvpn.allowedTCPPorts = [ httpPort ];

    services.journald.extraConfig = journaldSettings;

    services.promtail = {
      enable = true;
      configuration = {
        server = {
          http_listen_port = httpPort; # Using /metrics for health check
          grpc_listen_address = "127.0.0.1"; # unused, but no option to turn it off.
          grpc_listen_port = 9094; # unused, but no option to turn it off.
        };
        clients = [{ url = promtailCfg.lokiUrl; }];
        scrape_configs = [ journalScrape ];
      };
    };
  };
}
# Tahoe Prometheus metrics collector
#
# Scope: Retrieve metrics from Tahoe and put them where Prometheus'
# node-exporter's textfile collector can find them.
#
# Usage: Import this to every server running Tahoe.
#
# See https://nixos.org/manual/nixos/stable/#module-services-prometheus-exporters
{ config, options, lib, pkgs, ... }:
let
  cfg = config.services.private-storage.monitoring.exporters.tahoe;
  # Directory configured for the node exporter's textfile collector; used as
  # the default location for our output file.
  inherit (config.services.private-storage.monitoring.exporters.node) textfiles-directory;
in {
  options.services.private-storage.monitoring.exporters.tahoe = {
    enable = lib.mkEnableOption "Tahoe OpenMetrics collecting service";

    # URL that the collector script fetches with curl.
    scrapeEndpoint = lib.mkOption {
      type = lib.types.str;
      description = "Where to get our metrics from?";
      default = "http://localhost:3456/statistics?t=openmetrics";
    };

    # Final location of the collected metrics.
    outFile = lib.mkOption {
      type = lib.types.str;
      description = "Where to store the temporary file for node exporter to scrape?";
      default = "${textfiles-directory}/tahoe.prom";
    };

    # systemd calendar expression passed to the unit's startAt.
    interval = lib.mkOption {
      type = lib.types.str;
      description = ''
        How often to do it?
        See https://www.freedesktop.org/software/systemd/man/systemd.time.html#Calendar%20Events
      '';
      # Every five minutes.
      default = "*:0/5";
    };
  };

  config =
    lib.mkIf cfg.enable {
      assertions = [
        {
          # Refuse to evaluate unless the node exporter module is enabled.
          # The message names this module by its full option path
          # (...monitoring.exporters.tahoe) so the operator can find it.
          assertion = config.services.private-storage.monitoring.exporters.node.enable;
          message = ''
            services.private-storage.monitoring.exporters.tahoe requires services.private-storage.monitoring.exporters.node to provide the textfile prometheus collector.
          '';
        }
      ];

      environment.systemPackages = [ pkgs.curl ];

      systemd.services.tahoe-metrics-collector = {
        enable = true;
        description = "Tahoe metrics gathering service";
        after = [ "tahoe.storage.service" ];
        startAt = cfg.interval;
        path = [ pkgs.coreutils pkgs.findutils pkgs.curl ];
        restartIfChanged = false;
        # The script counts the files below the corruption-advisories and
        # incidents directories, writes both counts as metrics and appends
        # whatever the scrape endpoint returns.
        #
        # Save to a temp file and then move atomically so the
        # textfile collector won't read a partial file.
        # See https://github.com/prometheus/node_exporter#textfile-collector
        script = ''
          set -euo pipefail

          NUM_CORRUPTION_ADVISORIES=$(find /storage/corruption-advisories/ -type f | wc -l)
          echo "tahoe_corruption_advisories_total $NUM_CORRUPTION_ADVISORIES" > "${cfg.outFile}.tmp"

          NUM_INCIDENT_REPORTS=$(find /var/db/tahoe-lafs/storage/logs/incidents/ -type f | wc -l)
          echo "tahoe_incident_reports_total $NUM_INCIDENT_REPORTS" >> "${cfg.outFile}.tmp"

          curl --silent --show-error --fail-with-body "${cfg.scrapeEndpoint}" >> "${cfg.outFile}.tmp"

          mv "${cfg.outFile}.tmp" "${cfg.outFile}"
        '';
      };

      # Order the timer after the Tahoe storage service as well.
      systemd.timers.tahoe-metrics-collector = {
        after = [ "tahoe.storage.service" ];
      };
    };
}
# Codify our log data retention policy
#
# A maximum retention of 30 days conforms to the published log retention policy,
# see https://private.storage/privacy-policy/ .
{ options, lib, ... }:
let
  # Number of seconds in one day.
  secondsPerDay = 24 * 60 * 60;
in {
  options.services.private-storage.monitoring.policy.logRetentionSeconds = lib.mkOption {
    type = lib.types.int;
    description = "How long do we retain logs (seconds)";
    # 29 days, to accommodate the journald log rotation (1 day).
    default = 29 * secondsPerDay;
  };
}
{
"annotations": {
"list": [
{
"builtIn": 1,
"datasource": "-- Grafana --",
"enable": true,
"hide": true,
"iconColor": "rgba(0, 211, 255, 1)",
"name": "Annotations & Alerts",
"type": "dashboard"
}
]
},
"description": "PaymentServer and related metrics",
"editable": true,
"gnetId": null,
"graphTooltip": 0,
"links": [],
"panels": [
{
"collapsed": false,
"datasource": null,
"gridPos": {
"h": 1,
"w": 24,
"x": 0,
"y": 0
},
"id": 24,
"panels": [],
"title": "Payments",
"type": "row"
},
{
"aliasColors": {
"Attempts": "yellow",
"Successes": "green"
},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": null,
"description": "Our calls to the Stripe API: Attempted and successful credit card charges.",
"fieldConfig": {
"defaults": {},
"overrides": []
},
"fill": 1,
"fillGradient": 0,
"gridPos": {
"h": 7,
"w": 12,
"x": 0,
"y": 1
},
"hiddenSeries": false,
"id": 22,
"legend": {
"avg": false,
"current": false,
"max": false,
"min": false,
"show": true,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"nullPointMode": "null",
"options": {
"alertThreshold": true
},
"percentage": false,
"pluginVersion": "7.5.7",
"pointradius": 2,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
"exemplar": true,
"expr": "processors_stripe_charge_attempts",
"hide": false,
"interval": "",
"legendFormat": "Attempts",
"refId": "B"
},
{
"exemplar": true,
"expr": "processors_stripe_charge_successes",
"interval": "",
"legendFormat": "Successes",
"refId": "C"
}
],
"thresholds": [],
"timeFrom": null,
"timeRegions": [],
"timeShift": null,
"title": "Stripe",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"$$hashKey": "object:350",
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
},
{
"$$hashKey": "object:351",
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
}
],
"yaxis": {
"align": false,
"alignLevel": null
}
},
{
"aliasColors": {
"Redeemed vouchers": "yellow"
},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": null,
"description": "",
"fieldConfig": {
"defaults": {},
"overrides": []
},
"fill": 1,
"fillGradient": 0,
"gridPos": {
"h": 7,
"w": 12,
"x": 12,
"y": 1
},
"hiddenSeries": false,
"id": 20,
"legend": {
"avg": false,
"current": false,
"max": false,
"min": false,
"show": true,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"nullPointMode": "null",
"options": {
"alertThreshold": true
},
"percentage": false,
"pluginVersion": "7.5.7",
"pointradius": 2,
"points": false,
"renderer": "flot",
"seriesOverrides": [
{
"$$hashKey": "object:223",
"alias": "Redeemed vouchers",
"yaxis": 1
},
{
"$$hashKey": "object:230",
"alias": "Issued signatures",
"yaxis": 2
}
],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
"exemplar": true,
"expr": "redemption_signatures_issued",
"interval": "",
"legendFormat": "Issued signatures",
"refId": "A"
},
{
"exemplar": true,
"expr": "redemption_voucher_redeemed",
"format": "time_series",
"hide": false,
"interval": "",
"legendFormat": "Redeemed vouchers",
"refId": "B"
}
],
"thresholds": [],
"timeFrom": null,
"timeRegions": [],
"timeShift": null,
"title": "Redemption",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"$$hashKey": "object:285",
"format": "short",
"label": "",
"logBase": 1,
"max": null,
"min": "0",
"show": true
},
{
"$$hashKey": "object:286",
"decimals": null,
"format": "short",
"label": "",
"logBase": 1,
"max": null,
"min": "0",
"show": true
}
],
"yaxis": {
"align": false,
"alignLevel": null
}
},
{
"collapsed": false,
"datasource": null,
"gridPos": {
"h": 1,
"w": 24,
"x": 0,
"y": 8
},
"id": 18,
"panels": [],
"title": "HTTP v1/stripe/charge",
"type": "row"
},
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": null,
"description": "HTTPS responses per second",
"fieldConfig": {
"defaults": {
"links": []
},
"overrides": []
},
"fill": 1,
"fillGradient": 0,
"gridPos": {
"h": 7,
"w": 8,
"x": 0,
"y": 9
},
"hiddenSeries": false,
"id": 4,
"legend": {
"avg": false,
"current": false,
"max": false,
"min": false,
"show": true,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"nullPointMode": "null",
"options": {
"alertThreshold": true
},
"percentage": false,
"pluginVersion": "7.5.7",
"pointradius": 2,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
"exemplar": true,
"expr": "rate(http_responses_total{path=\"v1/stripe/charge\", instance=\"payments\"}[5m])",
"instant": false,
"interval": "",
"legendFormat": "{{status}}",
"refId": "A"
}
],
"thresholds": [],
"timeFrom": null,
"timeRegions": [],
"timeShift": null,
"title": "Requests per second",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"$$hashKey": "object:452",
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": "0",
"show": true
},
{
"$$hashKey": "object:453",
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
}
],
"yaxis": {
"align": false,
"alignLevel": null
}
},
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": null,
"description": "",
"fieldConfig": {
"defaults": {
"links": []
},
"overrides": []
},
"fill": 1,
"fillGradient": 0,
"gridPos": {
"h": 7,
"w": 8,
"x": 8,
"y": 9
},
"hiddenSeries": false,
"id": 15,
"legend": {
"avg": false,
"current": false,
"max": false,
"min": false,
"show": true,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"nullPointMode": "null",
"options": {
"alertThreshold": true
},
"percentage": false,
"pluginVersion": "7.5.7",
"pointradius": 2,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"stack": true,
"steppedLine": false,
"targets": [
{
"exemplar": true,
"expr": "sum(http_responses_total{path=\"v1/stripe/charge\", status=\"4XX\"}) / sum(http_responses_total{path=\"v1/stripe/charge\"})",
"interval": "",
"legendFormat": "Client error (4XX) rate",
"refId": "A"
},
{
"exemplar": true,
"expr": "sum(http_responses_total{path=\"v1/stripe/charge\", status=\"5XX\"}) / sum(http_responses_total{path=\"v1/stripe/charge\"})",
"interval": "",
"legendFormat": "Server error (5XX) rate",
"refId": "B"
}
],
"thresholds": [],
"timeFrom": null,
"timeRegions": [],
"timeShift": null,
"title": "Error rate",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"$$hashKey": "object:576",
"format": "percentunit",
"label": null,
"logBase": 1,
"max": "1",
"min": "0",
"show": true
},
{
"$$hashKey": "object:577",
"format": "percent",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": false
}
],
"yaxis": {
"align": false,
"alignLevel": null
}
},
{
"aliasColors": {
"=< 0.1s": "blue",
"=< 1s": "green",
"=< 5s": "yellow",
"> 5s": "orange"
},
"bars": false,
"cacheTimeout": null,
"dashLength": 10,
"dashes": false,
"datasource": null,
"description": "Request durations, stacked",
"fieldConfig": {
"defaults": {
"links": []
},
"overrides": []
},
"fill": 2,
"fillGradient": 0,
"gridPos": {
"h": 7,
"w": 8,
"x": 16,
"y": 9
},
"hiddenSeries": false,
"id": 12,
"legend": {
"avg": false,
"current": false,
"max": false,
"min": false,
"show": true,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"links": [],
"nullPointMode": "null",
"options": {
"alertThreshold": true
},
"percentage": false,
"pluginVersion": "7.5.7",
"pointradius": 2,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"stack": true,
"steppedLine": false,
"targets": [
{
"exemplar": true,
"expr": "http_request_duration_seconds_bucket{path=\"v1/stripe/charge\", le=\"0.1\"}",
"format": "time_series",
"hide": false,
"instant": false,
"interval": "",
"intervalFactor": 1,
"legendFormat": "=< 0.1s",
"refId": "A"
},
{
"exemplar": true,
"expr": "http_request_duration_seconds_bucket{path=\"v1/stripe/charge\", le=\"1.0\"} - ignoring(le) http_request_duration_seconds_bucket{path=\"v1/stripe/charge\", le=\"0.1\"}",
"format": "time_series",
"hide": false,
"instant": false,
"interval": "",
"intervalFactor": 1,
"legendFormat": "=< 1s",
"refId": "D"
},
{
"exemplar": true,
"expr": "http_request_duration_seconds_bucket{path=\"v1/stripe/charge\", le=\"5.0\"} - ignoring(le) http_request_duration_seconds_bucket{path=\"v1/stripe/charge\", le=\"1.0\"}",
"format": "time_series",
"hide": false,
"instant": false,
"interval": "",
"intervalFactor": 1,
"legendFormat": "=< 5s",
"refId": "B"
},
{
"exemplar": true,
"expr": "http_request_duration_seconds_bucket{path=\"v1/stripe/charge\", le=\"+Inf\"} - ignoring(le) http_request_duration_seconds_bucket{path=\"v1/stripe/charge\", le=\"5.0\"}",
"format": "time_series",
"hide": false,
"instant": false,
"interval": "",
"intervalFactor": 1,
"legendFormat": "> 5s",
"refId": "C"
}
],
"thresholds": [],
"timeFrom": null,
"timeRegions": [],
"timeShift": null,
"title": "Durations",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"$$hashKey": "object:625",
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": "0",
"show": true
},
{
"$$hashKey": "object:626",
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
}
],
"yaxis": {
"align": false,
"alignLevel": null
}
},
{
"collapsed": false,
"datasource": null,
"gridPos": {
"h": 1,
"w": 24,
"x": 0,
"y": 16
},
"id": 11,
"panels": [],
"title": "HTTP v1/redeem",
"type": "row"
},
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": null,
"description": "HTTPS responses per second",
"fieldConfig": {
"defaults": {
"links": []
},
"overrides": []
},
"fill": 1,
"fillGradient": 0,
"gridPos": {
"h": 7,
"w": 8,
"x": 0,
"y": 17
},
"hiddenSeries": false,
"id": 2,
"legend": {
"avg": false,
"current": false,
"max": false,
"min": false,
"show": true,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"nullPointMode": "null",
"options": {
"alertThreshold": true
},
"percentage": false,
"pluginVersion": "7.5.7",
"pointradius": 2,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
"expr": "rate(http_responses_total{path=\"v1/redeem\"}[5m])",
"instant": false,
"intervalFactor": 1,
"legendFormat": "{{status}}",
"refId": "A"
}
],
"thresholds": [],
"timeFrom": null,
"timeRegions": [],
"timeShift": null,
"title": "Requests per second",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"$$hashKey": "object:751",
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": "0",
"show": true
},
{
"$$hashKey": "object:752",
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
}
],
"yaxis": {
"align": false,
"alignLevel": null
}
},
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": null,
"fieldConfig": {
"defaults": {
"links": []
},
"overrides": []
},
"fill": 1,
"fillGradient": 0,
"gridPos": {
"h": 7,
"w": 8,
"x": 8,
"y": 17
},
"hiddenSeries": false,
"id": 16,
"legend": {
"avg": false,
"current": false,
"max": false,
"min": false,
"show": true,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"nullPointMode": "null",
"options": {
"alertThreshold": true
},
"percentage": false,
"pluginVersion": "7.5.7",
"pointradius": 2,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"stack": true,
"steppedLine": false,
"targets": [
{
"expr": "sum(http_responses_total{path=\"v1/redeem\", status=\"4XX\"}) / sum(http_responses_total{path=\"v1/redeem\"})",
"legendFormat": "Client error (4XX) rate",
"refId": "A"
},
{
"expr": "sum(http_responses_total{path=\"v1/redeem\", status=\"5XX\"}) / sum(http_responses_total{path=\"v1/redeem\"})",
"legendFormat": "Server error (5XX) rate",
"refId": "B"
}
],
"thresholds": [],
"timeFrom": null,
"timeRegions": [],
"timeShift": null,
"title": "Error rate",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"$$hashKey": "object:804",
"format": "percentunit",
"label": null,
"logBase": 1,
"max": "1",
"min": "0",
"show": true
},
{
"$$hashKey": "object:805",
"format": "percent",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": false
}
],
"yaxis": {
"align": false,
"alignLevel": null
}
},
{
"aliasColors": {
"=< 0.1s": "blue",
"=< 1s": "green",
"=< 5s": "yellow",
"> 5s": "orange"
},
"bars": false,
"cacheTimeout": null,
"dashLength": 10,
"dashes": false,
"datasource": null,
"description": "Request durations, stacked.",
"fieldConfig": {
"defaults": {
"links": []
},
"overrides": []
},
"fill": 2,
"fillGradient": 0,
"gridPos": {
"h": 7,
"w": 8,
"x": 16,
"y": 17
},
"hiddenSeries": false,
"id": 13,
"legend": {
"avg": false,
"current": false,
"max": false,
"min": false,
"show": true,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"links": [],
"nullPointMode": "null",
"options": {
"alertThreshold": true
},
"percentage": false,
"pluginVersion": "7.5.7",
"pointradius": 2,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"stack": true,
"steppedLine": false,
"targets": [
{
"exemplar": true,
"expr": "http_request_duration_seconds_bucket{path=\"v1/redeem\", le=\"0.1\"}",
"format": "time_series",
"hide": false,
"instant": false,
"interval": "",
"intervalFactor": 1,
"legendFormat": "=< 0.1s",
"refId": "A"
},
{
"exemplar": true,
"expr": "http_request_duration_seconds_bucket{path=\"v1/redeem\", le=\"1.0\"} - ignoring(le) http_request_duration_seconds_bucket{path=\"v1/redeem\", le=\"0.1\"}",
"format": "time_series",
"hide": false,
"instant": false,
"interval": "",
"intervalFactor": 1,
"legendFormat": "=< 1s",
"refId": "D"
},
{
"exemplar": true,
"expr": "http_request_duration_seconds_bucket{path=\"v1/redeem\", le=\"5.0\"} - ignoring(le) http_request_duration_seconds_bucket{path=\"v1/redeem\", le=\"1.0\"}",
"format": "time_series",
"hide": false,
"instant": false,
"interval": "",
"intervalFactor": 1,
"legendFormat": "=< 5s",
"refId": "B"
},
{
"exemplar": true,
"expr": "http_request_duration_seconds_bucket{path=\"v1/redeem\", le=\"+Inf\"} - ignoring(le) http_request_duration_seconds_bucket{path=\"v1/redeem\", le=\"5.0\"}",
"format": "time_series",
"hide": false,
"instant": false,
"interval": "",
"intervalFactor": 1,
"legendFormat": "> 5s",
"refId": "C"
}
],
"thresholds": [],
"timeFrom": null,
"timeRegions": [],
"timeShift": null,
"title": "Durations",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"$$hashKey": "object:853",
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": "0",
"show": true
},
{
"$$hashKey": "object:854",
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
}
],
"yaxis": {
"align": false,
"alignLevel": null
}
}
],
"refresh": "5m",
"schemaVersion": 27,
"style": "dark",
"tags": [],
"templating": {
"list": []
},
"time": {
"from": "now-3h",
"to": "now"
},
"timepicker": {
"refresh_intervals": [
"5s",
"10s",
"30s",
"1m",
"5m",
"15m",
"30m",
"1h",
"2h",
"1d"
]
},
"timezone": "",
"title": "Payments",
"uid": "Payments",
"version": 1
}
{
"annotations": {
"list": [
{
"builtIn": 1,
"datasource": {
"type": "datasource",
"uid": "grafana"
},
"enable": true,
"hide": true,
"iconColor": "rgba(0, 211, 255, 1)",
"name": "Annotations & Alerts",
"target": {
"limit": 100,
"matchAny": false,
"tags": [],
"type": "dashboard"
},
"type": "dashboard"
}
]
},
"description": "Is our data safe?",
"editable": true,
"fiscalYearStartMonth": 0,
"graphTooltip": 0,
"links": [],
"liveNow": false,
"panels": [
{
"collapsed": false,
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"gridPos": {
"h": 1,
"w": 24,
"x": 0,
"y": 0
},
"id": 44,
"panels": [],
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"refId": "A"
}
],
"title": "Customer ciphertext backup to Borgbase.com",
"type": "row"
},
{
"alert": {
"alertRuleTags": {},
"conditions": [
{
"evaluator": {
"params": [
0
],
"type": "gt"
},
"operator": {
"type": "and"
},
"query": {
"params": [
"Failed jobs",
"5m",
"now"
]
},
"reducer": {
"params": [],
"type": "last"
},
"type": "query"
}
],
"executionErrorState": "alerting",
"for": "5m",
"frequency": "1m",
"handler": 1,
"name": "Daily backup jobs state alert",
"noDataState": "no_data",
"notifications": []
},
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"description": "Daily backup job systemd timer unit state",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 60,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "stepAfter",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
}
},
"overrides": [
{
"matcher": {
"id": "byName",
"options": "Failed"
},
"properties": [
{
"id": "color",
"value": {
"fixedColor": "semi-dark-red",
"mode": "fixed"
}
}
]
}
]
},
"gridPos": {
"h": 5,
"w": 12,
"x": 0,
"y": 1
},
"id": 46,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "multi",
"sort": "none"
}
},
"pluginVersion": "8.4.7",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"exemplar": true,
"expr": "sum(node_systemd_unit_state{name=\"borgbackup-job-daily.timer\", state=~\"active\"})",
"hide": false,
"interval": "",
"legendFormat": "Active",
"refId": "Active timers"
},
{
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"exemplar": true,
"expr": "sum(node_systemd_unit_state{name=\"borgbackup-job-daily.service\", state=\"failed\"})",
"hide": false,
"interval": "",
"legendFormat": "Failed",
"refId": "Failed jobs"
}
],
"thresholds": [
{
"colorMode": "critical",
"op": "gt",
"value": 0,
"visible": true
}
],
"title": "Daily backup timer state",
"type": "timeseries"
},
{
"alert": {
"alertRuleTags": {},
"conditions": [
{
"evaluator": {
"params": [
0
],
"type": "gt"
},
"operator": {
"type": "and"
},
"query": {
"params": [
"Failed jobs",
"5m",
"now"
]
},
"reducer": {
"params": [],
"type": "last"
},
"type": "query"
}
],
"executionErrorState": "alerting",
"for": "5m",
"frequency": "1m",
"handler": 1,
"name": "Monthly check-repo timer state alert",
"noDataState": "no_data",
"notifications": []
},
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"description": "Monthly check-repo systemd timer unit state",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 60,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "stepAfter",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
}
]
},
"unit": "short"
},
"overrides": [
{
"matcher": {
"id": "byName",
"options": "Failed"
},
"properties": [
{
"id": "color",
"value": {
"fixedColor": "semi-dark-red",
"mode": "fixed"
}
}
]
}
]
},
"gridPos": {
"h": 5,
"w": 12,
"x": 12,
"y": 1
},
"id": 47,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "multi",
"sort": "none"
}
},
"pluginVersion": "8.4.7",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"exemplar": true,
"expr": "sum(node_systemd_unit_state{name=\"borgbackup-check-repo.timer\", state=\"active\"})",
"hide": false,
"interval": "",
"legendFormat": "Active",
"refId": "Active timers"
},
{
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"exemplar": true,
"expr": "sum(node_systemd_unit_state{name=\"borgbackup-check-repo.service\", state=\"failed\"})",
"hide": false,
"interval": "",
"legendFormat": "Failed",
"refId": "Failed jobs"
}
],
"thresholds": [
{
"colorMode": "critical",
"op": "gt",
"value": 0,
"visible": true
}
],
"title": "Monthly check-repo timer state",
"type": "timeseries"
},
{
"alert": {
"alertRuleTags": {},
"conditions": [
{
"evaluator": {
"params": [
-90000
],
"type": "lt"
},
"operator": {
"type": "and"
},
"query": {
"params": [
"A",
"5m",
"now"
]
},
"reducer": {
"params": [],
"type": "avg"
},
"type": "query"
}
],
"executionErrorState": "keep_state",
"for": "5m",
"frequency": "1m",
"handler": 1,
"message": "Borgbase.com daily backup job trigger.",
"name": "Daily backup to Borgbase Trigger alert",
"noDataState": "no_data",
"notifications": []
},
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"description": "This shows the last triggering of the borgbackup-job-daily.timer systemd unit.",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "never",
"spanNulls": true,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "area"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "red",
"value": null
},
{
"color": "semi-dark-green",
"value": -90000
}
]
},
"unit": "s"
},
"overrides": []
},
"gridPos": {
"h": 5,
"w": 12,
"x": 0,
"y": 6
},
"id": 41,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": false
},
"tooltip": {
"mode": "multi",
"sort": "asc"
}
},
"pluginVersion": "8.3.5",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"exemplar": false,
"expr": "node_systemd_timer_last_trigger_seconds{name=\"borgbackup-job-daily.timer\"} - time()",
"interval": "",
"intervalFactor": 1,
"legendFormat": "{{instance}}",
"refId": "A"
}
],
"thresholds": [
{
"colorMode": "critical",
"op": "lt",
"value": -90000,
"visible": true
}
],
"title": "Daily backup job trigger",
"type": "timeseries"
},
{
"alert": {
"alertRuleTags": {},
"conditions": [
{
"evaluator": {
"params": [
-2764800
],
"type": "lt"
},
"operator": {
"type": "and"
},
"query": {
"params": [
"A"
]
},
"reducer": {
"type": "last"
}
},
{
"evaluator": {
"params": [
-30758400
],
"type": "gt"
},
"operator": {
"type": "and"
},
"query": {
"params": [
"A"
]
},
"reducer": {
"params": [],
"type": "last"
},
"type": "query"
}
],
"executionErrorState": "keep_state",
"for": "5m",
"frequency": "1m",
"handler": 1,
"message": "Borgbase.com monthly check-repo trigger: borgbackup-check-repo.timer has not triggered for more than 32 days.",
"name": "Monthly check of Borgbase backup Trigger alert",
"noDataState": "no_data",
"notifications": []
},
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"description": "This shows the last triggering of the borgbackup-check-repo.timer systemd unit.",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "never",
"spanNulls": true,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "area"
}
},
"mappings": [],
"max": 0,
"min": -2864800,
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "red",
"value": null
},
{
"color": "semi-dark-green",
"value": -2764800
}
]
},
"unit": "s"
},
"overrides": []
},
"gridPos": {
"h": 5,
"w": 12,
"x": 12,
"y": 6
},
"id": 42,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": false
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"pluginVersion": "8.3.5",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"exemplar": false,
"expr": "node_systemd_timer_last_trigger_seconds{name=\"borgbackup-check-repo.timer\"} - time()",
"interval": "",
"intervalFactor": 4,
"legendFormat": "{{instance}}",
"refId": "A"
}
],
"thresholds": [
{
"colorMode": "critical",
"op": "lt",
"value": -2764800,
"visible": true
}
],
"title": "Monthly check-repo trigger",
"type": "timeseries"
},
{
"alert": {
"alertRuleTags": {},
"conditions": [
{
"evaluator": {
"params": [
9000
],
"type": "gt"
},
"operator": {
"type": "and"
},
"query": {
"params": [
"JobRunTime",
"5m",
"now"
]
},
"reducer": {
"params": [],
"type": "last"
},
"type": "query"
}
],
"executionErrorState": "alerting",
"for": "5m",
"frequency": "1m",
"handler": 1,
"message": "A backup job ran for more than 2 ½ hours. After 3 hours it could run into the check-repo job start time, depending on its \"random\" job delay.",
"name": "Daily backup job run time alert",
"noDataState": "no_data",
"notifications": []
},
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"description": "When was the systemd unit active?",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "left",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 60,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "stepAfter",
"lineStyle": {
"fill": "solid"
},
"lineWidth": 0,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "never",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "#EAB839",
"value": 7200
}
]
},
"unit": "s"
},
"overrides": []
},
"gridPos": {
"h": 5,
"w": 12,
"x": 0,
"y": 11
},
"id": 52,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"exemplar": true,
"expr": "abs((node_systemd_timer_last_trigger_seconds{name=\"borgbackup-job-daily.timer\"} - time())) * on (instance) node_systemd_unit_state{name=\"borgbackup-job-daily.service\", state=\"active\"}",
"interval": "",
"legendFormat": "{{instance}}",
"refId": "JobRunTime"
}
],
"thresholds": [
{
"colorMode": "critical",
"op": "gt",
"value": 9000,
"visible": true
}
],
"title": "Daily backup job run time",
"type": "timeseries"
},
{
"alert": {
"alertRuleTags": {},
"conditions": [
{
"evaluator": {
"params": [
18000
],
"type": "gt"
},
"operator": {
"type": "and"
},
"query": {
"params": [
"JobRunTime",
"5m",
"now"
]
},
"reducer": {
"params": [],
"type": "last"
},
"type": "query"
}
],
"executionErrorState": "alerting",
"for": "5m",
"frequency": "1m",
"handler": 1,
"message": "A borg check-repo job ran for more than five hours. After six hours it could collide with the daily backup job, depending on that job's \"random\" delay. If the backup set is large and this is expected to happen again, consider using borgbackup partial checks (--max-duration SECONDS parameter).",
"name": "Monthly check-repo run time alert",
"noDataState": "no_data",
"notifications": []
},
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"description": "When was the systemd unit active?",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "left",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 60,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "stepAfter",
"lineStyle": {
"fill": "solid"
},
"lineWidth": 0,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "never",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "#EAB839",
"value": 15000
}
]
},
"unit": "s"
},
"overrides": []
},
"gridPos": {
"h": 5,
"w": 12,
"x": 12,
"y": 11
},
"id": 53,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"exemplar": true,
"expr": "abs((node_systemd_timer_last_trigger_seconds{name=\"borgbackup-check-repo.timer\"} - time())) * on (instance) node_systemd_unit_state{name=\"borgbackup-check-repo.service\", state=\"active\"}",
"interval": "",
"legendFormat": "{{instance}}",
"refId": "JobRunTime"
}
],
"thresholds": [
{
"colorMode": "critical",
"op": "gt",
"value": 18000,
"visible": true
}
],
"title": "Monthly check-repo run time",
"type": "timeseries"
},
{
"datasource": {
"type": "loki",
"uid": "LocalLoki"
},
"description": "The \"duration\" that borgbackup status reports.",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"axisSoftMin": 0,
"fillOpacity": 60,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"lineWidth": 1,
"scaleDistribution": {
"type": "linear"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "#EAB839",
"value": 3600
},
{
"color": "red",
"value": 10800
}
]
},
"unit": "dtdurations"
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 12,
"x": 0,
"y": 16
},
"id": 49,
"options": {
"barRadius": 0,
"barWidth": 0.1,
"fullHighlight": false,
"groupWidth": 0.7,
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": false
},
"orientation": "auto",
"showValue": "auto",
"stacking": "none",
"tooltip": {
"mode": "single",
"sort": "none"
},
"xField": "host",
"xTickLabelRotation": -45,
"xTickLabelSpacing": 0
},
"pluginVersion": "8.4.7",
"targets": [
{
"datasource": {
"type": "loki",
"uid": "LocalLoki"
},
"expr": "{unit=\"borgbackup-job-daily.service\"} |= \"duration\" | pattern \"<_>\\\"duration\\\": <duration>,\"",
"legendFormat": "{{host}}",
"queryType": "range",
"refId": "A"
}
],
"title": "Daily backup job run time (as reported by borg)",
"transformations": [
{
"id": "extractFields",
"options": {
"source": "labels"
}
},
{
"id": "convertFieldType",
"options": {
"conversions": [
{
"destinationType": "number",
"targetField": "duration"
}
],
"fields": {}
}
}
],
"type": "barchart"
},
{
"datasource": {
"type": "loki",
"uid": "LocalLoki"
},
"description": "\"compressed_size\" is size of last archive, \"unique_csize\" is deduplicated size of all archives.",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"axisSoftMin": 0,
"fillOpacity": 80,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"lineWidth": 1,
"scaleDistribution": {
"type": "linear"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
}
]
},
"unit": "bytes"
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 12,
"x": 12,
"y": 16
},
"id": 55,
"options": {
"barRadius": 0,
"barWidth": 0.97,
"fullHighlight": false,
"groupWidth": 0.7,
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": false
},
"orientation": "vertical",
"showValue": "auto",
"stacking": "none",
"tooltip": {
"mode": "single",
"sort": "none"
},
"xField": "host",
"xTickLabelRotation": -45,
"xTickLabelSpacing": 0
},
"pluginVersion": "8.4.7",
"targets": [
{
"datasource": {
"type": "loki",
"uid": "LocalLoki"
},
"editorMode": "code",
"expr": "{unit=\"borgbackup-job-daily.service\"} |= \"compressed_size\" | pattern \"<_>\\\"compressed_size\\\": <compressed_size>,\"",
"hide": false,
"legendFormat": "{{host}} archive",
"queryType": "range",
"refId": "This archive size in bytes"
},
{
"datasource": {
"type": "loki",
"uid": "LocalLoki"
},
"editorMode": "code",
"expr": "{unit=\"borgbackup-job-daily.service\"} |= \"unique_csize\" | pattern \"<_>\\\"unique_csize\\\": <unique_csize>,\"",
"hide": false,
"legendFormat": "{{host}} all archives",
"queryType": "range",
"refId": "All archives deduplicated size"
}
],
"title": "Backup set size",
"transformations": [
{
"id": "extractFields",
"options": {
"source": "labels"
}
},
{
"id": "convertFieldType",
"options": {
"conversions": [
{
"destinationType": "number",
"targetField": "unique_csize"
},
{
"destinationType": "number",
"targetField": "compressed_size"
}
],
"fields": {}
}
}
],
"type": "barchart"
}
],
"refresh": "5m",
"schemaVersion": 38,
"tags": [],
"templating": {
"list": []
},
"time": {
"from": "now-7d",
"to": "now"
},
"timepicker": {},
"timezone": "",
"title": "Backups",
"uid": "backups",
"version": 1,
"weekStart": ""
}