Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.

Source

Select target project
No results found
Select Git revision
  • 118-borg-backup-not-running-as-it-should
  • 125.dont-set-static-datasource-uids
  • 125.silence-broken-backup-alerts
  • 133.give-access-to-prod-infra
  • 149.fix-bootloader
  • 157.authorize-new-hro-key
  • 162.flexible-grafana-module
  • 163.jp-to-ben-for-prod
  • 164.grafana-alert-rules
  • 190-our-regular-updates-fill-up-the-servers-boot-partitions
  • 207.payment-server-exception-reporting
  • 287.publish-tahoe-error-rate
  • 300.monitor-payment-server
  • 352.cachix
  • 42.update-nixpkgs
  • 445.update-zkapauthorizer
  • 62.openssl-111k
  • 67.rationalize-morph-names.2
  • 87.qemu-local-grid
  • 87.test-local-grid
  • 88.no-gui-for-qemu
  • also-alert-on-incoming-network-errors
  • develop
  • doc-fix
  • dont-use-etc-hosts
  • failsafe-payment-process
  • fix-repo-update-docs
  • flake
  • hro-cloud
  • localdev-qemu
  • make-sure-we-run-a-openzfs-compatible-kernel
  • meejah-develop-patch-44361
  • monitored-node
  • nixpkgs-upgrade-2022-07-13
  • nixpkgs-upgrade-2022-07-14
  • nixpkgs-upgrade-2022-07-22
  • nixpkgs-upgrade-2023-11-06
  • nixpkgs-upgrade-2024-02-12
  • nixpkgs-upgrade-2024-02-19
  • nixpkgs-upgrade-2024-02-26
  • nixpkgs-upgrade-2024-03-04
  • nixpkgs-upgrade-2024-03-11
  • nixpkgs-upgrade-2024-03-18
  • nixpkgs-upgrade-2024-03-25
  • nixpkgs-upgrade-2024-04-22
  • nixpkgs-upgrade-2024-05-13
  • nixpkgs-upgrade-2024-10-14
  • nixpkgs-upgrade-2024-12-23
  • nixpkgs-upgrade-2025-06-16
  • parallel-privatestorage-system-tests
  • payment-proxy-timeouts
  • per-node-monitor-config
  • production
  • reproduce-permission-errors
  • smaller-system-images
  • spending-node
  • spending-node-rebase
  • staging
  • upgrade-nixos-to-22.11_with-libvirt-localgrid
59 results

Target

Select target project
  • tomprince/PrivateStorageio
  • privatestorage/PrivateStorageio
2 results
Select Git revision
  • arion
  • develop
  • dont-use-etc-hosts
  • local-test-grid
  • no-morph-on-nodes
  • sec
  • simple-docs-build
  • simplify-grafana
  • stuff
9 results
Show changes
Showing
with 28756 additions and 1397 deletions
# Similar to ``issuer.nix`` but for a "storage"-type system. Holes are filled # This contains all of the NixOS system configuration necessary to specify an
# by ``customize-storage.nix``. # "storage"-type system.
{ config, ...} : { lib, config, ...} :
let let
inherit (config.grid) publicKeyPath privateKeyPath; inherit (config.grid) privateKeyPath;
in { in {
# Any extra NixOS modules to load on this server.
imports = [
./monitoringvpn-client.nix
./borgbackup.nix
];
options.grid.storage = {
passValue = lib.mkOption {
type = lib.types.int;
description = ''
An integer giving the value of a single pass in byte×months.
'';
};
publicStoragePort = lib.mkOption {
type = lib.types.port;
description = ''
An integer giving the port number to include in Tahoe storage service
advertisements and on which to listen for storage connections.
'';
};
};
config = {
deployment = { deployment = {
secrets = { secrets = {
"ristretto-signing-key" = { "ristretto-signing-key" = {
...@@ -17,33 +41,13 @@ in { ...@@ -17,33 +41,13 @@ in {
# extract it from the tahoe-lafs nixos module somehow? # extract it from the tahoe-lafs nixos module somehow?
action = ["sudo" "systemctl" "restart" "tahoe.storage.service"]; action = ["sudo" "systemctl" "restart" "tahoe.storage.service"];
}; };
"monitoringvpn-secret-key" = {
destination = "/run/keys/monitoringvpn/client.key";
owner.user = "root";
owner.group = "root";
permissions = "0400";
action = ["sudo" "systemctl" "restart" "wireguard-monitoringvpn.service"];
};
"monitoringvpn-preshared-key" = {
destination = "/run/keys/monitoringvpn/preshared.key";
owner.user = "root";
owner.group = "root";
permissions = "0400";
action = ["sudo" "systemctl" "restart" "wireguard-monitoringvpn.service"];
};
}; };
}; };
# Any extra NixOS modules to load on this server. services.private-storage.monitoring.exporters.node.enable = true;
imports = [ services.private-storage.monitoring.exporters.tahoe.enable = true;
# Bring in our module for configuring the Tahoe-LAFS service and other
# Private Storage-specific things. services.private-storage.borgbackup.enable = lib.mkDefault true;
../../nixos/modules/private-storage.nix
# Connect to the monitoringvpn.
../../nixos/modules/monitoring/vpn/client.nix
# Expose base system metrics over the monitoringvpn.
../../nixos/modules/monitoring/exporters/node.nix
];
# Turn on the Private Storage (Tahoe-LAFS) service. # Turn on the Private Storage (Tahoe-LAFS) service.
services.private-storage = { services.private-storage = {
...@@ -51,5 +55,7 @@ in { ...@@ -51,5 +55,7 @@ in {
enable = true; enable = true;
# Give it the Ristretto signing key to support authorization. # Give it the Ristretto signing key to support authorization.
ristrettoSigningKeyPath = config.deployment.secrets.ristretto-signing-key.destination; ristrettoSigningKeyPath = config.deployment.secrets.ristretto-signing-key.destination;
inherit (config.grid.storage) passValue publicStoragePort;
};
}; };
} }
# Entry point for this project's Nix test-helper library.
#
# `callPackage` is expected to be supplied nixpkgs-style by the caller; it
# instantiates ./testing.nix, injecting whatever arguments that file declares.
{ callPackage }:
{
/* A library of tools useful for writing tests with Nix.

   Exposes a single attribute, `testing`, built from ./testing.nix.
*/
testing = callPackage ./testing.nix { };
}
# Thank you: https://gist.github.com/petabyteboy/558ffddb9aeb24e1eab2d5d6d021b5d7
#
# Pure-Nix helpers for integer arithmetic and IPv4/IPv6 address handling,
# built on <nixpkgs/lib>.
with import <nixpkgs/lib>;
rec {
  # Integer exponentiation: pow base exponent == base raised to exponent.
  # FIXME: add case for negative numbers
  pow = base: exponent: if exponent == 0 then 1 else fold (
    x: y: y * base
  ) base (
    range 2 exponent
  );

  # Parse a hexadecimal string, with or without a "0x" prefix, into an
  # integer.  Only lower-case hex digits are handled (matching the output of
  # `toLower` as used by ipv6.encode below).
  fromHexString = hex: foldl (
    x: y: 16 * x + (
      (
        listToAttrs (
          map (
            x: nameValuePair (
              toString x
            ) x
          ) (
            range 0 9
          )
        ) // {
          "a" = 10;
          "b" = 11;
          "c" = 12;
          "d" = 13;
          "e" = 14;
          "f" = 15;
        }
      ).${y}
    )
  ) 0 (
    stringToCharacters (
      removePrefix "0x" (
        hex
      )
    )
  );

  ipv4 = rec {
    # Dotted-quad string -> 32-bit integer.
    decode = address: foldl (
      x: y: 256 * x + y
    ) 0 (
      map toInt (
        splitString "." address
      )
    );

    # 32-bit integer -> dotted-quad string.
    encode = num: concatStringsSep "." (
      map (
        x: toString (mod (num / x) 256)
      ) (
        reverseList (
          genList (
            x: pow 2 (x * 8)
          ) 4
        )
      )
    );

    # Prefix length -> netmask as an integer, e.g. 24 -> 0xffffff00.
    netmask = prefixLength: (
      foldl (
        x: y: 2 * x + 1
      ) 0 (
        range 1 prefixLength
      )
    ) * (
      pow 2 (
        32 - prefixLength
      )
    );

    # Reverse-DNS zone name for a network, by reversing the dotted groups of
    # `net` and appending ".in-addr.arpa".
    reverseZone = net: (
      concatStringsSep "." (
        reverseList (
          splitString "." net
        )
      )
    ) + ".in-addr.arpa";

    # Every address in the network as a dotted-quad string, starting at `net`.
    # Fixed: the original applied `decode` to the computed integer, but
    # `decode` expects an address *string* (it calls splitString on it) and
    # would fail; `encode` is the int -> string direction wanted here.
    eachAddress = net: prefixLength: genList (
      x: encode (
        x + (
          decode net
        )
      )
    ) (
      pow 2 (
        32 - prefixLength
      )
    );

    # The network part of `address`, as a dotted-quad string.
    networkOf = address: prefixLength: encode (
      bitAnd (
        decode address
      ) (
        netmask prefixLength
      )
    );

    # NOTE(review): `networkOf` takes two arguments but only `address` is
    # supplied here, so this compares a partially-applied function against
    # `net` and always yields false.  A prefix length is missing — confirm
    # the intended call signature with callers before fixing.
    isInNetwork = net: address: networkOf address == net;

    /* nixos-specific stuff */
    # First configured address on any interface that lies inside `net`.
    findOwnAddress = config: net: head (
      filter (
        isInNetwork net
      ) (
        configuredAddresses config
      )
    );

    # All IPv4 addresses configured on any interface of `config`.
    configuredAddresses = config: concatLists (
      mapAttrsToList (
        name: iface: iface.ipv4.addresses
      ) config.networking.interfaces
    );
  };

  ipv6 = rec {
    # Expand the "::" shorthand into an explicit colon-separated form by
    # inserting the elided zero groups.
    expand = address: (
      replaceStrings ["::"] [(
        concatStringsSep "0" (
          genList (x: ":") (
            9 - (count (x: x == ":") (stringToCharacters address))
          )
        )
      )] address
    ) + (
      if hasSuffix "::" address then
        "0"
      else
        ""
    );

    # Address string -> list of eight 16-bit integers.
    decode = address: map fromHexString (
      splitString ":" (
        expand address
      )
    );

    # List of eight 16-bit integers -> lower-case address string.
    encode = address: toLower (
      concatStringsSep ":" (
        map toHexString address
      )
    );

    # Prefix length -> netmask as a list of eight 16-bit integers.
    netmask = prefixLength: map (
      x: if prefixLength > x + 16 then
        (pow 2 16) - 1
      else if prefixLength < x then
        0
      else
        (
          foldl (
            x: y: 2 * x + 1
          ) 0 (
            range 1 (prefixLength - x)
          )
        ) * (
          pow 2 (
            16 - (prefixLength - x)
          )
        )
    ) (
      genList (
        x: x * 16
      ) 8
    );

    # Reverse-DNS zone name (ip6.arpa) for a network address.  Per RFC 3596
    # the zone is built from ALL 32 nibbles in reverse order, so the nibble
    # list must be flattened before being reversed.  Fixed: the original
    # reversed whole 16-bit groups, leaving the nibbles inside each group in
    # forward order (e.g. "2001:db8::" yielded "...2.0.0.1..." instead of
    # "...1.0.0.2...").
    reverseZone = net: (
      concatStringsSep "." (
        reverseList (
          concatLists (
            map (
              x: stringToCharacters (fixedWidthString 4 "0" x)
            ) (
              splitString ":" (
                expand net
              )
            )
          )
        )
      )
    ) + ".ip6.arpa";

    # The network part of `address`, as an expanded, lower-case string.
    networkOf = address: prefixLength: encode (
      zipListsWith bitAnd (
        decode address
      ) (
        netmask prefixLength
      )
    );

    # NOTE(review): same partial-application problem as ipv4.isInNetwork —
    # `networkOf` is never given a prefix length, so this always yields
    # false.  Confirm the intended signature before fixing.
    isInNetwork = net: address: networkOf address == (expand net);

    /* nixos-specific stuff */
    # First configured address on any interface that lies inside `net`.
    findOwnAddress = config: net: head (
      filter (
        isInNetwork net
      ) (
        configuredAddresses config
      )
    );

    # All IPv6 addresses configured on any interface of `config`.
    configuredAddresses = config: concatLists (
      mapAttrsToList (
        name: iface: iface.ipv6.addresses
      ) config.networking.interfaces
    );
  };
}
{ ...}:
{
  /* Returns a string that runs tests from the Python code at the given path.

     The Python code is loaded using *exec* and the *test* global it
     defines is called with the given keyword arguments.

     Type: makeTestScript :: Path -> AttrSet -> String

     Example:
       testScript = (makeTestScript ./test_foo.py { x = "y"; });
  */
  # Fixed: the Python inside the '' string had lost its indentation (the
  # bodies of `def test` and the `with` block were flush-left), which makes
  # the generated script a SyntaxError.  Indented string lines share a
  # common leading margin that Nix strips, so the relative indentation below
  # is what reaches Python.
  makeTestScript = { testpath, kwargs ? {} }:
    ''
    # The driver runs pyflakes on this script before letting it
    # run... Convince pyflakes that there is a `test` name.
    def test():
        pass

    with open("${testpath}") as testfile:
        exec(testfile.read(), globals())

    # For simple types, JSON is compatible with Python syntax!
    test(**${builtins.toJSON kwargs})
    '';
}
...@@ -34,40 +34,41 @@ let ...@@ -34,40 +34,41 @@ let
options = { options = {
hostId = lib.mkOption hostId = lib.mkOption
{ type = lib.types.str; { type = lib.types.str;
example = lib.literalExample "abcdefab"; example = "abcdefab";
description = "The 32-bit host ID of the machine, formatted as 8 hexadecimal characters."; description = "The 32-bit host ID of the machine, formatted as 8 hexadecimal characters.";
}; };
interface = lib.mkOption interface = lib.mkOption
{ type = lib.types.str; { type = lib.types.str;
example = lib.literalExample "eno0"; example = "eno0";
description = "The name of the network interface on which to configure a static address."; description = "The name of the network interface on which to configure a static address.";
}; };
publicIPv4 = lib.mkOption publicIPv4 = lib.mkOption
{ type = lib.types.str; { type = lib.types.str;
example = lib.literalExample "192.0.2.0"; example = "192.0.2.0";
description = "The IPv4 address to statically assign to `interface`."; description = "The IPv4 address to statically assign to `interface`.";
}; };
prefixLength = lib.mkOption prefixLength = lib.mkOption
{ type = lib.types.int; { type = lib.types.int;
example = lib.literalExample 24; example = 24;
description = "The statically configured network's prefix length."; description = "The statically configured network's prefix length.";
}; };
gateway = lib.mkOption gateway = lib.mkOption
{ type = lib.types.str; { type = lib.types.str;
example = lib.literalExample "192.0.2.1"; example = "192.0.2.1";
description = "The statically configured address of the network gateway."; description = "The statically configured address of the network gateway.";
}; };
gatewayInterface = lib.mkOption gatewayInterface = lib.mkOption
{ type = lib.types.str; { type = lib.types.str;
example = lib.literalExample "eno0"; example = "eno0";
description = "The name of the network interface for the default route."; description = "The name of the network interface for the default route.";
default = cfg.interface; default = cfg.interface;
}; };
grubDeviceID = lib.mkOption grubDeviceID = lib.mkOption
{ type = lib.types.str; { type = lib.types.str;
example = lib.literalExample "wwn-0x5000c500936410b9"; example = "wwn-0x5000c500936410b9";
description = "The ID of the disk on which to install grub."; description = "The ID of the disk on which to install grub.";
default = "nodev";
}; };
}; };
in { in {
...@@ -102,10 +103,11 @@ in { ...@@ -102,10 +103,11 @@ in {
# harder to deploy in the bootstrap environment. # harder to deploy in the bootstrap environment.
config = config =
{ boot.loader.grub.enable = true; { boot.loader.grub.enable = true;
boot.loader.grub.version = 2; boot.loader.grub.device = if cfg.grubDeviceID == "nodev" then "nodev" else "/dev/disk/by-id/${cfg.grubDeviceID}";
boot.loader.grub.device = "/dev/disk/by-id/${cfg.grubDeviceID}";
boot.loader.timeout = 10; boot.loader.timeout = 10;
# NixOS likes to fill up boot partitions with (by default) 100 old kernels.
# Keep a (for us) more reasonable number around.
boot.loader.grub.configurationLimit = 8;
networking.firewall.enable = false; networking.firewall.enable = false;
networking.hostId = cfg.hostId; networking.hostId = cfg.hostId;
......
These are mostly modelled on upstream NixOS modules.
They are generally fairly configurable (they don't tend to hard-code paths, they can be enabled or disabled).
They don't know anything about morph (e.g. ``deployment.secrets``) or how the different grids are configured (e.g. ``grid.publicKeyPath``).
Each module here tends to define one service (or group of related services) or feature.
Eventually, all of these will be imported automatically and controlled by ``services.private-storage.*.enable`` options.
{
  # Load modules that are sometimes universally useful and other times useful
  # only for a specific service. Where functionality is not universally
  # useful, it needs to be enabled by a node's configuration. By loading more
  # modules (and therefore defining more options) than is strictly necessary
  # for any single node the logic for supplying conditional configuration
  # elsewhere is much simplified. For example, a Vagrant module can
  # unconditionally set up a filesystem for PaymentServer. If PaymentServer
  # is running on that node then it will get a Vagrant-appropriate
  # configuration. If PaymentServer hasn't been enabled then the
  # configuration will just be ignored.
  #
  # Paths are relative to this file's directory.
  imports = [
    ./packages.nix
    ./issuer.nix
    ./private-storage.nix
    # Monitoring-related modules: the retention policy, the monitoring VPN
    # client, and the metrics/log exporters (presumably one per exporter —
    # see each file for the options it declares).
    ./monitoring/policy.nix
    ./monitoring/vpn/client.nix
    ./monitoring/exporters/node.nix
    ./monitoring/exporters/tahoe.nix
    ./monitoring/exporters/promtail.nix
  ];
}
...@@ -16,7 +16,7 @@ in { ...@@ -16,7 +16,7 @@ in {
options = { options = {
services.private-storage.deployment.authorizedKey = lib.mkOption { services.private-storage.deployment.authorizedKey = lib.mkOption {
type = lib.types.str; type = lib.types.str;
example = lib.literalExample '' example = ''
ssh-ed25519 AAAAC3N... ssh-ed25519 AAAAC3N...
''; '';
description = '' description = ''
...@@ -25,7 +25,7 @@ in { ...@@ -25,7 +25,7 @@ in {
}; };
services.private-storage.deployment.gridName = lib.mkOption { services.private-storage.deployment.gridName = lib.mkOption {
type = lib.types.str; type = lib.types.str;
example = lib.literalExample "staging"; example = "staging";
description = '' description = ''
The name of the grid configuration to use to update this deployment. The name of the grid configuration to use to update this deployment.
''; '';
...@@ -35,11 +35,11 @@ in { ...@@ -35,11 +35,11 @@ in {
config = { config = {
# Configure the system to use our binary cache so that deployment updates # Configure the system to use our binary cache so that deployment updates
# only require downloading pre-built software, not building it ourselves. # only require downloading pre-built software, not building it ourselves.
nix = { nix.settings = {
binaryCachePublicKeys = [ trusted-public-keys = [
"saxtons.private.storage:MplOcEH8G/6mRlhlKkbA8GdeFR3dhCFsSszrspE/ZwY=" "saxtons.private.storage:MplOcEH8G/6mRlhlKkbA8GdeFR3dhCFsSszrspE/ZwY="
]; ];
binaryCaches = [ substituters = [
"http://saxtons.private.storage" "http://saxtons.private.storage"
]; ];
}; };
......
...@@ -8,14 +8,14 @@ in { ...@@ -8,14 +8,14 @@ in {
services.private-storage-issuer.package = lib.mkOption { services.private-storage-issuer.package = lib.mkOption {
default = ourpkgs.zkapissuer; default = ourpkgs.zkapissuer;
type = lib.types.package; type = lib.types.package;
example = lib.literalExample "pkgs.zkapissuer.components.exes.\"PaymentServer-exe\""; example = lib.literalExpression "pkgs.zkapissuer.components.exes.\"PaymentServer-exe\"";
description = '' description = ''
The package to use for the ZKAP issuer. The package to use for the ZKAP issuer.
''; '';
}; };
services.private-storage-issuer.domains = lib.mkOption { services.private-storage-issuer.domains = lib.mkOption {
type = lib.types.listOf lib.types.str; type = lib.types.listOf lib.types.str;
example = lib.literalExample [ "payments.example.com" ]; example = [ "payments.example.com" ];
description = '' description = ''
The domain names at which the issuer is reachable. The domain names at which the issuer is reachable.
''; '';
...@@ -32,12 +32,21 @@ in { ...@@ -32,12 +32,21 @@ in {
services.private-storage-issuer.issuer = lib.mkOption { services.private-storage-issuer.issuer = lib.mkOption {
default = "Ristretto"; default = "Ristretto";
type = lib.types.enum [ "Trivial" "Ristretto" ]; type = lib.types.enum [ "Trivial" "Ristretto" ];
example = lib.literalExample "Trivial"; example = "Trivial";
description = '' description = ''
The issuer algorithm to use. Either Trivial for a fake no-crypto The issuer algorithm to use. Either Trivial for a fake no-crypto
algorithm or Ristretto for Ristretto-flavored PrivacyPass. algorithm or Ristretto for Ristretto-flavored PrivacyPass.
''; '';
}; };
services.private-storage-issuer.tokensPerVoucher = lib.mkOption {
default = null;
type = lib.types.nullOr lib.types.int;
example = 50000;
description = ''
If not null, a value to pass to PaymentServer for
``--tokens-per-voucher``.
'';
};
services.private-storage-issuer.ristrettoSigningKeyPath = lib.mkOption { services.private-storage-issuer.ristrettoSigningKeyPath = lib.mkOption {
default = null; default = null;
type = lib.types.path; type = lib.types.path;
...@@ -53,6 +62,13 @@ in { ...@@ -53,6 +62,13 @@ in {
and payment management. and payment management.
''; '';
}; };
services.private-storage-issuer.stripeWebhookSecretKeyPath = lib.mkOption {
type = lib.types.path;
description = ''
The path to a file containing a Stripe "webhook" secret key to use for
charge and payment management.
'';
};
services.private-storage-issuer.stripeEndpointDomain = lib.mkOption { services.private-storage-issuer.stripeEndpointDomain = lib.mkOption {
type = lib.types.str; type = lib.types.str;
description = '' description = ''
...@@ -81,6 +97,15 @@ in { ...@@ -81,6 +97,15 @@ in {
The kind of voucher database to use. The kind of voucher database to use.
''; '';
}; };
services.private-storage-issuer.databaseFileSystem = lib.mkOption {
# Logically, the type is the type of an entry in fileSystems - but we'll
# just let the type system enforce that when we pass the value on to
# fileSystems.
description = ''
Configuration for a filesystem to mount which will hold the issuer's
internal state database.
'';
};
services.private-storage-issuer.databasePath = lib.mkOption { services.private-storage-issuer.databasePath = lib.mkOption {
default = null; default = null;
type = lib.types.str; type = lib.types.str;
...@@ -111,11 +136,26 @@ in { ...@@ -111,11 +136,26 @@ in {
# We'll refer to this collection of domains by the first domain in the # We'll refer to this collection of domains by the first domain in the
# list. # list.
domain = builtins.head cfg.domains; domain = builtins.head cfg.domains;
certServiceName = "acme-${domain}";
# Payment server internal http port (arbitrary, non-priviledged): # Payment server internal http port (arbitrary, non-priviledged):
internalHttpPort = "1061"; internalHttpPort = "1061";
# The "-vN" suffix indicates that this Nth incompatible version of on
# disk state as managed by this deployment system. This does not have
# anything to do with what's inside the PaymentServer-managed state.
# Instead it's about things like the type of filesystem used or options
# having to do with the backing volume behind the filesystem. In
# general I expect that to get from "-vN" to "-v(N+1)" some manual
# upgrade steps will be required.
stateDirectory = "zkapissuer-v2";
in lib.mkIf cfg.enable { in lib.mkIf cfg.enable {
# Make sure the voucher database filesystem is mounted.
fileSystems = {
"zkapissuer-data" = cfg.databaseFileSystem // {
mountPoint = "/var/lib/${stateDirectory}";
};
};
# Add a systemd service to run PaymentServer. # Add a systemd service to run PaymentServer.
systemd.services.zkapissuer = { systemd.services.zkapissuer = {
enable = true; enable = true;
...@@ -138,15 +178,30 @@ in { ...@@ -138,15 +178,30 @@ in {
# Make systemd create a User/Group owned directory for PaymentServer # Make systemd create a User/Group owned directory for PaymentServer
# state. According to the docs at # state. According to the docs at
# https://www.freedesktop.org/software/systemd/man/systemd.exec.html#RuntimeDirectory= # https://www.freedesktop.org/software/systemd/man/systemd.exec.html#RuntimeDirectory=
# "The specified directory names must be relative" ... this # "The specified directory names must be relative" ... this makes
# makes systemd create /var/lib/zkapissuer/ for us: # systemd create this directory in /var/lib/ for us.
serviceConfig.StateDirectory = "zkapissuer"; serviceConfig.StateDirectory = stateDirectory;
serviceConfig.StateDirectoryMode = "0750"; serviceConfig.StateDirectoryMode = "0750";
unitConfig.AssertPathExists = [
# Bail if there is still an old (root-owned) DB file on this system. # Bail if there is still an old (root-owned) DB file on this system.
# If you hit this, and this /var/db/ file is indeed current, move it to # If you hit this, and this /var/db/ file is indeed current, move it
# /var/lib/zkapissuer/vouchers.sqlite3 and chown it to zkapissuer:zkapissuer. # to /var/lib/zkapissuer/vouchers.sqlite3 and chown it to
unitConfig.AssertPathExists = "!/var/db/vouchers.sqlite3"; # zkapissuer:zkapissuer.
"!/var/db/vouchers.sqlite3"
# Similarly, bail if the newer path you were just told to create --
# /var/lib/zkapissuer/vouchers.sqlite3 -- exists. It needs to be
# moved /var/lib/zkapissuer-v2 where a dedicated filesystem has been
# created for it.
"!/var/lib/zkapissuer/vouchers.sqlite3"
];
# Only start if the dedicated vouchers database filesystem is mounted so
# that we know we're going to find our vouchers database there (or that
# we will create it in the right place).
unitConfig.Requires = ["local-fs.target"];
unitConfig.After = ["local-fs.target"];
script = script =
let let
...@@ -168,11 +223,14 @@ in { ...@@ -168,11 +223,14 @@ in {
stripeArgs = stripeArgs =
"--stripe-key-path ${cfg.stripeSecretKeyPath} " + "--stripe-key-path ${cfg.stripeSecretKeyPath} " +
"--stripe-webhook-key-path ${cfg.stripeWebhookSecretKeyPath} " +
"--stripe-endpoint-domain ${cfg.stripeEndpointDomain} " + "--stripe-endpoint-domain ${cfg.stripeEndpointDomain} " +
"--stripe-endpoint-scheme ${cfg.stripeEndpointScheme} " + "--stripe-endpoint-scheme ${cfg.stripeEndpointScheme} " +
"--stripe-endpoint-port ${toString cfg.stripeEndpointPort}"; "--stripe-endpoint-port ${toString cfg.stripeEndpointPort}";
redemptionConfig = lib.optionalString (cfg.tokensPerVoucher != null) "--tokens-per-voucher ${builtins.toString cfg.tokensPerVoucher}";
in in
"${cfg.package.exePath} ${originArgs} ${issuerArgs} ${databaseArgs} ${httpArgs} ${stripeArgs}"; "${cfg.package.exePath} ${originArgs} ${issuerArgs} ${databaseArgs} ${httpArgs} ${stripeArgs} ${redemptionConfig}";
}; };
# PaymentServer runs as this user and group by default # PaymentServer runs as this user and group by default
...@@ -189,14 +247,14 @@ in { ...@@ -189,14 +247,14 @@ in {
extraGroups = [ "keys" ]; extraGroups = [ "keys" ];
}; };
# Open 80 and 443 for the certbot HTTP server and the PaymentServer HTTPS server. # Open 80 and 443 for nginx
networking.firewall.allowedTCPPorts = [ networking.firewall.allowedTCPPorts = [
80 80
443 443
]; ];
# NGINX reverse proxy # NGINX reverse proxy
security.acme.email = cfg.letsEncryptAdminEmail; security.acme.defaults.email = cfg.letsEncryptAdminEmail;
security.acme.acceptTerms = true; security.acme.acceptTerms = true;
services.nginx = { services.nginx = {
enable = true; enable = true;
...@@ -215,6 +273,17 @@ in { ...@@ -215,6 +273,17 @@ in {
# we pass less scanning spam on to our backend # we pass less scanning spam on to our backend
# Want a regex instead? try locations."~ /v\d+/" # Want a regex instead? try locations."~ /v\d+/"
proxyPass = "http://127.0.0.1:${internalHttpPort}"; proxyPass = "http://127.0.0.1:${internalHttpPort}";
# The redemption endpoint can intentionally delay its response for
# up to 600 seconds for a cheap kind of server-push when payment
# completes. Let that timeout control how long the connection stays
# open. PaymentServer does not accept configuration for that
# duration so we also hard-code it here.
#
# http://nginx.org/en/docs/http/ngx_http_proxy_module.html#proxy_read_timeout
extraConfig = ''
proxy_read_timeout 660;
'';
}; };
locations."/metrics" = { locations."/metrics" = {
# Only allow our monitoringvpn subnet # Only allow our monitoringvpn subnet
......
# Prometheus Blackbox exporter configuration
#
# Scope: From the monitoring machine, ping (etc.) hosts to check whether
#        they are reachable, certs still are valid for a while, etc.
#
# Notes: The Blackbox exporter is using the "Multi Target Exporter" pattern,
#        see https://prometheus.io/docs/guides/multi-target-exporter/ .
#
# Usage: Import this on a monitoring server
{ config, lib, pkgs, ... }:
let
  # Probe definitions for the exporter.  `https_2xx` expects a TLS-protected
  # HTTP endpoint answering over IPv4 only (no fallback to IPv6).
  probeModules = {
    https_2xx = {
      prober = "http";
      timeout = "5s";
      http = {
        fail_if_not_ssl = true;
        # This prober is for IPv4 only.
        preferred_ip_protocol = "ip4";
        ip_protocol_fallback = false;
      };
    };
  };
in
{
  config.services.prometheus.exporters.blackbox = {
    enable = true;
    # The exporter wants a YAML file; JSON is a subset of YAML, so serialize
    # the probe definitions with toJSON.
    configFile = pkgs.writeText "blackbox-exporter.yaml" (
      builtins.toJSON { modules = probeModules; }
    );
  };
}
# MegaCli to Prometheus text format exporter
#
# Scope: Gets data from MegaRAID compatible storage controllers and mogrifies
#        to Prometheus text format, saves to a temp file, to later be scraped
#        by the node exporter.
#
# Usage: Import this to every server with a MegaRAID card that you want to
#        include in the central monitoring system
#
# See https://nixos.org/manual/nixos/stable/#module-services-prometheus-exporters
{ config, options, lib, ourpkgs, pkgs, ... }:
let
  megacliCfg = config.services.private-storage.monitoring.exporters.megacli2prom;
in
{
  options.services.private-storage.monitoring.exporters.megacli2prom = {
    enable = lib.mkEnableOption "MegaCli2Prom metrics gathering service";

    outFile = lib.mkOption {
      type = lib.types.str;
      description = "Where to store the temporary file for node exporter to scrape?";
      default = "/run/prometheus-node-exporter/megacli.prom";
    };

    interval = lib.mkOption {
      type = lib.types.str;
      description = ''
        How often to do it?
        See https://www.freedesktop.org/software/systemd/man/systemd.time.html#Calendar%20Events
      '';
      # Every five minutes.
      default = "*:0/5";
    };
  };

  config = lib.mkIf megacliCfg.enable {
    environment.systemPackages = [ ourpkgs.megacli2prom pkgs.megacli ];

    systemd.services.megacli2prom = {
      enable = true;
      description = "MegaCli2Prom metrics gathering service";
      startAt = megacliCfg.interval;
      path = [ pkgs.megacli ];
      # Save to a temp file and then move atomically so the
      # textfile collector won't read a partial file.
      # See https://github.com/prometheus/node_exporter#textfile-collector
      script = ''
        "${ourpkgs.megacli2prom}/bin/megacli2prom" > "${megacliCfg.outFile}.tmp"
        mv "${megacliCfg.outFile}.tmp" "${megacliCfg.outFile}"
      '';
    };
  };
}
...@@ -6,15 +6,25 @@ ...@@ -6,15 +6,25 @@
# monitoring system # monitoring system
# See https://nixos.org/manual/nixos/stable/#module-services-prometheus-exporters # See https://nixos.org/manual/nixos/stable/#module-services-prometheus-exporters
{ config, lib, pkgs, ... }: { config, lib, pkgs, options, ... }:
with lib; with lib;
let let
cfg = config.services.private-storage.monitoring.exporters.node;
mountsFileSystemType = fsType: {} != filterAttrs (n: v: v.fsType == fsType) config.fileSystems; mountsFileSystemType = fsType: {} != filterAttrs (n: v: v.fsType == fsType) config.fileSystems;
in { in {
config.services.prometheus.exporters.node = { options.services.private-storage.monitoring.exporters.node = {
enable = lib.mkEnableOption "Base system metrics collection";
textfiles-directory = lib.mkOption {
type = lib.types.str;
description = "Directory used by the textfiles collector.";
default = "/run/prometheus-node-exporter";
};
};
config.services.prometheus.exporters.node = lib.mkIf cfg.enable {
enable = true; enable = true;
openFirewall = true; openFirewall = true;
firewallFilter = "-i monitoringvpn -p tcp -m tcp --dport 9100"; firewallFilter = "-i monitoringvpn -p tcp -m tcp --dport 9100";
...@@ -22,7 +32,7 @@ in { ...@@ -22,7 +32,7 @@ in {
# extraFlags = [ "--collector.disable-defaults" ]; # not in nixpkgs 19.09 # extraFlags = [ "--collector.disable-defaults" ]; # not in nixpkgs 19.09
# Thanks https://github.com/mayflower/nixexprs/blob/master/modules/monitoring/default.nix # Thanks https://github.com/mayflower/nixexprs/blob/master/modules/monitoring/default.nix
enabledCollectors = [ enabledCollectors = [
"arp" # "arp" # is broken in 1.7.0 (2024-02-07)
"bcache" "bcache"
"conntrack" "conntrack"
"filefd" "filefd"
...@@ -30,16 +40,16 @@ in { ...@@ -30,16 +40,16 @@ in {
"netclass" "netclass"
"netdev" "netdev"
"netstat" "netstat"
#"rapl" # not in nixpkgs 19.09 "rapl"
"sockstat" "sockstat"
#"softnet" # not in nixpkgs 19.09 "softnet"
"stat" "stat"
"systemd" "systemd"
# "textfile" "textfile"
# "textfile.directory /run/prometheus-node-exporter" "textfile.directory ${cfg.textfiles-directory}"
#"thermal_zone" # not in nixpkgs 19.09 "thermal_zone"
"time" "time"
#"udp_queues" # not in nixpkgs 19.09 "udp_queues"
"uname" "uname"
"vmstat" "vmstat"
] ++ optionals (!config.boot.isContainer) [ ] ++ optionals (!config.boot.isContainer) [
...@@ -59,7 +69,7 @@ in { ...@@ -59,7 +69,7 @@ in {
] ++ ( ] ++ (
optionals (config.services.nfs.server.enable) [ "nfsd" ] optionals (config.services.nfs.server.enable) [ "nfsd" ]
) ++ ( ) ++ (
optionals ("" != config.boot.initrd.mdadmConf) [ "mdadm" ] optionals ("" != config.boot.swraid.mdadmConf) [ "mdadm" ]
) ++ ( ) ++ (
optionals ({} != config.networking.bonds) [ "bonding" ] optionals ({} != config.networking.bonds) [ "bonding" ]
) ++ ( ) ++ (
...@@ -67,7 +77,7 @@ in { ...@@ -67,7 +77,7 @@ in {
) ++ ( ) ++ (
optionals (mountsFileSystemType "xfs") [ "xfs" ] optionals (mountsFileSystemType "xfs") [ "xfs" ]
) ++ ( ) ++ (
optionals (mountsFileSystemType "zfs" || elem "zfs" config.boot.supportedFilesystems) [ "zfs" ] optionals (mountsFileSystemType "zfs" || config.boot.supportedFilesystems.zfs or false) [ "zfs" ]
); );
}; };
} }
......
# Promtail log forwarder configuration
#
# Scope: Tail logs on the local system and send them to Loki
#
# Description: Not a Prometheus "exporter" in the strict sense, but it does
#              much the same job - it prepares local data (journal entries)
#              and ships them off to a TSDB (Loki).
{ config, options, lib, ... }:
let
  cfg = config.services.private-storage.monitoring.exporters.promtail;
  inherit (config.networking) hostName;
  # The site-wide retention policy, rendered as a journald time span
  # (an integer number of seconds with an "s" suffix).
  retention = "${toString config.services.private-storage.monitoring.policy.logRetentionSeconds}s";
in {
  options.services.private-storage.monitoring.exporters.promtail = {
    enable = lib.mkEnableOption "Promtail log exporter service";
    lokiUrl = lib.mkOption {
      type = lib.types.str;
      description = ''
        The server URL that logs should be pushed to.
      '';
      # Resolving names is hard, let's have breakfast
      # If you are curious why there's a plain IP address in here, read all of
      # https://whetstone.private.storage/privatestorage/PrivateStorageio/-/merge_requests/251
      # https://whetstone.private.storage/privatestorage/PrivateStorageio/-/merge_requests/257
      # https://whetstone.private.storage/privatestorage/PrivateStorageio/-/merge_requests/258
      default = "http://172.23.23.1:3100/loki/api/v1/push";
    };
  };

  config = lib.mkIf cfg.enable {
    # Let the Loki server reach Promtail's HTTP port over the monitoring VPN.
    networking.firewall.interfaces.monitoringvpn.allowedTCPPorts = [ 9080 ];

    services.journald.extraConfig = ''
      # This tells journald it can discard log files that contain only log
      # entries older than...
      MaxRetentionSec=${retention}
      # This tells journald to start a new log file once a day. Together with
      # the MaxRetentionSec setting, this means that entries are kept for
      # up to a full day longer than MaxRetentionSec.
      #
      # https://www.freedesktop.org/software/systemd/man/journald.conf.html
      # for further details about these options.
      #
      MaxFileSec=1day
      # This asks journald to not use more than 500M of disk space. Due to
      # journald's characteristics this might only be a week of logs, but that
      # should be okay since we ship all logs to a central server that keeps
      # them for a while longer.
      SystemMaxUse=500M
    '';

    services.promtail = {
      enable = true;
      configuration = {
        server = {
          # /metrics on this port doubles as the health check.
          http_listen_port = 9080;
          # gRPC is unused, but promtail offers no option to turn it off;
          # keep it bound to loopback.
          grpc_listen_address = "127.0.0.1";
          grpc_listen_port = 9094;
        };
        clients = [ { url = cfg.lokiUrl; } ];
        scrape_configs = [
          {
            job_name = "systemd-journal";
            journal.labels = {
              job = "systemd-journal";
              host = hostName;
            };
            # The journal carries many internal labels that are dropped by
            # default because of their "__" prefix. Renaming keeps the ones
            # we care about.
            # https://grafana.com/docs/loki/latest/clients/promtail/scraping/#journal-scraping-linux-only
            # https://www.freedesktop.org/software/systemd/man/systemd.journal-fields.html
            relabel_configs = [
              {
                source_labels = [ "__journal__systemd_unit" ];
                target_label = "unit";
              }
            ];
          }
        ];
      };
    };
  };
}
# Tahoe Prometheus metrics collector
#
# Scope: Retrieve metrics from Tahoe and put them where Prometheus'
#        node-exporter's textfile collector can find them.
#
# Usage: Import this to every server running Tahoe.
#
# See https://nixos.org/manual/nixos/stable/#module-services-prometheus-exporters
{ config, options, lib, pkgs, ... }:
let
  cfg = config.services.private-storage.monitoring.exporters.tahoe;
  # The directory the node exporter's textfile collector watches; the
  # metrics file written below must land there to be scraped.
  inherit (config.services.private-storage.monitoring.exporters.node) textfiles-directory;
in {
  options.services.private-storage.monitoring.exporters.tahoe = {
    enable = lib.mkEnableOption "Tahoe OpenMetrics collecting service";
    scrapeEndpoint = lib.mkOption {
      type = lib.types.str;
      description = "Where to get our metrics from?";
      # ?t=openmetrics asks Tahoe's statistics page for OpenMetrics output.
      default = "http://localhost:3456/statistics?t=openmetrics";
    };
    outFile = lib.mkOption {
      type = lib.types.str;
      description = "Where to store the temporary file for node exporter to scrape?";
      default = "${textfiles-directory}/tahoe.prom";
    };
    interval = lib.mkOption {
      type = lib.types.str;
      description = ''
        How often to do it?
        See https://www.freedesktop.org/software/systemd/man/systemd.time.html#Calendar%20Events
      '';
      # Every five minutes.
      default = "*:0/5";
    };
  };
  config =
    lib.mkIf cfg.enable {
      assertions = [
        {
          # Without the node exporter (and its textfile collector) the file
          # this service writes would never be picked up by Prometheus.
          assertion = config.services.private-storage.monitoring.exporters.node.enable;
          message = ''
            services.private-storage.monitoring.tahoe requires services.private-storage.monitoring.exporters.node to provide the textfile prometheus collector.
          '';
        }
      ];
      environment.systemPackages = [ pkgs.curl ];
      systemd.services.tahoe-metrics-collector = {
        enable = true;
        description = "Tahoe metrics gathering service";
        # NOTE(review): assumes the Tahoe storage unit is named
        # "tahoe.storage.service" — confirm against the Tahoe module.
        after = [ "tahoe.storage.service" ];
        # Triggered by the generated timer; cfg.interval is a systemd
        # calendar expression (default: every five minutes).
        startAt = cfg.interval;
        path = [ pkgs.coreutils pkgs.findutils pkgs.curl ];
        # A missed sample on redeploy is harmless; the timer fires again soon.
        restartIfChanged = false;
        # Save to a temp file and then move atomically so the
        # textfile collector won't read a partial file.
        # See https://github.com/prometheus/node_exporter#textfile-collector
        # The script counts corruption advisories and incident-report files,
        # then appends Tahoe's own OpenMetrics output fetched via curl.
        # NOTE(review): --fail-with-body requires curl >= 7.76.
        script = ''
          set -euo pipefail
          NUM_CORRUPTION_ADVISORIES=$(find /storage/corruption-advisories/ -type f | wc -l)
          echo "tahoe_corruption_advisories_total $NUM_CORRUPTION_ADVISORIES" > "${cfg.outFile}.tmp"
          NUM_INCIDENT_REPORTS=$(find /var/db/tahoe-lafs/storage/logs/incidents/ -type f | wc -l)
          echo "tahoe_incident_reports_total $NUM_INCIDENT_REPORTS" >> "${cfg.outFile}.tmp"
          curl --silent --show-error --fail-with-body "${cfg.scrapeEndpoint}" >> "${cfg.outFile}.tmp"
          mv "${cfg.outFile}.tmp" "${cfg.outFile}"
        '';
      };
      # Order the timer unit itself after the Tahoe storage service, too.
      systemd.timers.tahoe-metrics-collector = {
        after = [ "tahoe.storage.service" ];
      };
    };
}
# Codify our log data retention policy
#
# A maximum retention of 30 days conforms to the published log retention policy,
# see https://private.storage/privacy-policy/ .
{ options, lib, ... }: {
  options.services.private-storage.monitoring.policy = {
    logRetentionSeconds = lib.mkOption {
      type = lib.types.int;
      description = "How long do we retain logs (seconds)";
      # 29 rather than 30 days: journald rotates log files once a day
      # (MaxFileSec=1day), so entries can survive up to one day past
      # MaxRetentionSec. 29 + 1 keeps us within the published 30-day policy.
      default = 29 * (24 * 60 * 60); # 29 days, to accommodate the journald log rotation (1 day).
    };
  };
}
{
"annotations": {
"list": [
{
"builtIn": 1,
"datasource": {
"type": "datasource",
"uid": "grafana"
},
"enable": true,
"hide": true,
"iconColor": "rgba(0, 211, 255, 1)",
"name": "Annotations & Alerts",
"target": {
"limit": 100,
"matchAny": false,
"tags": [],
"type": "dashboard"
},
"type": "dashboard"
}
]
},
"description": "Watching the watchers",
"editable": true,
"fiscalYearStartMonth": 0,
"graphTooltip": 0,
"links": [],
"liveNow": false,
"panels": [
{
"alert": {
"alertRuleTags": {},
"conditions": [
{
"evaluator": {
"params": [
0
],
"type": "gt"
},
"operator": {
"type": "and"
},
"query": {
"params": [
"A",
"5m",
"now"
]
},
"reducer": {
"params": [],
"type": "count"
},
"type": "query"
}
],
"executionErrorState": "alerting",
"for": "10m",
"frequency": "1m",
"handler": 1,
"name": "Scraping down",
"noDataState": "ok",
"notifications": []
},
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"description": "Is Prometheus having problems scraping our instances? Should be zero.",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "bars",
"fillOpacity": 100,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "never",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "normal"
},
"thresholdsStyle": {
"mode": "line"
}
},
"decimals": 0,
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "transparent",
"value": null
},
{
"color": "red",
"value": 0
}
]
},
"unit": "short"
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 12,
"x": 0,
"y": 0
},
"id": 6,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "multi",
"sort": "none"
}
},
"pluginVersion": "10.4.6",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"exemplar": true,
"expr": "count by (job, instance) (up == 0)",
"hide": false,
"interval": "",
"legendFormat": "{{job}}/{{instance}}",
"refId": "A"
}
],
"title": "Scraping failures",
"type": "timeseries"
},
{
"alert": {
"alertRuleTags": {},
"conditions": [
{
"evaluator": {
"params": [
600
],
"type": "gt"
},
"operator": {
"type": "and"
},
"query": {
"params": [
"A",
"5m",
"now"
]
},
"reducer": {
"params": [],
"type": "last"
},
"type": "query"
}
],
"executionErrorState": "alerting",
"for": "5m",
"frequency": "1m",
"handler": 1,
"message": "A metrics text file is older than 10 minutes.",
"name": "Textcollector staleness alert",
"noDataState": "no_data",
"notifications": []
},
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"description": "Node-Exporter's TextCollector reads in plain text files containing metrics every few minutes. Make sure we're not reporting stale text files as new data - Alert if any of the text files is not getting updated.",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "s"
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 12,
"x": 12,
"y": 0
},
"id": 8,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "LocalPrometheus"
},
"exemplar": true,
"expr": "time() - node_textfile_mtime_seconds",
"interval": "",
"legendFormat": "{{instance}}/{{file}}",
"refId": "A"
}
],
"thresholds": [
{
"colorMode": "critical",
"op": "gt",
"value": 600,
"visible": true
}
],
"title": "Textfile collector freshness",
"type": "timeseries"
}
],
"refresh": false,
"schemaVersion": 39,
"tags": [],
"templating": {
"list": []
},
"time": {
"from": "now-30m",
"to": "now"
},
"timepicker": {},
"timezone": "",
"title": "Meta monitoring",
"uid": "MetaMonitoring",
"version": 1,
"weekStart": ""
}