From e24182764def764b84146c62a69059ef60f3af86 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone <exarkun@twistedmatrix.com> Date: Mon, 26 Sep 2022 12:22:39 -0400 Subject: [PATCH] Declare that the Tahoe unit *Requires* its corresponding Socket unit Otherwise Tahoe might start without the socket unit and not have access to the socket that's bound to the port and so just fail to start. --- nixos/modules/private-storage.nix | 18 ++++++++---------- nixos/modules/tahoe.nix | 21 ++++++++++++++++++--- 2 files changed, 26 insertions(+), 13 deletions(-) diff --git a/nixos/modules/private-storage.nix b/nixos/modules/private-storage.nix index 7b7892cd..3a716cf0 100644 --- a/nixos/modules/private-storage.nix +++ b/nixos/modules/private-storage.nix @@ -111,21 +111,19 @@ in node = # XXX Should try to name that is unique across the grid. { nickname = "${storage-node-name}"; - # We have the web port active because the CLI uses it. We may - # eventually turn this off, or at least have it off by default (with - # an option to turn it on). I don't know how much we'll use the CLI - # on the nodes. Maybe very little? Or maybe it will be part of a - # health check for the node... In any case, we tell it to bind to - # localhost so no one *else* can use it. And the principle of the - # web interface is that merely having access to it doesn't grant - # access to any data. It does grant access to storage capabilities - # but with our plugin configuration you still need ZKAPs to use - # those... + + # We have the web port active because the CLI uses it and because it + # exposes a metrics endpoint for our monitoring system. The actual + # port configuration lives in systemd so that it can order binding + # the socket correctly with other dependencies (which we can't + # reliably do with Tahoe without a bunch of other work). "web.port" = "systemd:domain=INET:index=0"; + # We have to tell Tahoe-LAFS where to listen for Foolscap # connections for the storage protocol. We have to tell it twice. 
# First, in the syntax which it uses to listen. "tub.port" = "tcp:${toString cfg.publicStoragePort}"; + # Second, in the syntax it advertises to in the fURL. "tub.location" = "tcp:${cfg.publicAddress}:${toString cfg.publicStoragePort}"; }; diff --git a/nixos/modules/tahoe.nix b/nixos/modules/tahoe.nix index 3c0691f7..51c86954 100644 --- a/nixos/modules/tahoe.nix +++ b/nixos/modules/tahoe.nix @@ -188,7 +188,6 @@ in systemd.sockets."tahoe.storage" = { description = "Tahoe Web Server Socket"; wantedBy = [ "sockets.target" ]; - before = [ "multi-user.target" ]; socketConfig = { ListenStream = "127.0.0.1:3456"; }; @@ -203,10 +202,18 @@ in eliotLog = "file:${nodedir}/logs/eliot.json,rotate_length=${toString (1024 * 1024 * 32)},max_rotated_files=32"; in nameValuePair "tahoe.${node}" { description = "Tahoe LAFS node ${node}"; + + # We are partially socket activated but only for the web API port. + # For the actual storage service port, we bind ourselves. So make + # sure we actually do start up early in case storage requests come in. wantedBy = [ "multi-user.target" ]; + path = [ settings.package ]; - restartTriggers = [ - config.environment.etc."tahoe-lafs/${node}.cfg".source ]; + + # We don't know how to re-read our configuration file at runtime + # so restart if it ever changes. + restartTriggers = [ config.environment.etc."tahoe-lafs/${node}.cfg".source ]; + serviceConfig = { Type = "simple"; PIDFile = pidfile; @@ -254,6 +261,14 @@ in # now. So it makes sense to have the limit be 2^15 right now. LimitNOFILE = 32768; }; + + unitConfig = { + # Our config doesn't know how to bind all of its sockets on its + # own so don't start without the systemd units that *do* know + # how to bind them. + Requires = [ "tahoe.${node}.socket" ]; + }; + preStart = let created = "${nodedir}.created"; -- GitLab