From e24182764def764b84146c62a69059ef60f3af86 Mon Sep 17 00:00:00 2001
From: Jean-Paul Calderone <exarkun@twistedmatrix.com>
Date: Mon, 26 Sep 2022 12:22:39 -0400
Subject: [PATCH] Declare that the Tahoe unit *Requires* its corresponding
 Socket unit

Otherwise Tahoe might start without the socket unit, lack access to the
socket that is bound to the web port, and so simply fail to start.
---
 nixos/modules/private-storage.nix | 18 ++++++++----------
 nixos/modules/tahoe.nix           | 21 ++++++++++++++++++---
 2 files changed, 26 insertions(+), 13 deletions(-)

diff --git a/nixos/modules/private-storage.nix b/nixos/modules/private-storage.nix
index 7b7892cd..3a716cf0 100644
--- a/nixos/modules/private-storage.nix
+++ b/nixos/modules/private-storage.nix
@@ -111,21 +111,19 @@ in
         node =
         # XXX Should try to name that is unique across the grid.
         { nickname = "${storage-node-name}";
-          # We have the web port active because the CLI uses it.  We may
-          # eventually turn this off, or at least have it off by default (with
-          # an option to turn it on).  I don't know how much we'll use the CLI
-          # on the nodes.  Maybe very little?  Or maybe it will be part of a
-          # health check for the node...  In any case, we tell it to bind to
-          # localhost so no one *else* can use it.  And the principle of the
-          # web interface is that merely having access to it doesn't grant
-          # access to any data.  It does grant access to storage capabilities
-          # but with our plugin configuration you still need ZKAPs to use
-          # those...
+
+          # We have the web port active because the CLI uses it and because it
+          # exposes a metrics endpoint for our monitoring system.  The actual
+          # port configuration lives in systemd so that it can order binding
+          # the socket correctly with other dependencies (which we can't
+          # reliably do with Tahoe without a bunch of other work).
           "web.port" = "systemd:domain=INET:index=0";
+
           # We have to tell Tahoe-LAFS where to listen for Foolscap
           # connections for the storage protocol.  We have to tell it twice.
           # First, in the syntax which it uses to listen.
           "tub.port" = "tcp:${toString cfg.publicStoragePort}";
+
           # Second, in the syntax it advertises to in the fURL.
           "tub.location" = "tcp:${cfg.publicAddress}:${toString cfg.publicStoragePort}";
         };
diff --git a/nixos/modules/tahoe.nix b/nixos/modules/tahoe.nix
index 3c0691f7..51c86954 100644
--- a/nixos/modules/tahoe.nix
+++ b/nixos/modules/tahoe.nix
@@ -188,7 +188,6 @@ in
         systemd.sockets."tahoe.storage" = {
           description = "Tahoe Web Server Socket";
           wantedBy = [ "sockets.target" ];
-          before = [ "multi-user.target" ];
           socketConfig = {
             ListenStream = "127.0.0.1:3456";
           };
@@ -203,10 +202,18 @@ in
             eliotLog = "file:${nodedir}/logs/eliot.json,rotate_length=${toString (1024 * 1024 * 32)},max_rotated_files=32";
           in nameValuePair "tahoe.${node}" {
             description = "Tahoe LAFS node ${node}";
+
+            # We are partially socket activated but only for the web API port.
+            # For the actual storage service port, we bind ourselves.  So make
+            # sure we actually do start up early in case storage requests come in.
             wantedBy = [ "multi-user.target" ];
+
             path = [ settings.package ];
-            restartTriggers = [
-              config.environment.etc."tahoe-lafs/${node}.cfg".source ];
+
+            # We don't know how to re-read our configuration file at runtime
+            # so restart if it ever changes.
+            restartTriggers = [ config.environment.etc."tahoe-lafs/${node}.cfg".source ];
+
             serviceConfig = {
               Type = "simple";
               PIDFile = pidfile;
@@ -254,6 +261,14 @@ in
               # now. So it makes sense to have the limit be 2^15 right now.
               LimitNOFILE = 32768;
             };
+
+            unitConfig = {
+              # Our config doesn't know how to bind all of its sockets on its
+              # own so don't start without the systemd units that *do* know
+              # how to bind them.
+              Requires = [ "tahoe.${node}.socket" ];
+            };
+
             preStart =
             let
               created = "${nodedir}.created";
-- 
GitLab