From a86f1a4fdcd3cdc6daef389946280e57da2f91b1 Mon Sep 17 00:00:00 2001
From: Jean-Paul Calderone <exarkun@twistedmatrix.com>
Date: Thu, 1 Apr 2021 10:48:12 -0400
Subject: [PATCH] Boot VMs for the system tests one at a time

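Boot the system test VMs sequentially instead of starting them all in
parallel.  I hope this reduces the chances of running into the intermittent
boot failure below, in which a VM times out waiting for /dev/vda, fails to
mount its root filesystem, and then panics: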

```
api_stripe_com# Timed out waiting for device /dev/vda, trying to mount anyway.
storage: exit status 0
(12.56 seconds)
storage: must succeed: [ -e /var/db/tahoe-lafs/storage/logs/eliot.json ]
storage: exit status 0
(0.00 seconds)
client: must succeed: set -eo pipefail; /nix/store/y56dhm5sap38iai2lkdn5n1lwn52z09d-run-client.py /tmp/client pb://rr7y46ixsg6qmck4jkkc7hke6xe4sv5f@tcp:introducer:35151/2k6
p3wrabat5jrj7otcih4cjdema4q3m http://issuer/ | systemd-cat
client: waiting for the VM to finish booting
client: connected to guest root shell
client: (connecting took 0.00 seconds)
(0.00 seconds)
api_stripe_com# [  154.358922] 9pnet: Installing 9P2000 support
api_stripe_com# [  156.637047] virtio_blk virtio8: [vda] 1048576 512-byte logical blocks (537 MB/512 MiB)
api_stripe_com# cannot check filesystem with type "auto"!
api_stripe_com# mounting /dev/vda on /...
api_stripe_com# mount: mounting /dev/vda on /mnt-root/ failed: No such file or directory
api_stripe_com# [  156.692286] Kernel panic - not syncing: Attempted to kill init! exitcode=0x00000100
api_stripe_com# [  156.692286]
api_stripe_com# [  156.695508] CPU: 0 PID: 1 Comm: init Not tainted 4.19.80 #1-NixOS
api_stripe_com# [  156.697682] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.12.1-0-ga5cab58e9a3f-prebuilt.qemu.org 04/01/2014
api_stripe_com# [  156.701427] Call Trace:
api_stripe_com# [  156.702238]  dump_stack+0x5c/0x80
api_stripe_com# [  156.703518]  panic+0xe7/0x23b
api_stripe_com# [  156.704457]  do_exit.cold.24+0x1a/0x81
api_stripe_com# [  156.705812]  ? handle_mm_fault+0xd6/0x200
api_stripe_com# [  156.707211]  do_group_exit+0x3a/0xa0
api_stripe_com# [  156.708605]  __x64_sys_exit_group+0x14/0x20
api_stripe_com# [  156.710056]  do_syscall_64+0x4e/0x100
api_stripe_com# [  156.711387]  entry_SYSCALL_64_after_hwframe+0x44/0xa9
api_stripe_com# [  156.713039] RIP: 0033:0x7f7c73b0f676
api_stripe_com# [  156.714201] Code: Bad RIP value.
api_stripe_com# [  156.715490] RSP: 002b:00007ffce6679da8 EFLAGS: 00000202 ORIG_RAX: 00000000000000e7
api_stripe_com# [  156.717860] RAX: ffffffffffffffda RBX: 0000000002239704 RCX: 00007f7c73b0f676
api_stripe_com# [  156.719931] RDX: 0000000000000001 RSI: 000000000000003c RDI: 0000000000000001
api_stripe_com# [  156.722315] RBP: 0000000000000001 R08: 00000000000000e7 R09: ffffffffffffff80
api_stripe_com# [  156.724720] R10: 0000000000000000 R11: 0000000000000202 R12: 00007ffce667a138
api_stripe_com# [  156.727106] R13: 00007ffce667a130 R14: 0000000000000000 R15: 0000000000000000
api_stripe_com# [  156.729583] Kernel Offset: 0xfc00000 from 0xffffffff81000000 (relocation range: 0xffffffff80000000-0xffffffffbfffffff)
api_stripe_com# [  156.732988] Rebooting in 1 seconds..
vde_switch: EOF data port: Interrupted system call
```
---
 nixos/modules/tests/private-storage.nix | 17 +++++++++++++++--
 1 file changed, 15 insertions(+), 2 deletions(-)

diff --git a/nixos/modules/tests/private-storage.nix b/nixos/modules/tests/private-storage.nix
index bf15b397..e085f8bc 100644
--- a/nixos/modules/tests/private-storage.nix
+++ b/nixos/modules/tests/private-storage.nix
@@ -177,8 +177,21 @@ in {
   # Test the machines with a Perl program (sobbing).
   testScript =
     ''
-      # Start booting all the VMs in parallel to speed up operations down below.
-      startAll;
+      # Boot the VMs.  We used to do them all in parallel but the boot
+      # sequence got flaky at some point for some reason I don't
+      # understand. :/ It might be related to this:
+      #
+      # https://discourse.nixos.org/t/nixos-ppc64le-vm-does-not-have-dev-vda-device/11548/9
+      #
+      # See <nixpkgs/nixos/modules/virtualisation/qemu-vm.nix> for the Nix
+      # expression that constructs the QEMU command that gets run.
+      #
+      # Boot them one at a time for now.
+      $issuer->connect();
+      $introducer->connect();
+      $storage->connect();
+      $client->connect();
+      $api_stripe_com->connect();
 
       # The issuer and the storage server should accept SSH connections.  This
       # doesn't prove it is so but if it fails it's a pretty good indication
-- 
GitLab