diff --git a/nixos/lib/default.nix b/nixos/lib/default.nix
new file mode 100644
index 0000000000000000000000000000000000000000..3ebaf60b7d536589d46a745d2945cbef96b2b554
--- /dev/null
+++ b/nixos/lib/default.nix
@@ -0,0 +1,6 @@
+{ callPackage }:
+{
+  /* A library of tools useful for writing tests with Nix.
+  */
+  testing = callPackage ./testing.nix { };
+}
diff --git a/nixos/lib/testing.nix b/nixos/lib/testing.nix
new file mode 100644
index 0000000000000000000000000000000000000000..d89717a0a76f93bb6062ad63c6cfdbb91c12c746
--- /dev/null
+++ b/nixos/lib/testing.nix
@@ -0,0 +1,23 @@
+{ ... }:
+{
+  /* Returns a string that runs tests from the Python code at the given path.
+
+     The Python code is loaded with *exec* and the *test* global it
+     defines is called with the given keyword arguments.
+
+     Type: makeTestScript :: { testpath :: Path, kwargs :: AttrSet } -> String
+
+     Example:
+       testScript = makeTestScript { testpath = ./test_foo.py; kwargs = { x = "y"; }; };
+  */
+  makeTestScript = { testpath, kwargs ? {} }:
+    ''
+    # The driver runs pyflakes on this script before letting it
+    # run... Convince pyflakes that there is a `test` name.
+    test = None
+    with open("${testpath}") as testfile:
+        exec(testfile.read(), globals())
+    # For simple types, JSON is compatible with Python syntax!
+    test(**${builtins.toJSON kwargs})
+    '';
+}
diff --git a/nixos/pkgs/default.nix b/nixos/pkgs/default.nix
index bfc30b36101c220434606832127a7e8ca0a70490..435095f7890b7ac41afaebe050a756c4b4887641 100644
--- a/nixos/pkgs/default.nix
+++ b/nixos/pkgs/default.nix
@@ -5,6 +5,8 @@
 # pkgs.callPackage ./nixos/pkgs
 {buildPlatform, hostPlatform, callPackage}:
 {
+  lib = callPackage ../lib {};
+
   leasereport = callPackage ./leasereport {};
   # `privatestorage` is a derivation with a good Tahoe+ZKAP environment
   # that is exposed by ZKAPAuthorizer.
diff --git a/nixos/tests/private-storage.nix b/nixos/tests/private-storage.nix
index a208ce249f1f1227f966e38a1c62ab6166d187f8..eaff1ed5320607e6aabc94226804aea4b7186b0a 100644
--- a/nixos/tests/private-storage.nix
+++ b/nixos/tests/private-storage.nix
@@ -1,25 +1,14 @@
 { pkgs }:
 let
+  ourpkgs = pkgs.callPackage ../pkgs { };
+
   sshPrivateKey = ./probeuser_ed25519;
   sshPublicKey = ./probeuser_ed25519.pub;
+
   sshUsers = {
     root = (builtins.readFile sshPublicKey);
     probeuser = (builtins.readFile sshPublicKey);
   };
-  # Generate a command which can be used with runOnNode to ssh to the given
-  # host.
-  ssh = username: hostname: [
-    "cp" sshPrivateKey "/tmp/ssh_key" ";"
-    "chmod" "0400" "/tmp/ssh_key" ";"
-    "ssh" "-oStrictHostKeyChecking=no" "-i" "/tmp/ssh_key" "${username}@${hostname}" ":"
-  ];
-
-  # Separate helper programs so we can write as little python inside a string
-  # inside a nix expression as possible.
-  run-introducer = ./run-introducer.py;
-  run-client = ./run-client.py;
-  get-passes = ./get-passes.py;
-  exercise-storage = ./exercise-storage.py;
 
   # This is a test double of the Stripe API server. It is extremely simple.
   # It barely knows how to respond to exactly the API endpoints we use,
@@ -72,18 +61,6 @@ let
     networking.firewall.enable = false;
     networking.dhcpcd.enable = false;
   };
-
-  # Return a python program fragment to run a shell command on one of the nodes.
-  # The first argument is the name of the node. The second is a list of the
-  # argv to run.
-  #
-  # The program's output is piped to systemd-cat and the python fragment
-  # evaluates to success if the command exits with a success status.
-  runOnNode = node: argv:
-    let
-      command = builtins.concatStringsSep " " argv;
-    in
-    "${node}.succeed('set -eo pipefail; ${command} | systemd-cat')";
 in {
   # https://nixos.org/nixos/manual/index.html#sec-nixos-tests
   # https://nixos.mayflower.consulting/blog/2019/07/11/leveraging-nixos-tests-in-your-project/
@@ -177,134 +154,16 @@
   };
 
   # Test the machines with a Python program.
-  testScript = ''
-    # Boot the VMs. We used to do them all in parallel but the boot
-    # sequence got flaky at some point for some reason I don't
-    # understand. :/ It might be related to this:
-    #
-    #   https://discourse.nixos.org/t/nixos-ppc64le-vm-does-not-have-dev-vda-device/11548/9
-    #
-    # See <nixpkgs/nixos/modules/virtualisation/qemu-vm.nix> for the Nix
-    # that constructs the QEMU command that gets run.
-    #
-    # Boot them one at a time for now.
-    issuer.connect()
-    introducer.connect()
-    storage.connect()
-    client.connect()
-    api_stripe_com.connect()
-
-    # The issuer and the storage server should accept SSH connections. This
-    # doesn't prove it is so but if it fails it's a pretty good indication
-    # it isn't so.
-    storage.wait_for_open_port(22)
-    ${runOnNode "issuer" (ssh "probeuser" "storage")}
-    ${runOnNode "issuer" (ssh "root" "storage")}
-    issuer.wait_for_open_port(22)
-    ${runOnNode "storage" (ssh "probeuser" "issuer")}
-    ${runOnNode "storage" (ssh "root" "issuer")}
-
-    # Set up a Tahoe-LAFS introducer.
-    introducer.copy_from_host('${pemFile}', '/tmp/node.pem')
-
-    try:
-      ${runOnNode "introducer" [ run-introducer "/tmp/node.pem" (toString introducerPort) introducerFURL ]}
-    except:
-      code, output = introducer.execute('cat /tmp/stdout /tmp/stderr')
-      introducer.log(output)
-      raise
-
-    #
-    # Get a Tahoe-LAFS storage server up.
-    #
-    code, version = storage.execute('tahoe --version')
-    storage.log(version)
-
-    # The systemd unit should reach the running state.
-    storage.wait_for_unit('tahoe.storage.service')
-
-    # Some while after that the Tahoe-LAFS node should listen on the web API
-    # port. The port number here has to agree with the port number set in
-    # the private-storage.nix module.
-    storage.wait_for_open_port(3456)
-
-    # Once the web API is listening it should be possible to scrape some
-    # status from the node if it is really working.
-    storage.succeed('tahoe -d /var/db/tahoe-lafs/storage status')
-
-    # It should have Eliot logging turned on as well.
-    storage.succeed('[ -e /var/db/tahoe-lafs/storage/logs/eliot.json ]')
-
-    #
-    # Storage appears to be working so try to get a client to speak with it.
-    #
-    ${runOnNode "client" [ run-client "/tmp/client" introducerFURL issuerURL ristrettoPublicKey ]}
-    client.wait_for_open_port(3456)
-
-    # Make sure the fake Stripe API server is ready for requests.
-    try:
-      api_stripe_com.wait_for_unit("api.stripe.com")
-    except:
-      code, output = api_stripe_com.execute('journalctl -u api.stripe.com')
-      api_stripe_com.log(output)
-      raise
-
-    # Get some ZKAPs from the issuer.
-    try:
-      ${runOnNode "client" [
-        get-passes
-        "http://127.0.0.1:3456"
-        "/tmp/client/private/api_auth_token"
-        issuerURL
-        voucher
-      ]}
-    except:
-      code, output = client.execute('cat /tmp/stdout /tmp/stderr');
-      client.log(output)
-
-      # Dump the fake Stripe API server logs, too, since the error may arise
-      # from a PaymentServer/Stripe interaction.
-      code, output = api_stripe_com.execute('journalctl -u api.stripe.com')
-      api_stripe_com.log(output)
-      raise
-
-    # The client should be prepped now. Make it try to use some storage.
-    try:
-      ${runOnNode "client" [ exercise-storage "/tmp/client" ]}
-    except:
-      code, output = client.execute('cat /tmp/stdout /tmp/stderr')
-      client.log(output)
-      raise
-
-    # It should be possible to restart the storage service without the
-    # storage node fURL changing.
-    try:
-      furlfile = '/var/db/tahoe-lafs/storage/private/storage-plugin.privatestorageio-zkapauthz-v1.furl'
-      before = storage.execute('cat ' + furlfile)
-      ${runOnNode "storage" [ "systemctl" "restart" "tahoe.storage" ]}
-      after = storage.execute('cat ' + furlfile)
-      if (before != after):
-        raise Exception('fURL changes after storage node restart')
-    except:
-      code, output = storage.execute('cat /tmp/stdout /tmp/stderr')
-      storage.log(output)
-      raise
-
-    # The client should actually still work, too.
-    try:
-      ${runOnNode "client" [ exercise-storage "/tmp/client" ]}
-    except:
-      code, output = client.execute('cat /tmp/stdout /tmp/stderr')
-      client.log(output)
-      raise
-
-    # The issuer metrics should be accessible from the monitoring network.
-    issuer.execute('ifconfig lo:fauxvpn 172.23.23.2/24')
-    issuer.wait_until_succeeds("nc -z 172.23.23.2 80")
-    issuer.succeed('curl --silent --insecure --fail --output /dev/null http://172.23.23.2/metrics')
-    # The issuer metrics should NOT be accessible from any other network.
-    issuer.fail('curl --silent --insecure --fail --output /dev/null http://localhost/metrics')
-    client.fail('curl --silent --insecure --fail --output /dev/null http://issuer/metrics')
-    issuer.execute('ifconfig lo:fauxvpn down')
-  '';
+  testScript = ourpkgs.lib.testing.makeTestScript {
+    testpath = ./test_privatestorage.py;
+    kwargs = {
+      inherit sshPrivateKey pemFile introducerPort introducerFURL issuerURL ristrettoPublicKey voucher;
+
+      # Supply some helper programs to help the tests stay a bit higher level.
+      run_introducer = ./run-introducer.py;
+      run_client = ./run-client.py;
+      get_passes = ./get-passes.py;
+      exercise_storage = ./exercise-storage.py;
+    };
+  };
 }
diff --git a/nixos/tests/tahoe.nix b/nixos/tests/tahoe.nix
index e39fd6d3fcb776e8e5215bb1264e08e2b7306c1f..a007e65efd2d6bee8ab4adba9df3cb2901f53526 100644
--- a/nixos/tests/tahoe.nix
+++ b/nixos/tests/tahoe.nix
@@ -1,5 +1,8 @@
-{ ... }:
- {
+{ pkgs }:
+let
+  ourpkgs = pkgs.callPackage ../pkgs { };
+in
+{
   nodes = {
     storage = { config, pkgs, ourpkgs, ... }: {
       imports = [
@@ -23,50 +26,7 @@
       };
     };
   };
-  testScript = ''
-    start_all()
-
-    # After the service starts, destroy the "created" marker to force it to
-    # re-create its internal state.
-    storage.wait_for_open_port(4001)
-    storage.succeed("systemctl stop tahoe.storage")
-    storage.succeed("rm /var/db/tahoe-lafs/storage.created")
-    storage.succeed("systemctl start tahoe.storage")
-
-    # After it starts up again, verify it has consistent internal state and a
-    # backup of the prior state.
-    storage.wait_for_open_port(4001)
-    storage.succeed("[ -e /var/db/tahoe-lafs/storage ]")
-    storage.succeed("[ -e /var/db/tahoe-lafs/storage.created ]")
-    storage.succeed("[ -e /var/db/tahoe-lafs/storage.1 ]")
-    storage.succeed("[ -e /var/db/tahoe-lafs/storage.1/private/node.privkey ]")
-    storage.succeed("[ -e /var/db/tahoe-lafs/storage.1/private/node.pem ]")
-    storage.succeed("[ ! -e /var/db/tahoe-lafs/storage.2 ]")
-
-    # Stop it again, once again destroy the "created" marker, and this time also
-    # jam some partial state in the way that will need cleanup.
-    storage.succeed("systemctl stop tahoe.storage")
-    storage.succeed("rm /var/db/tahoe-lafs/storage.created")
-    storage.succeed("mkdir -p /var/db/tahoe-lafs/storage.atomic/partial")
-    try:
-      storage.succeed("systemctl start tahoe.storage")
-    except:
-      x, y = storage.execute("journalctl -u tahoe.storage")
-      storage.log(y)
-      raise
-
-    # After it starts up again, verify it has consistent internal state and
-    # backups of the prior two states. It also has no copy of the inconsistent
-    # state because it could never have been used.
-    storage.wait_for_open_port(4001)
-    storage.succeed("[ -e /var/db/tahoe-lafs/storage ]")
-    storage.succeed("[ -e /var/db/tahoe-lafs/storage.created ]")
-    storage.succeed("[ -e /var/db/tahoe-lafs/storage.1 ]")
-    storage.succeed("[ -e /var/db/tahoe-lafs/storage.2 ]")
-    storage.succeed("[ -e /var/db/tahoe-lafs/storage.2/private/node.privkey ]")
-    storage.succeed("[ -e /var/db/tahoe-lafs/storage.2/private/node.pem ]")
-    storage.succeed("[ ! -e /var/db/tahoe-lafs/storage.atomic ]")
-    storage.succeed("[ ! -e /var/db/tahoe-lafs/storage/partial ]")
-    storage.succeed("[ ! -e /var/db/tahoe-lafs/storage.3 ]")
-  '';
+  testScript = ourpkgs.lib.testing.makeTestScript {
+    testpath = ./test_tahoe.py;
+  };
 }
diff --git a/nixos/tests/test_privatestorage.py b/nixos/tests/test_privatestorage.py
new file mode 100644
index 0000000000000000000000000000000000000000..dc060d51f1815f549485d3415b3b3af97d5c79af
--- /dev/null
+++ b/nixos/tests/test_privatestorage.py
@@ -0,0 +1,148 @@
+def runOnNode(node, argv):
+    """
+    Run a shell command on one of the nodes. The first argument is the node
+    object to run on; the second is the command's argv as a list of strings.
+
+    The command's output is piped to systemd-cat; on failure the node's
+    /tmp/stdout and /tmp/stderr are logged and the error is re-raised.
+    """
+    try:
+        node.succeed('set -eo pipefail; {} | systemd-cat'.format(" ".join(argv)))
+    except Exception:
+        code, output = node.execute('cat /tmp/stdout /tmp/stderr')
+        node.log(output)
+        raise
+
+def ssh(username, sshPrivateKey, hostname):
+    """
+    Generate a command which can be used with runOnNode to ssh to the given
+    host.
+    """
+    return [
+        "cp", sshPrivateKey, "/tmp/ssh_key", ";",
+        "chmod", "0400", "/tmp/ssh_key", ";",
+        "ssh", "-oStrictHostKeyChecking=no", "-i", "/tmp/ssh_key",
+        "{username}@{hostname}".format(username=username, hostname=hostname), ":",
+    ]
+
+def test(
+    sshPrivateKey,
+    pemFile,
+    run_introducer,
+    run_client,
+    get_passes,
+    exercise_storage,
+    introducerPort,
+    introducerFURL,
+    issuerURL,
+    ristrettoPublicKey,
+    voucher,
+):
+    """Test that the private-storage deployment works end to end on the test VMs."""
+
+    # Boot the VMs. We used to do them all in parallel but the boot
+    # sequence got flaky at some point for some reason I don't
+    # understand. :/ It might be related to this:
+    #
+    #   https://discourse.nixos.org/t/nixos-ppc64le-vm-does-not-have-dev-vda-device/11548/9
+    #
+    # See <nixpkgs/nixos/modules/virtualisation/qemu-vm.nix> for the Nix
+    # that constructs the QEMU command that gets run.
+    #
+    # Boot them one at a time for now.
+    issuer.connect()
+    introducer.connect()
+    storage.connect()
+    client.connect()
+    api_stripe_com.connect()
+
+    # The issuer and the storage server should accept SSH connections. This
+    # doesn't prove it is so but if it fails it's a pretty good indication
+    # it isn't so.
+    storage.wait_for_open_port(22)
+    runOnNode(issuer, ssh("probeuser", sshPrivateKey, "storage"))
+    runOnNode(issuer, ssh("root", sshPrivateKey, "storage"))
+    issuer.wait_for_open_port(22)
+    runOnNode(storage, ssh("probeuser", sshPrivateKey, "issuer"))
+    runOnNode(storage, ssh("root", sshPrivateKey, "issuer"))
+
+    # Set up a Tahoe-LAFS introducer.
+    introducer.copy_from_host(pemFile, '/tmp/node.pem')
+
+    runOnNode(introducer, [run_introducer, "/tmp/node.pem", str(introducerPort), introducerFURL])
+
+    #
+    # Get a Tahoe-LAFS storage server up.
+    #
+    code, version = storage.execute('tahoe --version')
+    storage.log(version)
+
+    # The systemd unit should reach the running state.
+    storage.wait_for_unit('tahoe.storage.service')
+
+    # Some while after that the Tahoe-LAFS node should listen on the web API
+    # port. The port number here has to agree with the port number set in
+    # the private-storage.nix module.
+    storage.wait_for_open_port(3456)
+
+    # Once the web API is listening it should be possible to scrape some
+    # status from the node if it is really working.
+    storage.succeed('tahoe -d /var/db/tahoe-lafs/storage status')
+
+    # It should have Eliot logging turned on as well.
+    storage.succeed('[ -e /var/db/tahoe-lafs/storage/logs/eliot.json ]')
+
+    #
+    # Storage appears to be working so try to get a client to speak with it.
+    #
+    runOnNode(client, [run_client, "/tmp/client", introducerFURL, issuerURL, ristrettoPublicKey])
+    client.wait_for_open_port(3456)
+
+    # Make sure the fake Stripe API server is ready for requests.
+    try:
+        api_stripe_com.wait_for_unit("api.stripe.com")
+    except:
+        code, output = api_stripe_com.execute('journalctl -u api.stripe.com')
+        api_stripe_com.log(output)
+        raise
+
+    # Get some ZKAPs from the issuer.
+    try:
+        runOnNode(client, [
+            get_passes,
+            "http://127.0.0.1:3456",
+            "/tmp/client/private/api_auth_token",
+            issuerURL,
+            voucher,
+        ])
+    except:
+        # Dump the fake Stripe API server logs, too, since the error may arise
+        # from a PaymentServer/Stripe interaction.
+        code, output = api_stripe_com.execute('journalctl -u api.stripe.com')
+        api_stripe_com.log(output)
+
+        raise
+
+    # The client should be prepped now. Make it try to use some storage.
+    runOnNode(client, [exercise_storage, "/tmp/client"])
+
+    # It should be possible to restart the storage service without the
+    # storage node fURL changing.
+    furlfile = '/var/db/tahoe-lafs/storage/private/storage-plugin.privatestorageio-zkapauthz-v1.furl'
+    before = storage.execute('cat ' + furlfile)
+    runOnNode(storage, ["systemctl", "restart", "tahoe.storage"])
+    after = storage.execute('cat ' + furlfile)
+    if (before != after):
+        raise Exception('fURL changes after storage node restart')
+
+    # The client should actually still work, too.
+    runOnNode(client, [exercise_storage, "/tmp/client"])
+
+    # The issuer metrics should be accessible from the monitoring network.
+    issuer.execute('ifconfig lo:fauxvpn 172.23.23.2/24')
+    issuer.wait_until_succeeds("nc -z 172.23.23.2 80")
+    issuer.succeed('curl --silent --insecure --fail --output /dev/null http://172.23.23.2/metrics')
+    # The issuer metrics should NOT be accessible from any other network.
+    issuer.fail('curl --silent --insecure --fail --output /dev/null http://localhost/metrics')
+    client.fail('curl --silent --insecure --fail --output /dev/null http://issuer/metrics')
+    issuer.execute('ifconfig lo:fauxvpn down')
diff --git a/nixos/tests/test_tahoe.py b/nixos/tests/test_tahoe.py
new file mode 100644
index 0000000000000000000000000000000000000000..c5190c78b04cb2cc59b5275e45869dd53e0e81c3
--- /dev/null
+++ b/nixos/tests/test_tahoe.py
@@ -0,0 +1,45 @@
+def test():
+    start_all()
+
+    # After the service starts, destroy the "created" marker to force it to
+    # re-create its internal state.
+    storage.wait_for_open_port(4001)
+    storage.succeed("systemctl stop tahoe.storage")
+    storage.succeed("rm /var/db/tahoe-lafs/storage.created")
+    storage.succeed("systemctl start tahoe.storage")
+
+    # After it starts up again, verify it has consistent internal state and a
+    # backup of the prior state.
+    storage.wait_for_open_port(4001)
+    storage.succeed("[ -e /var/db/tahoe-lafs/storage ]")
+    storage.succeed("[ -e /var/db/tahoe-lafs/storage.created ]")
+    storage.succeed("[ -e /var/db/tahoe-lafs/storage.1 ]")
+    storage.succeed("[ -e /var/db/tahoe-lafs/storage.1/private/node.privkey ]")
+    storage.succeed("[ -e /var/db/tahoe-lafs/storage.1/private/node.pem ]")
+    storage.succeed("[ ! -e /var/db/tahoe-lafs/storage.2 ]")
+
+    # Stop it again, once again destroy the "created" marker, and this time also
+    # jam some partial state in the way that will need cleanup.
+    storage.succeed("systemctl stop tahoe.storage")
+    storage.succeed("rm /var/db/tahoe-lafs/storage.created")
+    storage.succeed("mkdir -p /var/db/tahoe-lafs/storage.atomic/partial")
+    try:
+        storage.succeed("systemctl start tahoe.storage")
+    except:
+        x, y = storage.execute("journalctl -u tahoe.storage")
+        storage.log(y)
+        raise
+
+    # After it starts up again, verify it has consistent internal state and
+    # backups of the prior two states. It also has no copy of the inconsistent
+    # state because it could never have been used.
+    storage.wait_for_open_port(4001)
+    storage.succeed("[ -e /var/db/tahoe-lafs/storage ]")
+    storage.succeed("[ -e /var/db/tahoe-lafs/storage.created ]")
+    storage.succeed("[ -e /var/db/tahoe-lafs/storage.1 ]")
+    storage.succeed("[ -e /var/db/tahoe-lafs/storage.2 ]")
+    storage.succeed("[ -e /var/db/tahoe-lafs/storage.2/private/node.privkey ]")
+    storage.succeed("[ -e /var/db/tahoe-lafs/storage.2/private/node.pem ]")
+    storage.succeed("[ ! -e /var/db/tahoe-lafs/storage.atomic ]")
+    storage.succeed("[ ! -e /var/db/tahoe-lafs/storage/partial ]")
+    storage.succeed("[ ! -e /var/db/tahoe-lafs/storage.3 ]")
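
For reference, a minimal sketch (not part of the change; the file name, node name, and kwargs below are hypothetical) of the contract makeTestScript expects from a test module: the module defines a top-level test function, the generated script execs the file in the driver's global namespace (so node objects defined by the enclosing test expression are in scope), and then calls test(**kwargs) with the JSON-serialized attribute set.

# test_example.py -- hypothetical module, wired up from Nix with e.g.:
#   testScript = ourpkgs.lib.testing.makeTestScript {
#     testpath = ./test_example.py;
#     kwargs = { greeting = "hello"; port = 22; };
#   };
def test(greeting, port):
    # `machine` is a node assumed to be defined by the enclosing test
    # expression; it is visible here because the generated script execs
    # this file with the driver's globals().
    machine.wait_for_open_port(port)
    machine.succeed("echo {} | systemd-cat".format(greeting))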