diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index d6b06fae42f6e738725238fac59617aeb161dfd4..4494a1656146337cf7c64c44eb1081ef172d39e1 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -5,20 +5,26 @@ default: - "nixos" - "shell" +variables: + # https://docs.gitlab.com/ee/ci/runners/configure_runners.html#job-stages-attempts + GET_SOURCES_ATTEMPTS: 10 + docs: stage: "build" script: - - "nix-build docs.nix" - - "cp --recursive --no-preserve=mode result/docs/. docs/build/" + - "nix-build --attr docs --out-link result-docs" + # GitLab wants to lchown artifacts. It can't do that to store paths. Get + # a copy of the docs outside of the store. + - "cp --recursive --no-preserve=mode ./result-docs/docs ./docs-build/" artifacts: paths: - - "docs/build/" + - "./docs-build/" expose_as: "documentation" unit-tests: stage: "test" script: - - "nix-shell --run 'nix-build nixos/unit-tests.nix' && cat result" + - "nix-build --attr unit-tests && cat result" .morph-build: &MORPH_BUILD stage: "test" @@ -68,7 +74,7 @@ system-tests: stage: "test" timeout: "3 hours" script: - - "nix-shell --run 'nix-build nixos/system-tests.nix'" + - "nix-build --attr system-tests" # A template for a job that can update one of the grids. .update-grid: &UPDATE_GRID diff --git a/DEPLOYMENT-NOTES.rst b/DEPLOYMENT-NOTES.rst index 5de83386dbb939cc3cfe2a8b68c198f218934933..fbde887b5ecfc5f1fa5b520df71a6a0ecc1980fe 100644 --- a/DEPLOYMENT-NOTES.rst +++ b/DEPLOYMENT-NOTES.rst @@ -1,6 +1,44 @@ Deployment notes ================ +- 2021-12-20 + + `PrivateStorageOps issue 399 <https://whetstone.privatestorage.io/privatestorage/privatestorageops/-/issues/399>`_ requires moving the PaymentServer database on the ``payments`` host onto a new dedicated filesystem. + + Follow these steps *before* deploying this version of PrivateStorageio: + + 0. Deploy the `PrivateStorageOps change <https://whetstone.privatestorage.io/privatestorage/privatestorageops/-/merge_requests/169>`_ that creates a new dedicated volume. + + 1. 
Put a disk label on the new dedicated volume :: + + nix-shell -p parted --run 'parted /dev/nvme1n1 mklabel msdos' + + 2. Put a properly aligned partition in the new disk label :: + + nix-shell -p parted --run 'parted /dev/nvme1n1 mkpart primary ext2 4096s 4G' + + 3. Create a labeled filesystem on the partition :: + + mkfs.ext4 -L zkapissuer-data /dev/nvme1n1p1 + + 4. Deploy the PrivateStorageio update. + + 5. Move the database file to the new location :: + + mv -iv /var/lib/zkapissuer/vouchers.sqlite3 /var/lib/zkapissuer-v2 + + 6. Clean up the old state directory :: + + rm -ir /var/lib/zkapissuer + + 7. Start the PaymentServer service (not running because its path assertions were not met earlier) :: + + systemctl start zkapissuer + +- 2021-10-12 The secret in ``private-keys/grafana-slack-url`` needs to be changed to remove the ``SLACKURL=`` prefix. + +- 2021-09-30 `Enable alerting <https://whetstone.privatestorage.io/privatestorage/PrivateStorageio/-/merge_requests/185>`_ needs a secret in ``private-keys/grafana-slack-url`` looking like the template in ``morph/grid/local/private-keys/grafana-slack-url`` and pointing to the secret API endpoint URL saved in `this 1Password entry <https://privatestorage.1password.com/vaults/7flqasy5hhhmlbtp5qozd3j4ga/allitems/cgznskz2oix2tyx5xyntwaos5i>`_ (or create a new secret URL at https://www.slack.com/apps/A0F7XDUAZ). 
+ - 2021-09-07 `Manage access to payment metrics <https://whetstone.privatestorage.io/privatestorage/PrivateStorageio/-/merge_requests/146>`_ requires moving and chown'ing the PaymentServer database on the ``payments`` host:: mkdir /var/lib/zkapissuer @@ -11,4 +49,3 @@ Deployment notes chmod 750 /var/lib/zkapissuer chmod 640 /var/lib/zkapissuer/vouchers.sqlite3 - diff --git a/ci-tools/vulnerability-scan b/ci-tools/vulnerability-scan index 48bf51e071a398f37565717a22b2066d3f905fbe..67e1a21263fa65843b34d185884ea6df2596220a 100755 --- a/ci-tools/vulnerability-scan +++ b/ci-tools/vulnerability-scan @@ -32,6 +32,12 @@ else fi ' +# The version (1.9.6) of vulnix in nixos-21.05 incorrectly collapses +# derivations with the same name+version, but different sets of patches +# applied. Therefore, we use a recent nixos-unstable version that has a newer +# version of vulnix included. +export NIX_PATH=nixpkgs=https://api.github.com/repos/NixOS/nixpkgs/tarball/ee084c02040e864eeeb4cf4f8538d92f7c675671 + # vulnix exits with an error status if there are vulnerabilities. We told # GitLab to allow this by setting `allow_failure` to true in the GitLab CI # config. vulnix exit status indicates what vulnix thinks happened. If we diff --git a/default.nix b/default.nix new file mode 100644 index 0000000000000000000000000000000000000000..6441675a243e22e6154267c656652c8d8575940e --- /dev/null +++ b/default.nix @@ -0,0 +1,14 @@ +{ pkgs ? import ./nixpkgs-2105.nix { } }: +{ + # Render the project documentation source to some presentation format (ie, + # html) with Sphinx. + docs = pkgs.callPackage ./docs.nix { }; + + # Run some system integration tests in VMs covering some of the software + # we're integrating (ie, application functionality). + system-tests = pkgs.callPackage ./nixos/system-tests.nix { }; + + # Run some unit tests of the Nix that ties all of these things together (ie, + # PrivateStorageio-internal library functionality). 
+ unit-tests = pkgs.callPackage ./nixos/unit-tests.nix { }; +} diff --git a/docs.nix b/docs.nix index 4c8b230a7eddb462bf47a4c3ee591e64fb3ce1ff..b13c7b58c100553c522cb71912089c6fdbfaed4b 100644 --- a/docs.nix +++ b/docs.nix @@ -1,2 +1,20 @@ -{ pkgs ? import ./nixpkgs-2105.nix { } }: -pkgs.callPackage ./privatestorageio.nix { } +{ stdenv, lib, graphviz, plantuml, python3, sphinx }: +let + pyenv = python3.withPackages (ps: [ ps.sphinx ps.sphinxcontrib_plantuml ]); +in +stdenv.mkDerivation rec { + version = "0.0"; + name = "privatestorageio-${version}"; + src = lib.cleanSource ./.; + + phases = [ "unpackPhase" "buildPhase" ]; + + depsBuildBuild = [ + graphviz + plantuml + ]; + + buildPhase = '' + ${pyenv}/bin/sphinx-build -W docs/ $out/docs + ''; +} diff --git a/docs/build/.gitignore b/docs/_static/.gitignore similarity index 100% rename from docs/build/.gitignore rename to docs/_static/.gitignore diff --git a/docs/source/_static/logo-ps.svg b/docs/_static/logo-ps.svg similarity index 100% rename from docs/source/_static/logo-ps.svg rename to docs/_static/logo-ps.svg diff --git a/docs/source/_static/.gitignore b/docs/_templates/.gitignore similarity index 100% rename from docs/source/_static/.gitignore rename to docs/_templates/.gitignore diff --git a/docs/source/conf.py b/docs/conf.py similarity index 98% rename from docs/source/conf.py rename to docs/conf.py index 66aa921e2ba799e1b1b4d8e7a778ab07ee07a73b..747a90a8cc039e65fd01c3d598170c001599c1c8 100644 --- a/docs/source/conf.py +++ b/docs/conf.py @@ -20,7 +20,7 @@ # -- Project information ----------------------------------------------------- project = 'PrivateStorageio' -copyright = '2019, PrivateStorage.io, LLC' +copyright = '2021, PrivateStorage.io, LLC' author = 'PrivateStorage.io, LLC' # The short X.Y version @@ -38,8 +38,10 @@ release = '0.0' # Add any Sphinx extension module names here, as strings. They can be # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom # ones. 
+ extensions = [ "sphinx.ext.graphviz", + "sphinxcontrib.plantuml", ] # Add any paths that contain templates here, relative to this directory. diff --git a/docs/dev/README.rst b/docs/dev/README.rst new file mode 100644 index 0000000000000000000000000000000000000000..29eb38e1f1695084d3276d41d4a063be4a53a015 --- /dev/null +++ b/docs/dev/README.rst @@ -0,0 +1,187 @@ +Developer documentation +======================= + +Building +-------- + +The build system uses `Nix`_ which must be installed before anything can be built. +Start by setting up the development/operations environment:: + + $ nix-shell + +Testing +------- + +The test system uses `Nix`_ which must be installed before any tests can be run. + +Unit tests are run using this command:: + + $ nix-build --attr unit-tests + +Unit tests are also run on CI. + +The system tests are run using this command:: + + $ nix-build --attr system-tests + +The system tests boot QEMU VMs which prevents them from running on CI at this time. +The build requires > 10 GB of disk space, +and the VMs might be timing out on slow or busy machines. +If you run into timeouts, +try `raising the number of retries <https://whetstone.privatestorage.io/privatestorage/PrivateStorageio/-/blob/e8233d2/nixos/modules/tests/run-introducer.py#L55-62>`_. + +It is also possible to go through the testing script interactively - useful for debugging:: + + $ nix-build --attr system-tests.private-storage.driver + +This will give you a result symlink in the current directory. +Inside that is bin/nixos-test-driver which gives you a kind of REPL for interacting with the VMs. +The kind of `Python in this testScript <https://whetstone.privatestorage.io/privatestorage/PrivateStorageio/-/blob/78881a3/nixos/modules/tests/private-storage.nix#L180>`_ is what you can enter into this REPL. +Consult the `official documentation on NixOS Tests <https://nixos.org/manual/nixos/stable/index.html#sec-nixos-tests>`_ for more information. 
+ +Updating Pins +-------------- + +Nixpkgs +``````` + +To update the version of NixOS we deploy with, run: + +.. code:: shell + + nix-shell --run 'update-nixpkgs' + +That will update ``nixpkgs-2105.json`` to the latest release on the nixos-21.05 channel. + +To update the channel, the script will need to be updated, +along with the filenames that have the channel in them. + +Gitlab Repositories +``````````````````` +To update the version of packages we import from gitlab, run: + +.. code:: shell + + nix-shell --command 'update-gitlab-repo nixos/pkgs/<package>/repo.json' + +That will update the package to point at the latest version of the project. +The command uses the branch and repository owner specified in the ``repo.json`` file, +but you can override them by passing the ``--branch`` or ``-owner`` arguments to the command. +A specific revision can also be pinned, by passing ``-rev``. + +Interactions +------------ + +Storage-Time Purchase (ie Payment) +`````````````````````````````````` + +.. 
uml:: + + actor User as User + participant GridSync + participant ZKAPAuthorizer + database ZKAPAuthzDB as "ZKAPAuthorizer" + participant Browser + participant PaymentServer as "Payment Server" + database PaymentServerDB as "Payment Server" + participant WebServer as "Web Server" + participant Stripe + + User -> GridSync : buy storage-time + activate User + GridSync -> GridSync : generate voucher + GridSync -> ZKAPAuthorizer : redeem voucher + activate ZKAPAuthorizer + ZKAPAuthorizer -> ZKAPAuthzDB : store voucher + ZKAPAuthorizer -> GridSync : acknowledge + GridSync -> Browser : open payment page + + loop until redeemed + GridSync -> ZKAPAuthorizer : query voucher state + ZKAPAuthorizer -> GridSync : not paid + end + + Browser -> WebServer : request payment form + WebServer -> Browser : payment form + Browser -> User : Payment form displayed + activate User + User -> Browser : Submit payment details + Browser -> Stripe : Submit payment details + + alt payment details accepted + Stripe -> Browser : details okay, return card token + Browser -> PaymentServer : create charge using card token + PaymentServer -> Stripe : charge card using token + note left: the user has now paid for the service + Stripe -> PaymentServer : acknowledge + PaymentServer -> PaymentServerDB : store voucher paid state + else payment details rejected + Stripe -> Browser : payment failure + end + + Browser -> User : payment processing results displayed + deactivate User + + group repeat for each redemption group + ZKAPAuthorizer -> ZKAPAuthzDB : generate and store random tokens + ZKAPAuthorizer -> PaymentServer : redeem voucher with blinded tokens + PaymentServer -> ZKAPAuthorizer : return signatures for blinded tokens + ZKAPAuthorizer -> ZKAPAuthzDB : store unblinded signatures for tokens + note right: the user has now been authorized to use the service + end + deactivate ZKAPAuthorizer + + loop until redeemed + GridSync -> ZKAPAuthorizer : query voucher state + ZKAPAuthorizer -> GridSync : 
fully redeemed + end + + GridSync -> User : storage-time available displayed + deactivate User + +Storage-Time Spending (ie Use) +`````````````````````````````` + +.. uml:: + + participant MagicFolder + participant TahoeLAFS as "Tahoe-LAFS" + participant ZKAPAuthorizer + database ZKAPAuthzDB as "ZKAPAuthorizer" + participant StorageNode as "Storage Node" + participant SpendingService as "Spending Service" + + [-> MagicFolder: upload triggered + activate MagicFolder + + MagicFolder -> TahoeLAFS : store some data + activate TahoeLAFS + + TahoeLAFS -> ZKAPAuthorizer : store some data + activate ZKAPAuthorizer + + loop until tokens accepted + ZKAPAuthorizer <- ZKAPAuthzDB : load some tokens + ZKAPAuthorizer -> StorageNode : store some data using these tokens + StorageNode -> SpendingService : spend these tokens + + alt spent tokens + SpendingService -> StorageNode: already spent, rejected + StorageNode -> ZKAPAuthorizer: already spent, rejected + else fresh tokens + SpendingService -> StorageNode: accepted + end + end + + StorageNode -> ZKAPAuthorizer: data stored + deactivate ZKAPAuthorizer + ZKAPAuthorizer -> ZKAPAuthzDB: discard spent tokens + ZKAPAuthorizer -> TahoeLAFS: data stored + deactivate TahoeLAFS + TahoeLAFS -> MagicFolder: data stored + deactivate MagicFolder + +.. include:: + ../../morph/grid/local/README.rst + +.. _Nix: https://nixos.org/nix diff --git a/docs/dev/designs/index.rst b/docs/dev/designs/index.rst new file mode 100644 index 0000000000000000000000000000000000000000..84993e1bd8fbfc1eb010d0fd8b33fee0e7da1103 --- /dev/null +++ b/docs/dev/designs/index.rst @@ -0,0 +1,7 @@ +System Designs +-------------- + +.. 
toctree:: + :maxdepth: 2 + + System Design Template <template> diff --git a/docs/dev/designs/template.rst b/docs/dev/designs/template.rst new file mode 100644 index 0000000000000000000000000000000000000000..8fbc2a587848b21069c43178ec71f65aed42998a --- /dev/null +++ b/docs/dev/designs/template.rst @@ -0,0 +1,95 @@ +$HEADLINE +========= + +*The goal is to do the least design we can get away with while still making a quality product.* +*Think of this as a tool to help define the problem, analyze solutions, and share results.* +*Feel free to skip sections that you don't think are relevant* +*(but say that you are doing so).* +*Delete the bits in italics* + +**Contacts:** *The primary contacts for this design.* +**Date:** *The last time this design was modified. YYYY-MM-DD* + +*Short description of the feature.* +*Consider clarifying by also describing what it is not.* + +Rationale +--------- + +*Why are we doing this now?* +*What value does this give to our users?* +*Which users?* + +User Stories +------------ + +**$STORY NAME** + +**Category:** *must / nice to have / must not* + +As a **$PERSON** I want **$FEATURE** so that **$BENEFIT**. 
+ +**Acceptance Criteria:** + + * *What concrete conditions must be met for the implementation to be acceptable?* + * *Surface assumptions about the user story that may not be shared across the team.* + * *Describe failure modes and negative scenarios when preconditions for using the feature are not met.* + * *Place the story in a performance/scaling context with real numbers.* + +*Have as many as you like.* +*Group user stories together into meaningfully deliverable units.* + +*Gather Feedback* +----------------- + +*It might be a good idea to stop at this point & get feedback to make sure you're solving the right problem.* + +Alternatives Considered +----------------------- + +*What we've considered.* +*What trade-offs are involved with each choice.* +*Why we've chosen the one we did.* + +Detailed Implementation Design +------------------------------ + +*Focus on:* + +* external and internal interfaces +* how externally-triggered system events (e.g. sudden reboot; network congestion) will affect the system +* scalability and performance + +Data Integrity +~~~~~~~~~~~~~~ + +*If we get this wrong once, we lose forever.* +*What data does the system need to operate on?* +*How will old data be upgraded to meet the requirements of the design?* +*How will data be upgraded to future versions of the implementation?* + +Security +~~~~~~~~ + +*What threat model does this design take into account?* +*What new attack surfaces are added by this design?* +*What defenses are deployed with the implementation to keep those surfaces safe?* + +Backwards Compatibility +~~~~~~~~~~~~~~~~~~~~~~~ + +*What existing systems are impacted by these changes?* +*How does the design ensure they will continue to work?* + +Performance and Scalability +~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +*How will performance of the implementation be measured?* + +*After measuring it, record the results here.* + +Further Reading +--------------- + +*Links to related things.* +*Other designs, tickets, epics, mailing list 
threads, etc.* diff --git a/docs/source/index.rst b/docs/index.rst similarity index 85% rename from docs/source/index.rst rename to docs/index.rst index eb6b59ec783b8aa6dfb3227ca51fe47e5f1b6bc7..c95dc5790e8bd4cd82560319cfb6e9d4f26720ae 100644 --- a/docs/source/index.rst +++ b/docs/index.rst @@ -6,13 +6,16 @@ Welcome to PrivateStorageio's documentation! ============================================ -Howdy! We separated the documentation into parts addressing different audiences. Please enjoy our docs for: +Howdy! +We separated the documentation into parts addressing different audiences. +Please enjoy our docs for: .. toctree:: :maxdepth: 2 Administrators <ops/README> Developers <dev/README> + System Designs <dev/designs/index> Naming diff --git a/docs/source/ops/README.rst b/docs/ops/README.rst similarity index 50% rename from docs/source/ops/README.rst rename to docs/ops/README.rst index b78e5ef82c0ae4dad88e65e9517f3fc6ec7bdfd2..8026e1673b0dbed3708b1c4b0e7b600c049fabde 100644 --- a/docs/source/ops/README.rst +++ b/docs/ops/README.rst @@ -3,11 +3,10 @@ Administrator documentation This contains documentation regarding running PrivateStorageio. -.. include:: - ../../../morph/README.rst +.. toctree:: + :maxdepth: 2 -.. include:: - monitoring.rst - -.. include:: - generating-keys.rst + morph + monitoring + generating-keys + backup-recovery diff --git a/docs/ops/backup-recovery.rst b/docs/ops/backup-recovery.rst new file mode 100644 index 0000000000000000000000000000000000000000..a39c96dfa859203d6b54c1812e70414715b920e9 --- /dev/null +++ b/docs/ops/backup-recovery.rst @@ -0,0 +1,115 @@ +Backup/Recovery +=============== + +This document covers the details of backups of the data required for PrivateStorageio to operate. +It describes the situations in which these backups are intended to be useful. +It also explains how to use these backups to recover in these situations. 
+ +Tahoe-LAFS Storage Nodes +------------------------ + +The state associated with a Tahoe-LAFS storage node consists of at least: + +1. the "node directory" containing + configuration, + logs, + public and private keys, + and service fURLs. +2. the "storage" directory containing + user ciphertext, + garbage collector state, + and corruption advisories. + +Node Directories +~~~~~~~~~~~~~~~~ + +The "node directory" changes gradually over time. +New logs are written (including incident reports). +The announcement sequence number is incremented. +The introducer cache is updated. + +The critical state necessary to reproduce an identical storage node does not change. +This state consists of + +* the node id (my_nodeid) +* the node private key (private/node.privkey) +* the node x509v3 certificate (private/node.pem) + +A backup of the node directory can be used to create a Tahoe-LAFS storage node with the same identity as the original storage node. +It *cannot* be used to recover the user ciphertext held by the original storage node. +Nor will it recover the state which gradually changes over time. + +Backup +`````` + +A one-time backup has been made of these directories in the PrivateStorageio 1Password account. +The "Tahoe-LAFS Storage Node Backups" vault contains backups of staging and production node directories. +The process for creating these backups is as follows: + +:: + + DOMAIN=private.storage + FILES="node.pubkey private/ tahoe.cfg my_nodeid tahoe-client.tac node.url permutation-seed" + DIR=/var/db/tahoe-lafs/storage + for n in $(seq 1 5); do + NODE=storage00${n}.${DOMAIN} + ssh $NODE tar vvjcf - -C $DIR $FILES > ${NODE}.tar.bz2 + done + + tar vvjcf ${DOMAIN}.tar.bz2 *.tar.bz2 + +Recovery +```````` + +#. Prepare a system onto which to recover the node directory. + The rest of these steps assume that PrivateStorageio is deployed on the node. + +#. Download the backup tarball from 1Password + +#. 
Extract the particular node directory backup to recover from :: + + [LOCAL]$ tar xvf ${DOMAIN}.tar.bz2 ${NODE}.${DOMAIN}.tar.bz2 + +#. Upload the node directory backup to the system onto which recovery is taking place :: + + [LOCAL]$ scp ${NODE}.${DOMAIN}.tar.bz2 ${NODE}.${DOMAIN}:recovery.tar.bz2 + +#. Clean up the local copies of the backup files :: + + [LOCAL]$ rm -iv ${DOMAIN}.tar.bz2 ${NODE}.${DOMAIN}.tar.bz2 + +#. The rest of the steps are executed on the system on which recovery is taking place. + Log in :: + + [LOCAL]$ ssh ${NODE}.${DOMAIN} + +#. On the node make sure there is no storage service running :: + + [REMOTE]$ systemctl status tahoe.storage.service + + If there is then figure out why and stop it if it is safe to do so :: + + [REMOTE]$ systemctl stop tahoe.storage.service + +#. On the node make sure there is no existing node directory :: + + [REMOTE]$ stat /var/db/tahoe-lafs/storage + + If there is then figure out why and remove it if it is safe to do so. + +#. Unpack the node directory backup into the correct location :: + + [REMOTE]$ mkdir -p /var/db/tahoe-lafs/storage + [REMOTE]$ tar xvf recovery.tar.bz2 -C /var/db/tahoe-lafs/storage + +#. Mark the node directory as created and consistent :: + + [REMOTE]$ touch /var/db/tahoe-lafs/storage.created + +#. Start the storage service :: + + [REMOTE]$ systemctl start tahoe.storage.service + +#. 
Clean up the remote copies of the backup file :: + + [REMOTE]$ rm -iv recovery.tar.bz2 diff --git a/docs/source/ops/generating-keys.rst b/docs/ops/generating-keys.rst similarity index 82% rename from docs/source/ops/generating-keys.rst rename to docs/ops/generating-keys.rst index c2f7028f2bc263c9e5bac40f78ca0adfb4861415..3da0410ce1e394fdd5b566a20a013fa5d29db213 100644 --- a/docs/source/ops/generating-keys.rst +++ b/docs/ops/generating-keys.rst @@ -42,17 +42,6 @@ For example:: echo -n "SILOWzbnkBjxC1hGde9d5Q3Ir/4yLosCLEnEQGAxEQE=" > ristretto.signing-key -ZKAP-Issuer TLS -``````````````` - -The ZKAPIssuer.service needs a working TLS certificate and expects it in the certbot directory for the domain you configured, in my case:: - - openssl req -x509 -newkey rsa:4096 -nodes -keyout privkey.pem -out cert.pem -days 3650 - touch chain.pem - -Move the three .pem files into the payment's server ``/var/lib/letsencrypt/live/payments.localdev/`` directory and issue a ``sudo systemctl restart zkapissuer.service``. - - Monitoring VPN `````````````` diff --git a/docs/source/ops/monitoring.rst b/docs/ops/monitoring.rst similarity index 100% rename from docs/source/ops/monitoring.rst rename to docs/ops/monitoring.rst diff --git a/docs/ops/morph.rst b/docs/ops/morph.rst new file mode 100644 index 0000000000000000000000000000000000000000..5bcffb5fb5a6928a69300dcfb3ac4cb7126ba09a --- /dev/null +++ b/docs/ops/morph.rst @@ -0,0 +1,2 @@ +.. 
include:: + ../../morph/README.rst diff --git a/docs/source/ops/service-dag-to-dashboard-order.dot b/docs/ops/service-dag-to-dashboard-order.dot similarity index 100% rename from docs/source/ops/service-dag-to-dashboard-order.dot rename to docs/ops/service-dag-to-dashboard-order.dot diff --git a/docs/source/_templates/.gitignore b/docs/source/_templates/.gitignore deleted file mode 100644 index f935021a8f8a7bd22f9d6703cafa5134bb6a57f8..0000000000000000000000000000000000000000 --- a/docs/source/_templates/.gitignore +++ /dev/null @@ -1 +0,0 @@ -!.gitignore diff --git a/docs/source/dev/README.rst b/docs/source/dev/README.rst deleted file mode 100644 index 14d2de31f932a0aa50545643e30c679c36696e19..0000000000000000000000000000000000000000 --- a/docs/source/dev/README.rst +++ /dev/null @@ -1,70 +0,0 @@ -Developer documentation -======================= - -Building --------- - -The build system uses `Nix`_ which must be installed before anything can be built. -Start by setting up the development/operations environment:: - - $ nix-shell - -Testing -------- - -The test system uses `Nix`_ which must be installed before any tests can be run. - -Unit tests are run using this command:: - - $ nix-build nixos/unit-tests.nix - -Unit tests are also run on CI. - -The system tests are run using this command:: - - $ nix-build nixos/system-tests.nix - -The system tests boot QEMU VMs which prevents them from running on CI at this time. -The build requires > 10 GB of disk space, -and the VMs might be timing out on slow or busy machines. -If you run into timeouts, -try `raising the number of retries <https://whetstone.privatestorage.io/privatestorage/PrivateStorageio/-/blob/e8233d2/nixos/modules/tests/run-introducer.py#L55-62>`_. - -It is also possible go through the testing script interactively - useful for debugging:: - - $ nix-build -A private-storage.driver nixos/system-tests.nix - -This will give you a result symlink in the current directory. 
-Inside that is bin/nixos-test-driver which gives you a kind of REPL for interacting with the VMs. -The kind of `Python in this testScript <https://whetstone.privatestorage.io/privatestorage/PrivateStorageio/-/blob/78881a3/nixos/modules/tests/private-storage.nix#L180>`_ is what you can enter into this REPL. -Consult the `official documentation on NixOS Tests <https://nixos.org/manual/nixos/stable/index.html#sec-nixos-tests>`_ for more information. - -Updatings Pins --------------- - -Nixpkgs -``````` - -To update the version of NixOS we deploy with, run: - -.. code: shell - - nix-shell --run 'update-nixpkgs' - -That will update ``nixpkgs-2015.json`` to the latest release on the nixos-21.05 channel. - -To update the channel, the script will need to be updated, -along with the filenames that have the channel in them. - - -Architecture overview ---------------------- - -.. graphviz:: architecture-overview.dot - - -.. include:: - ../../../morph/grid/local/README.rst - -.. _Nix: https://nixos.org/nix - diff --git a/docs/source/dev/architecture-overview.dot b/docs/source/dev/architecture-overview.dot deleted file mode 100644 index cc95fbb74a5b67fc290b80d53ac679a1d1c9d972..0000000000000000000000000000000000000000 --- a/docs/source/dev/architecture-overview.dot +++ /dev/null @@ -1,50 +0,0 @@ -digraph subscriptions { - rankdir=LR - - subgraph cluster_usercontrolled { - label = "User Operated" - rankdir=LR - GridSync [label="GridSync", shape=circle] - Browser [label="Browser", shape=circle] - TahoeLAFS [label="Tahoe-LAFS", shape=circle] - } - - - subgraph cluster_pscontrolled { - label = "PrivateStorage.io Operated" - rankdir = TB - PSWebServer [label="PrivateStorage.io Web Server", shape=box] - SubscriptionConfigWHPeer [label="Subscription Config Wormhole Peer", shape=box] - PaymentServer [label="Payment Server", shape=box] - SATIssuer [label="SAT Issuer", shape=box] - PSStorageGrid [label="PrivateStorage.io Storage Grid", shape=box] - } - - User [label="User", shape=egg] - 
Stripe [label="Stripe", shape=pentagon] - - User -> PSWebServer [label="1. Get wormhole code", fontcolor=red, color=red] - PSWebServer -> User [label="2. 7-petulant-banana", fontcolor=blue, color=blue] - User -> GridSync [label="3. 7-petulant-banana", fontcolor=brown, color=brown] - GridSync -> SubscriptionConfigWHPeer [label="4. Get configuration", fontcolor=black, color=black] - SubscriptionConfigWHPeer -> GridSync [label="5. Grid configuration", fontcolor=magenta, color=magenta] - GridSync -> TahoeLAFS [label="6. Instantiate", fontcolor=aquamarine3, color=aquamarine3] - GridSync -> TahoeLAFS [label="7. Redeem PRN", fontcolor=crimson, color=crimson] - TahoeLAFS -> PaymentServer [label="8. Redeem PRN", fontcolor=crimson, color=crimson] - PaymentServer -> TahoeLAFS [label="9. Payment required", fontcolor=gold3, color=gold3] - TahoeLAFS -> GridSync [label="10. Payment required", fontcolor=gold3, color=gold3] - GridSync -> Browser [label="11. Open payment window", fontcolor=gold3, color=gold3] - User -> Browser [label="12. Enter payment info", fontcolor=blue, color=blue] - Browser -> Stripe [label="13. Submit payment form", fontcolor=brown, color=brown] - Stripe -> Browser [label="14. Payment ok", fontcolor=black, color=black] - Stripe -> PaymentServer [label="15. Payment notification", fontcolor=magenta, color=magenta] - GridSync -> TahoeLAFS [label="16. Redeem PRN", fontcolor=aquamarine3, color=aquamarine3] - TahoeLAFS -> TahoeLAFS [label="17. Generate blinded tokens", fontcolor=crimson, color=crimson] - TahoeLAFS -> SATIssuer [label="18. Redeem PRN, blinded-tokens=xs", fontcolor=crimson, color=crimson] - SATIssuer -> PaymentServer [label="19. Check PRN", fontcolor=gold3, color=gold3] - PaymentServer -> SATIssuer [label="20. PRN Valid", fontcolor=gold3, color=gold3] - SATIssuer -> TahoeLAFS [label="21. PRN valid, signed-tokens=ys", fontcolor=crimson, color=crimson] - TahoeLAFS -> TahoeLAFS [label="22. 
Store signed tokens", fontcolor=crimson, color=crimson] - TahoeLAFS -> GridSync [label="23. PRN Redeemed", fontcolor=red, color=red] - TahoeLAFS -> PSStorageGrid [label="24. Use storage, passes=y", fontcolor=magenta, color=magenta] -} diff --git a/morph/grid/local/config.json b/morph/grid/local/config.json index 8b23b6f1152be4fa94e8935342bf11f7706d036c..8bd686a023b704688c8708b2408d0c3df8287f13 100644 --- a/morph/grid/local/config.json +++ b/morph/grid/local/config.json @@ -5,6 +5,7 @@ , "monitoringvpnEndpoint": "192.168.67.24:51820" , "passValue": 1000000 , "issuerDomains": ["payments.localdev"] +, "monitoringDomains": ["monitoring.localdev"] , "letsEncryptAdminEmail": "florian@privatestorage.io" , "allowedChargeOrigins": [ "http://localhost:5000" diff --git a/morph/grid/local/grid.nix b/morph/grid/local/grid.nix index 46cb9c8ec1dc5278823c9e3ffc405289e7510469..4a1524c6b6b7f5e085766aec6a79af5b569e72ba 100644 --- a/morph/grid/local/grid.nix +++ b/morph/grid/local/grid.nix @@ -27,6 +27,8 @@ let ../../../nixos/modules/deployment.nix # Give it a good SSH configuration. ../../../nixos/modules/ssh.nix + # Configure things specific to the virtualisation environment. + gridlib.hardware-vagrant ]; services.private-storage.sshUsers = ssh-users; @@ -46,7 +48,7 @@ let # depend on the format we use. 
mode = "0666"; text = '' - # Include the ssh-users config + # Include the ssh-users config builtins.fromJSON (builtins.readFile ./ssh-users.json) ''; }; @@ -68,51 +70,63 @@ let payments = { imports = [ gridlib.issuer - (gridlib.hardware-virtual ({ publicIPv4 = "192.168.67.21"; })) (gridlib.customize-issuer (grid-config // { monitoringvpnIPv4 = "172.23.23.11"; })) grid-module ]; + config = { + grid.publicIPv4 = "192.168.67.21"; + }; }; storage1 = { imports = [ gridlib.storage - (gridlib.hardware-virtual ({ publicIPv4 = "192.168.67.22"; })) (gridlib.customize-storage (grid-config // { monitoringvpnIPv4 = "172.23.23.12"; stateVersion = "19.09"; })) grid-module ]; + config = { + grid.publicIPv4 = "192.168.67.22"; + }; }; storage2 = { imports = [ gridlib.storage - (gridlib.hardware-virtual ({ publicIPv4 = "192.168.67.23"; })) (gridlib.customize-storage (grid-config // { monitoringvpnIPv4 = "172.23.23.13"; stateVersion = "19.09"; })) grid-module ]; + config = { + grid.publicIPv4 = "192.168.67.23"; + }; }; monitoring = { imports = [ gridlib.monitoring - (gridlib.hardware-virtual ({ publicIPv4 = "192.168.67.24"; })) (gridlib.customize-monitoring { - inherit hostsMap vpnClientIPs nodeExporterTargets paymentExporterTargets; - inherit (grid-config) letsEncryptAdminEmail; + inherit hostsMap vpnClientIPs + nodeExporterTargets + paymentExporterTargets + blackboxExporterHttpsTargets; + inherit (grid-config) letsEncryptAdminEmail monitoringDomains; googleOAuthClientID = grid-config.monitoringGoogleOAuthClientID; + enableSlackAlert = false; monitoringvpnIPv4 = "172.23.23.1"; stateVersion = "19.09"; }) grid-module ]; + config = { + grid.publicIPv4 = "192.168.67.24"; + }; }; # TBD: derive these automatically: @@ -125,10 +139,15 @@ let vpnClientIPs = [ "172.23.23.11" "172.23.23.12" "172.23.23.13" ]; nodeExporterTargets = [ "monitoring" "payments" "storage1" "storage2" ]; paymentExporterTargets = [ "payments" ]; + blackboxExporterHttpsTargets = [ + # "https://private.storage/" + # 
"https://payments.private.storage/" + ]; in { network = { description = "PrivateStorage.io LocalDev Grid"; + inherit (gridlib) pkgs; }; inherit payments monitoring storage1 storage2; } diff --git a/morph/grid/local/private-keys/README.rst b/morph/grid/local/private-keys/README.rst index 684bf942a8010129f49cfcf79f5df1b60965ae45..8ecd2dd261b02dd757862703944ad970688d3e7e 100644 --- a/morph/grid/local/private-keys/README.rst +++ b/morph/grid/local/private-keys/README.rst @@ -19,6 +19,13 @@ grafana-admin.password This is the initial admin password for the Grafana web admin on the monitoring host. +grafana-slack-url +----------------- + +This file is read by Grafana's systemd service to set an environment variable with a secret Slack WebHook URL to post alerts to. +The only line in the file should be the secret URL. +Use the url from `this 1Password entry <https://privatestorage.1password.com/vaults/7flqasy5hhhmlbtp5qozd3j4ga/allitems/cgznskz2oix2tyx5xyntwaos5i>`_ or get a new secret URL for your Slack channel at https://www.slack.com/apps/A0F7XDUAZ. 
+ stripe.secret ------------- diff --git a/morph/grid/local/private-keys/grafana-slack-url b/morph/grid/local/private-keys/grafana-slack-url new file mode 100644 index 0000000000000000000000000000000000000000..0885b7bfe1786d19f845c45d749bafaf12756cb4 --- /dev/null +++ b/morph/grid/local/private-keys/grafana-slack-url @@ -0,0 +1,2 @@ +https://hooks.slack.com/services/x/y/z + diff --git a/morph/grid/production/config.json b/morph/grid/production/config.json index fcae1563a8fc0d3a8a11324fc6667105ae3179c8..1696b5fb3c45df94b8bf69aae9ca323e6bac2266 100644 --- a/morph/grid/production/config.json +++ b/morph/grid/production/config.json @@ -8,6 +8,10 @@ "payments.privatestorage.io" , "payments.private.storage" ] +, "monitoringDomains": [ + "monitoring.privatestorage.io" + , "monitoring.private.storage" + ] , "letsEncryptAdminEmail": "jean-paul@privatestorage.io" , "allowedChargeOrigins": [ "https://privatestorage.io" diff --git a/morph/grid/production/grid.nix b/morph/grid/production/grid.nix index 6009be84fb2a7ed7ca63e2e73b4f08f1f45ecb0d..950282f5573560f76355bcdcf4d6da51dacedd7d 100644 --- a/morph/grid/production/grid.nix +++ b/morph/grid/production/grid.nix @@ -45,9 +45,13 @@ let gridlib.monitoring gridlib.hardware-aws (gridlib.customize-monitoring { - inherit hostsMap vpnClientIPs nodeExporterTargets paymentExporterTargets; - inherit (grid-config) letsEncryptAdminEmail; + inherit hostsMap vpnClientIPs + nodeExporterTargets + paymentExporterTargets + blackboxExporterHttpsTargets; + inherit (grid-config) letsEncryptAdminEmail monitoringDomains; googleOAuthClientID = grid-config.monitoringGoogleOAuthClientID; + enableSlackAlert = true; monitoringvpnIPv4 = "172.23.23.1"; stateVersion = "19.09"; }) @@ -69,6 +73,10 @@ let # Slightly awkwardly, enable some of our hardware / network / bootloader options. ../../../nixos/modules/100tb.nix + # At least some of our storage nodes utilize MegaRAID storage controllers. + # Monitor their array status. 
+ ../../../nixos/modules/monitoring/exporters/megacli2prom.nix + # Get all of the configuration that is common across all storage nodes. gridlib.storage @@ -87,6 +95,10 @@ let # name is quoted because `1` makes `100tb` look an awful lot like a # number. "100tb".config = nodecfg; + + # Enable statistics gathering for MegaRAID cards. + # TODO would be nice to enable only on machines that have such a device. + services.private-storage.monitoring.megacli2prom.enable = true; }; # Define all of the storage nodes for this grid. @@ -126,10 +138,21 @@ let "storage005" ]; paymentExporterTargets = [ "payments" ]; + blackboxExporterHttpsTargets = [ + "https://private.storage/" + "https://www.private.storage/" + "https://privatestorage.io/" + "https://www.privatestorage.io/" + "https://payments.private.storage/" + "https://payments.privatestorage.io/" + "https://monitoring.private.storage/" + "https://monitoring.privatestorage.io/" + ]; in { network = { description = "PrivateStorage.io Production Grid"; + inherit (gridlib) pkgs; }; inherit payments; inherit monitoring; diff --git a/morph/grid/testing/config.json b/morph/grid/testing/config.json index a10840db52e8cd74bbac2a0ad38f4887c1a03258..7c3775df55ce76cf6048712e644a3f2669b6f07c 100644 --- a/morph/grid/testing/config.json +++ b/morph/grid/testing/config.json @@ -8,6 +8,10 @@ "payments.privatestorage-staging.com" , "payments.extra.privatestorage-staging.com" ] +, "monitoringDomains": [ + "monitoring.privatestorage-staging.com" + , "monitoring.extra.privatestorage-staging.com" + ] , "letsEncryptAdminEmail": "jean-paul@privatestorage.io" , "allowedChargeOrigins": [ "http://localhost:5000" diff --git a/morph/grid/testing/grid.nix b/morph/grid/testing/grid.nix index 18983f0b32d28f13981b56475d7691a8cb434808..334518774851c22738c93b323223f255d871a394 100644 --- a/morph/grid/testing/grid.nix +++ b/morph/grid/testing/grid.nix @@ -58,9 +58,13 @@ let gridlib.monitoring gridlib.hardware-aws (gridlib.customize-monitoring { - inherit 
hostsMap vpnClientIPs nodeExporterTargets paymentExporterTargets; - inherit (grid-config) letsEncryptAdminEmail; + inherit hostsMap vpnClientIPs + nodeExporterTargets + paymentExporterTargets + blackboxExporterHttpsTargets; + inherit (grid-config) letsEncryptAdminEmail monitoringDomains; googleOAuthClientID = grid-config.monitoringGoogleOAuthClientID; + enableSlackAlert = true; monitoringvpnIPv4 = "172.23.23.1"; stateVersion = "19.09"; }) @@ -77,10 +81,21 @@ let vpnClientIPs = [ "172.23.23.11" "172.23.23.12" ]; nodeExporterTargets = [ "monitoring" "payments" "storage001" ]; paymentExporterTargets = [ "payments" ]; + blackboxExporterHttpsTargets = [ + "https://privatestorage-staging.com/" + "https://www.privatestorage-staging.com/" + "https://extra.privatestorage-staging.com/" + "https://www.extra.privatestorage-staging.com/" + "https://payments.privatestorage-staging.com/" + "https://payments.extra.privatestorage-staging.com/" + "https://monitoring.privatestorage-staging.com/" + "https://monitoring.extra.privatestorage-staging.com/" + ]; in { network = { description = "PrivateStorage.io Testing Grid"; + inherit (gridlib) pkgs; }; inherit payments monitoring storage001; } diff --git a/morph/lib/base.nix b/morph/lib/base.nix index 271766d9cff5253f6d9a72e475dec3398b2cd6b3..7390654ac167909149b0a6f4dfae897b8f3f43a3 100644 --- a/morph/lib/base.nix +++ b/morph/lib/base.nix @@ -20,8 +20,12 @@ }; }; + # Any extra NixOS modules to load on all our servers. Note that just + # because they're loaded doesn't *necessarily* mean they're turned on. imports = [ - ../../nixos/modules/packages.nix + # This brings in various other modules that define options for different + # areas of the service. + ../../nixos/modules/default.nix ]; config = { @@ -31,6 +35,19 @@ # being configured and using variable names complicates a lot of things). # Instead, just tell morph how to reach the node here - by using its fully # qualified domain name. 
- deployment.targetHost = "${config.networking.hostName}.${config.networking.domain}"; + deployment.targetHost = config.networking.fqdn; + + assertions = [ + # This is a check to save somebody in the future trying to debug why + # setting `nixpkgs.config` is not having an effect. + { + # `{}` is the default value for `nixpkgs.config` + assertion = config.nixpkgs.config == {}; + message = '' + Since we set `nixpkgs.pkgs` via morph's `network.pkgs`, the value for `nixpkgs.config` is ignored. + See https://whetstone.privatestorage.io/privatestorage/PrivateStorageio/-/issues/85#note_15876 for details. + ''; + } + ]; }; } diff --git a/morph/lib/customize-monitoring.nix b/morph/lib/customize-monitoring.nix index 19a800f1fa806c09f132f2bb2769869a30c65ec2..2899d9940d4309b81a31f96590f0d3df1d632dc4 100644 --- a/morph/lib/customize-monitoring.nix +++ b/morph/lib/customize-monitoring.nix @@ -11,6 +11,7 @@ # See ``customize-issuer.nix``. , monitoringvpnIPv4 , letsEncryptAdminEmail +, monitoringDomains # A list of VPN IP addresses as strings indicating which clients will be # allowed onto the VPN. @@ -28,10 +29,18 @@ # which nodes to scrape PaymentServer metrics from. , paymentExporterTargets ? [] + # A list of HTTPS servers (URLs, IP addresses or hostnames) as strings indicating + # which nodes the BlackboxExporter should scrape HTTP and TLS metrics from. +, blackboxExporterHttpsTargets ? [] + # A string containing the GSuite OAuth2 ClientID to use to authenticate # logins to Grafana. , googleOAuthClientID + # Whether to enable alerting via Slack. + # When true requires a grafana-slack-url file (see private-keys/README.rst). +, enableSlackAlert ? false + # A string giving the NixOS state version for the system. , stateVersion , ... 
@@ -71,12 +80,25 @@ in { action = ["sudo" "systemctl" "restart" "grafana.service"]; }; }; + grafanaSlackUrl = + if !enableSlackAlert + then { } + else { + "grafana-slack-url" = { + source = "${privateKeyPath}/grafana-slack-url"; + destination = "/run/keys/grafana-slack-url"; + owner.user = config.systemd.services.grafana.serviceConfig.User; + owner.group = config.users.users.grafana.group; + permissions = "0400"; + action = ["sudo" "systemctl" "restart" "grafana.service"]; + }; + }; monitoringvpn = { "monitoringvpn-private-key".source = "${privateKeyPath}/monitoringvpn/server.key"; "monitoringvpn-preshared-key".source = "${privateKeyPath}/monitoringvpn/preshared.key"; }; in - grafanaSSO // monitoringvpn; + grafanaSSO // grafanaSlackUrl // monitoringvpn; networking.hosts = hostsMap; @@ -91,12 +113,14 @@ in { inherit nodeExporterTargets; inherit nginxExporterTargets; inherit paymentExporterTargets; + inherit blackboxExporterHttpsTargets; }; services.private-storage.monitoring.grafana = { inherit letsEncryptAdminEmail; inherit googleOAuthClientID; - domain = "${config.networking.hostName}.${config.networking.domain}"; + inherit enableSlackAlert; + domains = monitoringDomains; }; system.stateVersion = stateVersion; diff --git a/morph/lib/default.nix b/morph/lib/default.nix index bf25e5a58d04d148296bffef48acc4e4e125684b..a820cc559b6b2da78c06bcb84282e392c3a1ebc7 100644 --- a/morph/lib/default.nix +++ b/morph/lib/default.nix @@ -5,7 +5,7 @@ base = import ./base.nix; hardware-aws = import ./issuer-aws.nix; - hardware-virtual = import ./hardware-virtual.nix; + hardware-vagrant = import ./hardware-vagrant.nix; issuer = import ./issuer.nix; customize-issuer = import ./customize-issuer.nix; @@ -17,4 +17,20 @@ customize-monitoring = import ./customize-monitoring.nix; modules = builtins.toString ../../nixos/modules; + + # The nixpkgs version used in our deployments. This affects both the packages + # installed, as well as the NixOS module set that is used. 
+ # This is intended to be used in a grid definition like: + # network = { ... ; inherit (gridlib) pkgs; ... } + pkgs = import ../../nixpkgs-2105.nix { + # Ensure that configuration of the system where this runs + # doesn't leak into what we build. + # See https://github.com/NixOS/nixpkgs/issues/62513 + config = { pkgs }: let lib = pkgs.lib; in { + allowUnfreePredicate = pkg: builtins.elem (lib.getName pkg) [ + "megacli" + ]; + }; + overlays = []; + }; } diff --git a/morph/lib/hardware-vagrant.nix b/morph/lib/hardware-vagrant.nix new file mode 100644 index 0000000000000000000000000000000000000000..6c41af4923861e89d144303d129d7babde494363 --- /dev/null +++ b/morph/lib/hardware-vagrant.nix @@ -0,0 +1,57 @@ +{ config, lib, modulesPath, ... }: +{ + imports = [ + # modulesPath points at the upstream nixos/modules directory. + "${modulesPath}/virtualisation/vagrant-guest.nix" + ]; + + options.grid = { + publicIPv4 = lib.mkOption { + type = lib.types.str; + description = '' + The primary IPv4 address of the virtual machine. + ''; + }; + }; + + config = { + virtualisation.virtualbox.guest.enable = true; + + boot.loader.grub.device = "/dev/sda"; + + boot.initrd.availableKernelModules = [ "ata_piix" "sd_mod" "sr_mod" ]; + boot.kernel.sysctl = { "vm.swappiness" = 0; }; + + # remove the fsck that runs at startup. It will always fail to run, stopping + # your boot until you press *. + boot.initrd.checkJournalingFS = false; + + networking.interfaces.enp0s8.ipv4.addresses = [{ + address = config.grid.publicIPv4; + prefixLength = 24; + }]; + + # The issuer configuration wants to read the location of its database + # directory from the filesystem configuration. Since the Vagrant + # environment doesn't have separate volume-as-infrastructure management + # (maybe it could? but why bother?) we do a bind-mount here so there is a + # configured value readable. The database won't really have a dedicated + # volume but it will sort of appear as if it does. 
+ services.private-storage-issuer.databaseFileSystem = { + device = "/var/lib/origin-zkapissuer-v2"; + options = ["bind"]; + }; + + # XXX This should be handled by the storage module like the zkap + # filesystem above is handled by the issuer module. + fileSystems."/storage" = { fsType = "tmpfs"; }; + + fileSystems."/" = + { device = "/dev/sda1"; + fsType = "ext4"; + }; + + # We want to push packages with morph without having to sign them + nix.trustedUsers = [ "@wheel" "root" "vagrant" ]; + }; +} diff --git a/morph/lib/hardware-virtual.nix b/morph/lib/hardware-virtual.nix deleted file mode 100644 index cf1582792bff77c491210ee5e91f99bfbffbf9f3..0000000000000000000000000000000000000000 --- a/morph/lib/hardware-virtual.nix +++ /dev/null @@ -1,36 +0,0 @@ -{ publicIPv4, ... }: -{ - imports = [ ./vagrant-guest.nix ]; - - virtualisation.virtualbox.guest.enable = true; - - # Use the GRUB 2 boot loader. - boot.loader.grub.enable = true; - boot.loader.grub.version = 2; - boot.loader.grub.device = "/dev/sda"; - - boot.initrd.availableKernelModules = [ "ata_piix" "sd_mod" "sr_mod" ]; - boot.initrd.kernelModules = [ ]; - boot.kernel.sysctl = { "vm.swappiness" = 0; }; - boot.kernelModules = [ ]; - boot.extraModulePackages = [ ]; - - # remove the fsck that runs at startup. It will always fail to run, stopping - # your boot until you press *. 
- boot.initrd.checkJournalingFS = false; - - networking.interfaces.enp0s8.ipv4.addresses = [{ - address = publicIPv4; - prefixLength = 24; - }]; - - fileSystems."/storage" = { fsType = "tmpfs"; }; - fileSystems."/" = - { device = "/dev/sda1"; - fsType = "ext4"; - }; - swapDevices = [ ]; - - # We want to push packages with morph without having to sign them - nix.trustedUsers = [ "@wheel" "root" "vagrant" ]; -} diff --git a/morph/lib/issuer-aws.nix b/morph/lib/issuer-aws.nix index bf7de56cfb570857da32c34ebfcc9b21c91e702e..8ff172803eda784898aba2d96636df1afcee36e5 100644 --- a/morph/lib/issuer-aws.nix +++ b/morph/lib/issuer-aws.nix @@ -18,6 +18,14 @@ # <https://github.com/DBCDK/morph/issues/146>. networking.hostName = name; + # Mount a dedicated filesystem (ideally on a dedicated volume, but that's + # beyond control of this particular part of the system) for the + # PaymentServer voucher database. This makes it easier to manage for + # tasks like backup/recovery and encryption. + services.private-storage-issuer.databaseFileSystem = { + label = "zkapissuer-data"; + }; + # Clean up packages after a while nix.gc = { automatic = true; diff --git a/morph/lib/issuer.nix b/morph/lib/issuer.nix index d60af799888c97ec8f97a061d40b54d3f2db82a7..d3ee812e865f741b01eb811589262ae01ece824f 100644 --- a/morph/lib/issuer.nix +++ b/morph/lib/issuer.nix @@ -45,7 +45,6 @@ in { }; imports = [ - ../../nixos/modules/issuer.nix ../../nixos/modules/monitoring/vpn/client.nix ../../nixos/modules/monitoring/exporters/node.nix ]; @@ -56,6 +55,6 @@ in { ristrettoSigningKeyPath = config.deployment.secrets.ristretto-signing-key.destination; stripeSecretKeyPath = config.deployment.secrets.stripe-secret-key.destination; database = "SQLite3"; - databasePath = "/var/lib/zkapissuer/vouchers.sqlite3"; + databasePath = "${config.fileSystems."zkapissuer-data".mountPoint}/vouchers.sqlite3"; }; } diff --git a/morph/lib/monitoring.nix b/morph/lib/monitoring.nix index 
bf92d1041f2bf9b9fb1ff4580a25ff7b596a9bbb..89a328e89a799b445dff7180dff552350b9629cf 100644 --- a/morph/lib/monitoring.nix +++ b/morph/lib/monitoring.nix @@ -25,6 +25,7 @@ ../../nixos/modules/monitoring/server/grafana.nix ../../nixos/modules/monitoring/server/prometheus.nix ../../nixos/modules/monitoring/exporters/node.nix + ../../nixos/modules/monitoring/exporters/blackbox.nix # Loki 0.3.0 from Nixpkgs 19.09 is too old and does not work: # ../../nixos/modules/monitoring/server/loki.nix ]; diff --git a/morph/lib/storage.nix b/morph/lib/storage.nix index 86e142286351237099337d38d03a9b54255b8246..15e2373737a7ff2f1efe8cf2c41b59de606f0a1a 100644 --- a/morph/lib/storage.nix +++ b/morph/lib/storage.nix @@ -43,8 +43,12 @@ in { ../../nixos/modules/monitoring/vpn/client.nix # Expose base system metrics over the monitoringvpn. ../../nixos/modules/monitoring/exporters/node.nix + # Collect Tahoe OpenMetrics statistics. + ../../nixos/modules/monitoring/exporters/tahoe.nix ]; + services.private-storage.monitoring.tahoe.enable = true; + # Turn on the Private Storage (Tahoe-LAFS) service. services.private-storage = { # Yep. Turn it on. diff --git a/morph/lib/vagrant-guest.nix b/morph/lib/vagrant-guest.nix deleted file mode 100644 index 360671f5e8391571d37da6db37b2de8dc02b66bd..0000000000000000000000000000000000000000 --- a/morph/lib/vagrant-guest.nix +++ /dev/null @@ -1,91 +0,0 @@ -# Minimal configuration that vagrant depends on - -{ config, pkgs, lib, ... }: -let - # Vagrant uses an insecure shared private key by default, but we - # don't use the authorizedKeys attribute under users because it should be - # removed on first boot and replaced with a random one. This script sets - # the correct permissions and installs the temporary key if no - # ~/.ssh/authorized_keys exists. - install-vagrant-ssh-key = pkgs.writeScriptBin "install-vagrant-ssh-key" '' - #!${pkgs.runtimeShell} - if [ ! 
-e ~/.ssh/authorized_keys ]; then - mkdir -m 0700 -p ~/.ssh - echo "ssh-rsa AAAAB3NzaC1yc2EAAAABIwAAAQEA6NF8iallvQVp22WDkTkyrtvp9eWW6A8YVr+kz4TjGYe7gHzIw+niNltGEFHzD8+v1I2YJ6oXevct1YeS0o9HZyN1Q9qgCgzUFtdOKLv6IedplqoPkcmF0aYet2PkEDo3MlTBckFXPITAMzF8dJSIFo9D8HfdOV0IAdx4O7PtixWKn5y2hMNG0zQPyUecp4pzC6kivAIhyfHilFR61RGL+GPXQ2MWZWFYbAGjyiYJnAmCP3NOTd0jMZEnDkbUvxhMmBYSdETk1rRgm+R4LOzFUGaHqHDLKLX+FIPKcF96hrucXzcWyLbIbEgE98OHlnVYCzRdK8jlqm8tehUc9c9WhQ== vagrant insecure public key" >> ~/.ssh/authorized_keys - chmod 0600 ~/.ssh/authorized_keys - fi - ''; -in -{ - # Services to enable: - - # Enable the OpenSSH daemon. - services.openssh.enable = true; - - # Wireguard kernel module for Kernels < 5.6 - boot = lib.mkIf (lib.versionOlder pkgs.linuxPackages.kernel.version "5.6") { - extraModulePackages = [ config.boot.kernelPackages.wireguard ] ; - }; - - # Enable DBus - services.dbus.enable = true; - - # Replace ntpd by timesyncd - services.timesyncd.enable = true; - - # Packages for Vagrant - environment.systemPackages = with pkgs; [ - findutils - gnumake - iputils - jq - nettools - netcat - nfs-utils - rsync - ]; - - users.users.root = { password = "vagrant"; }; - - # Creates a "vagrant" group & user with password-less sudo access - users.groups.vagrant = { - name = "vagrant"; - members = [ "vagrant" ]; - }; - users.extraUsers.vagrant = { - isNormalUser = true; - createHome = true; - group = "vagrant"; - extraGroups = [ "users" "wheel" ]; - password = "vagrant"; - home = "/home/vagrant"; - useDefaultShell = true; - }; - - systemd.services.install-vagrant-ssh-key = { - description = "Vagrant SSH key install (if needed)"; - after = [ "fs.target" ]; - wants = [ "fs.target" ]; - wantedBy = [ "multi-user.target" ]; - serviceConfig = { - ExecStart = "${install-vagrant-ssh-key}/bin/install-vagrant-ssh-key"; - User = "vagrant"; - # So it won't be (needlessly) restarted: - RemainAfterExit = true; - }; - }; - - security.sudo.wheelNeedsPassword = false; - - security.sudo.extraConfig = - 
'' - Defaults:root,%wheel env_keep+=LOCALE_ARCHIVE - Defaults:root,%wheel env_keep+=NIX_PATH - Defaults:root,%wheel env_keep+=TERMINFO_DIRS - Defaults env_keep+=SSH_AUTH_SOCK - Defaults lecture = never - root ALL=(ALL) SETENV: ALL - %wheel ALL=(ALL) NOPASSWD: ALL, SETENV: ALL - ''; -} - diff --git a/nixos/modules/100tb.nix b/nixos/modules/100tb.nix index a8db0e8be56798e4491033bfaeec3e4d48c01a8f..87554ef316b3c25076260d0f43aa8008941580e2 100644 --- a/nixos/modules/100tb.nix +++ b/nixos/modules/100tb.nix @@ -34,39 +34,39 @@ let options = { hostId = lib.mkOption { type = lib.types.str; - example = lib.literalExample "abcdefab"; + example = "abcdefab"; description = "The 32-bit host ID of the machine, formatted as 8 hexadecimal characters."; }; interface = lib.mkOption { type = lib.types.str; - example = lib.literalExample "eno0"; + example = "eno0"; description = "The name of the network interface on which to configure a static address."; }; publicIPv4 = lib.mkOption { type = lib.types.str; - example = lib.literalExample "192.0.2.0"; + example = "192.0.2.0"; description = "The IPv4 address to statically assign to `interface`."; }; prefixLength = lib.mkOption { type = lib.types.int; - example = lib.literalExample 24; + example = 24; description = "The statically configured network's prefix length."; }; gateway = lib.mkOption { type = lib.types.str; - example = lib.literalExample "192.0.2.1"; + example = "192.0.2.1"; description = "The statically configured address of the network gateway."; }; gatewayInterface = lib.mkOption { type = lib.types.str; - example = lib.literalExample "eno0"; + example = "eno0"; description = "The name of the network interface for the default route."; default = cfg.interface; }; grubDeviceID = lib.mkOption { type = lib.types.str; - example = lib.literalExample "wwn-0x5000c500936410b9"; + example = "wwn-0x5000c500936410b9"; description = "The ID of the disk on which to install grub."; }; }; diff --git a/nixos/modules/default.nix 
b/nixos/modules/default.nix new file mode 100644 index 0000000000000000000000000000000000000000..1772d399639aa8b4ec2c9ac6a218c4dd8a6169da --- /dev/null +++ b/nixos/modules/default.nix @@ -0,0 +1,16 @@ +{ + # Load modules that are sometimes universally useful and other times useful + # only for a specific service. Where functionality is not universally + # useful, it needs to be enabled by a node's configuration. By loading more + # modules (and therefore defining more options) than is strictly necessary + # for any single node the logic for supplying conditional configuration + # elsewhere is much simplified. For example, a Vagrant module can + # unconditionally set up a filesystem for PaymentServer. If PaymentServer + # is running on that node then it will get a Vagrant-appropriate + # configuration. If PaymentServer hasn't been enabled then the + # configuration will just be ignored. + imports = [ + ./packages.nix + ./issuer.nix + ]; +} diff --git a/nixos/modules/deployment.nix b/nixos/modules/deployment.nix index b0a5e3c4c761d188922a076643fcd3a25a4b81f0..41381ce5d33e62f4e569b87709d591f3586804df 100755 --- a/nixos/modules/deployment.nix +++ b/nixos/modules/deployment.nix @@ -16,7 +16,7 @@ in { options = { services.private-storage.deployment.authorizedKey = lib.mkOption { type = lib.types.str; - example = lib.literalExample '' + example = '' ssh-ed25519 AAAAC3N... ''; description = '' @@ -25,7 +25,7 @@ in { }; services.private-storage.deployment.gridName = lib.mkOption { type = lib.types.str; - example = lib.literalExample "staging"; + example = "staging"; description = '' The name of the grid configuration to use to update this deployment. 
''; diff --git a/nixos/modules/issuer.nix b/nixos/modules/issuer.nix index 85c39c7271e9273b5e299980ebf7a46849bc9457..67bc3c5029c290676a777115179b273b2e8851ef 100644 --- a/nixos/modules/issuer.nix +++ b/nixos/modules/issuer.nix @@ -8,14 +8,14 @@ in { services.private-storage-issuer.package = lib.mkOption { default = ourpkgs.zkapissuer; type = lib.types.package; - example = lib.literalExample "pkgs.zkapissuer.components.exes.\"PaymentServer-exe\""; + example = lib.literalExpression "pkgs.zkapissuer.components.exes.\"PaymentServer-exe\""; description = '' The package to use for the ZKAP issuer. ''; }; services.private-storage-issuer.domains = lib.mkOption { type = lib.types.listOf lib.types.str; - example = lib.literalExample [ "payments.example.com" ]; + example = [ "payments.example.com" ]; description = '' The domain names at which the issuer is reachable. ''; @@ -32,7 +32,7 @@ in { services.private-storage-issuer.issuer = lib.mkOption { default = "Ristretto"; type = lib.types.enum [ "Trivial" "Ristretto" ]; - example = lib.literalExample "Trivial"; + example = "Trivial"; description = '' The issuer algorithm to use. Either Trivial for a fake no-crypto algorithm or Ristretto for Ristretto-flavored PrivacyPass. @@ -81,6 +81,15 @@ in { The kind of voucher database to use. ''; }; + services.private-storage-issuer.databaseFileSystem = lib.mkOption { + # Logically, the type is the type of an entry in fileSystems - but we'll + # just let the type system enforce that when we pass the value on to + # fileSystems. + description = '' + Configuration for a filesystem to mount which will hold the issuer's + internal state database. + ''; + }; services.private-storage-issuer.databasePath = lib.mkOption { default = null; type = lib.types.str; @@ -111,11 +120,26 @@ in { # We'll refer to this collection of domains by the first domain in the # list. 
domain = builtins.head cfg.domains; - certServiceName = "acme-${domain}"; # Payment server internal http port (arbitrary, non-priviledged): internalHttpPort = "1061"; + # The "-vN" suffix indicates that this is the Nth incompatible version of + # on-disk state as managed by this deployment system. This does not have + # anything to do with what's inside the PaymentServer-managed state. + # Instead it's about things like the type of filesystem used or options + # having to do with the backing volume behind the filesystem. In + # general I expect that to get from "-vN" to "-v(N+1)" some manual + # upgrade steps will be required. + stateDirectory = "zkapissuer-v2"; + in lib.mkIf cfg.enable { + # Make sure the voucher database filesystem is mounted. + fileSystems = { + "zkapissuer-data" = cfg.databaseFileSystem // { + mountPoint = "/var/lib/${stateDirectory}"; + }; + }; + # Add a systemd service to run PaymentServer. systemd.services.zkapissuer = { enable = true; @@ -138,15 +162,30 @@ in { # Make systemd create a User/Group owned directory for PaymentServer # state. According to the docs at # https://www.freedesktop.org/software/systemd/man/systemd.exec.html#RuntimeDirectory= - # "The specified directory names must be relative" ... this - # makes systemd create /var/lib/zkapissuer/ for us: - serviceConfig.StateDirectory = "zkapissuer"; + # "The specified directory names must be relative" ... this makes + # systemd create this directory in /var/lib/ for us. + serviceConfig.StateDirectory = stateDirectory; serviceConfig.StateDirectoryMode = "0750"; - # Bail if there is still an old (root-owned) DB file on this system. - # If you hit this, and this /var/db/ file is indeed current, move it to - # /var/lib/zkapissuer/vouchers.sqlite3 and chown it to zkapissuer:zkapissuer. - unitConfig.AssertPathExists = "!/var/db/vouchers.sqlite3"; + unitConfig.AssertPathExists = [ + # Bail if there is still an old (root-owned) DB file on this system. 
+ # If you hit this, and this /var/db/ file is indeed current, move it + # to /var/lib/zkapissuer/vouchers.sqlite3 and chown it to + # zkapissuer:zkapissuer. + "!/var/db/vouchers.sqlite3" + + # Similarly, bail if the newer path you were just told to create -- + # /var/lib/zkapissuer/vouchers.sqlite3 -- exists. It needs to be + # moved to /var/lib/zkapissuer-v2 where a dedicated filesystem has been + # created for it. + "!/var/lib/zkapissuer/vouchers.sqlite3" + ]; + + # Only start if the dedicated vouchers database filesystem is mounted so + # that we know we're going to find our vouchers database there (or that + # we will create it in the right place). + unitConfig.Requires = ["local-fs.target"]; + unitConfig.After = ["local-fs.target"]; script = let @@ -189,7 +228,7 @@ in { extraGroups = [ "keys" ]; }; - # Open 80 and 443 for the certbot HTTP server and the PaymentServer HTTPS server. + # Open 80 and 443 for nginx networking.firewall.allowedTCPPorts = [ 80 443 @@ -224,6 +263,12 @@ in { ''; proxyPass = "http://127.0.0.1:${internalHttpPort}"; }; + locations."/" = { + # Return a 404 error for any paths not specified above. + extraConfig = '' + return 404; + ''; + }; }; }; diff --git a/nixos/modules/monitoring/exporters/blackbox.nix b/nixos/modules/monitoring/exporters/blackbox.nix new file mode 100644 index 0000000000000000000000000000000000000000..c08dee2a5778b9ac037268cef9907f20537e96b2 --- /dev/null +++ b/nixos/modules/monitoring/exporters/blackbox.nix @@ -0,0 +1,32 @@ +# Prometheus Blackbox exporter configuration +# +# Scope: From the monitoring machine, ping (etc.) hosts to check whether +# they are reachable, certs are still valid for a while, etc. +# +# Notes: The Blackbox exporter is using the "Multi Target Exporter" pattern, +# see https://prometheus.io/docs/guides/multi-target-exporter/ . +# +# Usage: Import this on a monitoring server + +{ config, lib, pkgs, ... 
}: { + config.services.prometheus.exporters.blackbox = { + enable = true; + + configFile = pkgs.writeText "blackbox-exporter.yaml" (builtins.toJSON { + modules = { + https_2xx = { + prober = "http"; + timeout = "5s"; + http = { + fail_if_not_ssl = true; + # This prober is for IPv4 only. + preferred_ip_protocol = "ip4"; + ip_protocol_fallback = false; + }; + }; + }; + }); + + }; +} + diff --git a/nixos/modules/monitoring/exporters/megacli2prom.nix b/nixos/modules/monitoring/exporters/megacli2prom.nix new file mode 100644 index 0000000000000000000000000000000000000000..a38f1ccc18b59073ff835e50babeb565f79a20b8 --- /dev/null +++ b/nixos/modules/monitoring/exporters/megacli2prom.nix @@ -0,0 +1,55 @@ +# MegaCli to Prometheus text format exporter +# +# Scope: Gets data from MegaRAID compatible storage controllers and mogrifies +# to Prometheus text format, saves to a temp file, to later be scraped +# by the node exporter. +# +# Usage: Import this to every server with a MegaRAID card that you want to +# include in the central monitoring system +# +# See https://nixos.org/manual/nixos/stable/#module-services-prometheus-exporters + +{ config, options, lib, ourpkgs, pkgs, ... }: + +let + cfg = config.services.private-storage.monitoring.megacli2prom; + +in { + options.services.private-storage.monitoring.megacli2prom = { + enable = lib.mkEnableOption "MegaCli2Prom metrics gathering service"; + outFile = lib.mkOption { + type = lib.types.str; + description = "Where to store the temporary file for node exporter to scrape?"; + default = "/run/prometheus-node-exporter/megacli.prom"; + }; + interval = lib.mkOption { + type = lib.types.str; + description = '' + How often to do it? + See https://www.freedesktop.org/software/systemd/man/systemd.time.html#Calendar%20Events + ''; + # Every five minutes. 
+ default = "*:0/5"; + }; + }; + + config = + lib.mkIf cfg.enable { + environment.systemPackages = [ ourpkgs.megacli2prom ]; + systemd.services.megacli2prom = { + enable = true; + description = "MegaCli2Prom metrics gathering service"; + wantedBy = [ "multi-user.target" ]; + startAt = cfg.interval; + path = [ pkgs.megacli ]; + # Save to a temp file and then move atomically so the + # textfile collector won't read a partial file. + # See https://github.com/prometheus/node_exporter#textfile-collector + script = '' + "${ourpkgs.megacli2prom}/bin/megacli2prom" > "${cfg.outFile}.tmp" + mv "${cfg.outFile}.tmp" "${cfg.outFile}" + ''; + }; + }; +} + diff --git a/nixos/modules/monitoring/exporters/node.nix b/nixos/modules/monitoring/exporters/node.nix index 62702e82f1e0a6bd9effae871f275c5dd23a37ae..d854ff7398cd19ac4d4d5b3f8739073feb84834a 100644 --- a/nixos/modules/monitoring/exporters/node.nix +++ b/nixos/modules/monitoring/exporters/node.nix @@ -35,8 +35,8 @@ in { #"softnet" # not in nixpkgs 19.09 "stat" "systemd" - # "textfile" - # "textfile.directory /run/prometheus-node-exporter" + "textfile" + "textfile.directory /run/prometheus-node-exporter" #"thermal_zone" # not in nixpkgs 19.09 "time" #"udp_queues" # not in nixpkgs 19.09 diff --git a/nixos/modules/monitoring/exporters/tahoe.nix b/nixos/modules/monitoring/exporters/tahoe.nix new file mode 100644 index 0000000000000000000000000000000000000000..9bffabe74d45341b981ce3eb1cf05323e71fb5c4 --- /dev/null +++ b/nixos/modules/monitoring/exporters/tahoe.nix @@ -0,0 +1,65 @@ +# Tahoe Prometheus metrics collector +# +# Scope: Retrieves OpenMetrics from Tahoe and puts them +# where textfile collector can find them. +# +# Usage: Import this to every server running Tahoe. +# +# See https://nixos.org/manual/nixos/stable/#module-services-prometheus-exporters + +{ config, options, lib, pkgs, ... 
}: + +let + cfg = config.services.private-storage.monitoring.tahoe; + +in { + options.services.private-storage.monitoring.tahoe = { + enable = lib.mkEnableOption "Tahoe OpenMetrics collecting service"; + scrapeEndpoint = lib.mkOption { + type = lib.types.str; + description = "Where to get our metrics from?"; + default = "http://localhost:3456/statistics?t=openmetrics"; + }; + outFile = lib.mkOption { + type = lib.types.str; + description = "Where to store the temporary file for node exporter to scrape?"; + default = "/run/prometheus-node-exporter/tahoe.prom"; + }; + interval = lib.mkOption { + type = lib.types.str; + description = '' + How often to do it? + See https://www.freedesktop.org/software/systemd/man/systemd.time.html#Calendar%20Events + ''; + # Every five minutes. + default = "*:0/5"; + }; + }; + + config = + lib.mkIf cfg.enable { + environment.systemPackages = [ pkgs.curl ]; + + systemd.services.tahoe-metrics-collector = { + enable = true; + description = "Tahoe metrics gathering service"; + after = [ "tahoe.storage.service" ]; + startAt = cfg.interval; + path = [ pkgs.curl ]; + restartIfChanged = false; + + # Save to a temp file and then move atomically so the + # textfile collector won't read a partial file. 
+ # See https://github.com/prometheus/node_exporter#textfile-collector + script = '' + curl --silent --show-error --fail-with-body --output "${cfg.outFile}.tmp" "${cfg.scrapeEndpoint}" + mv "${cfg.outFile}.tmp" "${cfg.outFile}" + ''; + }; + + systemd.timers.tahoe-metrics-collector = { + after = [ "tahoe.storage.service" ]; + }; + }; +} + diff --git a/nixos/modules/monitoring/server/grafana-dashboards/meta-monitoring.json b/nixos/modules/monitoring/server/grafana-dashboards/meta-monitoring.json new file mode 100644 index 0000000000000000000000000000000000000000..17564492ffc163c2c98a1a5e6ed35bc52d63e6c0 --- /dev/null +++ b/nixos/modules/monitoring/server/grafana-dashboards/meta-monitoring.json @@ -0,0 +1,180 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": "-- Grafana --", + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "description": "Watching the watchers", + "editable": true, + "gnetId": null, + "graphTooltip": 0, + "id": 22, + "links": [], + "panels": [ + { + "alert": { + "alertRuleTags": {}, + "conditions": [ + { + "evaluator": { + "params": [ + 0 + ], + "type": "gt" + }, + "operator": { + "type": "and" + }, + "query": { + "params": [ + "A", + "5m", + "now" + ] + }, + "reducer": { + "params": [], + "type": "count" + }, + "type": "query" + } + ], + "executionErrorState": "alerting", + "for": "5m", + "frequency": "1m", + "handler": 1, + "name": "Scraping down", + "noDataState": "ok", + "notifications": [] + }, + "aliasColors": {}, + "bars": true, + "dashLength": 10, + "dashes": false, + "datasource": null, + "description": "Is Prometheus having problems scraping our instances? 
Should be zero.", + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 0 + }, + "hiddenSeries": false, + "id": 6, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": false, + "linewidth": 1, + "nullPointMode": "null as zero", + "options": { + "alertThreshold": false + }, + "percentage": false, + "pluginVersion": "7.5.10", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": true, + "steppedLine": false, + "targets": [ + { + "exemplar": true, + "expr": "count by (job, instance) (up == 0)", + "hide": false, + "interval": "", + "legendFormat": "{{job}}/{{instance}}", + "refId": "A" + } + ], + "thresholds": [ + { + "colorMode": "critical", + "fill": false, + "line": false, + "op": "gt", + "value": 0, + "visible": true + } + ], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Scraping failures", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": 0, + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + } + ], + "refresh": false, + "schemaVersion": 27, + "style": "dark", + "tags": [], + "templating": { + "list": [] + }, + "time": { + "from": "now-30m", + "to": "now" + }, + "timepicker": {}, + "timezone": "", + "title": "Meta monitoring", + "uid": "MetaMonitoring", + "version": 1 +} diff --git a/nixos/modules/monitoring/server/grafana-config/payments.json 
b/nixos/modules/monitoring/server/grafana-dashboards/payments.json similarity index 99% rename from nixos/modules/monitoring/server/grafana-config/payments.json rename to nixos/modules/monitoring/server/grafana-dashboards/payments.json index 6bb121e4f0ad377145956fb2d75bb0679524bd8a..7d6f6bb12bae5c1401b1199a8b2831b39a4ba955 100644 --- a/nixos/modules/monitoring/server/grafana-config/payments.json +++ b/nixos/modules/monitoring/server/grafana-dashboards/payments.json @@ -208,7 +208,7 @@ }, { "exemplar": true, - "expr": "payment_redemption_voucher_redeemed", + "expr": "payment_redemption_vouchers_redeemed", "format": "time_series", "hide": false, "interval": "", diff --git a/nixos/modules/monitoring/server/grafana-config/resources-overview.json b/nixos/modules/monitoring/server/grafana-dashboards/resources-overview.json similarity index 77% rename from nixos/modules/monitoring/server/grafana-config/resources-overview.json rename to nixos/modules/monitoring/server/grafana-dashboards/resources-overview.json index cb5bc91da7c3adbb1c9377473b053d31d53550f0..745f7b7ff03d9a0a4e93fc50946f8066fb08e7b9 100644 --- a/nixos/modules/monitoring/server/grafana-config/resources-overview.json +++ b/nixos/modules/monitoring/server/grafana-dashboards/resources-overview.json @@ -41,7 +41,7 @@ "description": "Some of our software runs in a single thread, so this shows max CPU per core (instead of averaged over all cores)", "fieldConfig": { "defaults": { - "custom": {} + "links": [] }, "overrides": [] }, @@ -49,7 +49,7 @@ "fillGradient": 0, "gridPos": { "h": 7, - "w": 8, + "w": 6, "x": 0, "y": 1 }, @@ -68,11 +68,10 @@ "linewidth": 1, "nullPointMode": "null", "options": { - "alertThreshold": true, - "dataLinks": [] + "alertThreshold": true }, "percentage": false, - "pluginVersion": "7.3.5", + "pluginVersion": "7.5.10", "pointradius": 2, "points": false, "renderer": "flot", @@ -173,8 +172,8 @@ "datasource": null, "fieldConfig": { "defaults": { - "custom": {}, - "displayName": 
"${__field.labels.instance}" + "displayName": "${__field.labels.instance}", + "links": [] }, "overrides": [ { @@ -194,8 +193,8 @@ "fillGradient": 0, "gridPos": { "h": 7, - "w": 8, - "x": 8, + "w": 6, + "x": 6, "y": 1 }, "hiddenSeries": false, @@ -213,11 +212,10 @@ "linewidth": 1, "nullPointMode": "null", "options": { - "alertThreshold": true, - "dataLinks": [] + "alertThreshold": true }, "percentage": false, - "pluginVersion": "7.3.5", + "pluginVersion": "7.5.10", "pointradius": 2, "points": false, "renderer": "flot", @@ -241,7 +239,7 @@ "line": true, "op": "gt", "value": 1, - "yaxis": "left" + "visible": true } ], "timeFrom": null, @@ -328,7 +326,7 @@ "description": "How much RAM is in use? Relative to available system memory.", "fieldConfig": { "defaults": { - "custom": {} + "links": [] }, "overrides": [] }, @@ -336,8 +334,8 @@ "fillGradient": 0, "gridPos": { "h": 7, - "w": 8, - "x": 16, + "w": 6, + "x": 12, "y": 1 }, "hiddenSeries": false, @@ -356,11 +354,10 @@ "linewidth": 1, "nullPointMode": "null", "options": { - "alertThreshold": true, - "dataLinks": [] + "alertThreshold": true }, "percentage": false, - "pluginVersion": "7.3.5", + "pluginVersion": "7.5.10", "pointradius": 2, "points": false, "renderer": "flot", @@ -384,7 +381,7 @@ "line": true, "op": "gt", "value": 0.8, - "yaxis": "left" + "visible": true } ], "timeFrom": null, @@ -428,6 +425,152 @@ "alignLevel": null } }, + { + "alert": { + "alertRuleTags": {}, + "conditions": [ + { + "evaluator": { + "params": [ + 0.1 + ], + "type": "gt" + }, + "operator": { + "type": "and" + }, + "query": { + "params": [ + "A", + "5m", + "now" + ] + }, + "reducer": { + "params": [], + "type": "avg" + }, + "type": "query" + } + ], + "executionErrorState": "alerting", + "for": "5m", + "frequency": "1m", + "handler": 1, + "name": "Swap usage alert", + "noDataState": "no_data", + "notifications": [] + }, + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "description": "How 
much Swap is in use? Relative to available swap.", + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 6, + "x": 18, + "y": 1 + }, + "hiddenSeries": false, + "id": 30, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "hideEmpty": false, + "hideZero": false, + "max": false, + "min": false, + "rightSide": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.10", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "exemplar": true, + "expr": "1 - node_memory_SwapFree_bytes / node_memory_SwapTotal_bytes", + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{instance}}", + "refId": "A" + } + ], + "thresholds": [ + { + "colorMode": "critical", + "fill": true, + "line": true, + "op": "gt", + "value": 0.1, + "visible": true + } + ], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Swap used %", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:98", + "format": "percentunit", + "label": null, + "logBase": 1, + "max": "1", + "min": "0", + "show": true + }, + { + "$$hashKey": "object:99", + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, { "collapsed": false, "datasource": null, @@ -448,10 +591,10 @@ "dashLength": 10, "dashes": false, "datasource": null, - "description": "Shows most saturated network link for every node. 
Baseline is the reported NIC link speed - that might not be the actual limit.", + "description": "Shows most saturated network link for every node. Bit/s.", "fieldConfig": { "defaults": { - "custom": {} + "links": [] }, "overrides": [] }, @@ -478,11 +621,10 @@ "linewidth": 1, "nullPointMode": "null", "options": { - "alertThreshold": true, - "dataLinks": [] + "alertThreshold": true }, "percentage": false, - "pluginVersion": "7.3.5", + "pluginVersion": "7.5.10", "pointradius": 2, "points": false, "renderer": "flot", @@ -492,14 +634,14 @@ "steppedLine": false, "targets": [ { - "expr": "max by (instance) (rate(node_network_transmit_bytes_total{device!~\"lo|monitoringvpn\"}[5m]) / node_network_speed_bytes)", + "expr": "max by (instance) (rate(node_network_transmit_bytes_total{device!~\"lo|monitoringvpn\"}[5m]) * 8)", "interval": "", "intervalFactor": 4, "legendFormat": "{{instance}} out", "refId": "A" }, { - "expr": "- max by (instance) (rate(node_network_receive_bytes_total{device!~\"lo|monitoringvpn\"}[5m]) / node_network_speed_bytes)", + "expr": "- max by (instance) (rate(node_network_receive_bytes_total{device!~\"lo|monitoringvpn\"}[5m]) * 8)", "interval": "", "intervalFactor": 4, "legendFormat": "{{instance}} in", @@ -510,7 +652,7 @@ "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "Throughput %", + "title": "Throughput", "tooltip": { "shared": false, "sort": 2, @@ -527,15 +669,17 @@ }, "yaxes": [ { + "$$hashKey": "object:226", "decimals": null, - "format": "percentunit", + "format": "bps", "label": null, "logBase": 1, - "max": "1", - "min": "-1", + "max": null, + "min": null, "show": true }, { + "$$hashKey": "object:227", "format": "short", "label": null, "logBase": 1, @@ -558,7 +702,7 @@ "description": "Packet and error count. 
Positive values mean transmit, negative receive.", "fieldConfig": { "defaults": { - "custom": {} + "links": [] }, "overrides": [] }, @@ -585,11 +729,10 @@ "linewidth": 1, "nullPointMode": "null as zero", "options": { - "alertThreshold": true, - "dataLinks": [] + "alertThreshold": true }, "percentage": false, - "pluginVersion": "7.3.5", + "pluginVersion": "7.5.10", "pointradius": 2, "points": false, "renderer": "flot", @@ -602,28 +745,28 @@ "expr": "- rate(node_network_receive_packets_total{device!~\"lo|monitoringvpn\"}[5m])", "interval": "", "intervalFactor": 4, - "legendFormat": "{{instance}} {{device}}", + "legendFormat": "{{instance}} in", "refId": "A" }, { "expr": "- rate(node_network_receive_errs_total{device!~\"lo|monitoringvpn\"}[5m])", "interval": "", "intervalFactor": 4, - "legendFormat": "{{instance}} {{device}}", + "legendFormat": "{{instance}} in err", "refId": "B" }, { "expr": "rate(node_network_transmit_packets_total{device!~\"lo|monitoringvpn\"}[5m])", "interval": "", "intervalFactor": 4, - "legendFormat": "{{instance}} {{device}}", + "legendFormat": "{{instance}} out", "refId": "C" }, { "expr": "rate(node_network_transmit_errs_total{device!~\"lo|monitoringvpn\"}[5m])", "interval": "", "intervalFactor": 4, - "legendFormat": "{{instance}} {{device}}", + "legendFormat": "{{instance}} out err", "refId": "D" } ], @@ -647,7 +790,7 @@ }, "yaxes": [ { - "format": "short", + "format": "pps", "label": null, "logBase": 1, "max": null, @@ -781,7 +924,7 @@ "description": "Network errors, drops etc. 
Should all be 0.", "fieldConfig": { "defaults": { - "custom": {} + "links": [] }, "overrides": [] }, @@ -808,11 +951,10 @@ "linewidth": 1, "nullPointMode": "null", "options": { - "alertThreshold": true, - "dataLinks": [] + "alertThreshold": true }, "percentage": false, - "pluginVersion": "7.3.5", + "pluginVersion": "7.5.10", "pointradius": 2, "points": false, "renderer": "flot", @@ -852,7 +994,8 @@ "fill": true, "line": true, "op": "gt", - "value": 10 + "value": 10, + "visible": true } ], "timeFrom": null, @@ -953,7 +1096,7 @@ "description": "Watch filesystems filling up. Shows only mounts over 10 % of available bytes used.", "fieldConfig": { "defaults": { - "custom": {}, + "links": [], "unit": "percentunit" }, "overrides": [] @@ -962,7 +1105,7 @@ "fillGradient": 0, "gridPos": { "h": 7, - "w": 8, + "w": 6, "x": 0, "y": 17 }, @@ -981,11 +1124,10 @@ "linewidth": 1, "nullPointMode": "null", "options": { - "alertThreshold": true, - "dataLinks": [] + "alertThreshold": true }, "percentage": false, - "pluginVersion": "7.3.5", + "pluginVersion": "7.5.10", "pointradius": 2, "points": false, "renderer": "flot", @@ -1012,7 +1154,7 @@ "line": true, "op": "gt", "value": 0.8, - "yaxis": "left" + "visible": true } ], "timeFrom": null, @@ -1035,6 +1177,7 @@ }, "yaxes": [ { + "$$hashKey": "object:131", "format": "percentunit", "label": null, "logBase": 1, @@ -1043,6 +1186,7 @@ "show": true }, { + "$$hashKey": "object:132", "format": "short", "label": null, "logBase": 1, @@ -1065,7 +1209,7 @@ "description": "Input Output Operations per second. 
Positive values mean read, negative write.", "fieldConfig": { "defaults": { - "custom": {} + "links": [] }, "overrides": [] }, @@ -1073,8 +1217,8 @@ "fillGradient": 0, "gridPos": { "h": 7, - "w": 8, - "x": 8, + "w": 6, + "x": 6, "y": 17 }, "hiddenSeries": false, @@ -1092,11 +1236,10 @@ "linewidth": 1, "nullPointMode": "null as zero", "options": { - "alertThreshold": true, - "dataLinks": [] + "alertThreshold": true }, "percentage": false, - "pluginVersion": "7.3.5", + "pluginVersion": "7.5.10", "pointradius": 2, "points": false, "renderer": "flot", @@ -1170,7 +1313,7 @@ "description": "Max average storage latency per node. Positive values mean read, negative write.", "fieldConfig": { "defaults": { - "custom": {} + "links": [] }, "overrides": [] }, @@ -1178,8 +1321,8 @@ "fillGradient": 0, "gridPos": { "h": 7, - "w": 8, - "x": 16, + "w": 6, + "x": 12, "y": 17 }, "hiddenSeries": false, @@ -1197,11 +1340,10 @@ "linewidth": 1, "nullPointMode": "null as zero", "options": { - "alertThreshold": true, - "dataLinks": [] + "alertThreshold": true }, "percentage": false, - "pluginVersion": "7.3.5", + "pluginVersion": "7.5.10", "pointradius": 2, "points": false, "renderer": "flot", @@ -1265,17 +1407,159 @@ "align": false, "alignLevel": null } + }, + { + "alert": { + "alertRuleTags": {}, + "conditions": [ + { + "evaluator": { + "params": [ + 0 + ], + "type": "gt" + }, + "operator": { + "type": "and" + }, + "query": { + "params": [ + "A", + "5m", + "now" + ] + }, + "reducer": { + "params": [], + "type": "count" + }, + "type": "query" + } + ], + "executionErrorState": "alerting", + "for": "5m", + "frequency": "5m", + "handler": 1, + "name": "Degraded RAID alert", + "noDataState": "ok", + "notifications": [] + }, + "aliasColors": {}, + "bars": true, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 6, + "x": 18, + "y": 17 + }, + "hiddenSeries": 
false, + "id": 32, + "legend": { + "avg": false, + "current": false, + "hideEmpty": false, + "hideZero": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": false, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": false + }, + "percentage": false, + "pluginVersion": "7.5.10", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": true, + "steppedLine": false, + "targets": [ + { + "exemplar": true, + "expr": "megacli_drives{state=\"Degraded\"}", + "interval": "", + "legendFormat": "{{instance}}", + "refId": "A" + } + ], + "thresholds": [ + { + "colorMode": "critical", + "fill": false, + "line": false, + "op": "gt", + "value": 0, + "visible": true + } + ], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Degraded RAID arrays", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:151", + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "$$hashKey": "object:152", + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } } ], "refresh": "30s", - "schemaVersion": 20, + "schemaVersion": 27, "style": "dark", "tags": [], "templating": { "list": [] }, "time": { - "from": "now-1h", + "from": "now-3h", "to": "now" }, "timepicker": {}, diff --git a/nixos/modules/monitoring/server/grafana-dashboards/services-overview.json b/nixos/modules/monitoring/server/grafana-dashboards/services-overview.json new file mode 100644 index 0000000000000000000000000000000000000000..09bfa3acde10c14fb41d3082779bc14d064b6554 --- /dev/null +++ 
b/nixos/modules/monitoring/server/grafana-dashboards/services-overview.json @@ -0,0 +1,477 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": "-- Grafana --", + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "description": "How are our user-facing services doing?", + "editable": true, + "gnetId": null, + "graphTooltip": 0, + "links": [], + "panels": [ + { + "collapsed": false, + "datasource": null, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 32, + "panels": [], + "title": "HTTPS endpoints", + "type": "row" + }, + { + "alert": { + "alertRuleTags": {}, + "conditions": [ + { + "evaluator": { + "params": [ + 3.142 + ], + "type": "gt" + }, + "operator": { + "type": "and" + }, + "query": { + "params": [ + "A", + "5m", + "now" + ] + }, + "reducer": { + "params": [], + "type": "avg" + }, + "type": "query" + } + ], + "executionErrorState": "alerting", + "for": "5m", + "frequency": "1m", + "handler": 1, + "name": "Response times alert", + "noDataState": "no_data", + "notifications": [] + }, + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "description": "", + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 8, + "x": 0, + "y": 1 + }, + "hiddenSeries": false, + "id": 36, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.11", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "exemplar": true, + "expr": "probe_duration_seconds", + "interval": "", + 
"legendFormat": "{{instance}}", + "refId": "A" + } + ], + "thresholds": [ + { + "colorMode": "critical", + "fill": true, + "line": true, + "op": "gt", + "value": 3.142, + "visible": true + } + ], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Response times", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:425", + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "$$hashKey": "object:426", + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "alert": { + "alertRuleTags": {}, + "conditions": [ + { + "evaluator": { + "params": [ + 0 + ], + "type": "gt" + }, + "operator": { + "type": "and" + }, + "query": { + "params": [ + "A", + "5m", + "now" + ] + }, + "reducer": { + "params": [], + "type": "count" + }, + "type": "query" + } + ], + "executionErrorState": "alerting", + "for": "5m", + "frequency": "1m", + "handler": 1, + "name": "Probe fails alert", + "noDataState": "ok", + "notifications": [] + }, + "aliasColors": {}, + "bars": true, + "dashLength": 10, + "dashes": false, + "datasource": null, + "description": "Shows all HTTP endpoints whose probe returned an HTTP status code other than 200, 401 or 404. 
This could have different reasons, likely ones being the service is down or the TLS certificate is not trusted.", + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 8, + "x": 8, + "y": 1 + }, + "hiddenSeries": false, + "id": 38, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": false, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.11", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": true, + "steppedLine": false, + "targets": [ + { + "exemplar": true, + "expr": "count by (instance) (probe_http_status_code!=200 and probe_http_status_code!=401 and probe_http_status_code!=404)", + "interval": "", + "legendFormat": "{{instance}}", + "refId": "A" + } + ], + "thresholds": [ + { + "colorMode": "critical", + "fill": true, + "line": true, + "op": "gt", + "value": 0, + "visible": true + } + ], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Probe fails", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:903", + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "$$hashKey": "object:904", + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "alert": { + "alertRuleTags": {}, + "conditions": [ + { + "evaluator": { + "params": [ + 2419200 + ], + "type": "lt" + }, + "operator": { + "type": "and" + }, + "query": { + "params": [ + "A", + "5m", + "now" + ] + }, + 
"reducer": { + "params": [], + "type": "avg" + }, + "type": "query" + } + ], + "executionErrorState": "alerting", + "for": "5m", + "frequency": "60m", + "handler": 1, + "message": "A TLS certificate is expiring within four weeks.", + "name": "TLS certificate expiry alert", + "noDataState": "no_data", + "notifications": [] + }, + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "description": "", + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "fill": 0, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 8, + "x": 16, + "y": 1 + }, + "hiddenSeries": false, + "id": 34, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.11", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "exemplar": true, + "expr": "probe_ssl_earliest_cert_expiry - time()", + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{instance}}", + "refId": "A" + } + ], + "thresholds": [ + { + "colorMode": "critical", + "fill": true, + "line": true, + "op": "lt", + "value": 2419200, + "visible": true + } + ], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "TLS certificate expiry", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:179", + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "$$hashKey": "object:180", + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + 
} + ], + "yaxis": { + "align": false, + "alignLevel": null + } + } + ], + "refresh": "30s", + "schemaVersion": 27, + "style": "dark", + "tags": [], + "templating": { + "list": [] + }, + "time": { + "from": "now-15m", + "to": "now" + }, + "timepicker": {}, + "timezone": "", + "title": "Services overview", + "uid": "ServicesOverview", + "version": 1 +} diff --git a/nixos/modules/monitoring/server/grafana-dashboards/tahoe-lafs.json b/nixos/modules/monitoring/server/grafana-dashboards/tahoe-lafs.json new file mode 100644 index 0000000000000000000000000000000000000000..6d7e7014e12753ce791c0059630e009d4172544d --- /dev/null +++ b/nixos/modules/monitoring/server/grafana-dashboards/tahoe-lafs.json @@ -0,0 +1,1476 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": "-- Grafana --", + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "description": "", + "editable": true, + "gnetId": null, + "graphTooltip": 0, + "id": 41, + "iteration": 1636742282779, + "links": [], + "panels": [ + { + "collapsed": false, + "datasource": null, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 38, + "panels": [], + "title": "Inbound operations", + "type": "row" + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "description": "These stats keep track of local disk latencies for storage-server operations. All values are in seconds. These are recorded by the storage server, starting from the time the request arrives (post-deserialization) and ending when the response begins serialization. 
As such, they are mostly useful for measuring disk speeds.", + "fieldConfig": { + "defaults": { + "unit": "s" + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 8, + "x": 0, + "y": 1 + }, + "hiddenSeries": false, + "id": 13, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.11", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "exemplar": true, + "expr": "tahoe_stats_storage_server_latencies_allocate_mean{instance=\"$node\"}", + "interval": "", + "legendFormat": "allocate", + "refId": "allocate_mean" + }, + { + "exemplar": true, + "expr": "tahoe_stats_storage_server_latencies_close_mean{instance=\"$node\"}", + "hide": false, + "interval": "", + "legendFormat": "close", + "refId": "close_mean" + }, + { + "exemplar": true, + "expr": "tahoe_stats_storage_server_latencies_get_mean{instance=\"$node\"}", + "hide": false, + "interval": "", + "legendFormat": "get", + "refId": "get_mean" + }, + { + "exemplar": true, + "expr": "tahoe_stats_storage_server_latencies_read_mean{instance=\"$node\"}", + "hide": false, + "interval": "", + "legendFormat": "read", + "refId": "read_mean" + }, + { + "exemplar": true, + "expr": "tahoe_stats_storage_server_latencies_readv_mean{instance=\"$node\"}", + "hide": false, + "interval": "", + "legendFormat": "readv", + "refId": "readv_mean" + }, + { + "exemplar": true, + "expr": "tahoe_stats_storage_server_latencies_write_mean{instance=\"$node\"}", + "hide": false, + "interval": "", + "legendFormat": "write", + "refId": "write_mean" + }, + { + "exemplar": true, + "expr": "tahoe_stats_storage_server_latencies_writev_mean{instance=\"$node\"}", + 
"hide": false, + "interval": "", + "legendFormat": "writev", + "refId": "writev_mean" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Latency means", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:1111", + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "$$hashKey": "object:1112", + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "description": "This counts inbound storage-server operations.", + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 8, + "x": 8, + "y": 1 + }, + "hiddenSeries": false, + "id": 34, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.11", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "exemplar": true, + "expr": "rate(tahoe_counters_storage_server_allocate{instance=\"$node\"}[5m])", + "interval": "", + "legendFormat": "allocate", + "refId": "allocate" + }, + { + "exemplar": true, + "expr": "rate(tahoe_counters_storage_server_write{instance=\"$node\"}[5m])", + "hide": false, + "interval": "", + "legendFormat": "write", + "refId": "write" + }, + { + "exemplar": true, + "expr": 
"rate(tahoe_counters_storage_server_close{instance=\"$node\"}[5m])", + "hide": false, + "interval": "", + "legendFormat": "close", + "refId": "close" + }, + { + "exemplar": true, + "expr": "rate(tahoe_counters_storage_server_get{instance=\"$node\"}[5m])", + "hide": false, + "interval": "", + "legendFormat": "get", + "refId": "get" + }, + { + "exemplar": true, + "expr": "rate(tahoe_counters_storage_server_read{instance=\"$node\"}[5m])", + "hide": false, + "interval": "", + "legendFormat": "read", + "refId": "read" + }, + { + "exemplar": true, + "expr": "rate(tahoe_counters_storage_server_add-lease{instance=\"$node\"}[5m])", + "hide": true, + "interval": "", + "legendFormat": "add-lease", + "refId": "add-lease" + }, + { + "exemplar": true, + "expr": "rate(tahoe_counters_storage_server_renew{instance=\"$node\"}[5m])", + "hide": false, + "interval": "", + "legendFormat": "renew", + "refId": "renew" + }, + { + "exemplar": true, + "expr": "rate(tahoe_counters_storage_server_cancel{instance=\"$node\"}[5m])", + "hide": false, + "interval": "", + "legendFormat": "cancel", + "refId": "cancel" + }, + { + "exemplar": true, + "expr": "rate(tahoe_counters_storage_server_readv{instance=\"$node\"}[5m])", + "hide": false, + "interval": "", + "legendFormat": "readv", + "refId": "readv" + }, + { + "exemplar": true, + "expr": "rate(tahoe_counters_storage_server_writev{instance=\"$node\"}[5m])", + "hide": false, + "interval": "", + "legendFormat": "writev", + "refId": "writev" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Counts/s", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:2483", + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "$$hashKey": "object:2484", + "format": 
"short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 8, + "x": 16, + "y": 1 + }, + "hiddenSeries": false, + "id": 36, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.11", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "exemplar": true, + "expr": "rate(tahoe_counters_storage_server_bytes_added{instance=\"$node\"}[5m])", + "interval": "", + "legendFormat": "Added", + "refId": "A" + }, + { + "exemplar": true, + "expr": "rate(tahoe_counters_storage_server_bytes_freed{instance=\"$node\"}[5m])", + "hide": false, + "interval": "", + "legendFormat": "Freed", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Bytes/s", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:2568", + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "$$hashKey": "object:2569", + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + 
"collapsed": false, + "datasource": null, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 9 + }, + "id": 19, + "panels": [], + "repeat": null, + "title": "Latency Histograms", + "type": "row" + }, + { + "datasource": null, + "description": "These stats keep track of local disk latencies for storage-server operations. All values are in seconds. These are recorded by the storage server, starting from the time the request arrives (post-deserialization) and ending when the response begins serialization. As such, they are mostly useful for measuring disk speeds.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 0, + "y": 10 + }, + "id": 11, + "options": { + "displayMode": "gradient", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showUnfilled": true, + "text": {} + }, + "pluginVersion": "7.5.11", + "repeat": "storageserverop", + "repeatDirection": "h", + "scopedVars": { + "storageserverop": { + "selected": true, + "text": "allocate", + "value": "allocate" + } + }, + "targets": [ + { + "exemplar": true, + "expr": "tahoe_stats_storage_server_latencies_$storageserverop{instance=\"$node\"}", + "interval": "", + "legendFormat": "{{quantile}}", + "refId": "A" + } + ], + "title": "$storageserverop", + "type": "bargauge" + }, + { + "datasource": null, + "description": "These stats keep track of local disk latencies for storage-server operations. All values are in seconds. These are recorded by the storage server, starting from the time the request arrives (post-deserialization) and ending when the response begins serialization. 
As such, they are mostly useful for measuring disk speeds.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 8, + "y": 10 + }, + "id": 39, + "options": { + "displayMode": "gradient", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showUnfilled": true, + "text": {} + }, + "pluginVersion": "7.5.11", + "repeatDirection": "h", + "repeatIteration": 1636742282779, + "repeatPanelId": 11, + "scopedVars": { + "storageserverop": { + "selected": true, + "text": "write", + "value": "write" + } + }, + "targets": [ + { + "exemplar": true, + "expr": "tahoe_stats_storage_server_latencies_$storageserverop", + "interval": "", + "legendFormat": "{{quantile}}", + "refId": "A" + } + ], + "title": "$storageserverop", + "type": "bargauge" + }, + { + "datasource": null, + "description": "These stats keep track of local disk latencies for storage-server operations. All values are in seconds. These are recorded by the storage server, starting from the time the request arrives (post-deserialization) and ending when the response begins serialization. 
As such, they are mostly useful for measuring disk speeds.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 16, + "y": 10 + }, + "id": 40, + "options": { + "displayMode": "gradient", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showUnfilled": true, + "text": {} + }, + "pluginVersion": "7.5.11", + "repeatDirection": "h", + "repeatIteration": 1636742282779, + "repeatPanelId": 11, + "scopedVars": { + "storageserverop": { + "selected": true, + "text": "readv", + "value": "readv" + } + }, + "targets": [ + { + "exemplar": true, + "expr": "tahoe_stats_storage_server_latencies_$storageserverop", + "interval": "", + "legendFormat": "{{quantile}}", + "refId": "A" + } + ], + "title": "$storageserverop", + "type": "bargauge" + }, + { + "collapsed": false, + "datasource": null, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 18 + }, + "id": 30, + "panels": [], + "title": "Storage overview", + "type": "row" + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "description": "These all reflect disk-space usage policies and status.", + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 8, + "x": 0, + "y": 19 + }, + "hiddenSeries": false, + "id": 22, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.11", + "pointradius": 2, + "points": false, + "renderer": 
"flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "exemplar": true, + "expr": "tahoe_stats_storage_server_disk_avail{instance=\"$node\"}", + "hide": false, + "interval": "", + "legendFormat": "disk_avail", + "refId": "disk_avail" + }, + { + "exemplar": true, + "expr": "tahoe_stats_storage_server_disk_free_for_nonroot{instance=\"$node\"}", + "hide": false, + "interval": "", + "legendFormat": "free_for_nonroot", + "refId": "disk_free_for_nonroot" + }, + { + "exemplar": true, + "expr": "tahoe_stats_storage_server_disk_free_for_root{instance=\"$node\"}", + "hide": false, + "interval": "", + "legendFormat": "free_for_root", + "refId": "disk_free_for_root" + }, + { + "exemplar": true, + "expr": "", + "hide": true, + "interval": "", + "legendFormat": "disk_total", + "refId": "disk_total" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Bytes free", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:712", + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "$$hashKey": "object:713", + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "description": "These all reflect disk-space usage policies and status.", + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 8, + "x": 8, + "y": 19 + }, + "hiddenSeries": false, + "id": 7, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + 
"total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.11", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "exemplar": true, + "expr": "tahoe_stats_storage_server_allocated{instance=\"$node\"}", + "interval": "", + "legendFormat": "allocated", + "refId": "allocated" + }, + { + "exemplar": true, + "expr": "tahoe_stats_storage_server_disk_used{instance=\"$node\"}", + "hide": false, + "interval": "", + "legendFormat": "disk_used", + "refId": "disk_used" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Bytes used", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:712", + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "$$hashKey": "object:713", + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "description": "This counts the number of ‘buckets’ (i.e. unique storage-index values) currently managed by the storage server. 
It indicates roughly how many files are managed by the server.", + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 8, + "x": 16, + "y": 19 + }, + "hiddenSeries": false, + "id": 9, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.11", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "exemplar": true, + "expr": "tahoe_stats_storage_server_total_bucket_count{instance=\"$node\"}", + "interval": "", + "legendFormat": "total_bucket_count", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Total bucket count", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:797", + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "$$hashKey": "object:798", + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "collapsed": false, + "datasource": null, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 27 + }, + "id": 15, + "panels": [], + "title": "CPU", + "type": "row" + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "description": "Estimate of what percentage of system CPU time was consumed by the node process, over the given time interval. 
", + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 8, + "x": 0, + "y": 28 + }, + "hiddenSeries": false, + "id": 2, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.11", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "exemplar": true, + "expr": "tahoe_stats_cpu_monitor_1min_avg{instance=\"$node\"}\n", + "interval": "", + "legendFormat": "1 min avg", + "refId": "A" + }, + { + "exemplar": true, + "expr": "tahoe_stats_cpu_monitor_5min_avg{instance=\"$node\"}\n", + "hide": false, + "interval": "", + "intervalFactor": 5, + "legendFormat": "5 mins avg", + "refId": "B" + }, + { + "exemplar": true, + "expr": "tahoe_stats_cpu_monitor_15min_avg{instance=\"$node\"}\n", + "hide": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "15 mins avg", + "refId": "C" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "CPU monitor", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:62", + "decimals": null, + "format": "percentunit", + "label": null, + "logBase": 1, + "max": "1", + "min": "0", + "show": true + }, + { + "$$hashKey": "object:63", + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + 
"datasource": null, + "description": "Estimate of total number of CPU seconds consumed by node since the process was started. Ticket #472 indicates that .total may sometimes be negative due to wraparound of the kernel’s counter.", + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 8, + "x": 8, + "y": 28 + }, + "hiddenSeries": false, + "id": 5, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.11", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "exemplar": true, + "expr": "tahoe_stats_cpu_monitor_total{instance=\"$node\"}\n", + "interval": "", + "legendFormat": "Total CPU seconds", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "CPU time total", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:62", + "decimals": null, + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "$$hashKey": "object:63", + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "description": "How many seconds since the node process was started.", + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, 
+ "gridPos": { + "h": 8, + "w": 8, + "x": 16, + "y": 28 + }, + "hiddenSeries": false, + "id": 4, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.11", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "exemplar": true, + "expr": "tahoe_stats_node_uptime{instance=\"$node\"}", + "interval": "", + "legendFormat": "{{instance}} uptime", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Node uptime", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:386", + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "$$hashKey": "object:387", + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + } + ], + "schemaVersion": 27, + "style": "dark", + "tags": [], + "templating": { + "list": [ + { + "allValue": null, + "current": { + "selected": false, + "text": "storage1", + "value": "storage1" + }, + "datasource": null, + "definition": "tahoe_stats_cpu_monitor_1min_avg", + "description": "Which node (instance) to show", + "error": null, + "hide": 0, + "includeAll": false, + "label": "Node", + "multi": false, + "name": "node", + "options": [], + "query": { + "query": "tahoe_stats_cpu_monitor_1min_avg", + "refId": "StandardVariableQuery" + }, + "refresh": 1, + "regex": "/.*instance=\"([^\"]*)\".*/", 
"skipUrlSync": false, + "sort": 0, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + "selected": true, + "tags": [], + "text": [ + "allocate", + "write", + "readv" + ], + "value": [ + "allocate", + "write", + "readv" + ] + }, + "description": "Inbound storage-server operations ", + "error": null, + "hide": 0, + "includeAll": true, + "label": "Detailed latencies for", + "multi": true, + "name": "storageserverop", + "options": [ + { + "selected": false, + "text": "All", + "value": "$__all" + }, + { + "selected": true, + "text": "allocate", + "value": "allocate" + }, + { + "selected": true, + "text": "write", + "value": "write" + }, + { + "selected": false, + "text": "close", + "value": "close" + }, + { + "selected": false, + "text": "get", + "value": "get" + }, + { + "selected": false, + "text": "read", + "value": "read" + }, + { + "selected": false, + "text": "add-lease", + "value": "add-lease" + }, + { + "selected": false, + "text": "renew", + "value": "renew" + }, + { + "selected": false, + "text": "cancel", + "value": "cancel" + }, + { + "selected": true, + "text": "readv", + "value": "readv" + }, + { + "selected": false, + "text": "writev", + "value": "writev" + } + ], + "query": "allocate, write, close, get, read, add-lease, renew, cancel, readv, writev", + "queryValue": "", + "skipUrlSync": false, + "type": "custom" + } + ] + }, + "time": { + "from": "now-15m", + "to": "now" + }, + "timepicker": {}, + "timezone": "", + "title": "Tahoe-LAFS", + "uid": "TahoeLAFS", + "version": 1 +} diff --git a/nixos/modules/monitoring/server/grafana.nix b/nixos/modules/monitoring/server/grafana.nix index c23150238241db561bae52aa50e4878b6961f9e6..d7efd4c7d3f92d0120444374aa5250d68d4764a8 100644 --- a/nixos/modules/monitoring/server/grafana.nix +++ b/nixos/modules/monitoring/server/grafana.nix @@ -20,20 +20,20 @@ let in { options.services.private-storage.monitoring.grafana = { - domain = 
lib.mkOption - { type = lib.types.str; - example = lib.literalExample "grafana.grid.private.storage"; - description = "The FQDN of the Grafana host"; + domains = lib.mkOption + { type = lib.types.listOf lib.types.str; + example = [ "grafana.grid.private.storage" ]; + description = "The domain names at which the server is reachable."; }; prometheusUrl = lib.mkOption { type = lib.types.str; - example = lib.literalExample "http://prometheus:9090/"; + example = "http://prometheus:9090/"; default = "http://localhost:9090/"; description = "The URL of the Prometheus host to access"; }; lokiUrl = lib.mkOption { type = lib.types.str; - example = lib.literalExample "http://loki:3100/"; + example = "http://loki:3100/"; default = "http://localhost:3100/"; description = "The URL of the Loki host to access"; }; @@ -46,31 +46,51 @@ in { }; googleOAuthClientID = lib.mkOption { type = lib.types.str; - example = lib.literalExample "grafana-staging-345678"; + example = "grafana-staging-345678"; default = "replace-by-your-client-id-or-set-empty-string-for-anonymous-access"; description = "The GSuite OAuth2 SSO Client ID. Empty string turns SSO auth off and anonymous (free for all) access on."; }; googleOAuthClientSecretFile = lib.mkOption { type = lib.types.path; - example = lib.literalExample "/var/secret/monitoring-gsuite-client-secret"; + example = /var/secret/monitoring-gsuite-client-secret; default = /run/keys/grafana-google-sso.secret; description = "The path to the GSuite SSO secret file."; }; adminPasswordFile = lib.mkOption { type = lib.types.path; - example = lib.literalExample "/var/secret/monitoring-admin-password"; + example = "/var/secret/monitoring-admin-password"; default = /run/keys/grafana-admin.password; description = "A file containing the password for the Grafana Admin account."; }; + enableSlackAlert = lib.mkOption + { type = lib.types.bool; + default = false; + description = '' + Enables the slack alerter. 
Expects a file that contains + the secret Slack Web Hook URL in grafanaSlackUrlFile (see below). + ''; + }; + grafanaSlackUrlFile = lib.mkOption + { type = lib.types.path; + default = /run/keys/grafana-slack-url; + description = '' + Where to find the file that contains the slack URL. + ''; + }; }; - config = { + config = + let + # We'll refer to this collection of domains by the first domain in the list. + domain = builtins.head cfg.domains; + + in { # Port 80 for ACME ssl retrieval only. 443 for nginx -> grafana. networking.firewall.allowedTCPPorts = [ 80 443 ]; services.grafana = { enable = true; - domain = cfg.domain; + inherit domain; port = 2342; addr = "127.0.0.1"; @@ -119,8 +139,25 @@ in { # See https://grafana.com/docs/grafana/latest/administration/provisioning/#dashboards dashboards = [{ name = "provisioned"; - options.path = ./grafana-config; + options.path = ./grafana-dashboards; }]; + # See https://grafana.com/docs/grafana/latest/administration/provisioning/#example-alert-notification-channels-config-file + notifiers = [ ] ++ (lib.optionals (cfg.enableSlackAlert) [{ + uid = "slack-notifier-1"; + name = "Slack"; + type = "slack"; + is_default = true; + send_reminder = false; + settings = { + username = "${domain}"; + uploadImage = true; + }; + secure_settings = { + # `$__file{}` reads the value from the named file. 
+ # See https://grafana.com/docs/grafana/latest/administration/configuration/#file-provider + url = "$__file{${toString cfg.grafanaSlackUrlFile}}"; + }; + }]); }; }; @@ -138,7 +175,8 @@ in { # Only allow PFS-enabled ciphers with AES256: sslCiphers = "AES256+EECDH:AES256+EDH:!aNULL"; - virtualHosts.${config.services.grafana.domain} = { + virtualHosts."${domain}" = { + serverAliases = builtins.tail cfg.domains; enableACME = true; forceSSL = true; locations."/" = { diff --git a/nixos/modules/monitoring/server/prometheus.nix b/nixos/modules/monitoring/server/prometheus.nix index 1f27f023df5b3211a81e3603226cc7cfe2c25e27..3bb00a5b95855859e455b5df8fb065b3d70bc855 100644 --- a/nixos/modules/monitoring/server/prometheus.nix +++ b/nixos/modules/monitoring/server/prometheus.nix @@ -18,19 +18,24 @@ in { options.services.private-storage.monitoring.prometheus = { nodeExporterTargets = lib.mkOption { type = with lib.types; listOf str; - example = lib.literalExample "[ node1 node2 ]"; + example = [ "node1" "node2" ]; description = "List of nodes (hostnames or IPs) to scrape."; }; nginxExporterTargets = lib.mkOption { type = with lib.types; listOf str; - example = lib.literalExample "[ node1 node2 ]"; + example = [ "node1" "node2" ]; description = "List of nodes (hostnames or IPs) to scrape."; }; paymentExporterTargets = lib.mkOption { type = with lib.types; listOf str; - example = lib.literalExample "[ node1 node2 ]"; + example = [ "node1" "node2" ]; description = "List of nodes (hostnames or IPs) to scrape."; }; + blackboxExporterHttpsTargets = lib.mkOption { + type = with lib.types; listOf str; + example = [ "https://node1.com/" "https://node2.org/" ]; + description = "List of https URLs to scrape."; + }; }; config = rec { @@ -65,6 +70,32 @@ in { }]; relabel_configs = [ dropPortNumber ]; } + { + # The Blackbox exporter is using Prometheus' "Multi-Target Exporter Pattern", + # see https://prometheus.io/docs/guides/multi-target-exporter/ + job_name = "blackboxExporterHttps"; + 
static_configs = [{ + targets = cfg.blackboxExporterHttpsTargets; + }]; + metrics_path = "/probe"; + params.module = [ "https_2xx" ]; + relabel_configs = [ + { + source_labels = [ "__address__" ]; + target_label = "__param_target"; + } + { + source_labels = [ "__param_target" ]; + target_label = "instance"; + } + { + source_labels = []; + target_label = "__address__"; + # The blackbox exporter’s real hostname:port + replacement = "monitoring:9115"; + } + ]; + } ]; }; }; diff --git a/nixos/modules/monitoring/vpn/client.nix b/nixos/modules/monitoring/vpn/client.nix index ed1933e34d715fba0933f32d606e989b4d1ed4ec..afa4f77c76b8d109d807148b9d2258045e5cabc0 100644 --- a/nixos/modules/monitoring/vpn/client.nix +++ b/nixos/modules/monitoring/vpn/client.nix @@ -8,7 +8,7 @@ in { enable = lib.mkEnableOption "PrivateStorageio Monitoring VPN client service"; privateKeyFile = lib.mkOption { type = lib.types.path; - example = lib.literalExample /run/keys/monitoringvpn/host.key; + example = /run/keys/monitoringvpn/host.key; default = /run/keys/monitoringvpn/client.key; description = '' File with base64 private key generated by <command>wg genkey</command>. @@ -18,7 +18,7 @@ in { }; presharedKeyFile = lib.mkOption { type = lib.types.path; - example = lib.literalExample /run/keys/monitoringvpn/preshared.key; + example = /run/keys/monitoringvpn/preshared.key; default = /run/keys/monitoringvpn/preshared.key; description = '' File with base64 preshared key generated by <command>wg genpsk</command>. @@ -26,7 +26,7 @@ in { }; allowedIPs = lib.mkOption { type = lib.types.listOf lib.types.str; - example = lib.literalExample [ "172.23.23.1/32" ]; + example = [ "172.23.23.1/32" ]; default = [ "172.23.23.1/32" ]; description = '' Limits which IPs this client receives data from. @@ -34,21 +34,21 @@ in { }; ip = lib.mkOption { type = lib.types.str; - example = lib.literalExample "172.23.23.11"; + example = "172.23.23.11"; description = '' The IP addresses of the interface. 
''; }; endpoint = lib.mkOption { type = lib.types.str; - example = lib.literalExample "vpn.monitoring.private.storage:54321"; + example = "vpn.monitoring.private.storage:54321"; description = '' The address and port number of the server to establish the VPN with. ''; }; endpointPublicKeyFile = lib.mkOption { type = lib.types.path; - example = lib.literalExample ./monitoringvpn/server.pub; + example = ./monitoringvpn/server.pub; description = '' File with base64 public key generated by <command>cat private.key | wg pubkey > pubkey.pub</command>. ''; diff --git a/nixos/modules/monitoring/vpn/server.nix b/nixos/modules/monitoring/vpn/server.nix index 3c41e0209bb7fe18f1a81a44ab509c8442372bbf..80881a2060638f7e0f6cce9853d50c98f3081d6c 100644 --- a/nixos/modules/monitoring/vpn/server.nix +++ b/nixos/modules/monitoring/vpn/server.nix @@ -13,7 +13,7 @@ in { enable = lib.mkEnableOption "PrivateStorageio Monitoring VPN server service"; privateKeyFile = lib.mkOption { type = lib.types.path; - example = lib.literalExample /run/keys/monitoringvpn/server.key; + example = /run/keys/monitoringvpn/server.key; default = /run/keys/monitoringvpn/server.key; description = '' File with base64 private key generated by <command>wg genkey</command>. @@ -21,7 +21,7 @@ in { }; presharedKeyFile = lib.mkOption { type = lib.types.path; - example = lib.literalExample /run/keys/monitoringvpn/preshared.key; + example = /run/keys/monitoringvpn/preshared.key; default = /run/keys/monitoringvpn/preshared.key; description = '' File with base64 preshared key generated by <command>wg genpsk</command>. @@ -29,14 +29,14 @@ in { }; ip = lib.mkOption { type = lib.types.str; - example = lib.literalExample [ "172.23.23.23" ]; + example = [ "172.23.23.23" ]; description = '' The IP address of the interface. ''; }; port = lib.mkOption { type = lib.types.port; - example = lib.literalExample 54321; + example = 54321; default = 51820; description = '' The UDP port to listen on. 
@@ -44,14 +44,14 @@ in { }; vpnClientIPs = lib.mkOption { type = lib.types.listOf lib.types.str; - example = lib.literalExample [ "172.23.23.23" "172.23.23.42" ]; + example = [ "172.23.23.23" "172.23.23.42" ]; description = '' The IP addresses to allow connections from. ''; }; pubKeysPath = lib.mkOption { type = lib.types.path; - example = lib.literalExample ./monitoringvpn; + example = ./monitoringvpn; description = '' The path to the directory that holds the public keys. ''; diff --git a/nixos/modules/packages.nix b/nixos/modules/packages.nix index d6518dcf290c27b95e3428434623a63cfbdb8e19..c4390dc00f3948e04e3e90ef270261cc0dd1cdbb 100644 --- a/nixos/modules/packages.nix +++ b/nixos/modules/packages.nix @@ -1,8 +1,13 @@ # A NixOS module which exposes custom packages to other modules. { pkgs, ...}: -{ +let + ourpkgs = pkgs.callPackage ../../nixos/pkgs {}; +in { config = { # Expose `nixos/pkgs` as a new module argument `ourpkgs`. - _module.args.ourpkgs = pkgs.callPackage ../../nixos/pkgs {}; + _module.args.ourpkgs = ourpkgs; + # Also expose it as a config setting, for usage by tests, + # since the `_module` config is not exposed in the result. + passthru.ourpkgs = ourpkgs; }; } diff --git a/nixos/modules/private-storage.nix b/nixos/modules/private-storage.nix index c119a3d3417f7d4b7ec07c5652b65122dc5fce12..c620e2fb00fe3b24fc1f6b3c5defc12cddc30aa1 100644 --- a/nixos/modules/private-storage.nix +++ b/nixos/modules/private-storage.nix @@ -37,7 +37,7 @@ in services.private-storage.tahoe.package = lib.mkOption { default = ourpkgs.privatestorage; type = lib.types.package; - example = lib.literalExample "pkgs.tahoelafs"; + example = lib.literalExpression "pkgs.tahoelafs"; description = '' The package to use for the Tahoe-LAFS daemon. 
''; @@ -45,7 +45,7 @@ in services.private-storage.publicAddress = lib.mkOption { default = "${fqdn}"; type = lib.types.str; - example = lib.literalExample "storage.example.invalid"; + example = "storage.example.invalid"; description = '' A publicly-visible address to use in Tahoe-LAFS advertisements for this storage service. @@ -54,7 +54,7 @@ in services.private-storage.introducerFURL = lib.mkOption { default = null; type = lib.types.nullOr lib.types.str; - example = lib.literalExample "pb://<tubid>@<location hint>/<swissnum>"; + example = "pb://<tubid>@<location hint>/<swissnum>"; description = '' A Tahoe-LAFS introducer node fURL at which this storage node should announce itself. ''; @@ -62,7 +62,7 @@ in services.private-storage.publicStoragePort = lib.mkOption { default = 8898; type = lib.types.int; - example = lib.literalExample 8098; + example = 8098; description = '' The port number on which to service storage clients. ''; @@ -70,14 +70,14 @@ in services.private-storage.issuerRootURL = lib.mkOption { default = "https://issuer.${config.networking.domain}/"; type = lib.types.str; - example = lib.literalExample "https://example.invalid/"; + example = "https://example.invalid/"; description = '' The URL of the Ristretto issuer service to announce. ''; }; services.private-storage.ristrettoSigningKeyPath = lib.mkOption { type = lib.types.path; - example = lib.literalExample "/var/run/secrets/signing-key.private"; + example = "/var/run/secrets/signing-key.private"; description = '' The path to the Ristretto signing key for the service. ''; diff --git a/nixos/modules/spending.nix b/nixos/modules/spending.nix new file mode 100644 index 0000000000000000000000000000000000000000..ccbce1d3b4fbec4cc4bdeb0efd67653f5160581a --- /dev/null +++ b/nixos/modules/spending.nix @@ -0,0 +1,155 @@ +# A NixOS module which can run the spending service for PrivateStorage +# ZKAPs. +{ lib, pkgs, config, ourpkgs, ... 
}@args: let + cfg = config.services.private-storage-spending; +in +{ + options = { + services.private-storage-spending = { + enable = lib.mkEnableOption "PrivateStorage Spending Service"; + package = lib.mkOption { + default = ourpkgs.zkap-spending-service; + type = lib.types.package; + example = "ourpkgs.zkap-spending-service"; + description = '' + The package to use for the spending service. + ''; + }; + unixSocket = lib.mkOption { + default = "/run/zkap-spending-service/api.socket"; + type = lib.types.path; + description = '' + The unix socket that the spending service API listens on. + ''; + }; + }; + services.private-storage-spending.domain = lib.mkOption { + default = config.networking.fqdn; + type = lib.types.str; + example = [ "spending.example.com" ]; + description = '' + The domain name at which the spending service is reachable. + ''; + }; + }; + + config = + lib.mkIf cfg.enable { + systemd.sockets.zkap-spending-service = { + enable = true; + wantedBy = [ "sockets.target" ]; + listenStreams = [ cfg.unixSocket ]; + }; + # Add a systemd service to run zkap-spending-service. + systemd.services.zkap-spending-service = { + enable = true; + description = "ZKAP Spending Service"; + wantedBy = [ "multi-user.target" ]; + + serviceConfig.NonBlocking = true; + + # It really shouldn't ever exit on its own! If it does, it's a bug + # we'll have to fix. Restart it and hope it doesn't happen too much + # before we can fix whatever the issue is. + serviceConfig.Restart = "always"; + serviceConfig.Type = "simple"; + + # Use an unnamed user. + serviceConfig.DynamicUser = true; + + serviceConfig = { + # Work around https://twistedmatrix.com/trac/ticket/10261 + # Create a runtime directory so that the service has permission + # to change the mode on the socket. 
+ RuntimeDirectory = "zkap-spending-service"; + + # This set of restrictions is mostly derived from + # - running `systemd-analyze security zkap-spending-service.service` + # - Looking at the restrictions from the nixos nginx config. + AmbientCapabilities = ""; + CapabilityBoundingSet = ""; + LockPersonality = true; + MemoryDenyWriteExecute = true; + NoNewPrivileges = true; + PrivateDevices = true; + PrivateMounts = true; + PrivateNetwork = true; + PrivateTmp = true; + PrivateUsers = true; + ProcSubset = "pid"; + ProtectClock = true; + ProtectControlGroups = true; + ProtectHome = true; + ProtectHostname = true; + ProtectKernelLogs = true; + ProtectKernelModules = true; + ProtectKernelTunables = true; + ProtectProc = "invisible"; + ProtectSystem = "strict"; + RemoveIPC = true; + RestrictAddressFamilies = "AF_UNIX"; + RestrictNamespaces = true; + RestrictRealtime = true; + RestrictSUIDSGID = true; + SystemCallArchitectures = "native"; + # Lines starting with "~" are deny-list; the others are allow-list. + # Since the first line is allow, that bounds the set of allowed syscalls + # and the further lines restrict it. + SystemCallFilter = [ + # From systemd.exec(5), @system-service is "A reasonable set of + # system calls used by common system [...]" + "@system-service" + # This is from the nginx config, except that `@ipc` is not removed, + # since twisted uses a self-pipe. 
+ "~@cpu-emulation @debug @keyring @mount @obsolete @privileged @setuid" + ]; + Umask = "0077"; + }; + + script = let + httpArgs = "--http-endpoint systemd:domain=UNIX:index=0"; + in + "exec ${cfg.package}/bin/${cfg.package.meta.mainProgram} run ${httpArgs}"; + }; + + services.nginx = { + enable = true; + + recommendedGzipSettings = true; + recommendedOptimisation = true; + recommendedProxySettings = true; + recommendedTlsSettings = true; + + virtualHosts."${cfg.domain}" = { + locations."/v1/" = { + # Only forward requests beginning with /v1/ so + # we pass less scanning spam on to our backend + # Want a regex instead? try locations."~ /v\d+/" + proxyPass = "http://unix:${cfg.unixSocket}"; + }; + locations."/metrics" = { + proxyPass = "http://unix:${cfg.unixSocket}"; + # Only allow our monitoringvpn subnet + extraConfig = '' + allow 172.23.23.0/24; + allow 127.0.0.1; + allow ::1; + deny all; + ''; + }; + locations."/" = { + # Return a 404 error for any paths not specified above. + extraConfig = '' + return 404; + ''; + }; + }; + }; + + # Open 80 and 443 for nginx + networking.firewall.allowedTCPPorts = [ + 80 + 443 + ]; + }; +} diff --git a/nixos/modules/ssh.nix b/nixos/modules/ssh.nix index 3e90528322c153d6b96679af5d914c4e753b49bf..eb55fbf2ee4d3e6c04dd08039a8a9f9012f069b8 100644 --- a/nixos/modules/ssh.nix +++ b/nixos/modules/ssh.nix @@ -7,7 +7,7 @@ options = { services.private-storage.sshUsers = lib.mkOption { type = lib.types.attrsOf lib.types.str; - example = lib.literalExample { root = "ssh-ed25519 AAA..."; }; + example = { root = "ssh-ed25519 AAA..."; }; description = '' Users to configure on the issuer server and the storage servers and the SSH public keys to use to authenticate them. 
diff --git a/nixos/modules/tahoe.nix b/nixos/modules/tahoe.nix index 330474912a0eb9aecc98fbb71cef9e7f9da15b39..e0b6eb4d8be3c5359de1d391c42b2ba83f7a1ba4 100644 --- a/nixos/modules/tahoe.nix +++ b/nixos/modules/tahoe.nix @@ -48,7 +48,7 @@ in default = pkgs.tahoelafs; defaultText = "pkgs.tahoelafs"; type = types.package; - example = literalExample "pkgs.tahoelafs"; + example = "pkgs.tahoelafs"; description = '' The package to use for the Tahoe LAFS daemon. ''; @@ -78,7 +78,7 @@ in default = pkgs.tahoelafs; defaultText = "pkgs.tahoelafs"; type = types.package; - example = literalExample "pkgs.tahoelafs"; + example = "pkgs.tahoelafs"; description = '' The package to use for the Tahoe LAFS daemon. ''; diff --git a/nixos/pkgs/default.nix b/nixos/pkgs/default.nix index 3d534430377cb5fbbf0739d60a8a7ca9bb0419f6..bfc30b36101c220434606832127a7e8ca0a70490 100644 --- a/nixos/pkgs/default.nix +++ b/nixos/pkgs/default.nix @@ -4,21 +4,12 @@ # # pkgs.callPackage ./nixos/pkgs {buildPlatform, hostPlatform, callPackage}: -let - # Our own nixpkgs fork: - ourpkgs = import ../../nixpkgs-ps.nix { - # Ensure that the fork is configured for the same system - # as we were called with. - localSystem = buildPlatform; - crossSystem = hostPlatform; - # Ensure that configuration of the system where this runs - # doesn't leak into what we build. - # See https://github.com/NixOS/nixpkgs/issues/62513 - config = {}; - overlays = []; - }; -in { + leasereport = callPackage ./leasereport {}; + # `privatestorage` is a derivation with a good Tahoe+ZKAP environment + # that is exposed by ZKAPAuthorizer. 
+ privatestorage = callPackage ./privatestorage {}; + zkap-spending-service = callPackage ./zkap-spending-service {}; zkapissuer = callPackage ./zkapissuer {}; - inherit (ourpkgs) privatestorage leasereport; + megacli2prom = callPackage ./megacli2prom {}; } diff --git a/nixos/pkgs/leasereport/default.nix b/nixos/pkgs/leasereport/default.nix new file mode 100644 index 0000000000000000000000000000000000000000..7b028ab35c6cf9f78b9a11b0c6dafe1a82d5cb84 --- /dev/null +++ b/nixos/pkgs/leasereport/default.nix @@ -0,0 +1,8 @@ +{ callPackage, fetchFromGitLab, lib }: +let + repo-data = lib.importJSON ./repo.json; + repo = fetchFromGitLab (builtins.removeAttrs repo-data [ "branch" ]); + LeaseReport = (import "${repo}/nix").LeaseReport; +in + LeaseReport.components.exes.LeaseReport + diff --git a/nixos/pkgs/leasereport/repo.json b/nixos/pkgs/leasereport/repo.json new file mode 100644 index 0000000000000000000000000000000000000000..759814a124d0a4bab23411bebd8de19f5f021060 --- /dev/null +++ b/nixos/pkgs/leasereport/repo.json @@ -0,0 +1,9 @@ +{ + "owner": "privatestorage", + "repo": "LeaseReport", + "branch": "main", + "domain": "whetstone.privatestorage.io", + "rev": "3739ffde14e698f56118a444e6946edb736b6983", + "outputHashAlgo": "sha512", + "outputHash": "37b4hrhjghvza0bqvmngcdapqfjjjiv0gx90y0i4wvj72nf1xsh7g2kwpvjm4prpb5s7fxb50x971xfw4sqpwwsk2zdll4nbl5764ij" +} diff --git a/nixos/pkgs/megacli2prom/default.nix b/nixos/pkgs/megacli2prom/default.nix new file mode 100644 index 0000000000000000000000000000000000000000..942f43ff747e4e6ecaa90a7bd1d6bc3c1927cb0e --- /dev/null +++ b/nixos/pkgs/megacli2prom/default.nix @@ -0,0 +1,18 @@ +{ pkgs ? 
import <nixpkgs> {} }: + +let + repo-data = pkgs.lib.importJSON ./repo.json; + repo = pkgs.fetchFromGitHub (builtins.removeAttrs repo-data [ "branch" ]); + +in +pkgs.stdenv.mkDerivation { + name = "megacli2prom"; + buildInputs = [ pkgs.python3 pkgs.megacli ]; + src = repo; + installPhase = '' + mkdir -p $out/bin + cp ./megacli2prom.py $out/bin/megacli2prom + chmod +x $out/bin/megacli2prom + ''; +} + diff --git a/nixos/pkgs/megacli2prom/repo.json b/nixos/pkgs/megacli2prom/repo.json new file mode 100644 index 0000000000000000000000000000000000000000..3c8cd0af95adf95e22def4e727b8c2c5d12044aa --- /dev/null +++ b/nixos/pkgs/megacli2prom/repo.json @@ -0,0 +1,8 @@ +{ + "owner": "PrivateStorageio", + "repo": "megacli2prom", + "branch": "main", + "rev": "9536933d325c843b2662f80486660bf81d73941e", + "outputHashAlgo": "sha512", + "outputHash": "1xrsv0bkmazbhqarx84lhvmrzzdv1bm04xvr0hw1yrw1f4xb450f4pwgapnkjczy0l4c6rp3pmh64cblgbs3ki30wacbv1bqzv5745g" +} \ No newline at end of file diff --git a/nixos/pkgs/privatestorage/default.nix b/nixos/pkgs/privatestorage/default.nix new file mode 100644 index 0000000000000000000000000000000000000000..bd487af32941f6db920ea2d43ec89e9eded38201 --- /dev/null +++ b/nixos/pkgs/privatestorage/default.nix @@ -0,0 +1,8 @@ +{ fetchFromGitHub, callPackage, lib }: +let + repo-data = lib.importJSON ./repo.json; + repo = fetchFromGitHub (builtins.removeAttrs repo-data [ "branch" ]); + privatestorage = callPackage repo {}; +in + privatestorage.privatestorage + diff --git a/nixos/pkgs/privatestorage/repo.json b/nixos/pkgs/privatestorage/repo.json new file mode 100644 index 0000000000000000000000000000000000000000..81f6e18ba4bbec657a5a5ba543ef05408bf472ad --- /dev/null +++ b/nixos/pkgs/privatestorage/repo.json @@ -0,0 +1,8 @@ +{ + "owner": "PrivateStorageio", + "branch": "main", + "repo": "ZKAPAuthorizer", + "rev": "b61f3d4a3f5eb72cb600dd83796a1aaca2931e07", + "outputHashAlgo": "sha512", + "outputHash": 
"2d7a9m34jx1k38fmiwskgwd1ryyhrb56m9nam12fd66shl8qzmlfcr1lwf063qi1wqdzb2g7998vxbv3c2bmvw7g6iqwzjmsck2czpn" +} \ No newline at end of file diff --git a/nixos/pkgs/zkap-spending-service/default.nix b/nixos/pkgs/zkap-spending-service/default.nix new file mode 100644 index 0000000000000000000000000000000000000000..4716109e7add7af74f032ee1668be1394cf05b17 --- /dev/null +++ b/nixos/pkgs/zkap-spending-service/default.nix @@ -0,0 +1,12 @@ +{ callPackage, fetchFromGitLab, lib }: +let + repo-data = lib.importJSON ./repo.json; + + repo = fetchFromGitLab (builtins.removeAttrs repo-data [ "branch" ]); +in +# We want to check the revision the service reports against the revsion +# that we install. The upsream derivation doesn't currently know its own +# version, but we do have it here. Thus, we add it as a meta attribute +# to the derviation provided from upstream. +lib.addMetaAttrs { inherit (repo-data) rev; } + (callPackage repo {}) diff --git a/nixos/pkgs/zkap-spending-service/repo.json b/nixos/pkgs/zkap-spending-service/repo.json new file mode 100644 index 0000000000000000000000000000000000000000..69f7a30053de661f2c7829384e9496e49077cfd9 --- /dev/null +++ b/nixos/pkgs/zkap-spending-service/repo.json @@ -0,0 +1,9 @@ +{ + "owner": "privatestorage", + "repo": "zkap-spending-service", + "rev": "cbf7509f429ffd6e6cf37a73e4ff84a9c5ce1141", + "branch": "main", + "domain": "whetstone.privatestorage.io", + "outputHash": "04g7pcykc2525cg3z7wg5834s7vqn82xaqjvf52l6dnxv3mb9xr93kk505dvxcwhgfbqpim5i479s9kqd8gi7q3lq5wn5fq7rf7lkrj", + "outputHashAlgo": "sha512" +} diff --git a/nixos/pkgs/zkapissuer/default.nix b/nixos/pkgs/zkapissuer/default.nix index b4f90d3582cd686fbdf62a6267cb1070c05e9c57..efa55ff108e72fb7d78d95c6db46bddcdca1116f 100644 --- a/nixos/pkgs/zkapissuer/default.nix +++ b/nixos/pkgs/zkapissuer/default.nix @@ -1,6 +1,7 @@ -{ callPackage }: +{ callPackage, fetchFromGitHub, lib }: let - repo = callPackage ./repo.nix { }; + repo-data = lib.importJSON ./repo.json; + repo = 
fetchFromGitHub (builtins.removeAttrs repo-data [ "branch" ]); PaymentServer = (import "${repo}/nix").PaymentServer; in PaymentServer.components.exes."PaymentServer-exe" diff --git a/nixos/pkgs/zkapissuer/repo.json b/nixos/pkgs/zkapissuer/repo.json new file mode 100644 index 0000000000000000000000000000000000000000..0a003dc61620fd92b1a618e9845763e276c9693a --- /dev/null +++ b/nixos/pkgs/zkapissuer/repo.json @@ -0,0 +1,8 @@ +{ + "owner": "PrivateStorageio", + "repo": "PaymentServer", + "rev": "e080beb14ec58ffe8e55c35e6dddd46c5082887f", + "branch": "main", + "outputHashAlgo": "sha256", + "outputHash": "1zck9kawbs2lkr3qjipira9gawa4gxlqijqqjrmlvvyp9mr0fgxm" +} diff --git a/nixos/pkgs/zkapissuer/repo.nix b/nixos/pkgs/zkapissuer/repo.nix deleted file mode 100644 index 6646a2e32eb8e5a747e4491ce43f706fee65724c..0000000000000000000000000000000000000000 --- a/nixos/pkgs/zkapissuer/repo.nix +++ /dev/null @@ -1,7 +0,0 @@ -{ fetchFromGitHub }: -fetchFromGitHub { - owner = "PrivateStorageio"; - repo = "PaymentServer"; - rev = "ff30e85c231a3b5ad76426bbf8801f8f76884367"; - sha256 = "1spz19f5z96shmfpazj0rv6877xvchf3gl49a4xahjbbsz39x34x"; -} diff --git a/nixos/system-tests.nix b/nixos/system-tests.nix index 73b6665ab91e4d9a8a2200fb0eec7ff596f79b39..218132fe2cd3857f4c201085b4df56a411c794d4 100644 --- a/nixos/system-tests.nix +++ b/nixos/system-tests.nix @@ -1,7 +1,7 @@ # The overall system test suite for PrivateStorageio NixOS configuration. 
-let - pkgs = import ../nixpkgs-2105.nix { }; -in { +{ pkgs }: +{ private-storage = pkgs.nixosTest ./tests/private-storage.nix; + spending = pkgs.nixosTest ./tests/spending.nix; tahoe = pkgs.nixosTest ./tests/tahoe.nix; } diff --git a/nixos/tests/private-storage.nix b/nixos/tests/private-storage.nix index 6fb85a6713b4668ef4bdfa239480485bfbb52a18..a208ce249f1f1227f966e38a1c62ab6166d187f8 100644 --- a/nixos/tests/private-storage.nix +++ b/nixos/tests/private-storage.nix @@ -40,6 +40,7 @@ let basename = "signing-key.private"; in pkgs.writeText basename key; + ristrettoPublicKey = "xoNHEqAi+kC5EWfqN+kuDINhjQTwGrSQyshHvGFpoys="; stripeSecretKeyPath = let @@ -209,8 +210,8 @@ in { try: ${runOnNode "introducer" [ run-introducer "/tmp/node.pem" (toString introducerPort) introducerFURL ]} except: - code, log = introducer.execute('cat /tmp/stdout /tmp/stderr') - introducer.log(log) + code, output = introducer.execute('cat /tmp/stdout /tmp/stderr') + introducer.log(output) raise # @@ -237,15 +238,15 @@ in { # # Storage appears to be working so try to get a client to speak with it. # - ${runOnNode "client" [ run-client "/tmp/client" introducerFURL issuerURL ]} + ${runOnNode "client" [ run-client "/tmp/client" introducerFURL issuerURL ristrettoPublicKey ]} client.wait_for_open_port(3456) # Make sure the fake Stripe API server is ready for requests. try: api_stripe_com.wait_for_unit("api.stripe.com") except: - code, log = api_stripe_com.execute('journalctl -u api.stripe.com') - api_stripe_com.log(log) + code, output = api_stripe_com.execute('journalctl -u api.stripe.com') + api_stripe_com.log(output) raise # Get some ZKAPs from the issuer. @@ -258,21 +259,21 @@ in { voucher ]} except: - code, log = client.execute('cat /tmp/stdout /tmp/stderr'); - client.log(log) + code, output = client.execute('cat /tmp/stdout /tmp/stderr'); + client.log(output) # Dump the fake Stripe API server logs, too, since the error may arise # from a PaymentServer/Stripe interaction. 
- code, log = api_stripe_com.execute('journalctl -u api.stripe.com') - api_stripe_com.log(log) + code, output = api_stripe_com.execute('journalctl -u api.stripe.com') + api_stripe_com.log(output) raise # The client should be prepped now. Make it try to use some storage. try: ${runOnNode "client" [ exercise-storage "/tmp/client" ]} except: - code, log = client.execute('cat /tmp/stdout /tmp/stderr') - client.log(log) + code, output = client.execute('cat /tmp/stdout /tmp/stderr') + client.log(output) raise # It should be possible to restart the storage service without the @@ -285,16 +286,16 @@ in { if (before != after): raise Exception('fURL changes after storage node restart') except: - code, log = storage.execute('cat /tmp/stdout /tmp/stderr') - storage.log(log) + code, output = storage.execute('cat /tmp/stdout /tmp/stderr') + storage.log(output) raise # The client should actually still work, too. try: ${runOnNode "client" [ exercise-storage "/tmp/client" ]} except: - code, log = client.execute('cat /tmp/stdout /tmp/stderr') - client.log(log) + code, output = client.execute('cat /tmp/stdout /tmp/stderr') + client.log(output) raise # The issuer metrics should be accessible from the monitoring network. 
diff --git a/nixos/tests/run-client.py b/nixos/tests/run-client.py index e6cde321bdeb8a2b2493c984cce116a0287b16d1..df37bb48901a8c97dbe6889c86af2b40d52b22f3 100755 --- a/nixos/tests/run-client.py +++ b/nixos/tests/run-client.py @@ -12,7 +12,7 @@ from subprocess import check_output from configparser import ConfigParser def main(): - (nodePath, introducerFURL, issuerURL) = argv[1:] + (nodePath, introducerFURL, issuerURL, publicKey) = argv[1:] run(["tahoe", "--version"]) run([ @@ -33,6 +33,7 @@ def main(): config.add_section(u"storageclient.plugins.privatestorageio-zkapauthz-v1") config.set(u"storageclient.plugins.privatestorageio-zkapauthz-v1", u"redeemer", u"ristretto") config.set(u"storageclient.plugins.privatestorageio-zkapauthz-v1", u"ristretto-issuer-root-url", issuerURL) + config.set(u"storageclient.plugins.privatestorageio-zkapauthz-v1", u"allowed-public-keys", publicKey) # This has to agree with the PaymentServer configuration at the configured # issuer location. Presently PaymentServer has 50000 hard-coded as the # correct value. diff --git a/nixos/tests/spending.nix b/nixos/tests/spending.nix new file mode 100644 index 0000000000000000000000000000000000000000..8500471a58ff3f447e03ec1bf9005ff626169113 --- /dev/null +++ b/nixos/tests/spending.nix @@ -0,0 +1,45 @@ +{ pkgs, lib, ... }: +{ + name = "zkap-spending-service"; + nodes = { + spending = { config, pkgs, ourpkgs, modulesPath, ... }: { + imports = [ + ../modules/packages.nix + ../modules/spending.nix + ]; + + services.private-storage-spending.enable = true; + services.private-storage-spending.domain = "localhost"; + }; + external = { ... }: { + # A node that has no particular configuration, for testing access rules + # for external hosts. 
+ }; + }; + testScript = { nodes }: let + revision = nodes.spending.config.passthru.ourpkgs.zkap-spending-service.meta.rev; + curl = "${pkgs.curl}/bin/curl -sSf --max-time 5"; + in + '' + import json + + start_all() + + spending.wait_for_open_port(80) + with subtest("Ensure we can ping the spending service"): + output = spending.succeed("${curl} http://localhost/v1/_ping") + assert json.loads(output)["status"] == "ok", "Could not ping spending service." + with subtest("Ensure external hosts can ping the spending service"): + output = external.succeed("${curl} http://spending/v1/_ping") + assert json.loads(output)["status"] == "ok", "Could not ping spending service." + with subtest("Ensure that the spending service version matches the expected version"): + output = spending.succeed("${curl} http://localhost/v1/_version") + assert json.loads(output)["revision"] == "${revision}", "Spending service revision does not match." + with subtest("Ensure that the spending service generates metrics"): + # TODO: We should pass "-H 'accept: application/openmetrics-text'" here. + # See https://github.com/prometheus/prometheus/issues/8932 + output = spending.succeed("${curl} http://localhost/metrics | ${pkgs.prometheus}/bin/promtool check metrics") + with subtest("Ensure that the metrics are not accesible from other machines"): + output = external.fail("${curl} http://spending/metrics") + ''; +} diff --git a/nixos/unit-tests.nix b/nixos/unit-tests.nix index 75016a17d128fabe11f4ecaad65dba3471ed863d..b9f72bf95901af2668d68d1a09814c2bc2a9cd93 100644 --- a/nixos/unit-tests.nix +++ b/nixos/unit-tests.nix @@ -1,7 +1,6 @@ # The overall unit test suite for PrivateStorageio NixOS configuration. +{ pkgs }: let - pkgs = import <nixpkgs> { }; - # Total the numbers in a list. 
sum = builtins.foldl' (a: b: a + b) 0; diff --git a/nixpkgs-2105.json b/nixpkgs-2105.json index f79aa88bc0bb97b26c4668ac1d2c4efcdb25b9fb..523c1468f35019c6685f3a8486603d0936732dbe 100644 --- a/nixpkgs-2105.json +++ b/nixpkgs-2105.json @@ -1,5 +1,5 @@ { "name": "release2105", - "url": "https://releases.nixos.org/nixos/21.05/nixos-21.05.3065.b3083bc6933/nixexprs.tar.xz", - "sha256": "186vni8rij8bhd6n5n9h55jf2x78v9zdy2gn9v4cpjhajp4pvzm0" -} + "url": "https://releases.nixos.org/nixos/21.05/nixos-21.05.4547.2949ed36539/nixexprs.tar.xz", + "sha256": "0nm5znl7lh3qws29ppzpzsqscyw3hk7q0128xqmga2g86qcmy38x" +} \ No newline at end of file diff --git a/nixpkgs-ps.json b/nixpkgs-ps.json deleted file mode 100644 index 58add30734e8f7b9b2840a9be38693ceb6a4249d..0000000000000000000000000000000000000000 --- a/nixpkgs-ps.json +++ /dev/null @@ -1,4 +0,0 @@ -{ "name": "nixpkgs" -, "url": "https://github.com/PrivateStorageio/nixpkgs/archive/5ebd5af2d5c6caf23735c8c0e6bc27357fa8d2a8.tar.gz" -, "sha256": "1g2bvs8prqjskzv8s1qmh36k7rmj98jib0syqbrq02xxzw5dpqb4" -} diff --git a/nixpkgs-ps.nix b/nixpkgs-ps.nix deleted file mode 100644 index d98a53843052fda824f4ed7e34db50524df36ce2..0000000000000000000000000000000000000000 --- a/nixpkgs-ps.nix +++ /dev/null @@ -1 +0,0 @@ -import (builtins.fetchTarball (builtins.fromJSON (builtins.readFile ./nixpkgs-ps.json))) diff --git a/privatestorageio.nix b/privatestorageio.nix deleted file mode 100644 index cde46b16f6ed537bb5ee74c5641409b11716e11a..0000000000000000000000000000000000000000 --- a/privatestorageio.nix +++ /dev/null @@ -1,19 +0,0 @@ -{ stdenv, lib, graphviz, python3Packages }: -stdenv.mkDerivation rec { - version = "0.0"; - name = "privatestorageio-${version}"; - src = lib.cleanSource ./.; - - depsBuildBuild = [ - graphviz - ]; - - buildPhase = '' - ${python3Packages.sphinx}/bin/sphinx-build -W docs/source docs/build - ''; - - installPhase = '' - mkdir $out - mv docs/build $out/docs - ''; -} diff --git a/tools/default.nix b/tools/default.nix 
index f9a0b1ff8d902f3072886939ad11e1e223ffbb7e..b10bb5f209c44c3ccba5cf509655e6d25fbb88da 100644 --- a/tools/default.nix +++ b/tools/default.nix @@ -15,6 +15,8 @@ let }; python-commands = [ ./update-nixpkgs + ./update-gitlab-repo + ./update-github-repo ]; in # This derivation creates a package that wraps our tools to setup an environment diff --git a/tools/update-github-repo b/tools/update-github-repo new file mode 100755 index 0000000000000000000000000000000000000000..0e7e1511fc017c360660dc9fb752ff03f315f9bb --- /dev/null +++ b/tools/update-github-repo @@ -0,0 +1,89 @@ +#!/usr/bin/env python + +""" +Update a pinned github repository. + +Pass this path to a JSON file and it will update it to the latest +version of the branch it specifies. You can also pass a different +branch or repository owner, which will update the file to point at +the new branch/repository, and update to the latest version. +""" + +import argparse +import json +from pathlib import Path + +import httpx +from ps_tools import get_url_hash + +HASH_TYPE = "sha512" + +ARCHIVE_TEMPLATE = "https://api.github.com/repos/{owner}/{repo}/tarball/{rev}" +BRANCH_TEMPLATE = ( + "https://api.github.com/repos/{owner}/{repo}/commits/{branch}" +) + + +def get_github_commit(config): + response = httpx.get(BRANCH_TEMPLATE.format(**config)) + response.raise_for_status() + return response.json()["sha"] + + +def get_github_archive_url(config): + return ARCHIVE_TEMPLATE.format(**config) + + +def main(): + parser = argparse.ArgumentParser(description=__doc__) + parser.add_argument( + "repo_file", + metavar="repo-file", + type=Path, + help="JSON file with pinned configuration.", + ) + parser.add_argument( + "--branch", + type=str, + help="Branch to update to.", + ) + parser.add_argument( + "--owner", + type=str, + help="Repository owner to update to.", + ) + parser.add_argument( + "--rev", + type=str, + help="Revision to pin.", + ) + parser.add_argument( + "--dry-run", + action="store_true", + ) + args = 
parser.parse_args() + + repo_file = args.repo_file + config = json.loads(repo_file.read_text()) + + for key in ["owner", "branch"]: + if getattr(args, key) is not None: + config[key] = getattr(args, key) + + if args.rev is not None: + config["rev"] = args.rev + else: + config["rev"] = get_github_commit(config) + + archive_url = get_github_archive_url(config) + config.update(get_url_hash(HASH_TYPE, "source", archive_url)) + + output = json.dumps(config, indent=2) + if args.dry_run: + print(output) + else: + repo_file.write_text(output) + + +if __name__ == "__main__": + main() diff --git a/tools/update-gitlab-repo b/tools/update-gitlab-repo new file mode 100755 index 0000000000000000000000000000000000000000..ddc82cb7bfd943ed3b4b80f79cf9e47b447c8b7d --- /dev/null +++ b/tools/update-gitlab-repo @@ -0,0 +1,89 @@ +#!/usr/bin/env python + +""" +Update a pinned gitlab repository. + +Pass this path to a JSON file and it will update it to the latest +version of the branch it specifies. You can also pass a different +branch or repository owner, which will update the file to point at +the new branch/repository, and update to the latest version. 
+""" + +import argparse +import json +from pathlib import Path + +import httpx +from ps_tools import get_url_hash + +HASH_TYPE = "sha512" + +ARCHIVE_TEMPLATE = "https://{domain}/api/v4/projects/{owner}%2F{repo}/repository/archive.tar.gz?sha={rev}" +BRANCH_TEMPLATE = ( + "https://{domain}/api/v4/projects/{owner}%2F{repo}/repository/branches/{branch}" +) + + +def get_gitlab_commit(config): + response = httpx.get(BRANCH_TEMPLATE.format(**config)) + response.raise_for_status() + return response.json()["commit"]["id"] + + +def get_gitlab_archive_url(config): + return ARCHIVE_TEMPLATE.format(**config) + + +def main(): + parser = argparse.ArgumentParser(description=__doc__) + parser.add_argument( + "repo_file", + metavar="repo-file", + type=Path, + help="JSON file with pinned configuration.", + ) + parser.add_argument( + "--branch", + type=str, + help="Branch to update to.", + ) + parser.add_argument( + "--owner", + type=str, + help="Repository owner to update to.", + ) + parser.add_argument( + "--rev", + type=str, + help="Revision to pin.", + ) + parser.add_argument( + "--dry-run", + action="store_true", + ) + args = parser.parse_args() + + repo_file = args.repo_file + config = json.loads(repo_file.read_text()) + + for key in ["owner", "branch"]: + if getattr(args, key) is not None: + config[key] = getattr(args, key) + + if args.rev is not None: + config["rev"] = args.rev + else: + config["rev"] = get_gitlab_commit(config) + + archive_url = get_gitlab_archive_url(config) + config.update(get_url_hash(HASH_TYPE, "source", archive_url)) + + output = json.dumps(config, indent=2) + if args.dry_run: + print(output) + else: + repo_file.write_text(output) + + +if __name__ == "__main__": + main()