From 53ce0477bd344dc02237d31c80a78408671aa613 Mon Sep 17 00:00:00 2001
From: Jean-Paul Calderone <exarkun@twistedmatrix.com>
Date: Mon, 22 May 2023 16:34:27 -0400
Subject: [PATCH] some simple parsing code for SDMF capabilities

---
 src/Tahoe/SDMF.hs                     |   4 +
 src/Tahoe/SDMF/Internal/Capability.hs | 123 ++++++++++++++++++++++++--
 src/Tahoe/SDMF/Internal/Encoding.hs   |   5 +-
 tahoe-ssk.cabal                       |  38 ++++----
 test/Spec.hs                          |  17 +++-
 5 files changed, 158 insertions(+), 29 deletions(-)

diff --git a/src/Tahoe/SDMF.hs b/src/Tahoe/SDMF.hs
index 3546634..194439d 100644
--- a/src/Tahoe/SDMF.hs
+++ b/src/Tahoe/SDMF.hs
@@ -9,6 +9,10 @@ module Tahoe.SDMF (
 import Tahoe.SDMF.Internal.Capability (
     Reader (..),
     Writer (..),
+    pCapability,
+    pReader,
+    pVerifier,
+    pWriter,
  )
 import Tahoe.SDMF.Internal.Encoding (
     decode,
diff --git a/src/Tahoe/SDMF/Internal/Capability.hs b/src/Tahoe/SDMF/Internal/Capability.hs
index 9db37c3..e259a5b 100644
--- a/src/Tahoe/SDMF/Internal/Capability.hs
+++ b/src/Tahoe/SDMF/Internal/Capability.hs
@@ -3,23 +3,134 @@ module Tahoe.SDMF.Internal.Capability where
 
 import Prelude hiding (Read)
 
+import Control.Applicative ((<|>))
+import Control.Monad (void)
+import Crypto.Hash (Digest, SHA256, digestFromByteString)
+import Data.Binary (decode)
 import qualified Data.ByteString as B
-import Tahoe.SDMF.Internal.Keys (Read, Write, deriveReadKey)
+import qualified Data.ByteString.Base32 as B
+import qualified Data.ByteString.Lazy as LB
+import Data.Maybe (fromMaybe)
+import qualified Data.Set as Set
+import qualified Data.Text as T
+import qualified Data.Text.Encoding as T
+import Data.Void (Void)
+import Data.Word (Word16)
+import Tahoe.SDMF.Internal.Keys (Read, StorageIndex (StorageIndex), Write, deriveReadKey, deriveStorageIndex)
+import Text.Megaparsec (ErrorFancy (ErrorFail), Parsec, count, failure, fancyFailure, oneOf)
+import Text.Megaparsec.Char (char, string)
+
+data Verifier = Verifier
+    { verifierStorageIndex :: StorageIndex
+    , verifierVerificationKeyHash :: Digest SHA256
+    }
 
 -- | A read capability for an SDMF object.
 data Reader = Reader
     { readerReadKey :: Read
-    , readerVerificationKeyHash :: B.ByteString
+    , readerVerifier :: Verifier
     }
-    deriving (Show)
 
 -- | A write capability for an SDMF object.
 data Writer = Writer
     { writerWriteKey :: Write
     , writerReader :: Reader
     }
-    deriving (Show)
 
 -- | Diminish a write key to a read key and wrap it in a reader capability.
-deriveReader :: Write -> B.ByteString -> Maybe Reader
-deriveReader w fingerprint = Reader <$> deriveReadKey w <*> pure fingerprint
+deriveReader :: Write -> Digest SHA256 -> Maybe Reader
+deriveReader w fingerprint = Reader <$> readKey <*> verifier
+  where
+    readKey = deriveReadKey w
+    verifier = flip deriveVerifier fingerprint <$> readKey
+
+deriveVerifier :: Read -> Digest SHA256 -> Verifier
+deriveVerifier readKey = Verifier storageIndex
+  where
+    storageIndex = deriveStorageIndex readKey
+
+data SDMF = SDMFVerifier Verifier | SDMFReader Reader | SDMFWriter Writer
+
+type Parser = Parsec Void T.Text
+
+-- | A parser for any kind of SDMF capability type.
+pCapability :: Parser SDMF
+pCapability = (SDMFVerifier <$> pVerifier) <|> (SDMFReader <$> pReader) <|> (SDMFWriter <$> pWriter)
+
+-- | A parser for an SDMF verifier capability.
+pVerifier :: Parser Verifier
+pVerifier = uncurry Verifier <$> pPieces "URI:SSK-Verifier:" StorageIndex
+
+-- | A parser for an SDMF reader capability.
+pReader :: Parser Reader
+pReader = do
+    (readKey, verificationKeyHash) <- pPieces "URI:SSK-RO:" (decode . LB.fromStrict)
+    let verifier = deriveVerifier readKey verificationKeyHash
+    pure $ Reader readKey verifier
+
+-- | A parser for an SDMF writer capability.
+pWriter :: Parser Writer
+pWriter = do
+    (writeKey, verificationKeyHash) <- pPieces "URI:SSK:" (decode . LB.fromStrict)
+    let reader = deriveReader writeKey verificationKeyHash
+    case Writer writeKey <$> reader of
+        Nothing -> failure Nothing mempty
+        Just writer -> pure writer
+
+pPieces :: T.Text -> (B.ByteString -> a) -> Parser (a, Digest SHA256)
+pPieces prefix convertSecret = do
+    void $ string prefix
+    secret <- convertSecret <$> pBase32 rfc3548Alphabet 128
+    void $ char ':'
+    digestBytes <- pBase32 rfc3548Alphabet 256
+    case digestFromByteString digestBytes of
+        Nothing -> failure Nothing mempty
+        Just verificationKeyHash ->
+            pure (secret, verificationKeyHash)
+
+{- | A parser combinator for an arbitrary byte string of a fixed length,
+ encoded using base32.
+
+ TODO: Avoid duplicating this implementation here and in tahoe-chk.
+-}
+pBase32 ::
+    -- | The alphabet to use.  For example, *rfc3548Alphabet*.
+    [Char] ->
+    -- | The number of bits in the encoded byte string.
+    Word16 ->
+    -- | A parser for the byte string.  Strings that are not valid base32 will
+    -- be rejected.  Strings that are the wrong length are *not necessarily*
+    -- currently rejected!  Please fix that, somebody.
+    Parser B.ByteString
+pBase32 alpha bits = do
+    b32Text <- pBase32Text
+    either (fancyFailure . Set.singleton . ErrorFail . T.unpack) pure (decodeBase32Text b32Text)
+  where
+    decodeBase32Text = B.decodeBase32Unpadded . T.encodeUtf8
+    pBase32Text = T.snoc <$> stem <*> trailer
+
+    -- Determine how many full characters to expect along with how many bits
+    -- are left to expect encoded in the final character.
+    (full, extra) = bits `divMod` 5
+
+    -- Match the base32 characters that represent the full 5 bits
+    -- possible.  fromIntegral is okay here because `full` is only a
+    -- Word16 and will definitely fit safely into the Int count wants.
+    stem :: Parser T.Text
+    stem = T.pack <$> count (fromIntegral full) (oneOf alpha)
+
+    -- Match the final character that represents fewer than 5 bits.
+    trailer :: Parser Char
+    trailer = oneOf $ trailingChars alpha extra
+
+    -- XXX The real trailing character set is smaller than this.  This
+    -- parser will let through invalid characters that result in giving us
+    -- possibly too many bits.
+    trailingChars :: [Char] -> Word16 -> [Char]
+    trailingChars alpha' _ = alpha'
+
+{- | The RFC3548 standard alphabet used by Gnutella, Content-Addressable Web,
+ THEX, Bitzi, Web-Calculus...
+-}
+rfc3548Alphabet :: [Char]
+rfc3548Alphabet = "abcdefghijklmnopqrstuvwxyz234567"
diff --git a/src/Tahoe/SDMF/Internal/Encoding.hs b/src/Tahoe/SDMF/Internal/Encoding.hs
index 085a09f..f95b90d 100644
--- a/src/Tahoe/SDMF/Internal/Encoding.hs
+++ b/src/Tahoe/SDMF/Internal/Encoding.hs
@@ -8,6 +8,7 @@ module Tahoe.SDMF.Internal.Encoding where
 import Control.Monad (when)
 import Control.Monad.IO.Class (MonadIO (liftIO))
 import Crypto.Cipher.Types (BlockCipher (blockSize), IV, makeIV)
+import Crypto.Hash (digestFromByteString)
 import Crypto.Random (MonadRandom (getRandomBytes))
 import Data.Bifunctor (Bifunctor (bimap))
 import qualified Data.ByteString as B
@@ -129,8 +130,8 @@ capabilityForKeyPair keypair =
     Writer <$> writerWriteKey <*> maybeToEither' "Failed to derive read capability" writerReader
   where
     writerWriteKey = maybeToEither "Failed to derive write key" . Keys.deriveWriteKey . Keys.toSignatureKey $ keypair
-    verificationKeyHash = Keys.deriveVerificationHash . Keys.toVerificationKey $ keypair
-    writerReader = deriveReader <$> writerWriteKey <*> pure verificationKeyHash
+    verificationKeyHash = digestFromByteString . Keys.deriveVerificationHash . Keys.toVerificationKey $ keypair
+    writerReader = deriveReader <$> writerWriteKey <*> maybeToEither "Failed to interpret verification hash" verificationKeyHash
 
 maybeToEither :: a -> Maybe b -> Either a b
 maybeToEither a Nothing = Left a
diff --git a/tahoe-ssk.cabal b/tahoe-ssk.cabal
index ba613c5..d7562cc 100644
--- a/tahoe-ssk.cabal
+++ b/tahoe-ssk.cabal
@@ -58,7 +58,7 @@ extra-doc-files:    CHANGELOG.md
 extra-source-files: test/data/*
 
 common warnings
-  ghc-options: -Wall
+  ghc-options: -Wall -Werror=missing-fields
 
 common language
   default-extensions:
@@ -70,11 +70,14 @@ common language
     RecordWildCards
     TypeApplications
 
+  -- Base language which the package is written in.
   default-language:   Haskell2010
 
 library
-  import:          warnings
-  import:          language
+  import:
+    warnings
+    , language
+
   hs-source-dirs:  src
   exposed-modules:
     Tahoe.SDMF
@@ -94,7 +97,9 @@ library
     , binary
     , bytestring
     , cereal
+    , containers
     , cryptonite
+    , megaparsec
     , memory
     , text
     , x509
@@ -104,13 +109,9 @@ library
   build-depends:   tahoe-chk
 
 test-suite tahoe-ssk-test
-  -- Import common warning flags.
-  import:             warnings
-  import:             language
-
-  -- Base language which the package is written in.
-  default-language:   Haskell2010
-  default-extensions: OverloadedStrings
+  import:
+    warnings
+    , language
 
   -- Modules included in this executable, other than Main.
   -- other-modules:
@@ -119,13 +120,13 @@ test-suite tahoe-ssk-test
   -- other-extensions:
 
   -- The interface type and version of the test suite.
-  type:               exitcode-stdio-1.0
+  type:           exitcode-stdio-1.0
 
   -- Directories containing source files.
-  hs-source-dirs:     test
+  hs-source-dirs: test
 
   -- The entrypoint to the test suite.
-  main-is:            Main.hs
+  main-is:        Main.hs
   other-modules:
     Generators
     Spec
@@ -140,6 +141,7 @@ test-suite tahoe-ssk-test
     , bytestring
     , cryptonite
     , hedgehog
+    , megaparsec
     , memory
     , tahoe-chk
     , tahoe-ssk
@@ -151,10 +153,12 @@ test-suite tahoe-ssk-test
 
 -- A helper for generating RSA key pairs for use by the test suite.
 executable make-keypairs
-  import:           warnings
-  default-language: Haskell2010
-  main-is:          Main.hs
-  hs-source-dirs:   make-keypairs
+  import:
+    warnings
+    , language
+
+  main-is:        Main.hs
+  hs-source-dirs: make-keypairs
   build-depends:
     , asn1-encoding
     , asn1-types
diff --git a/test/Spec.hs b/test/Spec.hs
index 176fe88..24ebcb9 100644
--- a/test/Spec.hs
+++ b/test/Spec.hs
@@ -1,6 +1,3 @@
-{-# LANGUAGE FlexibleInstances #-}
-{-# LANGUAGE NamedFieldPuns #-}
-
 module Spec where
 
 import Hedgehog (
@@ -14,6 +11,7 @@ import Hedgehog (
 import Control.Monad (when)
 import Control.Monad.IO.Class (liftIO)
 import Crypto.Cipher.Types (makeIV)
+import Crypto.Hash (digestFromByteString)
 import Data.ASN1.BinaryEncoding (DER (DER))
 import Data.ASN1.Encoding (decodeASN1')
 import qualified Data.Binary as Binary
@@ -22,17 +20,20 @@ import qualified Data.ByteArray as ByteArray
 import qualified Data.ByteString as B
 import Data.ByteString.Base32 (decodeBase32Unpadded, encodeBase32Unpadded)
 import qualified Data.ByteString.Lazy as LB
+import Data.Either (rights)
 import qualified Data.Text as T
 import Generators (encodingParameters, genRSAKeys, shareHashChains, shares)
 import qualified Hedgehog.Gen as Gen
 import qualified Hedgehog.Range as Range
 import System.IO (hSetEncoding, stderr, stdout, utf8)
 import qualified Tahoe.SDMF
+import Tahoe.SDMF.Internal.Capability (deriveVerifier)
 import Tahoe.SDMF.Internal.Keys (signatureKeyFromBytes, signatureKeyToBytes)
 import qualified Tahoe.SDMF.Keys as Keys
 import Test.Tasty (TestTree, defaultMain, testGroup)
 import Test.Tasty.HUnit (assertEqual, testCase)
 import Test.Tasty.Hedgehog (testProperty)
+import Text.Megaparsec (parse)
 
 -- The test suite compares against some hard-coded opaque strings.  These
 -- expected values were determined using the expected_values.py program in
@@ -142,6 +143,14 @@ tests =
                             "write enabler: expected /= derived"
                             expectedWriteEnabler
                             (fmtKey derivedWriteEnabler)
+        , testCase "known-correct SDMF capabilities can be parsed" $ do
+            let validWrite = "URI:SSK:vbopclzrkxces6okoqfarapmou:xlwog3jxbgsuaddh3bsofwmyhncv7fanmo7ujhqiy26usx2v2neq"
+                validRead = "URI:SSK-RO:ro7pnpq6duaduuolookwbv5lqy:xlwog3jxbgsuaddh3bsofwmyhncv7fanmo7ujhqiy26usx2v2neq"
+                validVerify = "URI:SSK-Verifier:gz4s2zkkqy2geblvv77atyoppi:xlwog3jxbgsuaddh3bsofwmyhncv7fanmo7ujhqiy26usx2v2neq"
+
+                parsed = rights $ parse Tahoe.SDMF.pCapability "<test>" <$> [validWrite, validVerify, validRead]
+
+            assertEqual "parsing failed" 3 (length parsed)
         , testProperty "Share round-trips through bytes" $
             property $ do
                 share <- forAll shares
@@ -179,7 +188,7 @@ tests =
 
             let (Right writeKey) = Binary.decode . LB.fromStrict <$> decodeBase32Unpadded "vdv6pcqkblsguvkagrblr3gopu"
                 (Just readerReadKey) = Keys.deriveReadKey writeKey
-                readerVerificationKeyHash = "junk"
+                (Just readerVerifier) = deriveVerifier readerReadKey <$> digestFromByteString ("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" :: B.ByteString)
                 reader = Tahoe.SDMF.Reader{..}
             ciphertext <- Tahoe.SDMF.decode reader [(0, s0), (6, s6), (9, s9)]
             let (Right expectedCiphertext) = LB.fromStrict <$> decodeBase32Unpadded "6gutkha6qd4g3lxahth2dw2wjekadwoxvmazrnfq5u5j6a7quu5qy6nz3dvosx2gisdjshdtd5xphqvqjco5pq73qi"
-- 
GitLab