diff --git a/src/Tahoe/Directory.hs b/src/Tahoe/Directory.hs index c0c58dbcaf1b224137cac6f030921999bb5f1681..611a9b08e837e93b337f8997fcd8559fc16df65c 100644 --- a/src/Tahoe/Directory.hs +++ b/src/Tahoe/Directory.hs @@ -1 +1,7 @@ -module Tahoe.Directory where +module Tahoe.Directory ( + module Tahoe.Directory.Internal.Parsing, + module Tahoe.Directory.Internal.Types, +) where + +import Tahoe.Directory.Internal.Parsing (parse, serialize) +import Tahoe.Directory.Internal.Types (Directory (..), Entry (..)) diff --git a/src/Tahoe/Directory/Internal/Parsing.hs b/src/Tahoe/Directory/Internal/Parsing.hs new file mode 100644 index 0000000000000000000000000000000000000000..8b8106577d5091ac1af668ff7c848d7481b42891 --- /dev/null +++ b/src/Tahoe/Directory/Internal/Parsing.hs @@ -0,0 +1,87 @@ +-- | Parsing and serialization for directories and their entries. +module Tahoe.Directory.Internal.Parsing where + +import Control.Monad (void) +import qualified Data.ByteString as B +import qualified Data.ByteString.Char8 as C8 +import qualified Data.Text as T +import Data.Text.Encoding (decodeUtf8', encodeUtf8) +import Data.Void (Void) +import Tahoe.Directory.Internal.Types (Directory (..), Entry (..)) +import Text.Megaparsec (MonadParsec (eof, label, takeP), ParseErrorBundle, Parsec, many, parse) +import Text.Megaparsec.Byte (string) +import Text.Megaparsec.Byte.Lexer (decimal) + +-- | Parse the serialized form of a directory into a Directory. +parse :: B.ByteString -> Either (ParseErrorBundle B.ByteString Void) Directory +parse = Text.Megaparsec.parse pDirectory "Directory" + +-- | The parser type we will parse in. +type Parser = Parsec Void B.ByteString + +-- XXX This doesn't do bounds checking. + +-- | Parse the base ten representation of a natural number. +natural :: Integral i => Parser i +natural = decimal + +{- | Parse a netstring-encoded value, applying a sub-parser to the encoded + string. +-} +pNetstring :: + -- | A function that takes the length of the string encoded in the + -- netstring and returns a parser for the value the encoded string + -- represents. + (Int -> Parser a) -> + -- | A parser for the value. + Parser a +pNetstring pInner = do + len <- natural + void $ string ":" + result <- pInner len + void $ string "," + pure result + +pDirectory :: Parser Directory +pDirectory = Directory <$> (many pEntry <* eof) + +pEntry :: Parser Entry +pEntry = + label "entry" $ + pNetstring $ \_ -> + Entry + <$> label "name" (pNetstring pUTF8) + <*> label "ro_uri" (pNetstring (takeP Nothing)) + <*> label "rw_uri" (pNetstring (takeP Nothing)) + <*> label "metadata" (pNetstring (takeP Nothing)) + +pUTF8 :: Int -> Parser T.Text +pUTF8 n = do + bs <- takeP Nothing n + either (\e -> fail $ "UTF-8 parsing failed: " <> show e) pure (decodeUtf8' bs) + +-- | Serialize a Directory to the canonical bytes representation. +serialize :: Directory -> B.ByteString +serialize Directory{directoryChildren} = B.concat $ serializeEntry <$> directoryChildren + +serializeEntry :: Entry -> B.ByteString +serializeEntry Entry{..} = + -- XXX The name must be NFC normalized apparently, try unicode-transforms + -- library. Perhaps we should enforce normalization in the Entry + -- constructor? + netstring . B.concat $ + [ netstring . encodeUtf8 $ entryName + , netstring entryReader + , netstring entryEncryptedWriter + , netstring entryMetadata + ] + +-- | Encode a bytestring as a netstring. +netstring :: B.ByteString -> B.ByteString +netstring xs = + B.concat + [ C8.pack . show . B.length $ xs + , ":" + , xs + , "," + ] diff --git a/src/Tahoe/Directory/Internal/Types.hs b/src/Tahoe/Directory/Internal/Types.hs new file mode 100644 index 0000000000000000000000000000000000000000..8a4c723c1c1cbd976027a1ce2065c1a5c246b8be --- /dev/null +++ b/src/Tahoe/Directory/Internal/Types.hs @@ -0,0 +1,24 @@ +module Tahoe.Directory.Internal.Types where + +import qualified Data.ByteString as B +import qualified Data.Text as T + +-- | A collection of references to other objects. +newtype Directory = Directory + { directoryChildren :: [Entry] + } + deriving (Eq, Show) + +-- | A reference to an object of any kind. +data Entry = Entry + { -- | The name of this entry in its containing directory. XXX What if UTF-8 decoding fails? + entryName :: T.Text + , -- | A capability for reading the contents of this entry. XXX Structured cap instead + entryReader :: B.ByteString + , -- | An encrypted capability for performing writes to this entry. XXX + -- Document the encryption scheme. + entryEncryptedWriter :: B.ByteString + , -- | Additional metadata about this entry such as last modification time. XXX How to represent this mixed type collection? + entryMetadata :: B.ByteString + } + deriving (Eq, Show) diff --git a/tahoe-directory.cabal b/tahoe-directory.cabal index 47eae685561004b35e87986fb30cdb1827845c01..fad8cfd975977dd00e4c5ef0a71bec7e1a352f60 100644 --- a/tahoe-directory.cabal +++ b/tahoe-directory.cabal @@ -88,9 +88,16 @@ library import: warnings import: language hs-source-dirs: src - exposed-modules: Tahoe.Directory - build-depends: base >=4 && <5.0 + exposed-modules: + Tahoe.Directory + Tahoe.Directory.Internal.Parsing + Tahoe.Directory.Internal.Types + build-depends: + , base >=4 && <5.0 + , bytestring + , megaparsec + , text test-suite tahoe-directory-test -- Import common warning flags. @@ -115,11 +122,17 @@ test-suite tahoe-directory-test -- The entrypoint to the test suite. main-is: Main.hs - other-modules: Spec + other-modules: + Generators + Spec -- Test dependencies. build-depends: , base + , bytestring + , hedgehog , tahoe-directory , tasty + , tasty-hedgehog , tasty-hunit + , text diff --git a/test/Generators.hs b/test/Generators.hs new file mode 100644 index 0000000000000000000000000000000000000000..6fddafd6c0c31bbcb24891b41077b96e9af76879 --- /dev/null +++ b/test/Generators.hs @@ -0,0 +1,26 @@ +module Generators where + +import qualified Data.ByteString as B +import qualified Data.Text as T +import Hedgehog (MonadGen) +import qualified Hedgehog.Gen as Gen +import qualified Hedgehog.Range as Range +import Tahoe.Directory (Directory (..), Entry (..)) + +directories :: MonadGen m => m Directory +directories = Directory <$> Gen.list (Range.exponential 0 100) entries + +entries :: MonadGen m => m Entry +entries = Entry <$> entryNames <*> entryReaders <*> entryEncryptedWriters <*> entryMetadatas + +entryNames :: MonadGen m => m T.Text +entryNames = Gen.text (Range.exponential 1 100) Gen.unicode + +entryReaders :: MonadGen m => m B.ByteString +entryReaders = pure "URI:CHK:blub:blab" + +entryEncryptedWriters :: MonadGen m => m B.ByteString +entryEncryptedWriters = pure "\1\2\3\4\5\6\7\8\9\10" + +entryMetadatas :: MonadGen m => m B.ByteString +entryMetadatas = pure "{stuff: [goes, here]}" diff --git a/test/Spec.hs b/test/Spec.hs index 95f6a5305a515305c3e3861c779c6a1447a83158..ca23fb82884a3afcc25418e037a9cb329d898d74 100644 --- a/test/Spec.hs +++ b/test/Spec.hs @@ -1,10 +1,29 @@ module Spec where +import qualified Data.ByteString as B +import Generators (directories) +import Hedgehog (forAll, property, tripping) import System.IO (hSetEncoding, stderr, stdout, utf8) +import qualified Tahoe.Directory as Directory import Test.Tasty (TestTree, defaultMain, testGroup) +import Test.Tasty.HUnit (assertEqual, testCase) +import Test.Tasty.Hedgehog (testProperty) tests :: TestTree -tests = testGroup "Directory" [] +tests = + testGroup + "Directory" + [ testCase "well-known serialized directory round-trips through parse . serialize" $ do + original <- B.readFile "test/example.dir" + let parsed = Directory.parse original + serialized = Directory.serialize <$> parsed + + assertEqual "original /= serialized" (Right original) serialized + , testProperty "Directory round-trips through serialize . parse" $ + property $ do + directory <- forAll directories + tripping directory Directory.serialize Directory.parse + ] main :: IO () main = do diff --git a/test/example.dir b/test/example.dir new file mode 100644 index 0000000000000000000000000000000000000000..9320fd5c4edbf2c89046d7c82ee5b8be5c8a98e3 Binary files /dev/null and b/test/example.dir differ