diff --git a/src/Tahoe/Directory.hs b/src/Tahoe/Directory.hs index c0c58dbcaf1b224137cac6f030921999bb5f1681..94696e215b2a3d01218492bc5b2baea84aad8390 100644 --- a/src/Tahoe/Directory.hs +++ b/src/Tahoe/Directory.hs @@ -1 +1,5 @@ -module Tahoe.Directory where +module Tahoe.Directory ( + module Tahoe.Directory.Internal.Parsing, +) where + +import Tahoe.Directory.Internal.Parsing (parse, serialize) diff --git a/src/Tahoe/Directory/Internal/Parsing.hs b/src/Tahoe/Directory/Internal/Parsing.hs new file mode 100644 index 0000000000000000000000000000000000000000..2210fe3ff421ab48510dda06afadda717c1262e8 --- /dev/null +++ b/src/Tahoe/Directory/Internal/Parsing.hs @@ -0,0 +1,106 @@ +-- | Parsing and serialization for directories and their entries. +module Tahoe.Directory.Internal.Parsing where + +import Control.Monad (void) +import qualified Data.ByteString as B +import qualified Data.ByteString.Char8 as C8 +import qualified Data.Text as T +import Data.Text.Encoding (decodeUtf8', encodeUtf8) +import Data.Void (Void) +import Text.Megaparsec (MonadParsec (eof, label, takeP), ParseErrorBundle, Parsec, many, parse) +import Text.Megaparsec.Byte (string) +import Text.Megaparsec.Byte.Lexer (decimal) + +-- | A collection of references to other objects. +newtype Directory = Directory + { directoryChildren :: [Entry] + } + deriving (Eq) + +-- | A reference to an object of any kind. +data Entry = Entry + { -- | The name of this entry in its containing directory. + entryName :: T.Text -- XXX What if decoding fails? + , -- | A capability for reading the contents of this entry. + entryReader :: B.ByteString -- XXX Structured cap instead + , -- | An encrypted capability for performing writes to this entry. XXX + -- Document the encryption scheme. + entryEncryptedWriter :: B.ByteString + , -- | Additional metadata about this entry such as last modification time. + entryMetadata :: B.ByteString -- XXX How to represent this mixed type collection? + } + deriving (Eq) + +-- | Parse the serialized form of a directory into a Directory. +parse :: B.ByteString -> Either (ParseErrorBundle B.ByteString Void) Directory +parse = Text.Megaparsec.parse pDirectory "Directory" + +-- | The parser type we will parse in. +type Parser = Parsec Void B.ByteString + +-- XXX This doesn't do bounds checking. + +-- | Parse the base ten representation of a natural number. +natural :: Integral i => Parser i +natural = decimal + +{- | Parse a netstring-encoded value, applying a sub-parser to the encoded + string. +-} +pNetstring :: + -- | A function that takes the length of the string encoded in the + -- netstring and returns a parser for the value the encoded string + -- represents. + (Int -> Parser a) -> + -- | A parser for the value. + Parser a +pNetstring pInner = do + len <- natural + void $ string ":" + result <- pInner len + void $ string "," + pure result + +pDirectory :: Parser Directory +pDirectory = Directory <$> (many pEntry <* eof) + +pEntry :: Parser Entry +pEntry = + label "entry" $ + pNetstring $ \_ -> + Entry + <$> label "name" (pNetstring pUTF8) + <*> label "ro_uri" (pNetstring (takeP Nothing)) + <*> label "rw_uri" (pNetstring (takeP Nothing)) + <*> label "metadata" (pNetstring (takeP Nothing)) + +pUTF8 :: Int -> Parser T.Text +pUTF8 n = do + bs <- takeP Nothing n + either (\e -> fail $ "UTF-8 parsing failed: " <> show e) pure (decodeUtf8' bs) + +-- | Serialize a Directory to the canonical bytes representation. +serialize :: Directory -> B.ByteString +serialize Directory{directoryChildren} = B.concat $ serializeEntry <$> directoryChildren + +serializeEntry :: Entry -> B.ByteString +serializeEntry Entry{..} = + -- XXX The name must be NFC normalized apparently, try unicode-transforms + -- library. Perhaps we should enforce normalization in the Entry + -- constructor? + netstring . B.concat $ + [ netstring . encodeUtf8 $ entryName + , netstring entryReader + , netstring entryEncryptedWriter + , netstring entryMetadata + ] + +-- | Encode a bytestring as a netstring. +netstring :: B.ByteString -> B.ByteString +netstring xs = + B.concat + [ C8.pack . show . B.length $ xs + , ":" + , xs + , "," + ] diff --git a/tahoe-directory.cabal b/tahoe-directory.cabal index 47eae685561004b35e87986fb30cdb1827845c01..df45944d6d44e79bb577a06eee1a91f135854102 100644 --- a/tahoe-directory.cabal +++ b/tahoe-directory.cabal @@ -88,9 +88,15 @@ library import: warnings import: language hs-source-dirs: src - exposed-modules: Tahoe.Directory - build-depends: base >=4 && <5.0 + exposed-modules: + Tahoe.Directory + Tahoe.Directory.Internal.Parsing + build-depends: + , base >=4 && <5.0 + , bytestring + , megaparsec + , text test-suite tahoe-directory-test -- Import common warning flags. @@ -120,6 +126,7 @@ test-suite tahoe-directory-test -- Test dependencies. build-depends: , base + , bytestring , tahoe-directory , tasty , tasty-hunit diff --git a/test/Spec.hs b/test/Spec.hs index 95f6a5305a515305c3e3861c779c6a1447a83158..d73a44947465bbf890238fdcbf831b09d1dc95f5 100644 --- a/test/Spec.hs +++ b/test/Spec.hs @@ -1,10 +1,22 @@ module Spec where +import qualified Data.ByteString as B import System.IO (hSetEncoding, stderr, stdout, utf8) +import qualified Tahoe.Directory as Directory import Test.Tasty (TestTree, defaultMain, testGroup) +import Test.Tasty.HUnit (assertEqual, testCase) tests :: TestTree -tests = testGroup "Directory" [] +tests = + testGroup + "Directory" + [ testCase "well-known serialized directory round-trips through parse . serialize" $ do + original <- B.readFile "test/example.dir" + let parsed = Directory.parse original + serialized = Directory.serialize <$> parsed + + assertEqual "original /= serialized" (Right original) serialized + ] main :: IO () main = do diff --git a/test/example.dir b/test/example.dir new file mode 100644 index 0000000000000000000000000000000000000000..9320fd5c4edbf2c89046d7c82ee5b8be5c8a98e3 Binary files /dev/null and b/test/example.dir differ