From 9119cf42fe057db1f4a1bfe0afcd5390ec0d19ec Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone <exarkun@twistedmatrix.com> Date: Sat, 20 May 2023 16:52:24 -0400 Subject: [PATCH] Parse and serialize one known-correct example from Tahoe-LAFS At least somewhat. We get all the fields out but we don't really interpret most of them yet. --- src/Tahoe/Directory.hs | 6 +- src/Tahoe/Directory/Internal/Parsing.hs | 106 ++++++++++++++++++++++++ tahoe-directory.cabal | 11 ++- test/Spec.hs | 14 +++- test/example.dir | Bin 0 -> 799 bytes 5 files changed, 133 insertions(+), 4 deletions(-) create mode 100644 src/Tahoe/Directory/Internal/Parsing.hs create mode 100644 test/example.dir diff --git a/src/Tahoe/Directory.hs b/src/Tahoe/Directory.hs index c0c58db..94696e2 100644 --- a/src/Tahoe/Directory.hs +++ b/src/Tahoe/Directory.hs @@ -1 +1,5 @@ -module Tahoe.Directory where +module Tahoe.Directory ( + module Tahoe.Directory.Internal.Parsing, +) where + +import Tahoe.Directory.Internal.Parsing (parse, serialize) diff --git a/src/Tahoe/Directory/Internal/Parsing.hs b/src/Tahoe/Directory/Internal/Parsing.hs new file mode 100644 index 0000000..2210fe3 --- /dev/null +++ b/src/Tahoe/Directory/Internal/Parsing.hs @@ -0,0 +1,106 @@ +-- | Parsing and serialization for directories and their entries. +module Tahoe.Directory.Internal.Parsing where + +import Control.Monad (void) +import qualified Data.ByteString as B +import qualified Data.ByteString.Char8 as C8 +import qualified Data.Text as T +import Data.Text.Encoding (decodeUtf8', encodeUtf8) +import Data.Void (Void) +import Text.Megaparsec (MonadParsec (eof, label, takeP), ParseErrorBundle, Parsec, many, parse) +import Text.Megaparsec.Byte (string) +import Text.Megaparsec.Byte.Lexer (decimal) + +-- | A collection of references to other objects. +newtype Directory = Directory + { directoryChildren :: [Entry] + } + deriving (Eq) + +-- | A reference to an object of any kind. +data Entry = Entry + { -- | The name of this entry in its containing directory. + entryName :: T.Text -- XXX What if decoding fails? + , -- | A capability for reading the contents of this entry. + entryReader :: B.ByteString -- XXX Structured cap instead + , -- | An encrypted capability for performing writes to this entry. XXX + -- Document the encryption scheme. + entryEncryptedWriter :: B.ByteString + , -- | Additional metadata about this entry such as last modification time. + entryMetadata :: B.ByteString -- XXX How to represent this mixed type collection? + } + deriving (Eq) + +-- | Parse the serialized form of a directory into a Directory. +parse :: B.ByteString -> Either (ParseErrorBundle B.ByteString Void) Directory +parse = Text.Megaparsec.parse pDirectory "Directory" + +-- | The parser type we will parse in. +type Parser = Parsec Void B.ByteString + +-- XXX This doesn't do bounds checking. + +-- | Parse the base ten representation of a natural number. +natural :: Integral i => Parser i +natural = decimal + +{- | Parse a netstring-encoded value, applying a sub-parser to the encoded + string. +-} +pNetstring :: + -- | A function that takes the length of the string encoded in the + -- netstring and returns a parser for the value the encoded string + -- represents. + (Int -> Parser a) -> + -- | A parser for the value. + Parser a +pNetstring pInner = do + len <- natural + void $ string ":" + result <- pInner len + void $ string "," + pure result + +pDirectory :: Parser Directory +pDirectory = Directory <$> (many pEntry <* eof) + +pEntry :: Parser Entry +pEntry = + label "entry" $ + pNetstring $ \_ -> + Entry + <$> label "name" (pNetstring pUTF8) + <*> label "ro_uri" (pNetstring (takeP Nothing)) + <*> label "rw_uri" (pNetstring (takeP Nothing)) + <*> label "metadata" (pNetstring (takeP Nothing)) + +pUTF8 :: Int -> Parser T.Text +pUTF8 n = do + bs <- takeP Nothing n + either (\e -> fail $ "UTF-8 parsing failed: " <> show e) pure (decodeUtf8' bs) + +-- | Serialize a Directory to the canonical bytes representation. +serialize :: Directory -> B.ByteString +serialize Directory{directoryChildren} = B.concat $ serializeEntry <$> directoryChildren + +serializeEntry :: Entry -> B.ByteString +serializeEntry Entry{..} = + -- XXX The name must be NFC normalized apparently, try unicode-transforms + -- library. Perhaps we should enforce normalization in the Entry + -- constructor? + netstring . B.concat $ + [ netstring . encodeUtf8 $ entryName + , netstring entryReader + , netstring entryEncryptedWriter + , netstring entryMetadata + ] + +-- | Encode a bytestring as a netstring. +netstring :: B.ByteString -> B.ByteString +netstring xs = + B.concat + [ C8.pack . show . B.length $ xs + , ":" + , xs + , "," + ] diff --git a/tahoe-directory.cabal b/tahoe-directory.cabal index 47eae68..df45944 100644 --- a/tahoe-directory.cabal +++ b/tahoe-directory.cabal @@ -88,9 +88,15 @@ library import: warnings import: language hs-source-dirs: src - exposed-modules: Tahoe.Directory - build-depends: base >=4 && <5.0 + exposed-modules: + Tahoe.Directory + Tahoe.Directory.Internal.Parsing + build-depends: + , base >=4 && <5.0 + , bytestring + , megaparsec + , text test-suite tahoe-directory-test -- Import common warning flags. @@ -120,6 +126,7 @@ test-suite tahoe-directory-test -- Test dependencies. build-depends: , base + , bytestring , tahoe-directory , tasty , tasty-hunit diff --git a/test/Spec.hs b/test/Spec.hs index 95f6a53..d73a449 100644 --- a/test/Spec.hs +++ b/test/Spec.hs @@ -1,10 +1,22 @@ module Spec where +import qualified Data.ByteString as B import System.IO (hSetEncoding, stderr, stdout, utf8) +import qualified Tahoe.Directory as Directory import Test.Tasty (TestTree, defaultMain, testGroup) +import Test.Tasty.HUnit (assertEqual, testCase) tests :: TestTree -tests = testGroup "Directory" [] +tests = + testGroup + "Directory" + [ testCase "well-known serialized directory round-trips through parse . serialize" $ do + original <- B.readFile "test/example.dir" + let parsed = Directory.parse original + serialized = Directory.serialize <$> parsed + + assertEqual "original /= serialized" (Right original) serialized + ] main :: IO () main = do diff --git a/test/example.dir b/test/example.dir new file mode 100644 index 0000000000000000000000000000000000000000..9320fd5c4edbf2c89046d7c82ee5b8be5c8a98e3 GIT binary patch literal 799 zcmcJNPe>F|9LLjyz_dT$twSnwh{kp2y?OKJDN&f<mS~xRprUWy?#{m1nVp?^Gdnvg zLPiBe(V>FKOP2(%QF-eSm6(?n7du!4T_RFCq=W}4wL@KmE**dHxqLp~A3z9;5XW@F zGou;CeaDLgO~V6y3Ea_j4%@Dh&QXBE&_G0IDOv(~Zu&*f#YyCGXqzF;7n6a>EyK#Q z&;rSVt?)E2Al)lCZr~ODOu(#?&5<P$46zu;0BI_a1xdwkNBNDKwktKWiQ$EjfvRhj zspZ!vMjl;0-1A}c^I*1oX>q!3b?xTtK$*2&A0u}KMZ@FGKFK(#X52X5Y_VLHdOowa zPf=DSq{T%U#^REs$_f%1cL>|rm6(_i1P~Xo@;iY?1q62FyA%D`E<l4qounA2ZYgOx zCJ!a#k(^!(Wl$ov=@t#3n<<ti|IG%$n2a<aYwB(`e~4`x2vjU%9_UHt37WVgb0*OT zTKal%=-WQca|3zlC<;rKo6Z7~N+E?|ANd>xP|0n1(gLv}TQ;d-nrYi5%$9+<H6cwp zQGRP-k17OE&lN-{<GGJ)=%u5#d^vTe13s9ne0P?$rGvF%ccp3S#rw+B$G4}KYO7cG z=K9w=7wbFk)n9FRK6UF@-K+CeP0_=ld84a$UvBWz<nYh?7utjN>OIGw1+5$F<<6DU zb1O54_Gi~tq|(>Tu_q1k=C|S6nUl(LbmU^){MiGo?GF8M6!vtzVIv#w{^d0Qh{vF! ODoB!ciTM|?9rG8=>mw`x literal 0 HcmV?d00001 -- GitLab