From 0ac87d412ced1dd8550d1e1a8446017f6da6a5b5 Mon Sep 17 00:00:00 2001
From: Jean-Paul Calderone <exarkun@twistedmatrix.com>
Date: Fri, 23 Jun 2023 10:59:14 -0400
Subject: [PATCH] Try to ask for the right share number

And try more than one if necessary / possible
---
 gbs-downloader.cabal                      |   7 +-
 src/Tahoe/Download/Internal/Capability.hs |  59 +++++++++--
 test/Spec.hs                              | 121 +++++++++++++++++++---
 3 files changed, 166 insertions(+), 21 deletions(-)

diff --git a/gbs-downloader.cabal b/gbs-downloader.cabal
index 56b04e1..23a9f79 100644
--- a/gbs-downloader.cabal
+++ b/gbs-downloader.cabal
@@ -233,8 +233,13 @@ test-suite gbs-downloader-test
     , data-default-class
     , gbs-downloader
     , hedgehog
+    , http-client
+    , http-types
+    , memory
+    , servant-client
+    , servant-client-core
     , tahoe-chk
-    , tahoe-ssk           >=0.2 && <0.3
+    , tahoe-ssk            >=0.2 && <0.3
     , tasty
     , tasty-hedgehog
     , tasty-hunit
diff --git a/src/Tahoe/Download/Internal/Capability.hs b/src/Tahoe/Download/Internal/Capability.hs
index 540027a..e0b4c3e 100644
--- a/src/Tahoe/Download/Internal/Capability.hs
+++ b/src/Tahoe/Download/Internal/Capability.hs
@@ -2,12 +2,16 @@
 
 module Tahoe.Download.Internal.Capability where
 
+import Control.Exception (SomeException, throwIO, try)
 import Control.Monad.IO.Class
 import Data.Bifunctor (Bifunctor (..))
-import Data.Binary (decodeOrFail)
+import Data.Binary (Word8, decodeOrFail)
 import Data.Binary.Get (ByteOffset)
 import qualified Data.ByteString.Lazy as LB
+import Data.Foldable (foldlM)
 import qualified Data.Set as Set
+import Network.HTTP.Types (Status (statusCode))
+import Servant.Client (ClientError (FailureResponse), ResponseF (..))
 import qualified Tahoe.CHK
 import qualified Tahoe.CHK.Capability as CHK
 import qualified Tahoe.CHK.Encrypt
@@ -31,6 +35,9 @@ class Verifiable v where
 
     -- | Get the encoding parameters used for the shares of this capability.
     -- The information is presented as a tuple of (required, total).
+
+    -- SDMF can fail to figure this out in lots of ways so consider switching
+    -- to Either or something?
     getRequiredTotal :: MonadIO m => v -> StorageServer -> m (Maybe (Int, Int))
 
     -- | Get the location information for shares of this capability.
@@ -90,19 +97,59 @@ instance Readable CHK.Reader where
             Just ct ->
                 pure . Right $ Tahoe.CHK.Encrypt.decrypt (CHK.readKey r) ct
 
+firstJustsM :: (Monad m, Foldable f) => f (m (Maybe a)) -> m (Maybe a)
+firstJustsM = foldlM go Nothing
+  where
+    go :: Monad m => Maybe a -> m (Maybe a) -> m (Maybe a)
+    go Nothing action = action
+    go result@(Just _) _action = return result
+
 instance Verifiable SDMF.Verifier where
     type ShareT SDMF.Verifier = SDMF.Share
 
     getShareNumbers v s = liftIO $ storageServerGetBuckets s (SDMF.Keys.unStorageIndex $ SDMF.verifierStorageIndex v)
     getStorageIndex = SDMF.Keys.unStorageIndex . SDMF.verifierStorageIndex
-    getRequiredTotal SDMF.Verifier{..} ss = do
-        shareBytes <- liftIO $ storageServerRead ss (SDMF.Keys.unStorageIndex verifierStorageIndex) 0
-        case decodeOrFail (LB.fromStrict shareBytes) of
-            Left _ -> pure Nothing
-            Right (_, _, sh) -> pure $ pure (fromIntegral $ SDMF.shareRequiredShares sh, fromIntegral $ SDMF.shareTotalShares sh)
+    getRequiredTotal SDMF.Verifier{..} ss = liftIO $ do
+        -- Find out what shares it has.  Any share will do but we need to tell
+        -- it which we want.
+        errorOrShareNums <- try $ storageServerGetBuckets ss storageIndex
+        case Set.toList <$> errorOrShareNums of
+            -- Literally anything could go wrong with that...
+            Left (e :: SomeException) -> throwIO e
+            -- Or the server may have no shares for this storage index.
+            Right [] -> pure Nothing
+            -- Or it might have at least one.  Check each in turn, stopping as
+            -- soon as we get a result.
+            Right shareNums -> firstJustsM (getParams <$> shareNums)
+      where
+        -- Get the Required, Total parameters for one share number, if
+        -- possible.
+        getParams :: MonadIO m => Word8 -> m (Maybe (Int, Int))
+        getParams shareNum = liftIO $ do
+            errorOrShareBytes <- try $ storageServerRead ss storageIndex shareNum
+            case errorOrShareBytes of
+                Left e@(FailureResponse _ response) ->
+                    -- It should not be very surprising for the requested share to
+                    -- be missing from the server (you can never be sure what a
+                    -- server will have).  Other issues should probably be kept
+                    -- visible.
+                    if isStatusCode 404 response
+                        then pure Nothing
+                        else throwIO e
+                Left e -> throwIO e
+                Right shareBytes ->
+                    case decodeOrFail (LB.fromStrict shareBytes) of
+                        Left _ -> pure Nothing
+                        Right (_, _, sh) -> pure $ pure (fromIntegral $ SDMF.shareRequiredShares sh, fromIntegral $ SDMF.shareTotalShares sh)
+
+        storageIndex = SDMF.Keys.unStorageIndex verifierStorageIndex
 
     deserializeShare _ = fmap (\(_, _, c) -> c) . decodeOrFail
 
+-- | Test the status code of a response for equality against a given value.
+isStatusCode :: Int -> ResponseF a -> Bool
+isStatusCode expected = (expected ==) . statusCode . responseStatusCode
+
 instance Readable SDMF.Reader where
     type Verifier SDMF.Reader = SDMF.Verifier
     getVerifiable = SDMF.readerVerifier
diff --git a/test/Spec.hs b/test/Spec.hs
index f9111f6..4a3cb85 100644
--- a/test/Spec.hs
+++ b/test/Spec.hs
@@ -1,25 +1,31 @@
 module Main where
 
 import Control.Exception (Exception, throwIO)
-import Control.Monad (replicateM, zipWithM_)
+import Control.Monad (replicateM, when)
 import Control.Monad.IO.Class (liftIO)
 import Crypto.Cipher.Types (nullIV)
 import Crypto.Classes (buildKey)
+import qualified Crypto.Hash
+import Data.Bifunctor (bimap)
 import qualified Data.Binary as Binary
 import qualified Data.ByteString as B
 import Data.ByteString.Base32 (encodeBase32Unpadded)
 import qualified Data.ByteString.Lazy as BL
 import Data.Default.Class (Default (def))
 import qualified Data.Map.Strict as Map
+import Data.Sequence (Seq (Empty), fromList)
 import qualified Data.Set as Set
 import qualified Data.Text as T
 import Data.Text.Encoding (encodeUtf8)
 import Data.Word (Word16)
 import qualified Data.Yaml as Yaml
 import Generators (genAnnouncements, genParameters, genRSAKeys)
-import Hedgehog (MonadGen, annotateShow, diff, forAll, property, tripping)
+import Hedgehog (MonadGen, annotateShow, diff, discard, forAll, property, tripping)
 import qualified Hedgehog.Gen as Gen
 import qualified Hedgehog.Range as Range
+import Network.HTTP.Types.Status (Status (..))
+import Network.HTTP.Types.Version (http11)
+import Servant.Client.Core (BaseUrl (..), ClientError (..), RequestF (..), ResponseF (..), Scheme (..))
 import System.IO (hSetEncoding, stderr, stdout, utf8)
 import Tahoe.Announcement (
     Announcements,
@@ -42,6 +48,7 @@ import Tahoe.Download (
     LookupServer,
     download,
  )
+import Tahoe.Download.Internal.Capability (getRequiredTotal)
 import qualified Tahoe.SDMF as SDMF
 import qualified Tahoe.SDMF.Keys as SDMF.Keys
 import Tahoe.Server (memoryStorageServer)
@@ -220,6 +227,42 @@ tests =
                 "download should fail with details about unreachable server"
                 (Left (NoReachableServers [StorageServerCommunicationError "BespokeFailure"]))
                 result
+        , testProperty "getRequiredTotal handles a share being missing from the server" $
+            property $ do
+                -- If we can recover any single share from the server then we can
+                -- inspect it for encoding parameters.
+
+                -- Generates configurations where the encoding parameters dicated
+                -- multiple shares but they may not be all placed on the server.
+                sequenceNumber <- forAll $ Gen.integral (Range.exponential 1 10000)
+                plaintext <- forAll $ BL.fromStrict <$> Gen.bytes (Range.exponential 56 1024)
+                Parameters{paramTotalShares, paramRequiredShares} <- forAll genParameters
+
+                -- Encrypt and encode the data into shares.
+                keypair <- SDMF.Keys.KeyPair <$> forAll genRSAKeys
+                let iv = SDMF.Keys.SDMF_IV nullIV
+                    ciphertext = SDMF.encrypt keypair iv plaintext
+                (shares, cap) <- liftIO $ SDMF.encode keypair iv sequenceNumber paramRequiredShares paramTotalShares ciphertext
+
+                -- Pick some shares for placement on a single server.
+                placedShares <- forAll $ Gen.subsequence (zip [0 ..] (Binary.encode <$> shares))
+                when (null placedShares) discard
+
+                let verifier = SDMF.readerVerifier . SDMF.writerReader $ cap
+                    storageIndex = SDMF.Keys.unStorageIndex . SDMF.verifierStorageIndex $ verifier
+
+                -- Be sure to create the server last to avoid having Hedgehog
+                -- re-use it for multiple cases.
+                server <- liftIO memoryStorageServer
+                liftIO $
+                    placeShares
+                        storageIndex
+                        placedShares
+                        [length placedShares]
+                        [server]
+
+                r <- getRequiredTotal verifier server
+                diff (Just (fromIntegral paramRequiredShares, fromIntegral paramTotalShares)) (==) r
         , testCase "IO exceptions from storageServerRead are handled" $ do
             -- An announcement for our server
             let anns = Map.fromList [simpleAnnouncement "abc123" "somewhere"]
@@ -285,7 +328,7 @@ tests =
                 (shares, cap) <- liftIO $ Tahoe.CHK.encode key params ciphertext
 
                 -- Distribute the shares.
-                liftIO $ placeShares (storageIndex . verifier $ cap) (Binary.encode <$> shares) perServerShareCount servers 0
+                liftIO $ placeShares (storageIndex . verifier $ cap) (zip [0 ..] (Binary.encode <$> shares)) perServerShareCount servers
 
                 let serverMap = Map.fromList $ zip (Set.toList serverIDs') servers
                     lookupServer = someServers serverMap
@@ -329,7 +372,7 @@ tests =
                 let storageIndex = SDMF.Keys.unStorageIndex . SDMF.verifierStorageIndex . SDMF.readerVerifier . SDMF.writerReader $ writeCap
                     readCap = SDMF.writerReader writeCap
                 -- Distribute the shares.
-                liftIO $ placeShares storageIndex (Binary.encode <$> shares) perServerShareCount servers 0
+                liftIO $ placeShares storageIndex (zip [0 ..] (Binary.encode <$> shares)) perServerShareCount servers
 
                 let serverMap = Map.fromList $ zip (Set.toList serverIDs') servers
                     lookupServer = someServers serverMap
@@ -374,22 +417,30 @@ tests =
     -- We wish that share numbers were an opaque type instead of a
     -- numeric/integral type.  This is not the place to argue the point
     -- though.
-    placeShares :: B.ByteString -> [BL.ByteString] -> [Int] -> [StorageServer] -> Int -> IO ()
+    placeShares ::
+        --  The storage index to place shares at.
+        B.ByteString ->
+        -- The number and bytes of the shares themselves.
+        [(Int, BL.ByteString)] ->
+        -- The number of shares to place on each server.
+        [Int] ->
+        -- The servers to place shares on.
+        [StorageServer] ->
+        IO ()
     -- Out of shares, done.
-    placeShares _ [] _ _ _ = pure ()
+    placeShares _ [] _ _ = pure ()
     -- Out of placement info but not out of shares is a programming error.
-    placeShares _ _ [] _ _ = throwIO RanOutOfPlacementInfo
+    placeShares _ _ [] _ = throwIO RanOutOfPlacementInfo
     -- Out of servers but not out of shares is a programming error.
-    placeShares _ _ _ [] _ = throwIO RanOutOfServers
+    placeShares _ _ _ [] = throwIO RanOutOfServers
     -- Having some of all three means we can make progress.
-    placeShares si shares (n : ns) (s : ss) sharesSoFar = do
+    placeShares si shares (n : ns) (s : ss) = do
         -- write the right number of shares to this server
-        zipWithM_
-            (\shnum share -> storageServerWrite s si shnum 0 share)
-            [fromIntegral sharesSoFar ..]
-            (BL.toStrict <$> take n shares)
+        mapM_
+            (\(shnum, share) -> storageServerWrite s si shnum 0 share)
+            (bimap fromIntegral BL.toStrict <$> take n shares)
         -- recurse to write the rest
-        placeShares si (drop n shares) ns ss (sharesSoFar + n)
+        placeShares si (drop n shares) ns ss
 
     -- Make up a distinct (but nonsense) announcement for a given storage
     -- server identifier.
@@ -420,6 +471,48 @@ trivialCap required total = Reader{..}
     size = 1234
     verifier = Verifier{..}
 
+trivialSDMFVerifier :: SDMF.Verifier
+trivialSDMFVerifier = SDMF.Verifier{..}
+  where
+    verifierStorageIndex = SDMF.Keys.StorageIndex $ B.pack [0 .. 15]
+    verifierVerificationKeyHash = Crypto.Hash.hash $ B.pack [0 .. 31]
+
+-- | A real 404 response from tahoe-great-black-swamp 0.3.0.0.
+failure404 :: ClientError
+failure404 =
+    FailureResponse
+        ( Request
+            { requestPath =
+                ( BaseUrl
+                    { baseUrlScheme = Https
+                    , baseUrlHost = "storage002.private.storage"
+                    , baseUrlPort = 8899
+                    , baseUrlPath = ""
+                    }
+                , "/storage/v1/mutable/6yo5yo6uxniiiwtyxv46bfvwm4/0"
+                )
+            , requestQueryString = Empty
+            , requestBody = Nothing
+            , requestAccept = fromList ["application/octet-stream"]
+            , requestHeaders = Empty
+            , requestHttpVersion = http11
+            , requestMethod = "GET"
+            }
+        )
+        ( Response
+            { responseStatusCode = Status{statusCode = 404, statusMessage = "Not Found"}
+            , responseHttpVersion = http11
+            , responseHeaders =
+                fromList
+                    [ ("Transfer-Encoding", "chunked")
+                    , ("Server", "TwistedWeb/22.10.0")
+                    , ("Date", "Fri, 23 Jun 2023 11:27:49 GMT")
+                    , ("Content-Type", "application/octet-stream")
+                    ]
+            , responseBody = ""
+            }
+        )
+
 main :: IO ()
 main = do
     -- Hedgehog writes some non-ASCII and the whole test process will die if
-- 
GitLab