diff --git a/src/_zkapauthorizer/_storage_server.py b/src/_zkapauthorizer/_storage_server.py
index a2d4b9f2c5cce038eff98184ae4859cbde8f8b81..7aa17c840a705ffb15a656a8f85befda42836781 100644
--- a/src/_zkapauthorizer/_storage_server.py
+++ b/src/_zkapauthorizer/_storage_server.py
@@ -42,6 +42,7 @@ from os.path import (
 )
 from os import (
     listdir,
+    stat,
 )
 from datetime import (
     timedelta,
@@ -517,23 +518,50 @@ def get_storage_index_share_size(sharepath):
 
     :return int: The data size of the share in bytes.
     """
-    # Note Tahoe-LAFS immutable/layout.py makes some claims about how the
-    # share data is structured. A lot of this seems to be wrong.
-    # storage/immutable.py appears to have the correct information.
-    fmt = ">LL"
+    # From src/allmydata/storage/immutable.py
+    #
+    # The share file has the following layout:
+    #  0x00: share file version number, four bytes, current version is 1
+    #  0x04: share data length, four bytes big-endian = A # See Footnote 1 below.
+    #  0x08: number of leases, four bytes big-endian
+    #  0x0c: beginning of share data (see immutable.layout.WriteBucketProxy)
+    #  A+0x0c = B: first lease. Lease format is:
+    #   B+0x00: owner number, 4 bytes big-endian, 0 is reserved for no-owner
+    #   B+0x04: renew secret, 32 bytes (SHA256)
+    #   B+0x24: cancel secret, 32 bytes (SHA256)
+    #   B+0x44: expiration time, 4 bytes big-endian seconds-since-epoch
+    #   B+0x48: next lease, or end of record
+    #
+    # Footnote 1: as of Tahoe v1.3.0 this field is not used by storage
+    # servers, but it is still filled in by storage servers in case the
+    # storage server software gets downgraded from >= Tahoe v1.3.0 to < Tahoe
+    # v1.3.0, or the share file is moved from one storage server to
+    # another. The value stored in this field is truncated, so if the actual
+    # share data length is >= 2**32, then the value stored in this field will
+    # be the actual share data length modulo 2**32.
+
+    share_file_size = stat(sharepath).st_size
+    header_format = ">LLL"
+    header_size = calcsize(header_format)
     with open(sharepath, "rb") as share_file:
-        header = share_file.read(calcsize(fmt))
+        header = share_file.read(calcsize(header_format))
 
-    if len(header) != calcsize(fmt):
+    if len(header) != header_size:
         raise ValueError(
             "Tried to read {} bytes of share file header, got {!r} instead.".format(
-                calcsize(fmt),
+                calcsize(header_format),
                 header,
             ),
         )
 
-    version, share_data_length = unpack(fmt, header)
-    return share_data_length
+    version, _, number_of_leases = unpack(header_format, header)
+
+    if version != 1:
+        raise ValueError(
+            "Cannot interpret version {} share file.".format(version),
+        )
+
+    return share_file_size - header_size - (number_of_leases * (4 + 32 + 32 + 4))
 
 
 def get_lease_expiration(get_leases, storage_index_or_slot):
diff --git a/src/_zkapauthorizer/tests/storage_common.py b/src/_zkapauthorizer/tests/storage_common.py
index 4baf4de7b89ff6ab3c0ea7145b5d0fa347f3a8e9..d00a580c29adf51f1d39583012fbe09b11555678 100644
--- a/src/_zkapauthorizer/tests/storage_common.py
+++ b/src/_zkapauthorizer/tests/storage_common.py
@@ -16,6 +16,13 @@
 ``allmydata.storage``-related helpers shared across the test suite.
 """
 
+from os import (
+    SEEK_CUR,
+)
+from struct import (
+    pack,
+)
+
 from twisted.python.filepath import (
     FilePath,
 )
@@ -25,6 +32,9 @@ from .strategies import (
     bytes_for_share,
 )
 
+# Hard-coded in Tahoe-LAFS
+LEASE_INTERVAL = 60 * 60 * 24 * 31
+
 def cleanup_storage_server(storage_server):
     """
     Delete all of the shares held by the given storage server.
@@ -73,3 +83,53 @@ def write_toy_shares(
     for (sharenum, writer) in allocated.items():
         writer.remote_write(0, bytes_for_share(sharenum, size))
         writer.remote_close()
+
+
+def whitebox_write_sparse_share(sharepath, version, size, leases, now):
+    """
+    Write a zero-filled sparse (if the filesystem supports it) immutable share
+    to the given path.
+
+    This assumes knowledge of the Tahoe-LAFS share file format.
+
+    :param FilePath sharepath: The path to which to write the share file.
+    :param int version: The share version to write to the file.
+    :param int size: The share data size to write.
+    :param list leases: Renewal secrets for leases to write to the share file.
+    :param float now: The current time as a POSIX timestamp.
+    """
+    # Maybe-saturated size (what at least one Tahoe-LAFS comment claims is
+    # appropriate for large files)
+    internal_size = min(size, 2 ** 32 - 1)
+    apparent_size = size
+
+    header_format = ">LLL"
+    lease_format = ">L32s32sL"
+    with sharepath.open("wb") as share:
+        share.write(
+            pack(
+                header_format,
+                version,
+                internal_size,
+                len(leases),
+            ),
+        )
+        # Try to make it sparse by skipping all the data.
+        share.seek(apparent_size - 1, SEEK_CUR)
+        share.write(b"\0")
+        share.write(
+            b"".join(
+                pack(
+                    lease_format,
+                    # no owner
+                    0,
+                    renew,
+                    # no cancel secret
+                    b"",
+                    # expiration timestamp
+                    int(now + LEASE_INTERVAL),
+                )
+                for renew
+                in leases
+            ),
+        )
diff --git a/src/_zkapauthorizer/tests/strategies.py b/src/_zkapauthorizer/tests/strategies.py
index 7776ff739b3ec50b88b37f8ecf1a307a7eba1318..5faf8e74004357f64c009619da005465cd531515 100644
--- a/src/_zkapauthorizer/tests/strategies.py
+++ b/src/_zkapauthorizer/tests/strategies.py
@@ -511,6 +511,13 @@ def write_enabler_secrets():
     )
 
 
+def share_versions():
+    """
+    Build integers which could be Tahoe-LAFS share file version numbers.
+    """
+    return integers(min_value=0, max_value=2 ** 32 - 1)
+
+
 def sharenums():
     """
     Build Tahoe-LAFS share numbers.
@@ -532,17 +539,19 @@ def sharenum_sets():
     )
 
 
-def sizes():
+def sizes(
+    # Size 0 data isn't data, it's nothing.
+    min_value=1,
+    # Let this be larger than a single segment (2 ** 17) in case that matters
+    # to Tahoe-LAFS storage at all. I don't think it does, though.
+    max_value=2 ** 18,
+):
     """
     Build Tahoe-LAFS share sizes.
     """
     return integers(
-        # Size 0 data isn't data, it's nothing.
-        min_value=1,
-        # Let this be larger than a single segment (2 ** 17) in case that
-        # matters to Tahoe-LAFS storage at all. I don't think it does,
-        # though.
-        max_value=2 ** 18,
+        min_value=min_value,
+        max_value=max_value,
     )
 
 
diff --git a/src/_zkapauthorizer/tests/test_storage_protocol.py b/src/_zkapauthorizer/tests/test_storage_protocol.py
index f2b9b6895246583018d498422e84ad2265e4eb4c..a267c0667f3fe1b3101ab9e5a7a9eb10d8091a32 100644
--- a/src/_zkapauthorizer/tests/test_storage_protocol.py
+++ b/src/_zkapauthorizer/tests/test_storage_protocol.py
@@ -70,6 +70,10 @@ from challenge_bypass_ristretto import (
     random_signing_key,
 )
 
+from allmydata.storage.common import (
+    storage_index_to_dir,
+)
+
 from .common import (
     skipIf,
 )
@@ -82,6 +86,7 @@ from .strategies import (
     lease_renew_secrets,
     lease_cancel_secrets,
     write_enabler_secrets,
+    share_versions,
     sharenums,
     sharenum_sets,
     sizes,
@@ -97,8 +102,10 @@ from .fixtures import (
     AnonymousStorageServer,
 )
 from .storage_common import (
+    LEASE_INTERVAL,
     cleanup_storage_server,
     write_toy_shares,
+    whitebox_write_sparse_share,
 )
 from .foolscap import (
     LocalRemote,
@@ -119,7 +126,6 @@ from ..foolscap import (
     ShareStat,
 )
 
-
 class RequiredPassesTests(TestCase):
     """
     Tests for ``required_passes``.
@@ -353,19 +359,7 @@ class ShareTests(TestCase):
             Equals(int(now + self.server.LEASE_PERIOD.total_seconds())),
         )
 
-    @given(
-        storage_index=storage_indexes(),
-        renew_secret=lease_renew_secrets(),
-        cancel_secret=lease_cancel_secrets(),
-        sharenum=sharenums(),
-        size=sizes(),
-        clock=clocks(),
-    )
-    def test_stat_shares_immutable(self, storage_index, renew_secret, cancel_secret, sharenum, size, clock):
-        """
-        Size and lease information about immutable shares can be retrieved from a
-        storage server.
-        """
+    def _stat_shares_immutable_test(self, storage_index, sharenum, size, clock, leases, write_shares):
         # Hypothesis causes our storage server to be used many times. Clean
         # up between iterations.
         cleanup_storage_server(self.anonymous_storage_server)
@@ -376,23 +370,27 @@
         try:
             patch.setUp()
             # Create a share we can toy with.
-            write_toy_shares(
+            write_shares(
                 self.anonymous_storage_server,
                 storage_index,
-                renew_secret,
-                cancel_secret,
                 {sharenum},
                 size,
                 canary=self.canary,
             )
+            # Perhaps put some more leases on it. Leases might impact our
+            # ability to determine share data size.
+            for renew_secret in leases:
+                self.anonymous_storage_server.remote_add_lease(
+                    storage_index,
+                    renew_secret,
+                    b"",
+                )
         finally:
             patch.cleanUp()
 
         stats = extract_result(
             self.client.stat_shares([storage_index]),
         )
-        # Hard-coded in Tahoe-LAFS
-        LEASE_INTERVAL = 60 * 60 * 24 * 31
         expected = [{
             sharenum: ShareStat(
                 size=size,
@@ -404,6 +402,172 @@
             Equals(expected),
         )
 
+    @given(
+        storage_index=storage_indexes(),
+        renew_secret=lease_renew_secrets(),
+        cancel_secret=lease_cancel_secrets(),
+        sharenum=sharenums(),
+        size=sizes(),
+        clock=clocks(),
+        leases=lists(lease_renew_secrets(), unique=True),
+    )
+    def test_stat_shares_immutable(self, storage_index, renew_secret, cancel_secret, sharenum, size, clock, leases):
+        """
+        Size and lease information about immutable shares can be retrieved from a
+        storage server.
+ """ + return self._stat_shares_immutable_test( + storage_index, + sharenum, + size, + clock, + leases, + lambda storage_server, storage_index, sharenums, size, canary: write_toy_shares( + storage_server, + storage_index, + renew_secret, + cancel_secret, + sharenums, + size, + canary, + ), + ) + + @given( + storage_index=storage_indexes(), + sharenum=sharenums(), + size=sizes(), + clock=clocks(), + leases=lists(lease_renew_secrets(), unique=True, min_size=1), + version=share_versions(), + ) + def test_stat_shares_immutable_wrong_version(self, storage_index, sharenum, size, clock, leases, version): + """ + If a share file with an unexpected version is found, ``stat_shares`` + declines to offer a result (by raising ``ValueError``). + """ + assume(version != 1) + + # Hypothesis causes our storage server to be used many times. Clean + # up between iterations. + cleanup_storage_server(self.anonymous_storage_server) + + sharedir = FilePath(self.anonymous_storage_server.sharedir).preauthChild( + # storage_index_to_dir likes to return multiple segments + # joined by pathsep + storage_index_to_dir(storage_index), + ) + sharepath = sharedir.child(u"{}".format(sharenum)) + sharepath.parent().makedirs() + whitebox_write_sparse_share( + sharepath, + version=version, + size=size, + leases=leases, + now=clock.seconds(), + ) + + self.assertThat( + self.client.stat_shares([storage_index]), + failed( + AfterPreprocessing( + lambda f: f.value, + IsInstance(ValueError), + ), + ), + ) + + @given( + storage_index=storage_indexes(), + sharenum=sharenums(), + size=sizes(), + clock=clocks(), + version=share_versions(), + # Encode our knowledge of the share header format and size right here... + position=integers(min_value=0, max_value=11), + ) + def test_stat_shares_truncated_file(self, storage_index, sharenum, size, clock, version, position): + """ + If a share file is truncated in the middle of the header, + ``stat_shares`` declines to offer a result (by raising + ``ValueError``). + """ + # Hypothesis causes our storage server to be used many times. Clean + # up between iterations. + cleanup_storage_server(self.anonymous_storage_server) + + sharedir = FilePath(self.anonymous_storage_server.sharedir).preauthChild( + # storage_index_to_dir likes to return multiple segments + # joined by pathsep + storage_index_to_dir(storage_index), + ) + sharepath = sharedir.child(u"{}".format(sharenum)) + sharepath.parent().makedirs() + whitebox_write_sparse_share( + sharepath, + version=version, + size=size, + # We know leases are at the end, where they'll get chopped off, so + # we don't bother to write any. + leases=[], + now=clock.seconds(), + ) + with sharepath.open("wb") as fobj: + fobj.truncate(position) + + self.assertThat( + self.client.stat_shares([storage_index]), + failed( + AfterPreprocessing( + lambda f: f.value, + IsInstance(ValueError), + ), + ), + ) + + + @skipIf(platform.isWindows(), "Creating large files on Windows (no sparse files) is too slow") + @given( + storage_index=storage_indexes(), + sharenum=sharenums(), + size=sizes(min_value=2 ** 18, max_value=2 ** 40), + clock=clocks(), + leases=lists(lease_renew_secrets(), unique=True, min_size=1), + ) + def test_stat_shares_immutable_large(self, storage_index, sharenum, size, clock, leases): + """ + Size and lease information about very large immutable shares can be + retrieved from a storage server. + + This is more of a whitebox test. It assumes knowledge of Tahoe-LAFS + share placement and layout. 
+        write real multi-gigabyte files to exercise the behavior.
+        """
+        def write_shares(storage_server, storage_index, sharenums, size, canary):
+            sharedir = FilePath(storage_server.sharedir).preauthChild(
+                # storage_index_to_dir likes to return multiple segments
+                # joined by pathsep
+                storage_index_to_dir(storage_index),
+            )
+            for sharenum in sharenums:
+                sharepath = sharedir.child(u"{}".format(sharenum))
+                sharepath.parent().makedirs()
+                whitebox_write_sparse_share(
+                    sharepath,
+                    version=1,
+                    size=size,
+                    leases=leases,
+                    now=clock.seconds(),
+                )
+
+        return self._stat_shares_immutable_test(
+            storage_index,
+            sharenum,
+            size,
+            clock,
+            leases,
+            write_shares,
+        )
 
     @skipIf(platform.isWindows(), "Storage server miscomputes slot size on Windows")
     @given(
@@ -454,8 +618,6 @@
         stats = extract_result(
            self.client.stat_shares([storage_index]),
         )
-        # Hard-coded in Tahoe-LAFS
-        LEASE_INTERVAL = 60 * 60 * 24 * 31
         expected = [{
             sharenum: ShareStat(
                 size=get_implied_data_length(
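
For reference, the following is a minimal standalone sketch (not part of the patch) of the size computation the patch relies on. It assumes only the share file layout quoted in the get_storage_index_share_size() comment above: a 12-byte ">LLL" header, then the share data, then 72-byte lease records. All names below are illustrative. It writes a sparse share much as whitebox_write_sparse_share() does and recovers the data size from the on-disk file size, which stays exact even above 2**32 bytes, where the in-header length field wraps.

from os import SEEK_CUR, stat
from os.path import join
from struct import calcsize, pack, unpack
from tempfile import mkdtemp

HEADER_FORMAT = ">LLL"                   # version, (truncated) data length, lease count
LEASE_FORMAT = ">L32s32sL"               # owner, renew secret, cancel secret, expiration
HEADER_SIZE = calcsize(HEADER_FORMAT)    # 12 bytes
LEASE_SIZE = calcsize(LEASE_FORMAT)      # 72 bytes

def write_fake_share(path, data_size, num_leases):
    # Header, a sparse zero-filled data region, then the lease records.
    with open(path, "wb") as share:
        share.write(pack(HEADER_FORMAT, 1, data_size % 2 ** 32, num_leases))
        share.seek(data_size - 1, SEEK_CUR)
        share.write(b"\0")
        for _ in range(num_leases):
            share.write(pack(LEASE_FORMAT, 0, b"r" * 32, b"c" * 32, 0))

def recover_data_size(path):
    # The patch's computation: whole file, minus the fixed-size header,
    # minus one fixed-size lease record per lease counted in the header.
    with open(path, "rb") as share:
        version, _, num_leases = unpack(HEADER_FORMAT, share.read(HEADER_SIZE))
    assert version == 1
    return stat(path).st_size - HEADER_SIZE - num_leases * LEASE_SIZE

path = join(mkdtemp(), "0")
data_size = 2 ** 33 + 7                  # > 4 GiB, but tiny on disk if the filesystem supports holes
write_fake_share(path, data_size, num_leases=3)
assert recover_data_size(path) == data_size   # file-size arithmetic is exact
assert (data_size % 2 ** 32) != data_size     # the in-header field alone is not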