From b3db93f7fc8f2a2ea908d4e81936e3ca91146a7d Mon Sep 17 00:00:00 2001
From: Jean-Paul Calderone <exarkun@twistedmatrix.com>
Date: Fri, 1 May 2020 16:16:02 -0400
Subject: [PATCH] Stop trusting the immutable share file size field

Compute it ourselves so it is correct for large files
---
 src/_zkapauthorizer/_storage_server.py       |  45 ++++--
 src/_zkapauthorizer/tests/strategies.py      |  16 ++-
 .../tests/test_storage_protocol.py           | 135 +++++++++++++++---
 3 files changed, 159 insertions(+), 37 deletions(-)

diff --git a/src/_zkapauthorizer/_storage_server.py b/src/_zkapauthorizer/_storage_server.py
index a2d4b9f..4e0451e 100644
--- a/src/_zkapauthorizer/_storage_server.py
+++ b/src/_zkapauthorizer/_storage_server.py
@@ -42,6 +42,7 @@ from os.path import (
 )
 from os import (
     listdir,
+    stat,
 )
 from datetime import (
     timedelta,
 )
@@ -517,23 +518,49 @@ def get_storage_index_share_size(sharepath):
 
     :return int: The data size of the share in bytes.
     """
-    # Note Tahoe-LAFS immutable/layout.py makes some claims about how the
-    # share data is structured. A lot of this seems to be wrong.
-    # storage/immutable.py appears to have the correct information.
-    fmt = ">LL"
+    # From src/allmydata/storage/immutable.py
+    #
+    # The share file has the following layout:
+    # 0x00: share file version number, four bytes, current version is 1
+    # 0x04: share data length, four bytes big-endian = A # See Footnote 1 below.
+    # 0x08: number of leases, four bytes big-endian
+    # 0x0c: beginning of share data (see immutable.layout.WriteBucketProxy)
+    # A+0x0c = B: first lease. Lease format is:
+    # B+0x00: owner number, 4 bytes big-endian, 0 is reserved for no-owner
+    # B+0x04: renew secret, 32 bytes (SHA256)
+    # B+0x24: cancel secret, 32 bytes (SHA256)
+    # B+0x44: expiration time, 4 bytes big-endian seconds-since-epoch
+    # B+0x48: next lease, or end of record
+    #
+    # Footnote 1: as of Tahoe v1.3.0 this field is not used by storage
+    # servers, but it is still filled in by storage servers in case the
+    # storage server software gets downgraded from >= Tahoe v1.3.0 to < Tahoe
+    # v1.3.0, or the share file is moved from one storage server to
+    # another. The value stored in this field is truncated, so if the actual
+    # share data length is >= 2**32, then the value stored in this field will
+    # be the actual share data length modulo 2**32.
+
+    share_file_size = stat(sharepath).st_size
+    header_format = ">LLL"
     with open(sharepath, "rb") as share_file:
-        header = share_file.read(calcsize(fmt))
+        header = share_file.read(calcsize(header_format))
 
-    if len(header) != calcsize(fmt):
+    if len(header) != calcsize(header_format):
         raise ValueError(
             "Tried to read {} bytes of share file header, got {!r} instead.".format(
-                calcsize(fmt),
+                calcsize(header_format),
                 header,
             ),
         )
 
-    version, share_data_length = unpack(fmt, header)
-    return share_data_length
+    version, _, number_of_leases = unpack(header_format, header)
+
+    if version != 1:
+        raise ValueError(
+            "Cannot interpret version {} share file.".format(version),
+        )
+
+    return share_file_size - 0x0c - (number_of_leases * (4 + 32 + 32 + 4))
 
 
 def get_lease_expiration(get_leases, storage_index_or_slot):
diff --git a/src/_zkapauthorizer/tests/strategies.py b/src/_zkapauthorizer/tests/strategies.py
index 7776ff7..533b649 100644
--- a/src/_zkapauthorizer/tests/strategies.py
+++ b/src/_zkapauthorizer/tests/strategies.py
@@ -532,17 +532,19 @@ def sharenum_sets():
     )
 
 
-def sizes():
+def sizes(
+    # Size 0 data isn't data, it's nothing.
+    min_value=1,
+    # Let this be larger than a single segment (2 ** 17) in case that matters
+    # to Tahoe-LAFS storage at all. I don't think it does, though.
+    max_value=2 ** 18,
+):
     """
     Build Tahoe-LAFS share sizes.
     """
     return integers(
-        # Size 0 data isn't data, it's nothing.
-        min_value=1,
-        # Let this be larger than a single segment (2 ** 17) in case that
-        # matters to Tahoe-LAFS storage at all. I don't think it does,
-        # though.
-        max_value=2 ** 18,
+        min_value=min_value,
+        max_value=max_value,
     )
 
 
diff --git a/src/_zkapauthorizer/tests/test_storage_protocol.py b/src/_zkapauthorizer/tests/test_storage_protocol.py
index 55a0fb9..c4d9d84 100644
--- a/src/_zkapauthorizer/tests/test_storage_protocol.py
+++ b/src/_zkapauthorizer/tests/test_storage_protocol.py
@@ -20,6 +20,13 @@ from __future__ import (
     absolute_import,
 )
 
+from os import (
+    SEEK_CUR,
+)
+from struct import (
+    pack,
+)
+
 from fixtures import (
     MonkeyPatch,
 )
@@ -70,6 +77,10 @@ from challenge_bypass_ristretto import (
     random_signing_key,
 )
 
+from allmydata.storage.common import (
+    storage_index_to_dir,
+)
+
 from .common import (
     skipIf,
 )
@@ -119,6 +130,8 @@ from ..foolscap import (
     ShareStat,
 )
 
+# Hard-coded in Tahoe-LAFS
+LEASE_INTERVAL = 60 * 60 * 24 * 31
 
 class RequiredPassesTests(TestCase):
     """
@@ -353,20 +366,7 @@ class ShareTests(TestCase):
             Equals(int(now + self.server.LEASE_PERIOD.total_seconds())),
         )
 
-    @given(
-        storage_index=storage_indexes(),
-        renew_secret=lease_renew_secrets(),
-        cancel_secret=lease_cancel_secrets(),
-        sharenum=sharenums(),
-        size=sizes(),
-        clock=clocks(),
-        leases=lists(lease_renew_secrets(), unique=True),
-    )
-    def test_stat_shares_immutable(self, storage_index, renew_secret, cancel_secret, sharenum, size, clock, leases):
-        """
-        Size and lease information about immutable shares can be retrieved from a
-        storage server.
-        """
+    def _stat_shares_immutable_test(self, storage_index, sharenum, size, clock, leases, write_shares):
         # Hypothesis causes our storage server to be used many times. Clean
         # up between iterations.
         cleanup_storage_server(self.anonymous_storage_server)
@@ -377,11 +377,9 @@ class ShareTests(TestCase):
         try:
             patch.setUp()
             # Create a share we can toy with.
-            write_toy_shares(
+            write_shares(
                 self.anonymous_storage_server,
                 storage_index,
-                renew_secret,
-                cancel_secret,
                 {sharenum},
                 size,
                 canary=self.canary,
@@ -400,8 +398,6 @@ class ShareTests(TestCase):
         stats = extract_result(
             self.client.stat_shares([storage_index]),
         )
-        # Hard-coded in Tahoe-LAFS
-        LEASE_INTERVAL = 60 * 60 * 24 * 31
         expected = [{
             sharenum: ShareStat(
                 size=size,
@@ -413,6 +409,105 @@ class ShareTests(TestCase):
             Equals(expected),
         )
 
+    @given(
+        storage_index=storage_indexes(),
+        renew_secret=lease_renew_secrets(),
+        cancel_secret=lease_cancel_secrets(),
+        sharenum=sharenums(),
+        size=sizes(),
+        clock=clocks(),
+        leases=lists(lease_renew_secrets(), unique=True),
+    )
+    def test_stat_shares_immutable(self, storage_index, renew_secret, cancel_secret, sharenum, size, clock, leases):
+        """
+        Size and lease information about immutable shares can be retrieved from a
+        storage server.
+        """
+        return self._stat_shares_immutable_test(
+            storage_index,
+            sharenum,
+            size,
+            clock,
+            leases,
+            lambda storage_server, storage_index, sharenums, size, canary: write_toy_shares(
+                storage_server,
+                storage_index,
+                renew_secret,
+                cancel_secret,
+                sharenums,
+                size,
+                canary,
+            ),
+        )
+
+    @given(
+        storage_index=storage_indexes(),
+        sharenum=sharenums(),
+        size=sizes(min_value=2 ** 18, max_value=2 ** 40),
+        clock=clocks(),
+        leases=lists(lease_renew_secrets(), unique=True, min_size=1),
+    )
+    def test_stat_shares_immutable_large(self, storage_index, sharenum, size, clock, leases):
+        """
+        Size and lease information about very large immutable shares can be
+        retrieved from a storage server.
+
+        This is more of a whitebox test. It assumes knowledge of Tahoe-LAFS
+        share placement and layout. This is necessary to avoid having to
+        write real multi-gigabyte files to exercise the behavior.
+        """
+        header_format = ">LLL"
+        lease_format = ">L32s32sL"
+        def write_shares(storage_server, storage_index, sharenums, size, canary):
+            sharedir = FilePath(storage_server.sharedir).preauthChild(
+                # storage_index_to_dir likes to return multiple segments
+                # joined by pathsep
+                storage_index_to_dir(storage_index),
+            )
+            for sharenum in sharenums:
+                sharepath = sharedir.child(u"{}".format(sharenum))
+                sharepath.parent().makedirs()
+                with sharepath.open("wb") as share:
+                    share.write(
+                        pack(
+                            header_format,
+                            # Version
+                            1,
+                            # Maybe-saturated size (what at least one
+                            # Tahoe-LAFS comment claims is appropriate for
+                            # large files)
+                            min(size, 2 ** 32 - 1),
+                            len(leases),
+                        ),
+                    )
+                    # Try to make it sparse by skipping all the data.
+                    share.seek(size - 1, SEEK_CUR),
+                    share.write(b"\0")
+                    share.write(
+                        b"".join(
+                            pack(
+                                lease_format,
+                                # no owner
+                                0,
+                                renew,
+                                # no cancel secret
+                                b"",
+                                # expiration timestamp
+                                int(clock.seconds() + LEASE_INTERVAL),
+                            )
+                            for renew
+                            in leases
+                        ),
+                    )
+
+        return self._stat_shares_immutable_test(
+            storage_index,
+            sharenum,
+            size,
+            clock,
+            leases,
+            write_shares,
+        )
 
     @skipIf(platform.isWindows(), "Storage server miscomputes slot size on Windows")
     @given(
@@ -463,8 +558,6 @@ class ShareTests(TestCase):
         stats = extract_result(
            self.client.stat_shares([storage_index]),
         )
-        # Hard-coded in Tahoe-LAFS
-        LEASE_INTERVAL = 60 * 60 * 24 * 31
         expected = [{
             sharenum: ShareStat(
                 size=get_implied_data_length(
--
GitLab