-rw-r--r--  lib/backupstore/BackupStoreFile.cpp   | 350
-rw-r--r--  lib/backupstore/BackupStoreFile.h     |  60
-rw-r--r--  test/backupstore/testbackupstore.cpp  |  36
3 files changed, 445 insertions, 1 deletion
diff --git a/lib/backupstore/BackupStoreFile.cpp b/lib/backupstore/BackupStoreFile.cpp
index ec414253..2ddb82e1 100644
--- a/lib/backupstore/BackupStoreFile.cpp
+++ b/lib/backupstore/BackupStoreFile.cpp
@@ -102,7 +102,15 @@ std::auto_ptr<BackupStoreFileEncodeStream> BackupStoreFile::EncodeFile(
// requirements. Doesn't verify that the data is intact
// and can be decoded. Optionally returns the ID of the
// file which it is diffed from, and the (original)
-// container ID.
+// container ID. This is more efficient than
+// BackupStoreFile::VerifyStream() when the file data
+// already exists on disk and we can Seek() around in
+// it, but less efficient when reading the stream from
+// the network without intending to Write() it to a
+// file first, so unfortunately we need both.
+// TODO FIXME: use a modified VerifyStream() which
+// repositions the file pointer and Close()s early to
+// deduplicate this code.
// Created: 2003/08/28
//
// --------------------------------------------------------------------------
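
For orientation, here is a minimal sketch of how a caller might drive the two verification paths described above. It is not part of this patch: ExampleVerifyBothWays, pFilename and rIncoming are illustrative names, and the includes assume the usual Box Backup headers (BackupStoreFile.h, CollectInBufferStream.h, FileStream.h).

#include "BackupStoreFile.h"
#include "CollectInBufferStream.h"
#include "FileStream.h"

// Illustrative only: verify an encoded file that is already on disk, then
// verify a second copy arriving as a stream.
bool ExampleVerifyBothWays(const char* pFilename, IOStream& rIncoming)
{
	// Seek-based path: cheap when the data already sits in a seekable file.
	FileStream on_disk(pFilename);
	bool ok = BackupStoreFile::VerifyEncodedFileFormat(on_disk);

	// Stream-based path: verify while the data flows past, optionally
	// copying it to another stream, so it never needs to hit disk first.
	// VerifyStream throws BackupStoreException(BadBackupStoreFile) as soon
	// as anything looks wrong.
	CollectInBufferStream copy_of_data;
	BackupStoreFile::VerifyStream verifier(&copy_of_data);
	rIncoming.CopyStreamTo(verifier);
	verifier.Close();  // the block index is only checked at Close()

	return ok;
}
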
@@ -266,6 +274,346 @@ bool BackupStoreFile::VerifyEncodedFileFormat(IOStream &rFile, int64_t *pDiffFro
return true;
}
+
+// --------------------------------------------------------------------------
+//
+// Function
+// Name: BackupStoreFile::VerifyStream::Write()
+// Purpose: Handles writes to the verifying stream. Data is copied
+// to mpCopyToStream (if not NULL) and buffered until the
+// current unit is complete, at which point the unit is
+// verified (throwing if invalid) and we move to the next.
+// Created: 2015/08/07
+//
+// --------------------------------------------------------------------------
+
+void BackupStoreFile::VerifyStream::Write(const void *pBuffer, int NBytes, int Timeout)
+{
+ // Check that we haven't already written too much to the current unit
+ size_t BytesToAdd;
+ if(mState == State_Blocks)
+ {
+ // We don't know how many bytes to expect
+ ASSERT(mCurrentUnitSize == 0);
+ BytesToAdd = NBytes;
+ }
+ else
+ {
+ ASSERT(mCurrentUnitData.GetSize() < mCurrentUnitSize);
+ size_t BytesLeftInCurrentUnit = mCurrentUnitSize -
+ mCurrentUnitData.GetSize();
+ // Add however many bytes are needed/available to the current unit's buffer.
+ BytesToAdd = std::min(BytesLeftInCurrentUnit, (size_t)NBytes);
+ }
+
+ // We must make progress here, or we could have infinite recursion.
+ ASSERT(BytesToAdd > 0);
+
+ CollectInBufferStream* pCurrentBuffer = (mCurrentBufferIsAlternate ?
+ &mAlternateData : &mCurrentUnitData);
+ pCurrentBuffer->Write(pBuffer, BytesToAdd, Timeout);
+ if(mpCopyToStream)
+ {
+ mpCopyToStream->Write(pBuffer, BytesToAdd, Timeout);
+ }
+
+ pBuffer = (uint8_t *)pBuffer + BytesToAdd;
+ NBytes -= BytesToAdd;
+ mCurrentPosition += BytesToAdd;
+
+ if(mState == State_Blocks)
+ {
+ // The block index follows the blocks themselves, but without seeing the
+ // index we don't know how big the blocks are. So we just have to keep
+ // reading, holding the last mBlockIndexSize bytes in two buffers, until
+ // we reach the end of the file (when Close() is called) when we can look
+ // back over those buffers and extract the block index from them.
+ if(pCurrentBuffer->GetSize() >= mBlockIndexSize)
+ {
+ // Time to swap buffers, and clear the one we're about to
+ // overwrite.
+ mCurrentBufferIsAlternate = !mCurrentBufferIsAlternate;
+ pCurrentBuffer = (mCurrentBufferIsAlternate ?
+ &mAlternateData : &mCurrentUnitData);
+ pCurrentBuffer->Reset();
+ }
+
+ // We don't want to move data into the finished buffer while we're in this
+ // state, and we don't need to call ourselves recursively, so just return.
+ return;
+ }
+
+ ASSERT(mState != State_Blocks);
+
+ // If the current unit is not complete, just return now.
+ if(mCurrentUnitData.GetSize() < mCurrentUnitSize)
+ {
+ return;
+ }
+
+ ASSERT(mCurrentUnitData.GetSize() == mCurrentUnitSize);
+ mCurrentUnitData.SetForReading();
+ CollectInBufferStream finished(mCurrentUnitData);
+
+ // This should leave mCurrentUnitData empty in write phase
+ ASSERT(!mCurrentUnitData.StreamClosed());
+ ASSERT(mCurrentUnitData.GetSize() == 0);
+
+ // Advance automatically to next state (to reduce code duplication) if the current
+ // state is anything but State_Blocks. We remain in that state for more than one
+ // read (processing one block at a time), and exit it manually.
+ int oldState = mState;
+ if(mState != State_Blocks)
+ {
+ mState++;
+ }
+
+ // Process and complete the current unit, depending what it is.
+ if(oldState == State_Header)
+ {
+ // Get the header...
+ file_StreamFormat hdr;
+ ASSERT(finished.GetSize() == sizeof(hdr));
+ memcpy(&hdr, finished.GetBuffer(), sizeof(hdr));
+
+ // Check magic number
+ if(ntohl(hdr.mMagicValue) != OBJECTMAGIC_FILE_MAGIC_VALUE_V1
+#ifndef BOX_DISABLE_BACKWARDS_COMPATIBILITY_BACKUPSTOREFILE
+ && ntohl(hdr.mMagicValue) != OBJECTMAGIC_FILE_MAGIC_VALUE_V0
+#endif
+ )
+ {
+ THROW_EXCEPTION_MESSAGE(BackupStoreException, BadBackupStoreFile,
+ "Invalid header magic in stream: expected " <<
+ BOX_FORMAT_HEX32(OBJECTMAGIC_FILE_MAGIC_VALUE_V1) <<
+ " or " <<
+ BOX_FORMAT_HEX32(OBJECTMAGIC_FILE_MAGIC_VALUE_V0) <<
+ " but found " <<
+ BOX_FORMAT_HEX32(ntohl(hdr.mMagicValue)));
+ }
+
+ mNumBlocks = box_ntoh64(hdr.mNumBlocks);
+ mBlockIndexSize = (mNumBlocks * sizeof(file_BlockIndexEntry)) +
+ sizeof(file_BlockIndexHeader);
+ mContainerID = box_ntoh64(hdr.mContainerID);
+
+ ASSERT(mState == State_FilenameHeader);
+ mCurrentUnitSize = 2;
+ }
+ else if(oldState == State_FilenameHeader)
+ {
+ // Check that the encoding is an accepted value.
+ unsigned int encoding =
+ BACKUPSTOREFILENAME_GET_ENCODING(
+ (uint8_t *)finished.GetBuffer());
+ if(encoding < BackupStoreFilename::Encoding_Min ||
+ encoding > BackupStoreFilename::Encoding_Max)
+ {
+ THROW_EXCEPTION_MESSAGE(BackupStoreException, BadBackupStoreFile,
+ "Invalid encoding in filename: " << encoding);
+ }
+
+ ASSERT(mState == State_Filename);
+ mCurrentUnitSize = BACKUPSTOREFILENAME_GET_SIZE(
+ (uint8_t *)finished.GetBuffer());
+ // Copy the first two bytes back into the new buffer, to be used by the
+ // completed filename.
+ finished.CopyStreamTo(mCurrentUnitData);
+ }
+ else if(oldState == State_Filename)
+ {
+ BackupStoreFilename fn;
+ fn.ReadFromStream(finished, IOStream::TimeOutInfinite);
+ ASSERT(mState == State_AttributesSize);
+ mCurrentUnitSize = sizeof(int32_t);
+ }
+ else if(oldState == State_AttributesSize)
+ {
+ ASSERT(mState == State_Attributes);
+ mCurrentUnitSize = ntohl(*(int32_t *)finished.GetBuffer());
+ }
+ else if(oldState == State_Attributes)
+ {
+ // Skip the attributes -- because they're encrypted, the server can't tell
+ // whether they're OK or not.
+ ASSERT(mState == State_Blocks);
+ mBlockDataPosition = mCurrentPosition;
+ mCurrentUnitSize = 0;
+ }
+ else if(oldState == State_Blocks)
+ {
+ // The block index follows the blocks themselves, but without seeing the
+ // index we don't know how big the blocks are. So we just have to keep
+ // reading, holding the last mBlockIndexSize bytes in two buffers, until
+ // we reach the end of the file (when Close() is called) when we can look
+ // back over those buffers and extract the block index from them.
+ ASSERT(mState == State_Blocks);
+ if(pCurrentBuffer->GetSize() >= mBlockIndexSize)
+ {
+ // Time to swap buffers, and clear the one we're about to
+ // overwrite.
+ mCurrentBufferIsAlternate = !mCurrentBufferIsAlternate;
+ pCurrentBuffer = (mCurrentBufferIsAlternate ?
+ &mAlternateData : &mCurrentUnitData);
+ pCurrentBuffer->Reset();
+ }
+ }
+
+ if(NBytes > 0)
+ {
+ // Still some data to process, so call recursively to deal with it.
+ Write(pBuffer, NBytes, Timeout);
+ }
+}
+
+// --------------------------------------------------------------------------
+//
+// Function
+// Name: BackupStoreFile::VerifyStream::Close()
+// Purpose: Handles closing the verifying stream, which tells us
+// that there is no more incoming data, at which point
+// we can extract the block index from the most
+// recently buffered data and verify it.
+// Created: 2015/08/07
+//
+// --------------------------------------------------------------------------
+
+
+void BackupStoreFile::VerifyStream::Close()
+{
+ if(mpCopyToStream)
+ {
+ mpCopyToStream->Close();
+ }
+
+ if(mState != State_Blocks)
+ {
+ THROW_EXCEPTION_MESSAGE(BackupStoreException, BadBackupStoreFile,
+ "Stream closed too early to be a valid BackupStoreFile: was in "
+ "state " << mState << " instead of " << State_Blocks);
+ }
+
+ // Find the last mBlockIndexSize bytes.
+ CollectInBufferStream finished;
+
+ CollectInBufferStream* pCurrentBuffer = mCurrentBufferIsAlternate ?
+ &mAlternateData : &mCurrentUnitData;
+ CollectInBufferStream* pPreviousBuffer = mCurrentBufferIsAlternate ?
+ &mCurrentUnitData : &mAlternateData;
+
+ int64_t BytesFromCurrentBuffer = std::min(mBlockIndexSize,
+ (int64_t)pCurrentBuffer->GetSize());
+ int64_t BytesFromPreviousBuffer = mBlockIndexSize - BytesFromCurrentBuffer;
+
+ if(pPreviousBuffer->GetSize() < BytesFromPreviousBuffer)
+ {
+ THROW_EXCEPTION_MESSAGE(BackupStoreException, BadBackupStoreFile,
+ "Not enough bytes for block index: expected " <<
+ mBlockIndexSize << " but had collected " <<
+ (pPreviousBuffer->GetSize() + pCurrentBuffer->GetSize()) <<
+ " at " << mCurrentPosition << " bytes into file");
+ }
+
+ size_t PreviousBufferOffset = pPreviousBuffer->GetSize() -
+ BytesFromPreviousBuffer;
+ size_t CurrentBufferOffset = pCurrentBuffer->GetSize() -
+ BytesFromCurrentBuffer;
+
+ file_BlockIndexHeader blkhdr;
+ finished.Write((uint8_t *)pPreviousBuffer->GetBuffer() + PreviousBufferOffset,
+ BytesFromPreviousBuffer);
+ finished.Write((uint8_t *)pCurrentBuffer->GetBuffer() + CurrentBufferOffset,
+ BytesFromCurrentBuffer);
+ ASSERT(finished.GetSize() == mBlockIndexSize);
+
+ // Load the block index header
+ memcpy(&blkhdr, finished.GetBuffer(), sizeof(blkhdr));
+
+ if(ntohl(blkhdr.mMagicValue) != OBJECTMAGIC_FILE_BLOCKS_MAGIC_VALUE_V1
+#ifndef BOX_DISABLE_BACKWARDS_COMPATIBILITY_BACKUPSTOREFILE
+ && ntohl(blkhdr.mMagicValue) != OBJECTMAGIC_FILE_BLOCKS_MAGIC_VALUE_V0
+#endif
+ )
+ {
+ THROW_EXCEPTION_MESSAGE(BackupStoreException, BadBackupStoreFile,
+ "Invalid block index magic in stream: expected " <<
+ BOX_FORMAT_HEX32(OBJECTMAGIC_FILE_BLOCKS_MAGIC_VALUE_V1) <<
+ " or " <<
+ BOX_FORMAT_HEX32(OBJECTMAGIC_FILE_BLOCKS_MAGIC_VALUE_V0) <<
+ " but found " <<
+ BOX_FORMAT_HEX32(ntohl(blkhdr.mMagicValue)));
+ }
+
+ if((int64_t)box_ntoh64(blkhdr.mNumBlocks) != mNumBlocks)
+ {
+ THROW_EXCEPTION_MESSAGE(BackupStoreException, BadBackupStoreFile,
+ "Invalid block index size in stream: expected " <<
+ BOX_FORMAT_OBJECTID(mNumBlocks) <<
+ " but found " <<
+ BOX_FORMAT_OBJECTID(box_ntoh64(blkhdr.mNumBlocks)));
+ }
+
+ // Flag for recording whether a block is referenced from another file
+ mBlockFromOtherFileReferenced = false;
+ int64_t blockIndexLoc = mCurrentPosition - mBlockIndexSize;
+
+ // Read the index, checking that the length values all make sense
+ int64_t currentBlockStart = mBlockDataPosition;
+ file_BlockIndexEntry* pBlk = (file_BlockIndexEntry *)
+ ((uint8_t *)finished.GetBuffer() + sizeof(blkhdr));
+ for(int64_t b = 0; b < mNumBlocks; ++b)
+ {
+ // Check size and location
+ int64_t blkSize = box_ntoh64(pBlk[b].mEncodedSize);
+ if(blkSize <= 0)
+ {
+ // Mark that this file references another file
+ mBlockFromOtherFileReferenced = true;
+ }
+ else
+ {
+ // This block is actually in this file
+ if((currentBlockStart + blkSize) > blockIndexLoc)
+ {
+ // Encoded size makes the block run over the index
+ THROW_EXCEPTION_MESSAGE(BackupStoreException, BadBackupStoreFile,
+ "Invalid block index: entry " << b << " makes "
+ "total size of block data exceed the available "
+ "space");
+ }
+
+ // Move the current block start to the end of this block
+ currentBlockStart += blkSize;
+ }
+ }
+
+ // Check that there's no empty space
+ if(currentBlockStart != blockIndexLoc)
+ {
+ THROW_EXCEPTION_MESSAGE(BackupStoreException, BadBackupStoreFile,
+ "Invalid block index: total size of blocks does not match the "
+ "actual amount of block data in the stream: expected " <<
+ (blockIndexLoc - mBlockDataPosition) << " but found " <<
+ (currentBlockStart - mBlockDataPosition));
+ }
+
+ // Check consistency: if any block references another file then the
+ // header's other-file ID must be non-zero, and zero if none do.
+ int64_t otherID = box_ntoh64(blkhdr.mOtherFileID);
+ if((otherID != 0 && mBlockFromOtherFileReferenced == false)
+ || (otherID == 0 && mBlockFromOtherFileReferenced == true))
+ {
+ // Doesn't look good!
+ THROW_EXCEPTION_MESSAGE(BackupStoreException, BadBackupStoreFile,
+ "Invalid block index header: actual dependency status does not "
+ "match the file header: expected to depend on file object " <<
+ BOX_FORMAT_OBJECTID(otherID));
+ }
+
+ mDiffFromObjectID = otherID;
+}
+
+
// --------------------------------------------------------------------------
//
// Function
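
The two-buffer juggling in Write() and Close() above exists only to keep the last mBlockIndexSize bytes of an arbitrarily long stream in memory without buffering the whole file. The following is a stripped-down sketch of the same idea, independent of the Box Backup classes; all names here are illustrative and the class is not part of this patch.

#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <vector>

// Keeps (at least) the last 'tail_size' bytes written, using two buffers
// that are swapped and cleared whenever the current one grows big enough,
// mirroring mCurrentUnitData/mAlternateData in VerifyStream.
class TailKeeper
{
public:
	TailKeeper(size_t tail_size)
	: mTailSize(tail_size), mUseAlternate(false)
	{ }

	void Write(const void* pData, size_t Length)
	{
		std::vector<uint8_t>& current = mUseAlternate ? mAlternate : mPrimary;
		const uint8_t* p = (const uint8_t*)pData;
		current.insert(current.end(), p, p + Length);
		if(current.size() >= mTailSize)
		{
			// The buffer just written now holds a full tail on its
			// own, so the other buffer can be recycled.
			mUseAlternate = !mUseAlternate;
			(mUseAlternate ? mAlternate : mPrimary).clear();
		}
	}

	// Returns the last mTailSize bytes written (or everything, if fewer
	// than mTailSize bytes have been written so far).
	std::vector<uint8_t> GetTail() const
	{
		const std::vector<uint8_t>& current  = mUseAlternate ? mAlternate : mPrimary;
		const std::vector<uint8_t>& previous = mUseAlternate ? mPrimary   : mAlternate;
		size_t from_current  = std::min(mTailSize, current.size());
		size_t from_previous = std::min(mTailSize - from_current, previous.size());
		std::vector<uint8_t> tail;
		tail.insert(tail.end(), previous.end() - from_previous, previous.end());
		tail.insert(tail.end(), current.end()  - from_current,  current.end());
		return tail;
	}

private:
	size_t mTailSize;
	bool mUseAlternate;
	std::vector<uint8_t> mPrimary, mAlternate;
};

Unlike this sketch, the patch's Close() throws BadBackupStoreFile rather than returning a short tail if fewer than mBlockIndexSize bytes of block data were ever seen.
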
diff --git a/lib/backupstore/BackupStoreFile.h b/lib/backupstore/BackupStoreFile.h
index 42e527e3..3b874e0e 100644
--- a/lib/backupstore/BackupStoreFile.h
+++ b/lib/backupstore/BackupStoreFile.h
@@ -16,7 +16,9 @@
#include "autogen_BackupProtocol.h"
#include "BackupClientFileAttributes.h"
+#include "BackupStoreFileWire.h"
#include "BackupStoreFilename.h"
+#include "CollectInBufferStream.h"
#include "IOStream.h"
#include "ReadLoggingStream.h"
@@ -122,6 +124,64 @@ public:
#endif
};
+ class VerifyStream : public IOStream
+ {
+ private:
+ enum
+ {
+ State_Header = 0,
+ State_FilenameHeader,
+ State_Filename,
+ State_AttributesSize,
+ State_Attributes,
+ State_Blocks,
+ };
+
+ int mState;
+ IOStream* mpCopyToStream;
+ CollectInBufferStream mCurrentUnitData;
+ size_t mCurrentUnitSize;
+ int64_t mNumBlocks;
+ int64_t mBlockIndexSize;
+ int64_t mCurrentPosition;
+ int64_t mBlockDataPosition;
+ bool mCurrentBufferIsAlternate;
+ CollectInBufferStream mAlternateData;
+ bool mBlockFromOtherFileReferenced;
+ int64_t mContainerID;
+ int64_t mDiffFromObjectID;
+
+ public:
+ VerifyStream(IOStream* pCopyToStream = NULL)
+ : mState(State_Header),
+ mpCopyToStream(pCopyToStream),
+ mCurrentUnitSize(sizeof(file_StreamFormat)),
+ mNumBlocks(0),
+ mBlockIndexSize(0),
+ mCurrentPosition(0),
+ mBlockDataPosition(0),
+ mCurrentBufferIsAlternate(false),
+ mBlockFromOtherFileReferenced(false),
+ mContainerID(0),
+ mDiffFromObjectID(0)
+ { }
+ virtual int Read(void *pBuffer, int NBytes,
+ int Timeout = IOStream::TimeOutInfinite)
+ {
+ THROW_EXCEPTION(CommonException, NotSupported);
+ }
+ virtual void Write(const void *pBuffer, int NBytes,
+ int Timeout = IOStream::TimeOutInfinite);
+ virtual void Close();
+ virtual bool StreamDataLeft()
+ {
+ THROW_EXCEPTION(CommonException, NotSupported);
+ }
+ virtual bool StreamClosed()
+ {
+ THROW_EXCEPTION(CommonException, NotSupported);
+ }
+ };
// Main interface
static std::auto_ptr<BackupStoreFileEncodeStream> EncodeFile
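
A hedged sketch of driving the new class directly, one small chunk at a time. VerifyAndStore, rIncoming and rStoreFile are illustrative names, and the read loop assumes the source stream reports end-of-data via StreamDataLeft(); the real server wiring is not part of this patch.

#include "BackupStoreFile.h"
#include "IOStream.h"

// Illustrative: verify an encoded file as it arrives on rIncoming, copying
// the verified bytes to rStoreFile at the same time.
void VerifyAndStore(IOStream& rIncoming, IOStream& rStoreFile)
{
	BackupStoreFile::VerifyStream verifier(&rStoreFile);

	char buffer[4096];
	while(rIncoming.StreamDataLeft())
	{
		int bytes = rIncoming.Read(buffer, sizeof(buffer));
		if(bytes == 0)
		{
			break; // treat a zero-byte read as end of stream in this sketch
		}
		// Each Write() feeds the state machine and throws
		// BadBackupStoreFile as soon as the header, filename or
		// attribute sizes look wrong.
		verifier.Write(buffer, bytes);
	}

	// The block index sits at the end of the stream, so it can only be
	// checked at Close(), which also Close()s rStoreFile for us.
	verifier.Close();
}
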
diff --git a/test/backupstore/testbackupstore.cpp b/test/backupstore/testbackupstore.cpp
index 3a219091..f30fd42a 100644
--- a/test/backupstore/testbackupstore.cpp
+++ b/test/backupstore/testbackupstore.cpp
@@ -2323,6 +2323,42 @@ bool test_encoding()
{
FileStream enc("testfiles/testenc1_enc");
TEST_THAT(BackupStoreFile::VerifyEncodedFileFormat(enc) == true);
+
+ // And using the stream-based interface, writing different
+ // block sizes at a time.
+ CollectInBufferStream contents;
+ enc.Seek(0, IOStream::SeekType_Absolute);
+ enc.CopyStreamTo(contents);
+ contents.SetForReading();
+
+ enc.Seek(0, IOStream::SeekType_End);
+ size_t file_size = enc.GetPosition();
+ TEST_EQUAL(file_size, contents.GetSize());
+
+ for(int buffer_size = 1; ; buffer_size <<= 1)
+ {
+ enc.Seek(0, IOStream::SeekType_Absolute);
+ CollectInBufferStream temp_copy;
+ BackupStoreFile::VerifyStream verifier(&temp_copy);
+ enc.CopyStreamTo(verifier, IOStream::TimeOutInfinite,
+ buffer_size);
+
+ // The block index is only validated on Close(), which
+ // CopyStreamTo() doesn't do.
+ verifier.Close();
+
+ temp_copy.SetForReading();
+ TEST_EQUAL(file_size, temp_copy.GetSize());
+ TEST_THAT(memcmp(contents.GetBuffer(),
+ temp_copy.GetBuffer(), file_size) == 0);
+
+ // Keep doubling buffer size until we've copied the
+ // entire encoded file in a single pass, then stop.
+ if(buffer_size > file_size)
+ {
+ break;
+ }
+ }
}
// Decode it