diff options
Diffstat (limited to 'lib/backupclient')
-rw-r--r-- | lib/backupclient/BackupStoreFileCmbDiff.cpp | 326 | ||||
-rw-r--r-- | lib/backupclient/BackupStoreFileCmbIdx.cpp | 324 | ||||
-rw-r--r-- | lib/backupclient/BackupStoreFileCombine.cpp | 410 | ||||
-rw-r--r-- | lib/backupclient/BackupStoreFileDiff.cpp | 1046 |
4 files changed, 0 insertions, 2106 deletions
diff --git a/lib/backupclient/BackupStoreFileCmbDiff.cpp b/lib/backupclient/BackupStoreFileCmbDiff.cpp deleted file mode 100644 index 1a88fa3f..00000000 --- a/lib/backupclient/BackupStoreFileCmbDiff.cpp +++ /dev/null @@ -1,326 +0,0 @@ -// -------------------------------------------------------------------------- -// -// File -// Name: BackupStoreFileCmbDiff.cpp -// Purpose: Combine two diffs together -// Created: 12/7/04 -// -// -------------------------------------------------------------------------- - -#include "Box.h" - -#include <new> -#include <stdlib.h> - -#include "BackupStoreFile.h" -#include "BackupStoreFileWire.h" -#include "BackupStoreObjectMagic.h" -#include "BackupStoreException.h" -#include "BackupStoreConstants.h" -#include "BackupStoreFilename.h" - -#include "MemLeakFindOn.h" - -// -------------------------------------------------------------------------- -// -// Function -// Name: BackupStoreFile::CombineDiffs(IOStream &, IOStream &, IOStream &rOut) -// Purpose: Given two diffs, combine them into a single diff, to produce a diff -// which, combined with the original file, creates the result of applying -// rDiff, then rDiff2. Two opens of rDiff2 are required -// Created: 12/7/04 -// -// -------------------------------------------------------------------------- -void BackupStoreFile::CombineDiffs(IOStream &rDiff1, IOStream &rDiff2, IOStream &rDiff2b, IOStream &rOut) -{ - // Skip header of first diff, record where the data starts, and skip to the index - int64_t diff1DataStarts = 0; - { - // Read the header for the From file - file_StreamFormat diff1Hdr; - if(!rDiff1.ReadFullBuffer(&diff1Hdr, sizeof(diff1Hdr), 0)) - { - THROW_EXCEPTION(BackupStoreException, FailedToReadBlockOnCombine) - } - if(ntohl(diff1Hdr.mMagicValue) != OBJECTMAGIC_FILE_MAGIC_VALUE_V1) - { - THROW_EXCEPTION(BackupStoreException, BadBackupStoreFile) - } - // Skip over the filename and attributes of the From file - // BLOCK - { - BackupStoreFilename filename2; - filename2.ReadFromStream(rDiff1, IOStream::TimeOutInfinite); - int32_t size_s; - if(!rDiff1.ReadFullBuffer(&size_s, sizeof(size_s), 0 /* not interested in bytes read if this fails */)) - { - THROW_EXCEPTION(CommonException, StreamableMemBlockIncompleteRead) - } - int size = ntohl(size_s); - // Skip forward the size - rDiff1.Seek(size, IOStream::SeekType_Relative); - } - // Record position - diff1DataStarts = rDiff1.GetPosition(); - // Skip to index - rDiff1.Seek(0 - (((box_ntoh64(diff1Hdr.mNumBlocks)) * sizeof(file_BlockIndexEntry)) + sizeof(file_BlockIndexHeader)), IOStream::SeekType_End); - } - - // Read the index of the first diff - // Header first - file_BlockIndexHeader diff1IdxHdr; - if(!rDiff1.ReadFullBuffer(&diff1IdxHdr, sizeof(diff1IdxHdr), 0)) - { - THROW_EXCEPTION(BackupStoreException, CouldntReadEntireStructureFromStream) - } - if(ntohl(diff1IdxHdr.mMagicValue) != OBJECTMAGIC_FILE_BLOCKS_MAGIC_VALUE_V1) - { - THROW_EXCEPTION(BackupStoreException, BadBackupStoreFile) - } - int64_t diff1NumBlocks = box_ntoh64(diff1IdxHdr.mNumBlocks); - // Allocate some memory - int64_t *diff1BlockStartPositions = (int64_t*)::malloc((diff1NumBlocks + 1) * sizeof(int64_t)); - if(diff1BlockStartPositions == 0) - { - throw std::bad_alloc(); - } - - // Buffer data - void *buffer = 0; - int bufferSize = 0; - - try - { - // Then the entries: - // For each entry, want to know if it's in the file, and if so, how big it is. - // We'll store this as an array of file positions in the file, with an additioal - // entry on the end so that we can work out the length of the last block. - // If an entry isn't in the file, then store 0 - (position in other file). - int64_t diff1Position = diff1DataStarts; - for(int64_t b = 0; b < diff1NumBlocks; ++b) - { - file_BlockIndexEntry e; - if(!rDiff1.ReadFullBuffer(&e, sizeof(e), 0)) - { - THROW_EXCEPTION(BackupStoreException, CouldntReadEntireStructureFromStream) - } - - // Where's the block? - int64_t blockEn = box_ntoh64(e.mEncodedSize); - if(blockEn <= 0) - { - // Just store the negated block number - diff1BlockStartPositions[b] = blockEn; - } - else - { - // Block is present in this file - diff1BlockStartPositions[b] = diff1Position; - diff1Position += blockEn; - } - } - - // Finish off the list, so the last entry can have it's size calcuated. - diff1BlockStartPositions[diff1NumBlocks] = diff1Position; - - // Now read the second diff's header, copying it to the out file - file_StreamFormat diff2Hdr; - if(!rDiff2.ReadFullBuffer(&diff2Hdr, sizeof(diff2Hdr), 0)) - { - THROW_EXCEPTION(BackupStoreException, FailedToReadBlockOnCombine) - } - if(ntohl(diff2Hdr.mMagicValue) != OBJECTMAGIC_FILE_MAGIC_VALUE_V1) - { - THROW_EXCEPTION(BackupStoreException, BadBackupStoreFile) - } - // Copy - rOut.Write(&diff2Hdr, sizeof(diff2Hdr)); - // Copy over filename and attributes - // BLOCK - { - BackupStoreFilename filename; - filename.ReadFromStream(rDiff2, IOStream::TimeOutInfinite); - filename.WriteToStream(rOut); - StreamableMemBlock attr; - attr.ReadFromStream(rDiff2, IOStream::TimeOutInfinite); - attr.WriteToStream(rOut); - } - - // Get to the index of rDiff2b, and read the header - MoveStreamPositionToBlockIndex(rDiff2b); - file_BlockIndexHeader diff2IdxHdr; - if(!rDiff2b.ReadFullBuffer(&diff2IdxHdr, sizeof(diff2IdxHdr), 0)) - { - THROW_EXCEPTION(BackupStoreException, CouldntReadEntireStructureFromStream) - } - if(ntohl(diff2IdxHdr.mMagicValue) != OBJECTMAGIC_FILE_BLOCKS_MAGIC_VALUE_V1) - { - THROW_EXCEPTION(BackupStoreException, BadBackupStoreFile) - } - int64_t diff2NumBlocks = box_ntoh64(diff2IdxHdr.mNumBlocks); - int64_t diff2IndexEntriesStart = rDiff2b.GetPosition(); - - // Then read all the entries - int64_t diff2FilePosition = rDiff2.GetPosition(); - for(int64_t b = 0; b < diff2NumBlocks; ++b) - { - file_BlockIndexEntry e; - if(!rDiff2b.ReadFullBuffer(&e, sizeof(e), 0)) - { - THROW_EXCEPTION(BackupStoreException, CouldntReadEntireStructureFromStream) - } - - // What do to next about copying data - bool copyBlock = false; - int copySize = 0; - int64_t copyFrom = 0; - bool fromFileDiff1 = false; - - // Where's the block? - int64_t blockEn = box_ntoh64(e.mEncodedSize); - if(blockEn > 0) - { - // Block is present in this file -- copy to out - copyBlock = true; - copyFrom = diff2FilePosition; - copySize = (int)blockEn; - - // Move pointer onwards - diff2FilePosition += blockEn; - } - else - { - // Block isn't present here -- is it present in the old one? - int64_t blockIndex = 0 - blockEn; - if(blockIndex < 0 || blockIndex > diff1NumBlocks) - { - THROW_EXCEPTION(BackupStoreException, BadBackupStoreFile) - } - if(diff1BlockStartPositions[blockIndex] > 0) - { - // Block is in the old diff file, copy it across - copyBlock = true; - copyFrom = diff1BlockStartPositions[blockIndex]; - int nb = blockIndex + 1; - while(diff1BlockStartPositions[nb] <= 0) - { - // This is safe, because the last entry will terminate it properly! - ++nb; - ASSERT(nb <= diff1NumBlocks); - } - copySize = diff1BlockStartPositions[nb] - copyFrom; - fromFileDiff1 = true; - } - } - //TRACE4("%d %d %lld %d\n", copyBlock, copySize, copyFrom, fromFileDiff1); - - // Copy data to the output file? - if(copyBlock) - { - // Allocate enough space - if(bufferSize < copySize || buffer == 0) - { - // Free old block - if(buffer != 0) - { - ::free(buffer); - buffer = 0; - bufferSize = 0; - } - // Allocate new block - buffer = ::malloc(copySize); - if(buffer == 0) - { - throw std::bad_alloc(); - } - bufferSize = copySize; - } - ASSERT(bufferSize >= copySize); - - // Load in the data - if(fromFileDiff1) - { - rDiff1.Seek(copyFrom, IOStream::SeekType_Absolute); - if(!rDiff1.ReadFullBuffer(buffer, copySize, 0)) - { - THROW_EXCEPTION(BackupStoreException, FailedToReadBlockOnCombine) - } - } - else - { - rDiff2.Seek(copyFrom, IOStream::SeekType_Absolute); - if(!rDiff2.ReadFullBuffer(buffer, copySize, 0)) - { - THROW_EXCEPTION(BackupStoreException, FailedToReadBlockOnCombine) - } - } - // Write out data - rOut.Write(buffer, copySize); - } - } - - // Write the modified header - diff2IdxHdr.mOtherFileID = diff1IdxHdr.mOtherFileID; - rOut.Write(&diff2IdxHdr, sizeof(diff2IdxHdr)); - - // Then we'll write out the index, reading the data again - rDiff2b.Seek(diff2IndexEntriesStart, IOStream::SeekType_Absolute); - for(int64_t b = 0; b < diff2NumBlocks; ++b) - { - file_BlockIndexEntry e; - if(!rDiff2b.ReadFullBuffer(&e, sizeof(e), 0)) - { - THROW_EXCEPTION(BackupStoreException, CouldntReadEntireStructureFromStream) - } - - // Where's the block? - int64_t blockEn = box_ntoh64(e.mEncodedSize); - - // If it's not in this file, it needs modification... - if(blockEn <= 0) - { - int64_t blockIndex = 0 - blockEn; - // In another file. Need to translate this against the other diff - if(diff1BlockStartPositions[blockIndex] > 0) - { - // Block is in the first diff file, stick in size - int nb = blockIndex + 1; - while(diff1BlockStartPositions[nb] <= 0) - { - // This is safe, because the last entry will terminate it properly! - ++nb; - ASSERT(nb <= diff1NumBlocks); - } - int64_t size = diff1BlockStartPositions[nb] - diff1BlockStartPositions[blockIndex]; - e.mEncodedSize = box_hton64(size); - } - else - { - // Block in the original file, use translated value - e.mEncodedSize = box_hton64(diff1BlockStartPositions[blockIndex]); - } - } - - // Write entry - rOut.Write(&e, sizeof(e)); - } - } - catch(...) - { - // clean up - ::free(diff1BlockStartPositions); - if(buffer != 0) - { - ::free(buffer); - } - throw; - } - - // Clean up allocated memory - ::free(diff1BlockStartPositions); - if(buffer != 0) - { - ::free(buffer); - } -} - diff --git a/lib/backupclient/BackupStoreFileCmbIdx.cpp b/lib/backupclient/BackupStoreFileCmbIdx.cpp deleted file mode 100644 index c8bcc3b9..00000000 --- a/lib/backupclient/BackupStoreFileCmbIdx.cpp +++ /dev/null @@ -1,324 +0,0 @@ -// -------------------------------------------------------------------------- -// -// File -// Name: BackupStoreFileCmbIdx.cpp -// Purpose: Combine indicies of a delta file and the file it's a diff from. -// Created: 8/7/04 -// -// -------------------------------------------------------------------------- - -#include "Box.h" - -#include <new> -#include <string.h> - -#include "BackupStoreFile.h" -#include "BackupStoreFileWire.h" -#include "BackupStoreObjectMagic.h" -#include "BackupStoreException.h" -#include "BackupStoreConstants.h" -#include "BackupStoreFilename.h" - -#include "MemLeakFindOn.h" - -// Hide from outside world -namespace -{ - -class BSFCombinedIndexStream : public IOStream -{ -public: - BSFCombinedIndexStream(IOStream *pDiff); - ~BSFCombinedIndexStream(); - - virtual int Read(void *pBuffer, int NBytes, int Timeout = IOStream::TimeOutInfinite); - virtual void Write(const void *pBuffer, int NBytes); - virtual bool StreamDataLeft(); - virtual bool StreamClosed(); - virtual void Initialise(IOStream &rFrom); - -private: - IOStream *mpDiff; - bool mIsInitialised; - bool mHeaderWritten; - file_BlockIndexHeader mHeader; - int64_t mNumEntriesToGo; - int64_t mNumEntriesInFromFile; - int64_t *mFromBlockSizes; // NOTE: Entries in network byte order -}; - -}; - -// -------------------------------------------------------------------------- -// -// Function -// Name: BackupStoreFile::CombineFileIndices(IOStream &, IOStream &, bool) -// Purpose: Given a diff file and the file it's a diff from, return a stream from which -// can be read the index of the combined file, without actually combining them. -// The stream of the diff must have a lifetime greater than or equal to the -// lifetime of the returned stream object. The full "from" file stream -// only needs to exist during the actual function call. -// If you pass in dodgy files which aren't related, then you will either -// get an error or bad results. So don't do that. -// If DiffIsIndexOnly is true, then rDiff is assumed to be a stream positioned -// at the beginning of the block index. Similarly for FromIsIndexOnly. -// WARNING: Reads of the returned streams with buffer sizes less than 64 bytes -// will not return any data. -// Created: 8/7/04 -// -// -------------------------------------------------------------------------- -std::auto_ptr<IOStream> BackupStoreFile::CombineFileIndices(IOStream &rDiff, IOStream &rFrom, bool DiffIsIndexOnly, bool FromIsIndexOnly) -{ - // Reposition file pointers? - if(!DiffIsIndexOnly) - { - MoveStreamPositionToBlockIndex(rDiff); - } - if(!FromIsIndexOnly) - { - MoveStreamPositionToBlockIndex(rFrom); - } - - // Create object - std::auto_ptr<IOStream> stream(new BSFCombinedIndexStream(&rDiff)); - - // Initialise it - ((BSFCombinedIndexStream *)stream.get())->Initialise(rFrom); - - // And return the stream - return stream; -} - - -// -------------------------------------------------------------------------- -// -// Function -// Name: BSFCombinedIndexStream::BSFCombinedIndexStream() -// Purpose: Private class. Constructor. -// Created: 8/7/04 -// -// -------------------------------------------------------------------------- -BSFCombinedIndexStream::BSFCombinedIndexStream(IOStream *pDiff) - : mpDiff(pDiff), - mIsInitialised(false), - mHeaderWritten(false), - mNumEntriesToGo(0), - mNumEntriesInFromFile(0), - mFromBlockSizes(0) -{ - ASSERT(mpDiff != 0); -} - - -// -------------------------------------------------------------------------- -// -// Function -// Name: BSFCombinedIndexStream::~BSFCombinedIndexStream() -// Purpose: Private class. Destructor. -// Created: 8/7/04 -// -// -------------------------------------------------------------------------- -BSFCombinedIndexStream::~BSFCombinedIndexStream() -{ - if(mFromBlockSizes != 0) - { - ::free(mFromBlockSizes); - mFromBlockSizes = 0; - } -} - - -// -------------------------------------------------------------------------- -// -// Function -// Name: BSFCombinedIndexStream::Initialise(IOStream &) -// Purpose: Private class. Initalise from the streams (diff passed in constructor). -// Both streams must have file pointer positioned at the block index. -// Created: 8/7/04 -// -// -------------------------------------------------------------------------- -void BSFCombinedIndexStream::Initialise(IOStream &rFrom) -{ - // Paranoia is good. - if(mIsInitialised) - { - THROW_EXCEPTION(BackupStoreException, Internal) - } - - // Look at the diff file: Read in the header - if(!mpDiff->ReadFullBuffer(&mHeader, sizeof(mHeader), 0)) - { - THROW_EXCEPTION(BackupStoreException, CouldntReadEntireStructureFromStream) - } - if(ntohl(mHeader.mMagicValue) != OBJECTMAGIC_FILE_BLOCKS_MAGIC_VALUE_V1) - { - THROW_EXCEPTION(BackupStoreException, BadBackupStoreFile) - } - - // Read relevant data. - mNumEntriesToGo = box_ntoh64(mHeader.mNumBlocks); - - // Adjust a bit to reflect the fact it's no longer a diff - mHeader.mOtherFileID = box_hton64(0); - - // Now look at the from file: Read header - file_BlockIndexHeader fromHdr; - if(!rFrom.ReadFullBuffer(&fromHdr, sizeof(fromHdr), 0)) - { - THROW_EXCEPTION(BackupStoreException, CouldntReadEntireStructureFromStream) - } - if(ntohl(fromHdr.mMagicValue) != OBJECTMAGIC_FILE_BLOCKS_MAGIC_VALUE_V1) - { - THROW_EXCEPTION(BackupStoreException, BadBackupStoreFile) - } - - // Then... allocate memory for the list of sizes - mNumEntriesInFromFile = box_ntoh64(fromHdr.mNumBlocks); - mFromBlockSizes = (int64_t*)::malloc(mNumEntriesInFromFile * sizeof(int64_t)); - if(mFromBlockSizes == 0) - { - throw std::bad_alloc(); - } - - // And read them all in! - for(int64_t b = 0; b < mNumEntriesInFromFile; ++b) - { - file_BlockIndexEntry e; - if(!rFrom.ReadFullBuffer(&e, sizeof(e), 0)) - { - THROW_EXCEPTION(BackupStoreException, CouldntReadEntireStructureFromStream) - } - - // Check that the from file isn't a delta in itself - if(box_ntoh64(e.mEncodedSize) <= 0) - { - THROW_EXCEPTION(BackupStoreException, OnCombineFromFileIsIncomplete) - } - - // Store size (in network byte order) - mFromBlockSizes[b] = e.mEncodedSize; - } - - // Flag as initialised - mIsInitialised = true; -} - - -// -------------------------------------------------------------------------- -// -// Function -// Name: BSFCombinedIndexStream::Read(void *, int, int) -// Purpose: Private class. As interface. -// Created: 8/7/04 -// -// -------------------------------------------------------------------------- -int BSFCombinedIndexStream::Read(void *pBuffer, int NBytes, int Timeout) -{ - // Paranoia is good. - if(!mIsInitialised || mFromBlockSizes == 0 || mpDiff == 0) - { - THROW_EXCEPTION(BackupStoreException, Internal) - } - - int written = 0; - - // Header output yet? - if(!mHeaderWritten) - { - // Enough space? - if(NBytes < (int)sizeof(mHeader)) return 0; - - // Copy in - ::memcpy(pBuffer, &mHeader, sizeof(mHeader)); - NBytes -= sizeof(mHeader); - written += sizeof(mHeader); - - // Flag it's done - mHeaderWritten = true; - } - - // How many entries can be written? - int entriesToWrite = NBytes / sizeof(file_BlockIndexEntry); - if(entriesToWrite > mNumEntriesToGo) - { - entriesToWrite = mNumEntriesToGo; - } - - // Setup ready to go - file_BlockIndexEntry *poutput = (file_BlockIndexEntry*)(((uint8_t*)pBuffer) + written); - - // Write entries - for(int b = 0; b < entriesToWrite; ++b) - { - if(!mpDiff->ReadFullBuffer(&(poutput[b]), sizeof(file_BlockIndexEntry), 0)) - { - THROW_EXCEPTION(BackupStoreException, CouldntReadEntireStructureFromStream) - } - - // Does this need adjusting? - int s = box_ntoh64(poutput[b].mEncodedSize); - if(s <= 0) - { - // A reference to a block in the from file - int block = 0 - s; - ASSERT(block >= 0); - if(block >= mNumEntriesInFromFile) - { - // That's not good, the block doesn't exist - THROW_EXCEPTION(BackupStoreException, OnCombineFromFileIsIncomplete) - } - - // Adjust the entry in the buffer - poutput[b].mEncodedSize = mFromBlockSizes[block]; // stored in network byte order, no translation necessary - } - } - - // Update written count - written += entriesToWrite * sizeof(file_BlockIndexEntry); - mNumEntriesToGo -= entriesToWrite; - - return written; -} - - -// -------------------------------------------------------------------------- -// -// Function -// Name: BSFCombinedIndexStream::Write(const void *, int) -// Purpose: Private class. As interface. -// Created: 8/7/04 -// -// -------------------------------------------------------------------------- -void BSFCombinedIndexStream::Write(const void *pBuffer, int NBytes) -{ - THROW_EXCEPTION(BackupStoreException, StreamDoesntHaveRequiredFeatures) -} - - -// -------------------------------------------------------------------------- -// -// Function -// Name: BSFCombinedIndexStream::StreamDataLeft() -// Purpose: Private class. As interface -// Created: 8/7/04 -// -// -------------------------------------------------------------------------- -bool BSFCombinedIndexStream::StreamDataLeft() -{ - return (!mHeaderWritten) || (mNumEntriesToGo > 0); -} - - -// -------------------------------------------------------------------------- -// -// Function -// Name: BSFCombinedIndexStream::StreamClosed() -// Purpose: Private class. As interface. -// Created: 8/7/04 -// -// -------------------------------------------------------------------------- -bool BSFCombinedIndexStream::StreamClosed() -{ - return true; // doesn't do writing -} - diff --git a/lib/backupclient/BackupStoreFileCombine.cpp b/lib/backupclient/BackupStoreFileCombine.cpp deleted file mode 100644 index baa331f0..00000000 --- a/lib/backupclient/BackupStoreFileCombine.cpp +++ /dev/null @@ -1,410 +0,0 @@ -// -------------------------------------------------------------------------- -// -// File -// Name: BackupStoreFileCombine.cpp -// Purpose: File combining for BackupStoreFile -// Created: 16/1/04 -// -// -------------------------------------------------------------------------- - -#include "Box.h" - -#include <new> - -#include "BackupStoreFile.h" -#include "BackupStoreFileWire.h" -#include "BackupStoreObjectMagic.h" -#include "BackupStoreException.h" -#include "BackupStoreConstants.h" -#include "BackupStoreFilename.h" -#include "FileStream.h" - -#include "MemLeakFindOn.h" - -typedef struct -{ - int64_t mFilePosition; -} FromIndexEntry; - -static void LoadFromIndex(IOStream &rFrom, FromIndexEntry *pIndex, int64_t NumEntries); -static void CopyData(IOStream &rDiffData, IOStream &rDiffIndex, int64_t DiffNumBlocks, IOStream &rFrom, FromIndexEntry *pFromIndex, int64_t FromNumBlocks, IOStream &rOut); -static void WriteNewIndex(IOStream &rDiff, int64_t DiffNumBlocks, FromIndexEntry *pFromIndex, int64_t FromNumBlocks, IOStream &rOut); - -// -------------------------------------------------------------------------- -// -// Function -// Name: BackupStoreFile::CombineFile(IOStream &, IOStream &, IOStream &) -// Purpose: Where rDiff is a store file which is incomplete as a result of a -// diffing operation, rFrom is the file it is diffed from, and -// rOut is the stream in which to place the result, the old file -// and new file are combined into a file containing all the data. -// rDiff2 is the same file as rDiff, opened again to get two -// independent streams to the same file. -// Created: 16/1/04 -// -// -------------------------------------------------------------------------- -void BackupStoreFile::CombineFile(IOStream &rDiff, IOStream &rDiff2, IOStream &rFrom, IOStream &rOut) -{ - // Read and copy the header. - file_StreamFormat hdr; - if(!rDiff.ReadFullBuffer(&hdr, sizeof(hdr), 0)) - { - THROW_EXCEPTION(BackupStoreException, FailedToReadBlockOnCombine) - } - if(ntohl(hdr.mMagicValue) != OBJECTMAGIC_FILE_MAGIC_VALUE_V1) - { - THROW_EXCEPTION(BackupStoreException, BadBackupStoreFile) - } - // Copy - rOut.Write(&hdr, sizeof(hdr)); - // Copy over filename and attributes - // BLOCK - { - BackupStoreFilename filename; - filename.ReadFromStream(rDiff, IOStream::TimeOutInfinite); - filename.WriteToStream(rOut); - StreamableMemBlock attr; - attr.ReadFromStream(rDiff, IOStream::TimeOutInfinite); - attr.WriteToStream(rOut); - } - - // Read the header for the From file - file_StreamFormat fromHdr; - if(!rFrom.ReadFullBuffer(&fromHdr, sizeof(fromHdr), 0)) - { - THROW_EXCEPTION(BackupStoreException, FailedToReadBlockOnCombine) - } - if(ntohl(fromHdr.mMagicValue) != OBJECTMAGIC_FILE_MAGIC_VALUE_V1) - { - THROW_EXCEPTION(BackupStoreException, BadBackupStoreFile) - } - // Skip over the filename and attributes of the From file - // BLOCK - { - BackupStoreFilename filename2; - filename2.ReadFromStream(rFrom, IOStream::TimeOutInfinite); - int32_t size_s; - if(!rFrom.ReadFullBuffer(&size_s, sizeof(size_s), 0 /* not interested in bytes read if this fails */)) - { - THROW_EXCEPTION(CommonException, StreamableMemBlockIncompleteRead) - } - int size = ntohl(size_s); - // Skip forward the size - rFrom.Seek(size, IOStream::SeekType_Relative); - } - - // Allocate memory for the block index of the From file - int64_t fromNumBlocks = box_ntoh64(fromHdr.mNumBlocks); - // NOTE: An extra entry is required so that the length of the last block can be calculated - FromIndexEntry *pFromIndex = (FromIndexEntry*)::malloc((fromNumBlocks+1) * sizeof(FromIndexEntry)); - if(pFromIndex == 0) - { - throw std::bad_alloc(); - } - - try - { - // Load the index from the From file, calculating the offsets in the - // file as we go along, and enforce that everything should be present. - LoadFromIndex(rFrom, pFromIndex, fromNumBlocks); - - // Read in the block index of the Diff file in small chunks, and output data - // for each block, either from this file, or the other file. - int64_t diffNumBlocks = box_ntoh64(hdr.mNumBlocks); - CopyData(rDiff /* positioned at start of data */, rDiff2, diffNumBlocks, rFrom, pFromIndex, fromNumBlocks, rOut); - - // Read in the block index again, and output the new block index, simply - // filling in the sizes of blocks from the old file. - WriteNewIndex(rDiff, diffNumBlocks, pFromIndex, fromNumBlocks, rOut); - - // Free buffers - ::free(pFromIndex); - pFromIndex = 0; - } - catch(...) - { - // Clean up - if(pFromIndex != 0) - { - ::free(pFromIndex); - pFromIndex = 0; - } - throw; - } -} - - -// -------------------------------------------------------------------------- -// -// Function -// Name: static LoadFromIndex(IOStream &, FromIndexEntry *, int64_t) -// Purpose: Static. Load the index from the From file -// Created: 16/1/04 -// -// -------------------------------------------------------------------------- -static void LoadFromIndex(IOStream &rFrom, FromIndexEntry *pIndex, int64_t NumEntries) -{ - ASSERT(pIndex != 0); - ASSERT(NumEntries >= 0); - - // Get the starting point in the file - int64_t filePos = rFrom.GetPosition(); - - // Jump to the end of the file to read the index - rFrom.Seek(0 - ((NumEntries * sizeof(file_BlockIndexEntry)) + sizeof(file_BlockIndexHeader)), IOStream::SeekType_End); - - // Read block index header - file_BlockIndexHeader blkhdr; - if(!rFrom.ReadFullBuffer(&blkhdr, sizeof(blkhdr), 0)) - { - THROW_EXCEPTION(BackupStoreException, FailedToReadBlockOnCombine) - } - if(ntohl(blkhdr.mMagicValue) != OBJECTMAGIC_FILE_BLOCKS_MAGIC_VALUE_V1 - || (int64_t)box_ntoh64(blkhdr.mNumBlocks) != NumEntries) - { - THROW_EXCEPTION(BackupStoreException, BadBackupStoreFile) - } - - // And then the block entries - for(int64_t b = 0; b < NumEntries; ++b) - { - // Read - file_BlockIndexEntry en; - if(!rFrom.ReadFullBuffer(&en, sizeof(en), 0)) - { - THROW_EXCEPTION(BackupStoreException, FailedToReadBlockOnCombine) - } - - // Add to list - pIndex[b].mFilePosition = filePos; - - // Encoded size? - int64_t encodedSize = box_ntoh64(en.mEncodedSize); - // Check that the block is actually there - if(encodedSize <= 0) - { - THROW_EXCEPTION(BackupStoreException, OnCombineFromFileIsIncomplete) - } - - // Move file pointer on - filePos += encodedSize; - } - - // Store the position in the very last entry, so the size of the last entry can be calculated - pIndex[NumEntries].mFilePosition = filePos; -} - - -// -------------------------------------------------------------------------- -// -// Function -// Name: static CopyData(IOStream &, IOStream &, int64_t, IOStream &, FromIndexEntry *, int64_t, IOStream &) -// Purpose: Static. Copy data from the Diff and From file to the out file. -// rDiffData is at beginning of data. -// rDiffIndex at any position. -// rFrom is at any position. -// rOut is after the header, ready for data -// Created: 16/1/04 -// -// -------------------------------------------------------------------------- -static void CopyData(IOStream &rDiffData, IOStream &rDiffIndex, int64_t DiffNumBlocks, - IOStream &rFrom, FromIndexEntry *pFromIndex, int64_t FromNumBlocks, IOStream &rOut) -{ - // Jump to the end of the diff file to read the index - rDiffIndex.Seek(0 - ((DiffNumBlocks * sizeof(file_BlockIndexEntry)) + sizeof(file_BlockIndexHeader)), IOStream::SeekType_End); - - // Read block index header - file_BlockIndexHeader diffBlkhdr; - if(!rDiffIndex.ReadFullBuffer(&diffBlkhdr, sizeof(diffBlkhdr), 0)) - { - THROW_EXCEPTION(BackupStoreException, FailedToReadBlockOnCombine) - } - if(ntohl(diffBlkhdr.mMagicValue) != OBJECTMAGIC_FILE_BLOCKS_MAGIC_VALUE_V1 - || (int64_t)box_ntoh64(diffBlkhdr.mNumBlocks) != DiffNumBlocks) - { - THROW_EXCEPTION(BackupStoreException, BadBackupStoreFile) - } - - // Record where the From file is - int64_t fromPos = rFrom.GetPosition(); - - // Buffer data - void *buffer = 0; - int bufferSize = 0; - - try - { - // Read the blocks in! - for(int64_t b = 0; b < DiffNumBlocks; ++b) - { - // Read - file_BlockIndexEntry en; - if(!rDiffIndex.ReadFullBuffer(&en, sizeof(en), 0)) - { - THROW_EXCEPTION(BackupStoreException, FailedToReadBlockOnCombine) - } - - // What's the size value stored in the entry - int64_t encodedSize = box_ntoh64(en.mEncodedSize); - - // How much data will be read? - int32_t blockSize = 0; - if(encodedSize > 0) - { - // The block is actually in the diff file - blockSize = encodedSize; - } - else - { - // It's in the from file. First, check to see if it's valid - int64_t blockIdx = (0 - encodedSize); - if(blockIdx > FromNumBlocks) - { - // References a block which doesn't actually exist - THROW_EXCEPTION(BackupStoreException, BadBackupStoreFile) - } - // Calculate size. This operation is safe because of the extra entry at the end - blockSize = pFromIndex[blockIdx + 1].mFilePosition - pFromIndex[blockIdx].mFilePosition; - } - ASSERT(blockSize > 0); - - // Make sure there's memory available to copy this - if(bufferSize < blockSize || buffer == 0) - { - // Free old block - if(buffer != 0) - { - ::free(buffer); - buffer = 0; - bufferSize = 0; - } - // Allocate new block - buffer = ::malloc(blockSize); - if(buffer == 0) - { - throw std::bad_alloc(); - } - bufferSize = blockSize; - } - ASSERT(bufferSize >= blockSize); - - // Load in data from one of the files - if(encodedSize > 0) - { - // Load from diff file - if(!rDiffData.ReadFullBuffer(buffer, blockSize, 0)) - { - THROW_EXCEPTION(BackupStoreException, FailedToReadBlockOnCombine) - } - } - else - { - // Locate and read the data from the from file - int64_t blockIdx = (0 - encodedSize); - // Seek if necessary - if(fromPos != pFromIndex[blockIdx].mFilePosition) - { - rFrom.Seek(pFromIndex[blockIdx].mFilePosition, IOStream::SeekType_Absolute); - fromPos = pFromIndex[blockIdx].mFilePosition; - } - // Read - if(!rFrom.ReadFullBuffer(buffer, blockSize, 0)) - { - THROW_EXCEPTION(BackupStoreException, FailedToReadBlockOnCombine) - } - - // Update fromPos to current position - fromPos += blockSize; - } - - // Write data to out file - rOut.Write(buffer, blockSize); - } - - // Free buffer, if allocated - if(buffer != 0) - { - ::free(buffer); - buffer = 0; - } - } - catch(...) - { - if(buffer != 0) - { - ::free(buffer); - buffer = 0; - } - throw; - } -} - - - -// -------------------------------------------------------------------------- -// -// Function -// Name: static WriteNewIndex(IOStream &, int64_t, FromIndexEntry *, int64_t, IOStream &) -// Purpose: Write the index to the out file, just copying from the diff file and -// adjusting the entries. -// Created: 16/1/04 -// -// -------------------------------------------------------------------------- -static void WriteNewIndex(IOStream &rDiff, int64_t DiffNumBlocks, FromIndexEntry *pFromIndex, int64_t FromNumBlocks, IOStream &rOut) -{ - // Jump to the end of the diff file to read the index - rDiff.Seek(0 - ((DiffNumBlocks * sizeof(file_BlockIndexEntry)) + sizeof(file_BlockIndexHeader)), IOStream::SeekType_End); - - // Read block index header - file_BlockIndexHeader diffBlkhdr; - if(!rDiff.ReadFullBuffer(&diffBlkhdr, sizeof(diffBlkhdr), 0)) - { - THROW_EXCEPTION(BackupStoreException, FailedToReadBlockOnCombine) - } - if(ntohl(diffBlkhdr.mMagicValue) != OBJECTMAGIC_FILE_BLOCKS_MAGIC_VALUE_V1 - || (int64_t)box_ntoh64(diffBlkhdr.mNumBlocks) != DiffNumBlocks) - { - THROW_EXCEPTION(BackupStoreException, BadBackupStoreFile) - } - - // Write it out with a blanked out other file ID - diffBlkhdr.mOtherFileID = box_hton64(0); - rOut.Write(&diffBlkhdr, sizeof(diffBlkhdr)); - - // Rewrite the index - for(int64_t b = 0; b < DiffNumBlocks; ++b) - { - file_BlockIndexEntry en; - if(!rDiff.ReadFullBuffer(&en, sizeof(en), 0)) - { - THROW_EXCEPTION(BackupStoreException, FailedToReadBlockOnCombine) - } - - // What's the size value stored in the entry - int64_t encodedSize = box_ntoh64(en.mEncodedSize); - - // Need to adjust it? - if(encodedSize <= 0) - { - // This actually refers to a block in the from file. So rewrite this. - int64_t blockIdx = (0 - encodedSize); - if(blockIdx > FromNumBlocks) - { - // References a block which doesn't actually exist - THROW_EXCEPTION(BackupStoreException, BadBackupStoreFile) - } - // Calculate size. This operation is safe because of the extra entry at the end - int32_t blockSize = pFromIndex[blockIdx + 1].mFilePosition - pFromIndex[blockIdx].mFilePosition; - // Then replace entry - en.mEncodedSize = box_hton64(((uint64_t)blockSize)); - } - - // Write entry - rOut.Write(&en, sizeof(en)); - } -} - - - - - diff --git a/lib/backupclient/BackupStoreFileDiff.cpp b/lib/backupclient/BackupStoreFileDiff.cpp deleted file mode 100644 index 5705c3aa..00000000 --- a/lib/backupclient/BackupStoreFileDiff.cpp +++ /dev/null @@ -1,1046 +0,0 @@ -// -------------------------------------------------------------------------- -// -// File -// Name: BackupStoreFileDiff.cpp -// Purpose: Functions relating to diffing BackupStoreFiles -// Created: 12/1/04 -// -// -------------------------------------------------------------------------- - -#include "Box.h" - -#include <string.h> - -#include <new> -#include <map> - -#ifdef HAVE_TIME_H - #include <time.h> -#elif HAVE_SYS_TIME_H - #include <sys/time.h> -#endif - -#include "BackupStoreConstants.h" -#include "BackupStoreException.h" -#include "BackupStoreFile.h" -#include "BackupStoreFileCryptVar.h" -#include "BackupStoreFileEncodeStream.h" -#include "BackupStoreFileWire.h" -#include "BackupStoreObjectMagic.h" -#include "CommonException.h" -#include "FileStream.h" -#include "MD5Digest.h" -#include "RollingChecksum.h" -#include "Timer.h" - -#include "MemLeakFindOn.h" - -#include <cstring> - -using namespace BackupStoreFileCryptVar; -using namespace BackupStoreFileCreation; - -// By default, don't trace out details of the diff as we go along -- would fill up logs significantly. -// But it's useful for the test. -#ifndef BOX_RELEASE_BUILD - bool BackupStoreFile::TraceDetailsOfDiffProcess = false; -#endif - -static void LoadIndex(IOStream &rBlockIndex, int64_t ThisID, BlocksAvailableEntry **ppIndex, int64_t &rNumBlocksOut, int Timeout, bool &rCanDiffFromThis); -static void FindMostUsedSizes(BlocksAvailableEntry *pIndex, int64_t NumBlocks, int32_t Sizes[BACKUP_FILE_DIFF_MAX_BLOCK_SIZES]); -static void SearchForMatchingBlocks(IOStream &rFile, - std::map<int64_t, int64_t> &rFoundBlocks, BlocksAvailableEntry *pIndex, - int64_t NumBlocks, int32_t Sizes[BACKUP_FILE_DIFF_MAX_BLOCK_SIZES], - DiffTimer *pDiffTimer); -static void SetupHashTable(BlocksAvailableEntry *pIndex, int64_t NumBlocks, int32_t BlockSize, BlocksAvailableEntry **pHashTable); -static bool SecondStageMatch(BlocksAvailableEntry *pFirstInHashList, RollingChecksum &fastSum, uint8_t *pBeginnings, uint8_t *pEndings, int Offset, int32_t BlockSize, int64_t FileBlockNumber, -BlocksAvailableEntry *pIndex, std::map<int64_t, int64_t> &rFoundBlocks); -static void GenerateRecipe(BackupStoreFileEncodeStream::Recipe &rRecipe, BlocksAvailableEntry *pIndex, int64_t NumBlocks, std::map<int64_t, int64_t> &rFoundBlocks, int64_t SizeOfInputFile); - -// -------------------------------------------------------------------------- -// -// Function -// Name: BackupStoreFile::MoveStreamPositionToBlockIndex(IOStream &) -// Purpose: Move the file pointer in this stream to just before the block index. -// Assumes that the stream is at the beginning, seekable, and -// reading from the stream is OK. -// Created: 12/1/04 -// -// -------------------------------------------------------------------------- -void BackupStoreFile::MoveStreamPositionToBlockIndex(IOStream &rStream) -{ - // Size of file - int64_t fileSize = rStream.BytesLeftToRead(); - - // Get header - file_StreamFormat hdr; - - // Read the header - if(!rStream.ReadFullBuffer(&hdr, sizeof(hdr), 0 /* not interested in bytes read if this fails */, IOStream::TimeOutInfinite)) - { - // Couldn't read header - THROW_EXCEPTION(BackupStoreException, CouldntReadEntireStructureFromStream) - } - - // Check magic number - if(ntohl(hdr.mMagicValue) != OBJECTMAGIC_FILE_MAGIC_VALUE_V1 -#ifndef BOX_DISABLE_BACKWARDS_COMPATIBILITY_BACKUPSTOREFILE - && ntohl(hdr.mMagicValue) != OBJECTMAGIC_FILE_MAGIC_VALUE_V0 -#endif - ) - { - THROW_EXCEPTION(BackupStoreException, BadBackupStoreFile) - } - - // Work out where the index is - int64_t numBlocks = box_ntoh64(hdr.mNumBlocks); - int64_t blockHeaderPosFromEnd = ((numBlocks * sizeof(file_BlockIndexEntry)) + sizeof(file_BlockIndexHeader)); - - // Sanity check - if(blockHeaderPosFromEnd > static_cast<int64_t>(fileSize - sizeof(file_StreamFormat))) - { - THROW_EXCEPTION(BackupStoreException, BadBackupStoreFile) - } - - // Seek to that position - rStream.Seek(0 - blockHeaderPosFromEnd, IOStream::SeekType_End); - - // Done. Stream now in right position (as long as the file is formatted correctly) -} - - -// -------------------------------------------------------------------------- -// -// Function -// Name: BackupStoreFile::EncodeFileDiff(const char *, int64_t, const BackupStoreFilename &, int64_t, IOStream &, int64_t *) -// Purpose: Similar to EncodeFile, but takes the object ID of the file it's -// diffing from, and the index of the blocks in a stream. It'll then -// calculate which blocks can be reused from that old file. -// The timeout is the timeout value for reading the diff block index. -// If pIsCompletelyDifferent != 0, it will be set to true if the -// the two files are completely different (do not share any block), false otherwise. -// -// Created: 12/1/04 -// -// -------------------------------------------------------------------------- -std::auto_ptr<IOStream> BackupStoreFile::EncodeFileDiff -( - const char *Filename, int64_t ContainerID, - const BackupStoreFilename &rStoreFilename, int64_t DiffFromObjectID, - IOStream &rDiffFromBlockIndex, int Timeout, DiffTimer *pDiffTimer, - int64_t *pModificationTime, bool *pIsCompletelyDifferent) -{ - // Is it a symlink? - { - EMU_STRUCT_STAT st; - if(EMU_LSTAT(Filename, &st) != 0) - { - THROW_EXCEPTION(CommonException, OSFileError) - } - if((st.st_mode & S_IFLNK) == S_IFLNK) - { - // Don't do diffs for symlinks - if(pIsCompletelyDifferent != 0) - { - *pIsCompletelyDifferent = true; - } - return EncodeFile(Filename, ContainerID, rStoreFilename, pModificationTime); - } - } - - // Load in the blocks - BlocksAvailableEntry *pindex = 0; - int64_t blocksInIndex = 0; - bool canDiffFromThis = false; - LoadIndex(rDiffFromBlockIndex, DiffFromObjectID, &pindex, blocksInIndex, Timeout, canDiffFromThis); - // BOX_TRACE("Diff: Blocks in index: " << blocksInIndex); - - if(!canDiffFromThis) - { - // Don't do diffing... - if(pIsCompletelyDifferent != 0) - { - *pIsCompletelyDifferent = true; - } - return EncodeFile(Filename, ContainerID, rStoreFilename, pModificationTime); - } - - // Pointer to recipe we're going to create - BackupStoreFileEncodeStream::Recipe *precipe = 0; - - try - { - // Find which sizes should be scanned - int32_t sizesToScan[BACKUP_FILE_DIFF_MAX_BLOCK_SIZES]; - FindMostUsedSizes(pindex, blocksInIndex, sizesToScan); - - // Flag for reporting to the user - bool completelyDifferent; - - // BLOCK - { - // Search the file to find matching blocks - std::map<int64_t, int64_t> foundBlocks; // map of offset in file to index in block index - int64_t sizeOfInputFile = 0; - // BLOCK - { - FileStream file(Filename); - // Get size of file - sizeOfInputFile = file.BytesLeftToRead(); - // Find all those lovely matching blocks - SearchForMatchingBlocks(file, foundBlocks, pindex, - blocksInIndex, sizesToScan, pDiffTimer); - - // Is it completely different? - completelyDifferent = (foundBlocks.size() == 0); - } - - // Create a recipe -- if the two files are completely different, don't put the from file ID in the recipe. - precipe = new BackupStoreFileEncodeStream::Recipe(pindex, blocksInIndex, completelyDifferent?(0):(DiffFromObjectID)); - BlocksAvailableEntry *pindexKeptRef = pindex; // we need this later, but must set pindex == 0 now, because of exceptions - pindex = 0; // Recipe now has ownership - - // Fill it in - GenerateRecipe(*precipe, pindexKeptRef, blocksInIndex, foundBlocks, sizeOfInputFile); - } - // foundBlocks no longer required - - // Create the stream - std::auto_ptr<IOStream> stream(new BackupStoreFileEncodeStream); - - // Do the initial setup - ((BackupStoreFileEncodeStream*)stream.get())->Setup(Filename, precipe, ContainerID, rStoreFilename, pModificationTime); - precipe = 0; // Stream has taken ownership of this - - // Tell user about completely different status? - if(pIsCompletelyDifferent != 0) - { - *pIsCompletelyDifferent = completelyDifferent; - } - - // Return the stream for the caller - return stream; - } - catch(...) - { - // cleanup - if(pindex != 0) - { - ::free(pindex); - pindex = 0; - } - if(precipe != 0) - { - delete precipe; - precipe = 0; - } - throw; - } -} - -// -------------------------------------------------------------------------- -// -// Function -// Name: static LoadIndex(IOStream &, int64_t, BlocksAvailableEntry **, int64_t, bool &) -// Purpose: Read in an index, and decrypt, and store in the in memory block format. -// rCanDiffFromThis is set to false if the version of the from file is too old. -// Created: 12/1/04 -// -// -------------------------------------------------------------------------- -static void LoadIndex(IOStream &rBlockIndex, int64_t ThisID, BlocksAvailableEntry **ppIndex, int64_t &rNumBlocksOut, int Timeout, bool &rCanDiffFromThis) -{ - // Reset - rNumBlocksOut = 0; - rCanDiffFromThis = false; - - // Read header - file_BlockIndexHeader hdr; - if(!rBlockIndex.ReadFullBuffer(&hdr, sizeof(hdr), 0 /* not interested in bytes read if this fails */, Timeout)) - { - // Couldn't read header - THROW_EXCEPTION(BackupStoreException, CouldntReadEntireStructureFromStream) - } - -#ifndef BOX_DISABLE_BACKWARDS_COMPATIBILITY_BACKUPSTOREFILE - // Check against backwards comptaibility stuff - if(hdr.mMagicValue == (int32_t)htonl(OBJECTMAGIC_FILE_BLOCKS_MAGIC_VALUE_V0)) - { - // Won't diff against old version - - // Absorb rest of stream - char buffer[2048]; - while(rBlockIndex.StreamDataLeft()) - { - rBlockIndex.Read(buffer, sizeof(buffer), 1000 /* 1 sec timeout */); - } - - // Tell caller - rCanDiffFromThis = false; - return; - } -#endif - - // Check magic - if(hdr.mMagicValue != (int32_t)htonl(OBJECTMAGIC_FILE_BLOCKS_MAGIC_VALUE_V1)) - { - THROW_EXCEPTION(BackupStoreException, BadBackupStoreFile) - } - - // Check that we're not trying to diff against a file which references blocks from another file - if(((int64_t)box_ntoh64(hdr.mOtherFileID)) != 0) - { - THROW_EXCEPTION(BackupStoreException, CannotDiffAnIncompleteStoreFile) - } - - // Mark as an acceptable diff. - rCanDiffFromThis = true; - - // Get basic information - int64_t numBlocks = box_ntoh64(hdr.mNumBlocks); - uint64_t entryIVBase = box_ntoh64(hdr.mEntryIVBase); - - //TODO: Verify that these sizes look reasonable - - // Allocate space for the index - BlocksAvailableEntry *pindex = (BlocksAvailableEntry*)::malloc(sizeof(BlocksAvailableEntry) * numBlocks); - if(pindex == 0) - { - throw std::bad_alloc(); - } - - try - { - for(int64_t b = 0; b < numBlocks; ++b) - { - // Read an entry from the stream - file_BlockIndexEntry entry; - if(!rBlockIndex.ReadFullBuffer(&entry, sizeof(entry), 0 /* not interested in bytes read if this fails */, Timeout)) - { - // Couldn't read entry - THROW_EXCEPTION(BackupStoreException, CouldntReadEntireStructureFromStream) - } - - // Calculate IV for this entry - uint64_t iv = entryIVBase; - iv += b; - // Network byte order - iv = box_hton64(iv); - sBlowfishDecryptBlockEntry.SetIV(&iv); - - // Decrypt the encrypted section - file_BlockIndexEntryEnc entryEnc; - int sectionSize = sBlowfishDecryptBlockEntry.TransformBlock(&entryEnc, sizeof(entryEnc), - entry.mEnEnc, sizeof(entry.mEnEnc)); - if(sectionSize != sizeof(entryEnc)) - { - THROW_EXCEPTION(BackupStoreException, BlockEntryEncodingDidntGiveExpectedLength) - } - - // Check that we're not trying to diff against a file which references blocks from another file - if(((int64_t)box_ntoh64(entry.mEncodedSize)) <= 0) - { - THROW_EXCEPTION(BackupStoreException, CannotDiffAnIncompleteStoreFile) - } - - // Store all the required information - pindex[b].mpNextInHashList = 0; // hash list not set up yet - pindex[b].mSize = ntohl(entryEnc.mSize); - pindex[b].mWeakChecksum = ntohl(entryEnc.mWeakChecksum); - ::memcpy(pindex[b].mStrongChecksum, entryEnc.mStrongChecksum, sizeof(pindex[b].mStrongChecksum)); - } - - // Store index pointer for called - ASSERT(ppIndex != 0); - *ppIndex = pindex; - - // Store number of blocks for caller - rNumBlocksOut = numBlocks; - - } - catch(...) - { - // clean up and send the exception along its way - ::free(pindex); - throw; - } -} - - -// -------------------------------------------------------------------------- -// -// Function -// Name: static FindMostUsedSizes(BlocksAvailableEntry *, int64_t, int32_t[BACKUP_FILE_DIFF_MAX_BLOCK_SIZES]) -// Purpose: Finds the most commonly used block sizes in the index -// Created: 12/1/04 -// -// -------------------------------------------------------------------------- -static void FindMostUsedSizes(BlocksAvailableEntry *pIndex, int64_t NumBlocks, int32_t Sizes[BACKUP_FILE_DIFF_MAX_BLOCK_SIZES]) -{ - // Array for lengths - int64_t sizeCounts[BACKUP_FILE_DIFF_MAX_BLOCK_SIZES]; - - // Set arrays to lots of zeros (= unused entries) - for(int l = 0; l < BACKUP_FILE_DIFF_MAX_BLOCK_SIZES; ++l) - { - Sizes[l] = 0; - sizeCounts[l] = 0; - } - - // Array for collecting sizes - std::map<int32_t, int64_t> foundSizes; - - // Run through blocks and make a count of the entries - for(int64_t b = 0; b < NumBlocks; ++b) - { - // Only if the block size is bigger than the minimum size we'll scan for - if(pIndex[b].mSize > BACKUP_FILE_DIFF_MIN_BLOCK_SIZE) - { - // Find entry? - std::map<int32_t, int64_t>::const_iterator f(foundSizes.find(pIndex[b].mSize)); - if(f != foundSizes.end()) - { - // Increment existing entry - foundSizes[pIndex[b].mSize] = foundSizes[pIndex[b].mSize] + 1; - } - else - { - // New entry - foundSizes[pIndex[b].mSize] = 1; - } - } - } - - // Make the block sizes - for(std::map<int32_t, int64_t>::const_iterator i(foundSizes.begin()); i != foundSizes.end(); ++i) - { - // Find the position of the size in the array - for(int t = 0; t < BACKUP_FILE_DIFF_MAX_BLOCK_SIZES; ++t) - { - // Instead of sorting on the raw count of blocks, - // take the file area covered by this block size. - if(i->second * i->first > sizeCounts[t] * Sizes[t]) - { - // Then this size belong before this entry -- shuffle them up - for(int s = (BACKUP_FILE_DIFF_MAX_BLOCK_SIZES - 1); s >= t; --s) - { - Sizes[s] = Sizes[s-1]; - sizeCounts[s] = sizeCounts[s-1]; - } - - // Insert this size - Sizes[t] = i->first; - sizeCounts[t] = i->second; - - // Shouldn't do any more searching - break; - } - } - } - - // trace the size table in debug builds -#ifndef BOX_RELEASE_BUILD - if(BackupStoreFile::TraceDetailsOfDiffProcess) - { - for(int t = 0; t < BACKUP_FILE_DIFF_MAX_BLOCK_SIZES; ++t) - { - BOX_TRACE("Diff block size " << t << ": " << - Sizes[t] << " (count = " << - sizeCounts[t] << ")"); - } - } -#endif -} - - - -// -------------------------------------------------------------------------- -// -// Function -// Name: static SearchForMatchingBlocks(IOStream &, std::map<int64_t, int64_t> &, BlocksAvailableEntry *, int64_t, int32_t[BACKUP_FILE_DIFF_MAX_BLOCK_SIZES]) -// Purpose: Find the matching blocks within the file. -// Created: 12/1/04 -// -// -------------------------------------------------------------------------- -static void SearchForMatchingBlocks(IOStream &rFile, std::map<int64_t, int64_t> &rFoundBlocks, - BlocksAvailableEntry *pIndex, int64_t NumBlocks, - int32_t Sizes[BACKUP_FILE_DIFF_MAX_BLOCK_SIZES], DiffTimer *pDiffTimer) -{ - Timer maximumDiffingTime(0, "MaximumDiffingTime"); - - if(pDiffTimer && pDiffTimer->IsManaged()) - { - maximumDiffingTime = Timer(pDiffTimer->GetMaximumDiffingTime(), - "MaximumDiffingTime"); - } - - std::map<int64_t, int32_t> goodnessOfFit; - - // Allocate the hash lookup table - BlocksAvailableEntry **phashTable = (BlocksAvailableEntry **)::malloc(sizeof(BlocksAvailableEntry *) * (64*1024)); - - // Choose a size for the buffer, just a little bit more than the maximum block size - int32_t bufSize = Sizes[0]; - for(int z = 1; z < BACKUP_FILE_DIFF_MAX_BLOCK_SIZES; ++z) - { - if(Sizes[z] > bufSize) bufSize = Sizes[z]; - } - bufSize += 4; - ASSERT(bufSize > Sizes[0]); - ASSERT(bufSize > 0); - if(bufSize > (BACKUP_FILE_MAX_BLOCK_SIZE + 1024)) - { - THROW_EXCEPTION(BackupStoreException, BadBackupStoreFile) - } - - // TODO: Because we read in the file a scanned block size at a time, - // it is likely to be inefficient. Probably will be much better to - // calculate checksums for all block sizes in a single pass. - - // Allocate the buffers. - uint8_t *pbuffer0 = (uint8_t *)::malloc(bufSize); - uint8_t *pbuffer1 = (uint8_t *)::malloc(bufSize); - try - { - // Check buffer allocation - if(pbuffer0 == 0 || pbuffer1 == 0 || phashTable == 0) - { - // If a buffer got allocated, it will be cleaned up in the catch block - throw std::bad_alloc(); - } - - // Flag to abort the run, if too many blocks are found -- avoid using - // huge amounts of processor time when files contain many similar blocks. - bool abortSearch = false; - - // Search for each block size in turn - // NOTE: Do the smallest size first, so that the scheme for adding - // entries in the found list works as expected and replaces smallers block - // with larger blocks when it finds matches at the same offset in the file. - for(int s = BACKUP_FILE_DIFF_MAX_BLOCK_SIZES - 1; s >= 0; --s) - { - ASSERT(Sizes[s] <= bufSize); - BOX_TRACE("Diff pass " << s << ", for block size " << - Sizes[s]); - - // Check we haven't finished - if(Sizes[s] == 0) - { - // empty entry, try next size - continue; - } - - // Set up the hash table entries - SetupHashTable(pIndex, NumBlocks, Sizes[s], phashTable); - - // Shift file position to beginning - rFile.Seek(0, IOStream::SeekType_Absolute); - - // Read first block - if(rFile.Read(pbuffer0, Sizes[s]) != Sizes[s]) - { - // Size of file too short to match -- do next size - continue; - } - - // Setup block pointers - uint8_t *beginnings = pbuffer0; - uint8_t *endings = pbuffer1; - int offset = 0; - - // Calculate the first checksum, ready for rolling - RollingChecksum rolling(beginnings, Sizes[s]); - - // Then roll, until the file is exhausted - int64_t fileBlockNumber = 0; - int64_t fileOffset = 0; - int rollOverInitialBytes = 0; - while(true) - { - if(maximumDiffingTime.HasExpired()) - { - ASSERT(pDiffTimer != NULL); - BOX_INFO("MaximumDiffingTime reached - " - "suspending file diff"); - abortSearch = true; - break; - } - - if(pDiffTimer) - { - pDiffTimer->DoKeepAlive(); - } - - // Load in another block of data, and record how big it is - int bytesInEndings = rFile.Read(endings, Sizes[s]); - int tmp; - - // Skip any bytes from a previous matched block - if(rollOverInitialBytes > 0 && offset < bytesInEndings) - { - int spaceLeft = bytesInEndings - offset; - int thisRoll = (rollOverInitialBytes > spaceLeft) ? spaceLeft : rollOverInitialBytes; - - rolling.RollForwardSeveral(beginnings+offset, endings+offset, Sizes[s], thisRoll); - - offset += thisRoll; - fileOffset += thisRoll; - rollOverInitialBytes -= thisRoll; - - if(rollOverInitialBytes) - { - goto refresh; - } - } - - if(goodnessOfFit.count(fileOffset)) - { - tmp = goodnessOfFit[fileOffset]; - } - else - { - tmp = 0; - } - - if(tmp >= Sizes[s]) - { - // Skip over bigger ready-matched blocks completely - rollOverInitialBytes = tmp; - int spaceLeft = bytesInEndings - offset; - int thisRoll = (rollOverInitialBytes > spaceLeft) ? spaceLeft : rollOverInitialBytes; - - rolling.RollForwardSeveral(beginnings+offset, endings+offset, Sizes[s], thisRoll); - - offset += thisRoll; - fileOffset += thisRoll; - rollOverInitialBytes -= thisRoll; - - if(rollOverInitialBytes) - { - goto refresh; - } - } - - while(offset < bytesInEndings) - { - // Is current checksum in hash list? - uint16_t hash = rolling.GetComponentForHashing(); - if(phashTable[hash] != 0 && (goodnessOfFit.count(fileOffset) == 0 || goodnessOfFit[fileOffset] < Sizes[s])) - { - if(SecondStageMatch(phashTable[hash], rolling, beginnings, endings, offset, Sizes[s], fileBlockNumber, pIndex, rFoundBlocks)) - { - BOX_TRACE("Found block match for " << hash << " of " << Sizes[s] << " bytes at offset " << fileOffset); - goodnessOfFit[fileOffset] = Sizes[s]; - - // Block matched, roll the checksum forward to the next block without doing - // any more comparisons, because these are pointless (as any more matches will be ignored when - // the recipe is generated) and just take up valuable processor time. Edge cases are - // especially nasty, using huge amounts of time and memory. - int skip = Sizes[s]; - if(offset < bytesInEndings && skip > 0) - { - int spaceLeft = bytesInEndings - offset; - int thisRoll = (skip > spaceLeft) ? spaceLeft : skip; - - rolling.RollForwardSeveral(beginnings+offset, endings+offset, Sizes[s], thisRoll); - - offset += thisRoll; - fileOffset += thisRoll; - skip -= thisRoll; - } - // Not all the bytes necessary will have been skipped, so get them - // skipped after the next block is loaded. - rollOverInitialBytes = skip; - - // End this loop, so the final byte isn't used again - break; - } - else - { - BOX_TRACE("False alarm match for " << hash << " of " << Sizes[s] << " bytes at offset " << fileOffset); - } - - int64_t NumBlocksFound = static_cast<int64_t>( - rFoundBlocks.size()); - int64_t MaxBlocksFound = NumBlocks * - BACKUP_FILE_DIFF_MAX_BLOCK_FIND_MULTIPLE; - - if(NumBlocksFound > MaxBlocksFound) - { - abortSearch = true; - break; - } - } - - // Roll checksum forward - rolling.RollForward(beginnings[offset], endings[offset], Sizes[s]); - - // Increment offsets - ++offset; - ++fileOffset; - } - - if(abortSearch) break; - - refresh: - // Finished? - if(bytesInEndings != Sizes[s]) - { - // No more data in file -- check the final block - // (Do a copy and paste of 5 lines of code instead of introducing a comparison for - // each byte of the file) - uint16_t hash = rolling.GetComponentForHashing(); - if(phashTable[hash] != 0 && (goodnessOfFit.count(fileOffset) == 0 || goodnessOfFit[fileOffset] < Sizes[s])) - { - if(SecondStageMatch(phashTable[hash], rolling, beginnings, endings, offset, Sizes[s], fileBlockNumber, pIndex, rFoundBlocks)) - { - goodnessOfFit[fileOffset] = Sizes[s]; - } - } - - // finish - break; - } - - // Switch buffers, reset offset - beginnings = endings; - endings = (beginnings == pbuffer0)?(pbuffer1):(pbuffer0); // ie the other buffer - offset = 0; - - // And count the blocks which have been done - ++fileBlockNumber; - } - - if(abortSearch) break; - } - - // Free buffers and hash table - ::free(pbuffer1); - pbuffer1 = 0; - ::free(pbuffer0); - pbuffer0 = 0; - ::free(phashTable); - phashTable = 0; - } - catch(...) - { - // Cleanup and throw - if(pbuffer1 != 0) ::free(pbuffer1); - if(pbuffer0 != 0) ::free(pbuffer0); - if(phashTable != 0) ::free(phashTable); - throw; - } - -#ifndef BOX_RELEASE_BUILD - if(BackupStoreFile::TraceDetailsOfDiffProcess) - { - // Trace out the found blocks in debug mode - BOX_TRACE("Diff: list of found blocks"); - BOX_TRACE("======== ======== ======== ========"); - BOX_TRACE(" Offset BlkIdx Size Movement"); - for(std::map<int64_t, int64_t>::const_iterator i(rFoundBlocks.begin()); i != rFoundBlocks.end(); ++i) - { - int64_t orgLoc = 0; - for(int64_t b = 0; b < i->second; ++b) - { - orgLoc += pIndex[b].mSize; - } - BOX_TRACE(std::setw(8) << i->first << " " << - std::setw(8) << i->second << " " << - std::setw(8) << pIndex[i->second].mSize << - " " << - std::setw(8) << (i->first - orgLoc)); - } - BOX_TRACE("======== ======== ======== ========"); - } -#endif -} - - -// -------------------------------------------------------------------------- -// -// Function -// Name: static SetupHashTable(BlocksAvailableEntry *, int64_t, in32_t, BlocksAvailableEntry **) -// Purpose: Set up the hash table ready for a scan -// Created: 14/1/04 -// -// -------------------------------------------------------------------------- -static void SetupHashTable(BlocksAvailableEntry *pIndex, int64_t NumBlocks, int32_t BlockSize, BlocksAvailableEntry **pHashTable) -{ - // Set all entries in the hash table to zero - ::memset(pHashTable, 0, (sizeof(BlocksAvailableEntry *) * (64*1024))); - - // Scan through the blocks, building the hash table - for(int64_t b = 0; b < NumBlocks; ++b) - { - // Only look at the required block size - if(pIndex[b].mSize == BlockSize) - { - // Get the value under which to hash this entry - uint16_t hash = RollingChecksum::ExtractHashingComponent(pIndex[b].mWeakChecksum); - - // Already present in table? - if(pHashTable[hash] != 0) - { - //BOX_TRACE("Another hash entry for " << hash << " found"); - // Yes -- need to set the pointer in this entry to the current entry to build the linked list - pIndex[b].mpNextInHashList = pHashTable[hash]; - } - - // Put a pointer to this entry in the hash table - pHashTable[hash] = pIndex + b; - } - } -} - - -// -------------------------------------------------------------------------- -// -// Function -// Name: static bool SecondStageMatch(xxx) -// Purpose: When a match in the hash table is found, scan for second stage match using strong checksum. -// Created: 14/1/04 -// -// -------------------------------------------------------------------------- -static bool SecondStageMatch(BlocksAvailableEntry *pFirstInHashList, RollingChecksum &fastSum, uint8_t *pBeginnings, uint8_t *pEndings, - int Offset, int32_t BlockSize, int64_t FileBlockNumber, BlocksAvailableEntry *pIndex, std::map<int64_t, int64_t> &rFoundBlocks) -{ - // Check parameters - ASSERT(pBeginnings != 0); - ASSERT(pEndings != 0); - ASSERT(Offset >= 0); - ASSERT(BlockSize > 0); - ASSERT(pFirstInHashList != 0); - ASSERT(pIndex != 0); - -#ifndef BOX_RELEASE_BUILD - uint16_t DEBUG_Hash = fastSum.GetComponentForHashing(); -#endif - uint32_t Checksum = fastSum.GetChecksum(); - - // Before we go to the expense of the MD5, make sure it's a darn good match on the checksum we already know. - BlocksAvailableEntry *scan = pFirstInHashList; - bool found=false; - while(scan != 0) - { - if(scan->mWeakChecksum == Checksum) - { - found = true; - break; - } - scan = scan->mpNextInHashList; - } - if(!found) - { - return false; - } - - // Calculate the strong MD5 digest for this block - MD5Digest strong; - // Add the data from the beginnings - strong.Add(pBeginnings + Offset, BlockSize - Offset); - // Add any data from the endings - if(Offset > 0) - { - strong.Add(pEndings, Offset); - } - strong.Finish(); - - // Then go through the entries in the hash list, comparing with the strong digest calculated - scan = pFirstInHashList; - //BOX_TRACE("second stage match"); - while(scan != 0) - { - //BOX_TRACE("scan size " << scan->mSize << - // ", block size " << BlockSize << - // ", hash " << Hash); - ASSERT(scan->mSize == BlockSize); - ASSERT(RollingChecksum::ExtractHashingComponent(scan->mWeakChecksum) == DEBUG_Hash); - - // Compare? - if(strong.DigestMatches(scan->mStrongChecksum)) - { - //BOX_TRACE("Match!\n"); - // Found! Add to list of found blocks... - int64_t fileOffset = (FileBlockNumber * BlockSize) + Offset; - int64_t blockIndex = (scan - pIndex); // pointer arthmitic is frowned upon. But most efficient way of doing it here -- alternative is to use more memory - - // We do NOT search for smallest blocks first, as this code originally assumed. - // To prevent this from potentially overwriting a better match, the caller must determine - // the relative "goodness" of any existing match and this one, and avoid the call if it - // could be detrimental. - rFoundBlocks[fileOffset] = blockIndex; - - // No point in searching further, report success - return true; - } - - // Next - scan = scan->mpNextInHashList; - } - - // Not matched - return false; -} - - -// -------------------------------------------------------------------------- -// -// Function -// Name: static GenerateRecipe(BackupStoreFileEncodeStream::Recipe &, BlocksAvailableEntry *, int64_t, std::map<int64_t, int64_t> &) -// Purpose: Fills in the recipe from the found block list -// Created: 15/1/04 -// -// -------------------------------------------------------------------------- -static void GenerateRecipe(BackupStoreFileEncodeStream::Recipe &rRecipe, BlocksAvailableEntry *pIndex, - int64_t NumBlocks, std::map<int64_t, int64_t> &rFoundBlocks, int64_t SizeOfInputFile) -{ - // NOTE: This function could be a lot more sophisiticated. For example, if - // a small block overlaps a big block like this - // **** - // ******************************* - // then the small block will be used, not the big one. But it'd be better to - // just ignore the small block and keep the big one. However, some stats should - // be gathered about real world files before writing complex code which might - // go wrong. - - // Initialise a blank instruction - BackupStoreFileEncodeStream::RecipeInstruction instruction; - #define RESET_INSTRUCTION \ - instruction.mSpaceBefore = 0; \ - instruction.mBlocks = 0; \ - instruction.mpStartBlock = 0; - RESET_INSTRUCTION - - // First, a special case for when there are no found blocks - if(rFoundBlocks.size() == 0) - { - // No blocks, just a load of space - instruction.mSpaceBefore = SizeOfInputFile; - rRecipe.push_back(instruction); - - #ifndef BOX_RELEASE_BUILD - if(BackupStoreFile::TraceDetailsOfDiffProcess) - { - BOX_TRACE("Diff: Default recipe generated, " << - SizeOfInputFile << " bytes of file"); - } - #endif - - // Don't do anything - return; - } - - // Current location - int64_t loc = 0; - - // Then iterate through the list, generating the recipe - std::map<int64_t, int64_t>::const_iterator i(rFoundBlocks.begin()); - ASSERT(i != rFoundBlocks.end()); // check logic - - // Counting for debug tracing -#ifndef BOX_RELEASE_BUILD - int64_t debug_NewBytesFound = 0; - int64_t debug_OldBlocksUsed = 0; -#endif - - for(; i != rFoundBlocks.end(); ++i) - { - // Remember... map is (position in file) -> (index of block in pIndex) - - if(i->first < loc) - { - // This block overlaps the last one - continue; - } - else if(i->first > loc) - { - // There's a gap between the end of the last thing and this block. - // If there's an instruction waiting, push it onto the list - if(instruction.mSpaceBefore != 0 || instruction.mpStartBlock != 0) - { - rRecipe.push_back(instruction); - } - // Start a new instruction, with the gap ready - RESET_INSTRUCTION - instruction.mSpaceBefore = i->first - loc; - // Move location forward to match - loc += instruction.mSpaceBefore; -#ifndef BOX_RELEASE_BUILD - debug_NewBytesFound += instruction.mSpaceBefore; -#endif - } - - // First, does the current instruction need pushing back, because this block is not - // sequential to the last one? - if(instruction.mpStartBlock != 0 && (pIndex + i->second) != (instruction.mpStartBlock + instruction.mBlocks)) - { - rRecipe.push_back(instruction); - RESET_INSTRUCTION - } - - // Add in this block - if(instruction.mpStartBlock == 0) - { - // This block starts a new instruction - instruction.mpStartBlock = pIndex + i->second; - instruction.mBlocks = 1; - } - else - { - // It continues the previous section of blocks - instruction.mBlocks += 1; - } - -#ifndef BOX_RELEASE_BUILD - debug_OldBlocksUsed++; -#endif - - // Move location forward - loc += pIndex[i->second].mSize; - } - - // Push the last instruction generated - rRecipe.push_back(instruction); - - // Is there any space left at the end which needs sending? - if(loc != SizeOfInputFile) - { - RESET_INSTRUCTION - instruction.mSpaceBefore = SizeOfInputFile - loc; -#ifndef BOX_RELEASE_BUILD - debug_NewBytesFound += instruction.mSpaceBefore; -#endif - rRecipe.push_back(instruction); - } - - // dump out the recipe -#ifndef BOX_RELEASE_BUILD - BOX_TRACE("Diff: " << - debug_NewBytesFound << " new bytes found, " << - debug_OldBlocksUsed << " old blocks used"); - if(BackupStoreFile::TraceDetailsOfDiffProcess) - { - BOX_TRACE("Diff: Recipe generated (size " << rRecipe.size()); - BOX_TRACE("======== ========= ========"); - BOX_TRACE("Space b4 FirstBlk NumBlks"); - { - for(unsigned int e = 0; e < rRecipe.size(); ++e) - { - char b[64]; -#ifdef WIN32 - sprintf(b, "%8I64d", (int64_t)(rRecipe[e].mpStartBlock - pIndex)); -#else - sprintf(b, "%8lld", (int64_t)(rRecipe[e].mpStartBlock - pIndex)); -#endif - BOX_TRACE(std::setw(8) << - rRecipe[e].mSpaceBefore << - " " << - ((rRecipe[e].mpStartBlock == 0)?" -":b) << - " " << std::setw(8) << - rRecipe[e].mBlocks); - } - } - BOX_TRACE("======== ========= ========"); - } -#endif -} |