Move remaining parts of BackupStoreFile into lib/backupstore, and fix module

dependencies to fail if anything else required by bbstored is still in lib/backupclient instead of lib/backupstore.
author: Chris Wilson <chris+github@qwirx.com> 2011-05-24 15:08:59 +0000
committer: Chris Wilson <chris+github@qwirx.com> 2011-05-24 15:08:59 +0000
commit: 2bca085d623e857e89878a48efba59f78875370f (patch)
tree: 1cc119e7886cf2293c161f700323444934b85904 /lib/backupstore
parent: f04cd092a2711160519c957a5ba8c28babd3004c (diff)
4 files changed, 2106 insertions, 0 deletions
diff --git a/lib/backupstore/BackupStoreFileCmbDiff.cpp b/lib/backupstore/BackupStoreFileCmbDiff.cpp
new file mode 100644
index 00000000..1a88fa3f
--- /dev/null
+++ b/lib/backupstore/BackupStoreFileCmbDiff.cpp
@@ -0,0 +1,326 @@
+// --------------------------------------------------------------------------
+//
+// File
+//		Name:    BackupStoreFileCmbDiff.cpp
+//		Purpose: Combine two diffs together
+//		Created: 12/7/04
+//
+// --------------------------------------------------------------------------
+
+#include "Box.h"
+
+#include <new>
+#include <stdlib.h>
+
+#include "BackupStoreFile.h"
+#include "BackupStoreFileWire.h"
+#include "BackupStoreObjectMagic.h"
+#include "BackupStoreException.h"
+#include "BackupStoreConstants.h"
+#include "BackupStoreFilename.h"
+
+#include "MemLeakFindOn.h"
+
+// --------------------------------------------------------------------------
+//
+// Function
+//		Name:    BackupStoreFile::CombineDiffs(IOStream &, IOStream &, IOStream &rOut)
+//		Purpose: Given two diffs, combine them into a single diff, to produce a diff
+//				 which, combined with the original file, creates the result of applying
+//				 rDiff, then rDiff2. Two opens of rDiff2 are required
+//		Created: 12/7/04
+//
+// --------------------------------------------------------------------------
+void BackupStoreFile::CombineDiffs(IOStream &rDiff1, IOStream &rDiff2, IOStream &rDiff2b, IOStream &rOut)
+{
+	// Skip header of first diff, record where the data starts, and skip to the index
+	int64_t diff1DataStarts = 0;
+	{
+		// Read the header for the From file
+		file_StreamFormat diff1Hdr;
+		if(!rDiff1.ReadFullBuffer(&diff1Hdr, sizeof(diff1Hdr), 0))
+		{
+			THROW_EXCEPTION(BackupStoreException, FailedToReadBlockOnCombine)
+		}
+		if(ntohl(diff1Hdr.mMagicValue) != OBJECTMAGIC_FILE_MAGIC_VALUE_V1)
+		{
+			THROW_EXCEPTION(BackupStoreException, BadBackupStoreFile)
+		}
+		// Skip over the filename and attributes of the From file
+		// BLOCK
+		{
+			BackupStoreFilename filename2;
+			filename2.ReadFromStream(rDiff1, IOStream::TimeOutInfinite);
+			int32_t size_s;
+			if(!rDiff1.ReadFullBuffer(&size_s, sizeof(size_s), 0 /* not interested in bytes read if this fails */))
+			{
+				THROW_EXCEPTION(CommonException, StreamableMemBlockIncompleteRead)
+			}
+			int size = ntohl(size_s);
+			// Skip forward the size
+			rDiff1.Seek(size, IOStream::SeekType_Relative);		
+		}
+		// Record position
+		diff1DataStarts = rDiff1.GetPosition();
+		// Skip to index
+		rDiff1.Seek(0 - (((box_ntoh64(diff1Hdr.mNumBlocks)) * sizeof(file_BlockIndexEntry)) + sizeof(file_BlockIndexHeader)), IOStream::SeekType_End);
+	}
+
+	// Read the index of the first diff
+	// Header first
+	file_BlockIndexHeader diff1IdxHdr;
+	if(!rDiff1.ReadFullBuffer(&diff1IdxHdr, sizeof(diff1IdxHdr), 0))
+	{
+		THROW_EXCEPTION(BackupStoreException, CouldntReadEntireStructureFromStream)
+	}
+	if(ntohl(diff1IdxHdr.mMagicValue) != OBJECTMAGIC_FILE_BLOCKS_MAGIC_VALUE_V1)
+	{
+		THROW_EXCEPTION(BackupStoreException, BadBackupStoreFile)
+	}
+	int64_t diff1NumBlocks = box_ntoh64(diff1IdxHdr.mNumBlocks);
+	// Allocate some memory
+	int64_t *diff1BlockStartPositions = (int64_t*)::malloc((diff1NumBlocks + 1) * sizeof(int64_t));
+	if(diff1BlockStartPositions == 0)
+	{
+		throw std::bad_alloc();
+	}
+
+	// Buffer data
+	void *buffer = 0;
+	int bufferSize = 0;	
+	
+	try
+	{
+		// Then the entries:
+		// For each entry, want to know if it's in the file, and if so, how big it is.
+		// We'll store this as an array of file positions in the file, with an additioal
+		// entry on the end so that we can work out the length of the last block.
+		// If an entry isn't in the file, then store 0 - (position in other file).
+		int64_t diff1Position = diff1DataStarts;
+		for(int64_t b = 0; b < diff1NumBlocks; ++b)
+		{
+			file_BlockIndexEntry e;
+			if(!rDiff1.ReadFullBuffer(&e, sizeof(e), 0))
+			{
+				THROW_EXCEPTION(BackupStoreException, CouldntReadEntireStructureFromStream)
+			}
+	
+			// Where's the block?
+			int64_t blockEn = box_ntoh64(e.mEncodedSize);
+			if(blockEn <= 0)
+			{
+				// Just store the negated block number
+				diff1BlockStartPositions[b] = blockEn;
+			}
+			else
+			{
+				// Block is present in this file
+				diff1BlockStartPositions[b] = diff1Position;
+				diff1Position += blockEn;
+			}
+		}
+		
+		// Finish off the list, so the last entry can have it's size calcuated.
+		diff1BlockStartPositions[diff1NumBlocks] = diff1Position;
+
+		// Now read the second diff's header, copying it to the out file
+		file_StreamFormat diff2Hdr;
+		if(!rDiff2.ReadFullBuffer(&diff2Hdr, sizeof(diff2Hdr), 0))
+		{
+			THROW_EXCEPTION(BackupStoreException, FailedToReadBlockOnCombine)
+		}
+		if(ntohl(diff2Hdr.mMagicValue) != OBJECTMAGIC_FILE_MAGIC_VALUE_V1)
+		{
+			THROW_EXCEPTION(BackupStoreException, BadBackupStoreFile)
+		}
+		// Copy
+		rOut.Write(&diff2Hdr, sizeof(diff2Hdr));
+		// Copy over filename and attributes
+		// BLOCK
+		{
+			BackupStoreFilename filename;
+			filename.ReadFromStream(rDiff2, IOStream::TimeOutInfinite);
+			filename.WriteToStream(rOut);
+			StreamableMemBlock attr;
+			attr.ReadFromStream(rDiff2, IOStream::TimeOutInfinite);
+			attr.WriteToStream(rOut);
+		}
+		
+		// Get to the index of rDiff2b, and read the header
+		MoveStreamPositionToBlockIndex(rDiff2b);
+		file_BlockIndexHeader diff2IdxHdr;
+		if(!rDiff2b.ReadFullBuffer(&diff2IdxHdr, sizeof(diff2IdxHdr), 0))
+		{
+			THROW_EXCEPTION(BackupStoreException, CouldntReadEntireStructureFromStream)
+		}
+		if(ntohl(diff2IdxHdr.mMagicValue) != OBJECTMAGIC_FILE_BLOCKS_MAGIC_VALUE_V1)
+		{
+			THROW_EXCEPTION(BackupStoreException, BadBackupStoreFile)
+		}
+		int64_t diff2NumBlocks = box_ntoh64(diff2IdxHdr.mNumBlocks);
+		int64_t diff2IndexEntriesStart = rDiff2b.GetPosition();
+		
+		// Then read all the entries
+		int64_t diff2FilePosition = rDiff2.GetPosition();
+		for(int64_t b = 0; b < diff2NumBlocks; ++b)
+		{
+			file_BlockIndexEntry e;
+			if(!rDiff2b.ReadFullBuffer(&e, sizeof(e), 0))
+			{
+				THROW_EXCEPTION(BackupStoreException, CouldntReadEntireStructureFromStream)
+			}
+
+			// What do to next about copying data
+			bool copyBlock = false;
+			int copySize = 0;
+			int64_t copyFrom = 0;
+			bool fromFileDiff1 = false;
+	
+			// Where's the block?
+			int64_t blockEn = box_ntoh64(e.mEncodedSize);
+			if(blockEn > 0)
+			{
+				// Block is present in this file -- copy to out
+				copyBlock = true;
+				copyFrom = diff2FilePosition;
+				copySize = (int)blockEn;
+				
+				// Move pointer onwards
+				diff2FilePosition += blockEn;
+			}
+			else
+			{
+				// Block isn't present here -- is it present in the old one?
+				int64_t blockIndex = 0 - blockEn;
+				if(blockIndex < 0 || blockIndex > diff1NumBlocks)
+				{
+					THROW_EXCEPTION(BackupStoreException, BadBackupStoreFile)
+				}
+				if(diff1BlockStartPositions[blockIndex] > 0)
+				{
+					// Block is in the old diff file, copy it across
+					copyBlock = true;
+					copyFrom = diff1BlockStartPositions[blockIndex];
+					int nb = blockIndex + 1;
+					while(diff1BlockStartPositions[nb] <= 0)
+					{
+						// This is safe, because the last entry will terminate it properly!
+						++nb;
+						ASSERT(nb <= diff1NumBlocks);
+					}
+					copySize = diff1BlockStartPositions[nb] - copyFrom;
+					fromFileDiff1 = true;
+				}
+			}
+			//TRACE4("%d %d %lld %d\n", copyBlock, copySize, copyFrom, fromFileDiff1);
+						
+			// Copy data to the output file?
+			if(copyBlock)
+			{
+				// Allocate enough space
+				if(bufferSize < copySize || buffer == 0)
+				{
+					// Free old block
+					if(buffer != 0)
+					{
+						::free(buffer);
+						buffer = 0;
+						bufferSize = 0;
+					}
+					// Allocate new block
+					buffer = ::malloc(copySize);
+					if(buffer == 0)
+					{
+						throw std::bad_alloc();
+					}
+					bufferSize = copySize;
+				}
+				ASSERT(bufferSize >= copySize);
+				
+				// Load in the data
+				if(fromFileDiff1)
+				{
+					rDiff1.Seek(copyFrom, IOStream::SeekType_Absolute);
+					if(!rDiff1.ReadFullBuffer(buffer, copySize, 0))
+					{
+						THROW_EXCEPTION(BackupStoreException, FailedToReadBlockOnCombine)
+					}
+				}
+				else
+				{
+					rDiff2.Seek(copyFrom, IOStream::SeekType_Absolute);
+					if(!rDiff2.ReadFullBuffer(buffer, copySize, 0))
+					{
+						THROW_EXCEPTION(BackupStoreException, FailedToReadBlockOnCombine)
+					}
+				}
+				// Write out data
+				rOut.Write(buffer, copySize);
+			}
+		}
+		
+		// Write the modified header
+		diff2IdxHdr.mOtherFileID = diff1IdxHdr.mOtherFileID;
+		rOut.Write(&diff2IdxHdr, sizeof(diff2IdxHdr));
+		
+		// Then we'll write out the index, reading the data again
+		rDiff2b.Seek(diff2IndexEntriesStart, IOStream::SeekType_Absolute);
+		for(int64_t b = 0; b < diff2NumBlocks; ++b)
+		{
+			file_BlockIndexEntry e;
+			if(!rDiff2b.ReadFullBuffer(&e, sizeof(e), 0))
+			{
+				THROW_EXCEPTION(BackupStoreException, CouldntReadEntireStructureFromStream)
+			}
+	
+			// Where's the block?
+			int64_t blockEn = box_ntoh64(e.mEncodedSize);
+		
+			// If it's not in this file, it needs modification...
+			if(blockEn <= 0)
+			{
+				int64_t blockIndex = 0 - blockEn;
+				// In another file. Need to translate this against the other diff
+				if(diff1BlockStartPositions[blockIndex] > 0)
+				{
+					// Block is in the first diff file, stick in size
+					int nb = blockIndex + 1;
+					while(diff1BlockStartPositions[nb] <= 0)
+					{
+						// This is safe, because the last entry will terminate it properly!
+						++nb;
+						ASSERT(nb <= diff1NumBlocks);
+					}
+					int64_t size = diff1BlockStartPositions[nb] - diff1BlockStartPositions[blockIndex];
+					e.mEncodedSize = box_hton64(size);
+				}
+				else
+				{
+					// Block in the original file, use translated value
+					e.mEncodedSize = box_hton64(diff1BlockStartPositions[blockIndex]);
+				}
+			}
+			
+			// Write entry
+			rOut.Write(&e, sizeof(e));
+		}
+	}
+	catch(...)
+	{
+		// clean up
+		::free(diff1BlockStartPositions);
+		if(buffer != 0)
+		{
+			::free(buffer);
+		}
+		throw;
+	}
+	
+	// Clean up allocated memory
+	::free(diff1BlockStartPositions);
+	if(buffer != 0)
+	{
+		::free(buffer);
+	}
+}
+
diff --git a/lib/backupstore/BackupStoreFileCmbIdx.cpp b/lib/backupstore/BackupStoreFileCmbIdx.cpp
new file mode 100644
index 00000000..c8bcc3b9
--- /dev/null
+++ b/lib/backupstore/BackupStoreFileCmbIdx.cpp
@@ -0,0 +1,324 @@
+// --------------------------------------------------------------------------
+//
+// File
+//		Name:    BackupStoreFileCmbIdx.cpp
+//		Purpose: Combine indicies of a delta file and the file it's a diff from.
+//		Created: 8/7/04
+//
+// --------------------------------------------------------------------------
+
+#include "Box.h"
+
+#include <new>
+#include <string.h>
+
+#include "BackupStoreFile.h"
+#include "BackupStoreFileWire.h"
+#include "BackupStoreObjectMagic.h"
+#include "BackupStoreException.h"
+#include "BackupStoreConstants.h"
+#include "BackupStoreFilename.h"
+
+#include "MemLeakFindOn.h"
+
+// Hide from outside world
+namespace
+{
+
+class BSFCombinedIndexStream : public IOStream
+{
+public:
+	BSFCombinedIndexStream(IOStream *pDiff);
+	~BSFCombinedIndexStream();
+	
+	virtual int Read(void *pBuffer, int NBytes, int Timeout = IOStream::TimeOutInfinite);
+	virtual void Write(const void *pBuffer, int NBytes);
+	virtual bool StreamDataLeft();
+	virtual bool StreamClosed();
+	virtual void Initialise(IOStream &rFrom);
+	
+private:
+	IOStream *mpDiff;
+	bool mIsInitialised;
+	bool mHeaderWritten;
+	file_BlockIndexHeader mHeader;
+	int64_t mNumEntriesToGo;
+	int64_t mNumEntriesInFromFile;
+	int64_t *mFromBlockSizes;		// NOTE: Entries in network byte order
+};
+
+};
+
+// --------------------------------------------------------------------------
+//
+// Function
+//		Name:    BackupStoreFile::CombineFileIndices(IOStream &, IOStream &, bool)
+//		Purpose: Given a diff file and the file it's a diff from, return a stream from which
+//				 can be read the index of the combined file, without actually combining them.
+//				 The stream of the diff must have a lifetime greater than or equal to the
+//				 lifetime of the returned stream object. The full "from" file stream
+//				 only needs to exist during the actual function call.
+//				 If you pass in dodgy files which aren't related, then you will either
+//				 get an error or bad results. So don't do that.
+//				 If DiffIsIndexOnly is true, then rDiff is assumed to be a stream positioned
+//				 at the beginning of the block index. Similarly for FromIsIndexOnly.
+//				 WARNING: Reads of the returned streams with buffer sizes less than 64 bytes
+//				 will not return any data.
+//		Created: 8/7/04
+//
+// --------------------------------------------------------------------------
+std::auto_ptr<IOStream> BackupStoreFile::CombineFileIndices(IOStream &rDiff, IOStream &rFrom, bool DiffIsIndexOnly, bool FromIsIndexOnly)
+{
+	// Reposition file pointers?
+	if(!DiffIsIndexOnly)
+	{
+		MoveStreamPositionToBlockIndex(rDiff);
+	}
+	if(!FromIsIndexOnly)
+	{
+		MoveStreamPositionToBlockIndex(rFrom);
+	}
+
+	// Create object
+	std::auto_ptr<IOStream> stream(new BSFCombinedIndexStream(&rDiff));
+
+	// Initialise it
+	((BSFCombinedIndexStream *)stream.get())->Initialise(rFrom);
+
+	// And return the stream
+	return stream;
+}
+
+
+// --------------------------------------------------------------------------
+//
+// Function
+//		Name:    BSFCombinedIndexStream::BSFCombinedIndexStream()
+//		Purpose: Private class. Constructor.
+//		Created: 8/7/04
+//
+// --------------------------------------------------------------------------
+BSFCombinedIndexStream::BSFCombinedIndexStream(IOStream *pDiff)
+	: mpDiff(pDiff),
+	  mIsInitialised(false),
+	  mHeaderWritten(false),
+	  mNumEntriesToGo(0),
+	  mNumEntriesInFromFile(0),
+	  mFromBlockSizes(0)
+{
+	ASSERT(mpDiff != 0);
+}
+
+
+// --------------------------------------------------------------------------
+//
+// Function
+//		Name:    BSFCombinedIndexStream::~BSFCombinedIndexStream()
+//		Purpose: Private class. Destructor.
+//		Created: 8/7/04
+//
+// --------------------------------------------------------------------------
+BSFCombinedIndexStream::~BSFCombinedIndexStream()
+{
+	if(mFromBlockSizes != 0)
+	{
+		::free(mFromBlockSizes);
+		mFromBlockSizes = 0;
+	}
+}
+
+
+// --------------------------------------------------------------------------
+//
+// Function
+//		Name:    BSFCombinedIndexStream::Initialise(IOStream &)
+//		Purpose: Private class. Initalise from the streams (diff passed in constructor).
+//				 Both streams must have file pointer positioned at the block index.
+//		Created: 8/7/04
+//
+// --------------------------------------------------------------------------
+void BSFCombinedIndexStream::Initialise(IOStream &rFrom)
+{
+	// Paranoia is good.
+	if(mIsInitialised)
+	{
+		THROW_EXCEPTION(BackupStoreException, Internal)
+	}
+	
+	// Look at the diff file: Read in the header
+	if(!mpDiff->ReadFullBuffer(&mHeader, sizeof(mHeader), 0))
+	{
+		THROW_EXCEPTION(BackupStoreException, CouldntReadEntireStructureFromStream)
+	}
+	if(ntohl(mHeader.mMagicValue) != OBJECTMAGIC_FILE_BLOCKS_MAGIC_VALUE_V1)
+	{
+		THROW_EXCEPTION(BackupStoreException, BadBackupStoreFile)
+	}
+	
+	// Read relevant data.
+	mNumEntriesToGo = box_ntoh64(mHeader.mNumBlocks);
+	
+	// Adjust a bit to reflect the fact it's no longer a diff
+	mHeader.mOtherFileID = box_hton64(0);
+	
+	// Now look at the from file: Read header
+	file_BlockIndexHeader fromHdr;
+	if(!rFrom.ReadFullBuffer(&fromHdr, sizeof(fromHdr), 0))
+	{
+		THROW_EXCEPTION(BackupStoreException, CouldntReadEntireStructureFromStream)
+	}
+	if(ntohl(fromHdr.mMagicValue) != OBJECTMAGIC_FILE_BLOCKS_MAGIC_VALUE_V1)
+	{
+		THROW_EXCEPTION(BackupStoreException, BadBackupStoreFile)
+	}
+	
+	// Then... allocate memory for the list of sizes
+	mNumEntriesInFromFile = box_ntoh64(fromHdr.mNumBlocks);
+	mFromBlockSizes = (int64_t*)::malloc(mNumEntriesInFromFile * sizeof(int64_t));
+	if(mFromBlockSizes == 0)
+	{
+		throw std::bad_alloc();
+	}
+	
+	// And read them all in!
+	for(int64_t b = 0; b < mNumEntriesInFromFile; ++b)
+	{
+		file_BlockIndexEntry e;
+		if(!rFrom.ReadFullBuffer(&e, sizeof(e), 0))
+		{
+			THROW_EXCEPTION(BackupStoreException, CouldntReadEntireStructureFromStream)
+		}
+		
+		// Check that the from file isn't a delta in itself
+		if(box_ntoh64(e.mEncodedSize) <= 0)
+		{
+			THROW_EXCEPTION(BackupStoreException, OnCombineFromFileIsIncomplete)
+		}
+
+		// Store size (in network byte order)
+		mFromBlockSizes[b] = e.mEncodedSize;
+	}
+	
+	// Flag as initialised
+	mIsInitialised = true;
+}
+
+
+// --------------------------------------------------------------------------
+//
+// Function
+//		Name:    BSFCombinedIndexStream::Read(void *, int, int)
+//		Purpose: Private class. As interface.
+//		Created: 8/7/04
+//
+// --------------------------------------------------------------------------
+int BSFCombinedIndexStream::Read(void *pBuffer, int NBytes, int Timeout)
+{
+	// Paranoia is good.
+	if(!mIsInitialised || mFromBlockSizes == 0 || mpDiff == 0)
+	{
+		THROW_EXCEPTION(BackupStoreException, Internal)
+	}
+	
+	int written = 0;
+	
+	// Header output yet?
+	if(!mHeaderWritten)
+	{
+		// Enough space?
+		if(NBytes < (int)sizeof(mHeader)) return 0;
+		
+		// Copy in
+		::memcpy(pBuffer, &mHeader, sizeof(mHeader));
+		NBytes -= sizeof(mHeader);
+		written += sizeof(mHeader);
+	
+		// Flag it's done
+		mHeaderWritten = true;
+	}
+
+	// How many entries can be written?
+	int entriesToWrite = NBytes / sizeof(file_BlockIndexEntry);
+	if(entriesToWrite > mNumEntriesToGo)
+	{
+		entriesToWrite = mNumEntriesToGo;
+	}
+	
+	// Setup ready to go
+	file_BlockIndexEntry *poutput = (file_BlockIndexEntry*)(((uint8_t*)pBuffer) + written);
+
+	// Write entries
+	for(int b = 0; b < entriesToWrite; ++b)
+	{
+		if(!mpDiff->ReadFullBuffer(&(poutput[b]), sizeof(file_BlockIndexEntry), 0))
+		{
+			THROW_EXCEPTION(BackupStoreException, CouldntReadEntireStructureFromStream)
+		}
+		
+		// Does this need adjusting?
+		int s = box_ntoh64(poutput[b].mEncodedSize);
+		if(s <= 0)
+		{
+			// A reference to a block in the from file
+			int block = 0 - s;
+			ASSERT(block >= 0);
+			if(block >= mNumEntriesInFromFile)
+			{
+				// That's not good, the block doesn't exist
+				THROW_EXCEPTION(BackupStoreException, OnCombineFromFileIsIncomplete)
+			}
+			
+			// Adjust the entry in the buffer
+			poutput[b].mEncodedSize = mFromBlockSizes[block];	// stored in network byte order, no translation necessary
+		}
+	}
+	
+	// Update written count
+	written += entriesToWrite * sizeof(file_BlockIndexEntry);
+	mNumEntriesToGo -= entriesToWrite;
+	
+	return written;
+}
+
+
+// --------------------------------------------------------------------------
+//
+// Function
+//		Name:    BSFCombinedIndexStream::Write(const void *, int)
+//		Purpose: Private class. As interface.
+//		Created: 8/7/04
+//
+// --------------------------------------------------------------------------
+void BSFCombinedIndexStream::Write(const void *pBuffer, int NBytes)
+{
+	THROW_EXCEPTION(BackupStoreException, StreamDoesntHaveRequiredFeatures)
+}
+
+
+// --------------------------------------------------------------------------
+//
+// Function
+//		Name:    BSFCombinedIndexStream::StreamDataLeft()
+//		Purpose: Private class. As interface
+//		Created: 8/7/04
+//
+// --------------------------------------------------------------------------
+bool BSFCombinedIndexStream::StreamDataLeft()
+{
+	return (!mHeaderWritten) || (mNumEntriesToGo > 0);
+}
+
+
+// --------------------------------------------------------------------------
+//
+// Function
+//		Name:    BSFCombinedIndexStream::StreamClosed()
+//		Purpose: Private class. As interface.
+//		Created: 8/7/04
+//
+// --------------------------------------------------------------------------
+bool BSFCombinedIndexStream::StreamClosed()
+{
+	return true;	// doesn't do writing
+}
+
diff --git a/lib/backupstore/BackupStoreFileCombine.cpp b/lib/backupstore/BackupStoreFileCombine.cpp
new file mode 100644
index 00000000..baa331f0
--- /dev/null
+++ b/lib/backupstore/BackupStoreFileCombine.cpp
@@ -0,0 +1,410 @@
+// --------------------------------------------------------------------------
+//
+// File
+//		Name:    BackupStoreFileCombine.cpp
+//		Purpose: File combining for BackupStoreFile
+//		Created: 16/1/04
+//
+// --------------------------------------------------------------------------
+
+#include "Box.h"
+
+#include <new>
+
+#include "BackupStoreFile.h"
+#include "BackupStoreFileWire.h"
+#include "BackupStoreObjectMagic.h"
+#include "BackupStoreException.h"
+#include "BackupStoreConstants.h"
+#include "BackupStoreFilename.h"
+#include "FileStream.h"
+
+#include "MemLeakFindOn.h"
+
+typedef struct
+{
+	int64_t mFilePosition;
+} FromIndexEntry;
+
+static void LoadFromIndex(IOStream &rFrom, FromIndexEntry *pIndex, int64_t NumEntries);
+static void CopyData(IOStream &rDiffData, IOStream &rDiffIndex, int64_t DiffNumBlocks, IOStream &rFrom, FromIndexEntry *pFromIndex, int64_t FromNumBlocks, IOStream &rOut);
+static void WriteNewIndex(IOStream &rDiff, int64_t DiffNumBlocks, FromIndexEntry *pFromIndex, int64_t FromNumBlocks, IOStream &rOut);
+
+// --------------------------------------------------------------------------
+//
+// Function
+//		Name:    BackupStoreFile::CombineFile(IOStream &, IOStream &, IOStream &)
+//		Purpose: Where rDiff is a store file which is incomplete as a result of a
+//				 diffing operation, rFrom is the file it is diffed from, and 
+//				 rOut is the stream in which to place the result, the old file
+//				 and new file are combined into a file containing all the data.
+//				 rDiff2 is the same file as rDiff, opened again to get two
+//				 independent streams to the same file.
+//		Created: 16/1/04
+//
+// --------------------------------------------------------------------------
+void BackupStoreFile::CombineFile(IOStream &rDiff, IOStream &rDiff2, IOStream &rFrom, IOStream &rOut)
+{
+	// Read and copy the header.
+	file_StreamFormat hdr;
+	if(!rDiff.ReadFullBuffer(&hdr, sizeof(hdr), 0))
+	{
+		THROW_EXCEPTION(BackupStoreException, FailedToReadBlockOnCombine)
+	}
+	if(ntohl(hdr.mMagicValue) != OBJECTMAGIC_FILE_MAGIC_VALUE_V1)
+	{
+		THROW_EXCEPTION(BackupStoreException, BadBackupStoreFile)
+	}
+	// Copy
+	rOut.Write(&hdr, sizeof(hdr));
+	// Copy over filename and attributes
+	// BLOCK
+	{
+		BackupStoreFilename filename;
+		filename.ReadFromStream(rDiff, IOStream::TimeOutInfinite);
+		filename.WriteToStream(rOut);
+		StreamableMemBlock attr;
+		attr.ReadFromStream(rDiff, IOStream::TimeOutInfinite);
+		attr.WriteToStream(rOut);
+	}
+	
+	// Read the header for the From file
+	file_StreamFormat fromHdr;
+	if(!rFrom.ReadFullBuffer(&fromHdr, sizeof(fromHdr), 0))
+	{
+		THROW_EXCEPTION(BackupStoreException, FailedToReadBlockOnCombine)
+	}
+	if(ntohl(fromHdr.mMagicValue) != OBJECTMAGIC_FILE_MAGIC_VALUE_V1)
+	{
+		THROW_EXCEPTION(BackupStoreException, BadBackupStoreFile)
+	}
+	// Skip over the filename and attributes of the From file
+	// BLOCK
+	{
+		BackupStoreFilename filename2;
+		filename2.ReadFromStream(rFrom, IOStream::TimeOutInfinite);
+		int32_t size_s;
+		if(!rFrom.ReadFullBuffer(&size_s, sizeof(size_s), 0 /* not interested in bytes read if this fails */))
+		{
+			THROW_EXCEPTION(CommonException, StreamableMemBlockIncompleteRead)
+		}
+		int size = ntohl(size_s);
+		// Skip forward the size
+		rFrom.Seek(size, IOStream::SeekType_Relative);		
+	}
+	
+	// Allocate memory for the block index of the From file
+	int64_t fromNumBlocks = box_ntoh64(fromHdr.mNumBlocks);
+	// NOTE: An extra entry is required so that the length of the last block can be calculated
+	FromIndexEntry *pFromIndex = (FromIndexEntry*)::malloc((fromNumBlocks+1) * sizeof(FromIndexEntry));
+	if(pFromIndex == 0)
+	{
+		throw std::bad_alloc();
+	}
+	
+	try
+	{
+		// Load the index from the From file, calculating the offsets in the
+		// file as we go along, and enforce that everything should be present.
+		LoadFromIndex(rFrom, pFromIndex, fromNumBlocks);
+		
+		// Read in the block index of the Diff file in small chunks, and output data
+		// for each block, either from this file, or the other file.
+		int64_t diffNumBlocks = box_ntoh64(hdr.mNumBlocks);
+		CopyData(rDiff /* positioned at start of data */, rDiff2, diffNumBlocks, rFrom, pFromIndex, fromNumBlocks, rOut);
+		
+		// Read in the block index again, and output the new block index, simply
+		// filling in the sizes of blocks from the old file.
+		WriteNewIndex(rDiff, diffNumBlocks, pFromIndex, fromNumBlocks, rOut);
+		
+		// Free buffers
+		::free(pFromIndex);
+		pFromIndex = 0;
+	}
+	catch(...)
+	{
+		// Clean up
+		if(pFromIndex != 0)
+		{
+			::free(pFromIndex);
+			pFromIndex = 0;
+		}	
+		throw;
+	}
+}
+
+
+// --------------------------------------------------------------------------
+//
+// Function
+//		Name:    static LoadFromIndex(IOStream &, FromIndexEntry *, int64_t)
+//		Purpose: Static. Load the index from the From file
+//		Created: 16/1/04
+//
+// --------------------------------------------------------------------------
+static void LoadFromIndex(IOStream &rFrom, FromIndexEntry *pIndex, int64_t NumEntries)
+{
+	ASSERT(pIndex != 0);
+	ASSERT(NumEntries >= 0);
+
+	// Get the starting point in the file
+	int64_t filePos = rFrom.GetPosition();
+	
+	// Jump to the end of the file to read the index
+	rFrom.Seek(0 - ((NumEntries * sizeof(file_BlockIndexEntry)) + sizeof(file_BlockIndexHeader)), IOStream::SeekType_End);
+	
+	// Read block index header
+	file_BlockIndexHeader blkhdr;
+	if(!rFrom.ReadFullBuffer(&blkhdr, sizeof(blkhdr), 0))
+	{
+		THROW_EXCEPTION(BackupStoreException, FailedToReadBlockOnCombine)
+	}
+	if(ntohl(blkhdr.mMagicValue) != OBJECTMAGIC_FILE_BLOCKS_MAGIC_VALUE_V1
+		|| (int64_t)box_ntoh64(blkhdr.mNumBlocks) != NumEntries)
+	{
+		THROW_EXCEPTION(BackupStoreException, BadBackupStoreFile)
+	}
+	
+	// And then the block entries
+	for(int64_t b = 0; b < NumEntries; ++b)
+	{
+		// Read
+		file_BlockIndexEntry en;
+		if(!rFrom.ReadFullBuffer(&en, sizeof(en), 0))
+		{
+			THROW_EXCEPTION(BackupStoreException, FailedToReadBlockOnCombine)
+		}
+		
+		// Add to list
+		pIndex[b].mFilePosition = filePos;
+
+		// Encoded size?
+		int64_t encodedSize = box_ntoh64(en.mEncodedSize);
+		// Check that the block is actually there
+		if(encodedSize <= 0)
+		{
+			THROW_EXCEPTION(BackupStoreException, OnCombineFromFileIsIncomplete)
+		}
+
+		// Move file pointer on
+		filePos += encodedSize;
+	}
+	
+	// Store the position in the very last entry, so the size of the last entry can be calculated
+	pIndex[NumEntries].mFilePosition = filePos;
+}
+
+
+// --------------------------------------------------------------------------
+//
+// Function
+//		Name:    static CopyData(IOStream &, IOStream &, int64_t, IOStream &, FromIndexEntry *, int64_t, IOStream &)
+//		Purpose: Static. Copy data from the Diff and From file to the out file.
+//				 rDiffData is at beginning of data.
+//				 rDiffIndex at any position.
+//				 rFrom is at any position.
+//				 rOut is after the header, ready for data
+//		Created: 16/1/04
+//
+// --------------------------------------------------------------------------
+static void CopyData(IOStream &rDiffData, IOStream &rDiffIndex, int64_t DiffNumBlocks,
+	IOStream &rFrom, FromIndexEntry *pFromIndex, int64_t FromNumBlocks, IOStream &rOut)
+{
+	// Jump to the end of the diff file to read the index
+	rDiffIndex.Seek(0 - ((DiffNumBlocks * sizeof(file_BlockIndexEntry)) + sizeof(file_BlockIndexHeader)), IOStream::SeekType_End);
+	
+	// Read block index header
+	file_BlockIndexHeader diffBlkhdr;
+	if(!rDiffIndex.ReadFullBuffer(&diffBlkhdr, sizeof(diffBlkhdr), 0))
+	{
+		THROW_EXCEPTION(BackupStoreException, FailedToReadBlockOnCombine)
+	}
+	if(ntohl(diffBlkhdr.mMagicValue) != OBJECTMAGIC_FILE_BLOCKS_MAGIC_VALUE_V1
+		|| (int64_t)box_ntoh64(diffBlkhdr.mNumBlocks) != DiffNumBlocks)
+	{
+		THROW_EXCEPTION(BackupStoreException, BadBackupStoreFile)
+	}
+	
+	// Record where the From file is
+	int64_t fromPos = rFrom.GetPosition();
+	
+	// Buffer data
+	void *buffer = 0;
+	int bufferSize = 0;
+	
+	try
+	{
+		// Read the blocks in!
+		for(int64_t b = 0; b < DiffNumBlocks; ++b)
+		{
+			// Read
+			file_BlockIndexEntry en;
+			if(!rDiffIndex.ReadFullBuffer(&en, sizeof(en), 0))
+			{
+				THROW_EXCEPTION(BackupStoreException, FailedToReadBlockOnCombine)
+			}
+			
+			// What's the size value stored in the entry
+			int64_t encodedSize = box_ntoh64(en.mEncodedSize);
+			
+			// How much data will be read?
+			int32_t blockSize = 0;
+			if(encodedSize > 0)
+			{
+				// The block is actually in the diff file
+				blockSize = encodedSize;
+			}
+			else
+			{
+				// It's in the from file. First, check to see if it's valid
+				int64_t blockIdx = (0 - encodedSize);
+				if(blockIdx > FromNumBlocks)
+				{
+					// References a block which doesn't actually exist
+					THROW_EXCEPTION(BackupStoreException, BadBackupStoreFile)
+				}
+				// Calculate size. This operation is safe because of the extra entry at the end
+				blockSize = pFromIndex[blockIdx + 1].mFilePosition - pFromIndex[blockIdx].mFilePosition;
+			}
+			ASSERT(blockSize > 0);
+			
+			// Make sure there's memory available to copy this
+			if(bufferSize < blockSize || buffer == 0)
+			{
+				// Free old block
+				if(buffer != 0)
+				{
+					::free(buffer);
+					buffer = 0;
+					bufferSize = 0;
+				}
+				// Allocate new block
+				buffer = ::malloc(blockSize);
+				if(buffer == 0)
+				{
+					throw std::bad_alloc();
+				}
+				bufferSize = blockSize;
+			}
+			ASSERT(bufferSize >= blockSize);
+			
+			// Load in data from one of the files
+			if(encodedSize > 0)
+			{
+				// Load from diff file
+				if(!rDiffData.ReadFullBuffer(buffer, blockSize, 0))
+				{
+					THROW_EXCEPTION(BackupStoreException, FailedToReadBlockOnCombine)
+				}				
+			}
+			else
+			{
+				// Locate and read the data from the from file
+				int64_t blockIdx = (0 - encodedSize);
+				// Seek if necessary
+				if(fromPos != pFromIndex[blockIdx].mFilePosition)
+				{
+					rFrom.Seek(pFromIndex[blockIdx].mFilePosition, IOStream::SeekType_Absolute);
+					fromPos = pFromIndex[blockIdx].mFilePosition;
+				}
+				// Read
+				if(!rFrom.ReadFullBuffer(buffer, blockSize, 0))
+				{
+					THROW_EXCEPTION(BackupStoreException, FailedToReadBlockOnCombine)
+				}
+				
+				// Update fromPos to current position
+				fromPos += blockSize;
+			}
+			
+			// Write data to out file
+			rOut.Write(buffer, blockSize);
+		}
+		
+		// Free buffer, if allocated
+		if(buffer != 0)
+		{
+			::free(buffer);
+			buffer = 0;
+		}
+	}
+	catch(...)
+	{
+		if(buffer != 0)
+		{
+			::free(buffer);
+			buffer = 0;
+		}
+		throw;
+	}
+}
+
+
+
+// --------------------------------------------------------------------------
+//
+// Function
+//		Name:    static WriteNewIndex(IOStream &, int64_t, FromIndexEntry *, int64_t, IOStream &)
+//		Purpose: Write the index to the out file, just copying from the diff file and
+//				 adjusting the entries.
+//		Created: 16/1/04
+//
+// --------------------------------------------------------------------------
+static void WriteNewIndex(IOStream &rDiff, int64_t DiffNumBlocks, FromIndexEntry *pFromIndex, int64_t FromNumBlocks, IOStream &rOut)
+{
+	// Jump to the end of the diff file to read the index
+	rDiff.Seek(0 - ((DiffNumBlocks * sizeof(file_BlockIndexEntry)) + sizeof(file_BlockIndexHeader)), IOStream::SeekType_End);
+	
+	// Read block index header
+	file_BlockIndexHeader diffBlkhdr;
+	if(!rDiff.ReadFullBuffer(&diffBlkhdr, sizeof(diffBlkhdr), 0))
+	{
+		THROW_EXCEPTION(BackupStoreException, FailedToReadBlockOnCombine)
+	}
+	if(ntohl(diffBlkhdr.mMagicValue) != OBJECTMAGIC_FILE_BLOCKS_MAGIC_VALUE_V1
+		|| (int64_t)box_ntoh64(diffBlkhdr.mNumBlocks) != DiffNumBlocks)
+	{
+		THROW_EXCEPTION(BackupStoreException, BadBackupStoreFile)
+	}
+	
+	// Write it out with a blanked out other file ID
+	diffBlkhdr.mOtherFileID = box_hton64(0);
+	rOut.Write(&diffBlkhdr, sizeof(diffBlkhdr));
+	
+	// Rewrite the index
+	for(int64_t b = 0; b < DiffNumBlocks; ++b)
+	{
+		file_BlockIndexEntry en;
+		if(!rDiff.ReadFullBuffer(&en, sizeof(en), 0))
+		{
+			THROW_EXCEPTION(BackupStoreException, FailedToReadBlockOnCombine)
+		}
+		
+		// What's the size value stored in the entry
+		int64_t encodedSize = box_ntoh64(en.mEncodedSize);
+		
+		// Need to adjust it?
+		if(encodedSize <= 0)
+		{
+			// This actually refers to a block in the from file. So rewrite this.
+			int64_t blockIdx = (0 - encodedSize);
+			if(blockIdx > FromNumBlocks)
+			{
+				// References a block which doesn't actually exist
+				THROW_EXCEPTION(BackupStoreException, BadBackupStoreFile)
+			}
+			// Calculate size. This operation is safe because of the extra entry at the end
+			int32_t blockSize = pFromIndex[blockIdx + 1].mFilePosition - pFromIndex[blockIdx].mFilePosition;
+			// Then replace entry
+			en.mEncodedSize = box_hton64(((uint64_t)blockSize));
+		}
+		
+		// Write entry
+		rOut.Write(&en, sizeof(en));
+	}
+}
+
+
+
+
+
diff --git a/lib/backupstore/BackupStoreFileDiff.cpp b/lib/backupstore/BackupStoreFileDiff.cpp
new file mode 100644
index 00000000..5705c3aa
--- /dev/null
+++ b/lib/backupstore/BackupStoreFileDiff.cpp
@@ -0,0 +1,1046 @@
+// --------------------------------------------------------------------------
+//
+// File
+//		Name:    BackupStoreFileDiff.cpp
+//		Purpose: Functions relating to diffing BackupStoreFiles
+//		Created: 12/1/04
+//
+// --------------------------------------------------------------------------
+
+#include "Box.h"
+
+#include <string.h>
+
+#include <new>
+#include <map>
+
+#ifdef HAVE_TIME_H
+	#include <time.h>
+#elif HAVE_SYS_TIME_H
+	#include <sys/time.h>
+#endif
+
+#include "BackupStoreConstants.h"
+#include "BackupStoreException.h"
+#include "BackupStoreFile.h"
+#include "BackupStoreFileCryptVar.h"
+#include "BackupStoreFileEncodeStream.h"
+#include "BackupStoreFileWire.h"
+#include "BackupStoreObjectMagic.h"
+#include "CommonException.h"
+#include "FileStream.h"
+#include "MD5Digest.h"
+#include "RollingChecksum.h"
+#include "Timer.h"
+
+#include "MemLeakFindOn.h"
+
+#include <cstring>
+
+using namespace BackupStoreFileCryptVar;
+using namespace BackupStoreFileCreation;
+
+// By default, don't trace out details of the diff as we go along -- would fill up logs significantly.
+// But it's useful for the test.
+#ifndef BOX_RELEASE_BUILD
+	bool BackupStoreFile::TraceDetailsOfDiffProcess = false;
+#endif
+
+static void LoadIndex(IOStream &rBlockIndex, int64_t ThisID, BlocksAvailableEntry **ppIndex, int64_t &rNumBlocksOut, int Timeout, bool &rCanDiffFromThis);
+static void FindMostUsedSizes(BlocksAvailableEntry *pIndex, int64_t NumBlocks, int32_t Sizes[BACKUP_FILE_DIFF_MAX_BLOCK_SIZES]);
+static void SearchForMatchingBlocks(IOStream &rFile, 
+	std::map<int64_t, int64_t> &rFoundBlocks, BlocksAvailableEntry *pIndex, 
+	int64_t NumBlocks, int32_t Sizes[BACKUP_FILE_DIFF_MAX_BLOCK_SIZES],
+	DiffTimer *pDiffTimer);
+static void SetupHashTable(BlocksAvailableEntry *pIndex, int64_t NumBlocks, int32_t BlockSize, BlocksAvailableEntry **pHashTable);
+static bool SecondStageMatch(BlocksAvailableEntry *pFirstInHashList, RollingChecksum &fastSum, uint8_t *pBeginnings, uint8_t *pEndings, int Offset, int32_t BlockSize, int64_t FileBlockNumber,
+BlocksAvailableEntry *pIndex, std::map<int64_t, int64_t> &rFoundBlocks);
+static void GenerateRecipe(BackupStoreFileEncodeStream::Recipe &rRecipe, BlocksAvailableEntry *pIndex, int64_t NumBlocks, std::map<int64_t, int64_t> &rFoundBlocks, int64_t SizeOfInputFile);
+
+// --------------------------------------------------------------------------
+//
+// Function
+//		Name:    BackupStoreFile::MoveStreamPositionToBlockIndex(IOStream &)
+//		Purpose: Move the file pointer in this stream to just before the block index.
+//				 Assumes that the stream is at the beginning, seekable, and
+//				 reading from the stream is OK.
+//		Created: 12/1/04
+//
+// --------------------------------------------------------------------------
+void BackupStoreFile::MoveStreamPositionToBlockIndex(IOStream &rStream)
+{
+	// Size of file
+	int64_t fileSize = rStream.BytesLeftToRead();
+
+	// Get header
+	file_StreamFormat hdr;
+
+	// Read the header
+	if(!rStream.ReadFullBuffer(&hdr, sizeof(hdr), 0 /* not interested in bytes read if this fails */, IOStream::TimeOutInfinite))
+	{
+		// Couldn't read header
+		THROW_EXCEPTION(BackupStoreException, CouldntReadEntireStructureFromStream)
+	}
+
+	// Check magic number
+	if(ntohl(hdr.mMagicValue) != OBJECTMAGIC_FILE_MAGIC_VALUE_V1
+#ifndef BOX_DISABLE_BACKWARDS_COMPATIBILITY_BACKUPSTOREFILE
+		&& ntohl(hdr.mMagicValue) != OBJECTMAGIC_FILE_MAGIC_VALUE_V0
+#endif
+		)
+	{
+		THROW_EXCEPTION(BackupStoreException, BadBackupStoreFile)
+	}
+	
+	// Work out where the index is
+	int64_t numBlocks = box_ntoh64(hdr.mNumBlocks);
+	int64_t blockHeaderPosFromEnd = ((numBlocks * sizeof(file_BlockIndexEntry)) + sizeof(file_BlockIndexHeader));
+	
+	// Sanity check
+	if(blockHeaderPosFromEnd > static_cast<int64_t>(fileSize - sizeof(file_StreamFormat)))
+	{
+		THROW_EXCEPTION(BackupStoreException, BadBackupStoreFile)
+	}
+	
+	// Seek to that position
+	rStream.Seek(0 - blockHeaderPosFromEnd, IOStream::SeekType_End);
+	
+	// Done. Stream now in right position (as long as the file is formatted correctly)
+}
+
+
+// --------------------------------------------------------------------------
+//
+// Function
+//		Name:    BackupStoreFile::EncodeFileDiff(const char *, int64_t, const BackupStoreFilename &, int64_t, IOStream &, int64_t *)
+//		Purpose: Similar to EncodeFile, but takes the object ID of the file it's
+//				 diffing from, and the index of the blocks in a stream. It'll then
+//				 calculate which blocks can be reused from that old file.
+//				 The timeout is the timeout value for reading the diff block index.
+//				 If pIsCompletelyDifferent != 0, it will be set to true if the
+//				 the two files are completely different (do not share any block), false otherwise.
+//				 
+//		Created: 12/1/04
+//
+// --------------------------------------------------------------------------
+std::auto_ptr<IOStream> BackupStoreFile::EncodeFileDiff
+(
+	const char *Filename, int64_t ContainerID,
+	const BackupStoreFilename &rStoreFilename, int64_t DiffFromObjectID, 
+	IOStream &rDiffFromBlockIndex, int Timeout, DiffTimer *pDiffTimer, 
+	int64_t *pModificationTime, bool *pIsCompletelyDifferent)
+{
+	// Is it a symlink?
+	{
+		EMU_STRUCT_STAT st;
+		if(EMU_LSTAT(Filename, &st) != 0)
+		{
+			THROW_EXCEPTION(CommonException, OSFileError)
+		}
+		if((st.st_mode & S_IFLNK) == S_IFLNK)
+		{
+			// Don't do diffs for symlinks
+			if(pIsCompletelyDifferent != 0)
+			{
+				*pIsCompletelyDifferent = true;
+			}
+			return EncodeFile(Filename, ContainerID, rStoreFilename, pModificationTime);
+		}
+	}
+
+	// Load in the blocks
+	BlocksAvailableEntry *pindex = 0;
+	int64_t blocksInIndex = 0;
+	bool canDiffFromThis = false;
+	LoadIndex(rDiffFromBlockIndex, DiffFromObjectID, &pindex, blocksInIndex, Timeout, canDiffFromThis);
+	// BOX_TRACE("Diff: Blocks in index: " << blocksInIndex);
+	
+	if(!canDiffFromThis)
+	{
+		// Don't do diffing...
+		if(pIsCompletelyDifferent != 0)
+		{
+			*pIsCompletelyDifferent = true;
+		}
+		return EncodeFile(Filename, ContainerID, rStoreFilename, pModificationTime);
+	}
+	
+	// Pointer to recipe we're going to create
+	BackupStoreFileEncodeStream::Recipe *precipe = 0;
+	
+	try
+	{
+		// Find which sizes should be scanned
+		int32_t sizesToScan[BACKUP_FILE_DIFF_MAX_BLOCK_SIZES];
+		FindMostUsedSizes(pindex, blocksInIndex, sizesToScan);
+		
+		// Flag for reporting to the user
+		bool completelyDifferent;
+			
+		// BLOCK
+		{
+			// Search the file to find matching blocks
+			std::map<int64_t, int64_t> foundBlocks; // map of offset in file to index in block index
+			int64_t sizeOfInputFile = 0;
+			// BLOCK
+			{
+				FileStream file(Filename);
+				// Get size of file
+				sizeOfInputFile = file.BytesLeftToRead();
+				// Find all those lovely matching blocks
+				SearchForMatchingBlocks(file, foundBlocks, pindex, 
+					blocksInIndex, sizesToScan, pDiffTimer);
+				
+				// Is it completely different?
+				completelyDifferent = (foundBlocks.size() == 0);
+			}
+			
+			// Create a recipe -- if the two files are completely different, don't put the from file ID in the recipe.
+			precipe = new BackupStoreFileEncodeStream::Recipe(pindex, blocksInIndex, completelyDifferent?(0):(DiffFromObjectID));
+			BlocksAvailableEntry *pindexKeptRef = pindex;	// we need this later, but must set pindex == 0 now, because of exceptions
+			pindex = 0;		// Recipe now has ownership
+			
+			// Fill it in
+			GenerateRecipe(*precipe, pindexKeptRef, blocksInIndex, foundBlocks, sizeOfInputFile);
+		}
+		// foundBlocks no longer required
+		
+		// Create the stream
+		std::auto_ptr<IOStream> stream(new BackupStoreFileEncodeStream);
+	
+		// Do the initial setup
+		((BackupStoreFileEncodeStream*)stream.get())->Setup(Filename, precipe, ContainerID, rStoreFilename, pModificationTime);
+		precipe = 0;	// Stream has taken ownership of this
+		
+		// Tell user about completely different status?
+		if(pIsCompletelyDifferent != 0)
+		{
+			*pIsCompletelyDifferent = completelyDifferent;
+		}
+	
+		// Return the stream for the caller
+		return stream;
+	}
+	catch(...)
+	{
+		// cleanup
+		if(pindex != 0)
+		{
+			::free(pindex);
+			pindex = 0;
+		}
+		if(precipe != 0)
+		{
+			delete precipe;
+			precipe = 0;
+		}
+		throw;
+	}
+}
+
+// --------------------------------------------------------------------------
+//
+// Function
+//		Name:    static LoadIndex(IOStream &, int64_t, BlocksAvailableEntry **, int64_t, bool &)
+//		Purpose: Read in an index, and decrypt, and store in the in memory block format.
+//				 rCanDiffFromThis is set to false if the version of the from file is too old.
+//		Created: 12/1/04
+//
+// --------------------------------------------------------------------------
+static void LoadIndex(IOStream &rBlockIndex, int64_t ThisID, BlocksAvailableEntry **ppIndex, int64_t &rNumBlocksOut, int Timeout, bool &rCanDiffFromThis)
+{
+	// Reset
+	rNumBlocksOut = 0;
+	rCanDiffFromThis = false;
+	
+	// Read header
+	file_BlockIndexHeader hdr;
+	if(!rBlockIndex.ReadFullBuffer(&hdr, sizeof(hdr), 0 /* not interested in bytes read if this fails */, Timeout))
+	{
+		// Couldn't read header
+		THROW_EXCEPTION(BackupStoreException, CouldntReadEntireStructureFromStream)
+	}
+
+#ifndef BOX_DISABLE_BACKWARDS_COMPATIBILITY_BACKUPSTOREFILE
+	// Check against backwards comptaibility stuff
+	if(hdr.mMagicValue == (int32_t)htonl(OBJECTMAGIC_FILE_BLOCKS_MAGIC_VALUE_V0))
+	{
+		// Won't diff against old version
+		
+		// Absorb rest of stream
+		char buffer[2048];
+		while(rBlockIndex.StreamDataLeft())
+		{
+			rBlockIndex.Read(buffer, sizeof(buffer), 1000 /* 1 sec timeout */);
+		}
+		
+		// Tell caller
+		rCanDiffFromThis = false;
+		return;
+	}
+#endif
+
+	// Check magic
+	if(hdr.mMagicValue != (int32_t)htonl(OBJECTMAGIC_FILE_BLOCKS_MAGIC_VALUE_V1))
+	{
+		THROW_EXCEPTION(BackupStoreException, BadBackupStoreFile)
+	}
+	
+	// Check that we're not trying to diff against a file which references blocks from another file
+	if(((int64_t)box_ntoh64(hdr.mOtherFileID)) != 0)
+	{
+		THROW_EXCEPTION(BackupStoreException, CannotDiffAnIncompleteStoreFile)
+	}
+
+	// Mark as an acceptable diff.
+	rCanDiffFromThis = true;
+
+	// Get basic information
+	int64_t numBlocks = box_ntoh64(hdr.mNumBlocks);
+	uint64_t entryIVBase = box_ntoh64(hdr.mEntryIVBase);
+	
+	//TODO: Verify that these sizes look reasonable
+	
+	// Allocate space for the index
+	BlocksAvailableEntry *pindex = (BlocksAvailableEntry*)::malloc(sizeof(BlocksAvailableEntry) * numBlocks);
+	if(pindex == 0)
+	{
+		throw std::bad_alloc();
+	}
+	
+	try
+	{	
+		for(int64_t b = 0; b < numBlocks; ++b)
+		{
+			// Read an entry from the stream
+			file_BlockIndexEntry entry;
+			if(!rBlockIndex.ReadFullBuffer(&entry, sizeof(entry), 0 /* not interested in bytes read if this fails */, Timeout))
+			{
+				// Couldn't read entry
+				THROW_EXCEPTION(BackupStoreException, CouldntReadEntireStructureFromStream)
+			}	
+		
+			// Calculate IV for this entry
+			uint64_t iv = entryIVBase;
+			iv += b;
+			// Network byte order
+			iv = box_hton64(iv);
+			sBlowfishDecryptBlockEntry.SetIV(&iv);			
+			
+			// Decrypt the encrypted section
+			file_BlockIndexEntryEnc entryEnc;
+			int sectionSize = sBlowfishDecryptBlockEntry.TransformBlock(&entryEnc, sizeof(entryEnc),
+					entry.mEnEnc, sizeof(entry.mEnEnc));
+			if(sectionSize != sizeof(entryEnc))
+			{
+				THROW_EXCEPTION(BackupStoreException, BlockEntryEncodingDidntGiveExpectedLength)
+			}
+
+			// Check that we're not trying to diff against a file which references blocks from another file
+			if(((int64_t)box_ntoh64(entry.mEncodedSize)) <= 0)
+			{
+				THROW_EXCEPTION(BackupStoreException, CannotDiffAnIncompleteStoreFile)
+			}
+			
+			// Store all the required information
+			pindex[b].mpNextInHashList = 0;	// hash list not set up yet
+			pindex[b].mSize = ntohl(entryEnc.mSize);
+			pindex[b].mWeakChecksum = ntohl(entryEnc.mWeakChecksum);
+			::memcpy(pindex[b].mStrongChecksum, entryEnc.mStrongChecksum, sizeof(pindex[b].mStrongChecksum));
+		}
+		
+		// Store index pointer for called
+		ASSERT(ppIndex != 0);
+		*ppIndex = pindex;
+
+		// Store number of blocks for caller
+		rNumBlocksOut = numBlocks;	
+
+	}
+	catch(...)
+	{
+		// clean up and send the exception along its way
+		::free(pindex);
+		throw;
+	}
+}
+
+
+// --------------------------------------------------------------------------
+//
+// Function
+//		Name:    static FindMostUsedSizes(BlocksAvailableEntry *, int64_t, int32_t[BACKUP_FILE_DIFF_MAX_BLOCK_SIZES])
+//		Purpose: Finds the most commonly used block sizes in the index
+//		Created: 12/1/04
+//
+// --------------------------------------------------------------------------
+static void FindMostUsedSizes(BlocksAvailableEntry *pIndex, int64_t NumBlocks, int32_t Sizes[BACKUP_FILE_DIFF_MAX_BLOCK_SIZES])
+{
+	// Array for lengths
+	int64_t sizeCounts[BACKUP_FILE_DIFF_MAX_BLOCK_SIZES];
+
+	// Set arrays to lots of zeros (= unused entries)
+	for(int l = 0; l < BACKUP_FILE_DIFF_MAX_BLOCK_SIZES; ++l)
+	{
+		Sizes[l] = 0;
+		sizeCounts[l] = 0;
+	}
+
+	// Array for collecting sizes
+	std::map<int32_t, int64_t> foundSizes;
+	
+	// Run through blocks and make a count of the entries
+	for(int64_t b = 0; b < NumBlocks; ++b)
+	{
+		// Only if the block size is bigger than the minimum size we'll scan for
+		if(pIndex[b].mSize > BACKUP_FILE_DIFF_MIN_BLOCK_SIZE)
+		{
+			// Find entry?
+			std::map<int32_t, int64_t>::const_iterator f(foundSizes.find(pIndex[b].mSize));
+			if(f != foundSizes.end())
+			{
+				// Increment existing entry
+				foundSizes[pIndex[b].mSize] = foundSizes[pIndex[b].mSize] + 1;
+			}
+			else
+			{
+				// New entry
+				foundSizes[pIndex[b].mSize] = 1;
+			}
+		}
+	}
+	
+	// Make the block sizes
+	for(std::map<int32_t, int64_t>::const_iterator i(foundSizes.begin()); i != foundSizes.end(); ++i)
+	{
+		// Find the position of the size in the array
+		for(int t = 0; t < BACKUP_FILE_DIFF_MAX_BLOCK_SIZES; ++t)
+		{
+			// Instead of sorting on the raw count of blocks,
+			// take the file area covered by this block size.
+			if(i->second * i->first > sizeCounts[t] * Sizes[t])
+			{
+				// Then this size belong before this entry -- shuffle them up
+				for(int s = (BACKUP_FILE_DIFF_MAX_BLOCK_SIZES - 1); s >= t; --s)
+				{
+					Sizes[s] = Sizes[s-1];
+					sizeCounts[s] = sizeCounts[s-1];
+				}
+				
+				// Insert this size
+				Sizes[t] = i->first;
+				sizeCounts[t] = i->second;
+				
+				// Shouldn't do any more searching
+				break;
+			}
+		}
+	}
+	
+	// trace the size table in debug builds
+#ifndef BOX_RELEASE_BUILD
+	if(BackupStoreFile::TraceDetailsOfDiffProcess)
+	{
+		for(int t = 0; t < BACKUP_FILE_DIFF_MAX_BLOCK_SIZES; ++t)
+		{
+			BOX_TRACE("Diff block size " << t << ": " <<
+				Sizes[t] << " (count = " << 
+				sizeCounts[t] << ")");
+		}
+	}
+#endif
+}
+
+
+
+// --------------------------------------------------------------------------
+//
+// Function
+//		Name:    static SearchForMatchingBlocks(IOStream &, std::map<int64_t, int64_t> &, BlocksAvailableEntry *, int64_t, int32_t[BACKUP_FILE_DIFF_MAX_BLOCK_SIZES])
+//		Purpose: Find the matching blocks within the file.
+//		Created: 12/1/04
+//
+// --------------------------------------------------------------------------
+static void SearchForMatchingBlocks(IOStream &rFile, std::map<int64_t, int64_t> &rFoundBlocks,
+	BlocksAvailableEntry *pIndex, int64_t NumBlocks, 
+	int32_t Sizes[BACKUP_FILE_DIFF_MAX_BLOCK_SIZES], DiffTimer *pDiffTimer)
+{
+	Timer maximumDiffingTime(0, "MaximumDiffingTime");
+
+	if(pDiffTimer && pDiffTimer->IsManaged())
+	{
+		maximumDiffingTime = Timer(pDiffTimer->GetMaximumDiffingTime(),
+			"MaximumDiffingTime");
+	}
+	
+	std::map<int64_t, int32_t> goodnessOfFit;
+
+	// Allocate the hash lookup table
+	BlocksAvailableEntry **phashTable = (BlocksAvailableEntry **)::malloc(sizeof(BlocksAvailableEntry *) * (64*1024));
+
+	// Choose a size for the buffer, just a little bit more than the maximum block size
+	int32_t bufSize = Sizes[0];
+	for(int z = 1; z < BACKUP_FILE_DIFF_MAX_BLOCK_SIZES; ++z)
+	{
+		if(Sizes[z] > bufSize) bufSize = Sizes[z];
+	}
+	bufSize += 4;
+	ASSERT(bufSize > Sizes[0]);
+	ASSERT(bufSize > 0);
+	if(bufSize > (BACKUP_FILE_MAX_BLOCK_SIZE + 1024))
+	{
+		THROW_EXCEPTION(BackupStoreException, BadBackupStoreFile)
+	}
+	
+	// TODO: Because we read in the file a scanned block size at a time,
+	// it is likely to be inefficient. Probably will be much better to
+	// calculate checksums for all block sizes in a single pass.
+
+	// Allocate the buffers.
+	uint8_t *pbuffer0 = (uint8_t *)::malloc(bufSize);
+	uint8_t *pbuffer1 = (uint8_t *)::malloc(bufSize);
+	try
+	{
+		// Check buffer allocation
+		if(pbuffer0 == 0 || pbuffer1 == 0 || phashTable == 0)
+		{
+			// If a buffer got allocated, it will be cleaned up in the catch block
+			throw std::bad_alloc();
+		}
+		
+		// Flag to abort the run, if too many blocks are found -- avoid using
+		// huge amounts of processor time when files contain many similar blocks.
+		bool abortSearch = false;
+		
+		// Search for each block size in turn
+		// NOTE: Do the smallest size first, so that the scheme for adding
+		// entries in the found list works as expected and replaces smallers block
+		// with larger blocks when it finds matches at the same offset in the file.
+		for(int s = BACKUP_FILE_DIFF_MAX_BLOCK_SIZES - 1; s >= 0; --s)
+		{
+			ASSERT(Sizes[s] <= bufSize);
+			BOX_TRACE("Diff pass " << s << ", for block size " <<
+				Sizes[s]);
+			
+			// Check we haven't finished
+			if(Sizes[s] == 0)
+			{
+				// empty entry, try next size
+				continue;
+			}
+			
+			// Set up the hash table entries
+			SetupHashTable(pIndex, NumBlocks, Sizes[s], phashTable);
+		
+			// Shift file position to beginning
+			rFile.Seek(0, IOStream::SeekType_Absolute);
+			
+			// Read first block
+			if(rFile.Read(pbuffer0, Sizes[s]) != Sizes[s])
+			{
+				// Size of file too short to match -- do next size
+				continue;
+			}
+			
+			// Setup block pointers
+			uint8_t *beginnings = pbuffer0;
+			uint8_t *endings = pbuffer1;
+			int offset = 0;
+			
+			// Calculate the first checksum, ready for rolling
+			RollingChecksum rolling(beginnings, Sizes[s]);
+			
+			// Then roll, until the file is exhausted
+			int64_t fileBlockNumber = 0;
+			int64_t fileOffset = 0;
+			int rollOverInitialBytes = 0;
+			while(true)
+			{
+				if(maximumDiffingTime.HasExpired())
+				{
+					ASSERT(pDiffTimer != NULL);
+					BOX_INFO("MaximumDiffingTime reached - "
+						"suspending file diff");
+					abortSearch = true;
+					break;
+				}
+				
+				if(pDiffTimer)
+				{
+					pDiffTimer->DoKeepAlive();
+				}
+				
+				// Load in another block of data, and record how big it is
+				int bytesInEndings = rFile.Read(endings, Sizes[s]);
+				int tmp;
+
+				// Skip any bytes from a previous matched block
+				if(rollOverInitialBytes > 0 && offset < bytesInEndings)
+				{
+					int spaceLeft = bytesInEndings - offset;
+					int thisRoll = (rollOverInitialBytes > spaceLeft) ? spaceLeft : rollOverInitialBytes;
+
+					rolling.RollForwardSeveral(beginnings+offset, endings+offset, Sizes[s], thisRoll);
+
+					offset += thisRoll;
+					fileOffset += thisRoll;
+					rollOverInitialBytes -= thisRoll;
+
+					if(rollOverInitialBytes)
+					{
+						goto refresh;
+					}
+				}
+
+				if(goodnessOfFit.count(fileOffset))
+				{
+					tmp = goodnessOfFit[fileOffset];
+				}
+				else
+				{
+					tmp = 0;
+				}
+
+				if(tmp >= Sizes[s])
+				{
+					// Skip over bigger ready-matched blocks completely
+					rollOverInitialBytes = tmp;
+					int spaceLeft = bytesInEndings - offset;
+					int thisRoll = (rollOverInitialBytes > spaceLeft) ? spaceLeft : rollOverInitialBytes;
+
+					rolling.RollForwardSeveral(beginnings+offset, endings+offset, Sizes[s], thisRoll);
+
+					offset += thisRoll;
+					fileOffset += thisRoll;
+					rollOverInitialBytes -= thisRoll;
+
+					if(rollOverInitialBytes)
+					{
+						goto refresh;
+					}
+				}
+
+				while(offset < bytesInEndings)
+				{
+					// Is current checksum in hash list?
+					uint16_t hash = rolling.GetComponentForHashing();
+					if(phashTable[hash] != 0 && (goodnessOfFit.count(fileOffset) == 0 || goodnessOfFit[fileOffset] < Sizes[s]))
+					{
+						if(SecondStageMatch(phashTable[hash], rolling, beginnings, endings, offset, Sizes[s], fileBlockNumber, pIndex, rFoundBlocks))
+						{
+							BOX_TRACE("Found block match for " << hash << " of " << Sizes[s] << " bytes at offset " << fileOffset);
+							goodnessOfFit[fileOffset] = Sizes[s];
+
+							// Block matched, roll the checksum forward to the next block without doing
+							// any more comparisons, because these are pointless (as any more matches will be ignored when
+							// the recipe is generated) and just take up valuable processor time. Edge cases are
+							// especially nasty, using huge amounts of time and memory.
+							int skip = Sizes[s];
+							if(offset < bytesInEndings && skip > 0)
+							{
+								int spaceLeft = bytesInEndings - offset;
+								int thisRoll = (skip > spaceLeft) ? spaceLeft : skip;
+
+								rolling.RollForwardSeveral(beginnings+offset, endings+offset, Sizes[s], thisRoll);
+
+								offset += thisRoll;
+								fileOffset += thisRoll;
+								skip -= thisRoll;
+							}
+							// Not all the bytes necessary will have been skipped, so get them
+							// skipped after the next block is loaded.
+							rollOverInitialBytes = skip;
+							
+							// End this loop, so the final byte isn't used again
+							break;
+						}
+						else
+						{
+							BOX_TRACE("False alarm match for " << hash << " of " << Sizes[s] << " bytes at offset " << fileOffset);
+						}
+
+						int64_t NumBlocksFound = static_cast<int64_t>(
+							rFoundBlocks.size());
+						int64_t MaxBlocksFound = NumBlocks * 
+							BACKUP_FILE_DIFF_MAX_BLOCK_FIND_MULTIPLE;
+						
+						if(NumBlocksFound > MaxBlocksFound)
+						{
+							abortSearch = true;
+							break;
+						}
+					}
+					
+					// Roll checksum forward
+					rolling.RollForward(beginnings[offset], endings[offset], Sizes[s]);
+				
+					// Increment offsets
+					++offset;
+					++fileOffset;
+				}
+				
+				if(abortSearch) break;
+				
+			refresh:
+				// Finished?
+				if(bytesInEndings != Sizes[s])
+				{
+					// No more data in file -- check the final block
+					// (Do a copy and paste of 5 lines of code instead of introducing a comparison for
+					// each byte of the file)
+					uint16_t hash = rolling.GetComponentForHashing();
+					if(phashTable[hash] != 0 && (goodnessOfFit.count(fileOffset) == 0 || goodnessOfFit[fileOffset] < Sizes[s]))
+					{
+						if(SecondStageMatch(phashTable[hash], rolling, beginnings, endings, offset, Sizes[s], fileBlockNumber, pIndex, rFoundBlocks))
+						{
+							goodnessOfFit[fileOffset] = Sizes[s];
+						}
+					}
+
+					// finish
+					break;
+				}
+				
+				// Switch buffers, reset offset
+				beginnings = endings;
+				endings = (beginnings == pbuffer0)?(pbuffer1):(pbuffer0);	// ie the other buffer
+				offset = 0;
+
+				// And count the blocks which have been done
+				++fileBlockNumber;
+			}
+
+			if(abortSearch) break;
+		}
+		
+		// Free buffers and hash table
+		::free(pbuffer1);
+		pbuffer1 = 0;
+		::free(pbuffer0);
+		pbuffer0 = 0;
+		::free(phashTable);
+		phashTable = 0;
+	}
+	catch(...)
+	{
+		// Cleanup and throw
+		if(pbuffer1 != 0) ::free(pbuffer1);
+		if(pbuffer0 != 0) ::free(pbuffer0);
+		if(phashTable != 0) ::free(phashTable);
+		throw;
+	}
+	
+#ifndef BOX_RELEASE_BUILD
+	if(BackupStoreFile::TraceDetailsOfDiffProcess)
+	{
+		// Trace out the found blocks in debug mode
+		BOX_TRACE("Diff: list of found blocks");
+		BOX_TRACE("======== ======== ======== ========");
+		BOX_TRACE("  Offset   BlkIdx     Size Movement");
+		for(std::map<int64_t, int64_t>::const_iterator i(rFoundBlocks.begin()); i != rFoundBlocks.end(); ++i)
+		{
+			int64_t orgLoc = 0;
+			for(int64_t b = 0; b < i->second; ++b)
+			{
+				orgLoc += pIndex[b].mSize;
+			}
+			BOX_TRACE(std::setw(8) << i->first << " " <<
+				std::setw(8) << i->second << " " <<
+				std::setw(8) << pIndex[i->second].mSize << 
+				" " << 
+				std::setw(8) << (i->first - orgLoc));
+		}
+		BOX_TRACE("======== ======== ======== ========");
+	}
+#endif
+}
+
+
+// --------------------------------------------------------------------------
+//
+// Function
+//		Name:    static SetupHashTable(BlocksAvailableEntry *, int64_t, in32_t, BlocksAvailableEntry **)
+//		Purpose: Set up the hash table ready for a scan
+//		Created: 14/1/04
+//
+// --------------------------------------------------------------------------
+static void SetupHashTable(BlocksAvailableEntry *pIndex, int64_t NumBlocks, int32_t BlockSize, BlocksAvailableEntry **pHashTable)
+{
+	// Set all entries in the hash table to zero
+	::memset(pHashTable, 0, (sizeof(BlocksAvailableEntry *) * (64*1024)));
+
+	// Scan through the blocks, building the hash table
+	for(int64_t b = 0; b < NumBlocks; ++b)
+	{
+		// Only look at the required block size
+		if(pIndex[b].mSize == BlockSize)
+		{
+			// Get the value under which to hash this entry
+			uint16_t hash = RollingChecksum::ExtractHashingComponent(pIndex[b].mWeakChecksum);
+			
+			// Already present in table?
+			if(pHashTable[hash] != 0)
+			{
+				//BOX_TRACE("Another hash entry for " << hash << " found");
+				// Yes -- need to set the pointer in this entry to the current entry to build the linked list
+				pIndex[b].mpNextInHashList = pHashTable[hash];
+			}
+
+			// Put a pointer to this entry in the hash table
+			pHashTable[hash] = pIndex + b;
+		}
+	}
+}
+
+
+// --------------------------------------------------------------------------
+//
+// Function
+//		Name:    static bool SecondStageMatch(xxx)
+//		Purpose: When a match in the hash table is found, scan for second stage match using strong checksum.
+//		Created: 14/1/04
+//
+// --------------------------------------------------------------------------
+static bool SecondStageMatch(BlocksAvailableEntry *pFirstInHashList, RollingChecksum &fastSum, uint8_t *pBeginnings, uint8_t *pEndings,
+	int Offset, int32_t BlockSize, int64_t FileBlockNumber, BlocksAvailableEntry *pIndex, std::map<int64_t, int64_t> &rFoundBlocks)
+{
+	// Check parameters
+	ASSERT(pBeginnings != 0);
+	ASSERT(pEndings != 0);
+	ASSERT(Offset >= 0);
+	ASSERT(BlockSize > 0);
+	ASSERT(pFirstInHashList != 0);
+	ASSERT(pIndex != 0);
+
+#ifndef BOX_RELEASE_BUILD
+	uint16_t DEBUG_Hash = fastSum.GetComponentForHashing();
+#endif
+	uint32_t Checksum = fastSum.GetChecksum();
+
+	// Before we go to the expense of the MD5, make sure it's a darn good match on the checksum we already know.
+	BlocksAvailableEntry *scan = pFirstInHashList;
+	bool found=false;
+	while(scan != 0)
+	{
+		if(scan->mWeakChecksum == Checksum)
+		{
+			found = true;
+			break;
+		}
+		scan = scan->mpNextInHashList;
+	}
+	if(!found)
+	{
+		return false;
+	}
+
+	// Calculate the strong MD5 digest for this block
+	MD5Digest strong;
+	// Add the data from the beginnings
+	strong.Add(pBeginnings + Offset, BlockSize - Offset);
+	// Add any data from the endings
+	if(Offset > 0)
+	{
+		strong.Add(pEndings, Offset);
+	}
+	strong.Finish();
+	
+	// Then go through the entries in the hash list, comparing with the strong digest calculated
+	scan = pFirstInHashList;
+	//BOX_TRACE("second stage match");
+	while(scan != 0)
+	{
+		//BOX_TRACE("scan size " << scan->mSize <<
+		//	", block size " << BlockSize <<
+		//	", hash " << Hash);
+		ASSERT(scan->mSize == BlockSize);
+		ASSERT(RollingChecksum::ExtractHashingComponent(scan->mWeakChecksum) == DEBUG_Hash);
+	
+		// Compare?
+		if(strong.DigestMatches(scan->mStrongChecksum))
+		{
+			//BOX_TRACE("Match!\n");
+			// Found! Add to list of found blocks...
+			int64_t fileOffset = (FileBlockNumber * BlockSize) + Offset;
+			int64_t blockIndex = (scan - pIndex);	// pointer arthmitic is frowned upon. But most efficient way of doing it here -- alternative is to use more memory
+			
+			// We do NOT search for smallest blocks first, as this code originally assumed.
+			// To prevent this from potentially overwriting a better match, the caller must determine
+			// the relative "goodness" of any existing match and this one, and avoid the call if it
+			// could be detrimental.
+			rFoundBlocks[fileOffset] = blockIndex;
+			
+			// No point in searching further, report success
+			return true;
+		}
+	
+		// Next
+		scan = scan->mpNextInHashList;
+	}
+	
+	// Not matched
+	return false;
+}
+
+
+// --------------------------------------------------------------------------
+//
+// Function
+//		Name:    static GenerateRecipe(BackupStoreFileEncodeStream::Recipe &, BlocksAvailableEntry *, int64_t, std::map<int64_t, int64_t> &)
+//		Purpose: Fills in the recipe from the found block list
+//		Created: 15/1/04
+//
+// --------------------------------------------------------------------------
+static void GenerateRecipe(BackupStoreFileEncodeStream::Recipe &rRecipe, BlocksAvailableEntry *pIndex,
+		int64_t NumBlocks, std::map<int64_t, int64_t> &rFoundBlocks, int64_t SizeOfInputFile)
+{
+	// NOTE: This function could be a lot more sophisiticated. For example, if
+	// a small block overlaps a big block like this
+	//   ****
+	//      *******************************
+	// then the small block will be used, not the big one. But it'd be better to
+	// just ignore the small block and keep the big one. However, some stats should
+	// be gathered about real world files before writing complex code which might
+	// go wrong.
+
+	// Initialise a blank instruction
+	BackupStoreFileEncodeStream::RecipeInstruction instruction;
+	#define RESET_INSTRUCTION			\
+	instruction.mSpaceBefore = 0;		\
+	instruction.mBlocks = 0;			\
+	instruction.mpStartBlock = 0;
+	RESET_INSTRUCTION
+
+	// First, a special case for when there are no found blocks
+	if(rFoundBlocks.size() == 0)
+	{
+		// No blocks, just a load of space
+		instruction.mSpaceBefore = SizeOfInputFile;
+		rRecipe.push_back(instruction);	
+	
+		#ifndef BOX_RELEASE_BUILD
+		if(BackupStoreFile::TraceDetailsOfDiffProcess)
+		{
+			BOX_TRACE("Diff: Default recipe generated, " << 
+				SizeOfInputFile << " bytes of file");
+		}
+		#endif
+		
+		// Don't do anything
+		return;
+	}
+
+	// Current location
+	int64_t loc = 0;
+
+	// Then iterate through the list, generating the recipe
+	std::map<int64_t, int64_t>::const_iterator i(rFoundBlocks.begin());
+	ASSERT(i != rFoundBlocks.end());	// check logic
+
+	// Counting for debug tracing
+#ifndef BOX_RELEASE_BUILD
+	int64_t debug_NewBytesFound = 0;
+	int64_t debug_OldBlocksUsed = 0;
+#endif
+	
+	for(; i != rFoundBlocks.end(); ++i)
+	{
+		// Remember... map is (position in file) -> (index of block in pIndex)
+		
+		if(i->first < loc)
+		{
+			// This block overlaps the last one
+			continue;
+		}
+		else if(i->first > loc)
+		{
+			// There's a gap between the end of the last thing and this block.
+			// If there's an instruction waiting, push it onto the list
+			if(instruction.mSpaceBefore != 0 || instruction.mpStartBlock != 0)
+			{
+				rRecipe.push_back(instruction);
+			}
+			// Start a new instruction, with the gap ready
+			RESET_INSTRUCTION
+			instruction.mSpaceBefore = i->first - loc;
+			// Move location forward to match
+			loc += instruction.mSpaceBefore;
+#ifndef BOX_RELEASE_BUILD
+			debug_NewBytesFound += instruction.mSpaceBefore;
+#endif
+		}
+		
+		// First, does the current instruction need pushing back, because this block is not
+		// sequential to the last one?
+		if(instruction.mpStartBlock != 0 && (pIndex + i->second) != (instruction.mpStartBlock + instruction.mBlocks))
+		{
+			rRecipe.push_back(instruction);
+			RESET_INSTRUCTION
+		}
+		
+		// Add in this block
+		if(instruction.mpStartBlock == 0)
+		{
+			// This block starts a new instruction
+			instruction.mpStartBlock = pIndex + i->second;
+			instruction.mBlocks = 1;
+		}
+		else
+		{
+			// It continues the previous section of blocks
+			instruction.mBlocks += 1;
+		}
+
+#ifndef BOX_RELEASE_BUILD
+		debug_OldBlocksUsed++;
+#endif
+
+		// Move location forward
+		loc += pIndex[i->second].mSize;
+	}
+	
+	// Push the last instruction generated
+	rRecipe.push_back(instruction);
+	
+	// Is there any space left at the end which needs sending?
+	if(loc != SizeOfInputFile)
+	{
+		RESET_INSTRUCTION
+		instruction.mSpaceBefore = SizeOfInputFile - loc;
+#ifndef BOX_RELEASE_BUILD
+		debug_NewBytesFound += instruction.mSpaceBefore;
+#endif
+		rRecipe.push_back(instruction);		
+	}
+	
+	// dump out the recipe
+#ifndef BOX_RELEASE_BUILD
+	BOX_TRACE("Diff: " << 
+		debug_NewBytesFound << " new bytes found, " <<
+		debug_OldBlocksUsed << " old blocks used");
+	if(BackupStoreFile::TraceDetailsOfDiffProcess)
+	{
+		BOX_TRACE("Diff: Recipe generated (size " << rRecipe.size());
+		BOX_TRACE("======== ========= ========");
+		BOX_TRACE("Space b4 FirstBlk  NumBlks");
+		{
+			for(unsigned int e = 0; e < rRecipe.size(); ++e)
+			{
+				char b[64];
+#ifdef WIN32
+				sprintf(b, "%8I64d", (int64_t)(rRecipe[e].mpStartBlock - pIndex));
+#else
+				sprintf(b, "%8lld", (int64_t)(rRecipe[e].mpStartBlock - pIndex));
+#endif
+				BOX_TRACE(std::setw(8) <<
+					rRecipe[e].mSpaceBefore <<
+					" " <<
+					((rRecipe[e].mpStartBlock == 0)?"       -":b) <<
+					" " << std::setw(8) <<
+					rRecipe[e].mBlocks);
+			}
+		}
+		BOX_TRACE("======== ========= ========");
+	}
+#endif
+}
author	Chris Wilson <chris+github@qwirx.com>	2011-05-24 15:08:59 +0000
committer	Chris Wilson <chris+github@qwirx.com>	2011-05-24 15:08:59 +0000
commit	2bca085d623e857e89878a48efba59f78875370f (patch)
tree	1cc119e7886cf2293c161f700323444934b85904 /lib/backupstore
parent	f04cd092a2711160519c957a5ba8c28babd3004c (diff)