3 files changed, 807 insertions, 0 deletions
diff --git a/test/backupdiff/difftestfiles.cpp b/test/backupdiff/difftestfiles.cpp
new file mode 100755
index 00000000..881876ca
--- /dev/null
+++ b/test/backupdiff/difftestfiles.cpp
@@ -0,0 +1,294 @@
+// --------------------------------------------------------------------------
+//
+// File
+//		Name:    difftestfiles.cpp
+//		Purpose: Create the test files for the backupdiff test
+//		Created: 12/1/04
+//
+// --------------------------------------------------------------------------
+
+#include "Box.h"
+
+#include <string.h>
+
+#include "FileStream.h"
+#include "PartialReadStream.h"
+#include "Test.h"
+#include "RollingChecksum.h"
+
+#include "MemLeakFindOn.h"
+
+#define ACT_END		0	// terminates an action list
+#define ACT_COPY	1	// copy 'length' bytes from the source file
+#define	ACT_NEW		2	// write 'length' bytes of new pseudo-random data generated from 'seed'
+#define	ACT_SKIP	3	// seek the source file by 'length' bytes, relative to its current position
+#define ACT_COPYEND	4	// copy whatever remains of the source file
+// One step of the script which gen_varient() follows to derive a file from the previous one
+typedef struct
+{
+	int action, length, seed;	// ACT_* code; byte count (COPY, NEW, SKIP); generator seed (NEW only)
+} gen_action;
+// Total size in bytes of the initial file, testfiles/f0, written by create_test_files()
+#define INITIAL_FILE_LENGTH (128*1024 + 342)
+
+
+gen_action file1actions[] = {	// f0 -> f1: an unchanged copy
+	{ACT_COPYEND, 0, 0},
+	{ACT_END, 0, 0} };
+// f1 -> f2: whole blocks reordered, then some new data inserted
+gen_action file2actions[] = {
+	{ACT_COPY, 16*1024, 0},
+	// Do blocks on block boundaries, but swapped around a little
+	{ACT_SKIP, 4*1024, 0},
+	{ACT_COPY, 8*1024, 0},
+	{ACT_SKIP, -12*1024, 0},
+	{ACT_COPY, 4*1024, 0},
+	{ACT_SKIP, 8*1024, 0},
+	// Get rest of file with some new data inserted
+	{ACT_COPY, 37*1024 + 12, 0},
+	{ACT_NEW, 23*1024 + 129, 23990},
+	{ACT_COPYEND, 0, 0},
+	{ACT_END, 0, 0} };
+// f2 -> f3: a stretch of data deleted, but not on block boundaries
+gen_action file3actions[] = {
+	{ACT_COPY, 12*1024 + 983, 0},
+	{ACT_SKIP, 37*1024 + 12, 0},
+	{ACT_COPYEND, 0, 0},
+	{ACT_END, 0, 0} };
+// f3 -> f4: 12 new bytes inserted and a little data deleted, neither on a block boundary
+gen_action file4actions[] = {
+	{ACT_COPY, 20*1024 + 2385, 0},
+	{ACT_NEW, 12, 2334},
+	{ACT_COPY, 16*1024 + 385, 0},
+	{ACT_SKIP, 9*1024 + 42, 0},
+	{ACT_COPYEND, 0, 0},
+	{ACT_END, 0, 0} };
+
+// f4 -> f5: insert a 1 byte block into the file, between two other blocks
+gen_action file5actions[] = {
+	{ACT_COPY, 4*1024, 0},
+	{ACT_NEW, 1, 2334},
+	{ACT_COPYEND, 0, 0},
+	{ACT_END, 0, 0} };
+// f5 -> f6: 6k of new data at the very beginning
+gen_action file6actions[] = {
+	{ACT_NEW, 6*1024, 12353452},
+	{ACT_COPYEND, 0, 0},
+	{ACT_END, 0, 0} };
+
+// f6 -> f7: delete the one byte block added for f5 (now at offset 10k), and append 7k of new data
+gen_action file7actions[] = {
+	{ACT_COPY, 10*1024, 0},
+	{ACT_SKIP, 1, 0},
+	{ACT_COPYEND, 0, 0},
+	{ACT_NEW, 7*1024, 1235352},
+	{ACT_END, 0, 0} };
+// f7 -> f8: nothing copied from the source, entirely new data
+gen_action file8actions[] = {
+	{ACT_NEW, 54*1024 + 9, 125352},
+	{ACT_END, 0, 0} };
+// f8 -> f9: no actions at all, which gives a zero length file
+gen_action file9actions[] = {
+	{ACT_END, 0, 0} };
+// testfiles[n] is the script which makes f<n+1> from f<n>; a 0 entry ends the list
+gen_action *testfiles[] = {file1actions, file2actions, file3actions, file4actions,
+	file5actions, file6actions, file7actions, file8actions, file9actions, 0};
+
+
+// Nice random data for testing written files
+class R250 {
+public:
+	// R250 shift-register generator, seeded from the C library's rand().
+	// Only rand()'s upper bits are used (the low two are shifted away) and
+	// three draws are XORed at different offsets to fill each 32-bit word.
+	// The draws are taken in separate statements and combined as unsigned:
+	// inside one expression their order is unspecified and the signed left
+	// shift can overflow, so the data could vary from compiler to compiler.
+	R250(int seed) : posn1(0), posn2(103)
+	{
+		// populate the state and incr tables
+		srand(seed);
+		for (int i = 0; i != stateLen; ++i)	{
+			const unsigned int hi = static_cast<unsigned int>(rand()) >> 2;
+			const unsigned int mid = static_cast<unsigned int>(rand()) >> 2;
+			const unsigned int lo = static_cast<unsigned int>(rand()) >> 2;
+			state[i] = static_cast<int>((hi << 19) ^ (mid << 11) ^ lo);
+			incrTable[i] = i == stateLen - 1 ? 0 : i + 1;
+		}
+
+		// stir up the numbers to ensure they're random
+		for (int j = 0; j != stateLen * 4; ++j)
+			(void) next();
+	}
+
+	// Returns the next random number.  Xor together two elements separated
+	// by 103 mod 250, replacing the first element with the result.  Then
+	// increment the two indices mod 250.
+	inline int next()
+	{
+		int ret = (state[posn1] ^= state[posn2]);	// xor and replace element
+		posn1 = incrTable[posn1];		// increment indices using lookup table
+		posn2 = incrTable[posn2];
+		return ret;
+	}
+private:
+	enum { stateLen = 250 };	// length of the state table
+	int state[stateLen];		// holds the random number state
+	int incrTable[stateLen];	// lookup table: maps i to (i+1) % stateLen
+	int posn1, posn2;			// indices into the state table
+};
+
+// Fill 'size' bytes at 'buffer' with R250 output seeded by 'seed'. Words are
+// memcpy'd so any alignment works; a short tail gets part of one more word.
+void make_random_data(void *buffer, int size, int seed)
+{
+	R250 rand(seed);
+	for(int done = 0; done < size; done += (int)sizeof(int))
+	{
+		const int word = rand.next(), left = size - done;
+		::memcpy((char*)buffer + done, &word, left < (int)sizeof(int) ? left : (int)sizeof(int));
+	}
+}
+
+// Write 'size' bytes of R250 pseudo-random data, seeded by 'seed', to rstream
+void write_test_data(IOStream &rstream, int size, int seed)
+{
+	R250 generator(seed);
+	for(int remaining = size; remaining > 0; )
+	{
+		// A fresh 2048 byte chunk is generated for every write, even if only
+		// part of it is needed, so each pass consumes the same number of values
+		int chunk[2048/sizeof(int)];
+		const unsigned int words = sizeof(chunk) / sizeof(int);
+		for(unsigned int w = 0; w != words; ++w)
+		{
+			chunk[w] = generator.next();
+		}
+		// Clamp the final write to what is left
+		unsigned int toWrite = remaining;
+		if(toWrite > sizeof(chunk)) toWrite = sizeof(chunk);
+		rstream.Write(chunk, toWrite);
+		remaining -= toWrite;
+	}
+}
+
+// Write one variant file to 'out' by running the script at 'pact' against the
+// file named 'sourcename'. Each entry is handled by its action code:
+//   ACT_COPY     copy 'length' bytes of the source (via PartialReadStream)
+//   ACT_NEW      write 'length' bytes of new pseudo-random data from 'seed'
+//   ACT_SKIP     seek the source by 'length' bytes, relative to its position
+//   ACT_COPYEND  copy whatever is left of the source
+//   ACT_END      stop
+// Entries with any other action code are passed over without doing anything.
+void gen_varient(IOStream &out, char *sourcename, gen_action *pact)
+{
+	// Open source
+	FileStream source(sourcename);
+
+	for(gen_action *step = pact; step->action != ACT_END; ++step)
+	{
+		const int action = step->action;
+		if(action == ACT_COPY)
+		{
+			// The partial stream limits the copy to this entry's length
+			PartialReadStream copy(source, step->length);
+			copy.CopyStreamTo(out);
+		}
+		else if(action == ACT_NEW)
+		{
+			// Data which does not come from the source at all
+			write_test_data(out, step->length, step->seed);
+		}
+		else if(action == ACT_SKIP)
+		{
+			// Nothing is written, only the source position changes
+			source.Seek(step->length, IOStream::SeekType_Relative);
+		}
+		else if(action == ACT_COPYEND)
+		{
+			// Everything from the current source position onwards
+			source.CopyStreamTo(out);
+		}
+	}
+	// ACT_END reached: all done
+}
+
+void create_test_files()
+{	// Writes the crypto key file, the initial file f0, and then each derived file f1, f2, ... into testfiles/
+	// First, the keys for the crypto
+	{
+		FileStream keys("testfiles/backup.keys", O_WRONLY | O_CREAT);
+		write_test_data(keys, 1024, 237);
+	}
+	
+	// Create the initial file -- needs various special properties...
+	// 1) Two blocks must be different, but have the same weak checksum
+	// 2) A block must exist twice, but at an offset which isn't a multiple of the block size.
+	{
+		FileStream f0("testfiles/f0", O_WRONLY | O_CREAT);
+		// Write first bit.
+		write_test_data(f0, (16*1024), 20012);
+		// Now repeated checksum blocks
+		uint8_t blk[4096];
+		make_random_data(blk, sizeof(blk), 12201);
+		// Three magic numbers which make the checksum work (the offset 1024 and the byte values 255 and 191 used below). Use this perl to find them:
+		/*
+			for($z = 1; $z < 4096; $z++)
+			{
+				for($n = 0; $n <= 255; $n++)
+				{
+					for($m = 0; $m <= 255; $m++)
+					{
+						if($n != $m && (($n*4096 + $m*(4096-$z)) % (64*1024) == ($n*(4096-$z) + $m*4096) % (64*1024)))
+						{
+							print "$z: $n $m\n";
+						}
+					}
+				}
+			}
+		*/
+		blk[0] = 255;
+		blk[1024] = 191;
+		// Checksum to check
+		RollingChecksum c1(blk, sizeof(blk));
+		// Write
+		f0.Write(blk, sizeof(blk));
+		// Adjust block and write again
+		uint8_t blk2[4096];
+		memcpy(blk2, blk, sizeof(blk2));
+		blk2[1024] = 255;	// the same two values as in blk, swapped over
+		blk2[0] = 191;
+		TEST_THAT(::memcmp(blk2, blk, sizeof(blk)) != 0);
+		RollingChecksum c2(blk2, sizeof(blk2));
+		f0.Write(blk2, sizeof(blk2));
+		// Check checksums: the two different blocks must give the same value
+		TEST_THAT(c1.GetChecksum() == c2.GetChecksum());
+		
+		// Another 4k block
+		write_test_data(f0, (4*1024), 99209);
+		// Offset block: the same 2k of data three times running, followed by a different 2k
+		make_random_data(blk, 2048, 1234199);
+		f0.Write(blk, 2048);
+		f0.Write(blk, 2048);
+		f0.Write(blk, 2048);
+		make_random_data(blk, 2048, 1343278);
+		f0.Write(blk, 2048);
+		// Pad with more test data so that the file totals INITIAL_FILE_LENGTH bytes
+		write_test_data(f0, INITIAL_FILE_LENGTH - (16*1024) - ((4*1024)*2) - (4*1024) - (2048*4), 202);
+	
+	}
+	
+	// Then... create the variants: f<l+1> is generated from f<l> using the script testfiles[l]
+	for(int l = 0; testfiles[l] != 0; ++l)
+	{
+		char n1[256];
+		char n2[256];
+		sprintf(n1, "testfiles/f%d", l + 1);	// file to create
+		sprintf(n2, "testfiles/f%d", l);	// file it is derived from
+
+		FileStream f1(n1, O_WRONLY | O_CREAT);
+		gen_varient(f1, n2, testfiles[l]);
+	}
+}
+
+
diff --git a/test/backupdiff/testbackupdiff.cpp b/test/backupdiff/testbackupdiff.cpp
new file mode 100755
index 00000000..d086253e
--- /dev/null
+++ b/test/backupdiff/testbackupdiff.cpp
@@ -0,0 +1,511 @@
+// --------------------------------------------------------------------------
+//
+// File
+//		Name:    testbackupdiff.cpp
+//		Purpose: Test diffing routines for backup store files
+//		Created: 12/1/04
+//
+// --------------------------------------------------------------------------
+
+#include "Box.h"
+
+#include <stdio.h>
+#include <string.h>
+
+#include "Test.h"
+#include "BackupClientCryptoKeys.h"
+#include "BackupStoreFile.h"
+#include "BackupStoreFilenameClear.h"
+#include "FileStream.h"
+#include "BackupStoreFileWire.h"
+#include "BackupStoreObjectMagic.h"
+#include "BackupStoreFileCryptVar.h"
+#include "BackupStoreException.h"
+#include "CollectInBufferStream.h"
+
+#include "MemLeakFindOn.h"
+
+using namespace BackupStoreFileCryptVar;
+
+
+// from another file
+void create_test_files();
+
+// Compare two files byte for byte. Returns true only when both hold exactly
+// the same data; any difference in length or content gives false.
+bool files_identical(const char *file1, const char *file2)
+{
+	FileStream first(file1);
+	FileStream second(file2);
+
+	// Cheap rejection: different amounts of data can never match
+	if(first.BytesLeftToRead() != second.BytesLeftToRead())
+	{
+		return false;
+	}
+
+	// Walk both files in step, at most 2048 bytes at a time
+	char chunk1[2048];
+	char chunk2[2048];
+	while(first.StreamDataLeft())
+	{
+		const int got = first.Read(chunk1, sizeof(chunk1));
+		// The second file has to supply just as many bytes, with the same
+		// content; the comparison is skipped when the read comes up short
+		const bool same = (second.Read(chunk2, got) == got)
+			&& (::memcmp(chunk1, chunk2, got) == 0);
+		if(!same)
+		{
+			return false;
+		}
+	}
+
+	// The first file is used up, so the second must have nothing left either
+	return !second.StreamDataLeft();
+}
+
+// Create 'filename' containing 'size' zero bytes and check its final size.
+void make_file_of_zeros(const char *filename, int size)
+{
+	void *zeros = ::calloc(size > 0 ? size : 1, 1);	// never request 0 bytes
+	FILE *f = (zeros != 0) ? ::fopen(filename, "wb") : 0;
+	TEST_THAT(f != 0);	// fails (instead of crashing) if alloc or open failed
+	if(f != 0) { ::fwrite(zeros, 1, size, f); ::fclose(f); }
+	::free(zeros);
+	if(f != 0) { TEST_THAT(TestGetFileSize(filename) == size); }
+}
+
+
+void check_encoded_file(const char *filename, int64_t OtherFileID, int new_blocks_expected, int old_blocks_expected)
+{	// Checks the encoded file 'filename': its format, the ID of the other file it refers to, and how many new and old entries its block index holds
+	FileStream enc(filename);
+	
+	// Use the interface verify routine, which also reports the other file ID found in the file
+	int64_t otherIDFromFile = 0;
+	TEST_THAT(BackupStoreFile::VerifyEncodedFileFormat(enc, &otherIDFromFile));
+	TEST_THAT(otherIDFromFile == OtherFileID);
+	
+	// Now do our own reading of the block index, starting again from the beginning of the file
+	enc.Seek(0, IOStream::SeekType_Absolute);
+	BackupStoreFile::MoveStreamPositionToBlockIndex(enc);
+	// Read in header to check magic value is as expected
+	file_BlockIndexHeader hdr;
+	TEST_THAT(enc.ReadFullBuffer(&hdr, sizeof(hdr), 0));
+	TEST_THAT(hdr.mMagicValue == (int32_t)htonl(OBJECTMAGIC_FILE_BLOCKS_MAGIC_VALUE_V1));
+	TEST_THAT((uint64_t)ntoh64(hdr.mOtherFileID) == (uint64_t)OtherFileID);
+	// number of blocks (header fields are converted from network byte order before use)
+	int64_t nblocks = ntoh64(hdr.mNumBlocks);
+	TRACE2("Reading index from '%s', has %lld blocks\n", filename, nblocks);
+	TRACE0("======== ===== ========== ======== ========\n   Index Where  EncSz/Idx     Size  WChcksm\n");
+	// Read them all in, counting entries with a positive encoded size as new and all others as old
+	int64_t nnew = 0, nold = 0;
+	for(int64_t b = 0; b < nblocks; ++b)
+	{
+		file_BlockIndexEntry en;
+		TEST_THAT(enc.ReadFullBuffer(&en, sizeof(en), 0));
+		int64_t s = ntoh64(en.mEncodedSize);
+		if(s > 0)
+		{
+			nnew++;
+			TRACE2("%8lld this  s=%8lld", b, s);
+		}
+		else
+		{
+			nold++;
+			TRACE2("%8lld other i=%8lld", b, 0 - s);		
+		}
+		// Decode the rest: the entry is decrypted using the header's base IV plus the block number as the IV
+		uint64_t iv = ntoh64(hdr.mEntryIVBase);
+		iv += b;
+		sBlowfishDecryptBlockEntry.SetIV(&iv);			
+		file_BlockIndexEntryEnc entryEnc;
+		sBlowfishDecryptBlockEntry.TransformBlock(&entryEnc, sizeof(entryEnc),
+				en.mEnEnc, sizeof(en.mEnEnc));
+		TRACE2(" %8d %08x\n", ntohl(entryEnc.mSize), ntohl(entryEnc.mWeakChecksum));
+		
+	}
+	TRACE0("======== ===== ========== ======== ========\n");
+	TEST_THAT(new_blocks_expected == nnew);
+	TEST_THAT(old_blocks_expected == nold);
+}
+
+void test_diff(int from, int to, int new_blocks_expected, int old_blocks_expected, bool expect_completely_different = false)
+{	// Diffs testfiles/f<to> against the encoded f<from>, then checks the block counts and the combine, decode, combined index and reverse diff steps
+	// First, get the block index of the thing it's comparing against
+	char from_encoded[256];
+	sprintf(from_encoded, "testfiles/f%d.encoded", from);
+	FileStream blockindex(from_encoded);
+	BackupStoreFile::MoveStreamPositionToBlockIndex(blockindex);
+
+	// make filenames
+	char from_orig[256];
+	sprintf(from_orig, "testfiles/f%d", from);
+	char to_encoded[256];
+	sprintf(to_encoded, "testfiles/f%d.encoded", to);
+	char to_diff[256];
+	sprintf(to_diff, "testfiles/f%d.diff", to);
+	char to_orig[256];
+	sprintf(to_orig, "testfiles/f%d", to);
+	char rev_diff[256];
+	sprintf(rev_diff, "testfiles/f%d.revdiff", to);
+	char from_rebuild[256];
+	sprintf(from_rebuild, "testfiles/f%d.rebuilt", to);	// holds the rebuilt 'from' file, but is named after 'to'
+	char from_rebuild_dec[256];
+	sprintf(from_rebuild_dec, "testfiles/f%d.rebuilt_dec", to);
+	
+	// Then call the encode variant for diffing files
+	bool completelyDifferent = !expect_completely_different;	// opposite of what we want, so the check below shows it really was set
+	{
+		BackupStoreFilenameClear f1name("filename");
+		FileStream out(to_diff, O_WRONLY | O_CREAT | O_EXCL);
+		std::auto_ptr<IOStream> encoded(BackupStoreFile::EncodeFileDiff(to_orig, 1 /* dir ID */, f1name,
+			1000 + from /* object ID of the file diffing from */, blockindex, IOStream::TimeOutInfinite,
+			0, &completelyDifferent));
+		encoded->CopyStreamTo(out);
+	}
+	TEST_THAT(completelyDifferent == expect_completely_different);
+	
+	// Test that the number of blocks in the file match what's expected (other file ID 0 is expected when completely different)
+	check_encoded_file(to_diff, expect_completely_different?(0):(1000 + from), new_blocks_expected, old_blocks_expected);
+
+	// filename
+	char to_testdec[256];
+	sprintf(to_testdec, "testfiles/f%d.testdec", to);
+	
+	if(!completelyDifferent)
+	{
+		// Then produce a combined file
+		{
+			FileStream diff(to_diff);
+			FileStream diff2(to_diff);
+			FileStream from(from_encoded);	// note: hides the int parameter 'from' within this scope
+			FileStream out(to_encoded, O_WRONLY | O_CREAT | O_EXCL);
+			BackupStoreFile::CombineFile(diff, diff2, from, out);
+		}
+		
+		// And check it: once combined, every block is expected to be in this file, and none in another
+		check_encoded_file(to_encoded, 0, new_blocks_expected + old_blocks_expected, 0);
+	}
+	else
+	{
+		// Emulate the above stage! The diff is simply copied to be the encoded file
+		char cmd[256];
+		sprintf(cmd, "cp testfiles/f%d.diff testfiles/f%d.encoded", to, to);
+		::system(cmd);
+	}
+
+	// Decode it
+	{
+		FileStream enc(to_encoded);
+		BackupStoreFile::DecodeFile(enc, to_testdec, IOStream::TimeOutInfinite);
+		TEST_THAT(files_identical(to_orig, to_testdec));
+	}
+	
+	// Then do some comparisons against the block index
+	{
+		FileStream index(to_encoded);
+		BackupStoreFile::MoveStreamPositionToBlockIndex(index);
+		TEST_THAT(BackupStoreFile::CompareFileContentsAgainstBlockIndex(to_orig, index, IOStream::TimeOutInfinite) == true);
+	}
+	{
+		char from_orig[256];	// same name and contents as the from_orig declared at the top of the function
+		sprintf(from_orig, "testfiles/f%d", from);
+		FileStream index(to_encoded);
+		BackupStoreFile::MoveStreamPositionToBlockIndex(index);
+		TEST_THAT(BackupStoreFile::CompareFileContentsAgainstBlockIndex(from_orig, index, IOStream::TimeOutInfinite) == files_identical(from_orig, to_orig));
+	}
+	
+	// Check that combined index creation works as expected
+	{
+		// Load a combined index into memory
+		FileStream diff(to_diff);
+		FileStream from(from_encoded);
+		std::auto_ptr<IOStream> indexCmbStr(BackupStoreFile::CombineFileIndices(diff, from));
+		CollectInBufferStream indexCmb;
+		indexCmbStr->CopyStreamTo(indexCmb);
+		// Then check that it's as expected! It must match the index of the combined file byte for byte
+		FileStream result(to_encoded);
+		BackupStoreFile::MoveStreamPositionToBlockIndex(result);
+		CollectInBufferStream index;
+		result.CopyStreamTo(index);
+		TEST_THAT(indexCmb.GetSize() == index.GetSize());
+		TEST_THAT(::memcmp(indexCmb.GetBuffer(), index.GetBuffer(), index.GetSize()) == 0);
+	}
+	
+	// Check that reverse delta can be made, and that it decodes OK
+	{
+		// Create reverse delta
+		{
+			bool reversedCompletelyDifferent = !completelyDifferent;	// again starts as the opposite of the value expected
+			FileStream diff(to_diff);
+			FileStream from(from_encoded);
+			FileStream from2(from_encoded);
+			FileStream reversed(rev_diff, O_WRONLY | O_CREAT);
+			BackupStoreFile::ReverseDiffFile(diff, from, from2, reversed, to, &reversedCompletelyDifferent);
+			TEST_THAT(reversedCompletelyDifferent == completelyDifferent);
+		}
+		// Use it to combine a file: the reverse diff applied to the 'to' file should give back the 'from' file
+		{
+			FileStream diff(rev_diff);
+			FileStream diff2(rev_diff);
+			FileStream from(to_encoded);
+			FileStream out(from_rebuild, O_WRONLY | O_CREAT | O_EXCL);
+			BackupStoreFile::CombineFile(diff, diff2, from, out);
+		}
+		// And then confirm that this file is actually the one we want
+		{
+			FileStream enc(from_rebuild);
+			BackupStoreFile::DecodeFile(enc, from_rebuild_dec, IOStream::TimeOutInfinite);
+			TEST_THAT(files_identical(from_orig, from_rebuild_dec));
+		}
+		// Do some extra checking: the rebuilt encoded file must match the original encoded file exactly
+		{
+			TEST_THAT(files_identical(from_rebuild, from_encoded));
+		}
+	}
+}
+
+// Combine the diffs f<version1+1>.diff .. f<version2>.diff into one, apply it to
+// f<version1>.encoded and check it decodes to f<version2>; 'serial' names the output.
+void test_combined_diff(int version1, int version2, int serial)
+{
+	// Diff built up so far: to begin with, just version1 to version1 + 1
+	char last_diff[256];
+	sprintf(last_diff, "testfiles/f%d.diff", version1 + 1);
+
+	for(int v = version1 + 2; v <= version2; ++v)
+	{
+		FileStream diff1(last_diff);
+		char next_diff[256];
+		sprintf(next_diff, "testfiles/f%d.diff", v);
+		FileStream diff2(next_diff);
+		FileStream diff2b(next_diff);
+		char combined_file[256];
+		sprintf(combined_file, "testfiles/comb%d_%d.cmbdiff", version1, v);
+		FileStream out(combined_file, O_WRONLY | O_CREAT);
+		BackupStoreFile::CombineDiffs(diff1, diff2, diff2b, out);
+		strcpy(last_diff, combined_file);
+	}
+
+	// Then do a combine on it, and check that it decodes to the right thing.
+	// last_diff is always set here, even if the loop above never ran.
+	char orig_enc[256];
+	sprintf(orig_enc, "testfiles/f%d.encoded", version1);
+	char combined_out[256];
+	sprintf(combined_out, "testfiles/comb%d_%d.out", version1, version2);
+	{
+		FileStream diff(last_diff);
+		FileStream diff2(last_diff);
+		FileStream from(orig_enc);
+		FileStream out(combined_out, O_WRONLY | O_CREAT);
+		BackupStoreFile::CombineFile(diff, diff2, from, out);
+	}
+
+	char combined_out_dec[256];
+	sprintf(combined_out_dec, "testfiles/comb%d_%d_s%d.dec", version1, version2, serial);
+	char to_orig[256];
+	sprintf(to_orig, "testfiles/f%d", version2);
+	{
+		FileStream enc(combined_out);
+		BackupStoreFile::DecodeFile(enc, combined_out_dec, IOStream::TimeOutInfinite);
+		TEST_THAT(files_identical(to_orig, combined_out_dec));
+	}
+}
+
+#define MAX_DIFF 9
+// Combine every run of 2, 3 and 4 consecutive diffs between f0 and f<MAX_DIFF>
+void test_combined_diffs()
+{
+	int serial = 0;
+
+	for(int stages = 2; stages <= 4; ++stages)
+	{
+		for(int offset = 0; offset < stages; ++offset)
+		{
+			// 'first' is the version the run starts from, first + stages where it ends
+			const int lastStart = MAX_DIFF - stages;
+			for(int first = offset; first <= lastStart; ++first)
+			{
+				++serial;
+				test_combined_diff(first, first + stages, serial);
+			}
+		}
+	}
+}
+
+int test(int argc, const char *argv[])
+{	// Test entry point: builds the test files, then runs the diff, combine, zero length, symlink, incomplete file and timing checks in order
+	// Want to trace out all the details
+	#ifndef NDEBUG
+	BackupStoreFile::TraceDetailsOfDiffProcess = true;
+	#endif
+
+	// Create all the test files
+	create_test_files();
+
+	// Setup the crypto
+	BackupClientCryptoKeys_Setup("testfiles/backup.keys");	
+
+	// Encode the first file
+	{
+		BackupStoreFilenameClear f0name("f0");
+		FileStream out("testfiles/f0.encoded", O_WRONLY | O_CREAT | O_EXCL);
+		std::auto_ptr<IOStream> encoded(BackupStoreFile::EncodeFile("testfiles/f0", 1 /* dir ID */, f0name));
+		encoded->CopyStreamTo(out);
+		check_encoded_file("testfiles/f0.encoded", 0, 33, 0);
+	}
+	
+	// Check the "seek to index" code
+	{
+		FileStream enc("testfiles/f0.encoded");
+		BackupStoreFile::MoveStreamPositionToBlockIndex(enc);
+		// Read in header to check magic value is as expected
+		file_BlockIndexHeader hdr;
+		TEST_THAT(enc.ReadFullBuffer(&hdr, sizeof(hdr), 0));
+		TEST_THAT(hdr.mMagicValue == (int32_t)htonl(OBJECTMAGIC_FILE_BLOCKS_MAGIC_VALUE_V1));
+	}
+
+	// Diff some files -- parameters are from number, to number,
+	// then the number of new blocks expected, and the number of old blocks expected.
+	
+	// Diff the original file to a copy of itself, and check that there is no data in the file
+	// This checks that the hash table is constructed properly, because two of the blocks share
+	// the same weak checksum.
+	test_diff(0, 1, 0, 33);
+
+	// Insert some new data
+	// Blocks from old file moved whole, but put in different order
+	test_diff(1, 2, 7, 32);
+
+	// Delete some data, but not on block boundaries
+	test_diff(2, 3, 1, 29);
+
+	// Add a very small amount of data, not on block boundary
+	// delete a little data
+	test_diff(3, 4, 3, 25);
+
+	// 1 byte insertion between two blocks
+	test_diff(4, 5, 1, 28);
+
+	// a file with some new content at the very beginning
+	// NOTE: You might expect the last numbers to be 2, 29, but the small 1 byte block isn't searched for
+	test_diff(5, 6, 3, 28);
+	
+	// some new content at the very end
+	// NOTE: 1 byte block deleted, so the numbers aren't what you'd initially expect.
+	test_diff(6, 7, 2, 30);
+	
+	// a completely different file, with no blocks matching.
+	test_diff(7, 8, 14, 0, true /* completely different expected */);
+	
+	// diff to zero sized file
+	test_diff(8, 9, 0, 0, true /* completely different expected */);
+	
+	// Test that combining diffs works
+	test_combined_diffs();
+	
+	// Check zero sized file works OK to encode on its own, using normal encoding
+	{
+		{
+			// Encode
+			BackupStoreFilenameClear fn("filename");
+			FileStream out("testfiles/f9.zerotest", O_WRONLY | O_CREAT | O_EXCL);
+			std::auto_ptr<IOStream> encoded(BackupStoreFile::EncodeFile("testfiles/f9", 1 /* dir ID */, fn));
+			encoded->CopyStreamTo(out);
+			check_encoded_file("testfiles/f9.zerotest", 0, 0, 0);		
+		}
+		{
+			// Decode
+			FileStream enc("testfiles/f9.zerotest");
+			BackupStoreFile::DecodeFile(enc, "testfiles/f9.testdec.zero", IOStream::TimeOutInfinite);
+			TEST_THAT(files_identical("testfiles/f9", "testfiles/f9.testdec.zero"));
+		}
+	}
+	
+	// Check that symlinks aren't diffed
+	TEST_THAT(::symlink("f2", "testfiles/f2.symlink") == 0)
+	// And go and diff it against the previous encoded file
+	{
+		bool completelyDifferent = false;
+		{
+			FileStream blockindex("testfiles/f1.encoded");
+			BackupStoreFile::MoveStreamPositionToBlockIndex(blockindex);
+			
+			BackupStoreFilenameClear f1name("filename");
+			FileStream out("testfiles/f2.symlink.diff", O_WRONLY | O_CREAT | O_EXCL);
+			std::auto_ptr<IOStream> encoded(BackupStoreFile::EncodeFileDiff("testfiles/f2.symlink", 1 /* dir ID */, f1name,
+				1001 /* object ID of the file diffing from */, blockindex, IOStream::TimeOutInfinite,
+				0, &completelyDifferent));
+			encoded->CopyStreamTo(out);
+		}
+		TEST_THAT(completelyDifferent == true);
+		check_encoded_file("testfiles/f2.symlink.diff", 0, 0, 0);		
+	}
+
+	// Check that diffing against a file which isn't "complete" and refers to another isn't allowed
+	{
+		FileStream blockindex("testfiles/f1.diff");
+		BackupStoreFile::MoveStreamPositionToBlockIndex(blockindex);
+		
+		BackupStoreFilenameClear f1name("filename");
+		FileStream out("testfiles/f2.testincomplete", O_WRONLY | O_CREAT | O_EXCL);
+		TEST_CHECK_THROWS(BackupStoreFile::EncodeFileDiff("testfiles/f2", 1 /* dir ID */, f1name,
+			1001 /* object ID of the file diffing from */, blockindex, IOStream::TimeOutInfinite,
+			0, 0), BackupStoreException, CannotDiffAnIncompleteStoreFile);
+	}
+
+	// Found a nasty case where files of lots of the same thing soak up lots of processor
+	// time -- because of lots of matches found. Check this out!
+	make_file_of_zeros("testfiles/zero.0", 20*1024);
+	make_file_of_zeros("testfiles/zero.1", 200*1024);
+	// Generate a first encoded file
+	{
+		BackupStoreFilenameClear f0name("zero.0");
+		FileStream out("testfiles/zero.0.enc", O_WRONLY | O_CREAT | O_EXCL);
+		std::auto_ptr<IOStream> encoded(BackupStoreFile::EncodeFile("testfiles/zero.0", 1 /* dir ID */, f0name));
+		encoded->CopyStreamTo(out);
+	}
+	// Then diff from it -- time how long it takes... (it fails if 20 seconds or more pass on the clock)
+	{
+		int beginTime = time(0);
+		FileStream blockindex("testfiles/zero.0.enc");
+		BackupStoreFile::MoveStreamPositionToBlockIndex(blockindex);
+
+		BackupStoreFilenameClear f1name("zero.1");
+		FileStream out("testfiles/zero.1.enc", O_WRONLY | O_CREAT | O_EXCL);
+		std::auto_ptr<IOStream> encoded(BackupStoreFile::EncodeFileDiff("testfiles/zero.1", 1 /* dir ID */, f1name,
+			2000 /* object ID of the file diffing from */, blockindex, IOStream::TimeOutInfinite,
+			0, 0));
+		encoded->CopyStreamTo(out);
+		TEST_THAT(time(0) < (beginTime + 20));
+	}
+
+#if 0
+	// Code for a nasty real world example! (16Mb files, won't include them in the distribution
+	// for obvious reasons...)
+	// Generate a first encoded file
+	{
+		BackupStoreFilenameClear f0name("0000000000000000.old");
+		FileStream out("testfiles/0000000000000000.enc.0", O_WRONLY | O_CREAT | O_EXCL);
+		std::auto_ptr<IOStream> encoded(BackupStoreFile::EncodeFile("/Users/ben/Desktop/0000000000000000.old", 1 /* dir ID */, f0name));
+		encoded->CopyStreamTo(out);
+	}
+	// Then diff from it -- time how long it takes...
+	{
+		int beginTime = time(0);
+		FileStream blockindex("testfiles/0000000000000000.enc.0");
+		BackupStoreFile::MoveStreamPositionToBlockIndex(blockindex);
+
+		BackupStoreFilenameClear f1name("0000000000000000.new");
+		FileStream out("testfiles/0000000000000000.enc.1", O_WRONLY | O_CREAT | O_EXCL);
+		std::auto_ptr<IOStream> encoded(BackupStoreFile::EncodeFileDiff("/Users/ben/Desktop/0000000000000000.new", 1 /* dir ID */, f1name,
+			2000 /* object ID of the file diffing from */, blockindex, IOStream::TimeOutInfinite,
+			0, 0));
+		encoded->CopyStreamTo(out);
+		TEST_THAT(time(0) < (beginTime + 20));
+	}
+#endif // 0
+
+	return 0;
+}
+
+
diff --git a/test/backupdiff/testextra b/test/backupdiff/testextra
new file mode 100644
index 00000000..165cacb9
--- /dev/null
+++ b/test/backupdiff/testextra
@@ -0,0 +1,2 @@
+rm -rf testfiles	# drop output of any earlier run; the test creates many files with O_EXCL
+mkdir testfiles	# empty directory which the test programs write into