summaryrefslogtreecommitdiff
path: root/test/backupdiff/testbackupdiff.cpp
diff options
context:
space:
mode:
authorBen Summers <ben@fluffy.co.uk>2005-10-14 08:50:54 +0000
committerBen Summers <ben@fluffy.co.uk>2005-10-14 08:50:54 +0000
commit99f8ce096bc5569adbfea1911dbcda24c28d8d8b (patch)
tree049c302161fea1f2f6223e1e8f3c40d9e8aadc8b /test/backupdiff/testbackupdiff.cpp
Box Backup 0.09 with a few tweeks
Diffstat (limited to 'test/backupdiff/testbackupdiff.cpp')
-rwxr-xr-xtest/backupdiff/testbackupdiff.cpp511
1 files changed, 511 insertions, 0 deletions
diff --git a/test/backupdiff/testbackupdiff.cpp b/test/backupdiff/testbackupdiff.cpp
new file mode 100755
index 00000000..d086253e
--- /dev/null
+++ b/test/backupdiff/testbackupdiff.cpp
@@ -0,0 +1,511 @@
+// --------------------------------------------------------------------------
+//
+// File
+// Name: testbackupdiff.cpp
+// Purpose: Test diffing routines for backup store files
+// Created: 12/1/04
+//
+// --------------------------------------------------------------------------
+
+#include "Box.h"
+
+#include <stdio.h>
+#include <string.h>
+
+#include "Test.h"
+#include "BackupClientCryptoKeys.h"
+#include "BackupStoreFile.h"
+#include "BackupStoreFilenameClear.h"
+#include "FileStream.h"
+#include "BackupStoreFileWire.h"
+#include "BackupStoreObjectMagic.h"
+#include "BackupStoreFileCryptVar.h"
+#include "BackupStoreException.h"
+#include "CollectInBufferStream.h"
+
+#include "MemLeakFindOn.h"
+
+using namespace BackupStoreFileCryptVar;
+
+
+// from another file
+void create_test_files();
+
+bool files_identical(const char *file1, const char *file2)
+{
+ FileStream f1(file1);
+ FileStream f2(file2);
+
+ if(f1.BytesLeftToRead() != f2.BytesLeftToRead())
+ {
+ return false;
+ }
+
+ while(f1.StreamDataLeft())
+ {
+ char buffer1[2048];
+ char buffer2[2048];
+ int s = f1.Read(buffer1, sizeof(buffer1));
+ if(f2.Read(buffer2, s) != s)
+ {
+ return false;
+ }
+ if(::memcmp(buffer1, buffer2, s) != 0)
+ {
+ return false;
+ }
+ }
+
+ if(f2.StreamDataLeft())
+ {
+ return false;
+ }
+
+ return true;
+}
+
+void make_file_of_zeros(const char *filename, int size)
+{
+ void *b = malloc(size);
+ memset(b, 0, size);
+ FILE *f = fopen(filename, "wb");
+ fwrite(b, size, 1, f);
+ fclose(f);
+ free(b);
+ TEST_THAT(TestGetFileSize(filename) == size);
+}
+
+
+void check_encoded_file(const char *filename, int64_t OtherFileID, int new_blocks_expected, int old_blocks_expected)
+{
+ FileStream enc(filename);
+
+ // Use the interface verify routine
+ int64_t otherIDFromFile = 0;
+ TEST_THAT(BackupStoreFile::VerifyEncodedFileFormat(enc, &otherIDFromFile));
+ TEST_THAT(otherIDFromFile == OtherFileID);
+
+ // Now do our own reading
+ enc.Seek(0, IOStream::SeekType_Absolute);
+ BackupStoreFile::MoveStreamPositionToBlockIndex(enc);
+ // Read in header to check magic value is as expected
+ file_BlockIndexHeader hdr;
+ TEST_THAT(enc.ReadFullBuffer(&hdr, sizeof(hdr), 0));
+ TEST_THAT(hdr.mMagicValue == (int32_t)htonl(OBJECTMAGIC_FILE_BLOCKS_MAGIC_VALUE_V1));
+ TEST_THAT((uint64_t)ntoh64(hdr.mOtherFileID) == (uint64_t)OtherFileID);
+ // number of blocks
+ int64_t nblocks = ntoh64(hdr.mNumBlocks);
+ TRACE2("Reading index from '%s', has %lld blocks\n", filename, nblocks);
+ TRACE0("======== ===== ========== ======== ========\n Index Where EncSz/Idx Size WChcksm\n");
+ // Read them all in
+ int64_t nnew = 0, nold = 0;
+ for(int64_t b = 0; b < nblocks; ++b)
+ {
+ file_BlockIndexEntry en;
+ TEST_THAT(enc.ReadFullBuffer(&en, sizeof(en), 0));
+ int64_t s = ntoh64(en.mEncodedSize);
+ if(s > 0)
+ {
+ nnew++;
+ TRACE2("%8lld this s=%8lld", b, s);
+ }
+ else
+ {
+ nold++;
+ TRACE2("%8lld other i=%8lld", b, 0 - s);
+ }
+ // Decode the rest
+ uint64_t iv = ntoh64(hdr.mEntryIVBase);
+ iv += b;
+ sBlowfishDecryptBlockEntry.SetIV(&iv);
+ file_BlockIndexEntryEnc entryEnc;
+ sBlowfishDecryptBlockEntry.TransformBlock(&entryEnc, sizeof(entryEnc),
+ en.mEnEnc, sizeof(en.mEnEnc));
+ TRACE2(" %8d %08x\n", ntohl(entryEnc.mSize), ntohl(entryEnc.mWeakChecksum));
+
+ }
+ TRACE0("======== ===== ========== ======== ========\n");
+ TEST_THAT(new_blocks_expected == nnew);
+ TEST_THAT(old_blocks_expected == nold);
+}
+
+void test_diff(int from, int to, int new_blocks_expected, int old_blocks_expected, bool expect_completely_different = false)
+{
+ // First, get the block index of the thing it's comparing against
+ char from_encoded[256];
+ sprintf(from_encoded, "testfiles/f%d.encoded", from);
+ FileStream blockindex(from_encoded);
+ BackupStoreFile::MoveStreamPositionToBlockIndex(blockindex);
+
+ // make filenames
+ char from_orig[256];
+ sprintf(from_orig, "testfiles/f%d", from);
+ char to_encoded[256];
+ sprintf(to_encoded, "testfiles/f%d.encoded", to);
+ char to_diff[256];
+ sprintf(to_diff, "testfiles/f%d.diff", to);
+ char to_orig[256];
+ sprintf(to_orig, "testfiles/f%d", to);
+ char rev_diff[256];
+ sprintf(rev_diff, "testfiles/f%d.revdiff", to);
+ char from_rebuild[256];
+ sprintf(from_rebuild, "testfiles/f%d.rebuilt", to);
+ char from_rebuild_dec[256];
+ sprintf(from_rebuild_dec, "testfiles/f%d.rebuilt_dec", to);
+
+ // Then call the encode varient for diffing files
+ bool completelyDifferent = !expect_completely_different; // oposite of what we want
+ {
+ BackupStoreFilenameClear f1name("filename");
+ FileStream out(to_diff, O_WRONLY | O_CREAT | O_EXCL);
+ std::auto_ptr<IOStream> encoded(BackupStoreFile::EncodeFileDiff(to_orig, 1 /* dir ID */, f1name,
+ 1000 + from /* object ID of the file diffing from */, blockindex, IOStream::TimeOutInfinite,
+ 0, &completelyDifferent));
+ encoded->CopyStreamTo(out);
+ }
+ TEST_THAT(completelyDifferent == expect_completely_different);
+
+ // Test that the number of blocks in the file match what's expected
+ check_encoded_file(to_diff, expect_completely_different?(0):(1000 + from), new_blocks_expected, old_blocks_expected);
+
+ // filename
+ char to_testdec[256];
+ sprintf(to_testdec, "testfiles/f%d.testdec", to);
+
+ if(!completelyDifferent)
+ {
+ // Then produce a combined file
+ {
+ FileStream diff(to_diff);
+ FileStream diff2(to_diff);
+ FileStream from(from_encoded);
+ FileStream out(to_encoded, O_WRONLY | O_CREAT | O_EXCL);
+ BackupStoreFile::CombineFile(diff, diff2, from, out);
+ }
+
+ // And check it
+ check_encoded_file(to_encoded, 0, new_blocks_expected + old_blocks_expected, 0);
+ }
+ else
+ {
+ // Emulate the above stage!
+ char cmd[256];
+ sprintf(cmd, "cp testfiles/f%d.diff testfiles/f%d.encoded", to, to);
+ ::system(cmd);
+ }
+
+ // Decode it
+ {
+ FileStream enc(to_encoded);
+ BackupStoreFile::DecodeFile(enc, to_testdec, IOStream::TimeOutInfinite);
+ TEST_THAT(files_identical(to_orig, to_testdec));
+ }
+
+ // Then do some comparisons against the block index
+ {
+ FileStream index(to_encoded);
+ BackupStoreFile::MoveStreamPositionToBlockIndex(index);
+ TEST_THAT(BackupStoreFile::CompareFileContentsAgainstBlockIndex(to_orig, index, IOStream::TimeOutInfinite) == true);
+ }
+ {
+ char from_orig[256];
+ sprintf(from_orig, "testfiles/f%d", from);
+ FileStream index(to_encoded);
+ BackupStoreFile::MoveStreamPositionToBlockIndex(index);
+ TEST_THAT(BackupStoreFile::CompareFileContentsAgainstBlockIndex(from_orig, index, IOStream::TimeOutInfinite) == files_identical(from_orig, to_orig));
+ }
+
+ // Check that combined index creation works as expected
+ {
+ // Load a combined index into memory
+ FileStream diff(to_diff);
+ FileStream from(from_encoded);
+ std::auto_ptr<IOStream> indexCmbStr(BackupStoreFile::CombineFileIndices(diff, from));
+ CollectInBufferStream indexCmb;
+ indexCmbStr->CopyStreamTo(indexCmb);
+ // Then check that it's as expected!
+ FileStream result(to_encoded);
+ BackupStoreFile::MoveStreamPositionToBlockIndex(result);
+ CollectInBufferStream index;
+ result.CopyStreamTo(index);
+ TEST_THAT(indexCmb.GetSize() == index.GetSize());
+ TEST_THAT(::memcmp(indexCmb.GetBuffer(), index.GetBuffer(), index.GetSize()) == 0);
+ }
+
+ // Check that reverse delta can be made, and that it decodes OK
+ {
+ // Create reverse delta
+ {
+ bool reversedCompletelyDifferent = !completelyDifferent;
+ FileStream diff(to_diff);
+ FileStream from(from_encoded);
+ FileStream from2(from_encoded);
+ FileStream reversed(rev_diff, O_WRONLY | O_CREAT);
+ BackupStoreFile::ReverseDiffFile(diff, from, from2, reversed, to, &reversedCompletelyDifferent);
+ TEST_THAT(reversedCompletelyDifferent == completelyDifferent);
+ }
+ // Use it to combine a file
+ {
+ FileStream diff(rev_diff);
+ FileStream diff2(rev_diff);
+ FileStream from(to_encoded);
+ FileStream out(from_rebuild, O_WRONLY | O_CREAT | O_EXCL);
+ BackupStoreFile::CombineFile(diff, diff2, from, out);
+ }
+ // And then confirm that this file is actually the one we want
+ {
+ FileStream enc(from_rebuild);
+ BackupStoreFile::DecodeFile(enc, from_rebuild_dec, IOStream::TimeOutInfinite);
+ TEST_THAT(files_identical(from_orig, from_rebuild_dec));
+ }
+ // Do some extra checking
+ {
+ TEST_THAT(files_identical(from_rebuild, from_encoded));
+ }
+ }
+}
+
+void test_combined_diff(int version1, int version2, int serial)
+{
+ char combined_file[256];
+ char last_diff[256];
+ sprintf(last_diff, "testfiles/f%d.diff", version1 + 1); // ie from version1 to version1 + 1
+
+ for(int v = version1 + 2; v <= version2; ++v)
+ {
+ FileStream diff1(last_diff);
+ char next_diff[256];
+ sprintf(next_diff, "testfiles/f%d.diff", v);
+ FileStream diff2(next_diff);
+ FileStream diff2b(next_diff);
+ sprintf(combined_file, "testfiles/comb%d_%d.cmbdiff", version1, v);
+ FileStream out(combined_file, O_WRONLY | O_CREAT);
+ BackupStoreFile::CombineDiffs(diff1, diff2, diff2b, out);
+ strcpy(last_diff, combined_file);
+ }
+
+ // Then do a combine on it, and check that it decodes to the right thing
+ char orig_enc[256];
+ sprintf(orig_enc, "testfiles/f%d.encoded", version1);
+ char combined_out[256];
+ sprintf(combined_out, "testfiles/comb%d_%d.out", version1, version2);
+
+ {
+ FileStream diff(combined_file);
+ FileStream diff2(combined_file);
+ FileStream from(orig_enc);
+ FileStream out(combined_out, O_WRONLY | O_CREAT);
+ BackupStoreFile::CombineFile(diff, diff2, from, out);
+ }
+
+ char combined_out_dec[256];
+ sprintf(combined_out_dec, "testfiles/comb%d_%d_s%d.dec", version1, version2, serial);
+ char to_orig[256];
+ sprintf(to_orig, "testfiles/f%d", version2);
+
+ {
+ FileStream enc(combined_out);
+ BackupStoreFile::DecodeFile(enc, combined_out_dec, IOStream::TimeOutInfinite);
+ TEST_THAT(files_identical(to_orig, combined_out_dec));
+ }
+
+}
+
+#define MAX_DIFF 9
+void test_combined_diffs()
+{
+ int serial = 0;
+
+ // Number of items to combine at once
+ for(int stages = 2; stages <= 4; ++stages)
+ {
+ // Offset to get complete coverage
+ for(int offset = 0; offset < stages; ++offset)
+ {
+ // And then actual end file number
+ for(int f = 0; f <= (MAX_DIFF - stages - offset); ++f)
+ {
+ // And finally, do something!
+ test_combined_diff(offset + f, offset + f + stages, ++serial);
+ }
+ }
+ }
+}
+
+int test(int argc, const char *argv[])
+{
+ // Want to trace out all the details
+ #ifndef NDEBUG
+ BackupStoreFile::TraceDetailsOfDiffProcess = true;
+ #endif
+
+ // Create all the test files
+ create_test_files();
+
+ // Setup the crypto
+ BackupClientCryptoKeys_Setup("testfiles/backup.keys");
+
+ // Encode the first file
+ {
+ BackupStoreFilenameClear f0name("f0");
+ FileStream out("testfiles/f0.encoded", O_WRONLY | O_CREAT | O_EXCL);
+ std::auto_ptr<IOStream> encoded(BackupStoreFile::EncodeFile("testfiles/f0", 1 /* dir ID */, f0name));
+ encoded->CopyStreamTo(out);
+ check_encoded_file("testfiles/f0.encoded", 0, 33, 0);
+ }
+
+ // Check the "seek to index" code
+ {
+ FileStream enc("testfiles/f0.encoded");
+ BackupStoreFile::MoveStreamPositionToBlockIndex(enc);
+ // Read in header to check magic value is as expected
+ file_BlockIndexHeader hdr;
+ TEST_THAT(enc.ReadFullBuffer(&hdr, sizeof(hdr), 0));
+ TEST_THAT(hdr.mMagicValue == (int32_t)htonl(OBJECTMAGIC_FILE_BLOCKS_MAGIC_VALUE_V1));
+ }
+
+ // Diff some files -- parameters are from number, to number,
+ // then the number of new blocks expected, and the number of old blocks expected.
+
+ // Diff the original file to a copy of itself, and check that there is no data in the file
+ // This checks that the hash table is constructed properly, because two of the blocks share
+ // the same weak checksum.
+ test_diff(0, 1, 0, 33);
+
+ // Insert some new data
+ // Blocks from old file moved whole, but put in different order
+ test_diff(1, 2, 7, 32);
+
+ // Delete some data, but not on block boundaries
+ test_diff(2, 3, 1, 29);
+
+ // Add a very small amount of data, not on block boundary
+ // delete a little data
+ test_diff(3, 4, 3, 25);
+
+ // 1 byte insertion between two blocks
+ test_diff(4, 5, 1, 28);
+
+ // a file with some new content at the very beginning
+ // NOTE: You might expect the last numbers to be 2, 29, but the small 1 byte block isn't searched for
+ test_diff(5, 6, 3, 28);
+
+ // some new content at the very end
+ // NOTE: 1 byte block deleted, so number aren't what you'd initial expect.
+ test_diff(6, 7, 2, 30);
+
+ // a completely different file, with no blocks matching.
+ test_diff(7, 8, 14, 0, true /* completely different expected */);
+
+ // diff to zero sized file
+ test_diff(8, 9, 0, 0, true /* completely different expected */);
+
+ // Test that combining diffs works
+ test_combined_diffs();
+
+ // Check zero sized file works OK to encode on its own, using normal encoding
+ {
+ {
+ // Encode
+ BackupStoreFilenameClear fn("filename");
+ FileStream out("testfiles/f9.zerotest", O_WRONLY | O_CREAT | O_EXCL);
+ std::auto_ptr<IOStream> encoded(BackupStoreFile::EncodeFile("testfiles/f9", 1 /* dir ID */, fn));
+ encoded->CopyStreamTo(out);
+ check_encoded_file("testfiles/f9.zerotest", 0, 0, 0);
+ }
+ {
+ // Decode
+ FileStream enc("testfiles/f9.zerotest");
+ BackupStoreFile::DecodeFile(enc, "testfiles/f9.testdec.zero", IOStream::TimeOutInfinite);
+ TEST_THAT(files_identical("testfiles/f9", "testfiles/f9.testdec.zero"));
+ }
+ }
+
+ // Check that symlinks aren't diffed
+ TEST_THAT(::symlink("f2", "testfiles/f2.symlink") == 0)
+ // And go and diff it against the previous encoded file
+ {
+ bool completelyDifferent = false;
+ {
+ FileStream blockindex("testfiles/f1.encoded");
+ BackupStoreFile::MoveStreamPositionToBlockIndex(blockindex);
+
+ BackupStoreFilenameClear f1name("filename");
+ FileStream out("testfiles/f2.symlink.diff", O_WRONLY | O_CREAT | O_EXCL);
+ std::auto_ptr<IOStream> encoded(BackupStoreFile::EncodeFileDiff("testfiles/f2.symlink", 1 /* dir ID */, f1name,
+ 1001 /* object ID of the file diffing from */, blockindex, IOStream::TimeOutInfinite,
+ 0, &completelyDifferent));
+ encoded->CopyStreamTo(out);
+ }
+ TEST_THAT(completelyDifferent == true);
+ check_encoded_file("testfiles/f2.symlink.diff", 0, 0, 0);
+ }
+
+ // Check that diffing against a file which isn't "complete" and referes another isn't allowed
+ {
+ FileStream blockindex("testfiles/f1.diff");
+ BackupStoreFile::MoveStreamPositionToBlockIndex(blockindex);
+
+ BackupStoreFilenameClear f1name("filename");
+ FileStream out("testfiles/f2.testincomplete", O_WRONLY | O_CREAT | O_EXCL);
+ TEST_CHECK_THROWS(BackupStoreFile::EncodeFileDiff("testfiles/f2", 1 /* dir ID */, f1name,
+ 1001 /* object ID of the file diffing from */, blockindex, IOStream::TimeOutInfinite,
+ 0, 0), BackupStoreException, CannotDiffAnIncompleteStoreFile);
+ }
+
+ // Found a nasty case where files of lots of the same thing sock up lots of processor
+ // time -- because of lots of matches found. Check this out!
+ make_file_of_zeros("testfiles/zero.0", 20*1024);
+ make_file_of_zeros("testfiles/zero.1", 200*1024);
+ // Generate a first encoded file
+ {
+ BackupStoreFilenameClear f0name("zero.0");
+ FileStream out("testfiles/zero.0.enc", O_WRONLY | O_CREAT | O_EXCL);
+ std::auto_ptr<IOStream> encoded(BackupStoreFile::EncodeFile("testfiles/zero.0", 1 /* dir ID */, f0name));
+ encoded->CopyStreamTo(out);
+ }
+ // Then diff from it -- time how long it takes...
+ {
+ int beginTime = time(0);
+ FileStream blockindex("testfiles/zero.0.enc");
+ BackupStoreFile::MoveStreamPositionToBlockIndex(blockindex);
+
+ BackupStoreFilenameClear f1name("zero.1");
+ FileStream out("testfiles/zero.1.enc", O_WRONLY | O_CREAT | O_EXCL);
+ std::auto_ptr<IOStream> encoded(BackupStoreFile::EncodeFileDiff("testfiles/zero.1", 1 /* dir ID */, f1name,
+ 2000 /* object ID of the file diffing from */, blockindex, IOStream::TimeOutInfinite,
+ 0, 0));
+ encoded->CopyStreamTo(out);
+ TEST_THAT(time(0) < (beginTime + 20));
+ }
+
+#if 0
+ // Code for a nasty real world example! (16Mb files, won't include them in the distribution
+ // for obvious reasons...)
+ // Generate a first encoded file
+ {
+ BackupStoreFilenameClear f0name("0000000000000000.old");
+ FileStream out("testfiles/0000000000000000.enc.0", O_WRONLY | O_CREAT | O_EXCL);
+ std::auto_ptr<IOStream> encoded(BackupStoreFile::EncodeFile("/Users/ben/Desktop/0000000000000000.old", 1 /* dir ID */, f0name));
+ encoded->CopyStreamTo(out);
+ }
+ // Then diff from it -- time how long it takes...
+ {
+ int beginTime = time(0);
+ FileStream blockindex("testfiles/0000000000000000.enc.0");
+ BackupStoreFile::MoveStreamPositionToBlockIndex(blockindex);
+
+ BackupStoreFilenameClear f1name("0000000000000000.new");
+ FileStream out("testfiles/0000000000000000.enc.1", O_WRONLY | O_CREAT | O_EXCL);
+ std::auto_ptr<IOStream> encoded(BackupStoreFile::EncodeFileDiff("/Users/ben/Desktop/0000000000000000.new", 1 /* dir ID */, f1name,
+ 2000 /* object ID of the file diffing from */, blockindex, IOStream::TimeOutInfinite,
+ 0, 0));
+ encoded->CopyStreamTo(out);
+ TEST_THAT(time(0) < (beginTime + 20));
+ }
+#endif // 0
+
+ return 0;
+}
+
+