diff options
author | Chris Wilson <chris+github@qwirx.com> | 2013-09-20 15:17:06 +0000 |
---|---|---|
committer | Chris Wilson <chris+github@qwirx.com> | 2013-09-20 15:17:06 +0000 |
commit | f65a58a02d90a75cb08d05e5575867bebb7cb784 (patch) | |
tree | bdf69bebc3ea74c596542d3502a177557b4990cb | |
parent | 0540ad493355d0ab5dc56354a43fccd8b6adea5e (diff) |
Repair damaged Box RAID objects and non-transformed .rfw files.
-rw-r--r-- | lib/backupstore/BackupStoreCheck.cpp | 111 |
-rw-r--r-- | lib/backupstore/BackupStoreCheck.h | 9 |
-rw-r--r-- | lib/raidfile/RaidFileController.cpp | 6 |
-rw-r--r-- | lib/raidfile/RaidFileController.h | 2 |
-rw-r--r-- | lib/raidfile/RaidFileRead.cpp | 6 |
-rw-r--r-- | test/backupstorefix/testbackupstorefix.cpp | 136 |
6 files changed, 197 insertions, 73 deletions
diff --git a/lib/backupstore/BackupStoreCheck.cpp b/lib/backupstore/BackupStoreCheck.cpp index b8b8cc40..f2302337 100644 --- a/lib/backupstore/BackupStoreCheck.cpp +++ b/lib/backupstore/BackupStoreCheck.cpp @@ -16,15 +16,18 @@ # include <unistd.h> #endif +#include "autogen_BackupStoreException.h" #include "BackupStoreCheck.h" -#include "StoreStructure.h" +#include "BackupStoreConstants.h" +#include "BackupStoreDirectory.h" +#include "BackupStoreFile.h" +#include "BackupStoreObjectMagic.h" +#include "RaidFileController.h" +#include "RaidFileException.h" #include "RaidFileRead.h" +#include "RaidFileUtil.h" #include "RaidFileWrite.h" -#include "autogen_BackupStoreException.h" -#include "BackupStoreObjectMagic.h" -#include "BackupStoreFile.h" -#include "BackupStoreDirectory.h" -#include "BackupStoreConstants.h" +#include "StoreStructure.h" #include "Utils.h" #include "MemLeakFindOn.h" @@ -281,6 +284,33 @@ int64_t BackupStoreCheck::CheckObjectsScanDir(int64_t StartID, int Level, const // Read in all the directories, and recurse downwards { + // If any of the directories is missing, create it. 
+ RaidFileController &rcontroller(RaidFileController::GetController()); + RaidFileDiscSet rdiscSet(rcontroller.GetDiscSet(mDiscSetNumber)); + + if(!rdiscSet.IsNonRaidSet()) + { + unsigned int numDiscs = rdiscSet.size(); + + for(unsigned int l = 0; l < numDiscs; ++l) + { + // build name + std::string dn(rdiscSet[l] + DIRECTORY_SEPARATOR + rDirName); + struct stat st; + + if(stat(dn.c_str(), &st) != 0 && errno == ENOENT) + { + if(mkdir(dn.c_str(), 0755) != 0) + { + THROW_SYS_FILE_ERROR("Failed to " + "create missing RaidFile " + "directory", dn, + RaidFileException, OSError); + } + } + } + } + std::vector<std::string> dirs; RaidFileRead::ReadDirectoryContents(mDiscSetNumber, rDirName, RaidFileRead::DirReadType_DirsOnly, dirs); @@ -483,13 +513,6 @@ bool BackupStoreCheck::CheckAndAddObject(int64_t ObjectID, return false; break; } - - // Add to usage counts - mBlocksUsed += size; - if(!isFile) - { - mBlocksInDirectories += size; - } } catch(...) { @@ -503,18 +526,56 @@ bool BackupStoreCheck::CheckAndAddObject(int64_t ObjectID, return false; } - // Debugging for Sune Molgaard's issue with non-existent files being - // detected as unattached and crashing later in CheckUnattachedObjects() - if (ObjectID == 0x90c1a) - { - BOX_INFO("Adding ID " << BOX_FORMAT_OBJECTID(ObjectID) << - " contained by " << BOX_FORMAT_OBJECTID(containerID) << - " with size " << size << " and isFile " << isFile); - } - // Add to list of IDs known about AddID(ObjectID, containerID, size, isFile); + // Add to usage counts + mBlocksUsed += size; + if(!isFile) + { + mBlocksInDirectories += size; + } + + // If it looks like a good object, and it's non-RAID, and + // this is a RAID set, then convert it to RAID. 
+ + RaidFileController &rcontroller(RaidFileController::GetController()); + RaidFileDiscSet rdiscSet(rcontroller.GetDiscSet(mDiscSetNumber)); + if(!rdiscSet.IsNonRaidSet()) + { + // See if the file exists + RaidFileUtil::ExistType existance = + RaidFileUtil::RaidFileExists(rdiscSet, rFilename); + if(existance == RaidFileUtil::NonRaid) + { + BOX_WARNING("Found non-RAID write file in RAID set" << + (mFixErrors?", transforming to RAID: ":"") << + (mFixErrors?rFilename:"")); + if(mFixErrors) + { + RaidFileWrite write(mDiscSetNumber, rFilename); + write.TransformToRaidStorage(); + } + } + else if(existance == RaidFileUtil::AsRaidWithMissingReadable) + { + BOX_WARNING("Found damaged but repairable RAID file" << + (mFixErrors?", repairing: ":"") << + (mFixErrors?rFilename:"")); + if(mFixErrors) + { + std::auto_ptr<RaidFileRead> read( + RaidFileRead::Open(mDiscSetNumber, + rFilename)); + RaidFileWrite write(mDiscSetNumber, rFilename); + write.Open(true /* overwrite */); + read->CopyStreamTo(write); + read.reset(); + write.Commit(true /* transform to RAID */); + } + } + } + // Report success return true; } @@ -645,13 +706,11 @@ void BackupStoreCheck::CheckDirectories() if(isModified && mFixErrors) { - BOX_WARNING("Fixing directory ID " << BOX_FORMAT_OBJECTID(pblock->mID[e])); - - // Save back to disc + BOX_WARNING("Writing modified directory to disk: " << + BOX_FORMAT_OBJECTID(pblock->mID[e])); RaidFileWrite fixed(mDiscSetNumber, filename); fixed.Open(true /* allow overwriting */); dir.WriteToStream(fixed); - // Commit it fixed.Commit(true /* convert to raid representation now */); } diff --git a/lib/backupstore/BackupStoreCheck.h b/lib/backupstore/BackupStoreCheck.h index e091432c..178a873a 100644 --- a/lib/backupstore/BackupStoreCheck.h +++ b/lib/backupstore/BackupStoreCheck.h @@ -28,9 +28,8 @@ The following problems can be fixed: * Spurious files deleted * Corrupted files deleted * Root ID as file, deleted - * Dirs with wrong object id inside, deleted - * Direcetory 
entries pointing to non-existant files, deleted - * Doubly references files have second reference deleted + * Dirs with wrong object id in header, deleted + * Doubly referenced files have second reference deleted * Wrong directory container IDs fixed * Missing root recreated * Reattach files which exist, but aren't referenced @@ -43,7 +42,9 @@ The following problems can be fixed: * Inside directories, - only one object per name has old version clear - IDs aren't duplicated - * Bad store info files regenerated + - entries pointing to non-existant files are deleted + - patches depending on non-existent objects are deleted + * Bad store info and refcount files regenerated * Bad sizes of files in directories fixed */ diff --git a/lib/raidfile/RaidFileController.cpp b/lib/raidfile/RaidFileController.cpp index e1305d9a..cf93947f 100644 --- a/lib/raidfile/RaidFileController.cpp +++ b/lib/raidfile/RaidFileController.cpp @@ -171,7 +171,11 @@ RaidFileDiscSet &RaidFileController::GetDiscSet(unsigned int DiscSetNum) return mSetList[DiscSetNum]; } - +// Overload to make usable in gdb debugger. +int RaidFileDiscSet::GetSetNumForWriteFiles(const char* filename) const +{ + return GetSetNumForWriteFiles(std::string(filename)); +} // -------------------------------------------------------------------------- // diff --git a/lib/raidfile/RaidFileController.h b/lib/raidfile/RaidFileController.h index 216bdf3a..601cca22 100644 --- a/lib/raidfile/RaidFileController.h +++ b/lib/raidfile/RaidFileController.h @@ -49,7 +49,7 @@ public: int GetSetID() const {return mSetID;} int GetSetNumForWriteFiles(const std::string &rFilename) const; - + int GetSetNumForWriteFiles(const char* filename) const; unsigned int GetBlockSize() const {return mBlockSize;} // Is this disc set a non-RAID disc set? 
(ie files never get transformed to raid storage) diff --git a/lib/raidfile/RaidFileRead.cpp b/lib/raidfile/RaidFileRead.cpp index 3dab69e5..bcff54c6 100644 --- a/lib/raidfile/RaidFileRead.cpp +++ b/lib/raidfile/RaidFileRead.cpp @@ -1025,8 +1025,8 @@ std::auto_ptr<RaidFileRead> RaidFileRead::Open(int SetNumber, const std::string RaidFileUtil::ExistType existance = RaidFileUtil::RaidFileExists(rdiscSet, Filename, &startDisc, &existingFiles, pRevisionID); if(existance == RaidFileUtil::NoFile) { - BOX_ERROR("Expected raidfile " << Filename << " does not exist"); - THROW_EXCEPTION(RaidFileException, RaidFileDoesntExist) + THROW_FILE_ERROR("Expected raidfile does not exist", + Filename, RaidFileException, RaidFileDoesntExist); } else if(existance == RaidFileUtil::NonRaid) { @@ -1588,7 +1588,7 @@ bool RaidFileRead::ReadDirectoryContents(int SetNumber, const std::string &rDirN { // build name std::string dn(rdiscSet[l] + DIRECTORY_SEPARATOR + rDirName); - + // read the contents... DIR *dirHandle = 0; try diff --git a/test/backupstorefix/testbackupstorefix.cpp b/test/backupstorefix/testbackupstorefix.cpp index 4ad8f7d0..bc9911f0 100644 --- a/test/backupstorefix/testbackupstorefix.cpp +++ b/test/backupstorefix/testbackupstorefix.cpp @@ -32,6 +32,7 @@ #include "RaidFileController.h" #include "RaidFileException.h" #include "RaidFileRead.h" +#include "RaidFileUtil.h" #include "RaidFileWrite.h" #include "ServerControl.h" #include "StoreStructure.h" @@ -75,6 +76,26 @@ std::map<int32_t, bool> objectIsDir; ::system(BBSTOREACCOUNTS " -c testfiles/bbstored.conf check 01234567"); \ ::system(BBSTOREACCOUNTS " -c testfiles/bbstored.conf check 01234567 fix"); +bool check_fix_internal(int expected_num_errors) +{ + BackupStoreCheck checker(storeRoot, discSetNum, + 0x01234567, true /* FixErrors */, false /* Quiet */); + checker.Check(); + if (expected_num_errors == -1) + { + TEST_THAT(checker.ErrorsFound()); + return checker.ErrorsFound(); + } + else + { + TEST_EQUAL(expected_num_errors, 
checker.GetNumErrorsFound()); + return checker.GetNumErrorsFound() == expected_num_errors; + } +} + +#define RUN_CHECK_INTERNAL(expected_num_errors) \ + TEST_THAT(check_fix_internal(expected_num_errors)) + // Get ID of an object given a filename int32_t getID(const char *name) { @@ -399,11 +420,7 @@ void check_root_dir_ok(dir_en_check after_entries[], { // Check the store, check that the error is detected and // repaired, by removing x1 from the directory. - BackupStoreCheck check(storeRoot, discSetNum, - 0x01234567 /* AccountID */, false /* FixErrors */, - true /* Quiet */); - check.Check(); - TEST_THAT(!check.ErrorsFound()); + RUN_CHECK_INTERNAL(0); // Read the directory back in, check that it's empty BackupStoreDirectory dir; @@ -418,12 +435,7 @@ void check_and_fix_root_dir(dir_en_check after_entries[], { // Check the store, check that the error is detected and // repaired. - BackupStoreCheck check(storeRoot, discSetNum, - 0x01234567 /* AccountID */, true /* FixErrors */, - true /* Quiet */); - check.Check(); - TEST_THAT(check.ErrorsFound()); - + RUN_CHECK_INTERNAL(-1); check_root_dir_ok(after_entries, after_deps); } @@ -529,6 +541,9 @@ int test(int argc, const char *argv[]) check_and_fix_root_dir(after_entries, after_deps); } + BOX_INFO(" === Test that an entry pointing to a directory whose " + "raidfile is corrupted doesn't crash"); + // Start the bbstored server BOX_TRACE(" === Starting bbstored server: " BBSTORED " testfiles/bbstored.conf"); @@ -712,8 +727,39 @@ int test(int argc, const char *argv[]) f.Commit(true /* write now! */); } +#ifndef BOX_RELEASE_BUILD + // Delete two of the three raidfiles and their parent + // directories. This used to crash bbstoreaccounts check. + // We can only do this, without destroying the entire store, + // in debug mode, where the store has a far deeper + // structure. + // This will destroy or damage objects 18-1b and 58-5b, + // some repairably. 
+ #define RUN(x) TEST_THAT(system(x) == 0); + RUN("mv testfiles/0_0/backup/01234567/02/01/o00.rf " + "testfiles/0_0/backup/01234567/02/01/o00.rfw"); // 0x18 + RUN("mv testfiles/0_1/backup/01234567/02/01/o01.rf " + "testfiles/0_1/backup/01234567/02/01/o01.rfw"); // 0x19 + //RUN("mv testfiles/0_2/backup/01234567/02/01/o02.rf " + // "testfiles/0_0/backup/01234567/02/01/o02.rfw"); // 0x1a + RUN("mv testfiles/0_0/backup/01234567/02/01/o03.rf " + "testfiles/0_0/backup/01234567/02/01/o03.rfw"); // 0x1b + RUN("mv testfiles/0_0/backup/01234567/02/01/01/o00.rf " + "testfiles/0_0/backup/01234567/02/01/01/o00.rfw"); // 0x58 + RUN("mv testfiles/0_1/backup/01234567/02/01/01/o01.rf " + "testfiles/0_1/backup/01234567/02/01/01/o01.rfw"); // 0x59 + //RUN("mv testfiles/0_2/backup/01234567/02/01/01/o02.rf " + // "testfiles/0_0/backup/01234567/02/01/01/o02.rfw"); // 0x5a + RUN("mv testfiles/0_0/backup/01234567/02/01/01/o03.rf " + "testfiles/0_0/backup/01234567/02/01/01/o03.rfw"); // 0x5b + // RUN("rm -r testfiles/0_1/backup/01234567/02/01"); + RUN("rm -r testfiles/0_2/backup/01234567/02/01"); + #undef RUN +#endif // BOX_RELEASE_BUILD + // Fix it - RUN_CHECK + RUN_CHECK_INTERNAL(3); + // Check TEST_THAT(::system(PERL_EXECUTABLE " testfiles/testbackupstorefix.pl check 1") @@ -721,6 +767,28 @@ int test(int argc, const char *argv[]) // Check the modified file doesn't exist TEST_THAT(!RaidFileRead::FileExists(discSetNum, fn)); + + // Check that the missing RaidFiles were regenerated and + // committed. FileExists returns NonRaid if it find a .rfw + // file, so checking for AsRaid excludes this possibility. 
+ RaidFileController &rcontroller(RaidFileController::GetController()); + RaidFileDiscSet rdiscSet(rcontroller.GetDiscSet(discSetNum)); + TEST_EQUAL(RaidFileUtil::AsRaid, RaidFileUtil::RaidFileExists( + rdiscSet, "backup/01234567/02/01/o00")); + TEST_EQUAL(RaidFileUtil::AsRaid, RaidFileUtil::RaidFileExists( + rdiscSet, "backup/01234567/02/01/o01")); + TEST_EQUAL(RaidFileUtil::AsRaid, RaidFileUtil::RaidFileExists( + rdiscSet, "backup/01234567/02/01/o02")); + TEST_EQUAL(RaidFileUtil::AsRaid, RaidFileUtil::RaidFileExists( + rdiscSet, "backup/01234567/02/01/o03")); + TEST_EQUAL(RaidFileUtil::AsRaid, RaidFileUtil::RaidFileExists( + rdiscSet, "backup/01234567/02/01/01/o00")); + TEST_EQUAL(RaidFileUtil::AsRaid, RaidFileUtil::RaidFileExists( + rdiscSet, "backup/01234567/02/01/01/o01")); + TEST_EQUAL(RaidFileUtil::AsRaid, RaidFileUtil::RaidFileExists( + rdiscSet, "backup/01234567/02/01/01/o02")); + TEST_EQUAL(RaidFileUtil::AsRaid, RaidFileUtil::RaidFileExists( + rdiscSet, "backup/01234567/02/01/01/o03")); } // ------------------------------------------------------------------------------------------------ @@ -761,33 +829,25 @@ int test(int argc, const char *argv[]) DeleteObject("Test1/pass/cacted/ming"); // Delete a file DeleteObject("Test1/cannes/ict/scely"); - // Fix it - { - // Check it - BackupStoreCheck checker(storeRoot, discSetNum, - 0x01234567, true /* FixErrors */, false /* Quiet */); - checker.Check(); - // Should just be greater than 1 really, we don't know quite - // how good the checker is (or will become) at spotting errors! - // But this will help us catch changes in checker behaviour, - // so it's not a bad thing to test. - - // The 11 errors are: - // ERROR: Directory ID 0xb references object 0x3e which does not exist. - // ERROR: Removing directory entry 0x3e from directory 0xb - // ERROR: Directory ID 0xc had invalid entries, fixed - // ERROR: Directory ID 0xc has wrong size for object 0x40 - // ERROR: Directory ID 0x17 has wrong container ID. 
- // ERROR: Object 0x51 is unattached. - // ERROR: Object 0x52 is unattached. - // ERROR: BlocksUsed changed from 282 to 278 - // ERROR: BlocksInCurrentFiles changed from 226 to 220 - // ERROR: BlocksInDirectories changed from 56 to 54 - // ERROR: NumFiles changed from 113 to 110 - - TEST_EQUAL(11, checker.GetNumErrorsFound()); - } + // We don't know quite how good the checker is (or will become) at + // spotting errors! But asserting an exact number will help us catch + // changes in checker behaviour, so it's not a bad thing to test. + + // The 11 errors are: + // ERROR: Directory ID 0xb references object 0x3e which does not exist. + // ERROR: Removing directory entry 0x3e from directory 0xb + // ERROR: Directory ID 0xc had invalid entries, fixed + // ERROR: Directory ID 0xc has wrong size for object 0x40 + // ERROR: Directory ID 0x17 has wrong container ID. + // ERROR: Object 0x51 is unattached. + // ERROR: Object 0x52 is unattached. + // ERROR: BlocksUsed changed from 282 to 278 + // ERROR: BlocksInCurrentFiles changed from 226 to 220 + // ERROR: BlocksInDirectories changed from 56 to 54 + // ERROR: NumFiles changed from 113 to 110 + + RUN_CHECK_INTERNAL(11); // Check everything is as it should be TEST_THAT(::system(PERL_EXECUTABLE |