summary refs log tree commit diff
diff options
context:
space:
mode:
author Chris Wilson <chris+github@qwirx.com> 2013-09-20 15:17:06 +0000
committer Chris Wilson <chris+github@qwirx.com> 2013-09-20 15:17:06 +0000
commit f65a58a02d90a75cb08d05e5575867bebb7cb784 (patch)
tree bdf69bebc3ea74c596542d3502a177557b4990cb
parent 0540ad493355d0ab5dc56354a43fccd8b6adea5e (diff)
Repair damaged Box RAID objects and non-transformed .rfw files.
-rw-r--r-- lib/backupstore/BackupStoreCheck.cpp 111
-rw-r--r-- lib/backupstore/BackupStoreCheck.h 9
-rw-r--r-- lib/raidfile/RaidFileController.cpp 6
-rw-r--r-- lib/raidfile/RaidFileController.h 2
-rw-r--r-- lib/raidfile/RaidFileRead.cpp 6
-rw-r--r-- test/backupstorefix/testbackupstorefix.cpp 136
6 files changed, 197 insertions, 73 deletions
diff --git a/lib/backupstore/BackupStoreCheck.cpp b/lib/backupstore/BackupStoreCheck.cpp
index b8b8cc40..f2302337 100644
--- a/lib/backupstore/BackupStoreCheck.cpp
+++ b/lib/backupstore/BackupStoreCheck.cpp
@@ -16,15 +16,18 @@
# include <unistd.h>
#endif
+#include "autogen_BackupStoreException.h"
#include "BackupStoreCheck.h"
-#include "StoreStructure.h"
+#include "BackupStoreConstants.h"
+#include "BackupStoreDirectory.h"
+#include "BackupStoreFile.h"
+#include "BackupStoreObjectMagic.h"
+#include "RaidFileController.h"
+#include "RaidFileException.h"
#include "RaidFileRead.h"
+#include "RaidFileUtil.h"
#include "RaidFileWrite.h"
-#include "autogen_BackupStoreException.h"
-#include "BackupStoreObjectMagic.h"
-#include "BackupStoreFile.h"
-#include "BackupStoreDirectory.h"
-#include "BackupStoreConstants.h"
+#include "StoreStructure.h"
#include "Utils.h"
#include "MemLeakFindOn.h"
@@ -281,6 +284,33 @@ int64_t BackupStoreCheck::CheckObjectsScanDir(int64_t StartID, int Level, const
// Read in all the directories, and recurse downwards
{
+ // If any of the directories is missing, create it.
+ RaidFileController &rcontroller(RaidFileController::GetController());
+ RaidFileDiscSet rdiscSet(rcontroller.GetDiscSet(mDiscSetNumber));
+
+ if(!rdiscSet.IsNonRaidSet())
+ {
+ unsigned int numDiscs = rdiscSet.size();
+
+ for(unsigned int l = 0; l < numDiscs; ++l)
+ {
+ // build name
+ std::string dn(rdiscSet[l] + DIRECTORY_SEPARATOR + rDirName);
+ struct stat st;
+
+ if(stat(dn.c_str(), &st) != 0 && errno == ENOENT)
+ {
+ if(mkdir(dn.c_str(), 0755) != 0)
+ {
+ THROW_SYS_FILE_ERROR("Failed to "
+ "create missing RaidFile "
+ "directory", dn,
+ RaidFileException, OSError);
+ }
+ }
+ }
+ }
+
std::vector<std::string> dirs;
RaidFileRead::ReadDirectoryContents(mDiscSetNumber, rDirName,
RaidFileRead::DirReadType_DirsOnly, dirs);
@@ -483,13 +513,6 @@ bool BackupStoreCheck::CheckAndAddObject(int64_t ObjectID,
return false;
break;
}
-
- // Add to usage counts
- mBlocksUsed += size;
- if(!isFile)
- {
- mBlocksInDirectories += size;
- }
}
catch(...)
{
@@ -503,18 +526,56 @@ bool BackupStoreCheck::CheckAndAddObject(int64_t ObjectID,
return false;
}
- // Debugging for Sune Molgaard's issue with non-existent files being
- // detected as unattached and crashing later in CheckUnattachedObjects()
- if (ObjectID == 0x90c1a)
- {
- BOX_INFO("Adding ID " << BOX_FORMAT_OBJECTID(ObjectID) <<
- " contained by " << BOX_FORMAT_OBJECTID(containerID) <<
- " with size " << size << " and isFile " << isFile);
- }
-
// Add to list of IDs known about
AddID(ObjectID, containerID, size, isFile);
+ // Add to usage counts
+ mBlocksUsed += size;
+ if(!isFile)
+ {
+ mBlocksInDirectories += size;
+ }
+
+ // If it looks like a good object, and it's non-RAID, and
+ // this is a RAID set, then convert it to RAID.
+
+ RaidFileController &rcontroller(RaidFileController::GetController());
+ RaidFileDiscSet rdiscSet(rcontroller.GetDiscSet(mDiscSetNumber));
+ if(!rdiscSet.IsNonRaidSet())
+ {
+ // See if the file exists
+ RaidFileUtil::ExistType existance =
+ RaidFileUtil::RaidFileExists(rdiscSet, rFilename);
+ if(existance == RaidFileUtil::NonRaid)
+ {
+ BOX_WARNING("Found non-RAID write file in RAID set" <<
+ (mFixErrors?", transforming to RAID: ":"") <<
+ (mFixErrors?rFilename:""));
+ if(mFixErrors)
+ {
+ RaidFileWrite write(mDiscSetNumber, rFilename);
+ write.TransformToRaidStorage();
+ }
+ }
+ else if(existance == RaidFileUtil::AsRaidWithMissingReadable)
+ {
+ BOX_WARNING("Found damaged but repairable RAID file" <<
+ (mFixErrors?", repairing: ":"") <<
+ (mFixErrors?rFilename:""));
+ if(mFixErrors)
+ {
+ std::auto_ptr<RaidFileRead> read(
+ RaidFileRead::Open(mDiscSetNumber,
+ rFilename));
+ RaidFileWrite write(mDiscSetNumber, rFilename);
+ write.Open(true /* overwrite */);
+ read->CopyStreamTo(write);
+ read.reset();
+ write.Commit(true /* transform to RAID */);
+ }
+ }
+ }
+
// Report success
return true;
}
@@ -645,13 +706,11 @@ void BackupStoreCheck::CheckDirectories()
if(isModified && mFixErrors)
{
- BOX_WARNING("Fixing directory ID " << BOX_FORMAT_OBJECTID(pblock->mID[e]));
-
- // Save back to disc
+ BOX_WARNING("Writing modified directory to disk: " <<
+ BOX_FORMAT_OBJECTID(pblock->mID[e]));
RaidFileWrite fixed(mDiscSetNumber, filename);
fixed.Open(true /* allow overwriting */);
dir.WriteToStream(fixed);
- // Commit it
fixed.Commit(true /* convert to raid representation now */);
}
diff --git a/lib/backupstore/BackupStoreCheck.h b/lib/backupstore/BackupStoreCheck.h
index e091432c..178a873a 100644
--- a/lib/backupstore/BackupStoreCheck.h
+++ b/lib/backupstore/BackupStoreCheck.h
@@ -28,9 +28,8 @@ The following problems can be fixed:
* Spurious files deleted
* Corrupted files deleted
* Root ID as file, deleted
- * Dirs with wrong object id inside, deleted
- * Direcetory entries pointing to non-existant files, deleted
- * Doubly references files have second reference deleted
+ * Dirs with wrong object id in header, deleted
+ * Doubly referenced files have second reference deleted
* Wrong directory container IDs fixed
* Missing root recreated
* Reattach files which exist, but aren't referenced
@@ -43,7 +42,9 @@ The following problems can be fixed:
* Inside directories,
- only one object per name has old version clear
- IDs aren't duplicated
- * Bad store info files regenerated
+ - entries pointing to non-existent files are deleted
+ - patches depending on non-existent objects are deleted
+ * Bad store info and refcount files regenerated
* Bad sizes of files in directories fixed
*/
diff --git a/lib/raidfile/RaidFileController.cpp b/lib/raidfile/RaidFileController.cpp
index e1305d9a..cf93947f 100644
--- a/lib/raidfile/RaidFileController.cpp
+++ b/lib/raidfile/RaidFileController.cpp
@@ -171,7 +171,11 @@ RaidFileDiscSet &RaidFileController::GetDiscSet(unsigned int DiscSetNum)
return mSetList[DiscSetNum];
}
-
+// Overload to make usable in gdb debugger.
+int RaidFileDiscSet::GetSetNumForWriteFiles(const char* filename) const
+{
+ return GetSetNumForWriteFiles(std::string(filename));
+}
// --------------------------------------------------------------------------
//
diff --git a/lib/raidfile/RaidFileController.h b/lib/raidfile/RaidFileController.h
index 216bdf3a..601cca22 100644
--- a/lib/raidfile/RaidFileController.h
+++ b/lib/raidfile/RaidFileController.h
@@ -49,7 +49,7 @@ public:
int GetSetID() const {return mSetID;}
int GetSetNumForWriteFiles(const std::string &rFilename) const;
-
+ int GetSetNumForWriteFiles(const char* filename) const;
unsigned int GetBlockSize() const {return mBlockSize;}
// Is this disc set a non-RAID disc set? (ie files never get transformed to raid storage)
diff --git a/lib/raidfile/RaidFileRead.cpp b/lib/raidfile/RaidFileRead.cpp
index 3dab69e5..bcff54c6 100644
--- a/lib/raidfile/RaidFileRead.cpp
+++ b/lib/raidfile/RaidFileRead.cpp
@@ -1025,8 +1025,8 @@ std::auto_ptr<RaidFileRead> RaidFileRead::Open(int SetNumber, const std::string
RaidFileUtil::ExistType existance = RaidFileUtil::RaidFileExists(rdiscSet, Filename, &startDisc, &existingFiles, pRevisionID);
if(existance == RaidFileUtil::NoFile)
{
- BOX_ERROR("Expected raidfile " << Filename << " does not exist");
- THROW_EXCEPTION(RaidFileException, RaidFileDoesntExist)
+ THROW_FILE_ERROR("Expected raidfile does not exist",
+ Filename, RaidFileException, RaidFileDoesntExist);
}
else if(existance == RaidFileUtil::NonRaid)
{
@@ -1588,7 +1588,7 @@ bool RaidFileRead::ReadDirectoryContents(int SetNumber, const std::string &rDirN
{
// build name
std::string dn(rdiscSet[l] + DIRECTORY_SEPARATOR + rDirName);
-
+
// read the contents...
DIR *dirHandle = 0;
try
diff --git a/test/backupstorefix/testbackupstorefix.cpp b/test/backupstorefix/testbackupstorefix.cpp
index 4ad8f7d0..bc9911f0 100644
--- a/test/backupstorefix/testbackupstorefix.cpp
+++ b/test/backupstorefix/testbackupstorefix.cpp
@@ -32,6 +32,7 @@
#include "RaidFileController.h"
#include "RaidFileException.h"
#include "RaidFileRead.h"
+#include "RaidFileUtil.h"
#include "RaidFileWrite.h"
#include "ServerControl.h"
#include "StoreStructure.h"
@@ -75,6 +76,26 @@ std::map<int32_t, bool> objectIsDir;
::system(BBSTOREACCOUNTS " -c testfiles/bbstored.conf check 01234567"); \
::system(BBSTOREACCOUNTS " -c testfiles/bbstored.conf check 01234567 fix");
+bool check_fix_internal(int expected_num_errors)
+{
+ BackupStoreCheck checker(storeRoot, discSetNum,
+ 0x01234567, true /* FixErrors */, false /* Quiet */);
+ checker.Check();
+ if (expected_num_errors == -1)
+ {
+ TEST_THAT(checker.ErrorsFound());
+ return checker.ErrorsFound();
+ }
+ else
+ {
+ TEST_EQUAL(expected_num_errors, checker.GetNumErrorsFound());
+ return checker.GetNumErrorsFound() == expected_num_errors;
+ }
+}
+
+#define RUN_CHECK_INTERNAL(expected_num_errors) \
+ TEST_THAT(check_fix_internal(expected_num_errors))
+
// Get ID of an object given a filename
int32_t getID(const char *name)
{
@@ -399,11 +420,7 @@ void check_root_dir_ok(dir_en_check after_entries[],
{
// Check the store, check that the error is detected and
// repaired, by removing x1 from the directory.
- BackupStoreCheck check(storeRoot, discSetNum,
- 0x01234567 /* AccountID */, false /* FixErrors */,
- true /* Quiet */);
- check.Check();
- TEST_THAT(!check.ErrorsFound());
+ RUN_CHECK_INTERNAL(0);
// Read the directory back in, check that it's empty
BackupStoreDirectory dir;
@@ -418,12 +435,7 @@ void check_and_fix_root_dir(dir_en_check after_entries[],
{
// Check the store, check that the error is detected and
// repaired.
- BackupStoreCheck check(storeRoot, discSetNum,
- 0x01234567 /* AccountID */, true /* FixErrors */,
- true /* Quiet */);
- check.Check();
- TEST_THAT(check.ErrorsFound());
-
+ RUN_CHECK_INTERNAL(-1);
check_root_dir_ok(after_entries, after_deps);
}
@@ -529,6 +541,9 @@ int test(int argc, const char *argv[])
check_and_fix_root_dir(after_entries, after_deps);
}
+ BOX_INFO(" === Test that an entry pointing to a directory whose "
+ "raidfile is corrupted doesn't crash");
+
// Start the bbstored server
BOX_TRACE(" === Starting bbstored server: " BBSTORED
" testfiles/bbstored.conf");
@@ -712,8 +727,39 @@ int test(int argc, const char *argv[])
f.Commit(true /* write now! */);
}
+#ifndef BOX_RELEASE_BUILD
+ // Delete two of the three raidfiles and their parent
+ // directories. This used to crash bbstoreaccounts check.
+ // We can only do this, without destroying the entire store,
+ // in debug mode, where the store has a far deeper
+ // structure.
+ // This will destroy or damage objects 18-1b and 58-5b,
+ // some repairably.
+ #define RUN(x) TEST_THAT(system(x) == 0);
+ RUN("mv testfiles/0_0/backup/01234567/02/01/o00.rf "
+ "testfiles/0_0/backup/01234567/02/01/o00.rfw"); // 0x18
+ RUN("mv testfiles/0_1/backup/01234567/02/01/o01.rf "
+ "testfiles/0_1/backup/01234567/02/01/o01.rfw"); // 0x19
+ //RUN("mv testfiles/0_2/backup/01234567/02/01/o02.rf "
+ // "testfiles/0_0/backup/01234567/02/01/o02.rfw"); // 0x1a
+ RUN("mv testfiles/0_0/backup/01234567/02/01/o03.rf "
+ "testfiles/0_0/backup/01234567/02/01/o03.rfw"); // 0x1b
+ RUN("mv testfiles/0_0/backup/01234567/02/01/01/o00.rf "
+ "testfiles/0_0/backup/01234567/02/01/01/o00.rfw"); // 0x58
+ RUN("mv testfiles/0_1/backup/01234567/02/01/01/o01.rf "
+ "testfiles/0_1/backup/01234567/02/01/01/o01.rfw"); // 0x59
+ //RUN("mv testfiles/0_2/backup/01234567/02/01/01/o02.rf "
+ // "testfiles/0_0/backup/01234567/02/01/01/o02.rfw"); // 0x5a
+ RUN("mv testfiles/0_0/backup/01234567/02/01/01/o03.rf "
+ "testfiles/0_0/backup/01234567/02/01/01/o03.rfw"); // 0x5b
+ // RUN("rm -r testfiles/0_1/backup/01234567/02/01");
+ RUN("rm -r testfiles/0_2/backup/01234567/02/01");
+ #undef RUN
+#endif // BOX_RELEASE_BUILD
+
// Fix it
- RUN_CHECK
+ RUN_CHECK_INTERNAL(3);
+
// Check
TEST_THAT(::system(PERL_EXECUTABLE
" testfiles/testbackupstorefix.pl check 1")
@@ -721,6 +767,28 @@ int test(int argc, const char *argv[])
// Check the modified file doesn't exist
TEST_THAT(!RaidFileRead::FileExists(discSetNum, fn));
+
+ // Check that the missing RaidFiles were regenerated and
+ // committed. RaidFileExists returns NonRaid if it finds a .rfw
+ // file, so checking for AsRaid excludes this possibility.
+ RaidFileController &rcontroller(RaidFileController::GetController());
+ RaidFileDiscSet rdiscSet(rcontroller.GetDiscSet(discSetNum));
+ TEST_EQUAL(RaidFileUtil::AsRaid, RaidFileUtil::RaidFileExists(
+ rdiscSet, "backup/01234567/02/01/o00"));
+ TEST_EQUAL(RaidFileUtil::AsRaid, RaidFileUtil::RaidFileExists(
+ rdiscSet, "backup/01234567/02/01/o01"));
+ TEST_EQUAL(RaidFileUtil::AsRaid, RaidFileUtil::RaidFileExists(
+ rdiscSet, "backup/01234567/02/01/o02"));
+ TEST_EQUAL(RaidFileUtil::AsRaid, RaidFileUtil::RaidFileExists(
+ rdiscSet, "backup/01234567/02/01/o03"));
+ TEST_EQUAL(RaidFileUtil::AsRaid, RaidFileUtil::RaidFileExists(
+ rdiscSet, "backup/01234567/02/01/01/o00"));
+ TEST_EQUAL(RaidFileUtil::AsRaid, RaidFileUtil::RaidFileExists(
+ rdiscSet, "backup/01234567/02/01/01/o01"));
+ TEST_EQUAL(RaidFileUtil::AsRaid, RaidFileUtil::RaidFileExists(
+ rdiscSet, "backup/01234567/02/01/01/o02"));
+ TEST_EQUAL(RaidFileUtil::AsRaid, RaidFileUtil::RaidFileExists(
+ rdiscSet, "backup/01234567/02/01/01/o03"));
}
// ------------------------------------------------------------------------------------------------
@@ -761,33 +829,25 @@ int test(int argc, const char *argv[])
DeleteObject("Test1/pass/cacted/ming");
// Delete a file
DeleteObject("Test1/cannes/ict/scely");
- // Fix it
- {
- // Check it
- BackupStoreCheck checker(storeRoot, discSetNum,
- 0x01234567, true /* FixErrors */, false /* Quiet */);
- checker.Check();
- // Should just be greater than 1 really, we don't know quite
- // how good the checker is (or will become) at spotting errors!
- // But this will help us catch changes in checker behaviour,
- // so it's not a bad thing to test.
-
- // The 11 errors are:
- // ERROR: Directory ID 0xb references object 0x3e which does not exist.
- // ERROR: Removing directory entry 0x3e from directory 0xb
- // ERROR: Directory ID 0xc had invalid entries, fixed
- // ERROR: Directory ID 0xc has wrong size for object 0x40
- // ERROR: Directory ID 0x17 has wrong container ID.
- // ERROR: Object 0x51 is unattached.
- // ERROR: Object 0x52 is unattached.
- // ERROR: BlocksUsed changed from 282 to 278
- // ERROR: BlocksInCurrentFiles changed from 226 to 220
- // ERROR: BlocksInDirectories changed from 56 to 54
- // ERROR: NumFiles changed from 113 to 110
-
- TEST_EQUAL(11, checker.GetNumErrorsFound());
- }
+ // We don't know quite how good the checker is (or will become) at
+ // spotting errors! But asserting an exact number will help us catch
+ // changes in checker behaviour, so it's not a bad thing to test.
+
+ // The 11 errors are:
+ // ERROR: Directory ID 0xb references object 0x3e which does not exist.
+ // ERROR: Removing directory entry 0x3e from directory 0xb
+ // ERROR: Directory ID 0xc had invalid entries, fixed
+ // ERROR: Directory ID 0xc has wrong size for object 0x40
+ // ERROR: Directory ID 0x17 has wrong container ID.
+ // ERROR: Object 0x51 is unattached.
+ // ERROR: Object 0x52 is unattached.
+ // ERROR: BlocksUsed changed from 282 to 278
+ // ERROR: BlocksInCurrentFiles changed from 226 to 220
+ // ERROR: BlocksInDirectories changed from 56 to 54
+ // ERROR: NumFiles changed from 113 to 110
+
+ RUN_CHECK_INTERNAL(11);
// Check everything is as it should be
TEST_THAT(::system(PERL_EXECUTABLE