summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--cmds-check.c356
-rw-r--r--disk-io.c2
-rw-r--r--extent-tree.c22
-rw-r--r--tests/fsck-tests.sh15
-rw-r--r--tests/fsck-tests/006-bad_root_items_fs.tar.xzbin0 -> 24980 bytes
-rw-r--r--tests/fsck-tests/007-bad_root_items_fs_skinny.tar.xzbin0 -> 26520 bytes
-rw-r--r--utils.c21
-rw-r--r--utils.h2
8 files changed, 395 insertions, 23 deletions
diff --git a/cmds-check.c b/cmds-check.c
index 310eb2a8..2a5f823d 100644
--- a/cmds-check.c
+++ b/cmds-check.c
@@ -7198,6 +7198,345 @@ static int fill_csum_tree(struct btrfs_trans_handle *trans,
return ret;
}
+struct root_item_info {
+ /* level of the root */
+ u8 level;
+ /* number of nodes at this level, must be 1 for a root */
+ int node_count;
+ u64 bytenr;
+ u64 gen;
+ struct cache_extent cache_extent;
+};
+
+static struct cache_tree *roots_info_cache = NULL;
+
+static void free_roots_info_cache(void)
+{
+ if (!roots_info_cache)
+ return;
+
+ while (!cache_tree_empty(roots_info_cache)) {
+ struct cache_extent *entry;
+ struct root_item_info *rii;
+
+ entry = first_cache_extent(roots_info_cache);
+ remove_cache_extent(roots_info_cache, entry);
+ rii = container_of(entry, struct root_item_info, cache_extent);
+ free(rii);
+ }
+
+ free(roots_info_cache);
+ roots_info_cache = NULL;
+}
+
+static int build_roots_info_cache(struct btrfs_fs_info *info)
+{
+ int ret = 0;
+ struct btrfs_key key;
+ struct extent_buffer *leaf;
+ struct btrfs_path *path;
+
+ if (!roots_info_cache) {
+ roots_info_cache = malloc(sizeof(*roots_info_cache));
+ if (!roots_info_cache)
+ return -ENOMEM;
+ cache_tree_init(roots_info_cache);
+ }
+
+ path = btrfs_alloc_path();
+ if (!path)
+ return -ENOMEM;
+
+ key.objectid = 0;
+ key.type = BTRFS_EXTENT_ITEM_KEY;
+ key.offset = 0;
+
+ ret = btrfs_search_slot(NULL, info->extent_root, &key, path, 0, 0);
+ if (ret < 0)
+ goto out;
+ leaf = path->nodes[0];
+
+ while (1) {
+ struct btrfs_key found_key;
+ struct btrfs_extent_item *ei;
+ struct btrfs_extent_inline_ref *iref;
+ int slot = path->slots[0];
+ int type;
+ u64 flags;
+ u64 root_id;
+ u8 level;
+ struct cache_extent *entry;
+ struct root_item_info *rii;
+
+ if (slot >= btrfs_header_nritems(leaf)) {
+ ret = btrfs_next_leaf(info->extent_root, path);
+ if (ret < 0) {
+ break;
+ } else if (ret) {
+ ret = 0;
+ break;
+ }
+ leaf = path->nodes[0];
+ slot = path->slots[0];
+ }
+
+ btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
+
+ if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
+ found_key.type != BTRFS_METADATA_ITEM_KEY)
+ goto next;
+
+ ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
+ flags = btrfs_extent_flags(leaf, ei);
+
+ if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
+ !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
+ goto next;
+
+ if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
+ iref = (struct btrfs_extent_inline_ref *)(ei + 1);
+ level = found_key.offset;
+ } else {
+ struct btrfs_tree_block_info *info;
+
+ info = (struct btrfs_tree_block_info *)(ei + 1);
+ iref = (struct btrfs_extent_inline_ref *)(info + 1);
+ level = btrfs_tree_block_level(leaf, info);
+ }
+
+ /*
+ * For a root extent, it must be of the following type and the
+ * first (and only one) iref in the item.
+ */
+ type = btrfs_extent_inline_ref_type(leaf, iref);
+ if (type != BTRFS_TREE_BLOCK_REF_KEY)
+ goto next;
+
+ root_id = btrfs_extent_inline_ref_offset(leaf, iref);
+ entry = lookup_cache_extent(roots_info_cache, root_id, 1);
+ if (!entry) {
+ rii = malloc(sizeof(struct root_item_info));
+ if (!rii) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ rii->cache_extent.start = root_id;
+ rii->cache_extent.size = 1;
+ rii->level = (u8)-1;
+ entry = &rii->cache_extent;
+ ret = insert_cache_extent(roots_info_cache, entry);
+ ASSERT(ret == 0);
+ } else {
+ rii = container_of(entry, struct root_item_info,
+ cache_extent);
+ }
+
+ ASSERT(rii->cache_extent.start == root_id);
+ ASSERT(rii->cache_extent.size == 1);
+
+ if (level > rii->level || rii->level == (u8)-1) {
+ rii->level = level;
+ rii->bytenr = found_key.objectid;
+ rii->gen = btrfs_extent_generation(leaf, ei);
+ rii->node_count = 1;
+ } else if (level == rii->level) {
+ rii->node_count++;
+ }
+next:
+ path->slots[0]++;
+ }
+
+out:
+ btrfs_free_path(path);
+
+ return ret;
+}
+
+static int maybe_repair_root_item(struct btrfs_fs_info *info,
+ struct btrfs_path *path,
+ const struct btrfs_key *root_key,
+ const int read_only_mode)
+{
+ const u64 root_id = root_key->objectid;
+ struct cache_extent *entry;
+ struct root_item_info *rii;
+ struct btrfs_root_item ri;
+ unsigned long offset;
+
+ entry = lookup_cache_extent(roots_info_cache, root_id, 1);
+ if (!entry) {
+ fprintf(stderr,
+ "Error: could not find extent items for root %llu\n",
+ root_key->objectid);
+ return -ENOENT;
+ }
+
+ rii = container_of(entry, struct root_item_info, cache_extent);
+ ASSERT(rii->cache_extent.start == root_id);
+ ASSERT(rii->cache_extent.size == 1);
+
+ if (rii->node_count != 1) {
+ fprintf(stderr,
+ "Error: could not find btree root extent for root %llu\n",
+ root_id);
+ return -ENOENT;
+ }
+
+ offset = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
+ read_extent_buffer(path->nodes[0], &ri, offset, sizeof(ri));
+
+ if (btrfs_root_bytenr(&ri) != rii->bytenr ||
+ btrfs_root_level(&ri) != rii->level ||
+ btrfs_root_generation(&ri) != rii->gen) {
+
+ /*
+ * If we're in repair mode but our caller told us to not update
+ * the root item, i.e. just check if it needs to be updated, don't
+ * print this message, since the caller will call us again shortly
+ * for the same root item without read only mode (the caller will
+ * open a transaction first).
+ */
+ if (!(read_only_mode && repair))
+ fprintf(stderr,
+ "%sroot item for root %llu,"
+ " current bytenr %llu, current gen %llu, current level %u,"
+ " new bytenr %llu, new gen %llu, new level %u\n",
+ (read_only_mode ? "" : "fixing "),
+ root_id,
+ btrfs_root_bytenr(&ri), btrfs_root_generation(&ri),
+ btrfs_root_level(&ri),
+ rii->bytenr, rii->gen, rii->level);
+
+ if (btrfs_root_generation(&ri) > rii->gen) {
+ fprintf(stderr,
+ "root %llu has a root item with a more recent gen (%llu) compared to the found root node (%llu)\n",
+ root_id, btrfs_root_generation(&ri), rii->gen);
+ return -EINVAL;
+ }
+
+ if (!read_only_mode) {
+ btrfs_set_root_bytenr(&ri, rii->bytenr);
+ btrfs_set_root_level(&ri, rii->level);
+ btrfs_set_root_generation(&ri, rii->gen);
+ write_extent_buffer(path->nodes[0], &ri,
+ offset, sizeof(ri));
+ }
+
+ return 1;
+ }
+
+ return 0;
+}
+
+/*
+ * A regression introduced in the 3.17 kernel (more specifically in 3.17-rc2),
+ * caused read-only snapshots to be corrupted if they were created at a moment
+ * when the source subvolume/snapshot had orphan items. The issue was that the
+ * on-disk root items became incorrect, referring to the pre orphan cleanup root
+ * node instead of the post orphan cleanup root node.
+ * So this function, and its callees, just detects and fixes those cases. Even
+ * though the regression was for read-only snapshots, this function applies to
+ * any snapshot/subvolume root.
+ * This must be run before any other repair code - not doing it so, makes other
+ * repair code delete or modify backrefs in the extent tree for example, which
+ * will result in an inconsistent fs after repairing the root items.
+ */
+static int repair_root_items(struct btrfs_fs_info *info)
+{
+ struct btrfs_path *path = NULL;
+ struct btrfs_key key;
+ struct extent_buffer *leaf;
+ struct btrfs_trans_handle *trans = NULL;
+ int ret = 0;
+ int bad_roots = 0;
+ int need_trans = 0;
+
+ ret = build_roots_info_cache(info);
+ if (ret)
+ goto out;
+
+ path = btrfs_alloc_path();
+ if (!path) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ key.objectid = BTRFS_FIRST_FREE_OBJECTID;
+ key.type = BTRFS_ROOT_ITEM_KEY;
+ key.offset = 0;
+
+again:
+ /*
+ * Avoid opening and committing transactions if a leaf doesn't have
+ * any root items that need to be fixed, so that we avoid rotating
+ * backup roots unnecessarily.
+ */
+ if (need_trans) {
+ trans = btrfs_start_transaction(info->tree_root, 1);
+ if (IS_ERR(trans)) {
+ ret = PTR_ERR(trans);
+ goto out;
+ }
+ }
+
+ ret = btrfs_search_slot(trans, info->tree_root, &key, path,
+ 0, trans ? 1 : 0);
+ if (ret < 0)
+ goto out;
+ leaf = path->nodes[0];
+
+ while (1) {
+ struct btrfs_key found_key;
+
+ if (path->slots[0] >= btrfs_header_nritems(leaf)) {
+ int no_more_keys = find_next_key(path, &key);
+
+ btrfs_release_path(path);
+ if (trans) {
+ ret = btrfs_commit_transaction(trans,
+ info->tree_root);
+ trans = NULL;
+ if (ret < 0)
+ goto out;
+ }
+ need_trans = 0;
+ if (no_more_keys)
+ break;
+ goto again;
+ }
+
+ btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
+
+ if (found_key.type != BTRFS_ROOT_ITEM_KEY)
+ goto next;
+
+ ret = maybe_repair_root_item(info, path, &found_key,
+ trans ? 0 : 1);
+ if (ret < 0)
+ goto out;
+ if (ret) {
+ if (!trans && repair) {
+ need_trans = 1;
+ key = found_key;
+ btrfs_release_path(path);
+ goto again;
+ }
+ bad_roots++;
+ }
+next:
+ path->slots[0]++;
+ }
+ ret = 0;
+out:
+ free_roots_info_cache();
+ if (path)
+ btrfs_free_path(path);
+ if (ret < 0)
+ return ret;
+
+ return bad_roots;
+}
+
static struct option long_options[] = {
{ "super", 1, NULL, 's' },
{ "repair", 0, NULL, 0 },
@@ -7320,6 +7659,23 @@ int cmd_check(int argc, char **argv)
}
root = info->fs_root;
+
+ ret = repair_root_items(info);
+ if (ret < 0)
+ goto close_out;
+ if (repair) {
+ fprintf(stderr, "Fixed %d roots.\n", ret);
+ ret = 0;
+ } else if (ret > 0) {
+ fprintf(stderr,
+ "Found %d roots with an outdated root item.\n",
+ ret);
+ fprintf(stderr,
+ "Please run a filesystem check with the option --repair to fix them.\n");
+ ret = 1;
+ goto close_out;
+ }
+
/*
* repair mode will force us to commit transaction which
* will make us fail to load log tree when mounting.
diff --git a/disk-io.c b/disk-io.c
index 02b6d42d..77fc610c 100644
--- a/disk-io.c
+++ b/disk-io.c
@@ -475,6 +475,8 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
if (root->commit_root == root->node)
goto commit_tree;
+ if (root == root->fs_info->tree_root)
+ goto commit_tree;
free_extent_buffer(root->commit_root);
root->commit_root = NULL;
diff --git a/extent-tree.c b/extent-tree.c
index 5443ec86..080f30d3 100644
--- a/extent-tree.c
+++ b/extent-tree.c
@@ -29,6 +29,7 @@
#include "volumes.h"
#include "free-space-cache.h"
#include "math.h"
+#include "utils.h"
#define PENDING_EXTENT_INSERT 0
#define PENDING_EXTENT_DELETE 1
@@ -972,27 +973,6 @@ static inline int extent_ref_type(u64 parent, u64 owner)
return type;
}
-static int find_next_key(struct btrfs_path *path, struct btrfs_key *key)
-
-{
- int level;
- for (level = 0; level < BTRFS_MAX_LEVEL; level++) {
- if (!path->nodes[level])
- break;
- if (path->slots[level] + 1 >=
- btrfs_header_nritems(path->nodes[level]))
- continue;
- if (level == 0)
- btrfs_item_key_to_cpu(path->nodes[level], key,
- path->slots[level] + 1);
- else
- btrfs_node_key_to_cpu(path->nodes[level], key,
- path->slots[level] + 1);
- return 0;
- }
- return 1;
-}
-
static int lookup_inline_extent_backref(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_path *path,
diff --git a/tests/fsck-tests.sh b/tests/fsck-tests.sh
index 867366b9..3f04626e 100644
--- a/tests/fsck-tests.sh
+++ b/tests/fsck-tests.sh
@@ -27,12 +27,23 @@ rm -f $RESULT
# test rely on corrupting blocks tool
run_check make btrfs-corrupt-block
-for i in $(find $here/tests/fsck-tests -name '*.img')
+# Some broken filesystem images are kept as .img files, created by the tool
+# btrfs-image, and others are kept as .tar.xz files that contain raw filesystem
+# image (the backing file of a loop device, as a sparse file). The reason for
+# keeping some as tarballs of raw images is that for these cases btrfs-image
+# isn't able to preserve all the (bad) filesystem structure for some reason.
+for i in $(find $here/tests/fsck-tests -name '*.img' -o -name '*.tar.xz')
do
echo " [TEST] $(basename $i)"
echo "testing image $i" >> $RESULT
- run_check $here/btrfs-image -r $i test.img
+ extension=${i#*.}
+
+ if [ $extension == "img" ]; then
+ run_check $here/btrfs-image -r $i test.img
+ else
+ run_check tar xJf $i
+ fi
$here/btrfsck test.img >> $RESULT 2>&1
[ $? -eq 0 ] && _fail "btrfsck should have detected corruption"
diff --git a/tests/fsck-tests/006-bad_root_items_fs.tar.xz b/tests/fsck-tests/006-bad_root_items_fs.tar.xz
new file mode 100644
index 00000000..125d8e7d
--- /dev/null
+++ b/tests/fsck-tests/006-bad_root_items_fs.tar.xz
Binary files differ
diff --git a/tests/fsck-tests/007-bad_root_items_fs_skinny.tar.xz b/tests/fsck-tests/007-bad_root_items_fs_skinny.tar.xz
new file mode 100644
index 00000000..ed99dc4d
--- /dev/null
+++ b/tests/fsck-tests/007-bad_root_items_fs_skinny.tar.xz
Binary files differ
diff --git a/utils.c b/utils.c
index 5a72f19f..f10c178b 100644
--- a/utils.c
+++ b/utils.c
@@ -2410,3 +2410,24 @@ void units_set_base(unsigned *units, unsigned base)
*units = base | mode;
}
+
+int find_next_key(struct btrfs_path *path, struct btrfs_key *key)
+{
+ int level;
+
+ for (level = 0; level < BTRFS_MAX_LEVEL; level++) {
+ if (!path->nodes[level])
+ break;
+ if (path->slots[level] + 1 >=
+ btrfs_header_nritems(path->nodes[level]))
+ continue;
+ if (level == 0)
+ btrfs_item_key_to_cpu(path->nodes[level], key,
+ path->slots[level] + 1);
+ else
+ btrfs_node_key_to_cpu(path->nodes[level], key,
+ path->slots[level] + 1);
+ return 0;
+ }
+ return 1;
+}
diff --git a/utils.h b/utils.h
index aed03f23..7accbd20 100644
--- a/utils.h
+++ b/utils.h
@@ -158,4 +158,6 @@ static inline u64 btrfs_min_dev_size(u32 leafsize)
btrfs_min_global_blk_rsv_size(leafsize));
}
+int find_next_key(struct btrfs_path *path, struct btrfs_key *key);
+
#endif