From b8d706e58e69a4782cae111049636e439db86f44 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Thu, 1 Feb 2018 13:37:31 +0800 Subject: btrfs-progs: check: Move lowmem check code to its own check/lowmem.[ch] Since lowmem mode code is highly internally connected, it's pretty hard to move them piece by piece. In theory it's possible to move part of the functions and temporarily export them, but it will just cause extra temporarily modifications. So this patch moves the whole lowmem check part into its own check/lowmem.[ch]. Signed-off-by: Qu Wenruo Signed-off-by: David Sterba --- Makefile | 2 +- check/lowmem.c | 4568 ++++++++++++++++++++ check/lowmem.h | 5 + check/main.c | 12817 ++++++++++++++++++------------------------------------- 4 files changed, 8705 insertions(+), 8687 deletions(-) create mode 100644 check/lowmem.c diff --git a/Makefile b/Makefile index dfc116c7..d30f1d29 100644 --- a/Makefile +++ b/Makefile @@ -113,7 +113,7 @@ cmds_objects = cmds-subvolume.o cmds-filesystem.o cmds-device.o cmds-scrub.o \ cmds-restore.o cmds-rescue.o chunk-recover.o super-recover.o \ cmds-property.o cmds-fi-usage.o cmds-inspect-dump-tree.o \ cmds-inspect-dump-super.o cmds-inspect-tree-stats.o cmds-fi-du.o \ - mkfs/common.o check/common.o + mkfs/common.o check/common.o check/lowmem.o libbtrfs_objects = send-stream.o send-utils.o kernel-lib/rbtree.o btrfs-list.o \ kernel-lib/crc32c.o messages.o \ uuid-tree.o utils-lib.o rbtree-utils.o diff --git a/check/lowmem.c b/check/lowmem.c new file mode 100644 index 00000000..5d17961e --- /dev/null +++ b/check/lowmem.c @@ -0,0 +1,4568 @@ +/* + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA.
+ */
+
+/*
+ * NOTE(review): the header name of the first include was lost when this patch
+ * text was extracted (only a bare "#include" survived). <time.h> is restored
+ * on the assumption that it was the stripped system header -- confirm against
+ * the upstream tree.
+ */
+#include <time.h>
+#include "ctree.h"
+#include "repair.h"
+#include "transaction.h"
+#include "messages.h"
+#include "disk-io.h"
+#include "backref.h"
+#include "hash.h"
+#include "internal.h"
+#include "utils.h"
+#include "volumes.h"
+#include "check/common.h"
+#include "check/lowmem.h"
+
+/*
+ * Work out whether tree block @eb, reached through @root, should carry
+ * BTRFS_BLOCK_FLAG_FULL_BACKREF, and set or clear that bit in *@flags_ret.
+ *
+ * The block is treated as "normal" (bit cleared) when @root's objectid is
+ * below BTRFS_FIRST_FREE_OBJECTID, when @eb is the root node recorded in the
+ * root item, when the header owner equals @root's objectid, or when the
+ * extent item has an inline TREE_BLOCK_REF whose offset equals the header
+ * owner. It is "full backref" (bit set) when the header has the RELOC flag,
+ * when the extent item already has the flag, when no extent item is found, or
+ * when the inline refs are exhausted without a match.
+ *
+ * Returns -ENOMEM if no path can be allocated and -EIO if the extent tree
+ * search fails or finds an exact match for the (impossible) search key; in
+ * both cases *@flags_ret is left untouched. Otherwise returns the last value
+ * of @ret, which is non-zero when btrfs_previous_extent_item() found nothing.
+ */
+static int calc_extent_flag_v2(struct btrfs_root *root, struct extent_buffer *eb,
+			       u64 *flags_ret)
+{
+	struct btrfs_root *extent_root = root->fs_info->extent_root;
+	struct btrfs_root_item *ri = &root->root_item;
+	struct btrfs_extent_inline_ref *iref;
+	struct btrfs_extent_item *ei;
+	struct btrfs_key key;
+	struct btrfs_path *path = NULL;
+	unsigned long ptr;
+	unsigned long end;
+	u64 flags;
+	u64 owner = 0;
+	u64 offset;
+	int slot;
+	int type;
+	int ret = 0;
+
+	/*
+	 * Except file/reloc tree, we can not have FULL BACKREF MODE
+	 */
+	if (root->objectid < BTRFS_FIRST_FREE_OBJECTID)
+		goto normal;
+
+	/* root node */
+	if (eb->start == btrfs_root_bytenr(ri))
+		goto normal;
+
+	if (btrfs_header_flag(eb, BTRFS_HEADER_FLAG_RELOC))
+		goto full_backref;
+
+	owner = btrfs_header_owner(eb);
+	if (owner == root->objectid)
+		goto normal;
+
+	path = btrfs_alloc_path();
+	if (!path)
+		return -ENOMEM;
+
+	/* Search past every possible item of this bytenr, then step back */
+	key.objectid = btrfs_header_bytenr(eb);
+	key.type = (u8)-1;
+	key.offset = (u64)-1;
+
+	ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
+	if (ret <= 0) {
+		ret = -EIO;
+		goto out;
+	}
+
+	if (ret > 0) {
+		ret = btrfs_previous_extent_item(extent_root, path,
+						 key.objectid);
+		if (ret)
+			goto full_backref;
+
+	}
+	btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
+
+	/* From here on @eb is the extent tree leaf, not the caller's block */
+	eb = path->nodes[0];
+	slot = path->slots[0];
+	ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
+
+	flags = btrfs_extent_flags(eb, ei);
+	if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
+		goto full_backref;
+
+	ptr = (unsigned long)(ei + 1);
+	end = (unsigned long)ei + btrfs_item_size_nr(eb, slot);
+
+	/* Non-skinny extent items carry a tree_block_info before the refs */
+	if (key.type == BTRFS_EXTENT_ITEM_KEY)
+		ptr += sizeof(struct btrfs_tree_block_info);
+
+next:
+	/* Reached extent item ends normally */
+	if (ptr == end)
+		goto full_backref;
+
+	/* Beyond extent item end, wrong item size */
+	if (ptr > end) {
+		error("extent item at bytenr %llu slot %d has wrong size",
+			eb->start, slot);
+		goto full_backref;
+	}
+
+	iref = (struct btrfs_extent_inline_ref *)ptr;
+	offset = btrfs_extent_inline_ref_offset(eb, iref);
+	type = btrfs_extent_inline_ref_type(eb, iref);
+
+	if (type == BTRFS_TREE_BLOCK_REF_KEY && offset == owner)
+		goto normal;
+	ptr += btrfs_extent_inline_ref_size(type);
+	goto next;
+
+normal:
+	*flags_ret &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF;
+	goto out;
+
+full_backref:
+	*flags_ret |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
+out:
+	btrfs_free_path(path);
+	return ret;
+}
+
+/*
+ * For a tree node or leaf that is shared, we don't need to iterate it in
+ * every fs or file tree check. Here we look at all of its root ids and only
+ * check it in the fs or file tree which has the smallest root id.
+ *
+ * Returns 1 if @root should check the block, 0 if it should be skipped.
+ */
+static int need_check(struct btrfs_root *root, struct ulist *roots)
+{
+	struct rb_node *node;
+	struct ulist_node *u;
+
+	/*
+	 * @roots can be empty if it belongs to tree reloc tree
+	 * In that case, we should always check the leaf, as we can't use
+	 * the tree owner to ensure some other root will check it.
+	 */
+	if (roots->nnodes == 1 || roots->nnodes == 0)
+		return 1;
+
+	node = rb_first(&roots->root);
+	u = rb_entry(node, struct ulist_node, rb_node);
+	/*
+	 * The current root id is not the smallest, so skip the block and let
+	 * it be checked in the fs or file tree which has the smallest root id.
+	 */
+	if (root->objectid != u->val)
+		return 0;
+
+	return 1;
+}
+
+/*
+ * For a tree node or leaf, we record its reference count, so that if we
+ * process this node or leaf again later we don't need to compute its
+ * reference count again.
+ *
+ * @root:	the tree being walked
+ * @bytenr:	if @bytenr == (u64)-1, only update nrefs->full_backref[level]
+ * @eb:		the tree block, may be NULL; only used when @check_all is set
+ * @nrefs:	per-level cache that is filled in
+ * @level:	level of the block inside @root
+ * @check_all:	when set, lookup errors are tolerated and the full backref
+ *		state of @eb is recalculated
+ *
+ * Returns 0 on success or when nrefs already describes @bytenr at @level,
+ * the btrfs_lookup_extent_info() error when @check_all is not set, and -EIO
+ * if the roots of a shared block cannot be resolved.
+ */
+static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
+			     struct extent_buffer *eb, struct node_refs *nrefs,
+			     u64 level, int check_all)
+{
+	struct ulist *roots;
+	u64 refs = 0;
+	u64 flags = 0;
+	int root_level = btrfs_header_level(root->node);
+	int check;
+	int ret;
+
+	if (nrefs->bytenr[level] == bytenr)
+		return 0;
+
+	if (bytenr != (u64)-1) {
+		/* the return value of this function seems a mistake */
+		ret = btrfs_lookup_extent_info(NULL, root, bytenr,
+					       level, 1, &refs, &flags);
+		/* temporary fix */
+		if (ret < 0 && !check_all)
+			return ret;
+
+		nrefs->bytenr[level] = bytenr;
+		nrefs->refs[level] = refs;
+		nrefs->full_backref[level] = 0;
+		nrefs->checked[level] = 0;
+
+		if (refs > 1) {
+			ret = btrfs_find_all_roots(NULL, root->fs_info, bytenr,
+						   0, &roots);
+			if (ret)
+				return -EIO;
+
+			check = need_check(root, roots);
+			ulist_free(roots);
+			nrefs->need_check[level] = check;
+		} else {
+			if (!check_all) {
+				nrefs->need_check[level] = 1;
+			} else {
+				if (level == root_level) {
+					nrefs->need_check[level] = 1;
+				} else {
+					/*
+					 * The node refs may have not been
+					 * updated if upper needs checking (the
+					 * lowest root_objectid) the node can
+					 * be checked.
+					 */
+					nrefs->need_check[level] =
+						nrefs->need_check[level + 1];
+				}
+			}
+		}
+	}
+
+	if (check_all && eb) {
+		/* The return value is ignored; only the flag bit is used */
+		calc_extent_flag_v2(root, eb, &flags);
+		if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
+			nrefs->full_backref[level] = 1;
+	}
+
+	return 0;
+}
+
+/*
+ * This function only handles BACKREF_MISSING,
+ * If corresponding extent item exists, increase the ref, else insert an extent
+ * item and backref.
+ *
+ * Returns error bits after repair.
+ */
+/*
+ * Parameter notes for repair_tree_block_ref():
+ *
+ * @trans:	running transaction used for the extent tree updates
+ * @root:	the tree that references @node
+ * @node:	the tree block whose backref is missing
+ * @nrefs:	cached per-level ref info; refs[] and full_backref[] at @level
+ *		are updated to reflect the repair
+ * @level:	level of @node inside @root
+ * @err:	error bits found by the checker; returned unchanged unless
+ *		BACKREF_MISSING is set, and with BACKREF_MISSING cleared when
+ *		the repair succeeds
+ */
+static int repair_tree_block_ref(struct btrfs_trans_handle *trans,
+				 struct btrfs_root *root,
+				 struct extent_buffer *node,
+				 struct node_refs *nrefs, int level, int err)
+{
+	struct btrfs_fs_info *fs_info = root->fs_info;
+	struct btrfs_root *extent_root = fs_info->extent_root;
+	struct btrfs_path path;
+	struct btrfs_extent_item *ei;
+	struct btrfs_tree_block_info *bi;
+	struct btrfs_key key;
+	struct extent_buffer *eb;
+	u32 size = sizeof(*ei);
+	u32 node_size = root->fs_info->nodesize;
+	int insert_extent = 0;
+	int skinny_metadata = btrfs_fs_incompat(fs_info, SKINNY_METADATA);
+	int root_level = btrfs_header_level(root->node);
+	/* u64 so the header generation is not truncated */
+	u64 generation;
+	int ret;
+	u64 owner;
+	u64 bytenr;
+	u64 flags = BTRFS_EXTENT_FLAG_TREE_BLOCK;
+	u64 parent = 0;
+
+	if ((err & BACKREF_MISSING) == 0)
+		return err;
+
+	WARN_ON(level > BTRFS_MAX_LEVEL);
+	WARN_ON(level < 0);
+
+	btrfs_init_path(&path);
+	bytenr = btrfs_header_bytenr(node);
+	owner = btrfs_header_owner(node);
+	generation = btrfs_header_generation(node);
+
+	key.objectid = bytenr;
+	key.type = (u8)-1;
+	key.offset = (u64)-1;
+
+	/* Search for the extent item */
+	ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
+	if (ret <= 0) {
+		ret = -EIO;
+		goto out;
+	}
+
+	ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
+	if (ret)
+		insert_extent = 1;
+
+	/* calculate if the extent item flag is full backref or not */
+	if (nrefs->full_backref[level] != 0)
+		flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
+
+	/* insert an extent item */
+	if (insert_extent) {
+		struct btrfs_disk_key copy_key;
+
+		generation = btrfs_header_generation(node);
+
+		if (level < root_level && nrefs->full_backref[level + 1] &&
+		    owner != root->objectid) {
+			flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
+		}
+
+		key.objectid = bytenr;
+		if (!skinny_metadata) {
+			key.type = BTRFS_EXTENT_ITEM_KEY;
+			key.offset = node_size;
+			size += sizeof(*bi);
+		} else {
+			key.type = BTRFS_METADATA_ITEM_KEY;
+			key.offset = level;
+		}
+
+		btrfs_release_path(&path);
+		ret = btrfs_insert_empty_item(trans, extent_root, &path, &key,
+					      size);
+		if (ret)
+			goto out;
+
+		eb = path.nodes[0];
+		ei = btrfs_item_ptr(eb, path.slots[0], struct btrfs_extent_item);
+
+		/* Start at zero refs; the ref is added by the inc below */
+		btrfs_set_extent_refs(eb, ei, 0);
+		btrfs_set_extent_generation(eb, ei, generation);
+		btrfs_set_extent_flags(eb, ei, flags);
+
+		if (!skinny_metadata) {
+			bi = (struct btrfs_tree_block_info *)(ei + 1);
+			memset_extent_buffer(eb, 0, (unsigned long)bi,
+					     sizeof(*bi));
+			btrfs_set_disk_key_objectid(&copy_key, root->objectid);
+			btrfs_set_disk_key_type(&copy_key, 0);
+			btrfs_set_disk_key_offset(&copy_key, 0);
+
+			btrfs_set_tree_block_level(eb, bi, level);
+			btrfs_set_tree_block_key(eb, bi, &copy_key);
+		}
+		btrfs_mark_buffer_dirty(eb);
+		printf("Added an extent item [%llu %u]\n", bytenr, node_size);
+		btrfs_update_block_group(extent_root, bytenr, node_size, 1, 0);
+
+		nrefs->refs[level] = 0;
+		nrefs->full_backref[level] =
+			flags & BTRFS_BLOCK_FLAG_FULL_BACKREF;
+		btrfs_release_path(&path);
+	}
+
+	/* A shared (full backref) child is referenced by its parent block */
+	if (level < root_level && nrefs->full_backref[level + 1] &&
+	    owner != root->objectid)
+		parent = nrefs->bytenr[level + 1];
+
+	/* increase the ref */
+	ret = btrfs_inc_extent_ref(trans, extent_root, bytenr, node_size,
+				   parent, root->objectid, level, 0);
+	/* Only account the new ref in the cache when it was really added */
+	if (!ret)
+		nrefs->refs[level]++;
+out:
+	btrfs_release_path(&path);
+	if (ret) {
+		error(
+	"failed to repair tree block ref start %llu root %llu due to %s",
+		      bytenr, root->objectid, strerror(-ret));
+	} else {
+		printf("Added one tree block ref start %llu %s %llu\n",
+		       bytenr, parent ? "parent" : "root",
+		       parent ? parent : root->objectid);
+		err &= ~BACKREF_MISSING;
+	}
+
+	return err;
+}
+
+/*
+ * Update global fs information.
+ */
+/*
+ * account_bytes() adds the size of the tree block at path->nodes[@level] to
+ * the global byte counters and adds its unused space to btree_space_waste.
+ */
+static void account_bytes(struct btrfs_root *root, struct btrfs_path *path,
+			  int level)
+{
+	u32 free_nrs;
+	struct extent_buffer *eb = path->nodes[level];
+
+	total_btree_bytes += eb->len;
+	if (fs_root_objectid(root->objectid))
+		total_fs_tree_bytes += eb->len;
+	if (btrfs_header_owner(eb) == BTRFS_EXTENT_TREE_OBJECTID)
+		total_extent_tree_bytes += eb->len;
+
+	if (level == 0) {
+		btree_space_waste += btrfs_leaf_free_space(root, eb);
+	} else {
+		/* For nodes, waste is the number of unused key pointers */
+		free_nrs = (BTRFS_NODEPTRS_PER_BLOCK(root->fs_info) -
+			    btrfs_header_nritems(eb));
+		btree_space_waste += free_nrs * sizeof(struct btrfs_key_ptr);
+	}
+}
+
+/*
+ * Find the dir index of the entry in directory @dirid that points to inode
+ * @location_id with the given name and file type.
+ * Notice: every DIR_INDEX item of the directory may be visited, starting from
+ * the highest index.
+ *
+ * @root:	the root of the fs/file tree
+ * @dirid:	objectid of the directory whose DIR_INDEX items are scanned
+ * @location_id: objectid of the inode the dir item must point to
+ * @index_ret:	the index as return value, must not be NULL
+ * @namebuf:	the name to match
+ * @name_len:	the length of name to match
+ * @file_type:	the file_type of INODE_ITEM to match
+ *
+ * Returns 0 if found and *@index_ret will be modified with right value
+ * Returns <0 if not found or on error, and *@index_ret will be (u64)-1
+ */
+static int find_dir_index(struct btrfs_root *root, u64 dirid, u64 location_id,
+			  u64 *index_ret, char *namebuf, u32 name_len,
+			  u8 file_type)
+{
+	struct btrfs_path path;
+	struct extent_buffer *node;
+	struct btrfs_dir_item *di;
+	struct btrfs_key key;
+	struct btrfs_key location;
+	char name[BTRFS_NAME_LEN] = {0};
+
+	u32 total;
+	u32 cur = 0;
+	u32 len;
+	u32 data_len;
+	u8 filetype;
+	int slot;
+	int ret;
+
+	ASSERT(index_ret);
+
+	/* search from the last index */
+	key.objectid = dirid;
+	key.offset = (u64)-1;
+	key.type = BTRFS_DIR_INDEX_KEY;
+
+	btrfs_init_path(&path);
+	ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
+	if (ret < 0)
+		goto out;
+
+loop:
+	ret = btrfs_previous_item(root, &path, dirid, BTRFS_DIR_INDEX_KEY);
+	if (ret) {
+		ret = -ENOENT;
+		goto out;
+	}
+	/* Check whether inode_id/filetype/name match */
+	node = path.nodes[0];
+	slot = path.slots[0];
+	di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
+	total = btrfs_item_size_nr(node, slot);
+	/* Each item is walked from its own start, so restart the offset */
+	cur = 0;
+	while (cur < total) {
+		ret = -ENOENT;
+		len = btrfs_dir_name_len(node, di);
+		data_len = btrfs_dir_data_len(node, di);
+
+		btrfs_dir_item_key_to_cpu(node, di, &location);
+		if (location.objectid != location_id ||
+		    location.type != BTRFS_INODE_ITEM_KEY ||
+		    location.offset != 0)
+			goto next;
+
+		filetype = btrfs_dir_type(node, di);
+		if (file_type != filetype)
+			goto next;
+
+		if (len > BTRFS_NAME_LEN)
+			len = BTRFS_NAME_LEN;
+
+		read_extent_buffer(node, name, (unsigned long)(di + 1), len);
+		if (len != name_len || strncmp(namebuf, name, len))
+			goto next;
+
+		btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
+		*index_ret = key.offset;
+		ret = 0;
+		goto out;
+next:
+		len += sizeof(*di) + data_len;
+		di = (struct btrfs_dir_item *)((char *)di + len);
+		cur += len;
+	}
+	goto loop;
+
+out:
+	/* Every failure path reports "no index", as documented above */
+	if (ret)
+		*index_ret = (u64)-1;
+	btrfs_release_path(&path);
+	return ret;
+}
+
+/*
+ * Find DIR_ITEM/DIR_INDEX for the given key and check it with the specified
+ * INODE_REF/INODE_EXTREF match.
+ *
+ * @root:	the root of the fs/file tree
+ * @key:	the key of the DIR_ITEM/DIR_INDEX, key->offset will be right
+ *		value while find index
+ * @location_key: location key of the struct btrfs_dir_item to match
+ * @name:	the name to match
+ * @namelen:	the length of name
+ * @file_type:	the type of file to match
+ *
+ * Return 0 if no error occurred.
+ * Return DIR_ITEM_MISSING/DIR_INDEX_MISSING if couldn't find
+ * DIR_ITEM/DIR_INDEX
+ * Return DIR_ITEM_MISMATCH/DIR_INDEX_MISMATCH if INODE_REF/INODE_EXTREF
+ * and DIR_ITEM/DIR_INDEX mismatch
+ */
+static int find_dir_item(struct btrfs_root *root, struct btrfs_key *key,
+			 struct btrfs_key *location_key, char *name,
+			 u32 namelen, u8 file_type)
+{
+	struct btrfs_path path;
+	struct extent_buffer *node;
+	struct btrfs_dir_item *di;
+	struct btrfs_key location;
+	char namebuf[BTRFS_NAME_LEN] = {0};
+	u32 total;
+	u32 cur = 0;
+	u32 len;
+	u32 data_len;
+	u8 filetype;
+	int slot;
+	int ret;
+
+	/* get the index by traversing all index */
+	if (key->type == BTRFS_DIR_INDEX_KEY && key->offset == (u64)-1) {
+		ret = find_dir_index(root, key->objectid,
+				     location_key->objectid, &key->offset,
+				     name, namelen, file_type);
+		if (ret)
+			ret = DIR_INDEX_MISSING;
+		return ret;
+	}
+
+	btrfs_init_path(&path);
+	ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
+	if (ret) {
+		ret = key->type == BTRFS_DIR_ITEM_KEY ? DIR_ITEM_MISSING :
+			DIR_INDEX_MISSING;
+		goto out;
+	}
+
+	/* Check whether inode_id/filetype/name match */
+	node = path.nodes[0];
+	slot = path.slots[0];
+	di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
+	total = btrfs_item_size_nr(node, slot);
+	while (cur < total) {
+		/* Stays set if no entry of the item matches */
+		ret = key->type == BTRFS_DIR_ITEM_KEY ?
+			DIR_ITEM_MISMATCH : DIR_INDEX_MISMATCH;
+
+		len = btrfs_dir_name_len(node, di);
+		data_len = btrfs_dir_data_len(node, di);
+
+		btrfs_dir_item_key_to_cpu(node, di, &location);
+		if (location.objectid != location_key->objectid ||
+		    location.type != location_key->type ||
+		    location.offset != location_key->offset)
+			goto next;
+
+		filetype = btrfs_dir_type(node, di);
+		if (file_type != filetype)
+			goto next;
+
+		if (len > BTRFS_NAME_LEN) {
+			len = BTRFS_NAME_LEN;
+			warning("root %llu %s[%llu %llu] name too long %u, trimmed",
+			root->objectid,
+			key->type == BTRFS_DIR_ITEM_KEY ?
+			"DIR_ITEM" : "DIR_INDEX",
+			key->objectid, key->offset, len);
+		}
+		read_extent_buffer(node, namebuf, (unsigned long)(di + 1),
+				   len);
+		if (len != namelen || strncmp(namebuf, name, len))
+			goto next;
+
+		ret = 0;
+		goto out;
+next:
+		len += sizeof(*di) + data_len;
+		di = (struct btrfs_dir_item *)((char *)di + len);
+		cur += len;
+	}
+
+out:
+	btrfs_release_path(&path);
+	return ret;
+}
+
+/*
+ * The ternary means dir item, dir index and relative inode ref.
+ * The function handles errs: INODE_MISSING, DIR_INDEX_MISSING
+ * DIR_INDEX_MISMATCH, DIR_ITEM_MISSING, DIR_ITEM_MISMATCH by the follow
+ * strategy:
+ * If two of three is missing or mismatched, delete the existing one.
+ * If one of three is missing or mismatched, add the missing one.
+ *
+ * returns 0 means success.
+ * returns not 0 means on error;
+ */
+int repair_ternary_lowmem(struct btrfs_root *root, u64 dir_ino, u64 ino,
+			  u64 index, char *name, int name_len, u8 filetype,
+			  int err)
+{
+	struct btrfs_trans_handle *trans;
+	int stage = 0;
+	int ret = 0;
+
+	/*
+	 * stage shall be one of following valid values:
+	 *	0: Fine, nothing to do.
+	 *	1: One of three is wrong, so add missing one.
+	 *	2: Two of three is wrong, so delete existed one.
+	 */
+	if (err & (DIR_INDEX_MISMATCH | DIR_INDEX_MISSING))
+		stage++;
+	if (err & (DIR_ITEM_MISMATCH | DIR_ITEM_MISSING))
+		stage++;
+	if (err & (INODE_REF_MISSING))
+		stage++;
+
+	/* stage must be smaller than 3 */
+	ASSERT(stage < 3);
+
+	trans = btrfs_start_transaction(root, 1);
+	/* Don't unlink/link/commit through an error pointer */
+	if (IS_ERR(trans)) {
+		ret = PTR_ERR(trans);
+		error("fail to repair inode %llu name %s filetype %u",
+		      ino, name, filetype);
+		return ret;
+	}
+	if (stage == 2) {
+		ret = btrfs_unlink(trans, root, ino, dir_ino, index, name,
+				   name_len, 0);
+		goto out;
+	}
+	if (stage == 1) {
+		ret = btrfs_add_link(trans, root, ino, dir_ino, name, name_len,
+			       filetype, &index, 1, 1);
+		goto out;
+	}
+out:
+	btrfs_commit_transaction(trans, root);
+
+	if (ret)
+		error("fail to repair inode %llu name %s filetype %u",
+		      ino, name, filetype);
+	else
+		printf("%s ref/dir_item of inode %llu name %s filetype %u\n",
+		       stage == 2 ? "Delete" : "Add",
+		       ino, name, filetype);
+
+	return ret;
+}
+
+/*
+ * Prints inode ref error message
+ *
+ * Nothing is printed when @err is 0. For the root directory inode
+ * (BTRFS_FIRST_FREE_OBJECTID) a single dedicated message is printed instead
+ * of the per-item ones.
+ */
+static void print_inode_ref_err(struct btrfs_root *root, struct btrfs_key *key,
+				u64 index, const char *namebuf, int name_len,
+				u8 filetype, int err)
+{
+	if (!err)
+		return;
+
+	/* root dir error */
+	if (key->objectid == BTRFS_FIRST_FREE_OBJECTID) {
+		error(
+	"root %llu root dir shouldn't have INODE REF[%llu %llu] name %s",
+		      root->objectid, key->objectid, key->offset, namebuf);
+		return;
+	}
+
+	/* normal error */
+	if (err & (DIR_ITEM_MISMATCH | DIR_ITEM_MISSING))
+		error("root %llu DIR ITEM[%llu %llu] %s name %s filetype %u",
+		      root->objectid, key->offset,
+		      btrfs_name_hash(namebuf, name_len),
+		      err & DIR_ITEM_MISMATCH ? "mismatch" : "missing",
+		      namebuf, filetype);
+	/* The index message must look at the index bit, not the item bit */
+	if (err & (DIR_INDEX_MISMATCH | DIR_INDEX_MISSING))
+		error("root %llu DIR INDEX[%llu %llu] %s name %s filetype %u",
+		      root->objectid, key->offset, index,
+		      err & DIR_INDEX_MISMATCH ? "mismatch" : "missing",
+		      namebuf, filetype);
+}
+
+/*
+ * Traverse the given INODE_REF and call find_dir_item() to find related
+ * DIR_ITEM/DIR_INDEX.
+ *
+ * @root:	the root of the fs/file tree
+ * @ref_key:	the key of the INODE_REF
+ * @path:	the path provides node and slot
+ * @refs_ret:	the count of INODE_REF
+ * @mode:	the st_mode of INODE_ITEM
+ * @name_ret:	returns with the first ref's name
+ * @namelen_ret: len of the name_ret
+ *
+ * Return 0 if no error occurred.
+ */
+/*
+ * Further notes on check_inode_ref():
+ *
+ * Every name stored in the INODE_REF item is visited. *@refs_ret is read on
+ * entry and written back on exit, incremented once per visited name. When an
+ * entry is repaired successfully the item is looked up again and the whole
+ * item is re-walked from the count that was passed in.
+ * The return value is the OR of the error bits collected for the entries.
+ */
+static int check_inode_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
+			   struct btrfs_path *path, char *name_ret,
+			   u32 *namelen_ret, u64 *refs_ret, int mode)
+{
+	struct btrfs_key key;
+	struct btrfs_key location;
+	struct btrfs_inode_ref *ref;
+	struct extent_buffer *node;
+	char namebuf[BTRFS_NAME_LEN] = {0};
+	u32 total;
+	u32 cur = 0;
+	u32 len;
+	u32 name_len;
+	u64 index;
+	int ret;
+	int err = 0;
+	int tmp_err;
+	int slot;
+	int need_research = 0;
+	u64 refs;
+
+begin:
+	/* (Re)start the walk: drop collected errors and the local ref count */
+	err = 0;
+	cur = 0;
+	refs = *refs_ret;
+
+	/* since after repair, path and the dir item may be changed */
+	if (need_research) {
+		need_research = 0;
+		btrfs_release_path(path);
+		ret = btrfs_search_slot(NULL, root, ref_key, path, 0, 0);
+		/* the item was deleted, let path point to the last checked item */
+		if (ret > 0) {
+			if (path->slots[0] == 0)
+				btrfs_prev_leaf(root, path);
+			else
+				path->slots[0]--;
+		}
+		/* Item gone or search failed: nothing left to walk here */
+		if (ret)
+			goto out;
+	}
+
+	/* Every dir entry for this ref must point at the inode item */
+	location.objectid = ref_key->objectid;
+	location.type = BTRFS_INODE_ITEM_KEY;
+	location.offset = 0;
+	node = path->nodes[0];
+	slot = path->slots[0];
+
+	memset(namebuf, 0, sizeof(namebuf) / sizeof(*namebuf));
+	ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
+	total = btrfs_item_size_nr(node, slot);
+
+next:
+	/* Update inode ref count */
+	refs++;
+	tmp_err = 0;
+	index = btrfs_inode_ref_index(node, ref);
+	name_len = btrfs_inode_ref_name_len(node, ref);
+
+	/* @len is the clamped length used for the local buffer */
+	if (name_len <= BTRFS_NAME_LEN) {
+		len = name_len;
+	} else {
+		len = BTRFS_NAME_LEN;
+		warning("root %llu INODE_REF[%llu %llu] name too long",
+			root->objectid, ref_key->objectid, ref_key->offset);
+	}
+
+	read_extent_buffer(node, namebuf, (unsigned long)(ref + 1), len);
+
+	/* copy the first name found to name_ret */
+	if (refs == 1 && name_ret) {
+		memcpy(name_ret, namebuf, len);
+		*namelen_ret = len;
+	}
+
+	/* Check root dir ref */
+	if (ref_key->objectid == BTRFS_FIRST_FREE_OBJECTID) {
+		if (index != 0 || len != strlen("..") ||
+		    strncmp("..", namebuf, len) ||
+		    ref_key->offset != BTRFS_FIRST_FREE_OBJECTID) {
+			/* set err bits then repair will delete the ref */
+			/*
+			 * NOTE(review): the bits go into @err, not @tmp_err,
+			 * so the repair/print code at "end:" does not see
+			 * them -- confirm whether that is intended.
+			 */
+			err |= DIR_INDEX_MISSING;
+			err |= DIR_ITEM_MISSING;
+		}
+		goto end;
+	}
+
+	/* Find related DIR_INDEX */
+	key.objectid = ref_key->offset;
+	key.type = BTRFS_DIR_INDEX_KEY;
+	key.offset = index;
+	tmp_err |= find_dir_item(root, &key, &location, namebuf, len,
+			    imode_to_type(mode));
+
+	/* Find related dir_item */
+	key.objectid = ref_key->offset;
+	key.type = BTRFS_DIR_ITEM_KEY;
+	key.offset = btrfs_name_hash(namebuf, len);
+	tmp_err |= find_dir_item(root, &key, &location, namebuf, len,
+			    imode_to_type(mode));
+end:
+	if (tmp_err && repair) {
+		ret = repair_ternary_lowmem(root, ref_key->offset,
+					    ref_key->objectid, index, namebuf,
+					    name_len, imode_to_type(mode),
+					    tmp_err);
+		/* Repair changed the tree: look the item up and walk it again */
+		if (!ret) {
+			need_research = 1;
+			goto begin;
+		}
+	}
+	print_inode_ref_err(root, ref_key, index, namebuf, name_len,
+			    imode_to_type(mode), tmp_err);
+	err |= tmp_err;
+	/* Advance by the on-disk name length, not the clamped one */
+	len = sizeof(*ref) + name_len;
+	ref = (struct btrfs_inode_ref *)((char *)ref + len);
+	cur += len;
+	if (cur < total)
+		goto next;
+
+out:
+	*refs_ret = refs;
+	return err;
+}
+
+/*
+ * Traverse the given INODE_EXTREF and call find_dir_item() to find related
+ * DIR_ITEM/DIR_INDEX.
+ *
+ * @root:	the root of the fs/file tree
+ * @ref_key:	the key of the INODE_EXTREF
+ * @refs:	the count of INODE_EXTREF
+ * @mode:	the st_mode of INODE_ITEM
+ *
+ * Return 0 if no error occurred.
+ */
+/*
+ * Further notes on check_inode_extref():
+ *
+ * @node and @slot locate the INODE_EXTREF item. *@refs is incremented once
+ * per name stored in the item. The return value is the OR of ROOT_DIR_ERROR
+ * and the error bits returned by find_dir_item() for each name.
+ */
+static int check_inode_extref(struct btrfs_root *root,
+			      struct btrfs_key *ref_key,
+			      struct extent_buffer *node, int slot, u64 *refs,
+			      int mode)
+{
+	struct btrfs_key key;
+	struct btrfs_key location;
+	struct btrfs_inode_extref *extref;
+	char namebuf[BTRFS_NAME_LEN] = {0};
+	u32 total;
+	u32 cur = 0;
+	u32 len;
+	u32 name_len;
+	u64 index;
+	u64 parent;
+	int ret;
+	int err = 0;
+
+	location.objectid = ref_key->objectid;
+	location.type = BTRFS_INODE_ITEM_KEY;
+	location.offset = 0;
+
+	extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
+	total = btrfs_item_size_nr(node, slot);
+
+next:
+	/* update inode ref count */
+	(*refs)++;
+	name_len = btrfs_inode_extref_name_len(node, extref);
+	index = btrfs_inode_extref_index(node, extref);
+	parent = btrfs_inode_extref_parent(node, extref);
+	if (name_len <= BTRFS_NAME_LEN) {
+		len = name_len;
+	} else {
+		len = BTRFS_NAME_LEN;
+		warning("root %llu INODE_EXTREF[%llu %llu] name too long",
+			root->objectid, ref_key->objectid, ref_key->offset);
+	}
+	read_extent_buffer(node, namebuf, (unsigned long)(extref + 1), len);
+
+	/* Check root dir ref name */
+	if (index == 0 && strncmp(namebuf, "..", name_len)) {
+		error("root %llu INODE_EXTREF[%llu %llu] ROOT_DIR name shouldn't be %s",
+		      root->objectid, ref_key->objectid, ref_key->offset,
+		      namebuf);
+		err |= ROOT_DIR_ERROR;
+	}
+
+	/*
+	 * find_dir_item() compares its file_type argument with the dir item
+	 * type, so pass the converted type rather than the raw st_mode, the
+	 * same way check_inode_ref() does.
+	 */
+
+	/* find related dir_index */
+	key.objectid = parent;
+	key.type = BTRFS_DIR_INDEX_KEY;
+	key.offset = index;
+	ret = find_dir_item(root, &key, &location, namebuf, len,
+			    imode_to_type(mode));
+	err |= ret;
+
+	/* find related dir_item */
+	key.objectid = parent;
+	key.type = BTRFS_DIR_ITEM_KEY;
+	key.offset = btrfs_name_hash(namebuf, len);
+	ret = find_dir_item(root, &key, &location, namebuf, len,
+			    imode_to_type(mode));
+	err |= ret;
+
+	/* Advance by the on-disk name length, not the clamped one */
+	len = sizeof(*extref) + name_len;
+	extref = (struct btrfs_inode_extref *)((char *)extref + len);
+	cur += len;
+
+	if (cur < total)
+		goto next;
+
+	return err;
+}
+
+/*
+ * Find INODE_REF/INODE_EXTREF for the given key and
check it with the specified
+ * DIR_ITEM/DIR_INDEX match.
+ * Return with @index_ret.
+ *
+ * @root:	the root of the fs/file tree
+ * @key:	the key of the INODE_REF/INODE_EXTREF; when the extref lookup
+ *		is attempted, key->type and key->offset are overwritten with
+ *		the INODE_EXTREF key
+ * @name:	the name in the INODE_REF/INODE_EXTREF
+ * @namelen:	the length of name in the INODE_REF/INODE_EXTREF
+ * @index_ret:	the index in the INODE_REF/INODE_EXTREF,
+ *		value (u64)-1 means do not check index
+ * @ext_ref:	the EXTENDED_IREF feature
+ *
+ * Return 0 if no error occurred.
+ * Return >0 for error bitmap
+ */
+static int find_inode_ref(struct btrfs_root *root, struct btrfs_key *key,
+			  char *name, int namelen, u64 *index_ret,
+			  unsigned int ext_ref)
+{
+	struct btrfs_path path;
+	struct btrfs_inode_ref *ref;
+	struct btrfs_inode_extref *extref;
+	struct extent_buffer *node;
+	char ref_namebuf[BTRFS_NAME_LEN] = {0};
+	u32 total;
+	u32 cur = 0;
+	u32 len;
+	u32 ref_namelen;
+	u64 ref_index;
+	u64 parent;
+	u64 dir_id;
+	int slot;
+	int ret;
+
+	ASSERT(index_ret);
+
+	btrfs_init_path(&path);
+	ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
+	if (ret) {
+		ret = INODE_REF_MISSING;
+		goto extref;
+	}
+
+	node = path.nodes[0];
+	slot = path.slots[0];
+
+	ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
+	total = btrfs_item_size_nr(node, slot);
+
+	/* Iterate all entry of INODE_REF */
+	while (cur < total) {
+		ret = INODE_REF_MISSING;
+
+		ref_namelen = btrfs_inode_ref_name_len(node, ref);
+		ref_index = btrfs_inode_ref_index(node, ref);
+		if (*index_ret != (u64)-1 && *index_ret != ref_index)
+			goto next_ref;
+
+		if (cur + sizeof(*ref) + ref_namelen > total ||
+		    ref_namelen > BTRFS_NAME_LEN) {
+			warning("root %llu INODE %s[%llu %llu] name too long",
+				root->objectid,
+				key->type == BTRFS_INODE_REF_KEY ?
+					"REF" : "EXTREF",
+				key->objectid, key->offset);
+
+			if (cur + sizeof(*ref) > total)
+				break;
+			/* Clamp to what is left in the item and the buffer */
+			len = min_t(u32, total - cur - sizeof(*ref),
+				    BTRFS_NAME_LEN);
+		} else {
+			len = ref_namelen;
+		}
+
+		read_extent_buffer(node, ref_namebuf, (unsigned long)(ref + 1),
+				   len);
+
+		if (len != namelen || strncmp(ref_namebuf, name, len))
+			goto next_ref;
+
+		*index_ret = ref_index;
+		ret = 0;
+		goto out;
+next_ref:
+		len = sizeof(*ref) + ref_namelen;
+		ref = (struct btrfs_inode_ref *)((char *)ref + len);
+		cur += len;
+	}
+
+extref:
+	/* Skip if not support EXTENDED_IREF feature */
+	if (!ext_ref)
+		goto out;
+
+	btrfs_release_path(&path);
+	btrfs_init_path(&path);
+
+	/* Rewrite the caller's key into the INODE_EXTREF key */
+	dir_id = key->offset;
+	key->type = BTRFS_INODE_EXTREF_KEY;
+	key->offset = btrfs_extref_hash(dir_id, name, namelen);
+
+	ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
+	if (ret) {
+		ret = INODE_REF_MISSING;
+		goto out;
+	}
+
+	node = path.nodes[0];
+	slot = path.slots[0];
+
+	extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref);
+	cur = 0;
+	total = btrfs_item_size_nr(node, slot);
+
+	/* Iterate all entry of INODE_EXTREF */
+	while (cur < total) {
+		ret = INODE_REF_MISSING;
+
+		ref_namelen = btrfs_inode_extref_name_len(node, extref);
+		ref_index = btrfs_inode_extref_index(node, extref);
+		parent = btrfs_inode_extref_parent(node, extref);
+		if (*index_ret != (u64)-1 && *index_ret != ref_index)
+			goto next_extref;
+
+		if (parent != dir_id)
+			goto next_extref;
+
+		if (ref_namelen <= BTRFS_NAME_LEN) {
+			len = ref_namelen;
+		} else {
+			len = BTRFS_NAME_LEN;
+			warning("root %llu INODE %s[%llu %llu] name too long",
+				root->objectid,
+				key->type == BTRFS_INODE_REF_KEY ?
+					"REF" : "EXTREF",
+				key->objectid, key->offset);
+		}
+		read_extent_buffer(node, ref_namebuf,
+				   (unsigned long)(extref + 1), len);
+
+		if (len != namelen || strncmp(ref_namebuf, name, len))
+			goto next_extref;
+
+		*index_ret = ref_index;
+		ret = 0;
+		goto out;
+
+next_extref:
+		len = sizeof(*extref) + ref_namelen;
+		extref = (struct btrfs_inode_extref *)((char *)extref + len);
+		cur += len;
+
+	}
+out:
+	btrfs_release_path(&path);
+	return ret;
+}
+
+/*
+ * Insert an inode item for @ino: a directory when @filetype is BTRFS_FT_DIR,
+ * a regular file otherwise, with permission bits 0755.
+ *
+ * Returns the result of insert_inode_item().
+ */
+static int create_inode_item_lowmem(struct btrfs_trans_handle *trans,
+				    struct btrfs_root *root, u64 ino,
+				    u8 filetype)
+{
+	u32 mode = (filetype == BTRFS_FT_DIR ? S_IFDIR : S_IFREG) | 0755;
+
+	return insert_inode_item(trans, root, ino, 0, 0, 0, mode);
+}
+
+/*
+ * Insert the missing inode item.
+ *
+ * Returns 0 means success.
+ * Returns <0 means error.
+ */
+static int repair_inode_item_missing(struct btrfs_root *root, u64 ino,
+				     u8 filetype)
+{
+	struct btrfs_key key;
+	struct btrfs_trans_handle *trans;
+	struct btrfs_path path;
+	int ret;
+
+	key.objectid = ino;
+	key.type = BTRFS_INODE_ITEM_KEY;
+	key.offset = 0;
+
+	btrfs_init_path(&path);
+	trans = btrfs_start_transaction(root, 1);
+	if (IS_ERR(trans)) {
+		ret = -EIO;
+		goto out;
+	}
+
+	/* ret == 0: the item already exists; ret < 0: the search failed */
+	ret = btrfs_search_slot(trans, root, &key, &path, 1, 1);
+	if (ret < 0 || !ret)
+		goto fail;
+
+	/* insert inode item, and report a failed insertion to the caller */
+	ret = create_inode_item_lowmem(trans, root, ino, filetype);
+fail:
+	btrfs_commit_transaction(trans, root);
+out:
+	if (ret)
+		error("failed to repair root %llu INODE ITEM[%llu] missing",
+		      root->objectid, ino);
+	btrfs_release_path(&path);
+	return ret;
+}
+
+/*
+ * Call repair_inode_item_missing and repair_ternary_lowmem to repair
+ *
+ * Returns error after repair
+ */
+static int repair_dir_item(struct btrfs_root *root, u64 dirid, u64 ino,
+			   u64 index, u8 filetype, char *namebuf, u32 name_len,
+			   int err)
+{
+	int ret;
+
+	if (err & INODE_ITEM_MISSING) {
+		ret = repair_inode_item_missing(root, ino, filetype);
+		if (!ret)
+			err &= ~(INODE_ITEM_MISMATCH | INODE_ITEM_MISSING);
+	}
+
+	/* Any remaining non inode-item bit is handled as a ternary error */
+	if (err & ~(INODE_ITEM_MISMATCH | INODE_ITEM_MISSING)) {
+		ret = repair_ternary_lowmem(root, dirid, ino, index, namebuf,
+					    name_len, filetype, err);
+		if (!ret) {
+			err &= ~(DIR_INDEX_MISMATCH | DIR_INDEX_MISSING);
+			err &= ~(DIR_ITEM_MISMATCH | DIR_ITEM_MISSING);
+			err &= ~(INODE_REF_MISSING);
+		}
+	}
+	return err;
+}
+
+/*
+ * Print one message for each group of error bits set in @err for the dir
+ * entry described by @key, @ino, @index, @namebuf and @filetype.
+ */
+static void print_dir_item_err(struct btrfs_root *root, struct btrfs_key *key,
+		u64 ino, u64 index, const char *namebuf,
+		int name_len, u8 filetype, int err)
+{
+	if (err & (DIR_ITEM_MISMATCH | DIR_ITEM_MISSING)) {
+		error("root %llu DIR ITEM[%llu %llu] name %s filetype %d %s",
+		      root->objectid, key->objectid, key->offset, namebuf,
+		      filetype,
+		      err & DIR_ITEM_MISMATCH ? "mismatch" : "missing");
+	}
+
+	/* The index message must look at the index bit, not the item bit */
+	if (err & (DIR_INDEX_MISMATCH | DIR_INDEX_MISSING)) {
+		error("root %llu DIR INDEX[%llu %llu] name %s filetype %d %s",
+		      root->objectid, key->objectid, index, namebuf, filetype,
+		      err & DIR_INDEX_MISMATCH ? "mismatch" : "missing");
+	}
+
+	if (err & (INODE_ITEM_MISSING | INODE_ITEM_MISMATCH)) {
+		error(
+		"root %llu INODE_ITEM[%llu] index %llu name %s filetype %d %s",
+		      root->objectid, ino, index, namebuf, filetype,
+		      err & INODE_ITEM_MISMATCH ? "mismatch" : "missing");
+	}
+
+	if (err & INODE_REF_MISSING)
+		error(
+		"root %llu INODE REF[%llu, %llu] name %s filetype %u missing",
+		      root->objectid, ino, key->objectid, namebuf, filetype);
+
+}
+
+/*
+ * Traverse the given DIR_ITEM/DIR_INDEX and check related INODE_ITEM and
+ * call find_inode_ref() to check related INODE_REF/INODE_EXTREF.
+ *
+ * @root:	the root of the fs/file tree
+ * @key:	the key of the INODE_REF/INODE_EXTREF
+ * @path:       the path
+ * @size:	the st_size of the INODE_ITEM
+ * @ext_ref:	the EXTENDED_IREF feature
+ *
+ * Return 0 if no error occurred.
+ * Return DIR_COUNT_AGAIN if the isize of the inode should be recalculated.
+ */ +static int check_dir_item(struct btrfs_root *root, struct btrfs_key *di_key, + struct btrfs_path *path, u64 *size, + unsigned int ext_ref) +{ + struct btrfs_dir_item *di; + struct btrfs_inode_item *ii; + struct btrfs_key key; + struct btrfs_key location; + struct extent_buffer *node; + int slot; + char namebuf[BTRFS_NAME_LEN] = {0}; + u32 total; + u32 cur = 0; + u32 len; + u32 name_len; + u32 data_len; + u8 filetype; + u32 mode = 0; + u64 index; + int ret; + int err; + int tmp_err; + int need_research = 0; + + /* + * For DIR_ITEM set index to (u64)-1, so that find_inode_ref + * ignore index check. + */ + if (di_key->type == BTRFS_DIR_INDEX_KEY) + index = di_key->offset; + else + index = (u64)-1; +begin: + err = 0; + cur = 0; + + /* since after repair, path and the dir item may be changed */ + if (need_research) { + need_research = 0; + err |= DIR_COUNT_AGAIN; + btrfs_release_path(path); + ret = btrfs_search_slot(NULL, root, di_key, path, 0, 0); + /* the item was deleted, let path point the last checked item */ + if (ret > 0) { + if (path->slots[0] == 0) + btrfs_prev_leaf(root, path); + else + path->slots[0]--; + } + if (ret) + goto out; + } + + node = path->nodes[0]; + slot = path->slots[0]; + + di = btrfs_item_ptr(node, slot, struct btrfs_dir_item); + total = btrfs_item_size_nr(node, slot); + memset(namebuf, 0, sizeof(namebuf) / sizeof(*namebuf)); + + while (cur < total) { + data_len = btrfs_dir_data_len(node, di); + tmp_err = 0; + if (data_len) + error("root %llu %s[%llu %llu] data_len shouldn't be %u", + root->objectid, + di_key->type == BTRFS_DIR_ITEM_KEY ? "DIR_ITEM" : "DIR_INDEX", + di_key->objectid, di_key->offset, data_len); + + name_len = btrfs_dir_name_len(node, di); + if (name_len <= BTRFS_NAME_LEN) { + len = name_len; + } else { + len = BTRFS_NAME_LEN; + warning("root %llu %s[%llu %llu] name too long", + root->objectid, + di_key->type == BTRFS_DIR_ITEM_KEY ? 
"DIR_ITEM" : "DIR_INDEX", + di_key->objectid, di_key->offset); + } + (*size) += name_len; + read_extent_buffer(node, namebuf, (unsigned long)(di + 1), + len); + filetype = btrfs_dir_type(node, di); + + if (di_key->type == BTRFS_DIR_ITEM_KEY && + di_key->offset != btrfs_name_hash(namebuf, len)) { + err |= -EIO; + error("root %llu DIR_ITEM[%llu %llu] name %s namelen %u filetype %u mismatch with its hash, wanted %llu have %llu", + root->objectid, di_key->objectid, di_key->offset, + namebuf, len, filetype, di_key->offset, + btrfs_name_hash(namebuf, len)); + } + + btrfs_dir_item_key_to_cpu(node, di, &location); + /* Ignore related ROOT_ITEM check */ + if (location.type == BTRFS_ROOT_ITEM_KEY) + goto next; + + btrfs_release_path(path); + /* Check relative INODE_ITEM(existence/filetype) */ + ret = btrfs_search_slot(NULL, root, &location, path, 0, 0); + if (ret) { + tmp_err |= INODE_ITEM_MISSING; + goto next; + } + + ii = btrfs_item_ptr(path->nodes[0], path->slots[0], + struct btrfs_inode_item); + mode = btrfs_inode_mode(path->nodes[0], ii); + if (imode_to_type(mode) != filetype) { + tmp_err |= INODE_ITEM_MISMATCH; + goto next; + } + + /* Check relative INODE_REF/INODE_EXTREF */ + key.objectid = location.objectid; + key.type = BTRFS_INODE_REF_KEY; + key.offset = di_key->objectid; + tmp_err |= find_inode_ref(root, &key, namebuf, len, + &index, ext_ref); + + /* check relative INDEX/ITEM */ + key.objectid = di_key->objectid; + if (key.type == BTRFS_DIR_ITEM_KEY) { + key.type = BTRFS_DIR_INDEX_KEY; + key.offset = index; + } else { + key.type = BTRFS_DIR_ITEM_KEY; + key.offset = btrfs_name_hash(namebuf, name_len); + } + + tmp_err |= find_dir_item(root, &key, &location, namebuf, + name_len, filetype); + /* find_dir_item may find index */ + if (key.type == BTRFS_DIR_INDEX_KEY) + index = key.offset; +next: + + if (tmp_err && repair) { + ret = repair_dir_item(root, di_key->objectid, + location.objectid, index, + imode_to_type(mode), namebuf, + name_len, tmp_err); + if (ret != 
tmp_err) { + need_research = 1; + goto begin; + } + } + btrfs_release_path(path); + print_dir_item_err(root, di_key, location.objectid, index, + namebuf, name_len, filetype, tmp_err); + err |= tmp_err; + len = sizeof(*di) + name_len + data_len; + di = (struct btrfs_dir_item *)((char *)di + len); + cur += len; + + if (di_key->type == BTRFS_DIR_INDEX_KEY && cur < total) { + error("root %llu DIR_INDEX[%llu %llu] should contain only one entry", + root->objectid, di_key->objectid, + di_key->offset); + break; + } + } +out: + /* research path */ + btrfs_release_path(path); + ret = btrfs_search_slot(NULL, root, di_key, path, 0, 0); + if (ret) + err |= ret > 0 ? -ENOENT : ret; + return err; +} + +/* + * Wrapper function of btrfs_punch_hole. + * + * Returns 0 means success. + * Returns not 0 means error. + */ +static int punch_extent_hole(struct btrfs_root *root, u64 ino, u64 start, + u64 len) +{ + struct btrfs_trans_handle *trans; + int ret = 0; + + trans = btrfs_start_transaction(root, 1); + if (IS_ERR(trans)) + return PTR_ERR(trans); + + ret = btrfs_punch_hole(trans, root, ino, start, len); + if (ret) + error("failed to add hole [%llu, %llu] in inode [%llu]", + start, len, ino); + else + printf("Add a hole [%llu, %llu] in inode [%llu]\n", start, len, + ino); + + btrfs_commit_transaction(trans, root); + return ret; +} + +/* + * Check file extent datasum/hole, update the size of the file extents, + * check and update the last offset of the file extent. + * + * @root: the root of fs/file tree. + * @fkey: the key of the file extent. + * @nodatasum: INODE_NODATASUM feature. + * @size: the sum of all EXTENT_DATA items size for this inode. + * @end: the offset of the last extent. + * + * Return 0 if no error occurred. 
+ */ +static int check_file_extent(struct btrfs_root *root, struct btrfs_key *fkey, + struct extent_buffer *node, int slot, + unsigned int nodatasum, u64 *size, u64 *end) +{ + struct btrfs_file_extent_item *fi; + u64 disk_bytenr; + u64 disk_num_bytes; + u64 extent_num_bytes; + u64 extent_offset; + u64 csum_found; /* In byte size, sectorsize aligned */ + u64 search_start; /* Logical range start we search for csum */ + u64 search_len; /* Logical range len we search for csum */ + unsigned int extent_type; + unsigned int is_hole; + int compressed = 0; + int ret; + int err = 0; + + fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item); + + /* Check inline extent */ + extent_type = btrfs_file_extent_type(node, fi); + if (extent_type == BTRFS_FILE_EXTENT_INLINE) { + struct btrfs_item *e = btrfs_item_nr(slot); + u32 item_inline_len; + + item_inline_len = btrfs_file_extent_inline_item_len(node, e); + extent_num_bytes = btrfs_file_extent_inline_len(node, slot, fi); + compressed = btrfs_file_extent_compression(node, fi); + if (extent_num_bytes == 0) { + error( + "root %llu EXTENT_DATA[%llu %llu] has empty inline extent", + root->objectid, fkey->objectid, fkey->offset); + err |= FILE_EXTENT_ERROR; + } + if (!compressed && extent_num_bytes != item_inline_len) { + error( + "root %llu EXTENT_DATA[%llu %llu] wrong inline size, have: %llu, expected: %u", + root->objectid, fkey->objectid, fkey->offset, + extent_num_bytes, item_inline_len); + err |= FILE_EXTENT_ERROR; + } + *end += extent_num_bytes; + *size += extent_num_bytes; + return err; + } + + /* Check extent type */ + if (extent_type != BTRFS_FILE_EXTENT_REG && + extent_type != BTRFS_FILE_EXTENT_PREALLOC) { + err |= FILE_EXTENT_ERROR; + error("root %llu EXTENT_DATA[%llu %llu] type bad", + root->objectid, fkey->objectid, fkey->offset); + return err; + } + + /* Check REG_EXTENT/PREALLOC_EXTENT */ + disk_bytenr = btrfs_file_extent_disk_bytenr(node, fi); + disk_num_bytes = btrfs_file_extent_disk_num_bytes(node, fi); + 
extent_num_bytes = btrfs_file_extent_num_bytes(node, fi); + extent_offset = btrfs_file_extent_offset(node, fi); + compressed = btrfs_file_extent_compression(node, fi); + is_hole = (disk_bytenr == 0) && (disk_num_bytes == 0); + + /* + * Check EXTENT_DATA csum + * + * For plain (uncompressed) extent, we should only check the range + * we're referring to, as it's possible that part of prealloc extent + * has been written, and has csum: + * + * |<--- Original large preallocated extent A ---->| + * |<- Prealloc File Extent ->|<- Regular Extent ->| + * No csum Has csum + * + * For compressed extent, we should check the whole range. + */ + if (!compressed) { + search_start = disk_bytenr + extent_offset; + search_len = extent_num_bytes; + } else { + search_start = disk_bytenr; + search_len = disk_num_bytes; + } + ret = count_csum_range(root->fs_info, search_start, search_len, &csum_found); + if (csum_found > 0 && nodatasum) { + err |= ODD_CSUM_ITEM; + error("root %llu EXTENT_DATA[%llu %llu] nodatasum shouldn't have datasum", + root->objectid, fkey->objectid, fkey->offset); + } else if (extent_type == BTRFS_FILE_EXTENT_REG && !nodatasum && + !is_hole && (ret < 0 || csum_found < search_len)) { + err |= CSUM_ITEM_MISSING; + error("root %llu EXTENT_DATA[%llu %llu] csum missing, have: %llu, expected: %llu", + root->objectid, fkey->objectid, fkey->offset, + csum_found, search_len); + } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC && csum_found > 0) { + err |= ODD_CSUM_ITEM; + error("root %llu EXTENT_DATA[%llu %llu] prealloc shouldn't have csum, but has: %llu", + root->objectid, fkey->objectid, fkey->offset, csum_found); + } + + /* Check EXTENT_DATA hole */ + if (!no_holes && *end != fkey->offset) { + if (repair) + ret = punch_extent_hole(root, fkey->objectid, + *end, fkey->offset - *end); + if (!repair || ret) { + err |= FILE_EXTENT_ERROR; + error( +"root %llu EXTENT_DATA[%llu %llu] gap exists, expected: EXTENT_DATA[%llu %llu]", + root->objectid, fkey->objectid, 
fkey->offset, + fkey->objectid, *end); + } + } + + *end += extent_num_bytes; + if (!is_hole) + *size += extent_num_bytes; + + return err; +} + +static int __count_dir_isize(struct btrfs_root *root, u64 ino, int type, + u64 *size_ret) +{ + struct btrfs_key key; + struct btrfs_path path; + u32 len; + struct btrfs_dir_item *di; + int ret; + int cur = 0; + int total = 0; + + ASSERT(size_ret); + *size_ret = 0; + + key.objectid = ino; + key.type = type; + key.offset = (u64)-1; + + btrfs_init_path(&path); + ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0); + if (ret < 0) { + ret = -EIO; + goto out; + } + /* if found, go to spacial case */ + if (ret == 0) + goto special_case; + +loop: + ret = btrfs_previous_item(root, &path, ino, type); + + if (ret) { + ret = 0; + goto out; + } + +special_case: + di = btrfs_item_ptr(path.nodes[0], path.slots[0], struct btrfs_dir_item); + cur = 0; + total = btrfs_item_size_nr(path.nodes[0], path.slots[0]); + + while (cur < total) { + len = btrfs_dir_name_len(path.nodes[0], di); + if (len > BTRFS_NAME_LEN) + len = BTRFS_NAME_LEN; + *size_ret += len; + + len += btrfs_dir_data_len(path.nodes[0], di); + len += sizeof(*di); + di = (struct btrfs_dir_item *)((char *)di + len); + cur += len; + } + goto loop; + +out: + btrfs_release_path(&path); + return ret; +} + +static int count_dir_isize(struct btrfs_root *root, u64 ino, u64 *size) +{ + u64 item_size; + u64 index_size; + int ret; + + ASSERT(size); + ret = __count_dir_isize(root, ino, BTRFS_DIR_ITEM_KEY, &item_size); + if (ret) + goto out; + + ret = __count_dir_isize(root, ino, BTRFS_DIR_INDEX_KEY, &index_size); + if (ret) + goto out; + + *size = item_size + index_size; + +out: + if (ret) + error("failed to count root %llu INODE[%llu] root size", + root->objectid, ino); + return ret; +} + +/* + * Set inode item nbytes to @nbytes + * + * Returns 0 on success + * Returns != 0 on error + */ +static int repair_inode_nbytes_lowmem(struct btrfs_root *root, + struct btrfs_path *path, + u64 ino, 
u64 nbytes) +{ + struct btrfs_trans_handle *trans; + struct btrfs_inode_item *ii; + struct btrfs_key key; + struct btrfs_key research_key; + int err = 0; + int ret; + + btrfs_item_key_to_cpu(path->nodes[0], &research_key, path->slots[0]); + + key.objectid = ino; + key.type = BTRFS_INODE_ITEM_KEY; + key.offset = 0; + + trans = btrfs_start_transaction(root, 1); + if (IS_ERR(trans)) { + ret = PTR_ERR(trans); + err |= ret; + goto out; + } + + btrfs_release_path(path); + ret = btrfs_search_slot(trans, root, &key, path, 0, 1); + if (ret > 0) + ret = -ENOENT; + if (ret) { + err |= ret; + goto fail; + } + + ii = btrfs_item_ptr(path->nodes[0], path->slots[0], + struct btrfs_inode_item); + btrfs_set_inode_nbytes(path->nodes[0], ii, nbytes); + btrfs_mark_buffer_dirty(path->nodes[0]); +fail: + btrfs_commit_transaction(trans, root); +out: + if (ret) + error("failed to set nbytes in inode %llu root %llu", + ino, root->root_key.objectid); + else + printf("Set nbytes in inode item %llu root %llu\n to %llu", ino, + root->root_key.objectid, nbytes); + + /* research path */ + btrfs_release_path(path); + ret = btrfs_search_slot(NULL, root, &research_key, path, 0, 0); + err |= ret; + + return err; +} + +/* + * Set directory inode isize to @isize. + * + * Returns 0 on success. + * Returns != 0 on error. 
+ */ +static int repair_dir_isize_lowmem(struct btrfs_root *root, + struct btrfs_path *path, + u64 ino, u64 isize) +{ + struct btrfs_trans_handle *trans; + struct btrfs_inode_item *ii; + struct btrfs_key key; + struct btrfs_key research_key; + int ret; + int err = 0; + + btrfs_item_key_to_cpu(path->nodes[0], &research_key, path->slots[0]); + + key.objectid = ino; + key.type = BTRFS_INODE_ITEM_KEY; + key.offset = 0; + + trans = btrfs_start_transaction(root, 1); + if (IS_ERR(trans)) { + ret = PTR_ERR(trans); + err |= ret; + goto out; + } + + btrfs_release_path(path); + ret = btrfs_search_slot(trans, root, &key, path, 0, 1); + if (ret > 0) + ret = -ENOENT; + if (ret) { + err |= ret; + goto fail; + } + + ii = btrfs_item_ptr(path->nodes[0], path->slots[0], + struct btrfs_inode_item); + btrfs_set_inode_size(path->nodes[0], ii, isize); + btrfs_mark_buffer_dirty(path->nodes[0]); +fail: + btrfs_commit_transaction(trans, root); +out: + if (ret) + error("failed to set isize in inode %llu root %llu", + ino, root->root_key.objectid); + else + printf("Set isize in inode %llu root %llu to %llu\n", + ino, root->root_key.objectid, isize); + + btrfs_release_path(path); + ret = btrfs_search_slot(NULL, root, &research_key, path, 0, 0); + err |= ret; + + return err; +} + +/* + * Wrapper function for btrfs_add_orphan_item(). + * + * Returns 0 on success. + * Returns != 0 on error. 
+ */ +static int repair_inode_orphan_item_lowmem(struct btrfs_root *root, + struct btrfs_path *path, u64 ino) +{ + struct btrfs_trans_handle *trans; + struct btrfs_key research_key; + int ret; + int err = 0; + + btrfs_item_key_to_cpu(path->nodes[0], &research_key, path->slots[0]); + + trans = btrfs_start_transaction(root, 1); + if (IS_ERR(trans)) { + ret = PTR_ERR(trans); + err |= ret; + goto out; + } + + btrfs_release_path(path); + ret = btrfs_add_orphan_item(trans, root, path, ino); + err |= ret; + btrfs_commit_transaction(trans, root); +out: + if (ret) + error("failed to add inode %llu as orphan item root %llu", + ino, root->root_key.objectid); + else + printf("Added inode %llu as orphan item root %llu\n", + ino, root->root_key.objectid); + + btrfs_release_path(path); + ret = btrfs_search_slot(NULL, root, &research_key, path, 0, 0); + err |= ret; + + return err; +} + +/* Set inode_item nlink to @ref_count. + * If @ref_count == 0, move it to "lost+found" and increase @ref_count. + * + * Returns 0 on success + */ +static int repair_inode_nlinks_lowmem(struct btrfs_root *root, + struct btrfs_path *path, u64 ino, + const char *name, u32 namelen, + u64 ref_count, u8 filetype, u64 *nlink) +{ + struct btrfs_trans_handle *trans; + struct btrfs_inode_item *ii; + struct btrfs_key key; + struct btrfs_key old_key; + char namebuf[BTRFS_NAME_LEN] = {0}; + int name_len; + int ret; + int ret2; + + /* save the key */ + btrfs_item_key_to_cpu(path->nodes[0], &old_key, path->slots[0]); + + if (name && namelen) { + ASSERT(namelen <= BTRFS_NAME_LEN); + memcpy(namebuf, name, namelen); + name_len = namelen; + } else { + sprintf(namebuf, "%llu", ino); + name_len = count_digits(ino); + printf("Can't find file name for inode %llu, use %s instead\n", + ino, namebuf); + } + + trans = btrfs_start_transaction(root, 1); + if (IS_ERR(trans)) { + ret = PTR_ERR(trans); + goto out; + } + + btrfs_release_path(path); + /* if refs is 0, put it into lostfound */ + if (ref_count == 0) { + ret = 
link_inode_to_lostfound(trans, root, path, ino, namebuf, + name_len, filetype, &ref_count); + if (ret) + goto fail; + } + + /* reset inode_item's nlink to ref_count */ + key.objectid = ino; + key.type = BTRFS_INODE_ITEM_KEY; + key.offset = 0; + + btrfs_release_path(path); + ret = btrfs_search_slot(trans, root, &key, path, 0, 1); + if (ret > 0) + ret = -ENOENT; + if (ret) + goto fail; + + ii = btrfs_item_ptr(path->nodes[0], path->slots[0], + struct btrfs_inode_item); + btrfs_set_inode_nlink(path->nodes[0], ii, ref_count); + btrfs_mark_buffer_dirty(path->nodes[0]); + + if (nlink) + *nlink = ref_count; +fail: + btrfs_commit_transaction(trans, root); +out: + if (ret) + error( + "fail to repair nlink of inode %llu root %llu name %s filetype %u", + root->objectid, ino, namebuf, filetype); + else + printf("Fixed nlink of inode %llu root %llu name %s filetype %u\n", + root->objectid, ino, namebuf, filetype); + + /* research */ + btrfs_release_path(path); + ret2 = btrfs_search_slot(NULL, root, &old_key, path, 0, 0); + if (ret2 < 0) + return ret |= ret2; + return ret; +} + +/* + * Check INODE_ITEM and related ITEMs (the same inode number) + * 1. check link count + * 2. check inode ref/extref + * 3. check dir item/index + * + * @ext_ref: the EXTENDED_IREF feature + * + * Return 0 if no error occurred. 
+ * Return >0 for error or hit the traversal is done(by error bitmap) + */ +static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path, + unsigned int ext_ref) +{ + struct extent_buffer *node; + struct btrfs_inode_item *ii; + struct btrfs_key key; + struct btrfs_key last_key; + u64 inode_id; + u32 mode; + u64 nlink; + u64 nbytes; + u64 isize; + u64 size = 0; + u64 refs = 0; + u64 extent_end = 0; + u64 extent_size = 0; + unsigned int dir; + unsigned int nodatasum; + int slot; + int ret; + int err = 0; + char namebuf[BTRFS_NAME_LEN] = {0}; + u32 name_len = 0; + + node = path->nodes[0]; + slot = path->slots[0]; + + btrfs_item_key_to_cpu(node, &key, slot); + inode_id = key.objectid; + + if (inode_id == BTRFS_ORPHAN_OBJECTID) { + ret = btrfs_next_item(root, path); + if (ret > 0) + err |= LAST_ITEM; + return err; + } + + ii = btrfs_item_ptr(node, slot, struct btrfs_inode_item); + isize = btrfs_inode_size(node, ii); + nbytes = btrfs_inode_nbytes(node, ii); + mode = btrfs_inode_mode(node, ii); + dir = imode_to_type(mode) == BTRFS_FT_DIR; + nlink = btrfs_inode_nlink(node, ii); + nodatasum = btrfs_inode_flags(node, ii) & BTRFS_INODE_NODATASUM; + + while (1) { + btrfs_item_key_to_cpu(path->nodes[0], &last_key, path->slots[0]); + ret = btrfs_next_item(root, path); + if (ret < 0) { + /* out will fill 'err' rusing current statistics */ + goto out; + } else if (ret > 0) { + err |= LAST_ITEM; + goto out; + } + + node = path->nodes[0]; + slot = path->slots[0]; + btrfs_item_key_to_cpu(node, &key, slot); + if (key.objectid != inode_id) + goto out; + + switch (key.type) { + case BTRFS_INODE_REF_KEY: + ret = check_inode_ref(root, &key, path, namebuf, + &name_len, &refs, mode); + err |= ret; + break; + case BTRFS_INODE_EXTREF_KEY: + if (key.type == BTRFS_INODE_EXTREF_KEY && !ext_ref) + warning("root %llu EXTREF[%llu %llu] isn't supported", + root->objectid, key.objectid, + key.offset); + ret = check_inode_extref(root, &key, node, slot, &refs, + mode); + err |= ret; + 
break; + case BTRFS_DIR_ITEM_KEY: + case BTRFS_DIR_INDEX_KEY: + if (!dir) { + warning("root %llu INODE[%llu] mode %u shouldn't have DIR_INDEX[%llu %llu]", + root->objectid, inode_id, + imode_to_type(mode), key.objectid, + key.offset); + } + ret = check_dir_item(root, &key, path, &size, ext_ref); + err |= ret; + break; + case BTRFS_EXTENT_DATA_KEY: + if (dir) { + warning("root %llu DIR INODE[%llu] shouldn't EXTENT_DATA[%llu %llu]", + root->objectid, inode_id, key.objectid, + key.offset); + } + ret = check_file_extent(root, &key, node, slot, + nodatasum, &extent_size, + &extent_end); + err |= ret; + break; + case BTRFS_XATTR_ITEM_KEY: + break; + default: + error("ITEM[%llu %u %llu] UNKNOWN TYPE", + key.objectid, key.type, key.offset); + } + } + +out: + if (err & LAST_ITEM) { + btrfs_release_path(path); + ret = btrfs_search_slot(NULL, root, &last_key, path, 0, 0); + if (ret) + return err; + } + + /* verify INODE_ITEM nlink/isize/nbytes */ + if (dir) { + if (repair && (err & DIR_COUNT_AGAIN)) { + err &= ~DIR_COUNT_AGAIN; + count_dir_isize(root, inode_id, &size); + } + + if ((nlink != 1 || refs != 1) && repair) { + ret = repair_inode_nlinks_lowmem(root, path, inode_id, + namebuf, name_len, refs, imode_to_type(mode), + &nlink); + } + + if (nlink != 1) { + err |= LINK_COUNT_ERROR; + error("root %llu DIR INODE[%llu] shouldn't have more than one link(%llu)", + root->objectid, inode_id, nlink); + } + + /* + * Just a warning, as dir inode nbytes is just an + * instructive value. 
+ */ + if (!IS_ALIGNED(nbytes, root->fs_info->nodesize)) { + warning("root %llu DIR INODE[%llu] nbytes should be aligned to %u", + root->objectid, inode_id, + root->fs_info->nodesize); + } + + if (isize != size) { + if (repair) + ret = repair_dir_isize_lowmem(root, path, + inode_id, size); + if (!repair || ret) { + err |= ISIZE_ERROR; + error( + "root %llu DIR INODE [%llu] size %llu not equal to %llu", + root->objectid, inode_id, isize, size); + } + } + } else { + if (nlink != refs) { + if (repair) + ret = repair_inode_nlinks_lowmem(root, path, + inode_id, namebuf, name_len, refs, + imode_to_type(mode), &nlink); + if (!repair || ret) { + err |= LINK_COUNT_ERROR; + error( + "root %llu INODE[%llu] nlink(%llu) not equal to inode_refs(%llu)", + root->objectid, inode_id, nlink, refs); + } + } else if (!nlink) { + if (repair) + ret = repair_inode_orphan_item_lowmem(root, + path, inode_id); + if (!repair || ret) { + err |= ORPHAN_ITEM; + error("root %llu INODE[%llu] is orphan item", + root->objectid, inode_id); + } + } + + if (!nbytes && !no_holes && extent_end < isize) { + if (repair) + ret = punch_extent_hole(root, inode_id, + extent_end, isize - extent_end); + if (!repair || ret) { + err |= NBYTES_ERROR; + error( + "root %llu INODE[%llu] size %llu should have a file extent hole", + root->objectid, inode_id, isize); + } + } + + if (nbytes != extent_size) { + if (repair) + ret = repair_inode_nbytes_lowmem(root, path, + inode_id, extent_size); + if (!repair || ret) { + err |= NBYTES_ERROR; + error( + "root %llu INODE[%llu] nbytes %llu not equal to extent_size %llu", + root->objectid, inode_id, nbytes, + extent_size); + } + } + } + + if (err & LAST_ITEM) + btrfs_next_item(root, path); + return err; +} + +/* + * Returns >0 Found error, not fatal, should continue + * Returns <0 Fatal error, must exit the whole check + * Returns 0 No errors found + */ +static int process_one_leaf_v2(struct btrfs_root *root, struct btrfs_path *path, + struct node_refs *nrefs, int *level, int 
ext_ref) +{ + struct extent_buffer *cur = path->nodes[0]; + struct btrfs_key key; + u64 cur_bytenr; + u32 nritems; + u64 first_ino = 0; + int root_level = btrfs_header_level(root->node); + int i; + int ret = 0; /* Final return value */ + int err = 0; /* Positive error bitmap */ + + cur_bytenr = cur->start; + + /* skip to first inode item or the first inode number change */ + nritems = btrfs_header_nritems(cur); + for (i = 0; i < nritems; i++) { + btrfs_item_key_to_cpu(cur, &key, i); + if (i == 0) + first_ino = key.objectid; + if (key.type == BTRFS_INODE_ITEM_KEY || + (first_ino && first_ino != key.objectid)) + break; + } + if (i == nritems) { + path->slots[0] = nritems; + return 0; + } + path->slots[0] = i; + +again: + err |= check_inode_item(root, path, ext_ref); + + /* modify cur since check_inode_item may change path */ + cur = path->nodes[0]; + + if (err & LAST_ITEM) + goto out; + + /* still have inode items in thie leaf */ + if (cur->start == cur_bytenr) + goto again; + + /* + * we have switched to another leaf, above nodes may + * have changed, here walk down the path, if a node + * or leaf is shared, check whether we can skip this + * node or leaf. + */ + for (i = root_level; i >= 0; i--) { + if (path->nodes[i]->start == nrefs->bytenr[i]) + continue; + + ret = update_nodes_refs(root, path->nodes[i]->start, + path->nodes[i], nrefs, i, 0); + if (ret) + goto out; + + if (!nrefs->need_check[i]) { + *level += 1; + break; + } + } + + for (i = 0; i < *level; i++) { + free_extent_buffer(path->nodes[i]); + path->nodes[i] = NULL; + } +out: + err &= ~LAST_ITEM; + if (err && !ret) + ret = err; + return ret; +} + +/* + * @level if @level == -1 means extent data item + * else normal treeblocl. 
+ */ +static int should_check_extent_strictly(struct btrfs_root *root, + struct node_refs *nrefs, int level) +{ + int root_level = btrfs_header_level(root->node); + + if (level > root_level || level < -1) + return 1; + if (level == root_level) + return 1; + /* + * if the upper node is marked full backref, it should contain shared + * backref of the parent (except owner == root->objectid). + */ + while (++level <= root_level) + if (nrefs->refs[level] > 1) + return 0; + + return 1; +} + +static int check_extent_inline_ref(struct extent_buffer *eb, + struct btrfs_key *key, struct btrfs_extent_inline_ref *iref) +{ + int ret; + u8 type = btrfs_extent_inline_ref_type(eb, iref); + + switch (type) { + case BTRFS_TREE_BLOCK_REF_KEY: + case BTRFS_EXTENT_DATA_REF_KEY: + case BTRFS_SHARED_BLOCK_REF_KEY: + case BTRFS_SHARED_DATA_REF_KEY: + ret = 0; + break; + default: + error("extent[%llu %u %llu] has unknown ref type: %d", + key->objectid, key->type, key->offset, type); + ret = UNKNOWN_TYPE; + break; + } + + return ret; +} + +/* + * Check backrefs of a tree block given by @bytenr or @eb. 
+ * + * @root: the root containing the @bytenr or @eb + * @eb: tree block extent buffer, can be NULL + * @bytenr: bytenr of the tree block to search + * @level: tree level of the tree block + * @owner: owner of the tree block + * + * Return >0 for any error found and output error message + * Return 0 for no error found + */ +static int check_tree_block_ref(struct btrfs_root *root, + struct extent_buffer *eb, u64 bytenr, + int level, u64 owner, struct node_refs *nrefs) +{ + struct btrfs_key key; + struct btrfs_root *extent_root = root->fs_info->extent_root; + struct btrfs_path path; + struct btrfs_extent_item *ei; + struct btrfs_extent_inline_ref *iref; + struct extent_buffer *leaf; + unsigned long end; + unsigned long ptr; + int slot; + int skinny_level; + int root_level = btrfs_header_level(root->node); + int type; + u32 nodesize = root->fs_info->nodesize; + u32 item_size; + u64 offset; + int found_ref = 0; + int err = 0; + int ret; + int strict = 1; + int parent = 0; + + btrfs_init_path(&path); + key.objectid = bytenr; + if (btrfs_fs_incompat(root->fs_info, SKINNY_METADATA)) + key.type = BTRFS_METADATA_ITEM_KEY; + else + key.type = BTRFS_EXTENT_ITEM_KEY; + key.offset = (u64)-1; + + /* Search for the backref in extent tree */ + ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0); + if (ret < 0) { + err |= BACKREF_MISSING; + goto out; + } + ret = btrfs_previous_extent_item(extent_root, &path, bytenr); + if (ret) { + err |= BACKREF_MISSING; + goto out; + } + + leaf = path.nodes[0]; + slot = path.slots[0]; + btrfs_item_key_to_cpu(leaf, &key, slot); + + ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item); + + if (key.type == BTRFS_METADATA_ITEM_KEY) { + skinny_level = (int)key.offset; + iref = (struct btrfs_extent_inline_ref *)(ei + 1); + } else { + struct btrfs_tree_block_info *info; + + info = (struct btrfs_tree_block_info *)(ei + 1); + skinny_level = btrfs_tree_block_level(leaf, info); + iref = (struct btrfs_extent_inline_ref *)(info + 1); + } + + + 
if (eb) { + u64 header_gen; + u64 extent_gen; + + /* + * Due to the feature of shared tree blocks, if the upper node + * is a fs root or shared node, the extent of checked node may + * not be updated until the next CoW. + */ + if (nrefs) + strict = should_check_extent_strictly(root, nrefs, + level); + if (!(btrfs_extent_flags(leaf, ei) & + BTRFS_EXTENT_FLAG_TREE_BLOCK)) { + error( + "extent[%llu %u] backref type mismatch, missing bit: %llx", + key.objectid, nodesize, + BTRFS_EXTENT_FLAG_TREE_BLOCK); + err = BACKREF_MISMATCH; + } + header_gen = btrfs_header_generation(eb); + extent_gen = btrfs_extent_generation(leaf, ei); + if (header_gen != extent_gen) { + error( + "extent[%llu %u] backref generation mismatch, wanted: %llu, have: %llu", + key.objectid, nodesize, header_gen, + extent_gen); + err = BACKREF_MISMATCH; + } + if (level != skinny_level) { + error( + "extent[%llu %u] level mismatch, wanted: %u, have: %u", + key.objectid, nodesize, level, skinny_level); + err = BACKREF_MISMATCH; + } + if (!is_fstree(owner) && btrfs_extent_refs(leaf, ei) != 1) { + error( + "extent[%llu %u] is referred by other roots than %llu", + key.objectid, nodesize, root->objectid); + err = BACKREF_MISMATCH; + } + } + + /* + * Iterate the extent/metadata item to find the exact backref + */ + item_size = btrfs_item_size_nr(leaf, slot); + ptr = (unsigned long)iref; + end = (unsigned long)ei + item_size; + + while (ptr < end) { + iref = (struct btrfs_extent_inline_ref *)ptr; + type = btrfs_extent_inline_ref_type(leaf, iref); + offset = btrfs_extent_inline_ref_offset(leaf, iref); + + ret = check_extent_inline_ref(leaf, &key, iref); + if (ret) { + err |= ret; + break; + } + if (type == BTRFS_TREE_BLOCK_REF_KEY) { + if (offset == root->objectid) + found_ref = 1; + if (!strict && owner == offset) + found_ref = 1; + } else if (type == BTRFS_SHARED_BLOCK_REF_KEY) { + /* + * Backref of tree reloc root points to itself, no need + * to check backref any more. 
+ * + * This may be an error of loop backref, but extent tree + * checker should have already handled it. + * Here we only need to avoid infinite iteration. + */ + if (offset == bytenr) { + found_ref = 1; + } else { + /* + * Check if the backref points to valid + * referencer + */ + found_ref = !check_tree_block_ref( root, NULL, + offset, level + 1, owner, + NULL); + } + } + + if (found_ref) + break; + ptr += btrfs_extent_inline_ref_size(type); + } + + /* + * Inlined extent item doesn't have what we need, check + * TREE_BLOCK_REF_KEY + */ + if (!found_ref) { + btrfs_release_path(&path); + key.objectid = bytenr; + key.type = BTRFS_TREE_BLOCK_REF_KEY; + key.offset = root->objectid; + + ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0); + if (!ret) + found_ref = 1; + } + /* + * Finally check SHARED BLOCK REF, any found will be good + * Here we're not doing comprehensive extent backref checking, + * only need to ensure there is some extent referring to this + * tree block. + */ + if (!found_ref) { + btrfs_release_path(&path); + key.objectid = bytenr; + key.type = BTRFS_SHARED_BLOCK_REF_KEY; + key.offset = (u64)-1; + + ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0); + if (ret < 0) { + err |= BACKREF_MISSING; + goto out; + } + ret = btrfs_previous_extent_item(extent_root, &path, bytenr); + if (ret) { + err |= BACKREF_MISSING; + goto out; + } + found_ref = 1; + } + if (!found_ref) + err |= BACKREF_MISSING; +out: + btrfs_release_path(&path); + if (nrefs && strict && + level < root_level && nrefs->full_backref[level + 1]) + parent = nrefs->bytenr[level + 1]; + if (eb && (err & BACKREF_MISSING)) + error( + "extent[%llu %u] backref lost (owner: %llu, level: %u) %s %llu", + bytenr, nodesize, owner, level, + parent ? "parent" : "root", + parent ? parent : root->objectid); + return err; +} + +/* + * If @err contains BACKREF_MISSING then add extent of the + * file_extent_data_item. + * + * Returns error bits after reapir. 
+ *
+ * @trans:	transaction handle used for the extent tree modifications
+ * @root:	tree that owns the EXTENT_DATA item
+ * @pathp:	path pointing at the EXTENT_DATA item; it is only read here
+ * @nrefs:	when nrefs->full_backref[0] is set, the bytenr of the leaf in
+ *		@pathp is passed as parent to btrfs_inc_extent_ref()
+ * @err:	error bits found for this item by the checker
+ */
+static int repair_extent_data_item(struct btrfs_trans_handle *trans,
+				   struct btrfs_root *root,
+				   struct btrfs_path *pathp,
+				   struct node_refs *nrefs,
+				   int err)
+{
+	struct btrfs_file_extent_item *fi;
+	struct btrfs_key fi_key;
+	struct btrfs_key key;
+	struct btrfs_extent_item *ei;
+	struct btrfs_path path;
+	struct btrfs_root *extent_root = root->fs_info->extent_root;
+	struct extent_buffer *eb;
+	struct extent_buffer *leaf;
+	u64 size;
+	u64 disk_bytenr;
+	u64 num_bytes;
+	u64 parent;
+	u64 offset;
+	u64 extent_offset;
+	u64 file_offset;
+	u64 generation;
+	int slot;
+	int ret = 0;
+
+	eb = pathp->nodes[0];
+	slot = pathp->slots[0];
+	btrfs_item_key_to_cpu(eb, &fi_key, slot);
+	fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
+
+	/* Inline extents and holes have no extent item to repair */
+	if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE ||
+	    btrfs_file_extent_disk_bytenr(eb, fi) == 0)
+		return err;
+
+	file_offset = fi_key.offset;
+	generation = btrfs_file_extent_generation(eb, fi);
+	disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
+	num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
+	extent_offset = btrfs_file_extent_offset(eb, fi);
+	/* Data backref offset is file offset minus offset into the extent */
+	offset = file_offset - extent_offset;
+
+	/* now repair only adds backref */
+	if ((err & BACKREF_MISSING) == 0)
+		return err;
+
+	/* search extent item */
+	key.objectid = disk_bytenr;
+	key.type = BTRFS_EXTENT_ITEM_KEY;
+	key.offset = num_bytes;
+
+	btrfs_init_path(&path);
+	ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
+	if (ret < 0) {
+		ret = -EIO;
+		goto out;
+	}
+
+	/* insert an extent item */
+	if (ret > 0) {
+		key.objectid = disk_bytenr;
+		key.type = BTRFS_EXTENT_ITEM_KEY;
+		key.offset = num_bytes;
+		size = sizeof(*ei);
+
+		btrfs_release_path(&path);
+		ret = btrfs_insert_empty_item(trans, extent_root, &path, &key,
+					      size);
+		if (ret)
+			goto out;
+		/*
+		 * Use a separate pointer for the extent tree leaf: @eb must
+		 * keep pointing at the fs tree leaf, whose bytenr is needed
+		 * below as the shared backref parent.
+		 */
+		leaf = path.nodes[0];
+		ei = btrfs_item_ptr(leaf, path.slots[0],
+				    struct btrfs_extent_item);
+
+		/* Start at 0 refs, btrfs_inc_extent_ref() below adds ours */
+		btrfs_set_extent_refs(leaf, ei, 0);
+		btrfs_set_extent_generation(leaf, ei, generation);
+		btrfs_set_extent_flags(leaf, ei, BTRFS_EXTENT_FLAG_DATA);
+
+		btrfs_mark_buffer_dirty(leaf);
+		ret = btrfs_update_block_group(extent_root, disk_bytenr,
+					       num_bytes, 1, 0);
+		if (ret)
+			goto out;
+	}
+	/* Drop the extent tree path in both the found and inserted case */
+	btrfs_release_path(&path);
+
+	if (nrefs->full_backref[0])
+		parent = btrfs_header_bytenr(eb);
+	else
+		parent = 0;
+
+	ret = btrfs_inc_extent_ref(trans, root, disk_bytenr, num_bytes, parent,
+				   root->objectid,
+		   parent ? BTRFS_FIRST_FREE_OBJECTID : fi_key.objectid,
+				   offset);
+	if (ret) {
+		error(
+		"failed to increase extent data backref[%llu %llu] root %llu",
+		      disk_bytenr, num_bytes, root->objectid);
+		goto out;
+	} else {
+		printf("Add one extent data backref [%llu %llu]\n",
+		       disk_bytenr, num_bytes);
+	}
+
+	err &= ~BACKREF_MISSING;
+out:
+	/* Error exits may still hold extent tree buffers in the local path */
+	btrfs_release_path(&path);
+	if (ret)
+		error("can't repair root %llu extent data item[%llu %llu]",
+		      root->objectid, disk_bytenr, num_bytes);
+	return err;
+}
+
+/*
+ * Check EXTENT_DATA item, mainly for its data backref in extent tree
+ *
+ * @root:		tree that owns the EXTENT_DATA item
+ * @pathp:		path pointing at the EXTENT_DATA item (only read)
+ * @nrefs:		passed to should_check_extent_strictly()
+ * @account_bytes:	if set, aligned sizes are added to
+ *			data_bytes_allocated / data_bytes_referenced
+ *
+ * Return >0 any error found and output error message
+ * Return 0 for no error found
+ */
+static int check_extent_data_item(struct btrfs_root *root,
+				  struct btrfs_path *pathp,
+				  struct node_refs *nrefs,  int account_bytes)
+{
+	struct btrfs_file_extent_item *fi;
+	struct extent_buffer *eb = pathp->nodes[0];
+	struct btrfs_path path;
+	struct btrfs_root *extent_root = root->fs_info->extent_root;
+	struct btrfs_key fi_key;
+	struct btrfs_key dbref_key;
+	struct extent_buffer *leaf;
+	struct btrfs_extent_item *ei;
+	struct btrfs_extent_inline_ref *iref;
+	struct btrfs_extent_data_ref *dref;
+	u64 owner;
+	u64 disk_bytenr;
+	u64 disk_num_bytes;
+	u64 extent_num_bytes;
+	u64 extent_flags;
+	u64 offset;
+	u32 item_size;
+	unsigned long end;
+	unsigned long ptr;
+	int type;
+	int found_dbackref = 0;
+	int slot = pathp->slots[0];
+	int err = 0;
+	int ret;
+	int strict;
+
+	btrfs_item_key_to_cpu(eb, &fi_key, slot);
+	fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
+
+	/* Nothing to check for hole and inline data extents */
+	if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE ||
+	    btrfs_file_extent_disk_bytenr(eb, fi) == 0)
+		return 0;
+
+	disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
+	disk_num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
+	extent_num_bytes = btrfs_file_extent_num_bytes(eb, fi);
+	offset = btrfs_file_extent_offset(eb, fi);
+
+	/* Check unaligned disk_num_bytes and num_bytes */
+	if (!IS_ALIGNED(disk_num_bytes, root->fs_info->sectorsize)) {
+		error(
+"file extent [%llu, %llu] has unaligned disk num bytes: %llu, should be aligned to %u",
+			fi_key.objectid, fi_key.offset, disk_num_bytes,
+			root->fs_info->sectorsize);
+		err |= BYTES_UNALIGNED;
+	} else if (account_bytes) {
+		data_bytes_allocated += disk_num_bytes;
+	}
+	if (!IS_ALIGNED(extent_num_bytes, root->fs_info->sectorsize)) {
+		error(
+"file extent [%llu, %llu] has unaligned num bytes: %llu, should be aligned to %u",
+			fi_key.objectid, fi_key.offset, extent_num_bytes,
+			root->fs_info->sectorsize);
+		err |= BYTES_UNALIGNED;
+	} else if (account_bytes) {
+		data_bytes_referenced += extent_num_bytes;
+	}
+	owner = btrfs_header_owner(eb);
+
+	/* Check the extent item of the file extent in extent tree */
+	btrfs_init_path(&path);
+	dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
+	dbref_key.type = BTRFS_EXTENT_ITEM_KEY;
+	dbref_key.offset = btrfs_file_extent_disk_num_bytes(eb, fi);
+
+	ret = btrfs_search_slot(NULL, extent_root, &dbref_key, &path, 0, 0);
+	/* No exact extent item: reported as BACKREF_MISSING at out */
+	if (ret)
+		goto out;
+
+	leaf = path.nodes[0];
+	slot = path.slots[0];
+	ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
+
+	extent_flags = btrfs_extent_flags(leaf, ei);
+
+	if (!(extent_flags & BTRFS_EXTENT_FLAG_DATA)) {
+		error(
+	    "extent[%llu %llu] backref type mismatch, wanted bit: %llx",
+		      disk_bytenr, disk_num_bytes,
+		      BTRFS_EXTENT_FLAG_DATA);
+		err |= BACKREF_MISMATCH;
+	}
+
+	/* Check data backref inside that extent item */
+	item_size = btrfs_item_size_nr(leaf, path.slots[0]);
+	iref = (struct btrfs_extent_inline_ref *)(ei + 1);
+	ptr = (unsigned long)iref;
+	end = (unsigned long)ei + item_size;
+	strict = should_check_extent_strictly(root, nrefs, -1);
+
+	while (ptr < end) {
+		u64 ref_root;
+		u64 ref_objectid;
+		u64 ref_offset;
+		bool match = false;
+
+		iref = (struct btrfs_extent_inline_ref *)ptr;
+		type = btrfs_extent_inline_ref_type(leaf, iref);
+		dref = (struct btrfs_extent_data_ref *)(&iref->offset);
+
+		ret = check_extent_inline_ref(leaf, &dbref_key, iref);
+		if (ret) {
+			err |= ret;
+			break;
+		}
+		if (type == BTRFS_EXTENT_DATA_REF_KEY) {
+			ref_root = btrfs_extent_data_ref_root(leaf, dref);
+			ref_objectid = btrfs_extent_data_ref_objectid(leaf, dref);
+			ref_offset = btrfs_extent_data_ref_offset(leaf, dref);
+
+			if (ref_objectid == fi_key.objectid &&
+			    ref_offset == fi_key.offset - offset)
+				match = true;
+			/*
+			 * In non-strict mode a ref rooted at the leaf owner
+			 * is accepted as well.
+			 */
+			if (ref_root == root->objectid && match)
+				found_dbackref = 1;
+			else if (!strict && owner == ref_root && match)
+				found_dbackref = 1;
+		} else if (type == BTRFS_SHARED_DATA_REF_KEY) {
+			found_dbackref = !check_tree_block_ref(root, NULL,
+				btrfs_extent_inline_ref_offset(leaf, iref),
+				0, owner, NULL);
+		}
+
+		if (found_dbackref)
+			break;
+		ptr += btrfs_extent_inline_ref_size(type);
+	}
+
+	if (!found_dbackref) {
+		btrfs_release_path(&path);
+
+		/* Didn't find inlined data backref, try EXTENT_DATA_REF_KEY */
+		dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi);
+		dbref_key.type = BTRFS_EXTENT_DATA_REF_KEY;
+		dbref_key.offset = hash_extent_data_ref(root->objectid,
+				fi_key.objectid, fi_key.offset - offset);
+
+		ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
+					&dbref_key, &path, 0, 0);
+		if (!ret) {
+			found_dbackref = 1;
+			goto out;
+		}
+
+		btrfs_release_path(&path);
+
+		/*
+		 * Neither inlined nor EXTENT_DATA_REF found, try
+		 * SHARED_DATA_REF as last chance.
+		 */
+		dbref_key.objectid = disk_bytenr;
+		dbref_key.type = BTRFS_SHARED_DATA_REF_KEY;
+		dbref_key.offset = eb->start;
+
+		ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
+					&dbref_key, &path, 0, 0);
+		if (!ret) {
+			found_dbackref = 1;
+			goto out;
+		}
+	}
+
+out:
+	if (!found_dbackref)
+		err |= BACKREF_MISSING;
+	btrfs_release_path(&path);
+	if (err & BACKREF_MISSING) {
+		error("data extent[%llu %llu] backref lost",
+		      disk_bytenr, disk_num_bytes);
+	}
+	return err;
+}
+
+/*
+ * Check a block group item with its referencer (chunk) and its used space
+ * with extent/metadata item
+ *
+ * @eb/@slot locate the BLOCK_GROUP_ITEM.  The chunk tree is searched for the
+ * chunk item at the block group start, then all EXTENT_ITEM/METADATA_ITEM
+ * keys inside the block group range are summed up and compared with the
+ * "used" field of the item.
+ *
+ * Return 0 if nothing is wrong, otherwise a mask of error bits.
+ */
+static int check_block_group_item(struct btrfs_fs_info *fs_info,
+				  struct extent_buffer *eb, int slot)
+{
+	struct btrfs_root *extent_root = fs_info->extent_root;
+	struct btrfs_root *chunk_root = fs_info->chunk_root;
+	struct btrfs_block_group_item *bi;
+	struct btrfs_block_group_item bg_item;
+	struct btrfs_path path;
+	struct btrfs_key bg_key;
+	struct btrfs_key chunk_key;
+	struct btrfs_key extent_key;
+	struct btrfs_chunk *chunk;
+	struct extent_buffer *leaf;
+	struct btrfs_extent_item *ei;
+	u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
+	u64 flags;
+	u64 bg_flags;
+	u64 used;
+	u64 total = 0;
+	int ret;
+	int err = 0;
+
+	btrfs_item_key_to_cpu(eb, &bg_key, slot);
+	bi = btrfs_item_ptr(eb, slot, struct btrfs_block_group_item);
+	read_extent_buffer(eb, &bg_item, (unsigned long)bi, sizeof(bg_item));
+	used = btrfs_block_group_used(&bg_item);
+	bg_flags = btrfs_block_group_flags(&bg_item);
+
+	chunk_key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
+	chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
+	chunk_key.offset = bg_key.objectid;
+
+	btrfs_init_path(&path);
+	/* Search for the referencer chunk */
+	ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
+	if (ret) {
+		error(
+		"block group[%llu %llu] did not find the related chunk item",
+			bg_key.objectid, bg_key.offset);
+		err |= REFERENCER_MISSING;
+	} else {
+		chunk = btrfs_item_ptr(path.nodes[0], path.slots[0],
+					struct btrfs_chunk);
+		if (btrfs_chunk_length(path.nodes[0], chunk) !=
+						bg_key.offset) {
+			error(
+	"block group[%llu %llu] related chunk item length does not match",
+				bg_key.objectid, bg_key.offset);
+			err |= REFERENCER_MISMATCH;
+		}
+	}
+	btrfs_release_path(&path);
+
+	/* Search from the block group bytenr */
+	extent_key.objectid = bg_key.objectid;
+	extent_key.type = 0;
+	extent_key.offset = 0;
+
+	btrfs_init_path(&path);
+	ret = btrfs_search_slot(NULL, extent_root, &extent_key, &path, 0, 0);
+	if (ret < 0)
+		goto out;
+
+	/* Iterate extent tree to account used space */
+	while (1) {
+		leaf = path.nodes[0];
+
+		/* Search slot can point to the last item beyond leaf nritems */
+		if (path.slots[0] >= btrfs_header_nritems(leaf))
+			goto next;
+
+		btrfs_item_key_to_cpu(leaf, &extent_key, path.slots[0]);
+		/* Past the end of this block group, accounting is done */
+		if (extent_key.objectid >= bg_key.objectid + bg_key.offset)
+			break;
+
+		if (extent_key.type != BTRFS_METADATA_ITEM_KEY &&
+		    extent_key.type != BTRFS_EXTENT_ITEM_KEY)
+			goto next;
+		if (extent_key.objectid < bg_key.objectid)
+			goto next;
+
+		/* METADATA_ITEM keys carry no length, count one node */
+		if (extent_key.type == BTRFS_METADATA_ITEM_KEY)
+			total += nodesize;
+		else
+			total += extent_key.offset;
+
+		ei = btrfs_item_ptr(leaf, path.slots[0],
+				    struct btrfs_extent_item);
+		flags = btrfs_extent_flags(leaf, ei);
+		if (flags & BTRFS_EXTENT_FLAG_DATA) {
+			if (!(bg_flags & BTRFS_BLOCK_GROUP_DATA)) {
+				error(
+			"bad extent[%llu, %llu) type mismatch with chunk",
+					extent_key.objectid,
+					extent_key.objectid + extent_key.offset);
+				err |= CHUNK_TYPE_MISMATCH;
+			}
+		} else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
+			if (!(bg_flags & (BTRFS_BLOCK_GROUP_SYSTEM |
+				    BTRFS_BLOCK_GROUP_METADATA))) {
+				error(
+			"bad extent[%llu, %llu) type mismatch with chunk",
+					extent_key.objectid,
+					extent_key.objectid + nodesize);
+				err |= CHUNK_TYPE_MISMATCH;
+			}
+		}
+next:
+		ret = btrfs_next_item(extent_root, &path);
+		if (ret)
+			break;
+	}
+
+out:
+	btrfs_release_path(&path);
+
+	if (total != used) {
+		error(
+		"block group[%llu %llu] used %llu but extent items used %llu",
+			bg_key.objectid, bg_key.offset, used, total);
+		err |= BG_ACCOUNTING_ERROR;
+	}
+	return err;
+}
+
+/*
+ * Get real tree block level for the case like shared block
+ *
+ * The level recorded in the extent tree (METADATA_ITEM key offset or
+ * btrfs_tree_block_info) is compared with the level in the header of the
+ * tree block itself; they have to agree.
+ *
+ * Return >= 0 as tree level
+ * Return <0 for error
+ */
+static int query_tree_block_level(struct btrfs_fs_info *fs_info, u64 bytenr)
+{
+	struct extent_buffer *eb;
+	struct btrfs_path path;
+	struct btrfs_key key;
+	struct btrfs_extent_item *ei;
+	u64 flags;
+	u64 transid;
+	u8 backref_level;
+	u8 header_level;
+	int ret;
+
+	/* Search extent tree for extent generation and level */
+	key.objectid = bytenr;
+	key.type = BTRFS_METADATA_ITEM_KEY;
+	key.offset = (u64)-1;
+
+	btrfs_init_path(&path);
+	ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, &path, 0, 0);
+	if (ret < 0)
+		goto release_out;
+	ret = btrfs_previous_extent_item(fs_info->extent_root, &path, bytenr);
+	if (ret < 0)
+		goto release_out;
+	if (ret > 0) {
+		ret = -ENOENT;
+		goto release_out;
+	}
+
+	btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
+	ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
+			    struct btrfs_extent_item);
+	flags = btrfs_extent_flags(path.nodes[0], ei);
+	if (!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
+		ret = -ENOENT;
+		goto release_out;
+	}
+
+	/* Get transid for later read_tree_block() check */
+	transid = btrfs_extent_generation(path.nodes[0], ei);
+
+	/* Get backref level as one source */
+	if (key.type == BTRFS_METADATA_ITEM_KEY) {
+		backref_level = key.offset;
+	} else {
+		struct btrfs_tree_block_info *info;
+
+		info = (struct btrfs_tree_block_info *)(ei + 1);
+		backref_level = btrfs_tree_block_level(path.nodes[0], info);
+	}
+	btrfs_release_path(&path);
+
+	/* Get level from tree block as an alternative source */
+	eb = read_tree_block(fs_info, bytenr, transid);
+	if (!extent_buffer_uptodate(eb)) {
+		free_extent_buffer(eb);
+		return -EIO;
+	}
+	header_level = btrfs_header_level(eb);
+	free_extent_buffer(eb);
+
+	if (header_level != backref_level)
+		return -EIO;
+	return header_level;
+
+release_out:
+	btrfs_release_path(&path);
+	return ret;
+}
+
+/*
+ * Check if a tree block backref is valid (points to a valid tree block)
+ * if level == -1, level will be resolved
+ *
+ * The tree @root_id is searched with the first key of the block at @bytenr,
+ * stopping at @level; the node found there must be that very block.
+ *
+ * Return >0 for any error found and print error message
+ */
+static int check_tree_block_backref(struct btrfs_fs_info *fs_info, u64 root_id,
+				    u64 bytenr, int level)
+{
+	struct btrfs_root *root;
+	struct btrfs_key key;
+	struct btrfs_path path;
+	struct extent_buffer *eb;
+	struct extent_buffer *node;
+	u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
+	int err = 0;
+	int ret;
+
+	/* Query level for level == -1 special case */
+	if (level == -1)
+		level = query_tree_block_level(fs_info, bytenr);
+	if (level < 0) {
+		err |= REFERENCER_MISSING;
+		goto out;
+	}
+
+	key.objectid = root_id;
+	key.type = BTRFS_ROOT_ITEM_KEY;
+	key.offset = (u64)-1;
+
+	root = btrfs_read_fs_root(fs_info, &key);
+	if (IS_ERR(root)) {
+		err |= REFERENCER_MISSING;
+		goto out;
+	}
+
+	/* Read out the tree block to get item/node key */
+	eb = read_tree_block(fs_info, bytenr, 0);
+	if (!extent_buffer_uptodate(eb)) {
+		err |= REFERENCER_MISSING;
+		free_extent_buffer(eb);
+		goto out;
+	}
+
+	/* Empty tree, no need to check key */
+	if (!btrfs_header_nritems(eb) && !level) {
+		free_extent_buffer(eb);
+		goto out;
+	}
+
+	if (level)
+		btrfs_node_key_to_cpu(eb, &key, 0);
+	else
+		btrfs_item_key_to_cpu(eb, &key, 0);
+
+	free_extent_buffer(eb);
+
+	btrfs_init_path(&path);
+	/* Make the search stop at the level of the block being verified */
+	path.lowest_level = level;
+	/* Search with the first key, to ensure we can reach it */
+	ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
+	if (ret < 0) {
+		err |= REFERENCER_MISSING;
+		goto release_out;
+	}
+
+	node = path.nodes[level];
+	if (btrfs_header_bytenr(node) != bytenr) {
+		error(
+	"extent [%llu %d] referencer bytenr mismatch, wanted: %llu, have: %llu",
+			bytenr, nodesize, bytenr,
+			btrfs_header_bytenr(node));
+		err |= REFERENCER_MISMATCH;
+	}
+	if (btrfs_header_level(node) != level) {
+		error(
+	"extent [%llu %d] referencer level mismatch, wanted: %d, have: %d",
+			bytenr, nodesize, level,
+			btrfs_header_level(node));
+		err |= REFERENCER_MISMATCH;
+	}
+
+release_out:
+	btrfs_release_path(&path);
+out:
+	if (err & REFERENCER_MISSING) {
+		/* level stays negative when it could not be resolved */
+		if (level < 0)
+			error("extent [%llu %d] lost referencer (owner: %llu)",
+				bytenr, nodesize, root_id);
+		else
+			error(
+		"extent [%llu %d] lost referencer (owner: %llu, level: %u)",
+				bytenr, nodesize, root_id, level);
+	}
+
+	return err;
+}
+
+/*
+ * Check if tree block @eb is tree reloc root.
+ *
+ * The root item (BTRFS_TREE_RELOC_OBJECTID, ROOT_ITEM, owner of @eb) is read
+ * without caching and the bytenr of its root node is compared with @eb.
+ *
+ * Return 0 if it's not or any problem happens
+ * Return 1 if it's a tree reloc root
+ */
+static int is_tree_reloc_root(struct btrfs_fs_info *fs_info,
+				 struct extent_buffer *eb)
+{
+	struct btrfs_root *tree_reloc_root;
+	struct btrfs_key key;
+	u64 bytenr = btrfs_header_bytenr(eb);
+	u64 owner = btrfs_header_owner(eb);
+	int ret = 0;
+
+	key.objectid = BTRFS_TREE_RELOC_OBJECTID;
+	key.offset = owner;
+	key.type = BTRFS_ROOT_ITEM_KEY;
+
+	tree_reloc_root = btrfs_read_fs_root_no_cache(fs_info, &key);
+	if (IS_ERR(tree_reloc_root))
+		return 0;
+
+	if (bytenr == btrfs_header_bytenr(tree_reloc_root->node))
+		ret = 1;
+	btrfs_free_fs_root(tree_reloc_root);
+	return ret;
+}
+
+/*
+ * Check referencer for shared block backref
+ * If level == -1, this function will resolve the level.
+ *
+ * The block at @parent is read and must be a node one level above @bytenr
+ * that points to it.  @parent == @bytenr is accepted only for a tree reloc
+ * root.
+ *
+ * Return 0 if the parent was found, REFERENCER_MISSING otherwise.
+ */
+static int check_shared_block_backref(struct btrfs_fs_info *fs_info,
+				      u64 parent, u64 bytenr, int level)
+{
+	struct extent_buffer *eb;
+	u32 nr;
+	int found_parent = 0;
+	int i;
+
+	eb = read_tree_block(fs_info, parent, 0);
+	if (!extent_buffer_uptodate(eb))
+		goto out;
+
+	if (level == -1)
+		level = query_tree_block_level(fs_info, bytenr);
+	if (level < 0)
+		goto out;
+
+	/* It's possible it's a tree reloc root */
+	if (parent == bytenr) {
+		if (is_tree_reloc_root(fs_info, eb))
+			found_parent = 1;
+		goto out;
+	}
+
+	if (level + 1 != btrfs_header_level(eb))
+		goto out;
+
+	nr = btrfs_header_nritems(eb);
+	for (i = 0; i < nr; i++) {
+		if (bytenr == btrfs_node_blockptr(eb, i)) {
+			found_parent = 1;
+			break;
+		}
+	}
+out:
+	free_extent_buffer(eb);
+	if (!found_parent) {
+		error(
+	"shared extent[%llu %u] lost its parent (parent: %llu, level: %u)",
+			bytenr, fs_info->nodesize, parent, level);
+		return REFERENCER_MISSING;
+	}
+	return 0;
+}
+
+/*
+ * Check referencer for normal (inlined) data ref
+ * If len == 0, it will be resolved by searching in extent tree
+ *
+ * All EXTENT_DATA items of inode @objectid in tree @root_id are walked and
+ * those matching @bytenr, @len and @offset in a leaf owned by @root_id are
+ * counted; the result has to equal @count.
+ *
+ * Return 0 if the counts match, REFERENCER_MISSING otherwise.
+ */
+static int check_extent_data_backref(struct btrfs_fs_info *fs_info,
+				     u64 root_id, u64 objectid, u64 offset,
+				     u64 bytenr, u64 len, u32 count)
+{
+	struct btrfs_root *root;
+	struct btrfs_root *extent_root = fs_info->extent_root;
+	struct btrfs_key key;
+	struct btrfs_path path;
+	struct extent_buffer *leaf;
+	struct btrfs_file_extent_item *fi;
+	u32 found_count = 0;
+	int slot;
+	int ret = 0;
+
+	if (!len) {
+		key.objectid = bytenr;
+		key.type = BTRFS_EXTENT_ITEM_KEY;
+		key.offset = (u64)-1;
+
+		btrfs_init_path(&path);
+		ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0);
+		if (ret < 0)
+			goto out;
+		ret = btrfs_previous_extent_item(extent_root, &path, bytenr);
+		if (ret)
+			goto out;
+		btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
+		if (key.objectid != bytenr ||
+		    key.type != BTRFS_EXTENT_ITEM_KEY)
+			goto out;
+		len = key.offset;
+		btrfs_release_path(&path);
+	}
+	key.objectid = root_id;
+	key.type = BTRFS_ROOT_ITEM_KEY;
+	key.offset = (u64)-1;
+	btrfs_init_path(&path);
+
+	root = btrfs_read_fs_root(fs_info, &key);
+	if (IS_ERR(root))
+		goto out;
+
+	key.objectid = objectid;
+	key.type = BTRFS_EXTENT_DATA_KEY;
+	/*
+	 * It can be nasty as data backref offset is
+	 * file offset - file extent offset, which is smaller or
+	 * equal to original backref offset.  The only special case is
+	 * overflow.  So we need to special check and do further search.
+	 */
+	key.offset = offset & (1ULL << 63) ? 0 : offset;
+
+	ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
+	if (ret < 0)
+		goto out;
+
+	/*
+	 * Search afterwards to get correct one
+	 * NOTE: As we must do a comprehensive check on the data backref to
+	 * make sure the dref count also matches, we must iterate all file
+	 * extents for that inode.
+	 */
+	while (1) {
+		leaf = path.nodes[0];
+		slot = path.slots[0];
+
+		if (slot >= btrfs_header_nritems(leaf) ||
+		    btrfs_header_owner(leaf) != root_id)
+			goto next;
+		btrfs_item_key_to_cpu(leaf, &key, slot);
+		if (key.objectid != objectid || key.type != BTRFS_EXTENT_DATA_KEY)
+			break;
+		fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
+		/*
+		 * Except normal disk bytenr and disk num bytes, we still
+		 * need to do extra check on dbackref offset as
+		 * dbackref offset = file_offset - file_extent_offset
+		 *
+		 * Also, we must check the leaf owner.
+		 * In case of shared tree blocks (snapshots) we can inherit
+		 * leaves from source snapshot.
+		 * In that case, reference from source snapshot should not
+		 * count.
+		 */
+		if (btrfs_file_extent_disk_bytenr(leaf, fi) == bytenr &&
+		    btrfs_file_extent_disk_num_bytes(leaf, fi) == len &&
+		    (u64)(key.offset - btrfs_file_extent_offset(leaf, fi)) ==
+		    offset && btrfs_header_owner(leaf) == root_id)
+			found_count++;
+
+next:
+		ret = btrfs_next_item(root, &path);
+		if (ret)
+			break;
+	}
+out:
+	btrfs_release_path(&path);
+	if (found_count != count) {
+		error(
+"extent[%llu, %llu] referencer count mismatch (root: %llu, owner: %llu, offset: %llu) wanted: %u, have: %u",
+			bytenr, len, root_id, objectid, offset, count, found_count);
+		return REFERENCER_MISSING;
+	}
+	return 0;
+}
+
+/*
+ * Check if the referencer of a shared data backref exists
+ *
+ * The leaf at @parent must contain a non-inline EXTENT_DATA item whose disk
+ * bytenr is @bytenr.
+ *
+ * Return 0 if such an item exists, REFERENCER_MISSING otherwise.
+ */
+static int check_shared_data_backref(struct btrfs_fs_info *fs_info,
+				     u64 parent, u64 bytenr)
+{
+	struct extent_buffer *eb;
+	struct btrfs_key key;
+	struct btrfs_file_extent_item *fi;
+	u32 nr;
+	int found_parent = 0;
+	int i;
+
+	eb = read_tree_block(fs_info, parent, 0);
+	if (!extent_buffer_uptodate(eb))
+		goto out;
+
+	nr = btrfs_header_nritems(eb);
+	for (i = 0; i < nr; i++) {
+		btrfs_item_key_to_cpu(eb, &key, i);
+		if (key.type != BTRFS_EXTENT_DATA_KEY)
+			continue;
+
+		fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
+		if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE)
+			continue;
+
+		if (btrfs_file_extent_disk_bytenr(eb, fi) == bytenr) {
+			found_parent = 1;
+			break;
+		}
+	}
+
+out:
+	free_extent_buffer(eb);
+	if (!found_parent) {
+		error("shared extent %llu referencer lost (parent: %llu)",
+		      bytenr, parent);
+		return REFERENCER_MISSING;
+	}
+	return 0;
+}
+
+/*
+ * Only delete backref if REFERENCER_MISSING now
+ *
+ * @path is released and searched again for the item it pointed to, so the
+ * caller's path stays usable if the extent item survived.
+ *
+ * Returns <0   the extent was deleted
+ * Returns >0   the backref was deleted but extent still exists, returned value
+ *              means error after repair
+ * Returns  0   nothing happened
+ */
+static int repair_extent_item(struct btrfs_trans_handle *trans,
+			      struct btrfs_root *root, struct btrfs_path *path,
+			      u64 bytenr, u64 num_bytes, u64 parent, u64 root_objectid,
+			      u64 owner, u64 offset, int err)
+{
+	struct btrfs_key old_key;
+	int freed = 0;
+	int ret;
+
+	btrfs_item_key_to_cpu(path->nodes[0], &old_key, path->slots[0]);
+
+	if (err & (REFERENCER_MISSING | REFERENCER_MISMATCH)) {
+		/* delete the backref */
+		ret = btrfs_free_extent(trans, root->fs_info->fs_root, bytenr,
+			  num_bytes, parent, root_objectid, owner, offset);
+		if (!ret) {
+			freed = 1;
+			err &= ~REFERENCER_MISSING;
+			printf("Delete backref in extent [%llu %llu]\n",
+			       bytenr, num_bytes);
+		} else {
+			error("fail to delete backref in extent [%llu %llu]",
+			       bytenr, num_bytes);
+		}
+	}
+
+	/* btrfs_free_extent may delete the extent */
+	btrfs_release_path(path);
+	ret = btrfs_search_slot(NULL, root, &old_key, path, 0, 0);
+
+	if (ret)
+		ret = -ENOENT;
+	else if (freed)
+		ret = err;
+	return ret;
+}
+
+/*
+ * This function will check a given extent item, including its backref and
+ * itself (like crossing stripe boundary and type)
+ *
+ * Since we don't use extent_record anymore, introduce new error bit
+ *
+ * @path points at the EXTENT_ITEM/METADATA_ITEM.  Every inline backref is
+ * verified with the matching check_*_backref() helper and, when the global
+ * repair flag is set, handed to repair_extent_item().
+ *
+ * Return 0 if nothing is wrong, a mask of error bits, or -ENOTTY for an
+ * unsupported v0 extent item.
+ */
+static int check_extent_item(struct btrfs_trans_handle *trans,
+			     struct btrfs_fs_info *fs_info,
+			     struct btrfs_path *path)
+{
+	struct btrfs_extent_item *ei;
+	struct btrfs_extent_inline_ref *iref;
+	struct btrfs_extent_data_ref *dref;
+	struct extent_buffer *eb = path->nodes[0];
+	unsigned long end;
+	unsigned long ptr;
+	int slot = path->slots[0];
+	int type;
+	u32 nodesize = btrfs_super_nodesize(fs_info->super_copy);
+	u32 item_size = btrfs_item_size_nr(eb, slot);
+	u64 flags;
+	u64 offset;
+	u64 parent;
+	u64 num_bytes;
+	u64 root_objectid;
+	u64 owner;
+	u64 owner_offset;
+	int metadata = 0;
+	int level;
+	struct btrfs_key key;
+	int ret;
+	int err = 0;
+
+	btrfs_item_key_to_cpu(eb, &key, slot);
+	if (key.type == BTRFS_EXTENT_ITEM_KEY) {
+		bytes_used += key.offset;
+		num_bytes = key.offset;
+	} else {
+		bytes_used += nodesize;
+		num_bytes = nodesize;
+	}
+
+	if (item_size < sizeof(*ei)) {
+		/*
+		 * COMPAT_EXTENT_TREE_V0 case, but it's already a super
+		 * old thing when on disk format is still un-determined.
+		 * No need to care about it anymore
+		 */
+		error("unsupported COMPAT_EXTENT_TREE_V0 detected");
+		return -ENOTTY;
+	}
+
+	ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
+	flags = btrfs_extent_flags(eb, ei);
+
+	if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
+		metadata = 1;
+	if (metadata && check_crossing_stripes(global_info, key.objectid,
+					       eb->len)) {
+		error("bad metadata [%llu, %llu) crossing stripe boundary",
+		      key.objectid, key.objectid + nodesize);
+		err |= CROSSING_STRIPE_BOUNDARY;
+	}
+
+	ptr = (unsigned long)(ei + 1);
+
+	if (metadata && key.type == BTRFS_EXTENT_ITEM_KEY) {
+		/* Old EXTENT_ITEM metadata */
+		struct btrfs_tree_block_info *info;
+
+		info = (struct btrfs_tree_block_info *)ptr;
+		level = btrfs_tree_block_level(eb, info);
+		ptr += sizeof(struct btrfs_tree_block_info);
+	} else {
+		/* New METADATA_ITEM */
+		level = key.offset;
+	}
+	end = (unsigned long)ei + item_size;
+
+next:
+	/* Reached extent item end normally */
+	if (ptr == end)
+		goto out;
+
+	/* Beyond extent item end, wrong item size */
+	if (ptr > end) {
+		err |= ITEM_SIZE_MISMATCH;
+		error("extent item at bytenr %llu slot %d has wrong size",
+			eb->start, slot);
+		goto out;
+	}
+
+	parent = 0;
+	root_objectid = 0;
+	owner = 0;
+	owner_offset = 0;
+	/* Now check every backref in this extent item */
+	iref = (struct btrfs_extent_inline_ref *)ptr;
+	type = btrfs_extent_inline_ref_type(eb, iref);
+	offset = btrfs_extent_inline_ref_offset(eb, iref);
+	switch (type) {
+	case BTRFS_TREE_BLOCK_REF_KEY:
+		root_objectid = offset;
+		owner = level;
+		ret = check_tree_block_backref(fs_info, offset, key.objectid,
+					       level);
+		err |= ret;
+		break;
+	case BTRFS_SHARED_BLOCK_REF_KEY:
+		parent = offset;
+		ret = check_shared_block_backref(fs_info, offset, key.objectid,
+						 level);
+		err |= ret;
+		break;
+	case BTRFS_EXTENT_DATA_REF_KEY:
+		dref = (struct btrfs_extent_data_ref *)(&iref->offset);
+		root_objectid = btrfs_extent_data_ref_root(eb, dref);
+		owner = btrfs_extent_data_ref_objectid(eb, dref);
+		owner_offset = btrfs_extent_data_ref_offset(eb, dref);
+		ret = check_extent_data_backref(fs_info, root_objectid, owner,
+					owner_offset, key.objectid, key.offset,
+					btrfs_extent_data_ref_count(eb, dref));
+		err |= ret;
+		break;
+	case BTRFS_SHARED_DATA_REF_KEY:
+		parent = offset;
+		ret = check_shared_data_backref(fs_info, offset, key.objectid);
+		err |= ret;
+		break;
+	default:
+		error("extent[%llu %d %llu] has unknown ref type: %d",
+			key.objectid, key.type, key.offset, type);
+		ret = UNKNOWN_TYPE;
+		err |= ret;
+		goto out;
+	}
+
+	if (err && repair) {
+		/* Only the bits of the current backref (ret) are passed on */
+		ret = repair_extent_item(trans, fs_info->extent_root, path,
+			 key.objectid, num_bytes, parent, root_objectid,
+			 owner, owner_offset, ret);
+		/* The whole extent item is gone, nothing left to walk */
+		if (ret < 0)
+			goto out;
+		/*
+		 * The backref was deleted but the extent item is still
+		 * there: look at the same position again without advancing.
+		 * err already holds the bits recorded for this backref.
+		 */
+		if (ret)
+			goto next;
+	}
+
+	ptr += btrfs_extent_inline_ref_size(type);
+	goto next;
+
+out:
+	return err;
+}
+
+/*
+ * Check if a dev extent item is referred correctly by its chunk
+ *
+ * The chunk named by the dev extent must exist, be valid, have a matching
+ * stripe length and contain a stripe with the devid/offset of the dev extent
+ * key.
+ *
+ * Return 0 if such a chunk exists, REFERENCER_MISSING otherwise.
+ */
+static int check_dev_extent_item(struct btrfs_fs_info *fs_info,
+				 struct extent_buffer *eb, int slot)
+{
+	struct btrfs_root *chunk_root = fs_info->chunk_root;
+	struct btrfs_dev_extent *ptr;
+	struct btrfs_path path;
+	struct btrfs_key chunk_key;
+	struct btrfs_key devext_key;
+	struct btrfs_chunk *chunk;
+	struct extent_buffer *l;
+	int num_stripes;
+	u64 length;
+	int i;
+	int found_chunk = 0;
+	int ret;
+
+	btrfs_item_key_to_cpu(eb, &devext_key, slot);
+	ptr = btrfs_item_ptr(eb, slot, struct btrfs_dev_extent);
+	length = btrfs_dev_extent_length(eb, ptr);
+
+	chunk_key.objectid = btrfs_dev_extent_chunk_objectid(eb, ptr);
+	chunk_key.type = BTRFS_CHUNK_ITEM_KEY;
+	chunk_key.offset = btrfs_dev_extent_chunk_offset(eb, ptr);
+
+	btrfs_init_path(&path);
+	ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0);
+	if (ret)
+		goto out;
+
+	l = path.nodes[0];
+	chunk = btrfs_item_ptr(l, path.slots[0], struct btrfs_chunk);
+	ret = btrfs_check_chunk_valid(fs_info, l, chunk, path.slots[0],
+				      chunk_key.offset);
+	if (ret < 0)
+		goto out;
+
+	if (btrfs_stripe_length(fs_info, l, chunk) != length)
+		goto out;
+
+	num_stripes = btrfs_chunk_num_stripes(l, chunk);
+	for (i = 0; i < num_stripes; i++) {
+		u64 devid = btrfs_stripe_devid_nr(l, chunk, i);
+		u64 offset = btrfs_stripe_offset_nr(l, chunk, i);
+
+		if (devid == devext_key.objectid &&
+		    offset == devext_key.offset) {
+			found_chunk = 1;
+			break;
+		}
+	}
+out:
+	btrfs_release_path(&path);
+	if (!found_chunk) {
+		error(
+		"device extent[%llu, %llu, %llu] did not find the related chunk",
+			devext_key.objectid, devext_key.offset, length);
+		return REFERENCER_MISSING;
+	}
+	return 0;
+}
+
+/*
+ * Check if the used space is correct with the dev item
+ *
+ * The lengths of all DEV_EXTENT items of the device are summed up and
+ * compared with the bytes_used field of the dev item at @eb/@slot.
+ *
+ * Return 0 if they match, REFERENCER_MISSING if the dev tree search failed,
+ * ACCOUNTING_MISMATCH if the sums differ.
+ */
+static int check_dev_item(struct btrfs_fs_info *fs_info,
+			  struct extent_buffer *eb, int slot)
+{
+	struct btrfs_root *dev_root = fs_info->dev_root;
+	struct btrfs_dev_item *dev_item;
+	struct btrfs_path path;
+	struct btrfs_key key;
+	struct btrfs_dev_extent *ptr;
+	u64 total_bytes;
+	u64 dev_id;
+	u64 used;
+	u64 total = 0;
+	int ret;
+
+	dev_item = btrfs_item_ptr(eb, slot, struct btrfs_dev_item);
+	dev_id = btrfs_device_id(eb, dev_item);
+	used = btrfs_device_bytes_used(eb, dev_item);
+	total_bytes = btrfs_device_total_bytes(eb, dev_item);
+
+	key.objectid = dev_id;
+	key.type = BTRFS_DEV_EXTENT_KEY;
+	key.offset = 0;
+
+	btrfs_init_path(&path);
+	ret = btrfs_search_slot(NULL, dev_root, &key, &path, 0, 0);
+	if (ret < 0) {
+		btrfs_item_key_to_cpu(eb, &key, slot);
+		error("cannot find any related dev extent for dev[%llu, %u, %llu]",
+			key.objectid, key.type, key.offset);
+		btrfs_release_path(&path);
+		return REFERENCER_MISSING;
+	}
+
+	/* Iterate dev_extents to calculate the used space of a device */
+	while (1) {
+		if (path.slots[0] >= btrfs_header_nritems(path.nodes[0]))
+			goto next;
+
+		btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
+		if (key.objectid > dev_id)
+			break;
+		if (key.type != BTRFS_DEV_EXTENT_KEY || key.objectid != dev_id)
+			goto next;
+
+		ptr = btrfs_item_ptr(path.nodes[0], path.slots[0],
+				     struct btrfs_dev_extent);
+		total += btrfs_dev_extent_length(path.nodes[0], ptr);
+next:
+		ret = btrfs_next_item(dev_root, &path);
+		if (ret)
+			break;
+	}
+	btrfs_release_path(&path);
+
+	if (used != total) {
+		/* Report the key of the dev item itself, as done above */
+		btrfs_item_key_to_cpu(eb, &key, slot);
+		error(
+"Dev extent's total-byte %llu is not equal to bytes-used %llu in dev[%llu, %u, %llu]",
+			total, used, key.objectid, key.type, key.offset);
+		return ACCOUNTING_MISMATCH;
+	}
+	check_dev_size_alignment(dev_id, total_bytes, fs_info->sectorsize);
+
+	return 0;
+}
+
+/*
+ * Check a chunk item.
+ * Including checking all referred dev_extents and block group
+ *
+ * Return 0 if nothing is wrong, otherwise a mask of error bits.
+ */
+static int check_chunk_item(struct btrfs_fs_info *fs_info,
+			    struct extent_buffer *eb, int slot)
+{
+	struct btrfs_root *extent_root = fs_info->extent_root;
+	struct btrfs_root *dev_root = fs_info->dev_root;
+	struct btrfs_path path;
+	struct btrfs_key chunk_key;
+	struct btrfs_key bg_key;
+	struct btrfs_key devext_key;
+	struct btrfs_chunk *chunk;
+	struct extent_buffer *leaf;
+	struct btrfs_block_group_item *bi;
+	struct btrfs_block_group_item bg_item;
+	struct btrfs_dev_extent *ptr;
+	u64 length;
+	u64 chunk_end;
+	u64 stripe_len;
+	u64 type;
+	int num_stripes;
+	u64 offset;
+	u64 objectid;
+	int i;
+	int ret;
+	int err = 0;
+
+	btrfs_item_key_to_cpu(eb, &chunk_key, slot);
+	chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
+	length = btrfs_chunk_length(eb, chunk);
+	chunk_end = chunk_key.offset + length;
+	ret = btrfs_check_chunk_valid(fs_info, eb, chunk, slot,
+				      chunk_key.offset);
+	if (ret < 0) {
+		error("chunk[%llu %llu) is invalid", chunk_key.offset,
+			chunk_end);
+		err |= BYTES_UNALIGNED | UNKNOWN_TYPE;
+		goto out;
+	}
+	type = btrfs_chunk_type(eb, chunk);
+
+	bg_key.objectid = chunk_key.offset;
+	bg_key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
+	bg_key.offset = length;
+
+	btrfs_init_path(&path);
+	ret = btrfs_search_slot(NULL, extent_root, &bg_key, &path, 0, 0);
+	if (ret) {
+		error(
+		"chunk[%llu %llu) did not find the related block group item",
+			chunk_key.offset, chunk_end);
+		err |= REFERENCER_MISSING;
+	} else{
+		leaf = path.nodes[0];
+		bi = btrfs_item_ptr(leaf, path.slots[0],
+				    struct btrfs_block_group_item);
+		read_extent_buffer(leaf, &bg_item, (unsigned long)bi,
+				   sizeof(bg_item));
+		if (btrfs_block_group_flags(&bg_item) != type) {
+			error(
+"chunk[%llu %llu) related block group item flags mismatch, wanted: %llu, have: %llu",
+				chunk_key.offset, chunk_end, type,
+				btrfs_block_group_flags(&bg_item));
+			/*
+			 * NOTE(review): the block group exists here, so a
+			 * mismatch bit may be more accurate than
+			 * REFERENCER_MISSING; kept as is because the callers
+			 * are not visible from this function.
+			 */
+			err |= REFERENCER_MISSING;
+		}
+	}
+
+	num_stripes = btrfs_chunk_num_stripes(eb, chunk);
+	stripe_len = btrfs_stripe_length(fs_info, eb, chunk);
+	for (i = 0; i < num_stripes; i++) {
+		btrfs_release_path(&path);
+		btrfs_init_path(&path);
+		devext_key.objectid = btrfs_stripe_devid_nr(eb, chunk, i);
+		devext_key.type = BTRFS_DEV_EXTENT_KEY;
+		devext_key.offset = btrfs_stripe_offset_nr(eb, chunk, i);
+
+		ret = btrfs_search_slot(NULL, dev_root, &devext_key, &path,
+					0, 0);
+		if (ret)
+			goto not_match_dev;
+
+		leaf = path.nodes[0];
+		ptr = btrfs_item_ptr(leaf, path.slots[0],
+				     struct btrfs_dev_extent);
+		objectid = btrfs_dev_extent_chunk_objectid(leaf, ptr);
+		offset = btrfs_dev_extent_chunk_offset(leaf, ptr);
+		if (objectid != chunk_key.objectid ||
+		    offset != chunk_key.offset ||
+		    btrfs_dev_extent_length(leaf, ptr) != stripe_len)
+			goto not_match_dev;
+		continue;
+not_match_dev:
+		err |= BACKREF_MISSING;
+		/* Print the chunk range like every other message here */
+		error(
+		"chunk[%llu %llu) stripe %d did not find the related dev extent",
+			chunk_key.offset, chunk_end, i);
+		continue;
+	}
+	btrfs_release_path(&path);
+out:
+	return err;
+}
+
+/*
+ * Add block group item to the extent tree if @err contains REFERENCER_MISSING.
+ * FIXME: We still need to repair error of dev_item.
+ *
+ * Returns error after repair.
+ */
+static int repair_chunk_item(struct btrfs_trans_handle *trans,
+			     struct btrfs_root *chunk_root,
+			     struct btrfs_path *path, int err)
+{
+	struct btrfs_chunk *chunk;
+	struct btrfs_key chunk_key;
+	struct extent_buffer *eb = path->nodes[0];
+	u64 length;
+	int slot = path->slots[0];
+	u64 type;
+	int ret = 0;
+
+	/* Nothing to repair unless @path points at a chunk item */
+	btrfs_item_key_to_cpu(eb, &chunk_key, slot);
+	if (chunk_key.type != BTRFS_CHUNK_ITEM_KEY)
+		return err;
+	chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk);
+	type = btrfs_chunk_type(path->nodes[0], chunk);
+	length = btrfs_chunk_length(eb, chunk);
+
+	if (err & REFERENCER_MISSING) {
+		ret = btrfs_make_block_group(trans, chunk_root->fs_info, 0,
+					     type, chunk_key.offset, length);
+		if (ret) {
+			error("fail to add block group item[%llu %llu]",
+				chunk_key.offset, length);
+			goto out;
+		} else {
+			/* Only the repaired bit is cleared, the rest stays */
+			err &= ~REFERENCER_MISSING;
+			printf("Added block group item[%llu %llu]\n",
+				chunk_key.offset, length);
+		}
+	}
+
+out:
+	return err;
+}
+
+/*
+ * Delete the item @path currently points to from @root.
+ *
+ * The key is read from @path, @path is released and the key is searched
+ * again inside @trans with deletion intent.  After a successful delete @path
+ * is moved back by one slot (or to the previous leaf when the slot was 0).
+ *
+ * Returns 0 on success, -ENOENT when the key is not found again, or the
+ * result of btrfs_del_item().
+ */
+static int delete_extent_tree_item(struct btrfs_trans_handle *trans,
+				   struct btrfs_root *root,
+				   struct btrfs_path *path)
+{
+	struct btrfs_key key;
+	int ret = 0;
+
+	btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
+	btrfs_release_path(path);
+	ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
+	if (ret) {
+		ret = -ENOENT;
+		goto out;
+	}
+
+	ret = btrfs_del_item(trans, root, path);
+	if (ret)
+		goto out;
+
+	/*
+	 * Step back one item; check_leaf_items() advances path->slots[0]
+	 * after every item it handles.
+	 */
+	if (path->slots[0] == 0)
+		btrfs_prev_leaf(root, path);
+	else
+		path->slots[0]--;
+out:
+	if (ret)
+		error("failed to delete root %llu item[%llu, %u, %llu]",
+			root->objectid, key.objectid, key.type, key.offset);
+	else
+		printf("Deleted root %llu item[%llu, %u, %llu]\n",
+			root->objectid, key.objectid, key.type, key.offset);
+	return ret;
+}
+
+/*
+ * Main entry function to check known items and update related accounting info
+ *
+ * Walks the leaf at path->nodes[0] from path->slots[0] to its last item and
+ * dispatches on the key type.  When repair is enabled, extent data and chunk
+ * items are repaired in place and bad block group / backref items are
+ * deleted.
+ *
+ * Returns the OR of all per-item results.
+ */
+static int check_leaf_items(struct btrfs_trans_handle *trans,
+			    struct btrfs_root *root, struct btrfs_path *path,
+			    struct node_refs *nrefs, int account_bytes)
+{
+	struct btrfs_fs_info *fs_info = root->fs_info;
+	struct btrfs_key key;
+	struct extent_buffer *eb;
+	int slot;
+	int type;
+	struct btrfs_extent_data_ref *dref;
+	int ret = 0;
+	int err = 0;
+
+again:
+	/* Reload every round: a repair may have re-positioned @path */
+	eb = path->nodes[0];
+	slot = path->slots[0];
+	if (slot >= btrfs_header_nritems(eb)) {
+		if (slot == 0) {
+			error("empty leaf [%llu %u] root %llu", eb->start,
+				root->fs_info->nodesize, root->objectid);
+			/*
+			 * NOTE(review): a positive errno value is ORed into a
+			 * flag bitmap here - confirm this is intended.
+			 */
+			err |= EIO;
+		}
+		goto out;
+	}
+
+	btrfs_item_key_to_cpu(eb, &key, slot);
+	type = key.type;
+
+	switch (type) {
+	case BTRFS_EXTENT_DATA_KEY:
+		ret = check_extent_data_item(root, path, nrefs, account_bytes);
+		if (repair && ret)
+			ret = repair_extent_data_item(trans, root, path, nrefs,
+						      ret);
+		err |= ret;
+		break;
+	case BTRFS_BLOCK_GROUP_ITEM_KEY:
+		ret = check_block_group_item(fs_info, eb, slot);
+		if (repair &&
+		    ret & REFERENCER_MISSING)
+			ret = delete_extent_tree_item(trans, root, path);
+		err |= ret;
+		break;
+	case BTRFS_DEV_ITEM_KEY:
+		ret = check_dev_item(fs_info, eb, slot);
+		err |= ret;
+		break;
+	case BTRFS_CHUNK_ITEM_KEY:
+		ret = check_chunk_item(fs_info, eb, slot);
+		if (repair && ret)
+			ret = repair_chunk_item(trans, root, path, ret);
+		err |= ret;
+		break;
+	case BTRFS_DEV_EXTENT_KEY:
+		ret = check_dev_extent_item(fs_info, eb, slot);
+		err |= ret;
+		break;
+	case BTRFS_EXTENT_ITEM_KEY:
+	case BTRFS_METADATA_ITEM_KEY:
+		ret = check_extent_item(trans, fs_info, path);
+		err |= ret;
+		break;
+	case BTRFS_EXTENT_CSUM_KEY:
+		total_csum_bytes += btrfs_item_size_nr(eb, slot);
+		/* ret is left over from the previous item at this point */
+		err |= ret;
+		break;
+	case BTRFS_TREE_BLOCK_REF_KEY:
+		ret = check_tree_block_backref(fs_info, key.offset,
+					       key.objectid, -1);
+		if (repair &&
+		    ret & (REFERENCER_MISMATCH | REFERENCER_MISSING))
+			ret = delete_extent_tree_item(trans, root, path);
+		err |= ret;
+		break;
+	case BTRFS_EXTENT_DATA_REF_KEY:
+		dref = btrfs_item_ptr(eb, slot, struct btrfs_extent_data_ref);
+		ret = check_extent_data_backref(fs_info,
+				btrfs_extent_data_ref_root(eb, dref),
+				btrfs_extent_data_ref_objectid(eb, dref),
+				btrfs_extent_data_ref_offset(eb, dref),
+				key.objectid, 0,
+				btrfs_extent_data_ref_count(eb, dref));
+		if (repair &&
+		    ret & (REFERENCER_MISMATCH | REFERENCER_MISSING))
+			ret = delete_extent_tree_item(trans, root, path);
+		err |= ret;
+		break;
+	case BTRFS_SHARED_BLOCK_REF_KEY:
+		ret = check_shared_block_backref(fs_info, key.offset,
+						 key.objectid, -1);
+		if (repair &&
+		    ret & (REFERENCER_MISMATCH | REFERENCER_MISSING))
+			ret = delete_extent_tree_item(trans, root, path);
+		err |= ret;
+		break;
+	case BTRFS_SHARED_DATA_REF_KEY:
+		ret = check_shared_data_backref(fs_info, key.offset,
+						key.objectid);
+		if (repair &&
+		    ret & (REFERENCER_MISMATCH | REFERENCER_MISSING))
+			ret = delete_extent_tree_item(trans, root, path);
+		err |= ret;
+		break;
+	default:
+		break;
+	}
+
+	++path->slots[0];
+	goto again;
+out:
+	return err;
+}
+
+/*
+ * @trans	just for lowmem repair mode
+ * @check_all	if not 0 then check all tree block backrefs and items
+ *		0 then just check relationship of items in fs tree(s)
+ *
+ * Returns >0  Found error, should continue
+ * Returns <0  Fatal error, must exit the whole check
+ * Returns 0   No errors found
+ */
+static int walk_down_tree_v2(struct btrfs_trans_handle *trans,
+			     struct btrfs_root *root, struct btrfs_path *path,
+			     int *level, struct node_refs *nrefs, int ext_ref,
+			     int check_all)
+{
+	enum btrfs_tree_block_status status;
+	u64 bytenr;
+	u64 ptr_gen;
+	struct btrfs_fs_info *fs_info = root->fs_info;
+	struct extent_buffer *next;
+	struct extent_buffer *cur;
+	int ret;
+	int err = 0;
+	int check;
+	int account_file_data = 0;
+
+	WARN_ON(*level < 0);
+	WARN_ON(*level >= BTRFS_MAX_LEVEL);
+
+	ret = update_nodes_refs(root, btrfs_header_bytenr(path->nodes[*level]),
+				path->nodes[*level], nrefs, *level, check_all);
+	if (ret < 0)
+		return ret;
+
+	while (*level >= 0) {
+		WARN_ON(*level < 0);
+		WARN_ON(*level >= BTRFS_MAX_LEVEL);
+		cur = path->nodes[*level];
+		bytenr = btrfs_header_bytenr(cur);
+		check = nrefs->need_check[*level];
+
+		if (btrfs_header_level(cur) != *level)
+			WARN_ON(1);
+		/*
+		 * Update bytes accounting and check tree block ref
+		 * NOTE: Doing accounting and check before checking nritems
+		 * is necessary because of empty node/leaf.
+		 */
+		if ((check_all && !nrefs->checked[*level]) ||
+		    (!check_all && nrefs->need_check[*level])) {
+			ret = check_tree_block_ref(root, cur,
+			   btrfs_header_bytenr(cur), btrfs_header_level(cur),
+			   btrfs_header_owner(cur), nrefs);
+
+			if (repair && ret)
+				ret = repair_tree_block_ref(trans, root,
+				    path->nodes[*level], nrefs, *level, ret);
+			err |= ret;
+
+			if (check_all && nrefs->need_check[*level] &&
+				nrefs->refs[*level]) {
+				account_bytes(root, path, *level);
+				account_file_data = 1;
+			}
+			nrefs->checked[*level] = 1;
+		}
+
+		if (path->slots[*level] >= btrfs_header_nritems(cur))
+			break;
+
+		/* Don't forget to check leaf/node validation */
+		if (*level == 0) {
+			/* skip duplicate check */
+			if (check || !check_all) {
+				ret = btrfs_check_leaf(root, NULL, cur);
+				if (ret != BTRFS_TREE_BLOCK_CLEAN) {
+					err |= -EIO;
+					break;
+				}
+			}
+
+			ret = 0;
+			if (!check_all)
+				ret = process_one_leaf_v2(root, path, nrefs,
+							  level, ext_ref);
+			else
+				ret = check_leaf_items(trans, root, path,
+					       nrefs, account_file_data);
+			err |= ret;
+			break;
+		} else {
+			if (check || !check_all) {
+				ret = btrfs_check_node(root, NULL, cur);
+				if (ret != BTRFS_TREE_BLOCK_CLEAN) {
+					err |= -EIO;
+					break;
+				}
+			}
+		}
+
+		bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
+		ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
+
+		ret = update_nodes_refs(root, bytenr, NULL, nrefs, *level - 1,
+					check_all);
+		/*
+		 * NOTE(review): this failure ends the walk but is not folded
+		 * into err - confirm whether it should be reported.
+		 */
+		if (ret < 0)
+			break;
+		/*
+		 * check all trees in check_chunks_and_extent_v2
+		 * check shared node once in check_fs_roots
+		 */
+		if (!check_all && !nrefs->need_check[*level - 1]) {
+			path->slots[*level]++;
+			continue;
+		}
+
+		next = btrfs_find_tree_block(fs_info, bytenr, fs_info->nodesize);
+		if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
+			free_extent_buffer(next);
+			reada_walk_down(root, cur, path->slots[*level]);
+			next = read_tree_block(fs_info, bytenr, ptr_gen);
+			if (!extent_buffer_uptodate(next)) {
+				struct btrfs_key node_key;
+
+				btrfs_node_key_to_cpu(path->nodes[*level],
+						      &node_key,
+						      path->slots[*level]);
+				btrfs_add_corrupt_extent_record(fs_info,
+					&node_key, path->nodes[*level]->start,
+					fs_info->nodesize, *level);
+				err |= -EIO;
+				break;
+			}
+		}
+
+		ret = check_child_node(cur, path->slots[*level], next);
+		err |= ret;
+		if (ret < 0) {
+			/*
+			 * @next has not been stored in @path yet, so nobody
+			 * else will release it - same as the bad-status path
+			 * just below.
+			 */
+			free_extent_buffer(next);
+			break;
+		}
+
+		if (btrfs_is_leaf(next))
+			status = btrfs_check_leaf(root, NULL, next);
+		else
+			status = btrfs_check_node(root, NULL, next);
+		if (status != BTRFS_TREE_BLOCK_CLEAN) {
+			free_extent_buffer(next);
+			err |= -EIO;
+			break;
+		}
+
+		/* Descend: @next replaces whatever @path held one level down */
+		*level = *level - 1;
+		free_extent_buffer(path->nodes[*level]);
+		path->nodes[*level] = next;
+		path->slots[*level] = 0;
+		account_file_data = 0;
+
+		update_nodes_refs(root, (u64)-1, next, nrefs, *level, check_all);
+	}
+	return err;
+}
+
+/*
+ * Move @path to the next slot that has not been visited yet.
+ *
+ * Starting at *level, a node with a following slot gets its slot advanced and
+ * *level set to that level (return 0).  An exhausted node is released from
+ * @path and the search continues one level up.
+ *
+ * Returns 1 when no level has a slot left.
+ */
+static int walk_up_tree_v2(struct btrfs_root *root, struct btrfs_path *path,
+			   int *level)
+{
+	int i;
+	struct extent_buffer *leaf;
+
+	for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
+		leaf = path->nodes[i];
+		if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
+			path->slots[i]++;
+			*level = i;
+			return 0;
+		} else {
+			/* *level equals i here on every iteration */
+			free_extent_buffer(path->nodes[*level]);
+			path->nodes[*level] = NULL;
+			*level = i + 1;
+		}
+	}
+	return 1;
+}
+
+/*
+ * Insert the missing inode item and inode ref.
+ *
+ * Normal INODE_ITEM_MISSING and INODE_REF_MISSING are handled in backref * dir.
+ * Root dir should be handled specially because root dir is the root of fs.
+ *
+ * returns err (>0 or 0) after repair
+ */
+static int repair_fs_first_inode(struct btrfs_root *root, int err)
+{
+	struct btrfs_trans_handle *trans;
+	struct btrfs_key key;
+	struct btrfs_path path;
+	int filetype = BTRFS_FT_DIR;
+	int ret = 0;
+
+	btrfs_init_path(&path);
+
+	if (err & INODE_REF_MISSING) {
+		key.objectid = BTRFS_FIRST_FREE_OBJECTID;
+		key.type = BTRFS_INODE_REF_KEY;
+		key.offset = BTRFS_FIRST_FREE_OBJECTID;
+
+		trans = btrfs_start_transaction(root, 1);
+		if (IS_ERR(trans)) {
+			ret = PTR_ERR(trans);
+			goto out;
+		}
+
+		btrfs_release_path(&path);
+		ret = btrfs_search_slot(trans, root, &key, &path, 1, 1);
+		/*
+		 * NOTE(review): any non-zero result skips the insert below,
+		 * including a plain "key not found" (> 0) - confirm that is
+		 * intended.
+		 */
+		if (ret)
+			goto trans_fail;
+
+		ret = btrfs_insert_inode_ref(trans, root, "..", 2,
+					     BTRFS_FIRST_FREE_OBJECTID,
+					     BTRFS_FIRST_FREE_OBJECTID, 0);
+		if (ret)
+			goto trans_fail;
+
+		printf("Add INODE_REF[%llu %llu] name %s\n",
+		       BTRFS_FIRST_FREE_OBJECTID, BTRFS_FIRST_FREE_OBJECTID,
+		       "..");
+		err &= ~INODE_REF_MISSING;
+		/*
+		 * The success path falls through to this label as well: the
+		 * transaction is committed whether or not the insert worked.
+		 */
+trans_fail:
+		if (ret)
+			error("fail to insert first inode's ref");
+		btrfs_commit_transaction(trans, root);
+	}
+
+	if (err & INODE_ITEM_MISSING) {
+		ret = repair_inode_item_missing(root,
+					BTRFS_FIRST_FREE_OBJECTID, filetype);
+		if (ret)
+			goto out;
+		err &= ~INODE_ITEM_MISSING;
+	}
+out:
+	/* ret only selects the message; the remaining error bits are returned */
+	if (ret)
+		error("fail to repair first inode");
+	btrfs_release_path(&path);
+	return err;
+}
+
+/*
+ * check first root dir's inode_item and inode_ref
+ *
+ * The inode item is looked up with key (BTRFS_FIRST_FREE_OBJECTID,
+ * INODE_ITEM, 0) and must be a directory; the inode ref is looked up by the
+ * name "..".  When repair is enabled, found errors are handed to
+ * repair_fs_first_inode() before anything is reported.
+ *
+ * returns 0 means no error
+ * returns >0 means error
+ * returns <0 means fatal error
+ */
+static int check_fs_first_inode(struct btrfs_root *root, unsigned int ext_ref)
+{
+	struct btrfs_path path;
+	struct btrfs_key key;
+	struct btrfs_inode_item *ii;
+	u64 index;
+	u32 mode;
+	int err = 0;
+	int ret;
+
+	key.objectid = BTRFS_FIRST_FREE_OBJECTID;
+	key.type = BTRFS_INODE_ITEM_KEY;
+	key.offset = 0;
+
+	/* For root being dropped, we don't need to check first inode */
+	if (btrfs_root_refs(&root->root_item) == 0 &&
+	    btrfs_disk_key_objectid(&root->root_item.drop_progress) >=
+	    BTRFS_FIRST_FREE_OBJECTID)
+		return 0;
+
+	btrfs_init_path(&path);
+	ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
+	if (ret < 0)
+		goto out;
+	if (ret > 0) {
+		/* Not found is an ordinary error, not a fatal one */
+		ret = 0;
+		err |= INODE_ITEM_MISSING;
+	} else {
+		ii = btrfs_item_ptr(path.nodes[0], path.slots[0],
+				    struct btrfs_inode_item);
+		mode = btrfs_inode_mode(path.nodes[0], ii);
+		if (imode_to_type(mode) != BTRFS_FT_DIR)
+			err |= INODE_ITEM_MISMATCH;
+	}
+
+	/* lookup first inode ref */
+	key.offset = BTRFS_FIRST_FREE_OBJECTID;
+	key.type = BTRFS_INODE_REF_KEY;
+	/* special index value */
+	index = 0;
+
+	ret = find_inode_ref(root, &key, "..", strlen(".."), &index, ext_ref);
+	if (ret < 0)
+		goto out;
+	err |= ret;
+
+out:
+	btrfs_release_path(&path);
+
+	if (err && repair)
+		err = repair_fs_first_inode(root, err);
+
+	/* Only what is still wrong after the optional repair is reported */
+	if (err & (INODE_ITEM_MISSING | INODE_ITEM_MISMATCH))
+		error("root dir INODE_ITEM is %s",
+		      err & INODE_ITEM_MISMATCH ? "mismatch" : "missing");
+	if (err & INODE_REF_MISSING)
+		error("root dir INODE_REF is missing");
+
+	/* A fatal lookup error takes precedence over the error bits */
+	return ret < 0 ? ret : err;
+}
+
+/*
+ * This function calls walk_down_tree_v2 and walk_up_tree_v2 to check tree
+ * blocks and integrity of fs tree items.
+ *
+ * @root:	the root of the tree to be checked.
+ * @ext_ref:	whether the EXTENDED_IREF feature is enabled.
+ * @check_all:	if NOT 0 means check the tree (including tree)'s treeblocks.
+ *		otherwise means check fs tree(s) items relationship and
+ *		@root MUST be a fs tree root.
+ * Returns 0      represents OK.
+ * Returns not 0  represents error.
+ *
+ * When @check_all is 0 the root directory inode is verified first through
+ * check_fs_first_inode(); only a negative (fatal) result from it ends the
+ * check early.  The walk starts at the root node, or at the recorded
+ * drop_progress key for a root that is being dropped.
+ */
+static int check_btrfs_root(struct btrfs_trans_handle *trans,
+			    struct btrfs_root *root, unsigned int ext_ref,
+			    int check_all)
+{
+	struct btrfs_root_item *ri = &root->root_item;
+	struct node_refs nrefs;
+	struct btrfs_path path;
+	int found = 0;
+	int level;
+	int ret;
+
+	memset(&nrefs, 0, sizeof(nrefs));
+
+	/*
+	 * The traversal below only starts from the first inode item present
+	 * in a leaf, so a missing inode item (256) would be skipped forever
+	 * unless it is looked up explicitly here.
+	 */
+	if (!check_all) {
+		ret = check_fs_first_inode(root, ext_ref);
+		if (ret < 0)
+			return ret;
+	}
+
+	btrfs_init_path(&path);
+	level = btrfs_header_level(root->node);
+
+	if (btrfs_root_refs(ri) == 0 &&
+	    btrfs_disk_key_objectid(&ri->drop_progress) != 0) {
+		/* Root being dropped: start from the recorded drop position */
+		struct btrfs_key drop_key;
+
+		btrfs_disk_key_to_cpu(&drop_key, &ri->drop_progress);
+		level = ri->drop_level;
+		path.lowest_level = level;
+		ret = btrfs_search_slot(NULL, root, &drop_key, &path, 0, 0);
+		if (ret < 0)
+			goto out;
+	} else {
+		/* Regular root: start from slot 0 of the root node */
+		extent_buffer_get(root->node);
+		path.nodes[level] = root->node;
+		path.slots[level] = 0;
+	}
+
+	for (;;) {
+		ret = walk_down_tree_v2(trans, root, &path, &level, &nrefs,
+					ext_ref, check_all);
+		found |= !!ret;
+
+		/* A negative result means the walk shall stop */
+		if (ret < 0)
+			break;
+
+		/* Non-zero: nothing left to visit, normal exit */
+		if (walk_up_tree_v2(root, &path, &level) != 0)
+			break;
+	}
+	/* Both ways out of the loop report the 0/1 summary */
+	ret = found;
+out:
+	btrfs_release_path(&path);
+	return ret;
+}
+
+/*
+ * Iterate all items in the tree and call check_inode_item() to check.
+ *
+ * @root:	the root of the tree to be checked.
+ * @ext_ref:	the EXTENDED_IREF feature
+ *
+ * Return 0 if no error found.
+ * Return <0 for error.
+ *
+ * This is check_btrfs_root() without a transaction and with check_all == 0;
+ * the cached block groups are reset before the walk.
+ */
+static int check_fs_root_v2(struct btrfs_root *root, unsigned int ext_ref)
+{
+	struct btrfs_fs_info *fs_info = root->fs_info;
+
+	reset_cached_block_groups(fs_info);
+
+	return check_btrfs_root(NULL, root, ext_ref, 0);
+}
+
+/*
+ * Verify that a ROOT_REF / ROOT_BACKREF item has a matching counterpart.
+ *
+ * @root:	the root of the root tree; the counterpart is searched in it.
+ * @ref_key:	the key of the item being checked.
+ * @node:	the leaf that contains the item.
+ * @slot:	the slot of the item in @node.
+ *
+ * The counterpart is searched with objectid and offset swapped and the other
+ * of the two key types.  dirid, sequence, name length and name must agree.
+ *
+ * Return 0 if no error occurred, otherwise ROOT_REF_MISSING or
+ * ROOT_REF_MISMATCH.
+ */
+static int check_root_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
+			  struct extent_buffer *node, int slot)
+{
+	const char *ref_kind = ref_key->type == BTRFS_ROOT_REF_KEY ?
+			       "ROOT_REF" : "ROOT_BACKREF";
+	char ref_name[BTRFS_NAME_LEN] = {0};
+	char backref_name[BTRFS_NAME_LEN] = {0};
+	struct btrfs_root_ref *ref;
+	struct btrfs_root_ref *backref;
+	struct extent_buffer *leaf;
+	struct btrfs_path path;
+	struct btrfs_key key;
+	u64 ref_dirid;
+	u64 ref_seq;
+	u32 ref_namelen;
+	u64 backref_dirid;
+	u64 backref_seq;
+	u32 backref_namelen;
+	u32 len;
+	int err = 0;
+	int ret;
+
+	/* Pull everything we compare out of the item under inspection */
+	ref = btrfs_item_ptr(node, slot, struct btrfs_root_ref);
+	ref_dirid = btrfs_root_ref_dirid(node, ref);
+	ref_seq = btrfs_root_ref_sequence(node, ref);
+	ref_namelen = btrfs_root_ref_name_len(node, ref);
+
+	/* Never copy more than the local buffer can hold */
+	len = ref_namelen;
+	if (ref_namelen > BTRFS_NAME_LEN) {
+		len = BTRFS_NAME_LEN;
+		warning("%s[%llu %llu] ref_name too long",
+			ref_kind, ref_key->objectid, ref_key->offset);
+	}
+	read_extent_buffer(node, ref_name, (unsigned long)(ref + 1), len);
+
+	/* Find relative root_ref: swapped ids, the other key type */
+	key.objectid = ref_key->offset;
+	key.type = BTRFS_ROOT_BACKREF_KEY + BTRFS_ROOT_REF_KEY - ref_key->type;
+	key.offset = ref_key->objectid;
+
+	btrfs_init_path(&path);
+	ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
+	if (ret) {
+		err |= ROOT_REF_MISSING;
+		error("%s[%llu %llu] couldn't find relative ref",
+		      ref_kind, ref_key->objectid, ref_key->offset);
+		goto out;
+	}
+
+	leaf = path.nodes[0];
+	backref = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_root_ref);
+	backref_dirid = btrfs_root_ref_dirid(leaf, backref);
+	backref_seq = btrfs_root_ref_sequence(leaf, backref);
+	backref_namelen = btrfs_root_ref_name_len(leaf, backref);
+
+	len = backref_namelen;
+	if (backref_namelen > BTRFS_NAME_LEN) {
+		len = BTRFS_NAME_LEN;
+		warning("%s[%llu %llu] ref_name too long",
+			key.type == BTRFS_ROOT_REF_KEY ?
+			"ROOT_REF" : "ROOT_BACKREF",
+			key.objectid, key.offset);
+	}
+	read_extent_buffer(leaf, backref_name, (unsigned long)(backref + 1),
+			   len);
+
+	/* Everything agrees: nothing to report */
+	if (ref_dirid == backref_dirid && ref_seq == backref_seq &&
+	    ref_namelen == backref_namelen &&
+	    strncmp(ref_name, backref_name, len) == 0)
+		goto out;
+
+	err |= ROOT_REF_MISMATCH;
+	error("%s[%llu %llu] mismatch relative ref",
+	      ref_kind, ref_key->objectid, ref_key->offset);
+out:
+	btrfs_release_path(&path);
+	return err;
+}
+
+/*
+ * Check all fs/file tree in low_memory mode.
+ *
+ * 1. for fs tree root item, call check_fs_root_v2()
+ * 2. for fs tree root ref/backref, call check_root_ref()
+ *
+ * Return 0 if no error occurred.
+ *
+ * The tree root is scanned from the FS_TREE root item until the key objectid
+ * exceeds BTRFS_LAST_FREE_OBJECTID or the items run out.  Failing to read a
+ * tree is recorded as -EIO and the scan goes on with the next item.
+ */
+int check_fs_roots_v2(struct btrfs_fs_info *fs_info)
+{
+	struct btrfs_root *tree_root = fs_info->tree_root;
+	struct btrfs_root *cur_root = NULL;
+	struct extent_buffer *node;
+	struct btrfs_path path;
+	struct btrfs_key key;
+	unsigned int ext_ref;
+	int is_reloc;
+	int slot;
+	int err = 0;
+	int ret;
+
+	ext_ref = btrfs_fs_incompat(fs_info, EXTENDED_IREF);
+
+	key.objectid = BTRFS_FS_TREE_OBJECTID;
+	key.type = BTRFS_ROOT_ITEM_KEY;
+	key.offset = 0;
+
+	btrfs_init_path(&path);
+	ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
+	if (ret != 0) {
+		/* No exact match for the fs tree root item is an error too */
+		err = ret < 0 ? ret : -ENOENT;
+		goto out;
+	}
+
+	for (;;) {
+		node = path.nodes[0];
+		slot = path.slots[0];
+		btrfs_item_key_to_cpu(node, &key, slot);
+		if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
+			break;
+
+		if (key.type == BTRFS_ROOT_REF_KEY ||
+		    key.type == BTRFS_ROOT_BACKREF_KEY) {
+			err |= check_root_ref(tree_root, &key, node, slot);
+		} else if (key.type == BTRFS_ROOT_ITEM_KEY &&
+			   fs_root_objectid(key.objectid)) {
+			/* The reloc tree is read uncached and freed by us */
+			is_reloc = key.objectid == BTRFS_TREE_RELOC_OBJECTID;
+			if (is_reloc) {
+				cur_root = btrfs_read_fs_root_no_cache(fs_info,
+								       &key);
+			} else {
+				key.offset = (u64)-1;
+				cur_root = btrfs_read_fs_root(fs_info, &key);
+			}
+
+			if (IS_ERR(cur_root)) {
+				error("Fail to read fs/subvol tree: %lld",
+				      key.objectid);
+				err = -EIO;
+			} else {
+				err |= check_fs_root_v2(cur_root, ext_ref);
+				if (is_reloc)
+					btrfs_free_fs_root(cur_root);
+			}
+		}
+
+		ret = btrfs_next_item(tree_root, &path);
+		if (ret != 0) {
+			/* > 0: no more items; < 0: the error replaces err */
+			if (ret < 0)
+				err = ret;
+			break;
+		}
+	}
+
+out:
+	btrfs_release_path(&path);
+	return err;
+}
+
+/*
+ * Low memory usage version check_chunks_and_extents.
+ *
+ * Runs check_btrfs_root() with check_all set on the chunk root, on the tree
+ * root, and then on every tree whose ROOT_ITEM is found in the tree root,
+ * starting from the extent tree's root item.
+ *
+ * When repair is enabled a single transaction on the extent root covers the
+ * whole run; block accounting is fixed and the transaction committed at the
+ * end.
+ *
+ * Returns the OR of the individual check results, or the error from
+ * btrfs_start_transaction().
+ */
+int check_chunks_and_extents_v2(struct btrfs_fs_info *fs_info)
+{
+	struct btrfs_trans_handle *trans = NULL;
+	struct btrfs_path path;
+	struct btrfs_key old_key;
+	struct btrfs_key key;
+	struct btrfs_root *root1;
+	struct btrfs_root *root;
+	struct btrfs_root *cur_root;
+	int err = 0;
+	int ret;
+
+	root = fs_info->fs_root;
+
+	if (repair) {
+		trans = btrfs_start_transaction(fs_info->extent_root, 1);
+		if (IS_ERR(trans)) {
+			error("failed to start transaction before check");
+			return PTR_ERR(trans);
+		}
+	}
+
+	root1 = root->fs_info->chunk_root;
+	ret = check_btrfs_root(trans, root1, 0, 1);
+	err |= ret;
+
+	/* root1 stays the tree root for the rest of the function */
+	root1 = root->fs_info->tree_root;
+	ret = check_btrfs_root(trans, root1, 0, 1);
+	err |= ret;
+
+	btrfs_init_path(&path);
+	key.objectid = BTRFS_EXTENT_TREE_OBJECTID;
+	key.offset = 0;
+	key.type = BTRFS_ROOT_ITEM_KEY;
+
+	ret = btrfs_search_slot(NULL, root1, &key, &path, 0, 0);
+	if (ret) {
+		/* NOTE(review): err is left untouched on this path - confirm */
+		error("cannot find extent tree in tree_root");
+		goto out;
+	}
+
+	while (1) {
+		btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
+		if (key.type != BTRFS_ROOT_ITEM_KEY)
+			goto next;
+		/*
+		 * Keep the key as found: the read below uses offset (u64)-1,
+		 * the search after the check needs the original key.
+		 */
+		old_key = key;
+		key.offset = (u64)-1;
+
+		if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
+			cur_root = btrfs_read_fs_root_no_cache(root->fs_info,
+					&key);
+		else
+			cur_root = btrfs_read_fs_root(root->fs_info, &key);
+		if (IS_ERR(cur_root) || !cur_root) {
+			/* NOTE(review): reported, but err is not updated */
+			error("failed to read tree: %lld", key.objectid);
+			goto next;
+		}
+
+		ret = check_btrfs_root(trans, cur_root, 0, 1);
+		err |= ret;
+
+		if (key.objectid == BTRFS_TREE_RELOC_OBJECTID)
+			btrfs_free_fs_root(cur_root);
+
+		/*
+		 * Look the same root item up again before moving on;
+		 * presumably because a repairing check may have changed the
+		 * tree root under the old path - TODO confirm.
+		 */
+		btrfs_release_path(&path);
+		ret = btrfs_search_slot(NULL, root->fs_info->tree_root,
+					&old_key, &path, 0, 0);
+		if (ret)
+			goto out;
+next:
+		ret = btrfs_next_item(root1, &path);
+		if (ret)
+			goto out;
+	}
+out:
+
+	/* if repair, update block accounting */
+	if (repair) {
+		ret = btrfs_fix_block_accounting(trans, root);
+		if (ret)
+			err |= ret;
+		else
+			err &= ~BG_ACCOUNTING_ERROR;
+	}
+
+	if (trans)
+		btrfs_commit_transaction(trans, root->fs_info->extent_root);
+
+	btrfs_release_path(&path);
+
+	return err;
+}
diff --git a/check/lowmem.h b/check/lowmem.h index e6ca7634..d1051a0b 100644 --- a/check/lowmem.h +++ b/check/lowmem.h @@ -20,6 +20,8 @@ #ifndef __BTRFS_CHECK_LOWMEM_H__ #define __BTRFS_CHECK_LOWMEM_H__ +#include "check/common.h" + #define ROOT_DIR_ERROR (1<<1) /* bad ROOT_DIR */ #define DIR_ITEM_MISSING (1<<2) /* DIR_ITEM not found */ #define DIR_ITEM_MISMATCH (1<<3) /* DIR_ITEM found but not match */ @@ -59,4 +61,7 @@ #define ACCOUNTING_MISMATCH (1 << 7) /* Used space accounting error */ #define CHUNK_TYPE_MISMATCH (1 << 8) +int check_fs_roots_v2(struct btrfs_fs_info *fs_info); +int check_chunks_and_extents_v2(struct btrfs_fs_info *fs_info); + #endif diff --git a/check/main.c b/check/main.c index 6f5af7fb..98e24afb 100644 --- a/check/main.c +++ b/check/main.c @@ -1579,322 +1579,6 @@ static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb, return ret; } -static int update_nodes_refs(struct btrfs_root *root, u64 bytenr, - struct extent_buffer *eb, struct node_refs *nrefs, - u64 level, int check_all); -static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path, - unsigned int ext_ref); - -/* - * Returns >0 Found error, not fatal, should continue - * Returns <0 Fatal error, must exit the whole check - * Returns 0 No errors found - */ -static int process_one_leaf_v2(struct btrfs_root *root, struct btrfs_path *path, - struct node_refs *nrefs, int *level, int ext_ref) -{ - struct extent_buffer *cur = path->nodes[0]; - struct btrfs_key key; - u64 cur_bytenr; - u32 nritems; - u64 first_ino = 0; - int root_level = btrfs_header_level(root->node); - int i; - int ret = 0; /* Final return value */ - int err = 0; /* Positive error bitmap */ - - cur_bytenr = cur->start; - - /* skip to first inode item or the first inode number change */ - nritems = btrfs_header_nritems(cur); - for (i = 0; i < nritems; i++) { -
btrfs_item_key_to_cpu(cur, &key, i); - if (i == 0) - first_ino = key.objectid; - if (key.type == BTRFS_INODE_ITEM_KEY || - (first_ino && first_ino != key.objectid)) - break; - } - if (i == nritems) { - path->slots[0] = nritems; - return 0; - } - path->slots[0] = i; - -again: - err |= check_inode_item(root, path, ext_ref); - - /* modify cur since check_inode_item may change path */ - cur = path->nodes[0]; - - if (err & LAST_ITEM) - goto out; - - /* still have inode items in thie leaf */ - if (cur->start == cur_bytenr) - goto again; - - /* - * we have switched to another leaf, above nodes may - * have changed, here walk down the path, if a node - * or leaf is shared, check whether we can skip this - * node or leaf. - */ - for (i = root_level; i >= 0; i--) { - if (path->nodes[i]->start == nrefs->bytenr[i]) - continue; - - ret = update_nodes_refs(root, path->nodes[i]->start, - path->nodes[i], nrefs, i, 0); - if (ret) - goto out; - - if (!nrefs->need_check[i]) { - *level += 1; - break; - } - } - - for (i = 0; i < *level; i++) { - free_extent_buffer(path->nodes[i]); - path->nodes[i] = NULL; - } -out: - err &= ~LAST_ITEM; - if (err && !ret) - ret = err; - return ret; -} - -/* - * for a tree node or leaf, if it's shared, indeed we don't need to iterate it - * in every fs or file tree check. Here we find its all root ids, and only check - * it in the fs or file tree which has the smallest root id. - */ -static int need_check(struct btrfs_root *root, struct ulist *roots) -{ - struct rb_node *node; - struct ulist_node *u; - - /* - * @roots can be empty if it belongs to tree reloc tree - * In that case, we should always check the leaf, as we can't use - * the tree owner to ensure some other root will check it. 
- */ - if (roots->nnodes == 1 || roots->nnodes == 0) - return 1; - - node = rb_first(&roots->root); - u = rb_entry(node, struct ulist_node, rb_node); - /* - * current root id is not smallest, we skip it and let it be checked - * in the fs or file tree who hash the smallest root id. - */ - if (root->objectid != u->val) - return 0; - - return 1; -} - -static int calc_extent_flag_v2(struct btrfs_root *root, struct extent_buffer *eb, - u64 *flags_ret) -{ - struct btrfs_root *extent_root = root->fs_info->extent_root; - struct btrfs_root_item *ri = &root->root_item; - struct btrfs_extent_inline_ref *iref; - struct btrfs_extent_item *ei; - struct btrfs_key key; - struct btrfs_path *path = NULL; - unsigned long ptr; - unsigned long end; - u64 flags; - u64 owner = 0; - u64 offset; - int slot; - int type; - int ret = 0; - - /* - * Except file/reloc tree, we can not have FULL BACKREF MODE - */ - if (root->objectid < BTRFS_FIRST_FREE_OBJECTID) - goto normal; - - /* root node */ - if (eb->start == btrfs_root_bytenr(ri)) - goto normal; - - if (btrfs_header_flag(eb, BTRFS_HEADER_FLAG_RELOC)) - goto full_backref; - - owner = btrfs_header_owner(eb); - if (owner == root->objectid) - goto normal; - - path = btrfs_alloc_path(); - if (!path) - return -ENOMEM; - - key.objectid = btrfs_header_bytenr(eb); - key.type = (u8)-1; - key.offset = (u64)-1; - - ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0); - if (ret <= 0) { - ret = -EIO; - goto out; - } - - if (ret > 0) { - ret = btrfs_previous_extent_item(extent_root, path, - key.objectid); - if (ret) - goto full_backref; - - } - btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]); - - eb = path->nodes[0]; - slot = path->slots[0]; - ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item); - - flags = btrfs_extent_flags(eb, ei); - if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) - goto full_backref; - - ptr = (unsigned long)(ei + 1); - end = (unsigned long)ei + btrfs_item_size_nr(eb, slot); - - if (key.type == 
BTRFS_EXTENT_ITEM_KEY) - ptr += sizeof(struct btrfs_tree_block_info); - -next: - /* Reached extent item ends normally */ - if (ptr == end) - goto full_backref; - - /* Beyond extent item end, wrong item size */ - if (ptr > end) { - error("extent item at bytenr %llu slot %d has wrong size", - eb->start, slot); - goto full_backref; - } - - iref = (struct btrfs_extent_inline_ref *)ptr; - offset = btrfs_extent_inline_ref_offset(eb, iref); - type = btrfs_extent_inline_ref_type(eb, iref); - - if (type == BTRFS_TREE_BLOCK_REF_KEY && offset == owner) - goto normal; - ptr += btrfs_extent_inline_ref_size(type); - goto next; - -normal: - *flags_ret &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF; - goto out; - -full_backref: - *flags_ret |= BTRFS_BLOCK_FLAG_FULL_BACKREF; -out: - btrfs_free_path(path); - return ret; -} - -/* - * for a tree node or leaf, we record its reference count, so later if we still - * process this node or leaf, don't need to compute its reference count again. - * - * @bytenr if @bytenr == (u64)-1, only update nrefs->full_backref[level] - */ -static int update_nodes_refs(struct btrfs_root *root, u64 bytenr, - struct extent_buffer *eb, struct node_refs *nrefs, - u64 level, int check_all) -{ - struct ulist *roots; - u64 refs = 0; - u64 flags = 0; - int root_level = btrfs_header_level(root->node); - int check; - int ret; - - if (nrefs->bytenr[level] == bytenr) - return 0; - - if (bytenr != (u64)-1) { - /* the return value of this function seems a mistake */ - ret = btrfs_lookup_extent_info(NULL, root, bytenr, - level, 1, &refs, &flags); - /* temporary fix */ - if (ret < 0 && !check_all) - return ret; - - nrefs->bytenr[level] = bytenr; - nrefs->refs[level] = refs; - nrefs->full_backref[level] = 0; - nrefs->checked[level] = 0; - - if (refs > 1) { - ret = btrfs_find_all_roots(NULL, root->fs_info, bytenr, - 0, &roots); - if (ret) - return -EIO; - - check = need_check(root, roots); - ulist_free(roots); - nrefs->need_check[level] = check; - } else { - if (!check_all) { - 
nrefs->need_check[level] = 1; - } else { - if (level == root_level) { - nrefs->need_check[level] = 1; - } else { - /* - * The node refs may have not been - * updated if upper needs checking (the - * lowest root_objectid) the node can - * be checked. - */ - nrefs->need_check[level] = - nrefs->need_check[level + 1]; - } - } - } - } - - if (check_all && eb) { - calc_extent_flag_v2(root, eb, &flags); - if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) - nrefs->full_backref[level] = 1; - } - - return 0; -} - -/* - * @level if @level == -1 means extent data item - * else normal treeblocl. - */ -static int should_check_extent_strictly(struct btrfs_root *root, - struct node_refs *nrefs, int level) -{ - int root_level = btrfs_header_level(root->node); - - if (level > root_level || level < -1) - return 1; - if (level == root_level) - return 1; - /* - * if the upper node is marked full backref, it should contain shared - * backref of the parent (except owner == root->objectid). - */ - while (++level <= root_level) - if (nrefs->refs[level] > 1) - return 0; - - return 1; -} - static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path, struct walk_control *wc, int *level, struct node_refs *nrefs) @@ -2024,436 +1708,80 @@ out: return err; } -/* - * Update global fs information. 
- */ -static void account_bytes(struct btrfs_root *root, struct btrfs_path *path, - int level) +static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path, + struct walk_control *wc, int *level) { - u32 free_nrs; - struct extent_buffer *eb = path->nodes[level]; - - total_btree_bytes += eb->len; - if (fs_root_objectid(root->objectid)) - total_fs_tree_bytes += eb->len; - if (btrfs_header_owner(eb) == BTRFS_EXTENT_TREE_OBJECTID) - total_extent_tree_bytes += eb->len; + int i; + struct extent_buffer *leaf; - if (level == 0) { - btree_space_waste += btrfs_leaf_free_space(root, eb); - } else { - free_nrs = (BTRFS_NODEPTRS_PER_BLOCK(root->fs_info) - - btrfs_header_nritems(eb)); - btree_space_waste += free_nrs * sizeof(struct btrfs_key_ptr); + for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) { + leaf = path->nodes[i]; + if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) { + path->slots[i]++; + *level = i; + return 0; + } else { + free_extent_buffer(path->nodes[*level]); + path->nodes[*level] = NULL; + BUG_ON(*level > wc->active_node); + if (*level == wc->active_node) + leave_shared_node(root, wc, *level); + *level = i + 1; + } } + return 1; } -/* - * This function only handles BACKREF_MISSING, - * If corresponding extent item exists, increase the ref, else insert an extent - * item and backref. - * - * Returns error bits after repair. 
- */ -static int repair_tree_block_ref(struct btrfs_trans_handle *trans, - struct btrfs_root *root, - struct extent_buffer *node, - struct node_refs *nrefs, int level, int err) +static int check_root_dir(struct inode_record *rec) { - struct btrfs_fs_info *fs_info = root->fs_info; - struct btrfs_root *extent_root = fs_info->extent_root; - struct btrfs_path path; - struct btrfs_extent_item *ei; - struct btrfs_tree_block_info *bi; - struct btrfs_key key; - struct extent_buffer *eb; - u32 size = sizeof(*ei); - u32 node_size = root->fs_info->nodesize; - int insert_extent = 0; - int skinny_metadata = btrfs_fs_incompat(fs_info, SKINNY_METADATA); - int root_level = btrfs_header_level(root->node); - int generation; - int ret; - u64 owner; - u64 bytenr; - u64 flags = BTRFS_EXTENT_FLAG_TREE_BLOCK; - u64 parent = 0; - - if ((err & BACKREF_MISSING) == 0) - return err; + struct inode_backref *backref; + int ret = -1; - WARN_ON(level > BTRFS_MAX_LEVEL); - WARN_ON(level < 0); + if (!rec->found_inode_item || rec->errors) + goto out; + if (rec->nlink != 1 || rec->found_link != 0) + goto out; + if (list_empty(&rec->backrefs)) + goto out; + backref = to_inode_backref(rec->backrefs.next); + if (!backref->found_inode_ref) + goto out; + if (backref->index != 0 || backref->namelen != 2 || + memcmp(backref->name, "..", 2)) + goto out; + if (backref->found_dir_index || backref->found_dir_item) + goto out; + ret = 0; +out: + return ret; +} - btrfs_init_path(&path); - bytenr = btrfs_header_bytenr(node); - owner = btrfs_header_owner(node); - generation = btrfs_header_generation(node); +static int repair_inode_isize(struct btrfs_trans_handle *trans, + struct btrfs_root *root, struct btrfs_path *path, + struct inode_record *rec) +{ + struct btrfs_inode_item *ei; + struct btrfs_key key; + int ret; - key.objectid = bytenr; - key.type = (u8)-1; + key.objectid = rec->ino; + key.type = BTRFS_INODE_ITEM_KEY; key.offset = (u64)-1; - /* Search for the extent item */ - ret = btrfs_search_slot(NULL, 
extent_root, &key, &path, 0, 0); - if (ret <= 0) { - ret = -EIO; - goto out; - } - - ret = btrfs_previous_extent_item(extent_root, &path, bytenr); - if (ret) - insert_extent = 1; - - /* calculate if the extent item flag is full backref or not */ - if (nrefs->full_backref[level] != 0) - flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF; - - /* insert an extent item */ - if (insert_extent) { - struct btrfs_disk_key copy_key; - - generation = btrfs_header_generation(node); - - if (level < root_level && nrefs->full_backref[level + 1] && - owner != root->objectid) { - flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF; - } - - key.objectid = bytenr; - if (!skinny_metadata) { - key.type = BTRFS_EXTENT_ITEM_KEY; - key.offset = node_size; - size += sizeof(*bi); - } else { - key.type = BTRFS_METADATA_ITEM_KEY; - key.offset = level; - } - - btrfs_release_path(&path); - ret = btrfs_insert_empty_item(trans, extent_root, &path, &key, - size); - if (ret) - goto out; - - eb = path.nodes[0]; - ei = btrfs_item_ptr(eb, path.slots[0], struct btrfs_extent_item); - - btrfs_set_extent_refs(eb, ei, 0); - btrfs_set_extent_generation(eb, ei, generation); - btrfs_set_extent_flags(eb, ei, flags); - - if (!skinny_metadata) { - bi = (struct btrfs_tree_block_info *)(ei + 1); - memset_extent_buffer(eb, 0, (unsigned long)bi, - sizeof(*bi)); - btrfs_set_disk_key_objectid(©_key, root->objectid); - btrfs_set_disk_key_type(©_key, 0); - btrfs_set_disk_key_offset(©_key, 0); - - btrfs_set_tree_block_level(eb, bi, level); - btrfs_set_tree_block_key(eb, bi, ©_key); - } - btrfs_mark_buffer_dirty(eb); - printf("Added an extent item [%llu %u]\n", bytenr, node_size); - btrfs_update_block_group(extent_root, bytenr, node_size, 1, 0); - - nrefs->refs[level] = 0; - nrefs->full_backref[level] = - flags & BTRFS_BLOCK_FLAG_FULL_BACKREF; - btrfs_release_path(&path); - } - - if (level < root_level && nrefs->full_backref[level + 1] && - owner != root->objectid) - parent = nrefs->bytenr[level + 1]; - - /* increase the ref */ - ret = 
btrfs_inc_extent_ref(trans, extent_root, bytenr, node_size, - parent, root->objectid, level, 0); - - nrefs->refs[level]++; -out: - btrfs_release_path(&path); - if (ret) { - error( - "failed to repair tree block ref start %llu root %llu due to %s", - bytenr, root->objectid, strerror(-ret)); - } else { - printf("Added one tree block ref start %llu %s %llu\n", - bytenr, parent ? "parent" : "root", - parent ? parent : root->objectid); - err &= ~BACKREF_MISSING; - } - - return err; -} - -static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path, - unsigned int ext_ref); -static int check_tree_block_ref(struct btrfs_root *root, - struct extent_buffer *eb, u64 bytenr, - int level, u64 owner, struct node_refs *nrefs); -static int check_leaf_items(struct btrfs_trans_handle *trans, - struct btrfs_root *root, struct btrfs_path *path, - struct node_refs *nrefs, int account_bytes); - -/* - * @trans just for lowmem repair mode - * @check all if not 0 then check all tree block backrefs and items - * 0 then just check relationship of items in fs tree(s) - * - * Returns >0 Found error, should continue - * Returns <0 Fatal error, must exit the whole check - * Returns 0 No errors found - */ -static int walk_down_tree_v2(struct btrfs_trans_handle *trans, - struct btrfs_root *root, struct btrfs_path *path, - int *level, struct node_refs *nrefs, int ext_ref, - int check_all) - -{ - enum btrfs_tree_block_status status; - u64 bytenr; - u64 ptr_gen; - struct btrfs_fs_info *fs_info = root->fs_info; - struct extent_buffer *next; - struct extent_buffer *cur; - int ret; - int err = 0; - int check; - int account_file_data = 0; - - WARN_ON(*level < 0); - WARN_ON(*level >= BTRFS_MAX_LEVEL); - - ret = update_nodes_refs(root, btrfs_header_bytenr(path->nodes[*level]), - path->nodes[*level], nrefs, *level, check_all); - if (ret < 0) - return ret; - - while (*level >= 0) { - WARN_ON(*level < 0); - WARN_ON(*level >= BTRFS_MAX_LEVEL); - cur = path->nodes[*level]; - bytenr = 
btrfs_header_bytenr(cur); - check = nrefs->need_check[*level]; - - if (btrfs_header_level(cur) != *level) - WARN_ON(1); - /* - * Update bytes accounting and check tree block ref - * NOTE: Doing accounting and check before checking nritems - * is necessary because of empty node/leaf. - */ - if ((check_all && !nrefs->checked[*level]) || - (!check_all && nrefs->need_check[*level])) { - ret = check_tree_block_ref(root, cur, - btrfs_header_bytenr(cur), btrfs_header_level(cur), - btrfs_header_owner(cur), nrefs); - - if (repair && ret) - ret = repair_tree_block_ref(trans, root, - path->nodes[*level], nrefs, *level, ret); - err |= ret; - - if (check_all && nrefs->need_check[*level] && - nrefs->refs[*level]) { - account_bytes(root, path, *level); - account_file_data = 1; - } - nrefs->checked[*level] = 1; - } - - if (path->slots[*level] >= btrfs_header_nritems(cur)) - break; - - /* Don't forgot to check leaf/node validation */ - if (*level == 0) { - /* skip duplicate check */ - if (check || !check_all) { - ret = btrfs_check_leaf(root, NULL, cur); - if (ret != BTRFS_TREE_BLOCK_CLEAN) { - err |= -EIO; - break; - } - } - - ret = 0; - if (!check_all) - ret = process_one_leaf_v2(root, path, nrefs, - level, ext_ref); - else - ret = check_leaf_items(trans, root, path, - nrefs, account_file_data); - err |= ret; - break; - } else { - if (check || !check_all) { - ret = btrfs_check_node(root, NULL, cur); - if (ret != BTRFS_TREE_BLOCK_CLEAN) { - err |= -EIO; - break; - } - } - } - - bytenr = btrfs_node_blockptr(cur, path->slots[*level]); - ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]); - - ret = update_nodes_refs(root, bytenr, NULL, nrefs, *level - 1, - check_all); - if (ret < 0) - break; - /* - * check all trees in check_chunks_and_extent_v2 - * check shared node once in check_fs_roots - */ - if (!check_all && !nrefs->need_check[*level - 1]) { - path->slots[*level]++; - continue; - } - - next = btrfs_find_tree_block(fs_info, bytenr, fs_info->nodesize); - if (!next || 
!btrfs_buffer_uptodate(next, ptr_gen)) { - free_extent_buffer(next); - reada_walk_down(root, cur, path->slots[*level]); - next = read_tree_block(fs_info, bytenr, ptr_gen); - if (!extent_buffer_uptodate(next)) { - struct btrfs_key node_key; - - btrfs_node_key_to_cpu(path->nodes[*level], - &node_key, - path->slots[*level]); - btrfs_add_corrupt_extent_record(fs_info, - &node_key, path->nodes[*level]->start, - fs_info->nodesize, *level); - err |= -EIO; - break; - } - } - - ret = check_child_node(cur, path->slots[*level], next); - err |= ret; - if (ret < 0) - break; - - if (btrfs_is_leaf(next)) - status = btrfs_check_leaf(root, NULL, next); - else - status = btrfs_check_node(root, NULL, next); - if (status != BTRFS_TREE_BLOCK_CLEAN) { - free_extent_buffer(next); - err |= -EIO; - break; - } - - *level = *level - 1; - free_extent_buffer(path->nodes[*level]); - path->nodes[*level] = next; - path->slots[*level] = 0; - account_file_data = 0; - - update_nodes_refs(root, (u64)-1, next, nrefs, *level, check_all); - } - return err; -} - -static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path, - struct walk_control *wc, int *level) -{ - int i; - struct extent_buffer *leaf; - - for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) { - leaf = path->nodes[i]; - if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) { - path->slots[i]++; - *level = i; - return 0; - } else { - free_extent_buffer(path->nodes[*level]); - path->nodes[*level] = NULL; - BUG_ON(*level > wc->active_node); - if (*level == wc->active_node) - leave_shared_node(root, wc, *level); - *level = i + 1; - } - } - return 1; -} - -static int walk_up_tree_v2(struct btrfs_root *root, struct btrfs_path *path, - int *level) -{ - int i; - struct extent_buffer *leaf; - - for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) { - leaf = path->nodes[i]; - if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) { - path->slots[i]++; - *level = i; - return 0; - } else { - 
free_extent_buffer(path->nodes[*level]); - path->nodes[*level] = NULL; - *level = i + 1; - } - } - return 1; -} - -static int check_root_dir(struct inode_record *rec) -{ - struct inode_backref *backref; - int ret = -1; - - if (!rec->found_inode_item || rec->errors) - goto out; - if (rec->nlink != 1 || rec->found_link != 0) - goto out; - if (list_empty(&rec->backrefs)) - goto out; - backref = to_inode_backref(rec->backrefs.next); - if (!backref->found_inode_ref) - goto out; - if (backref->index != 0 || backref->namelen != 2 || - memcmp(backref->name, "..", 2)) - goto out; - if (backref->found_dir_index || backref->found_dir_item) - goto out; - ret = 0; -out: - return ret; -} - -static int repair_inode_isize(struct btrfs_trans_handle *trans, - struct btrfs_root *root, struct btrfs_path *path, - struct inode_record *rec) -{ - struct btrfs_inode_item *ei; - struct btrfs_key key; - int ret; - - key.objectid = rec->ino; - key.type = BTRFS_INODE_ITEM_KEY; - key.offset = (u64)-1; - - ret = btrfs_search_slot(trans, root, &key, path, 0, 1); - if (ret < 0) - goto out; - if (ret) { - if (!path->slots[0]) { - ret = -ENOENT; - goto out; - } - path->slots[0]--; - ret = 0; - } - btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]); - if (key.objectid != rec->ino) { - ret = -ENOENT; + ret = btrfs_search_slot(trans, root, &key, path, 0, 1); + if (ret < 0) + goto out; + if (ret) { + if (!path->slots[0]) { + ret = -ENOENT; + goto out; + } + path->slots[0]--; + ret = 0; + } + btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]); + if (key.objectid != rec->ino) { + ret = -ENOENT; goto out; } @@ -2618,15 +1946,6 @@ static int delete_dir_index(struct btrfs_root *root, return ret; } -static int create_inode_item_lowmem(struct btrfs_trans_handle *trans, - struct btrfs_root *root, u64 ino, - u8 filetype) -{ - u32 mode = (filetype == BTRFS_FT_DIR ? 
S_IFDIR : S_IFREG) | 0755; - - return insert_inode_item(trans, root, ino, 0, 0, 0, mode); -} - static int create_inode_item(struct btrfs_root *root, struct inode_record *rec, int root_dir) { @@ -4078,8731 +3397,4857 @@ out: return err; } -/* - * Find the @index according by @ino and name. - * Notice:time efficiency is O(N) - * - * @root: the root of the fs/file tree - * @index_ret: the index as return value - * @namebuf: the name to match - * @name_len: the length of name to match - * @file_type: the file_type of INODE_ITEM to match - * - * Returns 0 if found and *@index_ret will be modified with right value - * Returns< 0 not found and *@index_ret will be (u64)-1 - */ -static int find_dir_index(struct btrfs_root *root, u64 dirid, u64 location_id, - u64 *index_ret, char *namebuf, u32 name_len, - u8 file_type) +static struct tree_backref *find_tree_backref(struct extent_record *rec, + u64 parent, u64 root) { - struct btrfs_path path; - struct extent_buffer *node; - struct btrfs_dir_item *di; - struct btrfs_key key; - struct btrfs_key location; - char name[BTRFS_NAME_LEN] = {0}; + struct rb_node *node; + struct tree_backref *back = NULL; + struct tree_backref match = { + .node = { + .is_data = 0, + }, + }; - u32 total; - u32 cur = 0; - u32 len; - u32 data_len; - u8 filetype; - int slot; - int ret; + if (parent) { + match.parent = parent; + match.node.full_backref = 1; + } else { + match.root = root; + } - ASSERT(index_ret); + node = rb_search(&rec->backref_tree, &match.node.node, + (rb_compare_keys)compare_extent_backref, NULL); + if (node) + back = to_tree_backref(rb_node_to_extent_backref(node)); - /* search from the last index */ - key.objectid = dirid; - key.offset = (u64)-1; - key.type = BTRFS_DIR_INDEX_KEY; + return back; +} - btrfs_init_path(&path); - ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0); - if (ret < 0) - return ret; +static struct data_backref *find_data_backref(struct extent_record *rec, + u64 parent, u64 root, + u64 owner, u64 offset, + 
int found_ref, + u64 disk_bytenr, u64 bytes) +{ + struct rb_node *node; + struct data_backref *back = NULL; + struct data_backref match = { + .node = { + .is_data = 1, + }, + .owner = owner, + .offset = offset, + .bytes = bytes, + .found_ref = found_ref, + .disk_bytenr = disk_bytenr, + }; -loop: - ret = btrfs_previous_item(root, &path, dirid, BTRFS_DIR_INDEX_KEY); - if (ret) { - ret = -ENOENT; - *index_ret = (64)-1; - goto out; + if (parent) { + match.parent = parent; + match.node.full_backref = 1; + } else { + match.root = root; } - /* Check whether inode_id/filetype/name match */ - node = path.nodes[0]; - slot = path.slots[0]; - di = btrfs_item_ptr(node, slot, struct btrfs_dir_item); - total = btrfs_item_size_nr(node, slot); - while (cur < total) { - ret = -ENOENT; - len = btrfs_dir_name_len(node, di); - data_len = btrfs_dir_data_len(node, di); - - btrfs_dir_item_key_to_cpu(node, di, &location); - if (location.objectid != location_id || - location.type != BTRFS_INODE_ITEM_KEY || - location.offset != 0) - goto next; - filetype = btrfs_dir_type(node, di); - if (file_type != filetype) - goto next; + node = rb_search(&rec->backref_tree, &match.node.node, + (rb_compare_keys)compare_extent_backref, NULL); + if (node) + back = to_data_backref(rb_node_to_extent_backref(node)); - if (len > BTRFS_NAME_LEN) - len = BTRFS_NAME_LEN; + return back; +} - read_extent_buffer(node, name, (unsigned long)(di + 1), len); - if (len != name_len || strncmp(namebuf, name, len)) - goto next; +static int do_check_fs_roots(struct btrfs_fs_info *fs_info, + struct cache_tree *root_cache) +{ + int ret; - btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]); - *index_ret = key.offset; - ret = 0; - goto out; -next: - len += sizeof(*di) + data_len; - di = (struct btrfs_dir_item *)((char *)di + len); - cur += len; - } - goto loop; + if (!ctx.progress_enabled) + fprintf(stderr, "checking fs roots\n"); + if (check_mode == CHECK_MODE_LOWMEM) + ret = check_fs_roots_v2(fs_info); + else + ret = 
check_fs_roots(fs_info, root_cache); -out: - btrfs_release_path(&path); return ret; } -/* - * Find DIR_ITEM/DIR_INDEX for the given key and check it with the specified - * INODE_REF/INODE_EXTREF match. - * - * @root: the root of the fs/file tree - * @key: the key of the DIR_ITEM/DIR_INDEX, key->offset will be right - * value while find index - * @location_key: location key of the struct btrfs_dir_item to match - * @name: the name to match - * @namelen: the length of name - * @file_type: the type of file to math - * - * Return 0 if no error occurred. - * Return DIR_ITEM_MISSING/DIR_INDEX_MISSING if couldn't find - * DIR_ITEM/DIR_INDEX - * Return DIR_ITEM_MISMATCH/DIR_INDEX_MISMATCH if INODE_REF/INODE_EXTREF - * and DIR_ITEM/DIR_INDEX mismatch - */ -static int find_dir_item(struct btrfs_root *root, struct btrfs_key *key, - struct btrfs_key *location_key, char *name, - u32 namelen, u8 file_type) +static int all_backpointers_checked(struct extent_record *rec, int print_errs) { - struct btrfs_path path; - struct extent_buffer *node; - struct btrfs_dir_item *di; - struct btrfs_key location; - char namebuf[BTRFS_NAME_LEN] = {0}; - u32 total; - u32 cur = 0; - u32 len; - u32 data_len; - u8 filetype; - int slot; - int ret; + struct extent_backref *back, *tmp; + struct tree_backref *tback; + struct data_backref *dback; + u64 found = 0; + int err = 0; - /* get the index by traversing all index */ - if (key->type == BTRFS_DIR_INDEX_KEY && key->offset == (u64)-1) { - ret = find_dir_index(root, key->objectid, - location_key->objectid, &key->offset, - name, namelen, file_type); - if (ret) - ret = DIR_INDEX_MISSING; - return ret; - } + rbtree_postorder_for_each_entry_safe(back, tmp, + &rec->backref_tree, node) { + if (!back->found_extent_tree) { + err = 1; + if (!print_errs) + goto out; + if (back->is_data) { + dback = to_data_backref(back); + fprintf(stderr, "Data backref %llu %s %llu" + " owner %llu offset %llu num_refs %lu" + " not found in extent tree\n", + (unsigned long 
long)rec->start, + back->full_backref ? + "parent" : "root", + back->full_backref ? + (unsigned long long)dback->parent: + (unsigned long long)dback->root, + (unsigned long long)dback->owner, + (unsigned long long)dback->offset, + (unsigned long)dback->num_refs); + } else { + tback = to_tree_backref(back); + fprintf(stderr, "Tree backref %llu parent %llu" + " root %llu not found in extent tree\n", + (unsigned long long)rec->start, + (unsigned long long)tback->parent, + (unsigned long long)tback->root); + } + } + if (!back->is_data && !back->found_ref) { + err = 1; + if (!print_errs) + goto out; + tback = to_tree_backref(back); + fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n", + (unsigned long long)rec->start, + back->full_backref ? "parent" : "root", + back->full_backref ? + (unsigned long long)tback->parent : + (unsigned long long)tback->root, back); + } + if (back->is_data) { + dback = to_data_backref(back); + if (dback->found_ref != dback->num_refs) { + err = 1; + if (!print_errs) + goto out; + fprintf(stderr, "Incorrect local backref count" + " on %llu %s %llu owner %llu" + " offset %llu found %u wanted %u back %p\n", + (unsigned long long)rec->start, + back->full_backref ? + "parent" : "root", + back->full_backref ? + (unsigned long long)dback->parent: + (unsigned long long)dback->root, + (unsigned long long)dback->owner, + (unsigned long long)dback->offset, + dback->found_ref, dback->num_refs, back); + } + if (dback->disk_bytenr != rec->start) { + err = 1; + if (!print_errs) + goto out; + fprintf(stderr, "Backref disk bytenr does not" + " match extent record, bytenr=%llu, " + "ref bytenr=%llu\n", + (unsigned long long)rec->start, + (unsigned long long)dback->disk_bytenr); + } - btrfs_init_path(&path); - ret = btrfs_search_slot(NULL, root, key, &path, 0, 0); - if (ret) { - ret = key->type == BTRFS_DIR_ITEM_KEY ? 
DIR_ITEM_MISSING : - DIR_INDEX_MISSING; - goto out; + if (dback->bytes != rec->nr) { + err = 1; + if (!print_errs) + goto out; + fprintf(stderr, "Backref bytes do not match " + "extent backref, bytenr=%llu, ref " + "bytes=%llu, backref bytes=%llu\n", + (unsigned long long)rec->start, + (unsigned long long)rec->nr, + (unsigned long long)dback->bytes); + } + } + if (!back->is_data) { + found += 1; + } else { + dback = to_data_backref(back); + found += dback->found_ref; + } } - - /* Check whether inode_id/filetype/name match */ - node = path.nodes[0]; - slot = path.slots[0]; - di = btrfs_item_ptr(node, slot, struct btrfs_dir_item); - total = btrfs_item_size_nr(node, slot); - while (cur < total) { - ret = key->type == BTRFS_DIR_ITEM_KEY ? - DIR_ITEM_MISMATCH : DIR_INDEX_MISMATCH; - - len = btrfs_dir_name_len(node, di); - data_len = btrfs_dir_data_len(node, di); - - btrfs_dir_item_key_to_cpu(node, di, &location); - if (location.objectid != location_key->objectid || - location.type != location_key->type || - location.offset != location_key->offset) - goto next; - - filetype = btrfs_dir_type(node, di); - if (file_type != filetype) - goto next; - - if (len > BTRFS_NAME_LEN) { - len = BTRFS_NAME_LEN; - warning("root %llu %s[%llu %llu] name too long %u, trimmed", - root->objectid, - key->type == BTRFS_DIR_ITEM_KEY ? 
- "DIR_ITEM" : "DIR_INDEX", - key->objectid, key->offset, len); - } - read_extent_buffer(node, namebuf, (unsigned long)(di + 1), - len); - if (len != namelen || strncmp(namebuf, name, len)) - goto next; - - ret = 0; - goto out; -next: - len += sizeof(*di) + data_len; - di = (struct btrfs_dir_item *)((char *)di + len); - cur += len; + if (found != rec->refs) { + err = 1; + if (!print_errs) + goto out; + fprintf(stderr, "Incorrect global backref count " + "on %llu found %llu wanted %llu\n", + (unsigned long long)rec->start, + (unsigned long long)found, + (unsigned long long)rec->refs); } - out: - btrfs_release_path(&path); - return ret; + return err; } -/* - * Prints inode ref error message - */ -static void print_inode_ref_err(struct btrfs_root *root, struct btrfs_key *key, - u64 index, const char *namebuf, int name_len, - u8 filetype, int err) +static void __free_one_backref(struct rb_node *node) { - if (!err) - return; - - /* root dir error */ - if (key->objectid == BTRFS_FIRST_FREE_OBJECTID) { - error( - "root %llu root dir shouldn't have INODE REF[%llu %llu] name %s", - root->objectid, key->objectid, key->offset, namebuf); - return; - } + struct extent_backref *back = rb_node_to_extent_backref(node); - /* normal error */ - if (err & (DIR_ITEM_MISMATCH | DIR_ITEM_MISSING)) - error("root %llu DIR ITEM[%llu %llu] %s name %s filetype %u", - root->objectid, key->offset, - btrfs_name_hash(namebuf, name_len), - err & DIR_ITEM_MISMATCH ? "mismatch" : "missing", - namebuf, filetype); - if (err & (DIR_INDEX_MISMATCH | DIR_INDEX_MISSING)) - error("root %llu DIR INDEX[%llu %llu] %s name %s filetype %u", - root->objectid, key->offset, index, - err & DIR_ITEM_MISMATCH ? "mismatch" : "missing", - namebuf, filetype); + free(back); } -/* - * Insert the missing inode item. - * - * Returns 0 means success. - * Returns <0 means error. 
- */ -static int repair_inode_item_missing(struct btrfs_root *root, u64 ino, - u8 filetype) +static void free_all_extent_backrefs(struct extent_record *rec) { - struct btrfs_key key; - struct btrfs_trans_handle *trans; - struct btrfs_path path; - int ret; - - key.objectid = ino; - key.type = BTRFS_INODE_ITEM_KEY; - key.offset = 0; + rb_free_nodes(&rec->backref_tree, __free_one_backref); +} - btrfs_init_path(&path); - trans = btrfs_start_transaction(root, 1); - if (IS_ERR(trans)) { - ret = -EIO; - goto out; - } +static void free_extent_record_cache(struct cache_tree *extent_cache) +{ + struct cache_extent *cache; + struct extent_record *rec; - ret = btrfs_search_slot(trans, root, &key, &path, 1, 1); - if (ret < 0 || !ret) - goto fail; - - /* insert inode item */ - create_inode_item_lowmem(trans, root, ino, filetype); - ret = 0; -fail: - btrfs_commit_transaction(trans, root); -out: - if (ret) - error("failed to repair root %llu INODE ITEM[%llu] missing", - root->objectid, ino); - btrfs_release_path(&path); - return ret; + while (1) { + cache = first_cache_extent(extent_cache); + if (!cache) + break; + rec = container_of(cache, struct extent_record, cache); + remove_cache_extent(extent_cache, cache); + free_all_extent_backrefs(rec); + free(rec); + } } -/* - * The ternary means dir item, dir index and relative inode ref. - * The function handles errs: INODE_MISSING, DIR_INDEX_MISSING - * DIR_INDEX_MISMATCH, DIR_ITEM_MISSING, DIR_ITEM_MISMATCH by the follow - * strategy: - * If two of three is missing or mismatched, delete the existing one. - * If one of three is missing or mismatched, add the missing one. - * - * returns 0 means success. 
- * returns not 0 means on error; - */ -int repair_ternary_lowmem(struct btrfs_root *root, u64 dir_ino, u64 ino, - u64 index, char *name, int name_len, u8 filetype, - int err) +static int maybe_free_extent_rec(struct cache_tree *extent_cache, + struct extent_record *rec) { - struct btrfs_trans_handle *trans; - int stage = 0; - int ret = 0; - - /* - * stage shall be one of following valild values: - * 0: Fine, nothing to do. - * 1: One of three is wrong, so add missing one. - * 2: Two of three is wrong, so delete existed one. - */ - if (err & (DIR_INDEX_MISMATCH | DIR_INDEX_MISSING)) - stage++; - if (err & (DIR_ITEM_MISMATCH | DIR_ITEM_MISSING)) - stage++; - if (err & (INODE_REF_MISSING)) - stage++; - - /* stage must be smllarer than 3 */ - ASSERT(stage < 3); - - trans = btrfs_start_transaction(root, 1); - if (stage == 2) { - ret = btrfs_unlink(trans, root, ino, dir_ino, index, name, - name_len, 0); - goto out; - } - if (stage == 1) { - ret = btrfs_add_link(trans, root, ino, dir_ino, name, name_len, - filetype, &index, 1, 1); - goto out; + if (rec->content_checked && rec->owner_ref_checked && + rec->extent_item_refs == rec->refs && rec->refs > 0 && + rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) && + !rec->bad_full_backref && !rec->crossing_stripes && + !rec->wrong_chunk_type) { + remove_cache_extent(extent_cache, &rec->cache); + free_all_extent_backrefs(rec); + list_del_init(&rec->list); + free(rec); } -out: - btrfs_commit_transaction(trans, root); - - if (ret) - error("fail to repair inode %llu name %s filetype %u", - ino, name, filetype); - else - printf("%s ref/dir_item of inode %llu name %s filetype %u\n", - stage == 2 ? "Delete" : "Add", - ino, name, filetype); - - return ret; + return 0; } -/* - * Traverse the given INODE_REF and call find_dir_item() to find related - * DIR_ITEM/DIR_INDEX. 
- * - * @root: the root of the fs/file tree - * @ref_key: the key of the INODE_REF - * @path the path provides node and slot - * @refs: the count of INODE_REF - * @mode: the st_mode of INODE_ITEM - * @name_ret: returns with the first ref's name - * @name_len_ret: len of the name_ret - * - * Return 0 if no error occurred. - */ -static int check_inode_ref(struct btrfs_root *root, struct btrfs_key *ref_key, - struct btrfs_path *path, char *name_ret, - u32 *namelen_ret, u64 *refs_ret, int mode) +static int check_owner_ref(struct btrfs_root *root, + struct extent_record *rec, + struct extent_buffer *buf) { + struct extent_backref *node, *tmp; + struct tree_backref *back; + struct btrfs_root *ref_root; struct btrfs_key key; - struct btrfs_key location; - struct btrfs_inode_ref *ref; - struct extent_buffer *node; - char namebuf[BTRFS_NAME_LEN] = {0}; - u32 total; - u32 cur = 0; - u32 len; - u32 name_len; - u64 index; + struct btrfs_path path; + struct extent_buffer *parent; + int level; + int found = 0; int ret; - int err = 0; - int tmp_err; - int slot; - int need_research = 0; - u64 refs; -begin: - err = 0; - cur = 0; - refs = *refs_ret; - - /* since after repair, path and the dir item may be changed */ - if (need_research) { - need_research = 0; - btrfs_release_path(path); - ret = btrfs_search_slot(NULL, root, ref_key, path, 0, 0); - /* the item was deleted, let path point to the last checked item */ - if (ret > 0) { - if (path->slots[0] == 0) - btrfs_prev_leaf(root, path); - else - path->slots[0]--; - } - if (ret) - goto out; + rbtree_postorder_for_each_entry_safe(node, tmp, + &rec->backref_tree, node) { + if (node->is_data) + continue; + if (!node->found_ref) + continue; + if (node->full_backref) + continue; + back = to_tree_backref(node); + if (btrfs_header_owner(buf) == back->root) + return 0; } + BUG_ON(rec->is_root); - location.objectid = ref_key->objectid; - location.type = BTRFS_INODE_ITEM_KEY; - location.offset = 0; - node = path->nodes[0]; - slot = 
path->slots[0]; + /* try to find the block by search corresponding fs tree */ + key.objectid = btrfs_header_owner(buf); + key.type = BTRFS_ROOT_ITEM_KEY; + key.offset = (u64)-1; - memset(namebuf, 0, sizeof(namebuf) / sizeof(*namebuf)); - ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref); - total = btrfs_item_size_nr(node, slot); + ref_root = btrfs_read_fs_root(root->fs_info, &key); + if (IS_ERR(ref_root)) + return 1; -next: - /* Update inode ref count */ - refs++; - tmp_err = 0; - index = btrfs_inode_ref_index(node, ref); - name_len = btrfs_inode_ref_name_len(node, ref); + level = btrfs_header_level(buf); + if (level == 0) + btrfs_item_key_to_cpu(buf, &key, 0); + else + btrfs_node_key_to_cpu(buf, &key, 0); - if (name_len <= BTRFS_NAME_LEN) { - len = name_len; - } else { - len = BTRFS_NAME_LEN; - warning("root %llu INODE_REF[%llu %llu] name too long", - root->objectid, ref_key->objectid, ref_key->offset); - } + btrfs_init_path(&path); + path.lowest_level = level + 1; + ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0); + if (ret < 0) + return 0; - read_extent_buffer(node, namebuf, (unsigned long)(ref + 1), len); + parent = path.nodes[level + 1]; + if (parent && buf->start == btrfs_node_blockptr(parent, + path.slots[level + 1])) + found = 1; - /* copy the first name found to name_ret */ - if (refs == 1 && name_ret) { - memcpy(name_ret, namebuf, len); - *namelen_ret = len; - } + btrfs_release_path(&path); + return found ? 
0 : 1; +} - /* Check root dir ref */ - if (ref_key->objectid == BTRFS_FIRST_FREE_OBJECTID) { - if (index != 0 || len != strlen("..") || - strncmp("..", namebuf, len) || - ref_key->offset != BTRFS_FIRST_FREE_OBJECTID) { - /* set err bits then repair will delete the ref */ - err |= DIR_INDEX_MISSING; - err |= DIR_ITEM_MISSING; - } - goto end; - } +static int is_extent_tree_record(struct extent_record *rec) +{ + struct extent_backref *node, *tmp; + struct tree_backref *back; + int is_extent = 0; - /* Find related DIR_INDEX */ - key.objectid = ref_key->offset; - key.type = BTRFS_DIR_INDEX_KEY; - key.offset = index; - tmp_err |= find_dir_item(root, &key, &location, namebuf, len, - imode_to_type(mode)); - - /* Find related dir_item */ - key.objectid = ref_key->offset; - key.type = BTRFS_DIR_ITEM_KEY; - key.offset = btrfs_name_hash(namebuf, len); - tmp_err |= find_dir_item(root, &key, &location, namebuf, len, - imode_to_type(mode)); -end: - if (tmp_err && repair) { - ret = repair_ternary_lowmem(root, ref_key->offset, - ref_key->objectid, index, namebuf, - name_len, imode_to_type(mode), - tmp_err); - if (!ret) { - need_research = 1; - goto begin; - } + rbtree_postorder_for_each_entry_safe(node, tmp, + &rec->backref_tree, node) { + if (node->is_data) + return 0; + back = to_tree_backref(node); + if (node->full_backref) + return 0; + if (back->root == BTRFS_EXTENT_TREE_OBJECTID) + is_extent = 1; } - print_inode_ref_err(root, ref_key, index, namebuf, name_len, - imode_to_type(mode), tmp_err); - err |= tmp_err; - len = sizeof(*ref) + name_len; - ref = (struct btrfs_inode_ref *)((char *)ref + len); - cur += len; - if (cur < total) - goto next; - -out: - *refs_ret = refs; - return err; + return is_extent; } -/* - * Traverse the given INODE_EXTREF and call find_dir_item() to find related - * DIR_ITEM/DIR_INDEX. 
- * - * @root: the root of the fs/file tree - * @ref_key: the key of the INODE_EXTREF - * @refs: the count of INODE_EXTREF - * @mode: the st_mode of INODE_ITEM - * - * Return 0 if no error occurred. - */ -static int check_inode_extref(struct btrfs_root *root, - struct btrfs_key *ref_key, - struct extent_buffer *node, int slot, u64 *refs, - int mode) + +static int record_bad_block_io(struct btrfs_fs_info *info, + struct cache_tree *extent_cache, + u64 start, u64 len) { + struct extent_record *rec; + struct cache_extent *cache; struct btrfs_key key; - struct btrfs_key location; - struct btrfs_inode_extref *extref; - char namebuf[BTRFS_NAME_LEN] = {0}; - u32 total; - u32 cur = 0; - u32 len; - u32 name_len; - u64 index; - u64 parent; - int ret; - int err = 0; - location.objectid = ref_key->objectid; - location.type = BTRFS_INODE_ITEM_KEY; - location.offset = 0; + cache = lookup_cache_extent(extent_cache, start, len); + if (!cache) + return 0; + + rec = container_of(cache, struct extent_record, cache); + if (!is_extent_tree_record(rec)) + return 0; - extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref); - total = btrfs_item_size_nr(node, slot); + btrfs_disk_key_to_cpu(&key, &rec->parent_key); + return btrfs_add_corrupt_extent_record(info, &key, start, len, 0); +} -next: - /* update inode ref count */ - (*refs)++; - name_len = btrfs_inode_extref_name_len(node, extref); - index = btrfs_inode_extref_index(node, extref); - parent = btrfs_inode_extref_parent(node, extref); - if (name_len <= BTRFS_NAME_LEN) { - len = name_len; +static int swap_values(struct btrfs_root *root, struct btrfs_path *path, + struct extent_buffer *buf, int slot) +{ + if (btrfs_header_level(buf)) { + struct btrfs_key_ptr ptr1, ptr2; + + read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot), + sizeof(struct btrfs_key_ptr)); + read_extent_buffer(buf, &ptr2, + btrfs_node_key_ptr_offset(slot + 1), + sizeof(struct btrfs_key_ptr)); + write_extent_buffer(buf, &ptr1, + 
btrfs_node_key_ptr_offset(slot + 1), + sizeof(struct btrfs_key_ptr)); + write_extent_buffer(buf, &ptr2, + btrfs_node_key_ptr_offset(slot), + sizeof(struct btrfs_key_ptr)); + if (slot == 0) { + struct btrfs_disk_key key; + btrfs_node_key(buf, &key, 0); + btrfs_fixup_low_keys(root, path, &key, + btrfs_header_level(buf) + 1); + } } else { - len = BTRFS_NAME_LEN; - warning("root %llu INODE_EXTREF[%llu %llu] name too long", - root->objectid, ref_key->objectid, ref_key->offset); - } - read_extent_buffer(node, namebuf, (unsigned long)(extref + 1), len); + struct btrfs_item *item1, *item2; + struct btrfs_key k1, k2; + char *item1_data, *item2_data; + u32 item1_offset, item2_offset, item1_size, item2_size; - /* Check root dir ref name */ - if (index == 0 && strncmp(namebuf, "..", name_len)) { - error("root %llu INODE_EXTREF[%llu %llu] ROOT_DIR name shouldn't be %s", - root->objectid, ref_key->objectid, ref_key->offset, - namebuf); - err |= ROOT_DIR_ERROR; - } + item1 = btrfs_item_nr(slot); + item2 = btrfs_item_nr(slot + 1); + btrfs_item_key_to_cpu(buf, &k1, slot); + btrfs_item_key_to_cpu(buf, &k2, slot + 1); + item1_offset = btrfs_item_offset(buf, item1); + item2_offset = btrfs_item_offset(buf, item2); + item1_size = btrfs_item_size(buf, item1); + item2_size = btrfs_item_size(buf, item2); - /* find related dir_index */ - key.objectid = parent; - key.type = BTRFS_DIR_INDEX_KEY; - key.offset = index; - ret = find_dir_item(root, &key, &location, namebuf, len, mode); - err |= ret; + item1_data = malloc(item1_size); + if (!item1_data) + return -ENOMEM; + item2_data = malloc(item2_size); + if (!item2_data) { + free(item1_data); + return -ENOMEM; + } - /* find related dir_item */ - key.objectid = parent; - key.type = BTRFS_DIR_ITEM_KEY; - key.offset = btrfs_name_hash(namebuf, len); - ret = find_dir_item(root, &key, &location, namebuf, len, mode); - err |= ret; + read_extent_buffer(buf, item1_data, item1_offset, item1_size); + read_extent_buffer(buf, item2_data, item2_offset, 
item2_size); - len = sizeof(*extref) + name_len; - extref = (struct btrfs_inode_extref *)((char *)extref + len); - cur += len; + write_extent_buffer(buf, item1_data, item2_offset, item2_size); + write_extent_buffer(buf, item2_data, item1_offset, item1_size); + free(item1_data); + free(item2_data); - if (cur < total) - goto next; + btrfs_set_item_offset(buf, item1, item2_offset); + btrfs_set_item_offset(buf, item2, item1_offset); + btrfs_set_item_size(buf, item1, item2_size); + btrfs_set_item_size(buf, item2, item1_size); - return err; + path->slots[0] = slot; + btrfs_set_item_key_unsafe(root, path, &k2); + path->slots[0] = slot + 1; + btrfs_set_item_key_unsafe(root, path, &k1); + } + return 0; } -/* - * Find INODE_REF/INODE_EXTREF for the given key and check it with the specified - * DIR_ITEM/DIR_INDEX match. - * Return with @index_ret. - * - * @root: the root of the fs/file tree - * @key: the key of the INODE_REF/INODE_EXTREF - * @name: the name in the INODE_REF/INODE_EXTREF - * @namelen: the length of name in the INODE_REF/INODE_EXTREF - * @index_ret: the index in the INODE_REF/INODE_EXTREF, - * value (64)-1 means do not check index - * @ext_ref: the EXTENDED_IREF feature - * - * Return 0 if no error occurred. 
- * Return >0 for error bitmap - */ -static int find_inode_ref(struct btrfs_root *root, struct btrfs_key *key, - char *name, int namelen, u64 *index_ret, - unsigned int ext_ref) +static int fix_key_order(struct btrfs_root *root, struct btrfs_path *path) { - struct btrfs_path path; - struct btrfs_inode_ref *ref; - struct btrfs_inode_extref *extref; - struct extent_buffer *node; - char ref_namebuf[BTRFS_NAME_LEN] = {0}; - u32 total; - u32 cur = 0; - u32 len; - u32 ref_namelen; - u64 ref_index; - u64 parent; - u64 dir_id; - int slot; - int ret; - - ASSERT(index_ret); + struct extent_buffer *buf; + struct btrfs_key k1, k2; + int i; + int level = path->lowest_level; + int ret = -EIO; - btrfs_init_path(&path); - ret = btrfs_search_slot(NULL, root, key, &path, 0, 0); - if (ret) { - ret = INODE_REF_MISSING; - goto extref; + buf = path->nodes[level]; + for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) { + if (level) { + btrfs_node_key_to_cpu(buf, &k1, i); + btrfs_node_key_to_cpu(buf, &k2, i + 1); + } else { + btrfs_item_key_to_cpu(buf, &k1, i); + btrfs_item_key_to_cpu(buf, &k2, i + 1); + } + if (btrfs_comp_cpu_keys(&k1, &k2) < 0) + continue; + ret = swap_values(root, path, buf, i); + if (ret) + break; + btrfs_mark_buffer_dirty(buf); + i = 0; } + return ret; +} - node = path.nodes[0]; - slot = path.slots[0]; - - ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref); - total = btrfs_item_size_nr(node, slot); +static int delete_bogus_item(struct btrfs_root *root, + struct btrfs_path *path, + struct extent_buffer *buf, int slot) +{ + struct btrfs_key key; + int nritems = btrfs_header_nritems(buf); - /* Iterate all entry of INODE_REF */ - while (cur < total) { - ret = INODE_REF_MISSING; - - ref_namelen = btrfs_inode_ref_name_len(node, ref); - ref_index = btrfs_inode_ref_index(node, ref); - if (*index_ret != (u64)-1 && *index_ret != ref_index) - goto next_ref; - - if (cur + sizeof(*ref) + ref_namelen > total || - ref_namelen > BTRFS_NAME_LEN) { - warning("root %llu INODE 
%s[%llu %llu] name too long", - root->objectid, - key->type == BTRFS_INODE_REF_KEY ? - "REF" : "EXTREF", - key->objectid, key->offset); - - if (cur + sizeof(*ref) > total) - break; - len = min_t(u32, total - cur - sizeof(*ref), - BTRFS_NAME_LEN); - } else { - len = ref_namelen; - } + btrfs_item_key_to_cpu(buf, &key, slot); - read_extent_buffer(node, ref_namebuf, (unsigned long)(ref + 1), - len); + /* These are all the keys we can deal with missing. */ + if (key.type != BTRFS_DIR_INDEX_KEY && + key.type != BTRFS_EXTENT_ITEM_KEY && + key.type != BTRFS_METADATA_ITEM_KEY && + key.type != BTRFS_TREE_BLOCK_REF_KEY && + key.type != BTRFS_EXTENT_DATA_REF_KEY) + return -1; - if (len != namelen || strncmp(ref_namebuf, name, len)) - goto next_ref; + printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n", + (unsigned long long)key.objectid, key.type, + (unsigned long long)key.offset, slot, buf->start); + memmove_extent_buffer(buf, btrfs_item_nr_offset(slot), + btrfs_item_nr_offset(slot + 1), + sizeof(struct btrfs_item) * + (nritems - slot - 1)); + btrfs_set_header_nritems(buf, nritems - 1); + if (slot == 0) { + struct btrfs_disk_key disk_key; - *index_ret = ref_index; - ret = 0; - goto out; -next_ref: - len = sizeof(*ref) + ref_namelen; - ref = (struct btrfs_inode_ref *)((char *)ref + len); - cur += len; + btrfs_item_key(buf, &disk_key, 0); + btrfs_fixup_low_keys(root, path, &disk_key, 1); } + btrfs_mark_buffer_dirty(buf); + return 0; +} -extref: - /* Skip if not support EXTENDED_IREF feature */ - if (!ext_ref) - goto out; +static int fix_item_offset(struct btrfs_root *root, struct btrfs_path *path) +{ + struct extent_buffer *buf; + int i; + int ret = 0; - btrfs_release_path(&path); - btrfs_init_path(&path); + /* We should only get this for leaves */ + BUG_ON(path->lowest_level); + buf = path->nodes[0]; +again: + for (i = 0; i < btrfs_header_nritems(buf); i++) { + unsigned int shift = 0, offset; - dir_id = key->offset; - key->type = BTRFS_INODE_EXTREF_KEY; - 
key->offset = btrfs_extref_hash(dir_id, name, namelen); + if (i == 0 && btrfs_item_end_nr(buf, i) != + BTRFS_LEAF_DATA_SIZE(root->fs_info)) { + if (btrfs_item_end_nr(buf, i) > + BTRFS_LEAF_DATA_SIZE(root->fs_info)) { + ret = delete_bogus_item(root, path, buf, i); + if (!ret) + goto again; + fprintf(stderr, "item is off the end of the " + "leaf, can't fix\n"); + ret = -EIO; + break; + } + shift = BTRFS_LEAF_DATA_SIZE(root->fs_info) - + btrfs_item_end_nr(buf, i); + } else if (i > 0 && btrfs_item_end_nr(buf, i) != + btrfs_item_offset_nr(buf, i - 1)) { + if (btrfs_item_end_nr(buf, i) > + btrfs_item_offset_nr(buf, i - 1)) { + ret = delete_bogus_item(root, path, buf, i); + if (!ret) + goto again; + fprintf(stderr, "items overlap, can't fix\n"); + ret = -EIO; + break; + } + shift = btrfs_item_offset_nr(buf, i - 1) - + btrfs_item_end_nr(buf, i); + } + if (!shift) + continue; - ret = btrfs_search_slot(NULL, root, key, &path, 0, 0); - if (ret) { - ret = INODE_REF_MISSING; - goto out; + printf("Shifting item nr %d by %u bytes in block %llu\n", + i, shift, (unsigned long long)buf->start); + offset = btrfs_item_offset_nr(buf, i); + memmove_extent_buffer(buf, + btrfs_leaf_data(buf) + offset + shift, + btrfs_leaf_data(buf) + offset, + btrfs_item_size_nr(buf, i)); + btrfs_set_item_offset(buf, btrfs_item_nr(i), + offset + shift); + btrfs_mark_buffer_dirty(buf); } - node = path.nodes[0]; - slot = path.slots[0]; + /* + * We may have moved things, in which case we want to exit so we don't + * write those changes out. Once we have proper abort functionality in + * progs this can be changed to something nicer. + */ + BUG_ON(ret); + return ret; +} - extref = btrfs_item_ptr(node, slot, struct btrfs_inode_extref); - cur = 0; - total = btrfs_item_size_nr(node, slot); +/* + * Attempt to fix basic block failures. If we can't fix it for whatever reason + * then just return -EIO. 
+ */ +static int try_to_fix_bad_block(struct btrfs_root *root, + struct extent_buffer *buf, + enum btrfs_tree_block_status status) +{ + struct btrfs_trans_handle *trans; + struct ulist *roots; + struct ulist_node *node; + struct btrfs_root *search_root; + struct btrfs_path path; + struct ulist_iterator iter; + struct btrfs_key root_key, key; + int ret; - /* Iterate all entry of INODE_EXTREF */ - while (cur < total) { - ret = INODE_REF_MISSING; + if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER && + status != BTRFS_TREE_BLOCK_INVALID_OFFSETS) + return -EIO; - ref_namelen = btrfs_inode_extref_name_len(node, extref); - ref_index = btrfs_inode_extref_index(node, extref); - parent = btrfs_inode_extref_parent(node, extref); - if (*index_ret != (u64)-1 && *index_ret != ref_index) - goto next_extref; + ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start, 0, &roots); + if (ret) + return -EIO; - if (parent != dir_id) - goto next_extref; + btrfs_init_path(&path); + ULIST_ITER_INIT(&iter); + while ((node = ulist_next(roots, &iter))) { + root_key.objectid = node->val; + root_key.type = BTRFS_ROOT_ITEM_KEY; + root_key.offset = (u64)-1; - if (ref_namelen <= BTRFS_NAME_LEN) { - len = ref_namelen; - } else { - len = BTRFS_NAME_LEN; - warning("root %llu INODE %s[%llu %llu] name too long", - root->objectid, - key->type == BTRFS_INODE_REF_KEY ? 
- "REF" : "EXTREF", - key->objectid, key->offset); + search_root = btrfs_read_fs_root(root->fs_info, &root_key); + if (IS_ERR(search_root)) { /* check the root just read */ + ret = -EIO; + break; } - read_extent_buffer(node, ref_namebuf, - (unsigned long)(extref + 1), len); - - if (len != namelen || strncmp(ref_namebuf, name, len)) - goto next_extref; - *index_ret = ref_index; - ret = 0; - goto out; -next_extref: - len = sizeof(*extref) + ref_namelen; - extref = (struct btrfs_inode_extref *)((char *)extref + len); - cur += len; + trans = btrfs_start_transaction(search_root, 0); + if (IS_ERR(trans)) { + ret = PTR_ERR(trans); + break; + } + path.lowest_level = btrfs_header_level(buf); + path.skip_check_block = 1; + if (path.lowest_level) + btrfs_node_key_to_cpu(buf, &key, 0); + else + btrfs_item_key_to_cpu(buf, &key, 0); + ret = btrfs_search_slot(trans, search_root, &key, &path, 0, 1); + if (ret) { + ret = -EIO; + btrfs_commit_transaction(trans, search_root); + break; + } + if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER) + ret = fix_key_order(search_root, &path); + else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS) + ret = fix_item_offset(search_root, &path); + if (ret) { + btrfs_commit_transaction(trans, search_root); + break; + } + btrfs_release_path(&path); + btrfs_commit_transaction(trans, search_root); } -out: + ulist_free(roots); btrfs_release_path(&path); return ret; } -static void print_dir_item_err(struct btrfs_root *root, struct btrfs_key *key, - u64 ino, u64 index, const char *namebuf, - int name_len, u8 filetype, int err) +static int check_block(struct btrfs_root *root, + struct cache_tree *extent_cache, + struct extent_buffer *buf, u64 flags) { - if (err & (DIR_ITEM_MISMATCH | DIR_ITEM_MISSING)) { - error("root %llu DIR ITEM[%llu %llu] name %s filetype %d %s", - root->objectid, key->objectid, key->offset, namebuf, - filetype, - err & DIR_ITEM_MISMATCH ? 
"mismath" : "missing"); - } - - if (err & (DIR_INDEX_MISMATCH | DIR_INDEX_MISSING)) { - error("root %llu DIR INDEX[%llu %llu] name %s filetype %d %s", - root->objectid, key->objectid, index, namebuf, filetype, - err & DIR_ITEM_MISMATCH ? "mismath" : "missing"); - } - - if (err & (INODE_ITEM_MISSING | INODE_ITEM_MISMATCH)) { - error( - "root %llu INODE_ITEM[%llu] index %llu name %s filetype %d %s", - root->objectid, ino, index, namebuf, filetype, - err & INODE_ITEM_MISMATCH ? "mismath" : "missing"); - } + struct extent_record *rec; + struct cache_extent *cache; + struct btrfs_key key; + enum btrfs_tree_block_status status; + int ret = 0; + int level; - if (err & INODE_REF_MISSING) - error( - "root %llu INODE REF[%llu, %llu] name %s filetype %u missing", - root->objectid, ino, key->objectid, namebuf, filetype); + cache = lookup_cache_extent(extent_cache, buf->start, buf->len); + if (!cache) + return 1; + rec = container_of(cache, struct extent_record, cache); + rec->generation = btrfs_header_generation(buf); -} + level = btrfs_header_level(buf); + if (btrfs_header_nritems(buf) > 0) { -/* - * Call repair_inode_item_missing and repair_ternary_lowmem to repair - * - * Returns error after repair - */ -static int repair_dir_item(struct btrfs_root *root, u64 dirid, u64 ino, - u64 index, u8 filetype, char *namebuf, u32 name_len, - int err) -{ - int ret; + if (level == 0) + btrfs_item_key_to_cpu(buf, &key, 0); + else + btrfs_node_key_to_cpu(buf, &key, 0); - if (err & INODE_ITEM_MISSING) { - ret = repair_inode_item_missing(root, ino, filetype); - if (!ret) - err &= ~(INODE_ITEM_MISMATCH | INODE_ITEM_MISSING); + rec->info_objectid = key.objectid; } + rec->info_level = level; - if (err & ~(INODE_ITEM_MISMATCH | INODE_ITEM_MISSING)) { - ret = repair_ternary_lowmem(root, dirid, ino, index, namebuf, - name_len, filetype, err); - if (!ret) { - err &= ~(DIR_INDEX_MISMATCH | DIR_INDEX_MISSING); - err &= ~(DIR_ITEM_MISMATCH | DIR_ITEM_MISSING); - err &= ~(INODE_REF_MISSING); - } - } - 
return err; -} + if (btrfs_is_leaf(buf)) + status = btrfs_check_leaf(root, &rec->parent_key, buf); + else + status = btrfs_check_node(root, &rec->parent_key, buf); -static int __count_dir_isize(struct btrfs_root *root, u64 ino, int type, - u64 *size_ret) -{ - struct btrfs_key key; - struct btrfs_path path; - u32 len; - struct btrfs_dir_item *di; - int ret; - int cur = 0; - int total = 0; - - ASSERT(size_ret); - *size_ret = 0; - - key.objectid = ino; - key.type = type; - key.offset = (u64)-1; - - btrfs_init_path(&path); - ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0); - if (ret < 0) { - ret = -EIO; - goto out; - } - /* if found, go to spacial case */ - if (ret == 0) - goto special_case; - -loop: - ret = btrfs_previous_item(root, &path, ino, type); - - if (ret) { - ret = 0; - goto out; - } - -special_case: - di = btrfs_item_ptr(path.nodes[0], path.slots[0], struct btrfs_dir_item); - cur = 0; - total = btrfs_item_size_nr(path.nodes[0], path.slots[0]); - - while (cur < total) { - len = btrfs_dir_name_len(path.nodes[0], di); - if (len > BTRFS_NAME_LEN) - len = BTRFS_NAME_LEN; - *size_ret += len; - - len += btrfs_dir_data_len(path.nodes[0], di); - len += sizeof(*di); - di = (struct btrfs_dir_item *)((char *)di + len); - cur += len; + if (status != BTRFS_TREE_BLOCK_CLEAN) { + if (repair) + status = try_to_fix_bad_block(root, buf, status); + if (status != BTRFS_TREE_BLOCK_CLEAN) { + ret = -EIO; + fprintf(stderr, "bad block %llu\n", + (unsigned long long)buf->start); + } else { + /* + * Signal to callers we need to start the scan over + * again since we'll have cowed blocks. 
+ */ + ret = -EAGAIN; + } + } else { + rec->content_checked = 1; + if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) + rec->owner_ref_checked = 1; + else { + ret = check_owner_ref(root, rec, buf); + if (!ret) + rec->owner_ref_checked = 1; + } } - goto loop; - -out: - btrfs_release_path(&path); + if (!ret) + maybe_free_extent_rec(extent_cache, rec); return ret; } -static int count_dir_isize(struct btrfs_root *root, u64 ino, u64 *size) +#if 0 +static struct tree_backref *find_tree_backref(struct extent_record *rec, + u64 parent, u64 root) { - u64 item_size; - u64 index_size; - int ret; - - ASSERT(size); - ret = __count_dir_isize(root, ino, BTRFS_DIR_ITEM_KEY, &item_size); - if (ret) - goto out; - - ret = __count_dir_isize(root, ino, BTRFS_DIR_INDEX_KEY, &index_size); - if (ret) - goto out; - - *size = item_size + index_size; + struct list_head *cur = rec->backrefs.next; + struct extent_backref *node; + struct tree_backref *back; -out: - if (ret) - error("failed to count root %llu INODE[%llu] root size", - root->objectid, ino); - return ret; + while(cur != &rec->backrefs) { + node = to_extent_backref(cur); + cur = cur->next; + if (node->is_data) + continue; + back = to_tree_backref(node); + if (parent > 0) { + if (!node->full_backref) + continue; + if (parent == back->parent) + return back; + } else { + if (node->full_backref) + continue; + if (back->root == root) + return back; + } + } + return NULL; } +#endif -/* - * Traverse the given DIR_ITEM/DIR_INDEX and check related INODE_ITEM and - * call find_inode_ref() to check related INODE_REF/INODE_EXTREF. - * - * @root: the root of the fs/file tree - * @key: the key of the INODE_REF/INODE_EXTREF - * @path: the path - * @size: the st_size of the INODE_ITEM - * @ext_ref: the EXTENDED_IREF feature - * - * Return 0 if no error occurred. - * Return DIR_COUNT_AGAIN if the isize of the inode should be recalculated. 
- */ -static int check_dir_item(struct btrfs_root *root, struct btrfs_key *di_key, - struct btrfs_path *path, u64 *size, - unsigned int ext_ref) +static struct tree_backref *alloc_tree_backref(struct extent_record *rec, + u64 parent, u64 root) { - struct btrfs_dir_item *di; - struct btrfs_inode_item *ii; - struct btrfs_key key; - struct btrfs_key location; - struct extent_buffer *node; - int slot; - char namebuf[BTRFS_NAME_LEN] = {0}; - u32 total; - u32 cur = 0; - u32 len; - u32 name_len; - u32 data_len; - u8 filetype; - u32 mode = 0; - u64 index; - int ret; - int err; - int tmp_err; - int need_research = 0; + struct tree_backref *ref = malloc(sizeof(*ref)); - /* - * For DIR_ITEM set index to (u64)-1, so that find_inode_ref - * ignore index check. - */ - if (di_key->type == BTRFS_DIR_INDEX_KEY) - index = di_key->offset; - else - index = (u64)-1; -begin: - err = 0; - cur = 0; - - /* since after repair, path and the dir item may be changed */ - if (need_research) { - need_research = 0; - err |= DIR_COUNT_AGAIN; - btrfs_release_path(path); - ret = btrfs_search_slot(NULL, root, di_key, path, 0, 0); - /* the item was deleted, let path point the last checked item */ - if (ret > 0) { - if (path->slots[0] == 0) - btrfs_prev_leaf(root, path); - else - path->slots[0]--; - } - if (ret) - goto out; + if (!ref) + return NULL; + memset(&ref->node, 0, sizeof(ref->node)); + if (parent > 0) { + ref->parent = parent; + ref->node.full_backref = 1; + } else { + ref->root = root; + ref->node.full_backref = 0; } - node = path->nodes[0]; - slot = path->slots[0]; - - di = btrfs_item_ptr(node, slot, struct btrfs_dir_item); - total = btrfs_item_size_nr(node, slot); - memset(namebuf, 0, sizeof(namebuf) / sizeof(*namebuf)); - - while (cur < total) { - data_len = btrfs_dir_data_len(node, di); - tmp_err = 0; - if (data_len) - error("root %llu %s[%llu %llu] data_len shouldn't be %u", - root->objectid, - di_key->type == BTRFS_DIR_ITEM_KEY ? 
"DIR_ITEM" : "DIR_INDEX", - di_key->objectid, di_key->offset, data_len); - - name_len = btrfs_dir_name_len(node, di); - if (name_len <= BTRFS_NAME_LEN) { - len = name_len; - } else { - len = BTRFS_NAME_LEN; - warning("root %llu %s[%llu %llu] name too long", - root->objectid, - di_key->type == BTRFS_DIR_ITEM_KEY ? "DIR_ITEM" : "DIR_INDEX", - di_key->objectid, di_key->offset); - } - (*size) += name_len; - read_extent_buffer(node, namebuf, (unsigned long)(di + 1), - len); - filetype = btrfs_dir_type(node, di); - - if (di_key->type == BTRFS_DIR_ITEM_KEY && - di_key->offset != btrfs_name_hash(namebuf, len)) { - err |= -EIO; - error("root %llu DIR_ITEM[%llu %llu] name %s namelen %u filetype %u mismatch with its hash, wanted %llu have %llu", - root->objectid, di_key->objectid, di_key->offset, - namebuf, len, filetype, di_key->offset, - btrfs_name_hash(namebuf, len)); - } - - btrfs_dir_item_key_to_cpu(node, di, &location); - /* Ignore related ROOT_ITEM check */ - if (location.type == BTRFS_ROOT_ITEM_KEY) - goto next; - - btrfs_release_path(path); - /* Check relative INODE_ITEM(existence/filetype) */ - ret = btrfs_search_slot(NULL, root, &location, path, 0, 0); - if (ret) { - tmp_err |= INODE_ITEM_MISSING; - goto next; - } - - ii = btrfs_item_ptr(path->nodes[0], path->slots[0], - struct btrfs_inode_item); - mode = btrfs_inode_mode(path->nodes[0], ii); - if (imode_to_type(mode) != filetype) { - tmp_err |= INODE_ITEM_MISMATCH; - goto next; - } + return ref; +} - /* Check relative INODE_REF/INODE_EXTREF */ - key.objectid = location.objectid; - key.type = BTRFS_INODE_REF_KEY; - key.offset = di_key->objectid; - tmp_err |= find_inode_ref(root, &key, namebuf, len, - &index, ext_ref); +#if 0 +static struct data_backref *find_data_backref(struct extent_record *rec, + u64 parent, u64 root, + u64 owner, u64 offset, + int found_ref, + u64 disk_bytenr, u64 bytes) +{ + struct list_head *cur = rec->backrefs.next; + struct extent_backref *node; + struct data_backref *back; - /* check 
relative INDEX/ITEM */ - key.objectid = di_key->objectid; - if (key.type == BTRFS_DIR_ITEM_KEY) { - key.type = BTRFS_DIR_INDEX_KEY; - key.offset = index; + while(cur != &rec->backrefs) { + node = to_extent_backref(cur); + cur = cur->next; + if (!node->is_data) + continue; + back = to_data_backref(node); + if (parent > 0) { + if (!node->full_backref) + continue; + if (parent == back->parent) + return back; } else { - key.type = BTRFS_DIR_ITEM_KEY; - key.offset = btrfs_name_hash(namebuf, name_len); - } - - tmp_err |= find_dir_item(root, &key, &location, namebuf, - name_len, filetype); - /* find_dir_item may find index */ - if (key.type == BTRFS_DIR_INDEX_KEY) - index = key.offset; -next: - - if (tmp_err && repair) { - ret = repair_dir_item(root, di_key->objectid, - location.objectid, index, - imode_to_type(mode), namebuf, - name_len, tmp_err); - if (ret != tmp_err) { - need_research = 1; - goto begin; + if (node->full_backref) + continue; + if (back->root == root && back->owner == owner && + back->offset == offset) { + if (found_ref && node->found_ref && + (back->bytes != bytes || + back->disk_bytenr != disk_bytenr)) + continue; + return back; } } - btrfs_release_path(path); - print_dir_item_err(root, di_key, location.objectid, index, - namebuf, name_len, filetype, tmp_err); - err |= tmp_err; - len = sizeof(*di) + name_len + data_len; - di = (struct btrfs_dir_item *)((char *)di + len); - cur += len; - - if (di_key->type == BTRFS_DIR_INDEX_KEY && cur < total) { - error("root %llu DIR_INDEX[%llu %llu] should contain only one entry", - root->objectid, di_key->objectid, - di_key->offset); - break; - } } -out: - /* research path */ - btrfs_release_path(path); - ret = btrfs_search_slot(NULL, root, di_key, path, 0, 0); - if (ret) - err |= ret > 0 ? -ENOENT : ret; - return err; + return NULL; } +#endif -/* - * Wrapper function of btrfs_punch_hole. - * - * Returns 0 means success. - * Returns not 0 means error. 
- */ -static int punch_extent_hole(struct btrfs_root *root, u64 ino, u64 start, - u64 len) +static struct data_backref *alloc_data_backref(struct extent_record *rec, + u64 parent, u64 root, + u64 owner, u64 offset, + u64 max_size) { - struct btrfs_trans_handle *trans; - int ret = 0; - - trans = btrfs_start_transaction(root, 1); - if (IS_ERR(trans)) - return PTR_ERR(trans); + struct data_backref *ref = malloc(sizeof(*ref)); - ret = btrfs_punch_hole(trans, root, ino, start, len); - if (ret) - error("failed to add hole [%llu, %llu] in inode [%llu]", - start, len, ino); - else - printf("Add a hole [%llu, %llu] in inode [%llu]\n", start, len, - ino); + if (!ref) + return NULL; + memset(&ref->node, 0, sizeof(ref->node)); + ref->node.is_data = 1; - btrfs_commit_transaction(trans, root); - return ret; + if (parent > 0) { + ref->parent = parent; + ref->owner = 0; + ref->offset = 0; + ref->node.full_backref = 1; + } else { + ref->root = root; + ref->owner = owner; + ref->offset = offset; + ref->node.full_backref = 0; + } + ref->bytes = max_size; + ref->found_ref = 0; + ref->num_refs = 0; + if (max_size > rec->max_size) + rec->max_size = max_size; + return ref; } -/* - * Check file extent datasum/hole, update the size of the file extents, - * check and update the last offset of the file extent. - * - * @root: the root of fs/file tree. - * @fkey: the key of the file extent. - * @nodatasum: INODE_NODATASUM feature. - * @size: the sum of all EXTENT_DATA items size for this inode. - * @end: the offset of the last extent. - * - * Return 0 if no error occurred. 
- */ -static int check_file_extent(struct btrfs_root *root, struct btrfs_key *fkey, - struct extent_buffer *node, int slot, - unsigned int nodatasum, u64 *size, u64 *end) +/* Check if the type of extent matches with its chunk */ +static void check_extent_type(struct extent_record *rec) { - struct btrfs_file_extent_item *fi; - u64 disk_bytenr; - u64 disk_num_bytes; - u64 extent_num_bytes; - u64 extent_offset; - u64 csum_found; /* In byte size, sectorsize aligned */ - u64 search_start; /* Logical range start we search for csum */ - u64 search_len; /* Logical range len we search for csum */ - unsigned int extent_type; - unsigned int is_hole; - int compressed = 0; - int ret; - int err = 0; + struct btrfs_block_group_cache *bg_cache; - fi = btrfs_item_ptr(node, slot, struct btrfs_file_extent_item); + bg_cache = btrfs_lookup_first_block_group(global_info, rec->start); + if (!bg_cache) + return; - /* Check inline extent */ - extent_type = btrfs_file_extent_type(node, fi); - if (extent_type == BTRFS_FILE_EXTENT_INLINE) { - struct btrfs_item *e = btrfs_item_nr(slot); - u32 item_inline_len; - - item_inline_len = btrfs_file_extent_inline_item_len(node, e); - extent_num_bytes = btrfs_file_extent_inline_len(node, slot, fi); - compressed = btrfs_file_extent_compression(node, fi); - if (extent_num_bytes == 0) { - error( - "root %llu EXTENT_DATA[%llu %llu] has empty inline extent", - root->objectid, fkey->objectid, fkey->offset); - err |= FILE_EXTENT_ERROR; - } - if (!compressed && extent_num_bytes != item_inline_len) { - error( - "root %llu EXTENT_DATA[%llu %llu] wrong inline size, have: %llu, expected: %u", - root->objectid, fkey->objectid, fkey->offset, - extent_num_bytes, item_inline_len); - err |= FILE_EXTENT_ERROR; - } - *end += extent_num_bytes; - *size += extent_num_bytes; - return err; + /* data extent, check chunk directly*/ + if (!rec->metadata) { + if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA)) + rec->wrong_chunk_type = 1; + return; } - /* Check extent type */ - if 
(extent_type != BTRFS_FILE_EXTENT_REG && - extent_type != BTRFS_FILE_EXTENT_PREALLOC) { - err |= FILE_EXTENT_ERROR; - error("root %llu EXTENT_DATA[%llu %llu] type bad", - root->objectid, fkey->objectid, fkey->offset); - return err; + /* metadata extent, check the obvious case first */ + if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM | + BTRFS_BLOCK_GROUP_METADATA))) { + rec->wrong_chunk_type = 1; + return; } - /* Check REG_EXTENT/PREALLOC_EXTENT */ - disk_bytenr = btrfs_file_extent_disk_bytenr(node, fi); - disk_num_bytes = btrfs_file_extent_disk_num_bytes(node, fi); - extent_num_bytes = btrfs_file_extent_num_bytes(node, fi); - extent_offset = btrfs_file_extent_offset(node, fi); - compressed = btrfs_file_extent_compression(node, fi); - is_hole = (disk_bytenr == 0) && (disk_num_bytes == 0); - /* - * Check EXTENT_DATA csum - * - * For plain (uncompressed) extent, we should only check the range - * we're referring to, as it's possible that part of prealloc extent - * has been written, and has csum: - * - * |<--- Original large preallocated extent A ---->| - * |<- Prealloc File Extent ->|<- Regular Extent ->| - * No csum Has csum - * - * For compressed extent, we should check the whole range. 
+ * Check SYSTEM extent, as it's also marked as metadata, we can only + * make sure it's a SYSTEM extent by its backref */ - if (!compressed) { - search_start = disk_bytenr + extent_offset; - search_len = extent_num_bytes; - } else { - search_start = disk_bytenr; - search_len = disk_num_bytes; - } - ret = count_csum_range(root->fs_info, search_start, search_len, &csum_found); - if (csum_found > 0 && nodatasum) { - err |= ODD_CSUM_ITEM; - error("root %llu EXTENT_DATA[%llu %llu] nodatasum shouldn't have datasum", - root->objectid, fkey->objectid, fkey->offset); - } else if (extent_type == BTRFS_FILE_EXTENT_REG && !nodatasum && - !is_hole && (ret < 0 || csum_found < search_len)) { - err |= CSUM_ITEM_MISSING; - error("root %llu EXTENT_DATA[%llu %llu] csum missing, have: %llu, expected: %llu", - root->objectid, fkey->objectid, fkey->offset, - csum_found, search_len); - } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC && csum_found > 0) { - err |= ODD_CSUM_ITEM; - error("root %llu EXTENT_DATA[%llu %llu] prealloc shouldn't have csum, but has: %llu", - root->objectid, fkey->objectid, fkey->offset, csum_found); - } - - /* Check EXTENT_DATA hole */ - if (!no_holes && *end != fkey->offset) { - if (repair) - ret = punch_extent_hole(root, fkey->objectid, - *end, fkey->offset - *end); - if (!repair || ret) { - err |= FILE_EXTENT_ERROR; - error( -"root %llu EXTENT_DATA[%llu %llu] gap exists, expected: EXTENT_DATA[%llu %llu]", - root->objectid, fkey->objectid, fkey->offset, - fkey->objectid, *end); - } - } + if (!RB_EMPTY_ROOT(&rec->backref_tree)) { + struct extent_backref *node; + struct tree_backref *tback; + u64 bg_type; - *end += extent_num_bytes; - if (!is_hole) - *size += extent_num_bytes; + node = rb_node_to_extent_backref(rb_first(&rec->backref_tree)); + if (node->is_data) { + /* tree block shouldn't have data backref */ + rec->wrong_chunk_type = 1; + return; + } + tback = container_of(node, struct tree_backref, node); - return err; + if (tback->root == 
BTRFS_CHUNK_TREE_OBJECTID) + bg_type = BTRFS_BLOCK_GROUP_SYSTEM; + else + bg_type = BTRFS_BLOCK_GROUP_METADATA; + if (!(bg_cache->flags & bg_type)) + rec->wrong_chunk_type = 1; + } } /* - * Set inode item nbytes to @nbytes - * - * Returns 0 on success - * Returns != 0 on error + * Allocate a new extent record, fill default values from @tmpl and insert into + * @extent_cache. Caller is supposed to make sure the [start,nr) is not in + * the cache, otherwise it fails. */ -static int repair_inode_nbytes_lowmem(struct btrfs_root *root, - struct btrfs_path *path, - u64 ino, u64 nbytes) +static int add_extent_rec_nolookup(struct cache_tree *extent_cache, + struct extent_record *tmpl) { - struct btrfs_trans_handle *trans; - struct btrfs_inode_item *ii; - struct btrfs_key key; - struct btrfs_key research_key; - int err = 0; - int ret; - - btrfs_item_key_to_cpu(path->nodes[0], &research_key, path->slots[0]); - - key.objectid = ino; - key.type = BTRFS_INODE_ITEM_KEY; - key.offset = 0; - - trans = btrfs_start_transaction(root, 1); - if (IS_ERR(trans)) { - ret = PTR_ERR(trans); - err |= ret; - goto out; - } + struct extent_record *rec; + int ret = 0; - btrfs_release_path(path); - ret = btrfs_search_slot(trans, root, &key, path, 0, 1); - if (ret > 0) - ret = -ENOENT; + BUG_ON(tmpl->max_size == 0); + rec = malloc(sizeof(*rec)); + if (!rec) + return -ENOMEM; + rec->start = tmpl->start; + rec->max_size = tmpl->max_size; + rec->nr = max(tmpl->nr, tmpl->max_size); + rec->found_rec = tmpl->found_rec; + rec->content_checked = tmpl->content_checked; + rec->owner_ref_checked = tmpl->owner_ref_checked; + rec->num_duplicates = 0; + rec->metadata = tmpl->metadata; + rec->flag_block_full_backref = FLAG_UNSET; + rec->bad_full_backref = 0; + rec->crossing_stripes = 0; + rec->wrong_chunk_type = 0; + rec->is_root = tmpl->is_root; + rec->refs = tmpl->refs; + rec->extent_item_refs = tmpl->extent_item_refs; + rec->parent_generation = tmpl->parent_generation; + INIT_LIST_HEAD(&rec->backrefs); + 
INIT_LIST_HEAD(&rec->dups); + INIT_LIST_HEAD(&rec->list); + rec->backref_tree = RB_ROOT; + memcpy(&rec->parent_key, &tmpl->parent_key, sizeof(tmpl->parent_key)); + rec->cache.start = tmpl->start; + rec->cache.size = tmpl->nr; + ret = insert_cache_extent(extent_cache, &rec->cache); if (ret) { - err |= ret; - goto fail; + free(rec); + return ret; } + bytes_used += rec->nr; - ii = btrfs_item_ptr(path->nodes[0], path->slots[0], - struct btrfs_inode_item); - btrfs_set_inode_nbytes(path->nodes[0], ii, nbytes); - btrfs_mark_buffer_dirty(path->nodes[0]); -fail: - btrfs_commit_transaction(trans, root); -out: - if (ret) - error("failed to set nbytes in inode %llu root %llu", - ino, root->root_key.objectid); - else - printf("Set nbytes in inode item %llu root %llu\n to %llu", ino, - root->root_key.objectid, nbytes); - - /* research path */ - btrfs_release_path(path); - ret = btrfs_search_slot(NULL, root, &research_key, path, 0, 0); - err |= ret; - - return err; + if (tmpl->metadata) + rec->crossing_stripes = check_crossing_stripes(global_info, + rec->start, global_info->nodesize); + check_extent_type(rec); + return ret; } /* - * Set directory inode isize to @isize. + * Lookup and modify an extent, some values of @tmpl are interpreted verbatim, + * some are hints: + * - refs - if found, increase refs + * - is_root - if found, set + * - content_checked - if found, set + * - owner_ref_checked - if found, set * - * Returns 0 on success. - * Returns != 0 on error. + * If not found, create a new one, initialize and insert. 
*/ -static int repair_dir_isize_lowmem(struct btrfs_root *root, - struct btrfs_path *path, - u64 ino, u64 isize) +static int add_extent_rec(struct cache_tree *extent_cache, + struct extent_record *tmpl) { - struct btrfs_trans_handle *trans; - struct btrfs_inode_item *ii; - struct btrfs_key key; - struct btrfs_key research_key; - int ret; - int err = 0; + struct extent_record *rec; + struct cache_extent *cache; + int ret = 0; + int dup = 0; - btrfs_item_key_to_cpu(path->nodes[0], &research_key, path->slots[0]); + cache = lookup_cache_extent(extent_cache, tmpl->start, tmpl->nr); + if (cache) { + rec = container_of(cache, struct extent_record, cache); + if (tmpl->refs) + rec->refs++; + if (rec->nr == 1) + rec->nr = max(tmpl->nr, tmpl->max_size); - key.objectid = ino; - key.type = BTRFS_INODE_ITEM_KEY; - key.offset = 0; + /* + * We need to make sure to reset nr to whatever the extent + * record says was the real size, this way we can compare it to + * the backrefs. + */ + if (tmpl->found_rec) { + if (tmpl->start != rec->start || rec->found_rec) { + struct extent_record *tmp; - trans = btrfs_start_transaction(root, 1); - if (IS_ERR(trans)) { - ret = PTR_ERR(trans); - err |= ret; - goto out; - } + dup = 1; + if (list_empty(&rec->list)) + list_add_tail(&rec->list, + &duplicate_extents); - btrfs_release_path(path); - ret = btrfs_search_slot(trans, root, &key, path, 0, 1); - if (ret > 0) - ret = -ENOENT; - if (ret) { - err |= ret; - goto fail; - } + /* + * We have to do this song and dance in case we + * find an extent record that falls inside of + * our current extent record but does not have + * the same objectid. 
+ */ + tmp = malloc(sizeof(*tmp)); + if (!tmp) + return -ENOMEM; + tmp->start = tmpl->start; + tmp->max_size = tmpl->max_size; + tmp->nr = tmpl->nr; + tmp->found_rec = 1; + tmp->metadata = tmpl->metadata; + tmp->extent_item_refs = tmpl->extent_item_refs; + INIT_LIST_HEAD(&tmp->list); + list_add_tail(&tmp->list, &rec->dups); + rec->num_duplicates++; + } else { + rec->nr = tmpl->nr; + rec->found_rec = 1; + } + } - ii = btrfs_item_ptr(path->nodes[0], path->slots[0], - struct btrfs_inode_item); - btrfs_set_inode_size(path->nodes[0], ii, isize); - btrfs_mark_buffer_dirty(path->nodes[0]); -fail: - btrfs_commit_transaction(trans, root); -out: - if (ret) - error("failed to set isize in inode %llu root %llu", - ino, root->root_key.objectid); - else - printf("Set isize in inode %llu root %llu to %llu\n", - ino, root->root_key.objectid, isize); + if (tmpl->extent_item_refs && !dup) { + if (rec->extent_item_refs) { + fprintf(stderr, "block %llu rec " + "extent_item_refs %llu, passed %llu\n", + (unsigned long long)tmpl->start, + (unsigned long long) + rec->extent_item_refs, + (unsigned long long)tmpl->extent_item_refs); + } + rec->extent_item_refs = tmpl->extent_item_refs; + } + if (tmpl->is_root) + rec->is_root = 1; + if (tmpl->content_checked) + rec->content_checked = 1; + if (tmpl->owner_ref_checked) + rec->owner_ref_checked = 1; + memcpy(&rec->parent_key, &tmpl->parent_key, + sizeof(tmpl->parent_key)); + if (tmpl->parent_generation) + rec->parent_generation = tmpl->parent_generation; + if (rec->max_size < tmpl->max_size) + rec->max_size = tmpl->max_size; - btrfs_release_path(path); - ret = btrfs_search_slot(NULL, root, &research_key, path, 0, 0); - err |= ret; + /* + * A metadata extent can't cross stripe_len boundary, otherwise + * kernel scrub won't be able to handle it. + * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check + * it. 
+ */ + if (tmpl->metadata) + rec->crossing_stripes = check_crossing_stripes( + global_info, rec->start, + global_info->nodesize); + check_extent_type(rec); + maybe_free_extent_rec(extent_cache, rec); + return ret; + } - return err; + ret = add_extent_rec_nolookup(extent_cache, tmpl); + + return ret; } -/* - * Wrapper function for btrfs_add_orphan_item(). - * - * Returns 0 on success. - * Returns != 0 on error. - */ -static int repair_inode_orphan_item_lowmem(struct btrfs_root *root, - struct btrfs_path *path, u64 ino) +static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr, + u64 parent, u64 root, int found_ref) { - struct btrfs_trans_handle *trans; - struct btrfs_key research_key; + struct extent_record *rec; + struct tree_backref *back; + struct cache_extent *cache; int ret; - int err = 0; + bool insert = false; - btrfs_item_key_to_cpu(path->nodes[0], &research_key, path->slots[0]); - - trans = btrfs_start_transaction(root, 1); - if (IS_ERR(trans)) { - ret = PTR_ERR(trans); - err |= ret; - goto out; - } - - btrfs_release_path(path); - ret = btrfs_add_orphan_item(trans, root, path, ino); - err |= ret; - btrfs_commit_transaction(trans, root); -out: - if (ret) - error("failed to add inode %llu as orphan item root %llu", - ino, root->root_key.objectid); - else - printf("Added inode %llu as orphan item root %llu\n", - ino, root->root_key.objectid); - - btrfs_release_path(path); - ret = btrfs_search_slot(NULL, root, &research_key, path, 0, 0); - err |= ret; - - return err; -} + cache = lookup_cache_extent(extent_cache, bytenr, 1); + if (!cache) { + struct extent_record tmpl; -/* Set inode_item nlink to @ref_count. - * If @ref_count == 0, move it to "lost+found" and increase @ref_count. 
- * - * Returns 0 on success - */ -static int repair_inode_nlinks_lowmem(struct btrfs_root *root, - struct btrfs_path *path, u64 ino, - const char *name, u32 namelen, - u64 ref_count, u8 filetype, u64 *nlink) -{ - struct btrfs_trans_handle *trans; - struct btrfs_inode_item *ii; - struct btrfs_key key; - struct btrfs_key old_key; - char namebuf[BTRFS_NAME_LEN] = {0}; - int name_len; - int ret; - int ret2; + memset(&tmpl, 0, sizeof(tmpl)); + tmpl.start = bytenr; + tmpl.nr = 1; + tmpl.metadata = 1; + tmpl.max_size = 1; - /* save the key */ - btrfs_item_key_to_cpu(path->nodes[0], &old_key, path->slots[0]); + ret = add_extent_rec_nolookup(extent_cache, &tmpl); + if (ret) + return ret; - if (name && namelen) { - ASSERT(namelen <= BTRFS_NAME_LEN); - memcpy(namebuf, name, namelen); - name_len = namelen; - } else { - sprintf(namebuf, "%llu", ino); - name_len = count_digits(ino); - printf("Can't find file name for inode %llu, use %s instead\n", - ino, namebuf); + /* really a bug in cache_extent implement now */ + cache = lookup_cache_extent(extent_cache, bytenr, 1); + if (!cache) + return -ENOENT; } - trans = btrfs_start_transaction(root, 1); - if (IS_ERR(trans)) { - ret = PTR_ERR(trans); - goto out; + rec = container_of(cache, struct extent_record, cache); + if (rec->start != bytenr) { + /* + * Several cause, from unaligned bytenr to over lapping extents + */ + return -EEXIST; } - btrfs_release_path(path); - /* if refs is 0, put it into lostfound */ - if (ref_count == 0) { - ret = link_inode_to_lostfound(trans, root, path, ino, namebuf, - name_len, filetype, &ref_count); - if (ret) - goto fail; + back = find_tree_backref(rec, parent, root); + if (!back) { + back = alloc_tree_backref(rec, parent, root); + if (!back) + return -ENOMEM; + insert = true; } - /* reset inode_item's nlink to ref_count */ - key.objectid = ino; - key.type = BTRFS_INODE_ITEM_KEY; - key.offset = 0; - - btrfs_release_path(path); - ret = btrfs_search_slot(trans, root, &key, path, 0, 1); - if (ret > 0) - 
ret = -ENOENT; - if (ret) - goto fail; - - ii = btrfs_item_ptr(path->nodes[0], path->slots[0], - struct btrfs_inode_item); - btrfs_set_inode_nlink(path->nodes[0], ii, ref_count); - btrfs_mark_buffer_dirty(path->nodes[0]); - - if (nlink) - *nlink = ref_count; -fail: - btrfs_commit_transaction(trans, root); -out: - if (ret) - error( - "fail to repair nlink of inode %llu root %llu name %s filetype %u", - root->objectid, ino, namebuf, filetype); - else - printf("Fixed nlink of inode %llu root %llu name %s filetype %u\n", - root->objectid, ino, namebuf, filetype); - - /* research */ - btrfs_release_path(path); - ret2 = btrfs_search_slot(NULL, root, &old_key, path, 0, 0); - if (ret2 < 0) - return ret |= ret2; - return ret; + if (found_ref) { + if (back->node.found_ref) { + fprintf(stderr, "Extent back ref already exists " + "for %llu parent %llu root %llu \n", + (unsigned long long)bytenr, + (unsigned long long)parent, + (unsigned long long)root); + } + back->node.found_ref = 1; + } else { + if (back->node.found_extent_tree) { + fprintf(stderr, "Extent back ref already exists " + "for %llu parent %llu root %llu \n", + (unsigned long long)bytenr, + (unsigned long long)parent, + (unsigned long long)root); + } + back->node.found_extent_tree = 1; + } + if (insert) + WARN_ON(rb_insert(&rec->backref_tree, &back->node.node, + compare_extent_backref)); + check_extent_type(rec); + maybe_free_extent_rec(extent_cache, rec); + return 0; } -/* - * Check INODE_ITEM and related ITEMs (the same inode number) - * 1. check link count - * 2. check inode ref/extref - * 3. check dir item/index - * - * @ext_ref: the EXTENDED_IREF feature - * - * Return 0 if no error occurred. 
- * Return >0 for error or hit the traversal is done(by error bitmap) - */ -static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path, - unsigned int ext_ref) +static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr, + u64 parent, u64 root, u64 owner, u64 offset, + u32 num_refs, int found_ref, u64 max_size) { - struct extent_buffer *node; - struct btrfs_inode_item *ii; - struct btrfs_key key; - struct btrfs_key last_key; - u64 inode_id; - u32 mode; - u64 nlink; - u64 nbytes; - u64 isize; - u64 size = 0; - u64 refs = 0; - u64 extent_end = 0; - u64 extent_size = 0; - unsigned int dir; - unsigned int nodatasum; - int slot; + struct extent_record *rec; + struct data_backref *back; + struct cache_extent *cache; int ret; - int err = 0; - char namebuf[BTRFS_NAME_LEN] = {0}; - u32 name_len = 0; - - node = path->nodes[0]; - slot = path->slots[0]; - - btrfs_item_key_to_cpu(node, &key, slot); - inode_id = key.objectid; - - if (inode_id == BTRFS_ORPHAN_OBJECTID) { - ret = btrfs_next_item(root, path); - if (ret > 0) - err |= LAST_ITEM; - return err; - } - - ii = btrfs_item_ptr(node, slot, struct btrfs_inode_item); - isize = btrfs_inode_size(node, ii); - nbytes = btrfs_inode_nbytes(node, ii); - mode = btrfs_inode_mode(node, ii); - dir = imode_to_type(mode) == BTRFS_FT_DIR; - nlink = btrfs_inode_nlink(node, ii); - nodatasum = btrfs_inode_flags(node, ii) & BTRFS_INODE_NODATASUM; - - while (1) { - btrfs_item_key_to_cpu(path->nodes[0], &last_key, path->slots[0]); - ret = btrfs_next_item(root, path); - if (ret < 0) { - /* out will fill 'err' rusing current statistics */ - goto out; - } else if (ret > 0) { - err |= LAST_ITEM; - goto out; - } + bool insert = false; - node = path->nodes[0]; - slot = path->slots[0]; - btrfs_item_key_to_cpu(node, &key, slot); - if (key.objectid != inode_id) - goto out; + cache = lookup_cache_extent(extent_cache, bytenr, 1); + if (!cache) { + struct extent_record tmpl; - switch (key.type) { - case BTRFS_INODE_REF_KEY: - 
ret = check_inode_ref(root, &key, path, namebuf, - &name_len, &refs, mode); - err |= ret; - break; - case BTRFS_INODE_EXTREF_KEY: - if (key.type == BTRFS_INODE_EXTREF_KEY && !ext_ref) - warning("root %llu EXTREF[%llu %llu] isn't supported", - root->objectid, key.objectid, - key.offset); - ret = check_inode_extref(root, &key, node, slot, &refs, - mode); - err |= ret; - break; - case BTRFS_DIR_ITEM_KEY: - case BTRFS_DIR_INDEX_KEY: - if (!dir) { - warning("root %llu INODE[%llu] mode %u shouldn't have DIR_INDEX[%llu %llu]", - root->objectid, inode_id, - imode_to_type(mode), key.objectid, - key.offset); - } - ret = check_dir_item(root, &key, path, &size, ext_ref); - err |= ret; - break; - case BTRFS_EXTENT_DATA_KEY: - if (dir) { - warning("root %llu DIR INODE[%llu] shouldn't EXTENT_DATA[%llu %llu]", - root->objectid, inode_id, key.objectid, - key.offset); - } - ret = check_file_extent(root, &key, node, slot, - nodatasum, &extent_size, - &extent_end); - err |= ret; - break; - case BTRFS_XATTR_ITEM_KEY: - break; - default: - error("ITEM[%llu %u %llu] UNKNOWN TYPE", - key.objectid, key.type, key.offset); - } - } + memset(&tmpl, 0, sizeof(tmpl)); + tmpl.start = bytenr; + tmpl.nr = 1; + tmpl.max_size = max_size; -out: - if (err & LAST_ITEM) { - btrfs_release_path(path); - ret = btrfs_search_slot(NULL, root, &last_key, path, 0, 0); + ret = add_extent_rec_nolookup(extent_cache, &tmpl); if (ret) - return err; + return ret; + + cache = lookup_cache_extent(extent_cache, bytenr, 1); + if (!cache) + abort(); } - /* verify INODE_ITEM nlink/isize/nbytes */ - if (dir) { - if (repair && (err & DIR_COUNT_AGAIN)) { - err &= ~DIR_COUNT_AGAIN; - count_dir_isize(root, inode_id, &size); - } + rec = container_of(cache, struct extent_record, cache); + if (rec->max_size < max_size) + rec->max_size = max_size; - if ((nlink != 1 || refs != 1) && repair) { - ret = repair_inode_nlinks_lowmem(root, path, inode_id, - namebuf, name_len, refs, imode_to_type(mode), - &nlink); - } + /* + * If found_ref 
is set then max_size is the real size and must match the + * existing refs. So if we have already found a ref then we need to + * make sure that this ref matches the existing one, otherwise we need + * to add a new backref so we can notice that the backrefs don't match + * and we need to figure out who is telling the truth. This is to + * account for that awful fsync bug I introduced where we'd end up with + * a btrfs_file_extent_item that would have its length include multiple + * prealloc extents or point inside of a prealloc extent. + */ + back = find_data_backref(rec, parent, root, owner, offset, found_ref, + bytenr, max_size); + if (!back) { + back = alloc_data_backref(rec, parent, root, owner, offset, + max_size); + BUG_ON(!back); + insert = true; + } - if (nlink != 1) { - err |= LINK_COUNT_ERROR; - error("root %llu DIR INODE[%llu] shouldn't have more than one link(%llu)", - root->objectid, inode_id, nlink); - } + if (found_ref) { + BUG_ON(num_refs != 1); + if (back->node.found_ref) + BUG_ON(back->bytes != max_size); + back->node.found_ref = 1; + back->found_ref += 1; + if (back->bytes != max_size || back->disk_bytenr != bytenr) { + back->bytes = max_size; + back->disk_bytenr = bytenr; - /* - * Just a warning, as dir inode nbytes is just an - * instructive value. 
- */ - if (!IS_ALIGNED(nbytes, root->fs_info->nodesize)) { - warning("root %llu DIR INODE[%llu] nbytes should be aligned to %u", - root->objectid, inode_id, - root->fs_info->nodesize); - } - - if (isize != size) { - if (repair) - ret = repair_dir_isize_lowmem(root, path, - inode_id, size); - if (!repair || ret) { - err |= ISIZE_ERROR; - error( - "root %llu DIR INODE [%llu] size %llu not equal to %llu", - root->objectid, inode_id, isize, size); + /* Need to reinsert if not already in the tree */ + if (!insert) { + rb_erase(&back->node.node, &rec->backref_tree); + insert = true; } } + rec->refs += 1; + rec->content_checked = 1; + rec->owner_ref_checked = 1; } else { - if (nlink != refs) { - if (repair) - ret = repair_inode_nlinks_lowmem(root, path, - inode_id, namebuf, name_len, refs, - imode_to_type(mode), &nlink); - if (!repair || ret) { - err |= LINK_COUNT_ERROR; - error( - "root %llu INODE[%llu] nlink(%llu) not equal to inode_refs(%llu)", - root->objectid, inode_id, nlink, refs); - } - } else if (!nlink) { - if (repair) - ret = repair_inode_orphan_item_lowmem(root, - path, inode_id); - if (!repair || ret) { - err |= ORPHAN_ITEM; - error("root %llu INODE[%llu] is orphan item", - root->objectid, inode_id); - } - } - - if (!nbytes && !no_holes && extent_end < isize) { - if (repair) - ret = punch_extent_hole(root, inode_id, - extent_end, isize - extent_end); - if (!repair || ret) { - err |= NBYTES_ERROR; - error( - "root %llu INODE[%llu] size %llu should have a file extent hole", - root->objectid, inode_id, isize); - } - } - - if (nbytes != extent_size) { - if (repair) - ret = repair_inode_nbytes_lowmem(root, path, - inode_id, extent_size); - if (!repair || ret) { - err |= NBYTES_ERROR; - error( - "root %llu INODE[%llu] nbytes %llu not equal to extent_size %llu", - root->objectid, inode_id, nbytes, - extent_size); - } + if (back->node.found_extent_tree) { + fprintf(stderr, "Extent back ref already exists " + "for %llu parent %llu root %llu " + "owner %llu offset %llu 
num_refs %lu\n", + (unsigned long long)bytenr, + (unsigned long long)parent, + (unsigned long long)root, + (unsigned long long)owner, + (unsigned long long)offset, + (unsigned long)num_refs); } + back->num_refs = num_refs; + back->node.found_extent_tree = 1; } + if (insert) + WARN_ON(rb_insert(&rec->backref_tree, &back->node.node, + compare_extent_backref)); - if (err & LAST_ITEM) - btrfs_next_item(root, path); - return err; + maybe_free_extent_rec(extent_cache, rec); + return 0; } -/* - * Insert the missing inode item and inode ref. - * - * Normal INODE_ITEM_MISSING and INODE_REF_MISSING are handled in backref * dir. - * Root dir should be handled specially because root dir is the root of fs. - * - * returns err (>0 or 0) after repair - */ -static int repair_fs_first_inode(struct btrfs_root *root, int err) +static int add_pending(struct cache_tree *pending, + struct cache_tree *seen, u64 bytenr, u32 size) { - struct btrfs_trans_handle *trans; - struct btrfs_key key; - struct btrfs_path path; - int filetype = BTRFS_FT_DIR; - int ret = 0; - - btrfs_init_path(&path); - - if (err & INODE_REF_MISSING) { - key.objectid = BTRFS_FIRST_FREE_OBJECTID; - key.type = BTRFS_INODE_REF_KEY; - key.offset = BTRFS_FIRST_FREE_OBJECTID; + int ret; + ret = add_cache_extent(seen, bytenr, size); + if (ret) + return ret; + add_cache_extent(pending, bytenr, size); + return 0; +} - trans = btrfs_start_transaction(root, 1); - if (IS_ERR(trans)) { - ret = PTR_ERR(trans); - goto out; - } +static int pick_next_pending(struct cache_tree *pending, + struct cache_tree *reada, + struct cache_tree *nodes, + u64 last, struct block_info *bits, int bits_nr, + int *reada_bits) +{ + unsigned long node_start = last; + struct cache_extent *cache; + int ret; - btrfs_release_path(&path); - ret = btrfs_search_slot(trans, root, &key, &path, 1, 1); - if (ret) - goto trans_fail; + cache = search_cache_extent(reada, 0); + if (cache) { + bits[0].start = cache->start; + bits[0].size = cache->size; + *reada_bits = 
1; + return 1; + } + *reada_bits = 0; + if (node_start > 32768) + node_start -= 32768; - ret = btrfs_insert_inode_ref(trans, root, "..", 2, - BTRFS_FIRST_FREE_OBJECTID, - BTRFS_FIRST_FREE_OBJECTID, 0); - if (ret) - goto trans_fail; + cache = search_cache_extent(nodes, node_start); + if (!cache) + cache = search_cache_extent(nodes, 0); - printf("Add INODE_REF[%llu %llu] name %s\n", - BTRFS_FIRST_FREE_OBJECTID, BTRFS_FIRST_FREE_OBJECTID, - ".."); - err &= ~INODE_REF_MISSING; -trans_fail: - if (ret) - error("fail to insert first inode's ref"); - btrfs_commit_transaction(trans, root); + if (!cache) { + cache = search_cache_extent(pending, 0); + if (!cache) + return 0; + ret = 0; + do { + bits[ret].start = cache->start; + bits[ret].size = cache->size; + cache = next_cache_extent(cache); + ret++; + } while (cache && ret < bits_nr); + return ret; } - if (err & INODE_ITEM_MISSING) { - ret = repair_inode_item_missing(root, - BTRFS_FIRST_FREE_OBJECTID, filetype); - if (ret) - goto out; - err &= ~INODE_ITEM_MISSING; + ret = 0; + do { + bits[ret].start = cache->start; + bits[ret].size = cache->size; + cache = next_cache_extent(cache); + ret++; + } while (cache && ret < bits_nr); + + if (bits_nr - ret > 8) { + u64 lookup = bits[0].start + bits[0].size; + struct cache_extent *next; + next = search_cache_extent(pending, lookup); + while(next) { + if (next->start - lookup > 32768) + break; + bits[ret].start = next->start; + bits[ret].size = next->size; + lookup = next->start + next->size; + ret++; + if (ret == bits_nr) + break; + next = next_cache_extent(next); + if (!next) + break; + } } -out: - if (ret) - error("fail to repair first inode"); - btrfs_release_path(&path); - return err; + return ret; } -/* - * check first root dir's inode_item and inode_ref - * - * returns 0 means no error - * returns >0 means error - * returns <0 means fatal error - */ -static int check_fs_first_inode(struct btrfs_root *root, unsigned int ext_ref) +static void free_chunk_record(struct cache_extent 
*cache) { - struct btrfs_path path; - struct btrfs_key key; - struct btrfs_inode_item *ii; - u64 index; - u32 mode; - int err = 0; - int ret; + struct chunk_record *rec; - key.objectid = BTRFS_FIRST_FREE_OBJECTID; - key.type = BTRFS_INODE_ITEM_KEY; - key.offset = 0; + rec = container_of(cache, struct chunk_record, cache); + list_del_init(&rec->list); + list_del_init(&rec->dextents); + free(rec); +} - /* For root being dropped, we don't need to check first inode */ - if (btrfs_root_refs(&root->root_item) == 0 && - btrfs_disk_key_objectid(&root->root_item.drop_progress) >= - BTRFS_FIRST_FREE_OBJECTID) - return 0; +void free_chunk_cache_tree(struct cache_tree *chunk_cache) +{ + cache_tree_free_extents(chunk_cache, free_chunk_record); +} - btrfs_init_path(&path); - ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0); - if (ret < 0) - goto out; - if (ret > 0) { - ret = 0; - err |= INODE_ITEM_MISSING; - } else { - ii = btrfs_item_ptr(path.nodes[0], path.slots[0], - struct btrfs_inode_item); - mode = btrfs_inode_mode(path.nodes[0], ii); - if (imode_to_type(mode) != BTRFS_FT_DIR) - err |= INODE_ITEM_MISMATCH; - } +static void free_device_record(struct rb_node *node) +{ + struct device_record *rec; - /* lookup first inode ref */ - key.offset = BTRFS_FIRST_FREE_OBJECTID; - key.type = BTRFS_INODE_REF_KEY; - /* special index value */ - index = 0; + rec = container_of(node, struct device_record, node); + free(rec); +} - ret = find_inode_ref(root, &key, "..", strlen(".."), &index, ext_ref); - if (ret < 0) - goto out; - err |= ret; +FREE_RB_BASED_TREE(device_cache, free_device_record); -out: - btrfs_release_path(&path); +int insert_block_group_record(struct block_group_tree *tree, + struct block_group_record *bg_rec) +{ + int ret; + + ret = insert_cache_extent(&tree->tree, &bg_rec->cache); + if (ret) + return ret; - if (err && repair) - err = repair_fs_first_inode(root, err); + list_add_tail(&bg_rec->list, &tree->block_groups); + return 0; +} - if (err & (INODE_ITEM_MISSING | 
INODE_ITEM_MISMATCH)) - error("root dir INODE_ITEM is %s", - err & INODE_ITEM_MISMATCH ? "mismatch" : "missing"); - if (err & INODE_REF_MISSING) - error("root dir INODE_REF is missing"); +static void free_block_group_record(struct cache_extent *cache) +{ + struct block_group_record *rec; - return ret < 0 ? ret : err; + rec = container_of(cache, struct block_group_record, cache); + list_del_init(&rec->list); + free(rec); } -static struct tree_backref *find_tree_backref(struct extent_record *rec, - u64 parent, u64 root) +void free_block_group_tree(struct block_group_tree *tree) { - struct rb_node *node; - struct tree_backref *back = NULL; - struct tree_backref match = { - .node = { - .is_data = 0, - }, - }; + cache_tree_free_extents(&tree->tree, free_block_group_record); +} - if (parent) { - match.parent = parent; - match.node.full_backref = 1; - } else { - match.root = root; - } +int insert_device_extent_record(struct device_extent_tree *tree, + struct device_extent_record *de_rec) +{ + int ret; - node = rb_search(&rec->backref_tree, &match.node.node, - (rb_compare_keys)compare_extent_backref, NULL); - if (node) - back = to_tree_backref(rb_node_to_extent_backref(node)); + /* + * Device extent is a bit different from the other extents, because + * the extents which belong to the different devices may have the + * same start and size, so we need use the special extent cache + * search/insert functions. 
+ */ + ret = insert_cache_extent2(&tree->tree, &de_rec->cache); + if (ret) + return ret; - return back; + list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans); + list_add_tail(&de_rec->device_list, &tree->no_device_orphans); + return 0; } -static struct data_backref *find_data_backref(struct extent_record *rec, - u64 parent, u64 root, - u64 owner, u64 offset, - int found_ref, - u64 disk_bytenr, u64 bytes) +static void free_device_extent_record(struct cache_extent *cache) { - struct rb_node *node; - struct data_backref *back = NULL; - struct data_backref match = { - .node = { - .is_data = 1, - }, - .owner = owner, - .offset = offset, - .bytes = bytes, - .found_ref = found_ref, - .disk_bytenr = disk_bytenr, - }; - - if (parent) { - match.parent = parent; - match.node.full_backref = 1; - } else { - match.root = root; - } + struct device_extent_record *rec; - node = rb_search(&rec->backref_tree, &match.node.node, - (rb_compare_keys)compare_extent_backref, NULL); - if (node) - back = to_data_backref(rb_node_to_extent_backref(node)); + rec = container_of(cache, struct device_extent_record, cache); + if (!list_empty(&rec->chunk_list)) + list_del_init(&rec->chunk_list); + if (!list_empty(&rec->device_list)) + list_del_init(&rec->device_list); + free(rec); +} - return back; +void free_device_extent_tree(struct device_extent_tree *tree) +{ + cache_tree_free_extents(&tree->tree, free_device_extent_record); } -/* - * This function calls walk_down_tree_v2 and walk_up_tree_v2 to check tree - * blocks and integrity of fs tree items. - * - * @root: the root of the tree to be checked. - * @ext_ref feature EXTENDED_IREF is enable or not. - * @account if NOT 0 means check the tree (including tree)'s treeblocks. - * otherwise means check fs tree(s) items relationship and - * @root MUST be a fs tree root. - * Returns 0 represents OK. - * Returns not 0 represents error. 
- */ -static int check_btrfs_root(struct btrfs_trans_handle *trans, - struct btrfs_root *root, unsigned int ext_ref, - int check_all) +#ifdef BTRFS_COMPAT_EXTENT_TREE_V0 +static int process_extent_ref_v0(struct cache_tree *extent_cache, + struct extent_buffer *leaf, int slot) { - struct btrfs_path path; - struct node_refs nrefs; - struct btrfs_root_item *root_item = &root->root_item; + struct btrfs_extent_ref_v0 *ref0; + struct btrfs_key key; int ret; - int level; - int err = 0; - memset(&nrefs, 0, sizeof(nrefs)); - if (!check_all) { - /* - * We need to manually check the first inode item (256) - * As the following traversal function will only start from - * the first inode item in the leaf, if inode item (256) is - * missing we will skip it forever. - */ - ret = check_fs_first_inode(root, ext_ref); - if (ret < 0) - return ret; + btrfs_item_key_to_cpu(leaf, &key, slot); + ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0); + if (btrfs_ref_objectid_v0(leaf, ref0) < BTRFS_FIRST_FREE_OBJECTID) { + ret = add_tree_backref(extent_cache, key.objectid, key.offset, + 0, 0); + } else { + ret = add_data_backref(extent_cache, key.objectid, key.offset, + 0, 0, 0, btrfs_ref_count_v0(leaf, ref0), 0, 0); } + return ret; +} +#endif +struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf, + struct btrfs_key *key, + int slot) +{ + struct btrfs_chunk *ptr; + struct chunk_record *rec; + int num_stripes, i; - level = btrfs_header_level(root->node); - btrfs_init_path(&path); - - if (btrfs_root_refs(root_item) > 0 || - btrfs_disk_key_objectid(&root_item->drop_progress) == 0) { - path.nodes[level] = root->node; - path.slots[level] = 0; - extent_buffer_get(root->node); - } else { - struct btrfs_key key; + ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk); + num_stripes = btrfs_chunk_num_stripes(leaf, ptr); - btrfs_disk_key_to_cpu(&key, &root_item->drop_progress); - level = root_item->drop_level; - path.lowest_level = level; - ret = btrfs_search_slot(NULL, root, 
&key, &path, 0, 0); - if (ret < 0) - goto out; - ret = 0; + rec = calloc(1, btrfs_chunk_record_size(num_stripes)); + if (!rec) { + fprintf(stderr, "memory allocation failed\n"); + exit(-1); } - while (1) { - ret = walk_down_tree_v2(trans, root, &path, &level, &nrefs, - ext_ref, check_all); + INIT_LIST_HEAD(&rec->list); + INIT_LIST_HEAD(&rec->dextents); + rec->bg_rec = NULL; - err |= !!ret; + rec->cache.start = key->offset; + rec->cache.size = btrfs_chunk_length(leaf, ptr); - /* if ret is negative, walk shall stop */ - if (ret < 0) { - ret = err; - break; - } + rec->generation = btrfs_header_generation(leaf); - ret = walk_up_tree_v2(root, &path, &level); - if (ret != 0) { - /* Normal exit, reset ret to err */ - ret = err; - break; - } + rec->objectid = key->objectid; + rec->type = key->type; + rec->offset = key->offset; + + rec->length = rec->cache.size; + rec->owner = btrfs_chunk_owner(leaf, ptr); + rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr); + rec->type_flags = btrfs_chunk_type(leaf, ptr); + rec->io_width = btrfs_chunk_io_width(leaf, ptr); + rec->io_align = btrfs_chunk_io_align(leaf, ptr); + rec->sector_size = btrfs_chunk_sector_size(leaf, ptr); + rec->num_stripes = num_stripes; + rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr); + + for (i = 0; i < rec->num_stripes; ++i) { + rec->stripes[i].devid = + btrfs_stripe_devid_nr(leaf, ptr, i); + rec->stripes[i].offset = + btrfs_stripe_offset_nr(leaf, ptr, i); + read_extent_buffer(leaf, rec->stripes[i].dev_uuid, + (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i), + BTRFS_UUID_SIZE); } -out: - btrfs_release_path(&path); - return ret; + return rec; } -/* - * Iterate all items in the tree and call check_inode_item() to check. - * - * @root: the root of the tree to be checked. - * @ext_ref: the EXTENDED_IREF feature - * - * Return 0 if no error found. - * Return <0 for error. 
- */ -static int check_fs_root_v2(struct btrfs_root *root, unsigned int ext_ref) +static int process_chunk_item(struct cache_tree *chunk_cache, + struct btrfs_key *key, struct extent_buffer *eb, + int slot) { - reset_cached_block_groups(root->fs_info); - return check_btrfs_root(NULL, root, ext_ref, 0); + struct chunk_record *rec; + struct btrfs_chunk *chunk; + int ret = 0; + + chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk); + /* + * Do extra check for this chunk item, + * + * It's still possible one can craft a leaf with CHUNK_ITEM, with + * wrong onwer(3) out of chunk tree, to pass both chunk tree check + * and owner<->key_type check. + */ + ret = btrfs_check_chunk_valid(global_info, eb, chunk, slot, + key->offset); + if (ret < 0) { + error("chunk(%llu, %llu) is not valid, ignore it", + key->offset, btrfs_chunk_length(eb, chunk)); + return 0; + } + rec = btrfs_new_chunk_record(eb, key, slot); + ret = insert_cache_extent(chunk_cache, &rec->cache); + if (ret) { + fprintf(stderr, "Chunk[%llu, %llu] existed.\n", + rec->offset, rec->length); + free(rec); + } + + return ret; } -/* - * Find the relative ref for root_ref and root_backref. - * - * @root: the root of the root tree. - * @ref_key: the key of the root ref. - * - * Return 0 if no error occurred. 
- */ -static int check_root_ref(struct btrfs_root *root, struct btrfs_key *ref_key, - struct extent_buffer *node, int slot) +static int process_device_item(struct rb_root *dev_cache, + struct btrfs_key *key, struct extent_buffer *eb, int slot) { - struct btrfs_path path; - struct btrfs_key key; - struct btrfs_root_ref *ref; - struct btrfs_root_ref *backref; - char ref_name[BTRFS_NAME_LEN] = {0}; - char backref_name[BTRFS_NAME_LEN] = {0}; - u64 ref_dirid; - u64 ref_seq; - u32 ref_namelen; - u64 backref_dirid; - u64 backref_seq; - u32 backref_namelen; - u32 len; - int ret; - int err = 0; + struct btrfs_dev_item *ptr; + struct device_record *rec; + int ret = 0; - ref = btrfs_item_ptr(node, slot, struct btrfs_root_ref); - ref_dirid = btrfs_root_ref_dirid(node, ref); - ref_seq = btrfs_root_ref_sequence(node, ref); - ref_namelen = btrfs_root_ref_name_len(node, ref); + ptr = btrfs_item_ptr(eb, + slot, struct btrfs_dev_item); - if (ref_namelen <= BTRFS_NAME_LEN) { - len = ref_namelen; - } else { - len = BTRFS_NAME_LEN; - warning("%s[%llu %llu] ref_name too long", - ref_key->type == BTRFS_ROOT_REF_KEY ? - "ROOT_REF" : "ROOT_BACKREF", ref_key->objectid, - ref_key->offset); + rec = malloc(sizeof(*rec)); + if (!rec) { + fprintf(stderr, "memory allocation failed\n"); + return -ENOMEM; } - read_extent_buffer(node, ref_name, (unsigned long)(ref + 1), len); - /* Find relative root_ref */ - key.objectid = ref_key->offset; - key.type = BTRFS_ROOT_BACKREF_KEY + BTRFS_ROOT_REF_KEY - ref_key->type; - key.offset = ref_key->objectid; + rec->devid = key->offset; + rec->generation = btrfs_header_generation(eb); - btrfs_init_path(&path); - ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0); - if (ret) { - err |= ROOT_REF_MISSING; - error("%s[%llu %llu] couldn't find relative ref", - ref_key->type == BTRFS_ROOT_REF_KEY ? 
- "ROOT_REF" : "ROOT_BACKREF", - ref_key->objectid, ref_key->offset); - goto out; - } + rec->objectid = key->objectid; + rec->type = key->type; + rec->offset = key->offset; - backref = btrfs_item_ptr(path.nodes[0], path.slots[0], - struct btrfs_root_ref); - backref_dirid = btrfs_root_ref_dirid(path.nodes[0], backref); - backref_seq = btrfs_root_ref_sequence(path.nodes[0], backref); - backref_namelen = btrfs_root_ref_name_len(path.nodes[0], backref); + rec->devid = btrfs_device_id(eb, ptr); + rec->total_byte = btrfs_device_total_bytes(eb, ptr); + rec->byte_used = btrfs_device_bytes_used(eb, ptr); - if (backref_namelen <= BTRFS_NAME_LEN) { - len = backref_namelen; - } else { - len = BTRFS_NAME_LEN; - warning("%s[%llu %llu] ref_name too long", - key.type == BTRFS_ROOT_REF_KEY ? - "ROOT_REF" : "ROOT_BACKREF", - key.objectid, key.offset); - } - read_extent_buffer(path.nodes[0], backref_name, - (unsigned long)(backref + 1), len); - - if (ref_dirid != backref_dirid || ref_seq != backref_seq || - ref_namelen != backref_namelen || - strncmp(ref_name, backref_name, len)) { - err |= ROOT_REF_MISMATCH; - error("%s[%llu %llu] mismatch relative ref", - ref_key->type == BTRFS_ROOT_REF_KEY ? - "ROOT_REF" : "ROOT_BACKREF", - ref_key->objectid, ref_key->offset); + ret = rb_insert(dev_cache, &rec->node, device_record_compare); + if (ret) { + fprintf(stderr, "Device[%llu] existed.\n", rec->devid); + free(rec); } -out: - btrfs_release_path(&path); - return err; + + return ret; } -/* - * Check all fs/file tree in low_memory mode. - * - * 1. for fs tree root item, call check_fs_root_v2() - * 2. for fs tree root ref/backref, call check_root_ref() - * - * Return 0 if no error occurred. 
- */ -static int check_fs_roots_v2(struct btrfs_fs_info *fs_info) +struct block_group_record * +btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key, + int slot) { - struct btrfs_root *tree_root = fs_info->tree_root; - struct btrfs_root *cur_root = NULL; - struct btrfs_path path; - struct btrfs_key key; - struct extent_buffer *node; - unsigned int ext_ref; - int slot; - int ret; - int err = 0; - - ext_ref = btrfs_fs_incompat(fs_info, EXTENDED_IREF); - - btrfs_init_path(&path); - key.objectid = BTRFS_FS_TREE_OBJECTID; - key.offset = 0; - key.type = BTRFS_ROOT_ITEM_KEY; + struct btrfs_block_group_item *ptr; + struct block_group_record *rec; - ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0); - if (ret < 0) { - err = ret; - goto out; - } else if (ret > 0) { - err = -ENOENT; - goto out; + rec = calloc(1, sizeof(*rec)); + if (!rec) { + fprintf(stderr, "memory allocation failed\n"); + exit(-1); } - while (1) { - node = path.nodes[0]; - slot = path.slots[0]; - btrfs_item_key_to_cpu(node, &key, slot); - if (key.objectid > BTRFS_LAST_FREE_OBJECTID) - goto out; - if (key.type == BTRFS_ROOT_ITEM_KEY && - fs_root_objectid(key.objectid)) { - if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) { - cur_root = btrfs_read_fs_root_no_cache(fs_info, - &key); - } else { - key.offset = (u64)-1; - cur_root = btrfs_read_fs_root(fs_info, &key); - } + rec->cache.start = key->objectid; + rec->cache.size = key->offset; - if (IS_ERR(cur_root)) { - error("Fail to read fs/subvol tree: %lld", - key.objectid); - err = -EIO; - goto next; - } + rec->generation = btrfs_header_generation(leaf); - ret = check_fs_root_v2(cur_root, ext_ref); - err |= ret; + rec->objectid = key->objectid; + rec->type = key->type; + rec->offset = key->offset; - if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) - btrfs_free_fs_root(cur_root); - } else if (key.type == BTRFS_ROOT_REF_KEY || - key.type == BTRFS_ROOT_BACKREF_KEY) { - ret = check_root_ref(tree_root, &key, node, slot); - err |= ret; 
- } -next: - ret = btrfs_next_item(tree_root, &path); - if (ret > 0) - goto out; - if (ret < 0) { - err = ret; - goto out; - } - } + ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item); + rec->flags = btrfs_disk_block_group_flags(leaf, ptr); -out: - btrfs_release_path(&path); - return err; + INIT_LIST_HEAD(&rec->list); + + return rec; } -static int do_check_fs_roots(struct btrfs_fs_info *fs_info, - struct cache_tree *root_cache) +static int process_block_group_item(struct block_group_tree *block_group_cache, + struct btrfs_key *key, + struct extent_buffer *eb, int slot) { - int ret; + struct block_group_record *rec; + int ret = 0; - if (!ctx.progress_enabled) - fprintf(stderr, "checking fs roots\n"); - if (check_mode == CHECK_MODE_LOWMEM) - ret = check_fs_roots_v2(fs_info); - else - ret = check_fs_roots(fs_info, root_cache); + rec = btrfs_new_block_group_record(eb, key, slot); + ret = insert_block_group_record(block_group_cache, rec); + if (ret) { + fprintf(stderr, "Block Group[%llu, %llu] existed.\n", + rec->objectid, rec->offset); + free(rec); + } return ret; } -static int all_backpointers_checked(struct extent_record *rec, int print_errs) +struct device_extent_record * +btrfs_new_device_extent_record(struct extent_buffer *leaf, + struct btrfs_key *key, int slot) { - struct extent_backref *back, *tmp; - struct tree_backref *tback; - struct data_backref *dback; - u64 found = 0; - int err = 0; - - rbtree_postorder_for_each_entry_safe(back, tmp, - &rec->backref_tree, node) { - if (!back->found_extent_tree) { - err = 1; - if (!print_errs) - goto out; - if (back->is_data) { - dback = to_data_backref(back); - fprintf(stderr, "Data backref %llu %s %llu" - " owner %llu offset %llu num_refs %lu" - " not found in extent tree\n", - (unsigned long long)rec->start, - back->full_backref ? - "parent" : "root", - back->full_backref ? 
- (unsigned long long)dback->parent: - (unsigned long long)dback->root, - (unsigned long long)dback->owner, - (unsigned long long)dback->offset, - (unsigned long)dback->num_refs); - } else { - tback = to_tree_backref(back); - fprintf(stderr, "Tree backref %llu parent %llu" - " root %llu not found in extent tree\n", - (unsigned long long)rec->start, - (unsigned long long)tback->parent, - (unsigned long long)tback->root); - } - } - if (!back->is_data && !back->found_ref) { - err = 1; - if (!print_errs) - goto out; - tback = to_tree_backref(back); - fprintf(stderr, "Backref %llu %s %llu not referenced back %p\n", - (unsigned long long)rec->start, - back->full_backref ? "parent" : "root", - back->full_backref ? - (unsigned long long)tback->parent : - (unsigned long long)tback->root, back); - } - if (back->is_data) { - dback = to_data_backref(back); - if (dback->found_ref != dback->num_refs) { - err = 1; - if (!print_errs) - goto out; - fprintf(stderr, "Incorrect local backref count" - " on %llu %s %llu owner %llu" - " offset %llu found %u wanted %u back %p\n", - (unsigned long long)rec->start, - back->full_backref ? - "parent" : "root", - back->full_backref ? 
- (unsigned long long)dback->parent: - (unsigned long long)dback->root, - (unsigned long long)dback->owner, - (unsigned long long)dback->offset, - dback->found_ref, dback->num_refs, back); - } - if (dback->disk_bytenr != rec->start) { - err = 1; - if (!print_errs) - goto out; - fprintf(stderr, "Backref disk bytenr does not" - " match extent record, bytenr=%llu, " - "ref bytenr=%llu\n", - (unsigned long long)rec->start, - (unsigned long long)dback->disk_bytenr); - } + struct device_extent_record *rec; + struct btrfs_dev_extent *ptr; - if (dback->bytes != rec->nr) { - err = 1; - if (!print_errs) - goto out; - fprintf(stderr, "Backref bytes do not match " - "extent backref, bytenr=%llu, ref " - "bytes=%llu, backref bytes=%llu\n", - (unsigned long long)rec->start, - (unsigned long long)rec->nr, - (unsigned long long)dback->bytes); - } - } - if (!back->is_data) { - found += 1; - } else { - dback = to_data_backref(back); - found += dback->found_ref; - } - } - if (found != rec->refs) { - err = 1; - if (!print_errs) - goto out; - fprintf(stderr, "Incorrect global backref count " - "on %llu found %llu wanted %llu\n", - (unsigned long long)rec->start, - (unsigned long long)found, - (unsigned long long)rec->refs); + rec = calloc(1, sizeof(*rec)); + if (!rec) { + fprintf(stderr, "memory allocation failed\n"); + exit(-1); } -out: - return err; -} -static void __free_one_backref(struct rb_node *node) -{ - struct extent_backref *back = rb_node_to_extent_backref(node); + rec->cache.objectid = key->objectid; + rec->cache.start = key->offset; - free(back); -} + rec->generation = btrfs_header_generation(leaf); -static void free_all_extent_backrefs(struct extent_record *rec) -{ - rb_free_nodes(&rec->backref_tree, __free_one_backref); -} + rec->objectid = key->objectid; + rec->type = key->type; + rec->offset = key->offset; -static void free_extent_record_cache(struct cache_tree *extent_cache) -{ - struct cache_extent *cache; - struct extent_record *rec; + ptr = btrfs_item_ptr(leaf, 
slot, struct btrfs_dev_extent); + rec->chunk_objecteid = + btrfs_dev_extent_chunk_objectid(leaf, ptr); + rec->chunk_offset = + btrfs_dev_extent_chunk_offset(leaf, ptr); + rec->length = btrfs_dev_extent_length(leaf, ptr); + rec->cache.size = rec->length; - while (1) { - cache = first_cache_extent(extent_cache); - if (!cache) - break; - rec = container_of(cache, struct extent_record, cache); - remove_cache_extent(extent_cache, cache); - free_all_extent_backrefs(rec); - free(rec); - } + INIT_LIST_HEAD(&rec->chunk_list); + INIT_LIST_HEAD(&rec->device_list); + + return rec; } -static int maybe_free_extent_rec(struct cache_tree *extent_cache, - struct extent_record *rec) +static int +process_device_extent_item(struct device_extent_tree *dev_extent_cache, + struct btrfs_key *key, struct extent_buffer *eb, + int slot) { - if (rec->content_checked && rec->owner_ref_checked && - rec->extent_item_refs == rec->refs && rec->refs > 0 && - rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) && - !rec->bad_full_backref && !rec->crossing_stripes && - !rec->wrong_chunk_type) { - remove_cache_extent(extent_cache, &rec->cache); - free_all_extent_backrefs(rec); - list_del_init(&rec->list); + struct device_extent_record *rec; + int ret; + + rec = btrfs_new_device_extent_record(eb, key, slot); + ret = insert_device_extent_record(dev_extent_cache, rec); + if (ret) { + fprintf(stderr, + "Device extent[%llu, %llu, %llu] existed.\n", + rec->objectid, rec->offset, rec->length); free(rec); } - return 0; + + return ret; } -static int check_owner_ref(struct btrfs_root *root, - struct extent_record *rec, - struct extent_buffer *buf) +static int process_extent_item(struct btrfs_root *root, + struct cache_tree *extent_cache, + struct extent_buffer *eb, int slot) { - struct extent_backref *node, *tmp; - struct tree_backref *back; - struct btrfs_root *ref_root; + struct btrfs_extent_item *ei; + struct btrfs_extent_inline_ref *iref; + struct btrfs_extent_data_ref *dref; + struct 
btrfs_shared_data_ref *sref; struct btrfs_key key; - struct btrfs_path path; - struct extent_buffer *parent; - int level; - int found = 0; + struct extent_record tmpl; + unsigned long end; + unsigned long ptr; int ret; + int type; + u32 item_size = btrfs_item_size_nr(eb, slot); + u64 refs = 0; + u64 offset; + u64 num_bytes; + int metadata = 0; - rbtree_postorder_for_each_entry_safe(node, tmp, - &rec->backref_tree, node) { - if (node->is_data) - continue; - if (!node->found_ref) - continue; - if (node->full_backref) - continue; - back = to_tree_backref(node); - if (btrfs_header_owner(buf) == back->root) - return 0; + btrfs_item_key_to_cpu(eb, &key, slot); + + if (key.type == BTRFS_METADATA_ITEM_KEY) { + metadata = 1; + num_bytes = root->fs_info->nodesize; + } else { + num_bytes = key.offset; } - BUG_ON(rec->is_root); - /* try to find the block by search corresponding fs tree */ - key.objectid = btrfs_header_owner(buf); - key.type = BTRFS_ROOT_ITEM_KEY; - key.offset = (u64)-1; + if (!IS_ALIGNED(key.objectid, root->fs_info->sectorsize)) { + error("ignoring invalid extent, bytenr %llu is not aligned to %u", + key.objectid, root->fs_info->sectorsize); + return -EIO; + } + if (item_size < sizeof(*ei)) { +#ifdef BTRFS_COMPAT_EXTENT_TREE_V0 + struct btrfs_extent_item_v0 *ei0; + if (item_size != sizeof(*ei0)) { + error( + "invalid extent item format: ITEM[%llu %u %llu] leaf: %llu slot: %d", + key.objectid, key.type, key.offset, + btrfs_header_bytenr(eb), slot); + BUG(); + } + ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0); + refs = btrfs_extent_refs_v0(eb, ei0); +#else + BUG(); +#endif + memset(&tmpl, 0, sizeof(tmpl)); + tmpl.start = key.objectid; + tmpl.nr = num_bytes; + tmpl.extent_item_refs = refs; + tmpl.metadata = metadata; + tmpl.found_rec = 1; + tmpl.max_size = num_bytes; - ref_root = btrfs_read_fs_root(root->fs_info, &key); - if (IS_ERR(ref_root)) - return 1; + return add_extent_rec(extent_cache, &tmpl); + } - level = btrfs_header_level(buf); - if 
(level == 0) - btrfs_item_key_to_cpu(buf, &key, 0); + ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item); + refs = btrfs_extent_refs(eb, ei); + if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK) + metadata = 1; else - btrfs_node_key_to_cpu(buf, &key, 0); + metadata = 0; + if (metadata && num_bytes != root->fs_info->nodesize) { + error("ignore invalid metadata extent, length %llu does not equal to %u", + num_bytes, root->fs_info->nodesize); + return -EIO; + } + if (!metadata && !IS_ALIGNED(num_bytes, root->fs_info->sectorsize)) { + error("ignore invalid data extent, length %llu is not aligned to %u", + num_bytes, root->fs_info->sectorsize); + return -EIO; + } - btrfs_init_path(&path); - path.lowest_level = level + 1; - ret = btrfs_search_slot(NULL, ref_root, &key, &path, 0, 0); - if (ret < 0) - return 0; + memset(&tmpl, 0, sizeof(tmpl)); + tmpl.start = key.objectid; + tmpl.nr = num_bytes; + tmpl.extent_item_refs = refs; + tmpl.metadata = metadata; + tmpl.found_rec = 1; + tmpl.max_size = num_bytes; + add_extent_rec(extent_cache, &tmpl); - parent = path.nodes[level + 1]; - if (parent && buf->start == btrfs_node_blockptr(parent, - path.slots[level + 1])) - found = 1; + ptr = (unsigned long)(ei + 1); + if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK && + key.type == BTRFS_EXTENT_ITEM_KEY) + ptr += sizeof(struct btrfs_tree_block_info); - btrfs_release_path(&path); - return found ? 
0 : 1; + end = (unsigned long)ei + item_size; + while (ptr < end) { + iref = (struct btrfs_extent_inline_ref *)ptr; + type = btrfs_extent_inline_ref_type(eb, iref); + offset = btrfs_extent_inline_ref_offset(eb, iref); + switch (type) { + case BTRFS_TREE_BLOCK_REF_KEY: + ret = add_tree_backref(extent_cache, key.objectid, + 0, offset, 0); + if (ret < 0) + error( + "add_tree_backref failed (extent items tree block): %s", + strerror(-ret)); + break; + case BTRFS_SHARED_BLOCK_REF_KEY: + ret = add_tree_backref(extent_cache, key.objectid, + offset, 0, 0); + if (ret < 0) + error( + "add_tree_backref failed (extent items shared block): %s", + strerror(-ret)); + break; + case BTRFS_EXTENT_DATA_REF_KEY: + dref = (struct btrfs_extent_data_ref *)(&iref->offset); + add_data_backref(extent_cache, key.objectid, 0, + btrfs_extent_data_ref_root(eb, dref), + btrfs_extent_data_ref_objectid(eb, + dref), + btrfs_extent_data_ref_offset(eb, dref), + btrfs_extent_data_ref_count(eb, dref), + 0, num_bytes); + break; + case BTRFS_SHARED_DATA_REF_KEY: + sref = (struct btrfs_shared_data_ref *)(iref + 1); + add_data_backref(extent_cache, key.objectid, offset, + 0, 0, 0, + btrfs_shared_data_ref_count(eb, sref), + 0, num_bytes); + break; + default: + fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n", + key.objectid, key.type, num_bytes); + goto out; + } + ptr += btrfs_extent_inline_ref_size(type); + } + WARN_ON(ptr > end); +out: + return 0; } -static int is_extent_tree_record(struct extent_record *rec) +static int check_cache_range(struct btrfs_root *root, + struct btrfs_block_group_cache *cache, + u64 offset, u64 bytes) { - struct extent_backref *node, *tmp; - struct tree_backref *back; - int is_extent = 0; + struct btrfs_free_space *entry; + u64 *logical; + u64 bytenr; + int stripe_len; + int i, nr, ret; - rbtree_postorder_for_each_entry_safe(node, tmp, - &rec->backref_tree, node) { - if (node->is_data) - return 0; - back = to_tree_backref(node); - if (node->full_backref) - return 0; - 
if (back->root == BTRFS_EXTENT_TREE_OBJECTID) - is_extent = 1; - } - return is_extent; -} + for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) { + bytenr = btrfs_sb_offset(i); + ret = btrfs_rmap_block(root->fs_info, + cache->key.objectid, bytenr, 0, + &logical, &nr, &stripe_len); + if (ret) + return ret; + + while (nr--) { + if (logical[nr] + stripe_len <= offset) + continue; + if (offset + bytes <= logical[nr]) + continue; + if (logical[nr] == offset) { + if (stripe_len >= bytes) { + free(logical); + return 0; + } + bytes -= stripe_len; + offset += stripe_len; + } else if (logical[nr] < offset) { + if (logical[nr] + stripe_len >= + offset + bytes) { + free(logical); + return 0; + } + bytes = (offset + bytes) - + (logical[nr] + stripe_len); + offset = logical[nr] + stripe_len; + } else { + /* + * Could be tricky, the super may land in the + * middle of the area we're checking. First + * check the easiest case, it's at the end. + */ + if (logical[nr] + stripe_len >= + bytes + offset) { + bytes = logical[nr] - offset; + continue; + } + /* Check the left side */ + ret = check_cache_range(root, cache, + offset, + logical[nr] - offset); + if (ret) { + free(logical); + return ret; + } -static int record_bad_block_io(struct btrfs_fs_info *info, - struct cache_tree *extent_cache, - u64 start, u64 len) -{ - struct extent_record *rec; - struct cache_extent *cache; - struct btrfs_key key; + /* Now we continue with the right side */ + bytes = (offset + bytes) - + (logical[nr] + stripe_len); + offset = logical[nr] + stripe_len; + } + } - cache = lookup_cache_extent(extent_cache, start, len); - if (!cache) - return 0; + free(logical); + } - rec = container_of(cache, struct extent_record, cache); - if (!is_extent_tree_record(rec)) - return 0; + entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes); + if (!entry) { + fprintf(stderr, "There is no free space entry for %Lu-%Lu\n", + offset, offset+bytes); + return -EINVAL; + } - btrfs_disk_key_to_cpu(&key, &rec->parent_key); 
- return btrfs_add_corrupt_extent_record(info, &key, start, len, 0); + if (entry->offset != offset) { + fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset, + entry->offset); + return -EINVAL; + } + + if (entry->bytes != bytes) { + fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n", + bytes, entry->bytes, offset); + return -EINVAL; + } + + unlink_free_space(cache->free_space_ctl, entry); + free(entry); + return 0; } -static int swap_values(struct btrfs_root *root, struct btrfs_path *path, - struct extent_buffer *buf, int slot) +static int verify_space_cache(struct btrfs_root *root, + struct btrfs_block_group_cache *cache) { - if (btrfs_header_level(buf)) { - struct btrfs_key_ptr ptr1, ptr2; + struct btrfs_path path; + struct extent_buffer *leaf; + struct btrfs_key key; + u64 last; + int ret = 0; - read_extent_buffer(buf, &ptr1, btrfs_node_key_ptr_offset(slot), - sizeof(struct btrfs_key_ptr)); - read_extent_buffer(buf, &ptr2, - btrfs_node_key_ptr_offset(slot + 1), - sizeof(struct btrfs_key_ptr)); - write_extent_buffer(buf, &ptr1, - btrfs_node_key_ptr_offset(slot + 1), - sizeof(struct btrfs_key_ptr)); - write_extent_buffer(buf, &ptr2, - btrfs_node_key_ptr_offset(slot), - sizeof(struct btrfs_key_ptr)); - if (slot == 0) { - struct btrfs_disk_key key; - btrfs_node_key(buf, &key, 0); - btrfs_fixup_low_keys(root, path, &key, - btrfs_header_level(buf) + 1); - } - } else { - struct btrfs_item *item1, *item2; - struct btrfs_key k1, k2; - char *item1_data, *item2_data; - u32 item1_offset, item2_offset, item1_size, item2_size; + root = root->fs_info->extent_root; - item1 = btrfs_item_nr(slot); - item2 = btrfs_item_nr(slot + 1); - btrfs_item_key_to_cpu(buf, &k1, slot); - btrfs_item_key_to_cpu(buf, &k2, slot + 1); - item1_offset = btrfs_item_offset(buf, item1); - item2_offset = btrfs_item_offset(buf, item2); - item1_size = btrfs_item_size(buf, item1); - item2_size = btrfs_item_size(buf, item2); + last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET); - 
item1_data = malloc(item1_size); - if (!item1_data) - return -ENOMEM; - item2_data = malloc(item2_size); - if (!item2_data) { - free(item1_data); - return -ENOMEM; + btrfs_init_path(&path); + key.objectid = last; + key.offset = 0; + key.type = BTRFS_EXTENT_ITEM_KEY; + ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0); + if (ret < 0) + goto out; + ret = 0; + while (1) { + if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) { + ret = btrfs_next_leaf(root, &path); + if (ret < 0) + goto out; + if (ret > 0) { + ret = 0; + break; + } + } + leaf = path.nodes[0]; + btrfs_item_key_to_cpu(leaf, &key, path.slots[0]); + if (key.objectid >= cache->key.offset + cache->key.objectid) + break; + if (key.type != BTRFS_EXTENT_ITEM_KEY && + key.type != BTRFS_METADATA_ITEM_KEY) { + path.slots[0]++; + continue; } - read_extent_buffer(buf, item1_data, item1_offset, item1_size); - read_extent_buffer(buf, item2_data, item2_offset, item2_size); - - write_extent_buffer(buf, item1_data, item2_offset, item2_size); - write_extent_buffer(buf, item2_data, item1_offset, item1_size); - free(item1_data); - free(item2_data); - - btrfs_set_item_offset(buf, item1, item2_offset); - btrfs_set_item_offset(buf, item2, item1_offset); - btrfs_set_item_size(buf, item1, item2_size); - btrfs_set_item_size(buf, item2, item1_size); + if (last == key.objectid) { + if (key.type == BTRFS_EXTENT_ITEM_KEY) + last = key.objectid + key.offset; + else + last = key.objectid + root->fs_info->nodesize; + path.slots[0]++; + continue; + } - path->slots[0] = slot; - btrfs_set_item_key_unsafe(root, path, &k2); - path->slots[0] = slot + 1; - btrfs_set_item_key_unsafe(root, path, &k1); + ret = check_cache_range(root, cache, last, + key.objectid - last); + if (ret) + break; + if (key.type == BTRFS_EXTENT_ITEM_KEY) + last = key.objectid + key.offset; + else + last = key.objectid + root->fs_info->nodesize; + path.slots[0]++; } - return 0; -} -static int fix_key_order(struct btrfs_root *root, struct btrfs_path *path) -{ - 
struct extent_buffer *buf; - struct btrfs_key k1, k2; - int i; - int level = path->lowest_level; - int ret = -EIO; + if (last < cache->key.objectid + cache->key.offset) + ret = check_cache_range(root, cache, last, + cache->key.objectid + + cache->key.offset - last); - buf = path->nodes[level]; - for (i = 0; i < btrfs_header_nritems(buf) - 1; i++) { - if (level) { - btrfs_node_key_to_cpu(buf, &k1, i); - btrfs_node_key_to_cpu(buf, &k2, i + 1); - } else { - btrfs_item_key_to_cpu(buf, &k1, i); - btrfs_item_key_to_cpu(buf, &k2, i + 1); - } - if (btrfs_comp_cpu_keys(&k1, &k2) < 0) - continue; - ret = swap_values(root, path, buf, i); - if (ret) - break; - btrfs_mark_buffer_dirty(buf); - i = 0; +out: + btrfs_release_path(&path); + + if (!ret && + !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) { + fprintf(stderr, "There are still entries left in the space " + "cache\n"); + ret = -EINVAL; } + return ret; } -static int delete_bogus_item(struct btrfs_root *root, - struct btrfs_path *path, - struct extent_buffer *buf, int slot) +static int check_space_cache(struct btrfs_root *root) { - struct btrfs_key key; - int nritems = btrfs_header_nritems(buf); - - btrfs_item_key_to_cpu(buf, &key, slot); - - /* These are all the keys we can deal with missing. 
*/ - if (key.type != BTRFS_DIR_INDEX_KEY && - key.type != BTRFS_EXTENT_ITEM_KEY && - key.type != BTRFS_METADATA_ITEM_KEY && - key.type != BTRFS_TREE_BLOCK_REF_KEY && - key.type != BTRFS_EXTENT_DATA_REF_KEY) - return -1; - - printf("Deleting bogus item [%llu,%u,%llu] at slot %d on block %llu\n", - (unsigned long long)key.objectid, key.type, - (unsigned long long)key.offset, slot, buf->start); - memmove_extent_buffer(buf, btrfs_item_nr_offset(slot), - btrfs_item_nr_offset(slot + 1), - sizeof(struct btrfs_item) * - (nritems - slot - 1)); - btrfs_set_header_nritems(buf, nritems - 1); - if (slot == 0) { - struct btrfs_disk_key disk_key; + struct btrfs_block_group_cache *cache; + u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE; + int ret; + int error = 0; - btrfs_item_key(buf, &disk_key, 0); - btrfs_fixup_low_keys(root, path, &disk_key, 1); + if (btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL && + btrfs_super_generation(root->fs_info->super_copy) != + btrfs_super_cache_generation(root->fs_info->super_copy)) { + printf("cache and super generation don't match, space cache " + "will be invalidated\n"); + return 0; } - btrfs_mark_buffer_dirty(buf); - return 0; -} -static int fix_item_offset(struct btrfs_root *root, struct btrfs_path *path) -{ - struct extent_buffer *buf; - int i; - int ret = 0; + if (ctx.progress_enabled) { + ctx.tp = TASK_FREE_SPACE; + task_start(ctx.info); + } - /* We should only get this for leaves */ - BUG_ON(path->lowest_level); - buf = path->nodes[0]; -again: - for (i = 0; i < btrfs_header_nritems(buf); i++) { - unsigned int shift = 0, offset; + while (1) { + cache = btrfs_lookup_first_block_group(root->fs_info, start); + if (!cache) + break; - if (i == 0 && btrfs_item_end_nr(buf, i) != - BTRFS_LEAF_DATA_SIZE(root->fs_info)) { - if (btrfs_item_end_nr(buf, i) > - BTRFS_LEAF_DATA_SIZE(root->fs_info)) { - ret = delete_bogus_item(root, path, buf, i); - if (!ret) - goto again; - fprintf(stderr, "item is off the end of the " - 
"leaf, can't fix\n"); - ret = -EIO; + start = cache->key.objectid + cache->key.offset; + if (!cache->free_space_ctl) { + if (btrfs_init_free_space_ctl(cache, + root->fs_info->sectorsize)) { + ret = -ENOMEM; break; } - shift = BTRFS_LEAF_DATA_SIZE(root->fs_info) - - btrfs_item_end_nr(buf, i); - } else if (i > 0 && btrfs_item_end_nr(buf, i) != - btrfs_item_offset_nr(buf, i - 1)) { - if (btrfs_item_end_nr(buf, i) > - btrfs_item_offset_nr(buf, i - 1)) { - ret = delete_bogus_item(root, path, buf, i); - if (!ret) - goto again; - fprintf(stderr, "items overlap, can't fix\n"); - ret = -EIO; - break; + } else { + btrfs_remove_free_space_cache(cache); + } + + if (btrfs_fs_compat_ro(root->fs_info, FREE_SPACE_TREE)) { + ret = exclude_super_stripes(root, cache); + if (ret) { + fprintf(stderr, "could not exclude super stripes: %s\n", + strerror(-ret)); + error++; + continue; } - shift = btrfs_item_offset_nr(buf, i - 1) - - btrfs_item_end_nr(buf, i); + ret = load_free_space_tree(root->fs_info, cache); + free_excluded_extents(root, cache); + if (ret < 0) { + fprintf(stderr, "could not load free space tree: %s\n", + strerror(-ret)); + error++; + continue; + } + error += ret; + } else { + ret = load_free_space_cache(root->fs_info, cache); + if (!ret) + continue; } - if (!shift) - continue; - printf("Shifting item nr %d by %u bytes in block %llu\n", - i, shift, (unsigned long long)buf->start); - offset = btrfs_item_offset_nr(buf, i); - memmove_extent_buffer(buf, - btrfs_leaf_data(buf) + offset + shift, - btrfs_leaf_data(buf) + offset, - btrfs_item_size_nr(buf, i)); - btrfs_set_item_offset(buf, btrfs_item_nr(i), - offset + shift); - btrfs_mark_buffer_dirty(buf); + ret = verify_space_cache(root, cache); + if (ret) { + fprintf(stderr, "cache appears valid but isn't %Lu\n", + cache->key.objectid); + error++; + } } - /* - * We may have moved things, in which case we want to exit so we don't - * write those changes out. 
Once we have proper abort functionality in - * progs this can be changed to something nicer. - */ - BUG_ON(ret); - return ret; + task_stop(ctx.info); + + return error ? -EINVAL : 0; } -/* - * Attempt to fix basic block failures. If we can't fix it for whatever reason - * then just return -EIO. - */ -static int try_to_fix_bad_block(struct btrfs_root *root, - struct extent_buffer *buf, - enum btrfs_tree_block_status status) -{ - struct btrfs_trans_handle *trans; - struct ulist *roots; - struct ulist_node *node; - struct btrfs_root *search_root; - struct btrfs_path path; - struct ulist_iterator iter; - struct btrfs_key root_key, key; - int ret; - - if (status != BTRFS_TREE_BLOCK_BAD_KEY_ORDER && - status != BTRFS_TREE_BLOCK_INVALID_OFFSETS) - return -EIO; +static int check_extent_csums(struct btrfs_root *root, u64 bytenr, + u64 num_bytes, unsigned long leaf_offset, + struct extent_buffer *eb) { - ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start, 0, &roots); - if (ret) - return -EIO; + struct btrfs_fs_info *fs_info = root->fs_info; + u64 offset = 0; + u16 csum_size = btrfs_super_csum_size(fs_info->super_copy); + char *data; + unsigned long csum_offset; + u32 csum; + u32 csum_expected; + u64 read_len; + u64 data_checked = 0; + u64 tmp; + int ret = 0; + int mirror; + int num_copies; - btrfs_init_path(&path); - ULIST_ITER_INIT(&iter); - while ((node = ulist_next(roots, &iter))) { - root_key.objectid = node->val; - root_key.type = BTRFS_ROOT_ITEM_KEY; - root_key.offset = (u64)-1; + if (num_bytes % fs_info->sectorsize) + return -EINVAL; - search_root = btrfs_read_fs_root(root->fs_info, &root_key); - if (IS_ERR(root)) { - ret = -EIO; - break; - } + data = malloc(num_bytes); + if (!data) + return -ENOMEM; + while (offset < num_bytes) { + mirror = 0; +again: + read_len = num_bytes - offset; + /* read as much space once a time */ + ret = read_extent_data(fs_info, data + offset, + bytenr + offset, &read_len, mirror); + if (ret) + goto out; + data_checked = 0; + /* 
verify every 4k data's checksum */ + while (data_checked < read_len) { + csum = ~(u32)0; + tmp = offset + data_checked; - trans = btrfs_start_transaction(search_root, 0); - if (IS_ERR(trans)) { - ret = PTR_ERR(trans); - break; - } + csum = btrfs_csum_data((char *)data + tmp, + csum, fs_info->sectorsize); + btrfs_csum_final(csum, (u8 *)&csum); - path.lowest_level = btrfs_header_level(buf); - path.skip_check_block = 1; - if (path.lowest_level) - btrfs_node_key_to_cpu(buf, &key, 0); - else - btrfs_item_key_to_cpu(buf, &key, 0); - ret = btrfs_search_slot(trans, search_root, &key, &path, 0, 1); - if (ret) { - ret = -EIO; - btrfs_commit_transaction(trans, search_root); - break; - } - if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER) - ret = fix_key_order(search_root, &path); - else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS) - ret = fix_item_offset(search_root, &path); - if (ret) { - btrfs_commit_transaction(trans, search_root); - break; + csum_offset = leaf_offset + + tmp / fs_info->sectorsize * csum_size; + read_extent_buffer(eb, (char *)&csum_expected, + csum_offset, csum_size); + /* try another mirror */ + if (csum != csum_expected) { + fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n", + mirror, bytenr + tmp, + csum, csum_expected); + num_copies = btrfs_num_copies(root->fs_info, + bytenr, num_bytes); + if (mirror < num_copies - 1) { + mirror += 1; + goto again; + } + } + data_checked += fs_info->sectorsize; } - btrfs_release_path(&path); - btrfs_commit_transaction(trans, search_root); + offset += read_len; } - ulist_free(roots); - btrfs_release_path(&path); +out: + free(data); return ret; } -static int check_block(struct btrfs_root *root, - struct cache_tree *extent_cache, - struct extent_buffer *buf, u64 flags) +static int check_extent_exists(struct btrfs_root *root, u64 bytenr, + u64 num_bytes) { - struct extent_record *rec; - struct cache_extent *cache; + struct btrfs_path path; + struct extent_buffer *leaf; struct btrfs_key key; - enum 
btrfs_tree_block_status status; - int ret = 0; - int level; - - cache = lookup_cache_extent(extent_cache, buf->start, buf->len); - if (!cache) - return 1; - rec = container_of(cache, struct extent_record, cache); - rec->generation = btrfs_header_generation(buf); - - level = btrfs_header_level(buf); - if (btrfs_header_nritems(buf) > 0) { + int ret; - if (level == 0) - btrfs_item_key_to_cpu(buf, &key, 0); - else - btrfs_node_key_to_cpu(buf, &key, 0); + btrfs_init_path(&path); + key.objectid = bytenr; + key.type = BTRFS_EXTENT_ITEM_KEY; + key.offset = (u64)-1; - rec->info_objectid = key.objectid; +again: + ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, &path, + 0, 0); + if (ret < 0) { + fprintf(stderr, "Error looking up extent record %d\n", ret); + btrfs_release_path(&path); + return ret; + } else if (ret) { + if (path.slots[0] > 0) { + path.slots[0]--; + } else { + ret = btrfs_prev_leaf(root, &path); + if (ret < 0) { + goto out; + } else if (ret > 0) { + ret = 0; + goto out; + } + } } - rec->info_level = level; - if (btrfs_is_leaf(buf)) - status = btrfs_check_leaf(root, &rec->parent_key, buf); - else - status = btrfs_check_node(root, &rec->parent_key, buf); + btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]); - if (status != BTRFS_TREE_BLOCK_CLEAN) { - if (repair) - status = try_to_fix_bad_block(root, buf, status); - if (status != BTRFS_TREE_BLOCK_CLEAN) { - ret = -EIO; - fprintf(stderr, "bad block %llu\n", - (unsigned long long)buf->start); + /* + * Block group items come before extent items if they have the same + * bytenr, so walk back one more just in case. Dear future traveller, + * first congrats on mastering time travel. Now if it's not too much + * trouble could you go back to 2006 and tell Chris to make the + * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the + * EXTENT_ITEM_KEY please? 
+ */ + while (key.type > BTRFS_EXTENT_ITEM_KEY) { + if (path.slots[0] > 0) { + path.slots[0]--; } else { - /* - * Signal to callers we need to start the scan over - * again since we'll have cowed blocks. - */ - ret = -EAGAIN; - } - } else { - rec->content_checked = 1; - if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) - rec->owner_ref_checked = 1; - else { - ret = check_owner_ref(root, rec, buf); - if (!ret) - rec->owner_ref_checked = 1; + ret = btrfs_prev_leaf(root, &path); + if (ret < 0) { + goto out; + } else if (ret > 0) { + ret = 0; + goto out; + } } + btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]); } - if (!ret) - maybe_free_extent_rec(extent_cache, rec); - return ret; -} - -#if 0 -static struct tree_backref *find_tree_backref(struct extent_record *rec, - u64 parent, u64 root) -{ - struct list_head *cur = rec->backrefs.next; - struct extent_backref *node; - struct tree_backref *back; - while(cur != &rec->backrefs) { - node = to_extent_backref(cur); - cur = cur->next; - if (node->is_data) + while (num_bytes) { + if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) { + ret = btrfs_next_leaf(root, &path); + if (ret < 0) { + fprintf(stderr, "Error going to next leaf " + "%d\n", ret); + btrfs_release_path(&path); + return ret; + } else if (ret) { + break; + } + } + leaf = path.nodes[0]; + btrfs_item_key_to_cpu(leaf, &key, path.slots[0]); + if (key.type != BTRFS_EXTENT_ITEM_KEY) { + path.slots[0]++; continue; - back = to_tree_backref(node); - if (parent > 0) { - if (!node->full_backref) - continue; - if (parent == back->parent) - return back; - } else { - if (node->full_backref) - continue; - if (back->root == root) - return back; } - } - return NULL; -} -#endif + if (key.objectid + key.offset < bytenr) { + path.slots[0]++; + continue; + } + if (key.objectid > bytenr + num_bytes) + break; -static struct tree_backref *alloc_tree_backref(struct extent_record *rec, - u64 parent, u64 root) -{ - struct tree_backref *ref = malloc(sizeof(*ref)); - - if (!ref) - 
return NULL; - memset(&ref->node, 0, sizeof(ref->node)); - if (parent > 0) { - ref->parent = parent; - ref->node.full_backref = 1; - } else { - ref->root = root; - ref->node.full_backref = 0; - } - - return ref; -} - -#if 0 -static struct data_backref *find_data_backref(struct extent_record *rec, - u64 parent, u64 root, - u64 owner, u64 offset, - int found_ref, - u64 disk_bytenr, u64 bytes) -{ - struct list_head *cur = rec->backrefs.next; - struct extent_backref *node; - struct data_backref *back; - - while(cur != &rec->backrefs) { - node = to_extent_backref(cur); - cur = cur->next; - if (!node->is_data) - continue; - back = to_data_backref(node); - if (parent > 0) { - if (!node->full_backref) - continue; - if (parent == back->parent) - return back; + if (key.objectid == bytenr) { + if (key.offset >= num_bytes) { + num_bytes = 0; + break; + } + num_bytes -= key.offset; + bytenr += key.offset; + } else if (key.objectid < bytenr) { + if (key.objectid + key.offset >= bytenr + num_bytes) { + num_bytes = 0; + break; + } + num_bytes = (bytenr + num_bytes) - + (key.objectid + key.offset); + bytenr = key.objectid + key.offset; } else { - if (node->full_backref) - continue; - if (back->root == root && back->owner == owner && - back->offset == offset) { - if (found_ref && node->found_ref && - (back->bytes != bytes || - back->disk_bytenr != disk_bytenr)) - continue; - return back; + if (key.objectid + key.offset < bytenr + num_bytes) { + u64 new_start = key.objectid + key.offset; + u64 new_bytes = bytenr + num_bytes - new_start; + + /* + * Weird case, the extent is in the middle of + * our range, we'll have to search one side + * and then the other. Not sure if this happens + * in real life, but no harm in coding it up + * anyway just in case. 
+ */ + btrfs_release_path(&path); + ret = check_extent_exists(root, new_start, + new_bytes); + if (ret) { + fprintf(stderr, "Right section didn't " + "have a record\n"); + break; + } + num_bytes = key.objectid - bytenr; + goto again; } + num_bytes = key.objectid - bytenr; } + path.slots[0]++; } - return NULL; + ret = 0; + +out: + if (num_bytes && !ret) { + fprintf(stderr, "There are no extents for csum range " + "%Lu-%Lu\n", bytenr, bytenr+num_bytes); + ret = 1; + } + + btrfs_release_path(&path); + return ret; } -#endif -static struct data_backref *alloc_data_backref(struct extent_record *rec, - u64 parent, u64 root, - u64 owner, u64 offset, - u64 max_size) +static int check_csums(struct btrfs_root *root) { - struct data_backref *ref = malloc(sizeof(*ref)); + struct btrfs_path path; + struct extent_buffer *leaf; + struct btrfs_key key; + u64 offset = 0, num_bytes = 0; + u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy); + int errors = 0; + int ret; + u64 data_len; + unsigned long leaf_offset; - if (!ref) - return NULL; - memset(&ref->node, 0, sizeof(ref->node)); - ref->node.is_data = 1; + root = root->fs_info->csum_root; + if (!extent_buffer_uptodate(root->node)) { + fprintf(stderr, "No valid csum tree found\n"); + return -ENOENT; + } - if (parent > 0) { - ref->parent = parent; - ref->owner = 0; - ref->offset = 0; - ref->node.full_backref = 1; - } else { - ref->root = root; - ref->owner = owner; - ref->offset = offset; - ref->node.full_backref = 0; + btrfs_init_path(&path); + key.objectid = BTRFS_EXTENT_CSUM_OBJECTID; + key.type = BTRFS_EXTENT_CSUM_KEY; + key.offset = 0; + ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0); + if (ret < 0) { + fprintf(stderr, "Error searching csum tree %d\n", ret); + btrfs_release_path(&path); + return ret; } - ref->bytes = max_size; - ref->found_ref = 0; - ref->num_refs = 0; - if (max_size > rec->max_size) - rec->max_size = max_size; - return ref; -} -/* Check if the type of extent matches with its chunk */ 
-static void check_extent_type(struct extent_record *rec) -{ - struct btrfs_block_group_cache *bg_cache; + if (ret > 0 && path.slots[0]) + path.slots[0]--; + ret = 0; - bg_cache = btrfs_lookup_first_block_group(global_info, rec->start); - if (!bg_cache) - return; + while (1) { + if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) { + ret = btrfs_next_leaf(root, &path); + if (ret < 0) { + fprintf(stderr, "Error going to next leaf " + "%d\n", ret); + break; + } + if (ret) + break; + } + leaf = path.nodes[0]; - /* data extent, check chunk directly*/ - if (!rec->metadata) { - if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_DATA)) - rec->wrong_chunk_type = 1; - return; - } + btrfs_item_key_to_cpu(leaf, &key, path.slots[0]); + if (key.type != BTRFS_EXTENT_CSUM_KEY) { + path.slots[0]++; + continue; + } - /* metadata extent, check the obvious case first */ - if (!(bg_cache->flags & (BTRFS_BLOCK_GROUP_SYSTEM | - BTRFS_BLOCK_GROUP_METADATA))) { - rec->wrong_chunk_type = 1; - return; + data_len = (btrfs_item_size_nr(leaf, path.slots[0]) / + csum_size) * root->fs_info->sectorsize; + if (!check_data_csum) + goto skip_csum_check; + leaf_offset = btrfs_item_ptr_offset(leaf, path.slots[0]); + ret = check_extent_csums(root, key.offset, data_len, + leaf_offset, leaf); + if (ret) + break; +skip_csum_check: + if (!num_bytes) { + offset = key.offset; + } else if (key.offset != offset + num_bytes) { + ret = check_extent_exists(root, offset, num_bytes); + if (ret) { + fprintf(stderr, "Csum exists for %Lu-%Lu but " + "there is no extent record\n", + offset, offset+num_bytes); + errors++; + } + offset = key.offset; + num_bytes = 0; + } + num_bytes += data_len; + path.slots[0]++; } - /* - * Check SYSTEM extent, as it's also marked as metadata, we can only - * make sure it's a SYSTEM extent by its backref - */ - if (!RB_EMPTY_ROOT(&rec->backref_tree)) { - struct extent_backref *node; - struct tree_backref *tback; - u64 bg_type; + btrfs_release_path(&path); + return errors; +} - node = 
rb_node_to_extent_backref(rb_first(&rec->backref_tree)); - if (node->is_data) { - /* tree block shouldn't have data backref */ - rec->wrong_chunk_type = 1; - return; +static int is_dropped_key(struct btrfs_key *key, + struct btrfs_key *drop_key) { + if (key->objectid < drop_key->objectid) + return 1; + else if (key->objectid == drop_key->objectid) { + if (key->type < drop_key->type) + return 1; + else if (key->type == drop_key->type) { + if (key->offset < drop_key->offset) + return 1; } - tback = container_of(node, struct tree_backref, node); - - if (tback->root == BTRFS_CHUNK_TREE_OBJECTID) - bg_type = BTRFS_BLOCK_GROUP_SYSTEM; - else - bg_type = BTRFS_BLOCK_GROUP_METADATA; - if (!(bg_cache->flags & bg_type)) - rec->wrong_chunk_type = 1; } + return 0; } /* - * Allocate a new extent record, fill default values from @tmpl and insert int - * @extent_cache. Caller is supposed to make sure the [start,nr) is not in - * the cache, otherwise it fails. - */ -static int add_extent_rec_nolookup(struct cache_tree *extent_cache, - struct extent_record *tmpl) -{ - struct extent_record *rec; - int ret = 0; - - BUG_ON(tmpl->max_size == 0); - rec = malloc(sizeof(*rec)); - if (!rec) - return -ENOMEM; - rec->start = tmpl->start; - rec->max_size = tmpl->max_size; - rec->nr = max(tmpl->nr, tmpl->max_size); - rec->found_rec = tmpl->found_rec; - rec->content_checked = tmpl->content_checked; - rec->owner_ref_checked = tmpl->owner_ref_checked; - rec->num_duplicates = 0; - rec->metadata = tmpl->metadata; - rec->flag_block_full_backref = FLAG_UNSET; - rec->bad_full_backref = 0; - rec->crossing_stripes = 0; - rec->wrong_chunk_type = 0; - rec->is_root = tmpl->is_root; - rec->refs = tmpl->refs; - rec->extent_item_refs = tmpl->extent_item_refs; - rec->parent_generation = tmpl->parent_generation; - INIT_LIST_HEAD(&rec->backrefs); - INIT_LIST_HEAD(&rec->dups); - INIT_LIST_HEAD(&rec->list); - rec->backref_tree = RB_ROOT; - memcpy(&rec->parent_key, &tmpl->parent_key, sizeof(tmpl->parent_key)); - 
rec->cache.start = tmpl->start; - rec->cache.size = tmpl->nr; - ret = insert_cache_extent(extent_cache, &rec->cache); - if (ret) { - free(rec); - return ret; - } - bytes_used += rec->nr; - - if (tmpl->metadata) - rec->crossing_stripes = check_crossing_stripes(global_info, - rec->start, global_info->nodesize); - check_extent_type(rec); - return ret; -} - -/* - * Lookup and modify an extent, some values of @tmpl are interpreted verbatim, - * some are hints: - * - refs - if found, increase refs - * - is_root - if found, set - * - content_checked - if found, set - * - owner_ref_checked - if found, set + * Here are the rules for FULL_BACKREF. * - * If not found, create a new one, initialize and insert. + * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set. + * 2) If btrfs_header_owner(buf) no longer points to buf then we have + * FULL_BACKREF set. + * 3) We cowed the block walking down a reloc tree. This is impossible to tell + * if it happened after the relocation occurred since we'll have dropped the + * reloc root, so it's entirely possible to have FULL_BACKREF set on buf and + * have no real way to know for sure. + * + * We process the blocks one root at a time, and we start from the lowest root + * objectid and go to the highest. So we can just lookup the owner backref for + * the record and if we don't find it then we know it doesn't exist and we have + * a FULL BACKREF. + * + * FIXME: if we ever start reclaiming root objectid's then we need to fix this + * assumption and simply indicate that we _think_ that the FULL BACKREF needs to + * be set or not and then we can check later once we've gathered all the refs. 
*/ -static int add_extent_rec(struct cache_tree *extent_cache, - struct extent_record *tmpl) +static int calc_extent_flag(struct cache_tree *extent_cache, + struct extent_buffer *buf, + struct root_item_record *ri, + u64 *flags) { struct extent_record *rec; struct cache_extent *cache; - int ret = 0; - int dup = 0; - - cache = lookup_cache_extent(extent_cache, tmpl->start, tmpl->nr); - if (cache) { - rec = container_of(cache, struct extent_record, cache); - if (tmpl->refs) - rec->refs++; - if (rec->nr == 1) - rec->nr = max(tmpl->nr, tmpl->max_size); - - /* - * We need to make sure to reset nr to whatever the extent - * record says was the real size, this way we can compare it to - * the backrefs. - */ - if (tmpl->found_rec) { - if (tmpl->start != rec->start || rec->found_rec) { - struct extent_record *tmp; + struct tree_backref *tback; + u64 owner = 0; - dup = 1; - if (list_empty(&rec->list)) - list_add_tail(&rec->list, - &duplicate_extents); + cache = lookup_cache_extent(extent_cache, buf->start, 1); + /* we have added this extent before */ + if (!cache) + return -ENOENT; - /* - * We have to do this song and dance in case we - * find an extent record that falls inside of - * our current extent record but does not have - * the same objectid. 
- */ - tmp = malloc(sizeof(*tmp)); - if (!tmp) - return -ENOMEM; - tmp->start = tmpl->start; - tmp->max_size = tmpl->max_size; - tmp->nr = tmpl->nr; - tmp->found_rec = 1; - tmp->metadata = tmpl->metadata; - tmp->extent_item_refs = tmpl->extent_item_refs; - INIT_LIST_HEAD(&tmp->list); - list_add_tail(&tmp->list, &rec->dups); - rec->num_duplicates++; - } else { - rec->nr = tmpl->nr; - rec->found_rec = 1; - } - } + rec = container_of(cache, struct extent_record, cache); - if (tmpl->extent_item_refs && !dup) { - if (rec->extent_item_refs) { - fprintf(stderr, "block %llu rec " - "extent_item_refs %llu, passed %llu\n", - (unsigned long long)tmpl->start, - (unsigned long long) - rec->extent_item_refs, - (unsigned long long)tmpl->extent_item_refs); - } - rec->extent_item_refs = tmpl->extent_item_refs; - } - if (tmpl->is_root) - rec->is_root = 1; - if (tmpl->content_checked) - rec->content_checked = 1; - if (tmpl->owner_ref_checked) - rec->owner_ref_checked = 1; - memcpy(&rec->parent_key, &tmpl->parent_key, - sizeof(tmpl->parent_key)); - if (tmpl->parent_generation) - rec->parent_generation = tmpl->parent_generation; - if (rec->max_size < tmpl->max_size) - rec->max_size = tmpl->max_size; + /* + * Except file/reloc tree, we can not have + * FULL BACKREF MODE + */ + if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID) + goto normal; + /* + * root node + */ + if (buf->start == ri->bytenr) + goto normal; - /* - * A metadata extent can't cross stripe_len boundary, otherwise - * kernel scrub won't be able to handle it. - * As now stripe_len is fixed to BTRFS_STRIPE_LEN, just check - * it. 
- */ - if (tmpl->metadata) - rec->crossing_stripes = check_crossing_stripes( - global_info, rec->start, - global_info->nodesize); - check_extent_type(rec); - maybe_free_extent_rec(extent_cache, rec); - return ret; - } + if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC)) + goto full_backref; - ret = add_extent_rec_nolookup(extent_cache, tmpl); + owner = btrfs_header_owner(buf); + if (owner == ri->objectid) + goto normal; - return ret; + tback = find_tree_backref(rec, 0, owner); + if (!tback) + goto full_backref; +normal: + *flags = 0; + if (rec->flag_block_full_backref != FLAG_UNSET && + rec->flag_block_full_backref != 0) + rec->bad_full_backref = 1; + return 0; +full_backref: + *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF; + if (rec->flag_block_full_backref != FLAG_UNSET && + rec->flag_block_full_backref != 1) + rec->bad_full_backref = 1; + return 0; } -static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr, - u64 parent, u64 root, int found_ref) +static void report_mismatch_key_root(u8 key_type, u64 rootid) { - struct extent_record *rec; - struct tree_backref *back; - struct cache_extent *cache; - int ret; - bool insert = false; + fprintf(stderr, "Invalid key type("); + print_key_type(stderr, 0, key_type); + fprintf(stderr, ") found in root("); + print_objectid(stderr, rootid, 0); + fprintf(stderr, ")\n"); +} - cache = lookup_cache_extent(extent_cache, bytenr, 1); - if (!cache) { - struct extent_record tmpl; +/* + * Check if the key is valid with its extent buffer. 
+ * + * This is a early check in case invalid key exists in a extent buffer + * This is not comprehensive yet, but should prevent wrong key/item passed + * further + */ +static int check_type_with_root(u64 rootid, u8 key_type) +{ + switch (key_type) { + /* Only valid in chunk tree */ + case BTRFS_DEV_ITEM_KEY: + case BTRFS_CHUNK_ITEM_KEY: + if (rootid != BTRFS_CHUNK_TREE_OBJECTID) + goto err; + break; + /* valid in csum and log tree */ + case BTRFS_CSUM_TREE_OBJECTID: + if (!(rootid == BTRFS_TREE_LOG_OBJECTID || + is_fstree(rootid))) + goto err; + break; + case BTRFS_EXTENT_ITEM_KEY: + case BTRFS_METADATA_ITEM_KEY: + case BTRFS_BLOCK_GROUP_ITEM_KEY: + if (rootid != BTRFS_EXTENT_TREE_OBJECTID) + goto err; + break; + case BTRFS_ROOT_ITEM_KEY: + if (rootid != BTRFS_ROOT_TREE_OBJECTID) + goto err; + break; + case BTRFS_DEV_EXTENT_KEY: + if (rootid != BTRFS_DEV_TREE_OBJECTID) + goto err; + break; + } + return 0; +err: + report_mismatch_key_root(key_type, rootid); + return -EINVAL; +} - memset(&tmpl, 0, sizeof(tmpl)); - tmpl.start = bytenr; - tmpl.nr = 1; - tmpl.metadata = 1; - tmpl.max_size = 1; +static int run_next_block(struct btrfs_root *root, + struct block_info *bits, + int bits_nr, + u64 *last, + struct cache_tree *pending, + struct cache_tree *seen, + struct cache_tree *reada, + struct cache_tree *nodes, + struct cache_tree *extent_cache, + struct cache_tree *chunk_cache, + struct rb_root *dev_cache, + struct block_group_tree *block_group_cache, + struct device_extent_tree *dev_extent_cache, + struct root_item_record *ri) +{ + struct btrfs_fs_info *fs_info = root->fs_info; + struct extent_buffer *buf; + struct extent_record *rec = NULL; + u64 bytenr; + u32 size; + u64 parent; + u64 owner; + u64 flags; + u64 ptr; + u64 gen = 0; + int ret = 0; + int i; + int nritems; + struct btrfs_key key; + struct cache_extent *cache; + int reada_bits; - ret = add_extent_rec_nolookup(extent_cache, &tmpl); - if (ret) - return ret; + nritems = pick_next_pending(pending, reada, 
nodes, *last, bits, + bits_nr, &reada_bits); + if (nritems == 0) + return 1; - /* really a bug in cache_extent implement now */ - cache = lookup_cache_extent(extent_cache, bytenr, 1); - if (!cache) - return -ENOENT; - } + if (!reada_bits) { + for(i = 0; i < nritems; i++) { + ret = add_cache_extent(reada, bits[i].start, + bits[i].size); + if (ret == -EEXIST) + continue; - rec = container_of(cache, struct extent_record, cache); - if (rec->start != bytenr) { - /* - * Several cause, from unaligned bytenr to over lapping extents - */ - return -EEXIST; + /* fixme, get the parent transid */ + readahead_tree_block(fs_info, bits[i].start, 0); + } } + *last = bits[0].start; + bytenr = bits[0].start; + size = bits[0].size; - back = find_tree_backref(rec, parent, root); - if (!back) { - back = alloc_tree_backref(rec, parent, root); - if (!back) - return -ENOMEM; - insert = true; + cache = lookup_cache_extent(pending, bytenr, size); + if (cache) { + remove_cache_extent(pending, cache); + free(cache); } - - if (found_ref) { - if (back->node.found_ref) { - fprintf(stderr, "Extent back ref already exists " - "for %llu parent %llu root %llu \n", - (unsigned long long)bytenr, - (unsigned long long)parent, - (unsigned long long)root); - } - back->node.found_ref = 1; - } else { - if (back->node.found_extent_tree) { - fprintf(stderr, "Extent back ref already exists " - "for %llu parent %llu root %llu \n", - (unsigned long long)bytenr, - (unsigned long long)parent, - (unsigned long long)root); - } - back->node.found_extent_tree = 1; + cache = lookup_cache_extent(reada, bytenr, size); + if (cache) { + remove_cache_extent(reada, cache); + free(cache); + } + cache = lookup_cache_extent(nodes, bytenr, size); + if (cache) { + remove_cache_extent(nodes, cache); + free(cache); + } + cache = lookup_cache_extent(extent_cache, bytenr, size); + if (cache) { + rec = container_of(cache, struct extent_record, cache); + gen = rec->parent_generation; } - if (insert) - 
WARN_ON(rb_insert(&rec->backref_tree, &back->node.node, - compare_extent_backref)); - check_extent_type(rec); - maybe_free_extent_rec(extent_cache, rec); - return 0; -} - -static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr, - u64 parent, u64 root, u64 owner, u64 offset, - u32 num_refs, int found_ref, u64 max_size) -{ - struct extent_record *rec; - struct data_backref *back; - struct cache_extent *cache; - int ret; - bool insert = false; - - cache = lookup_cache_extent(extent_cache, bytenr, 1); - if (!cache) { - struct extent_record tmpl; - - memset(&tmpl, 0, sizeof(tmpl)); - tmpl.start = bytenr; - tmpl.nr = 1; - tmpl.max_size = max_size; - - ret = add_extent_rec_nolookup(extent_cache, &tmpl); - if (ret) - return ret; - cache = lookup_cache_extent(extent_cache, bytenr, 1); - if (!cache) - abort(); + /* fixme, get the real parent transid */ + buf = read_tree_block(root->fs_info, bytenr, gen); + if (!extent_buffer_uptodate(buf)) { + record_bad_block_io(root->fs_info, + extent_cache, bytenr, size); + goto out; } - rec = container_of(cache, struct extent_record, cache); - if (rec->max_size < max_size) - rec->max_size = max_size; + nritems = btrfs_header_nritems(buf); - /* - * If found_ref is set then max_size is the real size and must match the - * existing refs. So if we have already found a ref then we need to - * make sure that this ref matches the existing one, otherwise we need - * to add a new backref so we can notice that the backrefs don't match - * and we need to figure out who is telling the truth. This is to - * account for that awful fsync bug I introduced where we'd end up with - * a btrfs_file_extent_item that would have its length include multiple - * prealloc extents or point inside of a prealloc extent. 
- */ - back = find_data_backref(rec, parent, root, owner, offset, found_ref, - bytenr, max_size); - if (!back) { - back = alloc_data_backref(rec, parent, root, owner, offset, - max_size); - BUG_ON(!back); - insert = true; + flags = 0; + if (!init_extent_tree) { + ret = btrfs_lookup_extent_info(NULL, root, bytenr, + btrfs_header_level(buf), 1, NULL, + &flags); + if (ret < 0) { + ret = calc_extent_flag(extent_cache, buf, ri, &flags); + if (ret < 0) { + fprintf(stderr, "Couldn't calc extent flags\n"); + flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF; + } + } + } else { + flags = 0; + ret = calc_extent_flag(extent_cache, buf, ri, &flags); + if (ret < 0) { + fprintf(stderr, "Couldn't calc extent flags\n"); + flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF; + } } - if (found_ref) { - BUG_ON(num_refs != 1); - if (back->node.found_ref) - BUG_ON(back->bytes != max_size); - back->node.found_ref = 1; - back->found_ref += 1; - if (back->bytes != max_size || back->disk_bytenr != bytenr) { - back->bytes = max_size; - back->disk_bytenr = bytenr; - - /* Need to reinsert if not already in the tree */ - if (!insert) { - rb_erase(&back->node.node, &rec->backref_tree); - insert = true; + if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) { + if (ri != NULL && + ri->objectid != BTRFS_TREE_RELOC_OBJECTID && + ri->objectid == btrfs_header_owner(buf)) { + /* + * Ok we got to this block from it's original owner and + * we have FULL_BACKREF set. Relocation can leave + * converted blocks over so this is altogether possible, + * however it's not possible if the generation > the + * last snapshot, so check for this case. 
+ */ + if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) && + btrfs_header_generation(buf) > ri->last_snapshot) { + flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF; + rec->bad_full_backref = 1; } } - rec->refs += 1; - rec->content_checked = 1; - rec->owner_ref_checked = 1; } else { - if (back->node.found_extent_tree) { - fprintf(stderr, "Extent back ref already exists " - "for %llu parent %llu root %llu " - "owner %llu offset %llu num_refs %lu\n", - (unsigned long long)bytenr, - (unsigned long long)parent, - (unsigned long long)root, - (unsigned long long)owner, - (unsigned long long)offset, - (unsigned long)num_refs); + if (ri != NULL && + (ri->objectid == BTRFS_TREE_RELOC_OBJECTID || + btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) { + flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF; + rec->bad_full_backref = 1; } - back->num_refs = num_refs; - back->node.found_extent_tree = 1; } - if (insert) - WARN_ON(rb_insert(&rec->backref_tree, &back->node.node, - compare_extent_backref)); - maybe_free_extent_rec(extent_cache, rec); - return 0; -} + if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) { + rec->flag_block_full_backref = 1; + parent = bytenr; + owner = 0; + } else { + rec->flag_block_full_backref = 0; + parent = 0; + owner = btrfs_header_owner(buf); + } -static int add_pending(struct cache_tree *pending, - struct cache_tree *seen, u64 bytenr, u32 size) -{ - int ret; - ret = add_cache_extent(seen, bytenr, size); + ret = check_block(root, extent_cache, buf, flags); if (ret) - return ret; - add_cache_extent(pending, bytenr, size); - return 0; -} - -static int pick_next_pending(struct cache_tree *pending, - struct cache_tree *reada, - struct cache_tree *nodes, - u64 last, struct block_info *bits, int bits_nr, - int *reada_bits) -{ - unsigned long node_start = last; - struct cache_extent *cache; - int ret; + goto out; - cache = search_cache_extent(reada, 0); - if (cache) { - bits[0].start = cache->start; - bits[0].size = cache->size; - *reada_bits = 1; - return 1; - } - *reada_bits = 
0; - if (node_start > 32768) - node_start -= 32768; + if (btrfs_is_leaf(buf)) { + btree_space_waste += btrfs_leaf_free_space(root, buf); + for (i = 0; i < nritems; i++) { + struct btrfs_file_extent_item *fi; + btrfs_item_key_to_cpu(buf, &key, i); + /* + * Check key type against the leaf owner. + * Could filter quite a lot of early error if + * owner is correct + */ + if (check_type_with_root(btrfs_header_owner(buf), + key.type)) { + fprintf(stderr, "ignoring invalid key\n"); + continue; + } + if (key.type == BTRFS_EXTENT_ITEM_KEY) { + process_extent_item(root, extent_cache, buf, + i); + continue; + } + if (key.type == BTRFS_METADATA_ITEM_KEY) { + process_extent_item(root, extent_cache, buf, + i); + continue; + } + if (key.type == BTRFS_EXTENT_CSUM_KEY) { + total_csum_bytes += + btrfs_item_size_nr(buf, i); + continue; + } + if (key.type == BTRFS_CHUNK_ITEM_KEY) { + process_chunk_item(chunk_cache, &key, buf, i); + continue; + } + if (key.type == BTRFS_DEV_ITEM_KEY) { + process_device_item(dev_cache, &key, buf, i); + continue; + } + if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) { + process_block_group_item(block_group_cache, + &key, buf, i); + continue; + } + if (key.type == BTRFS_DEV_EXTENT_KEY) { + process_device_extent_item(dev_extent_cache, + &key, buf, i); + continue; - cache = search_cache_extent(nodes, node_start); - if (!cache) - cache = search_cache_extent(nodes, 0); + } + if (key.type == BTRFS_EXTENT_REF_V0_KEY) { +#ifdef BTRFS_COMPAT_EXTENT_TREE_V0 + process_extent_ref_v0(extent_cache, buf, i); +#else + BUG(); +#endif + continue; + } - if (!cache) { - cache = search_cache_extent(pending, 0); - if (!cache) - return 0; - ret = 0; - do { - bits[ret].start = cache->start; - bits[ret].size = cache->size; - cache = next_cache_extent(cache); - ret++; - } while (cache && ret < bits_nr); - return ret; - } + if (key.type == BTRFS_TREE_BLOCK_REF_KEY) { + ret = add_tree_backref(extent_cache, + key.objectid, 0, key.offset, 0); + if (ret < 0) + error( + "add_tree_backref 
failed (leaf tree block): %s", + strerror(-ret)); + continue; + } + if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) { + ret = add_tree_backref(extent_cache, + key.objectid, key.offset, 0, 0); + if (ret < 0) + error( + "add_tree_backref failed (leaf shared block): %s", + strerror(-ret)); + continue; + } + if (key.type == BTRFS_EXTENT_DATA_REF_KEY) { + struct btrfs_extent_data_ref *ref; + ref = btrfs_item_ptr(buf, i, + struct btrfs_extent_data_ref); + add_data_backref(extent_cache, + key.objectid, 0, + btrfs_extent_data_ref_root(buf, ref), + btrfs_extent_data_ref_objectid(buf, + ref), + btrfs_extent_data_ref_offset(buf, ref), + btrfs_extent_data_ref_count(buf, ref), + 0, root->fs_info->sectorsize); + continue; + } + if (key.type == BTRFS_SHARED_DATA_REF_KEY) { + struct btrfs_shared_data_ref *ref; + ref = btrfs_item_ptr(buf, i, + struct btrfs_shared_data_ref); + add_data_backref(extent_cache, + key.objectid, key.offset, 0, 0, 0, + btrfs_shared_data_ref_count(buf, ref), + 0, root->fs_info->sectorsize); + continue; + } + if (key.type == BTRFS_ORPHAN_ITEM_KEY) { + struct bad_item *bad; - ret = 0; - do { - bits[ret].start = cache->start; - bits[ret].size = cache->size; - cache = next_cache_extent(cache); - ret++; - } while (cache && ret < bits_nr); + if (key.objectid == BTRFS_ORPHAN_OBJECTID) + continue; + if (!owner) + continue; + bad = malloc(sizeof(struct bad_item)); + if (!bad) + continue; + INIT_LIST_HEAD(&bad->list); + memcpy(&bad->key, &key, + sizeof(struct btrfs_key)); + bad->root_id = owner; + list_add_tail(&bad->list, &delete_items); + continue; + } + if (key.type != BTRFS_EXTENT_DATA_KEY) + continue; + fi = btrfs_item_ptr(buf, i, + struct btrfs_file_extent_item); + if (btrfs_file_extent_type(buf, fi) == + BTRFS_FILE_EXTENT_INLINE) + continue; + if (btrfs_file_extent_disk_bytenr(buf, fi) == 0) + continue; - if (bits_nr - ret > 8) { - u64 lookup = bits[0].start + bits[0].size; - struct cache_extent *next; - next = search_cache_extent(pending, lookup); - while(next) { 
- if (next->start - lookup > 32768) - break; - bits[ret].start = next->start; - bits[ret].size = next->size; - lookup = next->start + next->size; - ret++; - if (ret == bits_nr) - break; - next = next_cache_extent(next); - if (!next) - break; + data_bytes_allocated += + btrfs_file_extent_disk_num_bytes(buf, fi); + if (data_bytes_allocated < root->fs_info->sectorsize) { + abort(); + } + data_bytes_referenced += + btrfs_file_extent_num_bytes(buf, fi); + add_data_backref(extent_cache, + btrfs_file_extent_disk_bytenr(buf, fi), + parent, owner, key.objectid, key.offset - + btrfs_file_extent_offset(buf, fi), 1, 1, + btrfs_file_extent_disk_num_bytes(buf, fi)); } - } - return ret; -} - -static void free_chunk_record(struct cache_extent *cache) -{ - struct chunk_record *rec; - - rec = container_of(cache, struct chunk_record, cache); - list_del_init(&rec->list); - list_del_init(&rec->dextents); - free(rec); -} + } else { + int level; + struct btrfs_key first_key; -void free_chunk_cache_tree(struct cache_tree *chunk_cache) -{ - cache_tree_free_extents(chunk_cache, free_chunk_record); -} + first_key.objectid = 0; -static void free_device_record(struct rb_node *node) -{ - struct device_record *rec; + if (nritems > 0) + btrfs_item_key_to_cpu(buf, &first_key, 0); + level = btrfs_header_level(buf); + for (i = 0; i < nritems; i++) { + struct extent_record tmpl; - rec = container_of(node, struct device_record, node); - free(rec); -} + ptr = btrfs_node_blockptr(buf, i); + size = root->fs_info->nodesize; + btrfs_node_key_to_cpu(buf, &key, i); + if (ri != NULL) { + if ((level == ri->drop_level) + && is_dropped_key(&key, &ri->drop_key)) { + continue; + } + } -FREE_RB_BASED_TREE(device_cache, free_device_record); + memset(&tmpl, 0, sizeof(tmpl)); + btrfs_cpu_key_to_disk(&tmpl.parent_key, &key); + tmpl.parent_generation = btrfs_node_ptr_generation(buf, i); + tmpl.start = ptr; + tmpl.nr = size; + tmpl.refs = 1; + tmpl.metadata = 1; + tmpl.max_size = size; + ret = 
add_extent_rec(extent_cache, &tmpl); + if (ret < 0) + goto out; -int insert_block_group_record(struct block_group_tree *tree, - struct block_group_record *bg_rec) -{ - int ret; - - ret = insert_cache_extent(&tree->tree, &bg_rec->cache); - if (ret) - return ret; - - list_add_tail(&bg_rec->list, &tree->block_groups); - return 0; -} - -static void free_block_group_record(struct cache_extent *cache) -{ - struct block_group_record *rec; - - rec = container_of(cache, struct block_group_record, cache); - list_del_init(&rec->list); - free(rec); -} - -void free_block_group_tree(struct block_group_tree *tree) -{ - cache_tree_free_extents(&tree->tree, free_block_group_record); -} - -int insert_device_extent_record(struct device_extent_tree *tree, - struct device_extent_record *de_rec) -{ - int ret; - - /* - * Device extent is a bit different from the other extents, because - * the extents which belong to the different devices may have the - * same start and size, so we need use the special extent cache - * search/insert functions. 
- */ - ret = insert_cache_extent2(&tree->tree, &de_rec->cache); - if (ret) - return ret; - - list_add_tail(&de_rec->chunk_list, &tree->no_chunk_orphans); - list_add_tail(&de_rec->device_list, &tree->no_device_orphans); - return 0; -} - -static void free_device_extent_record(struct cache_extent *cache) -{ - struct device_extent_record *rec; - - rec = container_of(cache, struct device_extent_record, cache); - if (!list_empty(&rec->chunk_list)) - list_del_init(&rec->chunk_list); - if (!list_empty(&rec->device_list)) - list_del_init(&rec->device_list); - free(rec); -} - -void free_device_extent_tree(struct device_extent_tree *tree) -{ - cache_tree_free_extents(&tree->tree, free_device_extent_record); -} - -#ifdef BTRFS_COMPAT_EXTENT_TREE_V0 -static int process_extent_ref_v0(struct cache_tree *extent_cache, - struct extent_buffer *leaf, int slot) -{ - struct btrfs_extent_ref_v0 *ref0; - struct btrfs_key key; - int ret; - - btrfs_item_key_to_cpu(leaf, &key, slot); - ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0); - if (btrfs_ref_objectid_v0(leaf, ref0) < BTRFS_FIRST_FREE_OBJECTID) { - ret = add_tree_backref(extent_cache, key.objectid, key.offset, - 0, 0); - } else { - ret = add_data_backref(extent_cache, key.objectid, key.offset, - 0, 0, 0, btrfs_ref_count_v0(leaf, ref0), 0, 0); - } - return ret; -} -#endif - -struct chunk_record *btrfs_new_chunk_record(struct extent_buffer *leaf, - struct btrfs_key *key, - int slot) -{ - struct btrfs_chunk *ptr; - struct chunk_record *rec; - int num_stripes, i; - - ptr = btrfs_item_ptr(leaf, slot, struct btrfs_chunk); - num_stripes = btrfs_chunk_num_stripes(leaf, ptr); - - rec = calloc(1, btrfs_chunk_record_size(num_stripes)); - if (!rec) { - fprintf(stderr, "memory allocation failed\n"); - exit(-1); - } - - INIT_LIST_HEAD(&rec->list); - INIT_LIST_HEAD(&rec->dextents); - rec->bg_rec = NULL; - - rec->cache.start = key->offset; - rec->cache.size = btrfs_chunk_length(leaf, ptr); - - rec->generation = 
btrfs_header_generation(leaf); - - rec->objectid = key->objectid; - rec->type = key->type; - rec->offset = key->offset; - - rec->length = rec->cache.size; - rec->owner = btrfs_chunk_owner(leaf, ptr); - rec->stripe_len = btrfs_chunk_stripe_len(leaf, ptr); - rec->type_flags = btrfs_chunk_type(leaf, ptr); - rec->io_width = btrfs_chunk_io_width(leaf, ptr); - rec->io_align = btrfs_chunk_io_align(leaf, ptr); - rec->sector_size = btrfs_chunk_sector_size(leaf, ptr); - rec->num_stripes = num_stripes; - rec->sub_stripes = btrfs_chunk_sub_stripes(leaf, ptr); - - for (i = 0; i < rec->num_stripes; ++i) { - rec->stripes[i].devid = - btrfs_stripe_devid_nr(leaf, ptr, i); - rec->stripes[i].offset = - btrfs_stripe_offset_nr(leaf, ptr, i); - read_extent_buffer(leaf, rec->stripes[i].dev_uuid, - (unsigned long)btrfs_stripe_dev_uuid_nr(ptr, i), - BTRFS_UUID_SIZE); - } - - return rec; -} - -static int process_chunk_item(struct cache_tree *chunk_cache, - struct btrfs_key *key, struct extent_buffer *eb, - int slot) -{ - struct chunk_record *rec; - struct btrfs_chunk *chunk; - int ret = 0; - - chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk); - /* - * Do extra check for this chunk item, - * - * It's still possible one can craft a leaf with CHUNK_ITEM, with - * wrong onwer(3) out of chunk tree, to pass both chunk tree check - * and owner<->key_type check. 
- */ - ret = btrfs_check_chunk_valid(global_info, eb, chunk, slot, - key->offset); - if (ret < 0) { - error("chunk(%llu, %llu) is not valid, ignore it", - key->offset, btrfs_chunk_length(eb, chunk)); - return 0; - } - rec = btrfs_new_chunk_record(eb, key, slot); - ret = insert_cache_extent(chunk_cache, &rec->cache); - if (ret) { - fprintf(stderr, "Chunk[%llu, %llu] existed.\n", - rec->offset, rec->length); - free(rec); - } - - return ret; -} - -static int process_device_item(struct rb_root *dev_cache, - struct btrfs_key *key, struct extent_buffer *eb, int slot) -{ - struct btrfs_dev_item *ptr; - struct device_record *rec; - int ret = 0; - - ptr = btrfs_item_ptr(eb, - slot, struct btrfs_dev_item); - - rec = malloc(sizeof(*rec)); - if (!rec) { - fprintf(stderr, "memory allocation failed\n"); - return -ENOMEM; - } - - rec->devid = key->offset; - rec->generation = btrfs_header_generation(eb); - - rec->objectid = key->objectid; - rec->type = key->type; - rec->offset = key->offset; - - rec->devid = btrfs_device_id(eb, ptr); - rec->total_byte = btrfs_device_total_bytes(eb, ptr); - rec->byte_used = btrfs_device_bytes_used(eb, ptr); - - ret = rb_insert(dev_cache, &rec->node, device_record_compare); - if (ret) { - fprintf(stderr, "Device[%llu] existed.\n", rec->devid); - free(rec); - } - - return ret; -} - -struct block_group_record * -btrfs_new_block_group_record(struct extent_buffer *leaf, struct btrfs_key *key, - int slot) -{ - struct btrfs_block_group_item *ptr; - struct block_group_record *rec; - - rec = calloc(1, sizeof(*rec)); - if (!rec) { - fprintf(stderr, "memory allocation failed\n"); - exit(-1); - } - - rec->cache.start = key->objectid; - rec->cache.size = key->offset; - - rec->generation = btrfs_header_generation(leaf); - - rec->objectid = key->objectid; - rec->type = key->type; - rec->offset = key->offset; - - ptr = btrfs_item_ptr(leaf, slot, struct btrfs_block_group_item); - rec->flags = btrfs_disk_block_group_flags(leaf, ptr); - - INIT_LIST_HEAD(&rec->list); 
- - return rec; -} - -static int process_block_group_item(struct block_group_tree *block_group_cache, - struct btrfs_key *key, - struct extent_buffer *eb, int slot) -{ - struct block_group_record *rec; - int ret = 0; - - rec = btrfs_new_block_group_record(eb, key, slot); - ret = insert_block_group_record(block_group_cache, rec); - if (ret) { - fprintf(stderr, "Block Group[%llu, %llu] existed.\n", - rec->objectid, rec->offset); - free(rec); - } - - return ret; -} - -struct device_extent_record * -btrfs_new_device_extent_record(struct extent_buffer *leaf, - struct btrfs_key *key, int slot) -{ - struct device_extent_record *rec; - struct btrfs_dev_extent *ptr; - - rec = calloc(1, sizeof(*rec)); - if (!rec) { - fprintf(stderr, "memory allocation failed\n"); - exit(-1); - } - - rec->cache.objectid = key->objectid; - rec->cache.start = key->offset; - - rec->generation = btrfs_header_generation(leaf); - - rec->objectid = key->objectid; - rec->type = key->type; - rec->offset = key->offset; - - ptr = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent); - rec->chunk_objecteid = - btrfs_dev_extent_chunk_objectid(leaf, ptr); - rec->chunk_offset = - btrfs_dev_extent_chunk_offset(leaf, ptr); - rec->length = btrfs_dev_extent_length(leaf, ptr); - rec->cache.size = rec->length; - - INIT_LIST_HEAD(&rec->chunk_list); - INIT_LIST_HEAD(&rec->device_list); - - return rec; -} - -static int -process_device_extent_item(struct device_extent_tree *dev_extent_cache, - struct btrfs_key *key, struct extent_buffer *eb, - int slot) -{ - struct device_extent_record *rec; - int ret; - - rec = btrfs_new_device_extent_record(eb, key, slot); - ret = insert_device_extent_record(dev_extent_cache, rec); - if (ret) { - fprintf(stderr, - "Device extent[%llu, %llu, %llu] existed.\n", - rec->objectid, rec->offset, rec->length); - free(rec); - } - - return ret; -} - -static int process_extent_item(struct btrfs_root *root, - struct cache_tree *extent_cache, - struct extent_buffer *eb, int slot) -{ - struct 
btrfs_extent_item *ei; - struct btrfs_extent_inline_ref *iref; - struct btrfs_extent_data_ref *dref; - struct btrfs_shared_data_ref *sref; - struct btrfs_key key; - struct extent_record tmpl; - unsigned long end; - unsigned long ptr; - int ret; - int type; - u32 item_size = btrfs_item_size_nr(eb, slot); - u64 refs = 0; - u64 offset; - u64 num_bytes; - int metadata = 0; - - btrfs_item_key_to_cpu(eb, &key, slot); - - if (key.type == BTRFS_METADATA_ITEM_KEY) { - metadata = 1; - num_bytes = root->fs_info->nodesize; - } else { - num_bytes = key.offset; - } - - if (!IS_ALIGNED(key.objectid, root->fs_info->sectorsize)) { - error("ignoring invalid extent, bytenr %llu is not aligned to %u", - key.objectid, root->fs_info->sectorsize); - return -EIO; - } - if (item_size < sizeof(*ei)) { -#ifdef BTRFS_COMPAT_EXTENT_TREE_V0 - struct btrfs_extent_item_v0 *ei0; - if (item_size != sizeof(*ei0)) { - error( - "invalid extent item format: ITEM[%llu %u %llu] leaf: %llu slot: %d", - key.objectid, key.type, key.offset, - btrfs_header_bytenr(eb), slot); - BUG(); - } - ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0); - refs = btrfs_extent_refs_v0(eb, ei0); -#else - BUG(); -#endif - memset(&tmpl, 0, sizeof(tmpl)); - tmpl.start = key.objectid; - tmpl.nr = num_bytes; - tmpl.extent_item_refs = refs; - tmpl.metadata = metadata; - tmpl.found_rec = 1; - tmpl.max_size = num_bytes; - - return add_extent_rec(extent_cache, &tmpl); - } - - ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item); - refs = btrfs_extent_refs(eb, ei); - if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK) - metadata = 1; - else - metadata = 0; - if (metadata && num_bytes != root->fs_info->nodesize) { - error("ignore invalid metadata extent, length %llu does not equal to %u", - num_bytes, root->fs_info->nodesize); - return -EIO; - } - if (!metadata && !IS_ALIGNED(num_bytes, root->fs_info->sectorsize)) { - error("ignore invalid data extent, length %llu is not aligned to %u", - num_bytes, 
root->fs_info->sectorsize); - return -EIO; - } - - memset(&tmpl, 0, sizeof(tmpl)); - tmpl.start = key.objectid; - tmpl.nr = num_bytes; - tmpl.extent_item_refs = refs; - tmpl.metadata = metadata; - tmpl.found_rec = 1; - tmpl.max_size = num_bytes; - add_extent_rec(extent_cache, &tmpl); - - ptr = (unsigned long)(ei + 1); - if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK && - key.type == BTRFS_EXTENT_ITEM_KEY) - ptr += sizeof(struct btrfs_tree_block_info); - - end = (unsigned long)ei + item_size; - while (ptr < end) { - iref = (struct btrfs_extent_inline_ref *)ptr; - type = btrfs_extent_inline_ref_type(eb, iref); - offset = btrfs_extent_inline_ref_offset(eb, iref); - switch (type) { - case BTRFS_TREE_BLOCK_REF_KEY: - ret = add_tree_backref(extent_cache, key.objectid, - 0, offset, 0); - if (ret < 0) - error( - "add_tree_backref failed (extent items tree block): %s", - strerror(-ret)); - break; - case BTRFS_SHARED_BLOCK_REF_KEY: - ret = add_tree_backref(extent_cache, key.objectid, - offset, 0, 0); - if (ret < 0) - error( - "add_tree_backref failed (extent items shared block): %s", - strerror(-ret)); - break; - case BTRFS_EXTENT_DATA_REF_KEY: - dref = (struct btrfs_extent_data_ref *)(&iref->offset); - add_data_backref(extent_cache, key.objectid, 0, - btrfs_extent_data_ref_root(eb, dref), - btrfs_extent_data_ref_objectid(eb, - dref), - btrfs_extent_data_ref_offset(eb, dref), - btrfs_extent_data_ref_count(eb, dref), - 0, num_bytes); - break; - case BTRFS_SHARED_DATA_REF_KEY: - sref = (struct btrfs_shared_data_ref *)(iref + 1); - add_data_backref(extent_cache, key.objectid, offset, - 0, 0, 0, - btrfs_shared_data_ref_count(eb, sref), - 0, num_bytes); - break; - default: - fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n", - key.objectid, key.type, num_bytes); - goto out; - } - ptr += btrfs_extent_inline_ref_size(type); - } - WARN_ON(ptr > end); -out: - return 0; -} - -static int check_cache_range(struct btrfs_root *root, - struct btrfs_block_group_cache 
*cache, - u64 offset, u64 bytes) -{ - struct btrfs_free_space *entry; - u64 *logical; - u64 bytenr; - int stripe_len; - int i, nr, ret; - - for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) { - bytenr = btrfs_sb_offset(i); - ret = btrfs_rmap_block(root->fs_info, - cache->key.objectid, bytenr, 0, - &logical, &nr, &stripe_len); - if (ret) - return ret; - - while (nr--) { - if (logical[nr] + stripe_len <= offset) - continue; - if (offset + bytes <= logical[nr]) - continue; - if (logical[nr] == offset) { - if (stripe_len >= bytes) { - free(logical); - return 0; - } - bytes -= stripe_len; - offset += stripe_len; - } else if (logical[nr] < offset) { - if (logical[nr] + stripe_len >= - offset + bytes) { - free(logical); - return 0; - } - bytes = (offset + bytes) - - (logical[nr] + stripe_len); - offset = logical[nr] + stripe_len; - } else { - /* - * Could be tricky, the super may land in the - * middle of the area we're checking. First - * check the easiest case, it's at the end. - */ - if (logical[nr] + stripe_len >= - bytes + offset) { - bytes = logical[nr] - offset; - continue; - } - - /* Check the left side */ - ret = check_cache_range(root, cache, - offset, - logical[nr] - offset); - if (ret) { - free(logical); - return ret; - } - - /* Now we continue with the right side */ - bytes = (offset + bytes) - - (logical[nr] + stripe_len); - offset = logical[nr] + stripe_len; - } - } - - free(logical); - } - - entry = btrfs_find_free_space(cache->free_space_ctl, offset, bytes); - if (!entry) { - fprintf(stderr, "There is no free space entry for %Lu-%Lu\n", - offset, offset+bytes); - return -EINVAL; - } - - if (entry->offset != offset) { - fprintf(stderr, "Wanted offset %Lu, found %Lu\n", offset, - entry->offset); - return -EINVAL; - } - - if (entry->bytes != bytes) { - fprintf(stderr, "Wanted bytes %Lu, found %Lu for off %Lu\n", - bytes, entry->bytes, offset); - return -EINVAL; - } - - unlink_free_space(cache->free_space_ctl, entry); - free(entry); - return 0; -} - -static int 
verify_space_cache(struct btrfs_root *root, - struct btrfs_block_group_cache *cache) -{ - struct btrfs_path path; - struct extent_buffer *leaf; - struct btrfs_key key; - u64 last; - int ret = 0; - - root = root->fs_info->extent_root; - - last = max_t(u64, cache->key.objectid, BTRFS_SUPER_INFO_OFFSET); - - btrfs_init_path(&path); - key.objectid = last; - key.offset = 0; - key.type = BTRFS_EXTENT_ITEM_KEY; - ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0); - if (ret < 0) - goto out; - ret = 0; - while (1) { - if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) { - ret = btrfs_next_leaf(root, &path); - if (ret < 0) - goto out; - if (ret > 0) { - ret = 0; - break; - } - } - leaf = path.nodes[0]; - btrfs_item_key_to_cpu(leaf, &key, path.slots[0]); - if (key.objectid >= cache->key.offset + cache->key.objectid) - break; - if (key.type != BTRFS_EXTENT_ITEM_KEY && - key.type != BTRFS_METADATA_ITEM_KEY) { - path.slots[0]++; - continue; - } - - if (last == key.objectid) { - if (key.type == BTRFS_EXTENT_ITEM_KEY) - last = key.objectid + key.offset; - else - last = key.objectid + root->fs_info->nodesize; - path.slots[0]++; - continue; - } - - ret = check_cache_range(root, cache, last, - key.objectid - last); - if (ret) - break; - if (key.type == BTRFS_EXTENT_ITEM_KEY) - last = key.objectid + key.offset; - else - last = key.objectid + root->fs_info->nodesize; - path.slots[0]++; - } - - if (last < cache->key.objectid + cache->key.offset) - ret = check_cache_range(root, cache, last, - cache->key.objectid + - cache->key.offset - last); - -out: - btrfs_release_path(&path); - - if (!ret && - !RB_EMPTY_ROOT(&cache->free_space_ctl->free_space_offset)) { - fprintf(stderr, "There are still entries left in the space " - "cache\n"); - ret = -EINVAL; - } - - return ret; -} - -static int check_space_cache(struct btrfs_root *root) -{ - struct btrfs_block_group_cache *cache; - u64 start = BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE; - int ret; - int error = 0; - - if 
(btrfs_super_cache_generation(root->fs_info->super_copy) != -1ULL && - btrfs_super_generation(root->fs_info->super_copy) != - btrfs_super_cache_generation(root->fs_info->super_copy)) { - printf("cache and super generation don't match, space cache " - "will be invalidated\n"); - return 0; - } - - if (ctx.progress_enabled) { - ctx.tp = TASK_FREE_SPACE; - task_start(ctx.info); - } - - while (1) { - cache = btrfs_lookup_first_block_group(root->fs_info, start); - if (!cache) - break; - - start = cache->key.objectid + cache->key.offset; - if (!cache->free_space_ctl) { - if (btrfs_init_free_space_ctl(cache, - root->fs_info->sectorsize)) { - ret = -ENOMEM; - break; - } - } else { - btrfs_remove_free_space_cache(cache); - } - - if (btrfs_fs_compat_ro(root->fs_info, FREE_SPACE_TREE)) { - ret = exclude_super_stripes(root, cache); - if (ret) { - fprintf(stderr, "could not exclude super stripes: %s\n", - strerror(-ret)); - error++; - continue; - } - ret = load_free_space_tree(root->fs_info, cache); - free_excluded_extents(root, cache); - if (ret < 0) { - fprintf(stderr, "could not load free space tree: %s\n", - strerror(-ret)); - error++; - continue; - } - error += ret; - } else { - ret = load_free_space_cache(root->fs_info, cache); - if (!ret) - continue; - } - - ret = verify_space_cache(root, cache); - if (ret) { - fprintf(stderr, "cache appears valid but isn't %Lu\n", - cache->key.objectid); - error++; - } - } - - task_stop(ctx.info); - - return error ? 
-EINVAL : 0; -} - -static int check_extent_csums(struct btrfs_root *root, u64 bytenr, - u64 num_bytes, unsigned long leaf_offset, - struct extent_buffer *eb) { - - struct btrfs_fs_info *fs_info = root->fs_info; - u64 offset = 0; - u16 csum_size = btrfs_super_csum_size(fs_info->super_copy); - char *data; - unsigned long csum_offset; - u32 csum; - u32 csum_expected; - u64 read_len; - u64 data_checked = 0; - u64 tmp; - int ret = 0; - int mirror; - int num_copies; - - if (num_bytes % fs_info->sectorsize) - return -EINVAL; - - data = malloc(num_bytes); - if (!data) - return -ENOMEM; - - while (offset < num_bytes) { - mirror = 0; -again: - read_len = num_bytes - offset; - /* read as much space once a time */ - ret = read_extent_data(fs_info, data + offset, - bytenr + offset, &read_len, mirror); - if (ret) - goto out; - data_checked = 0; - /* verify every 4k data's checksum */ - while (data_checked < read_len) { - csum = ~(u32)0; - tmp = offset + data_checked; - - csum = btrfs_csum_data((char *)data + tmp, - csum, fs_info->sectorsize); - btrfs_csum_final(csum, (u8 *)&csum); - - csum_offset = leaf_offset + - tmp / fs_info->sectorsize * csum_size; - read_extent_buffer(eb, (char *)&csum_expected, - csum_offset, csum_size); - /* try another mirror */ - if (csum != csum_expected) { - fprintf(stderr, "mirror %d bytenr %llu csum %u expected csum %u\n", - mirror, bytenr + tmp, - csum, csum_expected); - num_copies = btrfs_num_copies(root->fs_info, - bytenr, num_bytes); - if (mirror < num_copies - 1) { - mirror += 1; - goto again; - } - } - data_checked += fs_info->sectorsize; - } - offset += read_len; - } -out: - free(data); - return ret; -} - -static int check_extent_exists(struct btrfs_root *root, u64 bytenr, - u64 num_bytes) -{ - struct btrfs_path path; - struct extent_buffer *leaf; - struct btrfs_key key; - int ret; - - btrfs_init_path(&path); - key.objectid = bytenr; - key.type = BTRFS_EXTENT_ITEM_KEY; - key.offset = (u64)-1; - -again: - ret = btrfs_search_slot(NULL, 
root->fs_info->extent_root, &key, &path, - 0, 0); - if (ret < 0) { - fprintf(stderr, "Error looking up extent record %d\n", ret); - btrfs_release_path(&path); - return ret; - } else if (ret) { - if (path.slots[0] > 0) { - path.slots[0]--; - } else { - ret = btrfs_prev_leaf(root, &path); - if (ret < 0) { - goto out; - } else if (ret > 0) { - ret = 0; - goto out; - } - } - } - - btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]); - - /* - * Block group items come before extent items if they have the same - * bytenr, so walk back one more just in case. Dear future traveller, - * first congrats on mastering time travel. Now if it's not too much - * trouble could you go back to 2006 and tell Chris to make the - * BLOCK_GROUP_ITEM_KEY (and BTRFS_*_REF_KEY) lower than the - * EXTENT_ITEM_KEY please? - */ - while (key.type > BTRFS_EXTENT_ITEM_KEY) { - if (path.slots[0] > 0) { - path.slots[0]--; - } else { - ret = btrfs_prev_leaf(root, &path); - if (ret < 0) { - goto out; - } else if (ret > 0) { - ret = 0; - goto out; - } - } - btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]); - } - - while (num_bytes) { - if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) { - ret = btrfs_next_leaf(root, &path); - if (ret < 0) { - fprintf(stderr, "Error going to next leaf " - "%d\n", ret); - btrfs_release_path(&path); - return ret; - } else if (ret) { - break; - } - } - leaf = path.nodes[0]; - btrfs_item_key_to_cpu(leaf, &key, path.slots[0]); - if (key.type != BTRFS_EXTENT_ITEM_KEY) { - path.slots[0]++; - continue; - } - if (key.objectid + key.offset < bytenr) { - path.slots[0]++; - continue; - } - if (key.objectid > bytenr + num_bytes) - break; - - if (key.objectid == bytenr) { - if (key.offset >= num_bytes) { - num_bytes = 0; - break; - } - num_bytes -= key.offset; - bytenr += key.offset; - } else if (key.objectid < bytenr) { - if (key.objectid + key.offset >= bytenr + num_bytes) { - num_bytes = 0; - break; - } - num_bytes = (bytenr + num_bytes) - - (key.objectid + 
key.offset); - bytenr = key.objectid + key.offset; - } else { - if (key.objectid + key.offset < bytenr + num_bytes) { - u64 new_start = key.objectid + key.offset; - u64 new_bytes = bytenr + num_bytes - new_start; - - /* - * Weird case, the extent is in the middle of - * our range, we'll have to search one side - * and then the other. Not sure if this happens - * in real life, but no harm in coding it up - * anyway just in case. - */ - btrfs_release_path(&path); - ret = check_extent_exists(root, new_start, - new_bytes); - if (ret) { - fprintf(stderr, "Right section didn't " - "have a record\n"); - break; - } - num_bytes = key.objectid - bytenr; - goto again; - } - num_bytes = key.objectid - bytenr; - } - path.slots[0]++; - } - ret = 0; - -out: - if (num_bytes && !ret) { - fprintf(stderr, "There are no extents for csum range " - "%Lu-%Lu\n", bytenr, bytenr+num_bytes); - ret = 1; - } - - btrfs_release_path(&path); - return ret; -} - -static int check_csums(struct btrfs_root *root) -{ - struct btrfs_path path; - struct extent_buffer *leaf; - struct btrfs_key key; - u64 offset = 0, num_bytes = 0; - u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy); - int errors = 0; - int ret; - u64 data_len; - unsigned long leaf_offset; - - root = root->fs_info->csum_root; - if (!extent_buffer_uptodate(root->node)) { - fprintf(stderr, "No valid csum tree found\n"); - return -ENOENT; - } - - btrfs_init_path(&path); - key.objectid = BTRFS_EXTENT_CSUM_OBJECTID; - key.type = BTRFS_EXTENT_CSUM_KEY; - key.offset = 0; - ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0); - if (ret < 0) { - fprintf(stderr, "Error searching csum tree %d\n", ret); - btrfs_release_path(&path); - return ret; - } - - if (ret > 0 && path.slots[0]) - path.slots[0]--; - ret = 0; - - while (1) { - if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) { - ret = btrfs_next_leaf(root, &path); - if (ret < 0) { - fprintf(stderr, "Error going to next leaf " - "%d\n", ret); - break; - } - if (ret) - 
break; - } - leaf = path.nodes[0]; - - btrfs_item_key_to_cpu(leaf, &key, path.slots[0]); - if (key.type != BTRFS_EXTENT_CSUM_KEY) { - path.slots[0]++; - continue; - } - - data_len = (btrfs_item_size_nr(leaf, path.slots[0]) / - csum_size) * root->fs_info->sectorsize; - if (!check_data_csum) - goto skip_csum_check; - leaf_offset = btrfs_item_ptr_offset(leaf, path.slots[0]); - ret = check_extent_csums(root, key.offset, data_len, - leaf_offset, leaf); - if (ret) - break; -skip_csum_check: - if (!num_bytes) { - offset = key.offset; - } else if (key.offset != offset + num_bytes) { - ret = check_extent_exists(root, offset, num_bytes); - if (ret) { - fprintf(stderr, "Csum exists for %Lu-%Lu but " - "there is no extent record\n", - offset, offset+num_bytes); - errors++; - } - offset = key.offset; - num_bytes = 0; - } - num_bytes += data_len; - path.slots[0]++; - } - - btrfs_release_path(&path); - return errors; -} - -static int is_dropped_key(struct btrfs_key *key, - struct btrfs_key *drop_key) { - if (key->objectid < drop_key->objectid) - return 1; - else if (key->objectid == drop_key->objectid) { - if (key->type < drop_key->type) - return 1; - else if (key->type == drop_key->type) { - if (key->offset < drop_key->offset) - return 1; - } - } - return 0; -} - -/* - * Here are the rules for FULL_BACKREF. - * - * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set. - * 2) If btrfs_header_owner(buf) no longer points to buf then we have - * FULL_BACKREF set. - * 3) We cowed the block walking down a reloc tree. This is impossible to tell - * if it happened after the relocation occurred since we'll have dropped the - * reloc root, so it's entirely possible to have FULL_BACKREF set on buf and - * have no real way to know for sure. - * - * We process the blocks one root at a time, and we start from the lowest root - * objectid and go to the highest. 
So we can just lookup the owner backref for - * the record and if we don't find it then we know it doesn't exist and we have - * a FULL BACKREF. - * - * FIXME: if we ever start reclaiming root objectid's then we need to fix this - * assumption and simply indicate that we _think_ that the FULL BACKREF needs to - * be set or not and then we can check later once we've gathered all the refs. - */ -static int calc_extent_flag(struct cache_tree *extent_cache, - struct extent_buffer *buf, - struct root_item_record *ri, - u64 *flags) -{ - struct extent_record *rec; - struct cache_extent *cache; - struct tree_backref *tback; - u64 owner = 0; - - cache = lookup_cache_extent(extent_cache, buf->start, 1); - /* we have added this extent before */ - if (!cache) - return -ENOENT; - - rec = container_of(cache, struct extent_record, cache); - - /* - * Except file/reloc tree, we can not have - * FULL BACKREF MODE - */ - if (ri->objectid < BTRFS_FIRST_FREE_OBJECTID) - goto normal; - /* - * root node - */ - if (buf->start == ri->bytenr) - goto normal; - - if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC)) - goto full_backref; - - owner = btrfs_header_owner(buf); - if (owner == ri->objectid) - goto normal; - - tback = find_tree_backref(rec, 0, owner); - if (!tback) - goto full_backref; -normal: - *flags = 0; - if (rec->flag_block_full_backref != FLAG_UNSET && - rec->flag_block_full_backref != 0) - rec->bad_full_backref = 1; - return 0; -full_backref: - *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF; - if (rec->flag_block_full_backref != FLAG_UNSET && - rec->flag_block_full_backref != 1) - rec->bad_full_backref = 1; - return 0; -} - -static void report_mismatch_key_root(u8 key_type, u64 rootid) -{ - fprintf(stderr, "Invalid key type("); - print_key_type(stderr, 0, key_type); - fprintf(stderr, ") found in root("); - print_objectid(stderr, rootid, 0); - fprintf(stderr, ")\n"); -} - -/* - * Check if the key is valid with its extent buffer. 
- * - * This is a early check in case invalid key exists in a extent buffer - * This is not comprehensive yet, but should prevent wrong key/item passed - * further - */ -static int check_type_with_root(u64 rootid, u8 key_type) -{ - switch (key_type) { - /* Only valid in chunk tree */ - case BTRFS_DEV_ITEM_KEY: - case BTRFS_CHUNK_ITEM_KEY: - if (rootid != BTRFS_CHUNK_TREE_OBJECTID) - goto err; - break; - /* valid in csum and log tree */ - case BTRFS_CSUM_TREE_OBJECTID: - if (!(rootid == BTRFS_TREE_LOG_OBJECTID || - is_fstree(rootid))) - goto err; - break; - case BTRFS_EXTENT_ITEM_KEY: - case BTRFS_METADATA_ITEM_KEY: - case BTRFS_BLOCK_GROUP_ITEM_KEY: - if (rootid != BTRFS_EXTENT_TREE_OBJECTID) - goto err; - break; - case BTRFS_ROOT_ITEM_KEY: - if (rootid != BTRFS_ROOT_TREE_OBJECTID) - goto err; - break; - case BTRFS_DEV_EXTENT_KEY: - if (rootid != BTRFS_DEV_TREE_OBJECTID) - goto err; - break; - } - return 0; -err: - report_mismatch_key_root(key_type, rootid); - return -EINVAL; -} - -static int run_next_block(struct btrfs_root *root, - struct block_info *bits, - int bits_nr, - u64 *last, - struct cache_tree *pending, - struct cache_tree *seen, - struct cache_tree *reada, - struct cache_tree *nodes, - struct cache_tree *extent_cache, - struct cache_tree *chunk_cache, - struct rb_root *dev_cache, - struct block_group_tree *block_group_cache, - struct device_extent_tree *dev_extent_cache, - struct root_item_record *ri) -{ - struct btrfs_fs_info *fs_info = root->fs_info; - struct extent_buffer *buf; - struct extent_record *rec = NULL; - u64 bytenr; - u32 size; - u64 parent; - u64 owner; - u64 flags; - u64 ptr; - u64 gen = 0; - int ret = 0; - int i; - int nritems; - struct btrfs_key key; - struct cache_extent *cache; - int reada_bits; - - nritems = pick_next_pending(pending, reada, nodes, *last, bits, - bits_nr, &reada_bits); - if (nritems == 0) - return 1; - - if (!reada_bits) { - for(i = 0; i < nritems; i++) { - ret = add_cache_extent(reada, bits[i].start, - 
bits[i].size); - if (ret == -EEXIST) - continue; - - /* fixme, get the parent transid */ - readahead_tree_block(fs_info, bits[i].start, 0); - } - } - *last = bits[0].start; - bytenr = bits[0].start; - size = bits[0].size; - - cache = lookup_cache_extent(pending, bytenr, size); - if (cache) { - remove_cache_extent(pending, cache); - free(cache); - } - cache = lookup_cache_extent(reada, bytenr, size); - if (cache) { - remove_cache_extent(reada, cache); - free(cache); - } - cache = lookup_cache_extent(nodes, bytenr, size); - if (cache) { - remove_cache_extent(nodes, cache); - free(cache); - } - cache = lookup_cache_extent(extent_cache, bytenr, size); - if (cache) { - rec = container_of(cache, struct extent_record, cache); - gen = rec->parent_generation; - } - - /* fixme, get the real parent transid */ - buf = read_tree_block(root->fs_info, bytenr, gen); - if (!extent_buffer_uptodate(buf)) { - record_bad_block_io(root->fs_info, - extent_cache, bytenr, size); - goto out; - } - - nritems = btrfs_header_nritems(buf); - - flags = 0; - if (!init_extent_tree) { - ret = btrfs_lookup_extent_info(NULL, root, bytenr, - btrfs_header_level(buf), 1, NULL, - &flags); - if (ret < 0) { - ret = calc_extent_flag(extent_cache, buf, ri, &flags); - if (ret < 0) { - fprintf(stderr, "Couldn't calc extent flags\n"); - flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF; - } - } - } else { - flags = 0; - ret = calc_extent_flag(extent_cache, buf, ri, &flags); - if (ret < 0) { - fprintf(stderr, "Couldn't calc extent flags\n"); - flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF; - } - } - - if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) { - if (ri != NULL && - ri->objectid != BTRFS_TREE_RELOC_OBJECTID && - ri->objectid == btrfs_header_owner(buf)) { - /* - * Ok we got to this block from it's original owner and - * we have FULL_BACKREF set. Relocation can leave - * converted blocks over so this is altogether possible, - * however it's not possible if the generation > the - * last snapshot, so check for this case. 
- */ - if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) && - btrfs_header_generation(buf) > ri->last_snapshot) { - flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF; - rec->bad_full_backref = 1; - } - } - } else { - if (ri != NULL && - (ri->objectid == BTRFS_TREE_RELOC_OBJECTID || - btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) { - flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF; - rec->bad_full_backref = 1; - } - } - - if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) { - rec->flag_block_full_backref = 1; - parent = bytenr; - owner = 0; - } else { - rec->flag_block_full_backref = 0; - parent = 0; - owner = btrfs_header_owner(buf); - } - - ret = check_block(root, extent_cache, buf, flags); - if (ret) - goto out; - - if (btrfs_is_leaf(buf)) { - btree_space_waste += btrfs_leaf_free_space(root, buf); - for (i = 0; i < nritems; i++) { - struct btrfs_file_extent_item *fi; - btrfs_item_key_to_cpu(buf, &key, i); - /* - * Check key type against the leaf owner. - * Could filter quite a lot of early error if - * owner is correct - */ - if (check_type_with_root(btrfs_header_owner(buf), - key.type)) { - fprintf(stderr, "ignoring invalid key\n"); - continue; - } - if (key.type == BTRFS_EXTENT_ITEM_KEY) { - process_extent_item(root, extent_cache, buf, - i); - continue; - } - if (key.type == BTRFS_METADATA_ITEM_KEY) { - process_extent_item(root, extent_cache, buf, - i); - continue; - } - if (key.type == BTRFS_EXTENT_CSUM_KEY) { - total_csum_bytes += - btrfs_item_size_nr(buf, i); - continue; - } - if (key.type == BTRFS_CHUNK_ITEM_KEY) { - process_chunk_item(chunk_cache, &key, buf, i); - continue; - } - if (key.type == BTRFS_DEV_ITEM_KEY) { - process_device_item(dev_cache, &key, buf, i); - continue; - } - if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) { - process_block_group_item(block_group_cache, - &key, buf, i); - continue; - } - if (key.type == BTRFS_DEV_EXTENT_KEY) { - process_device_extent_item(dev_extent_cache, - &key, buf, i); - continue; - - } - if (key.type == BTRFS_EXTENT_REF_V0_KEY) { 
-#ifdef BTRFS_COMPAT_EXTENT_TREE_V0 - process_extent_ref_v0(extent_cache, buf, i); -#else - BUG(); -#endif - continue; - } - - if (key.type == BTRFS_TREE_BLOCK_REF_KEY) { - ret = add_tree_backref(extent_cache, - key.objectid, 0, key.offset, 0); - if (ret < 0) - error( - "add_tree_backref failed (leaf tree block): %s", - strerror(-ret)); - continue; - } - if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) { - ret = add_tree_backref(extent_cache, - key.objectid, key.offset, 0, 0); - if (ret < 0) - error( - "add_tree_backref failed (leaf shared block): %s", - strerror(-ret)); - continue; - } - if (key.type == BTRFS_EXTENT_DATA_REF_KEY) { - struct btrfs_extent_data_ref *ref; - ref = btrfs_item_ptr(buf, i, - struct btrfs_extent_data_ref); - add_data_backref(extent_cache, - key.objectid, 0, - btrfs_extent_data_ref_root(buf, ref), - btrfs_extent_data_ref_objectid(buf, - ref), - btrfs_extent_data_ref_offset(buf, ref), - btrfs_extent_data_ref_count(buf, ref), - 0, root->fs_info->sectorsize); - continue; - } - if (key.type == BTRFS_SHARED_DATA_REF_KEY) { - struct btrfs_shared_data_ref *ref; - ref = btrfs_item_ptr(buf, i, - struct btrfs_shared_data_ref); - add_data_backref(extent_cache, - key.objectid, key.offset, 0, 0, 0, - btrfs_shared_data_ref_count(buf, ref), - 0, root->fs_info->sectorsize); - continue; - } - if (key.type == BTRFS_ORPHAN_ITEM_KEY) { - struct bad_item *bad; - - if (key.objectid == BTRFS_ORPHAN_OBJECTID) - continue; - if (!owner) - continue; - bad = malloc(sizeof(struct bad_item)); - if (!bad) - continue; - INIT_LIST_HEAD(&bad->list); - memcpy(&bad->key, &key, - sizeof(struct btrfs_key)); - bad->root_id = owner; - list_add_tail(&bad->list, &delete_items); - continue; - } - if (key.type != BTRFS_EXTENT_DATA_KEY) - continue; - fi = btrfs_item_ptr(buf, i, - struct btrfs_file_extent_item); - if (btrfs_file_extent_type(buf, fi) == - BTRFS_FILE_EXTENT_INLINE) - continue; - if (btrfs_file_extent_disk_bytenr(buf, fi) == 0) - continue; - - data_bytes_allocated += - 
btrfs_file_extent_disk_num_bytes(buf, fi); - if (data_bytes_allocated < root->fs_info->sectorsize) { - abort(); - } - data_bytes_referenced += - btrfs_file_extent_num_bytes(buf, fi); - add_data_backref(extent_cache, - btrfs_file_extent_disk_bytenr(buf, fi), - parent, owner, key.objectid, key.offset - - btrfs_file_extent_offset(buf, fi), 1, 1, - btrfs_file_extent_disk_num_bytes(buf, fi)); - } - } else { - int level; - struct btrfs_key first_key; - - first_key.objectid = 0; - - if (nritems > 0) - btrfs_item_key_to_cpu(buf, &first_key, 0); - level = btrfs_header_level(buf); - for (i = 0; i < nritems; i++) { - struct extent_record tmpl; - - ptr = btrfs_node_blockptr(buf, i); - size = root->fs_info->nodesize; - btrfs_node_key_to_cpu(buf, &key, i); - if (ri != NULL) { - if ((level == ri->drop_level) - && is_dropped_key(&key, &ri->drop_key)) { - continue; - } - } - - memset(&tmpl, 0, sizeof(tmpl)); - btrfs_cpu_key_to_disk(&tmpl.parent_key, &key); - tmpl.parent_generation = btrfs_node_ptr_generation(buf, i); - tmpl.start = ptr; - tmpl.nr = size; - tmpl.refs = 1; - tmpl.metadata = 1; - tmpl.max_size = size; - ret = add_extent_rec(extent_cache, &tmpl); - if (ret < 0) - goto out; - - ret = add_tree_backref(extent_cache, ptr, parent, - owner, 1); - if (ret < 0) { - error( - "add_tree_backref failed (non-leaf block): %s", - strerror(-ret)); - continue; - } - - if (level > 1) { - add_pending(nodes, seen, ptr, size); - } else { - add_pending(pending, seen, ptr, size); - } - } - btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(fs_info) - - nritems) * sizeof(struct btrfs_key_ptr); - } - total_btree_bytes += buf->len; - if (fs_root_objectid(btrfs_header_owner(buf))) - total_fs_tree_bytes += buf->len; - if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID) - total_extent_tree_bytes += buf->len; -out: - free_extent_buffer(buf); - return ret; -} - -static int add_root_to_pending(struct extent_buffer *buf, - struct cache_tree *extent_cache, - struct cache_tree *pending, - struct 
cache_tree *seen, - struct cache_tree *nodes, - u64 objectid) -{ - struct extent_record tmpl; - int ret; - - if (btrfs_header_level(buf) > 0) - add_pending(nodes, seen, buf->start, buf->len); - else - add_pending(pending, seen, buf->start, buf->len); - - memset(&tmpl, 0, sizeof(tmpl)); - tmpl.start = buf->start; - tmpl.nr = buf->len; - tmpl.is_root = 1; - tmpl.refs = 1; - tmpl.metadata = 1; - tmpl.max_size = buf->len; - add_extent_rec(extent_cache, &tmpl); - - if (objectid == BTRFS_TREE_RELOC_OBJECTID || - btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV) - ret = add_tree_backref(extent_cache, buf->start, buf->start, - 0, 1); - else - ret = add_tree_backref(extent_cache, buf->start, 0, objectid, - 1); - return ret; -} - -/* as we fix the tree, we might be deleting blocks that - * we're tracking for repair. This hook makes sure we - * remove any backrefs for blocks as we are fixing them. - */ -static int free_extent_hook(struct btrfs_trans_handle *trans, - struct btrfs_root *root, - u64 bytenr, u64 num_bytes, u64 parent, - u64 root_objectid, u64 owner, u64 offset, - int refs_to_drop) -{ - struct extent_record *rec; - struct cache_extent *cache; - int is_data; - struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache; - - is_data = owner >= BTRFS_FIRST_FREE_OBJECTID; - cache = lookup_cache_extent(extent_cache, bytenr, num_bytes); - if (!cache) - return 0; - - rec = container_of(cache, struct extent_record, cache); - if (is_data) { - struct data_backref *back; - back = find_data_backref(rec, parent, root_objectid, owner, - offset, 1, bytenr, num_bytes); - if (!back) - goto out; - if (back->node.found_ref) { - back->found_ref -= refs_to_drop; - if (rec->refs) - rec->refs -= refs_to_drop; - } - if (back->node.found_extent_tree) { - back->num_refs -= refs_to_drop; - if (rec->extent_item_refs) - rec->extent_item_refs -= refs_to_drop; - } - if (back->found_ref == 0) - back->node.found_ref = 0; - if (back->num_refs == 0) - back->node.found_extent_tree = 0; 
- - if (!back->node.found_extent_tree && back->node.found_ref) { - rb_erase(&back->node.node, &rec->backref_tree); - free(back); - } - } else { - struct tree_backref *back; - back = find_tree_backref(rec, parent, root_objectid); - if (!back) - goto out; - if (back->node.found_ref) { - if (rec->refs) - rec->refs--; - back->node.found_ref = 0; - } - if (back->node.found_extent_tree) { - if (rec->extent_item_refs) - rec->extent_item_refs--; - back->node.found_extent_tree = 0; - } - if (!back->node.found_extent_tree && back->node.found_ref) { - rb_erase(&back->node.node, &rec->backref_tree); - free(back); - } - } - maybe_free_extent_rec(extent_cache, rec); -out: - return 0; -} - -static int delete_extent_records(struct btrfs_trans_handle *trans, - struct btrfs_root *root, - struct btrfs_path *path, - u64 bytenr) -{ - struct btrfs_key key; - struct btrfs_key found_key; - struct extent_buffer *leaf; - int ret; - int slot; - - - key.objectid = bytenr; - key.type = (u8)-1; - key.offset = (u64)-1; - - while(1) { - ret = btrfs_search_slot(trans, root->fs_info->extent_root, - &key, path, 0, 1); - if (ret < 0) - break; - - if (ret > 0) { - ret = 0; - if (path->slots[0] == 0) - break; - path->slots[0]--; - } - ret = 0; - - leaf = path->nodes[0]; - slot = path->slots[0]; - - btrfs_item_key_to_cpu(leaf, &found_key, slot); - if (found_key.objectid != bytenr) - break; - - if (found_key.type != BTRFS_EXTENT_ITEM_KEY && - found_key.type != BTRFS_METADATA_ITEM_KEY && - found_key.type != BTRFS_TREE_BLOCK_REF_KEY && - found_key.type != BTRFS_EXTENT_DATA_REF_KEY && - found_key.type != BTRFS_EXTENT_REF_V0_KEY && - found_key.type != BTRFS_SHARED_BLOCK_REF_KEY && - found_key.type != BTRFS_SHARED_DATA_REF_KEY) { - btrfs_release_path(path); - if (found_key.type == 0) { - if (found_key.offset == 0) - break; - key.offset = found_key.offset - 1; - key.type = found_key.type; - } - key.type = found_key.type - 1; - key.offset = (u64)-1; - continue; - } - - fprintf(stderr, "repair deleting extent 
record: key %Lu %u %Lu\n", - found_key.objectid, found_key.type, found_key.offset); - - ret = btrfs_del_item(trans, root->fs_info->extent_root, path); - if (ret) - break; - btrfs_release_path(path); - - if (found_key.type == BTRFS_EXTENT_ITEM_KEY || - found_key.type == BTRFS_METADATA_ITEM_KEY) { - u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ? - found_key.offset : root->fs_info->nodesize; - - ret = btrfs_update_block_group(root, bytenr, - bytes, 0, 0); - if (ret) - break; - } - } - - btrfs_release_path(path); - return ret; -} - -/* - * for a single backref, this will allocate a new extent - * and add the backref to it. - */ -static int record_extent(struct btrfs_trans_handle *trans, - struct btrfs_fs_info *info, - struct btrfs_path *path, - struct extent_record *rec, - struct extent_backref *back, - int allocated, u64 flags) -{ - int ret = 0; - struct btrfs_root *extent_root = info->extent_root; - struct extent_buffer *leaf; - struct btrfs_key ins_key; - struct btrfs_extent_item *ei; - struct data_backref *dback; - struct btrfs_tree_block_info *bi; - - if (!back->is_data) - rec->max_size = max_t(u64, rec->max_size, - info->nodesize); - - if (!allocated) { - u32 item_size = sizeof(*ei); - - if (!back->is_data) - item_size += sizeof(*bi); - - ins_key.objectid = rec->start; - ins_key.offset = rec->max_size; - ins_key.type = BTRFS_EXTENT_ITEM_KEY; - - ret = btrfs_insert_empty_item(trans, extent_root, path, - &ins_key, item_size); - if (ret) - goto fail; - - leaf = path->nodes[0]; - ei = btrfs_item_ptr(leaf, path->slots[0], - struct btrfs_extent_item); - - btrfs_set_extent_refs(leaf, ei, 0); - btrfs_set_extent_generation(leaf, ei, rec->generation); - - if (back->is_data) { - btrfs_set_extent_flags(leaf, ei, - BTRFS_EXTENT_FLAG_DATA); - } else { - struct btrfs_disk_key copy_key;; - - bi = (struct btrfs_tree_block_info *)(ei + 1); - memset_extent_buffer(leaf, 0, (unsigned long)bi, - sizeof(*bi)); - - btrfs_set_disk_key_objectid(©_key, - rec->info_objectid); - 
btrfs_set_disk_key_type(©_key, 0); - btrfs_set_disk_key_offset(©_key, 0); - - btrfs_set_tree_block_level(leaf, bi, rec->info_level); - btrfs_set_tree_block_key(leaf, bi, ©_key); - - btrfs_set_extent_flags(leaf, ei, - BTRFS_EXTENT_FLAG_TREE_BLOCK | flags); - } - - btrfs_mark_buffer_dirty(leaf); - ret = btrfs_update_block_group(extent_root, rec->start, - rec->max_size, 1, 0); - if (ret) - goto fail; - btrfs_release_path(path); - } - - if (back->is_data) { - u64 parent; - int i; - - dback = to_data_backref(back); - if (back->full_backref) - parent = dback->parent; - else - parent = 0; - - for (i = 0; i < dback->found_ref; i++) { - /* if parent != 0, we're doing a full backref - * passing BTRFS_FIRST_FREE_OBJECTID as the owner - * just makes the backref allocator create a data - * backref - */ - ret = btrfs_inc_extent_ref(trans, info->extent_root, - rec->start, rec->max_size, - parent, - dback->root, - parent ? - BTRFS_FIRST_FREE_OBJECTID : - dback->owner, - dback->offset); - if (ret) - break; - } - fprintf(stderr, "adding new data backref" - " on %llu %s %llu owner %llu" - " offset %llu found %d\n", - (unsigned long long)rec->start, - back->full_backref ? - "parent" : "root", - back->full_backref ? 
- (unsigned long long)parent : - (unsigned long long)dback->root, - (unsigned long long)dback->owner, - (unsigned long long)dback->offset, - dback->found_ref); - } else { - u64 parent; - struct tree_backref *tback; - - tback = to_tree_backref(back); - if (back->full_backref) - parent = tback->parent; - else - parent = 0; - - ret = btrfs_inc_extent_ref(trans, info->extent_root, - rec->start, rec->max_size, - parent, tback->root, 0, 0); - fprintf(stderr, "adding new tree backref on " - "start %llu len %llu parent %llu root %llu\n", - rec->start, rec->max_size, parent, tback->root); - } -fail: - btrfs_release_path(path); - return ret; -} - -static struct extent_entry *find_entry(struct list_head *entries, - u64 bytenr, u64 bytes) -{ - struct extent_entry *entry = NULL; - - list_for_each_entry(entry, entries, list) { - if (entry->bytenr == bytenr && entry->bytes == bytes) - return entry; - } - - return NULL; -} - -static struct extent_entry *find_most_right_entry(struct list_head *entries) -{ - struct extent_entry *entry, *best = NULL, *prev = NULL; - - list_for_each_entry(entry, entries, list) { - /* - * If there are as many broken entries as entries then we know - * not to trust this particular entry. - */ - if (entry->broken == entry->count) - continue; - - /* - * Special case, when there are only two entries and 'best' is - * the first one - */ - if (!prev) { - best = entry; - prev = entry; - continue; - } - - /* - * If our current entry == best then we can't be sure our best - * is really the best, so we need to keep searching. - */ - if (best && best->count == entry->count) { - prev = entry; - best = NULL; - continue; - } - - /* Prev == entry, not good enough, have to keep searching */ - if (!prev->broken && prev->count == entry->count) - continue; - - if (!best) - best = (prev->count > entry->count) ? 
prev : entry; - else if (best->count < entry->count) - best = entry; - prev = entry; - } - - return best; -} - -static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path, - struct data_backref *dback, struct extent_entry *entry) -{ - struct btrfs_trans_handle *trans; - struct btrfs_root *root; - struct btrfs_file_extent_item *fi; - struct extent_buffer *leaf; - struct btrfs_key key; - u64 bytenr, bytes; - int ret, err; - - key.objectid = dback->root; - key.type = BTRFS_ROOT_ITEM_KEY; - key.offset = (u64)-1; - root = btrfs_read_fs_root(info, &key); - if (IS_ERR(root)) { - fprintf(stderr, "Couldn't find root for our ref\n"); - return -EINVAL; - } - - /* - * The backref points to the original offset of the extent if it was - * split, so we need to search down to the offset we have and then walk - * forward until we find the backref we're looking for. - */ - key.objectid = dback->owner; - key.type = BTRFS_EXTENT_DATA_KEY; - key.offset = dback->offset; - ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); - if (ret < 0) { - fprintf(stderr, "Error looking up ref %d\n", ret); - return ret; - } - - while (1) { - if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) { - ret = btrfs_next_leaf(root, path); - if (ret) { - fprintf(stderr, "Couldn't find our ref, next\n"); - return -EINVAL; - } - } - leaf = path->nodes[0]; - btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); - if (key.objectid != dback->owner || - key.type != BTRFS_EXTENT_DATA_KEY) { - fprintf(stderr, "Couldn't find our ref, search\n"); - return -EINVAL; - } - fi = btrfs_item_ptr(leaf, path->slots[0], - struct btrfs_file_extent_item); - bytenr = btrfs_file_extent_disk_bytenr(leaf, fi); - bytes = btrfs_file_extent_disk_num_bytes(leaf, fi); - - if (bytenr == dback->disk_bytenr && bytes == dback->bytes) - break; - path->slots[0]++; - } - - btrfs_release_path(path); - - trans = btrfs_start_transaction(root, 1); - if (IS_ERR(trans)) - return PTR_ERR(trans); - - /* - * Ok we have the key of 
the file extent we want to fix, now we can cow - * down to the thing and fix it. - */ - ret = btrfs_search_slot(trans, root, &key, path, 0, 1); - if (ret < 0) { - fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n", - key.objectid, key.type, key.offset, ret); - goto out; - } - if (ret > 0) { - fprintf(stderr, "Well that's odd, we just found this key " - "[%Lu, %u, %Lu]\n", key.objectid, key.type, - key.offset); - ret = -EINVAL; - goto out; - } - leaf = path->nodes[0]; - fi = btrfs_item_ptr(leaf, path->slots[0], - struct btrfs_file_extent_item); - - if (btrfs_file_extent_compression(leaf, fi) && - dback->disk_bytenr != entry->bytenr) { - fprintf(stderr, "Ref doesn't match the record start and is " - "compressed, please take a btrfs-image of this file " - "system and send it to a btrfs developer so they can " - "complete this functionality for bytenr %Lu\n", - dback->disk_bytenr); - ret = -EINVAL; - goto out; - } - - if (dback->node.broken && dback->disk_bytenr != entry->bytenr) { - btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr); - } else if (dback->disk_bytenr > entry->bytenr) { - u64 off_diff, offset; - - off_diff = dback->disk_bytenr - entry->bytenr; - offset = btrfs_file_extent_offset(leaf, fi); - if (dback->disk_bytenr + offset + - btrfs_file_extent_num_bytes(leaf, fi) > - entry->bytenr + entry->bytes) { - fprintf(stderr, "Ref is past the entry end, please " - "take a btrfs-image of this file system and " - "send it to a btrfs developer, ref %Lu\n", - dback->disk_bytenr); - ret = -EINVAL; - goto out; - } - offset += off_diff; - btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr); - btrfs_set_file_extent_offset(leaf, fi, offset); - } else if (dback->disk_bytenr < entry->bytenr) { - u64 offset; - - offset = btrfs_file_extent_offset(leaf, fi); - if (dback->disk_bytenr + offset < entry->bytenr) { - fprintf(stderr, "Ref is before the entry start, please" - " take a btrfs-image of this file system and " - "send it to a btrfs developer, 
ref %Lu\n", - dback->disk_bytenr); - ret = -EINVAL; - goto out; - } - - offset += dback->disk_bytenr; - offset -= entry->bytenr; - btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr); - btrfs_set_file_extent_offset(leaf, fi, offset); - } - - btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes); - - /* - * Chances are if disk_num_bytes were wrong then so is ram_bytes, but - * only do this if we aren't using compression, otherwise it's a - * trickier case. - */ - if (!btrfs_file_extent_compression(leaf, fi)) - btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes); - else - printf("ram bytes may be wrong?\n"); - btrfs_mark_buffer_dirty(leaf); -out: - err = btrfs_commit_transaction(trans, root); - btrfs_release_path(path); - return ret ? ret : err; -} - -static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path, - struct extent_record *rec) -{ - struct extent_backref *back, *tmp; - struct data_backref *dback; - struct extent_entry *entry, *best = NULL; - LIST_HEAD(entries); - int nr_entries = 0; - int broken_entries = 0; - int ret = 0; - short mismatch = 0; - - /* - * Metadata is easy and the backrefs should always agree on bytenr and - * size, if not we've got bigger issues. - */ - if (rec->metadata) - return 0; - - rbtree_postorder_for_each_entry_safe(back, tmp, - &rec->backref_tree, node) { - if (back->full_backref || !back->is_data) - continue; - - dback = to_data_backref(back); - - /* - * We only pay attention to backrefs that we found a real - * backref for. - */ - if (dback->found_ref == 0) - continue; - - /* - * For now we only catch when the bytes don't match, not the - * bytenr. We can easily do this at the same time, but I want - * to have a fs image to test on before we just add repair - * functionality willy-nilly so we know we won't screw up the - * repair. 
- */ - - entry = find_entry(&entries, dback->disk_bytenr, - dback->bytes); - if (!entry) { - entry = malloc(sizeof(struct extent_entry)); - if (!entry) { - ret = -ENOMEM; - goto out; - } - memset(entry, 0, sizeof(*entry)); - entry->bytenr = dback->disk_bytenr; - entry->bytes = dback->bytes; - list_add_tail(&entry->list, &entries); - nr_entries++; - } - - /* - * If we only have on entry we may think the entries agree when - * in reality they don't so we have to do some extra checking. - */ - if (dback->disk_bytenr != rec->start || - dback->bytes != rec->nr || back->broken) - mismatch = 1; - - if (back->broken) { - entry->broken++; - broken_entries++; - } - - entry->count++; - } - - /* Yay all the backrefs agree, carry on good sir */ - if (nr_entries <= 1 && !mismatch) - goto out; - - fprintf(stderr, "attempting to repair backref discrepency for bytenr " - "%Lu\n", rec->start); - - /* - * First we want to see if the backrefs can agree amongst themselves who - * is right, so figure out which one of the entries has the highest - * count. - */ - best = find_most_right_entry(&entries); - - /* - * Ok so we may have an even split between what the backrefs think, so - * this is where we use the extent ref to see what it thinks. - */ - if (!best) { - entry = find_entry(&entries, rec->start, rec->nr); - if (!entry && (!broken_entries || !rec->found_rec)) { - fprintf(stderr, "Backrefs don't agree with each other " - "and extent record doesn't agree with anybody," - " so we can't fix bytenr %Lu bytes %Lu\n", - rec->start, rec->nr); - ret = -EINVAL; - goto out; - } else if (!entry) { - /* - * Ok our backrefs were broken, we'll assume this is the - * correct value and add an entry for this range. 
- */ - entry = malloc(sizeof(struct extent_entry)); - if (!entry) { - ret = -ENOMEM; - goto out; - } - memset(entry, 0, sizeof(*entry)); - entry->bytenr = rec->start; - entry->bytes = rec->nr; - list_add_tail(&entry->list, &entries); - nr_entries++; - } - entry->count++; - best = find_most_right_entry(&entries); - if (!best) { - fprintf(stderr, "Backrefs and extent record evenly " - "split on who is right, this is going to " - "require user input to fix bytenr %Lu bytes " - "%Lu\n", rec->start, rec->nr); - ret = -EINVAL; - goto out; - } - } - - /* - * I don't think this can happen currently as we'll abort() if we catch - * this case higher up, but in case somebody removes that we still can't - * deal with it properly here yet, so just bail out of that's the case. - */ - if (best->bytenr != rec->start) { - fprintf(stderr, "Extent start and backref starts don't match, " - "please use btrfs-image on this file system and send " - "it to a btrfs developer so they can make fsck fix " - "this particular case. bytenr is %Lu, bytes is %Lu\n", - rec->start, rec->nr); - ret = -EINVAL; - goto out; - } - - /* - * Ok great we all agreed on an extent record, let's go find the real - * references and fix up the ones that don't match. - */ - rbtree_postorder_for_each_entry_safe(back, tmp, - &rec->backref_tree, node) { - if (back->full_backref || !back->is_data) - continue; - - dback = to_data_backref(back); - - /* - * Still ignoring backrefs that don't have a real ref attached - * to them. - */ - if (dback->found_ref == 0) - continue; - - if (dback->bytes == best->bytes && - dback->disk_bytenr == best->bytenr) - continue; - - ret = repair_ref(info, path, dback, best); - if (ret) - goto out; - } - - /* - * Ok we messed with the actual refs, which means we need to drop our - * entire cache and go back and rescan. I know this is a huge pain and - * adds a lot of extra work, but it's the only way to be safe. 
Once all - * the backrefs agree we may not need to do anything to the extent - * record itself. - */ - ret = -EAGAIN; -out: - while (!list_empty(&entries)) { - entry = list_entry(entries.next, struct extent_entry, list); - list_del_init(&entry->list); - free(entry); - } - return ret; -} - -static int process_duplicates(struct cache_tree *extent_cache, - struct extent_record *rec) -{ - struct extent_record *good, *tmp; - struct cache_extent *cache; - int ret; - - /* - * If we found a extent record for this extent then return, or if we - * have more than one duplicate we are likely going to need to delete - * something. - */ - if (rec->found_rec || rec->num_duplicates > 1) - return 0; - - /* Shouldn't happen but just in case */ - BUG_ON(!rec->num_duplicates); - - /* - * So this happens if we end up with a backref that doesn't match the - * actual extent entry. So either the backref is bad or the extent - * entry is bad. Either way we want to have the extent_record actually - * reflect what we found in the extent_tree, so we need to take the - * duplicate out and use that as the extent_record since the only way we - * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY. - */ - remove_cache_extent(extent_cache, &rec->cache); - - good = to_extent_record(rec->dups.next); - list_del_init(&good->list); - INIT_LIST_HEAD(&good->backrefs); - INIT_LIST_HEAD(&good->dups); - good->cache.start = good->start; - good->cache.size = good->nr; - good->content_checked = 0; - good->owner_ref_checked = 0; - good->num_duplicates = 0; - good->refs = rec->refs; - list_splice_init(&rec->backrefs, &good->backrefs); - while (1) { - cache = lookup_cache_extent(extent_cache, good->start, - good->nr); - if (!cache) - break; - tmp = container_of(cache, struct extent_record, cache); - - /* - * If we find another overlapping extent and it's found_rec is - * set then it's a duplicate and we need to try and delete - * something. 
- */ - if (tmp->found_rec || tmp->num_duplicates > 0) { - if (list_empty(&good->list)) - list_add_tail(&good->list, - &duplicate_extents); - good->num_duplicates += tmp->num_duplicates + 1; - list_splice_init(&tmp->dups, &good->dups); - list_del_init(&tmp->list); - list_add_tail(&tmp->list, &good->dups); - remove_cache_extent(extent_cache, &tmp->cache); - continue; - } - - /* - * Ok we have another non extent item backed extent rec, so lets - * just add it to this extent and carry on like we did above. - */ - good->refs += tmp->refs; - list_splice_init(&tmp->backrefs, &good->backrefs); - remove_cache_extent(extent_cache, &tmp->cache); - free(tmp); - } - ret = insert_cache_extent(extent_cache, &good->cache); - BUG_ON(ret); - free(rec); - return good->num_duplicates ? 0 : 1; -} - -static int delete_duplicate_records(struct btrfs_root *root, - struct extent_record *rec) -{ - struct btrfs_trans_handle *trans; - LIST_HEAD(delete_list); - struct btrfs_path path; - struct extent_record *tmp, *good, *n; - int nr_del = 0; - int ret = 0, err; - struct btrfs_key key; - - btrfs_init_path(&path); - - good = rec; - /* Find the record that covers all of the duplicates. */ - list_for_each_entry(tmp, &rec->dups, list) { - if (good->start < tmp->start) - continue; - if (good->nr > tmp->nr) - continue; - - if (tmp->start + tmp->nr < good->start + good->nr) { - fprintf(stderr, "Ok we have overlapping extents that " - "aren't completely covered by each other, this " - "is going to require more careful thought. 
" - "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n", - tmp->start, tmp->nr, good->start, good->nr); - abort(); - } - good = tmp; - } - - if (good != rec) - list_add_tail(&rec->list, &delete_list); - - list_for_each_entry_safe(tmp, n, &rec->dups, list) { - if (tmp == good) - continue; - list_move_tail(&tmp->list, &delete_list); - } - - root = root->fs_info->extent_root; - trans = btrfs_start_transaction(root, 1); - if (IS_ERR(trans)) { - ret = PTR_ERR(trans); - goto out; - } - - list_for_each_entry(tmp, &delete_list, list) { - if (tmp->found_rec == 0) - continue; - key.objectid = tmp->start; - key.type = BTRFS_EXTENT_ITEM_KEY; - key.offset = tmp->nr; - - /* Shouldn't happen but just in case */ - if (tmp->metadata) { - fprintf(stderr, "Well this shouldn't happen, extent " - "record overlaps but is metadata? " - "[%Lu, %Lu]\n", tmp->start, tmp->nr); - abort(); - } - - ret = btrfs_search_slot(trans, root, &key, &path, -1, 1); - if (ret) { - if (ret > 0) - ret = -EINVAL; - break; - } - ret = btrfs_del_item(trans, root, &path); - if (ret) - break; - btrfs_release_path(&path); - nr_del++; - } - err = btrfs_commit_transaction(trans, root); - if (err && !ret) - ret = err; -out: - while (!list_empty(&delete_list)) { - tmp = to_extent_record(delete_list.next); - list_del_init(&tmp->list); - if (tmp == rec) - continue; - free(tmp); - } - - while (!list_empty(&rec->dups)) { - tmp = to_extent_record(rec->dups.next); - list_del_init(&tmp->list); - free(tmp); - } - - btrfs_release_path(&path); - - if (!ret && !nr_del) - rec->num_duplicates = 0; - - return ret ? 
ret : nr_del; -} - -static int find_possible_backrefs(struct btrfs_fs_info *info, - struct btrfs_path *path, - struct cache_tree *extent_cache, - struct extent_record *rec) -{ - struct btrfs_root *root; - struct extent_backref *back, *tmp; - struct data_backref *dback; - struct cache_extent *cache; - struct btrfs_file_extent_item *fi; - struct btrfs_key key; - u64 bytenr, bytes; - int ret; - - rbtree_postorder_for_each_entry_safe(back, tmp, - &rec->backref_tree, node) { - /* Don't care about full backrefs (poor unloved backrefs) */ - if (back->full_backref || !back->is_data) - continue; - - dback = to_data_backref(back); - - /* We found this one, we don't need to do a lookup */ - if (dback->found_ref) - continue; - - key.objectid = dback->root; - key.type = BTRFS_ROOT_ITEM_KEY; - key.offset = (u64)-1; - - root = btrfs_read_fs_root(info, &key); - - /* No root, definitely a bad ref, skip */ - if (IS_ERR(root) && PTR_ERR(root) == -ENOENT) - continue; - /* Other err, exit */ - if (IS_ERR(root)) - return PTR_ERR(root); - - key.objectid = dback->owner; - key.type = BTRFS_EXTENT_DATA_KEY; - key.offset = dback->offset; - ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); - if (ret) { - btrfs_release_path(path); - if (ret < 0) - return ret; - /* Didn't find it, we can carry on */ - ret = 0; - continue; - } - - fi = btrfs_item_ptr(path->nodes[0], path->slots[0], - struct btrfs_file_extent_item); - bytenr = btrfs_file_extent_disk_bytenr(path->nodes[0], fi); - bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi); - btrfs_release_path(path); - cache = lookup_cache_extent(extent_cache, bytenr, 1); - if (cache) { - struct extent_record *tmp; - tmp = container_of(cache, struct extent_record, cache); - - /* - * If we found an extent record for the bytenr for this - * particular backref then we can't add it to our - * current extent record. 
We only want to add backrefs - * that don't have a corresponding extent item in the - * extent tree since they likely belong to this record - * and we need to fix it if it doesn't match bytenrs. - */ - if (tmp->found_rec) - continue; - } - - dback->found_ref += 1; - dback->disk_bytenr = bytenr; - dback->bytes = bytes; - - /* - * Set this so the verify backref code knows not to trust the - * values in this backref. - */ - back->broken = 1; - } - - return 0; -} - -/* - * Record orphan data ref into corresponding root. - * - * Return 0 if the extent item contains data ref and recorded. - * Return 1 if the extent item contains no useful data ref - * On that case, it may contains only shared_dataref or metadata backref - * or the file extent exists(this should be handled by the extent bytenr - * recovery routine) - * Return <0 if something goes wrong. - */ -static int record_orphan_data_extents(struct btrfs_fs_info *fs_info, - struct extent_record *rec) -{ - struct btrfs_key key; - struct btrfs_root *dest_root; - struct extent_backref *back, *tmp; - struct data_backref *dback; - struct orphan_data_extent *orphan; - struct btrfs_path path; - int recorded_data_ref = 0; - int ret = 0; - - if (rec->metadata) - return 1; - btrfs_init_path(&path); - rbtree_postorder_for_each_entry_safe(back, tmp, - &rec->backref_tree, node) { - if (back->full_backref || !back->is_data || - !back->found_extent_tree) - continue; - dback = to_data_backref(back); - if (dback->found_ref) - continue; - key.objectid = dback->root; - key.type = BTRFS_ROOT_ITEM_KEY; - key.offset = (u64)-1; - - dest_root = btrfs_read_fs_root(fs_info, &key); - - /* For non-exist root we just skip it */ - if (IS_ERR(dest_root) || !dest_root) - continue; - - key.objectid = dback->owner; - key.type = BTRFS_EXTENT_DATA_KEY; - key.offset = dback->offset; - - ret = btrfs_search_slot(NULL, dest_root, &key, &path, 0, 0); - btrfs_release_path(&path); - /* - * For ret < 0, it's OK since the fs-tree may be corrupted, - * we need 
to record it for inode/file extent rebuild. - * For ret > 0, we record it only for file extent rebuild. - * For ret == 0, the file extent exists but only bytenr - * mismatch, let the original bytenr fix routine to handle, - * don't record it. - */ - if (ret == 0) - continue; - ret = 0; - orphan = malloc(sizeof(*orphan)); - if (!orphan) { - ret = -ENOMEM; - goto out; - } - INIT_LIST_HEAD(&orphan->list); - orphan->root = dback->root; - orphan->objectid = dback->owner; - orphan->offset = dback->offset; - orphan->disk_bytenr = rec->cache.start; - orphan->disk_len = rec->cache.size; - list_add(&dest_root->orphan_data_extents, &orphan->list); - recorded_data_ref = 1; - } -out: - btrfs_release_path(&path); - if (!ret) - return !recorded_data_ref; - else - return ret; -} - -/* - * when an incorrect extent item is found, this will delete - * all of the existing entries for it and recreate them - * based on what the tree scan found. - */ -static int fixup_extent_refs(struct btrfs_fs_info *info, - struct cache_tree *extent_cache, - struct extent_record *rec) -{ - struct btrfs_trans_handle *trans = NULL; - int ret; - struct btrfs_path path; - struct cache_extent *cache; - struct extent_backref *back, *tmp; - int allocated = 0; - u64 flags = 0; - - if (rec->flag_block_full_backref) - flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF; - - btrfs_init_path(&path); - if (rec->refs != rec->extent_item_refs && !rec->metadata) { - /* - * Sometimes the backrefs themselves are so broken they don't - * get attached to any meaningful rec, so first go back and - * check any of our backrefs that we couldn't find and throw - * them into the list if we find the backref so that - * verify_backrefs can figure out what to do. 
- */ - ret = find_possible_backrefs(info, &path, extent_cache, rec); - if (ret < 0) - goto out; - } - - /* step one, make sure all of the backrefs agree */ - ret = verify_backrefs(info, &path, rec); - if (ret < 0) - goto out; - - trans = btrfs_start_transaction(info->extent_root, 1); - if (IS_ERR(trans)) { - ret = PTR_ERR(trans); - goto out; - } - - /* step two, delete all the existing records */ - ret = delete_extent_records(trans, info->extent_root, &path, - rec->start); - - if (ret < 0) - goto out; - - /* was this block corrupt? If so, don't add references to it */ - cache = lookup_cache_extent(info->corrupt_blocks, - rec->start, rec->max_size); - if (cache) { - ret = 0; - goto out; - } - - /* step three, recreate all the refs we did find */ - rbtree_postorder_for_each_entry_safe(back, tmp, - &rec->backref_tree, node) { - /* - * if we didn't find any references, don't create a - * new extent record - */ - if (!back->found_ref) - continue; - - rec->bad_full_backref = 0; - ret = record_extent(trans, info, &path, rec, back, allocated, flags); - allocated = 1; - - if (ret) - goto out; - } -out: - if (trans) { - int err = btrfs_commit_transaction(trans, info->extent_root); - if (!ret) - ret = err; - } - - if (!ret) - fprintf(stderr, "Repaired extent references for %llu\n", - (unsigned long long)rec->start); - - btrfs_release_path(&path); - return ret; -} - -static int fixup_extent_flags(struct btrfs_fs_info *fs_info, - struct extent_record *rec) -{ - struct btrfs_trans_handle *trans; - struct btrfs_root *root = fs_info->extent_root; - struct btrfs_path path; - struct btrfs_extent_item *ei; - struct btrfs_key key; - u64 flags; - int ret = 0; - - key.objectid = rec->start; - if (rec->metadata) { - key.type = BTRFS_METADATA_ITEM_KEY; - key.offset = rec->info_level; - } else { - key.type = BTRFS_EXTENT_ITEM_KEY; - key.offset = rec->max_size; - } - - trans = btrfs_start_transaction(root, 0); - if (IS_ERR(trans)) - return PTR_ERR(trans); - - btrfs_init_path(&path); - ret = 
btrfs_search_slot(trans, root, &key, &path, 0, 1); - if (ret < 0) { - btrfs_release_path(&path); - btrfs_commit_transaction(trans, root); - return ret; - } else if (ret) { - fprintf(stderr, "Didn't find extent for %llu\n", - (unsigned long long)rec->start); - btrfs_release_path(&path); - btrfs_commit_transaction(trans, root); - return -ENOENT; - } - - ei = btrfs_item_ptr(path.nodes[0], path.slots[0], - struct btrfs_extent_item); - flags = btrfs_extent_flags(path.nodes[0], ei); - if (rec->flag_block_full_backref) { - fprintf(stderr, "setting full backref on %llu\n", - (unsigned long long)key.objectid); - flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF; - } else { - fprintf(stderr, "clearing full backref on %llu\n", - (unsigned long long)key.objectid); - flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF; - } - btrfs_set_extent_flags(path.nodes[0], ei, flags); - btrfs_mark_buffer_dirty(path.nodes[0]); - btrfs_release_path(&path); - ret = btrfs_commit_transaction(trans, root); - if (!ret) - fprintf(stderr, "Repaired extent flags for %llu\n", - (unsigned long long)rec->start); - - return ret; -} - -/* right now we only prune from the extent allocation tree */ -static int prune_one_block(struct btrfs_trans_handle *trans, - struct btrfs_fs_info *info, - struct btrfs_corrupt_block *corrupt) -{ - int ret; - struct btrfs_path path; - struct extent_buffer *eb; - u64 found; - int slot; - int nritems; - int level = corrupt->level + 1; - - btrfs_init_path(&path); -again: - /* we want to stop at the parent to our busted block */ - path.lowest_level = level; - - ret = btrfs_search_slot(trans, info->extent_root, - &corrupt->key, &path, -1, 1); - - if (ret < 0) - goto out; - - eb = path.nodes[level]; - if (!eb) { - ret = -ENOENT; - goto out; - } - - /* - * hopefully the search gave us the block we want to prune, - * lets try that first - */ - slot = path.slots[level]; - found = btrfs_node_blockptr(eb, slot); - if (found == corrupt->cache.start) - goto del_ptr; - - nritems = btrfs_header_nritems(eb); - 
- /* the search failed, lets scan this node and hope we find it */ - for (slot = 0; slot < nritems; slot++) { - found = btrfs_node_blockptr(eb, slot); - if (found == corrupt->cache.start) - goto del_ptr; - } - /* - * we couldn't find the bad block. TODO, search all the nodes for pointers - * to this block - */ - if (eb == info->extent_root->node) { - ret = -ENOENT; - goto out; - } else { - level++; - btrfs_release_path(&path); - goto again; - } - -del_ptr: - printk("deleting pointer to block %Lu\n", corrupt->cache.start); - ret = btrfs_del_ptr(info->extent_root, &path, level, slot); - -out: - btrfs_release_path(&path); - return ret; -} - -static int prune_corrupt_blocks(struct btrfs_fs_info *info) -{ - struct btrfs_trans_handle *trans = NULL; - struct cache_extent *cache; - struct btrfs_corrupt_block *corrupt; - - while (1) { - cache = search_cache_extent(info->corrupt_blocks, 0); - if (!cache) - break; - if (!trans) { - trans = btrfs_start_transaction(info->extent_root, 1); - if (IS_ERR(trans)) - return PTR_ERR(trans); - } - corrupt = container_of(cache, struct btrfs_corrupt_block, cache); - prune_one_block(trans, info, corrupt); - remove_cache_extent(info->corrupt_blocks, cache); - } - if (trans) - return btrfs_commit_transaction(trans, info->extent_root); - return 0; -} - -static int check_extent_refs(struct btrfs_root *root, - struct cache_tree *extent_cache) -{ - struct extent_record *rec; - struct cache_extent *cache; - int ret = 0; - int had_dups = 0; - int err = 0; - - if (repair) { - /* - * if we're doing a repair, we have to make sure - * we don't allocate from the problem extents. 
- * In the worst case, this will be all the - * extents in the FS - */ - cache = search_cache_extent(extent_cache, 0); - while(cache) { - rec = container_of(cache, struct extent_record, cache); - set_extent_dirty(root->fs_info->excluded_extents, - rec->start, - rec->start + rec->max_size - 1); - cache = next_cache_extent(cache); - } - - /* pin down all the corrupted blocks too */ - cache = search_cache_extent(root->fs_info->corrupt_blocks, 0); - while(cache) { - set_extent_dirty(root->fs_info->excluded_extents, - cache->start, - cache->start + cache->size - 1); - cache = next_cache_extent(cache); - } - prune_corrupt_blocks(root->fs_info); - reset_cached_block_groups(root->fs_info); - } - - reset_cached_block_groups(root->fs_info); - - /* - * We need to delete any duplicate entries we find first otherwise we - * could mess up the extent tree when we have backrefs that actually - * belong to a different extent item and not the weird duplicate one. - */ - while (repair && !list_empty(&duplicate_extents)) { - rec = to_extent_record(duplicate_extents.next); - list_del_init(&rec->list); - - /* Sometimes we can find a backref before we find an actual - * extent, so we need to process it a little bit to see if there - * truly are multiple EXTENT_ITEM_KEY's for the same range, or - * if this is a backref screwup. If we need to delete stuff - * process_duplicates() will return 0, otherwise it will return - * 1 and we - */ - if (process_duplicates(extent_cache, rec)) - continue; - ret = delete_duplicate_records(root, rec); - if (ret < 0) - return ret; - /* - * delete_duplicate_records will return the number of entries - * deleted, so if it's greater than 0 then we know we actually - * did something and we need to remove. 
- */ - if (ret) - had_dups = 1; - } - - if (had_dups) - return -EAGAIN; - - while(1) { - int cur_err = 0; - int fix = 0; - - cache = search_cache_extent(extent_cache, 0); - if (!cache) - break; - rec = container_of(cache, struct extent_record, cache); - if (rec->num_duplicates) { - fprintf(stderr, "extent item %llu has multiple extent " - "items\n", (unsigned long long)rec->start); - cur_err = 1; - } - - if (rec->refs != rec->extent_item_refs) { - fprintf(stderr, "ref mismatch on [%llu %llu] ", - (unsigned long long)rec->start, - (unsigned long long)rec->nr); - fprintf(stderr, "extent item %llu, found %llu\n", - (unsigned long long)rec->extent_item_refs, - (unsigned long long)rec->refs); - ret = record_orphan_data_extents(root->fs_info, rec); - if (ret < 0) - goto repair_abort; - fix = ret; - cur_err = 1; - } - if (all_backpointers_checked(rec, 1)) { - fprintf(stderr, "backpointer mismatch on [%llu %llu]\n", - (unsigned long long)rec->start, - (unsigned long long)rec->nr); - fix = 1; - cur_err = 1; - } - if (!rec->owner_ref_checked) { - fprintf(stderr, "owner ref check failed [%llu %llu]\n", - (unsigned long long)rec->start, - (unsigned long long)rec->nr); - fix = 1; - cur_err = 1; - } - - if (repair && fix) { - ret = fixup_extent_refs(root->fs_info, extent_cache, rec); - if (ret) - goto repair_abort; - } - - - if (rec->bad_full_backref) { - fprintf(stderr, "bad full backref, on [%llu]\n", - (unsigned long long)rec->start); - if (repair) { - ret = fixup_extent_flags(root->fs_info, rec); - if (ret) - goto repair_abort; - fix = 1; - } - cur_err = 1; - } - /* - * Although it's not a extent ref's problem, we reuse this - * routine for error reporting. - * No repair function yet. 
- */ - if (rec->crossing_stripes) { - fprintf(stderr, - "bad metadata [%llu, %llu) crossing stripe boundary\n", - rec->start, rec->start + rec->max_size); - cur_err = 1; - } - - if (rec->wrong_chunk_type) { - fprintf(stderr, - "bad extent [%llu, %llu), type mismatch with chunk\n", - rec->start, rec->start + rec->max_size); - cur_err = 1; - } - - err = cur_err; - remove_cache_extent(extent_cache, cache); - free_all_extent_backrefs(rec); - if (!init_extent_tree && repair && (!cur_err || fix)) - clear_extent_dirty(root->fs_info->excluded_extents, - rec->start, - rec->start + rec->max_size - 1); - free(rec); - } -repair_abort: - if (repair) { - if (ret && ret != -EAGAIN) { - fprintf(stderr, "failed to repair damaged filesystem, aborting\n"); - exit(1); - } else if (!ret) { - struct btrfs_trans_handle *trans; - - root = root->fs_info->extent_root; - trans = btrfs_start_transaction(root, 1); - if (IS_ERR(trans)) { - ret = PTR_ERR(trans); - goto repair_abort; - } - - ret = btrfs_fix_block_accounting(trans, root); - if (ret) - goto repair_abort; - ret = btrfs_commit_transaction(trans, root); - if (ret) - goto repair_abort; - } - return ret; - } - - if (err) - err = -EIO; - return err; -} - -u64 calc_stripe_length(u64 type, u64 length, int num_stripes) -{ - u64 stripe_size; - - if (type & BTRFS_BLOCK_GROUP_RAID0) { - stripe_size = length; - stripe_size /= num_stripes; - } else if (type & BTRFS_BLOCK_GROUP_RAID10) { - stripe_size = length * 2; - stripe_size /= num_stripes; - } else if (type & BTRFS_BLOCK_GROUP_RAID5) { - stripe_size = length; - stripe_size /= (num_stripes - 1); - } else if (type & BTRFS_BLOCK_GROUP_RAID6) { - stripe_size = length; - stripe_size /= (num_stripes - 2); - } else { - stripe_size = length; - } - return stripe_size; -} - -/* - * Check the chunk with its block group/dev list ref: - * Return 0 if all refs seems valid. 
- * Return 1 if part of refs seems valid, need later check for rebuild ref - * like missing block group and needs to search extent tree to rebuild them. - * Return -1 if essential refs are missing and unable to rebuild. - */ -static int check_chunk_refs(struct chunk_record *chunk_rec, - struct block_group_tree *block_group_cache, - struct device_extent_tree *dev_extent_cache, - int silent) -{ - struct cache_extent *block_group_item; - struct block_group_record *block_group_rec; - struct cache_extent *dev_extent_item; - struct device_extent_record *dev_extent_rec; - u64 devid; - u64 offset; - u64 length; - int metadump_v2 = 0; - int i; - int ret = 0; - - block_group_item = lookup_cache_extent(&block_group_cache->tree, - chunk_rec->offset, - chunk_rec->length); - if (block_group_item) { - block_group_rec = container_of(block_group_item, - struct block_group_record, - cache); - if (chunk_rec->length != block_group_rec->offset || - chunk_rec->offset != block_group_rec->objectid || - (!metadump_v2 && - chunk_rec->type_flags != block_group_rec->flags)) { - if (!silent) - fprintf(stderr, - "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n", - chunk_rec->objectid, - chunk_rec->type, - chunk_rec->offset, - chunk_rec->length, - chunk_rec->offset, - chunk_rec->type_flags, - block_group_rec->objectid, - block_group_rec->type, - block_group_rec->offset, - block_group_rec->offset, - block_group_rec->objectid, - block_group_rec->flags); - ret = -1; - } else { - list_del_init(&block_group_rec->list); - chunk_rec->bg_rec = block_group_rec; - } - } else { - if (!silent) - fprintf(stderr, - "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n", - chunk_rec->objectid, - chunk_rec->type, - chunk_rec->offset, - chunk_rec->length, - chunk_rec->offset, - chunk_rec->type_flags); - ret = 1; - } - - if (metadump_v2) - return ret; - - length = 
calc_stripe_length(chunk_rec->type_flags, chunk_rec->length, - chunk_rec->num_stripes); - for (i = 0; i < chunk_rec->num_stripes; ++i) { - devid = chunk_rec->stripes[i].devid; - offset = chunk_rec->stripes[i].offset; - dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree, - devid, offset, length); - if (dev_extent_item) { - dev_extent_rec = container_of(dev_extent_item, - struct device_extent_record, - cache); - if (dev_extent_rec->objectid != devid || - dev_extent_rec->offset != offset || - dev_extent_rec->chunk_offset != chunk_rec->offset || - dev_extent_rec->length != length) { - if (!silent) - fprintf(stderr, - "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n", - chunk_rec->objectid, - chunk_rec->type, - chunk_rec->offset, - chunk_rec->stripes[i].devid, - chunk_rec->stripes[i].offset, - dev_extent_rec->objectid, - dev_extent_rec->offset, - dev_extent_rec->length); - ret = -1; + ret = add_tree_backref(extent_cache, ptr, parent, + owner, 1); + if (ret < 0) { + error( + "add_tree_backref failed (non-leaf block): %s", + strerror(-ret)); + continue; + } + + if (level > 1) { + add_pending(nodes, seen, ptr, size); } else { - list_move(&dev_extent_rec->chunk_list, - &chunk_rec->dextents); + add_pending(pending, seen, ptr, size); } - } else { - if (!silent) - fprintf(stderr, - "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n", - chunk_rec->objectid, - chunk_rec->type, - chunk_rec->offset, - chunk_rec->stripes[i].devid, - chunk_rec->stripes[i].offset); - ret = -1; } + btree_space_waste += (BTRFS_NODEPTRS_PER_BLOCK(fs_info) - + nritems) * sizeof(struct btrfs_key_ptr); } + total_btree_bytes += buf->len; + if (fs_root_objectid(btrfs_header_owner(buf))) + total_fs_tree_bytes += buf->len; + if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID) + total_extent_tree_bytes += buf->len; +out: + free_extent_buffer(buf); return ret; } -/* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */ -int 
check_chunks(struct cache_tree *chunk_cache, - struct block_group_tree *block_group_cache, - struct device_extent_tree *dev_extent_cache, - struct list_head *good, struct list_head *bad, - struct list_head *rebuild, int silent) +static int add_root_to_pending(struct extent_buffer *buf, + struct cache_tree *extent_cache, + struct cache_tree *pending, + struct cache_tree *seen, + struct cache_tree *nodes, + u64 objectid) { - struct cache_extent *chunk_item; - struct chunk_record *chunk_rec; - struct block_group_record *bg_rec; - struct device_extent_record *dext_rec; - int err; - int ret = 0; + struct extent_record tmpl; + int ret; - chunk_item = first_cache_extent(chunk_cache); - while (chunk_item) { - chunk_rec = container_of(chunk_item, struct chunk_record, - cache); - err = check_chunk_refs(chunk_rec, block_group_cache, - dev_extent_cache, silent); - if (err < 0) - ret = err; - if (err == 0 && good) - list_add_tail(&chunk_rec->list, good); - if (err > 0 && rebuild) - list_add_tail(&chunk_rec->list, rebuild); - if (err < 0 && bad) - list_add_tail(&chunk_rec->list, bad); - chunk_item = next_cache_extent(chunk_item); - } + if (btrfs_header_level(buf) > 0) + add_pending(nodes, seen, buf->start, buf->len); + else + add_pending(pending, seen, buf->start, buf->len); - list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) { - if (!silent) - fprintf(stderr, - "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n", - bg_rec->objectid, - bg_rec->offset, - bg_rec->flags); - if (!ret) - ret = 1; - } + memset(&tmpl, 0, sizeof(tmpl)); + tmpl.start = buf->start; + tmpl.nr = buf->len; + tmpl.is_root = 1; + tmpl.refs = 1; + tmpl.metadata = 1; + tmpl.max_size = buf->len; + add_extent_rec(extent_cache, &tmpl); - list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans, - chunk_list) { - if (!silent) - fprintf(stderr, - "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n", - dext_rec->objectid, - dext_rec->offset, - 
dext_rec->length); - if (!ret) - ret = 1; - } + if (objectid == BTRFS_TREE_RELOC_OBJECTID || + btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV) + ret = add_tree_backref(extent_cache, buf->start, buf->start, + 0, 1); + else + ret = add_tree_backref(extent_cache, buf->start, 0, objectid, + 1); return ret; } - -static int check_device_used(struct device_record *dev_rec, - struct device_extent_tree *dext_cache) +/* as we fix the tree, we might be deleting blocks that + * we're tracking for repair. This hook makes sure we + * remove any backrefs for blocks as we are fixing them. + */ +static int free_extent_hook(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + u64 bytenr, u64 num_bytes, u64 parent, + u64 root_objectid, u64 owner, u64 offset, + int refs_to_drop) { + struct extent_record *rec; struct cache_extent *cache; - struct device_extent_record *dev_extent_rec; - u64 total_byte = 0; + int is_data; + struct cache_tree *extent_cache = root->fs_info->fsck_extent_cache; - cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0); - while (cache) { - dev_extent_rec = container_of(cache, - struct device_extent_record, - cache); - if (dev_extent_rec->objectid != dev_rec->devid) + is_data = owner >= BTRFS_FIRST_FREE_OBJECTID; + cache = lookup_cache_extent(extent_cache, bytenr, num_bytes); + if (!cache) + return 0; + + rec = container_of(cache, struct extent_record, cache); + if (is_data) { + struct data_backref *back; + back = find_data_backref(rec, parent, root_objectid, owner, + offset, 1, bytenr, num_bytes); + if (!back) + goto out; + if (back->node.found_ref) { + back->found_ref -= refs_to_drop; + if (rec->refs) + rec->refs -= refs_to_drop; + } + if (back->node.found_extent_tree) { + back->num_refs -= refs_to_drop; + if (rec->extent_item_refs) + rec->extent_item_refs -= refs_to_drop; + } + if (back->found_ref == 0) + back->node.found_ref = 0; + if (back->num_refs == 0) + back->node.found_extent_tree = 0; + + if 
(!back->node.found_extent_tree && back->node.found_ref) { + rb_erase(&back->node.node, &rec->backref_tree); + free(back); + } + } else { + struct tree_backref *back; + back = find_tree_backref(rec, parent, root_objectid); + if (!back) + goto out; + if (back->node.found_ref) { + if (rec->refs) + rec->refs--; + back->node.found_ref = 0; + } + if (back->node.found_extent_tree) { + if (rec->extent_item_refs) + rec->extent_item_refs--; + back->node.found_extent_tree = 0; + } + if (!back->node.found_extent_tree && back->node.found_ref) { + rb_erase(&back->node.node, &rec->backref_tree); + free(back); + } + } + maybe_free_extent_rec(extent_cache, rec); +out: + return 0; +} + +static int delete_extent_records(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, + u64 bytenr) +{ + struct btrfs_key key; + struct btrfs_key found_key; + struct extent_buffer *leaf; + int ret; + int slot; + + + key.objectid = bytenr; + key.type = (u8)-1; + key.offset = (u64)-1; + + while(1) { + ret = btrfs_search_slot(trans, root->fs_info->extent_root, + &key, path, 0, 1); + if (ret < 0) break; - list_del_init(&dev_extent_rec->device_list); - total_byte += dev_extent_rec->length; - cache = next_cache_extent(cache); + if (ret > 0) { + ret = 0; + if (path->slots[0] == 0) + break; + path->slots[0]--; + } + ret = 0; + + leaf = path->nodes[0]; + slot = path->slots[0]; + + btrfs_item_key_to_cpu(leaf, &found_key, slot); + if (found_key.objectid != bytenr) + break; + + if (found_key.type != BTRFS_EXTENT_ITEM_KEY && + found_key.type != BTRFS_METADATA_ITEM_KEY && + found_key.type != BTRFS_TREE_BLOCK_REF_KEY && + found_key.type != BTRFS_EXTENT_DATA_REF_KEY && + found_key.type != BTRFS_EXTENT_REF_V0_KEY && + found_key.type != BTRFS_SHARED_BLOCK_REF_KEY && + found_key.type != BTRFS_SHARED_DATA_REF_KEY) { + btrfs_release_path(path); + if (found_key.type == 0) { + if (found_key.offset == 0) + break; + key.offset = found_key.offset - 1; + key.type = found_key.type; + } + 
key.type = found_key.type - 1; + key.offset = (u64)-1; + continue; + } + + fprintf(stderr, "repair deleting extent record: key %Lu %u %Lu\n", + found_key.objectid, found_key.type, found_key.offset); + + ret = btrfs_del_item(trans, root->fs_info->extent_root, path); + if (ret) + break; + btrfs_release_path(path); + + if (found_key.type == BTRFS_EXTENT_ITEM_KEY || + found_key.type == BTRFS_METADATA_ITEM_KEY) { + u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ? + found_key.offset : root->fs_info->nodesize; + + ret = btrfs_update_block_group(root, bytenr, + bytes, 0, 0); + if (ret) + break; + } } - if (total_byte != dev_rec->byte_used) { - fprintf(stderr, - "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n", - total_byte, dev_rec->byte_used, dev_rec->objectid, - dev_rec->type, dev_rec->offset); - return -1; - } else { - return 0; - } + btrfs_release_path(path); + return ret; } /* - * Unlike device size alignment check above, some super total_bytes check - * failure can lead to mount failure for newer kernel. - * - * So this function will return the error for a fatal super total_bytes problem. + * for a single backref, this will allocate a new extent + * and add the backref to it. 
*/ -static bool is_super_size_valid(struct btrfs_fs_info *fs_info) +static int record_extent(struct btrfs_trans_handle *trans, + struct btrfs_fs_info *info, + struct btrfs_path *path, + struct extent_record *rec, + struct extent_backref *back, + int allocated, u64 flags) { - struct btrfs_device *dev; - struct list_head *dev_list = &fs_info->fs_devices->devices; - u64 total_bytes = 0; - u64 super_bytes = btrfs_super_total_bytes(fs_info->super_copy); + int ret = 0; + struct btrfs_root *extent_root = info->extent_root; + struct extent_buffer *leaf; + struct btrfs_key ins_key; + struct btrfs_extent_item *ei; + struct data_backref *dback; + struct btrfs_tree_block_info *bi; - list_for_each_entry(dev, dev_list, dev_list) - total_bytes += dev->total_bytes; + if (!back->is_data) + rec->max_size = max_t(u64, rec->max_size, + info->nodesize); - /* Important check, which can cause unmountable fs */ - if (super_bytes < total_bytes) { - error("super total bytes %llu smaller than real device(s) size %llu", - super_bytes, total_bytes); - error("mounting this fs may fail for newer kernels"); - error("this can be fixed by 'btrfs rescue fix-device-size'"); - return false; - } + if (!allocated) { + u32 item_size = sizeof(*ei); - /* - * Optional check, just to make everything aligned and match with each - * other. - * - * For a btrfs-image restored fs, we don't need to check it anyway. 
- */ - if (btrfs_super_flags(fs_info->super_copy) & - (BTRFS_SUPER_FLAG_METADUMP | BTRFS_SUPER_FLAG_METADUMP_V2)) - return true; - if (!IS_ALIGNED(super_bytes, fs_info->sectorsize) || - !IS_ALIGNED(total_bytes, fs_info->sectorsize) || - super_bytes != total_bytes) { - warning("minor unaligned/mismatch device size detected"); - warning( - "recommended to use 'btrfs rescue fix-device-size' to fix it"); - } - return true; -} + if (!back->is_data) + item_size += sizeof(*bi); -/* check btrfs_dev_item -> btrfs_dev_extent */ -static int check_devices(struct rb_root *dev_cache, - struct device_extent_tree *dev_extent_cache) -{ - struct rb_node *dev_node; - struct device_record *dev_rec; - struct device_extent_record *dext_rec; - int err; - int ret = 0; + ins_key.objectid = rec->start; + ins_key.offset = rec->max_size; + ins_key.type = BTRFS_EXTENT_ITEM_KEY; - dev_node = rb_first(dev_cache); - while (dev_node) { - dev_rec = container_of(dev_node, struct device_record, node); - err = check_device_used(dev_rec, dev_extent_cache); - if (err) - ret = err; + ret = btrfs_insert_empty_item(trans, extent_root, path, + &ins_key, item_size); + if (ret) + goto fail; - check_dev_size_alignment(dev_rec->devid, dev_rec->total_byte, - global_info->sectorsize); - dev_node = rb_next(dev_node); - } - list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans, - device_list) { - fprintf(stderr, - "Device extent[%llu, %llu, %llu] didn't find its device.\n", - dext_rec->objectid, dext_rec->offset, dext_rec->length); - if (!ret) - ret = 1; + leaf = path->nodes[0]; + ei = btrfs_item_ptr(leaf, path->slots[0], + struct btrfs_extent_item); + + btrfs_set_extent_refs(leaf, ei, 0); + btrfs_set_extent_generation(leaf, ei, rec->generation); + + if (back->is_data) { + btrfs_set_extent_flags(leaf, ei, + BTRFS_EXTENT_FLAG_DATA); + } else { + struct btrfs_disk_key copy_key;; + + bi = (struct btrfs_tree_block_info *)(ei + 1); + memset_extent_buffer(leaf, 0, (unsigned long)bi, + sizeof(*bi)); + + 
btrfs_set_disk_key_objectid(&copy_key, + rec->info_objectid); + btrfs_set_disk_key_type(&copy_key, 0); + btrfs_set_disk_key_offset(&copy_key, 0); + + btrfs_set_tree_block_level(leaf, bi, rec->info_level); + btrfs_set_tree_block_key(leaf, bi, &copy_key); + + btrfs_set_extent_flags(leaf, ei, + BTRFS_EXTENT_FLAG_TREE_BLOCK | flags); + } + + btrfs_mark_buffer_dirty(leaf); + ret = btrfs_update_block_group(extent_root, rec->start, + rec->max_size, 1, 0); + if (ret) + goto fail; + btrfs_release_path(path); } - return ret; -} -static int add_root_item_to_list(struct list_head *head, - u64 objectid, u64 bytenr, u64 last_snapshot, - u8 level, u8 drop_level, - struct btrfs_key *drop_key) -{ + if (back->is_data) { + u64 parent; + int i; - struct root_item_record *ri_rec; - ri_rec = malloc(sizeof(*ri_rec)); - if (!ri_rec) - return -ENOMEM; - ri_rec->bytenr = bytenr; - ri_rec->objectid = objectid; - ri_rec->level = level; - ri_rec->drop_level = drop_level; - ri_rec->last_snapshot = last_snapshot; - if (drop_key) - memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key)); - list_add_tail(&ri_rec->list, head); + dback = to_data_backref(back); + if (back->full_backref) + parent = dback->parent; + else + parent = 0; - return 0; + for (i = 0; i < dback->found_ref; i++) { + /* if parent != 0, we're doing a full backref + * passing BTRFS_FIRST_FREE_OBJECTID as the owner + * just makes the backref allocator create a data + * backref + */ + ret = btrfs_inc_extent_ref(trans, info->extent_root, + rec->start, rec->max_size, + parent, + dback->root, + parent ? + BTRFS_FIRST_FREE_OBJECTID : + dback->owner, + dback->offset); + if (ret) + break; + } + fprintf(stderr, "adding new data backref" + " on %llu %s %llu owner %llu" + " offset %llu found %d\n", + (unsigned long long)rec->start, + back->full_backref ? + "parent" : "root", + back->full_backref ? 
+ (unsigned long long)parent : + (unsigned long long)dback->root, + (unsigned long long)dback->owner, + (unsigned long long)dback->offset, + dback->found_ref); + } else { + u64 parent; + struct tree_backref *tback; + + tback = to_tree_backref(back); + if (back->full_backref) + parent = tback->parent; + else + parent = 0; + + ret = btrfs_inc_extent_ref(trans, info->extent_root, + rec->start, rec->max_size, + parent, tback->root, 0, 0); + fprintf(stderr, "adding new tree backref on " + "start %llu len %llu parent %llu root %llu\n", + rec->start, rec->max_size, parent, tback->root); + } +fail: + btrfs_release_path(path); + return ret; } -static void free_root_item_list(struct list_head *list) +static struct extent_entry *find_entry(struct list_head *entries, + u64 bytenr, u64 bytes) { - struct root_item_record *ri_rec; + struct extent_entry *entry = NULL; - while (!list_empty(list)) { - ri_rec = list_first_entry(list, struct root_item_record, - list); - list_del_init(&ri_rec->list); - free(ri_rec); + list_for_each_entry(entry, entries, list) { + if (entry->bytenr == bytenr && entry->bytes == bytes) + return entry; } + + return NULL; } -static int deal_root_from_list(struct list_head *list, - struct btrfs_root *root, - struct block_info *bits, - int bits_nr, - struct cache_tree *pending, - struct cache_tree *seen, - struct cache_tree *reada, - struct cache_tree *nodes, - struct cache_tree *extent_cache, - struct cache_tree *chunk_cache, - struct rb_root *dev_cache, - struct block_group_tree *block_group_cache, - struct device_extent_tree *dev_extent_cache) +static struct extent_entry *find_most_right_entry(struct list_head *entries) { - int ret = 0; - u64 last; + struct extent_entry *entry, *best = NULL, *prev = NULL; + + list_for_each_entry(entry, entries, list) { + /* + * If there are as many broken entries as entries then we know + * not to trust this particular entry. 
+ */ + if (entry->broken == entry->count) + continue; - while (!list_empty(list)) { - struct root_item_record *rec; - struct extent_buffer *buf; - rec = list_entry(list->next, - struct root_item_record, list); - last = 0; - buf = read_tree_block(root->fs_info, rec->bytenr, 0); - if (!extent_buffer_uptodate(buf)) { - free_extent_buffer(buf); - ret = -EIO; - break; - } - ret = add_root_to_pending(buf, extent_cache, pending, - seen, nodes, rec->objectid); - if (ret < 0) - break; /* - * To rebuild extent tree, we need deal with snapshot - * one by one, otherwise we deal with node firstly which - * can maximize readahead. + * Special case, when there are only two entries and 'best' is + * the first one */ - while (1) { - ret = run_next_block(root, bits, bits_nr, &last, - pending, seen, reada, nodes, - extent_cache, chunk_cache, - dev_cache, block_group_cache, - dev_extent_cache, rec); - if (ret != 0) - break; + if (!prev) { + best = entry; + prev = entry; + continue; } - free_extent_buffer(buf); - list_del(&rec->list); - free(rec); - if (ret < 0) - break; - } - while (ret >= 0) { - ret = run_next_block(root, bits, bits_nr, &last, pending, seen, - reada, nodes, extent_cache, chunk_cache, - dev_cache, block_group_cache, - dev_extent_cache, NULL); - if (ret != 0) { - if (ret > 0) - ret = 0; - break; + + /* + * If our current entry == best then we can't be sure our best + * is really the best, so we need to keep searching. + */ + if (best && best->count == entry->count) { + prev = entry; + best = NULL; + continue; } + + /* Prev == entry, not good enough, have to keep searching */ + if (!prev->broken && prev->count == entry->count) + continue; + + if (!best) + best = (prev->count > entry->count) ? 
prev : entry; + else if (best->count < entry->count) + best = entry; + prev = entry; } - return ret; + + return best; } -static int check_chunks_and_extents(struct btrfs_fs_info *fs_info) +static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path, + struct data_backref *dback, struct extent_entry *entry) { - struct rb_root dev_cache; - struct cache_tree chunk_cache; - struct block_group_tree block_group_cache; - struct device_extent_tree dev_extent_cache; - struct cache_tree extent_cache; - struct cache_tree seen; - struct cache_tree pending; - struct cache_tree reada; - struct cache_tree nodes; - struct extent_io_tree excluded_extents; - struct cache_tree corrupt_blocks; - struct btrfs_path path; - struct btrfs_key key; - struct btrfs_key found_key; - int ret, err = 0; - struct block_info *bits; - int bits_nr; - struct extent_buffer *leaf; - int slot; - struct btrfs_root_item ri; - struct list_head dropping_trees; - struct list_head normal_trees; - struct btrfs_root *root1; + struct btrfs_trans_handle *trans; struct btrfs_root *root; - u64 objectid; - u8 level; - - root = fs_info->fs_root; - dev_cache = RB_ROOT; - cache_tree_init(&chunk_cache); - block_group_tree_init(&block_group_cache); - device_extent_tree_init(&dev_extent_cache); - - cache_tree_init(&extent_cache); - cache_tree_init(&seen); - cache_tree_init(&pending); - cache_tree_init(&nodes); - cache_tree_init(&reada); - cache_tree_init(&corrupt_blocks); - extent_io_tree_init(&excluded_extents); - INIT_LIST_HEAD(&dropping_trees); - INIT_LIST_HEAD(&normal_trees); - - if (repair) { - fs_info->excluded_extents = &excluded_extents; - fs_info->fsck_extent_cache = &extent_cache; - fs_info->free_extent_hook = free_extent_hook; - fs_info->corrupt_blocks = &corrupt_blocks; - } + struct btrfs_file_extent_item *fi; + struct extent_buffer *leaf; + struct btrfs_key key; + u64 bytenr, bytes; + int ret, err; - bits_nr = 1024; - bits = malloc(bits_nr * sizeof(struct block_info)); - if (!bits) { - 
perror("malloc"); - exit(1); + key.objectid = dback->root; + key.type = BTRFS_ROOT_ITEM_KEY; + key.offset = (u64)-1; + root = btrfs_read_fs_root(info, &key); + if (IS_ERR(root)) { + fprintf(stderr, "Couldn't find root for our ref\n"); + return -EINVAL; } - if (ctx.progress_enabled) { - ctx.tp = TASK_EXTENTS; - task_start(ctx.info); + /* + * The backref points to the original offset of the extent if it was + * split, so we need to search down to the offset we have and then walk + * forward until we find the backref we're looking for. + */ + key.objectid = dback->owner; + key.type = BTRFS_EXTENT_DATA_KEY; + key.offset = dback->offset; + ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); + if (ret < 0) { + fprintf(stderr, "Error looking up ref %d\n", ret); + return ret; } -again: - root1 = fs_info->tree_root; - level = btrfs_header_level(root1->node); - ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid, - root1->node->start, 0, level, 0, NULL); - if (ret < 0) - goto out; - root1 = fs_info->chunk_root; - level = btrfs_header_level(root1->node); - ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid, - root1->node->start, 0, level, 0, NULL); - if (ret < 0) - goto out; - btrfs_init_path(&path); - key.offset = 0; - key.objectid = 0; - key.type = BTRFS_ROOT_ITEM_KEY; - ret = btrfs_search_slot(NULL, fs_info->tree_root, &key, &path, 0, 0); - if (ret < 0) - goto out; - while(1) { - leaf = path.nodes[0]; - slot = path.slots[0]; - if (slot >= btrfs_header_nritems(path.nodes[0])) { - ret = btrfs_next_leaf(root, &path); - if (ret != 0) - break; - leaf = path.nodes[0]; - slot = path.slots[0]; - } - btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]); - if (found_key.type == BTRFS_ROOT_ITEM_KEY) { - unsigned long offset; - u64 last_snapshot; - - offset = btrfs_item_ptr_offset(leaf, path.slots[0]); - read_extent_buffer(leaf, &ri, offset, sizeof(ri)); - last_snapshot = btrfs_root_last_snapshot(&ri); - if 
(btrfs_disk_key_objectid(&ri.drop_progress) == 0) { - level = btrfs_root_level(&ri); - ret = add_root_item_to_list(&normal_trees, - found_key.objectid, - btrfs_root_bytenr(&ri), - last_snapshot, level, - 0, NULL); - if (ret < 0) - goto out; - } else { - level = btrfs_root_level(&ri); - objectid = found_key.objectid; - btrfs_disk_key_to_cpu(&found_key, - &ri.drop_progress); - ret = add_root_item_to_list(&dropping_trees, - objectid, - btrfs_root_bytenr(&ri), - last_snapshot, level, - ri.drop_level, &found_key); - if (ret < 0) - goto out; + while (1) { + if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) { + ret = btrfs_next_leaf(root, path); + if (ret) { + fprintf(stderr, "Couldn't find our ref, next\n"); + return -EINVAL; } } - path.slots[0]++; + leaf = path->nodes[0]; + btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); + if (key.objectid != dback->owner || + key.type != BTRFS_EXTENT_DATA_KEY) { + fprintf(stderr, "Couldn't find our ref, search\n"); + return -EINVAL; + } + fi = btrfs_item_ptr(leaf, path->slots[0], + struct btrfs_file_extent_item); + bytenr = btrfs_file_extent_disk_bytenr(leaf, fi); + bytes = btrfs_file_extent_disk_num_bytes(leaf, fi); + + if (bytenr == dback->disk_bytenr && bytes == dback->bytes) + break; + path->slots[0]++; } - btrfs_release_path(&path); + + btrfs_release_path(path); + + trans = btrfs_start_transaction(root, 1); + if (IS_ERR(trans)) + return PTR_ERR(trans); /* - * check_block can return -EAGAIN if it fixes something, please keep - * this in mind when dealing with return values from these functions, if - * we get -EAGAIN we want to fall through and restart the loop. + * Ok we have the key of the file extent we want to fix, now we can cow + * down to the thing and fix it. 
*/ - ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending, - &seen, &reada, &nodes, &extent_cache, - &chunk_cache, &dev_cache, &block_group_cache, - &dev_extent_cache); + ret = btrfs_search_slot(trans, root, &key, path, 0, 1); if (ret < 0) { - if (ret == -EAGAIN) - goto loop; + fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n", + key.objectid, key.type, key.offset, ret); goto out; } - ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr, - &pending, &seen, &reada, &nodes, - &extent_cache, &chunk_cache, &dev_cache, - &block_group_cache, &dev_extent_cache); - if (ret < 0) { - if (ret == -EAGAIN) - goto loop; + if (ret > 0) { + fprintf(stderr, "Well that's odd, we just found this key " + "[%Lu, %u, %Lu]\n", key.objectid, key.type, + key.offset); + ret = -EINVAL; goto out; } + leaf = path->nodes[0]; + fi = btrfs_item_ptr(leaf, path->slots[0], + struct btrfs_file_extent_item); - ret = check_chunks(&chunk_cache, &block_group_cache, - &dev_extent_cache, NULL, NULL, NULL, 0); - if (ret) { - if (ret == -EAGAIN) - goto loop; - err = ret; + if (btrfs_file_extent_compression(leaf, fi) && + dback->disk_bytenr != entry->bytenr) { + fprintf(stderr, "Ref doesn't match the record start and is " + "compressed, please take a btrfs-image of this file " + "system and send it to a btrfs developer so they can " + "complete this functionality for bytenr %Lu\n", + dback->disk_bytenr); + ret = -EINVAL; + goto out; } - ret = check_extent_refs(root, &extent_cache); - if (ret < 0) { - if (ret == -EAGAIN) - goto loop; - goto out; + if (dback->node.broken && dback->disk_bytenr != entry->bytenr) { + btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr); + } else if (dback->disk_bytenr > entry->bytenr) { + u64 off_diff, offset; + + off_diff = dback->disk_bytenr - entry->bytenr; + offset = btrfs_file_extent_offset(leaf, fi); + if (dback->disk_bytenr + offset + + btrfs_file_extent_num_bytes(leaf, fi) > + entry->bytenr + entry->bytes) { + fprintf(stderr, 
"Ref is past the entry end, please " + "take a btrfs-image of this file system and " + "send it to a btrfs developer, ref %Lu\n", + dback->disk_bytenr); + ret = -EINVAL; + goto out; + } + offset += off_diff; + btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr); + btrfs_set_file_extent_offset(leaf, fi, offset); + } else if (dback->disk_bytenr < entry->bytenr) { + u64 offset; + + offset = btrfs_file_extent_offset(leaf, fi); + if (dback->disk_bytenr + offset < entry->bytenr) { + fprintf(stderr, "Ref is before the entry start, please" + " take a btrfs-image of this file system and " + "send it to a btrfs developer, ref %Lu\n", + dback->disk_bytenr); + ret = -EINVAL; + goto out; + } + + offset += dback->disk_bytenr; + offset -= entry->bytenr; + btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr); + btrfs_set_file_extent_offset(leaf, fi, offset); } - ret = check_devices(&dev_cache, &dev_extent_cache); - if (ret && err) - ret = err; + btrfs_set_file_extent_disk_num_bytes(leaf, fi, entry->bytes); + /* + * Chances are if disk_num_bytes were wrong then so is ram_bytes, but + * only do this if we aren't using compression, otherwise it's a + * trickier case. 
+ */ + if (!btrfs_file_extent_compression(leaf, fi)) + btrfs_set_file_extent_ram_bytes(leaf, fi, entry->bytes); + else + printf("ram bytes may be wrong?\n"); + btrfs_mark_buffer_dirty(leaf); out: - task_stop(ctx.info); - if (repair) { - free_corrupt_blocks_tree(fs_info->corrupt_blocks); - extent_io_tree_cleanup(&excluded_extents); - fs_info->fsck_extent_cache = NULL; - fs_info->free_extent_hook = NULL; - fs_info->corrupt_blocks = NULL; - fs_info->excluded_extents = NULL; - } - free(bits); - free_chunk_cache_tree(&chunk_cache); - free_device_cache_tree(&dev_cache); - free_block_group_tree(&block_group_cache); - free_device_extent_tree(&dev_extent_cache); - free_extent_cache_tree(&seen); - free_extent_cache_tree(&pending); - free_extent_cache_tree(&reada); - free_extent_cache_tree(&nodes); - free_root_item_list(&normal_trees); - free_root_item_list(&dropping_trees); - return ret; -loop: - free_corrupt_blocks_tree(fs_info->corrupt_blocks); - free_extent_cache_tree(&seen); - free_extent_cache_tree(&pending); - free_extent_cache_tree(&reada); - free_extent_cache_tree(&nodes); - free_chunk_cache_tree(&chunk_cache); - free_block_group_tree(&block_group_cache); - free_device_cache_tree(&dev_cache); - free_device_extent_tree(&dev_extent_cache); - free_extent_record_cache(&extent_cache); - free_root_item_list(&normal_trees); - free_root_item_list(&dropping_trees); - extent_io_tree_cleanup(&excluded_extents); - goto again; + err = btrfs_commit_transaction(trans, root); + btrfs_release_path(path); + return ret ? 
ret : err; } -static int check_extent_inline_ref(struct extent_buffer *eb, - struct btrfs_key *key, struct btrfs_extent_inline_ref *iref) +static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path, + struct extent_record *rec) { - int ret; - u8 type = btrfs_extent_inline_ref_type(eb, iref); + struct extent_backref *back, *tmp; + struct data_backref *dback; + struct extent_entry *entry, *best = NULL; + LIST_HEAD(entries); + int nr_entries = 0; + int broken_entries = 0; + int ret = 0; + short mismatch = 0; - switch (type) { - case BTRFS_TREE_BLOCK_REF_KEY: - case BTRFS_EXTENT_DATA_REF_KEY: - case BTRFS_SHARED_BLOCK_REF_KEY: - case BTRFS_SHARED_DATA_REF_KEY: - ret = 0; - break; - default: - error("extent[%llu %u %llu] has unknown ref type: %d", - key->objectid, key->type, key->offset, type); - ret = UNKNOWN_TYPE; - break; - } + /* + * Metadata is easy and the backrefs should always agree on bytenr and + * size, if not we've got bigger issues. + */ + if (rec->metadata) + return 0; - return ret; -} + rbtree_postorder_for_each_entry_safe(back, tmp, + &rec->backref_tree, node) { + if (back->full_backref || !back->is_data) + continue; -/* - * Check backrefs of a tree block given by @bytenr or @eb. 
- * - * @root: the root containing the @bytenr or @eb - * @eb: tree block extent buffer, can be NULL - * @bytenr: bytenr of the tree block to search - * @level: tree level of the tree block - * @owner: owner of the tree block - * - * Return >0 for any error found and output error message - * Return 0 for no error found - */ -static int check_tree_block_ref(struct btrfs_root *root, - struct extent_buffer *eb, u64 bytenr, - int level, u64 owner, struct node_refs *nrefs) -{ - struct btrfs_key key; - struct btrfs_root *extent_root = root->fs_info->extent_root; - struct btrfs_path path; - struct btrfs_extent_item *ei; - struct btrfs_extent_inline_ref *iref; - struct extent_buffer *leaf; - unsigned long end; - unsigned long ptr; - int slot; - int skinny_level; - int root_level = btrfs_header_level(root->node); - int type; - u32 nodesize = root->fs_info->nodesize; - u32 item_size; - u64 offset; - int found_ref = 0; - int err = 0; - int ret; - int strict = 1; - int parent = 0; + dback = to_data_backref(back); - btrfs_init_path(&path); - key.objectid = bytenr; - if (btrfs_fs_incompat(root->fs_info, SKINNY_METADATA)) - key.type = BTRFS_METADATA_ITEM_KEY; - else - key.type = BTRFS_EXTENT_ITEM_KEY; - key.offset = (u64)-1; + /* + * We only pay attention to backrefs that we found a real + * backref for. + */ + if (dback->found_ref == 0) + continue; - /* Search for the backref in extent tree */ - ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0); - if (ret < 0) { - err |= BACKREF_MISSING; - goto out; + /* + * For now we only catch when the bytes don't match, not the + * bytenr. We can easily do this at the same time, but I want + * to have a fs image to test on before we just add repair + * functionality willy-nilly so we know we won't screw up the + * repair. 
+ */ + + entry = find_entry(&entries, dback->disk_bytenr, + dback->bytes); + if (!entry) { + entry = malloc(sizeof(struct extent_entry)); + if (!entry) { + ret = -ENOMEM; + goto out; + } + memset(entry, 0, sizeof(*entry)); + entry->bytenr = dback->disk_bytenr; + entry->bytes = dback->bytes; + list_add_tail(&entry->list, &entries); + nr_entries++; + } + + /* + * If we only have on entry we may think the entries agree when + * in reality they don't so we have to do some extra checking. + */ + if (dback->disk_bytenr != rec->start || + dback->bytes != rec->nr || back->broken) + mismatch = 1; + + if (back->broken) { + entry->broken++; + broken_entries++; + } + + entry->count++; } - ret = btrfs_previous_extent_item(extent_root, &path, bytenr); - if (ret) { - err |= BACKREF_MISSING; + + /* Yay all the backrefs agree, carry on good sir */ + if (nr_entries <= 1 && !mismatch) goto out; + + fprintf(stderr, "attempting to repair backref discrepency for bytenr " + "%Lu\n", rec->start); + + /* + * First we want to see if the backrefs can agree amongst themselves who + * is right, so figure out which one of the entries has the highest + * count. + */ + best = find_most_right_entry(&entries); + + /* + * Ok so we may have an even split between what the backrefs think, so + * this is where we use the extent ref to see what it thinks. + */ + if (!best) { + entry = find_entry(&entries, rec->start, rec->nr); + if (!entry && (!broken_entries || !rec->found_rec)) { + fprintf(stderr, "Backrefs don't agree with each other " + "and extent record doesn't agree with anybody," + " so we can't fix bytenr %Lu bytes %Lu\n", + rec->start, rec->nr); + ret = -EINVAL; + goto out; + } else if (!entry) { + /* + * Ok our backrefs were broken, we'll assume this is the + * correct value and add an entry for this range. 
+ */ + entry = malloc(sizeof(struct extent_entry)); + if (!entry) { + ret = -ENOMEM; + goto out; + } + memset(entry, 0, sizeof(*entry)); + entry->bytenr = rec->start; + entry->bytes = rec->nr; + list_add_tail(&entry->list, &entries); + nr_entries++; + } + entry->count++; + best = find_most_right_entry(&entries); + if (!best) { + fprintf(stderr, "Backrefs and extent record evenly " + "split on who is right, this is going to " + "require user input to fix bytenr %Lu bytes " + "%Lu\n", rec->start, rec->nr); + ret = -EINVAL; + goto out; + } } - leaf = path.nodes[0]; - slot = path.slots[0]; - btrfs_item_key_to_cpu(leaf, &key, slot); - - ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item); - - if (key.type == BTRFS_METADATA_ITEM_KEY) { - skinny_level = (int)key.offset; - iref = (struct btrfs_extent_inline_ref *)(ei + 1); - } else { - struct btrfs_tree_block_info *info; - - info = (struct btrfs_tree_block_info *)(ei + 1); - skinny_level = btrfs_tree_block_level(leaf, info); - iref = (struct btrfs_extent_inline_ref *)(info + 1); + /* + * I don't think this can happen currently as we'll abort() if we catch + * this case higher up, but in case somebody removes that we still can't + * deal with it properly here yet, so just bail out of that's the case. + */ + if (best->bytenr != rec->start) { + fprintf(stderr, "Extent start and backref starts don't match, " + "please use btrfs-image on this file system and send " + "it to a btrfs developer so they can make fsck fix " + "this particular case. bytenr is %Lu, bytes is %Lu\n", + rec->start, rec->nr); + ret = -EINVAL; + goto out; } + /* + * Ok great we all agreed on an extent record, let's go find the real + * references and fix up the ones that don't match. 
+ */ + rbtree_postorder_for_each_entry_safe(back, tmp, + &rec->backref_tree, node) { + if (back->full_backref || !back->is_data) + continue; - if (eb) { - u64 header_gen; - u64 extent_gen; + dback = to_data_backref(back); /* - * Due to the feature of shared tree blocks, if the upper node - * is a fs root or shared node, the extent of checked node may - * not be updated until the next CoW. + * Still ignoring backrefs that don't have a real ref attached + * to them. */ - if (nrefs) - strict = should_check_extent_strictly(root, nrefs, - level); - if (!(btrfs_extent_flags(leaf, ei) & - BTRFS_EXTENT_FLAG_TREE_BLOCK)) { - error( - "extent[%llu %u] backref type mismatch, missing bit: %llx", - key.objectid, nodesize, - BTRFS_EXTENT_FLAG_TREE_BLOCK); - err = BACKREF_MISMATCH; - } - header_gen = btrfs_header_generation(eb); - extent_gen = btrfs_extent_generation(leaf, ei); - if (header_gen != extent_gen) { - error( - "extent[%llu %u] backref generation mismatch, wanted: %llu, have: %llu", - key.objectid, nodesize, header_gen, - extent_gen); - err = BACKREF_MISMATCH; - } - if (level != skinny_level) { - error( - "extent[%llu %u] level mismatch, wanted: %u, have: %u", - key.objectid, nodesize, level, skinny_level); - err = BACKREF_MISMATCH; - } - if (!is_fstree(owner) && btrfs_extent_refs(leaf, ei) != 1) { - error( - "extent[%llu %u] is referred by other roots than %llu", - key.objectid, nodesize, root->objectid); - err = BACKREF_MISMATCH; - } + if (dback->found_ref == 0) + continue; + + if (dback->bytes == best->bytes && + dback->disk_bytenr == best->bytenr) + continue; + + ret = repair_ref(info, path, dback, best); + if (ret) + goto out; } /* - * Iterate the extent/metadata item to find the exact backref + * Ok we messed with the actual refs, which means we need to drop our + * entire cache and go back and rescan. I know this is a huge pain and + * adds a lot of extra work, but it's the only way to be safe. 
Once all + * the backrefs agree we may not need to do anything to the extent + * record itself. */ - item_size = btrfs_item_size_nr(leaf, slot); - ptr = (unsigned long)iref; - end = (unsigned long)ei + item_size; - - while (ptr < end) { - iref = (struct btrfs_extent_inline_ref *)ptr; - type = btrfs_extent_inline_ref_type(leaf, iref); - offset = btrfs_extent_inline_ref_offset(leaf, iref); - - ret = check_extent_inline_ref(leaf, &key, iref); - if (ret) { - err |= ret; - break; - } - if (type == BTRFS_TREE_BLOCK_REF_KEY) { - if (offset == root->objectid) - found_ref = 1; - if (!strict && owner == offset) - found_ref = 1; - } else if (type == BTRFS_SHARED_BLOCK_REF_KEY) { - /* - * Backref of tree reloc root points to itself, no need - * to check backref any more. - * - * This may be an error of loop backref, but extent tree - * checker should have already handled it. - * Here we only need to avoid infinite iteration. - */ - if (offset == bytenr) { - found_ref = 1; - } else { - /* - * Check if the backref points to valid - * referencer - */ - found_ref = !check_tree_block_ref( root, NULL, - offset, level + 1, owner, - NULL); - } - } - - if (found_ref) - break; - ptr += btrfs_extent_inline_ref_size(type); + ret = -EAGAIN; +out: + while (!list_empty(&entries)) { + entry = list_entry(entries.next, struct extent_entry, list); + list_del_init(&entry->list); + free(entry); } + return ret; +} + +static int process_duplicates(struct cache_tree *extent_cache, + struct extent_record *rec) +{ + struct extent_record *good, *tmp; + struct cache_extent *cache; + int ret; /* - * Inlined extent item doesn't have what we need, check - * TREE_BLOCK_REF_KEY + * If we found a extent record for this extent then return, or if we + * have more than one duplicate we are likely going to need to delete + * something. 
*/ - if (!found_ref) { - btrfs_release_path(&path); - key.objectid = bytenr; - key.type = BTRFS_TREE_BLOCK_REF_KEY; - key.offset = root->objectid; + if (rec->found_rec || rec->num_duplicates > 1) + return 0; + + /* Shouldn't happen but just in case */ + BUG_ON(!rec->num_duplicates); - ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0); - if (!ret) - found_ref = 1; - } /* - * Finally check SHARED BLOCK REF, any found will be good - * Here we're not doing comprehensive extent backref checking, - * only need to ensure there is some extent referring to this - * tree block. + * So this happens if we end up with a backref that doesn't match the + * actual extent entry. So either the backref is bad or the extent + * entry is bad. Either way we want to have the extent_record actually + * reflect what we found in the extent_tree, so we need to take the + * duplicate out and use that as the extent_record since the only way we + * get a duplicate is if we find a real life BTRFS_EXTENT_ITEM_KEY. 
*/ - if (!found_ref) { - btrfs_release_path(&path); - key.objectid = bytenr; - key.type = BTRFS_SHARED_BLOCK_REF_KEY; - key.offset = (u64)-1; + remove_cache_extent(extent_cache, &rec->cache); - ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0); - if (ret < 0) { - err |= BACKREF_MISSING; - goto out; - } - ret = btrfs_previous_extent_item(extent_root, &path, bytenr); - if (ret) { - err |= BACKREF_MISSING; - goto out; + good = to_extent_record(rec->dups.next); + list_del_init(&good->list); + INIT_LIST_HEAD(&good->backrefs); + INIT_LIST_HEAD(&good->dups); + good->cache.start = good->start; + good->cache.size = good->nr; + good->content_checked = 0; + good->owner_ref_checked = 0; + good->num_duplicates = 0; + good->refs = rec->refs; + list_splice_init(&rec->backrefs, &good->backrefs); + while (1) { + cache = lookup_cache_extent(extent_cache, good->start, + good->nr); + if (!cache) + break; + tmp = container_of(cache, struct extent_record, cache); + + /* + * If we find another overlapping extent and it's found_rec is + * set then it's a duplicate and we need to try and delete + * something. + */ + if (tmp->found_rec || tmp->num_duplicates > 0) { + if (list_empty(&good->list)) + list_add_tail(&good->list, + &duplicate_extents); + good->num_duplicates += tmp->num_duplicates + 1; + list_splice_init(&tmp->dups, &good->dups); + list_del_init(&tmp->list); + list_add_tail(&tmp->list, &good->dups); + remove_cache_extent(extent_cache, &tmp->cache); + continue; } - found_ref = 1; + + /* + * Ok we have another non extent item backed extent rec, so lets + * just add it to this extent and carry on like we did above. 
+ */ + good->refs += tmp->refs; + list_splice_init(&tmp->backrefs, &good->backrefs); + remove_cache_extent(extent_cache, &tmp->cache); + free(tmp); } - if (!found_ref) - err |= BACKREF_MISSING; -out: - btrfs_release_path(&path); - if (nrefs && strict && - level < root_level && nrefs->full_backref[level + 1]) - parent = nrefs->bytenr[level + 1]; - if (eb && (err & BACKREF_MISSING)) - error( - "extent[%llu %u] backref lost (owner: %llu, level: %u) %s %llu", - bytenr, nodesize, owner, level, - parent ? "parent" : "root", - parent ? parent : root->objectid); - return err; + ret = insert_cache_extent(extent_cache, &good->cache); + BUG_ON(ret); + free(rec); + return good->num_duplicates ? 0 : 1; } -/* - * If @err contains BACKREF_MISSING then add extent of the - * file_extent_data_item. - * - * Returns error bits after reapir. - */ -static int repair_extent_data_item(struct btrfs_trans_handle *trans, - struct btrfs_root *root, - struct btrfs_path *pathp, - struct node_refs *nrefs, - int err) +static int delete_duplicate_records(struct btrfs_root *root, + struct extent_record *rec) { - struct btrfs_file_extent_item *fi; - struct btrfs_key fi_key; - struct btrfs_key key; - struct btrfs_extent_item *ei; + struct btrfs_trans_handle *trans; + LIST_HEAD(delete_list); struct btrfs_path path; - struct btrfs_root *extent_root = root->fs_info->extent_root; - struct extent_buffer *eb; - u64 size; - u64 disk_bytenr; - u64 num_bytes; - u64 parent; - u64 offset; - u64 extent_offset; - u64 file_offset; - int generation; - int slot; - int ret = 0; + struct extent_record *tmp, *good, *n; + int nr_del = 0; + int ret = 0, err; + struct btrfs_key key; - eb = pathp->nodes[0]; - slot = pathp->slots[0]; - btrfs_item_key_to_cpu(eb, &fi_key, slot); - fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item); + btrfs_init_path(&path); - if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE || - btrfs_file_extent_disk_bytenr(eb, fi) == 0) - return err; + good = rec; + /* Find the 
record that covers all of the duplicates. */ + list_for_each_entry(tmp, &rec->dups, list) { + if (good->start < tmp->start) + continue; + if (good->nr > tmp->nr) + continue; - file_offset = fi_key.offset; - generation = btrfs_file_extent_generation(eb, fi); - disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi); - num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi); - extent_offset = btrfs_file_extent_offset(eb, fi); - offset = file_offset - extent_offset; + if (tmp->start + tmp->nr < good->start + good->nr) { + fprintf(stderr, "Ok we have overlapping extents that " + "aren't completely covered by each other, this " + "is going to require more careful thought. " + "The extents are [%Lu-%Lu] and [%Lu-%Lu]\n", + tmp->start, tmp->nr, good->start, good->nr); + abort(); + } + good = tmp; + } - /* now repair only adds backref */ - if ((err & BACKREF_MISSING) == 0) - return err; + if (good != rec) + list_add_tail(&rec->list, &delete_list); - /* search extent item */ - key.objectid = disk_bytenr; - key.type = BTRFS_EXTENT_ITEM_KEY; - key.offset = num_bytes; + list_for_each_entry_safe(tmp, n, &rec->dups, list) { + if (tmp == good) + continue; + list_move_tail(&tmp->list, &delete_list); + } - btrfs_init_path(&path); - ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0); - if (ret < 0) { - ret = -EIO; + root = root->fs_info->extent_root; + trans = btrfs_start_transaction(root, 1); + if (IS_ERR(trans)) { + ret = PTR_ERR(trans); goto out; } - /* insert an extent item */ - if (ret > 0) { - key.objectid = disk_bytenr; + list_for_each_entry(tmp, &delete_list, list) { + if (tmp->found_rec == 0) + continue; + key.objectid = tmp->start; key.type = BTRFS_EXTENT_ITEM_KEY; - key.offset = num_bytes; - size = sizeof(*ei); - - btrfs_release_path(&path); - ret = btrfs_insert_empty_item(trans, extent_root, &path, &key, - size); - if (ret) - goto out; - eb = path.nodes[0]; - ei = btrfs_item_ptr(eb, path.slots[0], struct btrfs_extent_item); + key.offset = tmp->nr; - 
btrfs_set_extent_refs(eb, ei, 0); - btrfs_set_extent_generation(eb, ei, generation); - btrfs_set_extent_flags(eb, ei, BTRFS_EXTENT_FLAG_DATA); + /* Shouldn't happen but just in case */ + if (tmp->metadata) { + fprintf(stderr, "Well this shouldn't happen, extent " + "record overlaps but is metadata? " + "[%Lu, %Lu]\n", tmp->start, tmp->nr); + abort(); + } - btrfs_mark_buffer_dirty(eb); - ret = btrfs_update_block_group(extent_root, disk_bytenr, - num_bytes, 1, 0); + ret = btrfs_search_slot(trans, root, &key, &path, -1, 1); + if (ret) { + if (ret > 0) + ret = -EINVAL; + break; + } + ret = btrfs_del_item(trans, root, &path); + if (ret) + break; btrfs_release_path(&path); + nr_del++; + } + err = btrfs_commit_transaction(trans, root); + if (err && !ret) + ret = err; +out: + while (!list_empty(&delete_list)) { + tmp = to_extent_record(delete_list.next); + list_del_init(&tmp->list); + if (tmp == rec) + continue; + free(tmp); } - if (nrefs->full_backref[0]) - parent = btrfs_header_bytenr(eb); - else - parent = 0; - - ret = btrfs_inc_extent_ref(trans, root, disk_bytenr, num_bytes, parent, - root->objectid, - parent ? BTRFS_FIRST_FREE_OBJECTID : fi_key.objectid, - offset); - if (ret) { - error( - "failed to increase extent data backref[%llu %llu] root %llu", - disk_bytenr, num_bytes, root->objectid); - goto out; - } else { - printf("Add one extent data backref [%llu %llu]\n", - disk_bytenr, num_bytes); + while (!list_empty(&rec->dups)) { + tmp = to_extent_record(rec->dups.next); + list_del_init(&tmp->list); + free(tmp); } - err &= ~BACKREF_MISSING; -out: - if (ret) - error("can't repair root %llu extent data item[%llu %llu]", - root->objectid, disk_bytenr, num_bytes); - return err; + btrfs_release_path(&path); + + if (!ret && !nr_del) + rec->num_duplicates = 0; + + return ret ? 
ret : nr_del; } -/* - * Check EXTENT_DATA item, mainly for its dbackref in extent tree - * - * Return >0 any error found and output error message - * Return 0 for no error found - */ -static int check_extent_data_item(struct btrfs_root *root, - struct btrfs_path *pathp, - struct node_refs *nrefs, int account_bytes) +static int find_possible_backrefs(struct btrfs_fs_info *info, + struct btrfs_path *path, + struct cache_tree *extent_cache, + struct extent_record *rec) { + struct btrfs_root *root; + struct extent_backref *back, *tmp; + struct data_backref *dback; + struct cache_extent *cache; struct btrfs_file_extent_item *fi; - struct extent_buffer *eb = pathp->nodes[0]; - struct btrfs_path path; - struct btrfs_root *extent_root = root->fs_info->extent_root; - struct btrfs_key fi_key; - struct btrfs_key dbref_key; - struct extent_buffer *leaf; - struct btrfs_extent_item *ei; - struct btrfs_extent_inline_ref *iref; - struct btrfs_extent_data_ref *dref; - u64 owner; - u64 disk_bytenr; - u64 disk_num_bytes; - u64 extent_num_bytes; - u64 extent_flags; - u64 offset; - u32 item_size; - unsigned long end; - unsigned long ptr; - int type; - int found_dbackref = 0; - int slot = pathp->slots[0]; - int err = 0; + struct btrfs_key key; + u64 bytenr, bytes; int ret; - int strict; - - btrfs_item_key_to_cpu(eb, &fi_key, slot); - fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item); - - /* Nothing to check for hole and inline data extents */ - if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE || - btrfs_file_extent_disk_bytenr(eb, fi) == 0) - return 0; - - disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi); - disk_num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi); - extent_num_bytes = btrfs_file_extent_num_bytes(eb, fi); - offset = btrfs_file_extent_offset(eb, fi); - - /* Check unaligned disk_num_bytes and num_bytes */ - if (!IS_ALIGNED(disk_num_bytes, root->fs_info->sectorsize)) { - error( -"file extent [%llu, %llu] has unaligned disk num bytes: %llu, 
should be aligned to %u", - fi_key.objectid, fi_key.offset, disk_num_bytes, - root->fs_info->sectorsize); - err |= BYTES_UNALIGNED; - } else if (account_bytes) { - data_bytes_allocated += disk_num_bytes; - } - if (!IS_ALIGNED(extent_num_bytes, root->fs_info->sectorsize)) { - error( -"file extent [%llu, %llu] has unaligned num bytes: %llu, should be aligned to %u", - fi_key.objectid, fi_key.offset, extent_num_bytes, - root->fs_info->sectorsize); - err |= BYTES_UNALIGNED; - } else if (account_bytes) { - data_bytes_referenced += extent_num_bytes; - } - owner = btrfs_header_owner(eb); - - /* Check the extent item of the file extent in extent tree */ - btrfs_init_path(&path); - dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi); - dbref_key.type = BTRFS_EXTENT_ITEM_KEY; - dbref_key.offset = btrfs_file_extent_disk_num_bytes(eb, fi); - - ret = btrfs_search_slot(NULL, extent_root, &dbref_key, &path, 0, 0); - if (ret) - goto out; - leaf = path.nodes[0]; - slot = path.slots[0]; - ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item); + rbtree_postorder_for_each_entry_safe(back, tmp, + &rec->backref_tree, node) { + /* Don't care about full backrefs (poor unloved backrefs) */ + if (back->full_backref || !back->is_data) + continue; - extent_flags = btrfs_extent_flags(leaf, ei); + dback = to_data_backref(back); - if (!(extent_flags & BTRFS_EXTENT_FLAG_DATA)) { - error( - "extent[%llu %llu] backref type mismatch, wanted bit: %llx", - disk_bytenr, disk_num_bytes, - BTRFS_EXTENT_FLAG_DATA); - err |= BACKREF_MISMATCH; - } + /* We found this one, we don't need to do a lookup */ + if (dback->found_ref) + continue; - /* Check data backref inside that extent item */ - item_size = btrfs_item_size_nr(leaf, path.slots[0]); - iref = (struct btrfs_extent_inline_ref *)(ei + 1); - ptr = (unsigned long)iref; - end = (unsigned long)ei + item_size; - strict = should_check_extent_strictly(root, nrefs, -1); + key.objectid = dback->root; + key.type = BTRFS_ROOT_ITEM_KEY; + key.offset = 
(u64)-1; - while (ptr < end) { - u64 ref_root; - u64 ref_objectid; - u64 ref_offset; - bool match = false; + root = btrfs_read_fs_root(info, &key); - iref = (struct btrfs_extent_inline_ref *)ptr; - type = btrfs_extent_inline_ref_type(leaf, iref); - dref = (struct btrfs_extent_data_ref *)(&iref->offset); + /* No root, definitely a bad ref, skip */ + if (IS_ERR(root) && PTR_ERR(root) == -ENOENT) + continue; + /* Other err, exit */ + if (IS_ERR(root)) + return PTR_ERR(root); - ret = check_extent_inline_ref(leaf, &dbref_key, iref); + key.objectid = dback->owner; + key.type = BTRFS_EXTENT_DATA_KEY; + key.offset = dback->offset; + ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); if (ret) { - err |= ret; - break; + btrfs_release_path(path); + if (ret < 0) + return ret; + /* Didn't find it, we can carry on */ + ret = 0; + continue; } - if (type == BTRFS_EXTENT_DATA_REF_KEY) { - ref_root = btrfs_extent_data_ref_root(leaf, dref); - ref_objectid = btrfs_extent_data_ref_objectid(leaf, dref); - ref_offset = btrfs_extent_data_ref_offset(leaf, dref); - - if (ref_objectid == fi_key.objectid && - ref_offset == fi_key.offset - offset) - match = true; - if (ref_root == root->objectid && match) - found_dbackref = 1; - else if (!strict && owner == ref_root && match) - found_dbackref = 1; - } else if (type == BTRFS_SHARED_DATA_REF_KEY) { - found_dbackref = !check_tree_block_ref(root, NULL, - btrfs_extent_inline_ref_offset(leaf, iref), - 0, owner, NULL); - } - - if (found_dbackref) - break; - ptr += btrfs_extent_inline_ref_size(type); - } - - if (!found_dbackref) { - btrfs_release_path(&path); - /* Didn't find inlined data backref, try EXTENT_DATA_REF_KEY */ - dbref_key.objectid = btrfs_file_extent_disk_bytenr(eb, fi); - dbref_key.type = BTRFS_EXTENT_DATA_REF_KEY; - dbref_key.offset = hash_extent_data_ref(root->objectid, - fi_key.objectid, fi_key.offset - offset); + fi = btrfs_item_ptr(path->nodes[0], path->slots[0], + struct btrfs_file_extent_item); + bytenr = 
btrfs_file_extent_disk_bytenr(path->nodes[0], fi); + bytes = btrfs_file_extent_disk_num_bytes(path->nodes[0], fi); + btrfs_release_path(path); + cache = lookup_cache_extent(extent_cache, bytenr, 1); + if (cache) { + struct extent_record *tmp; + tmp = container_of(cache, struct extent_record, cache); - ret = btrfs_search_slot(NULL, root->fs_info->extent_root, - &dbref_key, &path, 0, 0); - if (!ret) { - found_dbackref = 1; - goto out; + /* + * If we found an extent record for the bytenr for this + * particular backref then we can't add it to our + * current extent record. We only want to add backrefs + * that don't have a corresponding extent item in the + * extent tree since they likely belong to this record + * and we need to fix it if it doesn't match bytenrs. + */ + if (tmp->found_rec) + continue; } - btrfs_release_path(&path); + dback->found_ref += 1; + dback->disk_bytenr = bytenr; + dback->bytes = bytes; /* - * Neither inlined nor EXTENT_DATA_REF found, try - * SHARED_DATA_REF as last chance. + * Set this so the verify backref code knows not to trust the + * values in this backref. */ - dbref_key.objectid = disk_bytenr; - dbref_key.type = BTRFS_SHARED_DATA_REF_KEY; - dbref_key.offset = eb->start; - - ret = btrfs_search_slot(NULL, root->fs_info->extent_root, - &dbref_key, &path, 0, 0); - if (!ret) { - found_dbackref = 1; - goto out; - } + back->broken = 1; } -out: - if (!found_dbackref) - err |= BACKREF_MISSING; - btrfs_release_path(&path); - if (err & BACKREF_MISSING) { - error("data extent[%llu %llu] backref lost", - disk_bytenr, disk_num_bytes); - } - return err; + return 0; } /* - * Get real tree block level for the case like shared block - * Return >= 0 as tree level - * Return <0 for error + * Record orphan data ref into corresponding root. + * + * Return 0 if the extent item contains data ref and recorded. 
+ * Return 1 if the extent item contains no useful data ref + * On that case, it may contains only shared_dataref or metadata backref + * or the file extent exists(this should be handled by the extent bytenr + * recovery routine) + * Return <0 if something goes wrong. */ -static int query_tree_block_level(struct btrfs_fs_info *fs_info, u64 bytenr) +static int record_orphan_data_extents(struct btrfs_fs_info *fs_info, + struct extent_record *rec) { - struct extent_buffer *eb; - struct btrfs_path path; struct btrfs_key key; - struct btrfs_extent_item *ei; - u64 flags; - u64 transid; - u8 backref_level; - u8 header_level; - int ret; - - /* Search extent tree for extent generation and level */ - key.objectid = bytenr; - key.type = BTRFS_METADATA_ITEM_KEY; - key.offset = (u64)-1; + struct btrfs_root *dest_root; + struct extent_backref *back, *tmp; + struct data_backref *dback; + struct orphan_data_extent *orphan; + struct btrfs_path path; + int recorded_data_ref = 0; + int ret = 0; + if (rec->metadata) + return 1; btrfs_init_path(&path); - ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, &path, 0, 0); - if (ret < 0) - goto release_out; - ret = btrfs_previous_extent_item(fs_info->extent_root, &path, bytenr); - if (ret < 0) - goto release_out; - if (ret > 0) { - ret = -ENOENT; - goto release_out; - } - - btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]); - ei = btrfs_item_ptr(path.nodes[0], path.slots[0], - struct btrfs_extent_item); - flags = btrfs_extent_flags(path.nodes[0], ei); - if (!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) { - ret = -ENOENT; - goto release_out; - } + rbtree_postorder_for_each_entry_safe(back, tmp, + &rec->backref_tree, node) { + if (back->full_backref || !back->is_data || + !back->found_extent_tree) + continue; + dback = to_data_backref(back); + if (dback->found_ref) + continue; + key.objectid = dback->root; + key.type = BTRFS_ROOT_ITEM_KEY; + key.offset = (u64)-1; - /* Get transid for later read_tree_block() check */ - transid = 
btrfs_extent_generation(path.nodes[0], ei); + dest_root = btrfs_read_fs_root(fs_info, &key); - /* Get backref level as one source */ - if (key.type == BTRFS_METADATA_ITEM_KEY) { - backref_level = key.offset; - } else { - struct btrfs_tree_block_info *info; + /* For non-exist root we just skip it */ + if (IS_ERR(dest_root) || !dest_root) + continue; - info = (struct btrfs_tree_block_info *)(ei + 1); - backref_level = btrfs_tree_block_level(path.nodes[0], info); - } - btrfs_release_path(&path); + key.objectid = dback->owner; + key.type = BTRFS_EXTENT_DATA_KEY; + key.offset = dback->offset; - /* Get level from tree block as an alternative source */ - eb = read_tree_block(fs_info, bytenr, transid); - if (!extent_buffer_uptodate(eb)) { - free_extent_buffer(eb); - return -EIO; + ret = btrfs_search_slot(NULL, dest_root, &key, &path, 0, 0); + btrfs_release_path(&path); + /* + * For ret < 0, it's OK since the fs-tree may be corrupted, + * we need to record it for inode/file extent rebuild. + * For ret > 0, we record it only for file extent rebuild. + * For ret == 0, the file extent exists but only bytenr + * mismatch, let the original bytenr fix routine to handle, + * don't record it. 
+ */ + if (ret == 0) + continue; + ret = 0; + orphan = malloc(sizeof(*orphan)); + if (!orphan) { + ret = -ENOMEM; + goto out; + } + INIT_LIST_HEAD(&orphan->list); + orphan->root = dback->root; + orphan->objectid = dback->owner; + orphan->offset = dback->offset; + orphan->disk_bytenr = rec->cache.start; + orphan->disk_len = rec->cache.size; + list_add(&dest_root->orphan_data_extents, &orphan->list); + recorded_data_ref = 1; } - header_level = btrfs_header_level(eb); - free_extent_buffer(eb); - - if (header_level != backref_level) - return -EIO; - return header_level; - -release_out: +out: btrfs_release_path(&path); - return ret; + if (!ret) + return !recorded_data_ref; + else + return ret; } /* - * Check if a tree block backref is valid (points to a valid tree block) - * if level == -1, level will be resolved - * Return >0 for any error found and print error message + * when an incorrect extent item is found, this will delete + * all of the existing entries for it and recreate them + * based on what the tree scan found. 
*/ -static int check_tree_block_backref(struct btrfs_fs_info *fs_info, u64 root_id, - u64 bytenr, int level) +static int fixup_extent_refs(struct btrfs_fs_info *info, + struct cache_tree *extent_cache, + struct extent_record *rec) { - struct btrfs_root *root; - struct btrfs_key key; - struct btrfs_path path; - struct extent_buffer *eb; - struct extent_buffer *node; - u32 nodesize = btrfs_super_nodesize(fs_info->super_copy); - int err = 0; + struct btrfs_trans_handle *trans = NULL; int ret; + struct btrfs_path path; + struct cache_extent *cache; + struct extent_backref *back, *tmp; + int allocated = 0; + u64 flags = 0; - /* Query level for level == -1 special case */ - if (level == -1) - level = query_tree_block_level(fs_info, bytenr); - if (level < 0) { - err |= REFERENCER_MISSING; - goto out; + if (rec->flag_block_full_backref) + flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF; + + btrfs_init_path(&path); + if (rec->refs != rec->extent_item_refs && !rec->metadata) { + /* + * Sometimes the backrefs themselves are so broken they don't + * get attached to any meaningful rec, so first go back and + * check any of our backrefs that we couldn't find and throw + * them into the list if we find the backref so that + * verify_backrefs can figure out what to do. 
+ */ + ret = find_possible_backrefs(info, &path, extent_cache, rec); + if (ret < 0) + goto out; } - key.objectid = root_id; - key.type = BTRFS_ROOT_ITEM_KEY; - key.offset = (u64)-1; + /* step one, make sure all of the backrefs agree */ + ret = verify_backrefs(info, &path, rec); + if (ret < 0) + goto out; - root = btrfs_read_fs_root(fs_info, &key); - if (IS_ERR(root)) { - err |= REFERENCER_MISSING; + trans = btrfs_start_transaction(info->extent_root, 1); + if (IS_ERR(trans)) { + ret = PTR_ERR(trans); goto out; } - /* Read out the tree block to get item/node key */ - eb = read_tree_block(fs_info, bytenr, 0); - if (!extent_buffer_uptodate(eb)) { - err |= REFERENCER_MISSING; - free_extent_buffer(eb); + /* step two, delete all the existing records */ + ret = delete_extent_records(trans, info->extent_root, &path, + rec->start); + + if (ret < 0) goto out; - } - /* Empty tree, no need to check key */ - if (!btrfs_header_nritems(eb) && !level) { - free_extent_buffer(eb); + /* was this block corrupt? 
If so, don't add references to it */ + cache = lookup_cache_extent(info->corrupt_blocks, + rec->start, rec->max_size); + if (cache) { + ret = 0; goto out; } - if (level) - btrfs_node_key_to_cpu(eb, &key, 0); - else - btrfs_item_key_to_cpu(eb, &key, 0); - - free_extent_buffer(eb); + /* step three, recreate all the refs we did find */ + rbtree_postorder_for_each_entry_safe(back, tmp, + &rec->backref_tree, node) { + /* + * if we didn't find any references, don't create a + * new extent record + */ + if (!back->found_ref) + continue; - btrfs_init_path(&path); - path.lowest_level = level; - /* Search with the first key, to ensure we can reach it */ - ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0); - if (ret < 0) { - err |= REFERENCER_MISSING; - goto release_out; - } + rec->bad_full_backref = 0; + ret = record_extent(trans, info, &path, rec, back, allocated, flags); + allocated = 1; - node = path.nodes[level]; - if (btrfs_header_bytenr(node) != bytenr) { - error( - "extent [%llu %d] referencer bytenr mismatch, wanted: %llu, have: %llu", - bytenr, nodesize, bytenr, - btrfs_header_bytenr(node)); - err |= REFERENCER_MISMATCH; - } - if (btrfs_header_level(node) != level) { - error( - "extent [%llu %d] referencer level mismatch, wanted: %d, have: %d", - bytenr, nodesize, level, - btrfs_header_level(node)); - err |= REFERENCER_MISMATCH; + if (ret) + goto out; } - -release_out: - btrfs_release_path(&path); out: - if (err & REFERENCER_MISSING) { - if (level < 0) - error("extent [%llu %d] lost referencer (owner: %llu)", - bytenr, nodesize, root_id); - else - error( - "extent [%llu %d] lost referencer (owner: %llu, level: %u)", - bytenr, nodesize, root_id, level); + if (trans) { + int err = btrfs_commit_transaction(trans, info->extent_root); + if (!ret) + ret = err; } - return err; -} - -/* - * Check if tree block @eb is tree reloc root. 
- * Return 0 if it's not or any problem happens - * Return 1 if it's a tree reloc root - */ -static int is_tree_reloc_root(struct btrfs_fs_info *fs_info, - struct extent_buffer *eb) -{ - struct btrfs_root *tree_reloc_root; - struct btrfs_key key; - u64 bytenr = btrfs_header_bytenr(eb); - u64 owner = btrfs_header_owner(eb); - int ret = 0; - - key.objectid = BTRFS_TREE_RELOC_OBJECTID; - key.offset = owner; - key.type = BTRFS_ROOT_ITEM_KEY; - - tree_reloc_root = btrfs_read_fs_root_no_cache(fs_info, &key); - if (IS_ERR(tree_reloc_root)) - return 0; + if (!ret) + fprintf(stderr, "Repaired extent references for %llu\n", + (unsigned long long)rec->start); - if (bytenr == btrfs_header_bytenr(tree_reloc_root->node)) - ret = 1; - btrfs_free_fs_root(tree_reloc_root); + btrfs_release_path(&path); return ret; } -/* - * Check referencer for shared block backref - * If level == -1, this function will resolve the level. - */ -static int check_shared_block_backref(struct btrfs_fs_info *fs_info, - u64 parent, u64 bytenr, int level) +static int fixup_extent_flags(struct btrfs_fs_info *fs_info, + struct extent_record *rec) { - struct extent_buffer *eb; - u32 nr; - int found_parent = 0; - int i; - - eb = read_tree_block(fs_info, parent, 0); - if (!extent_buffer_uptodate(eb)) - goto out; - - if (level == -1) - level = query_tree_block_level(fs_info, bytenr); - if (level < 0) - goto out; + struct btrfs_trans_handle *trans; + struct btrfs_root *root = fs_info->extent_root; + struct btrfs_path path; + struct btrfs_extent_item *ei; + struct btrfs_key key; + u64 flags; + int ret = 0; - /* It's possible it's a tree reloc root */ - if (parent == bytenr) { - if (is_tree_reloc_root(fs_info, eb)) - found_parent = 1; - goto out; + key.objectid = rec->start; + if (rec->metadata) { + key.type = BTRFS_METADATA_ITEM_KEY; + key.offset = rec->info_level; + } else { + key.type = BTRFS_EXTENT_ITEM_KEY; + key.offset = rec->max_size; } - if (level + 1 != btrfs_header_level(eb)) - goto out; + trans = 
btrfs_start_transaction(root, 0); + if (IS_ERR(trans)) + return PTR_ERR(trans); - nr = btrfs_header_nritems(eb); - for (i = 0; i < nr; i++) { - if (bytenr == btrfs_node_blockptr(eb, i)) { - found_parent = 1; - break; - } + btrfs_init_path(&path); + ret = btrfs_search_slot(trans, root, &key, &path, 0, 1); + if (ret < 0) { + btrfs_release_path(&path); + btrfs_commit_transaction(trans, root); + return ret; + } else if (ret) { + fprintf(stderr, "Didn't find extent for %llu\n", + (unsigned long long)rec->start); + btrfs_release_path(&path); + btrfs_commit_transaction(trans, root); + return -ENOENT; } -out: - free_extent_buffer(eb); - if (!found_parent) { - error( - "shared extent[%llu %u] lost its parent (parent: %llu, level: %u)", - bytenr, fs_info->nodesize, parent, level); - return REFERENCER_MISSING; + + ei = btrfs_item_ptr(path.nodes[0], path.slots[0], + struct btrfs_extent_item); + flags = btrfs_extent_flags(path.nodes[0], ei); + if (rec->flag_block_full_backref) { + fprintf(stderr, "setting full backref on %llu\n", + (unsigned long long)key.objectid); + flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF; + } else { + fprintf(stderr, "clearing full backref on %llu\n", + (unsigned long long)key.objectid); + flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF; } - return 0; + btrfs_set_extent_flags(path.nodes[0], ei, flags); + btrfs_mark_buffer_dirty(path.nodes[0]); + btrfs_release_path(&path); + ret = btrfs_commit_transaction(trans, root); + if (!ret) + fprintf(stderr, "Repaired extent flags for %llu\n", + (unsigned long long)rec->start); + + return ret; } -/* - * Check referencer for normal (inlined) data ref - * If len == 0, it will be resolved by searching in extent tree - */ -static int check_extent_data_backref(struct btrfs_fs_info *fs_info, - u64 root_id, u64 objectid, u64 offset, - u64 bytenr, u64 len, u32 count) +/* right now we only prune from the extent allocation tree */ +static int prune_one_block(struct btrfs_trans_handle *trans, + struct btrfs_fs_info *info, + struct 
btrfs_corrupt_block *corrupt) { - struct btrfs_root *root; - struct btrfs_root *extent_root = fs_info->extent_root; - struct btrfs_key key; + int ret; struct btrfs_path path; - struct extent_buffer *leaf; - struct btrfs_file_extent_item *fi; - u32 found_count = 0; + struct extent_buffer *eb; + u64 found; int slot; - int ret = 0; - - if (!len) { - key.objectid = bytenr; - key.type = BTRFS_EXTENT_ITEM_KEY; - key.offset = (u64)-1; + int nritems; + int level = corrupt->level + 1; - btrfs_init_path(&path); - ret = btrfs_search_slot(NULL, extent_root, &key, &path, 0, 0); - if (ret < 0) - goto out; - ret = btrfs_previous_extent_item(extent_root, &path, bytenr); - if (ret) - goto out; - btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]); - if (key.objectid != bytenr || - key.type != BTRFS_EXTENT_ITEM_KEY) - goto out; - len = key.offset; - btrfs_release_path(&path); - } - key.objectid = root_id; - key.type = BTRFS_ROOT_ITEM_KEY; - key.offset = (u64)-1; btrfs_init_path(&path); +again: + /* we want to stop at the parent to our busted block */ + path.lowest_level = level; - root = btrfs_read_fs_root(fs_info, &key); - if (IS_ERR(root)) - goto out; - - key.objectid = objectid; - key.type = BTRFS_EXTENT_DATA_KEY; - /* - * It can be nasty as data backref offset is - * file offset - file extent offset, which is smaller or - * equal to original backref offset. The only special case is - * overflow. So we need to special check and do further search. - */ - key.offset = offset & (1ULL << 63) ? 0 : offset; + ret = btrfs_search_slot(trans, info->extent_root, + &corrupt->key, &path, -1, 1); - ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0); if (ret < 0) goto out; + eb = path.nodes[level]; + if (!eb) { + ret = -ENOENT; + goto out; + } + /* - * Search afterwards to get correct one - * NOTE: As we must do a comprehensive check on the data backref to - * make sure the dref count also matches, we must iterate all file - * extents for that inode. 
+ * hopefully the search gave us the block we want to prune, + * lets try that first */ - while (1) { - leaf = path.nodes[0]; - slot = path.slots[0]; + slot = path.slots[level]; + found = btrfs_node_blockptr(eb, slot); + if (found == corrupt->cache.start) + goto del_ptr; - if (slot >= btrfs_header_nritems(leaf) || - btrfs_header_owner(leaf) != root_id) - goto next; - btrfs_item_key_to_cpu(leaf, &key, slot); - if (key.objectid != objectid || key.type != BTRFS_EXTENT_DATA_KEY) - break; - fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item); - /* - * Except normal disk bytenr and disk num bytes, we still - * need to do extra check on dbackref offset as - * dbackref offset = file_offset - file_extent_offset - * - * Also, we must check the leaf owner. - * In case of shared tree blocks (snapshots) we can inherit - * leaves from source snapshot. - * In that case, reference from source snapshot should not - * count. - */ - if (btrfs_file_extent_disk_bytenr(leaf, fi) == bytenr && - btrfs_file_extent_disk_num_bytes(leaf, fi) == len && - (u64)(key.offset - btrfs_file_extent_offset(leaf, fi)) == - offset && btrfs_header_owner(leaf) == root_id) - found_count++; + nritems = btrfs_header_nritems(eb); -next: - ret = btrfs_next_item(root, &path); - if (ret) - break; + /* the search failed, lets scan this node and hope we find it */ + for (slot = 0; slot < nritems; slot++) { + found = btrfs_node_blockptr(eb, slot); + if (found == corrupt->cache.start) + goto del_ptr; + } + /* + * we couldn't find the bad block. 
TODO, search all the nodes for pointers + * to this block + */ + if (eb == info->extent_root->node) { + ret = -ENOENT; + goto out; + } else { + level++; + btrfs_release_path(&path); + goto again; } + +del_ptr: + printk("deleting pointer to block %Lu\n", corrupt->cache.start); + ret = btrfs_del_ptr(info->extent_root, &path, level, slot); + out: btrfs_release_path(&path); - if (found_count != count) { - error( -"extent[%llu, %llu] referencer count mismatch (root: %llu, owner: %llu, offset: %llu) wanted: %u, have: %u", - bytenr, len, root_id, objectid, offset, count, found_count); - return REFERENCER_MISSING; - } - return 0; + return ret; } -/* - * Check if the referencer of a shared data backref exists - */ -static int check_shared_data_backref(struct btrfs_fs_info *fs_info, - u64 parent, u64 bytenr) +static int prune_corrupt_blocks(struct btrfs_fs_info *info) { - struct extent_buffer *eb; - struct btrfs_key key; - struct btrfs_file_extent_item *fi; - u32 nr; - int found_parent = 0; - int i; - - eb = read_tree_block(fs_info, parent, 0); - if (!extent_buffer_uptodate(eb)) - goto out; - - nr = btrfs_header_nritems(eb); - for (i = 0; i < nr; i++) { - btrfs_item_key_to_cpu(eb, &key, i); - if (key.type != BTRFS_EXTENT_DATA_KEY) - continue; - - fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item); - if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE) - continue; + struct btrfs_trans_handle *trans = NULL; + struct cache_extent *cache; + struct btrfs_corrupt_block *corrupt; - if (btrfs_file_extent_disk_bytenr(eb, fi) == bytenr) { - found_parent = 1; + while (1) { + cache = search_cache_extent(info->corrupt_blocks, 0); + if (!cache) break; + if (!trans) { + trans = btrfs_start_transaction(info->extent_root, 1); + if (IS_ERR(trans)) + return PTR_ERR(trans); } + corrupt = container_of(cache, struct btrfs_corrupt_block, cache); + prune_one_block(trans, info, corrupt); + remove_cache_extent(info->corrupt_blocks, cache); } - -out: - free_extent_buffer(eb); - if 
(!found_parent) { - error("shared extent %llu referencer lost (parent: %llu)", - bytenr, parent); - return REFERENCER_MISSING; - } + if (trans) + return btrfs_commit_transaction(trans, info->extent_root); return 0; } -/* - * Only delete backref if REFERENCER_MISSING now - * - * Returns <0 the extent was deleted - * Returns >0 the backref was deleted but extent still exists, returned value - * means error after repair - * Returns 0 nothing happened - */ -static int repair_extent_item(struct btrfs_trans_handle *trans, - struct btrfs_root *root, struct btrfs_path *path, - u64 bytenr, u64 num_bytes, u64 parent, u64 root_objectid, - u64 owner, u64 offset, int err) +static int check_extent_refs(struct btrfs_root *root, + struct cache_tree *extent_cache) { - struct btrfs_key old_key; - int freed = 0; - int ret; + struct extent_record *rec; + struct cache_extent *cache; + int ret = 0; + int had_dups = 0; + int err = 0; - btrfs_item_key_to_cpu(path->nodes[0], &old_key, path->slots[0]); + if (repair) { + /* + * if we're doing a repair, we have to make sure + * we don't allocate from the problem extents. 
+ * In the worst case, this will be all the + * extents in the FS + */ + cache = search_cache_extent(extent_cache, 0); + while(cache) { + rec = container_of(cache, struct extent_record, cache); + set_extent_dirty(root->fs_info->excluded_extents, + rec->start, + rec->start + rec->max_size - 1); + cache = next_cache_extent(cache); + } - if (err & (REFERENCER_MISSING | REFERENCER_MISMATCH)) { - /* delete the backref */ - ret = btrfs_free_extent(trans, root->fs_info->fs_root, bytenr, - num_bytes, parent, root_objectid, owner, offset); - if (!ret) { - freed = 1; - err &= ~REFERENCER_MISSING; - printf("Delete backref in extent [%llu %llu]\n", - bytenr, num_bytes); - } else { - error("fail to delete backref in extent [%llu %llu]", - bytenr, num_bytes); + /* pin down all the corrupted blocks too */ + cache = search_cache_extent(root->fs_info->corrupt_blocks, 0); + while(cache) { + set_extent_dirty(root->fs_info->excluded_extents, + cache->start, + cache->start + cache->size - 1); + cache = next_cache_extent(cache); } + prune_corrupt_blocks(root->fs_info); + reset_cached_block_groups(root->fs_info); } - /* btrfs_free_extent may delete the extent */ - btrfs_release_path(path); - ret = btrfs_search_slot(NULL, root, &old_key, path, 0, 0); - - if (ret) - ret = -ENOENT; - else if (freed) - ret = err; - return ret; -} - -/* - * This function will check a given extent item, including its backref and - * itself (like crossing stripe boundary and type) - * - * Since we don't use extent_record anymore, introduce new error bit - */ -static int check_extent_item(struct btrfs_trans_handle *trans, - struct btrfs_fs_info *fs_info, - struct btrfs_path *path) -{ - struct btrfs_extent_item *ei; - struct btrfs_extent_inline_ref *iref; - struct btrfs_extent_data_ref *dref; - struct extent_buffer *eb = path->nodes[0]; - unsigned long end; - unsigned long ptr; - int slot = path->slots[0]; - int type; - u32 nodesize = btrfs_super_nodesize(fs_info->super_copy); - u32 item_size = 
btrfs_item_size_nr(eb, slot); - u64 flags; - u64 offset; - u64 parent; - u64 num_bytes; - u64 root_objectid; - u64 owner; - u64 owner_offset; - int metadata = 0; - int level; - struct btrfs_key key; - int ret; - int err = 0; + reset_cached_block_groups(root->fs_info); - btrfs_item_key_to_cpu(eb, &key, slot); - if (key.type == BTRFS_EXTENT_ITEM_KEY) { - bytes_used += key.offset; - num_bytes = key.offset; - } else { - bytes_used += nodesize; - num_bytes = nodesize; - } + /* + * We need to delete any duplicate entries we find first otherwise we + * could mess up the extent tree when we have backrefs that actually + * belong to a different extent item and not the weird duplicate one. + */ + while (repair && !list_empty(&duplicate_extents)) { + rec = to_extent_record(duplicate_extents.next); + list_del_init(&rec->list); - if (item_size < sizeof(*ei)) { + /* Sometimes we can find a backref before we find an actual + * extent, so we need to process it a little bit to see if there + * truly are multiple EXTENT_ITEM_KEY's for the same range, or + * if this is a backref screwup. If we need to delete stuff + * process_duplicates() will return 0, otherwise it will return + * 1 and we + */ + if (process_duplicates(extent_cache, rec)) + continue; + ret = delete_duplicate_records(root, rec); + if (ret < 0) + return ret; /* - * COMPAT_EXTENT_TREE_V0 case, but it's already a super - * old thing when on disk format is still un-determined. - * No need to care about it anymore + * delete_duplicate_records will return the number of entries + * deleted, so if it's greater than 0 then we know we actually + * did something and we need to remove. 
*/ - error("unsupported COMPAT_EXTENT_TREE_V0 detected"); - return -ENOTTY; + if (ret) + had_dups = 1; } - ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item); - flags = btrfs_extent_flags(eb, ei); + if (had_dups) + return -EAGAIN; - if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) - metadata = 1; - if (metadata && check_crossing_stripes(global_info, key.objectid, - eb->len)) { - error("bad metadata [%llu, %llu) crossing stripe boundary", - key.objectid, key.objectid + nodesize); - err |= CROSSING_STRIPE_BOUNDARY; - } + while(1) { + int cur_err = 0; + int fix = 0; - ptr = (unsigned long)(ei + 1); + cache = search_cache_extent(extent_cache, 0); + if (!cache) + break; + rec = container_of(cache, struct extent_record, cache); + if (rec->num_duplicates) { + fprintf(stderr, "extent item %llu has multiple extent " + "items\n", (unsigned long long)rec->start); + cur_err = 1; + } - if (metadata && key.type == BTRFS_EXTENT_ITEM_KEY) { - /* Old EXTENT_ITEM metadata */ - struct btrfs_tree_block_info *info; + if (rec->refs != rec->extent_item_refs) { + fprintf(stderr, "ref mismatch on [%llu %llu] ", + (unsigned long long)rec->start, + (unsigned long long)rec->nr); + fprintf(stderr, "extent item %llu, found %llu\n", + (unsigned long long)rec->extent_item_refs, + (unsigned long long)rec->refs); + ret = record_orphan_data_extents(root->fs_info, rec); + if (ret < 0) + goto repair_abort; + fix = ret; + cur_err = 1; + } + if (all_backpointers_checked(rec, 1)) { + fprintf(stderr, "backpointer mismatch on [%llu %llu]\n", + (unsigned long long)rec->start, + (unsigned long long)rec->nr); + fix = 1; + cur_err = 1; + } + if (!rec->owner_ref_checked) { + fprintf(stderr, "owner ref check failed [%llu %llu]\n", + (unsigned long long)rec->start, + (unsigned long long)rec->nr); + fix = 1; + cur_err = 1; + } - info = (struct btrfs_tree_block_info *)ptr; - level = btrfs_tree_block_level(eb, info); - ptr += sizeof(struct btrfs_tree_block_info); - } else { - /* New METADATA_ITEM */ - level = 
key.offset; - } - end = (unsigned long)ei + item_size; + if (repair && fix) { + ret = fixup_extent_refs(root->fs_info, extent_cache, rec); + if (ret) + goto repair_abort; + } -next: - /* Reached extent item end normally */ - if (ptr == end) - goto out; - /* Beyond extent item end, wrong item size */ - if (ptr > end) { - err |= ITEM_SIZE_MISMATCH; - error("extent item at bytenr %llu slot %d has wrong size", - eb->start, slot); - goto out; - } + if (rec->bad_full_backref) { + fprintf(stderr, "bad full backref, on [%llu]\n", + (unsigned long long)rec->start); + if (repair) { + ret = fixup_extent_flags(root->fs_info, rec); + if (ret) + goto repair_abort; + fix = 1; + } + cur_err = 1; + } + /* + * Although it's not a extent ref's problem, we reuse this + * routine for error reporting. + * No repair function yet. + */ + if (rec->crossing_stripes) { + fprintf(stderr, + "bad metadata [%llu, %llu) crossing stripe boundary\n", + rec->start, rec->start + rec->max_size); + cur_err = 1; + } - parent = 0; - root_objectid = 0; - owner = 0; - owner_offset = 0; - /* Now check every backref in this extent item */ - iref = (struct btrfs_extent_inline_ref *)ptr; - type = btrfs_extent_inline_ref_type(eb, iref); - offset = btrfs_extent_inline_ref_offset(eb, iref); - switch (type) { - case BTRFS_TREE_BLOCK_REF_KEY: - root_objectid = offset; - owner = level; - ret = check_tree_block_backref(fs_info, offset, key.objectid, - level); - err |= ret; - break; - case BTRFS_SHARED_BLOCK_REF_KEY: - parent = offset; - ret = check_shared_block_backref(fs_info, offset, key.objectid, - level); - err |= ret; - break; - case BTRFS_EXTENT_DATA_REF_KEY: - dref = (struct btrfs_extent_data_ref *)(&iref->offset); - root_objectid = btrfs_extent_data_ref_root(eb, dref); - owner = btrfs_extent_data_ref_objectid(eb, dref); - owner_offset = btrfs_extent_data_ref_offset(eb, dref); - ret = check_extent_data_backref(fs_info, root_objectid, owner, - owner_offset, key.objectid, key.offset, - 
btrfs_extent_data_ref_count(eb, dref)); - err |= ret; - break; - case BTRFS_SHARED_DATA_REF_KEY: - parent = offset; - ret = check_shared_data_backref(fs_info, offset, key.objectid); - err |= ret; - break; - default: - error("extent[%llu %d %llu] has unknown ref type: %d", - key.objectid, key.type, key.offset, type); - ret = UNKNOWN_TYPE; - err |= ret; - goto out; + if (rec->wrong_chunk_type) { + fprintf(stderr, + "bad extent [%llu, %llu), type mismatch with chunk\n", + rec->start, rec->start + rec->max_size); + cur_err = 1; + } + + err = cur_err; + remove_cache_extent(extent_cache, cache); + free_all_extent_backrefs(rec); + if (!init_extent_tree && repair && (!cur_err || fix)) + clear_extent_dirty(root->fs_info->excluded_extents, + rec->start, + rec->start + rec->max_size - 1); + free(rec); } +repair_abort: + if (repair) { + if (ret && ret != -EAGAIN) { + fprintf(stderr, "failed to repair damaged filesystem, aborting\n"); + exit(1); + } else if (!ret) { + struct btrfs_trans_handle *trans; + + root = root->fs_info->extent_root; + trans = btrfs_start_transaction(root, 1); + if (IS_ERR(trans)) { + ret = PTR_ERR(trans); + goto repair_abort; + } - if (err && repair) { - ret = repair_extent_item(trans, fs_info->extent_root, path, - key.objectid, num_bytes, parent, root_objectid, - owner, owner_offset, ret); - if (ret < 0) - goto out; - if (ret) { - goto next; - err = ret; + ret = btrfs_fix_block_accounting(trans, root); + if (ret) + goto repair_abort; + ret = btrfs_commit_transaction(trans, root); + if (ret) + goto repair_abort; } + return ret; } - ptr += btrfs_extent_inline_ref_size(type); - goto next; - -out: + if (err) + err = -EIO; return err; } -/* - * Check if a dev extent item is referred correctly by its chunk - */ -static int check_dev_extent_item(struct btrfs_fs_info *fs_info, - struct extent_buffer *eb, int slot) +u64 calc_stripe_length(u64 type, u64 length, int num_stripes) { - struct btrfs_root *chunk_root = fs_info->chunk_root; - struct btrfs_dev_extent 
*ptr; - struct btrfs_path path; - struct btrfs_key chunk_key; - struct btrfs_key devext_key; - struct btrfs_chunk *chunk; - struct extent_buffer *l; - int num_stripes; - u64 length; - int i; - int found_chunk = 0; - int ret; - - btrfs_item_key_to_cpu(eb, &devext_key, slot); - ptr = btrfs_item_ptr(eb, slot, struct btrfs_dev_extent); - length = btrfs_dev_extent_length(eb, ptr); - - chunk_key.objectid = btrfs_dev_extent_chunk_objectid(eb, ptr); - chunk_key.type = BTRFS_CHUNK_ITEM_KEY; - chunk_key.offset = btrfs_dev_extent_chunk_offset(eb, ptr); - - btrfs_init_path(&path); - ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0); - if (ret) - goto out; - - l = path.nodes[0]; - chunk = btrfs_item_ptr(l, path.slots[0], struct btrfs_chunk); - ret = btrfs_check_chunk_valid(fs_info, l, chunk, path.slots[0], - chunk_key.offset); - if (ret < 0) - goto out; - - if (btrfs_stripe_length(fs_info, l, chunk) != length) - goto out; - - num_stripes = btrfs_chunk_num_stripes(l, chunk); - for (i = 0; i < num_stripes; i++) { - u64 devid = btrfs_stripe_devid_nr(l, chunk, i); - u64 offset = btrfs_stripe_offset_nr(l, chunk, i); + u64 stripe_size; - if (devid == devext_key.objectid && - offset == devext_key.offset) { - found_chunk = 1; - break; - } - } -out: - btrfs_release_path(&path); - if (!found_chunk) { - error( - "device extent[%llu, %llu, %llu] did not find the related chunk", - devext_key.objectid, devext_key.offset, length); - return REFERENCER_MISSING; + if (type & BTRFS_BLOCK_GROUP_RAID0) { + stripe_size = length; + stripe_size /= num_stripes; + } else if (type & BTRFS_BLOCK_GROUP_RAID10) { + stripe_size = length * 2; + stripe_size /= num_stripes; + } else if (type & BTRFS_BLOCK_GROUP_RAID5) { + stripe_size = length; + stripe_size /= (num_stripes - 1); + } else if (type & BTRFS_BLOCK_GROUP_RAID6) { + stripe_size = length; + stripe_size /= (num_stripes - 2); + } else { + stripe_size = length; } - return 0; + return stripe_size; } /* - * Check if the used space is 
correct with the dev item + * Check the chunk with its block group/dev list ref: + * Return 0 if all refs seems valid. + * Return 1 if part of refs seems valid, need later check for rebuild ref + * like missing block group and needs to search extent tree to rebuild them. + * Return -1 if essential refs are missing and unable to rebuild. */ -static int check_dev_item(struct btrfs_fs_info *fs_info, - struct extent_buffer *eb, int slot) +static int check_chunk_refs(struct chunk_record *chunk_rec, + struct block_group_tree *block_group_cache, + struct device_extent_tree *dev_extent_cache, + int silent) { - struct btrfs_root *dev_root = fs_info->dev_root; - struct btrfs_dev_item *dev_item; - struct btrfs_path path; - struct btrfs_key key; - struct btrfs_dev_extent *ptr; - u64 total_bytes; - u64 dev_id; - u64 used; - u64 total = 0; - int ret; - - dev_item = btrfs_item_ptr(eb, slot, struct btrfs_dev_item); - dev_id = btrfs_device_id(eb, dev_item); - used = btrfs_device_bytes_used(eb, dev_item); - total_bytes = btrfs_device_total_bytes(eb, dev_item); - - key.objectid = dev_id; - key.type = BTRFS_DEV_EXTENT_KEY; - key.offset = 0; + struct cache_extent *block_group_item; + struct block_group_record *block_group_rec; + struct cache_extent *dev_extent_item; + struct device_extent_record *dev_extent_rec; + u64 devid; + u64 offset; + u64 length; + int metadump_v2 = 0; + int i; + int ret = 0; - btrfs_init_path(&path); - ret = btrfs_search_slot(NULL, dev_root, &key, &path, 0, 0); - if (ret < 0) { - btrfs_item_key_to_cpu(eb, &key, slot); - error("cannot find any related dev extent for dev[%llu, %u, %llu]", - key.objectid, key.type, key.offset); - btrfs_release_path(&path); - return REFERENCER_MISSING; + block_group_item = lookup_cache_extent(&block_group_cache->tree, + chunk_rec->offset, + chunk_rec->length); + if (block_group_item) { + block_group_rec = container_of(block_group_item, + struct block_group_record, + cache); + if (chunk_rec->length != block_group_rec->offset || + 
chunk_rec->offset != block_group_rec->objectid || + (!metadump_v2 && + chunk_rec->type_flags != block_group_rec->flags)) { + if (!silent) + fprintf(stderr, + "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n", + chunk_rec->objectid, + chunk_rec->type, + chunk_rec->offset, + chunk_rec->length, + chunk_rec->offset, + chunk_rec->type_flags, + block_group_rec->objectid, + block_group_rec->type, + block_group_rec->offset, + block_group_rec->offset, + block_group_rec->objectid, + block_group_rec->flags); + ret = -1; + } else { + list_del_init(&block_group_rec->list); + chunk_rec->bg_rec = block_group_rec; + } + } else { + if (!silent) + fprintf(stderr, + "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) is not found in block group\n", + chunk_rec->objectid, + chunk_rec->type, + chunk_rec->offset, + chunk_rec->length, + chunk_rec->offset, + chunk_rec->type_flags); + ret = 1; } - /* Iterate dev_extents to calculate the used space of a device */ - while (1) { - if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) - goto next; - - btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]); - if (key.objectid > dev_id) - break; - if (key.type != BTRFS_DEV_EXTENT_KEY || key.objectid != dev_id) - goto next; - - ptr = btrfs_item_ptr(path.nodes[0], path.slots[0], - struct btrfs_dev_extent); - total += btrfs_dev_extent_length(path.nodes[0], ptr); -next: - ret = btrfs_next_item(dev_root, &path); - if (ret) - break; - } - btrfs_release_path(&path); + if (metadump_v2) + return ret; - if (used != total) { - btrfs_item_key_to_cpu(eb, &key, slot); - error( -"Dev extent's total-byte %llu is not equal to bytes-used %llu in dev[%llu, %u, %llu]", - total, used, BTRFS_ROOT_TREE_OBJECTID, - BTRFS_DEV_EXTENT_KEY, dev_id); - return ACCOUNTING_MISMATCH; + length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length, + chunk_rec->num_stripes); + for (i = 0; i < 
chunk_rec->num_stripes; ++i) { + devid = chunk_rec->stripes[i].devid; + offset = chunk_rec->stripes[i].offset; + dev_extent_item = lookup_cache_extent2(&dev_extent_cache->tree, + devid, offset, length); + if (dev_extent_item) { + dev_extent_rec = container_of(dev_extent_item, + struct device_extent_record, + cache); + if (dev_extent_rec->objectid != devid || + dev_extent_rec->offset != offset || + dev_extent_rec->chunk_offset != chunk_rec->offset || + dev_extent_rec->length != length) { + if (!silent) + fprintf(stderr, + "Chunk[%llu, %u, %llu] stripe[%llu, %llu] dismatch dev extent[%llu, %llu, %llu]\n", + chunk_rec->objectid, + chunk_rec->type, + chunk_rec->offset, + chunk_rec->stripes[i].devid, + chunk_rec->stripes[i].offset, + dev_extent_rec->objectid, + dev_extent_rec->offset, + dev_extent_rec->length); + ret = -1; + } else { + list_move(&dev_extent_rec->chunk_list, + &chunk_rec->dextents); + } + } else { + if (!silent) + fprintf(stderr, + "Chunk[%llu, %u, %llu] stripe[%llu, %llu] is not found in dev extent\n", + chunk_rec->objectid, + chunk_rec->type, + chunk_rec->offset, + chunk_rec->stripes[i].devid, + chunk_rec->stripes[i].offset); + ret = -1; + } } - check_dev_size_alignment(dev_id, total_bytes, fs_info->sectorsize); - - return 0; + return ret; } -/* - * Check a block group item with its referener (chunk) and its used space - * with extent/metadata item - */ -static int check_block_group_item(struct btrfs_fs_info *fs_info, - struct extent_buffer *eb, int slot) +/* check btrfs_chunk -> btrfs_dev_extent / btrfs_block_group_item */ +int check_chunks(struct cache_tree *chunk_cache, + struct block_group_tree *block_group_cache, + struct device_extent_tree *dev_extent_cache, + struct list_head *good, struct list_head *bad, + struct list_head *rebuild, int silent) { - struct btrfs_root *extent_root = fs_info->extent_root; - struct btrfs_root *chunk_root = fs_info->chunk_root; - struct btrfs_block_group_item *bi; - struct btrfs_block_group_item bg_item; - struct 
btrfs_path path; - struct btrfs_key bg_key; - struct btrfs_key chunk_key; - struct btrfs_key extent_key; - struct btrfs_chunk *chunk; - struct extent_buffer *leaf; - struct btrfs_extent_item *ei; - u32 nodesize = btrfs_super_nodesize(fs_info->super_copy); - u64 flags; - u64 bg_flags; - u64 used; - u64 total = 0; - int ret; - int err = 0; - - btrfs_item_key_to_cpu(eb, &bg_key, slot); - bi = btrfs_item_ptr(eb, slot, struct btrfs_block_group_item); - read_extent_buffer(eb, &bg_item, (unsigned long)bi, sizeof(bg_item)); - used = btrfs_block_group_used(&bg_item); - bg_flags = btrfs_block_group_flags(&bg_item); - - chunk_key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID; - chunk_key.type = BTRFS_CHUNK_ITEM_KEY; - chunk_key.offset = bg_key.objectid; + struct cache_extent *chunk_item; + struct chunk_record *chunk_rec; + struct block_group_record *bg_rec; + struct device_extent_record *dext_rec; + int err; + int ret = 0; - btrfs_init_path(&path); - /* Search for the referencer chunk */ - ret = btrfs_search_slot(NULL, chunk_root, &chunk_key, &path, 0, 0); - if (ret) { - error( - "block group[%llu %llu] did not find the related chunk item", - bg_key.objectid, bg_key.offset); - err |= REFERENCER_MISSING; - } else { - chunk = btrfs_item_ptr(path.nodes[0], path.slots[0], - struct btrfs_chunk); - if (btrfs_chunk_length(path.nodes[0], chunk) != - bg_key.offset) { - error( - "block group[%llu %llu] related chunk item length does not match", - bg_key.objectid, bg_key.offset); - err |= REFERENCER_MISMATCH; - } + chunk_item = first_cache_extent(chunk_cache); + while (chunk_item) { + chunk_rec = container_of(chunk_item, struct chunk_record, + cache); + err = check_chunk_refs(chunk_rec, block_group_cache, + dev_extent_cache, silent); + if (err < 0) + ret = err; + if (err == 0 && good) + list_add_tail(&chunk_rec->list, good); + if (err > 0 && rebuild) + list_add_tail(&chunk_rec->list, rebuild); + if (err < 0 && bad) + list_add_tail(&chunk_rec->list, bad); + chunk_item = 
next_cache_extent(chunk_item); } - btrfs_release_path(&path); - /* Search from the block group bytenr */ - extent_key.objectid = bg_key.objectid; - extent_key.type = 0; - extent_key.offset = 0; + list_for_each_entry(bg_rec, &block_group_cache->block_groups, list) { + if (!silent) + fprintf(stderr, + "Block group[%llu, %llu] (flags = %llu) didn't find the relative chunk.\n", + bg_rec->objectid, + bg_rec->offset, + bg_rec->flags); + if (!ret) + ret = 1; + } - btrfs_init_path(&path); - ret = btrfs_search_slot(NULL, extent_root, &extent_key, &path, 0, 0); - if (ret < 0) - goto out; + list_for_each_entry(dext_rec, &dev_extent_cache->no_chunk_orphans, + chunk_list) { + if (!silent) + fprintf(stderr, + "Device extent[%llu, %llu, %llu] didn't find the relative chunk.\n", + dext_rec->objectid, + dext_rec->offset, + dext_rec->length); + if (!ret) + ret = 1; + } + return ret; +} - /* Iterate extent tree to account used space */ - while (1) { - leaf = path.nodes[0]; - /* Search slot can point to the last item beyond leaf nritems */ - if (path.slots[0] >= btrfs_header_nritems(leaf)) - goto next; +static int check_device_used(struct device_record *dev_rec, + struct device_extent_tree *dext_cache) +{ + struct cache_extent *cache; + struct device_extent_record *dev_extent_rec; + u64 total_byte = 0; - btrfs_item_key_to_cpu(leaf, &extent_key, path.slots[0]); - if (extent_key.objectid >= bg_key.objectid + bg_key.offset) + cache = search_cache_extent2(&dext_cache->tree, dev_rec->devid, 0); + while (cache) { + dev_extent_rec = container_of(cache, + struct device_extent_record, + cache); + if (dev_extent_rec->objectid != dev_rec->devid) break; - if (extent_key.type != BTRFS_METADATA_ITEM_KEY && - extent_key.type != BTRFS_EXTENT_ITEM_KEY) - goto next; - if (extent_key.objectid < bg_key.objectid) - goto next; - - if (extent_key.type == BTRFS_METADATA_ITEM_KEY) - total += nodesize; - else - total += extent_key.offset; - - ei = btrfs_item_ptr(leaf, path.slots[0], - struct 
btrfs_extent_item); - flags = btrfs_extent_flags(leaf, ei); - if (flags & BTRFS_EXTENT_FLAG_DATA) { - if (!(bg_flags & BTRFS_BLOCK_GROUP_DATA)) { - error( - "bad extent[%llu, %llu) type mismatch with chunk", - extent_key.objectid, - extent_key.objectid + extent_key.offset); - err |= CHUNK_TYPE_MISMATCH; - } - } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) { - if (!(bg_flags & (BTRFS_BLOCK_GROUP_SYSTEM | - BTRFS_BLOCK_GROUP_METADATA))) { - error( - "bad extent[%llu, %llu) type mismatch with chunk", - extent_key.objectid, - extent_key.objectid + nodesize); - err |= CHUNK_TYPE_MISMATCH; - } - } -next: - ret = btrfs_next_item(extent_root, &path); - if (ret) - break; + list_del_init(&dev_extent_rec->device_list); + total_byte += dev_extent_rec->length; + cache = next_cache_extent(cache); } -out: - btrfs_release_path(&path); - - if (total != used) { - error( - "block group[%llu %llu] used %llu but extent items used %llu", - bg_key.objectid, bg_key.offset, used, total); - err |= BG_ACCOUNTING_ERROR; + if (total_byte != dev_rec->byte_used) { + fprintf(stderr, + "Dev extent's total-byte(%llu) is not equal to byte-used(%llu) in dev[%llu, %u, %llu]\n", + total_byte, dev_rec->byte_used, dev_rec->objectid, + dev_rec->type, dev_rec->offset); + return -1; + } else { + return 0; } - return err; } /* - * Add block group item to the extent tree if @err contains REFERENCER_MISSING. - * FIXME: We still need to repair error of dev_item. + * Unlike device size alignment check above, some super total_bytes check + * failure can lead to mount failure for newer kernel. * - * Returns error after repair. + * So this function will return the error for a fatal super total_bytes problem. 
*/ -static int repair_chunk_item(struct btrfs_trans_handle *trans, - struct btrfs_root *chunk_root, - struct btrfs_path *path, int err) +static bool is_super_size_valid(struct btrfs_fs_info *fs_info) { - struct btrfs_chunk *chunk; - struct btrfs_key chunk_key; - struct extent_buffer *eb = path->nodes[0]; - u64 length; - int slot = path->slots[0]; - u64 type; - int ret = 0; + struct btrfs_device *dev; + struct list_head *dev_list = &fs_info->fs_devices->devices; + u64 total_bytes = 0; + u64 super_bytes = btrfs_super_total_bytes(fs_info->super_copy); - btrfs_item_key_to_cpu(eb, &chunk_key, slot); - if (chunk_key.type != BTRFS_CHUNK_ITEM_KEY) - return err; - chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk); - type = btrfs_chunk_type(path->nodes[0], chunk); - length = btrfs_chunk_length(eb, chunk); + list_for_each_entry(dev, dev_list, dev_list) + total_bytes += dev->total_bytes; - if (err & REFERENCER_MISSING) { - ret = btrfs_make_block_group(trans, chunk_root->fs_info, 0, - type, chunk_key.offset, length); - if (ret) { - error("fail to add block group item[%llu %llu]", - chunk_key.offset, length); - goto out; - } else { - err &= ~REFERENCER_MISSING; - printf("Added block group item[%llu %llu]\n", - chunk_key.offset, length); - } + /* Important check, which can cause unmountable fs */ + if (super_bytes < total_bytes) { + error("super total bytes %llu smaller than real device(s) size %llu", + super_bytes, total_bytes); + error("mounting this fs may fail for newer kernels"); + error("this can be fixed by 'btrfs rescue fix-device-size'"); + return false; } -out: - return err; + /* + * Optional check, just to make everything aligned and match with each + * other. + * + * For a btrfs-image restored fs, we don't need to check it anyway. 
+ */ + if (btrfs_super_flags(fs_info->super_copy) & + (BTRFS_SUPER_FLAG_METADUMP | BTRFS_SUPER_FLAG_METADUMP_V2)) + return true; + if (!IS_ALIGNED(super_bytes, fs_info->sectorsize) || + !IS_ALIGNED(total_bytes, fs_info->sectorsize) || + super_bytes != total_bytes) { + warning("minor unaligned/mismatch device size detected"); + warning( + "recommended to use 'btrfs rescue fix-device-size' to fix it"); + } + return true; } -/* - * Check a chunk item. - * Including checking all referred dev_extents and block group - */ -static int check_chunk_item(struct btrfs_fs_info *fs_info, - struct extent_buffer *eb, int slot) +/* check btrfs_dev_item -> btrfs_dev_extent */ +static int check_devices(struct rb_root *dev_cache, + struct device_extent_tree *dev_extent_cache) { - struct btrfs_root *extent_root = fs_info->extent_root; - struct btrfs_root *dev_root = fs_info->dev_root; - struct btrfs_path path; - struct btrfs_key chunk_key; - struct btrfs_key bg_key; - struct btrfs_key devext_key; - struct btrfs_chunk *chunk; - struct extent_buffer *leaf; - struct btrfs_block_group_item *bi; - struct btrfs_block_group_item bg_item; - struct btrfs_dev_extent *ptr; - u64 length; - u64 chunk_end; - u64 stripe_len; - u64 type; - int num_stripes; - u64 offset; - u64 objectid; - int i; - int ret; - int err = 0; - - btrfs_item_key_to_cpu(eb, &chunk_key, slot); - chunk = btrfs_item_ptr(eb, slot, struct btrfs_chunk); - length = btrfs_chunk_length(eb, chunk); - chunk_end = chunk_key.offset + length; - ret = btrfs_check_chunk_valid(fs_info, eb, chunk, slot, - chunk_key.offset); - if (ret < 0) { - error("chunk[%llu %llu) is invalid", chunk_key.offset, - chunk_end); - err |= BYTES_UNALIGNED | UNKNOWN_TYPE; - goto out; - } - type = btrfs_chunk_type(eb, chunk); + struct rb_node *dev_node; + struct device_record *dev_rec; + struct device_extent_record *dext_rec; + int err; + int ret = 0; - bg_key.objectid = chunk_key.offset; - bg_key.type = BTRFS_BLOCK_GROUP_ITEM_KEY; - bg_key.offset = length; + 
dev_node = rb_first(dev_cache); + while (dev_node) { + dev_rec = container_of(dev_node, struct device_record, node); + err = check_device_used(dev_rec, dev_extent_cache); + if (err) + ret = err; - btrfs_init_path(&path); - ret = btrfs_search_slot(NULL, extent_root, &bg_key, &path, 0, 0); - if (ret) { - error( - "chunk[%llu %llu) did not find the related block group item", - chunk_key.offset, chunk_end); - err |= REFERENCER_MISSING; - } else{ - leaf = path.nodes[0]; - bi = btrfs_item_ptr(leaf, path.slots[0], - struct btrfs_block_group_item); - read_extent_buffer(leaf, &bg_item, (unsigned long)bi, - sizeof(bg_item)); - if (btrfs_block_group_flags(&bg_item) != type) { - error( -"chunk[%llu %llu) related block group item flags mismatch, wanted: %llu, have: %llu", - chunk_key.offset, chunk_end, type, - btrfs_block_group_flags(&bg_item)); - err |= REFERENCER_MISSING; - } + check_dev_size_alignment(dev_rec->devid, dev_rec->total_byte, + global_info->sectorsize); + dev_node = rb_next(dev_node); + } + list_for_each_entry(dext_rec, &dev_extent_cache->no_device_orphans, + device_list) { + fprintf(stderr, + "Device extent[%llu, %llu, %llu] didn't find its device.\n", + dext_rec->objectid, dext_rec->offset, dext_rec->length); + if (!ret) + ret = 1; } + return ret; +} - num_stripes = btrfs_chunk_num_stripes(eb, chunk); - stripe_len = btrfs_stripe_length(fs_info, eb, chunk); - for (i = 0; i < num_stripes; i++) { - btrfs_release_path(&path); - btrfs_init_path(&path); - devext_key.objectid = btrfs_stripe_devid_nr(eb, chunk, i); - devext_key.type = BTRFS_DEV_EXTENT_KEY; - devext_key.offset = btrfs_stripe_offset_nr(eb, chunk, i); +static int add_root_item_to_list(struct list_head *head, + u64 objectid, u64 bytenr, u64 last_snapshot, + u8 level, u8 drop_level, + struct btrfs_key *drop_key) +{ - ret = btrfs_search_slot(NULL, dev_root, &devext_key, &path, - 0, 0); - if (ret) - goto not_match_dev; + struct root_item_record *ri_rec; + ri_rec = malloc(sizeof(*ri_rec)); + if (!ri_rec) + 
return -ENOMEM; + ri_rec->bytenr = bytenr; + ri_rec->objectid = objectid; + ri_rec->level = level; + ri_rec->drop_level = drop_level; + ri_rec->last_snapshot = last_snapshot; + if (drop_key) + memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key)); + list_add_tail(&ri_rec->list, head); - leaf = path.nodes[0]; - ptr = btrfs_item_ptr(leaf, path.slots[0], - struct btrfs_dev_extent); - objectid = btrfs_dev_extent_chunk_objectid(leaf, ptr); - offset = btrfs_dev_extent_chunk_offset(leaf, ptr); - if (objectid != chunk_key.objectid || - offset != chunk_key.offset || - btrfs_dev_extent_length(leaf, ptr) != stripe_len) - goto not_match_dev; - continue; -not_match_dev: - err |= BACKREF_MISSING; - error( - "chunk[%llu %llu) stripe %d did not find the related dev extent", - chunk_key.objectid, chunk_end, i); - continue; - } - btrfs_release_path(&path); -out: - return err; + return 0; } -static int delete_extent_tree_item(struct btrfs_trans_handle *trans, - struct btrfs_root *root, - struct btrfs_path *path) +static void free_root_item_list(struct list_head *list) { - struct btrfs_key key; - int ret = 0; + struct root_item_record *ri_rec; - btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]); - btrfs_release_path(path); - ret = btrfs_search_slot(trans, root, &key, path, -1, 1); - if (ret) { - ret = -ENOENT; - goto out; + while (!list_empty(list)) { + ri_rec = list_first_entry(list, struct root_item_record, + list); + list_del_init(&ri_rec->list); + free(ri_rec); } - - ret = btrfs_del_item(trans, root, path); - if (ret) - goto out; - - if (path->slots[0] == 0) - btrfs_prev_leaf(root, path); - else - path->slots[0]--; -out: - if (ret) - error("failed to delete root %llu item[%llu, %u, %llu]", - root->objectid, key.objectid, key.type, key.offset); - else - printf("Deleted root %llu item[%llu, %u, %llu]\n", - root->objectid, key.objectid, key.type, key.offset); - return ret; } -/* - * Main entry function to check known items and update related accounting info - */ -static 
int check_leaf_items(struct btrfs_trans_handle *trans, - struct btrfs_root *root, struct btrfs_path *path, - struct node_refs *nrefs, int account_bytes) +static int deal_root_from_list(struct list_head *list, + struct btrfs_root *root, + struct block_info *bits, + int bits_nr, + struct cache_tree *pending, + struct cache_tree *seen, + struct cache_tree *reada, + struct cache_tree *nodes, + struct cache_tree *extent_cache, + struct cache_tree *chunk_cache, + struct rb_root *dev_cache, + struct block_group_tree *block_group_cache, + struct device_extent_tree *dev_extent_cache) { - struct btrfs_fs_info *fs_info = root->fs_info; - struct btrfs_key key; - struct extent_buffer *eb; - int slot; - int type; - struct btrfs_extent_data_ref *dref; int ret = 0; - int err = 0; + u64 last; -again: - eb = path->nodes[0]; - slot = path->slots[0]; - if (slot >= btrfs_header_nritems(eb)) { - if (slot == 0) { - error("empty leaf [%llu %u] root %llu", eb->start, - root->fs_info->nodesize, root->objectid); - err |= EIO; + while (!list_empty(list)) { + struct root_item_record *rec; + struct extent_buffer *buf; + rec = list_entry(list->next, + struct root_item_record, list); + last = 0; + buf = read_tree_block(root->fs_info, rec->bytenr, 0); + if (!extent_buffer_uptodate(buf)) { + free_extent_buffer(buf); + ret = -EIO; + break; } - goto out; + ret = add_root_to_pending(buf, extent_cache, pending, + seen, nodes, rec->objectid); + if (ret < 0) + break; + /* + * To rebuild extent tree, we need deal with snapshot + * one by one, otherwise we deal with node firstly which + * can maximize readahead. 
+ */ + while (1) { + ret = run_next_block(root, bits, bits_nr, &last, + pending, seen, reada, nodes, + extent_cache, chunk_cache, + dev_cache, block_group_cache, + dev_extent_cache, rec); + if (ret != 0) + break; + } + free_extent_buffer(buf); + list_del(&rec->list); + free(rec); + if (ret < 0) + break; } - - btrfs_item_key_to_cpu(eb, &key, slot); - type = key.type; - - switch (type) { - case BTRFS_EXTENT_DATA_KEY: - ret = check_extent_data_item(root, path, nrefs, account_bytes); - if (repair && ret) - ret = repair_extent_data_item(trans, root, path, nrefs, - ret); - err |= ret; - break; - case BTRFS_BLOCK_GROUP_ITEM_KEY: - ret = check_block_group_item(fs_info, eb, slot); - if (repair && - ret & REFERENCER_MISSING) - ret = delete_extent_tree_item(trans, root, path); - err |= ret; - break; - case BTRFS_DEV_ITEM_KEY: - ret = check_dev_item(fs_info, eb, slot); - err |= ret; - break; - case BTRFS_CHUNK_ITEM_KEY: - ret = check_chunk_item(fs_info, eb, slot); - if (repair && ret) - ret = repair_chunk_item(trans, root, path, ret); - err |= ret; - break; - case BTRFS_DEV_EXTENT_KEY: - ret = check_dev_extent_item(fs_info, eb, slot); - err |= ret; - break; - case BTRFS_EXTENT_ITEM_KEY: - case BTRFS_METADATA_ITEM_KEY: - ret = check_extent_item(trans, fs_info, path); - err |= ret; - break; - case BTRFS_EXTENT_CSUM_KEY: - total_csum_bytes += btrfs_item_size_nr(eb, slot); - err |= ret; - break; - case BTRFS_TREE_BLOCK_REF_KEY: - ret = check_tree_block_backref(fs_info, key.offset, - key.objectid, -1); - if (repair && - ret & (REFERENCER_MISMATCH | REFERENCER_MISSING)) - ret = delete_extent_tree_item(trans, root, path); - err |= ret; - break; - case BTRFS_EXTENT_DATA_REF_KEY: - dref = btrfs_item_ptr(eb, slot, struct btrfs_extent_data_ref); - ret = check_extent_data_backref(fs_info, - btrfs_extent_data_ref_root(eb, dref), - btrfs_extent_data_ref_objectid(eb, dref), - btrfs_extent_data_ref_offset(eb, dref), - key.objectid, 0, - btrfs_extent_data_ref_count(eb, dref)); - if (repair && 
- ret & (REFERENCER_MISMATCH | REFERENCER_MISSING)) - ret = delete_extent_tree_item(trans, root, path); - err |= ret; - break; - case BTRFS_SHARED_BLOCK_REF_KEY: - ret = check_shared_block_backref(fs_info, key.offset, - key.objectid, -1); - if (repair && - ret & (REFERENCER_MISMATCH | REFERENCER_MISSING)) - ret = delete_extent_tree_item(trans, root, path); - err |= ret; - break; - case BTRFS_SHARED_DATA_REF_KEY: - ret = check_shared_data_backref(fs_info, key.offset, - key.objectid); - if (repair && - ret & (REFERENCER_MISMATCH | REFERENCER_MISSING)) - ret = delete_extent_tree_item(trans, root, path); - err |= ret; - break; - default: - break; + while (ret >= 0) { + ret = run_next_block(root, bits, bits_nr, &last, pending, seen, + reada, nodes, extent_cache, chunk_cache, + dev_cache, block_group_cache, + dev_extent_cache, NULL); + if (ret != 0) { + if (ret > 0) + ret = 0; + break; + } } - - ++path->slots[0]; - goto again; -out: - return err; + return ret; } -/* - * Low memory usage version check_chunks_and_extents. 
- */ -static int check_chunks_and_extents_v2(struct btrfs_fs_info *fs_info) +static int check_chunks_and_extents(struct btrfs_fs_info *fs_info) { - struct btrfs_trans_handle *trans = NULL; + struct rb_root dev_cache; + struct cache_tree chunk_cache; + struct block_group_tree block_group_cache; + struct device_extent_tree dev_extent_cache; + struct cache_tree extent_cache; + struct cache_tree seen; + struct cache_tree pending; + struct cache_tree reada; + struct cache_tree nodes; + struct extent_io_tree excluded_extents; + struct cache_tree corrupt_blocks; struct btrfs_path path; - struct btrfs_key old_key; struct btrfs_key key; + struct btrfs_key found_key; + int ret, err = 0; + struct block_info *bits; + int bits_nr; + struct extent_buffer *leaf; + int slot; + struct btrfs_root_item ri; + struct list_head dropping_trees; + struct list_head normal_trees; struct btrfs_root *root1; struct btrfs_root *root; - struct btrfs_root *cur_root; - int err = 0; - int ret; + u64 objectid; + u8 level; root = fs_info->fs_root; + dev_cache = RB_ROOT; + cache_tree_init(&chunk_cache); + block_group_tree_init(&block_group_cache); + device_extent_tree_init(&dev_extent_cache); + + cache_tree_init(&extent_cache); + cache_tree_init(&seen); + cache_tree_init(&pending); + cache_tree_init(&nodes); + cache_tree_init(&reada); + cache_tree_init(&corrupt_blocks); + extent_io_tree_init(&excluded_extents); + INIT_LIST_HEAD(&dropping_trees); + INIT_LIST_HEAD(&normal_trees); if (repair) { - trans = btrfs_start_transaction(fs_info->extent_root, 1); - if (IS_ERR(trans)) { - error("failed to start transaction before check"); - return PTR_ERR(trans); - } + fs_info->excluded_extents = &excluded_extents; + fs_info->fsck_extent_cache = &extent_cache; + fs_info->free_extent_hook = free_extent_hook; + fs_info->corrupt_blocks = &corrupt_blocks; } - root1 = root->fs_info->chunk_root; - ret = check_btrfs_root(trans, root1, 0, 1); - err |= ret; + bits_nr = 1024; + bits = malloc(bits_nr * sizeof(struct 
block_info)); + if (!bits) { + perror("malloc"); + exit(1); + } - root1 = root->fs_info->tree_root; - ret = check_btrfs_root(trans, root1, 0, 1); - err |= ret; + if (ctx.progress_enabled) { + ctx.tp = TASK_EXTENTS; + task_start(ctx.info); + } +again: + root1 = fs_info->tree_root; + level = btrfs_header_level(root1->node); + ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid, + root1->node->start, 0, level, 0, NULL); + if (ret < 0) + goto out; + root1 = fs_info->chunk_root; + level = btrfs_header_level(root1->node); + ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid, + root1->node->start, 0, level, 0, NULL); + if (ret < 0) + goto out; btrfs_init_path(&path); - key.objectid = BTRFS_EXTENT_TREE_OBJECTID; key.offset = 0; + key.objectid = 0; key.type = BTRFS_ROOT_ITEM_KEY; - - ret = btrfs_search_slot(NULL, root1, &key, &path, 0, 0); - if (ret) { - error("cannot find extent tree in tree_root"); + ret = btrfs_search_slot(NULL, fs_info->tree_root, &key, &path, 0, 0); + if (ret < 0) goto out; - } - - while (1) { - btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]); - if (key.type != BTRFS_ROOT_ITEM_KEY) - goto next; - old_key = key; - key.offset = (u64)-1; - - if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) - cur_root = btrfs_read_fs_root_no_cache(root->fs_info, - &key); - else - cur_root = btrfs_read_fs_root(root->fs_info, &key); - if (IS_ERR(cur_root) || !cur_root) { - error("failed to read tree: %lld", key.objectid); - goto next; + while(1) { + leaf = path.nodes[0]; + slot = path.slots[0]; + if (slot >= btrfs_header_nritems(path.nodes[0])) { + ret = btrfs_next_leaf(root, &path); + if (ret != 0) + break; + leaf = path.nodes[0]; + slot = path.slots[0]; } + btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]); + if (found_key.type == BTRFS_ROOT_ITEM_KEY) { + unsigned long offset; + u64 last_snapshot; - ret = check_btrfs_root(trans, cur_root, 0, 1); - err |= ret; - - if (key.objectid == BTRFS_TREE_RELOC_OBJECTID) - 
btrfs_free_fs_root(cur_root); + offset = btrfs_item_ptr_offset(leaf, path.slots[0]); + read_extent_buffer(leaf, &ri, offset, sizeof(ri)); + last_snapshot = btrfs_root_last_snapshot(&ri); + if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) { + level = btrfs_root_level(&ri); + ret = add_root_item_to_list(&normal_trees, + found_key.objectid, + btrfs_root_bytenr(&ri), + last_snapshot, level, + 0, NULL); + if (ret < 0) + goto out; + } else { + level = btrfs_root_level(&ri); + objectid = found_key.objectid; + btrfs_disk_key_to_cpu(&found_key, + &ri.drop_progress); + ret = add_root_item_to_list(&dropping_trees, + objectid, + btrfs_root_bytenr(&ri), + last_snapshot, level, + ri.drop_level, &found_key); + if (ret < 0) + goto out; + } + } + path.slots[0]++; + } + btrfs_release_path(&path); - btrfs_release_path(&path); - ret = btrfs_search_slot(NULL, root->fs_info->tree_root, - &old_key, &path, 0, 0); - if (ret) - goto out; -next: - ret = btrfs_next_item(root1, &path); - if (ret) - goto out; + /* + * check_block can return -EAGAIN if it fixes something, please keep + * this in mind when dealing with return values from these functions, if + * we get -EAGAIN we want to fall through and restart the loop. 
+ */ + ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending, + &seen, &reada, &nodes, &extent_cache, + &chunk_cache, &dev_cache, &block_group_cache, + &dev_extent_cache); + if (ret < 0) { + if (ret == -EAGAIN) + goto loop; + goto out; + } + ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr, + &pending, &seen, &reada, &nodes, + &extent_cache, &chunk_cache, &dev_cache, + &block_group_cache, &dev_extent_cache); + if (ret < 0) { + if (ret == -EAGAIN) + goto loop; + goto out; } -out: - /* if repair, update block accounting */ - if (repair) { - ret = btrfs_fix_block_accounting(trans, root); - if (ret) - err |= ret; - else - err &= ~BG_ACCOUNTING_ERROR; + ret = check_chunks(&chunk_cache, &block_group_cache, + &dev_extent_cache, NULL, NULL, NULL, 0); + if (ret) { + if (ret == -EAGAIN) + goto loop; + err = ret; } - if (trans) - btrfs_commit_transaction(trans, root->fs_info->extent_root); + ret = check_extent_refs(root, &extent_cache); + if (ret < 0) { + if (ret == -EAGAIN) + goto loop; + goto out; + } - btrfs_release_path(&path); + ret = check_devices(&dev_cache, &dev_extent_cache); + if (ret && err) + ret = err; - return err; +out: + task_stop(ctx.info); + if (repair) { + free_corrupt_blocks_tree(fs_info->corrupt_blocks); + extent_io_tree_cleanup(&excluded_extents); + fs_info->fsck_extent_cache = NULL; + fs_info->free_extent_hook = NULL; + fs_info->corrupt_blocks = NULL; + fs_info->excluded_extents = NULL; + } + free(bits); + free_chunk_cache_tree(&chunk_cache); + free_device_cache_tree(&dev_cache); + free_block_group_tree(&block_group_cache); + free_device_extent_tree(&dev_extent_cache); + free_extent_cache_tree(&seen); + free_extent_cache_tree(&pending); + free_extent_cache_tree(&reada); + free_extent_cache_tree(&nodes); + free_root_item_list(&normal_trees); + free_root_item_list(&dropping_trees); + return ret; +loop: + free_corrupt_blocks_tree(fs_info->corrupt_blocks); + free_extent_cache_tree(&seen); + free_extent_cache_tree(&pending); + 
free_extent_cache_tree(&reada); + free_extent_cache_tree(&nodes); + free_chunk_cache_tree(&chunk_cache); + free_block_group_tree(&block_group_cache); + free_device_cache_tree(&dev_cache); + free_device_extent_tree(&dev_extent_cache); + free_extent_record_cache(&extent_cache); + free_root_item_list(&normal_trees); + free_root_item_list(&dropping_trees); + extent_io_tree_cleanup(&excluded_extents); + goto again; } static int do_check_chunks_and_extents(struct btrfs_fs_info *fs_info) -- cgit v1.2.3