From 63d66268f44c58e10f1dc159bf4c19d3067d38f8 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Tue, 13 Jan 2015 15:23:01 -0500 Subject: Btrfs-progs: let btrfs-corrupt-block specify a root Sometimes we want to corrupt specific keys or delete items on different roots, so allow btrfs-corrupt-block to take a root objectid so we can corrupt a specific root. Thanks, Signed-off-by: Josef Bacik --- btrfs-corrupt-block.c | 28 +++++++++++++++++++++++++--- 1 file changed, 25 insertions(+), 3 deletions(-) diff --git a/btrfs-corrupt-block.c b/btrfs-corrupt-block.c index 5db18a18..f332bdf7 100644 --- a/btrfs-corrupt-block.c +++ b/btrfs-corrupt-block.c @@ -109,6 +109,7 @@ static void print_usage(void) "to corrupt and a root+key for the item)\n"); fprintf(stderr, "\t-D Corrupt a dir item, must specify key and field\n"); fprintf(stderr, "\t-d Delete this item (must specify -K)\n"); + fprintf(stderr, "\t-r Operate on this root (only works with -d)\n"); exit(1); } @@ -1007,6 +1008,7 @@ int main(int ac, char **av) u64 metadata_block = 0; u64 inode = 0; u64 file_extent = (u64)-1; + u64 root_objectid = 0; char field[FIELD_BUF_LEN]; field[0] = '\0'; @@ -1034,11 +1036,12 @@ int main(int ac, char **av) { "item", 0, NULL, 'I'}, { "dir-item", 0, NULL, 'D'}, { "delete", 0, NULL, 'd'}, + { "root", 0, NULL, 'r'}, { NULL, 0, NULL, 0 } }; - c = getopt_long(ac, av, "l:c:b:eEkuUi:f:x:m:K:IDd", long_options, - &option_index); + c = getopt_long(ac, av, "l:c:b:eEkuUi:f:x:m:K:IDdr:", + long_options, &option_index); if (c < 0) break; switch(c) { @@ -1098,6 +1101,9 @@ int main(int ac, char **av) case 'd': delete = 1; break; + case 'r': + root_objectid = arg_strtou64(optarg); + break; default: print_usage(); } @@ -1206,9 +1212,25 @@ int main(int ac, char **av) ret = corrupt_btrfs_item(root, &key, field); } if (delete) { + struct btrfs_root *target = root; + if (!key.objectid) print_usage(); - ret = delete_item(root, &key); + if (root_objectid) { + struct btrfs_key root_key; + + root_key.objectid = 
root_objectid; + root_key.type = BTRFS_ROOT_ITEM_KEY; + root_key.offset = (u64)-1; + + target = btrfs_read_fs_root(root->fs_info, &root_key); + if (IS_ERR(target)) { + fprintf(stderr, "Couldn't find root %llu\n", + (unsigned long long)root_objectid); + print_usage(); + } + } + ret = delete_item(target, &key); goto out_close; } if (key.objectid || key.offset || key.type) { -- cgit v1.2.1 From 8ab2d7a9dd3099c9a5fdc5acd354b57e1039d18a Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Tue, 13 Jan 2015 15:23:41 -0500 Subject: btrfs-progs: deal with no extent info Previously we used to just set FULL_BACKREF if we couldn't lookup an extent info for an extent. Now we just bail out if we can't lookup the extent info, which is less than good since fsck is supposed to fix these very problems. So instead figure out the flag we are supposed to use and pass that along instead. This patch also provides a test image to test this functionality. Thanks, Signed-off-by: Josef Bacik --- cmds-check.c | 43 +++++++++------------ .../fsck-tests/014-no-extent-info/default_case.img | Bin 0 -> 4096 bytes 2 files changed, 19 insertions(+), 24 deletions(-) create mode 100644 tests/fsck-tests/014-no-extent-info/default_case.img diff --git a/cmds-check.c b/cmds-check.c index 73d7866a..ca40e358 100644 --- a/cmds-check.c +++ b/cmds-check.c @@ -5686,6 +5686,7 @@ static int run_next_block(struct btrfs_trans_handle *trans, struct root_item_record *ri) { struct extent_buffer *buf; + struct extent_record *rec = NULL; u64 bytenr; u32 size; u64 parent; @@ -5738,8 +5739,6 @@ static int run_next_block(struct btrfs_trans_handle *trans, } cache = lookup_cache_extent(extent_cache, bytenr, size); if (cache) { - struct extent_record *rec; - rec = container_of(cache, struct extent_record, cache); gen = rec->parent_generation; } @@ -5758,20 +5757,30 @@ static int run_next_block(struct btrfs_trans_handle *trans, * FIXME, this only works only if we don't have any full * backref mode. 
*/ + flags = 0; if (!init_extent_tree) { ret = btrfs_lookup_extent_info(NULL, root, bytenr, btrfs_header_level(buf), 1, NULL, &flags); - if (ret < 0) - goto out; + if (ret < 0) { + ret = calc_extent_flag(root, extent_cache, buf, ri, &flags); + if (ret < 0) { + fprintf(stderr, "Couldn't calc extent flags\n"); + flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF; + } + } } else { flags = 0; ret = calc_extent_flag(root, extent_cache, buf, ri, &flags); - if (ret < 0) - goto out; + if (ret < 0) { + fprintf(stderr, "Couldn't calc extent flags\n"); + flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF; + } } if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) { + if (rec) + rec->flag_block_full_backref = 1; parent = bytenr; owner = 0; } else { @@ -7030,21 +7039,8 @@ static int fixup_extent_refs(struct btrfs_trans_handle *trans, int allocated = 0; u64 flags = 0; - /* - * remember our flags for recreating the extent. - * FIXME, if we have cleared extent tree, we can not - * lookup extent info in extent tree. - */ - if (!init_extent_tree) { - ret = btrfs_lookup_extent_info(NULL, info->extent_root, - rec->start, rec->max_size, - rec->metadata, NULL, &flags); - if (ret < 0) - return ret; - } else { - if (rec->flag_block_full_backref) - flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF; - } + if (rec->flag_block_full_backref) + flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF; path = btrfs_alloc_path(); if (!path) @@ -7691,8 +7687,6 @@ static int deal_root_from_list(struct list_head *list, * one by one, otherwise we deal with node firstly which * can maximize readahead. 
*/ - if (!init_extent_tree && !rec->drop_level) - goto skip; while (1) { ret = run_next_block(trans, root, bits, bits_nr, &last, pending, seen, reada, @@ -7703,10 +7697,11 @@ static int deal_root_from_list(struct list_head *list, if (ret != 0) break; } -skip: free_extent_buffer(buf); list_del(&rec->list); free(rec); + if (ret < 0) + break; } while (ret >= 0) { ret = run_next_block(trans, root, bits, bits_nr, &last, diff --git a/tests/fsck-tests/014-no-extent-info/default_case.img b/tests/fsck-tests/014-no-extent-info/default_case.img new file mode 100644 index 00000000..1ff27434 Binary files /dev/null and b/tests/fsck-tests/014-no-extent-info/default_case.img differ -- cgit v1.2.1 From cc73e60d952af55f4aac5266711456d5a774c207 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Fri, 6 Feb 2015 09:59:54 -0500 Subject: Btrfs-progs: handle -eagain properly If we fix bad blocks during run_next_block we will return -EAGAIN to loop around and start again. The deal_with_roots work messed up this handling, this patch fixes it. With this patch we can properly deal with broken tree blocks. 
Thanks, Signed-off-by: Josef Bacik --- cmds-check.c | 93 +++++++++++++++++++++++++++++++++++++++++------------------- 1 file changed, 64 insertions(+), 29 deletions(-) diff --git a/cmds-check.c b/cmds-check.c index ca40e358..e74fa0f6 100644 --- a/cmds-check.c +++ b/cmds-check.c @@ -7649,6 +7649,18 @@ static int add_root_item_to_list(struct list_head *head, return 0; } +static void free_root_item_list(struct list_head *list) +{ + struct root_item_record *ri_rec; + + while (!list_empty(list)) { + ri_rec = list_first_entry(list, struct root_item_record, + list); + list_del_init(&ri_rec->list); + free(ri_rec); + } +} + static int deal_root_from_list(struct list_head *list, struct btrfs_trans_handle *trans, struct btrfs_root *root, @@ -7846,50 +7858,49 @@ again: path.slots[0]++; } btrfs_release_path(&path); + + /* + * check_block can return -EAGAIN if it fixes something, please keep + * this in mind when dealing with return values from these functions, if + * we get -EAGAIN we want to fall through and restart the loop. 
+ */ ret = deal_root_from_list(&normal_trees, trans, root, bits, bits_nr, &pending, &seen, &reada, &nodes, &extent_cache, &chunk_cache, &dev_cache, &block_group_cache, &dev_extent_cache); - if (ret < 0) + if (ret < 0) { + if (ret == -EAGAIN) + goto loop; goto out; + } ret = deal_root_from_list(&dropping_trees, trans, root, bits, bits_nr, &pending, &seen, &reada, &nodes, &extent_cache, - &chunk_cache, &dev_cache, &block_group_cache, + &chunk_cache, &dev_cache, + &block_group_cache, &dev_extent_cache); - if (ret < 0) + if (ret < 0) { + if (ret == -EAGAIN) + goto loop; goto out; - if (ret >= 0) - ret = check_extent_refs(trans, root, &extent_cache); - if (ret == -EAGAIN) { - ret = btrfs_commit_transaction(trans, root); - if (ret) - goto out; - - trans = btrfs_start_transaction(root, 1); - if (IS_ERR(trans)) { - ret = PTR_ERR(trans); - goto out; - } - - free_corrupt_blocks_tree(root->fs_info->corrupt_blocks); - free_extent_cache_tree(&seen); - free_extent_cache_tree(&pending); - free_extent_cache_tree(&reada); - free_extent_cache_tree(&nodes); - free_chunk_cache_tree(&chunk_cache); - free_block_group_tree(&block_group_cache); - free_device_cache_tree(&dev_cache); - free_device_extent_tree(&dev_extent_cache); - free_extent_record_cache(root->fs_info, &extent_cache); - goto again; } err = check_chunks(&chunk_cache, &block_group_cache, &dev_extent_cache, NULL, NULL, NULL, 0); - if (err && !ret) - ret = err; + if (err) { + if (err == -EAGAIN) + goto loop; + if (!ret) + ret = err; + } + + ret = check_extent_refs(trans, root, &extent_cache); + if (ret < 0) { + if (ret == -EAGAIN) + goto loop; + goto out; + } err = check_devices(&dev_cache, &dev_extent_cache); if (err && !ret) @@ -7917,6 +7928,30 @@ out: free_extent_cache_tree(&reada); free_extent_cache_tree(&nodes); return ret; +loop: + ret = btrfs_commit_transaction(trans, root); + if (ret) + goto out; + + trans = btrfs_start_transaction(root, 1); + if (IS_ERR(trans)) { + ret = PTR_ERR(trans); + goto out; + } + + 
free_corrupt_blocks_tree(root->fs_info->corrupt_blocks); + free_extent_cache_tree(&seen); + free_extent_cache_tree(&pending); + free_extent_cache_tree(&reada); + free_extent_cache_tree(&nodes); + free_chunk_cache_tree(&chunk_cache); + free_block_group_tree(&block_group_cache); + free_device_cache_tree(&dev_cache); + free_device_extent_tree(&dev_extent_cache); + free_extent_record_cache(root->fs_info, &extent_cache); + free_root_item_list(&normal_trees); + free_root_item_list(&dropping_trees); + goto again; } static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans, -- cgit v1.2.1 From 1b7126f93722064eff6ee0792694a598a37bef30 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Wed, 14 Jan 2015 14:40:21 -0500 Subject: Btrfs-progs: read super properly in btrfs-image When btrfs-image makes a metadump it'll map all the blocks from their logical address to their physical. This works out fine with the exception of the super block, which is the physical offset. Normally this just works, but if the user has balanced their fs it'll either crash btrfs-image or it'll copy some completely arbitrary data. This forces btrfs-image to read the super directly from the disk. 
Thanks, Signed-off-by: Josef Bacik --- btrfs-image.c | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/btrfs-image.c b/btrfs-image.c index f6347f36..4bcaf6c2 100644 --- a/btrfs-image.c +++ b/btrfs-image.c @@ -868,6 +868,15 @@ static int read_data_extent(struct metadump_struct *md, return 0; } +static int get_dev_fd(struct btrfs_root *root) +{ + struct btrfs_device *dev; + + dev = list_first_entry(&root->fs_info->fs_devices->devices, + struct btrfs_device, dev_list); + return dev->fd; +} + static int flush_pending(struct metadump_struct *md, int done) { struct async_work *async = NULL; @@ -904,6 +913,24 @@ static int flush_pending(struct metadump_struct *md, int done) } } + /* + * Balance can make the mapping not cover the super block, so + * just copy directly from one of the devices. + */ + if (start == BTRFS_SUPER_INFO_OFFSET) { + int fd = get_dev_fd(md->root); + + ret = pread64(fd, async->buffer, size, start); + if (ret < size) { + free(async->buffer); + free(async); + fprintf(stderr, "Error reading superblock\n"); + return -EIO; + } + size = 0; + ret = 0; + } + while (!md->data && size > 0) { u64 this_read = min(blocksize, size); eb = read_tree_block(md->root, start, this_read, 0); -- cgit v1.2.1 From 65ac3b27586e6733191f941480f9348963dab9a9 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Thu, 15 Jan 2015 17:11:41 -0500 Subject: Btrfs-progs: don't try to repair reloc roots We have logic to fix the root locations for roots in response to a corruption bug we had earlier. However this work doesn't apply to reloc roots and can screw things up worse, so make sure we skip any reloc roots that we find. 
Thanks, Signed-off-by: Josef Bacik --- cmds-check.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/cmds-check.c b/cmds-check.c index e74fa0f6..2b08c648 100644 --- a/cmds-check.c +++ b/cmds-check.c @@ -8886,6 +8886,8 @@ again: if (found_key.type != BTRFS_ROOT_ITEM_KEY) goto next; + if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID) + goto next; ret = maybe_repair_root_item(info, path, &found_key, trans ? 0 : 1); -- cgit v1.2.1 From 20feed2ea49fff104520d3407ce0c8a935204c44 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Fri, 23 Jan 2015 11:39:28 -0500 Subject: Btrfs-progs: don't check csums for data reloc root The data reloc root is weird with its csums. It'll copy an entire extent and then log any csums it finds, which makes it look weird when it comes to prealloc extents. So just skip the data reloc tree, it's special and we just don't need to worry about it. Thanks, Signed-off-by: Josef Bacik --- cmds-check.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/cmds-check.c b/cmds-check.c index 2b08c648..21638235 100644 --- a/cmds-check.c +++ b/cmds-check.c @@ -1530,7 +1530,16 @@ static int process_file_extent(struct btrfs_root *root, } rec->extent_end = key->offset + num_bytes; - if (disk_bytenr > 0) { + /* + * The data reloc tree will copy full extents into its inode and then + * copy the corresponding csums. Because the extent it copied could be + * a preallocated extent that hasn't been written to yet there may be no + * csums to copy, ergo we won't have csums for our file extent. This is + * ok so just don't bother checking csums if the inode belongs to the + * data reloc tree. 
+ */ + if (disk_bytenr > 0 && + btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) { u64 found; if (btrfs_file_extent_compression(eb, fi)) num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi); -- cgit v1.2.1 From 32bb725f667a5c26f208f0717cb91716523d88f9 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Sun, 25 Jan 2015 17:39:24 -0800 Subject: btrfs-progs: fix btrfs-image overlapping chunks If you create a metadump from a striped volume you will have chunks that refer to different logical offsets with the same physical offset on different devices. So when we do the restore we just truncate the number of stripes in each chunk item and carry on, which causes problems because we then have chunks that point to the same physical offset for different logical offsets. To handle this problem we keep track of logical extents that overlap on physical extents. Then we go back and remap these extents into different physical extents on the disk we are restoring onto. This makes us actually able to restore a multi disk image onto a single disk and have everything work out properly. 
Thanks, Signed-off-by: Josef Bacik --- btrfs-image.c | 170 +++++++++++++++++++++++++++++++++++++++++++++------------- 1 file changed, 134 insertions(+), 36 deletions(-) diff --git a/btrfs-image.c b/btrfs-image.c index 4bcaf6c2..aaff26d3 100644 --- a/btrfs-image.c +++ b/btrfs-image.c @@ -67,7 +67,9 @@ struct fs_chunk { u64 logical; u64 physical; u64 bytes; - struct rb_node n; + struct rb_node l; + struct rb_node p; + struct list_head list; }; struct async_work { @@ -125,10 +127,13 @@ struct mdrestore_struct { pthread_cond_t cond; struct rb_root chunk_tree; + struct rb_root physical_tree; struct list_head list; + struct list_head overlapping_chunks; size_t num_items; u32 leafsize; u64 devid; + u64 last_physical_offset; u8 uuid[BTRFS_UUID_SIZE]; u8 fsid[BTRFS_FSID_SIZE]; @@ -138,6 +143,7 @@ struct mdrestore_struct { int old_restore; int fixup_offset; int multi_devices; + int clear_space_cache; struct btrfs_fs_info *info; }; @@ -202,8 +208,8 @@ static int name_cmp(struct rb_node *a, struct rb_node *b, int fuzz) static int chunk_cmp(struct rb_node *a, struct rb_node *b, int fuzz) { - struct fs_chunk *entry = rb_entry(a, struct fs_chunk, n); - struct fs_chunk *ins = rb_entry(b, struct fs_chunk, n); + struct fs_chunk *entry = rb_entry(a, struct fs_chunk, l); + struct fs_chunk *ins = rb_entry(b, struct fs_chunk, l); if (fuzz && ins->logical >= entry->logical && ins->logical < entry->logical + entry->bytes) @@ -216,6 +222,26 @@ static int chunk_cmp(struct rb_node *a, struct rb_node *b, int fuzz) return 0; } +static int physical_cmp(struct rb_node *a, struct rb_node *b, int fuzz) +{ + struct fs_chunk *entry = rb_entry(a, struct fs_chunk, p); + struct fs_chunk *ins = rb_entry(b, struct fs_chunk, p); + + if (fuzz && ins->physical >= entry->physical && + ins->physical < entry->physical + entry->bytes) + return 0; + + if (fuzz && entry->physical >= ins->physical && + entry->physical < ins->physical + ins->bytes) + return 0; + + if (ins->physical < entry->physical) + return -1; + 
else if (ins->physical > entry->physical) + return 1; + return 0; +} + static void tree_insert(struct rb_root *root, struct rb_node *ins, int (*cmp)(struct rb_node *a, struct rb_node *b, int fuzz)) @@ -227,7 +253,7 @@ static void tree_insert(struct rb_root *root, struct rb_node *ins, while(*p) { parent = *p; - dir = cmp(*p, ins, 0); + dir = cmp(*p, ins, 1); if (dir < 0) p = &(*p)->rb_left; else if (dir > 0) @@ -262,6 +288,33 @@ static struct rb_node *tree_search(struct rb_root *root, return NULL; } +static u64 logical_to_physical(struct mdrestore_struct *mdres, u64 logical, u64 *size) +{ + struct fs_chunk *fs_chunk; + struct rb_node *entry; + struct fs_chunk search; + u64 offset; + + if (logical == BTRFS_SUPER_INFO_OFFSET) + return logical; + + search.logical = logical; + entry = tree_search(&mdres->chunk_tree, &search.l, chunk_cmp, 1); + if (!entry) { + if (mdres->in != stdin) + printf("Couldn't find a chunk, using logical\n"); + return logical; + } + fs_chunk = rb_entry(entry, struct fs_chunk, l); + if (fs_chunk->logical > logical || fs_chunk->logical + fs_chunk->bytes < logical) + BUG(); + offset = search.logical - fs_chunk->logical; + + *size = min(*size, fs_chunk->bytes + fs_chunk->logical - logical); + return fs_chunk->physical + offset; +} + + static char *find_collision(struct metadump_struct *md, char *name, u32 name_len) { @@ -1396,7 +1449,7 @@ static void update_super_old(u8 *buffer) csum_block(buffer, BTRFS_SUPER_INFO_SIZE); } -static int update_super(u8 *buffer) +static int update_super(struct mdrestore_struct *mdres, u8 *buffer) { struct btrfs_super_block *super = (struct btrfs_super_block *)buffer; struct btrfs_chunk *chunk; @@ -1423,6 +1476,8 @@ static int update_super(u8 *buffer) cur += sizeof(*disk_key); if (key.type == BTRFS_CHUNK_ITEM_KEY) { + u64 physical, size = 0; + chunk = (struct btrfs_chunk *)ptr; old_num_stripes = btrfs_stack_chunk_num_stripes(chunk); chunk = (struct btrfs_chunk *)write_ptr; @@ -1432,7 +1487,13 @@ static int 
update_super(u8 *buffer) btrfs_set_stack_chunk_sub_stripes(chunk, 0); btrfs_set_stack_chunk_type(chunk, BTRFS_BLOCK_GROUP_SYSTEM); - chunk->stripe.devid = super->dev_item.devid; + btrfs_set_stack_stripe_devid(&chunk->stripe, + super->dev_item.devid); + physical = logical_to_physical(mdres, key.offset, + &size); + if (size != (u64)-1) + btrfs_set_stack_stripe_offset(&chunk->stripe, + physical); memcpy(chunk->stripe.dev_uuid, super->dev_item.uuid, BTRFS_UUID_SIZE); new_array_size += sizeof(*chunk); @@ -1446,6 +1507,9 @@ static int update_super(u8 *buffer) cur += btrfs_chunk_item_size(old_num_stripes); } + if (mdres->clear_space_cache) + btrfs_set_super_cache_generation(super, 0); + btrfs_set_super_sys_array_size(super, new_array_size); csum_block(buffer, BTRFS_SUPER_INFO_SIZE); @@ -1536,7 +1600,7 @@ static int fixup_chunk_tree_block(struct mdrestore_struct *mdres, for (i = 0; i < btrfs_header_nritems(eb); i++) { struct btrfs_chunk chunk; struct btrfs_key key; - u64 type; + u64 type, physical, size = (u64)-1; btrfs_item_key_to_cpu(eb, &key, i); if (key.type != BTRFS_CHUNK_ITEM_KEY) @@ -1546,6 +1610,10 @@ static int fixup_chunk_tree_block(struct mdrestore_struct *mdres, btrfs_item_ptr_offset(eb, i), sizeof(chunk)); + size = 0; + physical = logical_to_physical(mdres, key.offset, + &size); + /* Zero out the RAID profile */ type = btrfs_stack_chunk_type(&chunk); type &= (BTRFS_BLOCK_GROUP_DATA | @@ -1557,6 +1625,9 @@ static int fixup_chunk_tree_block(struct mdrestore_struct *mdres, btrfs_set_stack_chunk_num_stripes(&chunk, 1); btrfs_set_stack_chunk_sub_stripes(&chunk, 0); btrfs_set_stack_stripe_devid(&chunk.stripe, mdres->devid); + if (size != (u64)-1) + btrfs_set_stack_stripe_offset(&chunk.stripe, + physical); memcpy(chunk.stripe.dev_uuid, mdres->uuid, BTRFS_UUID_SIZE); write_extent_buffer(eb, &chunk, @@ -1611,32 +1682,6 @@ static void write_backup_supers(int fd, u8 *buf) } } -static u64 logical_to_physical(struct mdrestore_struct *mdres, u64 logical, u64 *size) -{ - 
struct fs_chunk *fs_chunk; - struct rb_node *entry; - struct fs_chunk search; - u64 offset; - - if (logical == BTRFS_SUPER_INFO_OFFSET) - return logical; - - search.logical = logical; - entry = tree_search(&mdres->chunk_tree, &search.n, chunk_cmp, 1); - if (!entry) { - if (mdres->in != stdin) - printf("Couldn't find a chunk, using logical\n"); - return logical; - } - fs_chunk = rb_entry(entry, struct fs_chunk, n); - if (fs_chunk->logical > logical || fs_chunk->logical + fs_chunk->bytes < logical) - BUG(); - offset = search.logical - fs_chunk->logical; - - *size = min(*size, fs_chunk->bytes + fs_chunk->logical - logical); - return fs_chunk->physical + offset; -} - static void *restore_worker(void *data) { struct mdrestore_struct *mdres = (struct mdrestore_struct *)data; @@ -1696,7 +1741,7 @@ static void *restore_worker(void *data) if (mdres->old_restore) { update_super_old(outbuf); } else { - ret = update_super(outbuf); + ret = update_super(mdres, outbuf); if (ret) err = ret; } @@ -1769,8 +1814,9 @@ static void mdrestore_destroy(struct mdrestore_struct *mdres, int num_threads) while ((n = rb_first(&mdres->chunk_tree))) { struct fs_chunk *entry; - entry = rb_entry(n, struct fs_chunk, n); + entry = rb_entry(n, struct fs_chunk, l); rb_erase(n, &mdres->chunk_tree); + rb_erase(&entry->p, &mdres->physical_tree); free(entry); } pthread_mutex_lock(&mdres->mutex); @@ -1797,6 +1843,7 @@ static int mdrestore_init(struct mdrestore_struct *mdres, pthread_cond_init(&mdres->cond, NULL); pthread_mutex_init(&mdres->mutex, NULL); INIT_LIST_HEAD(&mdres->list); + INIT_LIST_HEAD(&mdres->overlapping_chunks); mdres->in = in; mdres->out = out; mdres->old_restore = old_restore; @@ -1804,6 +1851,8 @@ static int mdrestore_init(struct mdrestore_struct *mdres, mdres->fixup_offset = fixup_offset; mdres->info = info; mdres->multi_devices = multi_devices; + mdres->clear_space_cache = 0; + mdres->last_physical_offset = 0; if (!num_threads) return 0; @@ -2025,7 +2074,18 @@ static int 
read_chunk_block(struct mdrestore_struct *mdres, u8 *buffer, fs_chunk->logical = key.offset; fs_chunk->physical = btrfs_stack_stripe_offset(&chunk.stripe); fs_chunk->bytes = btrfs_stack_chunk_length(&chunk); - tree_insert(&mdres->chunk_tree, &fs_chunk->n, chunk_cmp); + INIT_LIST_HEAD(&fs_chunk->list); + if (tree_search(&mdres->physical_tree, &fs_chunk->p, + physical_cmp, 1) != NULL) + list_add(&fs_chunk->list, &mdres->overlapping_chunks); + else + tree_insert(&mdres->physical_tree, &fs_chunk->p, + physical_cmp); + if (fs_chunk->physical + fs_chunk->bytes > + mdres->last_physical_offset) + mdres->last_physical_offset = fs_chunk->physical + + fs_chunk->bytes; + tree_insert(&mdres->chunk_tree, &fs_chunk->l, chunk_cmp); } out: free(eb); @@ -2274,6 +2334,42 @@ static int build_chunk_tree(struct mdrestore_struct *mdres, return search_for_chunk_blocks(mdres, chunk_root_bytenr, 0); } +static int range_contains_super(u64 physical, u64 bytes) +{ + u64 super_bytenr; + int i; + + for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) { + super_bytenr = btrfs_sb_offset(i); + if (super_bytenr >= physical && + super_bytenr < physical + bytes) + return 1; + } + + return 0; +} + +static void remap_overlapping_chunks(struct mdrestore_struct *mdres) +{ + struct fs_chunk *fs_chunk; + + while (!list_empty(&mdres->overlapping_chunks)) { + fs_chunk = list_first_entry(&mdres->overlapping_chunks, + struct fs_chunk, list); + list_del_init(&fs_chunk->list); + if (range_contains_super(fs_chunk->physical, + fs_chunk->bytes)) { + fprintf(stderr, "Remapping a chunk that had a super " + "mirror inside of it, clearing space cache " + "so we don't end up with corruption\n"); + mdres->clear_space_cache = 1; + } + fs_chunk->physical = mdres->last_physical_offset; + tree_insert(&mdres->physical_tree, &fs_chunk->p, physical_cmp); + mdres->last_physical_offset += fs_chunk->bytes; + } +} + static int __restore_metadump(const char *input, FILE *out, int old_restore, int num_threads, int fixup_offset, const char 
*target, int multi_devices) @@ -2328,6 +2424,8 @@ static int __restore_metadump(const char *input, FILE *out, int old_restore, ret = build_chunk_tree(&mdrestore, cluster); if (ret) goto out; + if (!list_empty(&mdrestore.overlapping_chunks)) + remap_overlapping_chunks(&mdrestore); } if (in != stdin && fseek(in, 0, SEEK_SET)) { -- cgit v1.2.1 From c7e3b63b7e231ed1d9a47ca735a7ead112b47078 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Fri, 6 Feb 2015 14:40:45 -0500 Subject: Btrfs-progs: multi-thread btrfs-image restore For some reason we only allow btrfs-image restore to have one thread, which is incredibly slow with large images. So allow us to do work with more than just one thread. This made my restore go from 16 minutes to 3 minutes. Thanks, Signed-off-by: Josef Bacik --- btrfs-image.c | 17 +++++------------ 1 file changed, 5 insertions(+), 12 deletions(-) diff --git a/btrfs-image.c b/btrfs-image.c index aaff26d3..ea855425 100644 --- a/btrfs-image.c +++ b/btrfs-image.c @@ -1922,7 +1922,6 @@ static int add_cluster(struct meta_cluster *cluster, u32 i, nritems; int ret; - BUG_ON(mdres->num_items); mdres->compress_method = header->compress; bytenr = le64_to_cpu(header->bytenr) + BLOCK_SIZE; @@ -2433,7 +2432,7 @@ static int __restore_metadump(const char *input, FILE *out, int old_restore, goto out; } - while (1) { + while (!mdrestore.error) { ret = fread(cluster, BLOCK_SIZE, 1, in); if (!ret) break; @@ -2450,14 +2449,8 @@ static int __restore_metadump(const char *input, FILE *out, int old_restore, fprintf(stderr, "Error adding cluster\n"); break; } - - ret = wait_for_worker(&mdrestore); - if (ret) { - fprintf(stderr, "One of the threads errored out %d\n", - ret); - break; - } } + ret = wait_for_worker(&mdrestore); out: mdrestore_destroy(&mdrestore, num_threads); failed_cluster: @@ -2598,7 +2591,7 @@ int main(int argc, char *argv[]) { char *source; char *target; - u64 num_threads = 0; + u64 num_threads = 1; u64 compress_level = 0; int create = 1; int old_restore = 0; 
@@ -2689,7 +2682,7 @@ int main(int argc, char *argv[]) } } - if (num_threads == 0 && compress_level > 0) { + if (num_threads == 1 && compress_level > 0) { num_threads = sysconf(_SC_NPROCESSORS_ONLN); if (num_threads <= 0) num_threads = 1; @@ -2708,7 +2701,7 @@ int main(int argc, char *argv[]) ret = create_metadump(source, out, num_threads, compress_level, sanitize, walk_trees); } else { - ret = restore_metadump(source, out, old_restore, 1, + ret = restore_metadump(source, out, old_restore, num_threads, multi_devices); } if (ret) { -- cgit v1.2.1 From 797a937e5dd8db0092add633a80f3cd698e182df Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Fri, 6 Feb 2015 13:03:12 -0500 Subject: Btrfs-progs: Introduce metadump_v2 The METADUMP super flag makes us skip doing the chunk tree reading which isn't helpful for the new restore since we have a valid chunk tree. But we still want to have a way for the kernel to know that this is a metadump restore so it doesn't do things like verify data checksums. We also want to skip some of the device extent checks in fsck since those will obviously not match. 
Thanks, Signed-off-by: Josef Bacik --- btrfs-image.c | 3 +++ cmds-check.c | 9 +++++++-- ctree.h | 1 + 3 files changed, 11 insertions(+), 2 deletions(-) diff --git a/btrfs-image.c b/btrfs-image.c index ea855425..feb4a620 100644 --- a/btrfs-image.c +++ b/btrfs-image.c @@ -1455,6 +1455,7 @@ static int update_super(struct mdrestore_struct *mdres, u8 *buffer) struct btrfs_chunk *chunk; struct btrfs_disk_key *disk_key; struct btrfs_key key; + u64 flags = btrfs_super_flags(super); u32 new_array_size = 0; u32 array_size; u32 cur = 0; @@ -1510,6 +1511,8 @@ static int update_super(struct mdrestore_struct *mdres, u8 *buffer) if (mdres->clear_space_cache) btrfs_set_super_cache_generation(super, 0); + flags |= BTRFS_SUPER_FLAG_METADUMP_V2; + btrfs_set_super_flags(super, flags); btrfs_set_super_sys_array_size(super, new_array_size); csum_block(buffer, BTRFS_SUPER_INFO_SIZE); diff --git a/cmds-check.c b/cmds-check.c index 21638235..ffdfbf27 100644 --- a/cmds-check.c +++ b/cmds-check.c @@ -7426,6 +7426,7 @@ static int check_chunk_refs(struct chunk_record *chunk_rec, u64 devid; u64 offset; u64 length; + int metadump_v2 = 0; int i; int ret = 0; @@ -7438,7 +7439,8 @@ static int check_chunk_refs(struct chunk_record *chunk_rec, cache); if (chunk_rec->length != block_group_rec->offset || chunk_rec->offset != block_group_rec->objectid || - chunk_rec->type_flags != block_group_rec->flags) { + (!metadump_v2 && + chunk_rec->type_flags != block_group_rec->flags)) { if (!silent) fprintf(stderr, "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n", @@ -7472,6 +7474,9 @@ static int check_chunk_refs(struct chunk_record *chunk_rec, ret = 1; } + if (metadump_v2) + return ret; + length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length, chunk_rec->num_stripes); for (i = 0; i < chunk_rec->num_stripes; ++i) { @@ -7538,7 +7543,7 @@ int check_chunks(struct cache_tree *chunk_cache, cache); err = 
check_chunk_refs(chunk_rec, block_group_cache, dev_extent_cache, silent); - if (err) + if (err < 0) ret = err; if (err == 0 && good) list_add_tail(&chunk_rec->list, good); diff --git a/ctree.h b/ctree.h index 2d2988b1..be30cb63 100644 --- a/ctree.h +++ b/ctree.h @@ -309,6 +309,7 @@ static inline unsigned long btrfs_chunk_item_size(int num_stripes) #define BTRFS_HEADER_FLAG_RELOC (1ULL << 1) #define BTRFS_SUPER_FLAG_SEEDING (1ULL << 32) #define BTRFS_SUPER_FLAG_METADUMP (1ULL << 33) +#define BTRFS_SUPER_FLAG_METADUMP_V2 (1ULL << 34) #define BTRFS_BACKREF_REV_MAX 256 #define BTRFS_BACKREF_REV_SHIFT 56 -- cgit v1.2.1 From bce7dbba2859a47554c122eae32ead87e6a6510a Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Sun, 25 Jan 2015 17:41:19 -0800 Subject: Btrfs-progs: only build space info's for the main flags Hitting enospc problems with a really corrupt fs uncovered the fact that we match any flag in a block group when creating space infos. This is a problem if we have a raid level set, we'll end up with only one space info that covers metadata and data because they share a raid level. We don't want this, we want to separate out the data and metadata space infos, so mask off the raid level and only use the main flags. 
Thanks, Signed-off-by: Josef Bacik --- extent-tree.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/extent-tree.c b/extent-tree.c index 1785e226..d42c5727 100644 --- a/extent-tree.c +++ b/extent-tree.c @@ -1789,11 +1789,11 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans, static struct btrfs_space_info *__find_space_info(struct btrfs_fs_info *info, u64 flags) { - struct list_head *head = &info->space_info; - struct list_head *cur; struct btrfs_space_info *found; - list_for_each(cur, head) { - found = list_entry(cur, struct btrfs_space_info, list); + + flags &= BTRFS_BLOCK_GROUP_TYPE_MASK; + + list_for_each_entry(found, &info->space_info, list) { if (found->flags & flags) return found; } @@ -1825,7 +1825,7 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags, return -ENOMEM; list_add(&found->list, &info->space_info); - found->flags = flags; + found->flags = flags & BTRFS_BLOCK_GROUP_TYPE_MASK; found->total_bytes = total_bytes; found->bytes_used = bytes_used; found->bytes_pinned = 0; -- cgit v1.2.1 From 08a45972234b3edb1348641e188f233bab6e50c1 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Mon, 9 Feb 2015 10:02:25 -0500 Subject: Btrfs-progs: remove global transaction from fsck We hold a transaction open for the entirety of fixing extent refs. This works out ok most of the time but we can be tight on space and run out of space when fixing things. To get around this just push down the transaction starting dance into the functions that actually fix things. This keeps us from ending up with ENOSPC because we pinned everything and allows the code to be a bit simpler. 
Thanks, Signed-off-by: Josef Bacik --- cmds-check.c | 230 +++++++++++++++++++++++++++++++++------------------------- ctree.h | 1 + disk-io.c | 2 + extent-tree.c | 7 ++ 4 files changed, 140 insertions(+), 100 deletions(-) diff --git a/cmds-check.c b/cmds-check.c index ffdfbf27..5458c282 100644 --- a/cmds-check.c +++ b/cmds-check.c @@ -3668,7 +3668,6 @@ static void free_extent_record_cache(struct btrfs_fs_info *fs_info, if (!cache) break; rec = container_of(cache, struct extent_record, cache); - btrfs_unpin_extent(fs_info, rec->start, rec->max_size); remove_cache_extent(extent_cache, cache); free_all_extent_backrefs(rec); free(rec); @@ -3995,11 +3994,11 @@ again: * Attempt to fix basic block failures. If we can't fix it for whatever reason * then just return -EIO. */ -static int try_to_fix_bad_block(struct btrfs_trans_handle *trans, - struct btrfs_root *root, +static int try_to_fix_bad_block(struct btrfs_root *root, struct extent_buffer *buf, enum btrfs_tree_block_status status) { + struct btrfs_trans_handle *trans; struct ulist *roots; struct ulist_node *node; struct btrfs_root *search_root; @@ -4016,7 +4015,7 @@ static int try_to_fix_bad_block(struct btrfs_trans_handle *trans, if (!path) return -EIO; - ret = btrfs_find_all_roots(trans, root->fs_info, buf->start, + ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start, 0, &roots); if (ret) { btrfs_free_path(path); @@ -4035,7 +4034,12 @@ static int try_to_fix_bad_block(struct btrfs_trans_handle *trans, break; } - record_root_in_trans(trans, search_root); + + trans = btrfs_start_transaction(search_root, 0); + if (IS_ERR(trans)) { + ret = PTR_ERR(trans); + break; + } path->lowest_level = btrfs_header_level(buf); path->skip_check_block = 1; @@ -4046,23 +4050,26 @@ static int try_to_fix_bad_block(struct btrfs_trans_handle *trans, ret = btrfs_search_slot(trans, search_root, &key, path, 0, 1); if (ret) { ret = -EIO; + btrfs_commit_transaction(trans, search_root); break; } if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER) 
ret = fix_key_order(trans, search_root, path); else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS) ret = fix_item_offset(trans, search_root, path); - if (ret) + if (ret) { + btrfs_commit_transaction(trans, search_root); break; + } btrfs_release_path(path); + btrfs_commit_transaction(trans, search_root); } ulist_free(roots); btrfs_free_path(path); return ret; } -static int check_block(struct btrfs_trans_handle *trans, - struct btrfs_root *root, +static int check_block(struct btrfs_root *root, struct cache_tree *extent_cache, struct extent_buffer *buf, u64 flags) { @@ -4098,8 +4105,7 @@ static int check_block(struct btrfs_trans_handle *trans, if (status != BTRFS_TREE_BLOCK_CLEAN) { if (repair) - status = try_to_fix_bad_block(trans, root, buf, - status); + status = try_to_fix_bad_block(root, buf, status); if (status != BTRFS_TREE_BLOCK_CLEAN) { ret = -EIO; fprintf(stderr, "bad block %llu\n", @@ -5678,8 +5684,7 @@ full_backref: return 0; } -static int run_next_block(struct btrfs_trans_handle *trans, - struct btrfs_root *root, +static int run_next_block(struct btrfs_root *root, struct block_info *bits, int bits_nr, u64 *last, @@ -5797,7 +5802,7 @@ static int run_next_block(struct btrfs_trans_handle *trans, owner = btrfs_header_owner(buf); } - ret = check_block(trans, root, extent_cache, buf, flags); + ret = check_block(root, extent_cache, buf, flags); if (ret) goto out; @@ -6349,16 +6354,16 @@ static struct extent_entry *find_most_right_entry(struct list_head *entries) return best; } -static int repair_ref(struct btrfs_trans_handle *trans, - struct btrfs_fs_info *info, struct btrfs_path *path, +static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path, struct data_backref *dback, struct extent_entry *entry) { + struct btrfs_trans_handle *trans; struct btrfs_root *root; struct btrfs_file_extent_item *fi; struct extent_buffer *leaf; struct btrfs_key key; u64 bytenr, bytes; - int ret; + int ret, err; key.objectid = dback->root; key.type = 
BTRFS_ROOT_ITEM_KEY; @@ -6410,11 +6415,9 @@ static int repair_ref(struct btrfs_trans_handle *trans, btrfs_release_path(path); - /* - * Have to make sure that this root gets updated when we commit the - * transaction - */ - record_root_in_trans(trans, root); + trans = btrfs_start_transaction(root, 1); + if (IS_ERR(trans)) + return PTR_ERR(trans); /* * Ok we have the key of the file extent we want to fix, now we can cow @@ -6424,13 +6427,14 @@ static int repair_ref(struct btrfs_trans_handle *trans, if (ret < 0) { fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n", key.objectid, key.type, key.offset, ret); - return ret; + goto out; } if (ret > 0) { fprintf(stderr, "Well that's odd, we just found this key " "[%Lu, %u, %Lu]\n", key.objectid, key.type, key.offset); - return -EINVAL; + ret = -EINVAL; + goto out; } leaf = path->nodes[0]; fi = btrfs_item_ptr(leaf, path->slots[0], @@ -6443,7 +6447,8 @@ static int repair_ref(struct btrfs_trans_handle *trans, "system and send it to a btrfs developer so they can " "complete this functionality for bytenr %Lu\n", dback->disk_bytenr); - return -EINVAL; + ret = -EINVAL; + goto out; } if (dback->node.broken && dback->disk_bytenr != entry->bytenr) { @@ -6460,7 +6465,8 @@ static int repair_ref(struct btrfs_trans_handle *trans, "take a btrfs-image of this file system and " "send it to a btrfs developer, ref %Lu\n", dback->disk_bytenr); - return -EINVAL; + ret = -EINVAL; + goto out; } offset += off_diff; btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr); @@ -6474,7 +6480,8 @@ static int repair_ref(struct btrfs_trans_handle *trans, " take a btrfs-image of this file system and " "send it to a btrfs developer, ref %Lu\n", dback->disk_bytenr); - return -EINVAL; + ret = -EINVAL; + goto out; } offset += dback->disk_bytenr; @@ -6495,12 +6502,13 @@ static int repair_ref(struct btrfs_trans_handle *trans, else printf("ram bytes may be wrong?\n"); btrfs_mark_buffer_dirty(leaf); +out: + err = btrfs_commit_transaction(trans, 
root); btrfs_release_path(path); - return 0; + return ret ? ret : err; } -static int verify_backrefs(struct btrfs_trans_handle *trans, - struct btrfs_fs_info *info, struct btrfs_path *path, +static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path, struct extent_record *rec) { struct extent_backref *back; @@ -6662,7 +6670,7 @@ static int verify_backrefs(struct btrfs_trans_handle *trans, dback->disk_bytenr == best->bytenr) continue; - ret = repair_ref(trans, info, path, dback, best); + ret = repair_ref(info, path, dback, best); if (ret) goto out; } @@ -6763,15 +6771,15 @@ static int process_duplicates(struct btrfs_root *root, return good->num_duplicates ? 0 : 1; } -static int delete_duplicate_records(struct btrfs_trans_handle *trans, - struct btrfs_root *root, +static int delete_duplicate_records(struct btrfs_root *root, struct extent_record *rec) { + struct btrfs_trans_handle *trans; LIST_HEAD(delete_list); struct btrfs_path *path; struct extent_record *tmp, *good, *n; int nr_del = 0; - int ret = 0; + int ret = 0, err; struct btrfs_key key; path = btrfs_alloc_path(); @@ -6809,6 +6817,12 @@ static int delete_duplicate_records(struct btrfs_trans_handle *trans, } root = root->fs_info->extent_root; + trans = btrfs_start_transaction(root, 1); + if (IS_ERR(trans)) { + ret = PTR_ERR(trans); + goto out; + } + list_for_each_entry(tmp, &delete_list, list) { if (tmp->found_rec == 0) continue; @@ -6828,15 +6842,17 @@ static int delete_duplicate_records(struct btrfs_trans_handle *trans, if (ret) { if (ret > 0) ret = -EINVAL; - goto out; + break; } ret = btrfs_del_item(trans, root, path); if (ret) - goto out; + break; btrfs_release_path(path); nr_del++; } - + err = btrfs_commit_transaction(trans, root); + if (err && !ret) + ret = err; out: while (!list_empty(&delete_list)) { tmp = list_entry(delete_list.next, struct extent_record, list); @@ -6860,8 +6876,7 @@ out: return ret ? 
ret : nr_del; } -static int find_possible_backrefs(struct btrfs_trans_handle *trans, - struct btrfs_fs_info *info, +static int find_possible_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path, struct cache_tree *extent_cache, struct extent_record *rec) @@ -7035,11 +7050,11 @@ out: * all of the existing entries for it and recreate them * based on what the tree scan found. */ -static int fixup_extent_refs(struct btrfs_trans_handle *trans, - struct btrfs_fs_info *info, +static int fixup_extent_refs(struct btrfs_fs_info *info, struct cache_tree *extent_cache, struct extent_record *rec) { + struct btrfs_trans_handle *trans = NULL; int ret; struct btrfs_path *path; struct list_head *cur = rec->backrefs.next; @@ -7063,17 +7078,22 @@ static int fixup_extent_refs(struct btrfs_trans_handle *trans, * them into the list if we find the backref so that * verify_backrefs can figure out what to do. */ - ret = find_possible_backrefs(trans, info, path, extent_cache, - rec); + ret = find_possible_backrefs(info, path, extent_cache, rec); if (ret < 0) goto out; } /* step one, make sure all of the backrefs agree */ - ret = verify_backrefs(trans, info, path, rec); + ret = verify_backrefs(info, path, rec); if (ret < 0) goto out; + trans = btrfs_start_transaction(info->extent_root, 1); + if (IS_ERR(trans)) { + ret = PTR_ERR(trans); + goto out; + } + /* step two, delete all the existing records */ ret = delete_extent_records(trans, info->extent_root, path, rec->start, rec->max_size); @@ -7108,6 +7128,12 @@ static int fixup_extent_refs(struct btrfs_trans_handle *trans, goto out; } out: + if (trans) { + int err = btrfs_commit_transaction(trans, info->extent_root); + if (!ret) + ret = err; + } + btrfs_free_path(path); return ret; } @@ -7181,20 +7207,27 @@ out: return ret; } -static int prune_corrupt_blocks(struct btrfs_trans_handle *trans, - struct btrfs_fs_info *info) +static int prune_corrupt_blocks(struct btrfs_fs_info *info) { + struct btrfs_trans_handle *trans = NULL; struct 
cache_extent *cache; struct btrfs_corrupt_block *corrupt; - cache = search_cache_extent(info->corrupt_blocks, 0); while (1) { + cache = search_cache_extent(info->corrupt_blocks, 0); if (!cache) break; + if (!trans) { + trans = btrfs_start_transaction(info->extent_root, 1); + if (IS_ERR(trans)) + return PTR_ERR(trans); + } corrupt = container_of(cache, struct btrfs_corrupt_block, cache); prune_one_block(trans, info, corrupt); - cache = next_cache_extent(cache); + remove_cache_extent(info->corrupt_blocks, cache); } + if (trans) + return btrfs_commit_transaction(trans, info->extent_root); return 0; } @@ -7224,8 +7257,7 @@ static void reset_cached_block_groups(struct btrfs_fs_info *fs_info) } } -static int check_extent_refs(struct btrfs_trans_handle *trans, - struct btrfs_root *root, +static int check_extent_refs(struct btrfs_root *root, struct cache_tree *extent_cache) { struct extent_record *rec; @@ -7246,22 +7278,28 @@ static int check_extent_refs(struct btrfs_trans_handle *trans, cache = search_cache_extent(extent_cache, 0); while(cache) { rec = container_of(cache, struct extent_record, cache); - btrfs_pin_extent(root->fs_info, - rec->start, rec->max_size); + set_extent_dirty(root->fs_info->excluded_extents, + rec->start, + rec->start + rec->max_size - 1, + GFP_NOFS); cache = next_cache_extent(cache); } /* pin down all the corrupted blocks too */ cache = search_cache_extent(root->fs_info->corrupt_blocks, 0); while(cache) { - btrfs_pin_extent(root->fs_info, - cache->start, cache->size); + set_extent_dirty(root->fs_info->excluded_extents, + cache->start, + cache->start + cache->size - 1, + GFP_NOFS); cache = next_cache_extent(cache); } - prune_corrupt_blocks(trans, root->fs_info); + prune_corrupt_blocks(root->fs_info); reset_cached_block_groups(root->fs_info); } + reset_cached_block_groups(root->fs_info); + /* * We need to delete any duplicate entries we find first otherwise we * could mess up the extent tree when we have backrefs that actually @@ -7281,7 +7319,7 @@ 
static int check_extent_refs(struct btrfs_trans_handle *trans, */ if (process_duplicates(root, extent_cache, rec)) continue; - ret = delete_duplicate_records(trans, root, rec); + ret = delete_duplicate_records(root, rec); if (ret < 0) return ret; /* @@ -7327,7 +7365,7 @@ static int check_extent_refs(struct btrfs_trans_handle *trans, * extent, let the fallback method handle it. */ if (!fixed && repair) { - ret = fixup_extent_refs(trans, + ret = fixup_extent_refs( root->fs_info, extent_cache, rec); if (ret) @@ -7344,7 +7382,7 @@ static int check_extent_refs(struct btrfs_trans_handle *trans, (unsigned long long)rec->nr); if (!fixed && !recorded && repair) { - ret = fixup_extent_refs(trans, root->fs_info, + ret = fixup_extent_refs(root->fs_info, extent_cache, rec); if (ret) goto repair_abort; @@ -7357,7 +7395,7 @@ static int check_extent_refs(struct btrfs_trans_handle *trans, (unsigned long long)rec->start, (unsigned long long)rec->nr); if (!fixed && !recorded && repair) { - ret = fixup_extent_refs(trans, root->fs_info, + ret = fixup_extent_refs(root->fs_info, extent_cache, rec); if (ret) goto repair_abort; @@ -7376,7 +7414,19 @@ repair_abort: fprintf(stderr, "failed to repair damaged filesystem, aborting\n"); exit(1); } else if (!ret) { + struct btrfs_trans_handle *trans; + + root = root->fs_info->extent_root; + trans = btrfs_start_transaction(root, 1); + if (IS_ERR(trans)) { + ret = PTR_ERR(trans); + goto repair_abort; + } + btrfs_fix_block_accounting(trans, root); + ret = btrfs_commit_transaction(trans, root); + if (ret) + goto repair_abort; } if (err) fprintf(stderr, "repaired damaged extent references\n"); @@ -7676,7 +7726,6 @@ static void free_root_item_list(struct list_head *list) } static int deal_root_from_list(struct list_head *list, - struct btrfs_trans_handle *trans, struct btrfs_root *root, struct block_info *bits, int bits_nr, @@ -7714,11 +7763,10 @@ static int deal_root_from_list(struct list_head *list, * can maximize readahead. 
*/ while (1) { - ret = run_next_block(trans, root, bits, bits_nr, &last, - pending, seen, reada, - nodes, extent_cache, - chunk_cache, dev_cache, - block_group_cache, + ret = run_next_block(root, bits, bits_nr, &last, + pending, seen, reada, nodes, + extent_cache, chunk_cache, + dev_cache, block_group_cache, dev_extent_cache, rec); if (ret != 0) break; @@ -7730,11 +7778,9 @@ static int deal_root_from_list(struct list_head *list, break; } while (ret >= 0) { - ret = run_next_block(trans, root, bits, bits_nr, &last, - pending, seen, reada, - nodes, extent_cache, - chunk_cache, dev_cache, - block_group_cache, + ret = run_next_block(root, bits, bits_nr, &last, pending, seen, + reada, nodes, extent_cache, chunk_cache, + dev_cache, block_group_cache, dev_extent_cache, NULL); if (ret != 0) { if (ret > 0) @@ -7756,6 +7802,7 @@ static int check_chunks_and_extents(struct btrfs_root *root) struct cache_tree pending; struct cache_tree reada; struct cache_tree nodes; + struct extent_io_tree excluded_extents; struct cache_tree corrupt_blocks; struct btrfs_path path; struct btrfs_key key; @@ -7764,7 +7811,6 @@ static int check_chunks_and_extents(struct btrfs_root *root) struct block_info *bits; int bits_nr; struct extent_buffer *leaf; - struct btrfs_trans_handle *trans = NULL; int slot; struct btrfs_root_item ri; struct list_head dropping_trees; @@ -7785,15 +7831,12 @@ static int check_chunks_and_extents(struct btrfs_root *root) cache_tree_init(&nodes); cache_tree_init(&reada); cache_tree_init(&corrupt_blocks); + extent_io_tree_init(&excluded_extents); INIT_LIST_HEAD(&dropping_trees); INIT_LIST_HEAD(&normal_trees); if (repair) { - trans = btrfs_start_transaction(root, 1); - if (IS_ERR(trans)) { - fprintf(stderr, "Error starting transaction\n"); - return PTR_ERR(trans); - } + root->fs_info->excluded_extents = &excluded_extents; root->fs_info->fsck_extent_cache = &extent_cache; root->fs_info->free_extent_hook = free_extent_hook; root->fs_info->corrupt_blocks = &corrupt_blocks; @@ 
-7878,9 +7921,8 @@ again: * this in mind when dealing with return values from these functions, if * we get -EAGAIN we want to fall through and restart the loop. */ - ret = deal_root_from_list(&normal_trees, trans, root, - bits, bits_nr, &pending, &seen, - &reada, &nodes, &extent_cache, + ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending, + &seen, &reada, &nodes, &extent_cache, &chunk_cache, &dev_cache, &block_group_cache, &dev_extent_cache); if (ret < 0) { @@ -7888,12 +7930,10 @@ again: goto loop; goto out; } - ret = deal_root_from_list(&dropping_trees, trans, root, - bits, bits_nr, &pending, &seen, - &reada, &nodes, &extent_cache, - &chunk_cache, &dev_cache, - &block_group_cache, - &dev_extent_cache); + ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr, + &pending, &seen, &reada, &nodes, + &extent_cache, &chunk_cache, &dev_cache, + &block_group_cache, &dev_extent_cache); if (ret < 0) { if (ret == -EAGAIN) goto loop; @@ -7909,7 +7949,7 @@ again: ret = err; } - ret = check_extent_refs(trans, root, &extent_cache); + ret = check_extent_refs(root, &extent_cache); if (ret < 0) { if (ret == -EAGAIN) goto loop; @@ -7921,16 +7961,13 @@ again: ret = err; out: - if (trans) { - err = btrfs_commit_transaction(trans, root); - if (!ret) - ret = err; - } if (repair) { free_corrupt_blocks_tree(root->fs_info->corrupt_blocks); + extent_io_tree_cleanup(&excluded_extents); root->fs_info->fsck_extent_cache = NULL; root->fs_info->free_extent_hook = NULL; root->fs_info->corrupt_blocks = NULL; + root->fs_info->excluded_extents = NULL; } free(bits); free_chunk_cache_tree(&chunk_cache); @@ -7943,16 +7980,6 @@ out: free_extent_cache_tree(&nodes); return ret; loop: - ret = btrfs_commit_transaction(trans, root); - if (ret) - goto out; - - trans = btrfs_start_transaction(root, 1); - if (IS_ERR(trans)) { - ret = PTR_ERR(trans); - goto out; - } - free_corrupt_blocks_tree(root->fs_info->corrupt_blocks); free_extent_cache_tree(&seen); 
free_extent_cache_tree(&pending); @@ -7965,6 +7992,7 @@ loop: free_extent_record_cache(root->fs_info, &extent_cache); free_root_item_list(&normal_trees); free_root_item_list(&dropping_trees); + extent_io_tree_cleanup(&excluded_extents); goto again; } @@ -8924,6 +8952,8 @@ out: free_roots_info_cache(); if (path) btrfs_free_path(path); + if (trans) + btrfs_commit_transaction(trans, info->tree_root); if (ret < 0) return ret; diff --git a/ctree.h b/ctree.h index be30cb63..f4275d9f 100644 --- a/ctree.h +++ b/ctree.h @@ -963,6 +963,7 @@ struct btrfs_fs_info { struct extent_io_tree pinned_extents; struct extent_io_tree pending_del; struct extent_io_tree extent_ins; + struct extent_io_tree *excluded_extents; /* logical->physical extent mapping */ struct btrfs_mapping_tree mapping_tree; diff --git a/disk-io.c b/disk-io.c index 0d33258b..ca39f173 100644 --- a/disk-io.c +++ b/disk-io.c @@ -763,6 +763,8 @@ struct btrfs_fs_info *btrfs_new_fs_info(int writable, u64 sb_bytenr) extent_io_tree_init(&fs_info->pinned_extents); extent_io_tree_init(&fs_info->pending_del); extent_io_tree_init(&fs_info->extent_ins); + fs_info->excluded_extents = NULL; + fs_info->fs_root_tree = RB_ROOT; cache_tree_init(&fs_info->mapping_tree.cache_tree); diff --git a/extent-tree.c b/extent-tree.c index d42c5727..e8545ef6 100644 --- a/extent-tree.c +++ b/extent-tree.c @@ -2566,6 +2566,13 @@ check_failed: goto new_group; } + if (info->excluded_extents && + test_range_bit(info->excluded_extents, ins->objectid, + ins->objectid + num_bytes -1, EXTENT_DIRTY, 0)) { + search_start = ins->objectid + num_bytes; + goto new_group; + } + if (exclude_nr > 0 && (ins->objectid + num_bytes > exclude_start && ins->objectid < exclude_start + exclude_nr)) { search_start = exclude_start + exclude_nr; -- cgit v1.2.1 From ce0517b364037dd4e1dcbbbe09d57c092975fa0f Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Tue, 27 Jan 2015 14:23:21 -0800 Subject: Btrfs-progs: unpin excluded extents as we fix things We don't want to keep 
extent records pinned down if we fix stuff as we may need the space and we can be pretty sure that these records are correct. Thanks, Signed-off-by: Josef Bacik --- cmds-check.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/cmds-check.c b/cmds-check.c index 5458c282..9c379e65 100644 --- a/cmds-check.c +++ b/cmds-check.c @@ -7335,6 +7335,8 @@ static int check_extent_refs(struct btrfs_root *root, return -EAGAIN; while(1) { + int cur_err = 0; + fixed = 0; recorded = 0; cache = search_cache_extent(extent_cache, 0); @@ -7345,6 +7347,7 @@ static int check_extent_refs(struct btrfs_root *root, fprintf(stderr, "extent item %llu has multiple extent " "items\n", (unsigned long long)rec->start); err = 1; + cur_err = 1; } if (rec->refs != rec->extent_item_refs) { @@ -7374,7 +7377,7 @@ static int check_extent_refs(struct btrfs_root *root, } } err = 1; - + cur_err = 1; } if (all_backpointers_checked(rec, 1)) { fprintf(stderr, "backpointer mismatch on [%llu %llu]\n", @@ -7388,6 +7391,7 @@ static int check_extent_refs(struct btrfs_root *root, goto repair_abort; fixed = 1; } + cur_err = 1; err = 1; } if (!rec->owner_ref_checked) { @@ -7402,10 +7406,16 @@ static int check_extent_refs(struct btrfs_root *root, fixed = 1; } err = 1; + cur_err = 1; } remove_cache_extent(extent_cache, cache); free_all_extent_backrefs(rec); + if (!init_extent_tree && repair && (!cur_err || fixed)) + clear_extent_dirty(root->fs_info->excluded_extents, + rec->start, + rec->start + rec->max_size - 1, + GFP_NOFS); free(rec); } repair_abort: -- cgit v1.2.1 From 0b8aa1969bea17c6f539f03d673cfae1ba85ed63 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Wed, 28 Jan 2015 12:38:03 -0800 Subject: Btrfs-progs: make restore update dev items When we restore a multi disk image onto a single disk we need to update the dev items used and total bytes so that fsck doesn't freak out and that we get normal results from stuff like btrfs fi show. 
Thanks, Signed-off-by: Josef Bacik --- btrfs-image.c | 150 ++++++++++++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 131 insertions(+), 19 deletions(-) diff --git a/btrfs-image.c b/btrfs-image.c index feb4a620..3c78388a 100644 --- a/btrfs-image.c +++ b/btrfs-image.c @@ -133,6 +133,7 @@ struct mdrestore_struct { size_t num_items; u32 leafsize; u64 devid; + u64 alloced_chunks; u64 last_physical_offset; u8 uuid[BTRFS_UUID_SIZE]; u8 fsid[BTRFS_FSID_SIZE]; @@ -1856,6 +1857,7 @@ static int mdrestore_init(struct mdrestore_struct *mdres, mdres->multi_devices = multi_devices; mdres->clear_space_cache = 0; mdres->last_physical_offset = 0; + mdres->alloced_chunks = 0; if (!num_threads) return 0; @@ -2087,6 +2089,7 @@ static int read_chunk_block(struct mdrestore_struct *mdres, u8 *buffer, mdres->last_physical_offset) mdres->last_physical_offset = fs_chunk->physical + fs_chunk->bytes; + mdres->alloced_chunks += fs_chunk->bytes; tree_insert(&mdres->chunk_tree, &fs_chunk->l, chunk_cmp); } out: @@ -2372,9 +2375,107 @@ static void remap_overlapping_chunks(struct mdrestore_struct *mdres) } } -static int __restore_metadump(const char *input, FILE *out, int old_restore, - int num_threads, int fixup_offset, - const char *target, int multi_devices) +static int fixup_devices(struct btrfs_fs_info *fs_info, + struct mdrestore_struct *mdres, off_t dev_size) +{ + struct btrfs_trans_handle *trans; + struct btrfs_dev_item *dev_item; + struct btrfs_path *path; + struct extent_buffer *leaf; + struct btrfs_root *root = fs_info->chunk_root; + struct btrfs_key key; + u64 devid, cur_devid; + int ret; + + path = btrfs_alloc_path(); + if (!path) { + fprintf(stderr, "Error alloc'ing path\n"); + return -ENOMEM; + } + + trans = btrfs_start_transaction(fs_info->tree_root, 1); + if (IS_ERR(trans)) { + fprintf(stderr, "Error starting transaction %ld\n", + PTR_ERR(trans)); + btrfs_free_path(path); + return PTR_ERR(trans); + } + + dev_item = &fs_info->super_copy->dev_item; + + devid = 
btrfs_stack_device_id(dev_item); + + btrfs_set_stack_device_total_bytes(dev_item, dev_size); + btrfs_set_stack_device_bytes_used(dev_item, mdres->alloced_chunks); + + key.objectid = BTRFS_DEV_ITEMS_OBJECTID; + key.type = BTRFS_DEV_ITEM_KEY; + key.offset = 0; + +again: + ret = btrfs_search_slot(trans, root, &key, path, -1, 1); + if (ret < 0) { + fprintf(stderr, "search failed %d\n", ret); + exit(1); + } + + while (1) { + leaf = path->nodes[0]; + if (path->slots[0] >= btrfs_header_nritems(leaf)) { + ret = btrfs_next_leaf(root, path); + if (ret < 0) { + fprintf(stderr, "Error going to next leaf " + "%d\n", ret); + exit(1); + } + if (ret > 0) { + ret = 0; + break; + } + leaf = path->nodes[0]; + } + + btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); + if (key.type > BTRFS_DEV_ITEM_KEY) + break; + if (key.type != BTRFS_DEV_ITEM_KEY) { + path->slots[0]++; + continue; + } + + dev_item = btrfs_item_ptr(leaf, path->slots[0], + struct btrfs_dev_item); + cur_devid = btrfs_device_id(leaf, dev_item); + if (devid != cur_devid) { + ret = btrfs_del_item(trans, root, path); + if (ret) { + fprintf(stderr, "Error deleting item %d\n", + ret); + exit(1); + } + btrfs_release_path(path); + goto again; + } + + btrfs_set_device_total_bytes(leaf, dev_item, dev_size); + btrfs_set_device_bytes_used(leaf, dev_item, + mdres->alloced_chunks); + btrfs_mark_buffer_dirty(leaf); + path->slots[0]++; + } + + btrfs_free_path(path); + ret = btrfs_commit_transaction(trans, fs_info->tree_root); + if (ret) { + fprintf(stderr, "Commit failed %d\n", ret); + return ret; + } + return 0; +} + +static int restore_metadump(const char *input, FILE *out, int old_restore, + int num_threads, int fixup_offset, + const char *target, int multi_devices) { struct meta_cluster *cluster = NULL; struct meta_cluster_header *header; @@ -2454,6 +2555,30 @@ static int __restore_metadump(const char *input, FILE *out, int old_restore, } } ret = wait_for_worker(&mdrestore); + + if (!ret && !multi_devices && !old_restore) { + 
struct stat st; + + info = open_ctree_fs_info(target, 0, 0, + OPEN_CTREE_PARTIAL | + OPEN_CTREE_WRITES); + if (!info) { + fprintf(stderr, "unable to open %s\n", target); + ret = -EIO; + goto out; + } + + if (stat(target, &st)) { + fprintf(stderr, "statting %s failed\n", target); + close_ctree(info->chunk_root); + return 1; + } + + ret = fixup_devices(info, &mdrestore, st.st_size); + close_ctree(info->chunk_root); + if (ret) + goto out; + } out: mdrestore_destroy(&mdrestore, num_threads); failed_cluster: @@ -2467,19 +2592,6 @@ failed_open: return ret; } -static int restore_metadump(const char *input, FILE *out, int old_restore, - int num_threads, int multi_devices) -{ - return __restore_metadump(input, out, old_restore, num_threads, 0, NULL, - multi_devices); -} - -static int fixup_metadump(const char *input, FILE *out, int num_threads, - const char *target) -{ - return __restore_metadump(input, out, 0, num_threads, 1, target, 1); -} - static int update_disk_super_on_device(struct btrfs_fs_info *info, const char *other_dev, u64 cur_devid) { @@ -2705,7 +2817,7 @@ int main(int argc, char *argv[]) compress_level, sanitize, walk_trees); } else { ret = restore_metadump(source, out, old_restore, num_threads, - multi_devices); + 0, target, multi_devices); } if (ret) { printk("%s failed (%s)\n", (create) ? 
"create" : "restore", @@ -2752,14 +2864,14 @@ int main(int argc, char *argv[]) close_ctree(info->chunk_root); /* fix metadata block to map correct chunk */ - ret = fixup_metadump(source, out, 1, target); + ret = restore_metadump(source, out, 0, num_threads, 1, + target, 1); if (ret) { fprintf(stderr, "fix metadump failed (error=%d)\n", ret); exit(1); } } - out: if (out == stdout) { fflush(out); -- cgit v1.2.1 From c6b388ef2d2bf0f7ff8d87fe82cdeddb6427eb67 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Tue, 3 Feb 2015 09:48:29 -0500 Subject: Btrfs-progs: make debug-tree spit out full_backref flag Currently btrfs-debug-tree ignores the FULL_BACKREF flag which makes it hard to figure out problems related to FULL_BACKREF. Thanks, Signed-off-by: Josef Bacik --- print-tree.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/print-tree.c b/print-tree.c index 3a7c13cd..931a321a 100644 --- a/print-tree.c +++ b/print-tree.c @@ -312,6 +312,10 @@ static void extent_flags_to_str(u64 flags, char *ret) } strcat(ret, "TREE_BLOCK"); } + if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) { + strcat(ret, "|"); + strcat(ret, "FULL_BACKREF"); + } } void print_extent_item(struct extent_buffer *eb, int slot, int metadata) -- cgit v1.2.1 From 34a5ec12eedbd13f47c92108e4fb27e08598219a Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Tue, 3 Feb 2015 09:48:57 -0500 Subject: Btrfs-progs: skip opening all devices with restore When we go to fixup the dev items after a restore we scan all existing devices. If you happen to be a btrfs developer you could possibly open up some random device that you didn't just restore onto, which gives you weird errors and makes you super cranky and waste a day trying to figure out what is failing. This will make it so that we use the fd we've already opened for opening our ctree. 
Thanks, Signed-off-by: Josef Bacik --- btrfs-find-root.c | 2 +- btrfs-image.c | 9 ++++++--- chunk-recover.c | 2 +- disk-io.c | 8 +++++--- disk-io.h | 3 ++- super-recover.c | 2 +- 6 files changed, 16 insertions(+), 10 deletions(-) diff --git a/btrfs-find-root.c b/btrfs-find-root.c index 3edb8332..c6e6b82f 100644 --- a/btrfs-find-root.c +++ b/btrfs-find-root.c @@ -79,7 +79,7 @@ static struct btrfs_root *open_ctree_broken(int fd, const char *device) return NULL; } - ret = btrfs_scan_fs_devices(fd, device, &fs_devices, 0, 1); + ret = btrfs_scan_fs_devices(fd, device, &fs_devices, 0, 1, 0); if (ret) goto out; diff --git a/btrfs-image.c b/btrfs-image.c index 3c78388a..04ec4734 100644 --- a/btrfs-image.c +++ b/btrfs-image.c @@ -2557,16 +2557,19 @@ static int restore_metadump(const char *input, FILE *out, int old_restore, ret = wait_for_worker(&mdrestore); if (!ret && !multi_devices && !old_restore) { + struct btrfs_root *root; struct stat st; - info = open_ctree_fs_info(target, 0, 0, + root = open_ctree_fd(fileno(out), target, 0, OPEN_CTREE_PARTIAL | - OPEN_CTREE_WRITES); - if (!info) { + OPEN_CTREE_WRITES | + OPEN_CTREE_NO_DEVICES); + if (!root) { fprintf(stderr, "unable to open %s\n", target); ret = -EIO; goto out; } + info = root->fs_info; if (stat(target, &st)) { fprintf(stderr, "statting %s failed\n", target); diff --git a/chunk-recover.c b/chunk-recover.c index 94efc438..832b3b1b 100644 --- a/chunk-recover.c +++ b/chunk-recover.c @@ -1520,7 +1520,7 @@ static int recover_prepare(struct recover_control *rc, char *path) goto fail_free_sb; } - ret = btrfs_scan_fs_devices(fd, path, &fs_devices, 0, 1); + ret = btrfs_scan_fs_devices(fd, path, &fs_devices, 0, 1, 0); if (ret) goto fail_free_sb; diff --git a/disk-io.c b/disk-io.c index ca39f173..0aec56e0 100644 --- a/disk-io.c +++ b/disk-io.c @@ -1006,7 +1006,8 @@ void btrfs_cleanup_all_caches(struct btrfs_fs_info *fs_info) int btrfs_scan_fs_devices(int fd, const char *path, struct btrfs_fs_devices **fs_devices, - u64 
sb_bytenr, int super_recover) + u64 sb_bytenr, int super_recover, + int skip_devices) { u64 total_devs; u64 dev_size; @@ -1033,7 +1034,7 @@ int btrfs_scan_fs_devices(int fd, const char *path, return ret; } - if (total_devs != 1) { + if (!skip_devices && total_devs != 1) { ret = btrfs_scan_lblkid(); if (ret) return ret; @@ -1114,7 +1115,8 @@ static struct btrfs_fs_info *__open_ctree_fd(int fp, const char *path, fs_info->on_restoring = 1; ret = btrfs_scan_fs_devices(fp, path, &fs_devices, sb_bytenr, - (flags & OPEN_CTREE_RECOVER_SUPER)); + (flags & OPEN_CTREE_RECOVER_SUPER), + (flags & OPEN_CTREE_NO_DEVICES)); if (ret) goto out; diff --git a/disk-io.h b/disk-io.h index f963a96a..53df8f06 100644 --- a/disk-io.h +++ b/disk-io.h @@ -33,6 +33,7 @@ enum btrfs_open_ctree_flags { OPEN_CTREE_RESTORE = (1 << 4), OPEN_CTREE_NO_BLOCK_GROUPS = (1 << 5), OPEN_CTREE_EXCLUSIVE = (1 << 6), + OPEN_CTREE_NO_DEVICES = (1 << 7), }; static inline u64 btrfs_sb_offset(int mirror) @@ -68,7 +69,7 @@ void btrfs_release_all_roots(struct btrfs_fs_info *fs_info); void btrfs_cleanup_all_caches(struct btrfs_fs_info *fs_info); int btrfs_scan_fs_devices(int fd, const char *path, struct btrfs_fs_devices **fs_devices, u64 sb_bytenr, - int super_recover); + int super_recover, int skip_devices); int btrfs_setup_chunk_tree_and_device_map(struct btrfs_fs_info *fs_info); struct btrfs_root *open_ctree(const char *filename, u64 sb_bytenr, diff --git a/super-recover.c b/super-recover.c index 197fc4bd..e2c31294 100644 --- a/super-recover.c +++ b/super-recover.c @@ -279,7 +279,7 @@ int btrfs_recover_superblocks(const char *dname, } init_recover_superblock(&recover); - ret = btrfs_scan_fs_devices(fd, dname, &recover.fs_devices, 0, 1); + ret = btrfs_scan_fs_devices(fd, dname, &recover.fs_devices, 0, 1, 0); close(fd); if (ret) { ret = 1; -- cgit v1.2.1 From 4915964fc653033189c733ec738b4b23c87b9e5d Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Thu, 5 Feb 2015 15:41:32 -0500 Subject: Btrfs-progs: fix bad extent 
flag We can have FULL_BACKREF set or not set when we need the opposite, this patch fixes this problem by setting a bit when the flag is set improperly. This way we can either correct the problem when we re-create the extent item if the backrefs are also wrong, or we can just set the flag properly in the extent item. Thanks, Signed-off-by: Josef Bacik --- cmds-check.c | 261 ++++++++++++++++++++++++++++++++++++----------------------- 1 file changed, 159 insertions(+), 102 deletions(-) diff --git a/cmds-check.c b/cmds-check.c index 9c379e65..ce0ac888 100644 --- a/cmds-check.c +++ b/cmds-check.c @@ -119,12 +119,13 @@ struct extent_record { u64 info_objectid; u32 num_duplicates; u8 info_level; + int flag_block_full_backref; unsigned int found_rec:1; unsigned int content_checked:1; unsigned int owner_ref_checked:1; unsigned int is_root:1; unsigned int metadata:1; - unsigned int flag_block_full_backref:1; + unsigned int bad_full_backref:1; }; struct inode_backref { @@ -145,6 +146,7 @@ struct root_item_record { struct list_head list; u64 objectid; u64 bytenr; + u64 last_snapshot; u8 level; u8 drop_level; int level_size; @@ -3679,7 +3681,8 @@ static int maybe_free_extent_rec(struct cache_tree *extent_cache, { if (rec->content_checked && rec->owner_ref_checked && rec->extent_item_refs == rec->refs && rec->refs > 0 && - rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0)) { + rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) && + !rec->bad_full_backref) { remove_cache_extent(extent_cache, &rec->cache); free_all_extent_backrefs(rec); list_del_init(&rec->list); @@ -4338,6 +4341,8 @@ static int add_extent_rec(struct cache_tree *extent_cache, rec->owner_ref_checked = 0; rec->num_duplicates = 0; rec->metadata = metadata; + rec->flag_block_full_backref = -1; + rec->bad_full_backref = 0; INIT_LIST_HEAD(&rec->backrefs); INIT_LIST_HEAD(&rec->dups); INIT_LIST_HEAD(&rec->list); @@ -5563,27 +5568,41 @@ static int is_dropped_key(struct btrfs_key *key, return 0; } +/* 
+ * Here are the rules for FULL_BACKREF. + * + * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set. + * 2) If btrfs_header_owner(buf) no longer points to buf then we have + * FULL_BACKREF set. + * 3) We cow'ed the block walking down a reloc tree. This is impossible to tell + * if it happened after the relocation occurred since we'll have dropped the + * reloc root, so it's entirely possible to have FULL_BACKREF set on buf and + * have no real way to know for sure. + * + * We process the blocks one root at a time, and we start from the lowest root + * objectid and go to the highest. So we can just lookup the owner backref for + * the record and if we don't find it then we know it doesn't exist and we have + * a FULL BACKREF. + * + * FIXME: if we ever start reclaiming root objectid's then we need to fix this + * assumption and simply indicate that we _think_ that the FULL BACKREF needs to + * be set or not and then we can check later once we've gathered all the refs. + */ static int calc_extent_flag(struct btrfs_root *root, struct cache_tree *extent_cache, struct extent_buffer *buf, struct root_item_record *ri, u64 *flags) { - int i; - int nritems = btrfs_header_nritems(buf); - struct btrfs_key key; struct extent_record *rec; struct cache_extent *cache; - struct data_backref *dback; struct tree_backref *tback; - struct extent_buffer *new_buf; u64 owner = 0; - u64 bytenr; - u64 offset; - u64 ptr; - int size; - int ret; - u8 level; + + cache = lookup_cache_extent(extent_cache, buf->start, 1); + /* we have added this extent before */ + BUG_ON(!cache); + rec = container_of(cache, struct extent_record, cache); /* * Except file/reloc tree, we can not have @@ -5596,91 +5615,28 @@ static int calc_extent_flag(struct btrfs_root *root, */ if (buf->start == ri->bytenr) goto normal; - if (btrfs_is_leaf(buf)) { - /* - * we are searching from original root, world - * peace is achieved, we use normal backref. 
- */ - owner = btrfs_header_owner(buf); - if (owner == ri->objectid) - goto normal; - /* - * we check every eb here, and if any of - * eb dosen't have original root refers - * to this eb, we set full backref flag for - * this extent, otherwise normal backref. - */ - for (i = 0; i < nritems; i++) { - struct btrfs_file_extent_item *fi; - btrfs_item_key_to_cpu(buf, &key, i); - if (key.type != BTRFS_EXTENT_DATA_KEY) - continue; - fi = btrfs_item_ptr(buf, i, - struct btrfs_file_extent_item); - if (btrfs_file_extent_type(buf, fi) == - BTRFS_FILE_EXTENT_INLINE) - continue; - if (btrfs_file_extent_disk_bytenr(buf, fi) == 0) - continue; - bytenr = btrfs_file_extent_disk_bytenr(buf, fi); - cache = lookup_cache_extent(extent_cache, bytenr, 1); - if (!cache) - goto full_backref; - offset = btrfs_file_extent_offset(buf, fi); - rec = container_of(cache, struct extent_record, cache); - dback = find_data_backref(rec, 0, ri->objectid, owner, - key.offset - offset, 1, bytenr, bytenr); - if (!dback) - goto full_backref; - } + if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC)) goto full_backref; - } else { - level = btrfs_header_level(buf); - for (i = 0; i < nritems; i++) { - ptr = btrfs_node_blockptr(buf, i); - size = btrfs_level_size(root, level); - if (i == 0) { - new_buf = read_tree_block(root, ptr, size, 0); - if (!extent_buffer_uptodate(new_buf)) { - free_extent_buffer(new_buf); - ret = -EIO; - return ret; - } - /* - * we are searching from origin root, world - * peace is achieved, we use normal backref. 
- */ - owner = btrfs_header_owner(new_buf); - free_extent_buffer(new_buf); - if (owner == ri->objectid) - goto normal; - } - cache = lookup_cache_extent(extent_cache, ptr, size); - if (!cache) - goto full_backref; - rec = container_of(cache, struct extent_record, cache); - tback = find_tree_backref(rec, 0, owner); - if (!tback) - goto full_backref; - } - } + owner = btrfs_header_owner(buf); + if (owner == ri->objectid) + goto normal; + + tback = find_tree_backref(rec, 0, owner); + if (!tback) + goto full_backref; normal: *flags = 0; - cache = lookup_cache_extent(extent_cache, buf->start, 1); - /* we have added this extent before */ - BUG_ON(!cache); - rec = container_of(cache, struct extent_record, cache); - rec->flag_block_full_backref = 0; + if (rec->flag_block_full_backref != -1 && + rec->flag_block_full_backref != 0) + rec->bad_full_backref = 1; return 0; full_backref: *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF; - cache = lookup_cache_extent(extent_cache, buf->start, 1); - /* we have added this extent before */ - BUG_ON(!cache); - rec = container_of(cache, struct extent_record, cache); - rec->flag_block_full_backref = 1; + if (rec->flag_block_full_backref != -1 && + rec->flag_block_full_backref != 1) + rec->bad_full_backref = 1; return 0; } @@ -5767,10 +5723,6 @@ static int run_next_block(struct btrfs_root *root, nritems = btrfs_header_nritems(buf); - /* - * FIXME, this only works only if we don't have any full - * backref mode. - */ flags = 0; if (!init_extent_tree) { ret = btrfs_lookup_extent_info(NULL, root, bytenr, @@ -5793,11 +5745,37 @@ static int run_next_block(struct btrfs_root *root, } if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) { - if (rec) - rec->flag_block_full_backref = 1; + if (ri != NULL && + ri->objectid != BTRFS_TREE_RELOC_OBJECTID && + ri->objectid == btrfs_header_owner(buf)) { + /* + * Ok we got to this block from it's original owner and + * we have FULL_BACKREF set. 
Relocation can leave + * converted blocks over so this is altogether possible, + * however it's not possible if the generation > the + * last snapshot, so check for this case. + */ + if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) && + btrfs_header_generation(buf) > ri->last_snapshot) { + flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF; + rec->bad_full_backref = 1; + } + } + } else { + if (ri != NULL && + (ri->objectid == BTRFS_TREE_RELOC_OBJECTID || + btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) { + flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF; + rec->bad_full_backref = 1; + } + } + + if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) { + rec->flag_block_full_backref = 1; parent = bytenr; owner = 0; } else { + rec->flag_block_full_backref = 0; parent = 0; owner = btrfs_header_owner(buf); } @@ -6283,7 +6261,7 @@ static int record_extent(struct btrfs_trans_handle *trans, parent, tback->root, 0, 0); fprintf(stderr, "adding new tree backref on " "start %llu len %llu parent %llu root %llu\n", - rec->start, rec->max_size, tback->parent, tback->root); + rec->start, rec->max_size, parent, tback->root); } if (ret) goto fail; @@ -7121,6 +7099,7 @@ static int fixup_extent_refs(struct btrfs_fs_info *info, if (!back->found_ref) continue; + rec->bad_full_backref = 0; ret = record_extent(trans, info, path, rec, back, allocated, flags); allocated = 1; @@ -7138,6 +7117,67 @@ out: return ret; } +static int fixup_extent_flags(struct btrfs_fs_info *fs_info, + struct extent_record *rec) +{ + struct btrfs_trans_handle *trans; + struct btrfs_root *root = fs_info->extent_root; + struct btrfs_path *path; + struct btrfs_extent_item *ei; + struct btrfs_key key; + u64 flags; + int ret = 0; + + key.objectid = rec->start; + if (rec->metadata) { + key.type = BTRFS_METADATA_ITEM_KEY; + key.offset = rec->info_level; + } else { + key.type = BTRFS_EXTENT_ITEM_KEY; + key.offset = rec->max_size; + } + + path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; + + trans = btrfs_start_transaction(root, 0); + if 
(IS_ERR(trans)) { + btrfs_free_path(path); + return PTR_ERR(trans); + } + + ret = btrfs_search_slot(trans, root, &key, path, 0, 1); + if (ret < 0) { + btrfs_free_path(path); + btrfs_commit_transaction(trans, root); + return ret; + } else if (ret) { + fprintf(stderr, "Didn't find extent for %llu\n", + (unsigned long long)rec->start); + btrfs_free_path(path); + btrfs_commit_transaction(trans, root); + return -ENOENT; + } + + ei = btrfs_item_ptr(path->nodes[0], path->slots[0], + struct btrfs_extent_item); + flags = btrfs_extent_flags(path->nodes[0], ei); + if (rec->flag_block_full_backref) { + fprintf(stderr, "setting full backref on %llu\n", + (unsigned long long)key.objectid); + flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF; + } else { + fprintf(stderr, "clearing full backref on %llu\n", + (unsigned long long)key.objectid); + flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF; + } + btrfs_set_extent_flags(path->nodes[0], ei, flags); + btrfs_mark_buffer_dirty(path->nodes[0]); + btrfs_free_path(path); + return btrfs_commit_transaction(trans, root); +} + /* right now we only prune from the extent allocation tree */ static int prune_one_block(struct btrfs_trans_handle *trans, struct btrfs_fs_info *info, @@ -7408,6 +7448,18 @@ static int check_extent_refs(struct btrfs_root *root, err = 1; cur_err = 1; } + if (rec->bad_full_backref) { + fprintf(stderr, "bad full backref, on [%llu]\n", + (unsigned long long)rec->start); + if (repair) { + ret = fixup_extent_flags(root->fs_info, rec); + if (ret) + goto repair_abort; + fixed = 1; + } + err = 1; + cur_err = 1; + } remove_cache_extent(extent_cache, cache); free_all_extent_backrefs(rec); @@ -7702,7 +7754,7 @@ static int check_devices(struct rb_root *dev_cache, } static int add_root_item_to_list(struct list_head *head, - u64 objectid, u64 bytenr, + u64 objectid, u64 bytenr, u64 last_snapshot, u8 level, u8 drop_level, int level_size, struct btrfs_key *drop_key) { @@ -7716,6 +7768,7 @@ static int add_root_item_to_list(struct list_head *head, 
ri_rec->level = level; ri_rec->level_size = level_size; ri_rec->drop_level = drop_level; + ri_rec->last_snapshot = last_snapshot; if (drop_key) memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key)); list_add_tail(&ri_rec->list, head); @@ -7863,14 +7916,14 @@ again: root1 = root->fs_info->tree_root; level = btrfs_header_level(root1->node); ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid, - root1->node->start, level, 0, + root1->node->start, 0, level, 0, btrfs_level_size(root1, level), NULL); if (ret < 0) goto out; root1 = root->fs_info->chunk_root; level = btrfs_header_level(root1->node); ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid, - root1->node->start, level, 0, + root1->node->start, 0, level, 0, btrfs_level_size(root1, level), NULL); if (ret < 0) goto out; @@ -7895,15 +7948,18 @@ again: btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]); if (btrfs_key_type(&found_key) == BTRFS_ROOT_ITEM_KEY) { unsigned long offset; + u64 last_snapshot; offset = btrfs_item_ptr_offset(leaf, path.slots[0]); read_extent_buffer(leaf, &ri, offset, sizeof(ri)); + last_snapshot = btrfs_root_last_snapshot(&ri); if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) { level = btrfs_root_level(&ri); level_size = btrfs_level_size(root, level); ret = add_root_item_to_list(&normal_trees, found_key.objectid, - btrfs_root_bytenr(&ri), level, + btrfs_root_bytenr(&ri), + last_snapshot, level, 0, level_size, NULL); if (ret < 0) goto out; @@ -7916,7 +7972,8 @@ again: ret = add_root_item_to_list(&dropping_trees, objectid, btrfs_root_bytenr(&ri), - level, ri.drop_level, + last_snapshot, level, + ri.drop_level, level_size, &found_key); if (ret < 0) goto out; -- cgit v1.2.1