diff options
-rw-r--r-- | btrfs-corrupt-block.c | 28 | ||||
-rw-r--r-- | btrfs-find-root.c | 2 | ||||
-rw-r--r-- | btrfs-image.c | 366 | ||||
-rw-r--r-- | chunk-recover.c | 2 | ||||
-rw-r--r-- | cmds-check.c | 631 | ||||
-rw-r--r-- | ctree.h | 2 | ||||
-rw-r--r-- | disk-io.c | 10 | ||||
-rw-r--r-- | disk-io.h | 3 | ||||
-rw-r--r-- | extent-tree.c | 17 | ||||
-rw-r--r-- | print-tree.c | 4 | ||||
-rw-r--r-- | super-recover.c | 2 | ||||
-rw-r--r-- | tests/fsck-tests/014-no-extent-info/default_case.img | bin | 0 -> 4096 bytes |
12 files changed, 743 insertions, 324 deletions
diff --git a/btrfs-corrupt-block.c b/btrfs-corrupt-block.c index 5db18a18..f332bdf7 100644 --- a/btrfs-corrupt-block.c +++ b/btrfs-corrupt-block.c @@ -109,6 +109,7 @@ static void print_usage(void) "to corrupt and a root+key for the item)\n"); fprintf(stderr, "\t-D Corrupt a dir item, must specify key and field\n"); fprintf(stderr, "\t-d Delete this item (must specify -K)\n"); + fprintf(stderr, "\t-r Operate on this root (only works with -d)\n"); exit(1); } @@ -1007,6 +1008,7 @@ int main(int ac, char **av) u64 metadata_block = 0; u64 inode = 0; u64 file_extent = (u64)-1; + u64 root_objectid = 0; char field[FIELD_BUF_LEN]; field[0] = '\0'; @@ -1034,11 +1036,12 @@ int main(int ac, char **av) { "item", 0, NULL, 'I'}, { "dir-item", 0, NULL, 'D'}, { "delete", 0, NULL, 'd'}, + { "root", 0, NULL, 'r'}, { NULL, 0, NULL, 0 } }; - c = getopt_long(ac, av, "l:c:b:eEkuUi:f:x:m:K:IDd", long_options, - &option_index); + c = getopt_long(ac, av, "l:c:b:eEkuUi:f:x:m:K:IDdr:", + long_options, &option_index); if (c < 0) break; switch(c) { @@ -1098,6 +1101,9 @@ int main(int ac, char **av) case 'd': delete = 1; break; + case 'r': + root_objectid = arg_strtou64(optarg); + break; default: print_usage(); } @@ -1206,9 +1212,25 @@ int main(int ac, char **av) ret = corrupt_btrfs_item(root, &key, field); } if (delete) { + struct btrfs_root *target = root; + if (!key.objectid) print_usage(); - ret = delete_item(root, &key); + if (root_objectid) { + struct btrfs_key root_key; + + root_key.objectid = root_objectid; + root_key.type = BTRFS_ROOT_ITEM_KEY; + root_key.offset = (u64)-1; + + target = btrfs_read_fs_root(root->fs_info, &root_key); + if (IS_ERR(target)) { + fprintf(stderr, "Couldn't find root %llu\n", + (unsigned long long)root_objectid); + print_usage(); + } + } + ret = delete_item(target, &key); goto out_close; } if (key.objectid || key.offset || key.type) { diff --git a/btrfs-find-root.c b/btrfs-find-root.c index 3edb8332..c6e6b82f 100644 --- a/btrfs-find-root.c +++ b/btrfs-find-root.c @@ -79,7 +79,7 @@ static struct btrfs_root *open_ctree_broken(int fd, const char *device) return NULL; } - ret = btrfs_scan_fs_devices(fd, device, &fs_devices, 0, 1); + ret = btrfs_scan_fs_devices(fd, device, &fs_devices, 0, 1, 0); if (ret) goto out; diff --git a/btrfs-image.c b/btrfs-image.c index f6347f36..04ec4734 100644 --- a/btrfs-image.c +++ b/btrfs-image.c @@ -67,7 +67,9 @@ struct fs_chunk { u64 logical; u64 physical; u64 bytes; - struct rb_node n; + struct rb_node l; + struct rb_node p; + struct list_head list; }; struct async_work { @@ -125,10 +127,14 @@ struct mdrestore_struct { pthread_cond_t cond; struct rb_root chunk_tree; + struct rb_root physical_tree; struct list_head list; + struct list_head overlapping_chunks; size_t num_items; u32 leafsize; u64 devid; + u64 alloced_chunks; + u64 last_physical_offset; u8 uuid[BTRFS_UUID_SIZE]; u8 fsid[BTRFS_FSID_SIZE]; @@ -138,6 +144,7 @@ struct mdrestore_struct { int old_restore; int fixup_offset; int multi_devices; + int clear_space_cache; struct btrfs_fs_info *info; }; @@ -202,8 +209,8 @@ static int name_cmp(struct rb_node *a, struct rb_node *b, int fuzz) static int chunk_cmp(struct rb_node *a, struct rb_node *b, int fuzz) { - struct fs_chunk *entry = rb_entry(a, struct fs_chunk, n); - struct fs_chunk *ins = rb_entry(b, struct fs_chunk, n); + struct fs_chunk *entry = rb_entry(a, struct fs_chunk, l); + struct fs_chunk *ins = rb_entry(b, struct fs_chunk, l); if (fuzz && ins->logical >= entry->logical && ins->logical < entry->logical + entry->bytes) @@ -216,6 +223,26 @@ static int chunk_cmp(struct rb_node *a, struct rb_node *b, int fuzz) return 0; } +static int physical_cmp(struct rb_node *a, struct rb_node *b, int fuzz) +{ + struct fs_chunk *entry = rb_entry(a, struct fs_chunk, p); + struct fs_chunk *ins = rb_entry(b, struct fs_chunk, p); + + if (fuzz && ins->physical >= entry->physical && + ins->physical < entry->physical + entry->bytes) + return 0; + + if (fuzz && entry->physical >= ins->physical && + entry->physical < ins->physical + ins->bytes) + return 0; + + if (ins->physical < entry->physical) + return -1; + else if (ins->physical > entry->physical) + return 1; + return 0; +} + static void tree_insert(struct rb_root *root, struct rb_node *ins, int (*cmp)(struct rb_node *a, struct rb_node *b, int fuzz)) @@ -227,7 +254,7 @@ static void tree_insert(struct rb_root *root, struct rb_node *ins, while(*p) { parent = *p; - dir = cmp(*p, ins, 0); + dir = cmp(*p, ins, 1); if (dir < 0) p = &(*p)->rb_left; else if (dir > 0) @@ -262,6 +289,33 @@ static struct rb_node *tree_search(struct rb_root *root, return NULL; } +static u64 logical_to_physical(struct mdrestore_struct *mdres, u64 logical, u64 *size) +{ + struct fs_chunk *fs_chunk; + struct rb_node *entry; + struct fs_chunk search; + u64 offset; + + if (logical == BTRFS_SUPER_INFO_OFFSET) + return logical; + + search.logical = logical; + entry = tree_search(&mdres->chunk_tree, &search.l, chunk_cmp, 1); + if (!entry) { + if (mdres->in != stdin) + printf("Couldn't find a chunk, using logical\n"); + return logical; + } + fs_chunk = rb_entry(entry, struct fs_chunk, l); + if (fs_chunk->logical > logical || fs_chunk->logical + fs_chunk->bytes < logical) + BUG(); + offset = search.logical - fs_chunk->logical; + + *size = min(*size, fs_chunk->bytes + fs_chunk->logical - logical); + return fs_chunk->physical + offset; +} + + static char *find_collision(struct metadump_struct *md, char *name, u32 name_len) { @@ -868,6 +922,15 @@ static int read_data_extent(struct metadump_struct *md, return 0; } +static int get_dev_fd(struct btrfs_root *root) +{ + struct btrfs_device *dev; + + dev = list_first_entry(&root->fs_info->fs_devices->devices, + struct btrfs_device, dev_list); + return dev->fd; +} + static int flush_pending(struct metadump_struct *md, int done) { struct async_work *async = NULL; @@ -904,6 +967,24 @@ static int flush_pending(struct metadump_struct *md, int done) } } + /* + * Balance can make the mapping not cover the super block, so + * just copy directly from one of the devices. + */ + if (start == BTRFS_SUPER_INFO_OFFSET) { + int fd = get_dev_fd(md->root); + + ret = pread64(fd, async->buffer, size, start); + if (ret < size) { + free(async->buffer); + free(async); + fprintf(stderr, "Error reading superblock\n"); + return -EIO; + } + size = 0; + ret = 0; + } + while (!md->data && size > 0) { u64 this_read = min(blocksize, size); eb = read_tree_block(md->root, start, this_read, 0); @@ -1369,12 +1450,13 @@ static void update_super_old(u8 *buffer) csum_block(buffer, BTRFS_SUPER_INFO_SIZE); } -static int update_super(u8 *buffer) +static int update_super(struct mdrestore_struct *mdres, u8 *buffer) { struct btrfs_super_block *super = (struct btrfs_super_block *)buffer; struct btrfs_chunk *chunk; struct btrfs_disk_key *disk_key; struct btrfs_key key; + u64 flags = btrfs_super_flags(super); u32 new_array_size = 0; u32 array_size; u32 cur = 0; @@ -1396,6 +1478,8 @@ static int update_super(u8 *buffer) cur += sizeof(*disk_key); if (key.type == BTRFS_CHUNK_ITEM_KEY) { + u64 physical, size = 0; + chunk = (struct btrfs_chunk *)ptr; old_num_stripes = btrfs_stack_chunk_num_stripes(chunk); chunk = (struct btrfs_chunk *)write_ptr; @@ -1405,7 +1489,13 @@ static int update_super(u8 *buffer) btrfs_set_stack_chunk_sub_stripes(chunk, 0); btrfs_set_stack_chunk_type(chunk, BTRFS_BLOCK_GROUP_SYSTEM); - chunk->stripe.devid = super->dev_item.devid; + btrfs_set_stack_stripe_devid(&chunk->stripe, + super->dev_item.devid); + physical = logical_to_physical(mdres, key.offset, + &size); + if (size != (u64)-1) + btrfs_set_stack_stripe_offset(&chunk->stripe, + physical); memcpy(chunk->stripe.dev_uuid, super->dev_item.uuid, BTRFS_UUID_SIZE); new_array_size += sizeof(*chunk); @@ -1419,6 +1509,11 @@ static int update_super(u8 *buffer) cur += btrfs_chunk_item_size(old_num_stripes); } + if (mdres->clear_space_cache) + btrfs_set_super_cache_generation(super, 0); + + flags |= BTRFS_SUPER_FLAG_METADUMP_V2; + btrfs_set_super_flags(super, flags); btrfs_set_super_sys_array_size(super, new_array_size); csum_block(buffer, BTRFS_SUPER_INFO_SIZE); @@ -1509,7 +1604,7 @@ static int fixup_chunk_tree_block(struct mdrestore_struct *mdres, for (i = 0; i < btrfs_header_nritems(eb); i++) { struct btrfs_chunk chunk; struct btrfs_key key; - u64 type; + u64 type, physical, size = (u64)-1; btrfs_item_key_to_cpu(eb, &key, i); if (key.type != BTRFS_CHUNK_ITEM_KEY) @@ -1519,6 +1614,10 @@ static int fixup_chunk_tree_block(struct mdrestore_struct *mdres, btrfs_item_ptr_offset(eb, i), sizeof(chunk)); + size = 0; + physical = logical_to_physical(mdres, key.offset, + &size); + /* Zero out the RAID profile */ type = btrfs_stack_chunk_type(&chunk); type &= (BTRFS_BLOCK_GROUP_DATA | @@ -1530,6 +1629,9 @@ static int fixup_chunk_tree_block(struct mdrestore_struct *mdres, btrfs_set_stack_chunk_num_stripes(&chunk, 1); btrfs_set_stack_chunk_sub_stripes(&chunk, 0); btrfs_set_stack_stripe_devid(&chunk.stripe, mdres->devid); + if (size != (u64)-1) + btrfs_set_stack_stripe_offset(&chunk.stripe, + physical); memcpy(chunk.stripe.dev_uuid, mdres->uuid, BTRFS_UUID_SIZE); write_extent_buffer(eb, &chunk, @@ -1584,32 +1686,6 @@ static void write_backup_supers(int fd, u8 *buf) } } -static u64 logical_to_physical(struct mdrestore_struct *mdres, u64 logical, u64 *size) -{ - struct fs_chunk *fs_chunk; - struct rb_node *entry; - struct fs_chunk search; - u64 offset; - - if (logical == BTRFS_SUPER_INFO_OFFSET) - return logical; - - search.logical = logical; - entry = tree_search(&mdres->chunk_tree, &search.n, chunk_cmp, 1); - if (!entry) { - if (mdres->in != stdin) - printf("Couldn't find a chunk, using logical\n"); - return logical; - } - fs_chunk = rb_entry(entry, struct fs_chunk, n); - if (fs_chunk->logical > logical || fs_chunk->logical + fs_chunk->bytes < logical) - BUG(); - offset = search.logical - fs_chunk->logical; - - *size = min(*size, fs_chunk->bytes + fs_chunk->logical - logical); - return fs_chunk->physical + offset; -} - static void *restore_worker(void *data) { struct mdrestore_struct *mdres = (struct mdrestore_struct *)data; @@ -1669,7 +1745,7 @@ static void *restore_worker(void *data) if (mdres->old_restore) { update_super_old(outbuf); } else { - ret = update_super(outbuf); + ret = update_super(mdres, outbuf); if (ret) err = ret; } @@ -1742,8 +1818,9 @@ static void mdrestore_destroy(struct mdrestore_struct *mdres, int num_threads) while ((n = rb_first(&mdres->chunk_tree))) { struct fs_chunk *entry; - entry = rb_entry(n, struct fs_chunk, n); + entry = rb_entry(n, struct fs_chunk, l); rb_erase(n, &mdres->chunk_tree); + rb_erase(&entry->p, &mdres->physical_tree); free(entry); } pthread_mutex_lock(&mdres->mutex); @@ -1770,6 +1847,7 @@ static int mdrestore_init(struct mdrestore_struct *mdres, pthread_cond_init(&mdres->cond, NULL); pthread_mutex_init(&mdres->mutex, NULL); INIT_LIST_HEAD(&mdres->list); + INIT_LIST_HEAD(&mdres->overlapping_chunks); mdres->in = in; mdres->out = out; mdres->old_restore = old_restore; @@ -1777,6 +1855,9 @@ static int mdrestore_init(struct mdrestore_struct *mdres, mdres->fixup_offset = fixup_offset; mdres->info = info; mdres->multi_devices = multi_devices; + mdres->clear_space_cache = 0; + mdres->last_physical_offset = 0; + mdres->alloced_chunks = 0; if (!num_threads) return 0; @@ -1846,7 +1927,6 @@ static int add_cluster(struct meta_cluster *cluster, u32 i, nritems; int ret; - BUG_ON(mdres->num_items); mdres->compress_method = header->compress; bytenr = le64_to_cpu(header->bytenr) + BLOCK_SIZE; @@ -1998,7 +2078,19 @@ static int read_chunk_block(struct mdrestore_struct *mdres, u8 *buffer, fs_chunk->logical = key.offset; fs_chunk->physical = btrfs_stack_stripe_offset(&chunk.stripe); fs_chunk->bytes = btrfs_stack_chunk_length(&chunk); - tree_insert(&mdres->chunk_tree, &fs_chunk->n, chunk_cmp); + INIT_LIST_HEAD(&fs_chunk->list); + if (tree_search(&mdres->physical_tree, &fs_chunk->p, + physical_cmp, 1) != NULL) + list_add(&fs_chunk->list, &mdres->overlapping_chunks); + else + tree_insert(&mdres->physical_tree, &fs_chunk->p, + physical_cmp); + if (fs_chunk->physical + fs_chunk->bytes > + mdres->last_physical_offset) + mdres->last_physical_offset = fs_chunk->physical + + fs_chunk->bytes; + mdres->alloced_chunks += fs_chunk->bytes; + tree_insert(&mdres->chunk_tree, &fs_chunk->l, chunk_cmp); } out: free(eb); @@ -2247,9 +2339,143 @@ static int build_chunk_tree(struct mdrestore_struct *mdres, return search_for_chunk_blocks(mdres, chunk_root_bytenr, 0); } -static int __restore_metadump(const char *input, FILE *out, int old_restore, - int num_threads, int fixup_offset, - const char *target, int multi_devices) +static int range_contains_super(u64 physical, u64 bytes) +{ + u64 super_bytenr; + int i; + + for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) { + super_bytenr = btrfs_sb_offset(i); + if (super_bytenr >= physical && + super_bytenr < physical + bytes) + return 1; + } + + return 0; +} + +static void remap_overlapping_chunks(struct mdrestore_struct *mdres) +{ + struct fs_chunk *fs_chunk; + + while (!list_empty(&mdres->overlapping_chunks)) { + fs_chunk = list_first_entry(&mdres->overlapping_chunks, + struct fs_chunk, list); + list_del_init(&fs_chunk->list); + if (range_contains_super(fs_chunk->physical, + fs_chunk->bytes)) { + fprintf(stderr, "Remapping a chunk that had a super " + "mirror inside of it, clearing space cache " + "so we don't end up with corruption\n"); + mdres->clear_space_cache = 1; + } + fs_chunk->physical = mdres->last_physical_offset; + tree_insert(&mdres->physical_tree, &fs_chunk->p, physical_cmp); + mdres->last_physical_offset += fs_chunk->bytes; + } +} + +static int fixup_devices(struct btrfs_fs_info *fs_info, + struct mdrestore_struct *mdres, off_t dev_size) +{ + struct btrfs_trans_handle *trans; + struct btrfs_dev_item *dev_item; + struct btrfs_path *path; + struct extent_buffer *leaf; + struct btrfs_root *root = fs_info->chunk_root; + struct btrfs_key key; + u64 devid, cur_devid; + int ret; + + path = btrfs_alloc_path(); + if (!path) { + fprintf(stderr, "Error alloc'ing path\n"); + return -ENOMEM; + } + + trans = btrfs_start_transaction(fs_info->tree_root, 1); + if (IS_ERR(trans)) { + fprintf(stderr, "Error starting transaction %ld\n", + PTR_ERR(trans)); + btrfs_free_path(path); + return PTR_ERR(trans); + } + + dev_item = &fs_info->super_copy->dev_item; + + devid = btrfs_stack_device_id(dev_item); + + btrfs_set_stack_device_total_bytes(dev_item, dev_size); + btrfs_set_stack_device_bytes_used(dev_item, mdres->alloced_chunks); + + key.objectid = BTRFS_DEV_ITEMS_OBJECTID; + key.type = BTRFS_DEV_ITEM_KEY; + key.offset = 0; + +again: + ret = btrfs_search_slot(trans, root, &key, path, -1, 1); + if (ret < 0) { + fprintf(stderr, "search failed %d\n", ret); + exit(1); + } + + while (1) { + leaf = path->nodes[0]; + if (path->slots[0] >= btrfs_header_nritems(leaf)) { + ret = btrfs_next_leaf(root, path); + if (ret < 0) { + fprintf(stderr, "Error going to next leaf " + "%d\n", ret); + exit(1); + } + if (ret > 0) { + ret = 0; + break; + } + leaf = path->nodes[0]; + } + + btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); + if (key.type > BTRFS_DEV_ITEM_KEY) + break; + if (key.type != BTRFS_DEV_ITEM_KEY) { + path->slots[0]++; + continue; + } + + dev_item = btrfs_item_ptr(leaf, path->slots[0], + struct btrfs_dev_item); + cur_devid = btrfs_device_id(leaf, dev_item); + if (devid != cur_devid) { + ret = btrfs_del_item(trans, root, path); + if (ret) { + fprintf(stderr, "Error deleting item %d\n", + ret); + exit(1); + } + btrfs_release_path(path); + goto again; + } + + btrfs_set_device_total_bytes(leaf, dev_item, dev_size); + btrfs_set_device_bytes_used(leaf, dev_item, + mdres->alloced_chunks); + btrfs_mark_buffer_dirty(leaf); + path->slots[0]++; + } + + btrfs_free_path(path); + ret = btrfs_commit_transaction(trans, fs_info->tree_root); + if (ret) { + fprintf(stderr, "Commit failed %d\n", ret); + return ret; + } + return 0; +} + +static int restore_metadump(const char *input, FILE *out, int old_restore, + int num_threads, int fixup_offset, + const char *target, int multi_devices) { struct meta_cluster *cluster = NULL; struct meta_cluster_header *header; @@ -2301,6 +2527,8 @@ static int __restore_metadump(const char *input, FILE *out, int old_restore, ret = build_chunk_tree(&mdrestore, cluster); if (ret) goto out; + if (!list_empty(&mdrestore.overlapping_chunks)) + remap_overlapping_chunks(&mdrestore); } if (in != stdin && fseek(in, 0, SEEK_SET)) { @@ -2308,7 +2536,7 @@ static int __restore_metadump(const char *input, FILE *out, int old_restore, goto out; } - while (1) { + while (!mdrestore.error) { ret = fread(cluster, BLOCK_SIZE, 1, in); if (!ret) break; @@ -2325,13 +2553,34 @@ static int __restore_metadump(const char *input, FILE *out, int old_restore, fprintf(stderr, "Error adding cluster\n"); break; } + } + ret = wait_for_worker(&mdrestore); - ret = wait_for_worker(&mdrestore); - if (ret) { - fprintf(stderr, "One of the threads errored out %d\n", - ret); - break; + if (!ret && !multi_devices && !old_restore) { + struct btrfs_root *root; + struct stat st; + + root = open_ctree_fd(fileno(out), target, 0, + OPEN_CTREE_PARTIAL | + OPEN_CTREE_WRITES | + OPEN_CTREE_NO_DEVICES); + if (!root) { + fprintf(stderr, "unable to open %s\n", target); + ret = -EIO; + goto out; + } + info = root->fs_info; + + if (stat(target, &st)) { + fprintf(stderr, "statting %s failed\n", target); + close_ctree(info->chunk_root); + return 1; } + + ret = fixup_devices(info, &mdrestore, st.st_size); + close_ctree(info->chunk_root); + if (ret) + goto out; } out: mdrestore_destroy(&mdrestore, num_threads); @@ -2346,19 +2595,6 @@ failed_open: return ret; } -static int restore_metadump(const char *input, FILE *out, int old_restore, - int num_threads, int multi_devices) -{ - return __restore_metadump(input, out, old_restore, num_threads, 0, NULL, - multi_devices); -} - -static int fixup_metadump(const char *input, FILE *out, int num_threads, - const char *target) -{ - return __restore_metadump(input, out, 0, num_threads, 1, target, 1); -} - static int update_disk_super_on_device(struct btrfs_fs_info *info, const char *other_dev, u64 cur_devid) { @@ -2473,7 +2709,7 @@ int main(int argc, char *argv[]) { char *source; char *target; - u64 num_threads = 0; + u64 num_threads = 1; u64 compress_level = 0; int create = 1; int old_restore = 0; @@ -2564,7 +2800,7 @@ int main(int argc, char *argv[]) } } - if (num_threads == 0 && compress_level > 0) { + if (num_threads == 1 && compress_level > 0) { num_threads = sysconf(_SC_NPROCESSORS_ONLN); if (num_threads <= 0) num_threads = 1; @@ -2583,8 +2819,8 @@ int main(int argc, char *argv[]) ret = create_metadump(source, out, num_threads, compress_level, sanitize, walk_trees); } else { - ret = restore_metadump(source, out, old_restore, 1, - multi_devices); + ret = restore_metadump(source, out, old_restore, num_threads, + 0, target, multi_devices); } if (ret) { printk("%s failed (%s)\n", (create) ? "create" : "restore", @@ -2631,14 +2867,14 @@ int main(int argc, char *argv[]) close_ctree(info->chunk_root); /* fix metadata block to map correct chunk */ - ret = fixup_metadump(source, out, 1, target); + ret = restore_metadump(source, out, 0, num_threads, 1, + target, 1); if (ret) { fprintf(stderr, "fix metadump failed (error=%d)\n", ret); exit(1); } } - out: if (out == stdout) { fflush(out); diff --git a/chunk-recover.c b/chunk-recover.c index 94efc438..832b3b1b 100644 --- a/chunk-recover.c +++ b/chunk-recover.c @@ -1520,7 +1520,7 @@ static int recover_prepare(struct recover_control *rc, char *path) goto fail_free_sb; } - ret = btrfs_scan_fs_devices(fd, path, &fs_devices, 0, 1); + ret = btrfs_scan_fs_devices(fd, path, &fs_devices, 0, 1, 0); if (ret) goto fail_free_sb; diff --git a/cmds-check.c b/cmds-check.c index 73d7866a..ce0ac888 100644 --- a/cmds-check.c +++ b/cmds-check.c @@ -119,12 +119,13 @@ struct extent_record { u64 info_objectid; u32 num_duplicates; u8 info_level; + int flag_block_full_backref; unsigned int found_rec:1; unsigned int content_checked:1; unsigned int owner_ref_checked:1; unsigned int is_root:1; unsigned int metadata:1; - unsigned int flag_block_full_backref:1; + unsigned int bad_full_backref:1; }; struct inode_backref { @@ -145,6 +146,7 @@ struct root_item_record { struct list_head list; u64 objectid; u64 bytenr; + u64 last_snapshot; u8 level; u8 drop_level; int level_size; @@ -1530,7 +1532,16 @@ static int process_file_extent(struct btrfs_root *root, } rec->extent_end = key->offset + num_bytes; - if (disk_bytenr > 0) { + /* + * The data reloc tree will copy full extents into its inode and then + * copy the corresponding csums. Because the extent it copied could be + * a preallocated extent that hasn't been written to yet there may be no + * csums to copy, ergo we won't have csums for our file extent. This is + * ok so just don't bother checking csums if the inode belongs to the + * data reloc tree. + */ + if (disk_bytenr > 0 && + btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) { u64 found; if (btrfs_file_extent_compression(eb, fi)) num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi); @@ -3659,7 +3670,6 @@ static void free_extent_record_cache(struct btrfs_fs_info *fs_info, if (!cache) break; rec = container_of(cache, struct extent_record, cache); - btrfs_unpin_extent(fs_info, rec->start, rec->max_size); remove_cache_extent(extent_cache, cache); free_all_extent_backrefs(rec); free(rec); @@ -3671,7 +3681,8 @@ static int maybe_free_extent_rec(struct cache_tree *extent_cache, { if (rec->content_checked && rec->owner_ref_checked && rec->extent_item_refs == rec->refs && rec->refs > 0 && - rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0)) { + rec->num_duplicates == 0 && !all_backpointers_checked(rec, 0) && + !rec->bad_full_backref) { remove_cache_extent(extent_cache, &rec->cache); free_all_extent_backrefs(rec); list_del_init(&rec->list); @@ -3986,11 +3997,11 @@ again: * Attempt to fix basic block failures. If we can't fix it for whatever reason * then just return -EIO. */ -static int try_to_fix_bad_block(struct btrfs_trans_handle *trans, - struct btrfs_root *root, +static int try_to_fix_bad_block(struct btrfs_root *root, struct extent_buffer *buf, enum btrfs_tree_block_status status) { + struct btrfs_trans_handle *trans; struct ulist *roots; struct ulist_node *node; struct btrfs_root *search_root; @@ -4007,7 +4018,7 @@ static int try_to_fix_bad_block(struct btrfs_trans_handle *trans, if (!path) return -EIO; - ret = btrfs_find_all_roots(trans, root->fs_info, buf->start, + ret = btrfs_find_all_roots(NULL, root->fs_info, buf->start, 0, &roots); if (ret) { btrfs_free_path(path); @@ -4026,7 +4037,12 @@ static int try_to_fix_bad_block(struct btrfs_trans_handle *trans, break; } - record_root_in_trans(trans, search_root); + + trans = btrfs_start_transaction(search_root, 0); + if (IS_ERR(trans)) { + ret = PTR_ERR(trans); + break; + } path->lowest_level = btrfs_header_level(buf); path->skip_check_block = 1; @@ -4037,23 +4053,26 @@ static int try_to_fix_bad_block(struct btrfs_trans_handle *trans, ret = btrfs_search_slot(trans, search_root, &key, path, 0, 1); if (ret) { ret = -EIO; + btrfs_commit_transaction(trans, search_root); break; } if (status == BTRFS_TREE_BLOCK_BAD_KEY_ORDER) ret = fix_key_order(trans, search_root, path); else if (status == BTRFS_TREE_BLOCK_INVALID_OFFSETS) ret = fix_item_offset(trans, search_root, path); - if (ret) + if (ret) { + btrfs_commit_transaction(trans, search_root); break; + } btrfs_release_path(path); + btrfs_commit_transaction(trans, search_root); } ulist_free(roots); btrfs_free_path(path); return ret; } -static int check_block(struct btrfs_trans_handle *trans, - struct btrfs_root *root, +static int check_block(struct btrfs_root *root, struct cache_tree *extent_cache, struct extent_buffer *buf, u64 flags) { @@ -4089,8 +4108,7 @@ static int check_block(struct btrfs_trans_handle *trans, if (status != BTRFS_TREE_BLOCK_CLEAN) { if (repair) - status = try_to_fix_bad_block(trans, root, buf, - status); + status = try_to_fix_bad_block(root, buf, status); if (status != BTRFS_TREE_BLOCK_CLEAN) { ret = -EIO; fprintf(stderr, "bad block %llu\n", @@ -4323,6 +4341,8 @@ static int add_extent_rec(struct cache_tree *extent_cache, rec->owner_ref_checked = 0; rec->num_duplicates = 0; rec->metadata = metadata; + rec->flag_block_full_backref = -1; + rec->bad_full_backref = 0; INIT_LIST_HEAD(&rec->backrefs); INIT_LIST_HEAD(&rec->dups); INIT_LIST_HEAD(&rec->list); @@ -5548,27 +5568,41 @@ static int is_dropped_key(struct btrfs_key *key, return 0; } +/* + * Here are the rules for FULL_BACKREF. + * + * 1) If BTRFS_HEADER_FLAG_RELOC is set then we have FULL_BACKREF set. + * 2) If btrfs_header_owner(buf) no longer points to buf then we have + * FULL_BACKREF set. + * 3) We cow'ed the block walking down a reloc tree. This is impossible to tell + * if it happened after the relocation occurred since we'll have dropped the + * reloc root, so it's entirely possible to have FULL_BACKREF set on buf and + * have no real way to know for sure. + * + * We process the blocks one root at a time, and we start from the lowest root + * objectid and go to the highest. So we can just lookup the owner backref for + * the record and if we don't find it then we know it doesn't exist and we have + * a FULL BACKREF. + * + * FIXME: if we ever start reclaiming root objectid's then we need to fix this + * assumption and simply indicate that we _think_ that the FULL BACKREF needs to + * be set or not and then we can check later once we've gathered all the refs. + */ static int calc_extent_flag(struct btrfs_root *root, struct cache_tree *extent_cache, struct extent_buffer *buf, struct root_item_record *ri, u64 *flags) { - int i; - int nritems = btrfs_header_nritems(buf); - struct btrfs_key key; struct extent_record *rec; struct cache_extent *cache; - struct data_backref *dback; struct tree_backref *tback; - struct extent_buffer *new_buf; u64 owner = 0; - u64 bytenr; - u64 offset; - u64 ptr; - int size; - int ret; - u8 level; + + cache = lookup_cache_extent(extent_cache, buf->start, 1); + /* we have added this extent before */ + BUG_ON(!cache); + rec = container_of(cache, struct extent_record, cache); /* * Except file/reloc tree, we can not have @@ -5581,96 +5615,32 @@ static int calc_extent_flag(struct btrfs_root *root, */ if (buf->start == ri->bytenr) goto normal; - if (btrfs_is_leaf(buf)) { - /* - * we are searching from original root, world - * peace is achieved, we use normal backref. - */ - owner = btrfs_header_owner(buf); - if (owner == ri->objectid) - goto normal; - /* - * we check every eb here, and if any of - * eb dosen't have original root refers - * to this eb, we set full backref flag for - * this extent, otherwise normal backref. - */ - for (i = 0; i < nritems; i++) { - struct btrfs_file_extent_item *fi; - btrfs_item_key_to_cpu(buf, &key, i); - if (key.type != BTRFS_EXTENT_DATA_KEY) - continue; - fi = btrfs_item_ptr(buf, i, - struct btrfs_file_extent_item); - if (btrfs_file_extent_type(buf, fi) == - BTRFS_FILE_EXTENT_INLINE) - continue; - if (btrfs_file_extent_disk_bytenr(buf, fi) == 0) - continue; - bytenr = btrfs_file_extent_disk_bytenr(buf, fi); - cache = lookup_cache_extent(extent_cache, bytenr, 1); - if (!cache) - goto full_backref; - offset = btrfs_file_extent_offset(buf, fi); - rec = container_of(cache, struct extent_record, cache); - dback = find_data_backref(rec, 0, ri->objectid, owner, - key.offset - offset, 1, bytenr, bytenr); - if (!dback) - goto full_backref; - } + if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC)) goto full_backref; - } else { - level = btrfs_header_level(buf); - for (i = 0; i < nritems; i++) { - ptr = btrfs_node_blockptr(buf, i); - size = btrfs_level_size(root, level); - if (i == 0) { - new_buf = read_tree_block(root, ptr, size, 0); - if (!extent_buffer_uptodate(new_buf)) { - free_extent_buffer(new_buf); - ret = -EIO; - return ret; - } - /* - * we are searching from origin root, world - * peace is achieved, we use normal backref. - */ - owner = btrfs_header_owner(new_buf); - free_extent_buffer(new_buf); - if (owner == ri->objectid) - goto normal; - } - cache = lookup_cache_extent(extent_cache, ptr, size); - if (!cache) - goto full_backref; - rec = container_of(cache, struct extent_record, cache); - tback = find_tree_backref(rec, 0, owner); - if (!tback) - goto full_backref; - } - } + owner = btrfs_header_owner(buf); + if (owner == ri->objectid) + goto normal; + + tback = find_tree_backref(rec, 0, owner); + if (!tback) + goto full_backref; normal: *flags = 0; - cache = lookup_cache_extent(extent_cache, buf->start, 1); - /* we have added this extent before */ - BUG_ON(!cache); - rec = container_of(cache, struct extent_record, cache); - rec->flag_block_full_backref = 0; + if (rec->flag_block_full_backref != -1 && + rec->flag_block_full_backref != 0) + rec->bad_full_backref = 1; return 0; full_backref: *flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF; - cache = lookup_cache_extent(extent_cache, buf->start, 1); - /* we have added this extent before */ - BUG_ON(!cache); - rec = container_of(cache, struct extent_record, cache); - rec->flag_block_full_backref = 1; + if (rec->flag_block_full_backref != -1 && + rec->flag_block_full_backref != 1) + rec->bad_full_backref = 1; return 0; } -static int run_next_block(struct btrfs_trans_handle *trans, - struct btrfs_root *root, +static int run_next_block(struct btrfs_root *root, struct block_info *bits, int bits_nr, u64 *last, @@ -5686,6 +5656,7 @@ static int run_next_block(struct btrfs_trans_handle *trans, struct root_item_record *ri) { struct extent_buffer *buf; + struct extent_record *rec = NULL; u64 bytenr; u32 size; u64 parent; @@ -5738,8 +5709,6 @@ static int run_next_block(struct btrfs_trans_handle *trans, } cache = lookup_cache_extent(extent_cache, bytenr, size); if (cache) { - struct extent_record *rec; - rec = container_of(cache, struct extent_record, cache); gen = rec->parent_generation; } @@ -5754,32 +5723,64 @@ static int run_next_block(struct btrfs_trans_handle *trans, nritems = btrfs_header_nritems(buf); - /* - * FIXME, this only works only if we don't have any full - * backref mode. - */ + flags = 0; if (!init_extent_tree) { ret = btrfs_lookup_extent_info(NULL, root, bytenr, btrfs_header_level(buf), 1, NULL, &flags); - if (ret < 0) - goto out; + if (ret < 0) { + ret = calc_extent_flag(root, extent_cache, buf, ri, &flags); + if (ret < 0) { + fprintf(stderr, "Couldn't calc extent flags\n"); + flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF; + } + } } else { flags = 0; ret = calc_extent_flag(root, extent_cache, buf, ri, &flags); - if (ret < 0) - goto out; + if (ret < 0) { + fprintf(stderr, "Couldn't calc extent flags\n"); + flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF; + } + } + + if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) { + if (ri != NULL && + ri->objectid != BTRFS_TREE_RELOC_OBJECTID && + ri->objectid == btrfs_header_owner(buf)) { + /* + * Ok we got to this block from it's original owner and + * we have FULL_BACKREF set. Relocation can leave + * converted blocks over so this is altogether possible, + * however it's not possible if the generation > the + * last snapshot, so check for this case. + */ + if (!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC) && + btrfs_header_generation(buf) > ri->last_snapshot) { + flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF; + rec->bad_full_backref = 1; + } + } + } else { + if (ri != NULL && + (ri->objectid == BTRFS_TREE_RELOC_OBJECTID || + btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) { + flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF; + rec->bad_full_backref = 1; + } } if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) { + rec->flag_block_full_backref = 1; parent = bytenr; owner = 0; } else { + rec->flag_block_full_backref = 0; parent = 0; owner = btrfs_header_owner(buf); } - ret = check_block(trans, root, extent_cache, buf, flags); + ret = check_block(root, extent_cache, buf, flags); if (ret) goto out; @@ -6260,7 +6261,7 @@ static int record_extent(struct btrfs_trans_handle *trans, parent, tback->root, 0, 0); fprintf(stderr, "adding new tree backref on " "start %llu len %llu parent %llu root %llu\n", - rec->start, rec->max_size, tback->parent, tback->root); + rec->start, rec->max_size, parent, tback->root); } if (ret) goto fail; @@ -6331,16 +6332,16 @@ static struct extent_entry *find_most_right_entry(struct list_head *entries) return best; } -static int repair_ref(struct btrfs_trans_handle *trans, - struct btrfs_fs_info *info, struct btrfs_path *path, +static int repair_ref(struct btrfs_fs_info *info, struct btrfs_path *path, struct data_backref *dback, struct extent_entry *entry) { + struct btrfs_trans_handle *trans; struct btrfs_root *root; struct btrfs_file_extent_item *fi; struct extent_buffer *leaf; struct btrfs_key key; u64 bytenr, bytes; - int ret; + int ret, err; key.objectid = dback->root; key.type = BTRFS_ROOT_ITEM_KEY; @@ -6392,11 +6393,9 @@ static int repair_ref(struct btrfs_trans_handle *trans, btrfs_release_path(path); - /* - * Have to make sure that this root gets updated when we commit the - * transaction - */ - record_root_in_trans(trans, root); + trans = btrfs_start_transaction(root, 1); + if (IS_ERR(trans)) + return PTR_ERR(trans); /* * Ok we have the key of the file extent we want to fix, now we can cow @@ -6406,13 +6405,14 @@ static int repair_ref(struct btrfs_trans_handle *trans, if (ret < 0) { fprintf(stderr, "Error cowing down to ref [%Lu, %u, %Lu]: %d\n", key.objectid, key.type, key.offset, ret); - return ret; + goto out; } if (ret > 0) { fprintf(stderr, "Well that's odd, we just found this key " "[%Lu, %u, %Lu]\n", key.objectid, key.type, key.offset); - return -EINVAL; + ret = -EINVAL; + goto out; } leaf = path->nodes[0]; fi = btrfs_item_ptr(leaf, path->slots[0], @@ -6425,7 +6425,8 @@ static int repair_ref(struct btrfs_trans_handle *trans, "system and send it to a btrfs developer so they can " "complete this functionality for bytenr %Lu\n", dback->disk_bytenr); - return -EINVAL; + ret = -EINVAL; + goto out; } if (dback->node.broken && dback->disk_bytenr != entry->bytenr) { @@ -6442,7 +6443,8 @@ static int repair_ref(struct btrfs_trans_handle *trans, "take a btrfs-image of this file system and " "send it to a btrfs developer, ref %Lu\n", dback->disk_bytenr); - return -EINVAL; + ret = -EINVAL; + goto out; } offset += off_diff; btrfs_set_file_extent_disk_bytenr(leaf, fi, entry->bytenr); @@ -6456,7 +6458,8 @@ static int repair_ref(struct btrfs_trans_handle *trans, " take a btrfs-image of this file system and " "send it to a btrfs developer, ref %Lu\n", dback->disk_bytenr); - return -EINVAL; + ret = -EINVAL; + goto out; } offset += dback->disk_bytenr; @@ -6477,12 +6480,13 @@ static int repair_ref(struct btrfs_trans_handle *trans, else printf("ram bytes may be wrong?\n"); btrfs_mark_buffer_dirty(leaf); +out: + err = btrfs_commit_transaction(trans, root); btrfs_release_path(path); - return 0; + return ret ? ret : err; } -static int verify_backrefs(struct btrfs_trans_handle *trans, - struct btrfs_fs_info *info, struct btrfs_path *path, +static int verify_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path, struct extent_record *rec) { struct extent_backref *back; @@ -6644,7 +6648,7 @@ static int verify_backrefs(struct btrfs_trans_handle *trans, dback->disk_bytenr == best->bytenr) continue; - ret = repair_ref(trans, info, path, dback, best); + ret = repair_ref(info, path, dback, best); if (ret) goto out; } @@ -6745,15 +6749,15 @@ static int process_duplicates(struct btrfs_root *root, return good->num_duplicates ? 0 : 1; } -static int delete_duplicate_records(struct btrfs_trans_handle *trans, - struct btrfs_root *root, +static int delete_duplicate_records(struct btrfs_root *root, struct extent_record *rec) { + struct btrfs_trans_handle *trans; LIST_HEAD(delete_list); struct btrfs_path *path; struct extent_record *tmp, *good, *n; int nr_del = 0; - int ret = 0; + int ret = 0, err; struct btrfs_key key; path = btrfs_alloc_path(); @@ -6791,6 +6795,12 @@ static int delete_duplicate_records(struct btrfs_trans_handle *trans, } root = root->fs_info->extent_root; + trans = btrfs_start_transaction(root, 1); + if (IS_ERR(trans)) { + ret = PTR_ERR(trans); + goto out; + } + list_for_each_entry(tmp, &delete_list, list) { if (tmp->found_rec == 0) continue; @@ -6810,15 +6820,17 @@ static int delete_duplicate_records(struct btrfs_trans_handle *trans, if (ret) { if (ret > 0) ret = -EINVAL; - goto out; + break; } ret = btrfs_del_item(trans, root, path); if (ret) - goto out; + break; btrfs_release_path(path); nr_del++; } - + err = btrfs_commit_transaction(trans, root); + if (err && !ret) + ret = err; out: while (!list_empty(&delete_list)) { tmp = list_entry(delete_list.next, struct extent_record, list); @@ -6842,8 +6854,7 @@ out: return ret ? ret : nr_del; } -static int find_possible_backrefs(struct btrfs_trans_handle *trans, - struct btrfs_fs_info *info, +static int find_possible_backrefs(struct btrfs_fs_info *info, struct btrfs_path *path, struct cache_tree *extent_cache, struct extent_record *rec) @@ -7017,11 +7028,11 @@ out: * all of the existing entries for it and recreate them * based on what the tree scan found. */ -static int fixup_extent_refs(struct btrfs_trans_handle *trans, - struct btrfs_fs_info *info, +static int fixup_extent_refs(struct btrfs_fs_info *info, struct cache_tree *extent_cache, struct extent_record *rec) { + struct btrfs_trans_handle *trans = NULL; int ret; struct btrfs_path *path; struct list_head *cur = rec->backrefs.next; @@ -7030,21 +7041,8 @@ static int fixup_extent_refs(struct btrfs_trans_handle *trans, int allocated = 0; u64 flags = 0; - /* - * remember our flags for recreating the extent. - * FIXME, if we have cleared extent tree, we can not - * lookup extent info in extent tree. - */ - if (!init_extent_tree) { - ret = btrfs_lookup_extent_info(NULL, info->extent_root, - rec->start, rec->max_size, - rec->metadata, NULL, &flags); - if (ret < 0) - return ret; - } else { - if (rec->flag_block_full_backref) - flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF; - } + if (rec->flag_block_full_backref) + flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF; path = btrfs_alloc_path(); if (!path) @@ -7058,17 +7056,22 @@ static int fixup_extent_refs(struct btrfs_trans_handle *trans, * them into the list if we find the backref so that * verify_backrefs can figure out what to do. */ - ret = find_possible_backrefs(trans, info, path, extent_cache, - rec); + ret = find_possible_backrefs(info, path, extent_cache, rec); if (ret < 0) goto out; } /* step one, make sure all of the backrefs agree */ - ret = verify_backrefs(trans, info, path, rec); + ret = verify_backrefs(info, path, rec); if (ret < 0) goto out; + trans = btrfs_start_transaction(info->extent_root, 1); + if (IS_ERR(trans)) { + ret = PTR_ERR(trans); + goto out; + } + /* step two, delete all the existing records */ ret = delete_extent_records(trans, info->extent_root, path, rec->start, rec->max_size); @@ -7096,6 +7099,7 @@ static int fixup_extent_refs(struct btrfs_trans_handle *trans, if (!back->found_ref) continue; + rec->bad_full_backref = 0; ret = record_extent(trans, info, path, rec, back, allocated, flags); allocated = 1; @@ -7103,10 +7107,77 @@ static int fixup_extent_refs(struct btrfs_trans_handle *trans, goto out; } out: + if (trans) { + int err = btrfs_commit_transaction(trans, info->extent_root); + if (!ret) + ret = err; + } + btrfs_free_path(path); return ret; } +static int fixup_extent_flags(struct btrfs_fs_info *fs_info, + struct extent_record *rec) +{ + struct btrfs_trans_handle *trans; + struct btrfs_root *root = fs_info->extent_root; + struct btrfs_path *path; + struct btrfs_extent_item *ei; + struct btrfs_key key; + u64 flags; + int ret = 0; + + key.objectid = rec->start; + if (rec->metadata) { + key.type = BTRFS_METADATA_ITEM_KEY; + key.offset = rec->info_level; + } else { + key.type = BTRFS_EXTENT_ITEM_KEY; + key.offset = rec->max_size; + } + + path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; + + trans = btrfs_start_transaction(root, 0); + if (IS_ERR(trans)) { + btrfs_free_path(path); + return PTR_ERR(trans); + } + + ret = btrfs_search_slot(trans, root, &key, path, 0, 1); + if (ret < 0) { + btrfs_free_path(path); + btrfs_commit_transaction(trans, root); + return ret; + } else if (ret) { + fprintf(stderr, "Didn't find extent for %llu\n", + (unsigned long long)rec->start); + btrfs_free_path(path); + btrfs_commit_transaction(trans, root); + return -ENOENT; + } + + ei = btrfs_item_ptr(path->nodes[0], path->slots[0], + struct btrfs_extent_item); + flags = btrfs_extent_flags(path->nodes[0], ei); + if (rec->flag_block_full_backref) { + fprintf(stderr, "setting full backref on %llu\n", + (unsigned long long)key.objectid); + flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF; + } else { + fprintf(stderr, "clearing full backref on %llu\n", + (unsigned long long)key.objectid); + flags &= ~BTRFS_BLOCK_FLAG_FULL_BACKREF; + } + btrfs_set_extent_flags(path->nodes[0], ei, flags); + btrfs_mark_buffer_dirty(path->nodes[0]); + btrfs_free_path(path); + return btrfs_commit_transaction(trans, root); +} + /* right now we only prune from the extent allocation tree */ static int prune_one_block(struct btrfs_trans_handle *trans, struct btrfs_fs_info *info, @@ -7176,20 +7247,27 @@ out: return ret; } -static int prune_corrupt_blocks(struct btrfs_trans_handle *trans, - struct btrfs_fs_info *info) +static int prune_corrupt_blocks(struct btrfs_fs_info *info) { + struct btrfs_trans_handle *trans = NULL; struct cache_extent *cache; struct btrfs_corrupt_block *corrupt; - cache = search_cache_extent(info->corrupt_blocks, 0); while (1) { + cache = search_cache_extent(info->corrupt_blocks, 0); if (!cache) break; + if (!trans) { + trans = btrfs_start_transaction(info->extent_root, 1); + if (IS_ERR(trans)) + return PTR_ERR(trans); + } corrupt = container_of(cache, struct btrfs_corrupt_block, cache); prune_one_block(trans, info, corrupt); - cache = next_cache_extent(cache); + remove_cache_extent(info->corrupt_blocks, cache); } + if (trans) + return btrfs_commit_transaction(trans, info->extent_root); return 0; } @@ -7219,8 +7297,7 @@ static void reset_cached_block_groups(struct btrfs_fs_info *fs_info) } } -static int check_extent_refs(struct btrfs_trans_handle *trans, - struct btrfs_root *root, +static int check_extent_refs(struct btrfs_root *root, struct cache_tree *extent_cache) { struct extent_record *rec; @@ -7241,22 +7318,28 @@ static int check_extent_refs(struct btrfs_trans_handle *trans, cache = search_cache_extent(extent_cache, 0); while(cache) { rec = container_of(cache, struct extent_record, cache); - btrfs_pin_extent(root->fs_info, - rec->start, rec->max_size); + set_extent_dirty(root->fs_info->excluded_extents, + rec->start, + rec->start + rec->max_size - 1, + GFP_NOFS); cache = next_cache_extent(cache); } /* pin down all the corrupted blocks too */ cache = search_cache_extent(root->fs_info->corrupt_blocks, 0); while(cache) { - btrfs_pin_extent(root->fs_info, - cache->start, cache->size); + set_extent_dirty(root->fs_info->excluded_extents, + cache->start, + cache->start + cache->size - 1, + GFP_NOFS); cache = next_cache_extent(cache); } - prune_corrupt_blocks(trans, root->fs_info); + prune_corrupt_blocks(root->fs_info); reset_cached_block_groups(root->fs_info); } + reset_cached_block_groups(root->fs_info); + /* * We need to delete any duplicate entries we find first otherwise we * could mess up the extent tree when we have backrefs that actually @@ -7276,7 +7359,7 @@ static int check_extent_refs(struct btrfs_trans_handle *trans, */ if (process_duplicates(root, extent_cache, rec)) continue; - ret = delete_duplicate_records(trans, root, rec); + ret = delete_duplicate_records(root, rec); if (ret < 0) return ret; /* @@ -7292,6 +7375,8 @@ static int check_extent_refs(struct btrfs_trans_handle *trans, return -EAGAIN; while(1) { + int cur_err = 0; + fixed = 0; recorded = 0; cache = search_cache_extent(extent_cache, 0); @@ -7302,6 +7387,7 @@ static int check_extent_refs(struct btrfs_trans_handle *trans, fprintf(stderr, "extent item %llu has multiple extent " "items\n", (unsigned long long)rec->start); err = 1; + cur_err = 1; } if (rec->refs != rec->extent_item_refs) { @@ -7322,7 +7408,7 @@ static int check_extent_refs(struct btrfs_trans_handle *trans, * extent, let the fallback method handle it. */ if (!fixed && repair) { - ret = fixup_extent_refs(trans, + ret = fixup_extent_refs( root->fs_info, extent_cache, rec); if (ret) @@ -7331,7 +7417,7 @@ static int check_extent_refs(struct btrfs_trans_handle *trans, } } err = 1; - + cur_err = 1; } if (all_backpointers_checked(rec, 1)) { fprintf(stderr, "backpointer mismatch on [%llu %llu]\n", @@ -7339,12 +7425,13 @@ static int check_extent_refs(struct btrfs_trans_handle *trans, (unsigned long long)rec->nr); if (!fixed && !recorded && repair) { - ret = fixup_extent_refs(trans, root->fs_info, + ret = fixup_extent_refs(root->fs_info, extent_cache, rec); if (ret) goto repair_abort; fixed = 1; } + cur_err = 1; err = 1; } if (!rec->owner_ref_checked) { @@ -7352,17 +7439,35 @@ static int check_extent_refs(struct btrfs_trans_handle *trans, (unsigned long long)rec->start, (unsigned long long)rec->nr); if (!fixed && !recorded && repair) { - ret = fixup_extent_refs(trans, root->fs_info, + ret = fixup_extent_refs(root->fs_info, extent_cache, rec); if (ret) goto repair_abort; fixed = 1; } err = 1; + cur_err = 1; + } + if (rec->bad_full_backref) { + fprintf(stderr, "bad full backref, on [%llu]\n", + (unsigned long long)rec->start); + if (repair) { + ret = fixup_extent_flags(root->fs_info, rec); + if (ret) + goto repair_abort; + fixed = 1; + } + err = 1; + cur_err = 1; } remove_cache_extent(extent_cache, cache); free_all_extent_backrefs(rec); + if (!init_extent_tree && repair && (!cur_err || fixed)) + clear_extent_dirty(root->fs_info->excluded_extents, + rec->start, + rec->start + rec->max_size - 1, + GFP_NOFS); free(rec); } repair_abort: @@ -7371,7 +7476,19 @@ repair_abort: fprintf(stderr, "failed to repair damaged filesystem, aborting\n"); exit(1); } else if (!ret) { + struct btrfs_trans_handle *trans; + + root = root->fs_info->extent_root; + trans = btrfs_start_transaction(root, 1); + if (IS_ERR(trans)) { + ret = PTR_ERR(trans); + goto repair_abort; + } + btrfs_fix_block_accounting(trans, root); + ret = btrfs_commit_transaction(trans, root); + if (ret) + goto repair_abort; } if (err) fprintf(stderr, "repaired damaged extent references\n"); @@ -7421,6 +7538,7 @@ static int check_chunk_refs(struct chunk_record *chunk_rec, u64 devid; u64 offset; u64 length; + int metadump_v2 = 0; int i; int ret = 0; @@ -7433,7 +7551,8 @@ static int check_chunk_refs(struct chunk_record *chunk_rec, cache); if (chunk_rec->length != block_group_rec->offset || chunk_rec->offset != block_group_rec->objectid || - chunk_rec->type_flags != block_group_rec->flags) { + (!metadump_v2 && + chunk_rec->type_flags != block_group_rec->flags)) { if (!silent) fprintf(stderr, "Chunk[%llu, %u, %llu]: length(%llu), offset(%llu), type(%llu) mismatch with block group[%llu, %u, %llu]: offset(%llu), objectid(%llu), flags(%llu)\n", @@ -7467,6 +7586,9 @@ static int check_chunk_refs(struct chunk_record *chunk_rec, ret = 1; } + if (metadump_v2) + return ret; + length = calc_stripe_length(chunk_rec->type_flags, chunk_rec->length, chunk_rec->num_stripes); for (i = 0; i < chunk_rec->num_stripes; ++i) { @@ -7533,7 +7655,7 @@ int check_chunks(struct cache_tree *chunk_cache, cache); err = check_chunk_refs(chunk_rec, block_group_cache, dev_extent_cache, silent); - if (err) + if (err < 0) ret = err; if (err == 0 && good) list_add_tail(&chunk_rec->list, good); @@ -7632,7 +7754,7 @@ static int check_devices(struct rb_root *dev_cache, } static int add_root_item_to_list(struct list_head *head, - u64 objectid, u64 bytenr, + u64 objectid, u64 bytenr, u64 last_snapshot, u8 level, u8 drop_level, int level_size, struct btrfs_key *drop_key) { @@ -7646,6 +7768,7 @@ static int add_root_item_to_list(struct list_head *head, ri_rec->level = level; ri_rec->level_size = level_size; ri_rec->drop_level = drop_level; + ri_rec->last_snapshot = last_snapshot; if (drop_key) memcpy(&ri_rec->drop_key, drop_key, sizeof(*drop_key)); list_add_tail(&ri_rec->list, head); @@ -7653,8 +7776,19 @@ static int add_root_item_to_list(struct list_head *head, return 0; } +static void free_root_item_list(struct list_head *list) +{ + struct root_item_record *ri_rec; + + while (!list_empty(list)) { + ri_rec = list_first_entry(list, struct root_item_record, + list); + list_del_init(&ri_rec->list); + free(ri_rec); + } +} + static int deal_root_from_list(struct list_head *list, - struct btrfs_trans_handle *trans, struct btrfs_root *root, struct block_info *bits, int bits_nr, @@ -7691,29 +7825,25 @@ static int deal_root_from_list(struct list_head *list, * one by one, otherwise we deal with node firstly which * can maximize readahead. */ - if (!init_extent_tree && !rec->drop_level) - goto skip; while (1) { - ret = run_next_block(trans, root, bits, bits_nr, &last, - pending, seen, reada, - nodes, extent_cache, - chunk_cache, dev_cache, - block_group_cache, + ret = run_next_block(root, bits, bits_nr, &last, + pending, seen, reada, nodes, + extent_cache, chunk_cache, + dev_cache, block_group_cache, dev_extent_cache, rec); if (ret != 0) break; } -skip: free_extent_buffer(buf); list_del(&rec->list); free(rec); + if (ret < 0) + break; } while (ret >= 0) { - ret = run_next_block(trans, root, bits, bits_nr, &last, - pending, seen, reada, - nodes, extent_cache, - chunk_cache, dev_cache, - block_group_cache, + ret = run_next_block(root, bits, bits_nr, &last, pending, seen, + reada, nodes, extent_cache, chunk_cache, + dev_cache, block_group_cache, dev_extent_cache, NULL); if (ret != 0) { if (ret > 0) @@ -7735,6 +7865,7 @@ static int check_chunks_and_extents(struct btrfs_root *root) struct cache_tree pending; struct cache_tree reada; struct cache_tree nodes; + struct extent_io_tree excluded_extents; struct cache_tree corrupt_blocks; struct btrfs_path path; struct btrfs_key key; @@ -7743,7 +7874,6 @@ static int check_chunks_and_extents(struct btrfs_root *root) struct block_info *bits; int bits_nr; struct extent_buffer *leaf; - struct btrfs_trans_handle *trans = NULL; int slot; struct btrfs_root_item ri; struct list_head dropping_trees; @@ -7764,15 +7894,12 @@ static int check_chunks_and_extents(struct btrfs_root *root) cache_tree_init(&nodes); cache_tree_init(&reada); cache_tree_init(&corrupt_blocks); + extent_io_tree_init(&excluded_extents); INIT_LIST_HEAD(&dropping_trees); INIT_LIST_HEAD(&normal_trees); if (repair) { - trans = btrfs_start_transaction(root, 1); - if (IS_ERR(trans)) { - fprintf(stderr, "Error starting transaction\n"); - return PTR_ERR(trans); - } + root->fs_info->excluded_extents = &excluded_extents; root->fs_info->fsck_extent_cache = &extent_cache; root->fs_info->free_extent_hook = free_extent_hook; root->fs_info->corrupt_blocks = &corrupt_blocks; @@ -7789,14 +7916,14 @@ again: root1 = root->fs_info->tree_root; level = btrfs_header_level(root1->node); ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid, - root1->node->start, level, 0, + root1->node->start, 0, level, 0, btrfs_level_size(root1, level), NULL); if (ret < 0) goto out; root1 = root->fs_info->chunk_root; level = btrfs_header_level(root1->node); ret = add_root_item_to_list(&normal_trees, root1->root_key.objectid, - root1->node->start, level, 0, + root1->node->start, 0, level, 0, btrfs_level_size(root1, level), NULL); if (ret < 0) goto out; @@ -7821,15 +7948,18 @@ again: btrfs_item_key_to_cpu(leaf, &found_key, path.slots[0]); if (btrfs_key_type(&found_key) == BTRFS_ROOT_ITEM_KEY) { unsigned long offset; + u64 last_snapshot; offset = btrfs_item_ptr_offset(leaf, path.slots[0]); read_extent_buffer(leaf, &ri, offset, sizeof(ri)); + last_snapshot = btrfs_root_last_snapshot(&ri); if (btrfs_disk_key_objectid(&ri.drop_progress) == 0) { level = btrfs_root_level(&ri); level_size = btrfs_level_size(root, level); ret = add_root_item_to_list(&normal_trees, found_key.objectid, - btrfs_root_bytenr(&ri), level, + btrfs_root_bytenr(&ri), + last_snapshot, level, 0, level_size, NULL); if (ret < 0) goto out; @@ -7842,7 +7972,8 @@ again: ret = add_root_item_to_list(&dropping_trees, objectid, btrfs_root_bytenr(&ri), - level, ri.drop_level, + last_snapshot, level, + ri.drop_level, level_size, &found_key); if (ret < 0) goto out; @@ -7851,66 +7982,59 @@ again: path.slots[0]++; } btrfs_release_path(&path); - ret = deal_root_from_list(&normal_trees, trans, root, - bits, bits_nr, &pending, &seen, - &reada, &nodes, &extent_cache, + + /* + * check_block can return -EAGAIN if it fixes something, please keep + * this in mind when dealing with return values from these functions, if + * we get -EAGAIN we want to fall through and restart the loop. + */ + ret = deal_root_from_list(&normal_trees, root, bits, bits_nr, &pending, + &seen, &reada, &nodes, &extent_cache, &chunk_cache, &dev_cache, &block_group_cache, &dev_extent_cache); - if (ret < 0) + if (ret < 0) { + if (ret == -EAGAIN) + goto loop; goto out; - ret = deal_root_from_list(&dropping_trees, trans, root, - bits, bits_nr, &pending, &seen, - &reada, &nodes, &extent_cache, - &chunk_cache, &dev_cache, &block_group_cache, - &dev_extent_cache); - if (ret < 0) + } + ret = deal_root_from_list(&dropping_trees, root, bits, bits_nr, + &pending, &seen, &reada, &nodes, + &extent_cache, &chunk_cache, &dev_cache, + &block_group_cache, &dev_extent_cache); + if (ret < 0) { + if (ret == -EAGAIN) + goto loop; goto out; - if (ret >= 0) - ret = check_extent_refs(trans, root, &extent_cache); - if (ret == -EAGAIN) { - ret = btrfs_commit_transaction(trans, root); - if (ret) - goto out; - - trans = btrfs_start_transaction(root, 1); - if (IS_ERR(trans)) { - ret = PTR_ERR(trans); - goto out; - } - - free_corrupt_blocks_tree(root->fs_info->corrupt_blocks); - free_extent_cache_tree(&seen); - free_extent_cache_tree(&pending); - free_extent_cache_tree(&reada); - free_extent_cache_tree(&nodes); - free_chunk_cache_tree(&chunk_cache); - free_block_group_tree(&block_group_cache); - free_device_cache_tree(&dev_cache); - free_device_extent_tree(&dev_extent_cache); - free_extent_record_cache(root->fs_info, &extent_cache); - goto again; } err = check_chunks(&chunk_cache, &block_group_cache, &dev_extent_cache, NULL, NULL, NULL, 0); - if (err && !ret) - ret = err; + if (err) { + if (err == -EAGAIN) + goto loop; + if (!ret) + ret = err; + } + + ret = check_extent_refs(root, &extent_cache); + if (ret < 0) { + if (ret == -EAGAIN) + goto loop; + goto out; + } err = check_devices(&dev_cache, &dev_extent_cache); if (err && !ret) ret = err; out: - if (trans) { - err = btrfs_commit_transaction(trans, root); - if (!ret) - ret = err; - } if (repair) { free_corrupt_blocks_tree(root->fs_info->corrupt_blocks); + extent_io_tree_cleanup(&excluded_extents); root->fs_info->fsck_extent_cache = NULL; root->fs_info->free_extent_hook = NULL; root->fs_info->corrupt_blocks = NULL; + root->fs_info->excluded_extents = NULL; } free(bits); free_chunk_cache_tree(&chunk_cache); @@ -7922,6 +8046,21 @@ out: free_extent_cache_tree(&reada); free_extent_cache_tree(&nodes); return ret; +loop: + free_corrupt_blocks_tree(root->fs_info->corrupt_blocks); + free_extent_cache_tree(&seen); + free_extent_cache_tree(&pending); + free_extent_cache_tree(&reada); + free_extent_cache_tree(&nodes); + free_chunk_cache_tree(&chunk_cache); + free_block_group_tree(&block_group_cache); + free_device_cache_tree(&dev_cache); + free_device_extent_tree(&dev_extent_cache); + free_extent_record_cache(root->fs_info, &extent_cache); + free_root_item_list(&normal_trees); + free_root_item_list(&dropping_trees); + extent_io_tree_cleanup(&excluded_extents); + goto again; } static int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans, @@ -8856,6 +8995,8 @@ again: if (found_key.type != BTRFS_ROOT_ITEM_KEY) goto next; + if (found_key.objectid == BTRFS_TREE_RELOC_OBJECTID) + goto next; ret = maybe_repair_root_item(info, path, &found_key, trans ? 0 : 1); @@ -8878,6 +9019,8 @@ out: free_roots_info_cache(); if (path) btrfs_free_path(path); + if (trans) + btrfs_commit_transaction(trans, info->tree_root); if (ret < 0) return ret; @@ -309,6 +309,7 @@ static inline unsigned long btrfs_chunk_item_size(int num_stripes) #define BTRFS_HEADER_FLAG_RELOC (1ULL << 1) #define BTRFS_SUPER_FLAG_SEEDING (1ULL << 32) #define BTRFS_SUPER_FLAG_METADUMP (1ULL << 33) +#define BTRFS_SUPER_FLAG_METADUMP_V2 (1ULL << 34) #define BTRFS_BACKREF_REV_MAX 256 #define BTRFS_BACKREF_REV_SHIFT 56 @@ -962,6 +963,7 @@ struct btrfs_fs_info { struct extent_io_tree pinned_extents; struct extent_io_tree pending_del; struct extent_io_tree extent_ins; + struct extent_io_tree *excluded_extents; /* logical->physical extent mapping */ struct btrfs_mapping_tree mapping_tree; @@ -763,6 +763,8 @@ struct btrfs_fs_info *btrfs_new_fs_info(int writable, u64 sb_bytenr) extent_io_tree_init(&fs_info->pinned_extents); extent_io_tree_init(&fs_info->pending_del); extent_io_tree_init(&fs_info->extent_ins); + fs_info->excluded_extents = NULL; + fs_info->fs_root_tree = RB_ROOT; cache_tree_init(&fs_info->mapping_tree.cache_tree); @@ -1004,7 +1006,8 @@ void btrfs_cleanup_all_caches(struct btrfs_fs_info *fs_info) int btrfs_scan_fs_devices(int fd, const char *path, struct btrfs_fs_devices **fs_devices, - u64 sb_bytenr, int super_recover) + u64 sb_bytenr, int super_recover, + int skip_devices) { u64 total_devs; u64 dev_size; @@ -1031,7 +1034,7 @@ int btrfs_scan_fs_devices(int fd, const char *path, return ret; } - if (total_devs != 1) { + if (!skip_devices && total_devs != 1) { ret = btrfs_scan_lblkid(); if (ret) return ret; @@ -1112,7 +1115,8 @@ static struct btrfs_fs_info *__open_ctree_fd(int fp, const char *path, fs_info->on_restoring = 1; ret = btrfs_scan_fs_devices(fp, path, &fs_devices, sb_bytenr, - (flags & OPEN_CTREE_RECOVER_SUPER)); + (flags & OPEN_CTREE_RECOVER_SUPER), + (flags & OPEN_CTREE_NO_DEVICES)); if (ret) goto out; @@ -33,6 +33,7 @@ enum btrfs_open_ctree_flags { OPEN_CTREE_RESTORE = (1 << 4), OPEN_CTREE_NO_BLOCK_GROUPS = (1 << 5), OPEN_CTREE_EXCLUSIVE = (1 << 6), + OPEN_CTREE_NO_DEVICES = (1 << 7), }; static inline u64 btrfs_sb_offset(int mirror) @@ -68,7 +69,7 @@ void btrfs_release_all_roots(struct btrfs_fs_info *fs_info); void btrfs_cleanup_all_caches(struct btrfs_fs_info *fs_info); int btrfs_scan_fs_devices(int fd, const char *path, struct btrfs_fs_devices **fs_devices, u64 sb_bytenr, - int super_recover); + int super_recover, int skip_devices); int btrfs_setup_chunk_tree_and_device_map(struct btrfs_fs_info *fs_info); struct btrfs_root *open_ctree(const char *filename, u64 sb_bytenr, diff --git a/extent-tree.c b/extent-tree.c index 1785e226..e8545ef6 100644 --- a/extent-tree.c +++ b/extent-tree.c @@ -1789,11 +1789,11 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans, static struct btrfs_space_info *__find_space_info(struct btrfs_fs_info *info, u64 flags) { - struct list_head *head = &info->space_info; - struct list_head *cur; struct btrfs_space_info *found; - list_for_each(cur, head) { - found = list_entry(cur, struct btrfs_space_info, list); + + flags &= BTRFS_BLOCK_GROUP_TYPE_MASK; + + list_for_each_entry(found, &info->space_info, list) { if (found->flags & flags) return found; } @@ -1825,7 +1825,7 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags, return -ENOMEM; list_add(&found->list, &info->space_info); - found->flags = flags; + found->flags = flags & BTRFS_BLOCK_GROUP_TYPE_MASK; found->total_bytes = total_bytes; found->bytes_used = bytes_used; found->bytes_pinned = 0; @@ -2566,6 +2566,13 @@ check_failed: goto new_group; } + if (info->excluded_extents && + test_range_bit(info->excluded_extents, ins->objectid, + ins->objectid + num_bytes -1, EXTENT_DIRTY, 0)) { + search_start = ins->objectid + num_bytes; + goto new_group; + } + if (exclude_nr > 0 && (ins->objectid + num_bytes > exclude_start && ins->objectid < exclude_start + exclude_nr)) { search_start = exclude_start + exclude_nr; diff --git a/print-tree.c b/print-tree.c index 3a7c13cd..931a321a 100644 --- a/print-tree.c +++ b/print-tree.c @@ -312,6 +312,10 @@ static void extent_flags_to_str(u64 flags, char *ret) } strcat(ret, "TREE_BLOCK"); } + if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) { + strcat(ret, "|"); + strcat(ret, "FULL_BACKREF"); + } } void print_extent_item(struct extent_buffer *eb, int slot, int metadata) diff --git a/super-recover.c b/super-recover.c index 197fc4bd..e2c31294 100644 --- a/super-recover.c +++ b/super-recover.c @@ -279,7 +279,7 @@ int btrfs_recover_superblocks(const char *dname, } init_recover_superblock(&recover); - ret = btrfs_scan_fs_devices(fd, dname, &recover.fs_devices, 0, 1); + ret = btrfs_scan_fs_devices(fd, dname, &recover.fs_devices, 0, 1, 0); close(fd); if (ret) { ret = 1; diff --git a/tests/fsck-tests/014-no-extent-info/default_case.img b/tests/fsck-tests/014-no-extent-info/default_case.img Binary files differnew file mode 100644 index 00000000..1ff27434 --- /dev/null +++ b/tests/fsck-tests/014-no-extent-info/default_case.img |