 cmds-chunk.c | 289 +++++++++++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 279 insertions(+), 10 deletions(-)
diff --git a/cmds-chunk.c b/cmds-chunk.c
index 7b740a34..03314deb 100644
--- a/cmds-chunk.c
+++ b/cmds-chunk.c
@@ -43,6 +43,7 @@
#define BTRFS_CHUNK_TREE_REBUILD_ABORTED -7500
#define BTRFS_STRIPE_LEN (64 * 1024)
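+/* Max number of on-disk copies of one tree block tracked per extent record. */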
+#define BTRFS_NUM_MIRRORS 2
struct recover_control {
int verbose;
@@ -59,11 +60,102 @@ struct recover_control {
struct cache_tree chunk;
struct block_group_tree bg;
struct device_extent_tree devext;
+ struct cache_tree eb_cache;
struct list_head good_chunks;
struct list_head bad_chunks;
+ struct list_head unrepaired_chunks;
};
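+/*
+ * One record per tree block found during the device scan.  It remembers
+ * the block's generation and checksum, plus the device and physical
+ * offset of every mirror of the block that the scan has seen so far.
+ */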
+struct extent_record {
+ struct cache_extent cache;
+ u64 generation;
+ u8 csum[BTRFS_CSUM_SIZE];
+ struct btrfs_device *devices[BTRFS_NUM_MIRRORS];
+ u64 offsets[BTRFS_NUM_MIRRORS];
+ int nmirrors;
+};
+
+static struct extent_record *btrfs_new_extent_record(struct extent_buffer *eb)
+{
+ struct extent_record *rec;
+
+ rec = malloc(sizeof(*rec));
+ if (!rec) {
+ fprintf(stderr, "Failed to allocate memory for extent record.\n");
+ exit(1);
+ }
+
+ memset(rec, 0, sizeof(*rec));
+ rec->cache.start = btrfs_header_bytenr(eb);
+ rec->cache.size = eb->len;
+ rec->generation = btrfs_header_generation(eb);
+ read_extent_buffer(eb, rec->csum, (unsigned long)btrfs_header_csum(eb),
+ BTRFS_CSUM_SIZE);
+ return rec;
+}
+
+static int process_extent_buffer(struct cache_tree *eb_cache,
+ struct extent_buffer *eb,
+ struct btrfs_device *device, u64 offset)
+{
+ struct extent_record *rec;
+ struct extent_record *exist;
+ struct cache_extent *cache;
+ int ret = 0;
+
+ rec = btrfs_new_extent_record(eb);
+ if (!rec->cache.size)
+ goto free_out;
+again:
+ cache = lookup_cache_extent(eb_cache,
+ rec->cache.start,
+ rec->cache.size);
+ if (cache) {
+ exist = container_of(cache, struct extent_record, cache);
+
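+ /*
+ * Keep only the newest copy of a block.  For copies of the same
+ * generation, a mismatch in bytenr, size or csum means two
+ * conflicting blocks; a full match is just another mirror of
+ * the block we already recorded.
+ */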
+ if (exist->generation > rec->generation)
+ goto free_out;
+ if (exist->generation == rec->generation) {
+ if (exist->cache.start != rec->cache.start ||
+ exist->cache.size != rec->cache.size ||
+ memcmp(exist->csum, rec->csum, BTRFS_CSUM_SIZE)) {
+ ret = -EEXIST;
+ } else {
+ BUG_ON(exist->nmirrors >= BTRFS_NUM_MIRRORS);
+ exist->devices[exist->nmirrors] = device;
+ exist->offsets[exist->nmirrors] = offset;
+ exist->nmirrors++;
+ }
+ goto free_out;
+ }
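+ /* The cached record is stale: replace it with this newer copy. */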
+ remove_cache_extent(eb_cache, cache);
+ free(exist);
+ goto again;
+ }
+
+ rec->devices[0] = device;
+ rec->offsets[0] = offset;
+ rec->nmirrors++;
+ ret = insert_cache_extent(eb_cache, &rec->cache);
+ BUG_ON(ret);
+out:
+ return ret;
+free_out:
+ free(rec);
+ goto out;
+}
+
+static void free_extent_record(struct cache_extent *cache)
+{
+ struct extent_record *er;
+
+ er = container_of(cache, struct extent_record, cache);
+ free(er);
+}
+
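+/* Generates free_extent_record_tree(), used to tear down rc->eb_cache. */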
+FREE_EXTENT_CACHE_BASED_TREE(extent_record, free_extent_record);
+
static struct btrfs_chunk *create_chunk_item(struct chunk_record *record)
{
struct btrfs_chunk *ret;
@@ -100,11 +192,13 @@ void init_recover_control(struct recover_control *rc, int verbose, int yes)
{
memset(rc, 0, sizeof(struct recover_control));
cache_tree_init(&rc->chunk);
+ cache_tree_init(&rc->eb_cache);
block_group_tree_init(&rc->bg);
device_extent_tree_init(&rc->devext);
INIT_LIST_HEAD(&rc->good_chunks);
INIT_LIST_HEAD(&rc->bad_chunks);
+ INIT_LIST_HEAD(&rc->unrepaired_chunks);
rc->verbose = verbose;
rc->yes = yes;
@@ -115,6 +209,7 @@ void free_recover_control(struct recover_control *rc)
free_block_group_tree(&rc->bg);
free_chunk_cache_tree(&rc->chunk);
free_device_extent_tree(&rc->devext);
+ free_extent_record_tree(&rc->eb_cache);
}
static int process_block_group_item(struct block_group_tree *bg_cache,
@@ -554,11 +649,12 @@ static int check_all_chunks_by_metadata(struct recover_control *rc,
struct btrfs_root *root)
{
struct chunk_record *chunk;
+ struct chunk_record *next;
LIST_HEAD(orphan_chunks);
int ret = 0;
int err;
- list_for_each_entry(chunk, &rc->good_chunks, list) {
+ list_for_each_entry_safe(chunk, next, &rc->good_chunks, list) {
err = check_chunk_by_metadata(rc, root, chunk, 0);
if (err) {
if (err == -ENOENT)
@@ -568,6 +664,14 @@ static int check_all_chunks_by_metadata(struct recover_control *rc,
}
}
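+ /*
+ * The unrepaired chunks were rebuilt without a verified stripe
+ * order, so give them the same relaxed check as the bad chunks.
+ */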
+ list_for_each_entry_safe(chunk, next, &rc->unrepaired_chunks, list) {
+ err = check_chunk_by_metadata(rc, root, chunk, 1);
+ if (err == -ENOENT)
+ list_move_tail(&chunk->list, &orphan_chunks);
+ else if (err && !ret)
+ ret = err;
+ }
+
list_for_each_entry(chunk, &rc->bad_chunks, list) {
err = check_chunk_by_metadata(rc, root, chunk, 1);
if (err != -ENOENT && !ret)
@@ -617,7 +721,8 @@ static inline int is_super_block_address(u64 offset)
return 0;
}
-static int scan_one_device(struct recover_control *rc, int fd)
+static int scan_one_device(struct recover_control *rc, int fd,
+ struct btrfs_device *device)
{
struct extent_buffer *buf;
u64 bytenr;
@@ -649,6 +754,10 @@ static int scan_one_device(struct recover_control *rc, int fd)
continue;
}
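+ /* Record where this tree block lives; the stripe-order rebuild needs it. */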
+ ret = process_extent_buffer(&rc->eb_cache, buf, device, bytenr);
+ if (ret)
+ goto out;
+
if (btrfs_header_level(buf) != 0)
goto next_node;
@@ -692,7 +801,7 @@ static int scan_devices(struct recover_control *rc)
dev->name);
return -1;
}
- ret = scan_one_device(rc, fd);
+ ret = scan_one_device(rc, fd, dev);
close(fd);
if (ret)
return ret;
@@ -1299,7 +1408,7 @@ static int btrfs_verify_device_extents(struct block_group_record *bg,
int expected_num_stripes;
expected_num_stripes = calc_num_stripes(bg->flags);
- if (!expected_num_stripes && expected_num_stripes != ndevexts)
+ if (expected_num_stripes && expected_num_stripes != ndevexts)
return 1;
strpie_length = calc_stripe_length(bg->flags, bg->offset, ndevexts);
@@ -1337,16 +1446,174 @@ static int btrfs_rebuild_unordered_chunk_stripes(struct recover_control *rc,
return 0;
}
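+/*
+ * Map a logical address inside a chunk to the index of the stripe that
+ * holds it.  For RAID5/6 the rotating parity shifts the data stripes by
+ * one slot per full row, hence the (index + stripe_nr) % num_stripes
+ * step.  A worked example (assuming a 4-disk RAID5 chunk with 64K
+ * stripes): logical offset 5 * 64K is data stripe 5; with 3 data
+ * stripes per row that is column 2 of row 1, and the rotation puts it
+ * at stripe index (2 + 1) % 4 = 3.
+ */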
+static int btrfs_calc_stripe_index(struct chunk_record *chunk, u64 logical)
+{
+ u64 offset = logical - chunk->offset;
+ int stripe_nr;
+ int nr_data_stripes;
+ int index;
+
+ stripe_nr = offset / chunk->stripe_len;
+ if (chunk->type_flags & BTRFS_BLOCK_GROUP_RAID0) {
+ index = stripe_nr % chunk->num_stripes;
+ } else if (chunk->type_flags & BTRFS_BLOCK_GROUP_RAID10) {
+ index = stripe_nr % (chunk->num_stripes / chunk->sub_stripes);
+ index *= chunk->sub_stripes;
+ } else if (chunk->type_flags & BTRFS_BLOCK_GROUP_RAID5) {
+ nr_data_stripes = chunk->num_stripes - 1;
+ index = stripe_nr % nr_data_stripes;
+ stripe_nr /= nr_data_stripes;
+ index = (index + stripe_nr) % chunk->num_stripes;
+ } else if (chunk->type_flags & BTRFS_BLOCK_GROUP_RAID6) {
+ nr_data_stripes = chunk->num_stripes - 2;
+ index = stripe_nr % nr_data_stripes;
+ stripe_nr /= nr_data_stripes;
+ index = (index + stripe_nr) % chunk->num_stripes;
+ } else {
+ BUG_ON(1);
+ }
+ return index;
+}
+
+/* Calculate the logical offset of the start of the next stripe. */
+static inline u64 btrfs_next_stripe_logical_offset(struct chunk_record *chunk,
+ u64 logical)
+{
+ u64 offset = logical - chunk->offset;
+
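+ /* Round down to the current stripe boundary, then advance one stripe. */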
+ offset /= chunk->stripe_len;
+ offset *= chunk->stripe_len;
+ offset += chunk->stripe_len;
+
+ return offset + chunk->offset;
+}
+
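+/*
+ * Does one of the record's mirrors fall inside the given device extent?
+ * If so, report which mirror through *mirror.
+ */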
+static int is_extent_record_in_device_extent(struct extent_record *er,
+ struct device_extent_record *dext,
+ int *mirror)
+{
+ int i;
+
+ for (i = 0; i < er->nmirrors; i++) {
+ if (er->devices[i]->devid == dext->objectid &&
+ er->offsets[i] >= dext->offset &&
+ er->offsets[i] < dext->offset + dext->length) {
+ *mirror = i;
+ return 1;
+ }
+ }
+ return 0;
+}
+
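+/*
+ * Rebuild the stripe order of a metadata chunk.  Each tree block found
+ * during the scan fixes, via its logical address, which stripe slot it
+ * must occupy; the device extent holding one of its mirrors then tells
+ * us which device that slot belongs to.
+ */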
+static int
+btrfs_rebuild_ordered_meta_chunk_stripes(struct recover_control *rc,
+ struct chunk_record *chunk)
+{
+ u64 start = chunk->offset;
+ u64 end = chunk->offset + chunk->length;
+ struct cache_extent *cache;
+ struct extent_record *er;
+ struct device_extent_record *devext;
+ struct device_extent_record *next;
+ struct btrfs_device *device;
+ LIST_HEAD(devexts);
+ int index;
+ int mirror;
+ int ret;
+
+ cache = lookup_cache_extent(&rc->eb_cache,
+ start, chunk->length);
+ if (!cache) {
+ /* No used space; we can reorder the stripes freely. */
+ ret = btrfs_rebuild_unordered_chunk_stripes(rc, chunk);
+ return ret;
+ }
+
+ list_splice_init(&chunk->dextents, &devexts);
+again:
+ er = container_of(cache, struct extent_record, cache);
+ index = btrfs_calc_stripe_index(chunk, er->cache.start);
+ if (chunk->stripes[index].devid)
+ goto next;
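+ /*
+ * Find the device extents holding this block's mirrors and pin
+ * them to the computed stripe slot(s).
+ */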
+ list_for_each_entry_safe(devext, next, &devexts, chunk_list) {
+ if (is_extent_record_in_device_extent(er, devext, &mirror)) {
+ chunk->stripes[index].devid = devext->objectid;
+ chunk->stripes[index].offset = devext->offset;
+ memcpy(chunk->stripes[index].dev_uuid,
+ er->devices[mirror]->uuid,
+ BTRFS_UUID_SIZE);
+ index++;
+ list_move(&devext->chunk_list, &chunk->dextents);
+ }
+ }
+next:
+ start = btrfs_next_stripe_logical_offset(chunk, er->cache.start);
+ if (start >= end)
+ goto no_extent_record;
+
+ cache = lookup_cache_extent(&rc->eb_cache, start, end - start);
+ if (cache)
+ goto again;
+no_extent_record:
+ if (list_empty(&devexts))
+ return 0;
+
+ if (chunk->type_flags & (BTRFS_BLOCK_GROUP_RAID5 |
+ BTRFS_BLOCK_GROUP_RAID6)) {
+ /* Fixme: try to recover the order by the parity block. */
+ list_splice_tail(&devexts, &chunk->dextents);
+ return -EINVAL;
+ }
+
+ /* There is no data on the lost stripes, so we can reorder them freely. */
+ for (index = 0; index < chunk->num_stripes; index++) {
+ if (chunk->stripes[index].devid)
+ continue;
+
+ devext = list_first_entry(&devexts,
+ struct device_extent_record,
+ chunk_list);
+ list_move(&devext->chunk_list, &chunk->dextents);
+
+ chunk->stripes[index].devid = devext->objectid;
+ chunk->stripes[index].offset = devext->offset;
+ device = btrfs_find_device_by_devid(rc->fs_devices,
+ devext->objectid,
+ 0);
+ if (!device) {
+ list_splice_tail(&devexts, &chunk->dextents);
+ return -EINVAL;
+ }
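+ /* A second device with the same devid would make the mapping ambiguous. */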
+ BUG_ON(btrfs_find_device_by_devid(rc->fs_devices,
+ devext->objectid,
+ 1));
+ memcpy(chunk->stripes[index].dev_uuid, device->uuid,
+ BTRFS_UUID_SIZE);
+ }
+ return 0;
+}
+
+#define BTRFS_ORDERED_RAID (BTRFS_BLOCK_GROUP_RAID0 | \
+ BTRFS_BLOCK_GROUP_RAID10 | \
+ BTRFS_BLOCK_GROUP_RAID5 | \
+ BTRFS_BLOCK_GROUP_RAID6)
+
static int btrfs_rebuild_chunk_stripes(struct recover_control *rc,
struct chunk_record *chunk)
{
int ret;
- if (chunk->type_flags & (BTRFS_BLOCK_GROUP_RAID10 |
- BTRFS_BLOCK_GROUP_RAID0 |
- BTRFS_BLOCK_GROUP_RAID5 |
- BTRFS_BLOCK_GROUP_RAID6))
- BUG_ON(1); /* Fixme: implement in the next patch */
+ /*
+ * All the data in the system metadata chunk will be dropped,
+ * so we need not guarantee that the data is correct; that is,
+ * we can reorder the stripes in the system metadata chunk.
+ */
+ if ((chunk->type_flags & BTRFS_BLOCK_GROUP_METADATA) &&
+ (chunk->type_flags & BTRFS_ORDERED_RAID))
+ ret = btrfs_rebuild_ordered_meta_chunk_stripes(rc, chunk);
+ else if ((chunk->type_flags & BTRFS_BLOCK_GROUP_DATA) &&
+ (chunk->type_flags & BTRFS_ORDERED_RAID))
+ ret = 1; /* To be handled after the fs is opened. */
else
ret = btrfs_rebuild_unordered_chunk_stripes(rc, chunk);
@@ -1407,7 +1674,9 @@ static int btrfs_recover_chunks(struct recover_control *rc)
chunk->num_stripes = nstripes;
ret = btrfs_rebuild_chunk_stripes(rc, chunk);
- if (ret)
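+ /*
+ * A positive return marks a data chunk whose stripe order can only
+ * be fixed after the filesystem is opened; keep it aside for now.
+ */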
+ if (ret > 0)
+ list_add_tail(&chunk->list, &rc->unrepaired_chunks);
+ else if (ret < 0)
list_add_tail(&chunk->list, &rc->bad_chunks);
else
list_add_tail(&chunk->list, &rc->good_chunks);