diff options
Diffstat (limited to 'disk-io.c')
-rw-r--r-- | disk-io.c | 223 |
1 files changed, 201 insertions, 22 deletions
@@ -89,8 +89,8 @@ int csum_tree_block_size(struct extent_buffer *buf, u16 csum_size, if (verify) { if (memcmp_extent_buffer(buf, result, 0, csum_size)) { - printk("checksum verify failed on %llu wanted %X " - "found %X\n", (unsigned long long)buf->start, + printk("checksum verify failed on %llu found %X " + "wanted %X\n", (unsigned long long)buf->start, *((int *)result), *((char *)buf->data)); free(result); return 1; @@ -141,7 +141,7 @@ int readahead_tree_block(struct btrfs_root *root, u64 bytenr, u32 blocksize, length = blocksize; ret = btrfs_map_block(&root->fs_info->mapping_tree, READ, - bytenr, &length, &multi, 0); + bytenr, &length, &multi, 0, NULL); BUG_ON(ret); device = multi->stripes[0].dev; device->total_ios++; @@ -182,15 +182,52 @@ out: } +static int read_whole_eb(struct btrfs_fs_info *info, struct extent_buffer *eb, int mirror) +{ + unsigned long offset = 0; + struct btrfs_multi_bio *multi = NULL; + struct btrfs_device *device; + int ret = 0; + u64 read_len; + unsigned long bytes_left = eb->len; + + while (bytes_left) { + read_len = bytes_left; + ret = btrfs_map_block(&info->mapping_tree, READ, + eb->start + offset, &read_len, &multi, + mirror, NULL); + if (ret) { + printk("Couldn't map the block %Lu\n", eb->start + offset); + return -EIO; + } + device = multi->stripes[0].dev; + + if (device->fd == 0) + return -EIO; + + eb->fd = device->fd; + device->total_ios++; + eb->dev_bytenr = multi->stripes[0].physical; + kfree(multi); + + if (read_len > bytes_left) + read_len = bytes_left; + + ret = read_extent_from_disk(eb, offset, read_len); + if (ret) + return -EIO; + offset += read_len; + bytes_left -= read_len; + } + return 0; +} + struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr, u32 blocksize, u64 parent_transid) { int ret; struct extent_buffer *eb; - u64 length; u64 best_transid = 0; - struct btrfs_multi_bio *multi = NULL; - struct btrfs_device *device; int mirror_num = 0; int good_mirror = 0; int num_copies; @@ -203,21 +240,8 @@ struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr, if (btrfs_buffer_uptodate(eb, parent_transid)) return eb; - length = blocksize; while (1) { - ret = btrfs_map_block(&root->fs_info->mapping_tree, READ, - eb->start, &length, &multi, mirror_num); - if (ret) { - printk("Couldn't map the block %Lu\n", bytenr); - break; - } - device = multi->stripes[0].dev; - eb->fd = device->fd; - device->total_ios++; - eb->dev_bytenr = multi->stripes[0].physical; - kfree(multi); - ret = read_extent_from_disk(eb); - + ret = read_whole_eb(root->fs_info, eb, mirror_num); if (ret == 0 && check_tree_block(root, eb) == 0 && csum_tree_block(root, eb, 1) == 0 && verify_parent_transid(eb->tree, eb, parent_transid, ignore) @@ -253,12 +277,156 @@ struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr, return NULL; } +static int rmw_eb(struct btrfs_fs_info *info, + struct extent_buffer *eb, struct extent_buffer *orig_eb) +{ + int ret; + unsigned long orig_off = 0; + unsigned long dest_off = 0; + unsigned long copy_len = eb->len; + + ret = read_whole_eb(info, eb, 0); + if (ret) + return ret; + + if (eb->start + eb->len <= orig_eb->start || + eb->start >= orig_eb->start + orig_eb->len) + return 0; + /* + * | ----- orig_eb ------- | + * | ----- stripe ------- | + * | ----- orig_eb ------- | + * | ----- orig_eb ------- | + */ + if (eb->start > orig_eb->start) + orig_off = eb->start - orig_eb->start; + if (orig_eb->start > eb->start) + dest_off = orig_eb->start - eb->start; + + if (copy_len > orig_eb->len - orig_off) + copy_len = orig_eb->len - orig_off; + if (copy_len > eb->len - dest_off) + copy_len = eb->len - dest_off; + + memcpy(eb->data + dest_off, orig_eb->data + orig_off, copy_len); + return 0; +} + +static void split_eb_for_raid56(struct btrfs_fs_info *info, + struct extent_buffer *orig_eb, + struct extent_buffer **ebs, + u64 stripe_len, u64 *raid_map, + int num_stripes) +{ + struct extent_buffer *eb; + u64 start = orig_eb->start; + u64 this_eb_start; + int i; + int ret; + + for (i = 0; i < num_stripes; i++) { + if (raid_map[i] >= BTRFS_RAID5_P_STRIPE) + break; + + eb = malloc(sizeof(struct extent_buffer) + stripe_len); + if (!eb) + BUG(); + memset(eb, 0, sizeof(struct extent_buffer) + stripe_len); + + eb->start = raid_map[i]; + eb->len = stripe_len; + eb->refs = 1; + eb->flags = 0; + eb->fd = -1; + eb->dev_bytenr = (u64)-1; + + this_eb_start = raid_map[i]; + + if (start > this_eb_start || + start + orig_eb->len < this_eb_start + stripe_len) { + ret = rmw_eb(info, eb, orig_eb); + BUG_ON(ret); + } else { + memcpy(eb->data, orig_eb->data + eb->start - start, stripe_len); + } + ebs[i] = eb; + } +} + +static int write_raid56_with_parity(struct btrfs_fs_info *info, + struct extent_buffer *eb, + struct btrfs_multi_bio *multi, + u64 stripe_len, u64 *raid_map) +{ + struct extent_buffer *ebs[multi->num_stripes], *p_eb = NULL, *q_eb = NULL; + int i; + int j; + int ret; + int alloc_size = eb->len; + + if (stripe_len > alloc_size) + alloc_size = stripe_len; + + split_eb_for_raid56(info, eb, ebs, stripe_len, raid_map, + multi->num_stripes); + + for (i = 0; i < multi->num_stripes; i++) { + struct extent_buffer *new_eb; + if (raid_map[i] < BTRFS_RAID5_P_STRIPE) { + ebs[i]->dev_bytenr = multi->stripes[i].physical; + ebs[i]->fd = multi->stripes[i].dev->fd; + multi->stripes[i].dev->total_ios++; + BUG_ON(ebs[i]->start != raid_map[i]); + continue; + } + new_eb = kmalloc(sizeof(*eb) + alloc_size, GFP_NOFS); + BUG_ON(!new_eb); + new_eb->dev_bytenr = multi->stripes[i].physical; + new_eb->fd = multi->stripes[i].dev->fd; + multi->stripes[i].dev->total_ios++; + new_eb->len = stripe_len; + + if (raid_map[i] == BTRFS_RAID5_P_STRIPE) + p_eb = new_eb; + else if (raid_map[i] == BTRFS_RAID6_Q_STRIPE) + q_eb = new_eb; + } + if (q_eb) { + void *pointers[multi->num_stripes]; + ebs[multi->num_stripes - 2] = p_eb; + ebs[multi->num_stripes - 1] = q_eb; + + for (i = 0; i < multi->num_stripes; i++) + pointers[i] = ebs[i]->data; + + raid6_gen_syndrome(multi->num_stripes, stripe_len, pointers); + } else { + ebs[multi->num_stripes - 1] = p_eb; + memcpy(p_eb->data, ebs[0]->data, stripe_len); + for (j = 1; j < multi->num_stripes - 1; j++) { + for (i = 0; i < stripe_len; i += sizeof(unsigned long)) { + *(unsigned long *)(p_eb->data + i) ^= + *(unsigned long *)(ebs[j]->data + i); + } + } + } + + for (i = 0; i < multi->num_stripes; i++) { + ret = write_extent_to_disk(ebs[i]); + BUG_ON(ret); + if (ebs[i] != eb) + kfree(ebs[i]); + } + return 0; +} + int write_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct extent_buffer *eb) { int ret; int dev_nr; u64 length; + u64 *raid_map = NULL; struct btrfs_multi_bio *multi = NULL; if (check_tree_block(root, eb)) @@ -272,9 +440,13 @@ int write_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, dev_nr = 0; length = eb->len; ret = btrfs_map_block(&root->fs_info->mapping_tree, WRITE, - eb->start, &length, &multi, 0); + eb->start, &length, &multi, 0, &raid_map); - while(dev_nr < multi->num_stripes) { + if (raid_map) { + ret = write_raid56_with_parity(root->fs_info, eb, multi, + length, raid_map); + BUG_ON(ret); + } else while (dev_nr < multi->num_stripes) { BUG_ON(ret); eb->fd = multi->stripes[dev_nr].dev->fd; eb->dev_bytenr = multi->stripes[dev_nr].physical; @@ -644,6 +816,9 @@ static struct btrfs_fs_info *__open_ctree_fd(int fp, const char *path, if (sb_bytenr == 0) sb_bytenr = BTRFS_SUPER_INFO_OFFSET; + /* try to drop all the caches */ + posix_fadvise(fp, 0, 0, POSIX_FADV_DONTNEED); + ret = btrfs_scan_one_device(fp, path, &fs_devices, &total_devs, sb_bytenr); @@ -1100,6 +1275,10 @@ static int close_all_devices(struct btrfs_fs_info *fs_info) list = &fs_info->fs_devices->devices; list_for_each(next, list) { device = list_entry(next, struct btrfs_device, dev_list); + if (device->fd) { + fsync(device->fd); + posix_fadvise(device->fd, 0, 0, POSIX_FADV_DONTNEED); + } close(device->fd); } return 0; |