diff options
Diffstat (limited to 'utils.c')
-rw-r--r-- | utils.c | 999 |
1 files changed, 945 insertions, 54 deletions
@@ -58,6 +58,9 @@ static int btrfs_scan_done = 0;
 
 static char argv0_buf[ARGV0_BUF_SIZE] = "btrfs";
 
+static int rand_seed_initlized = 0;
+static unsigned short rand_seed[3];
+
 const char *get_argv0_buf(void)
 {
 	return argv0_buf;
@@ -179,9 +182,831 @@ int test_uuid_unique(char *fs_uuid)
 }
 
 /*
+ * Reserve space from free_tree.
+ * The algorithm is very simple, find the first cache_extent with enough space
+ * and allocate from its beginning.
+ *
+ * @free_tree:	tree of free cache extents to carve the range from
+ * @len:	number of bytes to reserve
+ * @ret_start:	set to the start of the reserved range on success, must not
+ *		be NULL
+ *
+ * Return 0 on success, -ENOSPC if no extent is at least @len bytes long.
+ */
+static int reserve_free_space(struct cache_tree *free_tree, u64 len,
+			      u64 *ret_start)
+{
+	struct cache_extent *cache;
+
+	BUG_ON(!ret_start);
+	for (cache = first_cache_extent(free_tree); cache;
+	     cache = next_cache_extent(cache)) {
+		/*
+		 * An exact fit is acceptable as well, it simply consumes the
+		 * whole extent (handled by the size == 0 branch below).
+		 */
+		if (cache->size < len)
+			continue;
+
+		*ret_start = cache->start;
+		cache->size -= len;
+		if (cache->size == 0) {
+			/* Extent fully used, drop it from the tree */
+			remove_cache_extent(free_tree, cache);
+			free(cache);
+		} else {
+			/* Allocate from the beginning, keep the tail free */
+			cache->start += len;
+		}
+		return 0;
+	}
+	return -ENOSPC;
+}
+
+/*
+ * Checksum the super block @sb and write it to @fd at offset @sb_bytenr.
+ *
+ * The checksum covers everything after the first BTRFS_CSUM_SIZE bytes and is
+ * stored into sb->csum before the write.
+ *
+ * Return 0 on success, -errno if pwrite() failed and -EIO on a short write.
+ */
+static inline int write_temp_super(int fd, struct btrfs_super_block *sb,
+				   u64 sb_bytenr)
+{
+	u32 crc = ~(u32)0;
+	int ret;
+
+	crc = btrfs_csum_data(NULL, (char *)sb + BTRFS_CSUM_SIZE, crc,
+			      BTRFS_SUPER_INFO_SIZE - BTRFS_CSUM_SIZE);
+	btrfs_csum_final(crc, (char *)&sb->csum[0]);
+	ret = pwrite(fd, sb, BTRFS_SUPER_INFO_SIZE, sb_bytenr);
+	if (ret < BTRFS_SUPER_INFO_SIZE)
+		ret = (ret < 0 ? -errno : -EIO);
+	else
+		ret = 0;
+	return ret;
+}
+
+/*
+ * Setup temporary superblock at cfg->super_bytenr
+ * Needed info are extracted from cfg, and root_bytenr, chunk_bytenr
+ *
+ * For now sys chunk array will be empty and dev_item is empty too.
+ * They will be re-initialized at temp chunk tree setup.
+ */ +static int setup_temp_super(int fd, struct btrfs_mkfs_config *cfg, + u64 root_bytenr, u64 chunk_bytenr) +{ + unsigned char chunk_uuid[BTRFS_UUID_SIZE]; + char super_buf[BTRFS_SUPER_INFO_SIZE]; + struct btrfs_super_block *super = (struct btrfs_super_block *)super_buf; + int ret; + + /* + * We rely on cfg->chunk_uuid and cfg->fs_uuid to pass uuid + * for other functions. + * Caller must allocate space for them + */ + BUG_ON(!cfg->chunk_uuid || !cfg->fs_uuid); + memset(super_buf, 0, BTRFS_SUPER_INFO_SIZE); + cfg->num_bytes = round_down(cfg->num_bytes, cfg->sectorsize); + + if (cfg->fs_uuid && *cfg->fs_uuid) { + if (uuid_parse(cfg->fs_uuid, super->fsid) != 0) { + error("cound not parse UUID: %s", cfg->fs_uuid); + ret = -EINVAL; + goto out; + } + if (!test_uuid_unique(cfg->fs_uuid)) { + error("non-unique UUID: %s", cfg->fs_uuid); + ret = -EINVAL; + goto out; + } + } else { + uuid_generate(super->fsid); + uuid_unparse(super->fsid, cfg->fs_uuid); + } + uuid_generate(chunk_uuid); + uuid_unparse(chunk_uuid, cfg->chunk_uuid); + + btrfs_set_super_bytenr(super, cfg->super_bytenr); + btrfs_set_super_num_devices(super, 1); + btrfs_set_super_magic(super, BTRFS_MAGIC); + btrfs_set_super_generation(super, 1); + btrfs_set_super_root(super, root_bytenr); + btrfs_set_super_chunk_root(super, chunk_bytenr); + btrfs_set_super_total_bytes(super, cfg->num_bytes); + /* + * Temporary filesystem will only have 6 tree roots: + * chunk tree, root tree, extent_tree, device tree, fs tree + * and csum tree. 
+ */ + btrfs_set_super_bytes_used(super, 6 * cfg->nodesize); + btrfs_set_super_sectorsize(super, cfg->sectorsize); + btrfs_set_super_leafsize(super, cfg->nodesize); + btrfs_set_super_nodesize(super, cfg->nodesize); + btrfs_set_super_stripesize(super, cfg->stripesize); + btrfs_set_super_csum_type(super, BTRFS_CSUM_TYPE_CRC32); + btrfs_set_super_chunk_root(super, chunk_bytenr); + btrfs_set_super_cache_generation(super, -1); + btrfs_set_super_incompat_flags(super, cfg->features); + if (cfg->label) + __strncpy_null(super->label, cfg->label, BTRFS_LABEL_SIZE - 1); + + /* Sys chunk array will be re-initialized at chunk tree init time */ + super->sys_chunk_array_size = 0; + + ret = write_temp_super(fd, super, cfg->super_bytenr); +out: + return ret; +} + +/* + * Setup an extent buffer for tree block. + */ +static int setup_temp_extent_buffer(struct extent_buffer *buf, + struct btrfs_mkfs_config *cfg, + u64 bytenr, u64 owner) +{ + unsigned char fsid[BTRFS_FSID_SIZE]; + unsigned char chunk_uuid[BTRFS_UUID_SIZE]; + int ret; + + /* We rely on cfg->fs_uuid and chunk_uuid to fsid and chunk uuid */ + BUG_ON(!cfg->fs_uuid || !cfg->chunk_uuid); + ret = uuid_parse(cfg->fs_uuid, fsid); + if (ret) + return -EINVAL; + ret = uuid_parse(cfg->chunk_uuid, chunk_uuid); + if (ret) + return -EINVAL; + + memset(buf->data, 0, cfg->nodesize); + buf->len = cfg->nodesize; + btrfs_set_header_bytenr(buf, bytenr); + btrfs_set_header_generation(buf, 1); + btrfs_set_header_backref_rev(buf, BTRFS_MIXED_BACKREF_REV); + btrfs_set_header_owner(buf, owner); + btrfs_set_header_flags(buf, BTRFS_HEADER_FLAG_WRITTEN); + write_extent_buffer(buf, chunk_uuid, btrfs_header_chunk_tree_uuid(buf), + BTRFS_UUID_SIZE); + write_extent_buffer(buf, fsid, btrfs_header_fsid(), BTRFS_FSID_SIZE); + return 0; +} + +static inline int write_temp_extent_buffer(int fd, struct extent_buffer *buf, + u64 bytenr) +{ + int ret; + + csum_tree_block_size(buf, BTRFS_CRC32_SIZE, 0); + + /* Temporary extent buffer is always mapped 1:1 on 
disk */ + ret = pwrite(fd, buf->data, buf->len, bytenr); + if (ret < buf->len) + ret = (ret < 0 ? ret : -EIO); + else + ret = 0; + return ret; +} + +/* + * Insert a root item for temporary tree root + * + * Only used in make_btrfs_v2(). + */ +static void insert_temp_root_item(struct extent_buffer *buf, + struct btrfs_mkfs_config *cfg, + int *slot, u32 *itemoff, u64 objectid, + u64 bytenr) +{ + struct btrfs_root_item root_item; + struct btrfs_inode_item *inode_item; + struct btrfs_disk_key disk_key; + + btrfs_set_header_nritems(buf, *slot + 1); + (*itemoff) -= sizeof(root_item); + memset(&root_item, 0, sizeof(root_item)); + inode_item = &root_item.inode; + btrfs_set_stack_inode_generation(inode_item, 1); + btrfs_set_stack_inode_size(inode_item, 3); + btrfs_set_stack_inode_nlink(inode_item, 1); + btrfs_set_stack_inode_nbytes(inode_item, cfg->nodesize); + btrfs_set_stack_inode_mode(inode_item, S_IFDIR | 0755); + btrfs_set_root_refs(&root_item, 1); + btrfs_set_root_used(&root_item, cfg->nodesize); + btrfs_set_root_generation(&root_item, 1); + btrfs_set_root_bytenr(&root_item, bytenr); + + memset(&disk_key, 0, sizeof(disk_key)); + btrfs_set_disk_key_type(&disk_key, BTRFS_ROOT_ITEM_KEY); + btrfs_set_disk_key_objectid(&disk_key, objectid); + btrfs_set_disk_key_offset(&disk_key, 0); + + btrfs_set_item_key(buf, &disk_key, *slot); + btrfs_set_item_offset(buf, btrfs_item_nr(*slot), *itemoff); + btrfs_set_item_size(buf, btrfs_item_nr(*slot), sizeof(root_item)); + write_extent_buffer(buf, &root_item, + btrfs_item_ptr_offset(buf, *slot), + sizeof(root_item)); + (*slot)++; +} + +static int setup_temp_root_tree(int fd, struct btrfs_mkfs_config *cfg, + u64 root_bytenr, u64 extent_bytenr, + u64 dev_bytenr, u64 fs_bytenr, u64 csum_bytenr) +{ + struct extent_buffer *buf = NULL; + u32 itemoff = __BTRFS_LEAF_DATA_SIZE(cfg->nodesize); + int slot = 0; + int ret; + + /* + * Provided bytenr must in ascending order, or tree root will have a + * bad key order. 
+ */ + BUG_ON(!(root_bytenr < extent_bytenr && extent_bytenr < dev_bytenr && + dev_bytenr < fs_bytenr && fs_bytenr < csum_bytenr)); + buf = malloc(sizeof(*buf) + cfg->nodesize); + if (!buf) + return -ENOMEM; + + ret = setup_temp_extent_buffer(buf, cfg, root_bytenr, + BTRFS_ROOT_TREE_OBJECTID); + if (ret < 0) + goto out; + + insert_temp_root_item(buf, cfg, &slot, &itemoff, + BTRFS_EXTENT_TREE_OBJECTID, extent_bytenr); + insert_temp_root_item(buf, cfg, &slot, &itemoff, + BTRFS_DEV_TREE_OBJECTID, dev_bytenr); + insert_temp_root_item(buf, cfg, &slot, &itemoff, + BTRFS_FS_TREE_OBJECTID, fs_bytenr); + insert_temp_root_item(buf, cfg, &slot, &itemoff, + BTRFS_CSUM_TREE_OBJECTID, csum_bytenr); + + ret = write_temp_extent_buffer(fd, buf, root_bytenr); +out: + free(buf); + return ret; +} + +static int insert_temp_dev_item(int fd, struct extent_buffer *buf, + struct btrfs_mkfs_config *cfg, + int *slot, u32 *itemoff) +{ + struct btrfs_disk_key disk_key; + struct btrfs_dev_item *dev_item; + char super_buf[BTRFS_SUPER_INFO_SIZE]; + unsigned char dev_uuid[BTRFS_UUID_SIZE]; + unsigned char fsid[BTRFS_FSID_SIZE]; + struct btrfs_super_block *super = (struct btrfs_super_block *)super_buf; + int ret; + + ret = pread(fd, super_buf, BTRFS_SUPER_INFO_SIZE, cfg->super_bytenr); + if (ret < BTRFS_SUPER_INFO_SIZE) { + ret = (ret < 0 ? 
-errno : -EIO); + goto out; + } + + btrfs_set_header_nritems(buf, *slot + 1); + (*itemoff) -= sizeof(*dev_item); + /* setup device item 1, 0 is for replace case */ + btrfs_set_disk_key_type(&disk_key, BTRFS_DEV_ITEM_KEY); + btrfs_set_disk_key_objectid(&disk_key, BTRFS_DEV_ITEMS_OBJECTID); + btrfs_set_disk_key_offset(&disk_key, 1); + btrfs_set_item_key(buf, &disk_key, *slot); + btrfs_set_item_offset(buf, btrfs_item_nr(*slot), *itemoff); + btrfs_set_item_size(buf, btrfs_item_nr(*slot), sizeof(*dev_item)); + + dev_item = btrfs_item_ptr(buf, *slot, struct btrfs_dev_item); + /* Generate device uuid */ + uuid_generate(dev_uuid); + write_extent_buffer(buf, dev_uuid, + (unsigned long)btrfs_device_uuid(dev_item), + BTRFS_UUID_SIZE); + uuid_parse(cfg->fs_uuid, fsid); + write_extent_buffer(buf, fsid, + (unsigned long)btrfs_device_fsid(dev_item), + BTRFS_FSID_SIZE); + btrfs_set_device_id(buf, dev_item, 1); + btrfs_set_device_generation(buf, dev_item, 0); + btrfs_set_device_total_bytes(buf, dev_item, cfg->num_bytes); + /* + * The number must match the initial SYSTEM and META chunk size + */ + btrfs_set_device_bytes_used(buf, dev_item, + BTRFS_MKFS_SYSTEM_GROUP_SIZE + + BTRFS_CONVERT_META_GROUP_SIZE); + btrfs_set_device_io_align(buf, dev_item, cfg->sectorsize); + btrfs_set_device_io_width(buf, dev_item, cfg->sectorsize); + btrfs_set_device_sector_size(buf, dev_item, cfg->sectorsize); + btrfs_set_device_type(buf, dev_item, 0); + + /* Super dev_item is not complete, copy the complete one to sb */ + read_extent_buffer(buf, &super->dev_item, (unsigned long)dev_item, + sizeof(*dev_item)); + ret = write_temp_super(fd, super, cfg->super_bytenr); + (*slot)++; +out: + return ret; +} + +static int insert_temp_chunk_item(int fd, struct extent_buffer *buf, + struct btrfs_mkfs_config *cfg, + int *slot, u32 *itemoff, u64 start, u64 len, + u64 type) +{ + struct btrfs_chunk *chunk; + struct btrfs_disk_key disk_key; + char super_buf[BTRFS_SUPER_INFO_SIZE]; + struct btrfs_super_block *sb = 
(struct btrfs_super_block *)super_buf; + int ret = 0; + + ret = pread(fd, super_buf, BTRFS_SUPER_INFO_SIZE, + cfg->super_bytenr); + if (ret < BTRFS_SUPER_INFO_SIZE) { + ret = (ret < 0 ? ret : -EIO); + return ret; + } + + btrfs_set_header_nritems(buf, *slot + 1); + (*itemoff) -= btrfs_chunk_item_size(1); + btrfs_set_disk_key_type(&disk_key, BTRFS_CHUNK_ITEM_KEY); + btrfs_set_disk_key_objectid(&disk_key, BTRFS_FIRST_CHUNK_TREE_OBJECTID); + btrfs_set_disk_key_offset(&disk_key, start); + btrfs_set_item_key(buf, &disk_key, *slot); + btrfs_set_item_offset(buf, btrfs_item_nr(*slot), *itemoff); + btrfs_set_item_size(buf, btrfs_item_nr(*slot), + btrfs_chunk_item_size(1)); + + chunk = btrfs_item_ptr(buf, *slot, struct btrfs_chunk); + btrfs_set_chunk_length(buf, chunk, len); + btrfs_set_chunk_owner(buf, chunk, BTRFS_EXTENT_TREE_OBJECTID); + btrfs_set_chunk_stripe_len(buf, chunk, 64 * 1024); + btrfs_set_chunk_type(buf, chunk, type); + btrfs_set_chunk_io_align(buf, chunk, cfg->sectorsize); + btrfs_set_chunk_io_width(buf, chunk, cfg->sectorsize); + btrfs_set_chunk_sector_size(buf, chunk, cfg->sectorsize); + btrfs_set_chunk_num_stripes(buf, chunk, 1); + /* TODO: Support DUP profile for system chunk */ + btrfs_set_stripe_devid_nr(buf, chunk, 0, 1); + /* We are doing 1:1 mapping, so start is its dev offset */ + btrfs_set_stripe_offset_nr(buf, chunk, 0, start); + write_extent_buffer(buf, &sb->dev_item.uuid, + (unsigned long)btrfs_stripe_dev_uuid_nr(chunk, 0), + BTRFS_UUID_SIZE); + (*slot)++; + + /* + * If it's system chunk, also copy it to super block. 
+ */ + if (type & BTRFS_BLOCK_GROUP_SYSTEM) { + char *cur; + + cur = (char *)sb->sys_chunk_array + sb->sys_chunk_array_size; + memcpy(cur, &disk_key, sizeof(disk_key)); + cur += sizeof(disk_key); + read_extent_buffer(buf, cur, (unsigned long int)chunk, + btrfs_chunk_item_size(1)); + sb->sys_chunk_array_size += btrfs_chunk_item_size(1) + + sizeof(disk_key); + + ret = write_temp_super(fd, sb, cfg->super_bytenr); + } + return ret; +} + +static int setup_temp_chunk_tree(int fd, struct btrfs_mkfs_config *cfg, + u64 sys_chunk_start, u64 meta_chunk_start, + u64 chunk_bytenr) +{ + struct extent_buffer *buf = NULL; + u32 itemoff = __BTRFS_LEAF_DATA_SIZE(cfg->nodesize); + int slot = 0; + int ret; + + /* Must ensure SYS chunk starts before META chunk */ + BUG_ON(meta_chunk_start < sys_chunk_start); + buf = malloc(sizeof(*buf) + cfg->nodesize); + if (!buf) + return -ENOMEM; + ret = setup_temp_extent_buffer(buf, cfg, chunk_bytenr, + BTRFS_CHUNK_TREE_OBJECTID); + if (ret < 0) + goto out; + + ret = insert_temp_dev_item(fd, buf, cfg, &slot, &itemoff); + if (ret < 0) + goto out; + ret = insert_temp_chunk_item(fd, buf, cfg, &slot, &itemoff, + sys_chunk_start, + BTRFS_MKFS_SYSTEM_GROUP_SIZE, + BTRFS_BLOCK_GROUP_SYSTEM); + if (ret < 0) + goto out; + ret = insert_temp_chunk_item(fd, buf, cfg, &slot, &itemoff, + meta_chunk_start, + BTRFS_CONVERT_META_GROUP_SIZE, + BTRFS_BLOCK_GROUP_METADATA); + if (ret < 0) + goto out; + ret = write_temp_extent_buffer(fd, buf, chunk_bytenr); + +out: + free(buf); + return ret; +} + +static void insert_temp_dev_extent(struct extent_buffer *buf, + int *slot, u32 *itemoff, u64 start, u64 len) +{ + struct btrfs_dev_extent *dev_extent; + struct btrfs_disk_key disk_key; + + btrfs_set_header_nritems(buf, *slot + 1); + (*itemoff) -= sizeof(*dev_extent); + btrfs_set_disk_key_type(&disk_key, BTRFS_DEV_EXTENT_KEY); + btrfs_set_disk_key_objectid(&disk_key, 1); + btrfs_set_disk_key_offset(&disk_key, start); + btrfs_set_item_key(buf, &disk_key, *slot); + 
btrfs_set_item_offset(buf, btrfs_item_nr(*slot), *itemoff); + btrfs_set_item_size(buf, btrfs_item_nr(*slot), sizeof(*dev_extent)); + + dev_extent = btrfs_item_ptr(buf, *slot, struct btrfs_dev_extent); + btrfs_set_dev_extent_chunk_objectid(buf, dev_extent, + BTRFS_FIRST_CHUNK_TREE_OBJECTID); + btrfs_set_dev_extent_length(buf, dev_extent, len); + btrfs_set_dev_extent_chunk_offset(buf, dev_extent, start); + btrfs_set_dev_extent_chunk_tree(buf, dev_extent, + BTRFS_CHUNK_TREE_OBJECTID); + (*slot)++; +} + +static int setup_temp_dev_tree(int fd, struct btrfs_mkfs_config *cfg, + u64 sys_chunk_start, u64 meta_chunk_start, + u64 dev_bytenr) +{ + struct extent_buffer *buf = NULL; + u32 itemoff = __BTRFS_LEAF_DATA_SIZE(cfg->nodesize); + int slot = 0; + int ret; + + /* Must ensure SYS chunk starts before META chunk */ + BUG_ON(meta_chunk_start < sys_chunk_start); + buf = malloc(sizeof(*buf) + cfg->nodesize); + if (!buf) + return -ENOMEM; + ret = setup_temp_extent_buffer(buf, cfg, dev_bytenr, + BTRFS_DEV_TREE_OBJECTID); + if (ret < 0) + goto out; + insert_temp_dev_extent(buf, &slot, &itemoff, sys_chunk_start, + BTRFS_MKFS_SYSTEM_GROUP_SIZE); + insert_temp_dev_extent(buf, &slot, &itemoff, meta_chunk_start, + BTRFS_CONVERT_META_GROUP_SIZE); + ret = write_temp_extent_buffer(fd, buf, dev_bytenr); +out: + free(buf); + return ret; +} + +static int setup_temp_fs_tree(int fd, struct btrfs_mkfs_config *cfg, + u64 fs_bytenr) +{ + struct extent_buffer *buf = NULL; + int ret; + + buf = malloc(sizeof(*buf) + cfg->nodesize); + if (!buf) + return -ENOMEM; + ret = setup_temp_extent_buffer(buf, cfg, fs_bytenr, + BTRFS_FS_TREE_OBJECTID); + if (ret < 0) + goto out; + /* + * Temporary fs tree is completely empty. 
+ */ + ret = write_temp_extent_buffer(fd, buf, fs_bytenr); +out: + free(buf); + return ret; +} + +static int setup_temp_csum_tree(int fd, struct btrfs_mkfs_config *cfg, + u64 csum_bytenr) +{ + struct extent_buffer *buf = NULL; + int ret; + + buf = malloc(sizeof(*buf) + cfg->nodesize); + if (!buf) + return -ENOMEM; + ret = setup_temp_extent_buffer(buf, cfg, csum_bytenr, + BTRFS_CSUM_TREE_OBJECTID); + if (ret < 0) + goto out; + /* + * Temporary csum tree is completely empty. + */ + ret = write_temp_extent_buffer(fd, buf, csum_bytenr); +out: + free(buf); + return ret; +} + +/* + * Insert one temporary extent item. + * + * NOTE: if skinny_metadata is not enabled, this function must be called + * after all other trees are initialized. + * Or fs without skinny-metadata will be screwed up. + */ +static int insert_temp_extent_item(int fd, struct extent_buffer *buf, + struct btrfs_mkfs_config *cfg, + int *slot, u32 *itemoff, u64 bytenr, + u64 ref_root) +{ + struct extent_buffer *tmp; + struct btrfs_extent_item *ei; + struct btrfs_extent_inline_ref *iref; + struct btrfs_disk_key disk_key; + struct btrfs_disk_key tree_info_key; + struct btrfs_tree_block_info *info; + int itemsize; + int skinny_metadata = cfg->features & + BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA; + int ret; + + if (skinny_metadata) + itemsize = sizeof(*ei) + sizeof(*iref); + else + itemsize = sizeof(*ei) + sizeof(*iref) + + sizeof(struct btrfs_tree_block_info); + + btrfs_set_header_nritems(buf, *slot + 1); + *(itemoff) -= itemsize; + + if (skinny_metadata) { + btrfs_set_disk_key_type(&disk_key, BTRFS_METADATA_ITEM_KEY); + btrfs_set_disk_key_offset(&disk_key, 0); + } else { + btrfs_set_disk_key_type(&disk_key, BTRFS_EXTENT_ITEM_KEY); + btrfs_set_disk_key_offset(&disk_key, cfg->nodesize); + } + btrfs_set_disk_key_objectid(&disk_key, bytenr); + + btrfs_set_item_key(buf, &disk_key, *slot); + btrfs_set_item_offset(buf, btrfs_item_nr(*slot), *itemoff); + btrfs_set_item_size(buf, btrfs_item_nr(*slot), itemsize); + + 
ei = btrfs_item_ptr(buf, *slot, struct btrfs_extent_item); + btrfs_set_extent_refs(buf, ei, 1); + btrfs_set_extent_generation(buf, ei, 1); + btrfs_set_extent_flags(buf, ei, BTRFS_EXTENT_FLAG_TREE_BLOCK); + + if (skinny_metadata) { + iref = (struct btrfs_extent_inline_ref *)(ei + 1); + } else { + info = (struct btrfs_tree_block_info *)(ei + 1); + iref = (struct btrfs_extent_inline_ref *)(info + 1); + } + btrfs_set_extent_inline_ref_type(buf, iref, + BTRFS_TREE_BLOCK_REF_KEY); + btrfs_set_extent_inline_ref_offset(buf, iref, ref_root); + + (*slot)++; + if (skinny_metadata) + return 0; + + /* + * Lastly, check the tree block key by read the tree block + * Since we do 1:1 mapping for convert case, we can directly + * read the bytenr from disk + */ + tmp = malloc(sizeof(*tmp) + cfg->nodesize); + if (!tmp) + return -ENOMEM; + ret = setup_temp_extent_buffer(tmp, cfg, bytenr, ref_root); + if (ret < 0) + goto out; + ret = pread(fd, tmp->data, cfg->nodesize, bytenr); + if (ret < cfg->nodesize) { + ret = (ret < 0 ? 
-errno : -EIO); + goto out; + } + if (btrfs_header_nritems(tmp) == 0) { + btrfs_set_disk_key_type(&tree_info_key, 0); + btrfs_set_disk_key_objectid(&tree_info_key, 0); + btrfs_set_disk_key_offset(&tree_info_key, 0); + } else { + btrfs_item_key(tmp, &tree_info_key, 0); + } + btrfs_set_tree_block_key(buf, info, &tree_info_key); + +out: + free(tmp); + return ret; +} + +static void insert_temp_block_group(struct extent_buffer *buf, + struct btrfs_mkfs_config *cfg, + int *slot, u32 *itemoff, + u64 bytenr, u64 len, u64 used, u64 flag) +{ + struct btrfs_block_group_item bgi; + struct btrfs_disk_key disk_key; + + btrfs_set_header_nritems(buf, *slot + 1); + (*itemoff) -= sizeof(bgi); + btrfs_set_disk_key_type(&disk_key, BTRFS_BLOCK_GROUP_ITEM_KEY); + btrfs_set_disk_key_objectid(&disk_key, bytenr); + btrfs_set_disk_key_offset(&disk_key, len); + btrfs_set_item_key(buf, &disk_key, *slot); + btrfs_set_item_offset(buf, btrfs_item_nr(*slot), *itemoff); + btrfs_set_item_size(buf, btrfs_item_nr(*slot), sizeof(bgi)); + + btrfs_set_block_group_flags(&bgi, flag); + btrfs_set_block_group_used(&bgi, used); + btrfs_set_block_group_chunk_objectid(&bgi, + BTRFS_FIRST_CHUNK_TREE_OBJECTID); + write_extent_buffer(buf, &bgi, btrfs_item_ptr_offset(buf, *slot), + sizeof(bgi)); + (*slot)++; +} + +static int setup_temp_extent_tree(int fd, struct btrfs_mkfs_config *cfg, + u64 chunk_bytenr, u64 root_bytenr, + u64 extent_bytenr, u64 dev_bytenr, + u64 fs_bytenr, u64 csum_bytenr) +{ + struct extent_buffer *buf = NULL; + u32 itemoff = __BTRFS_LEAF_DATA_SIZE(cfg->nodesize); + int slot = 0; + int ret; + + /* + * We must ensure provided bytenr are in ascending order, + * or extent tree key order will be broken. 
+ */ + BUG_ON(!(chunk_bytenr < root_bytenr && root_bytenr < extent_bytenr && + extent_bytenr < dev_bytenr && dev_bytenr < fs_bytenr && + fs_bytenr < csum_bytenr)); + buf = malloc(sizeof(*buf) + cfg->nodesize); + if (!buf) + return -ENOMEM; + + ret = setup_temp_extent_buffer(buf, cfg, extent_bytenr, + BTRFS_EXTENT_TREE_OBJECTID); + if (ret < 0) + goto out; + + ret = insert_temp_extent_item(fd, buf, cfg, &slot, &itemoff, + chunk_bytenr, BTRFS_CHUNK_TREE_OBJECTID); + if (ret < 0) + goto out; + + insert_temp_block_group(buf, cfg, &slot, &itemoff, chunk_bytenr, + BTRFS_MKFS_SYSTEM_GROUP_SIZE, cfg->nodesize, + BTRFS_BLOCK_GROUP_SYSTEM); + + ret = insert_temp_extent_item(fd, buf, cfg, &slot, &itemoff, + root_bytenr, BTRFS_ROOT_TREE_OBJECTID); + if (ret < 0) + goto out; + + /* 5 tree block used, root, extent, dev, fs and csum*/ + insert_temp_block_group(buf, cfg, &slot, &itemoff, root_bytenr, + BTRFS_CONVERT_META_GROUP_SIZE, cfg->nodesize * 5, + BTRFS_BLOCK_GROUP_METADATA); + + ret = insert_temp_extent_item(fd, buf, cfg, &slot, &itemoff, + extent_bytenr, BTRFS_EXTENT_TREE_OBJECTID); + if (ret < 0) + goto out; + ret = insert_temp_extent_item(fd, buf, cfg, &slot, &itemoff, + dev_bytenr, BTRFS_DEV_TREE_OBJECTID); + if (ret < 0) + goto out; + ret = insert_temp_extent_item(fd, buf, cfg, &slot, &itemoff, + fs_bytenr, BTRFS_FS_TREE_OBJECTID); + if (ret < 0) + goto out; + ret = insert_temp_extent_item(fd, buf, cfg, &slot, &itemoff, + csum_bytenr, BTRFS_CSUM_TREE_OBJECTID); + if (ret < 0) + goto out; + + ret = write_temp_extent_buffer(fd, buf, extent_bytenr); +out: + free(buf); + return ret; +} + +/* + * Improved version of make_btrfs(). + * + * This one will + * 1) Do chunk allocation to avoid used data + * And after this function, extent type matches chunk type + * 2) Better structured code + * No super long hand written codes to initialized all tree blocks + * Split into small blocks and reuse codes. 
+ * TODO: Reuse tree operation facilities by introducing new flags + */ +static int make_convert_btrfs(int fd, struct btrfs_mkfs_config *cfg, + struct btrfs_convert_context *cctx) +{ + struct cache_tree *free = &cctx->free; + struct cache_tree *used = &cctx->used; + u64 sys_chunk_start; + u64 meta_chunk_start; + /* chunk tree bytenr, in system chunk */ + u64 chunk_bytenr; + /* metadata trees bytenr, in metadata chunk */ + u64 root_bytenr; + u64 extent_bytenr; + u64 dev_bytenr; + u64 fs_bytenr; + u64 csum_bytenr; + int ret; + + /* Shouldn't happen */ + BUG_ON(cache_tree_empty(used)); + + /* + * reserve space for temporary superblock first + * Here we allocate a little larger space, to keep later + * free space will be STRIPE_LEN aligned + */ + ret = reserve_free_space(free, BTRFS_STRIPE_LEN, + &cfg->super_bytenr); + if (ret < 0) + goto out; + + /* + * Then reserve system chunk space + * TODO: Change system group size depending on cctx->total_bytes. + * If using current 4M, it can only handle less than one TB for + * worst case and then run out of sys space. + */ + ret = reserve_free_space(free, BTRFS_MKFS_SYSTEM_GROUP_SIZE, + &sys_chunk_start); + if (ret < 0) + goto out; + ret = reserve_free_space(free, BTRFS_CONVERT_META_GROUP_SIZE, + &meta_chunk_start); + if (ret < 0) + goto out; + + /* + * Allocated meta/sys chunks will be mapped 1:1 with device offset. 
+ * + * Inside the allocated metadata chunk, the layout will be: + * | offset | contents | + * ------------------------------------- + * | +0 | tree root | + * | +nodesize | extent root | + * | +nodesize * 2 | device root | + * | +nodesize * 3 | fs tree | + * | +nodesize * 4 | csum tree | + * ------------------------------------- + * Inside the allocated system chunk, the layout will be: + * | offset | contents | + * ------------------------------------- + * | +0 | chunk root | + * ------------------------------------- + */ + chunk_bytenr = sys_chunk_start; + root_bytenr = meta_chunk_start; + extent_bytenr = meta_chunk_start + cfg->nodesize; + dev_bytenr = meta_chunk_start + cfg->nodesize * 2; + fs_bytenr = meta_chunk_start + cfg->nodesize * 3; + csum_bytenr = meta_chunk_start + cfg->nodesize * 4; + + ret = setup_temp_super(fd, cfg, root_bytenr, chunk_bytenr); + if (ret < 0) + goto out; + + ret = setup_temp_root_tree(fd, cfg, root_bytenr, extent_bytenr, + dev_bytenr, fs_bytenr, csum_bytenr); + if (ret < 0) + goto out; + ret = setup_temp_chunk_tree(fd, cfg, sys_chunk_start, meta_chunk_start, + chunk_bytenr); + if (ret < 0) + goto out; + ret = setup_temp_dev_tree(fd, cfg, sys_chunk_start, meta_chunk_start, + dev_bytenr); + if (ret < 0) + goto out; + ret = setup_temp_fs_tree(fd, cfg, fs_bytenr); + if (ret < 0) + goto out; + ret = setup_temp_csum_tree(fd, cfg, csum_bytenr); + if (ret < 0) + goto out; + /* + * Setup extent tree last, since it may need to read tree block key + * for non-skinny metadata case. 
+ */ + ret = setup_temp_extent_tree(fd, cfg, chunk_bytenr, root_bytenr, + extent_bytenr, dev_bytenr, fs_bytenr, + csum_bytenr); +out: + return ret; +} + +/* * @fs_uuid - if NULL, generates a UUID, returns back the new filesystem UUID */ -int make_btrfs(int fd, struct btrfs_mkfs_config *cfg) +int make_btrfs(int fd, struct btrfs_mkfs_config *cfg, + struct btrfs_convert_context *cctx) { struct btrfs_super_block super; struct extent_buffer *buf; @@ -206,6 +1031,8 @@ int make_btrfs(int fd, struct btrfs_mkfs_config *cfg) BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA); u64 num_bytes; + if (cctx) + return make_convert_btrfs(fd, cfg, cctx); buf = malloc(sizeof(*buf) + max(cfg->sectorsize, cfg->nodesize)); if (!buf) return -ENOMEM; @@ -218,13 +1045,12 @@ int make_btrfs(int fd, struct btrfs_mkfs_config *cfg) num_bytes = (cfg->num_bytes / cfg->sectorsize) * cfg->sectorsize; if (cfg->fs_uuid && *cfg->fs_uuid) { if (uuid_parse(cfg->fs_uuid, super.fsid) != 0) { - fprintf(stderr, "could not parse UUID: %s\n", - cfg->fs_uuid); + error("cannot not parse UUID: %s", cfg->fs_uuid); ret = -EINVAL; goto out; } if (!test_uuid_unique(cfg->fs_uuid)) { - fprintf(stderr, "non-unique UUID: %s\n", cfg->fs_uuid); + error("non-unique UUID: %s", cfg->fs_uuid); ret = -EBUSY; goto out; } @@ -1455,7 +2281,7 @@ int check_mounted_where(int fd, const char *file, char *where, int size, return ret; } - /* iterate over the list of currently mountes filesystems */ + /* iterate over the list of currently mounted filesystems */ if ((f = setmntent ("/proc/self/mounts", "r")) == NULL) return -errno; @@ -1713,8 +2539,8 @@ static int check_label(const char *input) int len = strlen(input); if (len > BTRFS_LABEL_SIZE - 1) { - fprintf(stderr, "ERROR: Label %s is too long (max %d)\n", - input, BTRFS_LABEL_SIZE - 1); + error("label %s is too long (max %d)", input, + BTRFS_LABEL_SIZE - 1); return -1; } @@ -1729,12 +2555,11 @@ static int set_label_unmounted(const char *dev, const char *label) ret = check_mounted(dev); if (ret 
< 0) { - fprintf(stderr, "FATAL: error checking %s mount status\n", dev); + error("checking mount status of %s failed: %d", dev, ret); return -1; } if (ret > 0) { - fprintf(stderr, "ERROR: dev %s is mounted, use mount point\n", - dev); + error("device %s is mounted, use mount point", dev); return -1; } @@ -1762,15 +2587,15 @@ static int set_label_mounted(const char *mount_path, const char *labelp) fd = open(mount_path, O_RDONLY | O_NOATIME); if (fd < 0) { - fprintf(stderr, "ERROR: unable to access '%s'\n", mount_path); + error("unable to access %s: %s", mount_path, strerror(errno)); return -1; } memset(label, 0, sizeof(label)); __strncpy_null(label, labelp, BTRFS_LABEL_SIZE - 1); if (ioctl(fd, BTRFS_IOC_SET_FSLABEL, label) < 0) { - fprintf(stderr, "ERROR: unable to set label %s\n", - strerror(errno)); + error("unable to set label of %s: %s", mount_path, + strerror(errno)); close(fd); return -1; } @@ -1786,7 +2611,7 @@ int get_label_unmounted(const char *dev, char *label) ret = check_mounted(dev); if (ret < 0) { - fprintf(stderr, "FATAL: error checking %s mount status\n", dev); + error("checking mount status of %s failed: %d", dev, ret); return -1; } @@ -1818,7 +2643,7 @@ int get_label_mounted(const char *mount_path, char *labelp) fd = open(mount_path, O_RDONLY | O_NOATIME); if (fd < 0) { - fprintf(stderr, "ERROR: unable to access '%s'\n", mount_path); + error("unable to access %s: %s", mount_path, strerror(errno)); return -1; } @@ -1826,7 +2651,7 @@ int get_label_mounted(const char *mount_path, char *labelp) ret = ioctl(fd, BTRFS_IOC_GET_FSLABEL, label); if (ret < 0) { if (errno != ENOTTY) - fprintf(stderr, "ERROR: unable to get label %s\n", + error("unable to get label of %s: %s", mount_path, strerror(errno)); ret = -errno; close(fd); @@ -1889,21 +2714,20 @@ u64 parse_size(char *s) u64 ret; if (!s) { - fprintf(stderr, "ERROR: Size value is empty\n"); + error("size value is empty"); exit(1); } if (s[0] == '-') { - fprintf(stderr, - "ERROR: Size value '%s' is less 
equal than 0\n", s); + error("size value '%s' is less equal than 0", s); exit(1); } ret = strtoull(s, &endptr, 10); if (endptr == s) { - fprintf(stderr, "ERROR: Size value '%s' is invalid\n", s); + error("size value '%s' is invalid", s); exit(1); } if (endptr[0] && endptr[1]) { - fprintf(stderr, "ERROR: Illegal suffix contains character '%c' in wrong position\n", + error("illegal suffix contains character '%c' in wrong position", endptr[1]); exit(1); } @@ -1912,8 +2736,7 @@ u64 parse_size(char *s) * need to call strtoull to get the real size */ if (errno == ERANGE && ret == ULLONG_MAX) { - fprintf(stderr, - "ERROR: Size value '%s' is too large for u64\n", s); + error("size value '%s' is too large for u64", s); exit(1); } if (endptr[0]) { @@ -1940,15 +2763,13 @@ u64 parse_size(char *s) case 'b': break; default: - fprintf(stderr, "ERROR: Unknown size descriptor '%c'\n", - c); + error("unknown size descriptor '%c'", c); exit(1); } } /* Check whether ret * mult overflow */ if (fls64(ret) + fls64(mult) - 1 > 64) { - fprintf(stderr, - "ERROR: Size value '%s' is too large for u64\n", s); + error("size value '%s' is too large for u64", s); exit(1); } ret *= mult; @@ -2000,7 +2821,7 @@ path: return id; err: - fprintf(stderr, "ERROR: invalid qgroupid or subvolume path: %s\n", p); + error("invalid qgroupid or subvolume path: %s", p); exit(-1); } @@ -2517,45 +3338,43 @@ int test_dev_for_mkfs(const char *file, int force_overwrite) ret = is_swap_device(file); if (ret < 0) { - fprintf(stderr, "ERROR: checking status of %s: %s\n", file, - strerror(-ret)); + error("checking status of %s: %s", file, strerror(-ret)); return 1; } if (ret == 1) { - fprintf(stderr, "ERROR: %s is a swap device\n", file); + error("%s is a swap device", file); return 1; } if (!force_overwrite) { if (check_overwrite(file)) { - fprintf(stderr, "Use the -f option to force overwrite.\n"); + error("use the -f option to force overwrite of %s", + file); return 1; } } ret = check_mounted(file); if (ret < 0) { - 
fprintf(stderr, "ERROR: checking mount status of %s: %s\n", - file, strerror(-ret)); + error("cannot check mount status of %s: %s", file, + strerror(-ret)); return 1; } if (ret == 1) { - fprintf(stderr, "ERROR: %s is mounted\n", file); + error("%s is mounted", file); return 1; } /* check if the device is busy */ fd = open(file, O_RDWR|O_EXCL); if (fd < 0) { - fprintf(stderr, "ERROR: unable to open %s: %s\n", file, - strerror(errno)); + error("unable to open %s: %s", file, strerror(errno)); return 1; } if (fstat(fd, &st)) { - fprintf(stderr, "ERROR: unable to stat %s: %s\n", file, - strerror(errno)); + error("unable to stat %s: %s", file, strerror(errno)); close(fd); return 1; } if (!S_ISBLK(st.st_mode)) { - fprintf(stderr, "ERROR: %s is not a block device\n", file); + error("%s is not a block device", file); close(fd); return 1; } @@ -2578,7 +3397,7 @@ int btrfs_scan_lblkid(void) return 0; if (blkid_get_cache(&cache, NULL) < 0) { - printf("ERROR: lblkid cache get failed\n"); + error("blkid cache get failed"); return 1; } blkid_probe_all(cache); @@ -2593,13 +3412,13 @@ int btrfs_scan_lblkid(void) fd = open(path, O_RDONLY); if (fd < 0) { - printf("ERROR: could not open %s\n", path); + error("cannot open %s: %s", path, strerror(errno)); continue; } ret = btrfs_scan_one_device(fd, path, &tmp_devices, &num_devices, BTRFS_SUPER_INFO_OFFSET, 0); if (ret) { - printf("ERROR: could not scan %s\n", path); + error("cannot scan %s: %s", path, strerror(-ret)); close (fd); continue; } @@ -2679,8 +3498,7 @@ int lookup_ino_rootid(int fd, u64 *rootid) ret = ioctl(fd, BTRFS_IOC_INO_LOOKUP, &args); if (ret < 0) { - fprintf(stderr, "ERROR: Failed to lookup root id - %s\n", - strerror(errno)); + error("failed to lookup root id: %s", strerror(errno)); return ret; } @@ -2931,24 +3749,20 @@ int btrfs_tree_search2_ioctl_supported(int fd) int btrfs_check_nodesize(u32 nodesize, u32 sectorsize, u64 features) { if (nodesize < sectorsize) { - fprintf(stderr, - "ERROR: Illegal nodesize %u 
(smaller than %u)\n", - nodesize, sectorsize); + error("illegal nodesize %u (smaller than %u)", + nodesize, sectorsize); return -1; } else if (nodesize > BTRFS_MAX_METADATA_BLOCKSIZE) { - fprintf(stderr, - "ERROR: Illegal nodesize %u (larger than %u)\n", + error("illegal nodesize %u (larger than %u)", nodesize, BTRFS_MAX_METADATA_BLOCKSIZE); return -1; } else if (nodesize & (sectorsize - 1)) { - fprintf(stderr, - "ERROR: Illegal nodesize %u (not aligned to %u)\n", + error("illegal nodesize %u (not aligned to %u)", nodesize, sectorsize); return -1; } else if (features & BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS && nodesize != sectorsize) { - fprintf(stderr, - "ERROR: Illegal nodesize %u (not equal to %u for mixed block group)\n", + error("illegal nodesize %u (not equal to %u for mixed block group)", nodesize, sectorsize); return -1; } @@ -3116,6 +3930,24 @@ void clean_args_no_options(int argc, char *argv[], const char * const *usagestr) } } +/* + * Same as clean_args_no_options but pass through arguments that could look + * like short options. Eg. reisze which takes a negative resize argument like + * '-123M' . + * + * This accepts only two forms: + * - "-- option1 option2 ..." + * - "option1 option2 ..." + */ +void clean_args_no_options_relaxed(int argc, char *argv[], const char * const *usagestr) +{ + if (argc <= 1) + return; + + if (strcmp(argv[1], "--") == 0) + optind = 2; +} + /* Subvolume helper functions */ /* * test if name is a correct subvolume name @@ -3240,3 +4072,62 @@ out: return ret; } + +void init_rand_seed(u64 seed) +{ + int i; + + /* only use the last 48 bits */ + for (i = 0; i < 3; i++) { + rand_seed[i] = (unsigned short)(seed ^ (unsigned short)(-1)); + seed >>= 16; + } + rand_seed_initlized = 1; +} + +static void __init_seed(void) +{ + struct timeval tv; + int ret; + int fd; + + if(rand_seed_initlized) + return; + /* Use urandom as primary seed source. 
*/ + fd = open("/dev/urandom", O_RDONLY); + if (fd >= 0) { + ret = read(fd, rand_seed, sizeof(rand_seed)); + close(fd); + if (ret < sizeof(rand_seed)) + goto fallback; + } else { +fallback: + /* Use time and pid as fallback seed */ + warning("failed to read /dev/urandom, use time and pid as random seed"); + gettimeofday(&tv, 0); + rand_seed[0] = getpid() ^ (tv.tv_sec & 0xFFFF); + rand_seed[1] = getppid() ^ (tv.tv_usec & 0xFFFF); + rand_seed[2] = (tv.tv_sec ^ tv.tv_usec) >> 16; + } + rand_seed_initlized = 1; +} + +u32 rand_u32(void) +{ + __init_seed(); + /* + * Don't use nrand48, its range is [0,2^31) The highest bit will alwasy + * be 0. Use jrand48 to include the highest bit. + */ + return (u32)jrand48(rand_seed); +} + +unsigned int rand_range(unsigned int upper) +{ + __init_seed(); + /* + * Use the full 48bits to mod, which would be more uniformly + * distributed + */ + return (unsigned int)(jrand48(rand_seed) % upper); +} |