From 510be296772108b6d5ae829d76cef2b1a1690262 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 24 Mar 2008 15:03:18 -0400 Subject: Add support for multiple devices per filesystem --- ctree.h | 321 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 290 insertions(+), 31 deletions(-) (limited to 'ctree.h') diff --git a/ctree.h b/ctree.h index 113d38f5..c8585147 100644 --- a/ctree.h +++ b/ctree.h @@ -30,12 +30,44 @@ struct btrfs_trans_handle; #define BTRFS_MAGIC "_B4RfS_M" #define BTRFS_MAX_LEVEL 8 + +/* holds pointers to all of the tree roots */ #define BTRFS_ROOT_TREE_OBJECTID 1ULL + +/* stores information about which extents are in use, and reference counts */ #define BTRFS_EXTENT_TREE_OBJECTID 2ULL + +/* one per subvolume, storing files and directories */ #define BTRFS_FS_TREE_OBJECTID 3ULL + +/* directory objectid inside the root tree */ #define BTRFS_ROOT_TREE_DIR_OBJECTID 4ULL + + +/* + * chunk tree stores translations from logical -> physical block numbering + * the super block points to the chunk tree + */ +#define BTRFS_CHUNK_TREE_OBJECTID 5ULL + +/* + * stores information about which areas of a given device are in use. + * one per device. The tree of tree roots points to the device tree + */ +#define BTRFS_DEV_TREE_OBJECTID 6ULL + +/* + * All files have objectids higher than this. + */ #define BTRFS_FIRST_FREE_OBJECTID 256ULL + +/* + * the device items go into the chunk tree. The key is in the form + * [ 1 BTRFS_DEV_ITEM_KEY device_id ] + */ +#define BTRFS_DEV_ITEMS_OBJECTID 1ULL + /* * we can actually store much bigger names, but lets not confuse the rest * of linux @@ -85,6 +117,81 @@ struct btrfs_key { u64 offset; } __attribute__ ((__packed__)); +struct btrfs_mapping_tree { + struct cache_tree cache_tree; +}; + +#define BTRFS_DEV_UUID_SIZE 16 +struct btrfs_dev_item { + /* the internal btrfs device id */ + __le64 devid; + + /* size of the device */ + __le64 total_bytes; + + /* bytes used */ + __le64 bytes_used; + + /* optimal io alignment for this device */ + __le32 io_align; + + /* optimal io width for this device */ + __le32 io_width; + + /* minimal io size for this device */ + __le32 sector_size; + + /* the kernel device number */ + __le64 rdev; + + /* type and info about this device */ + __le64 type; + + /* partition number, 0 for whole dev */ + __le32 partition; + + /* length of the name data at the end of the item */ + __le16 name_len; + + /* physical drive uuid (or lvm uuid) */ + u8 uuid[BTRFS_DEV_UUID_SIZE]; + /* name goes here */ +} __attribute__ ((__packed__)); + +struct btrfs_stripe { + __le64 devid; + __le64 offset; +} __attribute__ ((__packed__)); + +struct btrfs_chunk { + __le64 owner; + __le64 stripe_len; + __le64 type; + + /* optimal io alignment for this chunk */ + __le32 io_align; + + /* optimal io width for this chunk */ + __le32 io_width; + + /* minimal io size for this chunk */ + __le32 sector_size; + + /* 2^16 stripes is quite a lot, a second limit is the size of a single + * item in the btree + */ + __le16 num_stripes; + struct btrfs_stripe stripe; + /* additional stripes go here */ +} __attribute__ ((__packed__)); + +static inline unsigned long btrfs_chunk_item_size(int num_stripes) +{ + BUG_ON(num_stripes == 0); + return sizeof(struct btrfs_chunk) + + sizeof(struct btrfs_stripe) * (num_stripes - 1); +} + #define BTRFS_FSID_SIZE 16 /* * every tree block (leaf or node) starts with this header. @@ -108,6 +215,13 @@ struct btrfs_header { #define BTRFS_MAX_INLINE_DATA_SIZE(r) (BTRFS_LEAF_DATA_SIZE(r) - \ sizeof(struct btrfs_item) - \ sizeof(struct btrfs_file_extent_item)) + +/* + * this is a very generous portion of the super block, giving us + * room to translate 14 chunks with 3 stripes each. + */ +#define BTRFS_SYSTEM_CHUNK_ARRAY_SIZE 2048 + /* * the super block basically lists the main trees of the FS * it currently lacks any block count etc etc @@ -120,6 +234,7 @@ struct btrfs_super_block { __le64 magic; __le64 generation; __le64 root; + __le64 chunk_root; __le64 total_bytes; __le64 bytes_used; __le64 root_dir_objectid; @@ -127,7 +242,10 @@ struct btrfs_super_block { __le32 nodesize; __le32 leafsize; __le32 stripesize; + __le32 sys_chunk_array_size; u8 root_level; + u8 chunk_root_level; + u8 sys_chunk_array[BTRFS_SYSTEM_CHUNK_ARRAY_SIZE]; } __attribute__ ((__packed__)); /* @@ -197,12 +315,22 @@ struct btrfs_extent_ref { __le64 offset; } __attribute__ ((__packed__)); +/* dev extents record free space on individual devices. The owner + * field points back to the chunk allocation mapping tree that allocated + * the extent + */ +struct btrfs_dev_extent { + __le64 owner; + __le64 length; +} __attribute__ ((__packed__)); + + struct btrfs_inode_ref { __le16 name_len; /* name goes here */ } __attribute__ ((__packed__)); -struct btrfs_inode_timespec { +struct btrfs_timespec { __le64 sec; __le32 nsec; } __attribute__ ((__packed__)); @@ -220,13 +348,13 @@ struct btrfs_inode_item { __le32 uid; __le32 gid; __le32 mode; - __le32 rdev; + __le64 rdev; __le16 flags; __le16 compat_flags; - struct btrfs_inode_timespec atime; - struct btrfs_inode_timespec ctime; - struct btrfs_inode_timespec mtime; - struct btrfs_inode_timespec otime; + struct btrfs_timespec atime; + struct btrfs_timespec ctime; + struct btrfs_timespec mtime; + struct btrfs_timespec otime; } __attribute__ ((__packed__)); struct btrfs_dir_item { @@ -280,24 +408,26 @@ struct btrfs_csum_item { } __attribute__ ((__packed__)); /* tag for the radix tree of block groups in ram */ -#define BTRFS_BLOCK_GROUP_SIZE (256 * 1024 * 1024) - -#define BTRFS_BLOCK_GROUP_DATA 1 -#define BTRFS_BLOCK_GROUP_MIXED 2 +#define BTRFS_BLOCK_GROUP_DATA (1 << 0) +#define BTRFS_BLOCK_GROUP_SYSTEM (1 << 1) +#define BTRFS_BLOCK_GROUP_METADATA (1 << 2) struct btrfs_block_group_item { __le64 used; - u8 flags; + __le64 chunk_tree; + __le64 chunk_objectid; + __le64 flags; } __attribute__ ((__packed__)); struct btrfs_block_group_cache { struct cache_extent cache; struct btrfs_key key; struct btrfs_block_group_item item; - int data; - int cached; u64 pinned; + u64 flags; + int cached; }; + struct btrfs_extent_ops { int (*alloc_extent)(struct btrfs_root *root, u64 num_bytes, u64 hint_byte, struct btrfs_key *ins); @@ -305,11 +435,14 @@ struct btrfs_extent_ops { u64 num_bytes); }; +struct btrfs_device; struct btrfs_fs_info { u8 fsid[BTRFS_FSID_SIZE]; struct btrfs_root *fs_root; struct btrfs_root *extent_root; struct btrfs_root *tree_root; + struct btrfs_root *chunk_root; + struct btrfs_root *dev_root; struct extent_io_tree extent_cache; struct extent_io_tree free_space_cache; @@ -318,18 +451,27 @@ struct btrfs_fs_info { struct extent_io_tree pending_del; struct extent_io_tree extent_ins; + /* logical->physical extent mapping */ + struct btrfs_mapping_tree mapping_tree; + u64 generation; u64 last_trans_committed; struct btrfs_trans_handle *running_transaction; struct btrfs_super_block super_copy; struct extent_buffer *sb_buffer; struct mutex fs_mutex; - int fp; u64 total_pinned; struct btrfs_extent_ops *extent_ops; + struct list_head dirty_cowonly_roots; + + struct list_head devices; + struct list_head *last_device; + int fp; + int force_system_allocs; void *priv_data; }; + /* * in ram representation of the tree. extent_root is used for all allocations * and for the extent tree extent_root root. @@ -356,10 +498,15 @@ struct btrfs_root { u32 stripesize; int ref_cows; + int track_dirty; + u32 type; u64 highest_inode; u64 last_inode_alloc; + + /* the dirty list is only used by non-reference counted roots */ + struct list_head dirty_list; }; /* @@ -408,6 +555,10 @@ struct btrfs_root { */ #define BTRFS_BLOCK_GROUP_ITEM_KEY 50 +#define BTRFS_DEV_EXTENT_KEY 75 +#define BTRFS_DEV_ITEM_KEY 76 +#define BTRFS_CHUNK_ITEM_KEY 77 + /* * string items are for debugging. They just store a short string of * data in the FS @@ -471,11 +622,104 @@ static inline void btrfs_set_##name(type *s, u##bits val) \ s->member = cpu_to_le##bits(val); \ } +BTRFS_SETGET_FUNCS(device_type, struct btrfs_dev_item, type, 64); +BTRFS_SETGET_FUNCS(device_total_bytes, struct btrfs_dev_item, total_bytes, 64); +BTRFS_SETGET_FUNCS(device_bytes_used, struct btrfs_dev_item, bytes_used, 64); +BTRFS_SETGET_FUNCS(device_io_align, struct btrfs_dev_item, io_align, 32); +BTRFS_SETGET_FUNCS(device_io_width, struct btrfs_dev_item, io_width, 32); +BTRFS_SETGET_FUNCS(device_sector_size, struct btrfs_dev_item, sector_size, 32); +BTRFS_SETGET_FUNCS(device_id, struct btrfs_dev_item, devid, 64); +BTRFS_SETGET_FUNCS(device_rdev, struct btrfs_dev_item, rdev, 64); +BTRFS_SETGET_FUNCS(device_partition, struct btrfs_dev_item, partition, 32); +BTRFS_SETGET_FUNCS(device_name_len, struct btrfs_dev_item, name_len, 16); + +static inline char *btrfs_device_uuid(struct btrfs_dev_item *d) +{ + return (char *)d + offsetof(struct btrfs_dev_item, uuid); +} + +static inline char *btrfs_device_name(struct btrfs_dev_item *d) +{ + return (char *)(d + 1); +} + +BTRFS_SETGET_FUNCS(chunk_owner, struct btrfs_chunk, owner, 64); +BTRFS_SETGET_FUNCS(chunk_stripe_len, struct btrfs_chunk, stripe_len, 64); +BTRFS_SETGET_FUNCS(chunk_io_align, struct btrfs_chunk, io_align, 32); +BTRFS_SETGET_FUNCS(chunk_io_width, struct btrfs_chunk, io_width, 32); +BTRFS_SETGET_FUNCS(chunk_sector_size, struct btrfs_chunk, sector_size, 32); +BTRFS_SETGET_FUNCS(chunk_type, struct btrfs_chunk, type, 64); +BTRFS_SETGET_FUNCS(chunk_num_stripes, struct btrfs_chunk, num_stripes, 16); +BTRFS_SETGET_FUNCS(stripe_devid, struct btrfs_stripe, devid, 64); +BTRFS_SETGET_FUNCS(stripe_offset, struct btrfs_stripe, offset, 64); + +BTRFS_SETGET_STACK_FUNCS(stack_chunk_owner, struct btrfs_chunk, owner, 64); +BTRFS_SETGET_STACK_FUNCS(stack_chunk_stripe_len, struct btrfs_chunk, + stripe_len, 64); +BTRFS_SETGET_STACK_FUNCS(stack_chunk_io_align, struct btrfs_chunk, + io_align, 32); +BTRFS_SETGET_STACK_FUNCS(stack_chunk_io_width, struct btrfs_chunk, + io_width, 32); +BTRFS_SETGET_STACK_FUNCS(stack_chunk_sector_size, struct btrfs_chunk, + sector_size, 32); +BTRFS_SETGET_STACK_FUNCS(stack_chunk_type, struct btrfs_chunk, type, 64); +BTRFS_SETGET_STACK_FUNCS(stack_chunk_num_stripes, struct btrfs_chunk, + num_stripes, 16); +BTRFS_SETGET_STACK_FUNCS(stack_stripe_devid, struct btrfs_stripe, devid, 64); +BTRFS_SETGET_STACK_FUNCS(stack_stripe_offset, struct btrfs_stripe, offset, 64); + +static inline struct btrfs_stripe *btrfs_stripe_nr(struct btrfs_chunk *c, + int nr) +{ + unsigned long offset = (unsigned long)c; + offset += offsetof(struct btrfs_chunk, stripe); + offset += nr * sizeof(struct btrfs_stripe); + return (struct btrfs_stripe *)offset; +} + +static inline u64 btrfs_stripe_offset_nr(struct extent_buffer *eb, + struct btrfs_chunk *c, int nr) +{ + return btrfs_stripe_offset(eb, btrfs_stripe_nr(c, nr)); +} + +static inline void btrfs_set_stripe_offset_nr(struct extent_buffer *eb, + struct btrfs_chunk *c, int nr, + u64 val) +{ + btrfs_set_stripe_offset(eb, btrfs_stripe_nr(c, nr), val); +} + +static inline u64 btrfs_stripe_devid_nr(struct extent_buffer *eb, + struct btrfs_chunk *c, int nr) +{ + return btrfs_stripe_devid(eb, btrfs_stripe_nr(c, nr)); +} + +static inline void btrfs_set_stripe_devid_nr(struct extent_buffer *eb, + struct btrfs_chunk *c, int nr, + u64 val) +{ + btrfs_set_stripe_devid(eb, btrfs_stripe_nr(c, nr), val); +} + /* struct btrfs_block_group_item */ BTRFS_SETGET_STACK_FUNCS(block_group_used, struct btrfs_block_group_item, used, 64); BTRFS_SETGET_FUNCS(disk_block_group_used, struct btrfs_block_group_item, used, 64); +BTRFS_SETGET_STACK_FUNCS(block_group_chunk_tree, struct btrfs_block_group_item, + chunk_tree, 64); +BTRFS_SETGET_FUNCS(disk_block_group_chunk_tree, struct btrfs_block_group_item, + chunk_tree, 64); +BTRFS_SETGET_STACK_FUNCS(block_group_chunk_objectid, + struct btrfs_block_group_item, chunk_objectid, 64); +BTRFS_SETGET_FUNCS(disk_block_group_chunk_objecitd, + struct btrfs_block_group_item, chunk_objectid, 64); +BTRFS_SETGET_FUNCS(disk_block_group_flags, + struct btrfs_block_group_item, flags, 64); +BTRFS_SETGET_STACK_FUNCS(block_group_flags, + struct btrfs_block_group_item, flags, 64); /* struct btrfs_inode_ref */ BTRFS_SETGET_FUNCS(inode_ref_name_len, struct btrfs_inode_ref, name_len, 16); @@ -489,7 +733,7 @@ BTRFS_SETGET_FUNCS(inode_nlink, struct btrfs_inode_item, nlink, 32); BTRFS_SETGET_FUNCS(inode_uid, struct btrfs_inode_item, uid, 32); BTRFS_SETGET_FUNCS(inode_gid, struct btrfs_inode_item, gid, 32); BTRFS_SETGET_FUNCS(inode_mode, struct btrfs_inode_item, mode, 32); -BTRFS_SETGET_FUNCS(inode_rdev, struct btrfs_inode_item, rdev, 32); +BTRFS_SETGET_FUNCS(inode_rdev, struct btrfs_inode_item, rdev, 64); BTRFS_SETGET_FUNCS(inode_flags, struct btrfs_inode_item, flags, 16); BTRFS_SETGET_FUNCS(inode_compat_flags, struct btrfs_inode_item, compat_flags, 16); @@ -511,51 +755,55 @@ BTRFS_SETGET_STACK_FUNCS(stack_inode_gid, BTRFS_SETGET_STACK_FUNCS(stack_inode_mode, struct btrfs_inode_item, mode, 32); BTRFS_SETGET_STACK_FUNCS(stack_inode_rdev, - struct btrfs_inode_item, rdev, 32); + struct btrfs_inode_item, rdev, 64); BTRFS_SETGET_STACK_FUNCS(stack_inode_flags, struct btrfs_inode_item, flags, 16); BTRFS_SETGET_STACK_FUNCS(stack_inode_compat_flags, struct btrfs_inode_item, compat_flags, 16); -static inline struct btrfs_inode_timespec * +static inline struct btrfs_timespec * btrfs_inode_atime(struct btrfs_inode_item *inode_item) { unsigned long ptr = (unsigned long)inode_item; ptr += offsetof(struct btrfs_inode_item, atime); - return (struct btrfs_inode_timespec *)ptr; + return (struct btrfs_timespec *)ptr; } -static inline struct btrfs_inode_timespec * +static inline struct btrfs_timespec * btrfs_inode_mtime(struct btrfs_inode_item *inode_item) { unsigned long ptr = (unsigned long)inode_item; ptr += offsetof(struct btrfs_inode_item, mtime); - return (struct btrfs_inode_timespec *)ptr; + return (struct btrfs_timespec *)ptr; } -static inline struct btrfs_inode_timespec * +static inline struct btrfs_timespec * btrfs_inode_ctime(struct btrfs_inode_item *inode_item) { unsigned long ptr = (unsigned long)inode_item; ptr += offsetof(struct btrfs_inode_item, ctime); - return (struct btrfs_inode_timespec *)ptr; + return (struct btrfs_timespec *)ptr; } -static inline struct btrfs_inode_timespec * +static inline struct btrfs_timespec * btrfs_inode_otime(struct btrfs_inode_item *inode_item) { unsigned long ptr = (unsigned long)inode_item; ptr += offsetof(struct btrfs_inode_item, otime); - return (struct btrfs_inode_timespec *)ptr; + return (struct btrfs_timespec *)ptr; } -BTRFS_SETGET_FUNCS(timespec_sec, struct btrfs_inode_timespec, sec, 64); -BTRFS_SETGET_FUNCS(timespec_nsec, struct btrfs_inode_timespec, nsec, 32); -BTRFS_SETGET_STACK_FUNCS(stack_timespec_sec, struct btrfs_inode_timespec, +BTRFS_SETGET_FUNCS(timespec_sec, struct btrfs_timespec, sec, 64); +BTRFS_SETGET_FUNCS(timespec_nsec, struct btrfs_timespec, nsec, 32); +BTRFS_SETGET_STACK_FUNCS(stack_timespec_sec, struct btrfs_timespec, sec, 64); -BTRFS_SETGET_STACK_FUNCS(stack_timespec_nsec, struct btrfs_inode_timespec, +BTRFS_SETGET_STACK_FUNCS(stack_timespec_nsec, struct btrfs_timespec, nsec, 32); +/* struct btrfs_dev_extent */ +BTRFS_SETGET_FUNCS(dev_extent_owner, struct btrfs_dev_extent, owner, 64); +BTRFS_SETGET_FUNCS(dev_extent_length, struct btrfs_dev_extent, length, 64); + /* struct btrfs_extent_item */ BTRFS_SETGET_FUNCS(extent_refs, struct btrfs_extent_item, refs, 32); @@ -830,8 +1078,14 @@ BTRFS_SETGET_STACK_FUNCS(super_bytenr, struct btrfs_super_block, bytenr, 64); BTRFS_SETGET_STACK_FUNCS(super_generation, struct btrfs_super_block, generation, 64); BTRFS_SETGET_STACK_FUNCS(super_root, struct btrfs_super_block, root, 64); +BTRFS_SETGET_STACK_FUNCS(super_sys_array_size, + struct btrfs_super_block, sys_chunk_array_size, 32); BTRFS_SETGET_STACK_FUNCS(super_root_level, struct btrfs_super_block, root_level, 8); +BTRFS_SETGET_STACK_FUNCS(super_chunk_root, struct btrfs_super_block, + chunk_root, 64); +BTRFS_SETGET_STACK_FUNCS(super_chunk_root_level, struct btrfs_super_block, + chunk_root_level, 64); BTRFS_SETGET_STACK_FUNCS(super_total_bytes, struct btrfs_super_block, total_bytes, 64); BTRFS_SETGET_STACK_FUNCS(super_bytes_used, struct btrfs_super_block, @@ -963,14 +1217,19 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans, struct btrfs_root *root); int btrfs_free_block_groups(struct btrfs_fs_info *info); int btrfs_read_block_groups(struct btrfs_root *root); -int btrfs_make_block_groups(struct btrfs_trans_handle *trans, - struct btrfs_root *root); +int btrfs_make_block_group(struct btrfs_trans_handle *trans, + struct btrfs_root *root, u64 bytes_used, + u64 type, u64 chunk_tree, u64 chunk_objectid, + u64 size); u64 btrfs_hash_extent_ref(u64 root_objectid, u64 ref_generation, u64 owner, u64 owner_offset); int btrfs_update_block_group(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 bytenr, u64 num, - int alloc, int mark_free, int data); + int alloc, int mark_free); /* ctree.c */ +int btrfs_previous_item(struct btrfs_root *root, + struct btrfs_path *path, u64 min_objectid, + int type); int btrfs_comp_keys(struct btrfs_disk_key *disk, struct btrfs_key *k2); int btrfs_cow_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct extent_buffer *buf, -- cgit v1.2.3