summaryrefslogtreecommitdiff
path: root/volumes.c
diff options
context:
space:
mode:
author: Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com> 2013-09-05 15:57:19 +0900
committer: Chris Mason <chris.mason@fusionio.com> 2013-10-16 08:20:42 -0400
commit: 03e95396153d325caa01c33d5664ef0aefe7944d (patch)
tree: 1b2b83420a0cb46bd0e7f8fa6090e3079161bc12 /volumes.c
parent: 49fef6fc53f79aaa2e8e0c601a2482545b6c2486 (diff)
btrfs-progs: calculate available blocks on device properly
I found that mkfs.btrfs aborts when the assigned multiple volumes contain a small volume: # parted /dev/sdf p Model: LSI MegaRAID SAS RMB (scsi) Disk /dev/sdf: 72.8GB Sector size (logical/physical): 512B/512B Partition Table: msdos Number Start End Size Type File system Flags 1 32.3kB 72.4GB 72.4GB primary 2 72.4GB 72.8GB 461MB primary # ./mkfs.btrfs -f /dev/sdf1 /dev/sdf2 : SMALL VOLUME: forcing mixed metadata/data groups adding device /dev/sdf2 id 2 mkfs.btrfs: volumes.c:852: btrfs_alloc_chunk: Assertion `!(ret)' failed. Aborted (core dumped) This failure of btrfs_alloc_chunk was caused by the following steps: 1) Since there is only a little space on the small device, mkfs tried to allocate a chunk using as much free space as was available, so it called btrfs_alloc_chunk with size = device->total_bytes - device->bytes_used. 2) (According to the comment in the source code, to avoid overwriting the superblock,) btrfs_alloc_chunk starts taking chunks at an offset of 1MB. It means that the layout of a disk will be like: [[1MB at beginning for sb][allocated chunks]* ... free space ... ] and you can see that the available free space for allocation is: avail = device->total_bytes - device->bytes_used - 1MB. 3) Therefore the free space is 1MB less than requested. damn. From further investigation I also found that this issue is easily reproduced by using the -A, --alloc-start option: # truncate --size=1G testfile # ./mkfs.btrfs -A900M -f testfile : mkfs.btrfs: volumes.c:852: btrfs_alloc_chunk: Assertion `!(ret)' failed. Aborted (core dumped) In this case there is only 100MB for allocation but btrfs_alloc_chunk was going to allocate more than the 100MB. The root cause of both of the above problems is the same simple bug: btrfs_alloc_chunk does not calculate available bytes properly even though it checks how many devices have enough room for the chunk to be allocated.
So this patch introduces a new function, btrfs_device_avail_bytes(), which returns the bytes available for allocation on the specified device. Signed-off-by: Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com> Signed-off-by: David Sterba <dsterba@suse.cz> Signed-off-by: Chris Mason <chris.mason@fusionio.com>
Diffstat (limited to 'volumes.c')
-rw-r--r--volumes.c104
1 files changed, 98 insertions, 6 deletions
diff --git a/volumes.c b/volumes.c
index dba5b0e0..c38da6cc 100644
--- a/volumes.c
+++ b/volumes.c
@@ -272,7 +272,7 @@ static int find_free_dev_extent(struct btrfs_trans_handle *trans,
struct btrfs_dev_extent *dev_extent = NULL;
u64 hole_size = 0;
u64 last_byte = 0;
- u64 search_start = 0;
+ u64 search_start = root->fs_info->alloc_start;
u64 search_end = device->total_bytes;
int ret;
int slot = 0;
@@ -287,10 +287,12 @@ static int find_free_dev_extent(struct btrfs_trans_handle *trans,
/* we don't want to overwrite the superblock on the drive,
* so we make sure to start at an offset of at least 1MB
*/
- search_start = max((u64)1024 * 1024, search_start);
+ search_start = max(BTRFS_BLOCK_RESERVED_1M_FOR_SUPER, search_start);
- if (root->fs_info->alloc_start + num_bytes <= device->total_bytes)
- search_start = max(root->fs_info->alloc_start, search_start);
+ if (search_start >= search_end) {
+ ret = -ENOSPC;
+ goto error;
+ }
key.objectid = device->devid;
key.offset = search_start;
@@ -656,6 +658,94 @@ static u32 find_raid56_stripe_len(u32 data_devices, u32 dev_stripe_target)
return 64 * 1024;
}
+/*
+ * btrfs_device_avail_bytes - count bytes available for alloc_chunk
+ *
+ * The result is not "device->total_bytes - device->bytes_used": no chunk is
+ * allocated in the first 1M of a device, nor before alloc_start if it is set,
+ * so only holes in [max(1M, alloc_start), device->total_bytes) are summed.
+ * Returns 0 with the sum in *avail_bytes, -ENOMEM, or a negative search error.
+ */
+static int btrfs_device_avail_bytes(struct btrfs_trans_handle *trans,
+ struct btrfs_device *device,
+ u64 *avail_bytes)
+{
+ struct btrfs_path *path;
+ struct btrfs_root *root = device->dev_root;
+ struct btrfs_key key;
+ struct btrfs_dev_extent *dev_extent = NULL;
+ struct extent_buffer *l;
+ u64 search_start = root->fs_info->alloc_start; /* start of the range not yet accounted for */
+ u64 search_end = device->total_bytes;
+ u64 extent_end = 0;
+ u64 free_bytes = 0; /* running total of hole sizes */
+ int ret;
+ int slot = 0;
+
+ search_start = max(BTRFS_BLOCK_RESERVED_1M_FOR_SUPER, search_start); /* never count the reserved first 1M */
+
+ path = btrfs_alloc_path();
+ if (!path)
+ return -ENOMEM;
+
+ key.objectid = device->devid;
+ key.offset = root->fs_info->alloc_start; /* NOTE(review): raw alloc_start, not the clamped search_start - confirm intended */
+ key.type = BTRFS_DEV_EXTENT_KEY;
+
+ path->reada = 2;
+ ret = btrfs_search_slot(trans, root, &key, path, 0, 0);
+ if (ret < 0)
+ goto error;
+ ret = btrfs_previous_item(root, path, 0, key.type); /* presumably so an extent starting before the key offset is examined too */
+ if (ret < 0)
+ goto error;
+
+ while (1) {
+ l = path->nodes[0];
+ slot = path->slots[0];
+ if (slot >= btrfs_header_nritems(l)) { /* ran past the last item of this leaf */
+ ret = btrfs_next_leaf(root, path);
+ if (ret == 0)
+ continue;
+ if (ret < 0)
+ goto error;
+ break; /* ret > 0: presumably no further leaf to visit */
+ }
+ btrfs_item_key_to_cpu(l, &key, slot);
+
+ if (key.objectid < device->devid)
+ goto next; /* item belongs to a lower devid, keep walking */
+ if (key.objectid > device->devid)
+ break; /* walked past this device's items */
+ if (btrfs_key_type(&key) != BTRFS_DEV_EXTENT_KEY)
+ goto next;
+ if (key.offset > search_end)
+ break; /* extent starts beyond the end of the device */
+ if (key.offset > search_start)
+ free_bytes += key.offset - search_start; /* hole between the accounted range and this extent */
+
+ dev_extent = btrfs_item_ptr(l, slot, struct btrfs_dev_extent);
+ extent_end = key.offset + btrfs_dev_extent_length(l,
+ dev_extent);
+ if (extent_end > search_start) /* extents ending before search_start leave it unchanged */
+ search_start = extent_end;
+ if (search_start > search_end)
+ break;
+next:
+ path->slots[0]++;
+ cond_resched();
+ }
+
+ if (search_start < search_end)
+ free_bytes += search_end - search_start; /* trailing hole up to the end of the device */
+
+ *avail_bytes = free_bytes; /* only written on the success path */
+ ret = 0;
+error:
+ btrfs_free_path(path);
+ return ret;
+}
+
int btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
struct btrfs_root *extent_root, u64 *start,
u64 *num_bytes, u64 type)
@@ -674,7 +764,7 @@ int btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
u64 calc_size = 8 * 1024 * 1024;
u64 min_free;
u64 max_chunk_size = 4 * calc_size;
- u64 avail;
+ u64 avail = 0;
u64 max_avail = 0;
u64 percent_max;
int num_stripes = 1;
@@ -778,7 +868,9 @@ again:
/* build a private list of devices we will allocate from */
while(index < num_stripes) {
device = list_entry(cur, struct btrfs_device, dev_list);
- avail = device->total_bytes - device->bytes_used;
+ ret = btrfs_device_avail_bytes(trans, device, &avail);
+ if (ret)
+ return ret;
cur = cur->next;
if (avail >= min_free) {
list_move_tail(&device->dev_list, &private_devs);