diff options
Diffstat (limited to 'free-space-cache.c')
-rw-r--r-- | free-space-cache.c | 878 |
1 files changed, 878 insertions, 0 deletions
diff --git a/free-space-cache.c b/free-space-cache.c new file mode 100644 index 00000000..d10a5f51 --- /dev/null +++ b/free-space-cache.c @@ -0,0 +1,878 @@ +/* + * Copyright (C) 2008 Red Hat. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + */ + +#include "kerncompat.h" +#include "ctree.h" +#include "free-space-cache.h" +#include "transaction.h" +#include "disk-io.h" +#include "extent_io.h" +#include "crc32c.h" +#include "bitops.h" + +/* + * Kernel always uses PAGE_CACHE_SIZE for sectorsize, but we don't have + * anything like that in userspace and have to get the value from the + * filesystem + */ +#define BITS_PER_BITMAP(sectorsize) ((sectorsize) * 8) +#define MAX_CACHE_BYTES_PER_GIG (32 * 1024) + +static int link_free_space(struct btrfs_free_space_ctl *ctl, + struct btrfs_free_space *info); +static void merge_space_tree(struct btrfs_free_space_ctl *ctl); + +struct io_ctl { + void *cur, *orig; + void *buffer; + struct btrfs_root *root; + unsigned long size; + u64 total_size; + int index; + int num_pages; + unsigned check_crcs:1; +}; + +static int io_ctl_init(struct io_ctl *io_ctl, u64 size, u64 ino, + struct btrfs_root *root) +{ + memset(io_ctl, 0, sizeof(struct io_ctl)); + io_ctl->num_pages = (size + root->sectorsize - 1) / root->sectorsize; + io_ctl->buffer = kzalloc(size, GFP_NOFS); + if (!io_ctl->buffer) + return -ENOMEM; + io_ctl->total_size = size; + io_ctl->root = root; + if (ino != BTRFS_FREE_INO_OBJECTID) + io_ctl->check_crcs = 1; + return 0; +} + +static void io_ctl_free(struct io_ctl *io_ctl) +{ + kfree(io_ctl->buffer); +} + +static void io_ctl_unmap_page(struct io_ctl *io_ctl) +{ + if (io_ctl->cur) { + io_ctl->cur = NULL; + io_ctl->orig = NULL; + } +} + +static void io_ctl_map_page(struct io_ctl *io_ctl, int clear) +{ + BUG_ON(io_ctl->index >= io_ctl->num_pages); + io_ctl->cur = io_ctl->buffer + (io_ctl->index++ * io_ctl->root->sectorsize); + io_ctl->orig = io_ctl->cur; + io_ctl->size = io_ctl->root->sectorsize; + if (clear) + memset(io_ctl->cur, 0, io_ctl->root->sectorsize); +} + +static void io_ctl_drop_pages(struct io_ctl *io_ctl) +{ + io_ctl_unmap_page(io_ctl); +} + +static int io_ctl_prepare_pages(struct io_ctl *io_ctl, struct btrfs_root *root, + struct btrfs_path *path, u64 ino) +{ + struct extent_buffer *leaf; + struct btrfs_file_extent_item *fi; + struct btrfs_key key; + u64 bytenr, len; + u64 total_read = 0; + int ret = 0; + + key.objectid = ino; + key.type = BTRFS_EXTENT_DATA_KEY; + key.offset = 0; + + ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); + if (ret) { + fprintf(stderr, + "Couldn't find file extent item for free space inode" + " %Lu\n", ino); + btrfs_release_path(path); + return -EINVAL; + } + + while (total_read < io_ctl->total_size) { + if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) { + ret = btrfs_next_leaf(root, path); + if (ret) { + ret = -EINVAL; + break; + } + } + leaf = path->nodes[0]; + + btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); + if (key.objectid != ino) { + ret = -EINVAL; + break; + } + + if (key.type != BTRFS_EXTENT_DATA_KEY) { + ret = -EINVAL; + break; + } + + fi = btrfs_item_ptr(path->nodes[0], path->slots[0], + struct btrfs_file_extent_item); + if (btrfs_file_extent_type(path->nodes[0], fi) != + BTRFS_FILE_EXTENT_REG) { + fprintf(stderr, "Not the file extent type we wanted\n"); + ret = -EINVAL; + break; + } + + bytenr = btrfs_file_extent_disk_bytenr(leaf, fi) + + btrfs_file_extent_offset(leaf, fi); + len = btrfs_file_extent_num_bytes(leaf, fi); + ret = read_data_from_disk(root->fs_info, + io_ctl->buffer + key.offset, bytenr, + len, 0); + if (ret) + break; + total_read += len; + path->slots[0]++; + } + + btrfs_release_path(path); + return ret; +} + +static int io_ctl_check_generation(struct io_ctl *io_ctl, u64 generation) +{ + __le64 *gen; + + /* + * Skip the crc area. If we don't check crcs then we just have a 64bit + * chunk at the front of the first page. + */ + if (io_ctl->check_crcs) { + io_ctl->cur += sizeof(u32) * io_ctl->num_pages; + io_ctl->size -= sizeof(u64) + + (sizeof(u32) * io_ctl->num_pages); + } else { + io_ctl->cur += sizeof(u64); + io_ctl->size -= sizeof(u64) * 2; + } + + gen = io_ctl->cur; + if (le64_to_cpu(*gen) != generation) { + printk("btrfs: space cache generation " + "(%Lu) does not match inode (%Lu)\n", *gen, + generation); + io_ctl_unmap_page(io_ctl); + return -EIO; + } + io_ctl->cur += sizeof(u64); + return 0; +} + +static int io_ctl_check_crc(struct io_ctl *io_ctl, int index) +{ + u32 *tmp, val; + u32 crc = ~(u32)0; + unsigned offset = 0; + + if (!io_ctl->check_crcs) { + io_ctl_map_page(io_ctl, 0); + return 0; + } + + if (index == 0) + offset = sizeof(u32) * io_ctl->num_pages; + + tmp = io_ctl->buffer; + tmp += index; + val = *tmp; + + io_ctl_map_page(io_ctl, 0); + crc = crc32c(crc, io_ctl->orig + offset, io_ctl->root->sectorsize - offset); + btrfs_csum_final(crc, (char *)&crc); + if (val != crc) { + printk("btrfs: csum mismatch on free space cache\n"); + io_ctl_unmap_page(io_ctl); + return -EIO; + } + + return 0; +} + +static int io_ctl_read_entry(struct io_ctl *io_ctl, + struct btrfs_free_space *entry, u8 *type) +{ + struct btrfs_free_space_entry *e; + int ret; + + if (!io_ctl->cur) { + ret = io_ctl_check_crc(io_ctl, io_ctl->index); + if (ret) + return ret; + } + + e = io_ctl->cur; + entry->offset = le64_to_cpu(e->offset); + entry->bytes = le64_to_cpu(e->bytes); + *type = e->type; + io_ctl->cur += sizeof(struct btrfs_free_space_entry); + io_ctl->size -= sizeof(struct btrfs_free_space_entry); + + if (io_ctl->size >= sizeof(struct btrfs_free_space_entry)) + return 0; + + io_ctl_unmap_page(io_ctl); + + return 0; +} + +static int io_ctl_read_bitmap(struct io_ctl *io_ctl, + struct btrfs_free_space *entry) +{ + int ret; + + ret = io_ctl_check_crc(io_ctl, io_ctl->index); + if (ret) + return ret; + + memcpy(entry->bitmap, io_ctl->cur, io_ctl->root->sectorsize); + io_ctl_unmap_page(io_ctl); + + return 0; +} + + +static int __load_free_space_cache(struct btrfs_root *root, + struct btrfs_free_space_ctl *ctl, + struct btrfs_path *path, u64 offset) +{ + struct btrfs_free_space_header *header; + struct btrfs_inode_item *inode_item; + struct extent_buffer *leaf; + struct io_ctl io_ctl; + struct btrfs_key key; + struct btrfs_key inode_location; + struct btrfs_disk_key disk_key; + struct btrfs_free_space *e, *n; + struct list_head bitmaps; + u64 num_entries; + u64 num_bitmaps; + u64 generation; + u64 inode_size; + u8 type; + int ret = 0; + + INIT_LIST_HEAD(&bitmaps); + + key.objectid = BTRFS_FREE_SPACE_OBJECTID; + key.offset = offset; + key.type = 0; + + ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); + if (ret < 0) { + return 0; + } else if (ret > 0) { + btrfs_release_path(path); + return 0; + } + + leaf = path->nodes[0]; + header = btrfs_item_ptr(leaf, path->slots[0], + struct btrfs_free_space_header); + num_entries = btrfs_free_space_entries(leaf, header); + num_bitmaps = btrfs_free_space_bitmaps(leaf, header); + generation = btrfs_free_space_generation(leaf, header); + btrfs_free_space_key(leaf, header, &disk_key); + btrfs_disk_key_to_cpu(&inode_location, &disk_key); + btrfs_release_path(path); + + ret = btrfs_search_slot(NULL, root, &inode_location, path, 0, 0); + if (ret) { + fprintf(stderr, "Couldn't find free space inode %d\n", ret); + return 0; + } + + leaf = path->nodes[0]; + inode_item = btrfs_item_ptr(leaf, path->slots[0], + struct btrfs_inode_item); + + inode_size = btrfs_inode_size(leaf, inode_item); + if (!inode_size || !btrfs_inode_generation(leaf, inode_item)) { + btrfs_release_path(path); + return 0; + } + + if (btrfs_inode_generation(leaf, inode_item) != generation) { + fprintf(stderr, + "free space inode generation (%llu) did not match " + "free space cache generation (%llu)\n", + (unsigned long long)btrfs_inode_generation(leaf, + inode_item), + (unsigned long long)generation); + btrfs_release_path(path); + return 0; + } + + btrfs_release_path(path); + + if (!num_entries) + return 0; + + ret = io_ctl_init(&io_ctl, inode_size, inode_location.objectid, root); + if (ret) + return ret; + + ret = io_ctl_prepare_pages(&io_ctl, root, path, + inode_location.objectid); + if (ret) + goto out; + + ret = io_ctl_check_crc(&io_ctl, 0); + if (ret) + goto free_cache; + + ret = io_ctl_check_generation(&io_ctl, generation); + if (ret) + goto free_cache; + + while (num_entries) { + e = calloc(1, sizeof(*e)); + if (!e) + goto free_cache; + + ret = io_ctl_read_entry(&io_ctl, e, &type); + if (ret) { + free(e); + goto free_cache; + } + + if (!e->bytes) { + free(e); + goto free_cache; + } + + if (type == BTRFS_FREE_SPACE_EXTENT) { + ret = link_free_space(ctl, e); + if (ret) { + fprintf(stderr, + "Duplicate entries in free space cache\n"); + free(e); + goto free_cache; + } + } else { + BUG_ON(!num_bitmaps); + num_bitmaps--; + e->bitmap = kzalloc(ctl->sectorsize, GFP_NOFS); + if (!e->bitmap) { + free(e); + goto free_cache; + } + ret = link_free_space(ctl, e); + ctl->total_bitmaps++; + if (ret) { + fprintf(stderr, + "Duplicate entries in free space cache\n"); + free(e->bitmap); + free(e); + goto free_cache; + } + list_add_tail(&e->list, &bitmaps); + } + + num_entries--; + } + + io_ctl_unmap_page(&io_ctl); + + /* + * We add the bitmaps at the end of the entries in order that + * the bitmap entries are added to the cache. + */ + list_for_each_entry_safe(e, n, &bitmaps, list) { + list_del_init(&e->list); + ret = io_ctl_read_bitmap(&io_ctl, e); + if (ret) + goto free_cache; + } + + io_ctl_drop_pages(&io_ctl); + merge_space_tree(ctl); + ret = 1; +out: + io_ctl_free(&io_ctl); + return ret; +free_cache: + io_ctl_drop_pages(&io_ctl); + __btrfs_remove_free_space_cache(ctl); + goto out; +} + +int load_free_space_cache(struct btrfs_fs_info *fs_info, + struct btrfs_block_group_cache *block_group) +{ + struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; + struct btrfs_path *path; + u64 used = btrfs_block_group_used(&block_group->item); + int ret = 0; + int matched; + + path = btrfs_alloc_path(); + if (!path) + return 0; + + ret = __load_free_space_cache(fs_info->tree_root, ctl, path, + block_group->key.objectid); + btrfs_free_path(path); + + matched = (ctl->free_space == (block_group->key.offset - used - + block_group->bytes_super)); + if (ret == 1 && !matched) { + __btrfs_remove_free_space_cache(ctl); + fprintf(stderr, + "block group %llu has wrong amount of free space\n", + block_group->key.objectid); + ret = -1; + } + + if (ret < 0) { + ret = 0; + + fprintf(stderr, + "failed to load free space cache for block group %llu\n", + block_group->key.objectid); + } + + return ret; +} + +static inline unsigned long offset_to_bit(u64 bitmap_start, u32 unit, + u64 offset) +{ + BUG_ON(offset < bitmap_start); + offset -= bitmap_start; + return (unsigned long)(offset / unit); +} + +static inline unsigned long bytes_to_bits(u64 bytes, u32 unit) +{ + return (unsigned long)(bytes / unit); +} + +static int tree_insert_offset(struct rb_root *root, u64 offset, + struct rb_node *node, int bitmap) +{ + struct rb_node **p = &root->rb_node; + struct rb_node *parent = NULL; + struct btrfs_free_space *info; + + while (*p) { + parent = *p; + info = rb_entry(parent, struct btrfs_free_space, offset_index); + + if (offset < info->offset) { + p = &(*p)->rb_left; + } else if (offset > info->offset) { + p = &(*p)->rb_right; + } else { + /* + * we could have a bitmap entry and an extent entry + * share the same offset. If this is the case, we want + * the extent entry to always be found first if we do a + * linear search through the tree, since we want to have + * the quickest allocation time, and allocating from an + * extent is faster than allocating from a bitmap. So + * if we're inserting a bitmap and we find an entry at + * this offset, we want to go right, or after this entry + * logically. If we are inserting an extent and we've + * found a bitmap, we want to go left, or before + * logically. + */ + if (bitmap) { + if (info->bitmap) + return -EEXIST; + p = &(*p)->rb_right; + } else { + if (!info->bitmap) + return -EEXIST; + p = &(*p)->rb_left; + } + } + } + + rb_link_node(node, parent, p); + rb_insert_color(node, root); + + return 0; +} + +/* + * searches the tree for the given offset. + * + * fuzzy - If this is set, then we are trying to make an allocation, and we just + * want a section that has at least bytes size and comes at or after the given + * offset. + */ +static struct btrfs_free_space * +tree_search_offset(struct btrfs_free_space_ctl *ctl, + u64 offset, int bitmap_only, int fuzzy) +{ + struct rb_node *n = ctl->free_space_offset.rb_node; + struct btrfs_free_space *entry, *prev = NULL; + u32 sectorsize = ctl->sectorsize; + + /* find entry that is closest to the 'offset' */ + while (1) { + if (!n) { + entry = NULL; + break; + } + + entry = rb_entry(n, struct btrfs_free_space, offset_index); + prev = entry; + + if (offset < entry->offset) + n = n->rb_left; + else if (offset > entry->offset) + n = n->rb_right; + else + break; + } + + if (bitmap_only) { + if (!entry) + return NULL; + if (entry->bitmap) + return entry; + + /* + * bitmap entry and extent entry may share same offset, + * in that case, bitmap entry comes after extent entry. + */ + n = rb_next(n); + if (!n) + return NULL; + entry = rb_entry(n, struct btrfs_free_space, offset_index); + if (entry->offset != offset) + return NULL; + + WARN_ON(!entry->bitmap); + return entry; + } else if (entry) { + if (entry->bitmap) { + /* + * if previous extent entry covers the offset, + * we should return it instead of the bitmap entry + */ + n = rb_prev(&entry->offset_index); + if (n) { + prev = rb_entry(n, struct btrfs_free_space, + offset_index); + if (!prev->bitmap && + prev->offset + prev->bytes > offset) + entry = prev; + } + } + return entry; + } + + if (!prev) + return NULL; + + /* find last entry before the 'offset' */ + entry = prev; + if (entry->offset > offset) { + n = rb_prev(&entry->offset_index); + if (n) { + entry = rb_entry(n, struct btrfs_free_space, + offset_index); + BUG_ON(entry->offset > offset); + } else { + if (fuzzy) + return entry; + else + return NULL; + } + } + + if (entry->bitmap) { + n = rb_prev(&entry->offset_index); + if (n) { + prev = rb_entry(n, struct btrfs_free_space, + offset_index); + if (!prev->bitmap && + prev->offset + prev->bytes > offset) + return prev; + } + if (entry->offset + BITS_PER_BITMAP(sectorsize) * ctl->unit > offset) + return entry; + } else if (entry->offset + entry->bytes > offset) + return entry; + + if (!fuzzy) + return NULL; + + while (1) { + if (entry->bitmap) { + if (entry->offset + BITS_PER_BITMAP(sectorsize) * + ctl->unit > offset) + break; + } else { + if (entry->offset + entry->bytes > offset) + break; + } + + n = rb_next(&entry->offset_index); + if (!n) + return NULL; + entry = rb_entry(n, struct btrfs_free_space, offset_index); + } + return entry; +} + +void unlink_free_space(struct btrfs_free_space_ctl *ctl, + struct btrfs_free_space *info) +{ + rb_erase(&info->offset_index, &ctl->free_space_offset); + ctl->free_extents--; + ctl->free_space -= info->bytes; +} + +static int link_free_space(struct btrfs_free_space_ctl *ctl, + struct btrfs_free_space *info) +{ + int ret = 0; + + BUG_ON(!info->bitmap && !info->bytes); + ret = tree_insert_offset(&ctl->free_space_offset, info->offset, + &info->offset_index, (info->bitmap != NULL)); + if (ret) + return ret; + + ctl->free_space += info->bytes; + ctl->free_extents++; + return ret; +} + +static int search_bitmap(struct btrfs_free_space_ctl *ctl, + struct btrfs_free_space *bitmap_info, u64 *offset, + u64 *bytes) +{ + unsigned long found_bits = 0; + unsigned long bits, i; + unsigned long next_zero; + u32 sectorsize = ctl->sectorsize; + + i = offset_to_bit(bitmap_info->offset, ctl->unit, + max_t(u64, *offset, bitmap_info->offset)); + bits = bytes_to_bits(*bytes, ctl->unit); + + for_each_set_bit_from(i, bitmap_info->bitmap, BITS_PER_BITMAP(sectorsize)) { + next_zero = find_next_zero_bit(bitmap_info->bitmap, + BITS_PER_BITMAP(sectorsize), i); + if ((next_zero - i) >= bits) { + found_bits = next_zero - i; + break; + } + i = next_zero; + } + + if (found_bits) { + *offset = (u64)(i * ctl->unit) + bitmap_info->offset; + *bytes = (u64)(found_bits) * ctl->unit; + return 0; + } + + return -1; +} + +struct btrfs_free_space * +btrfs_find_free_space(struct btrfs_free_space_ctl *ctl, u64 offset, u64 bytes) +{ + return tree_search_offset(ctl, offset, 0, 0); +} + +static void try_merge_free_space(struct btrfs_free_space_ctl *ctl, + struct btrfs_free_space *info) +{ + struct btrfs_free_space *left_info; + struct btrfs_free_space *right_info; + u64 offset = info->offset; + u64 bytes = info->bytes; + + /* + * first we want to see if there is free space adjacent to the range we + * are adding, if there is remove that struct and add a new one to + * cover the entire range + */ + right_info = tree_search_offset(ctl, offset + bytes, 0, 0); + if (right_info && rb_prev(&right_info->offset_index)) + left_info = rb_entry(rb_prev(&right_info->offset_index), + struct btrfs_free_space, offset_index); + else + left_info = tree_search_offset(ctl, offset - 1, 0, 0); + + if (right_info && !right_info->bitmap) { + unlink_free_space(ctl, right_info); + info->bytes += right_info->bytes; + free(right_info); + } + + if (left_info && !left_info->bitmap && + left_info->offset + left_info->bytes == offset) { + unlink_free_space(ctl, left_info); + info->offset = left_info->offset; + info->bytes += left_info->bytes; + free(left_info); + } +} + +void btrfs_dump_free_space(struct btrfs_block_group_cache *block_group, + u64 bytes) +{ + struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; + struct btrfs_free_space *info; + struct rb_node *n; + int count = 0; + + for (n = rb_first(&ctl->free_space_offset); n; n = rb_next(n)) { + info = rb_entry(n, struct btrfs_free_space, offset_index); + if (info->bytes >= bytes && !block_group->ro) + count++; + printk("entry offset %llu, bytes %llu, bitmap %s\n", + (unsigned long long)info->offset, + (unsigned long long)info->bytes, + (info->bitmap) ? "yes" : "no"); + } + printk("%d blocks of free space at or bigger than bytes is \n", count); +} + +int btrfs_init_free_space_ctl(struct btrfs_block_group_cache *block_group, + int sectorsize) +{ + struct btrfs_free_space_ctl *ctl; + + ctl = calloc(1, sizeof(*ctl)); + if (!ctl) + return -ENOMEM; + + ctl->sectorsize = sectorsize; + ctl->unit = sectorsize; + ctl->start = block_group->key.objectid; + ctl->private = block_group; + block_group->free_space_ctl = ctl; + + return 0; +} + +void __btrfs_remove_free_space_cache(struct btrfs_free_space_ctl *ctl) +{ + struct btrfs_free_space *info; + struct rb_node *node; + + while ((node = rb_last(&ctl->free_space_offset)) != NULL) { + info = rb_entry(node, struct btrfs_free_space, offset_index); + unlink_free_space(ctl, info); + free(info->bitmap); + free(info); + } +} + +void btrfs_remove_free_space_cache(struct btrfs_block_group_cache *block_group) +{ + __btrfs_remove_free_space_cache(block_group->free_space_ctl); +} + +int btrfs_add_free_space(struct btrfs_free_space_ctl *ctl, u64 offset, + u64 bytes) +{ + struct btrfs_free_space *info; + int ret = 0; + + info = calloc(1, sizeof(*info)); + if (!info) + return -ENOMEM; + + info->offset = offset; + info->bytes = bytes; + + try_merge_free_space(ctl, info); + + ret = link_free_space(ctl, info); + if (ret) { + printk(KERN_CRIT "btrfs: unable to add free space :%d\n", ret); + BUG_ON(ret == -EEXIST); + } + + return ret; +} + +/* + * Merges all the free space cache and kills the bitmap entries since we just + * want to use the free space cache to verify it's correct, no reason to keep + * the bitmaps around to confuse things. + */ +static void merge_space_tree(struct btrfs_free_space_ctl *ctl) +{ + struct btrfs_free_space *e, *prev = NULL; + struct rb_node *n; + int ret; + u32 sectorsize = ctl->sectorsize; + +again: + prev = NULL; + for (n = rb_first(&ctl->free_space_offset); n; n = rb_next(n)) { + e = rb_entry(n, struct btrfs_free_space, offset_index); + if (e->bitmap) { + u64 offset = e->offset, bytes = ctl->unit; + u64 end; + + end = e->offset + (u64)(BITS_PER_BITMAP(sectorsize) * ctl->unit); + + unlink_free_space(ctl, e); + while (!(search_bitmap(ctl, e, &offset, &bytes))) { + ret = btrfs_add_free_space(ctl, offset, + bytes); + BUG_ON(ret); + offset += bytes; + if (offset >= end) + break; + bytes = ctl->unit; + } + free(e->bitmap); + free(e); + goto again; + } + if (!prev) + goto next; + if (prev->offset + prev->bytes == e->offset) { + unlink_free_space(ctl, prev); + unlink_free_space(ctl, e); + prev->bytes += e->bytes; + free(e); + link_free_space(ctl, prev); + goto again; + } +next: + prev = e; + } +} |