btrfs-progs/free-space-cache.c

873 lines
20 KiB
C

/*
* Copyright (C) 2008 Red Hat. All rights reserved.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License v2 as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public
* License along with this program; if not, write to the
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
* Boston, MA 021110-1307, USA.
*/
#include "kerncompat.h"
#include "ctree.h"
#include "free-space-cache.h"
#include "transaction.h"
#include "disk-io.h"
#include "extent_io.h"
#include "crc32c.h"
#include "bitops.h"
/*
* Kernel always uses PAGE_CACHE_SIZE for sectorsize, but we don't have
* anything like that in userspace and have to get the value from the
* filesystem
*/
#define BITS_PER_BITMAP(sectorsize) ((sectorsize) * 8)
#define MAX_CACHE_BYTES_PER_GIG (32 * 1024)
static int link_free_space(struct btrfs_free_space_ctl *ctl,
struct btrfs_free_space *info);
static void merge_space_tree(struct btrfs_free_space_ctl *ctl);
struct io_ctl {
void *cur, *orig;
void *buffer;
struct btrfs_root *root;
unsigned long size;
u64 total_size;
int index;
int num_pages;
unsigned check_crcs:1;
};
static int io_ctl_init(struct io_ctl *io_ctl, u64 size, u64 ino,
struct btrfs_root *root)
{
memset(io_ctl, 0, sizeof(struct io_ctl));
io_ctl->num_pages = (size + root->sectorsize - 1) / root->sectorsize;
io_ctl->buffer = kzalloc(size, GFP_NOFS);
if (!io_ctl->buffer)
return -ENOMEM;
io_ctl->total_size = size;
io_ctl->root = root;
if (ino != BTRFS_FREE_INO_OBJECTID)
io_ctl->check_crcs = 1;
return 0;
}
static void io_ctl_free(struct io_ctl *io_ctl)
{
kfree(io_ctl->buffer);
}
static void io_ctl_unmap_page(struct io_ctl *io_ctl)
{
if (io_ctl->cur) {
io_ctl->cur = NULL;
io_ctl->orig = NULL;
}
}
static void io_ctl_map_page(struct io_ctl *io_ctl, int clear)
{
BUG_ON(io_ctl->index >= io_ctl->num_pages);
io_ctl->cur = io_ctl->buffer + (io_ctl->index++ * io_ctl->root->sectorsize);
io_ctl->orig = io_ctl->cur;
io_ctl->size = io_ctl->root->sectorsize;
if (clear)
memset(io_ctl->cur, 0, io_ctl->root->sectorsize);
}
static void io_ctl_drop_pages(struct io_ctl *io_ctl)
{
io_ctl_unmap_page(io_ctl);
}
static int io_ctl_prepare_pages(struct io_ctl *io_ctl, struct btrfs_root *root,
struct btrfs_path *path, u64 ino)
{
struct extent_buffer *leaf;
struct btrfs_file_extent_item *fi;
struct btrfs_key key;
u64 bytenr, len;
u64 total_read = 0;
int ret = 0;
key.objectid = ino;
key.type = BTRFS_EXTENT_DATA_KEY;
key.offset = 0;
ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
if (ret) {
printf("Couldn't find file extent item for free space inode"
" %Lu\n", ino);
btrfs_release_path(path);
return -EINVAL;
}
while (total_read < io_ctl->total_size) {
if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
ret = btrfs_next_leaf(root, path);
if (ret) {
ret = -EINVAL;
break;
}
}
leaf = path->nodes[0];
btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
if (key.objectid != ino) {
ret = -EINVAL;
break;
}
if (key.type != BTRFS_EXTENT_DATA_KEY) {
ret = -EINVAL;
break;
}
fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
struct btrfs_file_extent_item);
if (btrfs_file_extent_type(path->nodes[0], fi) !=
BTRFS_FILE_EXTENT_REG) {
printf("Not the file extent type we wanted\n");
ret = -EINVAL;
break;
}
bytenr = btrfs_file_extent_disk_bytenr(leaf, fi) +
btrfs_file_extent_offset(leaf, fi);
len = btrfs_file_extent_num_bytes(leaf, fi);
ret = read_data_from_disk(root->fs_info,
io_ctl->buffer + key.offset, bytenr,
len, 0);
if (ret)
break;
total_read += len;
path->slots[0]++;
}
btrfs_release_path(path);
return ret;
}
static int io_ctl_check_generation(struct io_ctl *io_ctl, u64 generation)
{
__le64 *gen;
/*
* Skip the crc area. If we don't check crcs then we just have a 64bit
* chunk at the front of the first page.
*/
if (io_ctl->check_crcs) {
io_ctl->cur += sizeof(u32) * io_ctl->num_pages;
io_ctl->size -= sizeof(u64) +
(sizeof(u32) * io_ctl->num_pages);
} else {
io_ctl->cur += sizeof(u64);
io_ctl->size -= sizeof(u64) * 2;
}
gen = io_ctl->cur;
if (le64_to_cpu(*gen) != generation) {
printk("btrfs: space cache generation "
"(%Lu) does not match inode (%Lu)\n", *gen,
generation);
io_ctl_unmap_page(io_ctl);
return -EIO;
}
io_ctl->cur += sizeof(u64);
return 0;
}
static int io_ctl_check_crc(struct io_ctl *io_ctl, int index)
{
u32 *tmp, val;
u32 crc = ~(u32)0;
unsigned offset = 0;
if (!io_ctl->check_crcs) {
io_ctl_map_page(io_ctl, 0);
return 0;
}
if (index == 0)
offset = sizeof(u32) * io_ctl->num_pages;
tmp = io_ctl->buffer;
tmp += index;
val = *tmp;
io_ctl_map_page(io_ctl, 0);
crc = crc32c(crc, io_ctl->orig + offset, io_ctl->root->sectorsize - offset);
btrfs_csum_final(crc, (char *)&crc);
if (val != crc) {
printk("btrfs: csum mismatch on free space cache\n");
io_ctl_unmap_page(io_ctl);
return -EIO;
}
return 0;
}
static int io_ctl_read_entry(struct io_ctl *io_ctl,
struct btrfs_free_space *entry, u8 *type)
{
struct btrfs_free_space_entry *e;
int ret;
if (!io_ctl->cur) {
ret = io_ctl_check_crc(io_ctl, io_ctl->index);
if (ret)
return ret;
}
e = io_ctl->cur;
entry->offset = le64_to_cpu(e->offset);
entry->bytes = le64_to_cpu(e->bytes);
*type = e->type;
io_ctl->cur += sizeof(struct btrfs_free_space_entry);
io_ctl->size -= sizeof(struct btrfs_free_space_entry);
if (io_ctl->size >= sizeof(struct btrfs_free_space_entry))
return 0;
io_ctl_unmap_page(io_ctl);
return 0;
}
static int io_ctl_read_bitmap(struct io_ctl *io_ctl,
struct btrfs_free_space *entry)
{
int ret;
ret = io_ctl_check_crc(io_ctl, io_ctl->index);
if (ret)
return ret;
memcpy(entry->bitmap, io_ctl->cur, io_ctl->root->sectorsize);
io_ctl_unmap_page(io_ctl);
return 0;
}
static int __load_free_space_cache(struct btrfs_root *root,
struct btrfs_free_space_ctl *ctl,
struct btrfs_path *path, u64 offset)
{
struct btrfs_free_space_header *header;
struct btrfs_inode_item *inode_item;
struct extent_buffer *leaf;
struct io_ctl io_ctl;
struct btrfs_key key;
struct btrfs_key inode_location;
struct btrfs_disk_key disk_key;
struct btrfs_free_space *e, *n;
struct list_head bitmaps;
u64 num_entries;
u64 num_bitmaps;
u64 generation;
u64 inode_size;
u8 type;
int ret = 0;
INIT_LIST_HEAD(&bitmaps);
key.objectid = BTRFS_FREE_SPACE_OBJECTID;
key.offset = offset;
key.type = 0;
ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
if (ret < 0) {
return 0;
} else if (ret > 0) {
btrfs_release_path(path);
return 0;
}
leaf = path->nodes[0];
header = btrfs_item_ptr(leaf, path->slots[0],
struct btrfs_free_space_header);
num_entries = btrfs_free_space_entries(leaf, header);
num_bitmaps = btrfs_free_space_bitmaps(leaf, header);
generation = btrfs_free_space_generation(leaf, header);
btrfs_free_space_key(leaf, header, &disk_key);
btrfs_disk_key_to_cpu(&inode_location, &disk_key);
btrfs_release_path(path);
ret = btrfs_search_slot(NULL, root, &inode_location, path, 0, 0);
if (ret) {
printf("Couldn't find free space inode %d\n", ret);
return 0;
}
leaf = path->nodes[0];
inode_item = btrfs_item_ptr(leaf, path->slots[0],
struct btrfs_inode_item);
inode_size = btrfs_inode_size(leaf, inode_item);
if (!inode_size || !btrfs_inode_generation(leaf, inode_item)) {
btrfs_release_path(path);
return 0;
}
if (btrfs_inode_generation(leaf, inode_item) != generation) {
printf("free space inode generation (%llu) did not match "
"free space cache generation (%llu)\n",
(unsigned long long)btrfs_inode_generation(leaf,
inode_item),
(unsigned long long)generation);
btrfs_release_path(path);
return 0;
}
btrfs_release_path(path);
if (!num_entries)
return 0;
ret = io_ctl_init(&io_ctl, inode_size, inode_location.objectid, root);
if (ret)
return ret;
ret = io_ctl_prepare_pages(&io_ctl, root, path,
inode_location.objectid);
if (ret)
goto out;
ret = io_ctl_check_crc(&io_ctl, 0);
if (ret)
goto free_cache;
ret = io_ctl_check_generation(&io_ctl, generation);
if (ret)
goto free_cache;
while (num_entries) {
e = calloc(1, sizeof(*e));
if (!e)
goto free_cache;
ret = io_ctl_read_entry(&io_ctl, e, &type);
if (ret) {
free(e);
goto free_cache;
}
if (!e->bytes) {
free(e);
goto free_cache;
}
if (type == BTRFS_FREE_SPACE_EXTENT) {
ret = link_free_space(ctl, e);
if (ret) {
printf("Duplicate entries in free space cache, dumping");
free(e);
goto free_cache;
}
} else {
BUG_ON(!num_bitmaps);
num_bitmaps--;
e->bitmap = kzalloc(ctl->sectorsize, GFP_NOFS);
if (!e->bitmap) {
free(e);
goto free_cache;
}
ret = link_free_space(ctl, e);
ctl->total_bitmaps++;
if (ret) {
printf("Duplicate entries in free space cache, dumping");
free(e->bitmap);
free(e);
goto free_cache;
}
list_add_tail(&e->list, &bitmaps);
}
num_entries--;
}
io_ctl_unmap_page(&io_ctl);
/*
* We add the bitmaps at the end of the entries in order that
* the bitmap entries are added to the cache.
*/
list_for_each_entry_safe(e, n, &bitmaps, list) {
list_del_init(&e->list);
ret = io_ctl_read_bitmap(&io_ctl, e);
if (ret)
goto free_cache;
}
io_ctl_drop_pages(&io_ctl);
merge_space_tree(ctl);
ret = 1;
out:
io_ctl_free(&io_ctl);
return ret;
free_cache:
io_ctl_drop_pages(&io_ctl);
__btrfs_remove_free_space_cache(ctl);
goto out;
}
int load_free_space_cache(struct btrfs_fs_info *fs_info,
struct btrfs_block_group_cache *block_group)
{
struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
struct btrfs_path *path;
u64 used = btrfs_block_group_used(&block_group->item);
int ret = 0;
int matched;
path = btrfs_alloc_path();
if (!path)
return 0;
ret = __load_free_space_cache(fs_info->tree_root, ctl, path,
block_group->key.objectid);
btrfs_free_path(path);
matched = (ctl->free_space == (block_group->key.offset - used -
block_group->bytes_super));
if (ret == 1 && !matched) {
__btrfs_remove_free_space_cache(ctl);
printf("block group %llu has wrong amount of free space",
block_group->key.objectid);
ret = -1;
}
if (ret < 0) {
ret = 0;
printf("failed to load free space cache for block group %llu\n",
block_group->key.objectid);
}
return ret;
}
static inline unsigned long offset_to_bit(u64 bitmap_start, u32 unit,
u64 offset)
{
BUG_ON(offset < bitmap_start);
offset -= bitmap_start;
return (unsigned long)(offset / unit);
}
static inline unsigned long bytes_to_bits(u64 bytes, u32 unit)
{
return (unsigned long)(bytes / unit);
}
static int tree_insert_offset(struct rb_root *root, u64 offset,
struct rb_node *node, int bitmap)
{
struct rb_node **p = &root->rb_node;
struct rb_node *parent = NULL;
struct btrfs_free_space *info;
while (*p) {
parent = *p;
info = rb_entry(parent, struct btrfs_free_space, offset_index);
if (offset < info->offset) {
p = &(*p)->rb_left;
} else if (offset > info->offset) {
p = &(*p)->rb_right;
} else {
/*
* we could have a bitmap entry and an extent entry
* share the same offset. If this is the case, we want
* the extent entry to always be found first if we do a
* linear search through the tree, since we want to have
* the quickest allocation time, and allocating from an
* extent is faster than allocating from a bitmap. So
* if we're inserting a bitmap and we find an entry at
* this offset, we want to go right, or after this entry
* logically. If we are inserting an extent and we've
* found a bitmap, we want to go left, or before
* logically.
*/
if (bitmap) {
if (info->bitmap)
return -EEXIST;
p = &(*p)->rb_right;
} else {
if (!info->bitmap)
return -EEXIST;
p = &(*p)->rb_left;
}
}
}
rb_link_node(node, parent, p);
rb_insert_color(node, root);
return 0;
}
/*
* searches the tree for the given offset.
*
* fuzzy - If this is set, then we are trying to make an allocation, and we just
* want a section that has at least bytes size and comes at or after the given
* offset.
*/
static struct btrfs_free_space *
tree_search_offset(struct btrfs_free_space_ctl *ctl,
u64 offset, int bitmap_only, int fuzzy)
{
struct rb_node *n = ctl->free_space_offset.rb_node;
struct btrfs_free_space *entry, *prev = NULL;
u32 sectorsize = ctl->sectorsize;
/* find entry that is closest to the 'offset' */
while (1) {
if (!n) {
entry = NULL;
break;
}
entry = rb_entry(n, struct btrfs_free_space, offset_index);
prev = entry;
if (offset < entry->offset)
n = n->rb_left;
else if (offset > entry->offset)
n = n->rb_right;
else
break;
}
if (bitmap_only) {
if (!entry)
return NULL;
if (entry->bitmap)
return entry;
/*
* bitmap entry and extent entry may share same offset,
* in that case, bitmap entry comes after extent entry.
*/
n = rb_next(n);
if (!n)
return NULL;
entry = rb_entry(n, struct btrfs_free_space, offset_index);
if (entry->offset != offset)
return NULL;
WARN_ON(!entry->bitmap);
return entry;
} else if (entry) {
if (entry->bitmap) {
/*
* if previous extent entry covers the offset,
* we should return it instead of the bitmap entry
*/
n = rb_prev(&entry->offset_index);
if (n) {
prev = rb_entry(n, struct btrfs_free_space,
offset_index);
if (!prev->bitmap &&
prev->offset + prev->bytes > offset)
entry = prev;
}
}
return entry;
}
if (!prev)
return NULL;
/* find last entry before the 'offset' */
entry = prev;
if (entry->offset > offset) {
n = rb_prev(&entry->offset_index);
if (n) {
entry = rb_entry(n, struct btrfs_free_space,
offset_index);
BUG_ON(entry->offset > offset);
} else {
if (fuzzy)
return entry;
else
return NULL;
}
}
if (entry->bitmap) {
n = rb_prev(&entry->offset_index);
if (n) {
prev = rb_entry(n, struct btrfs_free_space,
offset_index);
if (!prev->bitmap &&
prev->offset + prev->bytes > offset)
return prev;
}
if (entry->offset + BITS_PER_BITMAP(sectorsize) * ctl->unit > offset)
return entry;
} else if (entry->offset + entry->bytes > offset)
return entry;
if (!fuzzy)
return NULL;
while (1) {
if (entry->bitmap) {
if (entry->offset + BITS_PER_BITMAP(sectorsize) *
ctl->unit > offset)
break;
} else {
if (entry->offset + entry->bytes > offset)
break;
}
n = rb_next(&entry->offset_index);
if (!n)
return NULL;
entry = rb_entry(n, struct btrfs_free_space, offset_index);
}
return entry;
}
void unlink_free_space(struct btrfs_free_space_ctl *ctl,
struct btrfs_free_space *info)
{
rb_erase(&info->offset_index, &ctl->free_space_offset);
ctl->free_extents--;
ctl->free_space -= info->bytes;
}
static int link_free_space(struct btrfs_free_space_ctl *ctl,
struct btrfs_free_space *info)
{
int ret = 0;
BUG_ON(!info->bitmap && !info->bytes);
ret = tree_insert_offset(&ctl->free_space_offset, info->offset,
&info->offset_index, (info->bitmap != NULL));
if (ret)
return ret;
ctl->free_space += info->bytes;
ctl->free_extents++;
return ret;
}
static int search_bitmap(struct btrfs_free_space_ctl *ctl,
struct btrfs_free_space *bitmap_info, u64 *offset,
u64 *bytes)
{
unsigned long found_bits = 0;
unsigned long bits, i;
unsigned long next_zero;
u32 sectorsize = ctl->sectorsize;
i = offset_to_bit(bitmap_info->offset, ctl->unit,
max_t(u64, *offset, bitmap_info->offset));
bits = bytes_to_bits(*bytes, ctl->unit);
for_each_set_bit_from(i, bitmap_info->bitmap, BITS_PER_BITMAP(sectorsize)) {
next_zero = find_next_zero_bit(bitmap_info->bitmap,
BITS_PER_BITMAP(sectorsize), i);
if ((next_zero - i) >= bits) {
found_bits = next_zero - i;
break;
}
i = next_zero;
}
if (found_bits) {
*offset = (u64)(i * ctl->unit) + bitmap_info->offset;
*bytes = (u64)(found_bits) * ctl->unit;
return 0;
}
return -1;
}
struct btrfs_free_space *
btrfs_find_free_space(struct btrfs_free_space_ctl *ctl, u64 offset, u64 bytes)
{
return tree_search_offset(ctl, offset, 0, 0);
}
static void try_merge_free_space(struct btrfs_free_space_ctl *ctl,
struct btrfs_free_space *info)
{
struct btrfs_free_space *left_info;
struct btrfs_free_space *right_info;
u64 offset = info->offset;
u64 bytes = info->bytes;
/*
* first we want to see if there is free space adjacent to the range we
* are adding, if there is remove that struct and add a new one to
* cover the entire range
*/
right_info = tree_search_offset(ctl, offset + bytes, 0, 0);
if (right_info && rb_prev(&right_info->offset_index))
left_info = rb_entry(rb_prev(&right_info->offset_index),
struct btrfs_free_space, offset_index);
else
left_info = tree_search_offset(ctl, offset - 1, 0, 0);
if (right_info && !right_info->bitmap) {
unlink_free_space(ctl, right_info);
info->bytes += right_info->bytes;
free(right_info);
}
if (left_info && !left_info->bitmap &&
left_info->offset + left_info->bytes == offset) {
unlink_free_space(ctl, left_info);
info->offset = left_info->offset;
info->bytes += left_info->bytes;
free(left_info);
}
}
void btrfs_dump_free_space(struct btrfs_block_group_cache *block_group,
u64 bytes)
{
struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
struct btrfs_free_space *info;
struct rb_node *n;
int count = 0;
for (n = rb_first(&ctl->free_space_offset); n; n = rb_next(n)) {
info = rb_entry(n, struct btrfs_free_space, offset_index);
if (info->bytes >= bytes && !block_group->ro)
count++;
printk("entry offset %llu, bytes %llu, bitmap %s\n",
(unsigned long long)info->offset,
(unsigned long long)info->bytes,
(info->bitmap) ? "yes" : "no");
}
printk("%d blocks of free space at or bigger than bytes is \n", count);
}
int btrfs_init_free_space_ctl(struct btrfs_block_group_cache *block_group,
int sectorsize)
{
struct btrfs_free_space_ctl *ctl;
ctl = calloc(1, sizeof(*ctl));
if (!ctl)
return -ENOMEM;
ctl->sectorsize = sectorsize;
ctl->unit = sectorsize;
ctl->start = block_group->key.objectid;
ctl->private = block_group;
block_group->free_space_ctl = ctl;
return 0;
}
void __btrfs_remove_free_space_cache(struct btrfs_free_space_ctl *ctl)
{
struct btrfs_free_space *info;
struct rb_node *node;
while ((node = rb_last(&ctl->free_space_offset)) != NULL) {
info = rb_entry(node, struct btrfs_free_space, offset_index);
unlink_free_space(ctl, info);
free(info->bitmap);
free(info);
}
}
void btrfs_remove_free_space_cache(struct btrfs_block_group_cache *block_group)
{
__btrfs_remove_free_space_cache(block_group->free_space_ctl);
}
static int btrfs_add_free_space(struct btrfs_free_space_ctl *ctl, u64 offset,
u64 bytes)
{
struct btrfs_free_space *info;
int ret = 0;
info = calloc(1, sizeof(*info));
if (!info)
return -ENOMEM;
info->offset = offset;
info->bytes = bytes;
try_merge_free_space(ctl, info);
ret = link_free_space(ctl, info);
if (ret) {
printk(KERN_CRIT "btrfs: unable to add free space :%d\n", ret);
BUG_ON(ret == -EEXIST);
}
return ret;
}
/*
* Merges all the free space cache and kills the bitmap entries since we just
* want to use the free space cache to verify it's correct, no reason to keep
* the bitmaps around to confuse things.
*/
static void merge_space_tree(struct btrfs_free_space_ctl *ctl)
{
struct btrfs_free_space *e, *prev = NULL;
struct rb_node *n;
int ret;
u32 sectorsize = ctl->sectorsize;
again:
prev = NULL;
for (n = rb_first(&ctl->free_space_offset); n; n = rb_next(n)) {
e = rb_entry(n, struct btrfs_free_space, offset_index);
if (e->bitmap) {
u64 offset = e->offset, bytes = ctl->unit;
u64 end;
end = e->offset + (u64)(BITS_PER_BITMAP(sectorsize) * ctl->unit);
unlink_free_space(ctl, e);
while (!(search_bitmap(ctl, e, &offset, &bytes))) {
ret = btrfs_add_free_space(ctl, offset,
bytes);
BUG_ON(ret);
offset += bytes;
if (offset >= end)
break;
bytes = ctl->unit;
}
free(e->bitmap);
free(e);
goto again;
}
if (!prev)
goto next;
if (prev->offset + prev->bytes == e->offset) {
unlink_free_space(ctl, prev);
unlink_free_space(ctl, e);
prev->bytes += e->bytes;
free(e);
link_free_space(ctl, prev);
goto again;
}
next:
prev = e;
}
}