From 5856b7e1241b07a5d461ff1592562c8520c42d93 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 26 Apr 2007 16:46:06 -0400 Subject: [PATCH] start of block group code --- Makefile | 3 +- ctree.c | 6 ++ ctree.h | 70 ++++++++++++---- disk-io.c | 15 +++- extent-tree.c | 215 +++++++++++++++++++++++++++++++++++++++++++++++++- mkfs.c | 54 +++++++++++++ print-tree.c | 12 +-- 7 files changed, 348 insertions(+), 27 deletions(-) diff --git a/Makefile b/Makefile index 57597220..ff7e09ef 100644 --- a/Makefile +++ b/Makefile @@ -42,7 +42,8 @@ dir-test: $(objects) dir-test.o $(headers) quick-test: $(objects) quick-test.o $(headers) gcc $(CFLAGS) -o quick-test $(objects) quick-test.o +$(objects): $(headers) + clean : rm debug-tree mkfs.btrfs btrfsctl btrfsck *.o - diff --git a/ctree.c b/ctree.c index ab8f9d03..c6dcba70 100644 --- a/ctree.c +++ b/ctree.c @@ -685,6 +685,8 @@ static int insert_new_root(struct btrfs_trans_handle *trans, struct btrfs_root btrfs_set_header_level(&c->header, level); btrfs_set_header_blocknr(&c->header, t->blocknr); btrfs_set_header_owner(&c->header, root->root_key.objectid); + memcpy(c->header.fsid, root->fs_info->disk_super->fsid, + sizeof(c->header.fsid)); lower = &path->nodes[level-1]->node; if (btrfs_is_leaf(lower)) lower_key = &((struct btrfs_leaf *)lower)->items[0].key; @@ -771,6 +773,8 @@ static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root btrfs_set_header_level(&split->header, btrfs_header_level(&c->header)); btrfs_set_header_blocknr(&split->header, split_buffer->blocknr); btrfs_set_header_owner(&split->header, root->root_key.objectid); + memcpy(split->header.fsid, root->fs_info->disk_super->fsid, + sizeof(split->header.fsid)); mid = (c_nritems + 1) / 2; memcpy(split->ptrs, c->ptrs + mid, (c_nritems - mid) * sizeof(struct btrfs_key_ptr)); @@ -1096,6 +1100,8 @@ static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root btrfs_set_header_blocknr(&right->header, right_buffer->blocknr); btrfs_set_header_level(&right->header, 0); btrfs_set_header_owner(&right->header, root->root_key.objectid); + memcpy(right->header.fsid, root->fs_info->disk_super->fsid, + sizeof(right->header.fsid)); data_copy_size = btrfs_item_end(l->items + mid) - leaf_data_end(root, l); memcpy(right->items, l->items + mid, diff --git a/ctree.h b/ctree.h index 7dedae80..efd044a2 100644 --- a/ctree.h +++ b/ctree.h @@ -237,6 +237,19 @@ struct btrfs_device_item { __le64 device_id; } __attribute__ ((__packed__)); +/* tag for the radix tree of block groups in ram */ +#define BTRFS_BLOCK_GROUP_DIRTY 0 +#define BTRFS_BLOCK_GROUP_HINTS 8 +#define BTRFS_BLOCK_GROUP_SIZE (256 * 1024 * 1024) +struct btrfs_block_group_item { + __le64 used; +} __attribute__ ((__packed__)); + +struct btrfs_block_group_cache { + struct btrfs_key key; + struct btrfs_block_group_item item; +}; + struct btrfs_fs_info { struct btrfs_root *fs_root; struct btrfs_root *extent_root; @@ -247,6 +260,7 @@ struct btrfs_fs_info { struct radix_tree_root cache_radix; struct radix_tree_root pinned_radix; struct radix_tree_root dev_radix; + struct radix_tree_root block_group_radix; struct list_head trans; struct list_head cache; u64 last_inode_alloc; @@ -285,49 +299,67 @@ struct btrfs_root { * info about object characteristics. There is one for every file and dir in * the FS */ -#define BTRFS_INODE_ITEM_KEY 1 +#define BTRFS_INODE_ITEM_KEY 1 + +/* reserve 2-15 close to the inode for later flexibility */ /* * dir items are the name -> inode pointers in a directory. There is one * for every name in a directory. */ -#define BTRFS_DIR_ITEM_KEY 2 -#define BTRFS_DIR_INDEX_KEY 3 +#define BTRFS_DIR_ITEM_KEY 16 +#define BTRFS_DIR_INDEX_KEY 17 /* - * inline data is file data that fits in the btree. + * extent data is for file data */ -#define BTRFS_INLINE_DATA_KEY 4 -/* - * extent data is for data that can't fit in the btree. It points to - * a (hopefully) huge chunk of disk - */ -#define BTRFS_EXTENT_DATA_KEY 5 +#define BTRFS_EXTENT_DATA_KEY 18 /* * csum items have the checksums for data in the extents */ -#define BTRFS_CSUM_ITEM_KEY 6 +#define BTRFS_CSUM_ITEM_KEY 19 + +/* reserve 20-31 for other file stuff */ /* * root items point to tree roots. There are typically in the root * tree used by the super block to find all the other trees */ -#define BTRFS_ROOT_ITEM_KEY 7 +#define BTRFS_ROOT_ITEM_KEY 32 /* * extent items are in the extent map tree. These record which blocks * are used, and how many references there are to each block */ -#define BTRFS_EXTENT_ITEM_KEY 8 +#define BTRFS_EXTENT_ITEM_KEY 33 + +/* + * block groups give us hints into the extent allocation trees. Which + * blocks are free etc etc + */ +#define BTRFS_BLOCK_GROUP_ITEM_KEY 34 /* * dev items list the devices that make up the FS */ -#define BTRFS_DEV_ITEM_KEY 9 +#define BTRFS_DEV_ITEM_KEY 35 /* * string items are for debugging. They just store a short string of * data in the FS */ -#define BTRFS_STRING_ITEM_KEY 10 +#define BTRFS_STRING_ITEM_KEY 253 + + +static inline u64 btrfs_block_group_used(struct btrfs_block_group_item *bi) +{ + return le64_to_cpu(bi->used); +} + +static inline void btrfs_set_block_group_used(struct + btrfs_block_group_item *bi, + u64 val) +{ + bi->used = cpu_to_le64(val); +} static inline u64 btrfs_inode_generation(struct btrfs_inode_item *i) { @@ -1019,4 +1051,12 @@ int btrfs_insert_inode(struct btrfs_trans_handle *trans, struct btrfs_root *inode_item); int btrfs_lookup_inode(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, u64 objectid, int mod); +int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans, + struct btrfs_root *root); +int btrfs_free_block_groups(struct btrfs_fs_info *info); +int btrfs_read_block_groups(struct btrfs_root *root); +int btrfs_insert_block_group(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_key *key, + struct btrfs_block_group_item *bi); #endif diff --git a/disk-io.c b/disk-io.c index 0d921f16..d6a7a466 100644 --- a/disk-io.c +++ b/disk-io.c @@ -146,6 +146,8 @@ struct btrfs_buffer *read_tree_block(struct btrfs_root *root, u64 blocknr) buf = radix_tree_lookup(&root->fs_info->cache_radix, blocknr); if (buf) { buf->count++; + if (check_tree_block(root, buf)) + BUG(); } else { buf = alloc_tree_block(root, blocknr); if (!buf) @@ -157,9 +159,9 @@ struct btrfs_buffer *read_tree_block(struct btrfs_root *root, u64 blocknr) free(buf); return NULL; } + if (check_tree_block(root, buf)) + BUG(); } - if (check_tree_block(root, buf)) - BUG(); return buf; } @@ -170,6 +172,8 @@ int dirty_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, return 0; list_add_tail(&buf->dirty, &root->fs_info->trans); buf->count++; + if (check_tree_block(root, buf)) + BUG(); return 0; } @@ -191,6 +195,8 @@ int write_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, if (buf->blocknr != btrfs_header_blocknr(&buf->node.header)) BUG(); btrfs_map_bh_to_logical(root, buf, buf->blocknr); + if (check_tree_block(root, buf)) + BUG(); ret = pwrite(buf->fd, &buf->node, root->blocksize, buf->dev_blocknr * root->blocksize); if (ret != root->blocksize) @@ -229,6 +235,7 @@ static int commit_tree_roots(struct btrfs_trans_handle *trans, btrfs_set_super_device_root(fs_info->disk_super, fs_info->dev_root->node->blocknr); } + btrfs_write_dirty_block_groups(trans, fs_info->extent_root); while(1) { old_extent_block = btrfs_root_blocknr(&extent_root->root_item); if (old_extent_block == extent_root->node->blocknr) @@ -239,6 +246,7 @@ static int commit_tree_roots(struct btrfs_trans_handle *trans, &extent_root->root_key, &extent_root->root_item); BUG_ON(ret); + btrfs_write_dirty_block_groups(trans, fs_info->extent_root); } return 0; } @@ -425,6 +433,7 @@ struct btrfs_root *open_ctree_fd(int fp, struct btrfs_super_block *super) INIT_RADIX_TREE(&fs_info->cache_radix, GFP_KERNEL); INIT_RADIX_TREE(&fs_info->pinned_radix, GFP_KERNEL); INIT_RADIX_TREE(&fs_info->dev_radix, GFP_KERNEL); + INIT_RADIX_TREE(&fs_info->block_group_radix, GFP_KERNEL); INIT_LIST_HEAD(&fs_info->trans); INIT_LIST_HEAD(&fs_info->cache); fs_info->cache_size = 0; @@ -481,6 +490,7 @@ struct btrfs_root *open_ctree_fd(int fp, struct btrfs_super_block *super) root->node->count++; root->ref_cows = 1; root->fs_info->generation = root->root_key.offset + 1; + btrfs_read_block_groups(root); return root; } @@ -550,6 +560,7 @@ int close_ctree(struct btrfs_root *root, struct btrfs_super_block *s) BUG_ON(!list_empty(&root->fs_info->trans)); free_dev_radix(root->fs_info); + btrfs_free_block_groups(root->fs_info); close(root->fs_info->fp); if (root->node) btrfs_block_release(root, root->node); diff --git a/extent-tree.c b/extent-tree.c index a00a216b..97b69754 100644 --- a/extent-tree.c +++ b/extent-tree.c @@ -101,6 +101,106 @@ int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, return 0; } +static int write_one_cache_group(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, + struct btrfs_block_group_cache *cache) +{ + int ret; + int pending_ret; + struct btrfs_root *extent_root = root->fs_info->extent_root; + struct btrfs_block_group_item *bi; + struct btrfs_key ins; + + ret = find_free_extent(trans, root, 0, 0, (u64)-1, &ins); + if (ret) + return ret; + ret = btrfs_search_slot(trans, root->fs_info->extent_root, + &cache->key, path, 0, 1); + BUG_ON(ret); + bi = btrfs_item_ptr(&path->nodes[0]->leaf, path->slots[0], + struct btrfs_block_group_item); + memcpy(bi, &cache->item, sizeof(*bi)); + dirty_tree_block(trans, extent_root, path->nodes[0]); + btrfs_release_path(extent_root, path); + finish_current_insert(trans, root); + pending_ret = run_pending(trans, root); + if (ret) + return ret; + if (pending_ret) + return pending_ret; + return 0; + +} + +int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans, + struct btrfs_root *root) +{ + struct btrfs_block_group_cache *cache[8]; + int ret; + int err = 0; + int werr = 0; + struct radix_tree_root *radix = &root->fs_info->block_group_radix; + int i; + struct btrfs_path path; + btrfs_init_path(&path); + + while(1) { + ret = radix_tree_gang_lookup_tag(radix, (void **)cache, + 0, ARRAY_SIZE(cache), + BTRFS_BLOCK_GROUP_DIRTY); + if (!ret) + break; + for (i = 0; i < ret; i++) { + radix_tree_tag_clear(radix, cache[i]->key.objectid + + cache[i]->key.offset -1, + BTRFS_BLOCK_GROUP_DIRTY); + err = write_one_cache_group(trans, root, + &path, cache[i]); + if (err) + werr = err; + } + } + return werr; +} + +static int update_block_group(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + u64 blocknr, u64 num, int alloc) +{ + struct btrfs_block_group_cache *cache; + struct btrfs_fs_info *info = root->fs_info; + u64 total = num; + u64 old_val; + u64 block_in_group; + int ret; + + while(total) { + ret = radix_tree_gang_lookup(&info->block_group_radix, + (void **)&cache, blocknr, 1); + if (!ret) + return -1; + radix_tree_tag_set(&info->block_group_radix, + cache->key.objectid + cache->key.offset - 1, + BTRFS_BLOCK_GROUP_DIRTY); + + block_in_group = blocknr - cache->key.objectid; + old_val = btrfs_block_group_used(&cache->item); + if (total > cache->key.offset - block_in_group) + num = cache->key.offset - block_in_group; + else + num = total; + total -= num; + blocknr += num; + if (alloc) + old_val += num; + else + old_val -= num; + btrfs_set_block_group_used(&cache->item, old_val); + } + return 0; +} + int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, struct btrfs_root *root) { @@ -151,6 +251,9 @@ static int finish_current_insert(struct btrfs_trans_handle *trans, struct super_blocks_used + 1); ret = btrfs_insert_item(trans, extent_root, &ins, &extent_item, sizeof(extent_item)); + if (ret) { + btrfs_print_tree(extent_root, extent_root->node); + } BUG_ON(ret); } extent_root->fs_info->current_insert.offset = 0; @@ -211,6 +314,8 @@ static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root extent_root->fs_info->last_insert.objectid = blocknr; if (ret) BUG(); + ret = update_block_group(trans, root, blocknr, num_blocks, 0); + BUG_ON(ret); } btrfs_release_path(extent_root, &path); finish_current_insert(trans, extent_root); @@ -334,16 +439,18 @@ check_failed: goto error; if (!start_found) { ins->objectid = search_start; - ins->offset = (u64)-1; + ins->offset = (u64)-1 - search_start; start_found = 1; goto check_pending; } ins->objectid = last_block > search_start ? last_block : search_start; - ins->offset = (u64)-1; + ins->offset = (u64)-1 - ins->objectid; goto check_pending; } btrfs_disk_key_to_cpu(&key, &l->items[slot].key); + if (btrfs_key_type(&key) != BTRFS_EXTENT_ITEM_KEY) + goto next; if (key.objectid >= search_start) { if (start_found) { if (last_block < search_start) @@ -358,6 +465,7 @@ check_failed: } start_found = 1; last_block = key.objectid + key.offset; +next: path.slots[0]++; } // FIXME -ENOSPC @@ -386,7 +494,6 @@ error: btrfs_release_path(root, &path); return ret; } - /* * finds a free extent and does all the dirty work required for allocation * returns the key for the extent through ins, and a tree buffer for @@ -456,13 +563,16 @@ struct btrfs_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, BUG(); return NULL; } + ret = update_block_group(trans, root, ins.objectid, ins.offset, 1); buf = find_tree_block(root, ins.objectid); - dirty_tree_block(trans, root, buf); btrfs_set_header_generation(&buf->node.header, root->root_key.offset + 1); + btrfs_set_header_blocknr(&buf->node.header, buf->blocknr); memcpy(buf->node.header.fsid, root->fs_info->disk_super->fsid, sizeof(buf->node.header.fsid)); + dirty_tree_block(trans, root, buf); return buf; + } /* @@ -589,3 +699,100 @@ int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root } return ret; } + +int btrfs_free_block_groups(struct btrfs_fs_info *info) +{ + int ret; + struct btrfs_block_group_cache *cache[8]; + int i; + + while(1) { + ret = radix_tree_gang_lookup(&info->block_group_radix, + (void **)cache, 0, + ARRAY_SIZE(cache)); + if (!ret) + break; + for (i = 0; i < ret; i++) { + radix_tree_delete(&info->block_group_radix, + cache[i]->key.objectid + + cache[i]->key.offset - 1); + free(cache[i]); + } + } + return 0; +} + +int btrfs_read_block_groups(struct btrfs_root *root) +{ + struct btrfs_path path; + int ret; + int err = 0; + struct btrfs_block_group_item *bi; + struct btrfs_block_group_cache *cache; + struct btrfs_key key; + struct btrfs_key found_key; + struct btrfs_leaf *leaf; + u64 group_size_blocks = BTRFS_BLOCK_GROUP_SIZE / root->blocksize; + + root = root->fs_info->extent_root; + key.objectid = 0; + key.offset = group_size_blocks; + key.flags = 0; + btrfs_set_key_type(&key, BTRFS_BLOCK_GROUP_ITEM_KEY); + btrfs_init_path(&path); + + while(1) { + ret = btrfs_search_slot(NULL, root->fs_info->extent_root, + &key, &path, 0, 0); + if (ret != 0) { + err = ret; + break; + } + leaf = &path.nodes[0]->leaf; + btrfs_disk_key_to_cpu(&found_key, + &leaf->items[path.slots[0]].key); + cache = malloc(sizeof(*cache)); + if (!cache) { + err = -1; + break; + } + bi = btrfs_item_ptr(leaf, path.slots[0], + struct btrfs_block_group_item); + memcpy(&cache->item, bi, sizeof(*bi)); + memcpy(&cache->key, &found_key, sizeof(found_key)); + key.objectid = found_key.objectid + found_key.offset; + btrfs_release_path(root, &path); + ret = radix_tree_insert(&root->fs_info->block_group_radix, + found_key.objectid + + found_key.offset - 1, (void *)cache); + BUG_ON(ret); + if (key.objectid >= + btrfs_super_total_blocks(root->fs_info->disk_super)) + break; + } + btrfs_release_path(root, &path); + return 0; +} + +int btrfs_insert_block_group(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_key *key, + struct btrfs_block_group_item *bi) +{ + struct btrfs_key ins; + int ret; + int pending_ret; + + root = root->fs_info->extent_root; + ret = find_free_extent(trans, root, 0, 0, (u64)-1, &ins); + if (ret) + return ret; + ret = btrfs_insert_item(trans, root, key, bi, sizeof(*bi)); + finish_current_insert(trans, root); + pending_ret = run_pending(trans, root); + if (ret) + return ret; + if (pending_ret) + return pending_ret; + return ret; +} diff --git a/mkfs.c b/mkfs.c index 90f7f080..e21eecae 100644 --- a/mkfs.c +++ b/mkfs.c @@ -64,6 +64,59 @@ error: return ret; } +static int make_block_groups(struct btrfs_trans_handle *trans, + struct btrfs_root *root) +{ + u64 group_size_blocks; + u64 total_blocks; + u64 cur_start; + int ret; + struct btrfs_block_group_cache *cache; + + root = root->fs_info->extent_root; + /* first we bootstrap the things into cache */ + group_size_blocks = BTRFS_BLOCK_GROUP_SIZE / root->blocksize; + cache = malloc(sizeof(*cache)); + cache->key.objectid = 0; + cache->key.offset = group_size_blocks; + cache->key.flags = 0; + btrfs_set_key_type(&cache->key, BTRFS_BLOCK_GROUP_ITEM_KEY); + memset(&cache->item, 0, sizeof(cache->item)); + btrfs_set_block_group_used(&cache->item, + btrfs_super_blocks_used(root->fs_info->disk_super)); + ret = radix_tree_insert(&root->fs_info->block_group_radix, + group_size_blocks - 1, (void *)cache); + BUG_ON(ret); + + total_blocks = btrfs_super_total_blocks(root->fs_info->disk_super); + cur_start = group_size_blocks; + while(cur_start < total_blocks) { + cache = malloc(sizeof(*cache)); + cache->key.objectid = cur_start; + cache->key.offset = group_size_blocks; + cache->key.flags = 0; + btrfs_set_key_type(&cache->key, BTRFS_BLOCK_GROUP_ITEM_KEY); + memset(&cache->item, 0, sizeof(cache->item)); + ret = radix_tree_insert(&root->fs_info->block_group_radix, + cur_start + group_size_blocks - 1, + (void *)cache); + BUG_ON(ret); + cur_start += group_size_blocks; + } + /* then insert all the items */ + cur_start = 0; + while(cur_start < total_blocks) { + cache = radix_tree_lookup(&root->fs_info->block_group_radix, + cur_start + group_size_blocks - 1); + BUG_ON(!cache); + ret = btrfs_insert_block_group(trans, root, &cache->key, + &cache->item); + BUG_ON(ret); + cur_start += group_size_blocks; + } + return 0; +} + static int make_root_dir(int fd) { struct btrfs_root *root; struct btrfs_super_block super; @@ -78,6 +131,7 @@ static int make_root_dir(int fd) { return -1; } trans = btrfs_start_transaction(root, 1); + ret = make_block_groups(trans, root); ret = __make_root_dir(trans, root->fs_info->tree_root, BTRFS_ROOT_TREE_DIR_OBJECTID); if (ret) diff --git a/print-tree.c b/print-tree.c index b7018eae..0b4d8059 100644 --- a/print-tree.c +++ b/print-tree.c @@ -37,7 +37,7 @@ void btrfs_print_leaf(struct btrfs_root *root, struct btrfs_leaf *l) struct btrfs_inode_item *ii; struct btrfs_file_extent_item *fi; struct btrfs_csum_item *ci; - char *p; + struct btrfs_block_group_item *bi; u32 type; printf("leaf %Lu ptrs %d free space %d generation %Lu owner %Lu\n", @@ -64,10 +64,6 @@ void btrfs_print_leaf(struct btrfs_root *root, struct btrfs_leaf *l) btrfs_inode_size(ii), btrfs_inode_mode(ii)); break; - case BTRFS_INLINE_DATA_KEY: - p = btrfs_item_ptr(l, i, char); - printf("\t\tinline data %.*s\n", 10, p); - break; case BTRFS_DIR_ITEM_KEY: di = btrfs_item_ptr(l, i, struct btrfs_dir_item); print_dir_item(l->items + i, di); @@ -110,6 +106,12 @@ void btrfs_print_leaf(struct btrfs_root *root, struct btrfs_leaf *l) btrfs_file_extent_offset(fi), btrfs_file_extent_num_blocks(fi)); break; + case BTRFS_BLOCK_GROUP_ITEM_KEY: + bi = btrfs_item_ptr(l, i, + struct btrfs_block_group_item); + printf("\t\tblock group used %Lu\n", + btrfs_block_group_used(bi)); + break; case BTRFS_DEV_ITEM_KEY: devi = btrfs_item_ptr(l, i, struct btrfs_device_item); printf("\t\tdev id %Lu namelen %u name %.*s\n",