diff --git a/cmds-check.c b/cmds-check.c index c5bca84b..7bb68ceb 100644 --- a/cmds-check.c +++ b/cmds-check.c @@ -4643,6 +4643,353 @@ out: return ret; } +static int pin_down_tree_blocks(struct btrfs_fs_info *fs_info, + struct extent_buffer *eb, int tree_root) +{ + struct extent_buffer *tmp; + struct btrfs_root_item *ri; + struct btrfs_key key; + u64 bytenr; + u32 leafsize; + int level = btrfs_header_level(eb); + int nritems; + int ret; + int i; + + btrfs_pin_extent(fs_info, eb->start, eb->len); + + leafsize = btrfs_super_leafsize(fs_info->super_copy); + nritems = btrfs_header_nritems(eb); + for (i = 0; i < nritems; i++) { + if (level == 0) { + btrfs_item_key_to_cpu(eb, &key, i); + if (key.type != BTRFS_ROOT_ITEM_KEY) + continue; + /* Skip the extent root and reloc roots */ + if (key.objectid == BTRFS_EXTENT_TREE_OBJECTID || + key.objectid == BTRFS_TREE_RELOC_OBJECTID || + key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID) + continue; + ri = btrfs_item_ptr(eb, i, struct btrfs_root_item); + bytenr = btrfs_disk_root_bytenr(eb, ri); + + /* + * If at any point we start needing the real root we + * will have to build a stump root for the root we are + * in, but for now this doesn't actually use the root so + * just pass in extent_root. + */ + tmp = read_tree_block(fs_info->extent_root, bytenr, + leafsize, 0); + if (!tmp) { + fprintf(stderr, "Error reading root block\n"); + return -EIO; + } + ret = pin_down_tree_blocks(fs_info, tmp, 0); + free_extent_buffer(tmp); + if (ret) + return ret; + } else { + bytenr = btrfs_node_blockptr(eb, i); + + /* If we aren't the tree root don't read the block */ + if (level == 1 && !tree_root) { + btrfs_pin_extent(fs_info, bytenr, leafsize); + continue; + } + + tmp = read_tree_block(fs_info->extent_root, bytenr, + leafsize, 0); + if (!tmp) { + fprintf(stderr, "Error reading tree block\n"); + return -EIO; + } + ret = pin_down_tree_blocks(fs_info, tmp, tree_root); + free_extent_buffer(tmp); + if (ret) + return ret; + } + } + + return 0; +} + +static int pin_metadata_blocks(struct btrfs_fs_info *fs_info) +{ + int ret; + + ret = pin_down_tree_blocks(fs_info, fs_info->chunk_root->node, 0); + if (ret) + return ret; + + return pin_down_tree_blocks(fs_info, fs_info->tree_root->node, 1); +} + +static int reset_block_groups(struct btrfs_fs_info *fs_info) +{ + struct btrfs_path *path; + struct extent_buffer *leaf; + struct btrfs_chunk *chunk; + struct btrfs_key key; + int ret; + + path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; + + key.objectid = 0; + key.type = BTRFS_CHUNK_ITEM_KEY; + key.offset = 0; + + ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, path, 0, 0); + if (ret < 0) { + btrfs_free_path(path); + return ret; + } + + /* + * We do this in case the block groups were screwed up and had alloc + * bits that aren't actually set on the chunks. This happens with + * restored images every time and could happen in real life I guess. + */ + fs_info->avail_data_alloc_bits = 0; + fs_info->avail_metadata_alloc_bits = 0; + fs_info->avail_system_alloc_bits = 0; + + /* First we need to create the in-memory block groups */ + while (1) { + if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) { + ret = btrfs_next_leaf(fs_info->chunk_root, path); + if (ret < 0) { + btrfs_free_path(path); + return ret; + } + if (ret) { + ret = 0; + break; + } + } + leaf = path->nodes[0]; + btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); + if (key.type != BTRFS_CHUNK_ITEM_KEY) { + path->slots[0]++; + continue; + } + + chunk = btrfs_item_ptr(leaf, path->slots[0], + struct btrfs_chunk); + btrfs_add_block_group(fs_info, 0, + btrfs_chunk_type(leaf, chunk), + key.objectid, key.offset, + btrfs_chunk_length(leaf, chunk)); + path->slots[0]++; + } + + btrfs_free_path(path); + return 0; +} + +static int reset_balance(struct btrfs_trans_handle *trans, + struct btrfs_fs_info *fs_info) +{ + struct btrfs_root *root = fs_info->tree_root; + struct btrfs_path *path; + struct extent_buffer *leaf; + struct btrfs_key key; + int del_slot, del_nr = 0; + int ret; + int found = 0; + + path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; + + key.objectid = BTRFS_BALANCE_OBJECTID; + key.type = BTRFS_BALANCE_ITEM_KEY; + key.offset = 0; + + ret = btrfs_search_slot(trans, root, &key, path, -1, 1); + if (ret) { + if (ret > 0) + ret = 0; + goto out; + } + + ret = btrfs_del_item(trans, root, path); + if (ret) + goto out; + btrfs_release_path(root, path); + + key.objectid = BTRFS_TREE_RELOC_OBJECTID; + key.type = BTRFS_ROOT_ITEM_KEY; + key.offset = 0; + + ret = btrfs_search_slot(trans, root, &key, path, -1, 1); + if (ret < 0) + goto out; + while (1) { + if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) { + if (!found) + break; + + if (del_nr) { + ret = btrfs_del_items(trans, root, path, + del_slot, del_nr); + del_nr = 0; + if (ret) + goto out; + } + key.offset++; + btrfs_release_path(root, path); + + found = 0; + ret = btrfs_search_slot(trans, root, &key, path, + -1, 1); + if (ret < 0) + goto out; + continue; + } + found = 1; + leaf = path->nodes[0]; + btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); + if (key.objectid > BTRFS_TREE_RELOC_OBJECTID) + break; + if (key.objectid != BTRFS_TREE_RELOC_OBJECTID) { + path->slots[0]++; + continue; + } + if (!del_nr) { + del_slot = path->slots[0]; + del_nr = 1; + } else { + del_nr++; + } + path->slots[0]++; + } + + if (del_nr) { + ret = btrfs_del_items(trans, root, path, del_slot, del_nr); + if (ret) + goto out; + } + btrfs_release_path(root, path); + + key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID; + key.type = BTRFS_ROOT_ITEM_KEY; + key.offset = (u64)-1; + root = btrfs_read_fs_root(fs_info, &key); + if (IS_ERR(root)) { + fprintf(stderr, "Error reading data reloc tree\n"); + return PTR_ERR(root); + } + root->track_dirty = 1; + if (root->last_trans != trans->transid) { + root->last_trans = trans->transid; + root->commit_root = root->node; + extent_buffer_get(root->node); + } + ret = btrfs_fsck_reinit_root(trans, root, 0); +out: + btrfs_free_path(path); + return ret; +} + +static int reinit_extent_tree(struct btrfs_fs_info *fs_info) +{ + struct btrfs_trans_handle *trans; + u64 start = 0; + int ret; + + /* + * The only reason we don't do this is because right now we're just + * walking the trees we find and pinning down their bytes, we don't look + * at any of the leaves. In order to do mixed groups we'd have to check + * the leaves of any fs roots and pin down the bytes for any file + * extents we find. Not hard but why do it if we don't have to? + */ + if (btrfs_fs_incompat(fs_info, BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS)) { + fprintf(stderr, "We don't support re-initing the extent tree " + "for mixed block groups yet, please notify a btrfs " + "developer you want to do this so they can add this " + "functionality.\n"); + return -EINVAL; + } + + trans = btrfs_start_transaction(fs_info->extent_root, 1); + if (IS_ERR(trans)) { + fprintf(stderr, "Error starting transaction\n"); + return PTR_ERR(trans); + } + + /* + * first we need to walk all of the trees except the extent tree and pin + * down the bytes that are in use so we don't overwrite any existing + * metadata. + */ + ret = pin_metadata_blocks(fs_info); + if (ret) { + fprintf(stderr, "error pinning down used bytes\n"); + return ret; + } + + /* + * Need to drop all the block groups since we're going to recreate all + * of them again. + */ + btrfs_free_block_groups(fs_info); + ret = reset_block_groups(fs_info); + if (ret) { + fprintf(stderr, "error resetting the block groups\n"); + return ret; + } + + /* Ok we can allocate now, reinit the extent root */ + ret = btrfs_fsck_reinit_root(trans, fs_info->extent_root, 1); + if (ret) { + fprintf(stderr, "extent root initialization failed\n"); + /* + * When the transaction code is updated we should end the + * transaction, but for now progs only knows about commit so + * just return an error. + */ + return ret; + } + + ret = reset_balance(trans, fs_info); + if (ret) { + fprintf(stderr, "error reseting the pending balance\n"); + return ret; + } + + /* + * Now we have all the in-memory block groups setup so we can make + * allocations properly, and the metadata we care about is safe since we + * pinned all of it above. + */ + while (1) { + struct btrfs_block_group_cache *cache; + + cache = btrfs_lookup_first_block_group(fs_info, start); + if (!cache) + break; + start = cache->key.objectid + cache->key.offset; + ret = btrfs_insert_item(trans, fs_info->extent_root, + &cache->key, &cache->item, + sizeof(cache->item)); + if (ret) { + fprintf(stderr, "Error adding block group\n"); + return ret; + } + btrfs_extent_post_op(trans, fs_info->extent_root); + } + + /* + * Ok now we commit and run the normal fsck, which will add extent + * entries for all of the items it finds. + */ + return btrfs_commit_transaction(trans, fs_info->extent_root); +} + static struct option long_options[] = { { "super", 1, NULL, 's' }, { "repair", 0, NULL, 0 }, @@ -4674,6 +5021,7 @@ int cmd_check(int argc, char **argv) int repair = 0; int option_index = 0; int init_csum_tree = 0; + int init_extent_tree = 0; int rw = 0; while(1) { @@ -4702,6 +5050,10 @@ int cmd_check(int argc, char **argv) printf("Creating a new CRC tree\n"); init_csum_tree = 1; rw = 1; + } else if (option_index == 3) { + init_extent_tree = 1; + rw = 1; + repair = 1; } } @@ -4740,6 +5092,12 @@ int cmd_check(int argc, char **argv) root = info->fs_root; + if (init_extent_tree) { + printf("Creating a new extent tree\n"); + ret = reinit_extent_tree(info); + if (ret) + return ret; + } fprintf(stderr, "checking extents\n"); if (init_csum_tree) { struct btrfs_trans_handle *trans; @@ -4751,7 +5109,7 @@ int cmd_check(int argc, char **argv) return PTR_ERR(trans); } - ret = btrfs_fsck_reinit_root(trans, info->csum_root); + ret = btrfs_fsck_reinit_root(trans, info->csum_root, 0); if (ret) { fprintf(stderr, "crc root initialization failed\n"); return -EIO; diff --git a/ctree.c b/ctree.c index 4598665f..2a400ec7 100644 --- a/ctree.c +++ b/ctree.c @@ -138,7 +138,7 @@ int btrfs_copy_root(struct btrfs_trans_handle *trans, } int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans, - struct btrfs_root *root) + struct btrfs_root *root, int overwrite) { struct extent_buffer *c; struct extent_buffer *old = root->node; @@ -147,6 +147,11 @@ int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans, level = 0; + if (overwrite) { + c = old; + extent_buffer_get(c); + goto init; + } c = btrfs_alloc_free_block(trans, root, btrfs_level_size(root, 0), root->root_key.objectid, @@ -155,7 +160,7 @@ int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans, c = old; extent_buffer_get(c); } - +init: memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header)); btrfs_set_header_level(c, level); btrfs_set_header_bytenr(c, c->start); diff --git a/ctree.h b/ctree.h index cc145944..3fe14b0e 100644 --- a/ctree.h +++ b/ctree.h @@ -2130,6 +2130,9 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans, struct btrfs_root *root); int btrfs_free_block_groups(struct btrfs_fs_info *info); int btrfs_read_block_groups(struct btrfs_root *root); +struct btrfs_block_group_cache * +btrfs_add_block_group(struct btrfs_fs_info *fs_info, u64 bytes_used, u64 type, + u64 chunk_objectid, u64 chunk_offset, u64 size); int btrfs_make_block_group(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 bytes_used, u64 type, u64 chunk_objectid, u64 chunk_offset, @@ -2149,7 +2152,7 @@ int btrfs_check_leaf(struct btrfs_root *root, struct btrfs_disk_key *parent_key, struct extent_buffer *buf); int btrfs_fsck_reinit_root(struct btrfs_trans_handle *trans, - struct btrfs_root *root); + struct btrfs_root *root, int overwrite); void reada_for_search(struct btrfs_root *root, struct btrfs_path *path, int level, int slot, u64 objectid); struct extent_buffer *read_node_slot(struct btrfs_root *root, diff --git a/extent-tree.c b/extent-tree.c index 9b381836..6c7229b8 100644 --- a/extent-tree.c +++ b/extent-tree.c @@ -3335,19 +3335,16 @@ error: return ret; } -int btrfs_make_block_group(struct btrfs_trans_handle *trans, - struct btrfs_root *root, u64 bytes_used, - u64 type, u64 chunk_objectid, u64 chunk_offset, - u64 size) +struct btrfs_block_group_cache * +btrfs_add_block_group(struct btrfs_fs_info *fs_info, u64 bytes_used, u64 type, + u64 chunk_objectid, u64 chunk_offset, u64 size) { int ret; int bit = 0; - struct btrfs_root *extent_root; struct btrfs_block_group_cache *cache; struct extent_io_tree *block_group_cache; - extent_root = root->fs_info->extent_root; - block_group_cache = &root->fs_info->block_group_cache; + block_group_cache = &fs_info->block_group_cache; cache = kzalloc(sizeof(*cache), GFP_NOFS); BUG_ON(!cache); @@ -3360,7 +3357,7 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans, cache->flags = type; btrfs_set_block_group_flags(&cache->item, type); - ret = update_space_info(root->fs_info, cache->flags, size, bytes_used, + ret = update_space_info(fs_info, cache->flags, size, bytes_used, &cache->space_info); BUG_ON(ret); @@ -3371,13 +3368,29 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans, set_state_private(block_group_cache, chunk_offset, (unsigned long)cache); + set_avail_alloc_bits(fs_info, type); + + return cache; +} + +int btrfs_make_block_group(struct btrfs_trans_handle *trans, + struct btrfs_root *root, u64 bytes_used, + u64 type, u64 chunk_objectid, u64 chunk_offset, + u64 size) +{ + int ret; + struct btrfs_root *extent_root; + struct btrfs_block_group_cache *cache; + + cache = btrfs_add_block_group(root->fs_info, bytes_used, type, + chunk_objectid, chunk_offset, size); + extent_root = root->fs_info->extent_root; ret = btrfs_insert_item(trans, extent_root, &cache->key, &cache->item, sizeof(cache->item)); BUG_ON(ret); finish_current_insert(trans, extent_root); ret = del_pending_extents(trans, extent_root); - set_avail_alloc_bits(extent_root->fs_info, type); return 0; }