btrfs-progs: mkfs/rootdir: Use over-reserve method to make size estimate easier

Use an easier method to calculate the estimate device size for
mkfs.btrfs --rootdir.

The new method will over-estimate, but should ensure we won't encounter
ENOSPC.

It relies on the following data:
1) number of inodes -- for metadata chunk size
2) rounded up data size of each regular inode -- for data chunk size

Total meta chunk size = round_up(nr_inode * (PATH_MAX * 3 + sectorsize),
min_chunk_size) * profile_multiplier

PATH_MAX is the maximum size possible for INODE_REF/DIR_INDEX/DIR_ITEM.
Sectorsize is the maximum size possible for inline extent.
min_chunk_size is 8M for SINGLE, and 32M for DUP, taken from
btrfs_alloc_chunk().
profile_multiplier is 1 for Single, 2 for DUP.

Total data chunk size is much easier.
Total data chunk size = round_up(total_data_usage, min_chunk_size) *
profile_multiplier

Total_data_usage is the sum of the *rounded up* size that each regular
inode uses.
min_chunk_size is 8M for SINGLE, 64M for DUP, taken from btrfs_alloc_chunk().
Same profile_multiplier for meta.

This over-estimate calculation is, of course, inaccurate, but since we
will later shrink the fs to its real usage, it doesn't matter much now.

Signed-off-by: Qu Wenruo <wqu@suse.com>
[ update comments ]
Signed-off-by: David Sterba <dsterba@suse.com>
master
Qu Wenruo 2017-10-19 10:11:07 +08:00 committed by David Sterba
parent c7bc72264a
commit 599a0abed5
3 changed files with 137 additions and 92 deletions

View File

@ -732,8 +732,6 @@ int main(int argc, char **argv)
int force_overwrite = 0; int force_overwrite = 0;
char *source_dir = NULL; char *source_dir = NULL;
int source_dir_set = 0; int source_dir_set = 0;
u64 num_of_meta_chunks = 0;
u64 size_of_data = 0;
u64 source_dir_size = 0; u64 source_dir_size = 0;
u64 min_dev_size; u64 min_dev_size;
int dev_cnt = 0; int dev_cnt = 0;
@ -952,6 +950,34 @@ int main(int argc, char **argv)
min_dev_size = btrfs_min_dev_size(nodesize, mixed, metadata_profile, min_dev_size = btrfs_min_dev_size(nodesize, mixed, metadata_profile,
data_profile); data_profile);
/*
* Enlarge the destination file or create a new one, using the size
* calculated from source dir.
*
* This must be done before minimal device size checks.
*/
if (source_dir_set) {
fd = open(file, O_CREAT | O_RDWR, S_IRUSR | S_IWUSR | S_IRGRP |
S_IWGRP | S_IROTH);
if (fd < 0) {
error("unable to open %s: %s", file, strerror(errno));
goto error;
}
source_dir_size = btrfs_mkfs_size_dir(source_dir, sectorsize,
min_dev_size, metadata_profile, data_profile);
if (block_count < source_dir_size)
block_count = source_dir_size;
ret = zero_output_file(fd, block_count);
if (ret) {
error("unable to zero the output file");
close(fd);
goto error;
}
/* our "device" is the new image file */
dev_block_count = block_count;
close(fd);
}
/* Check device/block_count after the nodesize is determined */ /* Check device/block_count after the nodesize is determined */
if (block_count && block_count < min_dev_size) { if (block_count && block_count < min_dev_size) {
error("size %llu is too small to make a usable filesystem", error("size %llu is too small to make a usable filesystem",
@ -985,51 +1011,27 @@ int main(int argc, char **argv)
dev_cnt--; dev_cnt--;
if (!source_dir_set) { /*
/* * Open without O_EXCL so that the problem should not occur by the
* open without O_EXCL so that the problem should not * following operation in kernel:
* occur by the following processing. * (btrfs_register_one_device() fails if O_EXCL is on)
* (btrfs_register_one_device() fails if O_EXCL is on) */
*/ fd = open(file, O_RDWR);
fd = open(file, O_RDWR); if (fd < 0) {
if (fd < 0) { error("unable to open %s: %s", file, strerror(errno));
error("unable to open %s: %s", file, strerror(errno)); goto error;
goto error; }
} ret = btrfs_prepare_device(fd, file, &dev_block_count, block_count,
ret = btrfs_prepare_device(fd, file, &dev_block_count, (zero_end ? PREP_DEVICE_ZERO_END : 0) |
block_count, (discard ? PREP_DEVICE_DISCARD : 0) |
(zero_end ? PREP_DEVICE_ZERO_END : 0) | (verbose ? PREP_DEVICE_VERBOSE : 0));
(discard ? PREP_DEVICE_DISCARD : 0) | if (ret)
(verbose ? PREP_DEVICE_VERBOSE : 0)); goto error;
if (ret) { if (block_count && block_count > dev_block_count) {
goto error; error("%s is smaller than requested size, expected %llu, found %llu",
} file, (unsigned long long)block_count,
if (block_count && block_count > dev_block_count) { (unsigned long long)dev_block_count);
error("%s is smaller than requested size, expected %llu, found %llu", goto error;
file,
(unsigned long long)block_count,
(unsigned long long)dev_block_count);
goto error;
}
} else {
fd = open(file, O_CREAT | O_RDWR,
S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH);
if (fd < 0) {
error("unable to open %s: %s", file, strerror(errno));
goto error;
}
source_dir_size = btrfs_mkfs_size_dir(source_dir, sectorsize,
&num_of_meta_chunks, &size_of_data);
if(block_count < source_dir_size)
block_count = source_dir_size;
ret = zero_output_file(fd, block_count);
if (ret) {
error("unable to zero the output file");
goto error;
}
/* our "device" is the new image file */
dev_block_count = block_count;
} }
/* To create the first block group and chunk 0 in make_btrfs */ /* To create the first block group and chunk 0 in make_btrfs */
@ -1155,13 +1157,11 @@ int main(int argc, char **argv)
} }
raid_groups: raid_groups:
if (!source_dir_set) { ret = create_raid_groups(trans, root, data_profile,
ret = create_raid_groups(trans, root, data_profile, metadata_profile, mixed, &allocation);
metadata_profile, mixed, &allocation); if (ret) {
if (ret) { error("unable to create raid groups: %d", ret);
error("unable to create raid groups: %d", ret); goto out;
goto out;
}
} }
ret = create_tree(trans, root, BTRFS_DATA_RELOC_TREE_OBJECTID); ret = create_tree(trans, root, BTRFS_DATA_RELOC_TREE_OBJECTID);

View File

@ -33,19 +33,29 @@
#include "transaction.h" #include "transaction.h"
#include "utils.h" #include "utils.h"
#include "mkfs/rootdir.h" #include "mkfs/rootdir.h"
#include "mkfs/common.h"
#include "send-utils.h" #include "send-utils.h"
/* static u32 fs_block_size;
* This ignores symlinks with unreadable targets and subdirs that can't
* be read. It's a best-effort to give a rough estimate of the size of
* a subdir. It doesn't guarantee that prepopulating btrfs from this
* tree won't still run out of space.
*/
static u64 global_total_size;
static u64 fs_block_size;
static u64 index_cnt = 2; static u64 index_cnt = 2;
/*
* Size estimate will be done using the following data:
* 1) Number of inodes
* Since we will later shrink the fs, over-estimate is completely fine here
* as long as our estimate ensures we can populate the image without ENOSPC.
* So we only record how many inodes there are, and account the maximum
* space for each inode.
*
* 2) Data space for each (regular) inode
* To estimate data chunk size.
* Don't care if it can fit as an inline extent.
* Always round them up to sectorsize.
*/
static u64 ftw_meta_nr_inode;
static u64 ftw_data_size;
static int add_directory_items(struct btrfs_trans_handle *trans, static int add_directory_items(struct btrfs_trans_handle *trans,
struct btrfs_root *root, u64 objectid, struct btrfs_root *root, u64 objectid,
ino_t parent_inum, const char *name, ino_t parent_inum, const char *name,
@ -685,53 +695,88 @@ out:
static int ftw_add_entry_size(const char *fpath, const struct stat *st, static int ftw_add_entry_size(const char *fpath, const struct stat *st,
int type) int type)
{ {
if (type == FTW_F || type == FTW_D) /*
global_total_size += round_up(st->st_size, fs_block_size); * Failed to read the directory, mostly due to EPERM. Abort ASAP, so
* we don't need to populate the fs.
*/
if (type == FTW_DNR || type == FTW_NS)
return -EPERM;
if (S_ISREG(st->st_mode))
ftw_data_size += round_up(st->st_size, fs_block_size);
ftw_meta_nr_inode++;
return 0; return 0;
} }
u64 btrfs_mkfs_size_dir(const char *dir_name, u64 sectorsize, u64 btrfs_mkfs_size_dir(const char *dir_name, u32 sectorsize, u64 min_dev_size,
u64 *num_of_meta_chunks_ret, u64 *size_of_data_ret) u64 meta_profile, u64 data_profile)
{ {
u64 dir_size = 0;
u64 total_size = 0; u64 total_size = 0;
int ret; int ret;
u64 default_chunk_size = SZ_8M;
u64 allocated_meta_size = SZ_8M;
u64 allocated_total_size = 20 * SZ_1M; /* 20MB */
u64 num_of_meta_chunks = 0;
u64 num_of_data_chunks = 0;
u64 num_of_allocated_meta_chunks =
allocated_meta_size / default_chunk_size;
global_total_size = 0; u64 meta_size = 0; /* Based on @ftw_meta_nr_inode */
u64 meta_chunk_size = 0; /* Based on @meta_size */
u64 data_chunk_size = 0; /* Based on @ftw_data_size */
u64 meta_threshold = SZ_8M;
u64 data_threshold = SZ_8M;
float data_multipler = 1;
float meta_multipler = 1;
fs_block_size = sectorsize; fs_block_size = sectorsize;
ftw_data_size = 0;
ftw_meta_nr_inode = 0;
ret = ftw(dir_name, ftw_add_entry_size, 10); ret = ftw(dir_name, ftw_add_entry_size, 10);
dir_size = global_total_size;
if (ret < 0) { if (ret < 0) {
error("ftw subdir walk of %s failed: %s", dir_name, error("ftw subdir walk of %s failed: %s", dir_name,
strerror(errno)); strerror(errno));
exit(1); exit(1);
} }
num_of_data_chunks = (dir_size + default_chunk_size - 1) /
default_chunk_size;
num_of_meta_chunks = (dir_size / 2) / default_chunk_size; /*
if (((dir_size / 2) % default_chunk_size) != 0) * Maximum metadata useage for every inode, which will be PATH_MAX
num_of_meta_chunks++; * for the following items:
if (num_of_meta_chunks <= num_of_allocated_meta_chunks) * 1) DIR_ITEM
num_of_meta_chunks = 0; * 2) DIR_INDEX
else * 3) INODE_REF
num_of_meta_chunks -= num_of_allocated_meta_chunks; *
* Plus possible inline extent size, which is sectorsize.
*
* And finally, allow metadata usage to increase with data size.
* Follow the old kernel 8:1 data:meta ratio.
* This is especially important for --rootdir, as the file extent size
* upper limit is 1M, instead of 128M in kernel.
* This can bump meta usage easily.
*/
meta_size = ftw_meta_nr_inode * (PATH_MAX * 3 + sectorsize) +
ftw_data_size / 8;
total_size = allocated_total_size + /* Minimal chunk size from btrfs_alloc_chunk(). */
(num_of_data_chunks * default_chunk_size) + if (meta_profile & BTRFS_BLOCK_GROUP_DUP) {
(num_of_meta_chunks * default_chunk_size); meta_threshold = SZ_32M;
meta_multipler = 2;
}
if (data_profile & BTRFS_BLOCK_GROUP_DUP) {
data_threshold = SZ_64M;
data_multipler = 2;
}
*num_of_meta_chunks_ret = num_of_meta_chunks; /*
*size_of_data_ret = num_of_data_chunks * default_chunk_size; * Only when the usage is larger than the minimal chunk size (threshold)
* we need to allocate new chunk, or the initial chunk in the image is
* large enough.
*/
if (meta_size > meta_threshold)
meta_chunk_size = (round_up(meta_size, meta_threshold) -
meta_threshold) * meta_multipler;
if (ftw_data_size > data_threshold)
data_chunk_size = (round_up(ftw_data_size, data_threshold) -
data_threshold) * data_multipler;
total_size = data_chunk_size + meta_chunk_size + min_dev_size;
return total_size; return total_size;
} }

View File

@ -30,7 +30,7 @@ struct directory_name_entry {
int btrfs_mkfs_fill_dir(const char *source_dir, struct btrfs_root *root, int btrfs_mkfs_fill_dir(const char *source_dir, struct btrfs_root *root,
bool verbose); bool verbose);
u64 btrfs_mkfs_size_dir(const char *dir_name, u64 sectorsize, u64 btrfs_mkfs_size_dir(const char *dir_name, u32 sectorsize, u64 min_dev_size,
u64 *num_of_meta_chunks_ret, u64 *size_of_data_ret); u64 meta_profile, u64 data_profile);
#endif #endif