btrfs-progs: mkfs/rootdir: Use over-reserve method to make size estimate easier

Use an easier method to calculate the estimated device size for
mkfs.btrfs --rootdir.

The new method will over-estimate, but should ensure we won't encounter
ENOSPC.

It relies on the following data:
1) number of inodes -- for metadata chunk size
2) rounded up data size of each regular inode -- for data chunk size

Total meta chunk size = round_up(nr_inode * (PATH_MAX * 3 + sectorsize),
min_chunk_size) * profile_multiplier

PATH_MAX is the maximum size possible for INODE_REF/DIR_INDEX/DIR_ITEM.
Sectorsize is the maximum size possible for inline extent.
min_chunk_size is 8M for SINGLE and 32M for DUP, taken from
btrfs_alloc_chunk().
profile_multiplier is 1 for SINGLE, 2 for DUP.

Total data chunk size is much easier.
Total data chunk size = round_up(total_data_usage, min_chunk_size) *
profile_multiplier

Total_data_usage is the sum of the *rounded up* sizes of all regular
inodes.
min_chunk_size is 8M for SINGLE and 64M for DUP, taken from btrfs_alloc_chunk().
profile_multiplier is the same as for metadata.

This over-estimating calculation is, of course, inaccurate, but since we will
later shrink the fs to its real usage, it doesn't matter much now.

Signed-off-by: Qu Wenruo <wqu@suse.com>
[ update comments ]
Signed-off-by: David Sterba <dsterba@suse.com>
master
Qu Wenruo 2017-10-19 10:11:07 +08:00 committed by David Sterba
parent c7bc72264a
commit 599a0abed5
3 changed files with 137 additions and 92 deletions

View File

@ -732,8 +732,6 @@ int main(int argc, char **argv)
int force_overwrite = 0;
char *source_dir = NULL;
int source_dir_set = 0;
u64 num_of_meta_chunks = 0;
u64 size_of_data = 0;
u64 source_dir_size = 0;
u64 min_dev_size;
int dev_cnt = 0;
@ -952,6 +950,34 @@ int main(int argc, char **argv)
min_dev_size = btrfs_min_dev_size(nodesize, mixed, metadata_profile,
data_profile);
/*
* Enlarge the destination file or create a new one, using the size
* calculated from source dir.
*
* This must be done before minimal device size checks.
*/
if (source_dir_set) {
fd = open(file, O_CREAT | O_RDWR, S_IRUSR | S_IWUSR | S_IRGRP |
S_IWGRP | S_IROTH);
if (fd < 0) {
error("unable to open %s: %s", file, strerror(errno));
goto error;
}
source_dir_size = btrfs_mkfs_size_dir(source_dir, sectorsize,
min_dev_size, metadata_profile, data_profile);
if (block_count < source_dir_size)
block_count = source_dir_size;
ret = zero_output_file(fd, block_count);
if (ret) {
error("unable to zero the output file");
close(fd);
goto error;
}
/* our "device" is the new image file */
dev_block_count = block_count;
close(fd);
}
/* Check device/block_count after the nodesize is determined */
if (block_count && block_count < min_dev_size) {
error("size %llu is too small to make a usable filesystem",
@ -985,51 +1011,27 @@ int main(int argc, char **argv)
dev_cnt--;
if (!source_dir_set) {
/*
* open without O_EXCL so that the problem should not
* occur by the following processing.
* (btrfs_register_one_device() fails if O_EXCL is on)
*/
fd = open(file, O_RDWR);
if (fd < 0) {
error("unable to open %s: %s", file, strerror(errno));
goto error;
}
ret = btrfs_prepare_device(fd, file, &dev_block_count,
block_count,
(zero_end ? PREP_DEVICE_ZERO_END : 0) |
(discard ? PREP_DEVICE_DISCARD : 0) |
(verbose ? PREP_DEVICE_VERBOSE : 0));
if (ret) {
goto error;
}
if (block_count && block_count > dev_block_count) {
error("%s is smaller than requested size, expected %llu, found %llu",
file,
(unsigned long long)block_count,
(unsigned long long)dev_block_count);
goto error;
}
} else {
fd = open(file, O_CREAT | O_RDWR,
S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH);
if (fd < 0) {
error("unable to open %s: %s", file, strerror(errno));
goto error;
}
source_dir_size = btrfs_mkfs_size_dir(source_dir, sectorsize,
&num_of_meta_chunks, &size_of_data);
if(block_count < source_dir_size)
block_count = source_dir_size;
ret = zero_output_file(fd, block_count);
if (ret) {
error("unable to zero the output file");
goto error;
}
/* our "device" is the new image file */
dev_block_count = block_count;
/*
* Open without O_EXCL so that the problem should not occur by the
* following operation in kernel:
* (btrfs_register_one_device() fails if O_EXCL is on)
*/
fd = open(file, O_RDWR);
if (fd < 0) {
error("unable to open %s: %s", file, strerror(errno));
goto error;
}
ret = btrfs_prepare_device(fd, file, &dev_block_count, block_count,
(zero_end ? PREP_DEVICE_ZERO_END : 0) |
(discard ? PREP_DEVICE_DISCARD : 0) |
(verbose ? PREP_DEVICE_VERBOSE : 0));
if (ret)
goto error;
if (block_count && block_count > dev_block_count) {
error("%s is smaller than requested size, expected %llu, found %llu",
file, (unsigned long long)block_count,
(unsigned long long)dev_block_count);
goto error;
}
/* To create the first block group and chunk 0 in make_btrfs */
@ -1155,13 +1157,11 @@ int main(int argc, char **argv)
}
raid_groups:
if (!source_dir_set) {
ret = create_raid_groups(trans, root, data_profile,
metadata_profile, mixed, &allocation);
if (ret) {
error("unable to create raid groups: %d", ret);
goto out;
}
ret = create_raid_groups(trans, root, data_profile,
metadata_profile, mixed, &allocation);
if (ret) {
error("unable to create raid groups: %d", ret);
goto out;
}
ret = create_tree(trans, root, BTRFS_DATA_RELOC_TREE_OBJECTID);

View File

@ -33,19 +33,29 @@
#include "transaction.h"
#include "utils.h"
#include "mkfs/rootdir.h"
#include "mkfs/common.h"
#include "send-utils.h"
/*
* This ignores symlinks with unreadable targets and subdirs that can't
* be read. It's a best-effort to give a rough estimate of the size of
* a subdir. It doesn't guarantee that prepopulating btrfs from this
* tree won't still run out of space.
*/
static u64 global_total_size;
static u64 fs_block_size;
static u32 fs_block_size;
static u64 index_cnt = 2;
/*
* Size estimate will be done using the following data:
* 1) Number of inodes
* Since we will later shrink the fs, over-estimate is completely fine here
* as long as our estimate ensures we can populate the image without ENOSPC.
* So we only record how many inodes there are, and account the maximum
* space for each inode.
*
* 2) Data space for each (regular) inode
* To estimate data chunk size.
* Don't care if it can fit as an inline extent.
* Always round them up to sectorsize.
*/
static u64 ftw_meta_nr_inode;
static u64 ftw_data_size;
static int add_directory_items(struct btrfs_trans_handle *trans,
struct btrfs_root *root, u64 objectid,
ino_t parent_inum, const char *name,
@ -685,53 +695,88 @@ out:
static int ftw_add_entry_size(const char *fpath, const struct stat *st,
int type)
{
if (type == FTW_F || type == FTW_D)
global_total_size += round_up(st->st_size, fs_block_size);
/*
* Failed to read the directory, mostly due to EPERM. Abort ASAP, so
* we don't need to populate the fs.
*/
if (type == FTW_DNR || type == FTW_NS)
return -EPERM;
if (S_ISREG(st->st_mode))
ftw_data_size += round_up(st->st_size, fs_block_size);
ftw_meta_nr_inode++;
return 0;
}
u64 btrfs_mkfs_size_dir(const char *dir_name, u64 sectorsize,
u64 *num_of_meta_chunks_ret, u64 *size_of_data_ret)
u64 btrfs_mkfs_size_dir(const char *dir_name, u32 sectorsize, u64 min_dev_size,
u64 meta_profile, u64 data_profile)
{
u64 dir_size = 0;
u64 total_size = 0;
int ret;
u64 default_chunk_size = SZ_8M;
u64 allocated_meta_size = SZ_8M;
u64 allocated_total_size = 20 * SZ_1M; /* 20MB */
u64 num_of_meta_chunks = 0;
u64 num_of_data_chunks = 0;
u64 num_of_allocated_meta_chunks =
allocated_meta_size / default_chunk_size;
global_total_size = 0;
u64 meta_size = 0; /* Based on @ftw_meta_nr_inode */
u64 meta_chunk_size = 0; /* Based on @meta_size */
u64 data_chunk_size = 0; /* Based on @ftw_data_size */
u64 meta_threshold = SZ_8M;
u64 data_threshold = SZ_8M;
float data_multipler = 1;
float meta_multipler = 1;
fs_block_size = sectorsize;
ftw_data_size = 0;
ftw_meta_nr_inode = 0;
ret = ftw(dir_name, ftw_add_entry_size, 10);
dir_size = global_total_size;
if (ret < 0) {
error("ftw subdir walk of %s failed: %s", dir_name,
strerror(errno));
exit(1);
}
num_of_data_chunks = (dir_size + default_chunk_size - 1) /
default_chunk_size;
num_of_meta_chunks = (dir_size / 2) / default_chunk_size;
if (((dir_size / 2) % default_chunk_size) != 0)
num_of_meta_chunks++;
if (num_of_meta_chunks <= num_of_allocated_meta_chunks)
num_of_meta_chunks = 0;
else
num_of_meta_chunks -= num_of_allocated_meta_chunks;
/*
* Maximum metadata usage for every inode, which will be PATH_MAX
* for the following items:
* 1) DIR_ITEM
* 2) DIR_INDEX
* 3) INODE_REF
*
* Plus possible inline extent size, which is sectorsize.
*
* And finally, allow metadata usage to increase with data size.
* Follow the old kernel 8:1 data:meta ratio.
* This is especially important for --rootdir, as the file extent size
* upper limit is 1M, instead of 128M in kernel.
* This can bump meta usage easily.
*/
meta_size = ftw_meta_nr_inode * (PATH_MAX * 3 + sectorsize) +
ftw_data_size / 8;
total_size = allocated_total_size +
(num_of_data_chunks * default_chunk_size) +
(num_of_meta_chunks * default_chunk_size);
/* Minimal chunk size from btrfs_alloc_chunk(). */
if (meta_profile & BTRFS_BLOCK_GROUP_DUP) {
meta_threshold = SZ_32M;
meta_multipler = 2;
}
if (data_profile & BTRFS_BLOCK_GROUP_DUP) {
data_threshold = SZ_64M;
data_multipler = 2;
}
*num_of_meta_chunks_ret = num_of_meta_chunks;
*size_of_data_ret = num_of_data_chunks * default_chunk_size;
/*
* Only when the usage is larger than the minimal chunk size (threshold)
* we need to allocate new chunk, or the initial chunk in the image is
* large enough.
*/
if (meta_size > meta_threshold)
meta_chunk_size = (round_up(meta_size, meta_threshold) -
meta_threshold) * meta_multipler;
if (ftw_data_size > data_threshold)
data_chunk_size = (round_up(ftw_data_size, data_threshold) -
data_threshold) * data_multipler;
total_size = data_chunk_size + meta_chunk_size + min_dev_size;
return total_size;
}

View File

@ -30,7 +30,7 @@ struct directory_name_entry {
int btrfs_mkfs_fill_dir(const char *source_dir, struct btrfs_root *root,
bool verbose);
u64 btrfs_mkfs_size_dir(const char *dir_name, u64 sectorsize,
u64 *num_of_meta_chunks_ret, u64 *size_of_data_ret);
u64 btrfs_mkfs_size_dir(const char *dir_name, u32 sectorsize, u64 min_dev_size,
u64 meta_profile, u64 data_profile);
#endif