forked from Mirrors/btrfs-progs
btrfs-progs: mkfs/rootdir: Use over-reserve method to make size estimate easier
Use an easier method to calculate the estimated device size for mkfs.btrfs --rootdir. The new method will over-estimate, but should ensure we won't encounter ENOSPC. It relies on the following data: 1) number of inodes -- for metadata chunk size 2) rounded up data size of each regular inode -- for data chunk size Total meta chunk size = round_up(nr_inode * (PATH_MAX * 3 + sectorsize), min_chunk_size) * profile_multiplier PATH_MAX is the maximum size possible for INODE_REF/DIR_INDEX/DIR_ITEM. Sectorsize is the maximum size possible for an inline extent. min_chunk_size is 8M for SINGLE, and 32M for DUP, taken from btrfs_alloc_chunk(). profile_multiplier is 1 for SINGLE, 2 for DUP. Total data chunk size is much easier. Total data chunk size = round_up(total_data_usage, min_chunk_size) * profile_multiplier Total_data_usage is the sum of the *rounded up* size used by each regular inode. min_chunk_size is 8M for SINGLE, 64M for DUP, taken from btrfs_alloc_chunk(). Same profile_multiplier as for meta. This over-estimating calculation is, of course, inaccurate, but since we will later shrink the fs to its real usage, it doesn't matter much now. Signed-off-by: Qu Wenruo <wqu@suse.com> [ update comments ] Signed-off-by: David Sterba <dsterba@suse.com>master
parent
c7bc72264a
commit
599a0abed5
108
mkfs/main.c
108
mkfs/main.c
|
@ -732,8 +732,6 @@ int main(int argc, char **argv)
|
||||||
int force_overwrite = 0;
|
int force_overwrite = 0;
|
||||||
char *source_dir = NULL;
|
char *source_dir = NULL;
|
||||||
int source_dir_set = 0;
|
int source_dir_set = 0;
|
||||||
u64 num_of_meta_chunks = 0;
|
|
||||||
u64 size_of_data = 0;
|
|
||||||
u64 source_dir_size = 0;
|
u64 source_dir_size = 0;
|
||||||
u64 min_dev_size;
|
u64 min_dev_size;
|
||||||
int dev_cnt = 0;
|
int dev_cnt = 0;
|
||||||
|
@ -952,6 +950,34 @@ int main(int argc, char **argv)
|
||||||
|
|
||||||
min_dev_size = btrfs_min_dev_size(nodesize, mixed, metadata_profile,
|
min_dev_size = btrfs_min_dev_size(nodesize, mixed, metadata_profile,
|
||||||
data_profile);
|
data_profile);
|
||||||
|
/*
|
||||||
|
* Enlarge the destination file or create a new one, using the size
|
||||||
|
* calculated from source dir.
|
||||||
|
*
|
||||||
|
* This must be done before minimal device size checks.
|
||||||
|
*/
|
||||||
|
if (source_dir_set) {
|
||||||
|
fd = open(file, O_CREAT | O_RDWR, S_IRUSR | S_IWUSR | S_IRGRP |
|
||||||
|
S_IWGRP | S_IROTH);
|
||||||
|
if (fd < 0) {
|
||||||
|
error("unable to open %s: %s", file, strerror(errno));
|
||||||
|
goto error;
|
||||||
|
}
|
||||||
|
|
||||||
|
source_dir_size = btrfs_mkfs_size_dir(source_dir, sectorsize,
|
||||||
|
min_dev_size, metadata_profile, data_profile);
|
||||||
|
if (block_count < source_dir_size)
|
||||||
|
block_count = source_dir_size;
|
||||||
|
ret = zero_output_file(fd, block_count);
|
||||||
|
if (ret) {
|
||||||
|
error("unable to zero the output file");
|
||||||
|
close(fd);
|
||||||
|
goto error;
|
||||||
|
}
|
||||||
|
/* our "device" is the new image file */
|
||||||
|
dev_block_count = block_count;
|
||||||
|
close(fd);
|
||||||
|
}
|
||||||
/* Check device/block_count after the nodesize is determined */
|
/* Check device/block_count after the nodesize is determined */
|
||||||
if (block_count && block_count < min_dev_size) {
|
if (block_count && block_count < min_dev_size) {
|
||||||
error("size %llu is too small to make a usable filesystem",
|
error("size %llu is too small to make a usable filesystem",
|
||||||
|
@ -985,51 +1011,27 @@ int main(int argc, char **argv)
|
||||||
|
|
||||||
dev_cnt--;
|
dev_cnt--;
|
||||||
|
|
||||||
if (!source_dir_set) {
|
/*
|
||||||
/*
|
* Open without O_EXCL so that the problem should not occur by the
|
||||||
* open without O_EXCL so that the problem should not
|
* following operation in kernel:
|
||||||
* occur by the following processing.
|
* (btrfs_register_one_device() fails if O_EXCL is on)
|
||||||
* (btrfs_register_one_device() fails if O_EXCL is on)
|
*/
|
||||||
*/
|
fd = open(file, O_RDWR);
|
||||||
fd = open(file, O_RDWR);
|
if (fd < 0) {
|
||||||
if (fd < 0) {
|
error("unable to open %s: %s", file, strerror(errno));
|
||||||
error("unable to open %s: %s", file, strerror(errno));
|
goto error;
|
||||||
goto error;
|
}
|
||||||
}
|
ret = btrfs_prepare_device(fd, file, &dev_block_count, block_count,
|
||||||
ret = btrfs_prepare_device(fd, file, &dev_block_count,
|
(zero_end ? PREP_DEVICE_ZERO_END : 0) |
|
||||||
block_count,
|
(discard ? PREP_DEVICE_DISCARD : 0) |
|
||||||
(zero_end ? PREP_DEVICE_ZERO_END : 0) |
|
(verbose ? PREP_DEVICE_VERBOSE : 0));
|
||||||
(discard ? PREP_DEVICE_DISCARD : 0) |
|
if (ret)
|
||||||
(verbose ? PREP_DEVICE_VERBOSE : 0));
|
goto error;
|
||||||
if (ret) {
|
if (block_count && block_count > dev_block_count) {
|
||||||
goto error;
|
error("%s is smaller than requested size, expected %llu, found %llu",
|
||||||
}
|
file, (unsigned long long)block_count,
|
||||||
if (block_count && block_count > dev_block_count) {
|
(unsigned long long)dev_block_count);
|
||||||
error("%s is smaller than requested size, expected %llu, found %llu",
|
goto error;
|
||||||
file,
|
|
||||||
(unsigned long long)block_count,
|
|
||||||
(unsigned long long)dev_block_count);
|
|
||||||
goto error;
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
fd = open(file, O_CREAT | O_RDWR,
|
|
||||||
S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH);
|
|
||||||
if (fd < 0) {
|
|
||||||
error("unable to open %s: %s", file, strerror(errno));
|
|
||||||
goto error;
|
|
||||||
}
|
|
||||||
|
|
||||||
source_dir_size = btrfs_mkfs_size_dir(source_dir, sectorsize,
|
|
||||||
&num_of_meta_chunks, &size_of_data);
|
|
||||||
if(block_count < source_dir_size)
|
|
||||||
block_count = source_dir_size;
|
|
||||||
ret = zero_output_file(fd, block_count);
|
|
||||||
if (ret) {
|
|
||||||
error("unable to zero the output file");
|
|
||||||
goto error;
|
|
||||||
}
|
|
||||||
/* our "device" is the new image file */
|
|
||||||
dev_block_count = block_count;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/* To create the first block group and chunk 0 in make_btrfs */
|
/* To create the first block group and chunk 0 in make_btrfs */
|
||||||
|
@ -1155,13 +1157,11 @@ int main(int argc, char **argv)
|
||||||
}
|
}
|
||||||
|
|
||||||
raid_groups:
|
raid_groups:
|
||||||
if (!source_dir_set) {
|
ret = create_raid_groups(trans, root, data_profile,
|
||||||
ret = create_raid_groups(trans, root, data_profile,
|
metadata_profile, mixed, &allocation);
|
||||||
metadata_profile, mixed, &allocation);
|
if (ret) {
|
||||||
if (ret) {
|
error("unable to create raid groups: %d", ret);
|
||||||
error("unable to create raid groups: %d", ret);
|
goto out;
|
||||||
goto out;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
ret = create_tree(trans, root, BTRFS_DATA_RELOC_TREE_OBJECTID);
|
ret = create_tree(trans, root, BTRFS_DATA_RELOC_TREE_OBJECTID);
|
||||||
|
|
117
mkfs/rootdir.c
117
mkfs/rootdir.c
|
@ -33,19 +33,29 @@
|
||||||
#include "transaction.h"
|
#include "transaction.h"
|
||||||
#include "utils.h"
|
#include "utils.h"
|
||||||
#include "mkfs/rootdir.h"
|
#include "mkfs/rootdir.h"
|
||||||
|
#include "mkfs/common.h"
|
||||||
#include "send-utils.h"
|
#include "send-utils.h"
|
||||||
|
|
||||||
/*
|
static u32 fs_block_size;
|
||||||
* This ignores symlinks with unreadable targets and subdirs that can't
|
|
||||||
* be read. It's a best-effort to give a rough estimate of the size of
|
|
||||||
* a subdir. It doesn't guarantee that prepopulating btrfs from this
|
|
||||||
* tree won't still run out of space.
|
|
||||||
*/
|
|
||||||
static u64 global_total_size;
|
|
||||||
static u64 fs_block_size;
|
|
||||||
|
|
||||||
static u64 index_cnt = 2;
|
static u64 index_cnt = 2;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Size estimate will be done using the following data:
|
||||||
|
* 1) Number of inodes
|
||||||
|
* Since we will later shrink the fs, over-estimating is completely fine here
|
||||||
|
* as long as our estimate ensures we can populate the image without ENOSPC.
|
||||||
|
* So we only record how many inodes there are, and account the maximum
|
||||||
|
* space for each inode.
|
||||||
|
*
|
||||||
|
* 2) Data space for each (regular) inode
|
||||||
|
* To estimate data chunk size.
|
||||||
|
* Don't care if it can fit as an inline extent.
|
||||||
|
* Always round them up to sectorsize.
|
||||||
|
*/
|
||||||
|
static u64 ftw_meta_nr_inode;
|
||||||
|
static u64 ftw_data_size;
|
||||||
|
|
||||||
static int add_directory_items(struct btrfs_trans_handle *trans,
|
static int add_directory_items(struct btrfs_trans_handle *trans,
|
||||||
struct btrfs_root *root, u64 objectid,
|
struct btrfs_root *root, u64 objectid,
|
||||||
ino_t parent_inum, const char *name,
|
ino_t parent_inum, const char *name,
|
||||||
|
@ -685,53 +695,88 @@ out:
|
||||||
static int ftw_add_entry_size(const char *fpath, const struct stat *st,
|
static int ftw_add_entry_size(const char *fpath, const struct stat *st,
|
||||||
int type)
|
int type)
|
||||||
{
|
{
|
||||||
if (type == FTW_F || type == FTW_D)
|
/*
|
||||||
global_total_size += round_up(st->st_size, fs_block_size);
|
* Failed to read the directory, mostly due to EPERM. Abort ASAP, so
|
||||||
|
* we don't need to populate the fs.
|
||||||
|
*/
|
||||||
|
if (type == FTW_DNR || type == FTW_NS)
|
||||||
|
return -EPERM;
|
||||||
|
|
||||||
|
if (S_ISREG(st->st_mode))
|
||||||
|
ftw_data_size += round_up(st->st_size, fs_block_size);
|
||||||
|
ftw_meta_nr_inode++;
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
u64 btrfs_mkfs_size_dir(const char *dir_name, u64 sectorsize,
|
u64 btrfs_mkfs_size_dir(const char *dir_name, u32 sectorsize, u64 min_dev_size,
|
||||||
u64 *num_of_meta_chunks_ret, u64 *size_of_data_ret)
|
u64 meta_profile, u64 data_profile)
|
||||||
{
|
{
|
||||||
u64 dir_size = 0;
|
|
||||||
u64 total_size = 0;
|
u64 total_size = 0;
|
||||||
int ret;
|
int ret;
|
||||||
u64 default_chunk_size = SZ_8M;
|
|
||||||
u64 allocated_meta_size = SZ_8M;
|
|
||||||
u64 allocated_total_size = 20 * SZ_1M; /* 20MB */
|
|
||||||
u64 num_of_meta_chunks = 0;
|
|
||||||
u64 num_of_data_chunks = 0;
|
|
||||||
u64 num_of_allocated_meta_chunks =
|
|
||||||
allocated_meta_size / default_chunk_size;
|
|
||||||
|
|
||||||
global_total_size = 0;
|
u64 meta_size = 0; /* Based on @ftw_meta_nr_inode */
|
||||||
|
u64 meta_chunk_size = 0; /* Based on @meta_size */
|
||||||
|
u64 data_chunk_size = 0; /* Based on @ftw_data_size */
|
||||||
|
|
||||||
|
u64 meta_threshold = SZ_8M;
|
||||||
|
u64 data_threshold = SZ_8M;
|
||||||
|
|
||||||
|
float data_multipler = 1;
|
||||||
|
float meta_multipler = 1;
|
||||||
|
|
||||||
fs_block_size = sectorsize;
|
fs_block_size = sectorsize;
|
||||||
|
ftw_data_size = 0;
|
||||||
|
ftw_meta_nr_inode = 0;
|
||||||
ret = ftw(dir_name, ftw_add_entry_size, 10);
|
ret = ftw(dir_name, ftw_add_entry_size, 10);
|
||||||
dir_size = global_total_size;
|
|
||||||
if (ret < 0) {
|
if (ret < 0) {
|
||||||
error("ftw subdir walk of %s failed: %s", dir_name,
|
error("ftw subdir walk of %s failed: %s", dir_name,
|
||||||
strerror(errno));
|
strerror(errno));
|
||||||
exit(1);
|
exit(1);
|
||||||
}
|
}
|
||||||
|
|
||||||
num_of_data_chunks = (dir_size + default_chunk_size - 1) /
|
|
||||||
default_chunk_size;
|
|
||||||
|
|
||||||
num_of_meta_chunks = (dir_size / 2) / default_chunk_size;
|
/*
|
||||||
if (((dir_size / 2) % default_chunk_size) != 0)
|
* Maximum metadata usage for every inode, which will be PATH_MAX
|
||||||
num_of_meta_chunks++;
|
* for the following items:
|
||||||
if (num_of_meta_chunks <= num_of_allocated_meta_chunks)
|
* 1) DIR_ITEM
|
||||||
num_of_meta_chunks = 0;
|
* 2) DIR_INDEX
|
||||||
else
|
* 3) INODE_REF
|
||||||
num_of_meta_chunks -= num_of_allocated_meta_chunks;
|
*
|
||||||
|
* Plus possible inline extent size, which is sectorsize.
|
||||||
|
*
|
||||||
|
* And finally, allow metadata usage to increase with data size.
|
||||||
|
* Follow the old kernel 8:1 data:meta ratio.
|
||||||
|
* This is especially important for --rootdir, as the file extent size
|
||||||
|
* upper limit is 1M, instead of 128M in kernel.
|
||||||
|
* This can bump meta usage easily.
|
||||||
|
*/
|
||||||
|
meta_size = ftw_meta_nr_inode * (PATH_MAX * 3 + sectorsize) +
|
||||||
|
ftw_data_size / 8;
|
||||||
|
|
||||||
total_size = allocated_total_size +
|
/* Minimal chunk size from btrfs_alloc_chunk(). */
|
||||||
(num_of_data_chunks * default_chunk_size) +
|
if (meta_profile & BTRFS_BLOCK_GROUP_DUP) {
|
||||||
(num_of_meta_chunks * default_chunk_size);
|
meta_threshold = SZ_32M;
|
||||||
|
meta_multipler = 2;
|
||||||
|
}
|
||||||
|
if (data_profile & BTRFS_BLOCK_GROUP_DUP) {
|
||||||
|
data_threshold = SZ_64M;
|
||||||
|
data_multipler = 2;
|
||||||
|
}
|
||||||
|
|
||||||
*num_of_meta_chunks_ret = num_of_meta_chunks;
|
/*
|
||||||
*size_of_data_ret = num_of_data_chunks * default_chunk_size;
|
* Only when the usage is larger than the minimal chunk size (threshold)
|
||||||
|
* do we need to allocate a new chunk; otherwise the initial chunk in the image is
|
||||||
|
* large enough.
|
||||||
|
*/
|
||||||
|
if (meta_size > meta_threshold)
|
||||||
|
meta_chunk_size = (round_up(meta_size, meta_threshold) -
|
||||||
|
meta_threshold) * meta_multipler;
|
||||||
|
if (ftw_data_size > data_threshold)
|
||||||
|
data_chunk_size = (round_up(ftw_data_size, data_threshold) -
|
||||||
|
data_threshold) * data_multipler;
|
||||||
|
|
||||||
|
total_size = data_chunk_size + meta_chunk_size + min_dev_size;
|
||||||
return total_size;
|
return total_size;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -30,7 +30,7 @@ struct directory_name_entry {
|
||||||
|
|
||||||
int btrfs_mkfs_fill_dir(const char *source_dir, struct btrfs_root *root,
|
int btrfs_mkfs_fill_dir(const char *source_dir, struct btrfs_root *root,
|
||||||
bool verbose);
|
bool verbose);
|
||||||
u64 btrfs_mkfs_size_dir(const char *dir_name, u64 sectorsize,
|
u64 btrfs_mkfs_size_dir(const char *dir_name, u32 sectorsize, u64 min_dev_size,
|
||||||
u64 *num_of_meta_chunks_ret, u64 *size_of_data_ret);
|
u64 meta_profile, u64 data_profile);
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
Loading…
Reference in New Issue