btrfs-progs: mkfs/rootdir: Use over-reserve method to make size estimate easier

Use an easier method to calculate the estimate device size for
mkfs.btrfs --rootdir.

The new method will over-estimate, but should ensure we won't encounter
ENOSPC.

It relies on the following data:
1) number of inodes -- for metadata chunk size
2) rounded up data size of each regular inode -- for data chunk size

Total meta chunk size = round_up(nr_inode * (PATH_MAX * 3 + sectorsize),
min_chunk_size) * profile_multiplier

PATH_MAX is the maximum size possible for INODE_REF/DIR_INDEX/DIR_ITEM.
Sectorsize is the maximum size possible for inline extent.
min_chunk_size is 8M for SINGLE, and 32M for DUP, taken from
btrfs_alloc_chunk().
profile_multiplier is 1 for Single, 2 for DUP.

Total data chunk size is much easier.
Total data chunk size = round_up(total_data_usage, min_chunk_size) *
profile_multiplier

Total_data_usage is the sum of the *rounded up* size that each regular
inode uses.
min_chunk_size is 8M for SINGLE, 64M for DUP, taken from btrfs_alloc_chunk().
Same profile_multiplier for meta.

This over-estimate calculation is, of course, inaccurate, but since we
will later shrink the fs to its real usage, it doesn't matter much now.

Signed-off-by: Qu Wenruo <wqu@suse.com>
[ update comments ]
Signed-off-by: David Sterba <dsterba@suse.com>
master
Qu Wenruo 2017-10-19 10:11:07 +08:00 committed by David Sterba
parent c7bc72264a
commit 599a0abed5
3 changed files with 137 additions and 92 deletions

View File

@ -732,8 +732,6 @@ int main(int argc, char **argv)
int force_overwrite = 0; int force_overwrite = 0;
char *source_dir = NULL; char *source_dir = NULL;
int source_dir_set = 0; int source_dir_set = 0;
u64 num_of_meta_chunks = 0;
u64 size_of_data = 0;
u64 source_dir_size = 0; u64 source_dir_size = 0;
u64 min_dev_size; u64 min_dev_size;
int dev_cnt = 0; int dev_cnt = 0;
@ -952,6 +950,34 @@ int main(int argc, char **argv)
min_dev_size = btrfs_min_dev_size(nodesize, mixed, metadata_profile, min_dev_size = btrfs_min_dev_size(nodesize, mixed, metadata_profile,
data_profile); data_profile);
/*
* Enlarge the destination file or create a new one, using the size
* calculated from source dir.
*
* This must be done before minimal device size checks.
*/
if (source_dir_set) {
fd = open(file, O_CREAT | O_RDWR, S_IRUSR | S_IWUSR | S_IRGRP |
S_IWGRP | S_IROTH);
if (fd < 0) {
error("unable to open %s: %s", file, strerror(errno));
goto error;
}
source_dir_size = btrfs_mkfs_size_dir(source_dir, sectorsize,
min_dev_size, metadata_profile, data_profile);
if (block_count < source_dir_size)
block_count = source_dir_size;
ret = zero_output_file(fd, block_count);
if (ret) {
error("unable to zero the output file");
close(fd);
goto error;
}
/* our "device" is the new image file */
dev_block_count = block_count;
close(fd);
}
/* Check device/block_count after the nodesize is determined */ /* Check device/block_count after the nodesize is determined */
if (block_count && block_count < min_dev_size) { if (block_count && block_count < min_dev_size) {
error("size %llu is too small to make a usable filesystem", error("size %llu is too small to make a usable filesystem",
@ -985,51 +1011,27 @@ int main(int argc, char **argv)
dev_cnt--; dev_cnt--;
if (!source_dir_set) { /*
/* * Open without O_EXCL so that the problem should not occur by the
* open without O_EXCL so that the problem should not * following operation in kernel:
* occur by the following processing. * (btrfs_register_one_device() fails if O_EXCL is on)
* (btrfs_register_one_device() fails if O_EXCL is on) */
*/ fd = open(file, O_RDWR);
fd = open(file, O_RDWR); if (fd < 0) {
if (fd < 0) { error("unable to open %s: %s", file, strerror(errno));
error("unable to open %s: %s", file, strerror(errno)); goto error;
goto error; }
} ret = btrfs_prepare_device(fd, file, &dev_block_count, block_count,
ret = btrfs_prepare_device(fd, file, &dev_block_count, (zero_end ? PREP_DEVICE_ZERO_END : 0) |
block_count, (discard ? PREP_DEVICE_DISCARD : 0) |
(zero_end ? PREP_DEVICE_ZERO_END : 0) | (verbose ? PREP_DEVICE_VERBOSE : 0));
(discard ? PREP_DEVICE_DISCARD : 0) | if (ret)
(verbose ? PREP_DEVICE_VERBOSE : 0)); goto error;
if (ret) { if (block_count && block_count > dev_block_count) {
goto error; error("%s is smaller than requested size, expected %llu, found %llu",
} file, (unsigned long long)block_count,
if (block_count && block_count > dev_block_count) { (unsigned long long)dev_block_count);
error("%s is smaller than requested size, expected %llu, found %llu", goto error;
file,
(unsigned long long)block_count,
(unsigned long long)dev_block_count);
goto error;
}
} else {
fd = open(file, O_CREAT | O_RDWR,
S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH);
if (fd < 0) {
error("unable to open %s: %s", file, strerror(errno));
goto error;
}
source_dir_size = btrfs_mkfs_size_dir(source_dir, sectorsize,
&num_of_meta_chunks, &size_of_data);
if(block_count < source_dir_size)
block_count = source_dir_size;
ret = zero_output_file(fd, block_count);
if (ret) {
error("unable to zero the output file");
goto error;
}
/* our "device" is the new image file */
dev_block_count = block_count;
} }
/* To create the first block group and chunk 0 in make_btrfs */ /* To create the first block group and chunk 0 in make_btrfs */
@ -1155,13 +1157,11 @@ int main(int argc, char **argv)
} }
raid_groups: raid_groups:
if (!source_dir_set) { ret = create_raid_groups(trans, root, data_profile,
ret = create_raid_groups(trans, root, data_profile, metadata_profile, mixed, &allocation);
metadata_profile, mixed, &allocation); if (ret) {
if (ret) { error("unable to create raid groups: %d", ret);
error("unable to create raid groups: %d", ret); goto out;
goto out;
}
} }
ret = create_tree(trans, root, BTRFS_DATA_RELOC_TREE_OBJECTID); ret = create_tree(trans, root, BTRFS_DATA_RELOC_TREE_OBJECTID);

View File

@ -33,19 +33,29 @@
#include "transaction.h" #include "transaction.h"
#include "utils.h" #include "utils.h"
#include "mkfs/rootdir.h" #include "mkfs/rootdir.h"
#include "mkfs/common.h"
#include "send-utils.h" #include "send-utils.h"
/* static u32 fs_block_size;
* This ignores symlinks with unreadable targets and subdirs that can't
* be read. It's a best-effort to give a rough estimate of the size of
* a subdir. It doesn't guarantee that prepopulating btrfs from this
* tree won't still run out of space.
*/
static u64 global_total_size;
static u64 fs_block_size;
static u64 index_cnt = 2; static u64 index_cnt = 2;
/*
* Size estimate will be done using the following data:
* 1) Number of inodes
* Since we will later shrink the fs, over-estimate is completely fine here
* as long as our estimate ensures we can populate the image without ENOSPC.
* So we only record how many inodes there are, and account the maximum
* space for each inode.
*
* 2) Data space for each (regular) inode
* To estimate data chunk size.
* Don't care if it can fit as an inline extent.
* Always round them up to sectorsize.
*/
static u64 ftw_meta_nr_inode;
static u64 ftw_data_size;
static int add_directory_items(struct btrfs_trans_handle *trans, static int add_directory_items(struct btrfs_trans_handle *trans,
struct btrfs_root *root, u64 objectid, struct btrfs_root *root, u64 objectid,
ino_t parent_inum, const char *name, ino_t parent_inum, const char *name,
@ -685,53 +695,88 @@ out:
static int ftw_add_entry_size(const char *fpath, const struct stat *st, static int ftw_add_entry_size(const char *fpath, const struct stat *st,
int type) int type)
{ {
if (type == FTW_F || type == FTW_D) /*
global_total_size += round_up(st->st_size, fs_block_size); * Failed to read the directory, mostly due to EPERM. Abort ASAP, so
* we don't need to populate the fs.
*/
if (type == FTW_DNR || type == FTW_NS)
return -EPERM;
if (S_ISREG(st->st_mode))
ftw_data_size += round_up(st->st_size, fs_block_size);
ftw_meta_nr_inode++;
return 0; return 0;
} }
u64 btrfs_mkfs_size_dir(const char *dir_name, u64 sectorsize, u64 btrfs_mkfs_size_dir(const char *dir_name, u32 sectorsize, u64 min_dev_size,
u64 *num_of_meta_chunks_ret, u64 *size_of_data_ret) u64 meta_profile, u64 data_profile)
{ {
u64 dir_size = 0;
u64 total_size = 0; u64 total_size = 0;
int ret; int ret;
u64 default_chunk_size = SZ_8M;
u64 allocated_meta_size = SZ_8M;
u64 allocated_total_size = 20 * SZ_1M; /* 20MB */
u64 num_of_meta_chunks = 0;
u64 num_of_data_chunks = 0;
u64 num_of_allocated_meta_chunks =
allocated_meta_size / default_chunk_size;
global_total_size = 0; u64 meta_size = 0; /* Based on @ftw_meta_nr_inode */
u64 meta_chunk_size = 0; /* Based on @meta_size */
u64 data_chunk_size = 0; /* Based on @ftw_data_size */
u64 meta_threshold = SZ_8M;
u64 data_threshold = SZ_8M;
float data_multipler = 1;
float meta_multipler = 1;
fs_block_size = sectorsize; fs_block_size = sectorsize;
ftw_data_size = 0;
ftw_meta_nr_inode = 0;
ret = ftw(dir_name, ftw_add_entry_size, 10); ret = ftw(dir_name, ftw_add_entry_size, 10);
dir_size = global_total_size;
if (ret < 0) { if (ret < 0) {
error("ftw subdir walk of %s failed: %s", dir_name, error("ftw subdir walk of %s failed: %s", dir_name,
strerror(errno)); strerror(errno));
exit(1); exit(1);
} }
num_of_data_chunks = (dir_size + default_chunk_size - 1) /
default_chunk_size;
num_of_meta_chunks = (dir_size / 2) / default_chunk_size; /*
if (((dir_size / 2) % default_chunk_size) != 0) * Maximum metadata useage for every inode, which will be PATH_MAX
num_of_meta_chunks++; * for the following items:
if (num_of_meta_chunks <= num_of_allocated_meta_chunks) * 1) DIR_ITEM
num_of_meta_chunks = 0; * 2) DIR_INDEX
else * 3) INODE_REF
num_of_meta_chunks -= num_of_allocated_meta_chunks; *
* Plus possible inline extent size, which is sectorsize.
*
* And finally, allow metadata usage to increase with data size.
* Follow the old kernel 8:1 data:meta ratio.
* This is especially important for --rootdir, as the file extent size
* upper limit is 1M, instead of 128M in kernel.
* This can bump meta usage easily.
*/
meta_size = ftw_meta_nr_inode * (PATH_MAX * 3 + sectorsize) +
ftw_data_size / 8;
total_size = allocated_total_size + /* Minimal chunk size from btrfs_alloc_chunk(). */
(num_of_data_chunks * default_chunk_size) + if (meta_profile & BTRFS_BLOCK_GROUP_DUP) {
(num_of_meta_chunks * default_chunk_size); meta_threshold = SZ_32M;
meta_multipler = 2;
}
if (data_profile & BTRFS_BLOCK_GROUP_DUP) {
data_threshold = SZ_64M;
data_multipler = 2;
}
*num_of_meta_chunks_ret = num_of_meta_chunks; /*
*size_of_data_ret = num_of_data_chunks * default_chunk_size; * Only when the usage is larger than the minimal chunk size (threshold)
* we need to allocate new chunk, or the initial chunk in the image is
* large enough.
*/
if (meta_size > meta_threshold)
meta_chunk_size = (round_up(meta_size, meta_threshold) -
meta_threshold) * meta_multipler;
if (ftw_data_size > data_threshold)
data_chunk_size = (round_up(ftw_data_size, data_threshold) -
data_threshold) * data_multipler;
total_size = data_chunk_size + meta_chunk_size + min_dev_size;
return total_size; return total_size;
} }

View File

@ -30,7 +30,7 @@ struct directory_name_entry {
int btrfs_mkfs_fill_dir(const char *source_dir, struct btrfs_root *root, int btrfs_mkfs_fill_dir(const char *source_dir, struct btrfs_root *root,
bool verbose); bool verbose);
u64 btrfs_mkfs_size_dir(const char *dir_name, u64 sectorsize, u64 btrfs_mkfs_size_dir(const char *dir_name, u32 sectorsize, u64 min_dev_size,
u64 *num_of_meta_chunks_ret, u64 *size_of_data_ret); u64 meta_profile, u64 data_profile);
#endif #endif