Merge branch 'liubo-image-restore'

Signed-off-by: Chris Mason <chris.mason@fusionio.com>

Conflicts:
	disk-io.c
	volumes.h
master
Chris Mason 2013-07-03 14:24:43 -04:00
commit 0bae08fdab
8 changed files with 572 additions and 217 deletions

View File

@ -35,6 +35,7 @@
#include "utils.h"
#include "version.h"
#include "volumes.h"
#include "extent_io.h"
#define HEADER_MAGIC 0xbd5c25e27295668bULL
#define MAX_PENDING_SIZE (256 * 1024)
@ -136,6 +137,9 @@ struct mdrestore_struct {
int done;
int error;
int old_restore;
int fixup_offset;
int multi_devices;
struct btrfs_fs_info *info;
};
static int search_for_chunk_blocks(struct mdrestore_struct *mdres,
@ -1169,9 +1173,9 @@ static int copy_from_extent_tree(struct metadump_struct *metadump,
bytenr = key.objectid;
if (key.type == BTRFS_METADATA_ITEM_KEY)
num_bytes = key.offset;
else
num_bytes = extent_root->leafsize;
else
num_bytes = key.offset;
if (btrfs_item_size_nr(leaf, path->slots[0]) > sizeof(*ei)) {
ei = btrfs_item_ptr(leaf, path->slots[0],
@ -1589,9 +1593,10 @@ static void *restore_worker(void *data)
u8 *outbuf;
int outfd;
int ret;
int compress_size = MAX_PENDING_SIZE * 4;
outfd = fileno(mdres->out);
buffer = malloc(MAX_PENDING_SIZE * 2);
buffer = malloc(compress_size);
if (!buffer) {
fprintf(stderr, "Error allocing buffer\n");
pthread_mutex_lock(&mdres->mutex);
@ -1619,7 +1624,7 @@ static void *restore_worker(void *data)
pthread_mutex_unlock(&mdres->mutex);
if (mdres->compress_method == COMPRESS_ZLIB) {
size = MAX_PENDING_SIZE * 2;
size = compress_size;
ret = uncompress(buffer, (unsigned long *)&size,
async->buffer, async->bufsize);
if (ret != Z_OK) {
@ -1633,44 +1638,60 @@ static void *restore_worker(void *data)
size = async->bufsize;
}
if (async->start == BTRFS_SUPER_INFO_OFFSET) {
if (mdres->old_restore) {
update_super_old(outbuf);
} else {
ret = update_super(outbuf);
if (!mdres->multi_devices) {
if (async->start == BTRFS_SUPER_INFO_OFFSET) {
if (mdres->old_restore) {
update_super_old(outbuf);
} else {
ret = update_super(outbuf);
if (ret)
err = ret;
}
} else if (!mdres->old_restore) {
ret = fixup_chunk_tree_block(mdres, async, outbuf, size);
if (ret)
err = ret;
}
} else if (!mdres->old_restore) {
ret = fixup_chunk_tree_block(mdres, async, outbuf, size);
if (ret)
err = ret;
}
while (size) {
u64 chunk_size = size;
bytenr = logical_to_physical(mdres,
async->start + offset,
&chunk_size);
ret = pwrite64(outfd, outbuf+offset, chunk_size,
bytenr);
if (ret < chunk_size) {
if (ret < 0) {
fprintf(stderr, "Error writing to "
"device %d\n", errno);
err = errno;
break;
} else {
fprintf(stderr, "Short write\n");
err = -EIO;
break;
if (!mdres->fixup_offset) {
while (size) {
u64 chunk_size = size;
if (!mdres->multi_devices)
bytenr = logical_to_physical(mdres,
async->start + offset,
&chunk_size);
else
bytenr = async->start + offset;
ret = pwrite64(outfd, outbuf+offset, chunk_size,
bytenr);
if (ret != chunk_size) {
if (ret < 0) {
fprintf(stderr, "Error writing to "
"device %d\n", errno);
err = errno;
break;
} else {
fprintf(stderr, "Short write\n");
err = -EIO;
break;
}
}
size -= chunk_size;
offset += chunk_size;
}
} else if (async->start != BTRFS_SUPER_INFO_OFFSET) {
ret = write_data_to_disk(mdres->info, outbuf, async->start, size, 0);
if (ret) {
printk("Error write data\n");
exit(1);
}
size -= chunk_size;
offset += chunk_size;
}
if (async->start == BTRFS_SUPER_INFO_OFFSET)
/* backup super blocks are already there at fixup_offset stage */
if (!mdres->multi_devices && async->start == BTRFS_SUPER_INFO_OFFSET)
write_backup_supers(outfd, outbuf);
pthread_mutex_lock(&mdres->mutex);
@ -1714,7 +1735,8 @@ static void mdrestore_destroy(struct mdrestore_struct *mdres)
static int mdrestore_init(struct mdrestore_struct *mdres,
FILE *in, FILE *out, int old_restore,
int num_threads)
int num_threads, int fixup_offset,
struct btrfs_fs_info *info, int multi_devices)
{
int i, ret = 0;
@ -1726,6 +1748,9 @@ static int mdrestore_init(struct mdrestore_struct *mdres,
mdres->out = out;
mdres->old_restore = old_restore;
mdres->chunk_tree.rb_node = NULL;
mdres->fixup_offset = fixup_offset;
mdres->info = info;
mdres->multi_devices = multi_devices;
if (!num_threads)
return 0;
@ -2186,12 +2211,14 @@ static int build_chunk_tree(struct mdrestore_struct *mdres,
return search_for_chunk_blocks(mdres, chunk_root_bytenr, 0);
}
static int restore_metadump(const char *input, FILE *out, int old_restore,
int num_threads)
static int __restore_metadump(const char *input, FILE *out, int old_restore,
int num_threads, int fixup_offset,
const char *target, int multi_devices)
{
struct meta_cluster *cluster = NULL;
struct meta_cluster_header *header;
struct mdrestore_struct mdrestore;
struct btrfs_fs_info *info = NULL;
u64 bytenr = 0;
FILE *in = NULL;
int ret = 0;
@ -2206,26 +2233,36 @@ static int restore_metadump(const char *input, FILE *out, int old_restore,
}
}
/* NOTE: open with write mode */
if (fixup_offset) {
BUG_ON(!target);
info = open_ctree_fs_info_restore(target, 0, 0, 1, 1);
if (!info) {
fprintf(stderr, "%s: open ctree failed\n", __func__);
ret = -EIO;
goto failed_open;
}
}
cluster = malloc(BLOCK_SIZE);
if (!cluster) {
fprintf(stderr, "Error allocating cluster\n");
if (in != stdin)
fclose(in);
return -ENOMEM;
ret = -ENOMEM;
goto failed_info;
}
ret = mdrestore_init(&mdrestore, in, out, old_restore, num_threads);
ret = mdrestore_init(&mdrestore, in, out, old_restore, num_threads,
fixup_offset, info, multi_devices);
if (ret) {
fprintf(stderr, "Error initing mdrestore %d\n", ret);
if (in != stdin)
fclose(in);
free(cluster);
return ret;
goto failed_cluster;
}
ret = build_chunk_tree(&mdrestore, cluster);
if (ret)
goto out;
if (!multi_devices) {
ret = build_chunk_tree(&mdrestore, cluster);
if (ret)
goto out;
}
if (in != stdin && fseek(in, 0, SEEK_SET)) {
fprintf(stderr, "Error seeking %d\n", errno);
@ -2259,12 +2296,123 @@ static int restore_metadump(const char *input, FILE *out, int old_restore,
}
out:
mdrestore_destroy(&mdrestore);
failed_cluster:
free(cluster);
failed_info:
if (fixup_offset && info)
close_ctree(info->chunk_root);
failed_open:
if (in != stdin)
fclose(in);
return ret;
}
/*
 * Plain restore entry point.
 *
 * Forwards to __restore_metadump() with the fixup pass switched off
 * (fixup_offset = 0) and therefore no target path to reopen.
 * Returns whatever __restore_metadump() returns.
 */
static int restore_metadump(const char *input, FILE *out, int old_restore,
			    int num_threads, int multi_devices)
{
	const int no_fixup = 0;
	const char *no_target = NULL;

	return __restore_metadump(input, out, old_restore, num_threads,
				  no_fixup, no_target, multi_devices);
}
/*
 * Fixup entry point used after a multi-device restore.
 *
 * Forwards to __restore_metadump() with old_restore = 0, fixup_offset = 1
 * and multi_devices = 1, handing over @target as the path to open for the
 * fixup. Returns whatever __restore_metadump() returns.
 */
static int fixup_metadump(const char *input, FILE *out, int num_threads,
			  const char *target)
{
	const int old_restore = 0;
	const int fixup_offset = 1;
	const int multi_devices = 1;

	return __restore_metadump(input, out, old_restore, num_threads,
				  fixup_offset, target, multi_devices);
}
/*
 * Rewrite the super block of one additional device of a multi-device
 * restore.
 *
 * Looks up the dev item keyed by @cur_devid in the chunk tree of @info,
 * copies info->super_copy, patches the embedded dev_item of that copy with
 * the values read from the chunk tree, re-checksums it and writes it to
 * @other_dev at BTRFS_SUPER_INFO_OFFSET, followed by the backup supers.
 *
 * Returns 0 on success, -ENOMEM if the scratch buffer cannot be allocated
 * and -EIO if the primary super block was not written completely.
 * A failed tree lookup, a devid mismatch or a failure to open @other_dev
 * terminates the process with exit(1).
 */
static int update_disk_super_on_device(struct btrfs_fs_info *info,
				       const char *other_dev, u64 cur_devid)
{
	struct btrfs_key key;
	struct extent_buffer *leaf;
	struct btrfs_path path;
	struct btrfs_dev_item *dev_item;
	struct btrfs_super_block *disk_super;
	char dev_uuid[BTRFS_UUID_SIZE];
	char fs_uuid[BTRFS_UUID_SIZE];
	u64 devid, type, io_align, io_width;
	u64 sector_size, total_bytes, bytes_used;
	char *buf;
	int fp;
	int ret;

	key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
	key.type = BTRFS_DEV_ITEM_KEY;
	key.offset = cur_devid;

	btrfs_init_path(&path);
	ret = btrfs_search_slot(NULL, info->chunk_root, &key, &path, 0, 0);
	if (ret) {
		fprintf(stderr, "search key fails\n");
		exit(1);
	}

	leaf = path.nodes[0];
	dev_item = btrfs_item_ptr(leaf, path.slots[0],
				  struct btrfs_dev_item);

	devid = btrfs_device_id(leaf, dev_item);
	if (devid != cur_devid) {
		/* u64 is cast explicitly so the argument always matches %llu */
		printk("devid %llu mismatch with %llu\n",
		       (unsigned long long)devid,
		       (unsigned long long)cur_devid);
		exit(1);
	}

	/* Snapshot everything we need before the path is released. */
	type = btrfs_device_type(leaf, dev_item);
	io_align = btrfs_device_io_align(leaf, dev_item);
	io_width = btrfs_device_io_width(leaf, dev_item);
	sector_size = btrfs_device_sector_size(leaf, dev_item);
	total_bytes = btrfs_device_total_bytes(leaf, dev_item);
	bytes_used = btrfs_device_bytes_used(leaf, dev_item);
	read_extent_buffer(leaf, dev_uuid, (unsigned long)btrfs_device_uuid(dev_item), BTRFS_UUID_SIZE);
	read_extent_buffer(leaf, fs_uuid, (unsigned long)btrfs_device_fsid(dev_item), BTRFS_UUID_SIZE);

	btrfs_release_path(info->chunk_root, &path);

	printk("update disk super on %s devid=%llu\n", other_dev,
	       (unsigned long long)devid);

	/* update other devices' super block */
	fp = open(other_dev, O_CREAT | O_RDWR, 0600);
	if (fp < 0) {
		fprintf(stderr, "could not open %s\n", other_dev);
		exit(1);
	}

	buf = malloc(BTRFS_SUPER_INFO_SIZE);
	if (!buf) {
		/* report the failure to the caller instead of leaking fp */
		close(fp);
		return -ENOMEM;
	}

	memcpy(buf, info->super_copy, BTRFS_SUPER_INFO_SIZE);

	disk_super = (struct btrfs_super_block *)buf;
	dev_item = &disk_super->dev_item;

	btrfs_set_stack_device_type(dev_item, type);
	btrfs_set_stack_device_id(dev_item, devid);
	btrfs_set_stack_device_total_bytes(dev_item, total_bytes);
	btrfs_set_stack_device_bytes_used(dev_item, bytes_used);
	btrfs_set_stack_device_io_align(dev_item, io_align);
	btrfs_set_stack_device_io_width(dev_item, io_width);
	btrfs_set_stack_device_sector_size(dev_item, sector_size);
	memcpy(dev_item->uuid, dev_uuid, BTRFS_UUID_SIZE);
	memcpy(dev_item->fsid, fs_uuid, BTRFS_UUID_SIZE);
	csum_block((u8 *)buf, BTRFS_SUPER_INFO_SIZE);

	ret = pwrite64(fp, buf, BTRFS_SUPER_INFO_SIZE, BTRFS_SUPER_INFO_OFFSET);
	if (ret != BTRFS_SUPER_INFO_SIZE) {
		ret = -EIO;
		goto out;
	}
	/* pwrite64() returned the byte count; normalise success to 0 */
	ret = 0;

	write_backup_supers(fp, (u8 *)buf);

out:
	free(buf);
	close(fp);
	/* propagate the write failure; the caller checks for non-zero */
	return ret;
}
static void print_usage(void)
{
fprintf(stderr, "usage: btrfs-image [options] source target\n");
@ -2272,7 +2420,7 @@ static void print_usage(void)
fprintf(stderr, "\t-c value\tcompression level (0 ~ 9)\n");
fprintf(stderr, "\t-t value\tnumber of threads (1 ~ 32)\n");
fprintf(stderr, "\t-o \tdon't mess with the chunk tree when restoring\n");
fprintf(stderr, "\t-s \tsanitize file names, use once to just use garbage, use twice if you want crc collisions");
fprintf(stderr, "\t-s \tsanitize file names, use once to just use garbage, use twice if you want crc collisions\n");
fprintf(stderr, "\t-w \twalk all trees instead of using extent tree, do this if your extent tree is broken\n");
exit(1);
}
@ -2286,12 +2434,14 @@ int main(int argc, char *argv[])
int create = 1;
int old_restore = 0;
int walk_trees = 0;
int multi_devices = 0;
int ret;
int sanitize = 0;
int dev_cnt = 0;
FILE *out;
while (1) {
int c = getopt(argc, argv, "rc:t:osw");
int c = getopt(argc, argv, "rc:t:oswm");
if (c < 0)
break;
switch (c) {
@ -2317,17 +2467,26 @@ int main(int argc, char *argv[])
case 'w':
walk_trees = 1;
break;
case 'm':
create = 0;
multi_devices = 1;
break;
default:
print_usage();
}
}
if (old_restore && create)
if ((old_restore) && create)
print_usage();
argc = argc - optind;
if (argc != 2)
dev_cnt = argc - 1;
if (multi_devices && dev_cnt < 2)
print_usage();
if (!multi_devices && dev_cnt != 1)
print_usage();
source = argv[optind];
target = argv[optind + 1];
@ -2351,8 +2510,60 @@ int main(int argc, char *argv[])
ret = create_metadump(source, out, num_threads,
compress_level, sanitize, walk_trees);
else
ret = restore_metadump(source, out, old_restore, 1);
ret = restore_metadump(source, out, old_restore, 1,
multi_devices);
if (ret) {
printk("%s failed (%s)\n", (create) ? "create" : "restore",
strerror(errno));
goto out;
}
/* extended support for multiple devices */
if (!create && multi_devices) {
struct btrfs_fs_info *info;
u64 total_devs;
int i;
info = open_ctree_fs_info_restore(target, 0, 0, 0, 1);
if (!info) {
int e = errno;
fprintf(stderr, "unable to open %s error = %s\n",
target, strerror(e));
return 1;
}
total_devs = btrfs_super_num_devices(info->super_copy);
if (total_devs != dev_cnt) {
printk("it needs %llu devices but has only %d\n",
total_devs, dev_cnt);
close_ctree(info->chunk_root);
goto out;
}
/* update super block on other disks */
for (i = 2; i <= dev_cnt; i++) {
ret = update_disk_super_on_device(info,
argv[optind + i], (u64)i);
if (ret) {
printk("update disk super failed devid=%d (error=%d)\n",
i, ret);
close_ctree(info->chunk_root);
exit(1);
}
}
close_ctree(info->chunk_root);
/* fix metadata block to map correct chunk */
ret = fixup_metadump(source, out, 1, target);
if (ret) {
fprintf(stderr, "fix metadump failed (error=%d)\n",
ret);
exit(1);
}
}
out:
if (out == stdout)
fflush(out);
else

View File

@ -949,6 +949,7 @@ struct btrfs_fs_info {
struct list_head space_info;
int system_allocs;
int readonly;
int on_restoring;
int (*free_extent_hook)(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
u64 bytenr, u64 num_bytes, u64 parent,

227
disk-io.c
View File

@ -192,7 +192,7 @@ out:
}
static int read_whole_eb(struct btrfs_fs_info *info, struct extent_buffer *eb, int mirror)
int read_whole_eb(struct btrfs_fs_info *info, struct extent_buffer *eb, int mirror)
{
unsigned long offset = 0;
struct btrfs_multi_bio *multi = NULL;
@ -203,26 +203,40 @@ static int read_whole_eb(struct btrfs_fs_info *info, struct extent_buffer *eb, i
while (bytes_left) {
read_len = bytes_left;
ret = btrfs_map_block(&info->mapping_tree, READ,
eb->start + offset, &read_len, &multi,
mirror, NULL);
if (ret) {
printk("Couldn't map the block %Lu\n", eb->start + offset);
kfree(multi);
return -EIO;
}
device = multi->stripes[0].dev;
device = NULL;
if (device->fd == 0) {
kfree(multi);
return -EIO;
}
if (!info->on_restoring) {
ret = btrfs_map_block(&info->mapping_tree, READ,
eb->start + offset, &read_len, &multi,
mirror, NULL);
if (ret) {
printk("Couldn't map the block %Lu\n", eb->start + offset);
kfree(multi);
return -EIO;
}
device = multi->stripes[0].dev;
eb->fd = device->fd;
device->total_ios++;
eb->dev_bytenr = multi->stripes[0].physical;
kfree(multi);
multi = NULL;
if (device->fd == 0) {
kfree(multi);
return -EIO;
}
eb->fd = device->fd;
device->total_ios++;
eb->dev_bytenr = multi->stripes[0].physical;
kfree(multi);
multi = NULL;
} else {
/* special case for restore metadump */
list_for_each_entry(device, &info->fs_devices->devices, dev_list) {
if (device->devid == 1)
break;
}
eb->fd = device->fd;
eb->dev_bytenr = eb->start;
device->total_ios++;
}
if (read_len > bytes_left)
read_len = bytes_left;
@ -291,149 +305,6 @@ struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr,
return NULL;
}
/*
 * Read-modify-write helper for one stripe-sized buffer.
 *
 * Fills @eb from disk via read_whole_eb(), then overlays the part of
 * @orig_eb that overlaps the byte range [eb->start, eb->start + eb->len).
 * If the two ranges do not overlap, @eb is left as read from disk.
 *
 * Returns 0 on success or the error returned by read_whole_eb().
 */
static int rmw_eb(struct btrfs_fs_info *info,
		  struct extent_buffer *eb, struct extent_buffer *orig_eb)
{
	unsigned long src_off = 0;	/* offset into orig_eb->data */
	unsigned long dst_off = 0;	/* offset into eb->data */
	unsigned long nbytes = eb->len;
	int err;

	err = read_whole_eb(info, eb, 0);
	if (err)
		return err;

	/* Nothing to overlay unless the two ranges intersect. */
	if (!(eb->start + eb->len > orig_eb->start &&
	      eb->start < orig_eb->start + orig_eb->len))
		return 0;

	/*
	 * Whichever buffer starts later begins at offset 0; the other one is
	 * entered at the distance between the two start addresses.
	 */
	if (eb->start > orig_eb->start)
		src_off = eb->start - orig_eb->start;
	else if (orig_eb->start > eb->start)
		dst_off = orig_eb->start - eb->start;

	/* Clamp the copy so it stays inside both buffers. */
	if (nbytes > orig_eb->len - src_off)
		nbytes = orig_eb->len - src_off;
	if (nbytes > eb->len - dst_off)
		nbytes = eb->len - dst_off;

	memcpy(eb->data + dst_off, orig_eb->data + src_off, nbytes);
	return 0;
}
/*
 * Build one stripe-sized extent buffer per data stripe of a RAID5/6 write.
 *
 * Walks @raid_map until the first entry that is >= BTRFS_RAID5_P_STRIPE
 * (or @num_stripes entries) and stores a freshly allocated buffer for each
 * visited entry in @ebs. A stripe entirely covered by @orig_eb is copied
 * straight from it; any other stripe goes through rmw_eb(). Allocation or
 * rmw_eb() failure triggers BUG()/BUG_ON().
 */
static void split_eb_for_raid56(struct btrfs_fs_info *info,
				struct extent_buffer *orig_eb,
				struct extent_buffer **ebs,
				u64 stripe_len, u64 *raid_map,
				int num_stripes)
{
	u64 orig_start = orig_eb->start;
	int idx;

	for (idx = 0;
	     idx < num_stripes && raid_map[idx] < BTRFS_RAID5_P_STRIPE;
	     idx++) {
		u64 stripe_start = raid_map[idx];
		struct extent_buffer *stripe_eb;
		int err;

		stripe_eb = malloc(sizeof(struct extent_buffer) + stripe_len);
		if (!stripe_eb)
			BUG();
		memset(stripe_eb, 0, sizeof(struct extent_buffer) + stripe_len);

		stripe_eb->start = stripe_start;
		stripe_eb->len = stripe_len;
		stripe_eb->refs = 1;
		stripe_eb->flags = 0;
		stripe_eb->fd = -1;
		stripe_eb->dev_bytenr = (u64)-1;

		if (orig_start <= stripe_start &&
		    orig_start + orig_eb->len >= stripe_start + stripe_len) {
			/* stripe lies fully inside the original buffer */
			memcpy(stripe_eb->data,
			       orig_eb->data + stripe_eb->start - orig_start,
			       stripe_len);
		} else {
			/* partial coverage: merge with the on-disk contents */
			err = rmw_eb(info, stripe_eb, orig_eb);
			BUG_ON(err);
		}

		ebs[idx] = stripe_eb;
	}
}
/*
 * Write @eb to a RAID5/6 mapping, generating parity on the fly.
 *
 * @eb is split into per-data-stripe buffers by split_eb_for_raid56(); one
 * extra buffer is allocated for every non-data entry of @raid_map. The P
 * buffer is filled by XOR-ing the data buffers (no Q stripe present) or
 * P and Q are filled by raid6_gen_syndrome() (Q stripe present). Every
 * buffer is then written with write_extent_to_disk().
 *
 * Any allocation or write failure trips BUG_ON(); otherwise returns 0.
 */
static int write_raid56_with_parity(struct btrfs_fs_info *info,
struct extent_buffer *eb,
struct btrfs_multi_bio *multi,
u64 stripe_len, u64 *raid_map)
{
struct extent_buffer *ebs[multi->num_stripes], *p_eb = NULL, *q_eb = NULL;
int i;
int j;
int ret;
/* parity buffers are sized to the larger of eb->len and stripe_len */
int alloc_size = eb->len;
if (stripe_len > alloc_size)
alloc_size = stripe_len;
/* fills ebs[] for the data stripes only */
split_eb_for_raid56(info, eb, ebs, stripe_len, raid_map,
multi->num_stripes);
for (i = 0; i < multi->num_stripes; i++) {
struct extent_buffer *new_eb;
if (raid_map[i] < BTRFS_RAID5_P_STRIPE) {
/* data stripe: point the prepared buffer at its device location */
ebs[i]->dev_bytenr = multi->stripes[i].physical;
ebs[i]->fd = multi->stripes[i].dev->fd;
multi->stripes[i].dev->total_ios++;
BUG_ON(ebs[i]->start != raid_map[i]);
continue;
}
/* parity stripe: allocate a buffer; its data is filled in below */
new_eb = kmalloc(sizeof(*eb) + alloc_size, GFP_NOFS);
BUG_ON(!new_eb);
new_eb->dev_bytenr = multi->stripes[i].physical;
new_eb->fd = multi->stripes[i].dev->fd;
multi->stripes[i].dev->total_ios++;
new_eb->len = stripe_len;
if (raid_map[i] == BTRFS_RAID5_P_STRIPE)
p_eb = new_eb;
else if (raid_map[i] == BTRFS_RAID6_Q_STRIPE)
q_eb = new_eb;
}
if (q_eb) {
/* Q present: P and Q take the last two slots, then compute both */
void *pointers[multi->num_stripes];
ebs[multi->num_stripes - 2] = p_eb;
ebs[multi->num_stripes - 1] = q_eb;
for (i = 0; i < multi->num_stripes; i++)
pointers[i] = ebs[i]->data;
raid6_gen_syndrome(multi->num_stripes, stripe_len, pointers);
} else {
/* no Q: P takes the last slot and is the XOR of all data stripes */
ebs[multi->num_stripes - 1] = p_eb;
memcpy(p_eb->data, ebs[0]->data, stripe_len);
for (j = 1; j < multi->num_stripes - 1; j++) {
/* XOR one unsigned long at a time; presumably stripe_len is a multiple of sizeof(unsigned long) - TODO confirm */
for (i = 0; i < stripe_len; i += sizeof(unsigned long)) {
*(unsigned long *)(p_eb->data + i) ^=
*(unsigned long *)(ebs[j]->data + i);
}
}
}
/* write every stripe, freeing each buffer that is not the caller's eb */
for (i = 0; i < multi->num_stripes; i++) {
ret = write_extent_to_disk(ebs[i]);
BUG_ON(ret);
if (ebs[i] != eb)
kfree(ebs[i]);
}
return 0;
}
int write_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root,
struct extent_buffer *eb)
{
@ -445,6 +316,7 @@ int write_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root,
if (check_tree_block(root, eb))
BUG();
if (!btrfs_buffer_uptodate(eb, trans->transid))
BUG();
@ -1106,7 +978,7 @@ int btrfs_setup_chunk_tree_and_device_map(struct btrfs_fs_info *fs_info)
static struct btrfs_fs_info *__open_ctree_fd(int fp, const char *path,
u64 sb_bytenr,
u64 root_tree_bytenr, int writes,
int partial)
int partial, int restore)
{
struct btrfs_fs_info *fs_info;
struct btrfs_super_block *disk_super;
@ -1126,6 +998,8 @@ static struct btrfs_fs_info *__open_ctree_fd(int fp, const char *path,
fprintf(stderr, "Failed to allocate memory for fs_info\n");
return NULL;
}
if (restore)
fs_info->on_restoring = 1;
ret = btrfs_scan_fs_devices(fp, path, &fs_devices);
if (ret)
@ -1182,6 +1056,29 @@ out:
return NULL;
}
/*
 * Restore-mode variant of open_ctree_fs_info().
 *
 * Opens @filename (O_RDONLY when @writes is zero, otherwise
 * O_CREAT | O_RDWR with mode 0600) and hands the descriptor to
 * __open_ctree_fd() with its restore argument set to 1. The descriptor
 * opened here is closed again before returning.
 *
 * Returns NULL if the file cannot be opened, otherwise the result of
 * __open_ctree_fd().
 */
struct btrfs_fs_info *open_ctree_fs_info_restore(const char *filename,
					 u64 sb_bytenr, u64 root_tree_bytenr,
					 int writes, int partial)
{
	struct btrfs_fs_info *fs_info;
	int open_flags = writes ? (O_CREAT | O_RDWR) : O_RDONLY;
	int fd = open(filename, open_flags, 0600);

	if (fd < 0) {
		fprintf (stderr, "Could not open %s\n", filename);
		return NULL;
	}

	fs_info = __open_ctree_fd(fd, filename, sb_bytenr, root_tree_bytenr,
				  writes, partial, 1);
	close(fd);

	return fs_info;
}
struct btrfs_fs_info *open_ctree_fs_info(const char *filename,
u64 sb_bytenr, u64 root_tree_bytenr,
int writes, int partial)
@ -1199,7 +1096,7 @@ struct btrfs_fs_info *open_ctree_fs_info(const char *filename,
return NULL;
}
info = __open_ctree_fd(fp, filename, sb_bytenr, root_tree_bytenr,
writes, partial);
writes, partial, 0);
close(fp);
return info;
}
@ -1218,7 +1115,7 @@ struct btrfs_root *open_ctree_fd(int fp, const char *path, u64 sb_bytenr,
int writes)
{
struct btrfs_fs_info *info;
info = __open_ctree_fd(fp, path, sb_bytenr, 0, writes, 0);
info = __open_ctree_fd(fp, path, sb_bytenr, 0, writes, 0, 0);
if (!info)
return NULL;
return info->fs_root;

View File

@ -35,10 +35,13 @@ static inline u64 btrfs_sb_offset(int mirror)
struct btrfs_device;
int read_whole_eb(struct btrfs_fs_info *info, struct extent_buffer *eb, int mirror);
struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr,
u32 blocksize, u64 parent_transid);
int readahead_tree_block(struct btrfs_root *root, u64 bytenr, u32 blocksize,
u64 parent_transid);
int write_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root,
struct extent_buffer *eb);
struct extent_buffer *btrfs_find_create_tree_block(struct btrfs_root *root,
u64 bytenr, u32 blocksize);
@ -62,6 +65,9 @@ int btrfs_setup_chunk_tree_and_device_map(struct btrfs_fs_info *fs_info);
struct btrfs_root *open_ctree(const char *filename, u64 sb_bytenr, int writes);
struct btrfs_root *open_ctree_fd(int fp, const char *path, u64 sb_bytenr,
int writes);
struct btrfs_fs_info *open_ctree_fs_info_restore(const char *filename,
u64 sb_bytenr, u64 root_tree_bytenr,
int writes, int partial);
struct btrfs_fs_info *open_ctree_fs_info(const char *filename,
u64 sb_bytenr, u64 root_tree_bytenr,
int writes, int partial);

View File

@ -749,6 +749,93 @@ int read_data_from_disk(struct btrfs_fs_info *info, void *buf, u64 offset,
return 0;
}
/*
 * Write @bytes bytes from @buf to the logical address @offset.
 *
 * The range is mapped piece by piece with btrfs_map_block(). When the
 * mapping comes back with a raid_map, the piece (capped at the tree root's
 * leafsize) is wrapped in a temporary extent buffer and written through
 * write_raid56_with_parity(). Otherwise the same piece is written with
 * pwrite() to every stripe returned by the mapping.
 *
 * Returns 0 on success and a negative errno-style value on failure:
 * -EIO when mapping fails, a stripe's device has fd 0 or a write is short,
 * and -errno when pwrite() itself fails.
 */
int write_data_to_disk(struct btrfs_fs_info *info, void *buf, u64 offset,
		       u64 bytes, int mirror)
{
	struct btrfs_multi_bio *multi = NULL;
	struct btrfs_device *device;
	u64 bytes_left = bytes;
	u64 this_len;
	u64 total_write = 0;
	u64 *raid_map = NULL;
	u64 dev_bytenr;
	int dev_nr;
	int ret = 0;

	while (bytes_left > 0) {
		this_len = bytes_left;
		dev_nr = 0;

		ret = btrfs_map_block(&info->mapping_tree, WRITE, offset,
				      &this_len, &multi, mirror, &raid_map);
		if (ret) {
			fprintf(stderr, "Couldn't map the block %Lu\n",
				offset);
			return -EIO;
		}

		if (raid_map) {
			struct extent_buffer *eb;
			/* the mapped length is the stripe length for parity */
			u64 stripe_len = this_len;

			this_len = min(this_len, bytes_left);
			this_len = min(this_len, (u64)info->tree_root->leafsize);

			eb = malloc(sizeof(struct extent_buffer) + this_len);
			BUG_ON(!eb);

			memset(eb, 0, sizeof(struct extent_buffer) + this_len);
			eb->start = offset;
			eb->len = this_len;

			memcpy(eb->data, buf + total_write, this_len);
			ret = write_raid56_with_parity(info, eb, multi,
						       stripe_len, raid_map);
			BUG_ON(ret);

			free(eb);
			kfree(raid_map);
			raid_map = NULL;
		} else while (dev_nr < multi->num_stripes) {
			device = multi->stripes[dev_nr].dev;
			if (device->fd == 0) {
				kfree(multi);
				return -EIO;
			}

			dev_bytenr = multi->stripes[dev_nr].physical;
			this_len = min(this_len, bytes_left);
			dev_nr++;

			ret = pwrite(device->fd, buf + total_write, this_len, dev_bytenr);
			if (ret != this_len) {
				if (ret < 0) {
					/*
					 * Capture errno before fprintf() can
					 * change it, and return it negated so
					 * it matches the other error paths.
					 */
					int saved_errno = errno;

					fprintf(stderr, "Error writing to "
						"device %d\n", saved_errno);
					kfree(multi);
					return -saved_errno;
				} else {
					fprintf(stderr, "Short write\n");
					kfree(multi);
					return -EIO;
				}
			}
		}

		BUG_ON(bytes_left < this_len);

		bytes_left -= this_len;
		offset += this_len;
		total_write += this_len;

		kfree(multi);
		multi = NULL;
	}
	return 0;
}
int set_extent_buffer_uptodate(struct extent_buffer *eb)
{
eb->flags |= EXTENT_UPTODATE;

View File

@ -132,4 +132,6 @@ int set_extent_buffer_dirty(struct extent_buffer *eb);
int clear_extent_buffer_dirty(struct extent_buffer *eb);
int read_data_from_disk(struct btrfs_fs_info *info, void *buf, u64 offset,
u64 bytes, int mirror);
int write_data_to_disk(struct btrfs_fs_info *info, void *buf, u64 offset,
u64 bytes, int mirror);
#endif

147
volumes.c
View File

@ -189,6 +189,10 @@ int btrfs_open_devices(struct btrfs_fs_devices *fs_devices, int flags)
list_for_each(cur, head) {
device = list_entry(cur, struct btrfs_device, dev_list);
if (!device->name) {
printk("no name for device %llu, skip it now\n", device->devid);
continue;
}
fd = open(device->name, flags);
if (fd < 0) {
@ -1769,3 +1773,146 @@ struct list_head *btrfs_scanned_uuids(void)
{
return &fs_uuids;
}
/*
 * Read-modify-write helper for one stripe-sized buffer.
 *
 * Fills @eb from disk via read_whole_eb(), then overlays the part of
 * @orig_eb that overlaps the byte range [eb->start, eb->start + eb->len).
 * If the two ranges do not overlap, @eb is left as read from disk.
 *
 * Returns 0 on success or the error returned by read_whole_eb().
 */
static int rmw_eb(struct btrfs_fs_info *info,
		  struct extent_buffer *eb, struct extent_buffer *orig_eb)
{
	unsigned long src_off = 0;	/* offset into orig_eb->data */
	unsigned long dst_off = 0;	/* offset into eb->data */
	unsigned long nbytes = eb->len;
	int err;

	err = read_whole_eb(info, eb, 0);
	if (err)
		return err;

	/* Nothing to overlay unless the two ranges intersect. */
	if (!(eb->start + eb->len > orig_eb->start &&
	      eb->start < orig_eb->start + orig_eb->len))
		return 0;

	/*
	 * Whichever buffer starts later begins at offset 0; the other one is
	 * entered at the distance between the two start addresses.
	 */
	if (eb->start > orig_eb->start)
		src_off = eb->start - orig_eb->start;
	else if (orig_eb->start > eb->start)
		dst_off = orig_eb->start - eb->start;

	/* Clamp the copy so it stays inside both buffers. */
	if (nbytes > orig_eb->len - src_off)
		nbytes = orig_eb->len - src_off;
	if (nbytes > eb->len - dst_off)
		nbytes = eb->len - dst_off;

	memcpy(eb->data + dst_off, orig_eb->data + src_off, nbytes);
	return 0;
}
/*
 * Build one stripe-sized extent buffer per data stripe of a RAID5/6 write.
 *
 * Walks @raid_map until the first entry that is >= BTRFS_RAID5_P_STRIPE
 * (or @num_stripes entries) and stores a freshly allocated buffer for each
 * visited entry in @ebs. A stripe entirely covered by @orig_eb is copied
 * straight from it; any other stripe goes through rmw_eb(). Allocation or
 * rmw_eb() failure triggers BUG()/BUG_ON().
 */
static void split_eb_for_raid56(struct btrfs_fs_info *info,
				struct extent_buffer *orig_eb,
				struct extent_buffer **ebs,
				u64 stripe_len, u64 *raid_map,
				int num_stripes)
{
	u64 orig_start = orig_eb->start;
	int idx;

	for (idx = 0;
	     idx < num_stripes && raid_map[idx] < BTRFS_RAID5_P_STRIPE;
	     idx++) {
		u64 stripe_start = raid_map[idx];
		struct extent_buffer *stripe_eb;
		int err;

		stripe_eb = malloc(sizeof(struct extent_buffer) + stripe_len);
		if (!stripe_eb)
			BUG();
		memset(stripe_eb, 0, sizeof(struct extent_buffer) + stripe_len);

		stripe_eb->start = stripe_start;
		stripe_eb->len = stripe_len;
		stripe_eb->refs = 1;
		stripe_eb->flags = 0;
		stripe_eb->fd = -1;
		stripe_eb->dev_bytenr = (u64)-1;

		if (orig_start <= stripe_start &&
		    orig_start + orig_eb->len >= stripe_start + stripe_len) {
			/* stripe lies fully inside the original buffer */
			memcpy(stripe_eb->data,
			       orig_eb->data + stripe_eb->start - orig_start,
			       stripe_len);
		} else {
			/* partial coverage: merge with the on-disk contents */
			err = rmw_eb(info, stripe_eb, orig_eb);
			BUG_ON(err);
		}

		ebs[idx] = stripe_eb;
	}
}
/*
 * Write @eb to a RAID5/6 mapping, generating parity on the fly.
 *
 * @eb is split into per-data-stripe buffers by split_eb_for_raid56(); one
 * extra buffer is allocated for every non-data entry of @raid_map. The P
 * buffer is filled by XOR-ing the data buffers (no Q stripe present) or
 * P and Q are filled by raid6_gen_syndrome() (Q stripe present). Every
 * buffer is then written with write_extent_to_disk().
 *
 * Any allocation or write failure trips BUG_ON(); otherwise returns 0.
 */
int write_raid56_with_parity(struct btrfs_fs_info *info,
struct extent_buffer *eb,
struct btrfs_multi_bio *multi,
u64 stripe_len, u64 *raid_map)
{
struct extent_buffer *ebs[multi->num_stripes], *p_eb = NULL, *q_eb = NULL;
int i;
int j;
int ret;
/* parity buffers are sized to the larger of eb->len and stripe_len */
int alloc_size = eb->len;
if (stripe_len > alloc_size)
alloc_size = stripe_len;
/* fills ebs[] for the data stripes only */
split_eb_for_raid56(info, eb, ebs, stripe_len, raid_map,
multi->num_stripes);
for (i = 0; i < multi->num_stripes; i++) {
struct extent_buffer *new_eb;
if (raid_map[i] < BTRFS_RAID5_P_STRIPE) {
/* data stripe: point the prepared buffer at its device location */
ebs[i]->dev_bytenr = multi->stripes[i].physical;
ebs[i]->fd = multi->stripes[i].dev->fd;
multi->stripes[i].dev->total_ios++;
BUG_ON(ebs[i]->start != raid_map[i]);
continue;
}
/* parity stripe: allocate a buffer; its data is filled in below */
new_eb = kmalloc(sizeof(*eb) + alloc_size, GFP_NOFS);
BUG_ON(!new_eb);
new_eb->dev_bytenr = multi->stripes[i].physical;
new_eb->fd = multi->stripes[i].dev->fd;
multi->stripes[i].dev->total_ios++;
new_eb->len = stripe_len;
if (raid_map[i] == BTRFS_RAID5_P_STRIPE)
p_eb = new_eb;
else if (raid_map[i] == BTRFS_RAID6_Q_STRIPE)
q_eb = new_eb;
}
if (q_eb) {
/* Q present: P and Q take the last two slots, then compute both */
void *pointers[multi->num_stripes];
ebs[multi->num_stripes - 2] = p_eb;
ebs[multi->num_stripes - 1] = q_eb;
for (i = 0; i < multi->num_stripes; i++)
pointers[i] = ebs[i]->data;
raid6_gen_syndrome(multi->num_stripes, stripe_len, pointers);
} else {
/* no Q: P takes the last slot and is the XOR of all data stripes */
ebs[multi->num_stripes - 1] = p_eb;
memcpy(p_eb->data, ebs[0]->data, stripe_len);
for (j = 1; j < multi->num_stripes - 1; j++) {
/* XOR one unsigned long at a time; presumably stripe_len is a multiple of sizeof(unsigned long) - TODO confirm */
for (i = 0; i < stripe_len; i += sizeof(unsigned long)) {
*(unsigned long *)(p_eb->data + i) ^=
*(unsigned long *)(ebs[j]->data + i);
}
}
}
/* write every stripe, freeing each buffer that is not the caller's eb */
for (i = 0; i < multi->num_stripes; i++) {
ret = write_extent_to_disk(ebs[i]);
BUG_ON(ret);
if (ebs[i] != eb)
kfree(ebs[i]);
}
return 0;
}

View File

@ -195,4 +195,8 @@ btrfs_find_device_by_devid(struct btrfs_fs_devices *fs_devices,
u64 devid, int instance);
struct btrfs_device *btrfs_find_device(struct btrfs_root *root, u64 devid,
u8 *uuid, u8 *fsid);
int write_raid56_with_parity(struct btrfs_fs_info *info,
struct extent_buffer *eb,
struct btrfs_multi_bio *multi,
u64 stripe_len, u64 *raid_map);
#endif