On Sat, 2009-07-11 at 15:39 +0100, David Woodhouse wrote:
> This is a preliminary attempt to add RAID5 and RAID6 support.
Matching btrfs-progs patch...
diff --git a/ctree.h b/ctree.h
index a9062ea..5b3c690 100644
--- a/ctree.h
+++ b/ctree.h
@@ -640,6 +640,8 @@ struct btrfs_csum_item {
#define BTRFS_BLOCK_GROUP_RAID1 (1 << 4)
#define BTRFS_BLOCK_GROUP_DUP (1 << 5)
#define BTRFS_BLOCK_GROUP_RAID10 (1 << 6)
+#define BTRFS_BLOCK_GROUP_RAID5 (1 << 7)
+#define BTRFS_BLOCK_GROUP_RAID6 (1 << 8)
struct btrfs_block_group_item {
__le64 used;
diff --git a/extent-tree.c b/extent-tree.c
index b2f9bb2..77cfcb5 100644
--- a/extent-tree.c
+++ b/extent-tree.c
@@ -1775,6 +1775,8 @@ static void set_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags)
{
u64 extra_flags = flags & (BTRFS_BLOCK_GROUP_RAID0 |
BTRFS_BLOCK_GROUP_RAID1 |
+ BTRFS_BLOCK_GROUP_RAID5 |
+ BTRFS_BLOCK_GROUP_RAID6 |
BTRFS_BLOCK_GROUP_DUP);
if (extra_flags) {
if (flags & BTRFS_BLOCK_GROUP_DATA)
diff --git a/mkfs.c b/mkfs.c
index 2e99b95..aefe1af 100644
--- a/mkfs.c
+++ b/mkfs.c
@@ -203,16 +203,22 @@ static int create_raid_groups(struct btrfs_trans_handle *trans,
u64 metadata_profile)
{
u64 num_devices = btrfs_super_num_devices(&root->fs_info->super_copy);
- u64 allowed;
+ u64 allowed = 0;
int ret;
- if (num_devices == 1)
- allowed = BTRFS_BLOCK_GROUP_DUP;
- else if (num_devices >= 4) {
- allowed = BTRFS_BLOCK_GROUP_RAID0 | BTRFS_BLOCK_GROUP_RAID1 |
- BTRFS_BLOCK_GROUP_RAID10;
- } else
- allowed = BTRFS_BLOCK_GROUP_RAID0 | BTRFS_BLOCK_GROUP_RAID1;
+ switch (num_devices) {
+ default:
+ case 4:
+ allowed |= BTRFS_BLOCK_GROUP_RAID10;
+ case 3:
+ allowed |= BTRFS_BLOCK_GROUP_RAID6;
+ case 2:
+ allowed |= BTRFS_BLOCK_GROUP_RAID0 | BTRFS_BLOCK_GROUP_RAID1 |
+ BTRFS_BLOCK_GROUP_RAID5;
+ break;
+ case 1:
+ allowed |= BTRFS_BLOCK_GROUP_DUP;
+ }
if (allowed & metadata_profile) {
ret = create_one_raid_group(trans, root,
@@ -292,6 +298,10 @@ static u64 parse_profile(char *s)
return BTRFS_BLOCK_GROUP_RAID0;
} else if (strcmp(s, "raid1") == 0) {
return BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_DUP;
+ } else if (strcmp(s, "raid5") == 0) {
+ return BTRFS_BLOCK_GROUP_RAID5;
+ } else if (strcmp(s, "raid6") == 0) {
+ return BTRFS_BLOCK_GROUP_RAID6;
} else if (strcmp(s, "raid10") == 0) {
return BTRFS_BLOCK_GROUP_RAID10 | BTRFS_BLOCK_GROUP_DUP;
} else if (strcmp(s, "single") == 0) {
diff --git a/volumes.c b/volumes.c
index 7671855..90090b0 100644
--- a/volumes.c
+++ b/volumes.c
@@ -47,6 +47,21 @@ struct map_lookup {
struct btrfs_bio_stripe stripes[];
};
+/*
+ * Number of parity stripes in a chunk mapping, derived from map->type:
+ * 1 when the RAID5 bit is set, otherwise 2 when the RAID6 bit is set,
+ * otherwise 0.  The RAID5 bit is tested first.
+ */
+static inline int nr_parity_stripes(struct map_lookup *map)
+{
+	if (map->type & BTRFS_BLOCK_GROUP_RAID5)
+		return 1;
+
+	return (map->type & BTRFS_BLOCK_GROUP_RAID6) ? 2 : 0;
+}
+
+/*
+ * Number of stripes in a chunk mapping that are not parity:
+ * map->num_stripes minus the parity stripe count for map's profile.
+ */
+static inline int nr_data_stripes(struct map_lookup *map)
+{
+	int parity = nr_parity_stripes(map);
+
+	return map->num_stripes - parity;
+}
+
#define map_lookup_size(n) (sizeof(struct map_lookup) + \
(sizeof(struct btrfs_bio_stripe) * (n)))
@@ -623,6 +638,10 @@ static u64 chunk_bytes_by_type(u64 type, u64 calc_size, int num_stripes,
return calc_size;
else if (type & BTRFS_BLOCK_GROUP_RAID10)
return calc_size * (num_stripes / sub_stripes);
+ else if (type & BTRFS_BLOCK_GROUP_RAID5)
+ return calc_size * (num_stripes - 1);
+ else if (type & BTRFS_BLOCK_GROUP_RAID6)
+ return calc_size * (num_stripes - 2);
else
return calc_size * num_stripes;
}
@@ -664,6 +683,7 @@ int btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
}
if (type & (BTRFS_BLOCK_GROUP_RAID0 | BTRFS_BLOCK_GROUP_RAID1 |
+ BTRFS_BLOCK_GROUP_RAID5 | BTRFS_BLOCK_GROUP_RAID6 |
BTRFS_BLOCK_GROUP_RAID10 |
BTRFS_BLOCK_GROUP_DUP)) {
if (type & BTRFS_BLOCK_GROUP_SYSTEM) {
@@ -703,6 +723,18 @@ int btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
sub_stripes = 2;
min_stripes = 4;
}
+ if (type & (BTRFS_BLOCK_GROUP_RAID5)) {
+ num_stripes = btrfs_super_num_devices(&info->super_copy);
+ if (num_stripes < 2)
+ return -ENOSPC;
+ min_stripes = 2;
+ }
+ if (type & (BTRFS_BLOCK_GROUP_RAID6)) {
+ num_stripes = btrfs_super_num_devices(&info->super_copy);
+ if (num_stripes < 3)
+ return -ENOSPC;
+ min_stripes = 3;
+ }
/* we don't want a chunk larger than 10% of the FS */
percent_max = div_factor(btrfs_super_total_bytes(&info->super_copy), 1);
@@ -879,6 +911,10 @@ int btrfs_num_copies(struct btrfs_mapping_tree *map_tree, u64 logical, u64 len)
ret = map->num_stripes;
else if (map->type & BTRFS_BLOCK_GROUP_RAID10)
ret = map->sub_stripes;
+ else if (map->type & BTRFS_BLOCK_GROUP_RAID5)
+ ret = 2;
+ else if (map->type & BTRFS_BLOCK_GROUP_RAID6)
+ ret = 3;
else
ret = 1;
return ret;
@@ -894,6 +930,7 @@ int btrfs_rmap_block(struct btrfs_mapping_tree *map_tree,
u64 bytenr;
u64 length;
u64 stripe_nr;
+ u64 rmap_len;
int i, j, nr = 0;
ce = find_first_cache_extent(&map_tree->cache_tree, chunk_start);
@@ -901,10 +938,16 @@ int btrfs_rmap_block(struct btrfs_mapping_tree *map_tree,
map = container_of(ce, struct map_lookup, ce);
length = ce->size;
+ rmap_len = map->stripe_len;
if (map->type & BTRFS_BLOCK_GROUP_RAID10)
length = ce->size / (map->num_stripes / map->sub_stripes);
else if (map->type & BTRFS_BLOCK_GROUP_RAID0)
length = ce->size / map->num_stripes;
+ else if (map->type & (BTRFS_BLOCK_GROUP_RAID5 |
+ BTRFS_BLOCK_GROUP_RAID6)) {
+ length = ce->size / nr_data_stripes(map);
+ rmap_len = map->stripe_len * nr_data_stripes(map);
+ }
buf = kzalloc(sizeof(u64) * map->num_stripes, GFP_NOFS);
@@ -923,8 +966,11 @@ int btrfs_rmap_block(struct btrfs_mapping_tree *map_tree,
map->sub_stripes;
} else if (map->type & BTRFS_BLOCK_GROUP_RAID0) {
stripe_nr = stripe_nr * map->num_stripes + i;
- }
- bytenr = ce->start + stripe_nr * map->stripe_len;
+ } /* else if RAID[56], multiply by nr_data_stripes().
+ * Alternatively, just use rmap_len below instead of
+ * map->stripe_len */
+
+ bytenr = ce->start + stripe_nr * rmap_len;
for (j = 0; j < nr; j++) {
if (buf[j] == bytenr)
break;
@@ -935,7 +981,7 @@ int btrfs_rmap_block(struct btrfs_mapping_tree *map_tree,
*logical = buf;
*naddrs = nr;
- *stripe_len = map->stripe_len;
+ *stripe_len = rmap_len;
return 0;
}
@@ -1001,6 +1047,7 @@ again:
stripe_offset = offset - stripe_offset;
if (map->type & (BTRFS_BLOCK_GROUP_RAID0 | BTRFS_BLOCK_GROUP_RAID1 |
+ BTRFS_BLOCK_GROUP_RAID5 | BTRFS_BLOCK_GROUP_RAID6 |
BTRFS_BLOCK_GROUP_RAID10 |
BTRFS_BLOCK_GROUP_DUP)) {
/* we limit the length of each bio to what fits in a stripe */
@@ -1041,6 +1088,23 @@ again:
multi->num_stripes = map->num_stripes;
else if (mirror_num)
stripe_index = mirror_num - 1;
+ } else if (map->type & (BTRFS_BLOCK_GROUP_RAID5 |
+ BTRFS_BLOCK_GROUP_RAID6)) {
+
+		/*
+		 * Split stripe_nr into the position within a row of data
+		 * stripes (stripe_index) and the row number (stripe_nr).
+		 */
+		stripe_index = stripe_nr % nr_data_stripes(map);
+		stripe_nr = stripe_nr / nr_data_stripes(map);
+
+		/*
+		 * Mirror #0 or #1 means the original data block.
+		 * Mirror #2 is the RAID5 parity block.
+		 * Mirror #3 is the RAID6 Q block.
+		 */
+		if (mirror_num > 1)
+			stripe_index = nr_data_stripes(map) + mirror_num - 2;
+
+		/*
+		 * We distribute the parity blocks across stripes by rotating
+		 * the index by the row number.  This has to be a modulo: a
+		 * bitwise AND with num_stripes does not wrap and can yield
+		 * num_stripes itself (e.g. 3 & 3 == 3), which is outside the
+		 * valid range 0..num_stripes-1.
+		 */
+		stripe_index = (stripe_nr + stripe_index) % map->num_stripes;
+
} else {
/*
* after this do_div call, stripe_nr is the number of stripes
--
David Woodhouse Open Source Technology Centre
David.Woodhouse@xxxxxxxxx Intel Corporation
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html