Re: [PATCH v2 10/10] btrfs-progs: Refactor btrfs_alloc_chunk to mimic kernel structure and behavior

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 




On  9.02.2018 09:44, Qu Wenruo wrote:
> Kernel uses a delayed chunk allocation behavior for metadata chunks
> 
> KERNEL:
> btrfs_alloc_chunk()
> |- __btrfs_alloc_chunk():	Only allocate chunk mapping
>    |- btrfs_make_block_group():	Add corresponding bg to fs_info->new_bgs
> 
> Then at transaction commit time, it finishes the remaining work:
> btrfs_start_dirty_block_groups():
> |- btrfs_create_pending_block_groups()
>    |- btrfs_insert_item():	To insert block group item
>    |- btrfs_finish_chunk_alloc(): Insert chunk items/dev extents
> 
> Although btrfs-progs btrfs_alloc_chunk() does all the work in one
> function, it can still benefit from function refactor like:
> btrfs-progs:
> btrfs_alloc_chunk():	Wrapper for both normal and convert chunks
> |- __btrfs_alloc_chunk():	Only alloc chunk mapping
> |  |- btrfs_make_block_group(): <<Insert bg items into extent tree>>
> |- btrfs_finish_chunk_alloc():  Insert chunk items/dev extents
> 
> With such refactor, the following functions can share most of its code
> with kernel now:
> __btrfs_alloc_chunk()
> btrfs_finish_chunk_alloc()
> btrfs_alloc_dev_extent()
> 
> Signed-off-by: Qu Wenruo <wqu@xxxxxxxx>
> ---
>  volumes.c | 421 ++++++++++++++++++++++++++++++++++++++------------------------
>  1 file changed, 260 insertions(+), 161 deletions(-)
> 
> diff --git a/volumes.c b/volumes.c
> index cff54c612872..e89520326314 100644
> --- a/volumes.c
> +++ b/volumes.c
> @@ -523,55 +523,40 @@ static int find_free_dev_extent(struct btrfs_device *device, u64 num_bytes,
>  	return find_free_dev_extent_start(device, num_bytes, 0, start, len);
>  }
>  
> -static int btrfs_insert_dev_extents(struct btrfs_trans_handle *trans,
> -				    struct btrfs_fs_info *fs_info,
> -				    struct map_lookup *map, u64 stripe_size)
> +static int btrfs_alloc_dev_extent(struct btrfs_trans_handle *trans,
> +				  struct btrfs_device *device,
> +				  u64 chunk_offset, u64 physical,
> +				  u64 stripe_size)
>  {
> -	int ret = 0;
> -	struct btrfs_path path;
> +	int ret;
> +	struct btrfs_path *path;
> +	struct btrfs_fs_info *fs_info = device->fs_info;
>  	struct btrfs_root *root = fs_info->dev_root;
>  	struct btrfs_dev_extent *extent;
>  	struct extent_buffer *leaf;
>  	struct btrfs_key key;
> -	int i;
>  
> -	btrfs_init_path(&path);
> -
> -	for (i = 0; i < map->num_stripes; i++) {
> -		struct btrfs_device *device = map->stripes[i].dev;
> -
> -		key.objectid = device->devid;
> -		key.offset = map->stripes[i].physical;
> -		key.type = BTRFS_DEV_EXTENT_KEY;
> +	path = btrfs_alloc_path();
> +	if (!path)
> +		return -ENOMEM;
>  
> -		ret = btrfs_insert_empty_item(trans, root, &path, &key,
> -					      sizeof(*extent));
> -		if (ret < 0)
> -			goto out;
> -		leaf = path.nodes[0];
> -		extent = btrfs_item_ptr(leaf, path.slots[0],
> -					struct btrfs_dev_extent);
> -		btrfs_set_dev_extent_chunk_tree(leaf, extent,
> +	key.objectid = device->devid;
> +	key.offset = physical;
> +	key.type = BTRFS_DEV_EXTENT_KEY;
> +	ret = btrfs_insert_empty_item(trans, root, path, &key, sizeof(*extent));
> +	if (ret)
> +		goto out;
> +	leaf = path->nodes[0];
> +	extent = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dev_extent);
> +	btrfs_set_dev_extent_chunk_tree(leaf, extent,
>  					BTRFS_CHUNK_TREE_OBJECTID);
> -		btrfs_set_dev_extent_chunk_objectid(leaf, extent,
> -					BTRFS_FIRST_CHUNK_TREE_OBJECTID);
> -		btrfs_set_dev_extent_chunk_offset(leaf, extent, map->ce.start);
> -
> -		write_extent_buffer(leaf, fs_info->chunk_tree_uuid,
> -			(unsigned long)btrfs_dev_extent_chunk_tree_uuid(extent),
> -			BTRFS_UUID_SIZE);
> -
> -		btrfs_set_dev_extent_length(leaf, extent, stripe_size);
> -		btrfs_mark_buffer_dirty(leaf);
> -		btrfs_release_path(&path);
> -
> -		device->bytes_used += stripe_size;
> -		ret = btrfs_update_device(trans, device);
> -		if (ret < 0)
> -			goto out;
> -	}
> +	btrfs_set_dev_extent_chunk_objectid(leaf, extent,
> +					    BTRFS_FIRST_CHUNK_TREE_OBJECTID);
> +	btrfs_set_dev_extent_chunk_offset(leaf, extent, chunk_offset);
> +	btrfs_set_dev_extent_length(leaf, extent, stripe_size);
> +	btrfs_mark_buffer_dirty(leaf);
>  out:
> -	btrfs_release_path(&path);
> +	btrfs_free_path(path);
>  	return ret;
>  }
>  
> @@ -813,28 +798,28 @@ static int btrfs_cmp_device_info(const void *a, const void *b)
>  				/ sizeof(struct btrfs_stripe) + 1)
>  
>  /*
> - * Alloc a chunk, will insert dev extents, chunk item, and insert new
> - * block group and update space info (so that extent allocator can use
> - * newly allocated chunk).
> + * Alloc a chunk mapping.
> + * Will do chunk size calculation and free dev extent search, and insert
> + * chunk mapping into chunk mapping tree.
> + *
> + * NOTE: This function doesn't handle any chunk item/dev extent insert.
> + * chunk item/dev extent insert is handled by later btrfs_finish_chunk_alloc().
> + * And for convert chunk (1:1 mapped, more flex chunk location), it's
> + * handled by __btrfs_alloc_convert_chunk().
> + *
> + * Qu: Block group item is still inserted in this function by
> + * btrfs_make_block_group(), this still differs from kernel.
>   *

If it still differs, does that mean this function should undergo further
refactoring to unify it with the kernel, or does it mean it will never be
the same as the kernel's code? I thought the idea of unifying the
userspace/kernel code was to have only one set of functions which works
both in kernel and userspace?

>   * @start:	return value of allocated chunk start bytenr.
>   * @num_bytes:	return value of allocated chunk size
>   * @type:	chunk type (including both profile and type)
> - * @convert:   if the chunk is allocated for convert case.
> - *             If @convert is true, chunk allocator will skip device extent
> - *             search, but use *start and *num_bytes as chunk start/num_bytes
> - *             and devive offset, to build a 1:1 chunk mapping for convert.
>   */
> -int btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
> -		      struct btrfs_fs_info *info, u64 *start,
> -		      u64 *num_bytes, u64 type, bool convert)
> +static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
> +			       struct btrfs_fs_info *info, u64 *start,
> +			       u64 *num_bytes, u64 type)
>  {
> -	struct btrfs_root *extent_root = info->extent_root;
> -	struct btrfs_root *chunk_root = info->chunk_root;
>  	struct btrfs_device *device = NULL;
> -	struct btrfs_chunk *chunk;
>  	struct list_head *dev_list = &info->fs_devices->devices;
> -	struct btrfs_stripe *stripe;
>  	struct map_lookup *map;
>  	struct btrfs_device_info *devices_info = NULL;
>  	u64 percent_max;
> @@ -845,8 +830,6 @@ int btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
>  	int index;
>  	int ndevs = 0;
>  	int rw_devs = 0;
> -	int stripe_len = BTRFS_STRIPE_LEN;
> -	struct btrfs_key key;
>  	u64 offset;
>  	int data_stripes;	/* number of stripes that counts for bg size */
>  	int sub_stripes;	/* sub_stripes info for map */
> @@ -874,34 +857,6 @@ int btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
>  	devs_increment = btrfs_raid_array[index].devs_increment;
>  	ncopies = btrfs_raid_array[index].ncopies;
>  
> -	if (convert) {
> -		/* For convert, profile must be SINGLE */
> -		if (type & BTRFS_BLOCK_GROUP_PROFILE_MASK) {
> -			error("convert only suports SINGLE profile");
> -			return -EINVAL;
> -		}
> -		if (!IS_ALIGNED(*start, info->sectorsize)) {
> -			error("chunk start not aligned, start=%llu sectorsize=%u",
> -				*start, info->sectorsize);
> -			return -EINVAL;
> -		}
> -		if (!IS_ALIGNED(*num_bytes, info->sectorsize)) {
> -			error("chunk size not aligned, size=%llu sectorsize=%u",
> -				*num_bytes, info->sectorsize);
> -			return -EINVAL;
> -		}
> -		num_stripes = 1;
> -		data_stripes = 1;
> -		offset = *start;
> -		stripe_size = *num_bytes;
> -		/*
> -		 * For convert, we use 1:1 chunk mapping specified by @start and
> -		 * @num_bytes, so there is no need to go through dev_extent
> -		 * searching.
> -		 */
> -		goto alloc_chunk;
> -	}
> -
>  	/*
>  	 * Chunk size calculation part.
>  	 */
> @@ -1047,55 +1002,23 @@ int btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
>  	/*
>  	 * Fill chunk mapping and chunk stripes
>  	 */
> -alloc_chunk:
> -	if (!convert) {
> -		ret = find_next_chunk(info, &offset);
> -		if (ret)
> -			goto out_devices_info;
> -	}
> -	key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
> -	key.type = BTRFS_CHUNK_ITEM_KEY;
> -	key.offset = offset;
> -	*num_bytes =  stripe_size * data_stripes;
> -
> -	chunk = kmalloc(btrfs_chunk_item_size(num_stripes), GFP_NOFS);
> -	if (!chunk)
> +	ret = find_next_chunk(info, &offset);
> +	if (ret)
>  		goto out_devices_info;
> +	*num_bytes =  stripe_size * data_stripes;
>  
>  	map = kmalloc(btrfs_map_lookup_size(num_stripes), GFP_NOFS);
>  	if (!map)
>  		goto out_chunk_map;
>  	map->num_stripes = num_stripes;
>  
> -	if (convert) {
> -		device = list_entry(dev_list->next, struct btrfs_device,
> -				    dev_list);
> +	for (i = 0; i < ndevs; i++) {
> +		for (j = 0; j < dev_stripes; ++j) {
> +			int s = i * dev_stripes + j;
>  
> -		map->stripes[0].dev = device;
> -		map->stripes[0].physical = *start;
> -		stripe = &chunk->stripe;
> -		btrfs_set_stack_stripe_devid(stripe, device->devid);
> -		btrfs_set_stack_stripe_offset(stripe, *start);
> -		memcpy(stripe->dev_uuid, device->uuid, BTRFS_UUID_SIZE);
> -	} else {
> -		for (i = 0; i < ndevs; i++) {
> -			for (j = 0; j < dev_stripes; ++j) {
> -				int s = i * dev_stripes + j;
> -
> -				device = devices_info[i].dev;
> -				map->stripes[s].dev = device;
> -				map->stripes[s].physical =
> -					devices_info[i].dev_offset +
> -					j * stripe_size;
> -				stripe = &chunk->stripe + s;
> -				btrfs_set_stack_stripe_devid(stripe,
> -						device->devid);
> -				btrfs_set_stack_stripe_offset(stripe,
> -						devices_info[i].dev_offset +
> -						j * stripe_size);
> -				memcpy(stripe->dev_uuid, device->uuid,
> -						BTRFS_UUID_SIZE);
> -			}
> +			map->stripes[s].dev = devices_info[i].dev;
> +			map->stripes[s].physical = devices_info[i].dev_offset +
> +						   j * stripe_size;
>  		}
>  	}
>  	map->stripe_len = BTRFS_STRIPE_LEN;
> @@ -1104,60 +1027,236 @@ alloc_chunk:
>  	map->type = type;
>  	map->sub_stripes = sub_stripes;
>  	map->sector_size = info->sectorsize;
> -	map->ce.start = key.offset;
> +	map->ce.start = offset;
>  	map->ce.size = *num_bytes;
>  
> +	kfree(devices_info);
>  
> +	/*
> +	 * Insert chunk mapping and block group
> +	 */
>  	ret = insert_cache_extent(&info->mapping_tree.cache_tree, &map->ce);
>  	if (ret < 0)
>  		goto out_chunk_map;
>  
> -	/* key was set above */
> -	btrfs_set_stack_chunk_length(chunk, *num_bytes);
> -	btrfs_set_stack_chunk_owner(chunk, extent_root->root_key.objectid);
> -	btrfs_set_stack_chunk_stripe_len(chunk, stripe_len);
> -	btrfs_set_stack_chunk_type(chunk, type);
> -	btrfs_set_stack_chunk_num_stripes(chunk, num_stripes);
> -	btrfs_set_stack_chunk_io_align(chunk, stripe_len);
> -	btrfs_set_stack_chunk_io_width(chunk, stripe_len);
> -	btrfs_set_stack_chunk_sector_size(chunk, info->sectorsize);
> -	btrfs_set_stack_chunk_sub_stripes(chunk, sub_stripes);
> +	ret = btrfs_make_block_group(trans, info, 0, type, offset,
> +				     *num_bytes);
> +	*start = offset;
> +	return ret;
> +
> +out_chunk_map:
> +	kfree(map);
> +out_devices_info:
> +	kfree(devices_info);
> +	return ret;
> +}
> +
> +/*
> + * Alloc a chunk mapping for convert.
> + * Convert needs special SINGLE chunk whose logical bytenr is the same as its
> + * physical bytenr.
> + * Chunk size and start bytenr are all specified by @start and @num_bytes
> + *
> + * And just like __btrfs_alloc_chunk() this only handles chunk mapping and
> + * block group item.
> + */
> +static int __btrfs_alloc_convert_chunk(struct btrfs_trans_handle *trans,
> +				       struct btrfs_fs_info *fs_info, u64 start,
> +				       u64 num_bytes, u64 type)
> +{
> +	struct list_head *dev_list = &fs_info->fs_devices->devices;
> +	struct map_lookup *map;
> +	struct btrfs_device *device;
> +	int ret;
> +
> +	/* For convert, profile must be SINGLE */
> +	if (type & BTRFS_BLOCK_GROUP_PROFILE_MASK) {
> +		error("convert only supports SINGLE profile");
> +		return -EINVAL;
> +	}
> +	if (!IS_ALIGNED(start, fs_info->sectorsize)) {
> +		error("chunk start not aligned, start=%llu sectorsize=%u",
> +		      start, fs_info->sectorsize);
> +		return -EINVAL;
> +	}
> +	if (!IS_ALIGNED(num_bytes, fs_info->sectorsize)) {
> +		error("chunk size not aligned, size=%llu sectorsize=%u",
> +		      num_bytes, fs_info->sectorsize);
> +		return -EINVAL;
> +	}
> +	if (list_empty(dev_list)) {
> +		error("no writable device");
> +		return -ENODEV;
> +	}
> +
> +	device = list_entry(dev_list->next, struct btrfs_device, dev_list);
> +	map = malloc(btrfs_map_lookup_size(1));
> +	if (!map)
> +		return -ENOMEM;
> +	map->num_stripes = 1;
> +	map->stripes[0].dev = device;
> +	map->stripes[0].physical = start;
> +	map->stripe_len = BTRFS_STRIPE_LEN;
> +	map->io_align = BTRFS_STRIPE_LEN;
> +	map->io_width = BTRFS_STRIPE_LEN;
> +	map->type = type;
> +	map->sub_stripes = 1;
> +	map->sector_size = fs_info->sectorsize;
> +	map->ce.start = start;
> +	map->ce.size = num_bytes;
> +
> +	ret = insert_cache_extent(&fs_info->mapping_tree.cache_tree, &map->ce);
> +	if (ret < 0)
> +		goto error;
> +	ret = btrfs_make_block_group(trans, fs_info, 0, type, start, num_bytes);
> +	return ret;
> +error:
> +	free(map);
> +	return ret;
> +}
> +
> +/*
> + * Finish the chunk allocation by inserting needed chunk item and device
> + * extents, and update device used bytes
> + */
> +static int btrfs_finish_chunk_alloc(struct btrfs_trans_handle *trans,
> +				    struct btrfs_fs_info *fs_info,
> +				    u64 chunk_start, u64 chunk_size)
> +{
> +	struct btrfs_root *extent_root = fs_info->extent_root;
> +	struct btrfs_root *chunk_root = fs_info->chunk_root;
> +	struct btrfs_key key;
> +	struct btrfs_device *device;
> +	struct btrfs_chunk *chunk;
> +	struct btrfs_stripe *stripe;
> +	struct cache_extent *ce;
> +	struct map_lookup *map;
> +	size_t item_size;
> +	u64 dev_offset;
> +	u64 stripe_size;
> +	int i = 0;
> +	int ret = 0;
> +
> +	ce = search_cache_extent(&fs_info->mapping_tree.cache_tree, chunk_start);
> +	if (!ce)
> +		return -ENOENT;
> +
> +	map = container_of(ce, struct map_lookup, ce);
> +	item_size = btrfs_chunk_item_size(map->num_stripes);
> +	stripe_size = calc_stripe_length(map->type, map->ce.size,
> +					 map->num_stripes);
> +
> +	chunk = kzalloc(item_size, GFP_NOFS);
> +	if (!chunk) {
> +		ret = -ENOMEM;
> +		goto out;
> +	}
>  
>  	/*
> -	 * Insert chunk item and chunk mapping.
> +	 * Take the device list mutex to prevent races with the final phase of
> +	 * a device replace operation that replaces the device object associated
> +	 * with the map's stripes, because the device object's id can change
> +	 * at any time during that final phase of the device replace operation
> +	 * (dev-replace.c:btrfs_dev_replace_finishing()).
>  	 */
> -	ret = btrfs_insert_item(trans, chunk_root, &key, chunk,
> -				btrfs_chunk_item_size(num_stripes));
> -	BUG_ON(ret);
> -	*start = key.offset;
> +	/* mutex_lock(&fs_info->fs_devices->device_list_mutex); */
> +	for (i = 0; i < map->num_stripes; i++) {
> +		device = map->stripes[i].dev;
> +		dev_offset = map->stripes[i].physical;
>  
> -	if (type & BTRFS_BLOCK_GROUP_SYSTEM) {
> -		ret = btrfs_add_system_chunk(info, &key,
> -				    chunk, btrfs_chunk_item_size(num_stripes));
> -		if (ret < 0)
> -			goto out_chunk;
> +		device->bytes_used += stripe_size;
> +		ret = btrfs_update_device(trans, device);
> +		if (ret)
> +			break;
> +		ret = btrfs_alloc_dev_extent(trans, device, chunk_start,
> +					     dev_offset, stripe_size);
> +		if (ret)
> +			break;
> +	}
> +	if (ret) {
> +		/* mutex_unlock(&fs_info->fs_devices->device_list_mutex); */
> +		goto out;
>  	}
>  
> +	stripe = &chunk->stripe;
> +	for (i = 0; i < map->num_stripes; i++) {
> +		device = map->stripes[i].dev;
> +		dev_offset = map->stripes[i].physical;
> +
> +		btrfs_set_stack_stripe_devid(stripe, device->devid);
> +		btrfs_set_stack_stripe_offset(stripe, dev_offset);
> +		memcpy(stripe->dev_uuid, device->uuid, BTRFS_UUID_SIZE);
> +		stripe++;
> +	}
> +	/* mutex_unlock(&fs_info->fs_devices->device_list_mutex); */
> +
> +	btrfs_set_stack_chunk_length(chunk, chunk_size);
> +	btrfs_set_stack_chunk_owner(chunk, extent_root->root_key.objectid);
> +	btrfs_set_stack_chunk_stripe_len(chunk, map->stripe_len);
> +	btrfs_set_stack_chunk_type(chunk, map->type);
> +	btrfs_set_stack_chunk_num_stripes(chunk, map->num_stripes);
> +	btrfs_set_stack_chunk_io_align(chunk, map->stripe_len);
> +	btrfs_set_stack_chunk_io_width(chunk, map->stripe_len);
> +	btrfs_set_stack_chunk_sector_size(chunk, fs_info->sectorsize);
> +	btrfs_set_stack_chunk_sub_stripes(chunk, map->sub_stripes);
> +
> +	key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
> +	key.type = BTRFS_CHUNK_ITEM_KEY;
> +	key.offset = chunk_start;
> +
> +	ret = btrfs_insert_item(trans, chunk_root, &key, chunk, item_size);
> +	if (ret == 0 && map->type & BTRFS_BLOCK_GROUP_SYSTEM) {
> +		/*
> +		 * TODO: Cleanup of inserted chunk root in case of
> +		 * failure.
> +		 */
> +		ret = btrfs_add_system_chunk(fs_info, &key, chunk, item_size);
> +	}
> +
> +out:
>  	kfree(chunk);
> +	return ret;
> +}
> +
> +/*
> + * Alloc a chunk.
> + * Will do all the needed work including searching free device extent, insert
> + * chunk mapping, chunk item, block group item and device extents.
> + *
> + * @start:	return value of allocated chunk start bytenr.
> + * @num_bytes:	return value of allocated chunk size
> + * @type:	chunk type (including both profile and type)
> + * @convert:	if the chunk is allocated for convert case.
> + *		If @convert is true, chunk allocator will skip device extent
> + *		search, but use *start and *num_bytes as chunk start/num_bytes
> + *		and device offset, to build a 1:1 chunk mapping for convert.
> + */
> +int btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
> +		      struct btrfs_fs_info *fs_info, u64 *start, u64 *num_bytes,
> +		      u64 type, bool convert)
> +{
> +	int ret;
>  
>  	/*
> -	 * Insert device extents
> +	 * Allocate chunk mapping
>  	 */
> -	ret = btrfs_insert_dev_extents(trans, info, map, stripe_size);
> -	if (ret < 0)
> -		goto out_devices_info;
> -
> -	ret = btrfs_make_block_group(trans, info, 0, type, map->ce.start,
> -				     map->ce.size);
> -	kfree(devices_info);
> -	return ret;
> +	if (convert)
> +		ret = __btrfs_alloc_convert_chunk(trans, fs_info, *start,
> +						  *num_bytes, type);
> +	else
> +		ret = __btrfs_alloc_chunk(trans, fs_info, start, num_bytes,
> +					  type);
> +	if (ret < 0) {
> +		error("failed to allocate chunk mapping: %s", strerror(-ret));
> +		return ret;
> +	}
>  
> -out_chunk_map:
> -	kfree(map);
> -out_chunk:
> -	kfree(chunk);
> -out_devices_info:
> -	kfree(devices_info);
> +	/*
> +	 * Insert the remaining part (insert variant items)
> +	 */
> +	ret = btrfs_finish_chunk_alloc(trans, fs_info, *start, *num_bytes);
> +	if (ret < 0)
> +		error("failed to finish chunk allocation: %s", strerror(-ret));
>  	return ret;
>  }
>  
> 
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html




[Index of Archives]     [Linux Filesystem Development]     [Linux NFS]     [Linux NILFS]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux