Re: [RFC PATCH] btrfs: Speedup btrfs_read_block_groups()

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 




On 2018年02月22日 12:52, Qu Wenruo wrote:
> btrfs_read_block_groups() is used to build up the block group cache for
> all block groups, so it will iterate over all block group items in the
> extent tree.
> 
> For a large filesystem (TB level), it will search for BLOCK_GROUP_ITEM
> thousands of times, which is the most time-consuming part of mounting
> btrfs.
> 
> So this patch will try to speed it up by:
> 
> 1) Avoid unnecessary readahead
>    We were using READA_FORWARD to search for block group items.
>    However, block group items are in fact scattered across quite a lot
>    of leaves. Doing readahead will just waste our IO (especially
>    important for HDD).
> 
>    In a real-world case, a filesystem with 3T of used space would have
>    about 50K extent tree leaves but only 3K block group items, meaning
>    we need to iterate about 16 leaves on average to reach one block
>    group item.
> 
>    So readahead won't help; it will only waste slow HDD seeks.
> 
> 2) Use chunk mapping to locate block group items
>    Since one block group item always has one corresponding chunk item,
>    we can use the chunk mapping to get the block group's start and
>    length, i.e. the objectid and offset of its item key.
> 
>    With the exact block group item key, we can do a pinpoint tree
>    search instead of searching with an uncertain key and then scanning
>    forward.
> 
>    In some cases, such as when the next BLOCK_GROUP_ITEM is in the next
>    leaf of the current path, this saves an unnecessary tree block read.
> 
> Cc: Ellis H. Wilson III <ellisw@xxxxxxxxxxx>

Hi Ellis,

Would you please try this patch to see if it helps to speed up the mount
of your large filesystem?

Thanks,
Qu

> Signed-off-by: Qu Wenruo <wqu@xxxxxxxx>
> ---
> Since all my TB-level storage is occupied by my NAS, any feedback
> (especially on the real-world mount speed change) is welcome.
> ---
>  fs/btrfs/extent-tree.c | 88 +++++++++++++++-----------------------------------
>  1 file changed, 26 insertions(+), 62 deletions(-)
> 
> diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
> index 2f4328511ac8..a3faa0cbe056 100644
> --- a/fs/btrfs/extent-tree.c
> +++ b/fs/btrfs/extent-tree.c
> @@ -9713,60 +9713,6 @@ int btrfs_can_relocate(struct btrfs_fs_info *fs_info, u64 bytenr)
>  	return ret;
>  }
>  
> -static int find_first_block_group(struct btrfs_fs_info *fs_info,
> -				  struct btrfs_path *path,
> -				  struct btrfs_key *key)
> -{
> -	struct btrfs_root *root = fs_info->extent_root;
> -	int ret = 0;
> -	struct btrfs_key found_key;
> -	struct extent_buffer *leaf;
> -	int slot;
> -
> -	ret = btrfs_search_slot(NULL, root, key, path, 0, 0);
> -	if (ret < 0)
> -		goto out;
> -
> -	while (1) {
> -		slot = path->slots[0];
> -		leaf = path->nodes[0];
> -		if (slot >= btrfs_header_nritems(leaf)) {
> -			ret = btrfs_next_leaf(root, path);
> -			if (ret == 0)
> -				continue;
> -			if (ret < 0)
> -				goto out;
> -			break;
> -		}
> -		btrfs_item_key_to_cpu(leaf, &found_key, slot);
> -
> -		if (found_key.objectid >= key->objectid &&
> -		    found_key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
> -			struct extent_map_tree *em_tree;
> -			struct extent_map *em;
> -
> -			em_tree = &root->fs_info->mapping_tree.map_tree;
> -			read_lock(&em_tree->lock);
> -			em = lookup_extent_mapping(em_tree, found_key.objectid,
> -						   found_key.offset);
> -			read_unlock(&em_tree->lock);
> -			if (!em) {
> -				btrfs_err(fs_info,
> -			"logical %llu len %llu found bg but no related chunk",
> -					  found_key.objectid, found_key.offset);
> -				ret = -ENOENT;
> -			} else {
> -				ret = 0;
> -			}
> -			free_extent_map(em);
> -			goto out;
> -		}
> -		path->slots[0]++;
> -	}
> -out:
> -	return ret;
> -}
> -
>  void btrfs_put_block_group_cache(struct btrfs_fs_info *info)
>  {
>  	struct btrfs_block_group_cache *block_group;
> @@ -9988,12 +9934,15 @@ int btrfs_read_block_groups(struct btrfs_fs_info *info)
>  {
>  	struct btrfs_path *path;
>  	int ret;
> +	struct btrfs_mapping_tree *map_tree = &info->mapping_tree;
> +	struct btrfs_root *extent_root = info->extent_root;
>  	struct btrfs_block_group_cache *cache;
>  	struct btrfs_space_info *space_info;
>  	struct btrfs_key key;
>  	struct btrfs_key found_key;
>  	struct extent_buffer *leaf;
>  	int need_clear = 0;
> +	u64 cur = 0;
>  	u64 cache_gen;
>  	u64 feature;
>  	int mixed;
> @@ -10001,13 +9950,9 @@ int btrfs_read_block_groups(struct btrfs_fs_info *info)
>  	feature = btrfs_super_incompat_flags(info->super_copy);
>  	mixed = !!(feature & BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS);
>  
> -	key.objectid = 0;
> -	key.offset = 0;
> -	key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
>  	path = btrfs_alloc_path();
>  	if (!path)
>  		return -ENOMEM;
> -	path->reada = READA_FORWARD;
>  
>  	cache_gen = btrfs_super_cache_generation(info->super_copy);
>  	if (btrfs_test_opt(info, SPACE_CACHE) &&
> @@ -10017,10 +9962,30 @@ int btrfs_read_block_groups(struct btrfs_fs_info *info)
>  		need_clear = 1;
>  
>  	while (1) {
> -		ret = find_first_block_group(info, path, &key);
> -		if (ret > 0)
> +		struct extent_map *em;
> +
> +		read_lock(&map_tree->map_tree.lock);
> +		em = lookup_extent_mapping(&map_tree->map_tree, cur,
> +					   ((u64)-1) - cur);
> +		read_unlock(&map_tree->map_tree.lock);
> +		if (!em)
>  			break;
> -		if (ret != 0)
> +
> +		key.objectid = em->start;
> +		key.offset = em->len;
> +		key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
> +		cur = em->start + em->len;
> +		free_extent_map(em);
> +
> +		ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
> +		if (ret > 0) {
> +			WARN(1, KERN_ERR
> +			"chunk [%llu %llu) doesn't has its block group item\n",
> +			     key.objectid, key.objectid + key.offset);
> +			ret = -ENOENT;
> +			goto error;
> +		}
> +		if (ret < 0)
>  			goto error;
>  
>  		leaf = path->nodes[0];
> @@ -10062,7 +10027,6 @@ int btrfs_read_block_groups(struct btrfs_fs_info *info)
>  			goto error;
>  		}
>  
> -		key.objectid = found_key.objectid + found_key.offset;
>  		btrfs_release_path(path);
>  
>  		/*
> 

Attachment: signature.asc
Description: OpenPGP digital signature


[Index of Archives]     [Linux Filesystem Development]     [Linux NFS]     [Linux NILFS]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux