On 2018年02月22日 12:52, Qu Wenruo wrote:
> btrfs_read_block_groups() is used to build up the block group cache for
> all block groups, so it will iterate all block group items in extent
> tree.
>
> For a large filesystem (TB level), it will search for BLOCK_GROUP_ITEM
> thousands of times, which is the most time-consuming part of mounting
> btrfs.
>
> So this patch will try to speed it up by:
>
> 1) Avoid unnecessary readahead
> We were using READA_FORWARD to search for block group items.
> However, block group items are in fact scattered across quite a lot of
> leaves. Doing readahead will just waste our IO (especially important
> for HDD).
>
> In a real-world case, a filesystem with 3T of used space would
> have about 50K extent tree leaves, but only 3K block group
> items, meaning we need to iterate 16 leaves to meet one block group
> on average.
>
> So readahead won't help; it will only waste slow HDD seeks.
>
> 2) Use chunk mapping to locate block group items
> Since one block group item always has one corresponding chunk item,
> we could use chunk mapping to get the block group item size.
>
> With the block group item size, we can do a pinpoint tree search, instead
> of searching with some uncertain value and then doing a forward search.
>
> In some cases, e.g. when the next BLOCK_GROUP_ITEM is in the next leaf of
> the current path, we can save such an unnecessary tree block read.
>
> Cc: Ellis H. Wilson III <ellisw@xxxxxxxxxxx>
Hi Ellis,
Would you please try this patch to see if it helps to speed up the mount
of your large filesystem?
Thanks,
Qu
> Signed-off-by: Qu Wenruo <wqu@xxxxxxxx>
> ---
> Since all my TB-level storage is occupied by my NAS, any feedback
> (especially on the real-world mount speed change) is welcome.
> ---
> fs/btrfs/extent-tree.c | 88 +++++++++++++++-----------------------------------
> 1 file changed, 26 insertions(+), 62 deletions(-)
>
> diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
> index 2f4328511ac8..a3faa0cbe056 100644
> --- a/fs/btrfs/extent-tree.c
> +++ b/fs/btrfs/extent-tree.c
> @@ -9713,60 +9713,6 @@ int btrfs_can_relocate(struct btrfs_fs_info *fs_info, u64 bytenr)
> return ret;
> }
>
> -static int find_first_block_group(struct btrfs_fs_info *fs_info,
> - struct btrfs_path *path,
> - struct btrfs_key *key)
> -{
> - struct btrfs_root *root = fs_info->extent_root;
> - int ret = 0;
> - struct btrfs_key found_key;
> - struct extent_buffer *leaf;
> - int slot;
> -
> - ret = btrfs_search_slot(NULL, root, key, path, 0, 0);
> - if (ret < 0)
> - goto out;
> -
> - while (1) {
> - slot = path->slots[0];
> - leaf = path->nodes[0];
> - if (slot >= btrfs_header_nritems(leaf)) {
> - ret = btrfs_next_leaf(root, path);
> - if (ret == 0)
> - continue;
> - if (ret < 0)
> - goto out;
> - break;
> - }
> - btrfs_item_key_to_cpu(leaf, &found_key, slot);
> -
> - if (found_key.objectid >= key->objectid &&
> - found_key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
> - struct extent_map_tree *em_tree;
> - struct extent_map *em;
> -
> - em_tree = &root->fs_info->mapping_tree.map_tree;
> - read_lock(&em_tree->lock);
> - em = lookup_extent_mapping(em_tree, found_key.objectid,
> - found_key.offset);
> - read_unlock(&em_tree->lock);
> - if (!em) {
> - btrfs_err(fs_info,
> - "logical %llu len %llu found bg but no related chunk",
> - found_key.objectid, found_key.offset);
> - ret = -ENOENT;
> - } else {
> - ret = 0;
> - }
> - free_extent_map(em);
> - goto out;
> - }
> - path->slots[0]++;
> - }
> -out:
> - return ret;
> -}
> -
> void btrfs_put_block_group_cache(struct btrfs_fs_info *info)
> {
> struct btrfs_block_group_cache *block_group;
> @@ -9988,12 +9934,15 @@ int btrfs_read_block_groups(struct btrfs_fs_info *info)
> {
> struct btrfs_path *path;
> int ret;
> + struct btrfs_mapping_tree *map_tree = &info->mapping_tree;
> + struct btrfs_root *extent_root = info->extent_root;
> struct btrfs_block_group_cache *cache;
> struct btrfs_space_info *space_info;
> struct btrfs_key key;
> struct btrfs_key found_key;
> struct extent_buffer *leaf;
> int need_clear = 0;
> + u64 cur = 0;
> u64 cache_gen;
> u64 feature;
> int mixed;
> @@ -10001,13 +9950,9 @@ int btrfs_read_block_groups(struct btrfs_fs_info *info)
> feature = btrfs_super_incompat_flags(info->super_copy);
> mixed = !!(feature & BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS);
>
> - key.objectid = 0;
> - key.offset = 0;
> - key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
> path = btrfs_alloc_path();
> if (!path)
> return -ENOMEM;
> - path->reada = READA_FORWARD;
>
> cache_gen = btrfs_super_cache_generation(info->super_copy);
> if (btrfs_test_opt(info, SPACE_CACHE) &&
> @@ -10017,10 +9962,30 @@ int btrfs_read_block_groups(struct btrfs_fs_info *info)
> need_clear = 1;
>
> while (1) {
> - ret = find_first_block_group(info, path, &key);
> - if (ret > 0)
> + struct extent_map *em;
> +
> + read_lock(&map_tree->map_tree.lock);
> + em = lookup_extent_mapping(&map_tree->map_tree, cur,
> + ((u64)-1) - cur);
> + read_unlock(&map_tree->map_tree.lock);
> + if (!em)
> break;
> - if (ret != 0)
> +
> + key.objectid = em->start;
> + key.offset = em->len;
> + key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
> + cur = em->start + em->len;
> + free_extent_map(em);
> +
> + ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
> + if (ret > 0) {
> + WARN(1, KERN_ERR
> + "chunk [%llu %llu) doesn't has its block group item\n",
> + key.objectid, key.objectid + key.offset);
> + ret = -ENOENT;
> + goto error;
> + }
> + if (ret < 0)
> goto error;
>
> leaf = path->nodes[0];
> @@ -10062,7 +10027,6 @@ int btrfs_read_block_groups(struct btrfs_fs_info *info)
> goto error;
> }
>
> - key.objectid = found_key.objectid + found_key.offset;
> btrfs_release_path(path);
>
> /*
>
Attachment:
signature.asc
Description: OpenPGP digital signature
