On 25.11.19 г. 16:40 ч., Josef Bacik wrote:
> inc_block_group_ro does a calculation to see if we have enough room left
> over if we mark this block group as read only in order to see if it's ok
> to mark the block group as read only.
>
> The problem is this calculation _only_ works for data, where our used is
> always less than our total. For metadata we will overcommit, so this
> will almost always fail for metadata.
>
> Fix this by exporting btrfs_can_overcommit, and then see if we have
> enough space to remove the remaining free space in the block group we
> are trying to mark read only. If we do then we can mark this block
> group as read only.
>
> Signed-off-by: Josef Bacik <josef@xxxxxxxxxxxxxx>
> ---
> fs/btrfs/block-group.c | 37 ++++++++++++++++++++++++++-----------
> fs/btrfs/space-info.c | 19 ++++++++++---------
> fs/btrfs/space-info.h | 3 +++
> 3 files changed, 39 insertions(+), 20 deletions(-)
>
> diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c
> index 3ffbc2e0af21..7b1f6d2b9621 100644
> --- a/fs/btrfs/block-group.c
> +++ b/fs/btrfs/block-group.c
> @@ -1184,7 +1184,6 @@ static int inc_block_group_ro(struct btrfs_block_group *cache, int force)
> {
> struct btrfs_space_info *sinfo = cache->space_info;
> u64 num_bytes;
> - u64 sinfo_used;
> int ret = -ENOSPC;
>
> spin_lock(&sinfo->lock);
> @@ -1200,19 +1199,38 @@ static int inc_block_group_ro(struct btrfs_block_group *cache, int force)
>
> num_bytes = cache->length - cache->reserved - cache->pinned -
> cache->bytes_super - cache->used;
> - sinfo_used = btrfs_space_info_used(sinfo, true);
>
> /*
> - * sinfo_used + num_bytes should always <= sinfo->total_bytes.
> - *
> - * Here we make sure if we mark this bg RO, we still have enough
> - * free space as buffer.
> + * Data never overcommits, even in mixed mode, so do just the straight
> + * check of left over space in how much we have allocated.
> */
> - if (sinfo_used + num_bytes + sinfo->total_bytes) {
> + if (sinfo->flags & BTRFS_BLOCK_GROUP_DATA) {
> + u64 sinfo_used = btrfs_space_info_used(sinfo, true);
does that mean bytes_may_write is always 0 for DATA?
> +
> + /*
> + * sinfo_used + num_bytes should always <= sinfo->total_bytes.
This invariant warrants an ASSERT.
> + *
> + * Here we make sure if we mark this bg RO, we still have enough
> + * free space as buffer.
> + */
> + if (sinfo_used + num_bytes + sinfo->total_bytes)
> + ret = 0;
> + } else {
> + /*
> + * We overcommit metadata, so we need to do the
> + * btrfs_can_overcommit check here, and we need to pass in
> + * BTRFS_RESERVE_NO_FLUSH to give ourselves the most amount of
> + * leeway to allow us to mark this block group as read only.
> + */
> + if (btrfs_can_overcommit(cache->fs_info, sinfo, num_bytes,
> + BTRFS_RESERVE_NO_FLUSH))
> + ret = 0;
> + }
> +
> + if (!ret) {
> sinfo->bytes_readonly += num_bytes;
> cache->ro++;
> list_add_tail(&cache->ro_list, &sinfo->ro_bgs);
> - ret = 0;
> }
> out:
> spin_unlock(&cache->lock);
> @@ -1220,9 +1238,6 @@ static int inc_block_group_ro(struct btrfs_block_group *cache, int force)
> if (ret == -ENOSPC && btrfs_test_opt(cache->fs_info, ENOSPC_DEBUG)) {
> btrfs_info(cache->fs_info,
> "unable to make block group %llu ro", cache->start);
> - btrfs_info(cache->fs_info,
> - "sinfo_used=%llu bg_num_bytes=%llu",
> - sinfo_used, num_bytes);
> btrfs_dump_space_info(cache->fs_info, cache->space_info, 0, 0);
> }
> return ret;
> diff --git a/fs/btrfs/space-info.c b/fs/btrfs/space-info.c
> index df5fb68df798..01297c5b2666 100644
> --- a/fs/btrfs/space-info.c
> +++ b/fs/btrfs/space-info.c
> @@ -159,9 +159,9 @@ static inline u64 calc_global_rsv_need_space(struct btrfs_block_rsv *global)
> return (global->size << 1);
> }
>
> -static int can_overcommit(struct btrfs_fs_info *fs_info,
> - struct btrfs_space_info *space_info, u64 bytes,
> - enum btrfs_reserve_flush_enum flush)
> +int btrfs_can_overcommit(struct btrfs_fs_info *fs_info,
> + struct btrfs_space_info *space_info, u64 bytes,
> + enum btrfs_reserve_flush_enum flush)
> {
> u64 profile;
> u64 avail;
> @@ -226,7 +226,8 @@ void btrfs_try_granting_tickets(struct btrfs_fs_info *fs_info,
>
> /* Check and see if our ticket can be satisified now. */
> if ((used + ticket->bytes <= space_info->total_bytes) ||
> - can_overcommit(fs_info, space_info, ticket->bytes, flush)) {
> + btrfs_can_overcommit(fs_info, space_info, ticket->bytes,
> + flush)) {
> btrfs_space_info_update_bytes_may_use(fs_info,
> space_info,
> ticket->bytes);
> @@ -639,14 +640,14 @@ btrfs_calc_reclaim_metadata_size(struct btrfs_fs_info *fs_info,
> return to_reclaim;
>
> to_reclaim = min_t(u64, num_online_cpus() * SZ_1M, SZ_16M);
> - if (can_overcommit(fs_info, space_info, to_reclaim,
> - BTRFS_RESERVE_FLUSH_ALL))
> + if (btrfs_can_overcommit(fs_info, space_info, to_reclaim,
> + BTRFS_RESERVE_FLUSH_ALL))
> return 0;
>
> used = btrfs_space_info_used(space_info, true);
>
> - if (can_overcommit(fs_info, space_info, SZ_1M,
> - BTRFS_RESERVE_FLUSH_ALL))
> + if (btrfs_can_overcommit(fs_info, space_info, SZ_1M,
> + BTRFS_RESERVE_FLUSH_ALL))
> expected = div_factor_fine(space_info->total_bytes, 95);
> else
> expected = div_factor_fine(space_info->total_bytes, 90);
> @@ -1005,7 +1006,7 @@ static int __reserve_metadata_bytes(struct btrfs_fs_info *fs_info,
> */
> if (!pending_tickets &&
> ((used + orig_bytes <= space_info->total_bytes) ||
> - can_overcommit(fs_info, space_info, orig_bytes, flush))) {
> + btrfs_can_overcommit(fs_info, space_info, orig_bytes, flush))) {
> btrfs_space_info_update_bytes_may_use(fs_info, space_info,
> orig_bytes);
> ret = 0;
> diff --git a/fs/btrfs/space-info.h b/fs/btrfs/space-info.h
> index 1a349e3f9cc1..24514cd2c6c1 100644
> --- a/fs/btrfs/space-info.h
> +++ b/fs/btrfs/space-info.h
> @@ -127,6 +127,9 @@ int btrfs_reserve_metadata_bytes(struct btrfs_root *root,
> enum btrfs_reserve_flush_enum flush);
> void btrfs_try_granting_tickets(struct btrfs_fs_info *fs_info,
> struct btrfs_space_info *space_info);
> +int btrfs_can_overcommit(struct btrfs_fs_info *fs_info,
> + struct btrfs_space_info *space_info, u64 bytes,
> + enum btrfs_reserve_flush_enum flush);
>
> static inline void btrfs_space_info_free_bytes_may_use(
> struct btrfs_fs_info *fs_info,
>