On 3.06.20 г. 9:21 ч., Qu Wenruo wrote:
> [BUG]
> When the data space is exhausted, even the inode has NOCOW attribute,
> btrfs will still refuse to truncate unaligned range due to ENOSPC.
>
> The following script can reproduce it pretty easily:
> #!/bin/bash
>
> dev=/dev/test/test
> mnt=/mnt/btrfs
>
> umount $dev &> /dev/null
> umount $mnt&> /dev/null
>
> mkfs.btrfs -f $dev -b 1G
> mount -o nospace_cache $dev $mnt
> touch $mnt/foobar
> chattr +C $mnt/foobar
>
> xfs_io -f -c "pwrite -b 4k 0 4k" $mnt/foobar > /dev/null
> xfs_io -f -c "pwrite -b 4k 0 1G" $mnt/padding &> /dev/null
> sync
>
> xfs_io -c "fpunch 0 2k" $mnt/foobar
> umount $mnt
>
> Current btrfs will fail at the fpunch part.
>
> [CAUSE]
> Because btrfs_truncate_block() always reserve space without checking the
> NOCOW attribute.
>
> Since the writeback path follows NOCOW bit, we only need to bother the
> space reservation code in btrfs_truncate_block().
>
> [FIX]
> Make btrfs_truncate_block() to follow btrfs_buffered_write() to try to
> reserve data space first, and falls back to NOCOW check only when we
> don't have enough space.
>
> Such always-try-reserve is an optimization introduced in
> btrfs_buffered_write(), to avoid expensive btrfs_check_can_nocow() call.
>
> Since now check_can_nocow() is needed outside of inode.c, also export it
> and rename it to btrfs_check_can_nocow().
>
> Reported-by: Martin Doucha <martin.doucha@xxxxxxxx>
> Signed-off-by: Qu Wenruo <wqu@xxxxxxxx>
> Reviewed-by: Filipe Manana <fdmanana@xxxxxxxx>
> ---
> Changelog:
> v2:
> - Rebased to misc-next
> Only one minor conflict in ctree.h
> ---
> fs/btrfs/ctree.h | 2 ++
> fs/btrfs/file.c | 10 +++++-----
> fs/btrfs/inode.c | 41 ++++++++++++++++++++++++++++++++++-------
> 3 files changed, 41 insertions(+), 12 deletions(-)
>
> diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
> index 161533040978..40e8c8170d39 100644
> --- a/fs/btrfs/ctree.h
> +++ b/fs/btrfs/ctree.h
> @@ -2984,6 +2984,8 @@ int btrfs_dirty_pages(struct inode *inode, struct page **pages,
> size_t num_pages, loff_t pos, size_t write_bytes,
> struct extent_state **cached);
> int btrfs_fdatawrite_range(struct inode *inode, loff_t start, loff_t end);
> +int btrfs_check_can_nocow(struct btrfs_inode *inode, loff_t pos,
> + size_t *write_bytes);
>
> /* tree-defrag.c */
> int btrfs_defrag_leaves(struct btrfs_trans_handle *trans,
> diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
> index fde125616687..a298803e2b08 100644
> --- a/fs/btrfs/file.c
> +++ b/fs/btrfs/file.c
> @@ -1532,8 +1532,8 @@ lock_and_cleanup_extent_if_need(struct btrfs_inode *inode, struct page **pages,
> return ret;
> }
>
> -static noinline int check_can_nocow(struct btrfs_inode *inode, loff_t pos,
> - size_t *write_bytes)
> +int btrfs_check_can_nocow(struct btrfs_inode *inode, loff_t pos,
> + size_t *write_bytes)
> {
> struct btrfs_fs_info *fs_info = inode->root->fs_info;
> struct btrfs_root *root = inode->root;
> @@ -1632,8 +1632,8 @@ static noinline ssize_t btrfs_buffered_write(struct kiocb *iocb,
> if (ret < 0) {
> if ((BTRFS_I(inode)->flags & (BTRFS_INODE_NODATACOW |
> BTRFS_INODE_PREALLOC)) &&
> - check_can_nocow(BTRFS_I(inode), pos,
> - &write_bytes) > 0) {
> + btrfs_check_can_nocow(BTRFS_I(inode), pos,
> + &write_bytes) > 0) {
This function acquires DREW lock and I don't see this lock being
released in the caller. David already raised concerns about
check_can_nocow obscuring this locking. Let's refactor this function
before introducing new callers.
> /*
> * For nodata cow case, no need to reserve
> * data space.
> @@ -1950,7 +1950,7 @@ static ssize_t btrfs_file_write_iter(struct kiocb *iocb,
> */
> if (!(BTRFS_I(inode)->flags & (BTRFS_INODE_NODATACOW |
> BTRFS_INODE_PREALLOC)) ||
> - check_can_nocow(BTRFS_I(inode), pos, &count) <= 0) {
> + btrfs_check_can_nocow(BTRFS_I(inode), pos, &count) <= 0) {
> inode_unlock(inode);
> return -EAGAIN;
> }
> diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
> index 1242d0aa108d..6b94ec7369c6 100644
> --- a/fs/btrfs/inode.c
> +++ b/fs/btrfs/inode.c
> @@ -4493,11 +4493,13 @@ int btrfs_truncate_block(struct inode *inode, loff_t from, loff_t len,
> struct extent_state *cached_state = NULL;
> struct extent_changeset *data_reserved = NULL;
> char *kaddr;
> + bool only_release_metadata = false;
> u32 blocksize = fs_info->sectorsize;
> pgoff_t index = from >> PAGE_SHIFT;
> unsigned offset = from & (blocksize - 1);
> struct page *page;
> gfp_t mask = btrfs_alloc_write_mask(mapping);
> + size_t write_bytes = blocksize;
> int ret = 0;
> u64 block_start;
> u64 block_end;
> @@ -4509,11 +4511,26 @@ int btrfs_truncate_block(struct inode *inode, loff_t from, loff_t len,
> block_start = round_down(from, blocksize);
> block_end = block_start + blocksize - 1;
>
> - ret = btrfs_delalloc_reserve_space(inode, &data_reserved,
> - block_start, blocksize);
> - if (ret)
> + ret = btrfs_check_data_free_space(inode, &data_reserved, block_start,
> + blocksize);
> + if (ret < 0) {
> + if ((BTRFS_I(inode)->flags & (BTRFS_INODE_NODATACOW |
> + BTRFS_INODE_PREALLOC)) &&
> + btrfs_check_can_nocow(BTRFS_I(inode), block_start,
> + &write_bytes) > 0) {
> + /* For nocow case, no need to reserve data space. */
> + only_release_metadata = true;
> + } else {
> + goto out;
> + }
> + }
> + ret = btrfs_delalloc_reserve_metadata(BTRFS_I(inode), blocksize);
> + if (ret < 0) {
> + if (!only_release_metadata)
> + btrfs_free_reserved_data_space(inode, data_reserved,
> + block_start, blocksize);
> goto out;
> -
> + }
> again:
> page = find_or_create_page(mapping, index, mask);
> if (!page) {
> @@ -4582,10 +4599,20 @@ int btrfs_truncate_block(struct inode *inode, loff_t from, loff_t len,
> set_page_dirty(page);
> unlock_extent_cached(io_tree, block_start, block_end, &cached_state);
>
> + if (only_release_metadata)
> + set_extent_bit(&BTRFS_I(inode)->io_tree, block_start,
> + block_end, EXTENT_NORESERVE, NULL, NULL,
> + GFP_NOFS);
> +
> out_unlock:
> - if (ret)
> - btrfs_delalloc_release_space(inode, data_reserved, block_start,
> - blocksize, true);
> + if (ret) {
> + if (!only_release_metadata)
> + btrfs_delalloc_release_space(inode, data_reserved,
> + block_start, blocksize, true);
> + else
> + btrfs_delalloc_release_metadata(BTRFS_I(inode),
> + blocksize, true);
> + }
> btrfs_delalloc_release_extents(BTRFS_I(inode), blocksize);
> unlock_page(page);
> put_page(page);
>