On Thu, Feb 21, 2019 at 01:57:12PM +0200, Nikolay Borisov wrote:
> This commit changes the implementation of cow_file_range_async in order
> to get rid of the BUG_ON in the middle of the loop. Additionally, it
> reworks the inner loop in the hopes of making it more understandable.
>
> The idea is to make async_cow a top-level structure, shared amongst
> all chunks being sent for compression. This allows us to perform one
> memory allocation at the beginning and gracefully fail the IO if
> there isn't enough memory. Each chunk is now described by an
> async_chunk struct, and it's the responsibility of the final chunk
> to actually free the memory.
Right, preallocation is the way to fix it. Please pick a more
descriptive subject; that one is too generic.
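
For anyone following along, the lifetime model described in the
changelog is the usual "last chunk out frees the whole block"
refcounting pattern, roughly like this (a simplified sketch, not the
btrfs code itself):

        struct chunk {
                /* per-chunk state lives here */
                int id;
        };

        struct ctx {
                atomic_t pending;       /* chunks still in flight */
                struct chunk chunks[];  /* flexible array member */
        };

        /* Run once per chunk when its work item finishes */
        static void chunk_done(struct ctx *ctx)
        {
                /* the last chunk to complete frees the shared allocation */
                if (atomic_dec_and_test(&ctx->pending))
                        kfree(ctx);
        }

That matches what async_cow_free does below via the 'pending' pointer.
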
> Signed-off-by: Nikolay Borisov <nborisov@xxxxxxxx>
> ---
> fs/btrfs/inode.c | 108 +++++++++++++++++++++++++++++++----------------
> 1 file changed, 71 insertions(+), 37 deletions(-)
>
> diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
> index 05bbfd02ea49..0f82ea348164 100644
> --- a/fs/btrfs/inode.c
> +++ b/fs/btrfs/inode.c
> @@ -366,7 +366,7 @@ struct async_extent {
> struct list_head list;
> };
>
> -struct async_cow {
> +struct async_chunk {
> struct inode *inode;
> struct btrfs_fs_info *fs_info;
> struct page *locked_page;
> @@ -375,9 +375,15 @@ struct async_cow {
> unsigned int write_flags;
> struct list_head extents;
> struct btrfs_work work;
> + atomic_t *pending;
> +};
> +
> +struct async_cow {
> + atomic_t num_chunks; /* Number of chunks in flight */
> + struct async_chunk chunks[];
> };
>
> -static noinline int add_async_extent(struct async_cow *cow,
> +static noinline int add_async_extent(struct async_chunk *cow,
> u64 start, u64 ram_size,
> u64 compressed_size,
> struct page **pages,
> @@ -447,7 +453,7 @@ static inline void inode_should_defrag(struct btrfs_inode *inode,
> static noinline void compress_file_range(struct inode *inode,
> struct page *locked_page,
> u64 start, u64 end,
> - struct async_cow *async_cow,
> + struct async_chunk *async_cow,
> int *num_added)
> {
> struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
> @@ -713,7 +719,7 @@ static void free_async_extent_pages(struct async_extent *async_extent)
> * queued. We walk all the async extents created by compress_file_range
> * and send them down to the disk.
> */
> -static noinline void submit_compressed_extents(struct async_cow *async_cow)
> +static noinline void submit_compressed_extents(struct async_chunk *async_cow)
> {
> struct inode *inode = async_cow->inode;
> struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
> @@ -1132,9 +1138,9 @@ static noinline int cow_file_range(struct inode *inode,
> */
> static noinline void async_cow_start(struct btrfs_work *work)
> {
> - struct async_cow *async_cow;
> + struct async_chunk *async_cow;
> int num_added = 0;
> - async_cow = container_of(work, struct async_cow, work);
> + async_cow = container_of(work, struct async_chunk, work);
>
> compress_file_range(async_cow->inode, async_cow->locked_page,
> async_cow->start, async_cow->end, async_cow,
> @@ -1151,10 +1157,10 @@ static noinline void async_cow_start(struct btrfs_work *work)
> static noinline void async_cow_submit(struct btrfs_work *work)
> {
> struct btrfs_fs_info *fs_info;
> - struct async_cow *async_cow;
> + struct async_chunk *async_cow;
> unsigned long nr_pages;
>
> - async_cow = container_of(work, struct async_cow, work);
> + async_cow = container_of(work, struct async_chunk, work);
>
> fs_info = async_cow->fs_info;
> nr_pages = (async_cow->end - async_cow->start + PAGE_SIZE) >>
> @@ -1177,11 +1183,16 @@ static noinline void async_cow_submit(struct btrfs_work *work)
>
> static noinline void async_cow_free(struct btrfs_work *work)
> {
> - struct async_cow *async_cow;
> - async_cow = container_of(work, struct async_cow, work);
> + struct async_chunk *async_cow;
> + async_cow = container_of(work, struct async_chunk, work);
> if (async_cow->inode)
> btrfs_add_delayed_iput(async_cow->inode);
> - kfree(async_cow);
> + /*
> + * Since 'pending' points at the first member of the enclosing
> + * async_cow struct, which also holds the chunk array, freeing it
> + * frees the whole allocation.
> + */
> + if (atomic_dec_and_test(async_cow->pending))
> + kfree(async_cow->pending);
> }
>
> static int cow_file_range_async(struct inode *inode, struct page *locked_page,
> @@ -1190,45 +1201,68 @@ static int cow_file_range_async(struct inode *inode, struct page *locked_page,
> unsigned int write_flags)
> {
> struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
> - struct async_cow *async_cow;
> + struct async_cow *ctx;
> + struct async_chunk *async_cow;
> unsigned long nr_pages;
> u64 cur_end;
> + u64 num_chunks = DIV_ROUND_UP(end - start, SZ_512K);
> + int i;
> + bool should_compress;
>
> clear_extent_bit(&BTRFS_I(inode)->io_tree, start, end, EXTENT_LOCKED,
> 1, 0, NULL);
> - while (start < end) {
> - async_cow = kmalloc(sizeof(*async_cow), GFP_NOFS);
> - BUG_ON(!async_cow); /* -ENOMEM */
> - /*
> - * igrab is called higher up in the call chain, take only the
> - * lightweight reference for the callback lifetime
> - */
> - ihold(inode);
> - async_cow->inode = inode;
> - async_cow->fs_info = fs_info;
> - async_cow->locked_page = locked_page;
> - async_cow->start = start;
> - async_cow->write_flags = write_flags;
> -
> - if (BTRFS_I(inode)->flags & BTRFS_INODE_NOCOMPRESS &&
> - !btrfs_test_opt(fs_info, FORCE_COMPRESS))
> - cur_end = end;
> - else
> - cur_end = min(end, start + SZ_512K - 1);
>
> - async_cow->end = cur_end;
> - INIT_LIST_HEAD(&async_cow->extents);
> + if (BTRFS_I(inode)->flags & BTRFS_INODE_NOCOMPRESS &&
> + !btrfs_test_opt(fs_info, FORCE_COMPRESS)) {
> + num_chunks = 1;
> + should_compress = false;
> + } else {
> + should_compress = true;
> + }
> +
> + ctx = kmalloc(sizeof(struct async_cow) + num_chunks*sizeof(*async_cow),
> + GFP_NOFS);
There's a recently introduced macro, struct_size(), for calculating the
size of a struct with a fixed header followed by a trailing array;
please use it.
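
Ie. something like this (assuming linux/overflow.h is pulled in here,
it comes with slab.h IIRC):

        ctx = kmalloc(struct_size(ctx, chunks, num_chunks), GFP_NOFS);

Besides being shorter, struct_size() saturates to SIZE_MAX on overflow
instead of silently wrapping, so the allocation fails cleanly.
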
> + if (!ctx) {
> + unsigned clear_bits = EXTENT_LOCKED | EXTENT_DELALLOC |
> + EXTENT_DELALLOC_NEW | EXTENT_DEFRAG |
> + EXTENT_DO_ACCOUNTING;
> + unsigned long page_ops = PAGE_UNLOCK | PAGE_CLEAR_DIRTY |
> + PAGE_SET_WRITEBACK | PAGE_END_WRITEBACK |
> + PAGE_SET_ERROR;
> + extent_clear_unlock_delalloc(inode, start, end, 0, locked_page,
> + clear_bits, page_ops);
> + return -ENOMEM;
> + }
> +
> + async_cow = ctx->chunks;
> + atomic_set(&ctx->num_chunks, num_chunks);
> +
> + for (i = 0; i < num_chunks; i++) {
>
> - btrfs_init_work(&async_cow->work,
> + if (should_compress)
> + cur_end = min(end, start + SZ_512K - 1);
> + else
> + cur_end = end;
> +
> + ihold(inode);
You should preserve the comment; the refactoring does not make it
obsolete or wrong.
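
Ie. keep it next to the ihold() call in the new loop, same as in the
code being deleted:

        /*
         * igrab is called higher up in the call chain, take only the
         * lightweight reference for the callback lifetime
         */
        ihold(inode);
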
> + async_cow[i].pending = &ctx->num_chunks;
> + async_cow[i].inode = inode;
> + async_cow[i].start = start;
> + async_cow[i].end = cur_end;
> + async_cow[i].fs_info = fs_info;
> + async_cow[i].locked_page = locked_page;
> + async_cow[i].write_flags = write_flags;
> + INIT_LIST_HEAD(&async_cow[i].extents);
> +
> + btrfs_init_work(&async_cow[i].work,
> btrfs_delalloc_helper,
> async_cow_start, async_cow_submit,
> async_cow_free);
>
> - nr_pages = (cur_end - start + PAGE_SIZE) >>
> - PAGE_SHIFT;
> + nr_pages = DIV_ROUND_UP(cur_end - start, PAGE_SIZE);
> atomic_add(nr_pages, &fs_info->async_delalloc_pages);
>
> - btrfs_queue_work(fs_info->delalloc_workers, &async_cow->work);
> + btrfs_queue_work(fs_info->delalloc_workers, &async_cow[i].work);
>
> *nr_written += nr_pages;
> start = cur_end + 1;
> --
> 2.17.1