Re: [PATCH 1/6] Btrfs: subpagesize-blocksize: Get rid of whole page reads.

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Wed, Mar 12, 2014 at 07:50:28PM +0530, Chandan Rajendra wrote:
> bio_vec->{bv_offset, bv_len} cannot be relied upon by the end bio functions
> to track the file offset range operated on by the bio. Hence this patch adds
> two new members to 'struct btrfs_io_bio' to track the file offset range.
> 
> This patch also brings back check_page_locked() to reliably unlock pages in
> readpage's end bio function.
> 
> Signed-off-by: Chandan Rajendra <chandan@xxxxxxxxxxxxxxxxxx>
> ---
>  fs/btrfs/extent_io.c | 122 +++++++++++++++++++++++++++++++++------------------
>  fs/btrfs/volumes.h   |   3 ++
>  2 files changed, 82 insertions(+), 43 deletions(-)
> 
> diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
> index fbe501d..5a65aee 100644
> --- a/fs/btrfs/extent_io.c
> +++ b/fs/btrfs/extent_io.c
> @@ -1943,15 +1943,31 @@ int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end,
>   * helper function to set a given page up to date if all the
>   * extents in the tree for that page are up to date
>   */
> -static void check_page_uptodate(struct extent_io_tree *tree, struct page *page)
> +static void check_page_uptodate(struct extent_io_tree *tree, struct page *page,
> +				struct extent_state *cached)
>  {
>  	u64 start = page_offset(page);
>  	u64 end = start + PAGE_CACHE_SIZE - 1;
> -	if (test_range_bit(tree, start, end, EXTENT_UPTODATE, 1, NULL))
> +	if (test_range_bit(tree, start, end, EXTENT_UPTODATE, 1, cached))
>  		SetPageUptodate(page);
>  }
>  
>  /*
> + * helper function to unlock a page if all the extents in the tree
> + * for that page are unlocked
> + */
> +static void check_page_locked(struct extent_io_tree *tree, struct page *page)
> +{
> +	u64 start = page_offset(page);
> +	u64 end = start + PAGE_CACHE_SIZE - 1;
> +
> +	if (!test_range_bit(tree, start, end, EXTENT_LOCKED, 0, NULL)) {
> +		unlock_page(page);
> +	}
> +}
> +
> +
> +/*
>   * When IO fails, either with EIO or csum verification fails, we
>   * try other mirrors that might have a good copy of the data.  This
>   * io_failure_record is used to record state as we go through all the
> @@ -2414,16 +2430,33 @@ static void end_bio_extent_writepage(struct bio *bio, int err)
>  	bio_put(bio);
>  }
>  
> -static void
> -endio_readpage_release_extent(struct extent_io_tree *tree, u64 start, u64 len,
> -			      int uptodate)
> +static void unlock_extent_and_page(struct address_space *mapping,
> +				   struct extent_io_tree *tree,
> +				   struct btrfs_io_bio *io_bio)
>  {
> -	struct extent_state *cached = NULL;
> -	u64 end = start + len - 1;
> +	pgoff_t index;
> +	u64 offset, len;
> +	/*
> +	 * This btrfs_io_bio may span multiple pages.
> +	 * We need to unlock the pages convered by them
> +	 * if we got endio callback for all the blocks in the page.
> +	 * btrfs_io_bio also contain "contigous blocks of the file"
> +	 * look at submit_extent_page for more details.
> +	 */
>  
> -	if (uptodate && tree->track_uptodate)
> -		set_extent_uptodate(tree, start, end, &cached, GFP_ATOMIC);
> -	unlock_extent_cached(tree, start, end, &cached, GFP_ATOMIC);
> +	offset = io_bio->start_offset;
> +	len    = io_bio->len;
> +	unlock_extent(tree, offset, offset + len - 1);
> +
> +	index = offset >> PAGE_CACHE_SHIFT;
> +	while (offset < io_bio->start_offset + len) {
> +		struct page *page;
> +		page = find_get_page(mapping, index);
> +		check_page_locked(tree, page);
> +		page_cache_release(page);
> +		index++;
> +		offset += PAGE_CACHE_SIZE;
> +	}
>  }
>  
>  /*
> @@ -2443,13 +2476,13 @@ static void end_bio_extent_readpage(struct bio *bio, int err)
>  	struct bio_vec *bvec_end = bio->bi_io_vec + bio->bi_vcnt - 1;
>  	struct bio_vec *bvec = bio->bi_io_vec;
>  	struct btrfs_io_bio *io_bio = btrfs_io_bio(bio);
> +	struct address_space *mapping = bio->bi_io_vec->bv_page->mapping;
>  	struct extent_io_tree *tree;
> +	struct extent_state *cached = NULL;
>  	u64 offset = 0;
>  	u64 start;
>  	u64 end;
>  	u64 len;
> -	u64 extent_start = 0;
> -	u64 extent_len = 0;
>  	int mirror;
>  	int ret;
>  
> @@ -2482,8 +2515,8 @@ static void end_bio_extent_readpage(struct bio *bio, int err)
>  					bvec->bv_offset, bvec->bv_len);
>  		}
>  
> -		start = page_offset(page);
> -		end = start + bvec->bv_offset + bvec->bv_len - 1;
> +		start = page_offset(page) + bvec->bv_offset;
> +		end = start + bvec->bv_len - 1;
>  		len = bvec->bv_len;
>  
>  		if (++bvec <= bvec_end)
> @@ -2540,40 +2573,24 @@ readpage_ok:
>  			offset = i_size & (PAGE_CACHE_SIZE-1);
>  			if (page->index == end_index && offset)
>  				zero_user_segment(page, offset, PAGE_CACHE_SIZE);
> -			SetPageUptodate(page);
> +			if (tree->track_uptodate)
> +				set_extent_uptodate(tree, start, end, &cached,
> +						    GFP_ATOMIC);
>  		} else {
>  			ClearPageUptodate(page);
>  			SetPageError(page);
>  		}
> -		unlock_page(page);
> -		offset += len;
>  
> -		if (unlikely(!uptodate)) {
> -			if (extent_len) {
> -				endio_readpage_release_extent(tree,
> -							      extent_start,
> -							      extent_len, 1);
> -				extent_start = 0;
> -				extent_len = 0;
> -			}
> -			endio_readpage_release_extent(tree, start,
> -						      end - start + 1, 0);
> -		} else if (!extent_len) {
> -			extent_start = start;
> -			extent_len = end + 1 - start;
> -		} else if (extent_start + extent_len == start) {
> -			extent_len += end + 1 - start;
> -		} else {
> -			endio_readpage_release_extent(tree, extent_start,
> -						      extent_len, uptodate);
> -			extent_start = start;
> -			extent_len = end + 1 - start;
> -		}
> +		offset += len;
> +		/*
> +		 * Check whether the page in the bvec can be marked uptodate
> +		 */
> +		check_page_uptodate(tree, page, cached);
>  	} while (bvec <= bvec_end);
> -
> -	if (extent_len)
> -		endio_readpage_release_extent(tree, extent_start, extent_len,
> -					      uptodate);
> +	/*
> +	 * Unlock the btrfs_bio and associated page
> +	 */
> +	unlock_extent_and_page(mapping, tree, io_bio);
>  	if (io_bio->end_io)
>  		io_bio->end_io(io_bio, err);
>  	bio_put(bio);
> @@ -2700,6 +2717,18 @@ static int submit_extent_page(int rw, struct extent_io_tree *tree,
>  		else
>  			contig = bio_end_sector(bio) == sector;
>  
> +		if (contig) {
> +			/*
> +			 * Check whether we are contig if file offsets.
> +			 * We should mostly be for readpage/readpages
> +			 * We need to do this because we use btrfs_io_bio
> +			 * start_offset and len to unlock in endio routines.
> +			 */
> +			if ((page_offset(page) + offset) !=
> +					(btrfs_io_bio(bio)->start_offset +
> +					 btrfs_io_bio(bio)->len))
> +				contig = 0;
> +		}
>  		if (prev_bio_flags != bio_flags || !contig ||
>  		    merge_bio(rw, tree, page, offset, page_size, bio, bio_flags) ||
>  		    bio_add_page(bio, page, page_size, offset) < page_size) {
> @@ -2709,6 +2738,11 @@ static int submit_extent_page(int rw, struct extent_io_tree *tree,
>  				return ret;
>  			bio = NULL;
>  		} else {
> +			/*
> +			 * update btrfs_io_bio len. So that we can unlock
> +			 * correctly in end_io callback.
> +			 */
> +			btrfs_io_bio(bio)->len += page_size;
>  			return 0;
>  		}
>  	}
> @@ -2724,6 +2758,8 @@ static int submit_extent_page(int rw, struct extent_io_tree *tree,
>  	bio_add_page(bio, page, page_size, offset);
>  	bio->bi_end_io = end_io_func;
>  	bio->bi_private = tree;
> +	btrfs_io_bio(bio)->start_offset = page_offset(page) + offset;
> +	btrfs_io_bio(bio)->len = page_size;
>  
>  	if (bio_ret)
>  		*bio_ret = bio;
> @@ -2914,7 +2950,7 @@ static int __do_readpage(struct extent_io_tree *tree,
>  		/* the get_extent function already copied into the page */
>  		if (test_range_bit(tree, cur, cur_end,
>  				   EXTENT_UPTODATE, 1, NULL)) {
> -			check_page_uptodate(tree, page);
> +			check_page_uptodate(tree, page, NULL);
>  			if (!parent_locked)
>  				unlock_extent(tree, cur, cur + iosize - 1);
>  			cur = cur + iosize;
> diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
> index 80754f9..2e16f4d 100644
> --- a/fs/btrfs/volumes.h
> +++ b/fs/btrfs/volumes.h
> @@ -174,6 +174,9 @@ struct btrfs_io_bio {
>  	u8 *csum_allocated;
>  	btrfs_io_bio_end_io_t *end_io;
>  	struct bio bio;
> +	/* Track file offset range operated on by the bio.*/
> +	u64 start_offset;
> +	u64 len;

These should be put in front of "struct bio bio",
otherwise, it might lead to errors, according to bioset_create()'s comments,

----------------------------------------------------------------------
"Note that the bio must be embedded at the END of that structure always,    
or things will break badly."
----------------------------------------------------------------------

-liubo
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html




[Index of Archives]     [Linux Filesystem Development]     [Linux NFS]     [Linux NILFS]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux