Re: [PATCH] Btrfs: recheck bio against block device when we map the bio

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On 10/20/2012 05:01 AM, Josef Bacik wrote:
> Alex reported a problem where we were writing between chunks on a rbd
> device.  The thing is we do bio_add_page using logical offsets, but the
> physical offset may be different.  So when we map the bio now check to see
> if the bio is still ok with the physical offset, and if it is not split the
> bio up and redo the bio_add_page with the physical sector.  This fixes the
> problem for Alex and doesn't affect performance in the normal case.  Thanks,
> 
> Reported-and-tested-by: Alex Elder <elder@xxxxxxxxxxx>
> Signed-off-by: Josef Bacik <jbacik@xxxxxxxxxxxx>
> ---
>  fs/btrfs/volumes.c |  159 ++++++++++++++++++++++++++++++++++++++++++---------
>  1 files changed, 131 insertions(+), 28 deletions(-)
> 
> diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
> index a8adf26..eaaf0bf 100644
> --- a/fs/btrfs/volumes.c
> +++ b/fs/btrfs/volumes.c
> @@ -4217,6 +4217,113 @@ static noinline void schedule_bio(struct btrfs_root *root,
>  				   &device->work);
>  }
>  
> +static int bio_size_ok(struct block_device *bdev, struct bio *bio,
> +		       sector_t sector)
> +{
> +	struct bio_vec *prev;
> +	struct request_queue *q = bdev_get_queue(bdev);
> +	unsigned short max_sectors = queue_max_sectors(q);
> +	struct bvec_merge_data bvm = {
> +		.bi_bdev = bdev,
> +		.bi_sector = sector,
> +		.bi_rw = bio->bi_rw,
> +	};
> +
> +	if (bio->bi_vcnt == 0) {
> +		WARN_ON(1);
> +		return 1;
> +	}
> +
> +	prev = &bio->bi_io_vec[bio->bi_vcnt - 1];

I prefer 'last' for this.


Others look good to me :)

Reviewed-by: Liu Bo <bo.li.liu@xxxxxxxxxx>

thanks,
liubo

> +	if ((bio->bi_size >> 9) > max_sectors)
> +		return 0;
> +
> +	if (!q->merge_bvec_fn)
> +		return 1;
> +
> +	bvm.bi_size = bio->bi_size - prev->bv_len;
> +	if (q->merge_bvec_fn(q, &bvm, prev) < prev->bv_len)
> +		return 0;
> +	return 1;
> +}
> +
> +static void submit_stripe_bio(struct btrfs_root *root, struct btrfs_bio *bbio,
> +			      struct bio *bio, u64 physical, int dev_nr,
> +			      int rw, int async)
> +{
> +	struct btrfs_device *dev = bbio->stripes[dev_nr].dev;
> +
> +	bio->bi_private = bbio;
> +	bio->bi_private = merge_stripe_index_into_bio_private(
> +			bio->bi_private, (unsigned int)dev_nr);
> +	bio->bi_end_io = btrfs_end_bio;
> +	bio->bi_sector = physical >> 9;
> +#ifdef DEBUG
> +	{
> +		struct rcu_string *name;
> +
> +		rcu_read_lock();
> +		name = rcu_dereference(dev->name);
> +		pr_debug("btrfs_map_bio: rw %d, secor=%llu, dev=%lu "
> +			 "(%s id %llu), size=%u\n", rw,
> +			 (u64)bio->bi_sector, (u_long)dev->bdev->bd_dev,
> +			 name->str, dev->devid, bio->bi_size);
> +		rcu_read_unlock();
> +	}
> +#endif
> +	bio->bi_bdev = dev->bdev;
> +	if (async)
> +		schedule_bio(root, dev, rw, bio);
> +	else
> +		btrfsic_submit_bio(rw, bio);
> +}
> +
> +static int breakup_stripe_bio(struct btrfs_root *root, struct btrfs_bio *bbio,
> +			      struct bio *first_bio, struct btrfs_device *dev,
> +			      int dev_nr, int rw, int async)
> +{
> +	struct bio_vec *bvec = first_bio->bi_io_vec;
> +	struct bio *bio;
> +	int nr_vecs = bio_get_nr_vecs(dev->bdev);
> +	u64 physical = bbio->stripes[dev_nr].physical;
> +
> +again:
> +	bio = btrfs_bio_alloc(dev->bdev, physical >> 9, nr_vecs, GFP_NOFS);
> +	if (!bio)
> +		return -ENOMEM;
> +
> +	while (bvec <= (first_bio->bi_io_vec + first_bio->bi_vcnt - 1)) {
> +		if (bio_add_page(bio, bvec->bv_page, bvec->bv_len,
> +				 bvec->bv_offset) < bvec->bv_len) {
> +			u64 len = bio->bi_size;
> +
> +			atomic_inc(&bbio->stripes_pending);
> +			submit_stripe_bio(root, bbio, bio, physical, dev_nr,
> +					  rw, async);
> +			physical += len;
> +			goto again;
> +		}
> +		bvec++;
> +	}
> +
> +	submit_stripe_bio(root, bbio, bio, physical, dev_nr, rw, async);
> +	return 0;
> +}
> +
> +static void bbio_error(struct btrfs_bio *bbio, struct bio *bio, u64 logical)
> +{
> +	atomic_inc(&bbio->error);
> +	if (atomic_dec_and_test(&bbio->stripes_pending)) {
> +		bio->bi_private = bbio->private;
> +		bio->bi_end_io = bbio->end_io;
> +		bio->bi_bdev = (struct block_device *)
> +			(unsigned long)bbio->mirror_num;
> +		bio->bi_sector = logical >> 9;
> +		kfree(bbio);
> +		bio_endio(bio, -EIO);
> +	}
> +}
> +
>  int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio,
>  		  int mirror_num, int async_submit)
>  {
> @@ -4255,40 +4362,36 @@ int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio,
>  	atomic_set(&bbio->stripes_pending, bbio->num_stripes);
>  
>  	while (dev_nr < total_devs) {
> +		dev = bbio->stripes[dev_nr].dev;
> +		if (!dev || !dev->bdev || (rw & WRITE && !dev->writeable)) {
> +			bbio_error(bbio, first_bio, logical);
> +			dev_nr++;
> +			continue;
> +		}
> +
> +		/*
> +		 * Check and see if we're ok with this bio based on it's size
> +		 * and offset with the given device.
> +		 */
> +		if (!bio_size_ok(dev->bdev, first_bio,
> +				 bbio->stripes[dev_nr].physical >> 9)) {
> +			ret = breakup_stripe_bio(root, bbio, first_bio, dev,
> +						 dev_nr, rw, async_submit);
> +			BUG_ON(ret);
> +			dev_nr++;
> +			continue;
> +		}
> +
>  		if (dev_nr < total_devs - 1) {
>  			bio = bio_clone(first_bio, GFP_NOFS);
>  			BUG_ON(!bio); /* -ENOMEM */
>  		} else {
>  			bio = first_bio;
>  		}
> -		bio->bi_private = bbio;
> -		bio->bi_private = merge_stripe_index_into_bio_private(
> -				bio->bi_private, (unsigned int)dev_nr);
> -		bio->bi_end_io = btrfs_end_bio;
> -		bio->bi_sector = bbio->stripes[dev_nr].physical >> 9;
> -		dev = bbio->stripes[dev_nr].dev;
> -		if (dev && dev->bdev && (rw != WRITE || dev->writeable)) {
> -#ifdef DEBUG
> -			struct rcu_string *name;
> -
> -			rcu_read_lock();
> -			name = rcu_dereference(dev->name);
> -			pr_debug("btrfs_map_bio: rw %d, secor=%llu, dev=%lu "
> -				 "(%s id %llu), size=%u\n", rw,
> -				 (u64)bio->bi_sector, (u_long)dev->bdev->bd_dev,
> -				 name->str, dev->devid, bio->bi_size);
> -			rcu_read_unlock();
> -#endif
> -			bio->bi_bdev = dev->bdev;
> -			if (async_submit)
> -				schedule_bio(root, dev, rw, bio);
> -			else
> -				btrfsic_submit_bio(rw, bio);
> -		} else {
> -			bio->bi_bdev = root->fs_info->fs_devices->latest_bdev;
> -			bio->bi_sector = logical >> 9;
> -			bio_endio(bio, -EIO);
> -		}
> +
> +		submit_stripe_bio(root, bbio, bio,
> +				  bbio->stripes[dev_nr].physical, dev_nr, rw,
> +				  async_submit);
>  		dev_nr++;
>  	}
>  	return 0;
> 

--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[Index of Archives]     [Linux Filesystem Development]     [Linux NFS]     [Linux NILFS]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux