Re: [PATCH v2 2/6] btrfs: remove buffer heads from super block reading

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 




On 23.01.20 г. 10:18 ч., Johannes Thumshirn wrote:

<snip>

> @@ -3374,40 +3378,60 @@ static void btrfs_end_buffer_write_sync(struct buffer_head *bh, int uptodate)
>  }
>  
>  int btrfs_read_dev_one_super(struct block_device *bdev, int copy_num,
> -			struct buffer_head **bh_ret)
> +			struct page **super_page)
>  {
> -	struct buffer_head *bh;
>  	struct btrfs_super_block *super;
> +	struct bio_vec bio_vec;
> +	struct bio bio;
> +	struct page *page;
>  	u64 bytenr;
> +	struct address_space *mapping = bdev->bd_inode->i_mapping;
> +	gfp_t gfp_mask;
> +	int ret;
>  
>  	bytenr = btrfs_sb_offset(copy_num);
>  	if (bytenr + BTRFS_SUPER_INFO_SIZE >= i_size_read(bdev->bd_inode))
>  		return -EINVAL;
>  
> -	bh = __bread(bdev, bytenr / BTRFS_BDEV_BLOCKSIZE, BTRFS_SUPER_INFO_SIZE);
> +	gfp_mask = mapping_gfp_constraint(mapping, ~__GFP_FS) | __GFP_NOFAIL;
> +	page = find_or_create_page(mapping, bytenr >> PAGE_SHIFT, gfp_mask);
> +	if (!page)
> +		return -ENOMEM;
> +
> +	bio_init(&bio, &bio_vec, 1);
> +	bio.bi_iter.bi_sector = bytenr >> SECTOR_SHIFT;
> +	bio_set_dev(&bio, bdev);
> +	bio_set_op_attrs(&bio, REQ_OP_READ, 0);
> +	bio_add_page(&bio, page, BTRFS_SUPER_INFO_SIZE,
> +		     offset_in_page(bytenr));
> +
> +	ret = submit_bio_wait(&bio);
> +	unlock_page(page);

So this is based on my code where the page is unlocked as soon as it's
read. However, as per out off-list discussion, I think this page needs
to be unlocked once the caller of
btrfs_read_dev_one_super/btrfs_read_dev_super is finished with it.

>  	/*
>  	 * If we fail to read from the underlying devices, as of now
>  	 * the best option we have is to mark it EIO.
>  	 */
> -	if (!bh)
> +	if (ret) {
> +		put_page(page);
>  		return -EIO;
> +	}
>  
> -	super = (struct btrfs_super_block *)bh->b_data;
> +	super = kmap(page);
>  	if (btrfs_super_bytenr(super) != bytenr ||
>  		    btrfs_super_magic(super) != BTRFS_MAGIC) {
> -		brelse(bh);
> +		btrfs_release_disk_super(page);
>  		return -EINVAL;
>  	}
> +	kunmap(page);
>  
> -	*bh_ret = bh;
> +	*super_page = page;
>  	return 0;
>  }
>  
>  
> -struct buffer_head *btrfs_read_dev_super(struct block_device *bdev)
> +int btrfs_read_dev_super(struct block_device *bdev, struct page **page)
>  {
> -	struct buffer_head *bh;
> -	struct buffer_head *latest = NULL;
> +	struct page *latest = NULL;
>  	struct btrfs_super_block *super;
>  	int i;
>  	u64 transid = 0;

<snip>

>  
>  void btrfs_scratch_superblocks(struct block_device *bdev, const char *device_path)
>  {
> -	struct buffer_head *bh;
> +	struct bio_vec bio_vec;
> +	struct bio bio;
>  	struct btrfs_super_block *disk_super;
>  	int copy_num;
>  
>  	if (!bdev)
>  		return;
>  
> +	bio_init(&bio, &bio_vec, 1);
>  	for (copy_num = 0; copy_num < BTRFS_SUPER_MIRROR_MAX;
>  		copy_num++) {
> +		u64 bytenr = btrfs_sb_offset(copy_num);
> +		struct page *page;
>  
> -		if (btrfs_read_dev_one_super(bdev, copy_num, &bh))
> +		if (btrfs_read_dev_one_super(bdev, copy_num, &page))
>  			continue;
>  
> -		disk_super = (struct btrfs_super_block *)bh->b_data;
> +		disk_super = kmap(page) + offset_in_page(bytenr);
>  
>  		memset(&disk_super->magic, 0, sizeof(disk_super->magic));
> -		set_buffer_dirty(bh);
> -		sync_dirty_buffer(bh);
> -		brelse(bh);
> +
> +		bio.bi_iter.bi_sector = bytenr >> SECTOR_SHIFT;
> +		bio_set_dev(&bio, bdev);
> +		bio_set_op_attrs(&bio, REQ_OP_WRITE, 0);
> +		bio_add_page(&bio, page, BTRFS_SUPER_INFO_SIZE,
> +			     offset_in_page(bytenr));
> +
> +		lock_page(page);
> +		submit_bio_wait(&bio);
> +		unlock_page(page);
> +		btrfs_release_disk_super(page);
> +		bio_reset(&bio);

For example in this code if btrfs_read_dev_one_super held the page
locked you would have unlocked after submit_bio_wait is has returned.
But in this case what you do is take the page unlocked and now you have
a race window between btrfs_read_dev_one_super and lock_page(page) in
this function? btrfs_scratch_superblocks is used in replace context so
what if it races with transaction commit? If my worries are moot and I
have missed anything then at the very least safety should be documented
in the changelog of the patch.

> +
>  	}
>  
>  	/* Notify udev that device has changed */
> diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
> index b7f2edbc6581..1e4ebe6d6368 100644
> --- a/fs/btrfs/volumes.h
> +++ b/fs/btrfs/volumes.h
> @@ -17,8 +17,6 @@ extern struct mutex uuid_mutex;
>  
>  #define BTRFS_STRIPE_LEN	SZ_64K
>  
> -struct buffer_head;
> -
>  struct btrfs_io_geometry {
>  	/* remaining bytes before crossing a stripe */
>  	u64 len;
> 



[Index of Archives]     [Linux Filesystem Development]     [Linux NFS]     [Linux NILFS]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux