On 23.01.20 г. 10:18 ч., Johannes Thumshirn wrote:
<snip>
> @@ -3374,40 +3378,60 @@ static void btrfs_end_buffer_write_sync(struct buffer_head *bh, int uptodate)
> }
>
> int btrfs_read_dev_one_super(struct block_device *bdev, int copy_num,
> - struct buffer_head **bh_ret)
> + struct page **super_page)
> {
> - struct buffer_head *bh;
> struct btrfs_super_block *super;
> + struct bio_vec bio_vec;
> + struct bio bio;
> + struct page *page;
> u64 bytenr;
> + struct address_space *mapping = bdev->bd_inode->i_mapping;
> + gfp_t gfp_mask;
> + int ret;
>
> bytenr = btrfs_sb_offset(copy_num);
> if (bytenr + BTRFS_SUPER_INFO_SIZE >= i_size_read(bdev->bd_inode))
> return -EINVAL;
>
> - bh = __bread(bdev, bytenr / BTRFS_BDEV_BLOCKSIZE, BTRFS_SUPER_INFO_SIZE);
> + gfp_mask = mapping_gfp_constraint(mapping, ~__GFP_FS) | __GFP_NOFAIL;
> + page = find_or_create_page(mapping, bytenr >> PAGE_SHIFT, gfp_mask);
> + if (!page)
> + return -ENOMEM;
> +
> + bio_init(&bio, &bio_vec, 1);
> + bio.bi_iter.bi_sector = bytenr >> SECTOR_SHIFT;
> + bio_set_dev(&bio, bdev);
> + bio_set_op_attrs(&bio, REQ_OP_READ, 0);
> + bio_add_page(&bio, page, BTRFS_SUPER_INFO_SIZE,
> + offset_in_page(bytenr));
> +
> + ret = submit_bio_wait(&bio);
> + unlock_page(page);
So this is based on my code where the page is unlocked as soon as it's
read. However, as per out off-list discussion, I think this page needs
to be unlocked once the caller of
btrfs_read_dev_one_super/btrfs_read_dev_super is finished with it.
> /*
> * If we fail to read from the underlying devices, as of now
> * the best option we have is to mark it EIO.
> */
> - if (!bh)
> + if (ret) {
> + put_page(page);
> return -EIO;
> + }
>
> - super = (struct btrfs_super_block *)bh->b_data;
> + super = kmap(page);
> if (btrfs_super_bytenr(super) != bytenr ||
> btrfs_super_magic(super) != BTRFS_MAGIC) {
> - brelse(bh);
> + btrfs_release_disk_super(page);
> return -EINVAL;
> }
> + kunmap(page);
>
> - *bh_ret = bh;
> + *super_page = page;
> return 0;
> }
>
>
> -struct buffer_head *btrfs_read_dev_super(struct block_device *bdev)
> +int btrfs_read_dev_super(struct block_device *bdev, struct page **page)
> {
> - struct buffer_head *bh;
> - struct buffer_head *latest = NULL;
> + struct page *latest = NULL;
> struct btrfs_super_block *super;
> int i;
> u64 transid = 0;
<snip>
>
> void btrfs_scratch_superblocks(struct block_device *bdev, const char *device_path)
> {
> - struct buffer_head *bh;
> + struct bio_vec bio_vec;
> + struct bio bio;
> struct btrfs_super_block *disk_super;
> int copy_num;
>
> if (!bdev)
> return;
>
> + bio_init(&bio, &bio_vec, 1);
> for (copy_num = 0; copy_num < BTRFS_SUPER_MIRROR_MAX;
> copy_num++) {
> + u64 bytenr = btrfs_sb_offset(copy_num);
> + struct page *page;
>
> - if (btrfs_read_dev_one_super(bdev, copy_num, &bh))
> + if (btrfs_read_dev_one_super(bdev, copy_num, &page))
> continue;
>
> - disk_super = (struct btrfs_super_block *)bh->b_data;
> + disk_super = kmap(page) + offset_in_page(bytenr);
>
> memset(&disk_super->magic, 0, sizeof(disk_super->magic));
> - set_buffer_dirty(bh);
> - sync_dirty_buffer(bh);
> - brelse(bh);
> +
> + bio.bi_iter.bi_sector = bytenr >> SECTOR_SHIFT;
> + bio_set_dev(&bio, bdev);
> + bio_set_op_attrs(&bio, REQ_OP_WRITE, 0);
> + bio_add_page(&bio, page, BTRFS_SUPER_INFO_SIZE,
> + offset_in_page(bytenr));
> +
> + lock_page(page);
> + submit_bio_wait(&bio);
> + unlock_page(page);
> + btrfs_release_disk_super(page);
> + bio_reset(&bio);
For example in this code if btrfs_read_dev_one_super held the page
locked you would have unlocked after submit_bio_wait is has returned.
But in this case what you do is take the page unlocked and now you have
a race window between btrfs_read_dev_one_super and lock_page(page) in
this function? btrfs_scratch_superblocks is used in replace context so
what if it races with transaction commit? If my worries are moot and I
have missed anything then at the very least safety should be documented
in the changelog of the patch.
> +
> }
>
> /* Notify udev that device has changed */
> diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
> index b7f2edbc6581..1e4ebe6d6368 100644
> --- a/fs/btrfs/volumes.h
> +++ b/fs/btrfs/volumes.h
> @@ -17,8 +17,6 @@ extern struct mutex uuid_mutex;
>
> #define BTRFS_STRIPE_LEN SZ_64K
>
> -struct buffer_head;
> -
> struct btrfs_io_geometry {
> /* remaining bytes before crossing a stripe */
> u64 len;
>