On 23/01/2020 15:14, Nikolay Borisov wrote:
>
>
> On 23.01.20 г. 10:18 ч., Johannes Thumshirn wrote:
>
> <snip>
>
>> @@ -3374,40 +3378,60 @@ static void btrfs_end_buffer_write_sync(struct buffer_head *bh, int uptodate)
>> }
>>
>> int btrfs_read_dev_one_super(struct block_device *bdev, int copy_num,
>> - struct buffer_head **bh_ret)
>> + struct page **super_page)
>> {
>> - struct buffer_head *bh;
>> struct btrfs_super_block *super;
>> + struct bio_vec bio_vec;
>> + struct bio bio;
>> + struct page *page;
>> u64 bytenr;
>> + struct address_space *mapping = bdev->bd_inode->i_mapping;
>> + gfp_t gfp_mask;
>> + int ret;
>>
>> bytenr = btrfs_sb_offset(copy_num);
>> if (bytenr + BTRFS_SUPER_INFO_SIZE >= i_size_read(bdev->bd_inode))
>> return -EINVAL;
>>
>> - bh = __bread(bdev, bytenr / BTRFS_BDEV_BLOCKSIZE, BTRFS_SUPER_INFO_SIZE);
>> + gfp_mask = mapping_gfp_constraint(mapping, ~__GFP_FS) | __GFP_NOFAIL;
>> + page = find_or_create_page(mapping, bytenr >> PAGE_SHIFT, gfp_mask);
>> + if (!page)
>> + return -ENOMEM;
>> +
>> + bio_init(&bio, &bio_vec, 1);
>> + bio.bi_iter.bi_sector = bytenr >> SECTOR_SHIFT;
>> + bio_set_dev(&bio, bdev);
>> + bio_set_op_attrs(&bio, REQ_OP_READ, 0);
>> + bio_add_page(&bio, page, BTRFS_SUPER_INFO_SIZE,
>> + offset_in_page(bytenr));
>> +
>> + ret = submit_bio_wait(&bio);
>> + unlock_page(page);
>
> So this is based on my code where the page is unlocked as soon as it's
> read. However, as per out off-list discussion, I think this page needs
> to be unlocked once the caller of
> btrfs_read_dev_one_super/btrfs_read_dev_super is finished with it.
As I don't have an answer to that yet, I just gave it a try (see
attached diff) and it instantly deadlocks on mount. So either I did
something wrong or holding the lock until the callers have finished
processing it is a bad idea.
Thanks,
Johannes
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index ac1755793596..7afac96be17f 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -2856,6 +2856,7 @@ int __cold open_ctree(struct super_block *sb,
ret = btrfs_init_csum_hash(fs_info, csum_type);
if (ret) {
err = ret;
+ unlock_page(super_page);
btrfs_release_disk_super(super_page);
goto fail_alloc;
}
@@ -2867,6 +2868,7 @@ int __cold open_ctree(struct super_block *sb,
if (btrfs_check_super_csum(fs_info, superblock)) {
btrfs_err(fs_info, "superblock checksum mismatch");
err = -EINVAL;
+ unlock_page(super_page);
btrfs_release_disk_super(super_page);
goto fail_csum;
}
@@ -2877,6 +2879,7 @@ int __cold open_ctree(struct super_block *sb,
* the whole block of INFO_SIZE
*/
memcpy(fs_info->super_copy, superblock, sizeof(*fs_info->super_copy));
+ unlock_page(super_page);
btrfs_release_disk_super(super_page);
disk_super = fs_info->super_copy;
@@ -3413,7 +3416,6 @@ int btrfs_read_dev_one_super(struct block_device *bdev, int copy_num,
offset_in_page(bytenr));
ret = submit_bio_wait(&bio);
- unlock_page(page);
/*
* If we fail to read from the underlying devices, as of now
* the best option we have is to mark it EIO.
@@ -3426,6 +3428,7 @@ int btrfs_read_dev_one_super(struct block_device *bdev, int copy_num,
super = kmap(page);
if (btrfs_super_bytenr(super) != bytenr ||
btrfs_super_magic(super) != BTRFS_MAGIC) {
+ unlock_page(page);
btrfs_release_disk_super(page);
return -EINVAL;
}
@@ -3457,11 +3460,14 @@ int btrfs_read_dev_super(struct block_device *bdev, struct page **page)
super = kmap(*page);
if (!latest || btrfs_super_generation(super) > transid) {
- if (latest)
+ if (latest) {
+ unlock_page(latest);
btrfs_release_disk_super(latest);
+ }
latest = *page;
transid = btrfs_super_generation(super);
} else {
+ unlock_page(*page);
btrfs_release_disk_super(*page);
}
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index f4a6ee518f0c..8bb2f9dead63 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -7344,7 +7344,6 @@ void btrfs_scratch_superblocks(struct block_device *bdev, const char *device_pat
bio_add_page(&bio, page, BTRFS_SUPER_INFO_SIZE,
offset_in_page(bytenr));
- lock_page(page);
submit_bio_wait(&bio);
unlock_page(page);
btrfs_release_disk_super(page);