On Tue, Jan 28, 2020 at 12:59:27AM +0900, Johannes Thumshirn wrote:
> struct btrfs_fs_info *fs_info = btrfs_sb(sb);
> struct btrfs_root *tree_root;
> struct btrfs_root *chunk_root;
> + struct page *super_page;
> + u8 *superblock;
Any good reason this isn't a struct btrfs_super_block * instead?
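E.g. something like this (untested) would avoid the cast below, given that
kmap() returns a void pointer anyway:

	struct btrfs_super_block *superblock;
	...
	superblock = kmap(super_page);	/* no cast needed from kmap()'s void * */
	csum_type = btrfs_super_csum_type(superblock);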
> + csum_type = btrfs_super_csum_type((struct btrfs_super_block *)
> + superblock);
> if (!btrfs_supported_super_csum(csum_type)) {
> btrfs_err(fs_info, "unsupported checksum algorithm: %u",
> csum_type);
> err = -EINVAL;
> - brelse(bh);
> + kunmap(super_page);
> + put_page(super_page);
> goto fail_alloc;
> }
>
> ret = btrfs_init_csum_hash(fs_info, csum_type);
> if (ret) {
> err = ret;
> + kunmap(super_page);
> + put_page(super_page);
> goto fail_alloc;
> }
>
> @@ -2861,10 +2867,11 @@ int __cold open_ctree(struct super_block *sb,
> * We want to check superblock checksum, the type is stored inside.
> * Pass the whole disk block of size BTRFS_SUPER_INFO_SIZE (4k).
> */
> - if (btrfs_check_super_csum(fs_info, bh->b_data)) {
> + if (btrfs_check_super_csum(fs_info, superblock)) {
> btrfs_err(fs_info, "superblock checksum mismatch");
> err = -EINVAL;
> - brelse(bh);
> + kunmap(super_page);
> + put_page(super_page);
> goto fail_csum;
> }
>
> @@ -2873,8 +2880,9 @@ int __cold open_ctree(struct super_block *sb,
> * following bytes up to INFO_SIZE, the checksum is calculated from
> * the whole block of INFO_SIZE
> */
> - memcpy(fs_info->super_copy, bh->b_data, sizeof(*fs_info->super_copy));
> - brelse(bh);
> + memcpy(fs_info->super_copy, superblock, sizeof(*fs_info->super_copy));
> + kunmap(super_page);
> + put_page(super_page);
Would it make sense to move the code up to here into a helper, to
simplify the error handling?
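Completely untested sketch with a made-up name; it collapses the repeated
kunmap()/put_page() pairs into a single error path. The fail_alloc vs
fail_csum distinction in the caller would need a bit more thought (e.g. an
extra out parameter):

static int btrfs_validate_super_csum(struct btrfs_fs_info *fs_info,
				     struct page *super_page)
{
	/* made-up helper; assumes the sb starts at the beginning of the page */
	u8 *superblock = kmap(super_page);
	u16 csum_type;
	int ret = -EINVAL;

	csum_type = btrfs_super_csum_type((struct btrfs_super_block *)superblock);
	if (!btrfs_supported_super_csum(csum_type)) {
		btrfs_err(fs_info, "unsupported checksum algorithm: %u",
			  csum_type);
		goto out;
	}

	ret = btrfs_init_csum_hash(fs_info, csum_type);
	if (ret)
		goto out;

	ret = -EINVAL;
	if (btrfs_check_super_csum(fs_info, superblock)) {
		btrfs_err(fs_info, "superblock checksum mismatch");
		goto out;
	}

	memcpy(fs_info->super_copy, superblock, sizeof(*fs_info->super_copy));
	ret = 0;
out:
	kunmap(super_page);
	put_page(super_page);
	return ret;
}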
>
> int btrfs_read_dev_one_super(struct block_device *bdev, int copy_num,
> - struct buffer_head **bh_ret)
> + struct page **super_page)
> {
> - struct buffer_head *bh;
> struct btrfs_super_block *super;
> + struct bio_vec bio_vec;
> + struct bio bio;
> + struct page *page;
> u64 bytenr;
> + struct address_space *mapping = bdev->bd_inode->i_mapping;
> + gfp_t gfp_mask;
> + int ret;
>
> bytenr = btrfs_sb_offset(copy_num);
> if (bytenr + BTRFS_SUPER_INFO_SIZE >= i_size_read(bdev->bd_inode))
> return -EINVAL;
>
> - bh = __bread(bdev, bytenr / BTRFS_BDEV_BLOCKSIZE, BTRFS_SUPER_INFO_SIZE);
> + gfp_mask = mapping_gfp_constraint(mapping, ~__GFP_FS) | __GFP_NOFAIL;
> + page = find_or_create_page(mapping, bytenr >> PAGE_SHIFT, gfp_mask);
Why not simply use read_cache_page_gfp to find or read the page?
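That would also take care of the actual read for you, roughly (untested):

	page = read_cache_page_gfp(mapping, bytenr >> PAGE_SHIFT, gfp_mask);
	if (IS_ERR(page))
		return PTR_ERR(page);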
> - super = (struct btrfs_super_block *)bh->b_data;
> + super = kmap(page);
> if (btrfs_super_bytenr(super) != bytenr ||
> btrfs_super_magic(super) != BTRFS_MAGIC) {
> - brelse(bh);
> + kunmap(page);
> + put_page(page);
> return -EINVAL;
> }
> + kunmap(page);
>
> - *bh_ret = bh;
> + *super_page = page;
Given that both callers need the kernel virtual address, why not leave it
kmapped? OTOH if you use read_cache_page_gfp, we could just kill
btrfs_read_dev_one_super and open code the read_cache_page_gfp call and
the btrfs_super_bytenr / btrfs_super_magic checks in the callers.
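i.e. each caller would do something like this (untested, error handling
elided, and the continue vs return depends on the caller):

	page = read_cache_page_gfp(bdev->bd_inode->i_mapping,
				   bytenr >> PAGE_SHIFT, GFP_NOFS);
	if (IS_ERR(page))
		continue;

	super = kmap(page);
	if (btrfs_super_bytenr(super) != bytenr ||
	    btrfs_super_magic(super) != BTRFS_MAGIC) {
		kunmap(page);
		put_page(page);
		continue;
	}
	/* and keep the page kmapped, as both callers want the mapping */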
> + bio_init(&bio, &bio_vec, 1);
> for (copy_num = 0; copy_num < BTRFS_SUPER_MIRROR_MAX;
> copy_num++) {
> + u64 bytenr = btrfs_sb_offset(copy_num);
> + struct page *page;
>
> - if (btrfs_read_dev_one_super(bdev, copy_num, &bh))
> + if (btrfs_read_dev_one_super(bdev, copy_num, &page))
> continue;
>
> - disk_super = (struct btrfs_super_block *)bh->b_data;
> + disk_super = kmap(page) + offset_in_page(bytenr);
>
> memset(&disk_super->magic, 0, sizeof(disk_super->magic));
> - set_buffer_dirty(bh);
> - sync_dirty_buffer(bh);
> - brelse(bh);
> +
> + bio.bi_iter.bi_sector = bytenr >> SECTOR_SHIFT;
> + bio_set_dev(&bio, bdev);
> + bio.bi_opf = REQ_OP_WRITE;
> + bio_add_page(&bio, page, BTRFS_SUPER_INFO_SIZE,
> + offset_in_page(bytenr));
> +
> + lock_page(page);
> + submit_bio_wait(&bio);
> + unlock_page(page);
> + kunmap(page);
> + put_page(page);
> + bio_reset(&bio);
Factoring out the code to write a single sb would clean this up a bit.
Also there's no real need to keep the page kmapped while it is under I/O.
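Something like this, untested and with a made-up name:

static void btrfs_scratch_one_super(struct block_device *bdev, u64 bytenr,
				    struct page *page)
{
	struct btrfs_super_block *disk_super;
	struct bio_vec bio_vec;
	struct bio bio;

	disk_super = kmap(page) + offset_in_page(bytenr);
	memset(&disk_super->magic, 0, sizeof(disk_super->magic));
	kunmap(page);	/* the bio works on the page, no mapping needed */

	bio_init(&bio, &bio_vec, 1);
	bio.bi_iter.bi_sector = bytenr >> SECTOR_SHIFT;
	bio_set_dev(&bio, bdev);
	bio.bi_opf = REQ_OP_WRITE;
	bio_add_page(&bio, page, BTRFS_SUPER_INFO_SIZE,
		     offset_in_page(bytenr));

	lock_page(page);
	submit_bio_wait(&bio);
	unlock_page(page);
	put_page(page);
}

and the loop body shrinks to the btrfs_read_dev_one_super call plus one
call to the helper.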