On 2018/01/12 19:14, Anand Jain wrote:
>
> Misono,
>
> This change causes subsequent (subvol) mounts to fail when the device
> option is specified. The simplest example of the failure is:
> mkfs.btrfs -qf /dev/sdc /dev/sdb
> mount -o device=/dev/sdb /dev/sdc /btrfs
> mount -o device=/dev/sdb /dev/sdc /btrfs1
> mount: /dev/sdc is already mounted or /btrfs1 busy
>
> Looks like
> blkdev_get_by_path() <-- is failing.
> btrfs_scan_one_device()
> btrfs_parse_early_options()
> btrfs_mount()
>
> This is due to the different holders (viz. btrfs_root_fs_type and
> btrfs_fs_type): one is used for vfs_mount and the other for scan,
> so they count as different holders and the EXCL open, which is
> needed for both scan and open, cannot be granted.
>
> Thanks, Anand
Thanks for the report.
I'm sorry, but I will be busy today and tomorrow, so I will not be able to
investigate this until after Wednesday.
Regards,
Tomohiro Misono
>
>
> On 12/14/2017 04:25 PM, Misono, Tomohiro wrote:
>> Cleanup btrfs_mount() by using btrfs_mount_root(). This avoids getting
>> btrfs_mount() called twice in mount path.
>>
>> Old btrfs_mount() will do:
>> 0. VFS layer calls vfs_kern_mount() with registered file_system_type
>> (for btrfs, btrfs_fs_type). btrfs_mount() is called on the way.
>> 1. btrfs_parse_early_options() parses the "subvolid=" mount option and sets
>> subvol_objectid to its value. Otherwise, subvol_objectid keeps its initial
>> value of 0
>> 2. check subvol_objectid is 5 or not. Assume this time id is not 5, then
>> btrfs_mount() returns by calling mount_subvol()
>> 3. In mount_subvol(), original mount options are modified to contain
>> "subvolid=0" in setup_root_args(). Then, vfs_kern_mount() is called with
>> btrfs_fs_type and new options
>> 4. btrfs_mount() is called again
>> 5. btrfs_parse_early_options() parses "subvolid=0" and sets subvol_objectid
>> to 5 (instead of 0)
>> 6. check subvol_objectid is 5 or not. This time id is 5 and mount_subvol()
>> is not called. btrfs_mount() finishes mounting a root
>> 7. (in mount_subvol()) using the return value of vfs_kern_mount(), it
>> calls mount_subtree()
>> 8. return subvolume's dentry
>>
>> Reusing the same file_system_type (and btrfs_mount()) for vfs_kern_mount()
>> is the cause of complication.
>>
>> Instead, new btrfs_mount() will do:
>> 1. parse subvol id related options for later use in mount_subvol()
>> 2. mount device's root by calling vfs_kern_mount() with
>> btrfs_root_fs_type, which is not registered to VFS by
>> register_filesystem(). As a result, btrfs_mount_root() is called
>> 3. return by calling mount_subvol()
>>
>> The code of 2. is moved from the first part of mount_subvol().
>>
>> Signed-off-by: Tomohiro Misono <misono.tomohiro@xxxxxxxxxxxxxx>
>> ---
>> fs/btrfs/super.c | 193 +++++++++++++++++++------------------------------------
>> 1 file changed, 65 insertions(+), 128 deletions(-)
>>
>> diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
>> index 14189ad47466..ce93d87b2a69 100644
>> --- a/fs/btrfs/super.c
>> +++ b/fs/btrfs/super.c
>> @@ -66,6 +66,11 @@
>> #include <trace/events/btrfs.h>
>>
>> static const struct super_operations btrfs_super_ops;
>> +/*
>> + * btrfs_root_fs_type is used internally while
>> + * btrfs_fs_type is used for VFS layer.
>> + * See the comment at btrfs_mount for more detail.
>> + */
>> static struct file_system_type btrfs_root_fs_type;
>> static struct file_system_type btrfs_fs_type;
>>
>> @@ -1404,48 +1409,11 @@ static char *setup_root_args(char *args)
>>
>> static struct dentry *mount_subvol(const char *subvol_name, u64 subvol_objectid,
>> int flags, const char *device_name,
>> - char *data)
>> + char *data, struct vfsmount *mnt)
>> {
>> struct dentry *root;
>> - struct vfsmount *mnt = NULL;
>> - char *newargs;
>> int ret;
>>
>> - newargs = setup_root_args(data);
>> - if (!newargs) {
>> - root = ERR_PTR(-ENOMEM);
>> - goto out;
>> - }
>> -
>> - mnt = vfs_kern_mount(&btrfs_fs_type, flags, device_name, newargs);
>> - if (PTR_ERR_OR_ZERO(mnt) == -EBUSY) {
>> - if (flags & SB_RDONLY) {
>> - mnt = vfs_kern_mount(&btrfs_fs_type, flags & ~SB_RDONLY,
>> - device_name, newargs);
>> - } else {
>> - mnt = vfs_kern_mount(&btrfs_fs_type, flags | SB_RDONLY,
>> - device_name, newargs);
>> - if (IS_ERR(mnt)) {
>> - root = ERR_CAST(mnt);
>> - mnt = NULL;
>> - goto out;
>> - }
>> -
>> - down_write(&mnt->mnt_sb->s_umount);
>> - ret = btrfs_remount(mnt->mnt_sb, &flags, NULL);
>> - up_write(&mnt->mnt_sb->s_umount);
>> - if (ret < 0) {
>> - root = ERR_PTR(ret);
>> - goto out;
>> - }
>> - }
>> - }
>> - if (IS_ERR(mnt)) {
>> - root = ERR_CAST(mnt);
>> - mnt = NULL;
>> - goto out;
>> - }
>> -
>> if (!subvol_name) {
>> if (!subvol_objectid) {
>> ret = get_default_subvol_objectid(btrfs_sb(mnt->mnt_sb),
>> @@ -1501,7 +1469,6 @@ static struct dentry *mount_subvol(const char *subvol_name, u64 subvol_objectid,
>>
>> out:
>> mntput(mnt);
>> - kfree(newargs);
>> kfree(subvol_name);
>> return root;
>> }
>> @@ -1556,6 +1523,12 @@ static int setup_security_options(struct btrfs_fs_info *fs_info,
>> return ret;
>> }
>>
>> +/*
>> + * Find a superblock for the given device / mount point.
>> + *
>> + * Note: This is based on mount_bdev from fs/super.c with a few additions
>> + * for multiple device setup. Make sure to keep it in sync.
>> + */
>> static struct dentry *btrfs_mount_root(struct file_system_type *fs_type,
>> int flags, const char *device_name, void *data)
>> {
>> @@ -1662,20 +1635,35 @@ static struct dentry *btrfs_mount_root(struct file_system_type *fs_type,
>> security_free_mnt_opts(&new_sec_opts);
>> return ERR_PTR(error);
>> }
>> +
>> /*
>> - * Find a superblock for the given device / mount point.
>> + * Mount function which is called by VFS layer.
>> + *
>> + * In order to allow mounting a subvolume directly, btrfs uses
>> + * mount_subtree() which needs vfsmount* of device's root (/).
>> + * This means device's root has to be mounted internally in any case.
>> + *
>> + * Operation flow:
>> + * 1. Parse subvol id related options for later use in mount_subvol().
>> + *
>> + * 2. Mount device's root (/) by calling vfs_kern_mount().
>> *
>> - * Note: This is based on get_sb_bdev from fs/super.c with a few additions
>> - * for multiple device setup. Make sure to keep it in sync.
>> + * NOTE: vfs_kern_mount() is used by VFS to call btrfs_mount() in the
>> + * first place. In order to avoid calling btrfs_mount() again, we use
>> + * different file_system_type which is not registered to VFS by
>> + * register_filesystem() (btrfs_root_fs_type). As a result,
>> + * btrfs_mount_root() is called. The return value will be used by
>> + * mount_subtree() in mount_subvol().
>> + *
>> + * 3. Call mount_subvol() to get the dentry of subvolume. Since there is
>> + * "btrfs subvolume set-default", mount_subvol() is called always.
>> */
>> static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags,
>> const char *device_name, void *data)
>> {
>> - struct block_device *bdev = NULL;
>> - struct super_block *s;
>> struct btrfs_fs_devices *fs_devices = NULL;
>> - struct btrfs_fs_info *fs_info = NULL;
>> - struct security_mnt_opts new_sec_opts;
>> + struct vfsmount *mnt_root;
>> + struct dentry *root;
>> fmode_t mode = FMODE_READ;
>> char *subvol_name = NULL;
>> u64 subvol_objectid = 0;
>> @@ -1692,93 +1680,42 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags,
>> return ERR_PTR(error);
>> }
>>
>> - if (subvol_name || subvol_objectid != BTRFS_FS_TREE_OBJECTID) {
>> - /* mount_subvol() will free subvol_name. */
>> - return mount_subvol(subvol_name, subvol_objectid, flags,
>> - device_name, data);
>> - }
>> -
>> - security_init_mnt_opts(&new_sec_opts);
>> - if (data) {
>> - error = parse_security_options(data, &new_sec_opts);
>> - if (error)
>> - return ERR_PTR(error);
>> - }
>> -
>> - error = btrfs_scan_one_device(device_name, mode, fs_type, &fs_devices);
>> - if (error)
>> - goto error_sec_opts;
>> -
>> - /*
>> - * Setup a dummy root and fs_info for test/set super. This is because
>> - * we don't actually fill this stuff out until open_ctree, but we need
>> - * it for searching for existing supers, so this lets us do that and
>> - * then open_ctree will properly initialize everything later.
>> - */
>> - fs_info = kzalloc(sizeof(struct btrfs_fs_info), GFP_KERNEL);
>> - if (!fs_info) {
>> - error = -ENOMEM;
>> - goto error_sec_opts;
>> - }
>> -
>> - fs_info->fs_devices = fs_devices;
>> -
>> - fs_info->super_copy = kzalloc(BTRFS_SUPER_INFO_SIZE, GFP_KERNEL);
>> - fs_info->super_for_commit = kzalloc(BTRFS_SUPER_INFO_SIZE, GFP_KERNEL);
>> - security_init_mnt_opts(&fs_info->security_opts);
>> - if (!fs_info->super_copy || !fs_info->super_for_commit) {
>> - error = -ENOMEM;
>> - goto error_fs_info;
>> - }
>> -
>> - error = btrfs_open_devices(fs_devices, mode, fs_type);
>> - if (error)
>> - goto error_fs_info;
>> -
>> - if (!(flags & SB_RDONLY) && fs_devices->rw_devices == 0) {
>> - error = -EACCES;
>> - goto error_close_devices;
>> - }
>> -
>> - bdev = fs_devices->latest_bdev;
>> - s = sget(fs_type, btrfs_test_super, btrfs_set_super, flags | SB_NOSEC,
>> - fs_info);
>> - if (IS_ERR(s)) {
>> - error = PTR_ERR(s);
>> - goto error_close_devices;
>> - }
>> + /* mount device's root (/) */
>> + mnt_root = vfs_kern_mount(&btrfs_root_fs_type, flags,
>> + device_name, data);
>> + if (PTR_ERR_OR_ZERO(mnt_root) == -EBUSY) {
>> + if (flags & SB_RDONLY) {
>> + mnt_root = vfs_kern_mount(&btrfs_root_fs_type,
>> + flags & ~SB_RDONLY, device_name, data);
>> + } else {
>> + mnt_root = vfs_kern_mount(&btrfs_root_fs_type,
>> + flags | SB_RDONLY, device_name, data);
>> + if (IS_ERR(mnt_root)) {
>> + root = ERR_CAST(mnt_root);
>> + goto out;
>> + }
>>
>> - if (s->s_root) {
>> - btrfs_close_devices(fs_devices);
>> - free_fs_info(fs_info);
>> - if ((flags ^ s->s_flags) & SB_RDONLY)
>> - error = -EBUSY;
>> - } else {
>> - snprintf(s->s_id, sizeof(s->s_id), "%pg", bdev);
>> - btrfs_sb(s)->bdev_holder = fs_type;
>> - error = btrfs_fill_super(s, fs_devices, data);
>> - }
>> - if (error) {
>> - deactivate_locked_super(s);
>> - goto error_sec_opts;
>> + down_write(&mnt_root->mnt_sb->s_umount);
>> + error = btrfs_remount(mnt_root->mnt_sb, &flags, NULL);
>> + up_write(&mnt_root->mnt_sb->s_umount);
>> + if (error < 0) {
>> + root = ERR_PTR(error);
>> + mntput(mnt_root);
>> + goto out;
>> + }
>> + }
>> }
>> -
>> - fs_info = btrfs_sb(s);
>> - error = setup_security_options(fs_info, s, &new_sec_opts);
>> - if (error) {
>> - deactivate_locked_super(s);
>> - goto error_sec_opts;
>> + if (IS_ERR(mnt_root)) {
>> + root = ERR_CAST(mnt_root);
>> + goto out;
>> }
>>
>> - return dget(s->s_root);
>> + /* mount_subvol() will free subvol_name and mnt_root */
>> + root = mount_subvol(subvol_name, subvol_objectid, flags,
>> + device_name, data, mnt_root);
>>
>> -error_close_devices:
>> - btrfs_close_devices(fs_devices);
>> -error_fs_info:
>> - free_fs_info(fs_info);
>> -error_sec_opts:
>> - security_free_mnt_opts(&new_sec_opts);
>> - return ERR_PTR(error);
>> +out:
>> + return root;
>> }
>>
>> static void btrfs_resize_thread_pool(struct btrfs_fs_info *fs_info,
>>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
> the body of a message to majordomo@xxxxxxxxxxxxxxx
> More majordomo info at http://vger.kernel.org/majordomo-info.html
>
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html