On 6.10.19 г. 5:47 ч., Anand Jain wrote:
> In open_fs_devices() we identify an alien device but we don't reset its
> device::name. So the progs device list does not show the device as missing,
> as shown in the script below.
>
> mkfs.btrfs -fq /dev/sdd && mount /dev/sdd /btrfs
> mkfs.btrfs -fq -draid1 -mraid1 /dev/sdc /dev/sdb
> sleep 3 # avoid racing with udev's useless scans if needed
> btrfs dev add -f /dev/sdb /btrfs
> mount -o degraded /dev/sdc /btrfs1
>
> No missing device:
> btrfs fi show -m /btrfs1
> Label: none uuid: 3eb7cd50-4594-458f-9d68-c243cc49954d
> Total devices 2 FS bytes used 128.00KiB
> devid 1 size 12.00GiB used 1.26GiB path /dev/sdc
> devid 2 size 12.00GiB used 1.26GiB path /dev/sdb
>
> Signed-off-by: Anand Jain <anand.jain@xxxxxxxxxx>
> ---
> v2: Move the alien-device freeing part to the parent function of
> btrfs_open_one_device(). Thanks Nikolay.
>
> PS: Fundamentally, it's the wrong approach for btrfs-progs to deduce the
> device missing state in userland instead of obtaining it from the kernel.
> I objected to the patch, but those patches still got merged; this bug is
> one of the side effects. Ironically, I wrote patches to read device_state
> from the kernel using ioctl, procfs and sysfs, but they didn't get enough
> attention to be merged.
>
> fs/btrfs/volumes.c | 32 +++++++++++++++++++++++---------
> 1 file changed, 23 insertions(+), 9 deletions(-)
>
> diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
> index c223a8147bfd..21aaf64c59b2 100644
> --- a/fs/btrfs/volumes.c
> +++ b/fs/btrfs/volumes.c
> @@ -591,13 +591,18 @@ static int btrfs_open_one_device(struct btrfs_fs_devices *fs_devices,
> if (ret)
> return ret;
>
> + ret = -EINVAL;
> disk_super = (struct btrfs_super_block *)bh->b_data;
> devid = btrfs_stack_device_id(&disk_super->dev_item);
> - if (devid != device->devid)
> + if (devid != device->devid) {
> + ret = -EUCLEAN;
> goto error_brelse;
> + }
>
> - if (memcmp(device->uuid, disk_super->dev_item.uuid, BTRFS_UUID_SIZE))
> + if (memcmp(device->uuid, disk_super->dev_item.uuid, BTRFS_UUID_SIZE)) {
> + ret = -EUCLEAN;
> goto error_brelse;
> + }
>
> device->generation = btrfs_super_generation(disk_super);
>
> @@ -640,7 +645,7 @@ static int btrfs_open_one_device(struct btrfs_fs_devices *fs_devices,
> brelse(bh);
> blkdev_put(bdev, flags);
>
> - return -EINVAL;
> + return ret;
> }
>
> /*
> @@ -1121,19 +1126,28 @@ int btrfs_close_devices(struct btrfs_fs_devices *fs_devices)
> static int open_fs_devices(struct btrfs_fs_devices *fs_devices,
> fmode_t flags, void *holder)
> {
> + int ret;
> struct btrfs_device *device;
> + struct btrfs_device *tmp_device;
> struct btrfs_device *latest_dev = NULL;
>
> flags |= FMODE_EXCL;
>
> - list_for_each_entry(device, &fs_devices->devices, dev_list) {
> + list_for_each_entry_safe(device, tmp_device, &fs_devices->devices,
> + dev_list) {
> /* Just open everything we can; ignore failures here */
> - if (btrfs_open_one_device(fs_devices, device, flags, holder))
> - continue;
> -
> - if (!latest_dev ||
> - device->generation > latest_dev->generation)
> + ret = btrfs_open_one_device(fs_devices, device, flags, holder);
> + if (ret == 0 && (!latest_dev ||
> + device->generation > latest_dev->generation)) {
> latest_dev = device;
> + continue;
> + }
nit: Had you used if () {} else if () {} you could have done away with the
continue.
> + if (ret == -EUCLEAN) {
> + /* An alien device. Clean it up */
> + fs_devices->num_devices--;
> + list_del(&device->dev_list);
> + btrfs_free_device(device);
> + }
> }
> if (fs_devices->open_devices == 0)
> return -EINVAL;
>