On Tue, Mar 29, 2016 at 10:22:29PM +0800, Anand Jain wrote:
> Write and Flush errors are considered as critical errors,
> upon which the device will be brought offline and marked as
> failed. Write and Flush errors are identified using device
> error statistics.
>
> Signed-off-by: Anand Jain <anand.jain@xxxxxxxxxx>
>
> btrfs: check for failed device and hot replace
>
> This patch creates casualty_kthread to check for the failed
> devices, and triggers device replace.
>
> Signed-off-by: Anand Jain <anand.jain@xxxxxxxxxx>
> ---
> fs/btrfs/ctree.h | 2 +
> fs/btrfs/disk-io.c | 161 ++++++++++++++++++++++++++++++++++++++++++++++++++++-
> fs/btrfs/disk-io.h | 2 +
> fs/btrfs/volumes.c | 1 +
> fs/btrfs/volumes.h | 4 ++
> 5 files changed, 169 insertions(+), 1 deletion(-)
>
> diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
> index 2c185a8e92f0..36f1c29e00a0 100644
> --- a/fs/btrfs/ctree.h
> +++ b/fs/btrfs/ctree.h
> @@ -1569,6 +1569,7 @@ struct btrfs_fs_info {
> struct mutex tree_log_mutex;
> struct mutex transaction_kthread_mutex;
> struct mutex cleaner_mutex;
> + struct mutex casualty_mutex;
> struct mutex chunk_mutex;
> struct mutex volume_mutex;
>
> @@ -1686,6 +1687,7 @@ struct btrfs_fs_info {
> struct btrfs_workqueue *extent_workers;
> struct task_struct *transaction_kthread;
> struct task_struct *cleaner_kthread;
> + struct task_struct *casualty_kthread;
> int thread_pool_size;
>
> struct kobject *space_info_kobj;
> diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
> index b99329e37965..650e26e0acda 100644
> --- a/fs/btrfs/disk-io.c
> +++ b/fs/btrfs/disk-io.c
> @@ -1869,6 +1869,153 @@ sleep:
> return 0;
> }
>
> +static int btrfs_check_and_handle_casualty(void *arg)
> +{
> + int ret;
> + int found = 0;
> + struct btrfs_device *device;
> + struct btrfs_root *root = arg;
> + struct btrfs_fs_info *fs_info = root->fs_info;
> + struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
> +
> + btrfs_dev_replace_lock(&fs_info->dev_replace, 0);
> + if (btrfs_dev_replace_is_ongoing(&fs_info->dev_replace)) {
> + btrfs_dev_replace_unlock(&fs_info->dev_replace, 0);
> + return -EBUSY;
> + }
> + btrfs_dev_replace_unlock(&fs_info->dev_replace, 0);
> +
> + ret = btrfs_check_devices(fs_devices);
> + if (ret == 1) {
> + /*
> + * There were some casualties, and if its beyond a
> + * chunk group can tolerate, then FS will already
> + * be in readonly, so check that. And that's best
> + * btrfs could do as of now and no replace will help.
> + */
> + if (fs_info->sb->s_flags & MS_RDONLY)
> + return -EROFS;
> +
> + mutex_lock(&fs_devices->device_list_mutex);
> + rcu_read_lock();
> + list_for_each_entry_rcu(device,
> + &fs_devices->devices, dev_list) {
> + if (device->failed) {
> + found = 1;
> + break;
> + }
> + }
> + rcu_read_unlock();
> + mutex_unlock(&fs_devices->device_list_mutex);
> + }
> +
> + /*
> + * We are using the replace code which should be interrupt-able
> + * during unmount, and as of now there is no user land stop
> + * request that we support and this will run until its complete
> + */
> + if (found)
> + ret = btrfs_auto_replace_start(root, device);
> +
> + return ret;
> +}
> +
> +/*
> + * A kthread to check if any auto maintenance be required. This is
> + * multithread safe, and kthread is running only if
> + * fs_info->casualty_kthread is not NULL, fixme: atomic ?
> + */
> +static int casualty_kthread(void *arg)
> +{
> + int ret;
> + int again;
> + struct btrfs_root *root = arg;
> +
> + do {
> + again = 0;
> +
> + if (btrfs_need_cleaner_sleep(root))
> + goto sleep;
> +
> + if (!mutex_trylock(&root->fs_info->casualty_mutex))
> + goto sleep;
> +
> + if (btrfs_need_cleaner_sleep(root)) {
> + mutex_unlock(&root->fs_info->casualty_mutex);
> + goto sleep;
> + }
> +
> + ret = btrfs_check_and_handle_casualty(arg);
> + if (ret == -EROFS) {
> + /*
> + * When checking and fixing the devices, the
> + * FS may be marked as RO in some situations.
> + * And on ROFS casualty thread has no work.
> + * So optimize here, to stop this thread until
> + * FS is back to RW.
> + */
> + }
> + mutex_unlock(&root->fs_info->casualty_mutex);
> +
> +sleep:
> + if (!try_to_freeze() && !again) {
This block was copy-pasted from cleaner_kthread(). The 'again' variable
is effectively unused here (it is never set to anything but 0), and the use
of try_to_freeze() in cleaner_kthread() was removed in Mason's
'for-linus-4.6' branch by commit 838fe188 'btrfs: cleaner_kthread() doesn't
need explicit freeze'. casualty_kthread() isn't marked as freezable either,
so this check can be removed entirely.
> + set_current_state(TASK_INTERRUPTIBLE);
> + if (!kthread_should_stop())
> + schedule();
> + __set_current_state(TASK_RUNNING);
> + }
> + } while (!kthread_should_stop());
> +
> + return 0;
> +}
> +
--
Yauhen Kharuzhy
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html