On 03/30/2016 06:41 AM, Yauhen Kharuzhy wrote:
On Tue, Mar 29, 2016 at 10:22:29PM +0800, Anand Jain wrote:Write and Flush errors are considered as critical errors, upon which the device will be brought offline and marked as failed. Write and Flush errors are identified using device error statistics. Signed-off-by: Anand Jain <anand.jain@xxxxxxxxxx> btrfs: check for failed device and hot replace This patch creates casualty_kthread to check for the failed devices, and triggers device replace. Signed-off-by: Anand Jain <anand.jain@xxxxxxxxxx> --- fs/btrfs/ctree.h | 2 + fs/btrfs/disk-io.c | 161 ++++++++++++++++++++++++++++++++++++++++++++++++++++- fs/btrfs/disk-io.h | 2 + fs/btrfs/volumes.c | 1 + fs/btrfs/volumes.h | 4 ++ 5 files changed, 169 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 2c185a8e92f0..36f1c29e00a0 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -1569,6 +1569,7 @@ struct btrfs_fs_info { struct mutex tree_log_mutex; struct mutex transaction_kthread_mutex; struct mutex cleaner_mutex; + struct mutex casualty_mutex; struct mutex chunk_mutex; struct mutex volume_mutex; @@ -1686,6 +1687,7 @@ struct btrfs_fs_info { struct btrfs_workqueue *extent_workers; struct task_struct *transaction_kthread; struct task_struct *cleaner_kthread; + struct task_struct *casualty_kthread; int thread_pool_size; struct kobject *space_info_kobj; diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index b99329e37965..650e26e0acda 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1869,6 +1869,153 @@ sleep: return 0; } +static int btrfs_check_and_handle_casualty(void *arg) +{ + int ret; + int found = 0; + struct btrfs_device *device; + struct btrfs_root *root = arg; + struct btrfs_fs_info *fs_info = root->fs_info; + struct btrfs_fs_devices *fs_devices = fs_info->fs_devices; + + btrfs_dev_replace_lock(&fs_info->dev_replace, 0); + if (btrfs_dev_replace_is_ongoing(&fs_info->dev_replace)) { + btrfs_dev_replace_unlock(&fs_info->dev_replace, 0); + return -EBUSY; + } + 
btrfs_dev_replace_unlock(&fs_info->dev_replace, 0); + + ret = btrfs_check_devices(fs_devices); + if (ret == 1) { + /* + * There were some casualties, and if its beyond a + * chunk group can tolerate, then FS will already + * be in readonly, so check that. And that's best + * btrfs could do as of now and no replace will help. + */ + if (fs_info->sb->s_flags & MS_RDONLY) + return -EROFS; + + mutex_lock(&fs_devices->device_list_mutex); + rcu_read_lock(); + list_for_each_entry_rcu(device, + &fs_devices->devices, dev_list) { + if (device->failed) { + found = 1; + break; + } + } + rcu_read_unlock(); + mutex_unlock(&fs_devices->device_list_mutex); + } + + /* + * We are using the replace code which should be interrupt-able + * during unmount, and as of now there is no user land stop + * request that we support and this will run until its complete + */ + if (found) + ret = btrfs_auto_replace_start(root, device); + + return ret; +} + +/* + * A kthread to check if any auto maintenance be required. This is + * multithread safe, and kthread is running only if + * fs_info->casualty_kthread is not NULL, fixme: atomic ? + */ +static int casualty_kthread(void *arg) +{ + int ret; + int again; + struct btrfs_root *root = arg; + + do { + again = 0; + + if (btrfs_need_cleaner_sleep(root)) + goto sleep; + + if (!mutex_trylock(&root->fs_info->casualty_mutex)) + goto sleep; + + if (btrfs_need_cleaner_sleep(root)) { + mutex_unlock(&root->fs_info->casualty_mutex); + goto sleep; + } + + ret = btrfs_check_and_handle_casualty(arg); + if (ret == -EROFS) { + /* + * When checking and fixing the devices, the + * FS may be marked as RO in some situations. + * And on ROFS casualty thread has no work. + * So optimize here, to stop this thread until + * FS is back to RW. + */ + } + mutex_unlock(&root->fs_info->casualty_mutex); + +sleep: + if (!try_to_freeze() && !again) {This block was copy-pasted from the cleaner_kthread(). 
The 'again' variable is not really used, and the use of try_to_freeze() in cleaner_kthread() was eliminated in Mason's 'for-linus-4.6' branch by commit 838fe188 'btrfs: cleaner_kthread() doesn't need explicit freeze'. casualty_kthread() isn't marked as freezable either, so this check can be removed entirely.
Thanks, this is fixed in v3. Anand
+ set_current_state(TASK_INTERRUPTIBLE); + if (!kthread_should_stop()) + schedule(); + __set_current_state(TASK_RUNNING); + } + } while (!kthread_should_stop()); + + return 0; +} +
-- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html
