Re: [PATCH 12/12] btrfs: check device for critical errors and mark failed

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 





On 03/30/2016 06:41 AM, Yauhen Kharuzhy wrote:
On Tue, Mar 29, 2016 at 10:22:29PM +0800, Anand Jain wrote:
Write and Flush errors are considered as critical errors,
upon which the device will be brought offline and marked as
failed. Write and Flush errors are identified using device
error statistics.

Signed-off-by: Anand Jain <anand.jain@xxxxxxxxxx>

btrfs: check for failed device and hot replace

This patch creates casualty_kthread to check for the failed
devices, and triggers device replace.

Signed-off-by: Anand Jain <anand.jain@xxxxxxxxxx>
---
  fs/btrfs/ctree.h   |   2 +
  fs/btrfs/disk-io.c | 161 ++++++++++++++++++++++++++++++++++++++++++++++++++++-
  fs/btrfs/disk-io.h |   2 +
  fs/btrfs/volumes.c |   1 +
  fs/btrfs/volumes.h |   4 ++
  5 files changed, 169 insertions(+), 1 deletion(-)

diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 2c185a8e92f0..36f1c29e00a0 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -1569,6 +1569,7 @@ struct btrfs_fs_info {
  	struct mutex tree_log_mutex;
  	struct mutex transaction_kthread_mutex;
  	struct mutex cleaner_mutex;
+	struct mutex casualty_mutex;
  	struct mutex chunk_mutex;
  	struct mutex volume_mutex;

@@ -1686,6 +1687,7 @@ struct btrfs_fs_info {
  	struct btrfs_workqueue *extent_workers;
  	struct task_struct *transaction_kthread;
  	struct task_struct *cleaner_kthread;
+	struct task_struct *casualty_kthread;
  	int thread_pool_size;

  	struct kobject *space_info_kobj;
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index b99329e37965..650e26e0acda 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -1869,6 +1869,153 @@ sleep:
  	return 0;
  }

+static int btrfs_check_and_handle_casualty(void *arg)
+{
+	int ret;
+	int found = 0;
+	struct btrfs_device *device;
+	struct btrfs_root *root = arg;
+	struct btrfs_fs_info *fs_info = root->fs_info;
+	struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
+
+	btrfs_dev_replace_lock(&fs_info->dev_replace, 0);
+	if (btrfs_dev_replace_is_ongoing(&fs_info->dev_replace)) {
+		btrfs_dev_replace_unlock(&fs_info->dev_replace, 0);
+		return -EBUSY;
+	}
+	btrfs_dev_replace_unlock(&fs_info->dev_replace, 0);
+
+	ret = btrfs_check_devices(fs_devices);
+	if (ret == 1) {
+		/*
+		 * There were some casualties, and if its beyond a
+		 * chunk group can tolerate, then FS will already
+		 * be in readonly, so check that. And that's best
+		 * btrfs could do as of now and no replace will help.
+		 */
+		if (fs_info->sb->s_flags & MS_RDONLY)
+			return -EROFS;
+
+		mutex_lock(&fs_devices->device_list_mutex);
+		rcu_read_lock();
+		list_for_each_entry_rcu(device,
+				&fs_devices->devices, dev_list) {
+			if (device->failed) {
+				found = 1;
+				break;
+			}
+		}
+		rcu_read_unlock();
+		mutex_unlock(&fs_devices->device_list_mutex);
+	}
+
+	/*
+	 * We are using the replace code which should be interrupt-able
+	 * during unmount, and as of now there is no user land stop
+	 * request that we support and this will run until its complete
+	 */
+	if (found)
+		ret = btrfs_auto_replace_start(root, device);
+
+	return ret;
+}
+
+/*
+ * A kthread to check if any auto maintenance be required. This is
+ * multithread safe, and kthread is running only if
+ * fs_info->casualty_kthread is not NULL, fixme: atomic ?
+ */
+static int casualty_kthread(void *arg)
+{
+	int ret;
+	int again;
+	struct btrfs_root *root = arg;
+
+	do {
+		again = 0;
+
+		if (btrfs_need_cleaner_sleep(root))
+			goto sleep;
+
+		if (!mutex_trylock(&root->fs_info->casualty_mutex))
+			goto sleep;
+
+		if (btrfs_need_cleaner_sleep(root)) {
+			mutex_unlock(&root->fs_info->casualty_mutex);
+			goto sleep;
+		}
+
+		ret = btrfs_check_and_handle_casualty(arg);
+		if (ret == -EROFS) {
+			/*
+			 * When checking and fixing the devices, the
+			 * FS may be marked as RO in some situations.
+			 * And on ROFS casualty thread has no work.
+			 * So optimize here, to stop this thread until
+			 * FS is back to RW.
+			 */
+		}
+		mutex_unlock(&root->fs_info->casualty_mutex);
+
+sleep:
+		if (!try_to_freeze() && !again) {

This block was copy-pasted from cleaner_kthread(). The 'again' variable
is effectively unused, and the use of try_to_freeze() in cleaner_kthread()
was eliminated in Mason's 'for-linus-4.6' branch by commit
838fe188 'btrfs: cleaner_kthread() doesn't need explicit freeze'.
casualty_kthread() isn't marked as freezable either,
so this check can be removed entirely.


Thanks, this is fixed in v3.

Anand


+			set_current_state(TASK_INTERRUPTIBLE);
+			if (!kthread_should_stop())
+				schedule();
+			__set_current_state(TASK_RUNNING);
+		}
+	} while (!kthread_should_stop());
+
+	return 0;
+}
+

--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html




[Index of Archives]     [Linux Filesystem Development]     [Linux NFS]     [Linux NILFS]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux