On Thu, Apr 26, 2012 at 10:58:04AM +0800, Miao Xie wrote:
> The reason the deadlock is that:
> Task Btrfs-cleaner
> umount()
> down_write(&s->s_umount)
> sync_filesystem()
> do auto-defragment and produce
> lots of dirty pages
> close_ctree()
> wait for the end of
> btrfs-cleaner
why it's needed to wait for cleaner during close_ctree? I got bitten
yesterday by (a non-deadlock) scenario, when there were tons of deleted
snapshots, filesystem almost full, so getting and managing free space
was slow (btrfs-cache thread was more active than btrfs-cleaner), and
umount just did not end. interruptible by reboot only.
avoiding this particular case of waiting for cleaner:
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -3064,7 +3066,13 @@ int btrfs_commit_super(struct btrfs_root *root)
mutex_lock(&root->fs_info->cleaner_mutex);
btrfs_run_delayed_iputs(root);
- btrfs_clean_old_snapshots(root);
+ if (!btrfs_fs_closing(root->fs_info)) {
+ /* btrfs_clean_old_snapshots(root); */
+ wake_up_process(root->fs_info->cleaner_kthread);
+ printk(KERN_DEBUG "btrfs: wake cleaner from commit_super\n");
+ } else {
+ printk(KERN_DEBUG "btrfs: skip cleaning when going down\n");
+ }
mutex_unlock(&root->fs_info->cleaner_mutex);
/* wait until ongoing cleanup work done */
plus not even trying to call the cleaner directly, rather waking the cleaner
thread. (attached whole work-in-progress patch).
david
---
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 58a232d..0651f6f 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -1617,13 +1617,14 @@ static int cleaner_kthread(void *arg)
struct btrfs_root *root = arg;
do {
+ int again = 0;
vfs_check_frozen(root->fs_info->sb, SB_FREEZE_WRITE);
if (!(root->fs_info->sb->s_flags & MS_RDONLY) &&
down_read_trylock(&root->fs_info->sb->s_umount) &&
mutex_trylock(&root->fs_info->cleaner_mutex)) {
btrfs_run_delayed_iputs(root);
- btrfs_clean_old_snapshots(root);
+ again = btrfs_clean_one_old_snapshot(root);
mutex_unlock(&root->fs_info->cleaner_mutex);
btrfs_run_defrag_inodes(root->fs_info);
up_read(&root->fs_info->sb->s_umount);
@@ -1631,7 +1632,7 @@ static int cleaner_kthread(void *arg)
if (freezing(current)) {
refrigerator();
- } else {
+ } else if (!again) {//FIXME: check again now directly from dead_roots?
set_current_state(TASK_INTERRUPTIBLE);
if (!kthread_should_stop())
schedule();
@@ -3064,7 +3066,13 @@ int btrfs_commit_super(struct btrfs_root *root)
mutex_lock(&root->fs_info->cleaner_mutex);
btrfs_run_delayed_iputs(root);
- btrfs_clean_old_snapshots(root);
+ if (!btrfs_fs_closing(root->fs_info)) {
+ /* btrfs_clean_old_snapshots(root); */
+ wake_up_process(root->fs_info->cleaner_kthread);
+ printk(KERN_DEBUG "btrfs: wake cleaner from commit_super\n");
+ } else {
+ printk(KERN_DEBUG "btrfs: skip cleaning when going down\n");
+ }
mutex_unlock(&root->fs_info->cleaner_mutex);
/* wait until ongoing cleanup work done */
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index ec1e0c6..3aba911 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -6995,6 +6995,11 @@ int btrfs_drop_snapshot(struct btrfs_root *root,
wc->reada_count = BTRFS_NODEPTRS_PER_BLOCK(root);
while (1) {
+ if (btrfs_fs_closing(root->fs_info)) {
+ printk(KERN_DEBUG "btrfs: drop early exit\n");
+ err = -EAGAIN;
+ goto out_end_trans;
+ }
ret = walk_down_tree(trans, root, path, wc);
if (ret < 0) {
err = ret;
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index 922e6ec..c9dc857 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -4060,6 +4060,7 @@ int btrfs_relocate_block_group(struct btrfs_root *extent_root, u64 group_start)
while (1) {
mutex_lock(&fs_info->cleaner_mutex);
+ // FIXME: wake cleaner
btrfs_clean_old_snapshots(fs_info->tree_root);
ret = relocate_block_group(rc);
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index de2942f..3d83f6b 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -783,7 +783,7 @@ static noinline int commit_cowonly_roots(struct btrfs_trans_handle *trans,
int btrfs_add_dead_root(struct btrfs_root *root)
{
spin_lock(&root->fs_info->trans_lock);
- list_add(&root->root_list, &root->fs_info->dead_roots);
+ list_add_tail(&root->root_list, &root->fs_info->dead_roots);
spin_unlock(&root->fs_info->trans_lock);
return 0;
}
@@ -1533,7 +1533,45 @@ int btrfs_clean_old_snapshots(struct btrfs_root *root)
ret = btrfs_drop_snapshot(root, NULL, 0, 0);
else
ret =btrfs_drop_snapshot(root, NULL, 1, 0);
- BUG_ON(ret < 0);
+ BUG_ON(ret < 0 && ret != -EAGAIN);
}
return 0;
}
+/*
+ * return < 0 if error
+ * 0 if there are no more dead_roots at the time of call
+ * 1 there are more to be processed, call me again
+ *
+ * (racy)
+ */
+int btrfs_clean_one_old_snapshot(struct btrfs_root *root)
+{
+ int ret;
+ int run_again = 1;
+ struct btrfs_fs_info *fs_info = root->fs_info;
+
+ if (root->fs_info->sb->s_flags & MS_RDONLY) {
+ printk(KERN_WARNING "btrfs: cleaner called for RO fs!\n");
+ }
+
+ spin_lock(&fs_info->trans_lock);
+ root = list_first_entry(&fs_info->dead_roots,
+ struct btrfs_root, root_list);
+ list_del(&root->root_list);
+ if (list_empty(&fs_info->dead_roots))
+ run_again = 0;
+ spin_unlock(&fs_info->trans_lock);
+
+ printk(KERN_DEBUG "btrfs: cleaner removing %llu\n",
+ (unsigned long long)root->objectid);
+
+ btrfs_kill_all_delayed_nodes(root);
+
+ if (btrfs_header_backref_rev(root->node) <
+ BTRFS_MIXED_BACKREF_REV)
+ ret = btrfs_drop_snapshot(root, NULL, 0, 0);
+ else
+ ret = btrfs_drop_snapshot(root, NULL, 1, 0);
+ BUG_ON(ret < 0 && ret != -EAGAIN);
+ return run_again || ret == -EAGAIN;
+}
diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h
index fe27379..7071ca5 100644
--- a/fs/btrfs/transaction.h
+++ b/fs/btrfs/transaction.h
@@ -94,6 +94,7 @@ int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans,
int btrfs_add_dead_root(struct btrfs_root *root);
int btrfs_defrag_root(struct btrfs_root *root, int cacheonly);
int btrfs_clean_old_snapshots(struct btrfs_root *root);
+int btrfs_clean_one_old_snapshot(struct btrfs_root *root);
int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
struct btrfs_root *root);
int btrfs_commit_transaction_async(struct btrfs_trans_handle *trans,
---
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html