[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [PATCH 2/4] Btrfs: fix deadlock on sb->s_umount when doing umount



>> The reason the deadlock is that:
>>   Task					Btrfs-cleaner
>>   umount()
>>     down_write(&s->s_umount)
>>     sync_filesystem()
>> 					do auto-defragment and produce
>> 					lots of dirty pages
>>     close_ctree()
>>       wait for the end of
>>       btrfs-cleaner
> 
> why it's needed to wait for cleaner during close_ctree? I got bitten

> yesterday by (a non-deadlock) scenario, when there were tons of deleted

> snapshots, filesystem almost full, so getting and managing free space
> was slow (btrfs-cache thread was more active than btrfs-cleaner), and
> umount just did not end. interruptible by reboot only.

> 

> avoiding this particular case of waiting for cleaner:


Your patch doesn't fix the problem.
  Task					Btrfs-cleaner
  umount()
    down_write(&s->s_umount)
    sync_filesystem()
					do auto-defragment for some files
					and produce lots of dirty pages.
					do auto-defragment for the left files
					  start_transaction
					    reserve space
					      shrink_delalloc()
						writeback_inodes_sb_nr_if_idle()
						  down_read(&sb->s_umount)
    close_ctree()
      stop_kthread()
	wait for the end of
	btrfs-cleaner

the deadlock still happens.

But I found you add a trylock for ->s_umount in cleaner_kthread(), this method
can fix the deadlock problem, I think. It may be introduced by the other patch,
could you send that patch to me? I found if we fail to trylock ->cleaner_mutex,
we will forget to unlock ->s_umount.

Thanks
Miao

> 
> --- a/fs/btrfs/disk-io.c
> +++ b/fs/btrfs/disk-io.c
> @@ -3064,7 +3066,13 @@ int btrfs_commit_super(struct btrfs_root *root)
> 
>         mutex_lock(&root->fs_info->cleaner_mutex);
>         btrfs_run_delayed_iputs(root);
> -       btrfs_clean_old_snapshots(root);
> +       if (!btrfs_fs_closing(root->fs_info)) {
> +               /* btrfs_clean_old_snapshots(root); */
> +               wake_up_process(root->fs_info->cleaner_kthread);
> +               printk(KERN_DEBUG "btrfs: wake cleaner from commit_super\n");
> +       } else {
> +               printk(KERN_DEBUG "btrfs: skip cleaning when going down\n");
> +       }
>         mutex_unlock(&root->fs_info->cleaner_mutex);
> 
>         /* wait until ongoing cleanup work done */
> 
> 
> plus not even trying to call the cleaner directly, rather waking the cleaner
> thread. (attached whole work-in-progress patch).
> 
> david
> ---
> 
> diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
> index 58a232d..0651f6f 100644
> --- a/fs/btrfs/disk-io.c
> +++ b/fs/btrfs/disk-io.c
> @@ -1617,13 +1617,14 @@ static int cleaner_kthread(void *arg)
>  	struct btrfs_root *root = arg;
>  
>  	do {
> +		int again = 0;
>  		vfs_check_frozen(root->fs_info->sb, SB_FREEZE_WRITE);
>  
>  		if (!(root->fs_info->sb->s_flags & MS_RDONLY) &&
>  			down_read_trylock(&root->fs_info->sb->s_umount) &&
>  		    mutex_trylock(&root->fs_info->cleaner_mutex)) {
>  			btrfs_run_delayed_iputs(root);
> -			btrfs_clean_old_snapshots(root);
> +			again = btrfs_clean_one_old_snapshot(root);
>  			mutex_unlock(&root->fs_info->cleaner_mutex);
>  			btrfs_run_defrag_inodes(root->fs_info);
>  			up_read(&root->fs_info->sb->s_umount);
> @@ -1631,7 +1632,7 @@ static int cleaner_kthread(void *arg)
>  
>  		if (freezing(current)) {
>  			refrigerator();
> -		} else {
> +		} else if (!again) {//FIXME: check again now directly from dead_roots?
>  			set_current_state(TASK_INTERRUPTIBLE);
>  			if (!kthread_should_stop())
>  				schedule();
> @@ -3064,7 +3066,13 @@ int btrfs_commit_super(struct btrfs_root *root)
>  
>  	mutex_lock(&root->fs_info->cleaner_mutex);
>  	btrfs_run_delayed_iputs(root);
> -	btrfs_clean_old_snapshots(root);
> +	if (!btrfs_fs_closing(root->fs_info)) {
> +		/* btrfs_clean_old_snapshots(root); */
> +		wake_up_process(root->fs_info->cleaner_kthread);
> +		printk(KERN_DEBUG "btrfs: wake cleaner from commit_super\n");
> +	} else {
> +		printk(KERN_DEBUG "btrfs: skip cleaning when going down\n");
> +	}
>  	mutex_unlock(&root->fs_info->cleaner_mutex);
>  
>  	/* wait until ongoing cleanup work done */
> diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
> index ec1e0c6..3aba911 100644
> --- a/fs/btrfs/extent-tree.c
> +++ b/fs/btrfs/extent-tree.c
> @@ -6995,6 +6995,11 @@ int btrfs_drop_snapshot(struct btrfs_root *root,
>  	wc->reada_count = BTRFS_NODEPTRS_PER_BLOCK(root);
>  
>  	while (1) {
> +		if (btrfs_fs_closing(root->fs_info)) {
> +			printk(KERN_DEBUG "btrfs: drop early exit\n");
> +			err = -EAGAIN;
> +			goto out_end_trans;
> +		}
>  		ret = walk_down_tree(trans, root, path, wc);
>  		if (ret < 0) {
>  			err = ret;
> diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
> index 922e6ec..c9dc857 100644
> --- a/fs/btrfs/relocation.c
> +++ b/fs/btrfs/relocation.c
> @@ -4060,6 +4060,7 @@ int btrfs_relocate_block_group(struct btrfs_root *extent_root, u64 group_start)
>  	while (1) {
>  		mutex_lock(&fs_info->cleaner_mutex);
>  
> +		// FIXME: wake cleaner
>  		btrfs_clean_old_snapshots(fs_info->tree_root);
>  		ret = relocate_block_group(rc);
>  
> diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
> index de2942f..3d83f6b 100644
> --- a/fs/btrfs/transaction.c
> +++ b/fs/btrfs/transaction.c
> @@ -783,7 +783,7 @@ static noinline int commit_cowonly_roots(struct btrfs_trans_handle *trans,
>  int btrfs_add_dead_root(struct btrfs_root *root)
>  {
>  	spin_lock(&root->fs_info->trans_lock);
> -	list_add(&root->root_list, &root->fs_info->dead_roots);
> +	list_add_tail(&root->root_list, &root->fs_info->dead_roots);
>  	spin_unlock(&root->fs_info->trans_lock);
>  	return 0;
>  }
> @@ -1533,7 +1533,45 @@ int btrfs_clean_old_snapshots(struct btrfs_root *root)
>  			ret = btrfs_drop_snapshot(root, NULL, 0, 0);
>  		else
>  			ret =btrfs_drop_snapshot(root, NULL, 1, 0);
> -		BUG_ON(ret < 0);
> +		BUG_ON(ret < 0 && ret != -EAGAIN);
>  	}
>  	return 0;
>  }
> +/*
> + * return < 0 if error
> + * 0 if there are no more dead_roots at the time of call
> + * 1 there are more to be processed, call me again
> + *
> + * (racy)
> + */
> +int btrfs_clean_one_old_snapshot(struct btrfs_root *root)
> +{
> +	int ret;
> +	int run_again = 1;
> +	struct btrfs_fs_info *fs_info = root->fs_info;
> +
> +	if (root->fs_info->sb->s_flags & MS_RDONLY) {
> +		printk(KERN_WARNING "btrfs: cleaner called for RO fs!\n");
> +	}
> +
> +	spin_lock(&fs_info->trans_lock);
> +	root = list_first_entry(&fs_info->dead_roots,
> +			struct btrfs_root, root_list);
> +	list_del(&root->root_list);
> +	if (list_empty(&fs_info->dead_roots))
> +		run_again = 0;
> +	spin_unlock(&fs_info->trans_lock);
> +
> +	printk(KERN_DEBUG "btrfs: cleaner removing %llu\n",
> +			(unsigned long long)root->objectid);
> +
> +	btrfs_kill_all_delayed_nodes(root);
> +
> +	if (btrfs_header_backref_rev(root->node) <
> +			BTRFS_MIXED_BACKREF_REV)
> +		ret = btrfs_drop_snapshot(root, NULL, 0, 0);
> +	else
> +		ret = btrfs_drop_snapshot(root, NULL, 1, 0);
> +	BUG_ON(ret < 0 && ret != -EAGAIN);
> +	return run_again || ret == -EAGAIN;
> +}
> diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h
> index fe27379..7071ca5 100644
> --- a/fs/btrfs/transaction.h
> +++ b/fs/btrfs/transaction.h
> @@ -94,6 +94,7 @@ int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans,
>  int btrfs_add_dead_root(struct btrfs_root *root);
>  int btrfs_defrag_root(struct btrfs_root *root, int cacheonly);
>  int btrfs_clean_old_snapshots(struct btrfs_root *root);
> +int btrfs_clean_one_old_snapshot(struct btrfs_root *root);
>  int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
>  			     struct btrfs_root *root);
>  int btrfs_commit_transaction_async(struct btrfs_trans_handle *trans,
> ---
> 


--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[Linux NFS]     [Linux NILFS]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Photo]     [Yosemite News]    [Yosemite Photos]    [Free Online Dating]     [Linux Kernel]     [Linux SCSI]     [XFree86]

Add to Google Powered by Linux