Re: [PATCH] btrfs: push relocation recovery into a helper thread

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On 4/17/18 2:45 PM, Jeff Mahoney wrote:
> On a file system with many snapshots and qgroups enabled, an interrupted
> balance can end up taking a long time to mount due to recovering the
> relocations during mount.  It does this in the task performing the mount,
> which can't be interrupted and may prevent mounting (and systems booting)
> for a long time as well.  The thing is that as part of balance, this
> runs in the background all the time.  This patch pushes the recovery
> into a helper thread and allows the mount to continue normally.  We hold
> off on resuming any paused balance operation until after the relocation
> has completed, disallow any new balance operations if it's running, and
> wait for it on umount and remounting read-only.
> 
> This doesn't do anything to address the relocation recovery operation
> taking a long time but does allow the file system to mount.

I'm abandoning this patch.  The right fix is to fix qgroups.  The
workload that I was targeting takes seconds to recover without qgroups
in the way.

-Jeff

> Signed-off-by: Jeff Mahoney <jeffm@xxxxxxxx>
> ---
>  fs/btrfs/ctree.h      |    7 +++
>  fs/btrfs/disk-io.c    |    7 ++-
>  fs/btrfs/relocation.c |   92 +++++++++++++++++++++++++++++++++++++++++---------
>  fs/btrfs/super.c      |    5 +-
>  fs/btrfs/volumes.c    |    6 +++
>  5 files changed, 97 insertions(+), 20 deletions(-)
> 
> --- a/fs/btrfs/ctree.h
> +++ b/fs/btrfs/ctree.h
> @@ -1052,6 +1052,10 @@ struct btrfs_fs_info {
>  	struct btrfs_work qgroup_rescan_work;
>  	bool qgroup_rescan_running;	/* protected by qgroup_rescan_lock */
>  
> +	/* relocation recovery items */
> +	bool relocation_recovery_started;
> +	struct completion relocation_recovery_completion;
> +
>  	/* filesystem state */
>  	unsigned long fs_state;
>  
> @@ -3671,7 +3675,8 @@ int btrfs_init_reloc_root(struct btrfs_t
>  			  struct btrfs_root *root);
>  int btrfs_update_reloc_root(struct btrfs_trans_handle *trans,
>  			    struct btrfs_root *root);
> -int btrfs_recover_relocation(struct btrfs_root *root);
> +int btrfs_recover_relocation(struct btrfs_fs_info *fs_info);
> +void btrfs_wait_for_relocation_completion(struct btrfs_fs_info *fs_info);
>  int btrfs_reloc_clone_csums(struct inode *inode, u64 file_pos, u64 len);
>  int btrfs_reloc_cow_block(struct btrfs_trans_handle *trans,
>  			  struct btrfs_root *root, struct extent_buffer *buf,
> --- a/fs/btrfs/disk-io.c
> +++ b/fs/btrfs/disk-io.c
> @@ -2999,7 +2999,7 @@ retry_root_backup:
>  			goto fail_qgroup;
>  
>  		mutex_lock(&fs_info->cleaner_mutex);
> -		ret = btrfs_recover_relocation(tree_root);
> +		ret = btrfs_recover_relocation(fs_info);
>  		mutex_unlock(&fs_info->cleaner_mutex);
>  		if (ret < 0) {
>  			btrfs_warn(fs_info, "failed to recover relocation: %d",
> @@ -3017,7 +3017,8 @@ retry_root_backup:
>  	if (IS_ERR(fs_info->fs_root)) {
>  		err = PTR_ERR(fs_info->fs_root);
>  		btrfs_warn(fs_info, "failed to read fs tree: %d", err);
> -		goto fail_qgroup;
> +		close_ctree(fs_info);
> +		return err;
>  	}
>  
>  	if (sb_rdonly(sb))
> @@ -3778,6 +3779,8 @@ void close_ctree(struct btrfs_fs_info *f
>  	/* wait for the qgroup rescan worker to stop */
>  	btrfs_qgroup_wait_for_completion(fs_info, false);
>  
> +	btrfs_wait_for_relocation_completion(fs_info);
> +
>  	/* wait for the uuid_scan task to finish */
>  	down(&fs_info->uuid_tree_rescan_sem);
>  	/* avoid complains from lockdep et al., set sem back to initial state */
> --- a/fs/btrfs/relocation.c
> +++ b/fs/btrfs/relocation.c
> @@ -22,6 +22,7 @@
>  #include <linux/blkdev.h>
>  #include <linux/rbtree.h>
>  #include <linux/slab.h>
> +#include <linux/kthread.h>
>  #include "ctree.h"
>  #include "disk-io.h"
>  #include "transaction.h"
> @@ -4492,14 +4493,61 @@ static noinline_for_stack int mark_garba
>  }
>  
>  /*
> - * recover relocation interrupted by system crash.
> - *
>   * this function resumes merging reloc trees with corresponding fs trees.
>   * this is important for keeping the sharing of tree blocks
>   */
> -int btrfs_recover_relocation(struct btrfs_root *root)
> +static int
> +btrfs_resume_relocation(void *data)
>  {
> -	struct btrfs_fs_info *fs_info = root->fs_info;
> +	struct btrfs_fs_info *fs_info = data;
> +	struct btrfs_trans_handle *trans;
> +	struct reloc_control *rc = fs_info->reloc_ctl;
> +	int err, ret;
> +
> +	btrfs_info(fs_info, "resuming relocation");
> +
> +	BUG_ON(!rc);
> +
> +	mutex_lock(&fs_info->cleaner_mutex);
> +
> +	merge_reloc_roots(rc);
> +
> +	unset_reloc_control(rc);
> +
> +	trans = btrfs_join_transaction(rc->extent_root);
> +	if (IS_ERR(trans))
> +		err = PTR_ERR(trans);
> +	else {
> +		ret = btrfs_commit_transaction(trans);
> +		if (ret < 0)
> +			err = ret;
> +	}
> +
> +	kfree(rc);
> +
> +	if (err == 0) {
> +		struct btrfs_root *fs_root;
> +
> +		/* cleanup orphan inode in data relocation tree */
> +		fs_root = read_fs_root(fs_info, BTRFS_DATA_RELOC_TREE_OBJECTID);
> +		if (IS_ERR(fs_root))
> +			err = PTR_ERR(fs_root);
> +		else
> +			err = btrfs_orphan_cleanup(fs_root);
> +	}
> +	mutex_unlock(&fs_info->cleaner_mutex);
> +	clear_bit(BTRFS_FS_EXCL_OP, &fs_info->flags);
> +	complete_all(&fs_info->relocation_recovery_completion);
> +	return err;
> +}
> +
> +/*
> + * recover relocation interrupted by system crash.
> + * this function locates the relocation trees
> + */
> +int btrfs_recover_relocation(struct btrfs_fs_info *fs_info)
> +{
> +	struct btrfs_root *tree_root = fs_info->tree_root;
>  	LIST_HEAD(reloc_roots);
>  	struct btrfs_key key;
>  	struct btrfs_root *fs_root;
> @@ -4508,9 +4556,12 @@ int btrfs_recover_relocation(struct btrf
>  	struct extent_buffer *leaf;
>  	struct reloc_control *rc = NULL;
>  	struct btrfs_trans_handle *trans;
> +	struct task_struct *tsk;
>  	int ret;
>  	int err = 0;
>  
> +	WARN_ON(!rwsem_is_locked(&fs_info->sb->s_umount));
> +
>  	path = btrfs_alloc_path();
>  	if (!path)
>  		return -ENOMEM;
> @@ -4521,8 +4572,7 @@ int btrfs_recover_relocation(struct btrf
>  	key.offset = (u64)-1;
>  
>  	while (1) {
> -		ret = btrfs_search_slot(NULL, fs_info->tree_root, &key,
> -					path, 0, 0);
> +		ret = btrfs_search_slot(NULL, tree_root, &key, path, 0, 0);
>  		if (ret < 0) {
>  			err = ret;
>  			goto out;
> @@ -4540,7 +4590,7 @@ int btrfs_recover_relocation(struct btrf
>  		    key.type != BTRFS_ROOT_ITEM_KEY)
>  			break;
>  
> -		reloc_root = btrfs_read_fs_root(root, &key);
> +		reloc_root = btrfs_read_fs_root(tree_root, &key);
>  		if (IS_ERR(reloc_root)) {
>  			err = PTR_ERR(reloc_root);
>  			goto out;
> @@ -4620,16 +4670,21 @@ int btrfs_recover_relocation(struct btrf
>  	if (err)
>  		goto out_free;
>  
> -	merge_reloc_roots(rc);
> -
> -	unset_reloc_control(rc);
> -
> -	trans = btrfs_join_transaction(rc->extent_root);
> -	if (IS_ERR(trans)) {
> -		err = PTR_ERR(trans);
> +	tsk = kthread_run(btrfs_resume_relocation, fs_info,
> +			  "relocation-recovery");
> +	if (IS_ERR(tsk)) {
> +		err = PTR_ERR(tsk);
>  		goto out_free;
>  	}
> -	err = btrfs_commit_transaction(trans);
> +
> +	fs_info->relocation_recovery_started = true;
> +
> +	/* protected from racing with resume thread by the cleaner_mutex */
> +	init_completion(&fs_info->relocation_recovery_completion);
> +
> +	set_bit(BTRFS_FS_EXCL_OP, &fs_info->flags);
> +	return 0;
> +
>  out_free:
>  	kfree(rc);
>  out:
> @@ -4649,6 +4704,13 @@ out:
>  	return err;
>  }
>  
> +void
> +btrfs_wait_for_relocation_completion(struct btrfs_fs_info *fs_info)
> +{
> +	if (fs_info->relocation_recovery_started)
> +		wait_for_completion(&fs_info->relocation_recovery_completion);
> +}
> +
>  /*
>   * helper to add ordered checksum for data relocation.
>   *
> --- a/fs/btrfs/super.c
> +++ b/fs/btrfs/super.c
> @@ -1767,7 +1767,6 @@ static inline void btrfs_remount_cleanup
>  static int btrfs_remount(struct super_block *sb, int *flags, char *data)
>  {
>  	struct btrfs_fs_info *fs_info = btrfs_sb(sb);
> -	struct btrfs_root *root = fs_info->tree_root;
>  	unsigned old_flags = sb->s_flags;
>  	unsigned long old_opts = fs_info->mount_opt;
>  	unsigned long old_compress_type = fs_info->compress_type;
> @@ -1834,6 +1833,8 @@ static int btrfs_remount(struct super_bl
>  		btrfs_scrub_cancel(fs_info);
>  		btrfs_pause_balance(fs_info);
>  
> +		btrfs_wait_for_relocation_completion(fs_info);
> +
>  		ret = btrfs_commit_super(fs_info);
>  		if (ret)
>  			goto restore;
> @@ -1867,7 +1868,7 @@ static int btrfs_remount(struct super_bl
>  
>  		/* recover relocation */
>  		mutex_lock(&fs_info->cleaner_mutex);
> -		ret = btrfs_recover_relocation(root);
> +		ret = btrfs_recover_relocation(fs_info);
>  		mutex_unlock(&fs_info->cleaner_mutex);
>  		if (ret)
>  			goto restore;
> --- a/fs/btrfs/volumes.c
> +++ b/fs/btrfs/volumes.c
> @@ -4034,6 +4034,12 @@ static int balance_kthread(void *data)
>  	struct btrfs_fs_info *fs_info = data;
>  	int ret = 0;
>  
> +	if (fs_info->relocation_recovery_started) {
> +		btrfs_info(fs_info,
> +			   "waiting for relocation recovery before resuming balance");
> +		wait_for_completion(&fs_info->relocation_recovery_completion);
> +	}
> +
>  	mutex_lock(&fs_info->volume_mutex);
>  	mutex_lock(&fs_info->balance_mutex);
>  
> 


-- 
Jeff Mahoney
SUSE Labs

Attachment: signature.asc
Description: OpenPGP digital signature


[Index of Archives]     [Linux Filesystem Development]     [Linux NFS]     [Linux NILFS]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux