Re: [PATCH] Btrfs: don't take the chunk_mutex/dev_list mutex in statfs V2

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Mon, 3 Nov 2014 08:56:50 -0500, Josef Bacik wrote:
> Our gluster boxes get several thousand statfs() calls per second, which begins
> to suck hardcore with all of the lock contention on the chunk mutex and dev list
> mutex.  We don't really need to hold these things, if we have transient
> weirdness with statfs() because of the chunk allocator we don't care, so remove
> this locking.
> 
> We still need the dev_list lock if you mount with -o alloc_start however, which
> is a good argument for nuking that thing from orbit, but that's a patch for
> another day.  Thanks,
> 
> Signed-off-by: Josef Bacik <jbacik@xxxxxx>
> ---
> V1->V2: make sure ->alloc_start is set before doing the dev extent lookup logic.

I am strange that why we need dev_list_lock if we mount with -o alloc_start. AFAIK.
->alloc_start is protected by chunk_mutex.

But I think we needn't care that someone changes ->alloc_start, in other words, 
we needn't take chunk_mutex during the whole process, the following case can be
tolerated by the users, I think.

	Task1						Task2
	statfs
	  mutex_lock(&fs_info->chunk_mutex);
	  tmp = fs_info->alloc_start;
	  mutex_unlock(&fs_info->chunk_mutex);
	  btrfs_calc_avail_data_space(fs_info, tmp)
	    ...
							mount -o remount,alloc_start=xxxx
	    ...

Thanks
Miao

> 
>  fs/btrfs/super.c | 72 ++++++++++++++++++++++++++++++++++++--------------------
>  1 file changed, 47 insertions(+), 25 deletions(-)
> 
> diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
> index 54bd91e..dc337d1 100644
> --- a/fs/btrfs/super.c
> +++ b/fs/btrfs/super.c
> @@ -1644,8 +1644,20 @@ static int btrfs_calc_avail_data_space(struct btrfs_root *root, u64 *free_bytes)
>  	int i = 0, nr_devices;
>  	int ret;
>  
> +	/*
> +	 * We aren't under the device list lock, so this is racey-ish, but good
> +	 * enough for our purposes.
> +	 */
>  	nr_devices = fs_info->fs_devices->open_devices;
> -	BUG_ON(!nr_devices);
> +	if (!nr_devices) {
> +		smp_mb();
> +		nr_devices = fs_info->fs_devices->open_devices;
> +		ASSERT(nr_devices);
> +		if (!nr_devices) {
> +			*free_bytes = 0;
> +			return 0;
> +		}
> +	}
>  
>  	devices_info = kmalloc_array(nr_devices, sizeof(*devices_info),
>  			       GFP_NOFS);
> @@ -1670,11 +1682,17 @@ static int btrfs_calc_avail_data_space(struct btrfs_root *root, u64 *free_bytes)
>  	else
>  		min_stripe_size = BTRFS_STRIPE_LEN;
>  
> -	list_for_each_entry(device, &fs_devices->devices, dev_list) {
> +	if (fs_info->alloc_start)
> +		mutex_lock(&fs_devices->device_list_mutex);
> +	rcu_read_lock();
> +	list_for_each_entry_rcu(device, &fs_devices->devices, dev_list) {
>  		if (!device->in_fs_metadata || !device->bdev ||
>  		    device->is_tgtdev_for_dev_replace)
>  			continue;
>  
> +		if (i >= nr_devices)
> +			break;
> +
>  		avail_space = device->total_bytes - device->bytes_used;
>  
>  		/* align with stripe_len */
> @@ -1689,24 +1707,32 @@ static int btrfs_calc_avail_data_space(struct btrfs_root *root, u64 *free_bytes)
>  		skip_space = 1024 * 1024;
>  
>  		/* user can set the offset in fs_info->alloc_start. */
> -		if (fs_info->alloc_start + BTRFS_STRIPE_LEN <=
> -		    device->total_bytes)
> +		if (fs_info->alloc_start &&
> +		    fs_info->alloc_start + BTRFS_STRIPE_LEN <=
> +		    device->total_bytes) {
> +			rcu_read_unlock();
>  			skip_space = max(fs_info->alloc_start, skip_space);
>  
> -		/*
> -		 * btrfs can not use the free space in [0, skip_space - 1],
> -		 * we must subtract it from the total. In order to implement
> -		 * it, we account the used space in this range first.
> -		 */
> -		ret = btrfs_account_dev_extents_size(device, 0, skip_space - 1,
> -						     &used_space);
> -		if (ret) {
> -			kfree(devices_info);
> -			return ret;
> -		}
> +			/*
> +			 * btrfs can not use the free space in
> +			 * [0, skip_space - 1], we must subtract it from the
> +			 * total. In order to implement it, we account the used
> +			 * space in this range first.
> +			 */
> +			ret = btrfs_account_dev_extents_size(device, 0,
> +							     skip_space - 1,
> +							     &used_space);
> +			if (ret) {
> +				kfree(devices_info);
> +				mutex_unlock(&fs_devices->device_list_mutex);
> +				return ret;
> +			}
>  
> -		/* calc the free space in [0, skip_space - 1] */
> -		skip_space -= used_space;
> +			rcu_read_lock();
> +
> +			/* calc the free space in [0, skip_space - 1] */
> +			skip_space -= used_space;
> +		}
>  
>  		/*
>  		 * we can use the free space in [0, skip_space - 1], subtract
> @@ -1725,6 +1751,9 @@ static int btrfs_calc_avail_data_space(struct btrfs_root *root, u64 *free_bytes)
>  
>  		i++;
>  	}
> +	rcu_read_unlock();
> +	if (fs_info->alloc_start)
> +		mutex_unlock(&fs_devices->device_list_mutex);
>  
>  	nr_devices = i;
>  
> @@ -1787,8 +1816,6 @@ static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf)
>  	 * holding chunk_muext to avoid allocating new chunks, holding
>  	 * device_list_mutex to avoid the device being removed
>  	 */
> -	mutex_lock(&fs_info->fs_devices->device_list_mutex);
> -	mutex_lock(&fs_info->chunk_mutex);
>  	rcu_read_lock();
>  	list_for_each_entry_rcu(found, head, list) {
>  		if (found->flags & BTRFS_BLOCK_GROUP_DATA) {
> @@ -1826,15 +1853,10 @@ static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf)
>  
>  	buf->f_bavail = total_free_data;
>  	ret = btrfs_calc_avail_data_space(fs_info->tree_root, &total_free_data);
> -	if (ret) {
> -		mutex_unlock(&fs_info->chunk_mutex);
> -		mutex_unlock(&fs_info->fs_devices->device_list_mutex);
> +	if (ret)
>  		return ret;
> -	}
>  	buf->f_bavail += div_u64(total_free_data, factor);
>  	buf->f_bavail = buf->f_bavail >> bits;
> -	mutex_unlock(&fs_info->chunk_mutex);
> -	mutex_unlock(&fs_info->fs_devices->device_list_mutex);
>  
>  	buf->f_type = BTRFS_SUPER_MAGIC;
>  	buf->f_bsize = dentry->d_sb->s_blocksize;
> 

--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html




[Index of Archives]     [Linux Filesystem Development]     [Linux NFS]     [Linux NILFS]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux