On Mon, 3 Nov 2014 08:56:50 -0500, Josef Bacik wrote:
> Our gluster boxes get several thousand statfs() calls per second, which begins
> to suck hardcore with all of the lock contention on the chunk mutex and dev list
> mutex. We don't really need to hold these things, if we have transient
> weirdness with statfs() because of the chunk allocator we don't care, so remove
> this locking.
>
> We still need the dev_list lock if you mount with -o alloc_start however, which
> is a good argument for nuking that thing from orbit, but that's a patch for
> another day. Thanks,
>
> Signed-off-by: Josef Bacik <jbacik@xxxxxx>
> ---
> V1->V2: make sure ->alloc_start is set before doing the dev extent lookup logic.
I am strange that why we need dev_list_lock if we mount with -o alloc_start. AFAIK.
->alloc_start is protected by chunk_mutex.
But I think we needn't care that someone changes ->alloc_start, in other words,
we needn't take chunk_mutex during the whole process, the following case can be
tolerated by the users, I think.
Task1 Task2
statfs
mutex_lock(&fs_info->chunk_mutex);
tmp = fs_info->alloc_start;
mutex_unlock(&fs_info->chunk_mutex);
btrfs_calc_avail_data_space(fs_info, tmp)
...
mount -o remount,alloc_start=xxxx
...
Thanks
Miao
>
> fs/btrfs/super.c | 72 ++++++++++++++++++++++++++++++++++++--------------------
> 1 file changed, 47 insertions(+), 25 deletions(-)
>
> diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
> index 54bd91e..dc337d1 100644
> --- a/fs/btrfs/super.c
> +++ b/fs/btrfs/super.c
> @@ -1644,8 +1644,20 @@ static int btrfs_calc_avail_data_space(struct btrfs_root *root, u64 *free_bytes)
> int i = 0, nr_devices;
> int ret;
>
> + /*
> + * We aren't under the device list lock, so this is racey-ish, but good
> + * enough for our purposes.
> + */
> nr_devices = fs_info->fs_devices->open_devices;
> - BUG_ON(!nr_devices);
> + if (!nr_devices) {
> + smp_mb();
> + nr_devices = fs_info->fs_devices->open_devices;
> + ASSERT(nr_devices);
> + if (!nr_devices) {
> + *free_bytes = 0;
> + return 0;
> + }
> + }
>
> devices_info = kmalloc_array(nr_devices, sizeof(*devices_info),
> GFP_NOFS);
> @@ -1670,11 +1682,17 @@ static int btrfs_calc_avail_data_space(struct btrfs_root *root, u64 *free_bytes)
> else
> min_stripe_size = BTRFS_STRIPE_LEN;
>
> - list_for_each_entry(device, &fs_devices->devices, dev_list) {
> + if (fs_info->alloc_start)
> + mutex_lock(&fs_devices->device_list_mutex);
> + rcu_read_lock();
> + list_for_each_entry_rcu(device, &fs_devices->devices, dev_list) {
> if (!device->in_fs_metadata || !device->bdev ||
> device->is_tgtdev_for_dev_replace)
> continue;
>
> + if (i >= nr_devices)
> + break;
> +
> avail_space = device->total_bytes - device->bytes_used;
>
> /* align with stripe_len */
> @@ -1689,24 +1707,32 @@ static int btrfs_calc_avail_data_space(struct btrfs_root *root, u64 *free_bytes)
> skip_space = 1024 * 1024;
>
> /* user can set the offset in fs_info->alloc_start. */
> - if (fs_info->alloc_start + BTRFS_STRIPE_LEN <=
> - device->total_bytes)
> + if (fs_info->alloc_start &&
> + fs_info->alloc_start + BTRFS_STRIPE_LEN <=
> + device->total_bytes) {
> + rcu_read_unlock();
> skip_space = max(fs_info->alloc_start, skip_space);
>
> - /*
> - * btrfs can not use the free space in [0, skip_space - 1],
> - * we must subtract it from the total. In order to implement
> - * it, we account the used space in this range first.
> - */
> - ret = btrfs_account_dev_extents_size(device, 0, skip_space - 1,
> - &used_space);
> - if (ret) {
> - kfree(devices_info);
> - return ret;
> - }
> + /*
> + * btrfs can not use the free space in
> + * [0, skip_space - 1], we must subtract it from the
> + * total. In order to implement it, we account the used
> + * space in this range first.
> + */
> + ret = btrfs_account_dev_extents_size(device, 0,
> + skip_space - 1,
> + &used_space);
> + if (ret) {
> + kfree(devices_info);
> + mutex_unlock(&fs_devices->device_list_mutex);
> + return ret;
> + }
>
> - /* calc the free space in [0, skip_space - 1] */
> - skip_space -= used_space;
> + rcu_read_lock();
> +
> + /* calc the free space in [0, skip_space - 1] */
> + skip_space -= used_space;
> + }
>
> /*
> * we can use the free space in [0, skip_space - 1], subtract
> @@ -1725,6 +1751,9 @@ static int btrfs_calc_avail_data_space(struct btrfs_root *root, u64 *free_bytes)
>
> i++;
> }
> + rcu_read_unlock();
> + if (fs_info->alloc_start)
> + mutex_unlock(&fs_devices->device_list_mutex);
>
> nr_devices = i;
>
> @@ -1787,8 +1816,6 @@ static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf)
> * holding chunk_muext to avoid allocating new chunks, holding
> * device_list_mutex to avoid the device being removed
> */
> - mutex_lock(&fs_info->fs_devices->device_list_mutex);
> - mutex_lock(&fs_info->chunk_mutex);
> rcu_read_lock();
> list_for_each_entry_rcu(found, head, list) {
> if (found->flags & BTRFS_BLOCK_GROUP_DATA) {
> @@ -1826,15 +1853,10 @@ static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf)
>
> buf->f_bavail = total_free_data;
> ret = btrfs_calc_avail_data_space(fs_info->tree_root, &total_free_data);
> - if (ret) {
> - mutex_unlock(&fs_info->chunk_mutex);
> - mutex_unlock(&fs_info->fs_devices->device_list_mutex);
> + if (ret)
> return ret;
> - }
> buf->f_bavail += div_u64(total_free_data, factor);
> buf->f_bavail = buf->f_bavail >> bits;
> - mutex_unlock(&fs_info->chunk_mutex);
> - mutex_unlock(&fs_info->fs_devices->device_list_mutex);
>
> buf->f_type = BTRFS_SUPER_MAGIC;
> buf->f_bsize = dentry->d_sb->s_blocksize;
>
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html