On Mon, Feb 21, 2011 at 04:52:27PM +0800, Li Dongyang wrote:
> Here is batched discard support for btrfs, several changes were made:
>
> btrfs_test_opt(root, DISCARD) is moved from btrfs_discard_extent
> to callers, as we still want to trim the fs even it's not mounted
> with -o discard.
> btrfs_discard_extent now reports errors and actual bytes trimmed to
> callers, for EOPNOTSUPP, we will try other stripes as an extent
> could span SSD and other drives, and we won't return error to
> callers unless we failed with all stripes.
>
> And btrfs_discard_extent calls btrfs_map_block with READ, this means
> we won't get all stripes mapped for RAID1/DUP/RAID10, I think this
> should be fixed, Thanks.
>
> Signed-off-by: Li Dongyang <lidongyang@xxxxxxxxxx>
Reviewed-by: David Sterba <dsterba@xxxxxxx>
> ---
> fs/btrfs/ctree.h | 3 +-
> fs/btrfs/disk-io.c | 5 ++-
> fs/btrfs/extent-tree.c | 81 ++++++++++++++++++++++++++++++++++++-------
> fs/btrfs/free-space-cache.c | 79 +++++++++++++++++++++++++++++++++++++++++
> fs/btrfs/free-space-cache.h | 2 +
> fs/btrfs/ioctl.c | 24 +++++++++++++
> 6 files changed, 179 insertions(+), 15 deletions(-)
>
> diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
> index 2c98b3a..4486349 100644
> --- a/fs/btrfs/ctree.h
> +++ b/fs/btrfs/ctree.h
> @@ -2217,7 +2217,8 @@ u64 btrfs_account_ro_block_groups_free_space(struct btrfs_space_info *sinfo);
> int btrfs_error_unpin_extent_range(struct btrfs_root *root,
> u64 start, u64 end);
> int btrfs_error_discard_extent(struct btrfs_root *root, u64 bytenr,
> - u64 num_bytes);
> + u64 num_bytes, u64 *actual_bytes);
> +int btrfs_trim_fs(struct btrfs_root *root, struct fstrim_range *range);
>
> /* ctree.c */
> int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key,
> diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
> index e1aa8d6..bcb9451 100644
> --- a/fs/btrfs/disk-io.c
> +++ b/fs/btrfs/disk-io.c
> @@ -2947,7 +2947,10 @@ static int btrfs_destroy_pinned_extent(struct btrfs_root *root,
> break;
>
> /* opt_discard */
> - ret = btrfs_error_discard_extent(root, start, end + 1 - start);
> + if (btrfs_test_opt(root, DISCARD))
> + ret = btrfs_error_discard_extent(root, start,
> + end + 1 - start,
> + NULL);
>
> clear_extent_dirty(unpin, start, end, GFP_NOFS);
> btrfs_error_unpin_extent_range(root, start, end);
> diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
> index f3c96fc..7bed32a 100644
> --- a/fs/btrfs/extent-tree.c
> +++ b/fs/btrfs/extent-tree.c
> @@ -1740,22 +1740,20 @@ static int remove_extent_backref(struct btrfs_trans_handle *trans,
> return ret;
> }
>
> -static void btrfs_issue_discard(struct block_device *bdev,
> +static int btrfs_issue_discard(struct block_device *bdev,
> u64 start, u64 len)
> {
> - blkdev_issue_discard(bdev, start >> 9, len >> 9, GFP_KERNEL, 0);
> + return blkdev_issue_discard(bdev, start >> 9, len >> 9, GFP_KERNEL, 0);
> }
>
> static int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr,
> - u64 num_bytes)
> + u64 num_bytes, u64 *actual_bytes)
> {
> int ret;
> u64 map_length = num_bytes;
> + u64 discarded_bytes = 0;
> struct btrfs_multi_bio *multi = NULL;
>
> - if (!btrfs_test_opt(root, DISCARD))
> - return 0;
> -
> /* Tell the block device(s) that the sectors can be discarded */
> ret = btrfs_map_block(&root->fs_info->mapping_tree, READ,
> bytenr, &map_length, &multi, 0);
> @@ -1767,13 +1765,25 @@ static int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr,
> map_length = num_bytes;
>
> for (i = 0; i < multi->num_stripes; i++, stripe++) {
> - btrfs_issue_discard(stripe->dev->bdev,
> - stripe->physical,
> - map_length);
> + ret = btrfs_issue_discard(stripe->dev->bdev,
> + stripe->physical,
> + map_length);
> + if (!ret)
> + discarded_bytes += map_length;
> + else if (ret == -EOPNOTSUPP)
> + continue;
> + else
> + break;
> }
> kfree(multi);
> }
>
> + if (discarded_bytes)
> + ret = 0;
> +
> + if (actual_bytes)
> + *actual_bytes = discarded_bytes;
> +
> return ret;
> }
>
> @@ -4353,7 +4363,8 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
> if (ret)
> break;
>
> - ret = btrfs_discard_extent(root, start, end + 1 - start);
> + if (btrfs_test_opt(root, DISCARD))
> + ret = btrfs_discard_extent(root, start, end + 1 - start, NULL);
>
> clear_extent_dirty(unpin, start, end, GFP_NOFS);
> unpin_extent_range(root, start, end);
> @@ -5401,7 +5412,8 @@ int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len)
> return -ENOSPC;
> }
>
> - ret = btrfs_discard_extent(root, start, len);
> + if (btrfs_test_opt(root, DISCARD))
> + ret = btrfs_discard_extent(root, start, len, NULL);
>
> btrfs_add_free_space(cache, start, len);
> update_reserved_bytes(cache, len, 0, 1);
> @@ -8712,7 +8724,50 @@ int btrfs_error_unpin_extent_range(struct btrfs_root *root, u64 start, u64 end)
> }
>
> int btrfs_error_discard_extent(struct btrfs_root *root, u64 bytenr,
> - u64 num_bytes)
> + u64 num_bytes, u64 *actual_bytes)
> +{
> + return btrfs_discard_extent(root, bytenr, num_bytes, actual_bytes);
> +}
> +
> +int btrfs_trim_fs(struct btrfs_root *root, struct fstrim_range *range)
> {
> - return btrfs_discard_extent(root, bytenr, num_bytes);
> + struct btrfs_fs_info *fs_info = root->fs_info;
> + struct btrfs_block_group_cache *cache = NULL;
> + u64 cnt;
> + u64 start;
> + u64 end;
> + u64 trimmed = 0;
> + int ret = 0;
> +
> + cache = btrfs_lookup_block_group(fs_info, range->start);
> +
> + while (cache) {
> + if (cache->key.objectid >= (range->start + range->len)) {
> + btrfs_put_block_group(cache);
> + break;
> + }
> +
> + start = max(range->start, cache->key.objectid);
> + end = min(range->start + range->len,
> + cache->key.objectid + cache->key.offset);
> +
> + if (end - start >= range->minlen) {
> + ret = btrfs_trim_block_group(cache,
> + &cnt,
> + start,
> + end,
> + range->minlen);
> +
> + trimmed += cnt;
> + if (ret < 0) {
> + btrfs_put_block_group(cache);
> + break;
> + }
> + }
> +
> + cache = next_block_group(fs_info->tree_root, cache);
> + }
> +
> + range->len = trimmed;
> + return ret;
> }
> diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
> index a039065..a274df5 100644
> --- a/fs/btrfs/free-space-cache.c
> +++ b/fs/btrfs/free-space-cache.c
> @@ -2154,3 +2154,82 @@ void btrfs_init_free_cluster(struct btrfs_free_cluster *cluster)
> cluster->block_group = NULL;
> }
>
> +int btrfs_trim_block_group(struct btrfs_block_group_cache *block_group,
> + u64 *trimmed, u64 start, u64 end, u64 minlen)
> +{
> + struct btrfs_free_space *entry = NULL;
> + struct btrfs_fs_info *fs_info = block_group->fs_info;
> + u64 bytes = 0;
> + u64 actually_trimmed;
> + int ret = 0;
> +
> + *trimmed = 0;
> +
> + while (start < end) {
> + spin_lock(&block_group->tree_lock);
> + if (block_group->free_space < minlen) {
> + spin_unlock(&block_group->tree_lock);
> + break;
> + }
> +
> + entry = tree_search_offset(block_group, start, 0, 1);
> + if (!entry)
> + entry = tree_search_offset(block_group,
> + offset_to_bitmap(block_group,
> + start),
> + 1, 1);
> +
> + if (!entry || entry->offset >= end) {
> + spin_unlock(&block_group->tree_lock);
> + break;
> + }
> +
> + if (entry->bitmap) {
> + ret = search_bitmap(block_group, entry, &start, &bytes);
> + if (!ret) {
> + if (start >= end ) {
> + spin_unlock(&block_group->tree_lock);
> + break;
> + }
> + bytes = min(bytes, end - start);
> + } else {
> + start = entry->offset + BITS_PER_BITMAP *
> + block_group->sectorsize;
> + spin_unlock(&block_group->tree_lock);
> + continue;
> + }
> + } else {
> + start = entry->offset;
> + bytes = min(entry->bytes, end - start);
> + }
> +
> + spin_unlock(&block_group->tree_lock);
> +
> + if (bytes >= minlen && !btrfs_remove_free_space(block_group,
> + start,
> + bytes)) {
> + ret = btrfs_error_discard_extent(fs_info->extent_root,
> + start,
> + bytes,
> + &actually_trimmed);
> +
> + btrfs_add_free_space(block_group,
> + start, bytes);
> + if (ret)
> + break;
> + *trimmed += actually_trimmed;
> + }
> + start += bytes;
> + bytes = 0;
> +
> + if (fatal_signal_pending(current)) {
> + ret = -ERESTARTSYS;
> + break;
> + }
> +
> + if (need_resched())
> + cond_resched();
> + }
> +
> + return ret;
> +}
> diff --git a/fs/btrfs/free-space-cache.h b/fs/btrfs/free-space-cache.h
> index e49ca5c..65c3b93 100644
> --- a/fs/btrfs/free-space-cache.h
> +++ b/fs/btrfs/free-space-cache.h
> @@ -68,4 +68,6 @@ u64 btrfs_alloc_from_cluster(struct btrfs_block_group_cache *block_group,
> int btrfs_return_cluster_to_free_space(
> struct btrfs_block_group_cache *block_group,
> struct btrfs_free_cluster *cluster);
> +int btrfs_trim_block_group(struct btrfs_block_group_cache *block_group,
> + u64 *trimmed, u64 start, u64 end, u64 minlen);
> #endif
> diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
> index be2d4f6..ecd3982 100644
> --- a/fs/btrfs/ioctl.c
> +++ b/fs/btrfs/ioctl.c
> @@ -225,6 +225,28 @@ static int btrfs_ioctl_getversion(struct file *file, int __user *arg)
> return put_user(inode->i_generation, arg);
> }
>
> +static noinline int btrfs_ioctl_fitrim(struct file *file, void __user *arg)
> +{
> + struct btrfs_root *root = fdentry(file)->d_sb->s_fs_info;
> + struct fstrim_range range;
> + int ret;
> +
> + if (!capable(CAP_SYS_ADMIN))
> + return -EPERM;
> +
> + if (copy_from_user(&range, arg, sizeof(range)))
> + return -EFAULT;
> +
> + ret = btrfs_trim_fs(root, &range);
> + if (ret < 0)
> + return ret;
> +
> + if (copy_to_user(arg, &range, sizeof(range)))
> + return -EFAULT;
> +
> + return 0;
> +}
> +
> static noinline int create_subvol(struct btrfs_root *root,
> struct dentry *dentry,
> char *name, int namelen,
> @@ -2385,6 +2407,8 @@ long btrfs_ioctl(struct file *file, unsigned int
> return btrfs_ioctl_setflags(file, argp);
> case FS_IOC_GETVERSION:
> return btrfs_ioctl_getversion(file, argp);
> + case FITRIM:
> + return btrfs_ioctl_fitrim(file, argp);
> case BTRFS_IOC_SNAP_CREATE:
> return btrfs_ioctl_snap_create(file, argp, 0);
> case BTRFS_IOC_SNAP_CREATE_V2:
> --
> 1.7.1
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
> the body of a message to majordomo@xxxxxxxxxxxxxxx
> More majordomo info at http://vger.kernel.org/majordomo-info.html
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html