Any comments?
On Thu, 08 Sep 2011 16:18:09 +0800, Miao Xie wrote:
> This patch introduces a free space cluster for each node in the b+tree, and
> also simplifies how the cluster is filled and how space is allocated from it:
> - Allocate a free space cluster for each node.
> - Allocate a free space extent (>=256KB, by splitting a large extent or by
>   getting contiguous blocks from the bitmaps) from the block group cache and
>   cache it in the cluster, instead of moving free space entries from the
>   block group cache into the cluster directly. This way, we manage just one
>   extent in the cluster.
> - When allocating a tree block, we can get the space simply by splitting the
>   extent in the parent's cluster. This way, tree blocks can be allocated
>   concurrently, and the child nodes/leaves are stored in contiguous blocks
>   (see the sketch below).
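
To make the split-from-the-parent's-cluster allocation concrete, here is a
rough user-space sketch (not the patch's kernel code; names are simplified,
and the locking and the block-group fallback are omitted):

/*
 * Each node caches one contiguous extent and carves child blocks off
 * its front, loosely mirroring btrfs_free_cluster with a single ->info
 * extent.
 */
#include <stdio.h>

struct free_extent {
	unsigned long long offset;	/* start of the cached extent */
	unsigned long long bytes;	/* bytes left in it */
};

struct node_cluster {
	struct free_extent *info;	/* a single extent, as in the patch */
};

/* Split 'bytes' off the front of the parent's cluster, or fail. */
static long long alloc_from_cluster(struct node_cluster *c,
				    unsigned long long bytes)
{
	long long ret;

	if (!c->info || c->info->bytes < bytes)
		return -1;	/* caller falls back to the global cluster */

	ret = (long long)c->info->offset;
	c->info->offset += bytes;
	c->info->bytes -= bytes;
	return ret;
}

int main(void)
{
	struct free_extent ext = { .offset = 1 << 20, .bytes = 256 * 1024 };
	struct node_cluster cluster = { .info = &ext };
	int i;

	/* the children land in contiguous 4KB blocks */
	for (i = 0; i < 4; i++)
		printf("child %d at %lld\n", i,
		       alloc_from_cluster(&cluster, 4096));
	return 0;
}
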
>
> Besides that, we modify the space cache code to fit the above change. When
> we write out the free space information, we also write out the extent
> information held in the clusters. And when we load the free space
> information, we first try to merge the free space fragments, because the
> extents in the clusters are small and may be merged into a larger one (a
> sketch of the merge follows below).
> (Before this patch, we built the free space tree from the cached information
> directly. I don't think we need to worry about compatibility. At worst, we
> may create lots of small extents that could have been merged into a large
> one, but that will not break the use of Btrfs.)
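
The merge-on-load step can be pictured like this (again a simplified
user-space sketch, assuming the entries arrive sorted by offset; the real
code goes through try_merge_free_space() and the bitmap path instead):

/*
 * Adjacent [offset, offset + bytes) entries collapse into one while the
 * cache is loaded, so the small per-cluster leftovers that were written
 * out become large extents again.
 */
#include <stdio.h>

struct entry { unsigned long long offset, bytes; };

static int merge_sorted(struct entry *e, int n)
{
	int i, out = 0;

	for (i = 1; i < n; i++) {
		if (e[out].offset + e[out].bytes == e[i].offset)
			e[out].bytes += e[i].bytes;	/* contiguous: merge */
		else
			e[++out] = e[i];		/* gap: keep separate */
	}
	return n ? out + 1 : 0;
}

int main(void)
{
	/* small cluster leftovers written out by the space cache */
	struct entry e[] = { {0, 4096}, {4096, 4096}, {16384, 8192} };
	int i, n = merge_sorted(e, 3);

	for (i = 0; i < n; i++)
		printf("%llu+%llu\n", e[i].offset, e[i].bytes);
	return 0;	/* prints 0+8192 and 16384+8192 */
}
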
>
> We did some performance tests on this patch; it works well and improves
> small file performance.
>
> Sequential write performance for small files:
> N_files N_threads File Size Before After
> 10240 8 2KB(Inline) 2.2055MB/s 4.1779MB/s
> 10240 8 4KB 1.001MB/s 1.1893MB/s
>
> Signed-off-by: Miao Xie <miaox@xxxxxxxxxxxxxx>
> ---
> fs/btrfs/ctree.c | 28 ++-
> fs/btrfs/ctree.h | 50 +++-
> fs/btrfs/disk-io.c | 2 +-
> fs/btrfs/extent-tree.c | 107 +++++---
> fs/btrfs/extent_io.c | 7 +-
> fs/btrfs/extent_io.h | 3 +
> fs/btrfs/free-space-cache.c | 667 ++++++++++++++++---------------------------
> fs/btrfs/free-space-cache.h | 13 +
> fs/btrfs/inode-map.c | 1 +
> fs/btrfs/inode.c | 25 +-
> fs/btrfs/ioctl.c | 2 +-
> fs/btrfs/tree-log.c | 7 +
> 12 files changed, 419 insertions(+), 493 deletions(-)
>
> diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
> index 011cab3..1d3edd8 100644
> --- a/fs/btrfs/ctree.c
> +++ b/fs/btrfs/ctree.c
> @@ -238,7 +238,7 @@ int btrfs_copy_root(struct btrfs_trans_handle *trans,
> else
> btrfs_node_key(buf, &disk_key, 0);
>
> - cow = btrfs_alloc_free_block(trans, root, buf->len, 0,
> + cow = btrfs_alloc_free_block(trans, root, buf->len, NULL, 0,
> new_root_objectid, &disk_key, level,
> buf->start, 0);
> if (IS_ERR(cow))
> @@ -444,9 +444,10 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
> } else
> parent_start = 0;
>
> - cow = btrfs_alloc_free_block(trans, root, buf->len, parent_start,
> - root->root_key.objectid, &disk_key,
> - level, search_start, empty_size);
> + cow = btrfs_alloc_free_block(trans, root, buf->len, parent,
> + parent_start, root->root_key.objectid,
> + &disk_key, level, search_start,
> + empty_size);
> if (IS_ERR(cow))
> return PTR_ERR(cow);
>
> @@ -467,6 +468,10 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
> (unsigned long)btrfs_header_fsid(cow),
> BTRFS_FSID_SIZE);
>
> + WARN_ON(cow->cluster);
> + cow->cluster = buf->cluster;
> + buf->cluster = NULL;
> +
> update_ref_for_cow(trans, root, buf, cow, &last_ref);
>
> if (root->ref_cows)
> @@ -2070,7 +2075,7 @@ static noinline int insert_new_root(struct btrfs_trans_handle *trans,
> else
> btrfs_node_key(lower, &lower_key, 0);
>
> - c = btrfs_alloc_free_block(trans, root, root->nodesize, 0,
> + c = btrfs_alloc_free_block(trans, root, root->nodesize, NULL, 0,
> root->root_key.objectid, &lower_key,
> level, root->node->start, 0);
> if (IS_ERR(c))
> @@ -2170,6 +2175,7 @@ static noinline int split_node(struct btrfs_trans_handle *trans,
> {
> struct extent_buffer *c;
> struct extent_buffer *split;
> + struct extent_buffer *parent = NULL;
> struct btrfs_disk_key disk_key;
> int mid;
> int ret;
> @@ -2197,9 +2203,12 @@ static noinline int split_node(struct btrfs_trans_handle *trans,
> mid = (c_nritems + 1) / 2;
> btrfs_node_key(c, &disk_key, mid);
>
> - split = btrfs_alloc_free_block(trans, root, root->nodesize, 0,
> - root->root_key.objectid,
> - &disk_key, level, c->start, 0);
> + if (level + 1 < BTRFS_MAX_LEVEL)
> + parent = path->nodes[level + 1];
> +
> + split = btrfs_alloc_free_block(trans, root, root->nodesize, parent, 0,
> + root->root_key.objectid,
> + &disk_key, level, c->start, 0);
> if (IS_ERR(split))
> return PTR_ERR(split);
>
> @@ -2951,7 +2960,8 @@ again:
> else
> btrfs_item_key(l, &disk_key, mid);
>
> - right = btrfs_alloc_free_block(trans, root, root->leafsize, 0,
> + right = btrfs_alloc_free_block(trans, root, root->leafsize,
> + path->nodes[1], 0,
> root->root_key.objectid,
> &disk_key, 0, l->start, 0);
> if (IS_ERR(right))
> diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
> index c364d50..8c33a74 100644
> --- a/fs/btrfs/ctree.h
> +++ b/fs/btrfs/ctree.h
> @@ -789,14 +789,9 @@ struct btrfs_block_rsv {
> */
> struct btrfs_free_cluster {
> spinlock_t lock;
> - spinlock_t refill_lock;
> - struct rb_root root;
> + struct mutex refill_lock;
>
> - /* largest extent in this cluster */
> - u64 max_size;
> -
> - /* first extent starting offset */
> - u64 window_start;
> + struct btrfs_free_space *info;
>
> struct btrfs_block_group_cache *block_group;
> /*
> @@ -2156,7 +2151,9 @@ u64 btrfs_find_block_group(struct btrfs_root *root,
> u64 search_start, u64 search_hint, int owner);
> struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
> struct btrfs_root *root, u32 blocksize,
> - u64 parent, u64 root_objectid,
> + struct extent_buffer *parent,
> + u64 parent_start,
> + u64 root_objectid,
> struct btrfs_disk_key *key, int level,
> u64 hint, u64 empty_size);
> void btrfs_free_tree_block(struct btrfs_trans_handle *trans,
> @@ -2175,12 +2172,37 @@ int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans,
> struct btrfs_root *root,
> u64 root_objectid, u64 owner, u64 offset,
> struct btrfs_key *ins);
> -int btrfs_reserve_extent(struct btrfs_trans_handle *trans,
> - struct btrfs_root *root,
> - u64 num_bytes, u64 min_alloc_size,
> - u64 empty_size, u64 hint_byte,
> - u64 search_end, struct btrfs_key *ins,
> - u64 data);
> +int __btrfs_reserve_extent(struct btrfs_trans_handle *trans,
> + struct btrfs_root *root,
> + u64 num_bytes, u64 min_alloc_size,
> + u64 empty_size, u64 hint_byte,
> + u64 search_end, struct btrfs_key *ins,
> + u64 data, struct btrfs_free_cluster *cluster);
> +static inline int btrfs_reserve_extent_data(struct btrfs_trans_handle *trans,
> + struct btrfs_root *root,
> + u64 num_bytes, u64 min_alloc_size,
> + u64 empty_size, u64 hint_byte,
> + u64 search_end,
> + struct btrfs_key *ins)
> +{
> + return __btrfs_reserve_extent(trans, root, num_bytes, min_alloc_size,
> + empty_size, hint_byte, search_end, ins,
> + 1, NULL);
> +}
> +
> +static inline int btrfs_reserve_extent_mdata(struct btrfs_trans_handle *trans,
> + struct btrfs_root *root,
> + u64 num_bytes, u64 min_alloc_size,
> + u64 empty_size, u64 hint_byte,
> + u64 search_end,
> + struct btrfs_key *ins,
> + struct btrfs_free_cluster *cluster)
> +{
> + return __btrfs_reserve_extent(trans, root, num_bytes, min_alloc_size,
> + empty_size, hint_byte, search_end, ins,
> + 0, cluster);
> +}
> +
> int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
> struct extent_buffer *buf, int full_backref);
> int btrfs_dec_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
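
The header change above keeps one worker, __btrfs_reserve_extent(), and adds
thin inline wrappers that pin down the parameters that differ between data
and metadata. A user-space sketch of the same dispatch pattern (hypothetical
names, just to show the shape):

#include <stdio.h>

struct cluster;	/* opaque per-node cluster, as in the patch */

static int reserve_extent(unsigned long long bytes, int is_data,
			  struct cluster *c)
{
	printf("reserve %llu bytes, data=%d, cluster=%p\n",
	       bytes, is_data, (void *)c);
	return 0;
}

/* data allocations never use a per-node cluster */
static inline int reserve_data(unsigned long long bytes)
{
	return reserve_extent(bytes, 1, NULL);
}

/* metadata allocations may pass the parent's cluster, or NULL */
static inline int reserve_mdata(unsigned long long bytes, struct cluster *c)
{
	return reserve_extent(bytes, 0, c);
}

int main(void)
{
	reserve_data(4096);
	reserve_mdata(4096, NULL);	/* NULL: fs-wide metadata cluster */
	return 0;
}
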
> diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
> index 07b3ac6..951a57f 100644
> --- a/fs/btrfs/disk-io.c
> +++ b/fs/btrfs/disk-io.c
> @@ -1171,7 +1171,7 @@ static struct btrfs_root *alloc_log_tree(struct btrfs_trans_handle *trans,
> */
> root->ref_cows = 0;
>
> - leaf = btrfs_alloc_free_block(trans, root, root->leafsize, 0,
> + leaf = btrfs_alloc_free_block(trans, root, root->leafsize, NULL, 0,
> BTRFS_TREE_LOG_OBJECTID, NULL, 0, 0, 0);
> if (IS_ERR(leaf)) {
> kfree(root);
> diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
> index 80d6148..43cc5c4 100644
> --- a/fs/btrfs/extent-tree.c
> +++ b/fs/btrfs/extent-tree.c
> @@ -4672,6 +4672,8 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans,
> struct btrfs_block_group_cache *cache = NULL;
> int ret;
>
> + btrfs_free_extent_cluster(buf);
> +
> if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) {
> ret = btrfs_add_delayed_tree_ref(trans, buf->start, buf->len,
> parent, root->root_key.objectid,
> @@ -4853,8 +4855,10 @@ enum btrfs_loop_type {
> LOOP_FIND_IDEAL = 0,
> LOOP_CACHING_NOWAIT = 1,
> LOOP_CACHING_WAIT = 2,
> - LOOP_ALLOC_CHUNK = 3,
> - LOOP_NO_EMPTY_SIZE = 4,
> + LOOP_RECLAIM_CLUSTERS = 3,
> + LOOP_RECLAIM_ALL_CLUSTERS = 4,
> + LOOP_ALLOC_CHUNK = 5,
> + LOOP_NO_EMPTY_SIZE = 6,
> };
>
> /*
> @@ -4870,7 +4874,8 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans,
> u64 num_bytes, u64 empty_size,
> u64 search_start, u64 search_end,
> u64 hint_byte, struct btrfs_key *ins,
> - u64 data)
> + u64 data,
> + struct btrfs_free_cluster *cluster)
> {
> int ret = 0;
> struct btrfs_root *root = orig_root->fs_info->extent_root;
> @@ -4912,9 +4917,12 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans,
> allowed_chunk_alloc = 1;
>
> if (data & BTRFS_BLOCK_GROUP_METADATA && use_cluster) {
> - last_ptr = &root->fs_info->meta_alloc_cluster;
> + if (cluster)
> + last_ptr = cluster;
> + else
> + last_ptr = &root->fs_info->meta_alloc_cluster;
> if (!btrfs_test_opt(root, SSD))
> - empty_cluster = 64 * 1024;
> + empty_cluster = 256 * 1024;
> }
>
> if ((data & BTRFS_BLOCK_GROUP_DATA) && use_cluster &&
> @@ -4925,7 +4933,7 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans,
> if (last_ptr) {
> spin_lock(&last_ptr->lock);
> if (last_ptr->block_group)
> - hint_byte = last_ptr->window_start;
> + hint_byte = last_ptr->info->offset;
> spin_unlock(&last_ptr->lock);
> }
>
> @@ -5043,6 +5051,13 @@ have_block_group:
> if (unlikely(block_group->ro))
> goto loop;
>
> +
> + if (loop == LOOP_RECLAIM_CLUSTERS) {
> + btrfs_reclaim_extent_clusters(block_group,
> + empty_cluster * 2);
> + } else if (loop == LOOP_RECLAIM_ALL_CLUSTERS)
> + btrfs_reclaim_extent_clusters(block_group, 0);
> +
> spin_lock(&block_group->free_space_ctl->tree_lock);
> if (cached &&
> block_group->free_space_ctl->free_space <
> @@ -5066,7 +5081,7 @@ have_block_group:
> * the refill lock keeps out other
> * people trying to start a new cluster
> */
> - spin_lock(&last_ptr->refill_lock);
> + mutex_lock(&last_ptr->refill_lock);
> if (last_ptr->block_group &&
> (last_ptr->block_group->ro ||
> !block_group_bits(last_ptr->block_group, data))) {
> @@ -5078,7 +5093,7 @@ have_block_group:
> num_bytes, search_start);
> if (offset) {
> /* we have a block, we're done */
> - spin_unlock(&last_ptr->refill_lock);
> + mutex_unlock(&last_ptr->refill_lock);
> goto checks;
> }
>
> @@ -5097,7 +5112,7 @@ have_block_group:
> block_group = last_ptr->block_group;
> btrfs_get_block_group(block_group);
> spin_unlock(&last_ptr->lock);
> - spin_unlock(&last_ptr->refill_lock);
> + mutex_unlock(&last_ptr->refill_lock);
>
> last_ptr_loop = 1;
> search_start = block_group->key.objectid;
> @@ -5123,7 +5138,7 @@ refill_cluster:
> /* allocate a cluster in this block group */
> ret = btrfs_find_space_cluster(trans, root,
> block_group, last_ptr,
> - offset, num_bytes,
> + search_start, num_bytes,
> empty_cluster + empty_size);
> if (ret == 0) {
> /*
> @@ -5135,12 +5150,12 @@ refill_cluster:
> search_start);
> if (offset) {
> /* we found one, proceed */
> - spin_unlock(&last_ptr->refill_lock);
> + mutex_unlock(&last_ptr->refill_lock);
> goto checks;
> }
> } else if (!cached && loop > LOOP_CACHING_NOWAIT
> && !failed_cluster_refill) {
> - spin_unlock(&last_ptr->refill_lock);
> + mutex_unlock(&last_ptr->refill_lock);
>
> failed_cluster_refill = true;
> wait_block_group_cache_progress(block_group,
> @@ -5155,7 +5170,7 @@ refill_cluster:
> * to use, and go to the next block group
> */
> btrfs_return_cluster_to_free_space(NULL, last_ptr);
> - spin_unlock(&last_ptr->refill_lock);
> + mutex_unlock(&last_ptr->refill_lock);
> goto loop;
> }
>
> @@ -5197,9 +5212,11 @@ checks:
> ins->objectid = search_start;
> ins->offset = num_bytes;
>
> - if (offset < search_start)
> + if (offset < search_start) {
> btrfs_add_free_space(block_group, offset,
> search_start - offset);
> + offset = search_start;
> + }
> BUG_ON(offset > search_start);
>
> ret = btrfs_update_reserved_bytes(block_group, num_bytes, 1,
> @@ -5210,13 +5227,6 @@ checks:
> }
>
> /* we are all good, lets return */
> - ins->objectid = search_start;
> - ins->offset = num_bytes;
> -
> - if (offset < search_start)
> - btrfs_add_free_space(block_group, offset,
> - search_start - offset);
> - BUG_ON(offset > search_start);
> btrfs_put_block_group(block_group);
> break;
> loop:
> @@ -5236,6 +5246,12 @@ loop:
> * LOOP_CACHING_NOWAIT, search partially cached block groups, kicking
> * caching kthreads as we move along
> * LOOP_CACHING_WAIT, search everything, and wait if our bg is caching
> + * LOOP_RECLAIM_CLUSTERS, reclaim free space from some clusters, and
> + * by this way, we may find enough continuous
> + * space to fill the cluster, and then search
> + * the free space again.
> + * LOOP_RECLAIM_ALL_CLUSTERS, reclaim free space from all the clusters,
> + * and then search again.
> * LOOP_ALLOC_CHUNK, force a chunk allocation and try again
> * LOOP_NO_EMPTY_SIZE, set empty_size and empty_cluster to 0 and try
> * again
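
The two new stages slot into the allocator's escalating retry ladder. A
simplified model of how find_free_extent() walks these stages (the real
function also re-walks the block groups at each stage):

#include <stdio.h>

enum loop_type {
	LOOP_FIND_IDEAL,
	LOOP_CACHING_NOWAIT,
	LOOP_CACHING_WAIT,
	LOOP_RECLAIM_CLUSTERS,		/* new: reclaim some clusters */
	LOOP_RECLAIM_ALL_CLUSTERS,	/* new: reclaim every cluster */
	LOOP_ALLOC_CHUNK,
	LOOP_NO_EMPTY_SIZE,
};

/* stand-in for one pass over the block groups at a given stage */
static int try_alloc(enum loop_type loop, unsigned long long bytes)
{
	/* pretend only a full cluster reclaim frees enough room */
	return bytes > 0 && loop >= LOOP_RECLAIM_ALL_CLUSTERS;
}

int main(void)
{
	enum loop_type loop;

	for (loop = LOOP_FIND_IDEAL; loop <= LOOP_NO_EMPTY_SIZE; loop++) {
		if (try_alloc(loop, 16384)) {
			printf("allocated at stage %d\n", loop);
			return 0;
		}
	}
	printf("ENOSPC\n");
	return 1;
}
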
> @@ -5362,12 +5378,12 @@ again:
> up_read(&info->groups_sem);
> }
>
> -int btrfs_reserve_extent(struct btrfs_trans_handle *trans,
> - struct btrfs_root *root,
> - u64 num_bytes, u64 min_alloc_size,
> - u64 empty_size, u64 hint_byte,
> - u64 search_end, struct btrfs_key *ins,
> - u64 data)
> +int __btrfs_reserve_extent(struct btrfs_trans_handle *trans,
> + struct btrfs_root *root,
> + u64 num_bytes, u64 min_alloc_size,
> + u64 empty_size, u64 hint_byte,
> + u64 search_end, struct btrfs_key *ins,
> + u64 data, struct btrfs_free_cluster *cluster)
> {
> int ret;
> u64 search_start = 0;
> @@ -5386,7 +5402,7 @@ again:
> WARN_ON(num_bytes < root->sectorsize);
> ret = find_free_extent(trans, root, num_bytes, empty_size,
> search_start, search_end, hint_byte,
> - ins, data);
> + ins, data, cluster);
>
> if (ret == -ENOSPC && num_bytes > min_alloc_size) {
> num_bytes = num_bytes >> 1;
> @@ -5741,13 +5757,15 @@ static void unuse_block_rsv(struct btrfs_block_rsv *block_rsv, u32 blocksize)
> */
> struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
> struct btrfs_root *root, u32 blocksize,
> - u64 parent, u64 root_objectid,
> + struct extent_buffer *parent,
> + u64 parent_start, u64 root_objectid,
> struct btrfs_disk_key *key, int level,
> u64 hint, u64 empty_size)
> {
> struct btrfs_key ins;
> struct btrfs_block_rsv *block_rsv;
> struct extent_buffer *buf;
> + struct btrfs_free_cluster *cluster = NULL;
> u64 flags = 0;
> int ret;
>
> @@ -5756,8 +5774,19 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
> if (IS_ERR(block_rsv))
> return ERR_CAST(block_rsv);
>
> - ret = btrfs_reserve_extent(trans, root, blocksize, blocksize,
> - empty_size, hint, (u64)-1, &ins, 0);
> + /*
> + * We needn't worry about allocation failure, because if failed,
> + * we will use the default metadata cluster in fs_info
> + */
> + if (parent && !parent->cluster)
> + parent->cluster = btrfs_alloc_free_cluster();
> +
> + if (parent)
> + cluster = parent->cluster;
> +
> + ret = btrfs_reserve_extent_mdata(trans, root, blocksize, blocksize,
> + empty_size, hint, (u64)-1, &ins,
> + cluster);
> if (ret) {
> unuse_block_rsv(block_rsv, blocksize);
> return ERR_PTR(ret);
> @@ -5768,11 +5797,11 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
> BUG_ON(IS_ERR(buf));
>
> if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
> - if (parent == 0)
> - parent = ins.objectid;
> + if (parent_start == 0)
> + parent_start = ins.objectid;
> flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
> } else
> - BUG_ON(parent > 0);
> + BUG_ON(parent_start > 0);
>
> if (root_objectid != BTRFS_TREE_LOG_OBJECTID) {
> struct btrfs_delayed_extent_op *extent_op;
> @@ -5788,7 +5817,7 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
> extent_op->is_data = 0;
>
> ret = btrfs_add_delayed_tree_ref(trans, ins.objectid,
> - ins.offset, parent, root_objectid,
> + ins.offset, parent_start, root_objectid,
> level, BTRFS_ADD_DELAYED_EXTENT,
> extent_op);
> BUG_ON(ret);
> @@ -7231,18 +7260,18 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
>
> /* make sure this block group isn't part of an allocation cluster */
> cluster = &root->fs_info->data_alloc_cluster;
> - spin_lock(&cluster->refill_lock);
> + mutex_lock(&cluster->refill_lock);
> btrfs_return_cluster_to_free_space(block_group, cluster);
> - spin_unlock(&cluster->refill_lock);
> + mutex_unlock(&cluster->refill_lock);
>
> /*
> * make sure this block group isn't part of a metadata
> * allocation cluster
> */
> cluster = &root->fs_info->meta_alloc_cluster;
> - spin_lock(&cluster->refill_lock);
> + mutex_lock(&cluster->refill_lock);
> btrfs_return_cluster_to_free_space(block_group, cluster);
> - spin_unlock(&cluster->refill_lock);
> + mutex_unlock(&cluster->refill_lock);
>
> path = btrfs_alloc_path();
> if (!path) {
> diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
> index d418164..78196f2 100644
> --- a/fs/btrfs/extent_io.c
> +++ b/fs/btrfs/extent_io.c
> @@ -17,6 +17,7 @@
> #include "compat.h"
> #include "ctree.h"
> #include "btrfs_inode.h"
> +#include "free-space-cache.h"
>
> static struct kmem_cache *extent_state_cache;
> static struct kmem_cache *extent_buffer_cache;
> @@ -2988,6 +2989,7 @@ static struct extent_buffer *__alloc_extent_buffer(struct extent_io_tree *tree,
> spin_unlock_irqrestore(&leak_lock, flags);
> #endif
> atomic_set(&eb->refs, 1);
> + eb->cluster = NULL;
>
> return eb;
> }
> @@ -3827,7 +3829,10 @@ out:
> spin_unlock(&tree->buffer_lock);
>
> /* at this point we can safely release the extent buffer */
> - if (atomic_read(&eb->refs) == 0)
> + if (atomic_read(&eb->refs) == 0) {
> + btrfs_free_extent_cluster(eb);
> call_rcu(&eb->rcu_head, btrfs_release_extent_buffer_rcu);
> + }
> return ret;
> }
> +
> diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
> index 7b2f0c3..8ee8953 100644
> --- a/fs/btrfs/extent_io.h
> +++ b/fs/btrfs/extent_io.h
> @@ -146,6 +146,9 @@ struct extent_buffer {
> * to unlock
> */
> wait_queue_head_t read_lock_wq;
> +
> + /* Used for the node to allocate space for its children */
> + struct btrfs_free_cluster *cluster;
> };
>
> static inline void extent_set_compress_type(unsigned long *bio_flags,
> diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
> index e555899..e540e77 100644
> --- a/fs/btrfs/free-space-cache.c
> +++ b/fs/btrfs/free-space-cache.c
> @@ -31,9 +31,15 @@
> #define MAX_CACHE_BYTES_PER_GIG (32 * 1024)
>
> static struct kmem_cache *btrfs_free_space_cachep;
> +static struct kmem_cache *btrfs_free_cluster_cachep;
>
> static int link_free_space(struct btrfs_free_space_ctl *ctl,
> struct btrfs_free_space *info);
> +static bool try_merge_free_space(struct btrfs_free_space_ctl *ctl,
> + struct btrfs_free_space *info,
> + bool update_stat);
> +static int insert_into_bitmap(struct btrfs_free_space_ctl *ctl,
> + struct btrfs_free_space *info, bool update_stat);
>
> int __init free_space_cache_init(void)
> {
> @@ -43,6 +49,14 @@ int __init free_space_cache_init(void)
> if (!btrfs_free_space_cachep)
> return -ENOMEM;
>
> + btrfs_free_cluster_cachep = kmem_cache_create("extent_clusters",
> + sizeof(struct btrfs_free_cluster), 0,
> + SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL);
> + if (!btrfs_free_cluster_cachep) {
> + kmem_cache_destroy(btrfs_free_space_cachep);
> + return -ENOMEM;
> + }
> +
> return 0;
> }
>
> @@ -50,6 +64,8 @@ void free_space_cache_exit(void)
> {
> if (btrfs_free_space_cachep)
> kmem_cache_destroy(btrfs_free_space_cachep);
> + if (btrfs_free_cluster_cachep)
> + kmem_cache_destroy(btrfs_free_cluster_cachep);
> }
>
> static struct inode *__lookup_free_space_inode(struct btrfs_root *root,
> @@ -397,11 +413,32 @@ int __load_free_space_cache(struct btrfs_root *root, struct inode *inode,
>
> if (entry->type == BTRFS_FREE_SPACE_EXTENT) {
> spin_lock(&ctl->tree_lock);
> + if (try_merge_free_space(ctl, e, true))
> + goto link;
> +
> + ret = insert_into_bitmap(ctl, e, true);
> + if (ret < 0) {
> + spin_unlock(&ctl->tree_lock);
> + printk(KERN_ERR "Inserting into bitmap "
> + "failed, dumping\n");
> + kmem_cache_free(btrfs_free_space_cachep,
> + e);
> + kunmap(page);
> + unlock_page(page);
> + page_cache_release(page);
> + goto free_cache;
> + } else if (ret) {
> + ret = 0;
> + goto next_entry;
> + }
> +link:
> ret = link_free_space(ctl, e);
> spin_unlock(&ctl->tree_lock);
> if (ret) {
> printk(KERN_ERR "Duplicate entries in "
> "free space cache, dumping\n");
> + kmem_cache_free(btrfs_free_space_cachep,
> + e);
> kunmap(page);
> unlock_page(page);
> page_cache_release(page);
> @@ -425,6 +462,9 @@ int __load_free_space_cache(struct btrfs_root *root, struct inode *inode,
> if (ret) {
> printk(KERN_ERR "Duplicate entries in "
> "free space cache, dumping\n");
> + kfree(e->bitmap);
> + kmem_cache_free(
> + btrfs_free_space_cachep, e);
> kunmap(page);
> unlock_page(page);
> page_cache_release(page);
> @@ -432,7 +472,7 @@ int __load_free_space_cache(struct btrfs_root *root, struct inode *inode,
> }
> list_add_tail(&e->list, &bitmaps);
> }
> -
> +next_entry:
> num_entries--;
> offset += sizeof(struct btrfs_free_space_entry);
> if (offset + sizeof(struct btrfs_free_space_entry) >=
> @@ -676,7 +716,8 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
> while (node && !next_page) {
> struct btrfs_free_space *e;
>
> - e = rb_entry(node, struct btrfs_free_space, offset_index);
> + e = rb_entry(node, struct btrfs_free_space,
> + offset_index);
> entries++;
>
> entry->offset = cpu_to_le64(e->offset);
> @@ -689,10 +730,39 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
> entry->type = BTRFS_FREE_SPACE_EXTENT;
> }
> node = rb_next(node);
> - if (!node && cluster) {
> - node = rb_first(&cluster->root);
> + offset += sizeof(struct btrfs_free_space_entry);
> + if (offset + sizeof(struct btrfs_free_space_entry) >=
> + PAGE_CACHE_SIZE)
> + next_page = true;
> + entry++;
> + }
> +
> + /*
> + * We want to write the extent in the cluster to our free space
> + * cache.
> + */
> + while (cluster && !next_page) {
> + struct btrfs_free_space *e;
> +
> + e = cluster->info;
> + if (!e || !e->bytes)
> + break;
> +
> + entries++;
> +
> + entry->offset = cpu_to_le64(e->offset);
> + entry->bytes = cpu_to_le64(e->bytes);
> + entry->type = BTRFS_FREE_SPACE_EXTENT;
> +
> + if (list_is_last(&cluster->block_group_list,
> + &block_group->cluster_list))
> cluster = NULL;
> - }
> + else
> + cluster = list_entry(
> + cluster->block_group_list.next,
> + struct btrfs_free_cluster,
> + block_group_list);
> +
> offset += sizeof(struct btrfs_free_space_entry);
> if (offset + sizeof(struct btrfs_free_space_entry) >=
> PAGE_CACHE_SIZE)
> @@ -1125,19 +1195,30 @@ static void unlink_free_space(struct btrfs_free_space_ctl *ctl,
> ctl->free_space -= info->bytes;
> }
>
> +static int __link_free_space(struct btrfs_free_space_ctl *ctl,
> + struct btrfs_free_space *info)
> +{
> + int ret;
> +
> + ret = tree_insert_offset(&ctl->free_space_offset, info->offset,
> + &info->offset_index, (info->bitmap != NULL));
> + if (ret)
> + return ret;
> + ctl->free_extents++;
> + return 0;
> +}
> +
> static int link_free_space(struct btrfs_free_space_ctl *ctl,
> struct btrfs_free_space *info)
> {
> - int ret = 0;
> + int ret;
>
> BUG_ON(!info->bitmap && !info->bytes);
> - ret = tree_insert_offset(&ctl->free_space_offset, info->offset,
> - &info->offset_index, (info->bitmap != NULL));
> + ret = __link_free_space(ctl, info);
> if (ret)
> return ret;
>
> ctl->free_space += info->bytes;
> - ctl->free_extents++;
> return ret;
> }
>
> @@ -1212,7 +1293,7 @@ static void bitmap_clear_bits(struct btrfs_free_space_ctl *ctl,
>
> static void bitmap_set_bits(struct btrfs_free_space_ctl *ctl,
> struct btrfs_free_space *info, u64 offset,
> - u64 bytes)
> + u64 bytes, bool update_stat)
> {
> unsigned long start, count;
>
> @@ -1223,7 +1304,8 @@ static void bitmap_set_bits(struct btrfs_free_space_ctl *ctl,
> bitmap_set(info->bitmap, start, count);
>
> info->bytes += bytes;
> - ctl->free_space += bytes;
> + if (update_stat)
> + ctl->free_space += bytes;
> }
>
> static int search_bitmap(struct btrfs_free_space_ctl *ctl,
> @@ -1394,7 +1476,7 @@ again:
>
> static u64 add_bytes_to_bitmap(struct btrfs_free_space_ctl *ctl,
> struct btrfs_free_space *info, u64 offset,
> - u64 bytes)
> + u64 bytes, bool update_stat)
> {
> u64 bytes_to_set = 0;
> u64 end;
> @@ -1403,7 +1485,7 @@ static u64 add_bytes_to_bitmap(struct btrfs_free_space_ctl *ctl,
>
> bytes_to_set = min(end - offset, bytes);
>
> - bitmap_set_bits(ctl, info, offset, bytes_to_set);
> + bitmap_set_bits(ctl, info, offset, bytes_to_set, update_stat);
>
> return bytes_to_set;
>
> @@ -1451,10 +1533,9 @@ static struct btrfs_free_space_op free_space_op = {
> };
>
> static int insert_into_bitmap(struct btrfs_free_space_ctl *ctl,
> - struct btrfs_free_space *info)
> + struct btrfs_free_space *info, bool update_stat)
> {
> struct btrfs_free_space *bitmap_info;
> - struct btrfs_block_group_cache *block_group = NULL;
> int added = 0;
> u64 bytes, offset, bytes_added;
> int ret;
> @@ -1465,49 +1546,7 @@ static int insert_into_bitmap(struct btrfs_free_space_ctl *ctl,
> if (!ctl->op->use_bitmap(ctl, info))
> return 0;
>
> - if (ctl->op == &free_space_op)
> - block_group = ctl->private;
> again:
> - /*
> - * Since we link bitmaps right into the cluster we need to see if we
> - * have a cluster here, and if so and it has our bitmap we need to add
> - * the free space to that bitmap.
> - */
> - if (block_group && !list_empty(&block_group->cluster_list)) {
> - struct btrfs_free_cluster *cluster;
> - struct rb_node *node;
> - struct btrfs_free_space *entry;
> -
> - cluster = list_entry(block_group->cluster_list.next,
> - struct btrfs_free_cluster,
> - block_group_list);
> - spin_lock(&cluster->lock);
> - node = rb_first(&cluster->root);
> - if (!node) {
> - spin_unlock(&cluster->lock);
> - goto no_cluster_bitmap;
> - }
> -
> - entry = rb_entry(node, struct btrfs_free_space, offset_index);
> - if (!entry->bitmap) {
> - spin_unlock(&cluster->lock);
> - goto no_cluster_bitmap;
> - }
> -
> - if (entry->offset == offset_to_bitmap(ctl, offset)) {
> - bytes_added = add_bytes_to_bitmap(ctl, entry,
> - offset, bytes);
> - bytes -= bytes_added;
> - offset += bytes_added;
> - }
> - spin_unlock(&cluster->lock);
> - if (!bytes) {
> - ret = 1;
> - goto out;
> - }
> - }
> -
> -no_cluster_bitmap:
> bitmap_info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset),
> 1, 0);
> if (!bitmap_info) {
> @@ -1515,7 +1554,8 @@ no_cluster_bitmap:
> goto new_bitmap;
> }
>
> - bytes_added = add_bytes_to_bitmap(ctl, bitmap_info, offset, bytes);
> + bytes_added = add_bytes_to_bitmap(ctl, bitmap_info, offset, bytes,
> + update_stat);
> bytes -= bytes_added;
> offset += bytes_added;
> added = 0;
> @@ -1533,6 +1573,12 @@ new_bitmap:
> info = NULL;
> goto again;
> } else {
> + if (current->flags & PF_MEMALLOC) {
> + printk(KERN_INFO "Alloc memory when reclaim "
> + "the pages\n");
> + return 0;
> + }
> +
> spin_unlock(&ctl->tree_lock);
>
> /* no pre-allocated info, allocate a new one */
> @@ -1635,7 +1681,7 @@ int __btrfs_add_free_space(struct btrfs_free_space_ctl *ctl,
> * extent then we know we're going to have to allocate a new extent, so
> * before we do that see if we need to drop this into a bitmap
> */
> - ret = insert_into_bitmap(ctl, info);
> + ret = insert_into_bitmap(ctl, info, true);
> if (ret < 0) {
> goto out;
> } else if (ret) {
> @@ -1829,36 +1875,50 @@ __btrfs_return_cluster_to_free_space(
> {
> struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
> struct btrfs_free_space *entry;
> - struct rb_node *node;
> + int ret = 0;
>
> spin_lock(&cluster->lock);
> - if (cluster->block_group != block_group)
> + if (cluster->block_group != block_group) {
> + spin_unlock(&cluster->lock);
> goto out;
> + }
>
> cluster->block_group = NULL;
> - cluster->window_start = 0;
> +
> + entry = cluster->info;
> + cluster->info = NULL;
> +
> list_del_init(&cluster->block_group_list);
> + spin_unlock(&cluster->lock);
>
> - node = rb_first(&cluster->root);
> - while (node) {
> - bool bitmap;
> + if (!entry)
> + goto out;
>
> - entry = rb_entry(node, struct btrfs_free_space, offset_index);
> - node = rb_next(&entry->offset_index);
> - rb_erase(&entry->offset_index, &cluster->root);
> -
> - bitmap = (entry->bitmap != NULL);
> - if (!bitmap)
> - try_merge_free_space(ctl, entry, false);
> - tree_insert_offset(&ctl->free_space_offset,
> - entry->offset, &entry->offset_index, bitmap);
> + if (!entry->bytes) {
> + kmem_cache_free(btrfs_free_space_cachep, entry);
> + goto out;
> }
> - cluster->root = RB_ROOT;
>
> + if (try_merge_free_space(ctl, entry, false))
> + goto link;
> +
> + ret = insert_into_bitmap(ctl, entry, false);
> + if (ret < 0)
> + goto out;
> + else if (ret) {
> + ret = 0;
> + goto out;
> + }
> +link:
> + ret = __link_free_space(ctl, entry);
> + if (ret) {
> + kmem_cache_free(btrfs_free_space_cachep, entry);
> + printk(KERN_ERR "Duplicate entries in free space cache, "
> + "dumping\n");
> + }
> out:
> - spin_unlock(&cluster->lock);
> btrfs_put_block_group(block_group);
> - return 0;
> + return ret;
> }
>
> void __btrfs_remove_free_space_cache_locked(struct btrfs_free_space_ctl *ctl)
> @@ -1889,29 +1949,58 @@ void __btrfs_remove_free_space_cache(struct btrfs_free_space_ctl *ctl)
> spin_unlock(&ctl->tree_lock);
> }
>
> -void btrfs_remove_free_space_cache(struct btrfs_block_group_cache *block_group)
> +static void __btrfs_reclaim_extent_clusters(
> + struct btrfs_block_group_cache *block_group,
> + u64 to_reclaim)
> {
> - struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
> struct btrfs_free_cluster *cluster;
> struct list_head *head;
> + u64 bytes;
> + bool reclaim_all = (to_reclaim == 0);
>
> - spin_lock(&ctl->tree_lock);
> while ((head = block_group->cluster_list.next) !=
> &block_group->cluster_list) {
> cluster = list_entry(head, struct btrfs_free_cluster,
> block_group_list);
>
> WARN_ON(cluster->block_group != block_group);
> +
> + bytes = cluster->info->bytes;
> __btrfs_return_cluster_to_free_space(block_group, cluster);
> +
> + if (!reclaim_all) {
> + if (to_reclaim > bytes)
> + to_reclaim -= bytes;
> + else {
> + to_reclaim = 0;
> + break;
> + }
> + }
> +
> if (need_resched()) {
> - spin_unlock(&ctl->tree_lock);
> + spin_unlock(&block_group->free_space_ctl->tree_lock);
> cond_resched();
> - spin_lock(&ctl->tree_lock);
> + spin_lock(&block_group->free_space_ctl->tree_lock);
> }
> }
> +}
> +
> +void btrfs_reclaim_extent_clusters(struct btrfs_block_group_cache *block_group,
> + u64 to_reclaim)
> +{
> + spin_lock(&block_group->free_space_ctl->tree_lock);
> + __btrfs_reclaim_extent_clusters(block_group, to_reclaim);
> + spin_unlock(&block_group->free_space_ctl->tree_lock);
> +}
> +
> +void btrfs_remove_free_space_cache(struct btrfs_block_group_cache *block_group)
> +{
> + struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
> +
> + spin_lock(&ctl->tree_lock);
> + __btrfs_reclaim_extent_clusters(block_group, 0);
> __btrfs_remove_free_space_cache_locked(ctl);
> spin_unlock(&ctl->tree_lock);
> -
> }
>
> u64 btrfs_find_space_for_alloc(struct btrfs_block_group_cache *block_group,
> @@ -1991,30 +2080,6 @@ int btrfs_return_cluster_to_free_space(
> return ret;
> }
>
> -static u64 btrfs_alloc_from_bitmap(struct btrfs_block_group_cache *block_group,
> - struct btrfs_free_cluster *cluster,
> - struct btrfs_free_space *entry,
> - u64 bytes, u64 min_start)
> -{
> - struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
> - int err;
> - u64 search_start = cluster->window_start;
> - u64 search_bytes = bytes;
> - u64 ret = 0;
> -
> - search_start = min_start;
> - search_bytes = bytes;
> -
> - err = search_bitmap(ctl, entry, &search_start, &search_bytes);
> - if (err)
> - return 0;
> -
> - ret = search_start;
> - __bitmap_clear_bits(ctl, entry, ret, bytes);
> -
> - return ret;
> -}
> -
> /*
> * given a cluster, try to allocate 'bytes' from it, returns 0
> * if it couldn't find anything suitably large, or a logical disk offset
> @@ -2025,56 +2090,26 @@ u64 btrfs_alloc_from_cluster(struct btrfs_block_group_cache *block_group,
> u64 min_start)
> {
> struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
> - struct btrfs_free_space *entry = NULL;
> - struct rb_node *node;
> u64 ret = 0;
>
> spin_lock(&cluster->lock);
> - if (bytes > cluster->max_size)
> - goto out;
> -
> if (cluster->block_group != block_group)
> goto out;
>
> - node = rb_first(&cluster->root);
> - if (!node)
> + /*
> + * If we set ->block_group, it means we have filled this cluster,
> + * and ->info also has been set. So we needn't check ->info is
> + * NULL or not now.
> + */
> + if (bytes > cluster->info->bytes)
> goto out;
>
> - entry = rb_entry(node, struct btrfs_free_space, offset_index);
> - while(1) {
> - if (entry->bytes < bytes ||
> - (!entry->bitmap && entry->offset < min_start)) {
> - node = rb_next(&entry->offset_index);
> - if (!node)
> - break;
> - entry = rb_entry(node, struct btrfs_free_space,
> - offset_index);
> - continue;
> - }
> -
> - if (entry->bitmap) {
> - ret = btrfs_alloc_from_bitmap(block_group,
> - cluster, entry, bytes,
> - min_start);
> - if (ret == 0) {
> - node = rb_next(&entry->offset_index);
> - if (!node)
> - break;
> - entry = rb_entry(node, struct btrfs_free_space,
> - offset_index);
> - continue;
> - }
> - } else {
> - ret = entry->offset;
> -
> - entry->offset += bytes;
> - entry->bytes -= bytes;
> - }
> + if (min_start >= cluster->info->offset + cluster->info->bytes)
> + goto out;
>
> - if (entry->bytes == 0)
> - rb_erase(&entry->offset_index, &cluster->root);
> - break;
> - }
> + ret = cluster->info->offset;
> + cluster->info->offset += bytes;
> + cluster->info->bytes -= bytes;
> out:
> spin_unlock(&cluster->lock);
>
> @@ -2082,260 +2117,12 @@ out:
> return 0;
>
> spin_lock(&ctl->tree_lock);
> -
> ctl->free_space -= bytes;
> - if (entry->bytes == 0) {
> - ctl->free_extents--;
> - if (entry->bitmap) {
> - kfree(entry->bitmap);
> - ctl->total_bitmaps--;
> - ctl->op->recalc_thresholds(ctl);
> - }
> - kmem_cache_free(btrfs_free_space_cachep, entry);
> - }
> -
> spin_unlock(&ctl->tree_lock);
>
> return ret;
> }
>
> -static int btrfs_bitmap_cluster(struct btrfs_block_group_cache *block_group,
> - struct btrfs_free_space *entry,
> - struct btrfs_free_cluster *cluster,
> - u64 offset, u64 bytes, u64 min_bytes)
> -{
> - struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
> - unsigned long next_zero;
> - unsigned long i;
> - unsigned long search_bits;
> - unsigned long total_bits;
> - unsigned long found_bits;
> - unsigned long start = 0;
> - unsigned long total_found = 0;
> - int ret;
> - bool found = false;
> -
> - i = offset_to_bit(entry->offset, block_group->sectorsize,
> - max_t(u64, offset, entry->offset));
> - search_bits = bytes_to_bits(bytes, block_group->sectorsize);
> - total_bits = bytes_to_bits(min_bytes, block_group->sectorsize);
> -
> -again:
> - found_bits = 0;
> - for (i = find_next_bit(entry->bitmap, BITS_PER_BITMAP, i);
> - i < BITS_PER_BITMAP;
> - i = find_next_bit(entry->bitmap, BITS_PER_BITMAP, i + 1)) {
> - next_zero = find_next_zero_bit(entry->bitmap,
> - BITS_PER_BITMAP, i);
> - if (next_zero - i >= search_bits) {
> - found_bits = next_zero - i;
> - break;
> - }
> - i = next_zero;
> - }
> -
> - if (!found_bits)
> - return -ENOSPC;
> -
> - if (!found) {
> - start = i;
> - found = true;
> - }
> -
> - total_found += found_bits;
> -
> - if (cluster->max_size < found_bits * block_group->sectorsize)
> - cluster->max_size = found_bits * block_group->sectorsize;
> -
> - if (total_found < total_bits) {
> - i = find_next_bit(entry->bitmap, BITS_PER_BITMAP, next_zero);
> - if (i - start > total_bits * 2) {
> - total_found = 0;
> - cluster->max_size = 0;
> - found = false;
> - }
> - goto again;
> - }
> -
> - cluster->window_start = start * block_group->sectorsize +
> - entry->offset;
> - rb_erase(&entry->offset_index, &ctl->free_space_offset);
> - ret = tree_insert_offset(&cluster->root, entry->offset,
> - &entry->offset_index, 1);
> - BUG_ON(ret);
> -
> - return 0;
> -}
> -
> -/*
> - * This searches the block group for just extents to fill the cluster with.
> - */
> -static noinline int
> -setup_cluster_no_bitmap(struct btrfs_block_group_cache *block_group,
> - struct btrfs_free_cluster *cluster,
> - struct list_head *bitmaps, u64 offset, u64 bytes,
> - u64 min_bytes)
> -{
> - struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
> - struct btrfs_free_space *first = NULL;
> - struct btrfs_free_space *entry = NULL;
> - struct btrfs_free_space *prev = NULL;
> - struct btrfs_free_space *last;
> - struct rb_node *node;
> - u64 window_start;
> - u64 window_free;
> - u64 max_extent;
> - u64 max_gap = 128 * 1024;
> -
> - entry = tree_search_offset(ctl, offset, 0, 1);
> - if (!entry)
> - return -ENOSPC;
> -
> - /*
> - * We don't want bitmaps, so just move along until we find a normal
> - * extent entry.
> - */
> - while (entry->bitmap) {
> - if (list_empty(&entry->list))
> - list_add_tail(&entry->list, bitmaps);
> - node = rb_next(&entry->offset_index);
> - if (!node)
> - return -ENOSPC;
> - entry = rb_entry(node, struct btrfs_free_space, offset_index);
> - }
> -
> - window_start = entry->offset;
> - window_free = entry->bytes;
> - max_extent = entry->bytes;
> - first = entry;
> - last = entry;
> - prev = entry;
> -
> - while (window_free <= min_bytes) {
> - node = rb_next(&entry->offset_index);
> - if (!node)
> - return -ENOSPC;
> - entry = rb_entry(node, struct btrfs_free_space, offset_index);
> -
> - if (entry->bitmap) {
> - if (list_empty(&entry->list))
> - list_add_tail(&entry->list, bitmaps);
> - continue;
> - }
> -
> - /*
> - * we haven't filled the empty size and the window is
> - * very large. reset and try again
> - */
> - if (entry->offset - (prev->offset + prev->bytes) > max_gap ||
> - entry->offset - window_start > (min_bytes * 2)) {
> - first = entry;
> - window_start = entry->offset;
> - window_free = entry->bytes;
> - last = entry;
> - max_extent = entry->bytes;
> - } else {
> - last = entry;
> - window_free += entry->bytes;
> - if (entry->bytes > max_extent)
> - max_extent = entry->bytes;
> - }
> - prev = entry;
> - }
> -
> - cluster->window_start = first->offset;
> -
> - node = &first->offset_index;
> -
> - /*
> - * now we've found our entries, pull them out of the free space
> - * cache and put them into the cluster rbtree
> - */
> - do {
> - int ret;
> -
> - entry = rb_entry(node, struct btrfs_free_space, offset_index);
> - node = rb_next(&entry->offset_index);
> - if (entry->bitmap)
> - continue;
> -
> - rb_erase(&entry->offset_index, &ctl->free_space_offset);
> - ret = tree_insert_offset(&cluster->root, entry->offset,
> - &entry->offset_index, 0);
> - BUG_ON(ret);
> - } while (node && entry != last);
> -
> - cluster->max_size = max_extent;
> -
> - return 0;
> -}
> -
> -/*
> - * This specifically looks for bitmaps that may work in the cluster, we assume
> - * that we have already failed to find extents that will work.
> - */
> -static noinline int
> -setup_cluster_bitmap(struct btrfs_block_group_cache *block_group,
> - struct btrfs_free_cluster *cluster,
> - struct list_head *bitmaps, u64 offset, u64 bytes,
> - u64 min_bytes)
> -{
> - struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
> - struct btrfs_free_space *entry;
> - struct rb_node *node;
> - int ret = -ENOSPC;
> -
> - if (ctl->total_bitmaps == 0)
> - return -ENOSPC;
> -
> - /*
> - * First check our cached list of bitmaps and see if there is an entry
> - * here that will work.
> - */
> - list_for_each_entry(entry, bitmaps, list) {
> - if (entry->bytes < min_bytes)
> - continue;
> - ret = btrfs_bitmap_cluster(block_group, entry, cluster, offset,
> - bytes, min_bytes);
> - if (!ret)
> - return 0;
> - }
> -
> - /*
> - * If we do have entries on our list and we are here then we didn't find
> - * anything, so go ahead and get the next entry after the last entry in
> - * this list and start the search from there.
> - */
> - if (!list_empty(bitmaps)) {
> - entry = list_entry(bitmaps->prev, struct btrfs_free_space,
> - list);
> - node = rb_next(&entry->offset_index);
> - if (!node)
> - return -ENOSPC;
> - entry = rb_entry(node, struct btrfs_free_space, offset_index);
> - goto search;
> - }
> -
> - entry = tree_search_offset(ctl, offset_to_bitmap(ctl, offset), 0, 1);
> - if (!entry)
> - return -ENOSPC;
> -
> -search:
> - node = &entry->offset_index;
> - do {
> - entry = rb_entry(node, struct btrfs_free_space, offset_index);
> - node = rb_next(&entry->offset_index);
> - if (!entry->bitmap)
> - continue;
> - if (entry->bytes < min_bytes)
> - continue;
> - ret = btrfs_bitmap_cluster(block_group, entry, cluster, offset,
> - bytes, min_bytes);
> - } while (ret && node);
> -
> - return ret;
> -}
> -
> /*
> * here we try to find a cluster of blocks in a block group. The goal
> * is to find at least bytes free and up to empty_size + bytes free.
> @@ -2351,11 +2138,12 @@ int btrfs_find_space_cluster(struct btrfs_trans_handle *trans,
> u64 offset, u64 bytes, u64 empty_size)
> {
> struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
> - struct list_head bitmaps;
> - struct btrfs_free_space *entry, *tmp;
> + struct btrfs_free_space *entry, *info;
> u64 min_bytes;
> int ret;
>
> + BUG_ON(cluster->info);
> +
> /* for metadata, allow allocates with more holes */
> if (btrfs_test_opt(root, SSD_SPREAD)) {
> min_bytes = bytes + empty_size;
> @@ -2369,9 +2157,16 @@ int btrfs_find_space_cluster(struct btrfs_trans_handle *trans,
> min_bytes = max(bytes, (bytes + empty_size) >> 1);
> else
> min_bytes = max(bytes, (bytes + empty_size) >> 4);
> + min_bytes = max(min_bytes, (u64)256 * 1024);
> } else
> min_bytes = max(bytes, (bytes + empty_size) >> 2);
>
> + min_bytes = round_up(min_bytes, root->sectorsize);
> +
> + info = kmem_cache_zalloc(btrfs_free_space_cachep, GFP_NOFS);
> + if (!info)
> + return -ENOMEM;
> +
> spin_lock(&ctl->tree_lock);
>
> /*
> @@ -2380,6 +2175,7 @@ int btrfs_find_space_cluster(struct btrfs_trans_handle *trans,
> */
> if (ctl->free_space < min_bytes) {
> spin_unlock(&ctl->tree_lock);
> + kmem_cache_free(btrfs_free_space_cachep, info);
> return -ENOSPC;
> }
>
> @@ -2388,26 +2184,44 @@ int btrfs_find_space_cluster(struct btrfs_trans_handle *trans,
> /* someone already found a cluster, hooray */
> if (cluster->block_group) {
> ret = 0;
> + kmem_cache_free(btrfs_free_space_cachep, info);
> goto out;
> }
>
> - INIT_LIST_HEAD(&bitmaps);
> - ret = setup_cluster_no_bitmap(block_group, cluster, &bitmaps, offset,
> - bytes, min_bytes);
> - if (ret)
> - ret = setup_cluster_bitmap(block_group, cluster, &bitmaps,
> - offset, bytes, min_bytes);
> + bytes = min_bytes;
> + entry = find_free_space(ctl, &offset, &min_bytes);
> + if (!entry) {
> + ret = -ENOSPC;
> + kmem_cache_free(btrfs_free_space_cachep, info);
> + goto out;
> + }
>
> - /* Clear our temporary list */
> - list_for_each_entry_safe(entry, tmp, &bitmaps, list)
> - list_del_init(&entry->list);
> + BUG_ON(min_bytes < bytes);
>
> - if (!ret) {
> - atomic_inc(&block_group->count);
> - list_add_tail(&cluster->block_group_list,
> - &block_group->cluster_list);
> - cluster->block_group = block_group;
> + ret = 0;
> + if (entry->bitmap) {
> + __bitmap_clear_bits(ctl, entry, offset, bytes);
> + if (!entry->bytes)
> + free_bitmap(ctl, entry);
> + } else {
> + __unlink_free_space(ctl, entry);
> + entry->offset += bytes;
> + entry->bytes -= bytes;
> + if (!entry->bytes)
> + kmem_cache_free(btrfs_free_space_cachep, entry);
> + else
> + __link_free_space(ctl, entry);
> }
> +
> + info->offset = offset;
> + info->bytes = bytes;
> +
> + cluster->info = info;
> +
> + atomic_inc(&block_group->count);
> + list_add_tail(&cluster->block_group_list,
> + &block_group->cluster_list);
> + cluster->block_group = block_group;
> out:
> spin_unlock(&cluster->lock);
> spin_unlock(&ctl->tree_lock);
> @@ -2421,11 +2235,10 @@ out:
> void btrfs_init_free_cluster(struct btrfs_free_cluster *cluster)
> {
> spin_lock_init(&cluster->lock);
> - spin_lock_init(&cluster->refill_lock);
> - cluster->root = RB_ROOT;
> - cluster->max_size = 0;
> + mutex_init(&cluster->refill_lock);
> INIT_LIST_HEAD(&cluster->block_group_list);
> cluster->block_group = NULL;
> + cluster->info = NULL;
> }
>
> int btrfs_trim_block_group(struct btrfs_block_group_cache *block_group,
> @@ -2665,3 +2478,27 @@ int btrfs_write_out_ino_cache(struct btrfs_root *root,
> iput(inode);
> return ret;
> }
> +
> +struct btrfs_free_cluster *btrfs_alloc_free_cluster(void)
> +{
> + struct btrfs_free_cluster *cluster;
> +
> + cluster = kmem_cache_zalloc(btrfs_free_cluster_cachep, GFP_NOFS);
> + if (!cluster)
> + return NULL;
> +
> + btrfs_init_free_cluster(cluster);
> + return cluster;
> +}
> +
> +void btrfs_release_free_cluster(struct btrfs_free_cluster *cluster)
> +{
> + /* return the free space in the cluster to the space cache. */
> + mutex_lock(&cluster->refill_lock);
> + btrfs_return_cluster_to_free_space(NULL, cluster);
> + mutex_unlock(&cluster->refill_lock);
> +
> + /* free the cluster. */
> + kmem_cache_free(btrfs_free_cluster_cachep, cluster);
> +}
> +
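
The alloc/release pair added at the end of this file closes the cluster
lifecycle: btrfs_alloc_free_block() allocates a cluster lazily for a parent
node, __btrfs_cow_block() hands it over to the COW copy, and the release
path returns whatever is left to the free space cache. A compressed
user-space view of that lifecycle, with the kernel calls stubbed out:

#include <stdio.h>
#include <stdlib.h>

struct cluster { unsigned long long offset, bytes; };

struct extent_buffer { struct cluster *cluster; };

static struct cluster *alloc_cluster(void)
{
	struct cluster *c = calloc(1, sizeof(*c));

	if (c) {
		c->offset = 1 << 20;	/* pretend we reserved 256KB */
		c->bytes = 256 * 1024;
	}
	return c;	/* NULL is fine: fall back to the global cluster */
}

static void release_cluster(struct extent_buffer *eb)
{
	if (!eb->cluster)
		return;
	printf("return %llu bytes to the free space cache\n",
	       eb->cluster->bytes);
	free(eb->cluster);
	eb->cluster = NULL;
}

int main(void)
{
	struct extent_buffer buf = { .cluster = alloc_cluster() };
	struct extent_buffer cow = { .cluster = NULL };

	/* __btrfs_cow_block(): the new copy takes over the cluster */
	cow.cluster = buf.cluster;
	buf.cluster = NULL;

	release_cluster(&cow);	/* btrfs_free_extent_cluster() analogue */
	return 0;
}
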
> diff --git a/fs/btrfs/free-space-cache.h b/fs/btrfs/free-space-cache.h
> index c27ccba..1f7e75c 100644
> --- a/fs/btrfs/free-space-cache.h
> +++ b/fs/btrfs/free-space-cache.h
> @@ -34,6 +34,7 @@ struct btrfs_free_space_ctl {
> int extents_thresh;
> int free_extents;
> int total_bitmaps;
> + int total_clusters;
> int unit;
> u64 start;
> struct btrfs_free_space_op *op;
> @@ -113,4 +114,16 @@ int btrfs_return_cluster_to_free_space(
> struct btrfs_free_cluster *cluster);
> int btrfs_trim_block_group(struct btrfs_block_group_cache *block_group,
> u64 *trimmed, u64 start, u64 end, u64 minlen);
> +void btrfs_reclaim_extent_clusters(struct btrfs_block_group_cache *block_group,
> + u64 to_reclaim);
> +struct btrfs_free_cluster *btrfs_alloc_free_cluster(void);
> +void btrfs_release_free_cluster(struct btrfs_free_cluster *cluster);
> +static inline void btrfs_free_extent_cluster(struct extent_buffer *eb)
> +{
> + if (!eb->cluster)
> + return;
> +
> + btrfs_release_free_cluster(eb->cluster);
> + eb->cluster = NULL;
> +}
> #endif
> diff --git a/fs/btrfs/inode-map.c b/fs/btrfs/inode-map.c
> index b4087e0..d501c1f 100644
> --- a/fs/btrfs/inode-map.c
> +++ b/fs/btrfs/inode-map.c
> @@ -457,6 +457,7 @@ again:
> spin_unlock(&root->cache_lock);
>
> spin_lock(&ctl->tree_lock);
> + WARN_ON(ctl->total_clusters);
> prealloc = sizeof(struct btrfs_free_space) * ctl->free_extents;
> prealloc = ALIGN(prealloc, PAGE_CACHE_SIZE);
> prealloc += ctl->total_bitmaps * PAGE_CACHE_SIZE;
> diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
> index 7a9e01f..38a5da9 100644
> --- a/fs/btrfs/inode.c
> +++ b/fs/btrfs/inode.c
> @@ -51,7 +51,6 @@
> #include "tree-log.h"
> #include "compression.h"
> #include "locking.h"
> -#include "free-space-cache.h"
> #include "inode-map.h"
>
> struct btrfs_iget_args {
> @@ -623,11 +622,10 @@ retry:
> trans = btrfs_join_transaction(root);
> BUG_ON(IS_ERR(trans));
> trans->block_rsv = &root->fs_info->delalloc_block_rsv;
> - ret = btrfs_reserve_extent(trans, root,
> - async_extent->compressed_size,
> - async_extent->compressed_size,
> - 0, alloc_hint,
> - (u64)-1, &ins, 1);
> + ret = btrfs_reserve_extent_data(trans, root,
> + async_extent->compressed_size,
> + async_extent->compressed_size,
> + 0, alloc_hint, (u64)-1, &ins);
> btrfs_end_transaction(trans, root);
>
> if (ret) {
> @@ -828,9 +826,9 @@ static noinline int cow_file_range(struct inode *inode,
> unsigned long op;
>
> cur_alloc_size = disk_num_bytes;
> - ret = btrfs_reserve_extent(trans, root, cur_alloc_size,
> - root->sectorsize, 0, alloc_hint,
> - (u64)-1, &ins, 1);
> + ret = btrfs_reserve_extent_data(trans, root, cur_alloc_size,
> + root->sectorsize, 0, alloc_hint,
> + (u64)-1, &ins);
> BUG_ON(ret);
>
> em = alloc_extent_map();
> @@ -5409,8 +5407,8 @@ static struct extent_map *btrfs_new_extent_direct(struct inode *inode,
> trans->block_rsv = &root->fs_info->delalloc_block_rsv;
>
> alloc_hint = get_extent_allocation_hint(inode, start, len);
> - ret = btrfs_reserve_extent(trans, root, len, root->sectorsize, 0,
> - alloc_hint, (u64)-1, &ins, 1);
> + ret = btrfs_reserve_extent_data(trans, root, len, root->sectorsize, 0,
> + alloc_hint, (u64)-1, &ins);
> if (ret) {
> em = ERR_PTR(ret);
> goto out;
> @@ -7276,8 +7274,9 @@ static int __btrfs_prealloc_file_range(struct inode *inode, int mode,
> }
> }
>
> - ret = btrfs_reserve_extent(trans, root, num_bytes, min_size,
> - 0, *alloc_hint, (u64)-1, &ins, 1);
> + ret = btrfs_reserve_extent_data(trans, root, num_bytes,
> + min_size, 0, *alloc_hint,
> + (u64)-1, &ins);
> if (ret) {
> if (own_trans)
> btrfs_end_transaction(trans, root);
> diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
> index 970977a..808203c 100644
> --- a/fs/btrfs/ioctl.c
> +++ b/fs/btrfs/ioctl.c
> @@ -347,7 +347,7 @@ static noinline int create_subvol(struct btrfs_root *root,
> if (IS_ERR(trans))
> return PTR_ERR(trans);
>
> - leaf = btrfs_alloc_free_block(trans, root, root->leafsize,
> + leaf = btrfs_alloc_free_block(trans, root, root->leafsize, NULL,
> 0, objectid, NULL, 0, 0, 0);
> if (IS_ERR(leaf)) {
> ret = PTR_ERR(leaf);
> diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
> index 786639f..21d08e2 100644
> --- a/fs/btrfs/tree-log.c
> +++ b/fs/btrfs/tree-log.c
> @@ -25,6 +25,7 @@
> #include "print-tree.h"
> #include "compat.h"
> #include "tree-log.h"
> +#include "free-space-cache.h"
>
> /* magic values for the inode_only field in btrfs_log_inode:
> *
> @@ -1763,6 +1764,8 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans,
> ret = btrfs_free_reserved_extent(root,
> bytenr, blocksize);
> BUG_ON(ret);
> +
> + btrfs_free_extent_cluster(next);
> }
> free_extent_buffer(next);
> continue;
> @@ -1832,6 +1835,8 @@ static noinline int walk_up_log_tree(struct btrfs_trans_handle *trans,
> path->nodes[*level]->start,
> path->nodes[*level]->len);
> BUG_ON(ret);
> +
> + btrfs_free_extent_cluster(next);
> }
> free_extent_buffer(path->nodes[*level]);
> path->nodes[*level] = NULL;
> @@ -1900,6 +1905,8 @@ static int walk_log_tree(struct btrfs_trans_handle *trans,
> ret = btrfs_free_reserved_extent(log, next->start,
> next->len);
> BUG_ON(ret);
> +
> + btrfs_free_extent_cluster(next);
> }
> }
>