When we clear the dirty bits in btrfs_delete_unused_bgs for extents
in the empty block group, it results in btrfs_finish_extent_commit being
unable to discard the freed extents.
The block group removal patch added an alternate path to forget extents
other than btrfs_finish_extent_commit. As a result, any extents that would
be freed when the block group is removed aren't discarded. In my
test run, with a large copy of mixed sized files followed by removal, it
left nearly 2/3 of extents undiscarded.
To clean up the block groups, we add the removed block group onto a list
that will be discarded after transaction commit.
Signed-off-by: Jeff Mahoney <jeffm@xxxxxxxx>
---
fs/btrfs/ctree.h | 2 ++
fs/btrfs/extent-tree.c | 58 +++++++++++++++++++++++++++++++++++++++++++++
fs/btrfs/free-space-cache.c | 45 +++++++++++++++++++----------------
fs/btrfs/super.c | 2 +-
fs/btrfs/transaction.c | 2 ++
fs/btrfs/transaction.h | 2 ++
6 files changed, 90 insertions(+), 21 deletions(-)
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 6f364e1..3448a54 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -3438,6 +3438,7 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
struct btrfs_root *root, u64 group_start,
struct extent_map *em);
void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info);
+void btrfs_cleanup_block_group_mapping(struct btrfs_block_group_cache *cache);
void btrfs_create_pending_block_groups(struct btrfs_trans_handle *trans,
struct btrfs_root *root);
u64 btrfs_get_alloc_profile(struct btrfs_root *root, int data);
@@ -4068,6 +4069,7 @@ __printf(5, 6)
void __btrfs_std_error(struct btrfs_fs_info *fs_info, const char *function,
unsigned int line, int errno, const char *fmt, ...);
+const char *btrfs_decode_error(int errno);
void __btrfs_abort_transaction(struct btrfs_trans_handle *trans,
struct btrfs_root *root, const char *function,
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 6d1d74d..521a294 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -6011,6 +6011,8 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
struct btrfs_root *root)
{
struct btrfs_fs_info *fs_info = root->fs_info;
+ struct btrfs_block_group_cache *block_group, *tmp;
+ struct list_head *deleted_bgs;
struct extent_io_tree *unpin;
u64 start;
u64 end;
@@ -6043,6 +6045,29 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
cond_resched();
}
+ /* Transaction is finished. We don't need the lock anymore. */
+ deleted_bgs = &trans->transaction->deleted_bgs;
+ list_for_each_entry_safe(block_group, tmp, deleted_bgs, bg_list) {
+ u64 start, end, trimmed = 0;
+ int ret;
+ list_del_init(&block_group->bg_list);
+
+ start = block_group->key.objectid;
+ end = start + block_group->key.offset - 1;
+
+ ret = btrfs_discard_extent(root, start, end, &trimmed);
+
+ btrfs_cleanup_block_group_mapping(block_group);
+ btrfs_put_block_group(block_group);
+
+ if (ret) {
+ const char *errstr = btrfs_decode_error(ret);
+ btrfs_warn(fs_info,
+ "Discard failed while removing blockgroup: errno=%d %s\n",
+ ret, errstr);
+ }
+ }
+
return 0;
}
@@ -9802,6 +9827,11 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
* currently running transaction might finish and a new one start,
* allowing for new block groups to be created that can reuse the same
* physical device locations unless we take this special care.
+ *
+ * There may also be an implicit trim operation if the file system
+ * is mounted with -odiscard. The same protections must remain
+ * in place until the extents have been discarded completely when
+ * the transaction commit has completed.
*/
remove_em = (atomic_read(&block_group->trimming) == 0);
/*
@@ -9876,6 +9906,7 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info)
spin_lock(&fs_info->unused_bgs_lock);
while (!list_empty(&fs_info->unused_bgs)) {
u64 start, end;
+ int trimming;
block_group = list_first_entry(&fs_info->unused_bgs,
struct btrfs_block_group_cache,
@@ -9973,12 +10004,39 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info)
spin_unlock(&block_group->lock);
spin_unlock(&space_info->lock);
+ /* DISCARD can flip during remount */
+ trimming = btrfs_test_opt(root, DISCARD);
+
+ /* Implicit trim during transaction commit. */
+ if (trimming)
+ atomic_inc(&block_group->trimming);
+
/*
* Btrfs_remove_chunk will abort the transaction if things go
* horribly wrong.
*/
ret = btrfs_remove_chunk(trans, root,
block_group->key.objectid);
+
+ if (ret) {
+ if (trimming)
+ atomic_dec(&block_group->trimming);
+ goto end_trans;
+ }
+
+ /*
+ * If we're not mounted with -odiscard, we can just forget
+ * about this block group. Otherwise we'll need to wait
+ * until transaction commit to do the actual discard.
+ */
+ if (trimming) {
+ WARN_ON(!list_empty(&block_group->bg_list));
+ spin_lock(&trans->transaction->deleted_bgs_lock);
+ list_add(&block_group->bg_list,
+ &trans->transaction->deleted_bgs);
+ spin_unlock(&trans->transaction->deleted_bgs_lock);
+ btrfs_get_block_group(block_group);
+ }
end_trans:
btrfs_end_transaction(trans, root);
next:
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index 41c510b..d33b146 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -3274,6 +3274,30 @@ next:
return ret;
}
+void btrfs_cleanup_block_group_mapping(struct btrfs_block_group_cache *cache)
+{
+ struct extent_map_tree *em_tree;
+ struct extent_map *em;
+
+ lock_chunks(cache->fs_info->chunk_root);
+ em_tree = &cache->fs_info->mapping_tree.map_tree;
+ write_lock(&em_tree->lock);
+ em = lookup_extent_mapping(em_tree, cache->key.objectid, 1);
+ BUG_ON(!em); /* logic error, can't happen */
+
+ /*
+ * remove_extent_mapping() will delete us from the pinned_chunks
+ * list, which is protected by the chunk mutex.
+ */
+ remove_extent_mapping(em_tree, em);
+ write_unlock(&em_tree->lock);
+ unlock_chunks(cache->fs_info->chunk_root);
+
+ /* once for us and once for the tree */
+ free_extent_map(em);
+ free_extent_map(em);
+}
+
int btrfs_trim_block_group(struct btrfs_block_group_cache *block_group,
u64 *trimmed, u64 start, u64 end, u64 minlen)
{
@@ -3298,28 +3322,9 @@ out:
spin_lock(&block_group->lock);
if (atomic_dec_and_test(&block_group->trimming) &&
block_group->removed) {
- struct extent_map_tree *em_tree;
- struct extent_map *em;
-
spin_unlock(&block_group->lock);
- lock_chunks(block_group->fs_info->chunk_root);
- em_tree = &block_group->fs_info->mapping_tree.map_tree;
- write_lock(&em_tree->lock);
- em = lookup_extent_mapping(em_tree, block_group->key.objectid,
- 1);
- BUG_ON(!em); /* logic error, can't happen */
- /*
- * remove_extent_mapping() will delete us from the pinned_chunks
- * list, which is protected by the chunk mutex.
- */
- remove_extent_mapping(em_tree, em);
- write_unlock(&em_tree->lock);
- unlock_chunks(block_group->fs_info->chunk_root);
-
- /* once for us and once for the tree */
- free_extent_map(em);
- free_extent_map(em);
+ btrfs_cleanup_block_group_mapping(block_group);
/*
* We've left one free space entry and other tasks trimming
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 9e66f5e..016e65a 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -69,7 +69,7 @@ static struct file_system_type btrfs_fs_type;
static int btrfs_remount(struct super_block *sb, int *flags, char *data);
-static const char *btrfs_decode_error(int errno)
+const char *btrfs_decode_error(int errno)
{
char *errstr = "unknown";
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 5628e25..2005262 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -256,6 +256,8 @@ loop:
mutex_init(&cur_trans->cache_write_mutex);
cur_trans->num_dirty_bgs = 0;
spin_lock_init(&cur_trans->dirty_bgs_lock);
+ INIT_LIST_HEAD(&cur_trans->deleted_bgs);
+ spin_lock_init(&cur_trans->deleted_bgs_lock);
list_add_tail(&cur_trans->list, &fs_info->trans_list);
extent_io_tree_init(&cur_trans->dirty_pages,
fs_info->btree_inode->i_mapping);
diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h
index 0b24755..14325f2 100644
--- a/fs/btrfs/transaction.h
+++ b/fs/btrfs/transaction.h
@@ -74,6 +74,8 @@ struct btrfs_transaction {
*/
struct mutex cache_write_mutex;
spinlock_t dirty_bgs_lock;
+ struct list_head deleted_bgs;
+ spinlock_t deleted_bgs_lock;
struct btrfs_delayed_ref_root delayed_refs;
int aborted;
int dirty_bg_run;
--
1.8.5.6
--
Jeff Mahoney
SUSE Labs
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html