On Wed, Oct 12, 2011 at 03:45:04PM -0500, Mitch Harder wrote:
> On Wed, Oct 12, 2011 at 12:50 PM, Josef Bacik <josef@xxxxxxxxxx> wrote:
> > On Tue, Oct 11, 2011 at 03:45:45PM -0500, Mitch Harder wrote:
> >> On Tue, Oct 11, 2011 at 3:01 PM, Josef Bacik <josef@xxxxxxxxxx> wrote:
> >> > On Tue, Oct 11, 2011 at 02:44:09PM -0500, Mitch Harder wrote:
> >> >> On Tue, Oct 11, 2011 at 2:00 PM, Josef Bacik <josef@xxxxxxxxxx> wrote:
> >> >> > On Tue, Oct 11, 2011 at 12:33:48PM -0500, Mitch Harder wrote:
> >> >> >> On Mon, Sep 26, 2011 at 4:22 PM, Josef Bacik <josef@xxxxxxxxxx> wrote:
> >> >> >> >
> >> >> >> > go from taking around 45 minutes to 10 seconds on my freshly formatted 3 TiB
> >> >> >> > file system. This doesn't seem to break my other enospc tests, but could really
> >> >> >> > use some more testing as this is a super scary change. Thanks,
> >> >> >> >
> >> >> >>
> >> >> >> I've been testing Josef's git.kernel.org testing tree, and I've
> >> >> >> bisected an error down to this commit.
> >> >> >>
> >> >> >> I'm triggering the error using a removedirs benchmark in filebench
> >> >> >> with the following profile:
> >> >> >> load removedirs
> >> >> >> set $dir=/mnt/benchmark/filebench
> >> >> >> set $ndirs=400000
> >> >> >> run
> >> >> >>
> >> >> >
> >> >> > Ok try this one, it will write out more and harder, see if that helps. Thanks,
> >> >> >
> >> >>
> >> >> Still running into BUG at fs/btrfs/inode.c:2176!
> >> >
> >> > How about this one?
> >> >
> >>
> >> Sorry, still getting the same bug.
> >>
> >> [ 175.956273] kernel BUG at fs/btrfs/inode.c:2176!
> >
> > Ok I think I see what's happening, this patch replaces the previous one, let me
> > know how it goes. Thanks,
> >
>
> Getting a slightly different BUG this time:
>
OK, it looks like I've fixed the original problem, and now we're hitting a
problem with the free space cache. This patch replaces the last one; it's all
the fixes up to now plus a new set of BUG_ON()s to figure out which free space
cache inode is screwing us up. Thanks,
Josef
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index fc0de68..e595372 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -3334,7 +3334,7 @@ out:
* shrink metadata reservation for delalloc
*/
static int shrink_delalloc(struct btrfs_trans_handle *trans,
- struct btrfs_root *root, u64 to_reclaim, int sync)
+ struct btrfs_root *root, u64 to_reclaim, int retries)
{
struct btrfs_block_rsv *block_rsv;
struct btrfs_space_info *space_info;
@@ -3365,12 +3365,10 @@ static int shrink_delalloc(struct btrfs_trans_handle *trans,
}
max_reclaim = min(reserved, to_reclaim);
+ if (max_reclaim > (2 * 1024 * 1024))
+ nr_pages = max_reclaim >> PAGE_CACHE_SHIFT;
while (loops < 1024) {
- /* have the flusher threads jump in and do some IO */
- smp_mb();
- nr_pages = min_t(unsigned long, nr_pages,
- root->fs_info->delalloc_bytes >> PAGE_CACHE_SHIFT);
writeback_inodes_sb_nr_if_idle(root->fs_info->sb, nr_pages);
spin_lock(&space_info->lock);
@@ -3384,14 +3382,22 @@ static int shrink_delalloc(struct btrfs_trans_handle *trans,
if (reserved == 0 || reclaimed >= max_reclaim)
break;
- if (trans && trans->transaction->blocked)
+ if (trans)
return -EAGAIN;
- time_left = schedule_timeout_interruptible(1);
+ if (!retries) {
+ time_left = schedule_timeout_interruptible(1);
- /* We were interrupted, exit */
- if (time_left)
- break;
+ /* We were interrupted, exit */
+ if (time_left)
+ break;
+ } else {
+ /*
+ * We've already done this song and dance once, let's
+ * really wait for some work to get done.
+ */
+ btrfs_wait_ordered_extents(root, 0, 0);
+ }
/* we've kicked the IO a few times, if anything has been freed,
* exit. There is no sense in looping here for a long time
@@ -3399,15 +3405,13 @@ static int shrink_delalloc(struct btrfs_trans_handle *trans,
* just too many writers without enough free space
*/
- if (loops > 3) {
+ if (!retries && loops > 3) {
smp_mb();
if (progress != space_info->reservation_progress)
break;
}
}
- if (reclaimed < to_reclaim && !trans)
- btrfs_wait_ordered_extents(root, 0, 0);
return reclaimed >= to_reclaim;
}
@@ -3552,7 +3556,7 @@ again:
* We do synchronous shrinking since we don't actually unreserve
* metadata until after the IO is completed.
*/
- ret = shrink_delalloc(trans, root, num_bytes, 1);
+ ret = shrink_delalloc(trans, root, num_bytes, retries);
if (ret < 0)
goto out;
@@ -3568,17 +3572,6 @@ again:
goto again;
}
- /*
- * Not enough space to be reclaimed, don't bother committing the
- * transaction.
- */
- spin_lock(&space_info->lock);
- if (space_info->bytes_pinned < orig_bytes)
- ret = -ENOSPC;
- spin_unlock(&space_info->lock);
- if (ret)
- goto out;
-
ret = -EAGAIN;
if (trans)
goto out;
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index d6ba353..cb63904 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -782,7 +782,8 @@ static noinline int cow_file_range(struct inode *inode,
struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
int ret = 0;
- BUG_ON(btrfs_is_free_space_inode(root, inode));
+ BUG_ON(root == root->fs_info->tree_root);
+ BUG_ON(BTRFS_I(inode)->location.objectid == BTRFS_FREE_INO_OBJECTID);
trans = btrfs_join_transaction(root);
BUG_ON(IS_ERR(trans));
trans->block_rsv = &root->fs_info->delalloc_block_rsv;
@@ -2790,7 +2791,8 @@ static struct btrfs_trans_handle *__unlink_start_trans(struct inode *dir,
return ERR_PTR(-ENOMEM);
}
- trans = btrfs_start_transaction(root, 0);
+ /* 1 for the orphan item */
+ trans = btrfs_start_transaction(root, 1);
if (IS_ERR(trans)) {
btrfs_free_path(path);
root->fs_info->enospc_unlink = 0;
@@ -2901,6 +2903,11 @@ out:
return ERR_PTR(err);
}
+ ret = btrfs_block_rsv_migrate(trans->block_rsv,
+ &root->fs_info->global_block_rsv,
+ btrfs_calc_trans_metadata_size(root, 1));
+ BUG_ON(ret);
+
trans->block_rsv = &root->fs_info->global_block_rsv;
return trans;
}
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html