Re: [PATCH] Btrfs: allow us to overcommit our enospc reservations TEST THIS PLEASE!!!

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Tue, Oct 11, 2011 at 03:45:45PM -0500, Mitch Harder wrote:
> On Tue, Oct 11, 2011 at 3:01 PM, Josef Bacik <josef@xxxxxxxxxx> wrote:
> > On Tue, Oct 11, 2011 at 02:44:09PM -0500, Mitch Harder wrote:
> >> On Tue, Oct 11, 2011 at 2:00 PM, Josef Bacik <josef@xxxxxxxxxx> wrote:
> >> > On Tue, Oct 11, 2011 at 12:33:48PM -0500, Mitch Harder wrote:
> >> >> On Mon, Sep 26, 2011 at 4:22 PM, Josef Bacik <josef@xxxxxxxxxx> wrote:
> >> >> >
> >> >> > go from taking around 45 minutes to 10 seconds on my freshly formatted 3 TiB
> >> >> > file system.  This doesn't seem to break my other enospc tests, but could really
> >> >> > use some more testing as this is a super scary change.  Thanks,
> >> >> >
> >> >>
> >> >> I've been testing Josef's git.kernel.org testing tree, and I've
> >> >> bisected an error down to this commit.
> >> >>
> >> >> I'm triggering the error using a removedirs benchmark in filebench
> >> >> with the following profile:
> >> >> load removedirs
> >> >> set $dir=/mnt/benchmark/filebench
> >> >> set $ndirs=400000
> >> >> run
> >> >>
> >> >
> >> > OK, try this one; it will write out more, and more aggressively.  See if that helps.  Thanks,
> >> >
> >>
> >> Still running into BUG at fs/btrfs/inode.c:2176!
> >
> > How about this one?
> >
> 
> Sorry, still getting the same bug.
> 
> [  175.956273] kernel BUG at fs/btrfs/inode.c:2176!

OK, I think I see what's happening.  This patch replaces the previous one; let
me know how it goes.  Thanks,

Josef

diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index fc0de68..e595372 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -3334,7 +3334,7 @@ out:
  * shrink metadata reservation for delalloc
  */
 static int shrink_delalloc(struct btrfs_trans_handle *trans,
-			   struct btrfs_root *root, u64 to_reclaim, int sync)
+			   struct btrfs_root *root, u64 to_reclaim, int retries)
 {
 	struct btrfs_block_rsv *block_rsv;
 	struct btrfs_space_info *space_info;
@@ -3365,12 +3365,10 @@ static int shrink_delalloc(struct btrfs_trans_handle *trans,
 	}
 
 	max_reclaim = min(reserved, to_reclaim);
+	if (max_reclaim > (2 * 1024 * 1024))
+		nr_pages = max_reclaim >> PAGE_CACHE_SHIFT;
 
 	while (loops < 1024) {
-		/* have the flusher threads jump in and do some IO */
-		smp_mb();
-		nr_pages = min_t(unsigned long, nr_pages,
-		       root->fs_info->delalloc_bytes >> PAGE_CACHE_SHIFT);
 		writeback_inodes_sb_nr_if_idle(root->fs_info->sb, nr_pages);
 
 		spin_lock(&space_info->lock);
@@ -3384,14 +3382,22 @@ static int shrink_delalloc(struct btrfs_trans_handle *trans,
 		if (reserved == 0 || reclaimed >= max_reclaim)
 			break;
 
-		if (trans && trans->transaction->blocked)
+		if (trans)
 			return -EAGAIN;
 
-		time_left = schedule_timeout_interruptible(1);
+		if (!retries) {
+			time_left = schedule_timeout_interruptible(1);
 
-		/* We were interrupted, exit */
-		if (time_left)
-			break;
+			/* We were interrupted, exit */
+			if (time_left)
+				break;
+		} else {
+			/*
+			 * We've already done this song and dance once, let's
+			 * really wait for some work to get done.
+			 */
+			btrfs_wait_ordered_extents(root, 0, 0);
+		}
 
 		/* we've kicked the IO a few times, if anything has been freed,
 		 * exit.  There is no sense in looping here for a long time
@@ -3399,15 +3405,13 @@ static int shrink_delalloc(struct btrfs_trans_handle *trans,
 		 * just too many writers without enough free space
 		 */
 
-		if (loops > 3) {
+		if (!retries && loops > 3) {
 			smp_mb();
 			if (progress != space_info->reservation_progress)
 				break;
 		}
 
 	}
-	if (reclaimed < to_reclaim && !trans)
-		btrfs_wait_ordered_extents(root, 0, 0);
 	return reclaimed >= to_reclaim;
 }
 
@@ -3552,7 +3556,7 @@ again:
 	 * We do synchronous shrinking since we don't actually unreserve
 	 * metadata until after the IO is completed.
 	 */
-	ret = shrink_delalloc(trans, root, num_bytes, 1);
+	ret = shrink_delalloc(trans, root, num_bytes, retries);
 	if (ret < 0)
 		goto out;
 
@@ -3568,17 +3572,6 @@ again:
 		goto again;
 	}
 
-	/*
-	 * Not enough space to be reclaimed, don't bother committing the
-	 * transaction.
-	 */
-	spin_lock(&space_info->lock);
-	if (space_info->bytes_pinned < orig_bytes)
-		ret = -ENOSPC;
-	spin_unlock(&space_info->lock);
-	if (ret)
-		goto out;
-
 	ret = -EAGAIN;
 	if (trans)
 		goto out;
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 1153731..1785307 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -2758,7 +2758,16 @@ static struct btrfs_trans_handle *__unlink_start_trans(struct inode *dir,
 	u64 ino = btrfs_ino(inode);
 	u64 dir_ino = btrfs_ino(dir);
 
-	trans = btrfs_start_transaction(root, 10);
+	/*
+	 * 1 for the possible orphan item
+	 * 1 for the dir item
+	 * 1 for the dir index
+	 * 1 for the inode ref
+	 * 1 for the inode ref in the tree log
+	 * 2 for the dir entries in the log
+	 * 1 for the inode
+	 */
+	trans = btrfs_start_transaction(root, 8);
 	if (!IS_ERR(trans) || PTR_ERR(trans) != -ENOSPC)
 		return trans;
 
@@ -2781,7 +2790,8 @@ static struct btrfs_trans_handle *__unlink_start_trans(struct inode *dir,
 		return ERR_PTR(-ENOMEM);
 	}
 
-	trans = btrfs_start_transaction(root, 0);
+	/* 1 for the orphan item */
+	trans = btrfs_start_transaction(root, 1);
 	if (IS_ERR(trans)) {
 		btrfs_free_path(path);
 		root->fs_info->enospc_unlink = 0;
@@ -2892,6 +2902,11 @@ out:
 		return ERR_PTR(err);
 	}
 
+	ret = btrfs_block_rsv_migrate(trans->block_rsv,
+				      &root->fs_info->global_block_rsv,
+				      btrfs_calc_trans_metadata_size(root, 1));
+	BUG_ON(ret);
+
 	trans->block_rsv = &root->fs_info->global_block_rsv;
 	return trans;
 }
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[Index of Archives]     [Linux Filesystem Development]     [Linux NFS]     [Linux NILFS]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux