[PATCH] btrfs: Make the critical section of will_be_snapshotted and snapshot_force_cow smaller

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Btrfs uses btrfs_root::will_be_snapshotted and
btrfs_root::snapshot_force_cow to control data space reservation and
delalloc for NODATACOW/COW switch.

To be more specific:

                   /------------------------ inc(will_be_snapshotted)
                   |                     /-- inc(snapshot_force_cow)
-------------------|---------------------|---------------
reserve: NOCOW     | reserve: COW        | reserve: COW
delalloc: NOCOW    | delalloc: NOCOW     | delalloc: COW

They combined together ensure btrfs data NOCOW cooperate with
snapshot creation, and no data reservation underflow.
- No data written into shared extents
  This is ensured by snapshot_force_cow, as with it set, all
  NOCOW delalloc range will be forced back to COW.

- No data reserved space underflow
  This is ensured by will_be_snapshot and snapshot_force_cow sequence.

Normally, we use the following call sequence:
  atomic_inc(&root->will_be_snapshotted)
  smp_mb__after_atomic();
  wait_event(root->subv_writers->wait,
  	     percpu_counter_sum(&root->subv_writers->coutner) == 0;
  btrfs_start_delalloc_snapshot(root);
  atomic_inc(&root->snapshot_force_cow);

And such calls happens in create_snapshot().

However since commit 09e804d771f ("Btrfs: fix file corruption after
snapshotting due to mix of buffered/DIO writes"), we also call
btrfs_start_delalloc_snapstho() in btrfs_start_delalloc_flush().

This means we can make the critical section smaller, instead of doing
above call sequence in the ioctl context, we reuse the
btrfs_start_delalloc_snapshot() in btrfs_start_delalloc_flush() in the
transaction commit context.

So old call critical section will be changed from:

/-------- snapshot ioctl
|    /- inc will_be/force_cow and call start_delalloc_snapshot()
|    |     /- btrfs_start_transaction()
|    |     |                        /- btrfs_commit_transaction()
|    |     |                        |  /- dec will_be/force_cow
|----|/////|///////////////////////////|-----| <- ioctl return

To new one:

/-------- snapshot ioctl
|    /- inc will_be/force_cow and call start_delalloc_snapshot()
|    |     /- btrfs_start_transaction()
|    |     |                        /- btrfs_commit_transaction()
|    |     |                        |  /- dec will_be/force_cow
|----|-----|--|//////////////////|--|--|-----| <- ioctl return
              |                  |
              |                  \- create_pending_snapshot() return
              \- btrfs_start_delalloc_flush()

Signed-off-by: Qu Wenruo <wqu@xxxxxxxx>
---
 fs/btrfs/ioctl.c       | 31 +-------------------------
 fs/btrfs/transaction.c | 50 ++++++++++++++++++++++++++++++++++++------
 2 files changed, 44 insertions(+), 37 deletions(-)

diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 8c9a908d3acc..d41e8b6aa36f 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -766,7 +766,6 @@ static int create_snapshot(struct btrfs_root *root, struct inode *dir,
 	struct btrfs_pending_snapshot *pending_snapshot;
 	struct btrfs_trans_handle *trans;
 	int ret;
-	bool snapshot_force_cow = false;
 
 	if (!test_bit(BTRFS_ROOT_REF_COWS, &root->state))
 		return -EINVAL;
@@ -789,29 +788,6 @@ static int create_snapshot(struct btrfs_root *root, struct inode *dir,
 		goto free_pending;
 	}
 
-	/*
-	 * Force new buffered writes to reserve space even when NOCOW is
-	 * possible. This is to avoid later writeback (running dealloc) to
-	 * fallback to COW mode and unexpectedly fail with ENOSPC.
-	 */
-	atomic_inc(&root->will_be_snapshotted);
-	smp_mb__after_atomic();
-	/* wait for no snapshot writes */
-	wait_event(root->subv_writers->wait,
-		   percpu_counter_sum(&root->subv_writers->counter) == 0);
-
-	ret = btrfs_start_delalloc_snapshot(root);
-	if (ret)
-		goto dec_and_free;
-
-	/*
-	 * All previous writes have started writeback in NOCOW mode, so now
-	 * we force future writes to fallback to COW mode during snapshot
-	 * creation.
-	 */
-	atomic_inc(&root->snapshot_force_cow);
-	snapshot_force_cow = true;
-
 	btrfs_wait_ordered_extents(root, U64_MAX, 0, (u64)-1);
 
 	btrfs_init_block_rsv(&pending_snapshot->block_rsv,
@@ -828,7 +804,7 @@ static int create_snapshot(struct btrfs_root *root, struct inode *dir,
 					&pending_snapshot->block_rsv, 8,
 					false);
 	if (ret)
-		goto dec_and_free;
+		goto free_pending;
 
 	pending_snapshot->dentry = dentry;
 	pending_snapshot->root = root;
@@ -875,11 +851,6 @@ static int create_snapshot(struct btrfs_root *root, struct inode *dir,
 	ret = 0;
 fail:
 	btrfs_subvolume_release_metadata(fs_info, &pending_snapshot->block_rsv);
-dec_and_free:
-	if (snapshot_force_cow)
-		atomic_dec(&root->snapshot_force_cow);
-	if (atomic_dec_and_test(&root->will_be_snapshotted))
-		wake_up_var(&root->will_be_snapshotted);
 free_pending:
 	kfree(pending_snapshot->root_item);
 	btrfs_free_path(pending_snapshot->path);
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 3f6811cdf803..67fb520fed9b 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -1605,6 +1605,9 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
 	}
 
 fail:
+	if (atomic_dec_and_test(&root->will_be_snapshotted))
+		wake_up_var(&root->will_be_snapshotted);
+	atomic_dec(&root->snapshot_force_cow);
 	pending->error = ret;
 dir_item_existed:
 	trans->block_rsv = rsv;
@@ -1852,6 +1855,10 @@ static void btrfs_cleanup_pending_block_groups(struct btrfs_trans_handle *trans)
 static inline int btrfs_start_delalloc_flush(struct btrfs_trans_handle *trans)
 {
 	struct btrfs_fs_info *fs_info = trans->fs_info;
+	struct btrfs_pending_snapshot *last_fail;
+	struct btrfs_pending_snapshot *pending;
+	struct list_head *head = &trans->transaction->pending_snapshots;
+	int ret = 0;
 
 	/*
 	 * We use writeback_inodes_sb here because if we used
@@ -1865,8 +1872,6 @@ static inline int btrfs_start_delalloc_flush(struct btrfs_trans_handle *trans)
 	if (btrfs_test_opt(fs_info, FLUSHONCOMMIT)) {
 		writeback_inodes_sb(fs_info->sb, WB_REASON_SYNC);
 	} else {
-		struct btrfs_pending_snapshot *pending;
-		struct list_head *head = &trans->transaction->pending_snapshots;
 
 		/*
 		 * Flush dellaloc for any root that is going to be snapshotted.
@@ -1876,14 +1881,45 @@ static inline int btrfs_start_delalloc_flush(struct btrfs_trans_handle *trans)
 		 * the inode's size on disk.
 		 */
 		list_for_each_entry(pending, head, list) {
-			int ret;
-
-			ret = btrfs_start_delalloc_snapshot(pending->root);
-			if (ret)
-				return ret;
+			struct btrfs_root *root = pending->root;
+
+			/*
+			 * Force new buffered writes to reserve space even
+			 * when NOCOW is possible. This is to avoid later
+			 * writeback (running dealloc) to fallback to COW mode
+			 * and unexpectedly fail with ENOSPC.
+			 */
+			atomic_inc(&root->will_be_snapshotted);
+			smp_mb__after_atomic();
+			/* wait for no snapshot writes */
+			wait_event(root->subv_writers->wait,
+				   percpu_counter_sum(&root->subv_writers->counter) == 0);
+			ret = btrfs_start_delalloc_snapshot(root);
+			if (ret) {
+				atomic_dec(&root->will_be_snapshotted);
+				last_fail = pending;
+				goto cleanup;
+			}
+			/*
+			 * All previous writes have started writeback in NOCOW
+			 * mode, so now we force future writes to fallback
+			 * to COW mode during snapshot creation.
+			 */
+			atomic_inc(&root->snapshot_force_cow);
 		}
 	}
 	return 0;
+cleanup:
+	list_for_each_entry(pending, head, list) {
+		struct btrfs_root *root = pending->root;
+
+		if (pending == last_fail)
+			break;
+		if (atomic_dec_and_test(&root->will_be_snapshotted))
+			wake_up_var(&root->will_be_snapshotted);
+		atomic_dec(&root->snapshot_force_cow);
+	}
+	return ret;
 }
 
 static inline void btrfs_wait_delalloc_flush(struct btrfs_trans_handle *trans)
-- 
2.21.0




[Index of Archives]     [Linux Filesystem Development]     [Linux NFS]     [Linux NILFS]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux