BTRFS sports a mechanism to provide exclusion when a snapshot is about
to be created. This is implemented via btrfs_start_write_no_snapshotting
et al. Currently the implementation of that mechanism is some perverse
amalgamation of a percpu variable, an explicit waitqueue, an atomic_t
variable and an implicit wait bit on said atomic_t via the
wait_var_event family of APIs. And for good measure there is a memory
barrier thrown into the mix...
The astute reader should have concluded by now that it's bordering on
impossible to prove whether this scheme works. What's worse, all of
this is required to achieve something really simple: ensuring that
certain operations cannot run during snapshot creation. Let's simplify
this by relying on a single atomic_t used as a boolean flag. This commit
changes only the implementation and not the semantics of the existing
mechanism.
Now, if the atomic is 1 (a snapshot is in progress), callers of
btrfs_start_write_no_snapshotting will get a return value of 0, which
should be handled accordingly.
btrfs_wait_for_snapshot_creation, OTOH, will block while a snapshot is
in progress and return once the current snapshot has finished, at which
point the caller has acquired the right to create a snapshot.
Signed-off-by: Nikolay Borisov <nborisov@xxxxxxxx>
---
fs/btrfs/extent-tree.c | 20 +++++---------------
fs/btrfs/ioctl.c | 9 ++-------
2 files changed, 7 insertions(+), 22 deletions(-)
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 8f2b7b29c3fd..d9e2e35700fd 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -11333,25 +11333,15 @@ int btrfs_trim_fs(struct btrfs_fs_info *fs_info, struct fstrim_range *range)
*/
void btrfs_end_write_no_snapshotting(struct btrfs_root *root)
{
- percpu_counter_dec(&root->subv_writers->counter);
- cond_wake_up(&root->subv_writers->wait);
+ ASSERT(atomic_read(&root->will_be_snapshotted) == 1);
+ if (atomic_dec_and_test(&root->will_be_snapshotted))
+ wake_up_var(&root->will_be_snapshotted);
}
int btrfs_start_write_no_snapshotting(struct btrfs_root *root)
{
- if (atomic_read(&root->will_be_snapshotted))
- return 0;
-
- percpu_counter_inc(&root->subv_writers->counter);
- /*
- * Make sure counter is updated before we check for snapshot creation.
- */
- smp_mb();
- if (atomic_read(&root->will_be_snapshotted)) {
- btrfs_end_write_no_snapshotting(root);
- return 0;
- }
- return 1;
+ ASSERT(atomic_read(&root->will_be_snapshotted) >= 0);
+ return atomic_add_unless(&root->will_be_snapshotted, 1, 1);
}
void btrfs_wait_for_snapshot_creation(struct btrfs_root *root)
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 8774d4be7c97..f9f66c8a5dad 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -794,11 +794,7 @@ static int create_snapshot(struct btrfs_root *root, struct inode *dir,
* possible. This is to avoid later writeback (running dealloc) to
* fallback to COW mode and unexpectedly fail with ENOSPC.
*/
- atomic_inc(&root->will_be_snapshotted);
- smp_mb__after_atomic();
- /* wait for no snapshot writes */
- wait_event(root->subv_writers->wait,
- percpu_counter_sum(&root->subv_writers->counter) == 0);
+ btrfs_wait_for_snapshot_creation(root);
ret = btrfs_start_delalloc_snapshot(root);
if (ret)
@@ -878,8 +874,7 @@ static int create_snapshot(struct btrfs_root *root, struct inode *dir,
dec_and_free:
if (snapshot_force_cow)
atomic_dec(&root->snapshot_force_cow);
- if (atomic_dec_and_test(&root->will_be_snapshotted))
- wake_up_var(&root->will_be_snapshotted);
+ btrfs_end_write_no_snapshotting(root);
free_pending:
kfree(pending_snapshot->root_item);
btrfs_free_path(pending_snapshot->path);
--
2.17.1