[PATCH] btrfs: flushoncommit mount option

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



The 'flushoncommit' mount option forces any data dirtied by a write in a
prior transaction to commit as part of the current commit.  This makes
the committed state a fully consistent view of the file system from the
application's perspective (i.e., it includes all completed file system
operations).  Previously this was the behavior only when a snapshot was
created.

While we're at it, make sync_fs also commit a consistent view (even
without 'flushoncommit') by moving the start_delalloc_inodes and
wait_ordered_extents calls into commit_transaction.

This is used by Ceph to ensure that completed writes make it to the
platter along with the metadata operations they are bound to (by
BTRFS_IOC_TRANS_{START,END}).

I'm not entirely sure why snapshot creation previously didn't require a
call to start_delalloc_inodes while sync_fs did.  I suspect that the call
is either also desirable when snap_pending is set in commit_transaction,
or is not needed by sync_fs either...?

Let me know if this looks reasonable, or if you would prefer a different
approach.

Thanks-

Signed-off-by: Sage Weil <sage@xxxxxxxxxxxx>
---
 fs/btrfs/ctree.h       |    1 +
 fs/btrfs/disk-io.c     |    6 +++---
 fs/btrfs/extent-tree.c |    6 +++---
 fs/btrfs/file.c        |    4 ++--
 fs/btrfs/inode.c       |    2 +-
 fs/btrfs/ioctl.c       |    8 ++++----
 fs/btrfs/super.c       |   15 ++++++++-------
 fs/btrfs/transaction.c |   12 +++++++++---
 fs/btrfs/transaction.h |    3 ++-
 fs/btrfs/tree-log.c    |    2 +-
 fs/btrfs/volumes.c     |    4 ++--
 11 files changed, 36 insertions(+), 27 deletions(-)

diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 471fa67..019e7a7 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -951,6 +951,7 @@ struct btrfs_root {
 #define BTRFS_MOUNT_DEGRADED		(1 << 4)
 #define BTRFS_MOUNT_COMPRESS		(1 << 5)
 #define BTRFS_MOUNT_NOTREELOG           (1 << 6)
+#define BTRFS_MOUNT_FLUSHONCOMMIT       (1 << 7)
 
 #define btrfs_clear_opt(o, opt)		((o) &= ~BTRFS_MOUNT_##opt)
 #define btrfs_set_opt(o, opt)		((o) |= BTRFS_MOUNT_##opt)
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 7feac5a..2d4e7c0 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -1443,7 +1443,7 @@ static int transaction_kthread(void *arg)
 		}
 		mutex_unlock(&root->fs_info->trans_mutex);
 		trans = btrfs_start_transaction(root, 1);
-		ret = btrfs_commit_transaction(trans, root);
+		ret = btrfs_commit_transaction(trans, root, 0);
 sleep:
 		wake_up_process(root->fs_info->cleaner_kthread);
 		mutex_unlock(&root->fs_info->transaction_kthread_mutex);
@@ -2192,11 +2192,11 @@ int btrfs_commit_super(struct btrfs_root *root)
 	btrfs_clean_old_snapshots(root);
 	mutex_unlock(&root->fs_info->cleaner_mutex);
 	trans = btrfs_start_transaction(root, 1);
-	ret = btrfs_commit_transaction(trans, root);
+	ret = btrfs_commit_transaction(trans, root, 0);
 	BUG_ON(ret);
 	/* run commit again to drop the original snapshot */
 	trans = btrfs_start_transaction(root, 1);
-	btrfs_commit_transaction(trans, root);
+	btrfs_commit_transaction(trans, root, 0);
 	ret = btrfs_write_and_wait_transaction(NULL, root);
 	BUG_ON(ret);
 
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 3b26f09..b06d857 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -5021,7 +5021,7 @@ int btrfs_cleanup_reloc_trees(struct btrfs_root *root)
 	if (found) {
 		trans = btrfs_start_transaction(root, 1);
 		BUG_ON(!trans);
-		ret = btrfs_commit_transaction(trans, root);
+		ret = btrfs_commit_transaction(trans, root, 0);
 		BUG_ON(ret);
 	}
 
@@ -5642,7 +5642,7 @@ again:
 	cur_byte = key.objectid;
 
 	trans = btrfs_start_transaction(info->tree_root, 1);
-	btrfs_commit_transaction(trans, info->tree_root);
+	btrfs_commit_transaction(trans, info->tree_root, 0);
 
 	mutex_lock(&root->fs_info->cleaner_mutex);
 	btrfs_clean_old_snapshots(info->tree_root);
@@ -5728,7 +5728,7 @@ next:
 
 	/* unpin extents in this range */
 	trans = btrfs_start_transaction(info->tree_root, 1);
-	btrfs_commit_transaction(trans, info->tree_root);
+	btrfs_commit_transaction(trans, info->tree_root, 0);
 
 	spin_lock(&block_group->lock);
 	WARN_ON(block_group->pinned > 0);
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 3e8023e..158963a 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -1160,7 +1160,7 @@ out_nolock:
 				btrfs_sync_log(trans, root);
 				btrfs_end_transaction(trans, root);
 			} else {
-				btrfs_commit_transaction(trans, root);
+				btrfs_commit_transaction(trans, root, 0);
 			}
 		}
 		if (file->f_flags & O_DIRECT) {
@@ -1248,7 +1248,7 @@ int btrfs_sync_file(struct file *file, struct dentry *dentry, int datasync)
 	mutex_unlock(&file->f_dentry->d_inode->i_mutex);
 
 	if (ret > 0) {
-		ret = btrfs_commit_transaction(trans, root);
+		ret = btrfs_commit_transaction(trans, root, 0);
 	} else {
 		btrfs_sync_log(trans, root);
 		ret = btrfs_end_transaction(trans, root);
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 288c2cd..553278c 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -3285,7 +3285,7 @@ int btrfs_write_inode(struct inode *inode, int wait)
 	if (wait) {
 		trans = btrfs_join_transaction(root, 1);
 		btrfs_set_trans_block_group(trans, inode);
-		ret = btrfs_commit_transaction(trans, root);
+		ret = btrfs_commit_transaction(trans, root, 0);
 	}
 	return ret;
 }
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 988fdc8..f793814 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -166,7 +166,7 @@ static noinline int create_subvol(struct btrfs_root *root,
 
 	BUG_ON(ret);
 
-	ret = btrfs_commit_transaction(trans, root);
+	ret = btrfs_commit_transaction(trans, root, 0);
 	if (ret)
 		goto fail_commit;
 
@@ -183,7 +183,7 @@ static noinline int create_subvol(struct btrfs_root *root,
 
 fail:
 	nr = trans->blocks_used;
-	err = btrfs_commit_transaction(trans, new_root);
+	err = btrfs_commit_transaction(trans, new_root, 0);
 	if (err && !ret)
 		ret = err;
 fail_commit:
@@ -226,7 +226,7 @@ static int create_snapshot(struct btrfs_root *root, struct dentry *dentry,
 	pending_snapshot->root = root;
 	list_add(&pending_snapshot->list,
 		 &trans->transaction->pending_snapshots);
-	err = btrfs_commit_transaction(trans, root);
+	err = btrfs_commit_transaction(trans, root, 0);
 
 fail_unlock:
 	btrfs_btree_balance_dirty(root, nr);
@@ -538,7 +538,7 @@ static int btrfs_ioctl_resize(struct btrfs_root *root, void __user *arg)
 	if (new_size > old_size) {
 		trans = btrfs_start_transaction(root, 1);
 		ret = btrfs_grow_device(trans, device, new_size);
-		btrfs_commit_transaction(trans, root);
+		btrfs_commit_transaction(trans, root, 0);
 	} else {
 		ret = btrfs_shrink_device(device, new_size);
 	}
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index d8c664c..4c9f661 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -67,7 +67,7 @@ enum {
 	Opt_degraded, Opt_subvol, Opt_device, Opt_nodatasum, Opt_nodatacow,
 	Opt_max_extent, Opt_max_inline, Opt_alloc_start, Opt_nobarrier,
 	Opt_ssd, Opt_thread_pool, Opt_noacl,  Opt_compress, Opt_notreelog,
-	Opt_err,
+	Opt_flushoncommit, Opt_err,
 };
 
 static match_table_t tokens = {
@@ -85,6 +85,7 @@ static match_table_t tokens = {
 	{Opt_ssd, "ssd"},
 	{Opt_noacl, "noacl"},
 	{Opt_notreelog, "notreelog"},
+	{Opt_flushoncommit, "flushoncommit"},
 	{Opt_err, NULL},
 };
 
@@ -228,6 +229,10 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
 			printk(KERN_INFO "btrfs: disabling tree log\n");
 			btrfs_set_opt(info->mount_opt, NOTREELOG);
 			break;
+		case Opt_flushoncommit:
+			printk(KERN_INFO "btrfs: turning on flush-on-commit\n");
+			btrfs_set_opt(info->mount_opt, FLUSHONCOMMIT);
+			break;
 		default:
 			break;
 		}
@@ -369,9 +374,8 @@ fail_close:
 int btrfs_sync_fs(struct super_block *sb, int wait)
 {
 	struct btrfs_trans_handle *trans;
-	struct btrfs_root *root;
+	struct btrfs_root *root = btrfs_sb(sb);
 	int ret;
-	root = btrfs_sb(sb);
 
 	if (sb->s_flags & MS_RDONLY)
 		return 0;
@@ -382,12 +386,9 @@ int btrfs_sync_fs(struct super_block *sb, int wait)
 		return 0;
 	}
 
-	btrfs_start_delalloc_inodes(root);
-	btrfs_wait_ordered_extents(root, 0);
-
 	btrfs_clean_old_snapshots(root);
 	trans = btrfs_start_transaction(root, 1);
-	ret = btrfs_commit_transaction(trans, root);
+	ret = btrfs_commit_transaction(trans, root, 1);
 	sb->s_dirt = 0;
 	return ret;
 }
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 919172d..f687e66 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -881,7 +881,8 @@ static noinline int finish_pending_snapshots(struct btrfs_trans_handle *trans,
 }
 
 int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
-			     struct btrfs_root *root)
+			     struct btrfs_root *root,
+			     int ordered)
 {
 	unsigned long joined = 0;
 	unsigned long timeout = 1;
@@ -893,6 +894,9 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
 	DEFINE_WAIT(wait);
 	int ret;
 
+	if (btrfs_test_opt(root, FLUSHONCOMMIT))
+		ordered = 1;
+
 	INIT_LIST_HEAD(&dirty_fs_roots);
 	mutex_lock(&root->fs_info->trans_mutex);
 	if (trans->transaction->in_commit) {
@@ -951,8 +955,10 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
 			timeout = 1;
 
 		mutex_unlock(&root->fs_info->trans_mutex);
-
-		if (snap_pending) {
+		
+		if (ordered || snap_pending) {
+			if (ordered)
+				ret = btrfs_start_delalloc_inodes(root);
 			ret = btrfs_wait_ordered_extents(root, 1);
 			BUG_ON(ret);
 		}
diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h
index ea29211..e167b70 100644
--- a/fs/btrfs/transaction.h
+++ b/fs/btrfs/transaction.h
@@ -96,7 +96,8 @@ int btrfs_add_dead_root(struct btrfs_root *root, struct btrfs_root *latest);
 int btrfs_defrag_root(struct btrfs_root *root, int cacheonly);
 int btrfs_clean_old_snapshots(struct btrfs_root *root);
 int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
-			     struct btrfs_root *root);
+			     struct btrfs_root *root,
+			     int ordered);
 int btrfs_end_transaction_throttle(struct btrfs_trans_handle *trans,
 				   struct btrfs_root *root);
 void btrfs_throttle(struct btrfs_root *root);
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index ac58991..b01d6c2 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -2877,7 +2877,7 @@ again:
 	fs_info->log_root_recovering = 0;
 
 	/* step 4: commit the transaction, which also unpins the blocks */
-	btrfs_commit_transaction(trans, fs_info->tree_root);
+	btrfs_commit_transaction(trans, fs_info->tree_root, 0);
 
 	kfree(log_root_tree);
 	return 0;
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index fd0bedb..6cfec73 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -964,7 +964,7 @@ static int btrfs_rm_dev_item(struct btrfs_root *root,
 out:
 	btrfs_free_path(path);
 	unlock_chunks(root);
-	btrfs_commit_transaction(trans, root);
+	btrfs_commit_transaction(trans, root, 0);
 	return ret;
 }
 
@@ -1368,7 +1368,7 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
 	}
 
 	unlock_chunks(root);
-	btrfs_commit_transaction(trans, root);
+	btrfs_commit_transaction(trans, root, 0);
 
 	if (seeding_dev) {
 		mutex_unlock(&uuid_mutex);
-- 
1.5.6.5

--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[Index of Archives]     [Linux Filesystem Development]     [Linux NFS]     [Linux NILFS]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux