Re: [PATCH] nodatacow fix

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Chris Mason wrote:
On Tue, 2008-08-05 at 22:15 +0800, Yan Zheng wrote:
Hello,

This patch adapts the nodatacow code for the new data ordered code. Ordered
extents are used in all cases. It avoids writepage_start_hook kicking off
nodatacow IO contiguously. This patch also makes btrfs wait for ordered
extents before creating snapshots. This is important for nodatacow IO since
creating snapshots invalidates the results of reference checking.


Thanks Yan!  Can you please change this to make it only wait for
nodatacow ordered extents?

OK, here is the new patch.

Regards
YZ
---
diff -r b1c27a6f049b ctree.h
--- a/ctree.h	Mon Aug 04 23:23:47 2008 -0400
+++ b/ctree.h	Tue Aug 05 22:12:08 2008 +0800
@@ -1403,7 +1403,8 @@ static inline struct dentry *fdentry(str
}

/* extent-tree.c */
-int btrfs_cross_ref_exists(struct btrfs_root *root,
+int btrfs_cross_ref_exists(struct btrfs_trans_handle *trans,
+			   struct btrfs_root *root,
			   struct btrfs_key *key, u64 bytenr);
int btrfs_extent_post_op(struct btrfs_trans_handle *trans,
			 struct btrfs_root *root);
diff -r b1c27a6f049b extent-tree.c
--- a/extent-tree.c	Mon Aug 04 23:23:47 2008 -0400
+++ b/extent-tree.c	Wed Aug 06 00:07:51 2008 +0800
@@ -893,10 +893,10 @@ out:
	return ret;
}

-int btrfs_cross_ref_exists(struct btrfs_root *root,
+int btrfs_cross_ref_exists(struct btrfs_trans_handle *trans,
+			   struct btrfs_root *root,
			   struct btrfs_key *key, u64 bytenr)
{
-	struct btrfs_trans_handle *trans;
	struct btrfs_root *old_root;
	struct btrfs_path *path = NULL;
	struct extent_buffer *eb;
@@ -908,6 +908,7 @@ int btrfs_cross_ref_exists(struct btrfs_
	int level;
	int ret;

+	BUG_ON(trans == NULL);
	BUG_ON(key->type != BTRFS_EXTENT_DATA_KEY);
	ret = get_reference_status(root, bytenr, 0, key->objectid,
				   &min_generation, &ref_count);
@@ -917,7 +918,6 @@ int btrfs_cross_ref_exists(struct btrfs_
	if (ref_count != 1)
		return 1;

-	trans = btrfs_start_transaction(root, 0);
	old_root = root->dirty_root->root;
	ref_generation = old_root->root_key.offset;

@@ -973,7 +973,6 @@ out:
out:
	if (path)
		btrfs_free_path(path);
-	btrfs_end_transaction(trans, root);
	return ret;
}

@@ -3320,7 +3319,7 @@ again:
	mutex_unlock(&root->fs_info->alloc_mutex);

	btrfs_start_delalloc_inodes(root);
-	btrfs_wait_ordered_extents(tree_root);
+	btrfs_wait_ordered_extents(tree_root, 0);

	mutex_lock(&root->fs_info->alloc_mutex);

@@ -3407,7 +3406,7 @@ next:
		btrfs_clean_old_snapshots(tree_root);

		btrfs_start_delalloc_inodes(root);
-		btrfs_wait_ordered_extents(tree_root);
+		btrfs_wait_ordered_extents(tree_root, 0);

		trans = btrfs_start_transaction(tree_root, 1);
		btrfs_commit_transaction(trans, tree_root);
diff -r b1c27a6f049b inode.c
--- a/inode.c	Mon Aug 04 23:23:47 2008 -0400
+++ b/inode.c	Tue Aug 05 22:12:08 2008 +0800
@@ -166,7 +166,7 @@ static int cow_file_range(struct inode *

		cur_alloc_size = ins.offset;
		ret = btrfs_add_ordered_extent(inode, start, ins.objectid,
-					       ins.offset);
+					       ins.offset, 0);
		BUG_ON(ret);
		if (num_bytes < cur_alloc_size) {
			printk("num_bytes %Lu cur_alloc %Lu\n", num_bytes,
@@ -187,31 +187,32 @@ static int run_delalloc_nocow(struct ino
	u64 extent_start;
	u64 extent_end;
	u64 bytenr;
-	u64 cow_end;
	u64 loops = 0;
	u64 total_fs_bytes;
	struct btrfs_root *root = BTRFS_I(inode)->root;
	struct btrfs_block_group_cache *block_group;
+	struct btrfs_trans_handle *trans;
	struct extent_buffer *leaf;
	int found_type;
	struct btrfs_path *path;
	struct btrfs_file_extent_item *item;
	int ret;
-	int err;
+	int err = 0;
	struct btrfs_key found_key;

	total_fs_bytes = btrfs_super_total_bytes(&root->fs_info->super_copy);
	path = btrfs_alloc_path();
	BUG_ON(!path);
+	trans = btrfs_join_transaction(root, 1);
+	BUG_ON(!trans);
again:
	ret = btrfs_lookup_file_extent(NULL, root, path,
				       inode->i_ino, start, 0);
	if (ret < 0) {
-		btrfs_free_path(path);
-		return ret;
-	}
-
-	cow_end = end;
+		err = ret;
+		goto out;
+	}
+
	if (ret != 0) {
		if (path->slots[0] == 0)
			goto not_found;
@@ -244,12 +245,11 @@ again:
		if (start < extent_start || start >= extent_end)
			goto not_found;

-		cow_end = min(end, extent_end - 1);
		bytenr = btrfs_file_extent_disk_bytenr(leaf, item);
		if (bytenr == 0)
			goto not_found;

-		if (btrfs_cross_ref_exists(root, &found_key, bytenr))
+		if (btrfs_cross_ref_exists(trans, root, &found_key, bytenr))
			goto not_found;
		/*
		 * we may be called by the resizer, make sure we're inside
@@ -260,24 +260,32 @@ again:
		if (!block_group || block_group->ro)
			goto not_found;

+		bytenr += btrfs_file_extent_offset(leaf, item);
+		extent_num_bytes = min(end + 1, extent_end) - start;
+		ret = btrfs_add_ordered_extent(inode, start, bytenr,
+						extent_num_bytes, 1);
+		if (ret) {
+			err = ret;
+			goto out;
+		}
+
+		btrfs_release_path(root, path);
		start = extent_end;
+		if (start <= end) {
+			loops++;
+			goto again;
+		}
	} else {
-		goto not_found;
-	}
-loop:
-	if (start > end) {
+not_found:
+		btrfs_end_transaction(trans, root);
		btrfs_free_path(path);
-		return 0;
-	}
-	btrfs_release_path(root, path);
-	loops++;
-	goto again;
-
-not_found:
-	btrfs_release_path(root, path);
-	cow_file_range(inode, start, end);
-	start = end + 1;
-	goto loop;
+		return cow_file_range(inode, start, end);
+	}
+out:
+	WARN_ON(err);
+	btrfs_end_transaction(trans, root);
+	btrfs_free_path(path);
+	return err;
}

static int run_delalloc_range(struct inode *inode, u64 start, u64 end)
@@ -382,6 +390,11 @@ int btrfs_submit_bio_hook(struct inode *
	BUG_ON(ret);

	if (!(rw & (1 << BIO_RW))) {
+		goto mapit;
+	}
+
+	if (btrfs_test_opt(root, NODATASUM) ||
+	    btrfs_test_flag(inode, NODATASUM)) {
		goto mapit;
	}

@@ -527,6 +540,8 @@ static int btrfs_finish_ordered_io(struc

	ordered_extent = btrfs_lookup_ordered_extent(inode, start);
	BUG_ON(!ordered_extent);
+	if (test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags))
+		goto nocow;

	lock_extent(io_tree, ordered_extent->file_offset,
		    ordered_extent->file_offset + ordered_extent->len - 1,
@@ -567,6 +582,7 @@ static int btrfs_finish_ordered_io(struc
	unlock_extent(io_tree, ordered_extent->file_offset,
		    ordered_extent->file_offset + ordered_extent->len - 1,
		    GFP_NOFS);
+nocow:
	add_pending_csums(trans, inode, ordered_extent->file_offset,
			  &ordered_extent->list);

diff -r b1c27a6f049b ioctl.c
--- a/ioctl.c	Mon Aug 04 23:23:47 2008 -0400
+++ b/ioctl.c	Tue Aug 05 22:12:08 2008 +0800
@@ -36,6 +36,7 @@
#include <linux/bit_spinlock.h>
#include <linux/version.h>
#include <linux/xattr.h>
+#include <linux/vmalloc.h>
#include "ctree.h"
#include "disk-io.h"
#include "transaction.h"
diff -r b1c27a6f049b ordered-data.c
--- a/ordered-data.c	Mon Aug 04 23:23:47 2008 -0400
+++ b/ordered-data.c	Wed Aug 06 00:41:00 2008 +0800
@@ -152,7 +152,7 @@ static inline struct rb_node *tree_searc
 * inserted.
 */
int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset,
-			     u64 start, u64 len)
+			     u64 start, u64 len, int nocow)
{
	struct btrfs_ordered_inode_tree *tree;
	struct rb_node *node;
@@ -168,6 +168,8 @@ int btrfs_add_ordered_extent(struct inod
	entry->start = start;
	entry->len = len;
	entry->inode = inode;
+	if (nocow)
+		set_bit(BTRFS_ORDERED_NOCOW, &entry->flags);

	/* one ref for the tree */
	atomic_set(&entry->refs, 1);
@@ -303,10 +305,11 @@ int btrfs_remove_ordered_extent(struct i
	return 0;
}

-int btrfs_wait_ordered_extents(struct btrfs_root *root)
+int btrfs_wait_ordered_extents(struct btrfs_root *root, int nocow_only)
{
	struct list_head splice;
	struct list_head *cur;
+	struct list_head *tmp;
	struct btrfs_ordered_extent *ordered;
	struct inode *inode;

@@ -314,10 +317,16 @@ int btrfs_wait_ordered_extents(struct bt

	spin_lock(&root->fs_info->ordered_extent_lock);
	list_splice_init(&root->fs_info->ordered_extents, &splice);
-	while(!list_empty(&splice)) {
+	list_for_each_safe(cur, tmp, &splice) {
		cur = splice.next;
		ordered = list_entry(cur, struct btrfs_ordered_extent,
				     root_extent_list);
+		if (nocow_only &&
+		    !test_bit(BTRFS_ORDERED_NOCOW, &ordered->flags)) {
+			cond_resched_lock(&root->fs_info->ordered_extent_lock);
+			continue;
+		}
+
		list_del_init(&ordered->root_extent_list);
		atomic_inc(&ordered->refs);
		inode = ordered->inode;
@@ -338,6 +347,7 @@ int btrfs_wait_ordered_extents(struct bt

		spin_lock(&root->fs_info->ordered_extent_lock);
	}
+	list_splice_init(&splice, &root->fs_info->ordered_extents);
	spin_unlock(&root->fs_info->ordered_extent_lock);
	return 0;
}
diff -r b1c27a6f049b ordered-data.h
--- a/ordered-data.h	Mon Aug 04 23:23:47 2008 -0400
+++ b/ordered-data.h	Wed Aug 06 00:07:08 2008 +0800
@@ -64,6 +64,8 @@ struct btrfs_ordered_sum {

#define BTRFS_ORDERED_COMPLETE 1 /* set when removed from the tree */

+#define BTRFS_ORDERED_NOCOW 2 /* set when we want to write in place */
+
struct btrfs_ordered_extent {
	/* logical offset in the file */
	u64 file_offset;
@@ -125,7 +127,7 @@ int btrfs_dec_test_ordered_pending(struc
int btrfs_dec_test_ordered_pending(struct inode *inode,
				       u64 file_offset, u64 io_size);
int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset,
-			     u64 start, u64 len);
+			     u64 start, u64 len, int nocow);
int btrfs_add_ordered_sum(struct inode *inode,
			  struct btrfs_ordered_extent *entry,
			  struct btrfs_ordered_sum *sum);
@@ -143,5 +145,5 @@ int btrfs_wait_on_page_writeback_range(s
				       pgoff_t start, pgoff_t end);
int btrfs_fdatawrite_range(struct address_space *mapping, loff_t start,
			   loff_t end, int sync_mode);
-int btrfs_wait_ordered_extents(struct btrfs_root *root);
+int btrfs_wait_ordered_extents(struct btrfs_root *root, int nocow_only);
#endif
diff -r b1c27a6f049b transaction.c
--- a/transaction.c	Mon Aug 04 23:23:47 2008 -0400
+++ b/transaction.c	Wed Aug 06 00:08:20 2008 +0800
@@ -438,6 +438,7 @@ static noinline int add_dirty_roots(stru

				free_extent_buffer(root->commit_root);
				root->commit_root = NULL;
+				root->dirty_root = NULL;

				spin_lock(&root->list_lock);
				list_del_init(&dirty->root->dead_list);
@@ -461,6 +462,7 @@ static noinline int add_dirty_roots(stru
			       sizeof(struct btrfs_disk_key));
			root->root_item.drop_level = 0;
			root->commit_root = NULL;
+			root->dirty_root = NULL;
			root->root_key.offset = root->fs_info->generation;
			btrfs_set_root_bytenr(&root->root_item,
					      root->node->start);
@@ -762,7 +764,11 @@ int btrfs_commit_transaction(struct btrf
	}

	do {
+		int snap_pending = 0;
		joined = cur_trans->num_joined;
+		if (!list_empty(&trans->transaction->pending_snapshots))
+			snap_pending = 1;
+
		WARN_ON(cur_trans != trans->transaction);
		prepare_to_wait(&cur_trans->writer_wait, &wait,
				TASK_UNINTERRUPTIBLE);
@@ -773,6 +779,11 @@ int btrfs_commit_transaction(struct btrf
			timeout = 1;

		mutex_unlock(&root->fs_info->trans_mutex);
+
+		if (snap_pending) {
+			ret = btrfs_wait_ordered_extents(root, 1);
+			BUG_ON(ret);
+		}

		schedule_timeout(timeout);

--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[Index of Archives]     [Linux Filesystem Development]     [Linux NFS]     [Linux NILFS]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux