[PATCH 1/5] btrfs: extend readahead interface

This extends the readahead interface with callbacks. The old readahead
behaviour is moved into a callback that is used by default when no
other callback is given. For a detailed description of the callbacks,
see the inline comments in reada.c.
It also fixes some cases where the hook was not called. This was not a
problem with the default callback, which merely cuts some branches from
readahead, but with the callback mechanism we want guaranteed delivery.
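
As an illustration, a custom callback might look like the following
sketch. It is a simplified variant of the default callback added by this
patch (readahead_descend); the key range and wanted_generation checks
are omitted and my_readahead_cb is a hypothetical name:

static void my_readahead_cb(struct btrfs_root *root,
			    struct reada_control *rc,
			    u64 wanted_generation, struct extent_buffer *eb,
			    u64 start, int err, struct btrfs_key *top,
			    void *ctx)
{
	int nritems;
	int i;

	if (err || eb == NULL) {
		/*
		 * delivery is guaranteed, so this is also reached on
		 * error; release any private refs held in ctx here
		 */
		return;
	}
	if (btrfs_header_level(eb) == 0)
		return;	/* leaf, nothing further to enqueue */

	nritems = btrfs_header_nritems(eb);
	for (i = 0; i < nritems; i++) {
		/* each enqueued block adds one to rc->elems */
		reada_add_block(rc, btrfs_node_blockptr(eb, i), top,
				btrfs_header_level(eb) - 1,
				btrfs_node_ptr_generation(eb, i), ctx);
	}
}
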
This patch also makes readaheads hierarchical: a readahead can have
sub-readaheads. The idea is that the content of one tree can trigger
readaheads on other trees.
Also added is a function (btrfs_reada_abort) to cancel all outstanding
requests for a given readahead and all of its sub-readaheads.
As the interface changes slightly, scrub is updated to match.
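
On the caller side, a plain foreground readahead with the default
callback now looks roughly like this (key_start/key_end prepared by
the caller, error handling abbreviated):

	struct reada_control *rc;
	int ret;

	ret = btrfs_reada_add(NULL, root, &key_start, &key_end,
			      NULL /* default callback */, NULL, &rc);
	if (ret == 0)
		btrfs_reada_wait(rc);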

Signed-off-by: Arne Jansen <sensille@xxxxxxx>
---
 fs/btrfs/ctree.h |   37 ++++-
 fs/btrfs/reada.c |  481 ++++++++++++++++++++++++++++++++++++++++++------------
 fs/btrfs/scrub.c |   29 ++--
 3 files changed, 420 insertions(+), 127 deletions(-)

diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 8e4457e..52b8a91 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -3020,6 +3020,13 @@ int btrfs_scrub_progress(struct btrfs_root *root, u64 devid,
 			 struct btrfs_scrub_progress *progress);
 
 /* reada.c */
+#undef READA_DEBUG
+struct reada_extctl;
+struct reada_control;
+typedef void (*reada_cb_t)(struct btrfs_root *root, struct reada_control *rc,
+			   u64 wanted_generation, struct extent_buffer *eb,
+			   u64 start, int err, struct btrfs_key *top,
+			   void *ctx);
 struct reada_control {
 	struct btrfs_root	*root;		/* tree to prefetch */
 	struct btrfs_key	key_start;
@@ -3027,12 +3034,34 @@ struct reada_control {
 	atomic_t		elems;
 	struct kref		refcnt;
 	wait_queue_head_t	wait;
+	struct reada_control	*parent;
+	reada_cb_t		callback;
+#ifdef READA_DEBUG
+	int			not_first;
+#endif
 };
-struct reada_control *btrfs_reada_add(struct btrfs_root *root,
-			      struct btrfs_key *start, struct btrfs_key *end);
-int btrfs_reada_wait(void *handle);
+struct reada_control *btrfs_reada_alloc(struct reada_control *parent,
+			struct btrfs_root *root,
+			struct btrfs_key *key_start, struct btrfs_key *key_end,
+			reada_cb_t callback);
+int btrfs_reada_add(struct reada_control *parent,
+			struct btrfs_root *root,
+			struct btrfs_key *key_start, struct btrfs_key *key_end,
+			reada_cb_t callback, void *ctx,
+			struct reada_control **rcp);
+int btrfs_reada_wait(struct reada_control *handle);
 void btrfs_reada_detach(void *handle);
 int btree_readahead_hook(struct btrfs_root *root, struct extent_buffer *eb,
 			 u64 start, int err);
-
+int reada_add_block(struct reada_control *rc, u64 logical,
+		   struct btrfs_key *top, int level, u64 generation, void *ctx);
+void reada_control_elem_get(struct reada_control *rc);
+void reada_control_elem_put(struct reada_control *rc);
+void reada_start_machine(struct btrfs_fs_info *fs_info);
+int btrfs_reada_abort(struct btrfs_fs_info *fs_info, struct reada_control *rc);
+
+/* droptree.c */
+int btrfs_droptree_pause(struct btrfs_fs_info *fs_info);
+void btrfs_droptree_continue(struct btrfs_fs_info *fs_info);
+void droptree_drop_list(struct btrfs_fs_info *fs_info, struct list_head *list);
 #endif
diff --git a/fs/btrfs/reada.c b/fs/btrfs/reada.c
index 2373b39..0d88163 100644
--- a/fs/btrfs/reada.c
+++ b/fs/btrfs/reada.c
@@ -27,18 +27,18 @@
 #include "volumes.h"
 #include "disk-io.h"
 #include "transaction.h"
-
-#undef DEBUG
+#include "locking.h"
 
 /*
  * This is the implementation for the generic read ahead framework.
  *
  * To trigger a readahead, btrfs_reada_add must be called. It will start
- * a read ahead for the given range [start, end) on tree root. The returned
+ * a readahead for the given range [start, end) on tree root. The returned
  * handle can either be used to wait on the readahead to finish
  * (btrfs_reada_wait), or to send it to the background (btrfs_reada_detach).
+ * If no return pointer is given, the readahead is started in the background.
  *
- * The read ahead works as follows:
+ * The readahead works as follows:
  * On btrfs_reada_add, the root of the tree is inserted into a radix_tree.
  * reada_start_machine will then search for extents to prefetch and trigger
  * some reads. When a read finishes for a node, all contained node/leaf
@@ -52,6 +52,27 @@
  * Any number of readaheads can be started in parallel. The read order will be
  * determined globally, i.e. 2 parallel readaheads will normally finish faster
  * than the 2 started one after another.
+ *
+ * In addition to the default behaviour, a callback can be passed to
+ * btrfs_reada_add. This callback is called for each completed read, in an
+ * unspecified order. It can then enqueue further reada requests via
+ * reada_add_block or create sub-readaheads with btrfs_reada_add (detached).
+ * The rules for custom callbacks are:
+ *  - The elem count must never drop to zero unless the reada is completed.
+ *    So either enqueue further blocks or create sub-readaheads with this rc
+ *    as parent. Each sub-readahead will add one to the parent's element
+ *    count. If you need to defer some work, keep the count from dropping to
+ *    zero by calling reada_control_elem_get(). When finished, return the
+ *    element with reada_control_elem_put(). This might also free the rc.
+ *  - The extent buffer passed to the callback will be read locked, with the
+ *    lock set to blocking, so the callback may sleep.
+ *  - The callback is called in the context of the checksum workers.
+ *  - The callback is also called if the read failed. This is signaled via
+ *    the err parameter. In this case the eb might be NULL. Make sure to
+ *    properly update your data structures even in error cases, so that no
+ *    refs are left behind.
+ *
+ * If no callback is given, the default callback is used giving the initially
+ * described behaviour.
  */
 
 #define MAX_MIRRORS 2
@@ -60,6 +81,7 @@
 struct reada_extctl {
 	struct list_head	list;
 	struct reada_control	*rc;
+	void			*ctx;
 	u64			generation;
 };
 
@@ -97,30 +119,87 @@ struct reada_machine_work {
 static void reada_extent_put(struct btrfs_fs_info *, struct reada_extent *);
 static void reada_control_release(struct kref *kref);
 static void reada_zone_release(struct kref *kref);
-static void reada_start_machine(struct btrfs_fs_info *fs_info);
 static void __reada_start_machine(struct btrfs_fs_info *fs_info);
 
-static int reada_add_block(struct reada_control *rc, u64 logical,
-			   struct btrfs_key *top, int level, u64 generation);
+/*
+ * this is the default callback for readahead. It just descends into the
+ * tree within the range given at creation. If an error occurs, the
+ * affected branch is simply cut from the readahead.
+ */
+static void readahead_descend(struct btrfs_root *root, struct reada_control *rc,
+			      u64 wanted_generation, struct extent_buffer *eb,
+			      u64 start, int err, struct btrfs_key *top,
+			      void *ctx)
+{
+	int nritems;
+	u64 generation;
+	int level;
+	int i;
+
+	/*
+	 * FIXME: cancellation (err == -EAGAIN) is not yet implemented;
+	 * don't cancel readaheads that use the default callback
+	 */
+	BUG_ON(err == -EAGAIN);
+
+	if (err || eb == NULL) {
+		/*
+		 * this is the error case, the extent buffer has not been
+		 * read correctly. We won't access anything from it and
+		 * just cleanup our data structures. Effectively this will
+		 * cut the branch below this node from read ahead.
+		 */
+		return;
+	}
+
+	level = btrfs_header_level(eb);
+	if (level == 0) {
+		/*
+		 * if this is a leaf, ignore the content.
+		 */
+		return;
+	}
+
+	nritems = btrfs_header_nritems(eb);
+	generation = btrfs_header_generation(eb);
+
+	/*
+	 * if the generation doesn't match, just ignore this node.
+	 * This will cut off a branch from prefetch. Alternatively one could
+	 * start a new (sub-) prefetch for this branch, starting again from
+	 * root.
+	 */
+	if (wanted_generation != generation)
+		return;
+
+	for (i = 0; i < nritems; i++) {
+		u64 n_gen;
+		struct btrfs_key key;
+		struct btrfs_key next_key;
+		u64 bytenr;
+
+		btrfs_node_key_to_cpu(eb, &key, i);
+		if (i + 1 < nritems)
+			btrfs_node_key_to_cpu(eb, &next_key, i + 1);
+		else
+			next_key = *top;
+		bytenr = btrfs_node_blockptr(eb, i);
+		n_gen = btrfs_node_ptr_generation(eb, i);
+
+		if (btrfs_comp_cpu_keys(&key, &rc->key_end) < 0 &&
+		    btrfs_comp_cpu_keys(&next_key, &rc->key_start) > 0)
+			reada_add_block(rc, bytenr, &next_key,
+					level - 1, n_gen, ctx);
+	}
+}
 
-/* recurses */
 /* in case of err, eb might be NULL */
 static int __readahead_hook(struct btrfs_root *root, struct extent_buffer *eb,
 			    u64 start, int err)
 {
-	int level = 0;
-	int nritems;
-	int i;
-	u64 bytenr;
-	u64 generation;
 	struct reada_extent *re;
 	struct btrfs_fs_info *fs_info = root->fs_info;
 	struct list_head list;
 	unsigned long index = start >> PAGE_CACHE_SHIFT;
 	struct btrfs_device *for_dev;
-
-	if (eb)
-		level = btrfs_header_level(eb);
+	struct reada_extctl *rec;
 
 	/* find extent */
 	spin_lock(&fs_info->reada_lock);
@@ -142,65 +221,21 @@ static int __readahead_hook(struct btrfs_root *root, struct extent_buffer *eb,
 	re->scheduled_for = NULL;
 	spin_unlock(&re->lock);
 
-	if (err == 0) {
-		nritems = level ? btrfs_header_nritems(eb) : 0;
-		generation = btrfs_header_generation(eb);
-		/*
-		 * FIXME: currently we just set nritems to 0 if this is a leaf,
-		 * effectively ignoring the content. In a next step we could
-		 * trigger more readahead depending from the content, e.g.
-		 * fetch the checksums for the extents in the leaf.
-		 */
-	} else {
+	/*
+	 * call hooks for all registered readaheads
+	 */
+	list_for_each_entry(rec, &list, list) {
 		/*
-		 * this is the error case, the extent buffer has not been
-		 * read correctly. We won't access anything from it and
-		 * just cleanup our data structures. Effectively this will
-		 * cut the branch below this node from read ahead.
+		 * in the error case eb can be NULL, so only take the lock
+		 * when we have a buffer. We set the lock to blocking, as
+		 * the callback might want to sleep on allocations.
 		 */
-		nritems = 0;
-		generation = 0;
+		if (eb) {
+			btrfs_tree_read_lock(eb);
+			btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK);
+		}
+		rec->rc->callback(root, rec->rc, rec->generation, eb, start,
+				  err, &re->top, rec->ctx);
+		if (eb)
+			btrfs_tree_read_unlock_blocking(eb);
 	}
 
-	for (i = 0; i < nritems; i++) {
-		struct reada_extctl *rec;
-		u64 n_gen;
-		struct btrfs_key key;
-		struct btrfs_key next_key;
-
-		btrfs_node_key_to_cpu(eb, &key, i);
-		if (i + 1 < nritems)
-			btrfs_node_key_to_cpu(eb, &next_key, i + 1);
-		else
-			next_key = re->top;
-		bytenr = btrfs_node_blockptr(eb, i);
-		n_gen = btrfs_node_ptr_generation(eb, i);
-
-		list_for_each_entry(rec, &list, list) {
-			struct reada_control *rc = rec->rc;
-
-			/*
-			 * if the generation doesn't match, just ignore this
-			 * extctl. This will probably cut off a branch from
-			 * prefetch. Alternatively one could start a new (sub-)
-			 * prefetch for this branch, starting again from root.
-			 * FIXME: move the generation check out of this loop
-			 */
-#ifdef DEBUG
-			if (rec->generation != generation) {
-				printk(KERN_DEBUG "generation mismatch for "
-						"(%llu,%d,%llu) %llu != %llu\n",
-				       key.objectid, key.type, key.offset,
-				       rec->generation, generation);
-			}
-#endif
-			if (rec->generation == generation &&
-			    btrfs_comp_cpu_keys(&key, &rc->key_end) < 0 &&
-			    btrfs_comp_cpu_keys(&next_key, &rc->key_start) > 0)
-				reada_add_block(rc, bytenr, &next_key,
-						level - 1, n_gen);
-		}
-	}
 	/*
 	 * free extctl records
 	 */
@@ -213,12 +248,7 @@ static int __readahead_hook(struct btrfs_root *root, struct extent_buffer *eb,
 		rc = rec->rc;
 		kfree(rec);
 
-		kref_get(&rc->refcnt);
-		if (atomic_dec_and_test(&rc->elems)) {
-			kref_put(&rc->refcnt, reada_control_release);
-			wake_up(&rc->wait);
-		}
-		kref_put(&rc->refcnt, reada_control_release);
+		reada_control_elem_put(rc);
 
 		reada_extent_put(fs_info, re);	/* one ref for each entry */
 	}
@@ -352,7 +382,8 @@ again:
 	blocksize = btrfs_level_size(root, level);
 	re->logical = logical;
 	re->blocksize = blocksize;
-	re->top = *top;
+	if (top)
+		re->top = *top;
 	INIT_LIST_HEAD(&re->extctl);
 	spin_lock_init(&re->lock);
 	kref_init(&re->refcnt);
@@ -503,6 +534,47 @@ static void reada_extent_put(struct btrfs_fs_info *fs_info,
 	kfree(re);
 }
 
+void reada_control_elem_get(struct reada_control *rc)
+{
+#ifndef READA_DEBUG
+	atomic_inc(&rc->elems);
+#else
+	int new = atomic_inc_return(&rc->elems);
+
+	if (rc->not_first && new == 1) {
+		/*
+		 * warn if we try to get an elem although it
+		 * was already down to zero
+		 */
+		WARN_ON(1);
+	}
+	rc->not_first = 1;
+#endif
+}
+
+void reada_control_elem_put(struct reada_control *rc)
+{
+	struct reada_control *next_rc;
+
+	do {
+		next_rc = NULL;
+		kref_get(&rc->refcnt);
+		if (atomic_dec_and_test(&rc->elems)) {
+			/*
+			 * when the last elem is finished, wake all
+			 * waiters. Also, if we have a parent, remove
+			 * our element from there and wake the waiters.
+			 * Walk up the chain of parents as long as
+			 * we finish the last elem. Drop our ref.
+			 */
+			kref_put(&rc->refcnt, reada_control_release);
+			wake_up(&rc->wait);
+			next_rc = rc->parent;
+		}
+		kref_put(&rc->refcnt, reada_control_release);
+	} while ((rc = next_rc));
+}
+
 static void reada_zone_release(struct kref *kref)
 {
 	struct reada_zone *zone = container_of(kref, struct reada_zone, refcnt);
@@ -521,12 +593,87 @@ static void reada_control_release(struct kref *kref)
 	kfree(rc);
 }
 
-static int reada_add_block(struct reada_control *rc, u64 logical,
-			   struct btrfs_key *top, int level, u64 generation)
+/*
+ * context to pass from reada_add_block to worker in case the extent is
+ * already uptodate in memory
+ */
+struct reada_uptodate_ctx {
+	struct btrfs_key	top;
+	struct extent_buffer	*eb;
+	struct reada_control	*rc;
+	u64			logical;
+	u64			generation;
+	void			*ctx;
+	struct btrfs_work	work;
+};
+
+/* worker for immediate processing of uptodate blocks */
+static void reada_add_block_uptodate(struct btrfs_work *work)
+{
+	struct reada_uptodate_ctx *ruc;
+
+	ruc = container_of(work, struct reada_uptodate_ctx, work);
+
+	btrfs_tree_read_lock(ruc->eb);
+	/*
+	 * we set the lock to blocking, as the callback might want to sleep
+	 * on allocations.
+	 */
+	btrfs_set_lock_blocking_rw(ruc->eb, BTRFS_READ_LOCK);
+	ruc->rc->callback(ruc->rc->root, ruc->rc, ruc->generation, ruc->eb,
+			 ruc->logical, 0, &ruc->top, ruc->ctx);
+	btrfs_tree_read_unlock_blocking(ruc->eb);
+
+	reada_control_elem_put(ruc->rc);
+	free_extent_buffer(ruc->eb);
+	kfree(ruc);
+}
+
+int reada_add_block(struct reada_control *rc, u64 logical,
+		    struct btrfs_key *top, int level, u64 generation,
+		    void *ctx)
 {
 	struct btrfs_root *root = rc->root;
 	struct reada_extent *re;
 	struct reada_extctl *rec;
+	struct extent_buffer *eb;
+
+	/*
+	 * first check if the buffer is already uptodate in memory. In this
+	 * case it wouldn't make much sense to go through the reada dance.
+	 * Instead process it as soon as possible, but in worker context to
+	 * prevent recursion.
+	 */
+	eb = btrfs_find_tree_block(root, logical,
+				   btrfs_level_size(root, level));
+
+	if (eb && btrfs_buffer_uptodate(eb, generation)) {
+		struct reada_uptodate_ctx *ruc;
+
+		ruc = kzalloc(sizeof(*ruc), GFP_NOFS);
+		if (!ruc) {
+			free_extent_buffer(eb);
+			return -ENOMEM;
+		}
+		ruc->rc = rc;
+		ruc->ctx = ctx;
+		ruc->generation = generation;
+		ruc->logical = logical;
+		ruc->eb = eb;
+		if (top)
+			ruc->top = *top;
+		ruc->work.func = reada_add_block_uptodate;
+		reada_control_elem_get(rc);
+
+		btrfs_queue_worker(&root->fs_info->readahead_workers,
+				   &ruc->work);
+
+		return 0;
+	}
+	if (eb)
+		free_extent_buffer(eb);
 
 	re = reada_find_extent(root, logical, top, level); /* takes one ref */
 	if (!re)
@@ -539,14 +686,17 @@ static int reada_add_block(struct reada_control *rc, u64 logical,
 	}
 
 	rec->rc = rc;
+	rec->ctx = ctx;
 	rec->generation = generation;
-	atomic_inc(&rc->elems);
+	reada_control_elem_get(rc);
 
 	spin_lock(&re->lock);
 	list_add_tail(&rec->list, &re->extctl);
 	spin_unlock(&re->lock);
 
-	/* leave the ref on the extent */
+	reada_start_machine(root->fs_info);
+
+	/* leave the ref on re */
 
 	return 0;
 }
@@ -750,10 +900,14 @@ static void __reada_start_machine(struct btrfs_fs_info *fs_info)
 		reada_start_machine(fs_info);
 }
 
-static void reada_start_machine(struct btrfs_fs_info *fs_info)
+void reada_start_machine(struct btrfs_fs_info *fs_info)
 {
 	struct reada_machine_work *rmw;
 
+	/*
+	 * FIXME if there are still requests in flight, we don't need to
+	 * kick a worker. Add a check to prevent unnecessary work
+	 */
 	rmw = kzalloc(sizeof(*rmw), GFP_NOFS);
 	if (!rmw) {
 		/* FIXME we cannot handle this properly right now */
@@ -765,7 +919,7 @@ static void reada_start_machine(struct btrfs_fs_info *fs_info)
 	btrfs_queue_worker(&fs_info->readahead_workers, &rmw->work);
 }
 
-#ifdef DEBUG
+#ifdef READA_DEBUG
 static void dump_devs(struct btrfs_fs_info *fs_info, int all)
 {
 	struct btrfs_device *device;
@@ -870,15 +1024,49 @@ static void dump_devs(struct btrfs_fs_info *fs_info, int all)
 #endif
 
 /*
- * interface
+ * if parent is given, the caller has to hold a ref on parent
  */
-struct reada_control *btrfs_reada_add(struct btrfs_root *root,
-			struct btrfs_key *key_start, struct btrfs_key *key_end)
+struct reada_control *btrfs_reada_alloc(struct reada_control *parent,
+			struct btrfs_root *root,
+			struct btrfs_key *key_start, struct btrfs_key *key_end,
+			reada_cb_t callback)
+{
+	struct reada_control *rc;
+
+	rc = kzalloc(sizeof(*rc), GFP_NOFS);
+	if (!rc)
+		return ERR_PTR(-ENOMEM);
+
+	rc->root = root;
+	rc->parent = parent;
+	rc->callback = callback ? callback : readahead_descend;
+	if (key_start)
+		rc->key_start = *key_start;
+	if (key_end)
+		rc->key_end = *key_end;
+	atomic_set(&rc->elems, 0);
+	init_waitqueue_head(&rc->wait);
+	kref_init(&rc->refcnt);
+	if (parent) {
+		/*
+		 * we just add one element to the parent as long as we're
+		 * not finished
+		 */
+		reada_control_elem_get(parent);
+	}
+
+	return rc;
+}
+
+int btrfs_reada_add(struct reada_control *parent, struct btrfs_root *root,
+		    struct btrfs_key *key_start, struct btrfs_key *key_end,
+		    reada_cb_t callback, void *ctx, struct reada_control **rcp)
 {
 	struct reada_control *rc;
 	u64 start;
 	u64 generation;
 	int level;
+	int ret;
 	struct extent_buffer *node;
 	static struct btrfs_key max_key = {
 		.objectid = (u64)-1,
@@ -886,17 +1074,18 @@ struct reada_control *btrfs_reada_add(struct btrfs_root *root,
 		.offset = (u64)-1
 	};
 
-	rc = kzalloc(sizeof(*rc), GFP_NOFS);
+	rc = btrfs_reada_alloc(parent, root, key_start, key_end, callback);
-	if (!rc)
-		return ERR_PTR(-ENOMEM);
+	if (IS_ERR(rc))
+		return PTR_ERR(rc);
 
-	rc->root = root;
-	rc->key_start = *key_start;
-	rc->key_end = *key_end;
-	atomic_set(&rc->elems, 0);
-	init_waitqueue_head(&rc->wait);
-	kref_init(&rc->refcnt);
-	kref_get(&rc->refcnt); /* one ref for having elements */
+	if (rcp) {
+		*rcp = rc;
+		/*
+		 * as we return the rc, take an additional ref on it for
+		 * the caller
+		 */
+		kref_get(&rc->refcnt);
+	}
 
 	node = btrfs_root_node(root);
 	start = node->start;
@@ -904,35 +1093,36 @@ struct reada_control *btrfs_reada_add(struct btrfs_root *root,
 	generation = btrfs_header_generation(node);
 	free_extent_buffer(node);
 
-	reada_add_block(rc, start, &max_key, level, generation);
+	ret = reada_add_block(rc, start, &max_key, level, generation, ctx);
 
 	reada_start_machine(root->fs_info);
 
-	return rc;
+	return ret;
 }
 
-#ifdef DEBUG
-int btrfs_reada_wait(void *handle)
+#ifdef READA_DEBUG
+int btrfs_reada_wait(struct reada_control *rc)
 {
-	struct reada_control *rc = handle;
+	struct btrfs_fs_info *fs_info = rc->root->fs_info;
+	int i;
 
 	while (atomic_read(&rc->elems)) {
 		wait_event_timeout(rc->wait, atomic_read(&rc->elems) == 0,
-				   5 * HZ);
-		dump_devs(rc->root->fs_info, rc->elems < 10 ? 1 : 0);
+				   1 * HZ);
+		dump_devs(fs_info, atomic_read(&rc->elems) < 10 ? 1 : 0);
+		printk(KERN_DEBUG "reada_wait on %p: %d elems\n", rc,
+			atomic_read(&rc->elems));
 	}
 
-	dump_devs(rc->root->fs_info, rc->elems < 10 ? 1 : 0);
+	dump_devs(fs_info, atomic_read(&rc->elems) < 10 ? 1 : 0);
 
 	kref_put(&rc->refcnt, reada_control_release);
 
 	return 0;
 }
 #else
-int btrfs_reada_wait(void *handle)
+int btrfs_reada_wait(struct reada_control *rc)
 {
-	struct reada_control *rc = handle;
-
 	while (atomic_read(&rc->elems)) {
 		wait_event(rc->wait, atomic_read(&rc->elems) == 0);
 	}
@@ -949,3 +1139,80 @@ void btrfs_reada_detach(void *handle)
 
 	kref_put(&rc->refcnt, reada_control_release);
 }
+
+/*
+ * abort all readahead for a specific reada_control.
+ * This function does not wait for outstanding requests to finish, so
+ * when it returns, the abort is not fully complete. It cancels all
+ * currently enqueued readaheads for the given rc and all of its
+ * children.
+ */
+int btrfs_reada_abort(struct btrfs_fs_info *fs_info, struct reada_control *rc)
+{
+	struct reada_extent *re = NULL;
+	struct list_head list;
+	int ret;
+	u64 logical = 0;
+	struct reada_extctl *rec;
+	struct reada_extctl *tmp;
+
+	INIT_LIST_HEAD(&list);
+
+	while (1) {
+		spin_lock(&fs_info->reada_lock);
+		ret = radix_tree_gang_lookup(&fs_info->reada_tree, (void **)&re,
+					     logical >> PAGE_CACHE_SHIFT, 1);
+		if (ret == 1)
+			kref_get(&re->refcnt);
+		spin_unlock(&fs_info->reada_lock);
+
+		if (ret != 1)
+			break;
+
+		/*
+		 * take out all extctls that should get deleted into another
+		 * list
+		 */
+		spin_lock(&re->lock);
+		if (re->scheduled_for) {
+			spin_unlock(&re->lock);
+			goto next;
+		}
+
+		list_for_each_entry_safe(rec, tmp, &re->extctl, list) {
+			struct reada_control *it;
+
+			for (it = rec->rc; it; it = it->parent) {
+				if (it == rc) {
+					list_move(&rec->list, &list);
+					break;
+				}
+			}
+		}
+		spin_unlock(&re->lock);
+
+		/*
+		 * now cancel all extctls in the list
+		 */
+		while (!list_empty(&list)) {
+			struct reada_control *tmp_rc;
+
+			rec = list_first_entry(&list, struct reada_extctl,
+					       list);
+			rec->rc->callback(rec->rc->root, rec->rc, 0, NULL,
+					  re->logical,
+					  -EAGAIN, &re->top, rec->ctx);
+			list_del(&rec->list);
+			tmp_rc = rec->rc;
+			kfree(rec);
+
+			reada_control_elem_put(tmp_rc);
+			reada_extent_put(fs_info, re);
+		}
+next:
+		logical = re->logical + PAGE_CACHE_SIZE;
+		reada_extent_put(fs_info, re);	/* our ref */
+	}
+
+	return 1;
+}
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index abc0fbf..80140a8 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -1136,7 +1136,6 @@ static noinline_for_stack int scrub_stripe(struct scrub_dev *sdev,
 	u64 generation;
 	int mirror_num;
 	struct reada_control *reada1;
-	struct reada_control *reada2;
 	struct btrfs_key key_start;
 	struct btrfs_key key_end;
 
@@ -1189,23 +1188,21 @@ static noinline_for_stack int scrub_stripe(struct scrub_dev *sdev,
 	key_start.objectid = logical;
 	key_start.type = BTRFS_EXTENT_ITEM_KEY;
 	key_start.offset = (u64)0;
+	key_end = key_start;
 	key_end.objectid = base + offset + nstripes * increment;
-	key_end.type = BTRFS_EXTENT_ITEM_KEY;
-	key_end.offset = (u64)0;
-	reada1 = btrfs_reada_add(root, &key_start, &key_end);
-
-	key_start.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
-	key_start.type = BTRFS_EXTENT_CSUM_KEY;
-	key_start.offset = logical;
-	key_end.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
-	key_end.type = BTRFS_EXTENT_CSUM_KEY;
-	key_end.offset = base + offset + nstripes * increment;
-	reada2 = btrfs_reada_add(csum_root, &key_start, &key_end);
-
-	if (!IS_ERR(reada1))
+	ret = btrfs_reada_add(NULL, root, &key_start, &key_end,
+			      NULL, NULL, &reada1);
+	/* if readahead fails, we just go ahead without it */
+	if (ret == 0) {
+		key_start.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
+		key_start.type = BTRFS_EXTENT_CSUM_KEY;
+		key_start.offset = logical;
+		key_end = key_start;
+		key_end.offset = base + offset + nstripes * increment;
+		ret = btrfs_reada_add(reada1, csum_root, &key_start,
+				      &key_end, NULL, NULL, NULL);
 		btrfs_reada_wait(reada1);
-	if (!IS_ERR(reada2))
-		btrfs_reada_wait(reada2);
+	}
 
 	mutex_lock(&fs_info->scrub_lock);
 	while (atomic_read(&fs_info->scrub_pause_req)) {
-- 
1.7.3.4
