The extent_state structure is used at the core of the extent I/O code
for managing flags, locking, etc. It requires allocations deep in the
write code, and any allocation failures there are difficult to recover
from. We avoid most of those failures by satisfying the allocations
from a mempool, which can sleep when required. This allows future
patches to convert most of the {set,clear,convert}_extent_bit functions
and their derivatives to return void.
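
For reference, a minimal sketch of the mempool pattern this depends on
(the demo_* names are illustrative, not part of this patch): a pool
backed by a slab cache keeps a reserve of preallocated elements, and
mempool_alloc() with __GFP_WAIT set sleeps for a reserve element rather
than returning NULL from process context.

	#include <linux/mempool.h>
	#include <linux/slab.h>

	struct demo_item { int val; };

	static struct kmem_cache *demo_cache;
	static mempool_t *demo_pool;

	static int demo_init(void)
	{
		demo_cache = kmem_cache_create("demo_item",
				sizeof(struct demo_item), 0, 0, NULL);
		if (!demo_cache)
			return -ENOMEM;

		/* Reserve 16 objects.  mempool_alloc() first tries the
		 * slab, then the reserve, then (with __GFP_WAIT) sleeps
		 * until an element is returned via mempool_free(), so it
		 * never returns NULL for a sleeping caller. */
		demo_pool = mempool_create_slab_pool(16, demo_cache);
		if (!demo_pool) {
			kmem_cache_destroy(demo_cache);
			return -ENOMEM;
		}
		return 0;
	}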
Signed-off-by: Jeff Mahoney <jeffm@xxxxxxxx>
---
fs/btrfs/extent_io.c | 71 ++++++++++++++++++++++++++++++++++++---------------
1 file changed, 51 insertions(+), 20 deletions(-)
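
As a review aid, a minimal, self-contained sketch of the preallocation
pattern that the new assert_atomic_alloc() polices (hypothetical demo_*
names, not taken from this patch): sleeping callers allocate before
taking the spinlock, so the atomic fallback under the lock should only
ever fire for non-sleeping callers.

	#include <linux/bug.h>
	#include <linux/slab.h>
	#include <linux/spinlock.h>

	struct demo_state { int bits; };
	struct demo_tree { spinlock_t lock; struct demo_state *state; };

	static void demo_set_bit(struct demo_tree *tree, int bit, gfp_t mask)
	{
		struct demo_state *prealloc = NULL;

		/* Sleeping callers preallocate outside the lock; in the
		 * patch this comes from the mempool and cannot fail. */
		if (mask & __GFP_WAIT)
			prealloc = kmalloc(sizeof(*prealloc), mask);

		spin_lock(&tree->lock);
		/* Mirrors assert_atomic_alloc(): a sleeping caller must
		 * never reach the atomic path without a preallocation. */
		WARN_ON(!prealloc && (mask & __GFP_WAIT));
		if (!prealloc)
			prealloc = kmalloc(sizeof(*prealloc), GFP_ATOMIC);
		if (prealloc) {
			prealloc->bits = bit;
			kfree(tree->state);	/* kfree() is safe under a spinlock */
			tree->state = prealloc;
		}
		spin_unlock(&tree->lock);
	}
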
Index: source/fs/btrfs/extent_io.c
===================================================================
--- source.orig/fs/btrfs/extent_io.c 2011-11-21 14:13:55.000000000 -0500
+++ source/fs/btrfs/extent_io.c 2011-11-21 14:38:23.000000000 -0500
@@ -12,6 +12,7 @@
#include <linux/pagevec.h>
#include <linux/prefetch.h>
#include <linux/cleancache.h>
+#include <linux/mempool.h>
#include "extent_io.h"
#include "extent_map.h"
#include "compat.h"
@@ -21,6 +22,8 @@
static struct kmem_cache *extent_state_cache;
static struct kmem_cache *extent_buffer_cache;
+static mempool_t *extent_state_pool;
+#define EXTENT_STATE_POOL_SIZE (64*1024)	/* bytes of reserved states */
static LIST_HEAD(buffers);
static LIST_HEAD(states);
@@ -61,18 +64,28 @@ tree_fs_info(struct extent_io_tree *tree
int __init extent_io_init(void)
{
extent_state_cache = kmem_cache_create("extent_state",
- sizeof(struct extent_state), 0,
- SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL);
+ sizeof(struct extent_state), 0,
+ SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD,
+ NULL);
if (!extent_state_cache)
return -ENOMEM;
+ extent_state_pool = mempool_create_slab_pool(
+ EXTENT_STATE_POOL_SIZE /
+ sizeof(struct extent_state),
+ extent_state_cache);
+ if (!extent_state_pool)
+ goto free_state_cache;
+
extent_buffer_cache = kmem_cache_create("extent_buffers",
sizeof(struct extent_buffer), 0,
SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL);
if (!extent_buffer_cache)
- goto free_state_cache;
+ goto free_state_mempool;
return 0;
+free_state_mempool:
+ mempool_destroy(extent_state_pool);
free_state_cache:
kmem_cache_destroy(extent_state_cache);
return -ENOMEM;
@@ -103,6 +116,8 @@ void extent_io_exit(void)
list_del(&eb->leak_list);
kmem_cache_free(extent_buffer_cache, eb);
}
+ if (extent_state_pool)
+ mempool_destroy(extent_state_pool);
if (extent_state_cache)
kmem_cache_destroy(extent_state_cache);
if (extent_buffer_cache)
@@ -128,7 +143,7 @@ static struct extent_state *alloc_extent
unsigned long flags;
#endif
- state = kmem_cache_alloc(extent_state_cache, mask);
+ state = mempool_alloc(extent_state_pool, mask);
if (!state)
return state;
state->state = 0;
@@ -145,6 +160,12 @@ static struct extent_state *alloc_extent
return state;
}
+static struct extent_state *alloc_extent_state_nofail(gfp_t mask)
+{
+ BUG_ON(!(mask & __GFP_WAIT));
+ return alloc_extent_state(mask);
+}
+
void free_extent_state(struct extent_state *state)
{
if (!state)
@@ -160,7 +181,7 @@ void free_extent_state(struct extent_sta
spin_unlock_irqrestore(&leak_lock, flags);
#endif
trace_free_extent_state(state, _RET_IP_);
- kmem_cache_free(extent_state_cache, state);
+ mempool_free(state, extent_state_pool);
}
}
@@ -437,6 +458,12 @@ static int clear_state_bit(struct extent
return ret;
}
+static void
+assert_atomic_alloc(struct extent_state *prealloc, gfp_t mask)
+{
+ WARN_ON(!prealloc && (mask & __GFP_WAIT));
+}
+
static struct extent_state *
alloc_extent_state_atomic(struct extent_state *prealloc)
{
@@ -464,6 +491,7 @@ NORET_TYPE void extent_io_tree_panic(str
* the range [start, end] is inclusive.
*
* This takes the tree lock, and returns 0 on success and < 0 on error.
+ * If (mask & __GFP_WAIT) is set, there are no error conditions.
*/
int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
int bits, int wake, int delete,
@@ -486,11 +514,8 @@ int clear_extent_bit(struct extent_io_tr
if (bits & (EXTENT_IOBITS | EXTENT_BOUNDARY))
clear = 1;
again:
- if (!prealloc && (mask & __GFP_WAIT)) {
- prealloc = alloc_extent_state(mask);
- if (!prealloc)
- return -ENOMEM;
- }
+ if (!prealloc && (mask & __GFP_WAIT))
+ prealloc = alloc_extent_state_nofail(mask);
spin_lock(&tree->lock);
if (cached_state) {
@@ -542,6 +567,7 @@ hit_next:
*/
if (state->start < start) {
+ assert_atomic_alloc(prealloc, mask);
prealloc = alloc_extent_state_atomic(prealloc);
BUG_ON(!prealloc);
err = split_state(tree, state, prealloc, start);
@@ -566,6 +592,7 @@ hit_next:
* on the first half
*/
if (state->start <= end && state->end > end) {
+ assert_atomic_alloc(prealloc, mask);
prealloc = alloc_extent_state_atomic(prealloc);
BUG_ON(!prealloc);
err = split_state(tree, state, prealloc, end + 1);
@@ -726,15 +753,14 @@ int set_extent_bit(struct extent_io_tree
struct extent_state *prealloc = NULL;
struct rb_node *node;
int err = 0;
+ int wait = mask & __GFP_WAIT;
u64 last_start;
u64 last_end;
bits |= EXTENT_FIRST_DELALLOC;
again:
- if (!prealloc && (mask & __GFP_WAIT)) {
- prealloc = alloc_extent_state(mask);
- BUG_ON(!prealloc);
- }
+ if (!prealloc && wait)
+ prealloc = alloc_extent_state_nofail(mask);
spin_lock(&tree->lock);
if (cached_state && *cached_state) {
@@ -751,6 +777,7 @@ again:
*/
node = tree_search(tree, start);
if (!node) {
+ assert_atomic_alloc(prealloc, mask);
prealloc = alloc_extent_state_atomic(prealloc);
BUG_ON(!prealloc);
err = insert_state(tree, prealloc, start, end, &bits);
@@ -820,6 +847,7 @@ hit_next:
goto out;
}
+ assert_atomic_alloc(prealloc, mask);
prealloc = alloc_extent_state_atomic(prealloc);
BUG_ON(!prealloc);
err = split_state(tree, state, prealloc, start);
@@ -853,6 +881,7 @@ hit_next:
else
this_end = last_start - 1;
+ assert_atomic_alloc(prealloc, mask);
prealloc = alloc_extent_state_atomic(prealloc);
BUG_ON(!prealloc);
@@ -883,6 +912,7 @@ hit_next:
goto out;
}
+ assert_atomic_alloc(prealloc, mask);
prealloc = alloc_extent_state_atomic(prealloc);
BUG_ON(!prealloc);
err = split_state(tree, state, prealloc, end + 1);
@@ -909,7 +939,7 @@ search_again:
if (start > end)
goto out;
spin_unlock(&tree->lock);
- if (mask & __GFP_WAIT)
+ if (wait)
cond_resched();
goto again;
}
@@ -940,11 +970,8 @@ int convert_extent_bit(struct extent_io_
u64 last_end;
again:
- if (!prealloc && (mask & __GFP_WAIT)) {
- prealloc = alloc_extent_state(mask);
- if (!prealloc)
- return -ENOMEM;
- }
+ if (!prealloc && (mask & __GFP_WAIT))
+ prealloc = alloc_extent_state_nofail(mask);
spin_lock(&tree->lock);
/*
@@ -953,6 +980,7 @@ again:
*/
node = tree_search(tree, start);
if (!node) {
+ assert_atomic_alloc(prealloc, mask);
prealloc = alloc_extent_state_atomic(prealloc);
if (!prealloc)
return -ENOMEM;
@@ -1010,6 +1038,7 @@ hit_next:
* desired bit on it.
*/
if (state->start < start) {
+ assert_atomic_alloc(prealloc, mask);
prealloc = alloc_extent_state_atomic(prealloc);
if (!prealloc)
return -ENOMEM;
@@ -1042,6 +1071,7 @@ hit_next:
else
this_end = last_start - 1;
+ assert_atomic_alloc(prealloc, mask);
prealloc = alloc_extent_state_atomic(prealloc);
if (!prealloc)
return -ENOMEM;
@@ -1069,6 +1099,7 @@ hit_next:
* on the first half
*/
if (state->start <= end && state->end > end) {
+ assert_atomic_alloc(prealloc, mask);
prealloc = alloc_extent_state_atomic(prealloc);
if (!prealloc)
return -ENOMEM;
--