Hi Miao,
Chris' stress test (stress.sh -n 50 -c /mnt/linux-2.6 /mnt) gave me another lockdep splat
(see below). I applied your V5 patches on top of the next-rc branch.
I haven't triggered it in my actual testing, but do you think we can iterate a list of block
groups in a lockless manner using RCU?
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 2164296..f40ff4e 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -740,6 +740,7 @@ struct btrfs_space_info {
struct list_head block_groups[BTRFS_NR_RAID_TYPES];
spinlock_t lock;
struct rw_semaphore groups_sem;
+ struct srcu_struct groups_srcu;
atomic_t caching_threads;
};
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 9e4c9f4..22d6dbb 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -3003,6 +3003,7 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags,
for (i = 0; i < BTRFS_NR_RAID_TYPES; i++)
INIT_LIST_HEAD(&found->block_groups[i]);
init_rwsem(&found->groups_sem);
+ init_srcu_struct(&found->groups_srcu);
spin_lock_init(&found->lock);
found->flags = flags & (BTRFS_BLOCK_GROUP_DATA |
BTRFS_BLOCK_GROUP_SYSTEM |
@@ -4853,6 +4854,7 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans,
int data)
{
int ret = 0;
+ int idx;
struct btrfs_root *root = orig_root->fs_info->extent_root;
struct btrfs_free_cluster *last_ptr = NULL;
struct btrfs_block_group_cache *block_group = NULL;
@@ -4929,7 +4931,7 @@ ideal_cache:
if (block_group && block_group_bits(block_group, data) &&
(block_group->cached != BTRFS_CACHE_NO ||
search_start == ideal_cache_offset)) {
- down_read(&space_info->groups_sem);
+ idx = srcu_read_lock(&space_info->groups_srcu);
if (list_empty(&block_group->list) ||
block_group->ro) {
/*
@@ -4939,7 +4941,7 @@ ideal_cache:
* valid
*/
btrfs_put_block_group(block_group);
- up_read(&space_info->groups_sem);
+ srcu_read_unlock(&space_info->groups_srcu, idx);
} else {
index = get_block_group_index(block_group);
goto have_block_group;
@@ -4949,8 +4951,8 @@ ideal_cache:
}
}
search:
- down_read(&space_info->groups_sem);
- list_for_each_entry(block_group, &space_info->block_groups[index],
+ idx = srcu_read_lock(&space_info->groups_srcu);
+ list_for_each_entry_rcu(block_group, &space_info->block_groups[index],
list) {
u64 offset;
int cached;
@@ -5197,8 +5199,8 @@ loop:
BUG_ON(index != get_block_group_index(block_group));
btrfs_put_block_group(block_group);
}
- up_read(&space_info->groups_sem);
-
+ srcu_read_unlock(&space_info->groups_srcu, idx);
+
if (!ins->objectid && ++index < BTRFS_NR_RAID_TYPES)
goto search;
=========================================================
[ INFO: possible irq lock inversion dependency detected ]
2.6.36-v5+ #2
---------------------------------------------------------
kswapd0/49 just changed the state of lock:
(&delayed_node->mutex){+.+.-.}, at: [<ffffffff812131f7>] btrfs_remove_delayed_node+0x3e/0xd2
but this lock took another, RECLAIM_FS-READ-unsafe lock in the past:
(&found->groups_sem){++++.+}
and interrupts could create inverse lock ordering between them.
other info that might help us debug this:
2 locks held by kswapd0/49:
#0: (shrinker_rwsem){++++..}, at: [<ffffffff810e242a>] shrink_slab+0x3d/0x164
#1: (iprune_sem){++++.-}, at: [<ffffffff811316d0>] shrink_icache_memory+0x4d/0x213
the shortest dependencies between 2nd lock and 1st lock:
-> (&found->groups_sem){++++.+} ops: 1334 {
HARDIRQ-ON-W at:
[<ffffffff81075ec0>] __lock_acquire+0x346/0xda6
[<ffffffff81076a3d>] lock_acquire+0x11d/0x143
[<ffffffff814c6a2a>] down_write+0x55/0x9b
[<ffffffff811c352a>] __link_block_group+0x5a/0x83
[<ffffffff811ca562>] btrfs_read_block_groups+0x2fb/0x56c
[<ffffffff811d4921>] open_ctree+0xf78/0x14ab
[<ffffffff811bafdf>] btrfs_get_sb+0x236/0x467
[<ffffffff8111f25e>] vfs_kern_mount+0xbd/0x1a7
[<ffffffff8111f3b0>] do_kern_mount+0x4d/0xed
[<ffffffff8113668d>] do_mount+0x74e/0x7c5
[<ffffffff8113678c>] sys_mount+0x88/0xc2
[<ffffffff81002ddb>] system_call_fastpath+0x16/0x1b
HARDIRQ-ON-R at:
[<ffffffff81075e98>] __lock_acquire+0x31e/0xda6
[<ffffffff81076a3d>] lock_acquire+0x11d/0x143
[<ffffffff814c6abc>] down_read+0x4c/0x91
[<ffffffff811cb5b2>] find_free_extent+0x3ec/0xa86
[<ffffffff811cbd00>] btrfs_reserve_extent+0xb4/0x142
[<ffffffff811cbef5>] btrfs_alloc_free_block+0x167/0x2b2
[<ffffffff811be610>] __btrfs_cow_block+0x103/0x346
[<ffffffff811bedb8>] btrfs_cow_block+0x101/0x110
[<ffffffff811c05d8>] btrfs_search_slot+0x143/0x513
[<ffffffff811c1495>] btrfs_insert_empty_items+0x6a/0xbc
[<ffffffff811ffb68>] btrfs_insert_orphan_item+0x5d/0x75
[<ffffffff811df1a1>] btrfs_orphan_add+0x139/0x152
[<ffffffff811e0dd3>] btrfs_setattr+0xff/0x253
[<ffffffff8113201e>] notify_change+0x1a2/0x29d
[<ffffffff8111bf08>] do_truncate+0x6c/0x89
[<ffffffff81127a77>] do_last+0x579/0x57e
[<ffffffff81129502>] do_filp_open+0x215/0x5ae
[<ffffffff8111aec0>] do_sys_open+0x60/0xfc
[<ffffffff8111af8f>] sys_open+0x20/0x22
[<ffffffff81002ddb>] system_call_fastpath+0x16/0x1b
SOFTIRQ-ON-W at:
[<ffffffff81075ee1>] __lock_acquire+0x367/0xda6
[<ffffffff81076a3d>] lock_acquire+0x11d/0x143
[<ffffffff814c6a2a>] down_write+0x55/0x9b
[<ffffffff811c352a>] __link_block_group+0x5a/0x83
[<ffffffff811ca562>] btrfs_read_block_groups+0x2fb/0x56c
[<ffffffff811d4921>] open_ctree+0xf78/0x14ab
[<ffffffff811bafdf>] btrfs_get_sb+0x236/0x467
[<ffffffff8111f25e>] vfs_kern_mount+0xbd/0x1a7
[<ffffffff8111f3b0>] do_kern_mount+0x4d/0xed
[<ffffffff8113668d>] do_mount+0x74e/0x7c5
[<ffffffff8113678c>] sys_mount+0x88/0xc2
[<ffffffff81002ddb>] system_call_fastpath+0x16/0x1b
SOFTIRQ-ON-R at:
[<ffffffff81075ee1>] __lock_acquire+0x367/0xda6
[<ffffffff81076a3d>] lock_acquire+0x11d/0x143
[<ffffffff814c6abc>] down_read+0x4c/0x91
[<ffffffff811cb5b2>] find_free_extent+0x3ec/0xa86
[<ffffffff811cbd00>] btrfs_reserve_extent+0xb4/0x142
[<ffffffff811cbef5>] btrfs_alloc_free_block+0x167/0x2b2
[<ffffffff811be610>] __btrfs_cow_block+0x103/0x346
[<ffffffff811bedb8>] btrfs_cow_block+0x101/0x110
[<ffffffff811c05d8>] btrfs_search_slot+0x143/0x513
[<ffffffff811c1495>] btrfs_insert_empty_items+0x6a/0xbc
[<ffffffff811ffb68>] btrfs_insert_orphan_item+0x5d/0x75
[<ffffffff811df1a1>] btrfs_orphan_add+0x139/0x152
[<ffffffff811e0dd3>] btrfs_setattr+0xff/0x253
[<ffffffff8113201e>] notify_change+0x1a2/0x29d
[<ffffffff8111bf08>] do_truncate+0x6c/0x89
[<ffffffff81127a77>] do_last+0x579/0x57e
[<ffffffff81129502>] do_filp_open+0x215/0x5ae
[<ffffffff8111aec0>] do_sys_open+0x60/0xfc
[<ffffffff8111af8f>] sys_open+0x20/0x22
[<ffffffff81002ddb>] system_call_fastpath+0x16/0x1b
RECLAIM_FS-ON-R at:
[<ffffffff81074292>] mark_held_locks+0x52/0x70
[<ffffffff81074354>] lockdep_trace_alloc+0xa4/0xc2
[<ffffffff810db873>] __alloc_pages_nodemask+0x96/0x841
[<ffffffff81105bcb>] alloc_pages_current+0xa7/0xca
[<ffffffff810d4d91>] __page_cache_alloc+0x85/0x8c
[<ffffffff810ddef6>] __do_page_cache_readahead+0xb5/0x19d
[<ffffffff810ddfff>] ra_submit+0x21/0x25
[<ffffffff810de3b9>] ondemand_readahead+0x1b6/0x1c9
[<ffffffff810de4b2>] page_cache_sync_readahead+0x3d/0x3f
[<ffffffff8120798d>] load_free_space_cache+0x262/0x671
[<ffffffff811c886f>] cache_block_group+0x97/0x233
[<ffffffff811cb63f>] find_free_extent+0x479/0xa86
[<ffffffff811cbd00>] btrfs_reserve_extent+0xb4/0x142
[<ffffffff811cbef5>] btrfs_alloc_free_block+0x167/0x2b2
[<ffffffff811be610>] __btrfs_cow_block+0x103/0x346
[<ffffffff811bedb8>] btrfs_cow_block+0x101/0x110
[<ffffffff811c05d8>] btrfs_search_slot+0x143/0x513
[<ffffffff811c1495>] btrfs_insert_empty_items+0x6a/0xbc
[<ffffffff811ffb68>] btrfs_insert_orphan_item+0x5d/0x75
[<ffffffff811df1a1>] btrfs_orphan_add+0x139/0x152
[<ffffffff811e0dd3>] btrfs_setattr+0xff/0x253
[<ffffffff8113201e>] notify_change+0x1a2/0x29d
[<ffffffff8111bf08>] do_truncate+0x6c/0x89
[<ffffffff81127a77>] do_last+0x579/0x57e
[<ffffffff81129502>] do_filp_open+0x215/0x5ae
[<ffffffff8111aec0>] do_sys_open+0x60/0xfc
[<ffffffff8111af8f>] sys_open+0x20/0x22
[<ffffffff81002ddb>] system_call_fastpath+0x16/0x1b
INITIAL USE at:
[<ffffffff81075f37>] __lock_acquire+0x3bd/0xda6
[<ffffffff81076a3d>] lock_acquire+0x11d/0x143
[<ffffffff814c6a2a>] down_write+0x55/0x9b
[<ffffffff811c352a>] __link_block_group+0x5a/0x83
[<ffffffff811ca562>] btrfs_read_block_groups+0x2fb/0x56c
[<ffffffff811d4921>] open_ctree+0xf78/0x14ab
[<ffffffff811bafdf>] btrfs_get_sb+0x236/0x467
[<ffffffff8111f25e>] vfs_kern_mount+0xbd/0x1a7
[<ffffffff8111f3b0>] do_kern_mount+0x4d/0xed
[<ffffffff8113668d>] do_mount+0x74e/0x7c5
[<ffffffff8113678c>] sys_mount+0x88/0xc2
[<ffffffff81002ddb>] system_call_fastpath+0x16/0x1b
}
... key at: [<ffffffff82924fb8>] __key.40112+0x0/0x8
... acquired at:
[<ffffffff81076a3d>] lock_acquire+0x11d/0x143
[<ffffffff814c6abc>] down_read+0x4c/0x91
[<ffffffff811cb48a>] find_free_extent+0x2c4/0xa86
[<ffffffff811cbd00>] btrfs_reserve_extent+0xb4/0x142
[<ffffffff811cbef5>] btrfs_alloc_free_block+0x167/0x2b2
[<ffffffff811be610>] __btrfs_cow_block+0x103/0x346
[<ffffffff811bedb8>] btrfs_cow_block+0x101/0x110
[<ffffffff811c05d8>] btrfs_search_slot+0x143/0x513
[<ffffffff811cf58b>] btrfs_lookup_inode+0x2f/0x8f
[<ffffffff812123e5>] btrfs_update_delayed_inode+0x75/0x135
[<ffffffff8121306e>] btrfs_async_run_delayed_node_done+0xd5/0x194
[<ffffffff811fb48e>] worker_loop+0x198/0x4dd
[<ffffffff81061a60>] kthread+0x9d/0xa5
[<ffffffff81003c14>] kernel_thread_helper+0x4/0x10
-> (&delayed_node->mutex){+.+.-.} ops: 8932 {
HARDIRQ-ON-W at:
[<ffffffff81075ec0>] __lock_acquire+0x346/0xda6
[<ffffffff81076a3d>] lock_acquire+0x11d/0x143
[<ffffffff814c6291>] __mutex_lock_common+0x5a/0x444
[<ffffffff814c6730>] mutex_lock_nested+0x39/0x3e
[<ffffffff81211fb4>] btrfs_delayed_update_inode+0x45/0x101
[<ffffffff811dc5a3>] btrfs_update_inode+0x2e/0x129
[<ffffffff811e0c9a>] btrfs_truncate+0x43d/0x477
[<ffffffff810dfb22>] vmtruncate+0x44/0x52
[<ffffffff811e0ed6>] btrfs_setattr+0x202/0x253
[<ffffffff8113201e>] notify_change+0x1a2/0x29d
[<ffffffff8111bf08>] do_truncate+0x6c/0x89
[<ffffffff81127a77>] do_last+0x579/0x57e
[<ffffffff81129502>] do_filp_open+0x215/0x5ae
[<ffffffff8111aec0>] do_sys_open+0x60/0xfc
[<ffffffff8111af8f>] sys_open+0x20/0x22
[<ffffffff81002ddb>] system_call_fastpath+0x16/0x1b
SOFTIRQ-ON-W at:
[<ffffffff81075ee1>] __lock_acquire+0x367/0xda6
[<ffffffff81076a3d>] lock_acquire+0x11d/0x143
[<ffffffff814c6291>] __mutex_lock_common+0x5a/0x444
[<ffffffff814c6730>] mutex_lock_nested+0x39/0x3e
[<ffffffff81211fb4>] btrfs_delayed_update_inode+0x45/0x101
[<ffffffff811dc5a3>] btrfs_update_inode+0x2e/0x129
[<ffffffff811e0c9a>] btrfs_truncate+0x43d/0x477
[<ffffffff810dfb22>] vmtruncate+0x44/0x52
[<ffffffff811e0ed6>] btrfs_setattr+0x202/0x253
[<ffffffff8113201e>] notify_change+0x1a2/0x29d
[<ffffffff8111bf08>] do_truncate+0x6c/0x89
[<ffffffff81127a77>] do_last+0x579/0x57e
[<ffffffff81129502>] do_filp_open+0x215/0x5ae
[<ffffffff8111aec0>] do_sys_open+0x60/0xfc
[<ffffffff8111af8f>] sys_open+0x20/0x22
[<ffffffff81002ddb>] system_call_fastpath+0x16/0x1b
IN-RECLAIM_FS-W at:
[<ffffffff81075f1f>] __lock_acquire+0x3a5/0xda6
[<ffffffff81076a3d>] lock_acquire+0x11d/0x143
[<ffffffff814c6291>] __mutex_lock_common+0x5a/0x444
[<ffffffff814c6730>] mutex_lock_nested+0x39/0x3e
[<ffffffff812131f7>] btrfs_remove_delayed_node+0x3e/0xd2
[<ffffffff811d77aa>] btrfs_destroy_inode+0x2ae/0x2d4
[<ffffffff81130dc1>] destroy_inode+0x2f/0x45
[<ffffffff811312ca>] dispose_list+0xaa/0xdf
[<ffffffff81131866>] shrink_icache_memory+0x1e3/0x213
[<ffffffff810e24cd>] shrink_slab+0xe0/0x164
[<ffffffff810e4619>] balance_pgdat+0x2e8/0x50b
[<ffffffff810e4bbc>] kswapd+0x380/0x3c0
[<ffffffff81061a60>] kthread+0x9d/0xa5
[<ffffffff81003c14>] kernel_thread_helper+0x4/0x10
INITIAL USE at:
[<ffffffff81075f37>] __lock_acquire+0x3bd/0xda6
[<ffffffff81076a3d>] lock_acquire+0x11d/0x143
[<ffffffff814c6291>] __mutex_lock_common+0x5a/0x444
[<ffffffff814c6730>] mutex_lock_nested+0x39/0x3e
[<ffffffff81211fb4>] btrfs_delayed_update_inode+0x45/0x101
[<ffffffff811dc5a3>] btrfs_update_inode+0x2e/0x129
[<ffffffff811e0c9a>] btrfs_truncate+0x43d/0x477
[<ffffffff810dfb22>] vmtruncate+0x44/0x52
[<ffffffff811e0ed6>] btrfs_setattr+0x202/0x253
[<ffffffff8113201e>] notify_change+0x1a2/0x29d
[<ffffffff8111bf08>] do_truncate+0x6c/0x89
[<ffffffff81127a77>] do_last+0x579/0x57e
[<ffffffff81129502>] do_filp_open+0x215/0x5ae
[<ffffffff8111aec0>] do_sys_open+0x60/0xfc
[<ffffffff8111af8f>] sys_open+0x20/0x22
[<ffffffff81002ddb>] system_call_fastpath+0x16/0x1b
}
... key at: [<ffffffff82925450>] __key.31289+0x0/0x8
... acquired at:
[<ffffffff810749bf>] check_usage_forwards+0x71/0x7e
[<ffffffff81074162>] mark_lock+0x18c/0x26a
[<ffffffff81075f1f>] __lock_acquire+0x3a5/0xda6
[<ffffffff81076a3d>] lock_acquire+0x11d/0x143
[<ffffffff814c6291>] __mutex_lock_common+0x5a/0x444
[<ffffffff814c6730>] mutex_lock_nested+0x39/0x3e
[<ffffffff812131f7>] btrfs_remove_delayed_node+0x3e/0xd2
[<ffffffff811d77aa>] btrfs_destroy_inode+0x2ae/0x2d4
[<ffffffff81130dc1>] destroy_inode+0x2f/0x45
[<ffffffff811312ca>] dispose_list+0xaa/0xdf
[<ffffffff81131866>] shrink_icache_memory+0x1e3/0x213
[<ffffffff810e24cd>] shrink_slab+0xe0/0x164
[<ffffffff810e4619>] balance_pgdat+0x2e8/0x50b
[<ffffffff810e4bbc>] kswapd+0x380/0x3c0
[<ffffffff81061a60>] kthread+0x9d/0xa5
[<ffffffff81003c14>] kernel_thread_helper+0x4/0x10
stack backtrace:
Pid: 49, comm: kswapd0 Not tainted 2.6.36-v5+ #2
Call Trace:
[<ffffffff8107493d>] print_irq_inversion_bug+0x124/0x135
[<ffffffff810749bf>] check_usage_forwards+0x71/0x7e
[<ffffffff8107494e>] ? check_usage_forwards+0x0/0x7e
[<ffffffff81074162>] mark_lock+0x18c/0x26a
[<ffffffff81075f1f>] __lock_acquire+0x3a5/0xda6
[<ffffffff81076911>] ? __lock_acquire+0xd97/0xda6
[<ffffffff812131f7>] ? btrfs_remove_delayed_node+0x3e/0xd2
[<ffffffff81076a3d>] lock_acquire+0x11d/0x143
[<ffffffff812131f7>] ? btrfs_remove_delayed_node+0x3e/0xd2
[<ffffffff812131f7>] ? btrfs_remove_delayed_node+0x3e/0xd2
[<ffffffff814c6291>] __mutex_lock_common+0x5a/0x444
[<ffffffff812131f7>] ? btrfs_remove_delayed_node+0x3e/0xd2
[<ffffffff81074604>] ? trace_hardirqs_on+0xd/0xf
[<ffffffff814c6730>] mutex_lock_nested+0x39/0x3e
[<ffffffff812131f7>] btrfs_remove_delayed_node+0x3e/0xd2
[<ffffffff811d77aa>] btrfs_destroy_inode+0x2ae/0x2d4
[<ffffffff81130dc1>] destroy_inode+0x2f/0x45
[<ffffffff811312ca>] dispose_list+0xaa/0xdf
[<ffffffff81131866>] shrink_icache_memory+0x1e3/0x213
[<ffffffff810e24cd>] shrink_slab+0xe0/0x164
[<ffffffff810e4619>] balance_pgdat+0x2e8/0x50b
[<ffffffff810e4bbc>] kswapd+0x380/0x3c0
[<ffffffff81062032>] ? autoremove_wake_function+0x0/0x39
[<ffffffff810e483c>] ? kswapd+0x0/0x3c0
[<ffffffff81061a60>] kthread+0x9d/0xa5
[<ffffffff81003c14>] kernel_thread_helper+0x4/0x10
[<ffffffff81038cd9>] ? finish_task_switch+0x70/0xb9
[<ffffffff814c8880>] ? restore_args+0x0/0x30
[<ffffffff810619c3>] ? kthread+0x0/0xa5
[<ffffffff81003c10>] ? kernel_thread_helper+0x0/0x10
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html