On Thu, 27 Jun 2013 18:51:00 +0800, Miao Xie wrote:
> - It makes no sense that we deal with a inode in the dead tree.
This caused that the replace procedure was not dealing with free space cache entries anymore (which have btrfs_root_refs() == 0). I accidentally fixed it as a side-effect of "Btrf: cleanup: don't check for root_refs == 0 twice" and intentionally fixed it with "Btrfs: eliminate the exceptional root_tree refs=0" (which is not yet sent).
> - fix the race between dio and page copy by waiting the dio completion
This causes lockdep issues which I've seen once after running './check -g all' followed by './check btrfs/005' during the 005 run, and on a second box once while running the btrfs xfstests 001 up to 011 during the xfstest 011 run, both boxes were running the latest btrfs-next plus the patch "Btrfs: eliminate the exceptional root_tree refs=0" (but I do not think that the latter patch matters). I'm unable to reproduce this lockdep warning, but it seems to make sense, see below.
### scrub.c:
static void copy_nocow_pages_worker(struct btrfs_work *work)
{
...
trans = btrfs_join_transaction(root);
...
ret = iterate_inodes_from_logical(logical, fs_info, path,
copy_nocow_pages_for_inode,
nocow_ctx);
...
btrfs_end_transaction(trans, root);
...
}
static int copy_nocow_pages_for_inode(u64 inum, u64 offset, u64 root, void *ctx)
{
...
/* Avoid truncate/dio/punch hole.. */
mutex_lock(&inode->i_mutex);
inode_dio_wait(inode);
...
mutex_unlock(&inode->i_mutex);
...
}
### file.c:
int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
{
...
mutex_lock(&inode->i_mutex);
...
trans = btrfs_start_transaction(root, 0);
...
mutex_unlock(&inode->i_mutex);
...
ret = btrfs_end_transaction(trans, root);
...
}
[ INFO: possible circular locking dependency detected ]
3.10.0+ #5 Not tainted
-------------------------------------------------------
xfs_io/30404 is trying to acquire lock:
(sb_internal){.+.+..}, at: [<ffffffffa00abad6>] start_transaction+0x296/0x4f0 [btrfs]
but task is already holding lock:
(&sb->s_type->i_mutex_key#11){+.+.+.}, at: [<ffffffffa00bd029>] btrfs_sync_file+0xb9/0x2c0 [btrfs]
which lock already depends on the new lock.
the existing dependency chain (in reverse order) is:
-> #1 (&sb->s_type->i_mutex_key#11){+.+.+.}:
[<ffffffff810e564a>] lock_acquire+0x8a/0x120
[<ffffffff81993f13>] mutex_lock_nested+0x73/0x390
[<ffffffffa01005d5>] copy_nocow_pages_for_inode+0x105/0x2b0 [btrfs]
[<ffffffffa010941e>] iterate_extent_inodes+0x1ce/0x300 [btrfs]
[<ffffffffa01095f7>] iterate_inodes_from_logical+0xa7/0xb0 [btrfs]
[<ffffffffa00fffeb>] copy_nocow_pages_worker+0x9b/0x160 [btrfs]
[<ffffffffa00d9b1f>] worker_loop+0x13f/0x5b0 [btrfs]
[<ffffffff810abd36>] kthread+0xd6/0xe0
[<ffffffff8199f66c>] ret_from_fork+0x7c/0xb0
-> #0 (sb_internal){.+.+..}:
[<ffffffff810e4f42>] __lock_acquire+0x1d12/0x1e10
[<ffffffff810e564a>] lock_acquire+0x8a/0x120
[<ffffffff8119d7ab>] __sb_start_write+0xdb/0x1c0
[<ffffffffa00abad6>] start_transaction+0x296/0x4f0 [btrfs]
[<ffffffffa00ac0e6>] btrfs_start_transaction+0x16/0x20 [btrfs]
[<ffffffffa00bd0a9>] btrfs_sync_file+0x139/0x2c0 [btrfs]
[<ffffffff811c9848>] do_fsync+0x58/0x80
[<ffffffff811c9adb>] SyS_fsync+0xb/0x10
[<ffffffff8199f712>] system_call_fastpath+0x16/0x1b
other info that might help us debug this:
Possible unsafe locking scenario:
CPU0 CPU1
---- ----
lock(&sb->s_type->i_mutex_key#11);
lock(sb_internal);
lock(&sb->s_type->i_mutex_key#11);
lock(sb_internal);
*** DEADLOCK ***
1 lock held by xfs_io/30404:
#0: (&sb->s_type->i_mutex_key#11){+.+.+.}, at: [<ffffffffa00bd029>] btrfs_sync_file+0xb9/0x2c0 [btrfs]
stack backtrace:
CPU: 3 PID: 30404 Comm: xfs_io Not tainted 3.10.0+ #5
Hardware name: Supermicro X9SCL/X9SCM/X9SCL/X9SCM, BIOS 2.0b 09/17/2012
ffffffff865d9790 ffff880804ef5bb8 ffffffff8198eaba ffff880804ef5c08
ffffffff81989a92 00000000000003d0 ffff880804ef5c98 ffff880804d786d0
ffff880804d78708 ffff880804d786d0 ffff880804d78000 0000000000000000
Call Trace:
[<ffffffff8198eaba>] dump_stack+0x19/0x1b
[<ffffffff81989a92>] print_circular_bug+0x1fe/0x20f
[<ffffffff810e4f42>] __lock_acquire+0x1d12/0x1e10
[<ffffffff810e564a>] lock_acquire+0x8a/0x120
[<ffffffffa00abad6>] ? start_transaction+0x296/0x4f0 [btrfs]
[<ffffffff8119d7ab>] __sb_start_write+0xdb/0x1c0
[<ffffffffa00abad6>] ? start_transaction+0x296/0x4f0 [btrfs]
[<ffffffffa00c2db1>] ? btrfs_put_ordered_extent+0x71/0xd0 [btrfs]
[<ffffffffa00abad6>] ? start_transaction+0x296/0x4f0 [btrfs]
[<ffffffffa00ab939>] ? start_transaction+0xf9/0x4f0 [btrfs]
[<ffffffff811920ef>] ? kmem_cache_alloc+0xdf/0x1b0
[<ffffffffa00ab939>] ? start_transaction+0xf9/0x4f0 [btrfs]
[<ffffffffa00abad6>] start_transaction+0x296/0x4f0 [btrfs]
[<ffffffffa00ac0e6>] btrfs_start_transaction+0x16/0x20 [btrfs]
[<ffffffffa00bd0a9>] btrfs_sync_file+0x139/0x2c0 [btrfs]
[<ffffffff81997c49>] ? retint_swapgs+0xe/0x13
[<ffffffff811c9848>] do_fsync+0x58/0x80
[<ffffffff8144bd8e>] ? trace_hardirqs_on_thunk+0x3a/0x3f
[<ffffffff811c9adb>] SyS_fsync+0xb/0x10
[<ffffffff8199f712>] system_call_fastpath+0x16/0x1b
(gdb) list *(copy_nocow_pages_for_inode+0x105)
0x875d5 is in copy_nocow_pages_for_inode (fs/btrfs/scrub.c:3230).
3225 if (IS_ERR(inode))
3226 return PTR_ERR(inode);
3227
3228 /* Avoid truncate/dio/punch hole.. */
3229 mutex_lock(&inode->i_mutex);
3230 inode_dio_wait(inode);
3231
3232 ret = 0;
3233 physical_for_dev_replace = nocow_ctx->physical_for_dev_replace;
3234 len = nocow_ctx->len;
> - avoid the page copy vs truncate/punch hole
> - check if the page is in the page cache or not
>
> Signed-off-by: Miao Xie <miaox@xxxxxxxxxxxxxx>
> ---
> fs/btrfs/scrub.c | 23 ++++++++++++++++++++++-
> 1 file changed, 22 insertions(+), 1 deletion(-)
>
> diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
> index 186ea82..4ba2a69 100644
> --- a/fs/btrfs/scrub.c
> +++ b/fs/btrfs/scrub.c
> @@ -3224,6 +3224,11 @@ static int copy_nocow_pages_for_inode(u64 inum, u64 offset, u64 root, void *ctx)
> return PTR_ERR(local_root);
> }
>
> + if (btrfs_root_refs(&local_root->root_item) == 0) {
> + srcu_read_unlock(&fs_info->subvol_srcu, srcu_index);
> + return -ENOENT;
> + }
> +
> key.type = BTRFS_INODE_ITEM_KEY;
> key.objectid = inum;
> key.offset = 0;
> @@ -3232,12 +3237,16 @@ static int copy_nocow_pages_for_inode(u64 inum, u64 offset, u64 root, void *ctx)
> if (IS_ERR(inode))
> return PTR_ERR(inode);
>
> + /* Avoid truncate/dio/punch hole.. */
> + mutex_lock(&inode->i_mutex);
> + inode_dio_wait(inode);
> +
> ret = 0;
> physical_for_dev_replace = nocow_ctx->physical_for_dev_replace;
> len = nocow_ctx->len;
> while (len >= PAGE_CACHE_SIZE) {
> index = offset >> PAGE_CACHE_SHIFT;
> -
> +again:
> page = find_or_create_page(inode->i_mapping, index, GFP_NOFS);
> if (!page) {
> pr_err("find_or_create_page() failed\n");
> @@ -3258,7 +3267,18 @@ static int copy_nocow_pages_for_inode(u64 inum, u64 offset, u64 root, void *ctx)
> ret = err;
> goto next_page;
> }
> +
> lock_page(page);
> + /*
> + * If the page has been remove from the page cache,
> + * the data on it is meaningless, because it may be
> + * old one, the new data may be written into the new
> + * page in the page cache.
> + */
> + if (page->mapping != inode->i_mapping) {
> + page_cache_release(page);
> + goto again;
> + }
> if (!PageUptodate(page)) {
> ret = -EIO;
> goto next_page;
> @@ -3280,6 +3300,7 @@ next_page:
> len -= PAGE_CACHE_SIZE;
> }
> out:
> + mutex_unlock(&inode->i_mutex);
> iput(inode);
> return ret;
> }
>
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html