On Thu, Apr 12, 2018 at 06:00:03PM +0800, Qu Wenruo wrote:
> The original first key check is handled in
> btree_read_extent_buffer_pages(), which is good enough for level check,
> but to verify first key, we need at least read lock on the extent
> buffer, or we can hit some race and cause false alert.
>
> Fix it by adding new verifcation function: btrfs_verify_first_key(),
> which will first check lock context of both parent and child extent
> buffer, and only when we have proper lock hold (write or read lock if
> extent buffer is not in commit root) we will verify the first key.
>
> Most of the verification happens in ctree.c after function call like
> read_tree_block(), read_node_slot() and read_block_for_search(), and
> only call btrfs_verify_first_key() after we acquire read/write lock.
>
> Also, since first_key check is not as good as level and could easily
> trigger false alerts due to lock context, put it under
> CONFIG_BTRFS_FS_CHECK_INTEGRITY to avoid damage to end user.
>
> Signed-off-by: Qu Wenruo <wqu@xxxxxxxx>
> ---
> fs/btrfs/ctree.c | 69 ++++++++++++++++++++++++++++++++++++++++++
> fs/btrfs/ctree.h | 58 +++++++++++++++++++++++++++++++++++
> fs/btrfs/extent-tree.c | 6 ++++
> fs/btrfs/qgroup.c | 5 +++
> fs/btrfs/ref-verify.c | 6 ++++
> fs/btrfs/relocation.c | 13 ++++++++
> 6 files changed, 157 insertions(+)
>
> diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
> index e044b51a2789..57721aead371 100644
> --- a/fs/btrfs/ctree.c
> +++ b/fs/btrfs/ctree.c
> @@ -1649,6 +1649,11 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans,
>
> btrfs_tree_lock(cur);
> btrfs_set_lock_blocking(cur);
> + if (btrfs_verify_first_key(cur, parent, i)) {
> + err = -EUCLEAN;
> + btrfs_tree_unlock(cur);
> + free_extent_buffer(cur);
> + }
> err = __btrfs_cow_block(trans, root, cur, parent, i,
> &cur, search_start,
> min(16 * blocksize,
> @@ -1863,6 +1868,12 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
>
> btrfs_tree_lock(child);
> btrfs_set_lock_blocking(child);
> + if (btrfs_verify_first_key(child, mid, 0)) {
> + ret = -EUCLEAN;
> + btrfs_tree_unlock(child);
> + free_extent_buffer(child);
> + goto enospc;
> + }
> ret = btrfs_cow_block(trans, root, child, mid, 0, &child);
> if (ret) {
> btrfs_tree_unlock(child);
> @@ -1901,6 +1912,10 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
> if (left) {
> btrfs_tree_lock(left);
> btrfs_set_lock_blocking(left);
> + if (btrfs_verify_first_key(left, parent, pslot - 1)) {
> + ret = -EUCLEAN;
> + goto enospc;
> + }
> wret = btrfs_cow_block(trans, root, left,
> parent, pslot - 1, &left);
> if (wret) {
> @@ -1916,6 +1931,10 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
> if (right) {
> btrfs_tree_lock(right);
> btrfs_set_lock_blocking(right);
> + if (btrfs_verify_first_key(right, parent, pslot + 1)) {
> + ret = -EUCLEAN;
> + goto enospc;
> + }
> wret = btrfs_cow_block(trans, root, right,
> parent, pslot + 1, &right);
> if (wret) {
> @@ -2079,6 +2098,8 @@ static noinline int push_nodes_for_insert(struct btrfs_trans_handle *trans,
>
> btrfs_tree_lock(left);
> btrfs_set_lock_blocking(left);
> + if (btrfs_verify_first_key(left, parent, pslot - 1))
> + goto left_out;
>
> left_nr = btrfs_header_nritems(left);
> if (left_nr >= BTRFS_NODEPTRS_PER_BLOCK(fs_info) - 1) {
> @@ -2119,6 +2140,7 @@ static noinline int push_nodes_for_insert(struct btrfs_trans_handle *trans,
> }
> return 0;
> }
> +left_out:
> btrfs_tree_unlock(left);
> free_extent_buffer(left);
> }
> @@ -2134,6 +2156,8 @@ static noinline int push_nodes_for_insert(struct btrfs_trans_handle *trans,
>
> btrfs_tree_lock(right);
> btrfs_set_lock_blocking(right);
> + if (btrfs_verify_first_key(right, parent, pslot + 1))
> + goto right_out;
>
> right_nr = btrfs_header_nritems(right);
> if (right_nr >= BTRFS_NODEPTRS_PER_BLOCK(fs_info) - 1) {
> @@ -2174,6 +2198,7 @@ static noinline int push_nodes_for_insert(struct btrfs_trans_handle *trans,
> }
> return 0;
> }
> +right_out:
> btrfs_tree_unlock(right);
> free_extent_buffer(right);
> }
> @@ -2868,6 +2893,11 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root *root,
> p->locks[level] = BTRFS_READ_LOCK;
> }
> p->nodes[level] = b;
> + if (btrfs_verify_first_key(b, p->nodes[level + 1],
> + slot)) {
> + ret = -EUCLEAN;
> + goto done;
> + }
> }
> } else {
> p->slots[level] = slot;
> @@ -2981,6 +3011,12 @@ int btrfs_search_old_slot(struct btrfs_root *root, const struct btrfs_key *key,
> goto done;
> }
>
> + /*
> + * NOTE: both parent and child go through
> + * tree_mod_log_rewind(), first key check is no
> + * longer consistent. So no btrfs_verify_first_key()
> + * for this read_block_for_search().
> + */
> err = read_block_for_search(root, p, &b, level,
> slot, key);
> if (err == -EAGAIN)
> @@ -3753,6 +3789,8 @@ static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root
>
> btrfs_tree_lock(right);
> btrfs_set_lock_blocking(right);
> + if (btrfs_verify_first_key(right, upper, slot + 1))
> + goto out_unlock;
>
> free_space = btrfs_leaf_free_space(fs_info, right);
> if (free_space < data_size)
> @@ -3987,6 +4025,10 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root
>
> btrfs_tree_lock(left);
> btrfs_set_lock_blocking(left);
> + if (btrfs_verify_first_key(left, path->nodes[1], slot - 1)) {
> + ret = 1;
> + goto out;
> + }
>
> free_space = btrfs_leaf_free_space(fs_info, left);
> if (free_space < data_size) {
> @@ -5205,6 +5247,12 @@ int btrfs_search_forward(struct btrfs_root *root, struct btrfs_key *min_key,
> }
>
> btrfs_tree_read_lock(cur);
> + if (btrfs_verify_first_key(cur, path->nodes[level], slot)) {
> + btrfs_tree_read_unlock(cur);
> + free_extent_buffer(cur);
> + ret = -EUCLEAN;
> + goto out;
> + }
>
> path->locks[level - 1] = BTRFS_READ_LOCK;
> path->nodes[level - 1] = cur;
> @@ -5232,6 +5280,11 @@ static int tree_move_down(struct btrfs_fs_info *fs_info,
> if (IS_ERR(eb))
> return PTR_ERR(eb);
>
> + if (btrfs_verify_first_key(eb, path->nodes[*level],
> + path->slots[*level])) {
> + free_extent_buffer(eb);
> + return -EUCLEAN;
> + }
> path->nodes[*level - 1] = eb;
> path->slots[*level - 1] = 0;
> (*level)--;
> @@ -5786,6 +5839,14 @@ int btrfs_next_old_leaf(struct btrfs_root *root, struct btrfs_path *path,
> BTRFS_READ_LOCK);
> }
> next_rw_lock = BTRFS_READ_LOCK;
> + if (btrfs_verify_first_key(next, path->nodes[level],
> + slot)) {
> + ret = -EUCLEAN;
> + btrfs_tree_read_unlock(next);
> + free_extent_buffer(next);
> + btrfs_release_path(path);
> + goto done;
> + }
> }
> break;
> }
> @@ -5823,6 +5884,14 @@ int btrfs_next_old_leaf(struct btrfs_root *root, struct btrfs_path *path,
> BTRFS_READ_LOCK);
> }
> next_rw_lock = BTRFS_READ_LOCK;
> + if (btrfs_verify_first_key(next, path->nodes[level],
> + 0)) {
> + ret = -EUCLEAN;
> + btrfs_tree_read_unlock(next);
> + free_extent_buffer(next);
> + btrfs_release_path(path);
> + goto done;
> + }
> }
> }
> ret = 0;
> diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
> index 0eb55825862a..033d8ae027c3 100644
> --- a/fs/btrfs/ctree.h
> +++ b/fs/btrfs/ctree.h
> @@ -44,6 +44,7 @@
> #include "extent_io.h"
> #include "extent_map.h"
> #include "async-thread.h"
> +#include "print-tree.h"
>
> struct btrfs_trans_handle;
> struct btrfs_transaction;
> @@ -3752,4 +3753,61 @@ static inline int btrfs_is_testing(struct btrfs_fs_info *fs_info)
> #endif
> return 0;
> }
> +
> +/*
> + * Verify the first key of an extent buffer matches with its parent pointer.
> + *
> + * For tree blocks in current transaction, caller should hold at least read lock
> + * for both @eb and @parent to avoid race.
> + * While for tree blocks in commit root, no such requirement.
> + */
> +static inline int btrfs_verify_first_key(struct extent_buffer *eb,
Too big for a static inline and too much code for a header. Please add
an #ifdef that will export a static inline for { return 0; } and a
normal function that will be declared in a header and defined in .c
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html