On 2018/12/24 下午3:41, Qu Wenruo wrote:
> [PROBLEM]
> One user reports strange super block generation like:
> generation 7937947
> backup 0:
> backup_tree_root: 1113909100544 gen: 7937935 level: 1
> backup 1:
> backup_tree_root: 1113907347456 gen: 7937936 level: 1
> backup 2:
> backup_tree_root: 1113911951360 gen: 7937937 level: 1
> backup 3:
> backup_tree_root: 1113907494912 gen: 7937934 level: 1
>
> sb gen: 7937947 = 0x791f9b
> backup gen: 7937937 = 0x791f91
>
> The reason why such mismatch happens is still uknown, but one assumption
> is 2 bits flipped at runtime.
>
> [SOLUTION]
> Even we did super block verification in commit 75cb857d2618
> ("btrfs: Do super block verification before writing it to disk"), it
> doesn't check generation against transaction as generation corruption is
> not that obvious.
> This allows super block generation corruption sneak in.
>
> So in this patch, btrfs will check super block generation by:
> - Check it against expected transid
> We have 2 different callers who writes all super blocks:
> * btrfs_commit_transaction()
> The expected generation is trans->transid.
> * btrfs_sync_log()
> The expected generation is trans->transid - 1, since we only
> update log_root, while not updating generation.
>
> - Chech it against backup roots
> If we have any valid backup roots (new fs can have no backup root at
> all), we should find one backup root matching the generation,
This part is too restrict to handle btrfs check --repair.
For kernel it's OK, but btrfs-progs transaction commit code won't update
backup roots, so it would cause false alerts.
I'll remove the match check in next version.
Thanks,
Qu
> and no
> backup root newer than current generation.
>
> With above solution, at least we should be able to catch potential
> generation corruption early on.
>
> Reported-by: Peter Chant <pete@xxxxxxxxxxxxxxx>
> Signed-off-by: Qu Wenruo <wqu@xxxxxxxx>
> ---
> fs/btrfs/disk-io.c | 49 +++++++++++++++++++++++++++++++++++++++---
> fs/btrfs/disk-io.h | 2 +-
> fs/btrfs/transaction.c | 2 +-
> fs/btrfs/tree-log.c | 2 +-
> 4 files changed, 49 insertions(+), 6 deletions(-)
>
> diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
> index 5228320030a5..c972baafe664 100644
> --- a/fs/btrfs/disk-io.c
> +++ b/fs/btrfs/disk-io.c
> @@ -2573,9 +2573,14 @@ static int btrfs_validate_mount_super(struct btrfs_fs_info *fs_info)
> * Extra checks like csum type and incompat flags will be done here.
> */
> static int btrfs_validate_write_super(struct btrfs_fs_info *fs_info,
> - struct btrfs_super_block *sb)
> + struct btrfs_super_block *sb,
> + u64 expected_gen)
> {
> + u64 sb_gen = btrfs_super_generation(sb);
> + u64 newest_gen = 0;
> + bool found = false;
> int ret;
> + int i;
>
> ret = validate_super(fs_info, sb, -1);
> if (ret < 0)
> @@ -2594,6 +2599,44 @@ static int btrfs_validate_write_super(struct btrfs_fs_info *fs_info,
> (unsigned long long)BTRFS_FEATURE_INCOMPAT_SUPP);
> goto out;
> }
> +
> + /* super block generation check */
> + if (sb_gen != expected_gen) {
> + ret = -EUCLEAN;
> + btrfs_err(fs_info,
> + "invalid generation detected, has %llu expect %llu",
> + sb_gen, expected_gen);
> + goto out;
> + }
> +
> + /* check against backup roots */
> + for (i = 0; i < BTRFS_NUM_BACKUP_ROOTS; i++) {
> + struct btrfs_root_backup *root_backup = sb->super_roots + i;
> + u64 backup_gen = btrfs_backup_tree_root_gen(root_backup);
> +
> + if (backup_gen > sb_gen) {
> + ret = -EUCLEAN;
> + btrfs_err(fs_info,
> + "invalid backup root generation detected, slot %u has %llu expect <%llu",
> + i, backup_gen, sb_gen);
> + goto out;
> + }
> + newest_gen = max(newest_gen, backup_gen);
> + if (backup_gen == expected_gen)
> + found = true;
> + }
> +
> + /*
> + * If we have any valid backup roots but still fail to find one
> + * matching sb generation, either backup roots or sb itself is
> + * corrupted.
> + */
> + if (newest_gen && !found && newest_gen < expected_gen) {
> + ret = -EUCLEAN;
> + btrfs_err(fs_info,
> +"no backup root generation matches current, found newest %llu expect %llu",
> + newest_gen, expected_gen);
> + }
> out:
> if (ret < 0)
> btrfs_err(fs_info,
> @@ -3721,7 +3764,7 @@ int btrfs_get_num_tolerated_disk_barrier_failures(u64 flags)
> return min_tolerated;
> }
>
> -int write_all_supers(struct btrfs_fs_info *fs_info, int max_mirrors)
> +int write_all_supers(struct btrfs_fs_info *fs_info, u64 gen, int max_mirrors)
> {
> struct list_head *head;
> struct btrfs_device *dev;
> @@ -3786,7 +3829,7 @@ int write_all_supers(struct btrfs_fs_info *fs_info, int max_mirrors)
> flags = btrfs_super_flags(sb);
> btrfs_set_super_flags(sb, flags | BTRFS_HEADER_FLAG_WRITTEN);
>
> - ret = btrfs_validate_write_super(fs_info, sb);
> + ret = btrfs_validate_write_super(fs_info, sb, gen);
> if (ret < 0) {
> mutex_unlock(&fs_info->fs_devices->device_list_mutex);
> btrfs_handle_fs_error(fs_info, -EUCLEAN,
> diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h
> index 4cccba22640f..9f8fd0a9a3a4 100644
> --- a/fs/btrfs/disk-io.h
> +++ b/fs/btrfs/disk-io.h
> @@ -53,7 +53,7 @@ int open_ctree(struct super_block *sb,
> struct btrfs_fs_devices *fs_devices,
> char *options);
> void close_ctree(struct btrfs_fs_info *fs_info);
> -int write_all_supers(struct btrfs_fs_info *fs_info, int max_mirrors);
> +int write_all_supers(struct btrfs_fs_info *fs_info, u64 gen, int max_mirrors);
> struct buffer_head *btrfs_read_dev_super(struct block_device *bdev);
> int btrfs_read_dev_one_super(struct block_device *bdev, int copy_num,
> struct buffer_head **bh_ret);
> diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
> index d1eeef9ec5da..b61cec2780b9 100644
> --- a/fs/btrfs/transaction.c
> +++ b/fs/btrfs/transaction.c
> @@ -2241,7 +2241,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
> goto scrub_continue;
> }
>
> - ret = write_all_supers(fs_info, 0);
> + ret = write_all_supers(fs_info, trans->transid, 0);
> /*
> * the super is written, we can safely allow the tree-loggers
> * to go about their business
> diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
> index e07f3376b7df..4f84345fdbb6 100644
> --- a/fs/btrfs/tree-log.c
> +++ b/fs/btrfs/tree-log.c
> @@ -3155,7 +3155,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
> * the running transaction open, so a full commit can't hop
> * in and cause problems either.
> */
> - ret = write_all_supers(fs_info, 1);
> + ret = write_all_supers(fs_info, trans->transid - 1, 1);
> if (ret) {
> btrfs_set_log_full_commit(fs_info, trans);
> btrfs_abort_transaction(trans, ret);
>
Attachment:
signature.asc
Description: OpenPGP digital signature
