On Tue, Nov 18, 2014 at 05:19:41PM -0500, Josef Bacik wrote:
> Liu Bo pointed out that my previous fix would lose the generation update in the
> scenario I described. It is actually much worse than that, we could lose the
> entire extent if we lose power right after the transaction commits. Consider
> the following
>
> write extent 0-4k
> log extent in log tree
> commit transaction
> < power fail happens here
> ordered extent completes
>
> We would lose the 0-4k extent because it hasn't updated the actual fs tree, and
> the transaction commit will reset the log so it isn't replayed. If we lose
> power before the transaction commit we are save, otherwise we are not.
>
> Fix this by keeping track of all extents we logged in this transaction. Then
> when we go to commit the transaction make sure we wait for all of those ordered
> extents to complete before proceeding. This will make sure that if we lose
> power after the transaction commit we still have our data. This also fixes the
> problem of the improperly updated extent generation. Thanks,
This looks saner.
Reviewed-by: Liu Bo <bo.li.liu@xxxxxxxxxx>
thanks,
-liubo
>
> cc: stable@xxxxxxxxxxxxxxx
> Signed-off-by: Josef Bacik <jbacik@xxxxxx>
> ---
> fs/btrfs/ordered-data.c | 6 ++++--
> fs/btrfs/ordered-data.h | 6 +++++-
> fs/btrfs/transaction.c | 33 +++++++++++++++++++++++++++++++++
> fs/btrfs/transaction.h | 2 ++
> fs/btrfs/tree-log.c | 6 +++---
> 5 files changed, 47 insertions(+), 6 deletions(-)
>
> diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
> index ac734ec..7c2dd7a 100644
> --- a/fs/btrfs/ordered-data.c
> +++ b/fs/btrfs/ordered-data.c
> @@ -220,6 +220,7 @@ static int __btrfs_add_ordered_extent(struct inode *inode, u64 file_offset,
> INIT_LIST_HEAD(&entry->work_list);
> init_completion(&entry->completion);
> INIT_LIST_HEAD(&entry->log_list);
> + INIT_LIST_HEAD(&entry->trans_list);
>
> trace_btrfs_ordered_extent_add(inode, entry);
>
> @@ -472,7 +473,8 @@ void btrfs_submit_logged_extents(struct list_head *logged_list,
> spin_unlock_irq(&log->log_extents_lock[index]);
> }
>
> -void btrfs_wait_logged_extents(struct btrfs_root *log, u64 transid)
> +void btrfs_wait_logged_extents(struct btrfs_trans_handle *trans,
> + struct btrfs_root *log, u64 transid)
> {
> struct btrfs_ordered_extent *ordered;
> int index = transid % 2;
> @@ -497,7 +499,7 @@ void btrfs_wait_logged_extents(struct btrfs_root *log, u64 transid)
> wait_event(ordered->wait, test_bit(BTRFS_ORDERED_IO_DONE,
> &ordered->flags));
>
> - btrfs_put_ordered_extent(ordered);
> + list_add_tail(&ordered->trans_list, &trans->ordered);
> spin_lock_irq(&log->log_extents_lock[index]);
> }
> spin_unlock_irq(&log->log_extents_lock[index]);
> diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h
> index d81a274..171a841 100644
> --- a/fs/btrfs/ordered-data.h
> +++ b/fs/btrfs/ordered-data.h
> @@ -121,6 +121,9 @@ struct btrfs_ordered_extent {
> /* If we need to wait on this to be done */
> struct list_head log_list;
>
> + /* If the transaction needs to wait on this ordered extent */
> + struct list_head trans_list;
> +
> /* used to wait for the BTRFS_ORDERED_COMPLETE bit */
> wait_queue_head_t wait;
>
> @@ -197,7 +200,8 @@ void btrfs_get_logged_extents(struct inode *inode,
> void btrfs_put_logged_extents(struct list_head *logged_list);
> void btrfs_submit_logged_extents(struct list_head *logged_list,
> struct btrfs_root *log);
> -void btrfs_wait_logged_extents(struct btrfs_root *log, u64 transid);
> +void btrfs_wait_logged_extents(struct btrfs_trans_handle *trans,
> + struct btrfs_root *log, u64 transid);
> void btrfs_free_logged_extents(struct btrfs_root *log, u64 transid);
> int __init ordered_data_init(void);
> void ordered_data_exit(void);
> diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
> index dcaae36..63c6d05 100644
> --- a/fs/btrfs/transaction.c
> +++ b/fs/btrfs/transaction.c
> @@ -220,6 +220,7 @@ loop:
> INIT_LIST_HEAD(&cur_trans->pending_snapshots);
> INIT_LIST_HEAD(&cur_trans->pending_chunks);
> INIT_LIST_HEAD(&cur_trans->switch_commits);
> + INIT_LIST_HEAD(&cur_trans->pending_ordered);
> list_add_tail(&cur_trans->list, &fs_info->trans_list);
> extent_io_tree_init(&cur_trans->dirty_pages,
> fs_info->btree_inode->i_mapping);
> @@ -488,6 +489,7 @@ again:
> h->sync = false;
> INIT_LIST_HEAD(&h->qgroup_ref_list);
> INIT_LIST_HEAD(&h->new_bgs);
> + INIT_LIST_HEAD(&h->ordered);
>
> smp_mb();
> if (cur_trans->state >= TRANS_STATE_BLOCKED &&
> @@ -719,6 +721,12 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
> if (!list_empty(&trans->new_bgs))
> btrfs_create_pending_block_groups(trans, root);
>
> + if (!list_empty(&trans->ordered)) {
> + spin_lock(&info->trans_lock);
> + list_splice(&trans->ordered, &cur_trans->pending_ordered);
> + spin_unlock(&info->trans_lock);
> + }
> +
> trans->delayed_ref_updates = 0;
> if (!trans->sync) {
> must_run_delayed_refs =
> @@ -1652,6 +1660,28 @@ static inline void btrfs_wait_delalloc_flush(struct btrfs_fs_info *fs_info)
> btrfs_wait_ordered_roots(fs_info, -1);
> }
>
> +static inline void
> +btrfs_wait_pending_ordered(struct btrfs_transaction *cur_trans,
> + struct btrfs_fs_info *fs_info)
> +{
> + struct btrfs_ordered_extent *ordered;
> +
> + spin_lock(&fs_info->trans_lock);
> + while (!list_empty(&cur_trans->pending_ordered)) {
> + ordered = list_first_entry(&cur_trans->pending_ordered,
> + struct btrfs_ordered_extent,
> + trans_list);
> + list_del_init(&ordered->trans_list);
> + spin_unlock(&fs_info->trans_lock);
> +
> + wait_event(ordered->wait, test_bit(BTRFS_ORDERED_COMPLETE,
> + &ordered->flags));
> + btrfs_put_ordered_extent(ordered);
> + spin_lock(&fs_info->trans_lock);
> + }
> + spin_unlock(&fs_info->trans_lock);
> +}
> +
> int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
> struct btrfs_root *root)
> {
> @@ -1702,6 +1732,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
> }
>
> spin_lock(&root->fs_info->trans_lock);
> + list_splice(&trans->ordered, &cur_trans->pending_ordered);
> if (cur_trans->state >= TRANS_STATE_COMMIT_START) {
> spin_unlock(&root->fs_info->trans_lock);
> atomic_inc(&cur_trans->use_count);
> @@ -1754,6 +1785,8 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
>
> btrfs_wait_delalloc_flush(root->fs_info);
>
> + btrfs_wait_pending_ordered(cur_trans, root->fs_info);
> +
> btrfs_scrub_pause(root);
> /*
> * Ok now we need to make sure to block out any other joins while we
> diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h
> index d8f40e1..1ba9c3e 100644
> --- a/fs/btrfs/transaction.h
> +++ b/fs/btrfs/transaction.h
> @@ -56,6 +56,7 @@ struct btrfs_transaction {
> wait_queue_head_t commit_wait;
> struct list_head pending_snapshots;
> struct list_head pending_chunks;
> + struct list_head pending_ordered;
> struct list_head switch_commits;
> struct btrfs_delayed_ref_root delayed_refs;
> int aborted;
> @@ -105,6 +106,7 @@ struct btrfs_trans_handle {
> */
> struct btrfs_root *root;
> struct seq_list delayed_ref_elem;
> + struct list_head ordered;
> struct list_head qgroup_ref_list;
> struct list_head new_bgs;
> };
> diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
> index 70f99b1..337e4bc 100644
> --- a/fs/btrfs/tree-log.c
> +++ b/fs/btrfs/tree-log.c
> @@ -2600,7 +2600,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
> if (atomic_read(&log_root_tree->log_commit[index2])) {
> blk_finish_plug(&plug);
> btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark);
> - btrfs_wait_logged_extents(log, log_transid);
> + btrfs_wait_logged_extents(trans, log, log_transid);
> wait_log_commit(trans, log_root_tree,
> root_log_ctx.log_transid);
> mutex_unlock(&log_root_tree->log_mutex);
> @@ -2645,7 +2645,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
> btrfs_wait_marked_extents(log_root_tree,
> &log_root_tree->dirty_log_pages,
> EXTENT_NEW | EXTENT_DIRTY);
> - btrfs_wait_logged_extents(log, log_transid);
> + btrfs_wait_logged_extents(trans, log, log_transid);
>
> btrfs_set_super_log_root(root->fs_info->super_for_commit,
> log_root_tree->node->start);
> @@ -3766,7 +3766,7 @@ static int log_one_extent(struct btrfs_trans_handle *trans,
> fi = btrfs_item_ptr(leaf, path->slots[0],
> struct btrfs_file_extent_item);
>
> - btrfs_set_token_file_extent_generation(leaf, fi, em->generation,
> + btrfs_set_token_file_extent_generation(leaf, fi, trans->transid,
> &token);
> if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags))
> btrfs_set_token_file_extent_type(leaf, fi,
> --
> 1.8.3.1
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
> the body of a message to majordomo@xxxxxxxxxxxxxxx
> More majordomo info at http://vger.kernel.org/majordomo-info.html
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html