Re: [patch 5/9 v2] raid5: reduce chance release_stripe() taking device_lock

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]


On Mon, Jun 18, 2012 at 6:59 PM, Shaohua Li <shli@xxxxxxxxxx> wrote:
> release_stripe() is a place conf->device_lock is heavily contended. We take the
> lock even stripe count isn't 1, which isn't required. On the on the other hand,
> decreasing count first and taking lock if count is 0 can expose races:
> 1. bewteen dec count and taking lock, another thread hits the stripe in cache,
> so increase count. The stripe will be deleted from any list. In this case
> stripe count isn't 0.
> 2. between dec count and taking lock, another thread hits the stripe in cache
> and release it. In this case the stripe is already in specific list. We do
> list_move to adjust its position.
> So both cases are fixable to me.
>

These "1" and "2" comments no longer apply right?  atomic_dec_and_lock
is equivalently safe to lock+dec_and_test.


> Signed-off-by: Shaohua Li <shli@xxxxxxxxxxxx>
> ---
>  drivers/md/raid5.c |   79 ++++++++++++++++++++++++++++++-----------------------
>  1 file changed, 45 insertions(+), 34 deletions(-)
>
> Index: linux/drivers/md/raid5.c
> ===================================================================
> --- linux.orig/drivers/md/raid5.c       2012-06-08 13:18:39.296561227 +0800
> +++ linux/drivers/md/raid5.c    2012-06-08 13:38:38.209487084 +0800
> @@ -196,47 +196,61 @@ static int stripe_operations_active(stru
>               test_bit(STRIPE_COMPUTE_RUN, &sh->state);
>  }
>
> -static void __release_stripe(struct r5conf *conf, struct stripe_head *sh)
> +static void handle_release_stripe(struct r5conf *conf, struct stripe_head *sh)
>  {
> -       if (atomic_dec_and_test(&sh->count)) {
> -               BUG_ON(!list_empty(&sh->lru));

Hmm, this BUG_ON does not get carried over?

> -               BUG_ON(atomic_read(&conf->active_stripes)==0);
> -               if (test_bit(STRIPE_HANDLE, &sh->state)) {
> -                       if (test_bit(STRIPE_DELAYED, &sh->state))
> -                               list_add_tail(&sh->lru, &conf->delayed_list);
> -                       else if (test_bit(STRIPE_BIT_DELAY, &sh->state) &&
> -                                  sh->bm_seq - conf->seq_write > 0)
> -                               list_add_tail(&sh->lru, &conf->bitmap_list);
> -                       else {
> -                               clear_bit(STRIPE_BIT_DELAY, &sh->state);
> -                               list_add_tail(&sh->lru, &conf->handle_list);
> -                       }
> -                       md_wakeup_thread(conf->mddev->thread);
> -               } else {
> -                       BUG_ON(stripe_operations_active(sh));
> -                       if (test_and_clear_bit(STRIPE_PREREAD_ACTIVE, &sh->state))
> -                               if (atomic_dec_return(&conf->preread_active_stripes)
> -                                   < IO_THRESHOLD)
> -                                       md_wakeup_thread(conf->mddev->thread);
> -                       atomic_dec(&conf->active_stripes);
> -                       if (!test_bit(STRIPE_EXPANDING, &sh->state)) {
> -                               list_add_tail(&sh->lru, &conf->inactive_list);
> -                               wake_up(&conf->wait_for_stripe);
> -                               if (conf->retry_read_aligned)
> -                                       md_wakeup_thread(conf->mddev->thread);
> -                       }
> +       /*
> +        * Before we hold device_lock, other thread can hit this stripe
> +        * in cache. It could do:
> +        * 1. just get_active_stripe(). The stripe count isn't 0 then.
> +        * 2. do get_active_stripe() and follow release_stripe(). So the
> +        * stripe might be already released and already in specific
> +        * list. we do list_move to adjust its position in the list.
> +        */
> +       BUG_ON(atomic_read(&conf->active_stripes)==0);
> +       if (test_bit(STRIPE_HANDLE, &sh->state)) {
> +               if (test_bit(STRIPE_DELAYED, &sh->state))
> +                       list_move_tail(&sh->lru, &conf->delayed_list);
> +               else if (test_bit(STRIPE_BIT_DELAY, &sh->state) &&
> +                          sh->bm_seq - conf->seq_write > 0)
> +                       list_move_tail(&sh->lru, &conf->bitmap_list);
> +               else {
> +                       clear_bit(STRIPE_BIT_DELAY, &sh->state);
> +                       list_move_tail(&sh->lru, &conf->handle_list);
> +               }
> +               md_wakeup_thread(conf->mddev->thread);
> +       } else {
> +               BUG_ON(stripe_operations_active(sh));
> +               if (test_and_clear_bit(STRIPE_PREREAD_ACTIVE, &sh->state))
> +                       if (atomic_dec_return(&conf->preread_active_stripes)
> +                           < IO_THRESHOLD)
> +                               md_wakeup_thread(conf->mddev->thread);
> +               atomic_dec(&conf->active_stripes);
> +               if (!test_bit(STRIPE_EXPANDING, &sh->state)) {
> +                       list_move_tail(&sh->lru, &conf->inactive_list);
> +                       wake_up(&conf->wait_for_stripe);
> +                       if (conf->retry_read_aligned)
> +                               md_wakeup_thread(conf->mddev->thread);
>                }
>        }
>  }
>
> +static void __release_stripe(struct r5conf *conf, struct stripe_head *sh)
> +{
> +       if (atomic_dec_and_test(&sh->count))
> +               handle_release_stripe(conf, sh);
> +}
> +
>  static void release_stripe(struct stripe_head *sh)
>  {
>        struct r5conf *conf = sh->raid_conf;
>        unsigned long flags;
>
> -       spin_lock_irqsave(&conf->device_lock, flags);
> -       __release_stripe(conf, sh);
> -       spin_unlock_irqrestore(&conf->device_lock, flags);
> +       local_irq_save(flags);
> +       if (atomic_dec_and_lock(&sh->count, &conf->device_lock)) {
> +               handle_release_stripe(conf, sh);
> +               spin_unlock(&conf->device_lock);
> +       }
> +       local_irq_restore(flags);
>  }
>
>  static inline void remove_hash(struct stripe_head *sh)
> @@ -479,9 +493,6 @@ get_active_stripe(struct r5conf *conf, s
>                        } else {
>                                if (!test_bit(STRIPE_HANDLE, &sh->state))
>                                        atomic_inc(&conf->active_stripes);
> -                               if (list_empty(&sh->lru) &&
> -                                   !test_bit(STRIPE_EXPANDING, &sh->state))
> -                                       BUG();

Why are these checks removed?
--
To unsubscribe from this list: send the line "unsubscribe linux-raid" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[ATA RAID]     [Linux SCSI Target Infrastructure]     [Managing RAID on Linux]     [Linux IDE]     [Linux SCSI]     [Linux Hams]     [Device-Mapper]     [Kernel]     [Linux Books]     [Linux Admin]     [Linux Net]     [GFS]     [RPM]     [git]     [Photos]     [Yosemite Photos]     [Yosemite News]     [AMD 64]     [Linux Networking]

Add to Google Powered by Linux