md/raid5: replace sh->lock with an 'active' flag.

author NeilBrown <neilb@suse.de>

Tue, 26 Jul 2011 01:34:20 +0000 (11:34 +1000)

committer NeilBrown <neilb@suse.de>

Tue, 26 Jul 2011 01:34:20 +0000 (11:34 +1000)
author NeilBrown <neilb@suse.de>
Tue, 26 Jul 2011 01:34:20 +0000 (11:34 +1000)
committer NeilBrown <neilb@suse.de>
Tue, 26 Jul 2011 01:34:20 +0000 (11:34 +1000)
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c

index 9985138f4c04465f8a3414091bc0a8c9ba24f1df..f8275b5a6fbef149f3fab4106d48e54dc01bebf4 100644 (file)
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -1020,14 +1020,12 @@ ops_run_biodrain(struct stripe_head *sh, struct dma_async_tx_descriptor *tx)
                 if (test_and_clear_bit(R5_Wantdrain, &dev->flags)) {
                         struct bio *wbi;
  
-                       spin_lock(&sh->lock);
                         spin_lock_irq(&sh->raid_conf->device_lock);
                         chosen = dev->towrite;
                         dev->towrite = NULL;
                         BUG_ON(dev->written);
                         wbi = dev->written = chosen;
                         spin_unlock_irq(&sh->raid_conf->device_lock);
-                       spin_unlock(&sh->lock);
  
                         while (wbi && wbi->bi_sector <
                                 dev->sector + STRIPE_SECTORS) {
@@ -1322,7 +1320,6 @@ static int grow_one_stripe(raid5_conf_t *conf)
                 return 0;
  
         sh->raid_conf = conf;
-       spin_lock_init(&sh->lock);
         #ifdef CONFIG_MULTICORE_RAID456
         init_waitqueue_head(&sh->ops.wait_for_ops);
         #endif
@@ -1442,7 +1439,6 @@ static int resize_stripes(raid5_conf_t *conf, int newsize)
                         break;
  
                 nsh->raid_conf = conf;
-               spin_lock_init(&nsh->lock);
                 #ifdef CONFIG_MULTICORE_RAID456
                 init_waitqueue_head(&nsh->ops.wait_for_ops);
                 #endif
@@ -2148,7 +2144,6 @@ static int add_stripe_bio(struct stripe_head *sh, struct bio *bi, int dd_idx, in
                 (unsigned long long)sh->sector);
  
  
-       spin_lock(&sh->lock);
         spin_lock_irq(&conf->device_lock);
         if (forwrite) {
                 bip = &sh->dev[dd_idx].towrite;
@@ -2184,7 +2179,6 @@ static int add_stripe_bio(struct stripe_head *sh, struct bio *bi, int dd_idx, in
                         set_bit(R5_OVERWRITE, &sh->dev[dd_idx].flags);
         }
         spin_unlock_irq(&conf->device_lock);
-       spin_unlock(&sh->lock);
  
         pr_debug("added bi b#%llu to stripe s#%llu, disk %d.\n",
                 (unsigned long long)(*bip)->bi_sector,
@@ -2201,7 +2195,6 @@ static int add_stripe_bio(struct stripe_head *sh, struct bio *bi, int dd_idx, in
   overlap:
         set_bit(R5_Overlap, &sh->dev[dd_idx].flags);
         spin_unlock_irq(&conf->device_lock);
-       spin_unlock(&sh->lock);
         return 0;
  }
  
@@ -3023,12 +3016,10 @@ static void handle_stripe5(struct stripe_head *sh)
                  atomic_read(&sh->count), sh->pd_idx, sh->check_state,
                  sh->reconstruct_state);
  
-       spin_lock(&sh->lock);
         if (test_and_clear_bit(STRIPE_SYNC_REQUESTED, &sh->state)) {
                 set_bit(STRIPE_SYNCING, &sh->state);
                 clear_bit(STRIPE_INSYNC, &sh->state);
         }
-       clear_bit(STRIPE_HANDLE, &sh->state);
         clear_bit(STRIPE_DELAYED, &sh->state);
  
         s.syncing = test_bit(STRIPE_SYNCING, &sh->state);
@@ -3037,6 +3028,7 @@ static void handle_stripe5(struct stripe_head *sh)
  
         /* Now to look around and see what can be done */
         rcu_read_lock();
+       spin_lock_irq(&conf->device_lock);
         for (i=disks; i--; ) {
                 mdk_rdev_t *rdev;
  
@@ -3099,6 +3091,7 @@ static void handle_stripe5(struct stripe_head *sh)
                         s.failed_num = i;
                 }
         }
+       spin_unlock_irq(&conf->device_lock);
         rcu_read_unlock();
  
         if (unlikely(blocked_rdev)) {
@@ -3275,7 +3268,6 @@ static void handle_stripe5(struct stripe_head *sh)
                 handle_stripe_expansion(conf, sh, NULL);
  
   unlock:
-       spin_unlock(&sh->lock);
  
         /* wait for this device to become unblocked */
         if (unlikely(blocked_rdev))
@@ -3318,12 +3310,10 @@ static void handle_stripe6(struct stripe_head *sh)
                sh->check_state, sh->reconstruct_state);
         memset(&s, 0, sizeof(s));
  
-       spin_lock(&sh->lock);
         if (test_and_clear_bit(STRIPE_SYNC_REQUESTED, &sh->state)) {
                 set_bit(STRIPE_SYNCING, &sh->state);
                 clear_bit(STRIPE_INSYNC, &sh->state);
         }
-       clear_bit(STRIPE_HANDLE, &sh->state);
         clear_bit(STRIPE_DELAYED, &sh->state);
  
         s.syncing = test_bit(STRIPE_SYNCING, &sh->state);
@@ -3332,6 +3322,7 @@ static void handle_stripe6(struct stripe_head *sh)
         /* Now to look around and see what can be done */
  
         rcu_read_lock();
+       spin_lock_irq(&conf->device_lock);
         for (i=disks; i--; ) {
                 mdk_rdev_t *rdev;
                 dev = &sh->dev[i];
@@ -3395,6 +3386,7 @@ static void handle_stripe6(struct stripe_head *sh)
                         s.failed++;
                 }
         }
+       spin_unlock_irq(&conf->device_lock);
         rcu_read_unlock();
  
         if (unlikely(blocked_rdev)) {
@@ -3580,7 +3572,6 @@ static void handle_stripe6(struct stripe_head *sh)
                 handle_stripe_expansion(conf, sh, &r6s);
  
   unlock:
-       spin_unlock(&sh->lock);
  
         /* wait for this device to become unblocked */
         if (unlikely(blocked_rdev))
@@ -3608,10 +3599,19 @@ static void handle_stripe6(struct stripe_head *sh)
  
  static void handle_stripe(struct stripe_head *sh)
  {
+       clear_bit(STRIPE_HANDLE, &sh->state);
+       if (test_and_set_bit(STRIPE_ACTIVE, &sh->state)) {
+               /* already being handled, ensure it gets handled
+                * again when current action finishes */
+               set_bit(STRIPE_HANDLE, &sh->state);
+               return;
+       }
+
         if (sh->raid_conf->level == 6)
                 handle_stripe6(sh);
         else
                 handle_stripe5(sh);
+       clear_bit(STRIPE_ACTIVE, &sh->state);
  }
  
  static void raid5_activate_delayed(raid5_conf_t *conf)
diff --git a/drivers/md/raid5.h b/drivers/md/raid5.h

index a33001137bf8dac95cf68fe61c074d1d2fb2c7f1..bb246d9e05473e6348f494cceb4e83674fbdfd80 100644 (file)
--- a/drivers/md/raid5.h
+++ b/drivers/md/raid5.h
@@ -6,11 +6,11 @@
  
  /*
   *
- * Each stripe contains one buffer per disc.  Each buffer can be in
+ * Each stripe contains one buffer per device.  Each buffer can be in
   * one of a number of states stored in "flags".  Changes between
- * these states happen *almost* exclusively under a per-stripe
- * spinlock.  Some very specific changes can happen in bi_end_io, and
- * these are not protected by the spin lock.
+ * these states happen *almost* exclusively under the protection of the
+ * STRIPE_ACTIVE flag.  Some very specific changes can happen in bi_end_io, and
+ * these are not protected by STRIPE_ACTIVE.
   *
   * The flag bits that are used to represent these states are:
   *   R5_UPTODATE and R5_LOCKED
@@ -76,12 +76,10 @@
   * block and the cached buffer are successfully written, any buffer on
   * a written list can be returned with b_end_io.
   *
- * The write list and read list both act as fifos.  The read list is
- * protected by the device_lock.  The write and written lists are
- * protected by the stripe lock.  The device_lock, which can be
- * claimed while the stipe lock is held, is only for list
- * manipulations and will only be held for a very short time.  It can
- * be claimed from interrupts.
+ * The write list and read list both act as fifos.  The read list,
+ * write list and written list are protected by the device_lock.
+ * The device_lock is only for list manipulations and will only be
+ * held for a very short time.  It can be claimed from interrupts.
   *
   *
   * Stripes in the stripe cache can be on one of two lists (or on
@@ -96,7 +94,6 @@
   *
   * The inactive_list, handle_list and hash bucket lists are all protected by the
   * device_lock.
- *  - stripes on the inactive_list never have their stripe_lock held.
   *  - stripes have a reference counter. If count==0, they are on a list.
   *  - If a stripe might need handling, STRIPE_HANDLE is set.
   *  - When refcount reaches zero, then if STRIPE_HANDLE it is put on
@@ -116,10 +113,10 @@
   *  attach a request to an active stripe (add_stripe_bh())
   *     lockdev attach-buffer unlockdev
   *  handle a stripe (handle_stripe())
- *     lockstripe clrSTRIPE_HANDLE ...
+ *     setSTRIPE_ACTIVE,  clrSTRIPE_HANDLE ...
   *             (lockdev check-buffers unlockdev) ..
   *             change-state ..
- *             record io/ops needed unlockstripe schedule io/ops
+ *             record io/ops needed clearSTRIPE_ACTIVE schedule io/ops
   *  release an active stripe (release_stripe())
   *     lockdev if (!--cnt) { if  STRIPE_HANDLE, add to handle_list else add to inactive-list } unlockdev
   *
@@ -128,8 +125,7 @@
   * on a cached buffer, and plus one if the stripe is undergoing stripe
   * operations.
   *
- * Stripe operations are performed outside the stripe lock,
- * the stripe operations are:
+ * The stripe operations are:
   * -copying data between the stripe cache and user application buffers
   * -computing blocks to save a disk access, or to recover a missing block
   * -updating the parity on a write operation (reconstruct write and
@@ -159,7 +155,8 @@
   */
  
  /*
- * Operations state - intermediate states that are visible outside of sh->lock
+ * Operations state - intermediate states that are visible outside of 
+ *   STRIPE_ACTIVE.
   * In general _idle indicates nothing is running, _run indicates a data
   * processing operation is active, and _result means the data processing result
   * is stable and can be acted upon.  For simple operations like biofill and
@@ -209,7 +206,6 @@ struct stripe_head {
         short                   ddf_layout;/* use DDF ordering to calculate Q */
         unsigned long           state;          /* state flags */
         atomic_t                count;        /* nr of active thread/requests */
-       spinlock_t              lock;
         int                     bm_seq; /* sequence number for bitmap flushes */
         int                     disks;          /* disks in stripe */
         enum check_states       check_state;
@@ -240,7 +236,7 @@ struct stripe_head {
  };
  
  /* stripe_head_state - collects and tracks the dynamic state of a stripe_head
- *     for handle_stripe.  It is only valid under spin_lock(sh->lock);
+ *     for handle_stripe.
   */
  struct stripe_head_state {
         int syncing, expanding, expanded;
@@ -290,6 +286,7 @@ struct r6_state {
   * Stripe state
   */
  enum {
+       STRIPE_ACTIVE,
         STRIPE_HANDLE,
         STRIPE_SYNC_REQUESTED,
         STRIPE_SYNCING,
@@ -339,7 +336,7 @@ enum {
   * PREREAD_ACTIVE.
   * In stripe_handle, if we find pre-reading is necessary, we do it if
   * PREREAD_ACTIVE is set, else we set DELAYED which will send it to the delayed queue.
- * HANDLE gets cleared if stripe_handle leave nothing locked.
+ * HANDLE gets cleared if stripe_handle leaves nothing locked.
   */
author	NeilBrown <neilb@suse.de>
	Tue, 26 Jul 2011 01:34:20 +0000 (11:34 +1000)
committer	NeilBrown <neilb@suse.de>
	Tue, 26 Jul 2011 01:34:20 +0000 (11:34 +1000)
drivers/md/raid5.c		patch \| blob \| history
drivers/md/raid5.h		patch \| blob \| history