NFSv4: Handle NFS4ERR_GRACE when recovering an expired lease.

[karo-tx-linux.git] / fs / fs-writeback.c
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c

index 916e83489caa8fef98684f4a7fce532c7cb454f5..ef2acd24cc02fd13bfb402d7b47fb2cc355e6f5b 100644 (file)
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -250,9 +250,11 @@ static void bdi_sync_writeback(struct backing_dev_info *bdi,
   *   completion. Caller need not hold sb s_umount semaphore.
   *
   */
-void bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages)
+void bdi_start_writeback(struct backing_dev_info *bdi, struct super_block *sb,
+                        long nr_pages)
  {
         struct wb_writeback_args args = {
+               .sb             = sb,
                 .sync_mode      = WB_SYNC_NONE,
                 .nr_pages       = nr_pages,
                 .range_cyclic   = 1,
@@ -320,7 +322,7 @@ static bool inode_dirtied_after(struct inode *inode, unsigned long t)
          * For inodes being constantly redirtied, dirtied_when can get stuck.
          * It _appears_ to be in the future, but is actually in distant past.
          * This test is necessary to prevent such wrapped-around relative times
-        * from permanently stopping the whole pdflush writeback.
+        * from permanently stopping the whole bdi writeback.
          */
         ret = ret && time_before_eq(inode->dirtied_when, jiffies);
  #endif
@@ -334,13 +336,38 @@ static void move_expired_inodes(struct list_head *delaying_queue,
                                struct list_head *dispatch_queue,
                                 unsigned long *older_than_this)
  {
+       LIST_HEAD(tmp);
+       struct list_head *pos, *node;
+       struct super_block *sb = NULL;
+       struct inode *inode;
+       int do_sb_sort = 0;
+
         while (!list_empty(delaying_queue)) {
-               struct inode *inode = list_entry(delaying_queue->prev,
-                                               struct inode, i_list);
+               inode = list_entry(delaying_queue->prev, struct inode, i_list);
                 if (older_than_this &&
                     inode_dirtied_after(inode, *older_than_this))
                         break;
-               list_move(&inode->i_list, dispatch_queue);
+               if (sb && sb != inode->i_sb)
+                       do_sb_sort = 1;
+               sb = inode->i_sb;
+               list_move(&inode->i_list, &tmp);
+       }
+
+       /* just one sb in list, splice to dispatch_queue and we're done */
+       if (!do_sb_sort) {
+               list_splice(&tmp, dispatch_queue);
+               return;
+       }
+
+       /* Move inodes from one superblock together */
+       while (!list_empty(&tmp)) {
+               inode = list_entry(tmp.prev, struct inode, i_list);
+               sb = inode->i_sb;
+               list_for_each_prev_safe(pos, node, &tmp) {
+                       inode = list_entry(pos, struct inode, i_list);
+                       if (inode->i_sb == sb)
+                               list_move(&inode->i_list, dispatch_queue);
+               }
         }
  }
  
@@ -449,10 +476,15 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
         spin_lock(&inode_lock);
         inode->i_state &= ~I_SYNC;
         if (!(inode->i_state & (I_FREEING | I_CLEAR))) {
-               if (inode->i_state & I_DIRTY) {
+               if ((inode->i_state & I_DIRTY_PAGES) && wbc->for_kupdate) {
+                       /*
+                        * More pages get dirtied by a fast dirtier.
+                        */
+                       goto select_queue;
+               } else if (inode->i_state & I_DIRTY) {
                         /*
-                        * Someone redirtied the inode while were writing back
-                        * the pages.
+                        * At least XFS will redirty the inode during the
+                        * writeback (delalloc) and on io completion (isize).
                          */
                         redirty_tail(inode);
                 } else if (mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) {
@@ -477,6 +509,7 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
                                  * soon as the queue becomes uncongested.
                                  */
                                 inode->i_state |= I_DIRTY_PAGES;
+select_queue:
                                 if (wbc->nr_to_write <= 0) {
                                         /*
                                          * slice used up: queue for next turn
@@ -515,6 +548,17 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
         return ret;
  }
  
+static void unpin_sb_for_writeback(struct super_block **psb)
+{
+       struct super_block *sb = *psb;  /* sb recorded by the caller, or NULL */
+       /* No-op when *psb is NULL, i.e. no superblock is recorded there. */
+       if (sb) {
+               up_read(&sb->s_umount); /* pairs with down_read_trylock() in pin_sb_for_writeback() */
+               put_super(sb);
+               *psb = NULL;            /* mark that nothing is pinned any more */
+       }
+}
+
  /*
   * For WB_SYNC_NONE writeback, the caller does not have the sb pinned
   * before calling writeback. So make sure that we do pin it, so it doesn't
@@ -524,10 +568,19 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
   * 1 if we failed.
   */
  static int pin_sb_for_writeback(struct writeback_control *wbc,
-                                  struct inode *inode)
+                               struct inode *inode, struct super_block **psb)
  {
         struct super_block *sb = inode->i_sb;
  
+       /*
+        * If this sb is already pinned, nothing more to do. If not and
+        * *psb is non-NULL, unpin the old one first
+        */
+       if (sb == *psb)
+               return 0;
+       else if (*psb)
+               unpin_sb_for_writeback(psb);
+
         /*
          * Caller must already hold the ref for this
          */
@@ -541,7 +594,7 @@ static int pin_sb_for_writeback(struct writeback_control *wbc,
         if (down_read_trylock(&sb->s_umount)) {
                 if (sb->s_root) {
                         spin_unlock(&sb_lock);
-                       return 0;
+                       goto pinned;
                 }
                 /*
                  * umounted, drop rwsem again and fall through to failure
@@ -552,24 +605,15 @@ static int pin_sb_for_writeback(struct writeback_control *wbc,
         sb->s_count--;
         spin_unlock(&sb_lock);
         return 1;
-}
-
-static void unpin_sb_for_writeback(struct writeback_control *wbc,
-                                  struct inode *inode)
-{
-       struct super_block *sb = inode->i_sb;
-
-       if (wbc->sync_mode == WB_SYNC_ALL)
-               return;
-
-       up_read(&sb->s_umount);
-       put_super(sb);
+pinned:
+       *psb = sb;
+       return 0;
  }
  
  static void writeback_inodes_wb(struct bdi_writeback *wb,
                                 struct writeback_control *wbc)
  {
-       struct super_block *sb = wbc->sb;
+       struct super_block *sb = wbc->sb, *pin_sb = NULL;
         const int is_blkdev_sb = sb_is_blkdev_sb(sb);
         const unsigned long start = jiffies;    /* livelock avoidance */
  
@@ -628,7 +672,7 @@ static void writeback_inodes_wb(struct bdi_writeback *wb,
                 if (inode_dirtied_after(inode, start))
                         break;
  
-               if (pin_sb_for_writeback(wbc, inode)) {
+               if (pin_sb_for_writeback(wbc, inode, &pin_sb)) {
                         requeue_io(inode);
                         continue;
                 }
@@ -637,7 +681,6 @@ static void writeback_inodes_wb(struct bdi_writeback *wb,
                 __iget(inode);
                 pages_skipped = wbc->pages_skipped;
                 writeback_single_inode(inode, wbc);
-               unpin_sb_for_writeback(wbc, inode);
                 if (wbc->pages_skipped != pages_skipped) {
                         /*
                          * writeback is not making progress due to locked
@@ -657,6 +700,8 @@ static void writeback_inodes_wb(struct bdi_writeback *wb,
                         wbc->more_io = 1;
         }
  
+       unpin_sb_for_writeback(&pin_sb);
+
         spin_unlock(&inode_lock);
         /* Leave any unwritten inodes on b_io */
  }
@@ -750,29 +795,32 @@ static long wb_writeback(struct bdi_writeback *wb,
                 wrote += MAX_WRITEBACK_PAGES - wbc.nr_to_write;
  
                 /*
-                * If we ran out of stuff to write, bail unless more_io got set
+                * If we consumed everything, see if we have more
                  */
-               if (wbc.nr_to_write > 0) {
-                       if (wbc.more_io) {
-                               if (wbc.nr_to_write < MAX_WRITEBACK_PAGES)
-                                       continue;
-                               /*
-                                * Nothing written. Wait for some inode to
-                                * become available for writeback. Otherwise
-                                * we'll just busyloop.
-                                */
-                               spin_lock(&inode_lock);
-                               if (!list_empty(&wb->b_more_io))  {
-                                       inode = list_entry(
-                                                       wb->b_more_io.prev,
-                                                       struct inode, i_list);
-                                       inode_wait_for_writeback(inode);
-                               }
-                               spin_unlock(&inode_lock);
-                               continue;
-                       }
+               if (wbc.nr_to_write <= 0)
+                       continue;
+               /*
+                * Didn't write everything and we don't have more IO, bail
+                */
+               if (!wbc.more_io)
                         break;
+               /*
+                * Did we write something? Try for more
+                */
+               if (wbc.nr_to_write < MAX_WRITEBACK_PAGES)
+                       continue;
+               /*
+                * Nothing written. Wait for some inode to
+                * become available for writeback. Otherwise
+                * we'll just busyloop.
+                */
+               spin_lock(&inode_lock);
+               if (!list_empty(&wb->b_more_io))  {
+                       inode = list_entry(wb->b_more_io.prev,
+                                               struct inode, i_list);
+                       inode_wait_for_writeback(inode);
                 }
+               spin_unlock(&inode_lock);
         }
  
         return wrote;
@@ -811,6 +859,12 @@ static long wb_check_old_data_flush(struct bdi_writeback *wb)
         unsigned long expired;
         long nr_pages;
  
+       /*
+        * When set to zero, disable periodic writeback
+        */
+       if (!dirty_writeback_interval)
+               return 0;
+
         expired = wb->last_old_flush +
                         msecs_to_jiffies(dirty_writeback_interval * 10);
         if (time_before(jiffies, expired))
@@ -906,8 +960,12 @@ int bdi_writeback_task(struct bdi_writeback *wb)
                                 break;
                 }
  
-               wait_jiffies = msecs_to_jiffies(dirty_writeback_interval * 10);
-               schedule_timeout_interruptible(wait_jiffies);
+               if (dirty_writeback_interval) {
+                       wait_jiffies = msecs_to_jiffies(dirty_writeback_interval * 10);
+                       schedule_timeout_interruptible(wait_jiffies);
+               } else
+                       schedule();
+
                 try_to_freeze();
         }
  
@@ -1082,9 +1140,6 @@ EXPORT_SYMBOL(__mark_inode_dirty);
   * If older_than_this is non-NULL, then only write out inodes which
   * had their first dirtying at a time earlier than *older_than_this.
   *
- * If we're a pdlfush thread, then implement pdflush collision avoidance
- * against the entire list.
- *
   * If `bdi' is non-zero then we're being asked to writeback a specific queue.
   * This function assumes that the blockdev superblock's inodes are backed by
   * a variety of queues, so all inodes are searched.  For other superblocks,
@@ -1163,10 +1218,27 @@ void writeback_inodes_sb(struct super_block *sb)
         nr_to_write = nr_dirty + nr_unstable +
                         (inodes_stat.nr_inodes - inodes_stat.nr_unused);
  
-       bdi_writeback_all(sb, nr_to_write);
+       bdi_start_writeback(sb->s_bdi, sb, nr_to_write);
  }
  EXPORT_SYMBOL(writeback_inodes_sb);
  
+/**
+ * writeback_inodes_sb_if_idle -       start writeback if none underway
+ * @sb: the superblock to hand to writeback_inodes_sb()
+ *
+ * Calls writeback_inodes_sb(@sb) unless writeback_in_progress(sb->s_bdi)
+ * is true.  Returns 1 if writeback_inodes_sb() was called, 0 if not.
+ */
+int writeback_inodes_sb_if_idle(struct super_block *sb)
+{
+       if (!writeback_in_progress(sb->s_bdi)) {        /* checked on sb->s_bdi */
+               writeback_inodes_sb(sb);
+               return 1;
+       } else
+               return 0;
+}
+EXPORT_SYMBOL(writeback_inodes_sb_if_idle);
+
  /**
   * sync_inodes_sb      -       sync sb inode pages
   * @sb: the superblock