git.kernelconcepts.de Git - karo-tx-linux.git/commitdiff
Merge remote-tracking branch 'file-locks/linux-next'
author    Stephen Rothwell <sfr@canb.auug.org.au>
Thu, 5 Nov 2015 00:21:53 +0000 (11:21 +1100)
committer Stephen Rothwell <sfr@canb.auug.org.au>
Thu, 5 Nov 2015 00:21:53 +0000 (11:21 +1100)
fs/cifs/file.c
fs/gfs2/file.c
fs/locks.c
fs/nfs/nfs4proc.c
include/linux/fs.h

diff --combined fs/cifs/file.c
index 62203c387db45a23b05c1cadcc0946843ea5332f,6afdad7165613f64d5509a452bf2518fe66d5f10..47c5c97e2dd31c20663f1fa0584da2c3f87722bd
@@@ -1553,7 -1553,7 +1553,7 @@@ cifs_setlk(struct file *file, struct fi
  
  out:
        if (flock->fl_flags & FL_POSIX && !rc)
-               rc = posix_lock_file_wait(file, flock);
+               rc = locks_lock_file_wait(file, flock);
        return rc;
  }
  
@@@ -3380,7 -3380,6 +3380,7 @@@ readpages_get_pages(struct address_spac
        struct page *page, *tpage;
        unsigned int expected_index;
        int rc;
 +      gfp_t gfp = GFP_KERNEL & mapping_gfp_mask(mapping);
  
        INIT_LIST_HEAD(tmplist);
  
         */
        __set_page_locked(page);
        rc = add_to_page_cache_locked(page, mapping,
 -                                    page->index, GFP_KERNEL);
 +                                    page->index, gfp);
  
        /* give up if we can't stick it in the cache */
        if (rc) {
                        break;
  
                __set_page_locked(page);
 -              if (add_to_page_cache_locked(page, mapping, page->index,
 -                                                              GFP_KERNEL)) {
 +              if (add_to_page_cache_locked(page, mapping, page->index, gfp)) {
                        __clear_page_locked(page);
                        break;
                }
diff --combined fs/gfs2/file.c
index 71cd138c0676e9602fd146cbe5c36922a32be913,9287a2d17b8c3f6cdfe220f7f1717dc23e753a6b..5e425469f0c2659b0475535b18c634e487b15e12
@@@ -897,8 -897,8 +897,8 @@@ static long __gfs2_fallocate(struct fil
  
        if (!(mode & FALLOC_FL_KEEP_SIZE) && (pos + count) > inode->i_size) {
                i_size_write(inode, pos + count);
 -              /* Marks the inode as dirty */
                file_update_time(file);
 +              mark_inode_dirty(inode);
        }
  
        return generic_write_sync(file, pos, count);
@@@ -1000,7 -1000,7 +1000,7 @@@ static int gfs2_lock(struct file *file
        }
        if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) {
                if (fl->fl_type == F_UNLCK)
-                       posix_lock_file_wait(file, fl);
+                       locks_lock_file_wait(file, fl);
                return -EIO;
        }
        if (IS_GETLK(cmd))
@@@ -1031,7 -1031,7 +1031,7 @@@ static int do_flock(struct file *file, 
        if (gl) {
                if (fl_gh->gh_state == state)
                        goto out;
-               flock_lock_file_wait(file,
+               locks_lock_file_wait(file,
                                     &(struct file_lock){.fl_type = F_UNLCK});
                gfs2_glock_dq(fl_gh);
                gfs2_holder_reinit(state, flags, fl_gh);
                if (error == GLR_TRYFAILED)
                        error = -EAGAIN;
        } else {
-               error = flock_lock_file_wait(file, fl);
+               error = locks_lock_file_wait(file, fl);
                gfs2_assert_warn(GFS2_SB(&ip->i_inode), !error);
        }
  
@@@ -1071,7 -1071,7 +1071,7 @@@ static void do_unflock(struct file *fil
        struct gfs2_holder *fl_gh = &fp->f_fl_gh;
  
        mutex_lock(&fp->f_fl_mutex);
-       flock_lock_file_wait(file, fl);
+       locks_lock_file_wait(file, fl);
        if (fl_gh->gh_gl) {
                gfs2_glock_dq(fl_gh);
                gfs2_holder_uninit(fl_gh);
diff --combined fs/locks.c
index add3eeb79acecc4a0b8d7dcc393c76a9ffc18582,0d2b3267e2a3eb8fefbffb3cd3f58b31fe4ed04d..ba7fcdb6fc7a641add53e4d8223d65b66991cf2e
@@@ -205,28 -205,32 +205,32 @@@ static struct kmem_cache *filelock_cach
  static struct file_lock_context *
  locks_get_lock_context(struct inode *inode, int type)
  {
-       struct file_lock_context *new;
+       struct file_lock_context *ctx;
  
-       if (likely(inode->i_flctx) || type == F_UNLCK)
+       /* paired with cmpxchg() below */
+       ctx = smp_load_acquire(&inode->i_flctx);
+       if (likely(ctx) || type == F_UNLCK)
                goto out;
  
-       new = kmem_cache_alloc(flctx_cache, GFP_KERNEL);
-       if (!new)
+       ctx = kmem_cache_alloc(flctx_cache, GFP_KERNEL);
+       if (!ctx)
                goto out;
  
-       spin_lock_init(&new->flc_lock);
-       INIT_LIST_HEAD(&new->flc_flock);
-       INIT_LIST_HEAD(&new->flc_posix);
-       INIT_LIST_HEAD(&new->flc_lease);
+       spin_lock_init(&ctx->flc_lock);
+       INIT_LIST_HEAD(&ctx->flc_flock);
+       INIT_LIST_HEAD(&ctx->flc_posix);
+       INIT_LIST_HEAD(&ctx->flc_lease);
  
        /*
         * Assign the pointer if it's not already assigned. If it is, then
         * free the context we just allocated.
         */
-       if (cmpxchg(&inode->i_flctx, NULL, new))
-               kmem_cache_free(flctx_cache, new);
+       if (cmpxchg(&inode->i_flctx, NULL, ctx)) {
+               kmem_cache_free(flctx_cache, ctx);
+               ctx = smp_load_acquire(&inode->i_flctx);
+       }
  out:
-       return inode->i_flctx;
+       return ctx;
  }
  
  void
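Note on the hunk above: readers of inode->i_flctx now use smp_load_acquire(), paired with the cmpxchg() that publishes a freshly initialised context, so a lockless reader can never observe a half-constructed file_lock_context. A minimal standalone sketch of the same publish-or-discard pattern, using C11 atomics and hypothetical names in place of the kernel primitives:

    /* Standalone sketch (C11 atomics, hypothetical names) of the pattern in
     * locks_get_lock_context(): readers do an acquire load; the allocator
     * publishes with one compare-and-swap and frees its copy if it loses. */
    #include <stdatomic.h>
    #include <stdlib.h>

    struct ctx { int data; };

    static _Atomic(struct ctx *) slot;   /* plays the role of inode->i_flctx */

    static struct ctx *get_ctx(void)
    {
            /* acquire pairs with the successful CAS below, like smp_load_acquire() */
            struct ctx *c = atomic_load_explicit(&slot, memory_order_acquire);
            struct ctx *expected = NULL;

            if (c)
                    return c;

            c = calloc(1, sizeof(*c));
            if (!c)
                    return NULL;

            if (!atomic_compare_exchange_strong(&slot, &expected, c)) {
                    /* lost the race: free ours, use the published context */
                    free(c);
                    c = expected;
            }
            return c;
    }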
@@@ -762,7 -766,7 +766,7 @@@ posix_test_lock(struct file *filp, stru
        struct file_lock_context *ctx;
        struct inode *inode = file_inode(filp);
  
-       ctx = inode->i_flctx;
+       ctx = smp_load_acquire(&inode->i_flctx);
        if (!ctx || list_empty_careful(&ctx->flc_posix)) {
                fl->fl_type = F_UNLCK;
                return;
@@@ -1167,10 -1171,9 +1171,9 @@@ EXPORT_SYMBOL(posix_lock_file)
   * @inode: inode of file to which lock request should be applied
   * @fl: The lock to be applied
   *
-  * Variant of posix_lock_file_wait that does not take a filp, and so can be
-  * used after the filp has already been torn down.
+  * Apply a POSIX style lock request to an inode.
   */
- int posix_lock_inode_wait(struct inode *inode, struct file_lock *fl)
static int posix_lock_inode_wait(struct inode *inode, struct file_lock *fl)
  {
        int error;
        might_sleep ();
        }
        return error;
  }
- EXPORT_SYMBOL(posix_lock_inode_wait);
  
  /**
   * locks_mandatory_locked - Check for an active lock
@@@ -1203,7 -1205,7 +1205,7 @@@ int locks_mandatory_locked(struct file 
        struct file_lock_context *ctx;
        struct file_lock *fl;
  
-       ctx = inode->i_flctx;
+       ctx = smp_load_acquire(&inode->i_flctx);
        if (!ctx || list_empty_careful(&ctx->flc_posix))
                return 0;
  
@@@ -1388,7 -1390,7 +1390,7 @@@ any_leases_conflict(struct inode *inode
  int __break_lease(struct inode *inode, unsigned int mode, unsigned int type)
  {
        int error = 0;
-       struct file_lock_context *ctx = inode->i_flctx;
+       struct file_lock_context *ctx;
        struct file_lock *new_fl, *fl, *tmp;
        unsigned long break_time;
        int want_write = (mode & O_ACCMODE) != O_RDONLY;
        new_fl->fl_flags = type;
  
        /* typically we will check that ctx is non-NULL before calling */
+       ctx = smp_load_acquire(&inode->i_flctx);
        if (!ctx) {
                WARN_ON_ONCE(1);
                return error;
@@@ -1494,9 -1497,10 +1497,10 @@@ EXPORT_SYMBOL(__break_lease)
  void lease_get_mtime(struct inode *inode, struct timespec *time)
  {
        bool has_lease = false;
-       struct file_lock_context *ctx = inode->i_flctx;
+       struct file_lock_context *ctx;
        struct file_lock *fl;
  
+       ctx = smp_load_acquire(&inode->i_flctx);
        if (ctx && !list_empty_careful(&ctx->flc_lease)) {
                spin_lock(&ctx->flc_lock);
                if (!list_empty(&ctx->flc_lease)) {
@@@ -1543,10 -1547,11 +1547,11 @@@ int fcntl_getlease(struct file *filp
  {
        struct file_lock *fl;
        struct inode *inode = file_inode(filp);
-       struct file_lock_context *ctx = inode->i_flctx;
+       struct file_lock_context *ctx;
        int type = F_UNLCK;
        LIST_HEAD(dispose);
  
+       ctx = smp_load_acquire(&inode->i_flctx);
        if (ctx && !list_empty_careful(&ctx->flc_lease)) {
                spin_lock(&ctx->flc_lock);
                time_out_leases(file_inode(filp), &dispose);
@@@ -1711,11 -1716,11 +1716,11 @@@ static int generic_delete_lease(struct 
  {
        int error = -EAGAIN;
        struct file_lock *fl, *victim = NULL;
-       struct dentry *dentry = filp->f_path.dentry;
-       struct inode *inode = dentry->d_inode;
-       struct file_lock_context *ctx = inode->i_flctx;
+       struct inode *inode = file_inode(filp);
+       struct file_lock_context *ctx;
        LIST_HEAD(dispose);
  
+       ctx = smp_load_acquire(&inode->i_flctx);
        if (!ctx) {
                trace_generic_delete_lease(inode, NULL);
                return error;
  int generic_setlease(struct file *filp, long arg, struct file_lock **flp,
                        void **priv)
  {
-       struct dentry *dentry = filp->f_path.dentry;
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = file_inode(filp);
        int error;
  
        if ((!uid_eq(current_fsuid(), inode->i_uid)) && !capable(CAP_LEASE))
  }
  EXPORT_SYMBOL(generic_setlease);
  
 +#if IS_ENABLED(CONFIG_SRCU)
 +/*
 + * Kernel subsystems can register to be notified on any attempt to set
 + * a new lease with the lease_notifier_chain. This is used by (e.g.) nfsd
 + * to close files that it may have cached when there is an attempt to set a
 + * conflicting lease.
 + */
 +struct srcu_notifier_head lease_notifier_chain;
 +EXPORT_SYMBOL_GPL(lease_notifier_chain);
 +
 +static inline void
 +lease_notifier_chain_init(void)
 +{
 +      srcu_init_notifier_head(&lease_notifier_chain);
 +}
 +
 +static inline void
 +setlease_notifier(long arg, struct file_lock *lease)
 +{
 +      if (arg != F_UNLCK)
 +              srcu_notifier_call_chain(&lease_notifier_chain, arg, lease);
 +}
 +#else /* !IS_ENABLED(CONFIG_SRCU) */
 +static inline void
 +lease_notifier_chain_init(void)
 +{
 +}
 +
 +static inline void
 +setlease_notifier(long arg, struct file_lock *lease)
 +{
 +}
 +#endif /* IS_ENABLED(CONFIG_SRCU) */
 +
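As the comment in the added block explains, subsystems such as nfsd can hook lease_notifier_chain to drop cached files before a conflicting lease is set. A hedged sketch of what a consumer's registration could look like (the callback and all names here are hypothetical, not taken from nfsd):

    #include <linux/fs.h>
    #include <linux/init.h>
    #include <linux/notifier.h>

    /* Hypothetical consumer: invoked for every vfs_setlease() that is not F_UNLCK. */
    static int my_lease_notify(struct notifier_block *nb, unsigned long arg, void *data)
    {
            struct file_lock *lease = data;

            if (!lease)
                    return NOTIFY_DONE;
            /* arg is the requested lease type (F_RDLCK/F_WRLCK); close any
             * cached files on lease->fl_file's inode that would conflict. */
            return NOTIFY_OK;
    }

    static struct notifier_block my_lease_nb = {
            .notifier_call = my_lease_notify,
    };

    static int __init my_lease_init(void)
    {
            return srcu_notifier_chain_register(&lease_notifier_chain, &my_lease_nb);
    }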
  /**
   * vfs_setlease        -       sets a lease on an open file
   * @filp:     file pointer
  int
  vfs_setlease(struct file *filp, long arg, struct file_lock **lease, void **priv)
  {
 +      if (lease)
 +              setlease_notifier(arg, *lease);
        if (filp->f_op->setlease)
                return filp->f_op->setlease(filp, arg, lease, priv);
        else
@@@ -1892,7 -1860,7 +1896,7 @@@ int fcntl_setlease(unsigned int fd, str
   *
   * Apply a FLOCK style lock request to an inode.
   */
- int flock_lock_inode_wait(struct inode *inode, struct file_lock *fl)
static int flock_lock_inode_wait(struct inode *inode, struct file_lock *fl)
  {
        int error;
        might_sleep();
        }
        return error;
  }
- EXPORT_SYMBOL(flock_lock_inode_wait);
+ /**
+  * locks_lock_inode_wait - Apply a lock to an inode
+  * @inode: inode of the file to apply to
+  * @fl: The lock to be applied
+  *
+  * Apply a POSIX or FLOCK style lock request to an inode.
+  */
+ int locks_lock_inode_wait(struct inode *inode, struct file_lock *fl)
+ {
+       int res = 0;
+       switch (fl->fl_flags & (FL_POSIX|FL_FLOCK)) {
+               case FL_POSIX:
+                       res = posix_lock_inode_wait(inode, fl);
+                       break;
+               case FL_FLOCK:
+                       res = flock_lock_inode_wait(inode, fl);
+                       break;
+               default:
+                       BUG();
+       }
+       return res;
+ }
+ EXPORT_SYMBOL(locks_lock_inode_wait);
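locks_lock_inode_wait() above, together with the locks_lock_file_wait() wrapper added in include/linux/fs.h further down, dispatches on fl->fl_flags, so callers no longer pick between the POSIX and FLOCK variants themselves; that is the simplification applied to cifs_setlk(), gfs2_lock()/do_flock(), NFS's do_vfs_lock() and sys_flock() in this merge. A minimal sketch of a filesystem lock hook written against the new helper (hypothetical names):

    #include <linux/fs.h>

    /* Hypothetical ->lock handler: ask the lock manager first, then mirror the
     * granted lock locally; the same call covers FL_POSIX and FL_FLOCK. */
    static int myfs_lock(struct file *file, int cmd, struct file_lock *fl)
    {
            int err = 0;

            /* ... talk to the server / cluster lock manager here ... */

            if (!err)
                    err = locks_lock_file_wait(file, fl);
            return err;
    }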
  
  /**
   *    sys_flock: - flock() system call.
@@@ -1967,7 -1958,7 +1994,7 @@@ SYSCALL_DEFINE2(flock, unsigned int, fd
                                          (can_sleep) ? F_SETLKW : F_SETLK,
                                          lock);
        else
-               error = flock_lock_file_wait(f.file, lock);
+               error = locks_lock_file_wait(f.file, lock);
  
   out_free:
        locks_free_lock(lock);
@@@ -2143,7 -2134,7 +2170,7 @@@ static int do_lock_file_wait(struct fil
        return error;
  }
  
- /* Ensure that fl->fl_filp has compatible f_mode for F_SETLK calls */
+ /* Ensure that fl->fl_file has compatible f_mode for F_SETLK calls */
  static int
  check_fmode_for_setlk(struct file_lock *fl)
  {
  void locks_remove_posix(struct file *filp, fl_owner_t owner)
  {
        struct file_lock lock;
-       struct file_lock_context *ctx = file_inode(filp)->i_flctx;
+       struct file_lock_context *ctx;
  
        /*
         * If there are no locks held on this file, we don't need to call
         * posix_lock_file().  Another process could be setting a lock on this
         * file at the same time, but we wouldn't remove that lock anyway.
         */
+       ctx =  smp_load_acquire(&file_inode(filp)->i_flctx);
        if (!ctx || list_empty(&ctx->flc_posix))
                return;
  
@@@ -2425,7 -2417,7 +2453,7 @@@ EXPORT_SYMBOL(locks_remove_posix)
  
  /* The i_flctx must be valid when calling into here */
  static void
- locks_remove_flock(struct file *filp)
+ locks_remove_flock(struct file *filp, struct file_lock_context *flctx)
  {
        struct file_lock fl = {
                .fl_owner = filp,
                .fl_end = OFFSET_MAX,
        };
        struct inode *inode = file_inode(filp);
-       struct file_lock_context *flctx = inode->i_flctx;
  
        if (list_empty(&flctx->flc_flock))
                return;
  
  /* The i_flctx must be valid when calling into here */
  static void
- locks_remove_lease(struct file *filp)
+ locks_remove_lease(struct file *filp, struct file_lock_context *ctx)
  {
-       struct inode *inode = file_inode(filp);
-       struct file_lock_context *ctx = inode->i_flctx;
        struct file_lock *fl, *tmp;
        LIST_HEAD(dispose);
  
   */
  void locks_remove_file(struct file *filp)
  {
-       if (!file_inode(filp)->i_flctx)
+       struct file_lock_context *ctx;
+       ctx = smp_load_acquire(&file_inode(filp)->i_flctx);
+       if (!ctx)
                return;
  
        /* remove any OFD locks */
        locks_remove_posix(filp, filp);
  
        /* remove flock locks */
-       locks_remove_flock(filp);
+       locks_remove_flock(filp, ctx);
  
        /* remove any leases */
-       locks_remove_lease(filp);
+       locks_remove_lease(filp, ctx);
  }
  
  /**
@@@ -2652,7 -2644,7 +2680,7 @@@ void show_fd_locks(struct seq_file *f
        struct file_lock_context *ctx;
        int id = 0;
  
-       ctx = inode->i_flctx;
+       ctx = smp_load_acquire(&inode->i_flctx);
        if (!ctx)
                return;
  
@@@ -2732,7 -2724,6 +2760,7 @@@ static int __init filelock_init(void
        for_each_possible_cpu(i)
                INIT_HLIST_HEAD(per_cpu_ptr(&file_lock_list, i));
  
 +      lease_notifier_chain_init();
        return 0;
  }
  
diff --combined fs/nfs/nfs4proc.c
index 7ed8f2cd97f8711ce86817f7449c97acb7ff6a27,ce64a45ab648ae6f0f6d4312e52551397f27a47b..ff5bddc49a2a30449a63a6c2a32a2aadc6db84b5
@@@ -78,6 -78,7 +78,6 @@@ struct nfs4_opendata
  static int _nfs4_proc_open(struct nfs4_opendata *data);
  static int _nfs4_recover_proc_open(struct nfs4_opendata *data);
  static int nfs4_do_fsinfo(struct nfs_server *, struct nfs_fh *, struct nfs_fsinfo *);
 -static int nfs4_async_handle_error(struct rpc_task *, const struct nfs_server *, struct nfs4_state *, long *);
  static void nfs_fixup_referral_attributes(struct nfs_fattr *fattr);
  static int nfs4_proc_getattr(struct nfs_server *, struct nfs_fh *, struct nfs_fattr *, struct nfs4_label *label);
  static int _nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fattr *fattr, struct nfs4_label *label);
@@@ -238,7 -239,6 +238,7 @@@ const u32 nfs4_fsinfo_bitmap[3] = { FAT
                        FATTR4_WORD1_TIME_DELTA
                        | FATTR4_WORD1_FS_LAYOUT_TYPES,
                        FATTR4_WORD2_LAYOUT_BLKSIZE
 +                      | FATTR4_WORD2_CLONE_BLKSIZE
  };
  
  const u32 nfs4_fs_locations_bitmap[3] = {
@@@ -344,16 -344,13 +344,16 @@@ static int nfs4_delay(struct rpc_clnt *
  /* This is the error handling routine for processes that are allowed
   * to sleep.
   */
 -int nfs4_handle_exception(struct nfs_server *server, int errorcode, struct nfs4_exception *exception)
 +static int nfs4_do_handle_exception(struct nfs_server *server,
 +              int errorcode, struct nfs4_exception *exception)
  {
        struct nfs_client *clp = server->nfs_client;
        struct nfs4_state *state = exception->state;
        struct inode *inode = exception->inode;
        int ret = errorcode;
  
 +      exception->delay = 0;
 +      exception->recovering = 0;
        exception->retry = 0;
        switch(errorcode) {
                case 0:
                case -NFS4ERR_DELEG_REVOKED:
                case -NFS4ERR_ADMIN_REVOKED:
                case -NFS4ERR_BAD_STATEID:
 -                      if (inode && nfs4_have_delegation(inode, FMODE_READ)) {
 -                              nfs4_inode_return_delegation(inode);
 -                              exception->retry = 1;
 -                              return 0;
 -                      }
 +                      if (inode && nfs_async_inode_return_delegation(inode,
 +                                              NULL) == 0)
 +                              goto wait_on_recovery;
                        if (state == NULL)
                                break;
                        ret = nfs4_schedule_stateid_recovery(server, state);
                                ret = -EBUSY;
                                break;
                        }
 -              case -NFS4ERR_GRACE:
                case -NFS4ERR_DELAY:
 -                      ret = nfs4_delay(server->client, &exception->timeout);
 -                      if (ret != 0)
 -                              break;
 +                      nfs_inc_server_stats(server, NFSIOS_DELAY);
 +              case -NFS4ERR_GRACE:
 +                      exception->delay = 1;
 +                      return 0;
 +
                case -NFS4ERR_RETRY_UNCACHED_REP:
                case -NFS4ERR_OLD_STATEID:
                        exception->retry = 1;
        /* We failed to handle the error */
        return nfs4_map_errors(ret);
  wait_on_recovery:
 -      ret = nfs4_wait_clnt_recover(clp);
 +      exception->recovering = 1;
 +      return 0;
 +}
 +
 +/* This is the error handling routine for processes that are allowed
 + * to sleep.
 + */
 +int nfs4_handle_exception(struct nfs_server *server, int errorcode, struct nfs4_exception *exception)
 +{
 +      struct nfs_client *clp = server->nfs_client;
 +      int ret;
 +
 +      ret = nfs4_do_handle_exception(server, errorcode, exception);
 +      if (exception->delay) {
 +              ret = nfs4_delay(server->client, &exception->timeout);
 +              goto out_retry;
 +      }
 +      if (exception->recovering) {
 +              ret = nfs4_wait_clnt_recover(clp);
 +              if (test_bit(NFS_MIG_FAILED, &server->mig_status))
 +                      return -EIO;
 +              goto out_retry;
 +      }
 +      return ret;
 +out_retry:
 +      if (ret == 0)
 +              exception->retry = 1;
 +      return ret;
 +}
 +
 +static int
 +nfs4_async_handle_exception(struct rpc_task *task, struct nfs_server *server,
 +              int errorcode, struct nfs4_exception *exception)
 +{
 +      struct nfs_client *clp = server->nfs_client;
 +      int ret;
 +
 +      ret = nfs4_do_handle_exception(server, errorcode, exception);
 +      if (exception->delay) {
 +              rpc_delay(task, nfs4_update_delay(&exception->timeout));
 +              goto out_retry;
 +      }
 +      if (exception->recovering) {
 +              rpc_sleep_on(&clp->cl_rpcwaitq, task, NULL);
 +              if (test_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state) == 0)
 +                      rpc_wake_up_queued_task(&clp->cl_rpcwaitq, task);
 +              goto out_retry;
 +      }
        if (test_bit(NFS_MIG_FAILED, &server->mig_status))
 -              return -EIO;
 +              ret = -EIO;
 +      return ret;
 +out_retry:
        if (ret == 0)
                exception->retry = 1;
        return ret;
  }
  
 +static int
 +nfs4_async_handle_error(struct rpc_task *task, struct nfs_server *server,
 +                      struct nfs4_state *state, long *timeout)
 +{
 +      struct nfs4_exception exception = {
 +              .state = state,
 +      };
 +
 +      if (task->tk_status >= 0)
 +              return 0;
 +      if (timeout)
 +              exception.timeout = *timeout;
 +      task->tk_status = nfs4_async_handle_exception(task, server,
 +                      task->tk_status,
 +                      &exception);
 +      if (exception.delay && timeout)
 +              *timeout = exception.timeout;
 +      if (exception.retry)
 +              return -EAGAIN;
 +      return 0;
 +}
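With this split, nfs4_do_handle_exception() only classifies the error and records delay/recovering/retry in the nfs4_exception; the synchronous wrapper then sleeps via nfs4_delay() or nfs4_wait_clnt_recover(), while the new asynchronous wrapper parks the running rpc_task with rpc_delay() or rpc_sleep_on(). Synchronous callers keep the usual retry loop, sketched here around a hypothetical operation:

    /* Typical synchronous caller shape; _nfs4_proc_foo() is hypothetical. */
    static int nfs4_proc_foo(struct nfs_server *server, void *args)
    {
            struct nfs4_exception exception = { };
            int err;

            do {
                    err = nfs4_handle_exception(server,
                                    _nfs4_proc_foo(server, args),
                                    &exception);
            } while (exception.retry);
            return err;
    }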
 +
  /*
   * Return 'true' if 'clp' is using an rpc_client that is integrity protected
   * or 'false' otherwise.
@@@ -1200,21 -1127,6 +1200,21 @@@ static int nfs4_wait_for_completion_rpc
        return ret;
  }
  
 +static bool nfs4_mode_match_open_stateid(struct nfs4_state *state,
 +              fmode_t fmode)
 +{
 +      switch(fmode & (FMODE_READ|FMODE_WRITE)) {
 +      case FMODE_READ|FMODE_WRITE:
 +              return state->n_rdwr != 0;
 +      case FMODE_WRITE:
 +              return state->n_wronly != 0;
 +      case FMODE_READ:
 +              return state->n_rdonly != 0;
 +      }
 +      WARN_ON_ONCE(1);
 +      return false;
 +}
 +
  static int can_open_cached(struct nfs4_state *state, fmode_t mode, int open_mode)
  {
        int ret = 0;
@@@ -1531,18 -1443,12 +1531,18 @@@ nfs4_opendata_check_deleg(struct nfs4_o
        if (delegation)
                delegation_flags = delegation->flags;
        rcu_read_unlock();
 -      if (data->o_arg.claim == NFS4_OPEN_CLAIM_DELEGATE_CUR) {
 +      switch (data->o_arg.claim) {
 +      default:
 +              break;
 +      case NFS4_OPEN_CLAIM_DELEGATE_CUR:
 +      case NFS4_OPEN_CLAIM_DELEG_CUR_FH:
                pr_err_ratelimited("NFS: Broken NFSv4 server %s is "
                                   "returning a delegation for "
                                   "OPEN(CLAIM_DELEGATE_CUR)\n",
                                   clp->cl_hostname);
 -      } else if ((delegation_flags & 1UL<<NFS_DELEGATION_NEED_RECLAIM) == 0)
 +              return;
 +      }
 +      if ((delegation_flags & 1UL<<NFS_DELEGATION_NEED_RECLAIM) == 0)
                nfs_inode_set_delegation(state->inode,
                                         data->owner->so_cred,
                                         &data->o_res);
@@@ -1665,13 -1571,17 +1665,13 @@@ static struct nfs4_opendata *nfs4_open_
        return opendata;
  }
  
 -static int nfs4_open_recover_helper(struct nfs4_opendata *opendata, fmode_t fmode, struct nfs4_state **res)
 +static int nfs4_open_recover_helper(struct nfs4_opendata *opendata,
 +              fmode_t fmode)
  {
        struct nfs4_state *newstate;
        int ret;
  
 -      if ((opendata->o_arg.claim == NFS4_OPEN_CLAIM_DELEGATE_CUR ||
 -           opendata->o_arg.claim == NFS4_OPEN_CLAIM_DELEG_CUR_FH) &&
 -          (opendata->o_arg.u.delegation_type & fmode) != fmode)
 -              /* This mode can't have been delegated, so we must have
 -               * a valid open_stateid to cover it - not need to reclaim.
 -               */
 +      if (!nfs4_mode_match_open_stateid(opendata->state, fmode))
                return 0;
        opendata->o_arg.open_flags = 0;
        opendata->o_arg.fmode = fmode;
        newstate = nfs4_opendata_to_nfs4_state(opendata);
        if (IS_ERR(newstate))
                return PTR_ERR(newstate);
 +      if (newstate != opendata->state)
 +              ret = -ESTALE;
        nfs4_close_state(newstate, fmode);
 -      *res = newstate;
 -      return 0;
 +      return ret;
  }
  
  static int nfs4_open_recover(struct nfs4_opendata *opendata, struct nfs4_state *state)
  {
 -      struct nfs4_state *newstate;
        int ret;
  
        /* Don't trigger recovery in nfs_test_and_clear_all_open_stateid */
        clear_bit(NFS_DELEGATED_STATE, &state->flags);
        clear_bit(NFS_OPEN_STATE, &state->flags);
        smp_rmb();
 -      if (state->n_rdwr != 0) {
 -              ret = nfs4_open_recover_helper(opendata, FMODE_READ|FMODE_WRITE, &newstate);
 -              if (ret != 0)
 -                      return ret;
 -              if (newstate != state)
 -                      return -ESTALE;
 -      }
 -      if (state->n_wronly != 0) {
 -              ret = nfs4_open_recover_helper(opendata, FMODE_WRITE, &newstate);
 -              if (ret != 0)
 -                      return ret;
 -              if (newstate != state)
 -                      return -ESTALE;
 -      }
 -      if (state->n_rdonly != 0) {
 -              ret = nfs4_open_recover_helper(opendata, FMODE_READ, &newstate);
 -              if (ret != 0)
 -                      return ret;
 -              if (newstate != state)
 -                      return -ESTALE;
 -      }
 +      ret = nfs4_open_recover_helper(opendata, FMODE_READ|FMODE_WRITE);
 +      if (ret != 0)
 +              return ret;
 +      ret = nfs4_open_recover_helper(opendata, FMODE_WRITE);
 +      if (ret != 0)
 +              return ret;
 +      ret = nfs4_open_recover_helper(opendata, FMODE_READ);
 +      if (ret != 0)
 +              return ret;
        /*
         * We may have performed cached opens for all three recoveries.
         * Check if we need to update the current stateid.
@@@ -1837,35 -1759,18 +1837,35 @@@ static int nfs4_handle_delegation_recal
        return err;
  }
  
 -int nfs4_open_delegation_recall(struct nfs_open_context *ctx, struct nfs4_state *state, const nfs4_stateid *stateid)
 +int nfs4_open_delegation_recall(struct nfs_open_context *ctx,
 +              struct nfs4_state *state, const nfs4_stateid *stateid,
 +              fmode_t type)
  {
        struct nfs_server *server = NFS_SERVER(state->inode);
        struct nfs4_opendata *opendata;
 -      int err;
 +      int err = 0;
  
        opendata = nfs4_open_recoverdata_alloc(ctx, state,
                        NFS4_OPEN_CLAIM_DELEG_CUR_FH);
        if (IS_ERR(opendata))
                return PTR_ERR(opendata);
        nfs4_stateid_copy(&opendata->o_arg.u.delegation, stateid);
 -      err = nfs4_open_recover(opendata, state);
 +      write_seqlock(&state->seqlock);
 +      nfs4_stateid_copy(&state->stateid, &state->open_stateid);
 +      write_sequnlock(&state->seqlock);
 +      clear_bit(NFS_DELEGATED_STATE, &state->flags);
 +      switch (type & (FMODE_READ|FMODE_WRITE)) {
 +      case FMODE_READ|FMODE_WRITE:
 +      case FMODE_WRITE:
 +              err = nfs4_open_recover_helper(opendata, FMODE_READ|FMODE_WRITE);
 +              if (err)
 +                      break;
 +              err = nfs4_open_recover_helper(opendata, FMODE_WRITE);
 +              if (err)
 +                      break;
 +      case FMODE_READ:
 +              err = nfs4_open_recover_helper(opendata, FMODE_READ);
 +      }
        nfs4_opendata_put(opendata);
        return nfs4_handle_delegation_recall_error(server, state, stateid, err);
  }
@@@ -1945,8 -1850,6 +1945,8 @@@ static int _nfs4_proc_open_confirm(stru
        data->rpc_done = 0;
        data->rpc_status = 0;
        data->timestamp = jiffies;
 +      if (data->is_recover)
 +              nfs4_set_sequence_privileged(&data->c_arg.seq_args);
        task = rpc_run_task(&task_setup_data);
        if (IS_ERR(task))
                return PTR_ERR(task);
        return err;
  }
  
 +static bool
 +nfs4_wait_on_layoutreturn(struct inode *inode, struct rpc_task *task)
 +{
 +      if (inode == NULL || !nfs_have_layout(inode))
 +              return false;
 +
 +      return pnfs_wait_on_layoutreturn(inode, task);
 +}
 +
  struct nfs4_closedata {
        struct inode *inode;
        struct nfs4_state *state;
@@@ -2869,11 -2763,6 +2869,11 @@@ static void nfs4_close_prepare(struct r
                goto out_no_action;
        }
  
 +      if (nfs4_wait_on_layoutreturn(inode, task)) {
 +              nfs_release_seqid(calldata->arg.seqid);
 +              goto out_wait;
 +      }
 +
        if (calldata->arg.fmode == 0)
                task->tk_msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_CLOSE];
        if (calldata->roc)
@@@ -4603,7 -4492,7 +4603,7 @@@ static inline int nfs4_server_supports_
  #define NFS4ACL_MAXPAGES DIV_ROUND_UP(XATTR_SIZE_MAX, PAGE_SIZE)
  
  static int buf_to_pages_noslab(const void *buf, size_t buflen,
 -              struct page **pages, unsigned int *pgbase)
 +              struct page **pages)
  {
        struct page *newpage, **spages;
        int rc = 0;
@@@ -4747,6 -4636,7 +4747,6 @@@ static ssize_t __nfs4_get_acl_uncached(
                goto out_free;
  
        args.acl_len = npages * PAGE_SIZE;
 -      args.acl_pgbase = 0;
  
        dprintk("%s  buf %p buflen %zu npages %d args.acl_len %zu\n",
                __func__, buf, buflen, npages, args.acl_len);
@@@ -4838,7 -4728,7 +4838,7 @@@ static int __nfs4_proc_set_acl(struct i
                return -EOPNOTSUPP;
        if (npages > ARRAY_SIZE(pages))
                return -ERANGE;
 -      i = buf_to_pages_noslab(buf, buflen, arg.acl_pages, &arg.acl_pgbase);
 +      i = buf_to_pages_noslab(buf, buflen, arg.acl_pages);
        if (i < 0)
                return i;
        nfs4_inode_return_delegation(inode);
@@@ -5027,6 -4917,79 +5027,6 @@@ out
  #endif        /* CONFIG_NFS_V4_SECURITY_LABEL */
  
  
 -static int
 -nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server,
 -                      struct nfs4_state *state, long *timeout)
 -{
 -      struct nfs_client *clp = server->nfs_client;
 -
 -      if (task->tk_status >= 0)
 -              return 0;
 -      switch(task->tk_status) {
 -              case -NFS4ERR_DELEG_REVOKED:
 -              case -NFS4ERR_ADMIN_REVOKED:
 -              case -NFS4ERR_BAD_STATEID:
 -              case -NFS4ERR_OPENMODE:
 -                      if (state == NULL)
 -                              break;
 -                      if (nfs4_schedule_stateid_recovery(server, state) < 0)
 -                              goto recovery_failed;
 -                      goto wait_on_recovery;
 -              case -NFS4ERR_EXPIRED:
 -                      if (state != NULL) {
 -                              if (nfs4_schedule_stateid_recovery(server, state) < 0)
 -                                      goto recovery_failed;
 -                      }
 -              case -NFS4ERR_STALE_STATEID:
 -              case -NFS4ERR_STALE_CLIENTID:
 -                      nfs4_schedule_lease_recovery(clp);
 -                      goto wait_on_recovery;
 -              case -NFS4ERR_MOVED:
 -                      if (nfs4_schedule_migration_recovery(server) < 0)
 -                              goto recovery_failed;
 -                      goto wait_on_recovery;
 -              case -NFS4ERR_LEASE_MOVED:
 -                      nfs4_schedule_lease_moved_recovery(clp);
 -                      goto wait_on_recovery;
 -#if defined(CONFIG_NFS_V4_1)
 -              case -NFS4ERR_BADSESSION:
 -              case -NFS4ERR_BADSLOT:
 -              case -NFS4ERR_BAD_HIGH_SLOT:
 -              case -NFS4ERR_DEADSESSION:
 -              case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION:
 -              case -NFS4ERR_SEQ_FALSE_RETRY:
 -              case -NFS4ERR_SEQ_MISORDERED:
 -                      dprintk("%s ERROR %d, Reset session\n", __func__,
 -                              task->tk_status);
 -                      nfs4_schedule_session_recovery(clp->cl_session, task->tk_status);
 -                      goto wait_on_recovery;
 -#endif /* CONFIG_NFS_V4_1 */
 -              case -NFS4ERR_DELAY:
 -                      nfs_inc_server_stats(server, NFSIOS_DELAY);
 -                      rpc_delay(task, nfs4_update_delay(timeout));
 -                      goto restart_call;
 -              case -NFS4ERR_GRACE:
 -                      rpc_delay(task, NFS4_POLL_RETRY_MAX);
 -              case -NFS4ERR_RETRY_UNCACHED_REP:
 -              case -NFS4ERR_OLD_STATEID:
 -                      goto restart_call;
 -      }
 -      task->tk_status = nfs4_map_errors(task->tk_status);
 -      return 0;
 -recovery_failed:
 -      task->tk_status = -EIO;
 -      return 0;
 -wait_on_recovery:
 -      rpc_sleep_on(&clp->cl_rpcwaitq, task, NULL);
 -      if (test_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state) == 0)
 -              rpc_wake_up_queued_task(&clp->cl_rpcwaitq, task);
 -      if (test_bit(NFS_MIG_FAILED, &server->mig_status))
 -              goto recovery_failed;
 -restart_call:
 -      task->tk_status = 0;
 -      return -EAGAIN;
 -}
 -
  static void nfs4_init_boot_verifier(const struct nfs_client *clp,
                                    nfs4_verifier *bootverf)
  {
@@@ -5345,9 -5308,6 +5345,9 @@@ static void nfs4_delegreturn_prepare(st
  
        d_data = (struct nfs4_delegreturndata *)data;
  
 +      if (nfs4_wait_on_layoutreturn(d_data->inode, task))
 +              return;
 +
        if (d_data->roc)
                pnfs_roc_get_barrier(d_data->inode, &d_data->roc_barrier);
  
@@@ -5512,18 -5472,7 +5512,7 @@@ static int nfs4_proc_getlk(struct nfs4_
  
  static int do_vfs_lock(struct inode *inode, struct file_lock *fl)
  {
-       int res = 0;
-       switch (fl->fl_flags & (FL_POSIX|FL_FLOCK)) {
-               case FL_POSIX:
-                       res = posix_lock_inode_wait(inode, fl);
-                       break;
-               case FL_FLOCK:
-                       res = flock_lock_inode_wait(inode, fl);
-                       break;
-               default:
-                       BUG();
-       }
-       return res;
+       return locks_lock_inode_wait(inode, fl);
  }
  
  struct nfs4_unlockdata {
        struct nfs4_lock_state *lsp;
        struct nfs_open_context *ctx;
        struct file_lock fl;
 -      const struct nfs_server *server;
 +      struct nfs_server *server;
        unsigned long timestamp;
  };
  
@@@ -7840,46 -7789,39 +7829,46 @@@ static void nfs4_layoutget_done(struct 
                        dprintk("%s: NFS4ERR_RECALLCONFLICT waiting %lu\n",
                                __func__, delay);
                        rpc_delay(task, delay);
 -                      task->tk_status = 0;
 -                      rpc_restart_call_prepare(task);
 -                      goto out; /* Do not call nfs4_async_handle_error() */
 +                      /* Do not call nfs4_async_handle_error() */
 +                      goto out_restart;
                }
                break;
        case -NFS4ERR_EXPIRED:
        case -NFS4ERR_BAD_STATEID:
                spin_lock(&inode->i_lock);
 -              lo = NFS_I(inode)->layout;
 -              if (!lo || list_empty(&lo->plh_segs)) {
 +              if (nfs4_stateid_match(&lgp->args.stateid,
 +                                      &lgp->args.ctx->state->stateid)) {
                        spin_unlock(&inode->i_lock);
                        /* If the open stateid was bad, then recover it. */
                        state = lgp->args.ctx->state;
 -              } else {
 +                      break;
 +              }
 +              lo = NFS_I(inode)->layout;
 +              if (lo && nfs4_stateid_match(&lgp->args.stateid,
 +                                      &lo->plh_stateid)) {
                        LIST_HEAD(head);
  
                        /*
                         * Mark the bad layout state as invalid, then retry
                         * with the current stateid.
                         */
 +                      set_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags);
                        pnfs_mark_matching_lsegs_invalid(lo, &head, NULL);
                        spin_unlock(&inode->i_lock);
                        pnfs_free_lseg_list(&head);
 -      
 -                      task->tk_status = 0;
 -                      rpc_restart_call_prepare(task);
 -              }
 +              } else
 +                      spin_unlock(&inode->i_lock);
 +              goto out_restart;
        }
        if (nfs4_async_handle_error(task, server, state, NULL) == -EAGAIN)
 -              rpc_restart_call_prepare(task);
 +              goto out_restart;
  out:
        dprintk("<-- %s\n", __func__);
        return;
 +out_restart:
 +      task->tk_status = 0;
 +      rpc_restart_call_prepare(task);
 +      return;
  out_overflow:
        task->tk_status = -EOVERFLOW;
        goto out;
@@@ -8728,8 -8670,7 +8717,8 @@@ static const struct nfs4_minor_version_
                | NFS_CAP_ALLOCATE
                | NFS_CAP_DEALLOCATE
                | NFS_CAP_SEEK
 -              | NFS_CAP_LAYOUTSTATS,
 +              | NFS_CAP_LAYOUTSTATS
 +              | NFS_CAP_CLONE,
        .init_client = nfs41_init_client,
        .shutdown_client = nfs41_shutdown_client,
        .match_stateid = nfs41_match_stateid,
diff --combined include/linux/fs.h
index c0df75909dd4189dd6e88360f29fb11b1935975a,49749688156d91a12f68ab78ebc29685fa59ce88..f2325998cd20cd551d445072bbcd0fb15c17732c
@@@ -1042,7 -1042,6 +1042,7 @@@ extern int fcntl_setlease(unsigned int 
  extern int fcntl_getlease(struct file *filp);
  
  /* fs/locks.c */
 +extern struct srcu_notifier_head      lease_notifier_chain;
  void locks_free_lock_context(struct file_lock_context *ctx);
  void locks_free_lock(struct file_lock *fl);
  extern void locks_init_lock(struct file_lock *);
@@@ -1054,12 -1053,11 +1054,11 @@@ extern void locks_remove_file(struct fi
  extern void locks_release_private(struct file_lock *);
  extern void posix_test_lock(struct file *, struct file_lock *);
  extern int posix_lock_file(struct file *, struct file_lock *, struct file_lock *);
- extern int posix_lock_inode_wait(struct inode *, struct file_lock *);
  extern int posix_unblock_lock(struct file_lock *);
  extern int vfs_test_lock(struct file *, struct file_lock *);
  extern int vfs_lock_file(struct file *, unsigned int, struct file_lock *, struct file_lock *);
  extern int vfs_cancel_lock(struct file *filp, struct file_lock *fl);
- extern int flock_lock_inode_wait(struct inode *inode, struct file_lock *fl);
+ extern int locks_lock_inode_wait(struct inode *inode, struct file_lock *fl);
  extern int __break_lease(struct inode *inode, unsigned int flags, unsigned int type);
  extern void lease_get_mtime(struct inode *, struct timespec *time);
  extern int generic_setlease(struct file *, long, struct file_lock **, void **priv);
@@@ -1145,12 -1143,6 +1144,6 @@@ static inline int posix_lock_file(struc
        return -ENOLCK;
  }
  
- static inline int posix_lock_inode_wait(struct inode *inode,
-                                       struct file_lock *fl)
- {
-       return -ENOLCK;
- }
  static inline int posix_unblock_lock(struct file_lock *waiter)
  {
        return -ENOENT;
@@@ -1172,8 -1164,7 +1165,7 @@@ static inline int vfs_cancel_lock(struc
        return 0;
  }
  
- static inline int flock_lock_inode_wait(struct inode *inode,
-                                       struct file_lock *request)
+ static inline int locks_lock_inode_wait(struct inode *inode, struct file_lock *fl)
  {
        return -ENOLCK;
  }
@@@ -1216,14 -1207,9 +1208,9 @@@ static inline struct inode *file_inode(
        return f->f_inode;
  }
  
- static inline int posix_lock_file_wait(struct file *filp, struct file_lock *fl)
- {
-       return posix_lock_inode_wait(file_inode(filp), fl);
- }
- static inline int flock_lock_file_wait(struct file *filp, struct file_lock *fl)
+ static inline int locks_lock_file_wait(struct file *filp, struct file_lock *fl)
  {
-       return flock_lock_inode_wait(file_inode(filp), fl);
+       return locks_lock_inode_wait(file_inode(filp), fl);
  }
  
  struct fasync_struct {