Merge remote-tracking branch 'file-locks/linux-next'
[karo-tx-linux.git] / fs / nfs / nfs4proc.c
1 /*
2  *  fs/nfs/nfs4proc.c
3  *
4  *  Client-side procedure declarations for NFSv4.
5  *
6  *  Copyright (c) 2002 The Regents of the University of Michigan.
7  *  All rights reserved.
8  *
9  *  Kendrick Smith <kmsmith@umich.edu>
10  *  Andy Adamson   <andros@umich.edu>
11  *
12  *  Redistribution and use in source and binary forms, with or without
13  *  modification, are permitted provided that the following conditions
14  *  are met:
15  *
16  *  1. Redistributions of source code must retain the above copyright
17  *     notice, this list of conditions and the following disclaimer.
18  *  2. Redistributions in binary form must reproduce the above copyright
19  *     notice, this list of conditions and the following disclaimer in the
20  *     documentation and/or other materials provided with the distribution.
21  *  3. Neither the name of the University nor the names of its
22  *     contributors may be used to endorse or promote products derived
23  *     from this software without specific prior written permission.
24  *
25  *  THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
26  *  WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
27  *  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
28  *  DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
29  *  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30  *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31  *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
32  *  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33  *  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34  *  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35  *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36  */
37
38 #include <linux/mm.h>
39 #include <linux/delay.h>
40 #include <linux/errno.h>
41 #include <linux/file.h>
42 #include <linux/string.h>
43 #include <linux/ratelimit.h>
44 #include <linux/printk.h>
45 #include <linux/slab.h>
46 #include <linux/sunrpc/clnt.h>
47 #include <linux/nfs.h>
48 #include <linux/nfs4.h>
49 #include <linux/nfs_fs.h>
50 #include <linux/nfs_page.h>
51 #include <linux/nfs_mount.h>
52 #include <linux/namei.h>
53 #include <linux/mount.h>
54 #include <linux/module.h>
55 #include <linux/xattr.h>
56 #include <linux/utsname.h>
57 #include <linux/freezer.h>
58
59 #include "nfs4_fs.h"
60 #include "delegation.h"
61 #include "internal.h"
62 #include "iostat.h"
63 #include "callback.h"
64 #include "pnfs.h"
65 #include "netns.h"
66 #include "nfs4idmap.h"
67 #include "nfs4session.h"
68 #include "fscache.h"
69
70 #include "nfs4trace.h"
71
/* Select the NFSDBG_PROC debug facility for this file. */
#define NFSDBG_FACILITY		NFSDBG_PROC

/* Lower and upper bounds applied by nfs4_update_delay(), expressed via HZ. */
#define NFS4_POLL_RETRY_MIN	(HZ / 10)
#define NFS4_POLL_RETRY_MAX	(15 * HZ)
76
/* Prototypes for file-local helpers that are referenced ahead of their bodies. */
struct nfs4_opendata;

static int _nfs4_proc_open(struct nfs4_opendata *data);
static int _nfs4_recover_proc_open(struct nfs4_opendata *data);
static int nfs4_do_fsinfo(struct nfs_server *, struct nfs_fh *,
			  struct nfs_fsinfo *);
static void nfs_fixup_referral_attributes(struct nfs_fattr *fattr);
static int nfs4_proc_getattr(struct nfs_server *, struct nfs_fh *,
			     struct nfs_fattr *, struct nfs4_label *label);
static int _nfs4_proc_getattr(struct nfs_server *server,
			      struct nfs_fh *fhandle,
			      struct nfs_fattr *fattr,
			      struct nfs4_label *label);
static int nfs4_do_setattr(struct inode *inode, struct rpc_cred *cred,
			   struct nfs_fattr *fattr, struct iattr *sattr,
			   struct nfs4_state *state,
			   struct nfs4_label *ilabel,
			   struct nfs4_label *olabel);
#ifdef CONFIG_NFS_V4_1
/* Stateid helpers that are only declared when NFSv4.1 support is configured. */
static int nfs41_test_stateid(struct nfs_server *, nfs4_stateid *,
			      struct rpc_cred *);
static int nfs41_free_stateid(struct nfs_server *, nfs4_stateid *,
			      struct rpc_cred *);
#endif
94
95 #ifdef CONFIG_NFS_V4_SECURITY_LABEL
96 static inline struct nfs4_label *
97 nfs4_label_init_security(struct inode *dir, struct dentry *dentry,
98         struct iattr *sattr, struct nfs4_label *label)
99 {
100         int err;
101
102         if (label == NULL)
103                 return NULL;
104
105         if (nfs_server_capable(dir, NFS_CAP_SECURITY_LABEL) == 0)
106                 return NULL;
107
108         err = security_dentry_init_security(dentry, sattr->ia_mode,
109                                 &dentry->d_name, (void **)&label->label, &label->len);
110         if (err == 0)
111                 return label;
112
113         return NULL;
114 }
115 static inline void
116 nfs4_label_release_security(struct nfs4_label *label)
117 {
118         if (label)
119                 security_release_secctx(label->label, label->len);
120 }
121 static inline u32 *nfs4_bitmask(struct nfs_server *server, struct nfs4_label *label)
122 {
123         if (label)
124                 return server->attr_bitmask;
125
126         return server->attr_bitmask_nl;
127 }
128 #else
/* Variant used without CONFIG_NFS_V4_SECURITY_LABEL: never yields a label. */
static inline struct nfs4_label *
nfs4_label_init_security(struct inode *dir, struct dentry *dentry,
	struct iattr *sattr, struct nfs4_label *l)
{
	return NULL;
}
/* Variant used without CONFIG_NFS_V4_SECURITY_LABEL: nothing to release. */
static inline void
nfs4_label_release_security(struct nfs4_label *label)
{
}
136 static inline u32 *
137 nfs4_bitmask(struct nfs_server *server, struct nfs4_label *label)
138 { return server->attr_bitmask; }
139 #endif
140
141 /* Prevent leaks of NFSv4 errors into userland */
142 static int nfs4_map_errors(int err)
143 {
144         if (err >= -1000)
145                 return err;
146         switch (err) {
147         case -NFS4ERR_RESOURCE:
148         case -NFS4ERR_LAYOUTTRYLATER:
149         case -NFS4ERR_RECALLCONFLICT:
150                 return -EREMOTEIO;
151         case -NFS4ERR_WRONGSEC:
152         case -NFS4ERR_WRONG_CRED:
153                 return -EPERM;
154         case -NFS4ERR_BADOWNER:
155         case -NFS4ERR_BADNAME:
156                 return -EINVAL;
157         case -NFS4ERR_SHARE_DENIED:
158                 return -EACCES;
159         case -NFS4ERR_MINOR_VERS_MISMATCH:
160                 return -EPROTONOSUPPORT;
161         case -NFS4ERR_FILE_OPEN:
162                 return -EBUSY;
163         default:
164                 dprintk("%s could not handle NFSv4 error %d\n",
165                                 __func__, -err);
166                 break;
167         }
168         return -EIO;
169 }
170
171 /*
172  * This is our standard bitmap for GETATTR requests.
173  */
174 const u32 nfs4_fattr_bitmap[3] = {
175         FATTR4_WORD0_TYPE
176         | FATTR4_WORD0_CHANGE
177         | FATTR4_WORD0_SIZE
178         | FATTR4_WORD0_FSID
179         | FATTR4_WORD0_FILEID,
180         FATTR4_WORD1_MODE
181         | FATTR4_WORD1_NUMLINKS
182         | FATTR4_WORD1_OWNER
183         | FATTR4_WORD1_OWNER_GROUP
184         | FATTR4_WORD1_RAWDEV
185         | FATTR4_WORD1_SPACE_USED
186         | FATTR4_WORD1_TIME_ACCESS
187         | FATTR4_WORD1_TIME_METADATA
188         | FATTR4_WORD1_TIME_MODIFY
189         | FATTR4_WORD1_MOUNTED_ON_FILEID,
190 #ifdef CONFIG_NFS_V4_SECURITY_LABEL
191         FATTR4_WORD2_SECURITY_LABEL
192 #endif
193 };
194
195 static const u32 nfs4_pnfs_open_bitmap[3] = {
196         FATTR4_WORD0_TYPE
197         | FATTR4_WORD0_CHANGE
198         | FATTR4_WORD0_SIZE
199         | FATTR4_WORD0_FSID
200         | FATTR4_WORD0_FILEID,
201         FATTR4_WORD1_MODE
202         | FATTR4_WORD1_NUMLINKS
203         | FATTR4_WORD1_OWNER
204         | FATTR4_WORD1_OWNER_GROUP
205         | FATTR4_WORD1_RAWDEV
206         | FATTR4_WORD1_SPACE_USED
207         | FATTR4_WORD1_TIME_ACCESS
208         | FATTR4_WORD1_TIME_METADATA
209         | FATTR4_WORD1_TIME_MODIFY,
210         FATTR4_WORD2_MDSTHRESHOLD
211 };
212
213 static const u32 nfs4_open_noattr_bitmap[3] = {
214         FATTR4_WORD0_TYPE
215         | FATTR4_WORD0_CHANGE
216         | FATTR4_WORD0_FILEID,
217 };
218
219 const u32 nfs4_statfs_bitmap[3] = {
220         FATTR4_WORD0_FILES_AVAIL
221         | FATTR4_WORD0_FILES_FREE
222         | FATTR4_WORD0_FILES_TOTAL,
223         FATTR4_WORD1_SPACE_AVAIL
224         | FATTR4_WORD1_SPACE_FREE
225         | FATTR4_WORD1_SPACE_TOTAL
226 };
227
228 const u32 nfs4_pathconf_bitmap[3] = {
229         FATTR4_WORD0_MAXLINK
230         | FATTR4_WORD0_MAXNAME,
231         0
232 };
233
234 const u32 nfs4_fsinfo_bitmap[3] = { FATTR4_WORD0_MAXFILESIZE
235                         | FATTR4_WORD0_MAXREAD
236                         | FATTR4_WORD0_MAXWRITE
237                         | FATTR4_WORD0_LEASE_TIME,
238                         FATTR4_WORD1_TIME_DELTA
239                         | FATTR4_WORD1_FS_LAYOUT_TYPES,
240                         FATTR4_WORD2_LAYOUT_BLKSIZE
241                         | FATTR4_WORD2_CLONE_BLKSIZE
242 };
243
244 const u32 nfs4_fs_locations_bitmap[3] = {
245         FATTR4_WORD0_TYPE
246         | FATTR4_WORD0_CHANGE
247         | FATTR4_WORD0_SIZE
248         | FATTR4_WORD0_FSID
249         | FATTR4_WORD0_FILEID
250         | FATTR4_WORD0_FS_LOCATIONS,
251         FATTR4_WORD1_MODE
252         | FATTR4_WORD1_NUMLINKS
253         | FATTR4_WORD1_OWNER
254         | FATTR4_WORD1_OWNER_GROUP
255         | FATTR4_WORD1_RAWDEV
256         | FATTR4_WORD1_SPACE_USED
257         | FATTR4_WORD1_TIME_ACCESS
258         | FATTR4_WORD1_TIME_METADATA
259         | FATTR4_WORD1_TIME_MODIFY
260         | FATTR4_WORD1_MOUNTED_ON_FILEID,
261 };
262
263 static void nfs4_setup_readdir(u64 cookie, __be32 *verifier, struct dentry *dentry,
264                 struct nfs4_readdir_arg *readdir)
265 {
266         __be32 *start, *p;
267
268         if (cookie > 2) {
269                 readdir->cookie = cookie;
270                 memcpy(&readdir->verifier, verifier, sizeof(readdir->verifier));
271                 return;
272         }
273
274         readdir->cookie = 0;
275         memset(&readdir->verifier, 0, sizeof(readdir->verifier));
276         if (cookie == 2)
277                 return;
278         
279         /*
280          * NFSv4 servers do not return entries for '.' and '..'
281          * Therefore, we fake these entries here.  We let '.'
282          * have cookie 0 and '..' have cookie 1.  Note that
283          * when talking to the server, we always send cookie 0
284          * instead of 1 or 2.
285          */
286         start = p = kmap_atomic(*readdir->pages);
287         
288         if (cookie == 0) {
289                 *p++ = xdr_one;                                  /* next */
290                 *p++ = xdr_zero;                   /* cookie, first word */
291                 *p++ = xdr_one;                   /* cookie, second word */
292                 *p++ = xdr_one;                             /* entry len */
293                 memcpy(p, ".\0\0\0", 4);                        /* entry */
294                 p++;
295                 *p++ = xdr_one;                         /* bitmap length */
296                 *p++ = htonl(FATTR4_WORD0_FILEID);             /* bitmap */
297                 *p++ = htonl(8);              /* attribute buffer length */
298                 p = xdr_encode_hyper(p, NFS_FILEID(d_inode(dentry)));
299         }
300         
301         *p++ = xdr_one;                                  /* next */
302         *p++ = xdr_zero;                   /* cookie, first word */
303         *p++ = xdr_two;                   /* cookie, second word */
304         *p++ = xdr_two;                             /* entry len */
305         memcpy(p, "..\0\0", 4);                         /* entry */
306         p++;
307         *p++ = xdr_one;                         /* bitmap length */
308         *p++ = htonl(FATTR4_WORD0_FILEID);             /* bitmap */
309         *p++ = htonl(8);              /* attribute buffer length */
310         p = xdr_encode_hyper(p, NFS_FILEID(d_inode(dentry->d_parent)));
311
312         readdir->pgbase = (char *)p - (char *)start;
313         readdir->count -= readdir->pgbase;
314         kunmap_atomic(start);
315 }
316
317 static long nfs4_update_delay(long *timeout)
318 {
319         long ret;
320         if (!timeout)
321                 return NFS4_POLL_RETRY_MAX;
322         if (*timeout <= 0)
323                 *timeout = NFS4_POLL_RETRY_MIN;
324         if (*timeout > NFS4_POLL_RETRY_MAX)
325                 *timeout = NFS4_POLL_RETRY_MAX;
326         ret = *timeout;
327         *timeout <<= 1;
328         return ret;
329 }
330
331 static int nfs4_delay(struct rpc_clnt *clnt, long *timeout)
332 {
333         int res = 0;
334
335         might_sleep();
336
337         freezable_schedule_timeout_killable_unsafe(
338                 nfs4_update_delay(timeout));
339         if (fatal_signal_pending(current))
340                 res = -ERESTARTSYS;
341         return res;
342 }
343
344 /* This is the error handling routine for processes that are allowed
345  * to sleep.
346  */
347 static int nfs4_do_handle_exception(struct nfs_server *server,
348                 int errorcode, struct nfs4_exception *exception)
349 {
350         struct nfs_client *clp = server->nfs_client;
351         struct nfs4_state *state = exception->state;
352         struct inode *inode = exception->inode;
353         int ret = errorcode;
354
355         exception->delay = 0;
356         exception->recovering = 0;
357         exception->retry = 0;
358         switch(errorcode) {
359                 case 0:
360                         return 0;
361                 case -NFS4ERR_OPENMODE:
362                 case -NFS4ERR_DELEG_REVOKED:
363                 case -NFS4ERR_ADMIN_REVOKED:
364                 case -NFS4ERR_BAD_STATEID:
365                         if (inode && nfs_async_inode_return_delegation(inode,
366                                                 NULL) == 0)
367                                 goto wait_on_recovery;
368                         if (state == NULL)
369                                 break;
370                         ret = nfs4_schedule_stateid_recovery(server, state);
371                         if (ret < 0)
372                                 break;
373                         goto wait_on_recovery;
374                 case -NFS4ERR_EXPIRED:
375                         if (state != NULL) {
376                                 ret = nfs4_schedule_stateid_recovery(server, state);
377                                 if (ret < 0)
378                                         break;
379                         }
380                 case -NFS4ERR_STALE_STATEID:
381                 case -NFS4ERR_STALE_CLIENTID:
382                         nfs4_schedule_lease_recovery(clp);
383                         goto wait_on_recovery;
384                 case -NFS4ERR_MOVED:
385                         ret = nfs4_schedule_migration_recovery(server);
386                         if (ret < 0)
387                                 break;
388                         goto wait_on_recovery;
389                 case -NFS4ERR_LEASE_MOVED:
390                         nfs4_schedule_lease_moved_recovery(clp);
391                         goto wait_on_recovery;
392 #if defined(CONFIG_NFS_V4_1)
393                 case -NFS4ERR_BADSESSION:
394                 case -NFS4ERR_BADSLOT:
395                 case -NFS4ERR_BAD_HIGH_SLOT:
396                 case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION:
397                 case -NFS4ERR_DEADSESSION:
398                 case -NFS4ERR_SEQ_FALSE_RETRY:
399                 case -NFS4ERR_SEQ_MISORDERED:
400                         dprintk("%s ERROR: %d Reset session\n", __func__,
401                                 errorcode);
402                         nfs4_schedule_session_recovery(clp->cl_session, errorcode);
403                         goto wait_on_recovery;
404 #endif /* defined(CONFIG_NFS_V4_1) */
405                 case -NFS4ERR_FILE_OPEN:
406                         if (exception->timeout > HZ) {
407                                 /* We have retried a decent amount, time to
408                                  * fail
409                                  */
410                                 ret = -EBUSY;
411                                 break;
412                         }
413                 case -NFS4ERR_DELAY:
414                         nfs_inc_server_stats(server, NFSIOS_DELAY);
415                 case -NFS4ERR_GRACE:
416                         exception->delay = 1;
417                         return 0;
418
419                 case -NFS4ERR_RETRY_UNCACHED_REP:
420                 case -NFS4ERR_OLD_STATEID:
421                         exception->retry = 1;
422                         break;
423                 case -NFS4ERR_BADOWNER:
424                         /* The following works around a Linux server bug! */
425                 case -NFS4ERR_BADNAME:
426                         if (server->caps & NFS_CAP_UIDGID_NOMAP) {
427                                 server->caps &= ~NFS_CAP_UIDGID_NOMAP;
428                                 exception->retry = 1;
429                                 printk(KERN_WARNING "NFS: v4 server %s "
430                                                 "does not accept raw "
431                                                 "uid/gids. "
432                                                 "Reenabling the idmapper.\n",
433                                                 server->nfs_client->cl_hostname);
434                         }
435         }
436         /* We failed to handle the error */
437         return nfs4_map_errors(ret);
438 wait_on_recovery:
439         exception->recovering = 1;
440         return 0;
441 }
442
443 /* This is the error handling routine for processes that are allowed
444  * to sleep.
445  */
446 int nfs4_handle_exception(struct nfs_server *server, int errorcode, struct nfs4_exception *exception)
447 {
448         struct nfs_client *clp = server->nfs_client;
449         int ret;
450
451         ret = nfs4_do_handle_exception(server, errorcode, exception);
452         if (exception->delay) {
453                 ret = nfs4_delay(server->client, &exception->timeout);
454                 goto out_retry;
455         }
456         if (exception->recovering) {
457                 ret = nfs4_wait_clnt_recover(clp);
458                 if (test_bit(NFS_MIG_FAILED, &server->mig_status))
459                         return -EIO;
460                 goto out_retry;
461         }
462         return ret;
463 out_retry:
464         if (ret == 0)
465                 exception->retry = 1;
466         return ret;
467 }
468
469 static int
470 nfs4_async_handle_exception(struct rpc_task *task, struct nfs_server *server,
471                 int errorcode, struct nfs4_exception *exception)
472 {
473         struct nfs_client *clp = server->nfs_client;
474         int ret;
475
476         ret = nfs4_do_handle_exception(server, errorcode, exception);
477         if (exception->delay) {
478                 rpc_delay(task, nfs4_update_delay(&exception->timeout));
479                 goto out_retry;
480         }
481         if (exception->recovering) {
482                 rpc_sleep_on(&clp->cl_rpcwaitq, task, NULL);
483                 if (test_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state) == 0)
484                         rpc_wake_up_queued_task(&clp->cl_rpcwaitq, task);
485                 goto out_retry;
486         }
487         if (test_bit(NFS_MIG_FAILED, &server->mig_status))
488                 ret = -EIO;
489         return ret;
490 out_retry:
491         if (ret == 0)
492                 exception->retry = 1;
493         return ret;
494 }
495
496 static int
497 nfs4_async_handle_error(struct rpc_task *task, struct nfs_server *server,
498                         struct nfs4_state *state, long *timeout)
499 {
500         struct nfs4_exception exception = {
501                 .state = state,
502         };
503
504         if (task->tk_status >= 0)
505                 return 0;
506         if (timeout)
507                 exception.timeout = *timeout;
508         task->tk_status = nfs4_async_handle_exception(task, server,
509                         task->tk_status,
510                         &exception);
511         if (exception.delay && timeout)
512                 *timeout = exception.timeout;
513         if (exception.retry)
514                 return -EAGAIN;
515         return 0;
516 }
517
518 /*
519  * Return 'true' if 'clp' is using an rpc_client that is integrity protected
520  * or 'false' otherwise.
521  */
522 static bool _nfs4_is_integrity_protected(struct nfs_client *clp)
523 {
524         rpc_authflavor_t flavor = clp->cl_rpcclient->cl_auth->au_flavor;
525
526         if (flavor == RPC_AUTH_GSS_KRB5I ||
527             flavor == RPC_AUTH_GSS_KRB5P)
528                 return true;
529
530         return false;
531 }
532
533 static void do_renew_lease(struct nfs_client *clp, unsigned long timestamp)
534 {
535         spin_lock(&clp->cl_lock);
536         if (time_before(clp->cl_last_renewal,timestamp))
537                 clp->cl_last_renewal = timestamp;
538         spin_unlock(&clp->cl_lock);
539 }
540
541 static void renew_lease(const struct nfs_server *server, unsigned long timestamp)
542 {
543         struct nfs_client *clp = server->nfs_client;
544
545         if (!nfs4_has_session(clp))
546                 do_renew_lease(clp, timestamp);
547 }
548
/*
 * Callback data for the nfs41_call_sync_* RPC callbacks: the server whose
 * session is looked up, plus the SEQUENCE arguments and results passed on
 * to nfs41_setup_sequence() and nfs41_sequence_done().
 */
struct nfs4_call_sync_data {
	const struct nfs_server		*seq_server;
	struct nfs4_sequence_args	*seq_args;
	struct nfs4_sequence_res	*seq_res;
};
554
555 void nfs4_init_sequence(struct nfs4_sequence_args *args,
556                         struct nfs4_sequence_res *res, int cache_reply)
557 {
558         args->sa_slot = NULL;
559         args->sa_cache_this = cache_reply;
560         args->sa_privileged = 0;
561
562         res->sr_slot = NULL;
563 }
564
565 static void nfs4_set_sequence_privileged(struct nfs4_sequence_args *args)
566 {
567         args->sa_privileged = 1;
568 }
569
570 int nfs40_setup_sequence(struct nfs4_slot_table *tbl,
571                          struct nfs4_sequence_args *args,
572                          struct nfs4_sequence_res *res,
573                          struct rpc_task *task)
574 {
575         struct nfs4_slot *slot;
576
577         /* slot already allocated? */
578         if (res->sr_slot != NULL)
579                 goto out_start;
580
581         spin_lock(&tbl->slot_tbl_lock);
582         if (nfs4_slot_tbl_draining(tbl) && !args->sa_privileged)
583                 goto out_sleep;
584
585         slot = nfs4_alloc_slot(tbl);
586         if (IS_ERR(slot)) {
587                 if (slot == ERR_PTR(-ENOMEM))
588                         task->tk_timeout = HZ >> 2;
589                 goto out_sleep;
590         }
591         spin_unlock(&tbl->slot_tbl_lock);
592
593         args->sa_slot = slot;
594         res->sr_slot = slot;
595
596 out_start:
597         rpc_call_start(task);
598         return 0;
599
600 out_sleep:
601         if (args->sa_privileged)
602                 rpc_sleep_on_priority(&tbl->slot_tbl_waitq, task,
603                                 NULL, RPC_PRIORITY_PRIVILEGED);
604         else
605                 rpc_sleep_on(&tbl->slot_tbl_waitq, task, NULL);
606         spin_unlock(&tbl->slot_tbl_lock);
607         return -EAGAIN;
608 }
609 EXPORT_SYMBOL_GPL(nfs40_setup_sequence);
610
611 static int nfs40_sequence_done(struct rpc_task *task,
612                                struct nfs4_sequence_res *res)
613 {
614         struct nfs4_slot *slot = res->sr_slot;
615         struct nfs4_slot_table *tbl;
616
617         if (slot == NULL)
618                 goto out;
619
620         tbl = slot->table;
621         spin_lock(&tbl->slot_tbl_lock);
622         if (!nfs41_wake_and_assign_slot(tbl, slot))
623                 nfs4_free_slot(tbl, slot);
624         spin_unlock(&tbl->slot_tbl_lock);
625
626         res->sr_slot = NULL;
627 out:
628         return 1;
629 }
630
631 #if defined(CONFIG_NFS_V4_1)
632
633 static void nfs41_sequence_free_slot(struct nfs4_sequence_res *res)
634 {
635         struct nfs4_session *session;
636         struct nfs4_slot_table *tbl;
637         struct nfs4_slot *slot = res->sr_slot;
638         bool send_new_highest_used_slotid = false;
639
640         tbl = slot->table;
641         session = tbl->session;
642
643         spin_lock(&tbl->slot_tbl_lock);
644         /* Be nice to the server: try to ensure that the last transmitted
645          * value for highest_user_slotid <= target_highest_slotid
646          */
647         if (tbl->highest_used_slotid > tbl->target_highest_slotid)
648                 send_new_highest_used_slotid = true;
649
650         if (nfs41_wake_and_assign_slot(tbl, slot)) {
651                 send_new_highest_used_slotid = false;
652                 goto out_unlock;
653         }
654         nfs4_free_slot(tbl, slot);
655
656         if (tbl->highest_used_slotid != NFS4_NO_SLOT)
657                 send_new_highest_used_slotid = false;
658 out_unlock:
659         spin_unlock(&tbl->slot_tbl_lock);
660         res->sr_slot = NULL;
661         if (send_new_highest_used_slotid)
662                 nfs41_notify_server(session->clp);
663 }
664
665 int nfs41_sequence_done(struct rpc_task *task, struct nfs4_sequence_res *res)
666 {
667         struct nfs4_session *session;
668         struct nfs4_slot *slot = res->sr_slot;
669         struct nfs_client *clp;
670         bool interrupted = false;
671         int ret = 1;
672
673         if (slot == NULL)
674                 goto out_noaction;
675         /* don't increment the sequence number if the task wasn't sent */
676         if (!RPC_WAS_SENT(task))
677                 goto out;
678
679         session = slot->table->session;
680
681         if (slot->interrupted) {
682                 slot->interrupted = 0;
683                 interrupted = true;
684         }
685
686         trace_nfs4_sequence_done(session, res);
687         /* Check the SEQUENCE operation status */
688         switch (res->sr_status) {
689         case 0:
690                 /* Update the slot's sequence and clientid lease timer */
691                 ++slot->seq_nr;
692                 clp = session->clp;
693                 do_renew_lease(clp, res->sr_timestamp);
694                 /* Check sequence flags */
695                 nfs41_handle_sequence_flag_errors(clp, res->sr_status_flags);
696                 nfs41_update_target_slotid(slot->table, slot, res);
697                 break;
698         case 1:
699                 /*
700                  * sr_status remains 1 if an RPC level error occurred.
701                  * The server may or may not have processed the sequence
702                  * operation..
703                  * Mark the slot as having hosted an interrupted RPC call.
704                  */
705                 slot->interrupted = 1;
706                 goto out;
707         case -NFS4ERR_DELAY:
708                 /* The server detected a resend of the RPC call and
709                  * returned NFS4ERR_DELAY as per Section 2.10.6.2
710                  * of RFC5661.
711                  */
712                 dprintk("%s: slot=%u seq=%u: Operation in progress\n",
713                         __func__,
714                         slot->slot_nr,
715                         slot->seq_nr);
716                 goto out_retry;
717         case -NFS4ERR_BADSLOT:
718                 /*
719                  * The slot id we used was probably retired. Try again
720                  * using a different slot id.
721                  */
722                 goto retry_nowait;
723         case -NFS4ERR_SEQ_MISORDERED:
724                 /*
725                  * Was the last operation on this sequence interrupted?
726                  * If so, retry after bumping the sequence number.
727                  */
728                 if (interrupted) {
729                         ++slot->seq_nr;
730                         goto retry_nowait;
731                 }
732                 /*
733                  * Could this slot have been previously retired?
734                  * If so, then the server may be expecting seq_nr = 1!
735                  */
736                 if (slot->seq_nr != 1) {
737                         slot->seq_nr = 1;
738                         goto retry_nowait;
739                 }
740                 break;
741         case -NFS4ERR_SEQ_FALSE_RETRY:
742                 ++slot->seq_nr;
743                 goto retry_nowait;
744         default:
745                 /* Just update the slot sequence no. */
746                 ++slot->seq_nr;
747         }
748 out:
749         /* The session may be reset by one of the error handlers. */
750         dprintk("%s: Error %d free the slot \n", __func__, res->sr_status);
751         nfs41_sequence_free_slot(res);
752 out_noaction:
753         return ret;
754 retry_nowait:
755         if (rpc_restart_call_prepare(task)) {
756                 task->tk_status = 0;
757                 ret = 0;
758         }
759         goto out;
760 out_retry:
761         if (!rpc_restart_call(task))
762                 goto out;
763         rpc_delay(task, NFS4_POLL_RETRY_MAX);
764         return 0;
765 }
766 EXPORT_SYMBOL_GPL(nfs41_sequence_done);
767
768 int nfs4_sequence_done(struct rpc_task *task, struct nfs4_sequence_res *res)
769 {
770         if (res->sr_slot == NULL)
771                 return 1;
772         if (!res->sr_slot->table->session)
773                 return nfs40_sequence_done(task, res);
774         return nfs41_sequence_done(task, res);
775 }
776 EXPORT_SYMBOL_GPL(nfs4_sequence_done);
777
778 int nfs41_setup_sequence(struct nfs4_session *session,
779                                 struct nfs4_sequence_args *args,
780                                 struct nfs4_sequence_res *res,
781                                 struct rpc_task *task)
782 {
783         struct nfs4_slot *slot;
784         struct nfs4_slot_table *tbl;
785
786         dprintk("--> %s\n", __func__);
787         /* slot already allocated? */
788         if (res->sr_slot != NULL)
789                 goto out_success;
790
791         tbl = &session->fc_slot_table;
792
793         task->tk_timeout = 0;
794
795         spin_lock(&tbl->slot_tbl_lock);
796         if (test_bit(NFS4_SLOT_TBL_DRAINING, &tbl->slot_tbl_state) &&
797             !args->sa_privileged) {
798                 /* The state manager will wait until the slot table is empty */
799                 dprintk("%s session is draining\n", __func__);
800                 goto out_sleep;
801         }
802
803         slot = nfs4_alloc_slot(tbl);
804         if (IS_ERR(slot)) {
805                 /* If out of memory, try again in 1/4 second */
806                 if (slot == ERR_PTR(-ENOMEM))
807                         task->tk_timeout = HZ >> 2;
808                 dprintk("<-- %s: no free slots\n", __func__);
809                 goto out_sleep;
810         }
811         spin_unlock(&tbl->slot_tbl_lock);
812
813         args->sa_slot = slot;
814
815         dprintk("<-- %s slotid=%u seqid=%u\n", __func__,
816                         slot->slot_nr, slot->seq_nr);
817
818         res->sr_slot = slot;
819         res->sr_timestamp = jiffies;
820         res->sr_status_flags = 0;
821         /*
822          * sr_status is only set in decode_sequence, and so will remain
823          * set to 1 if an rpc level failure occurs.
824          */
825         res->sr_status = 1;
826         trace_nfs4_setup_sequence(session, args);
827 out_success:
828         rpc_call_start(task);
829         return 0;
830 out_sleep:
831         /* Privileged tasks are queued with top priority */
832         if (args->sa_privileged)
833                 rpc_sleep_on_priority(&tbl->slot_tbl_waitq, task,
834                                 NULL, RPC_PRIORITY_PRIVILEGED);
835         else
836                 rpc_sleep_on(&tbl->slot_tbl_waitq, task, NULL);
837         spin_unlock(&tbl->slot_tbl_lock);
838         return -EAGAIN;
839 }
840 EXPORT_SYMBOL_GPL(nfs41_setup_sequence);
841
842 static int nfs4_setup_sequence(const struct nfs_server *server,
843                                struct nfs4_sequence_args *args,
844                                struct nfs4_sequence_res *res,
845                                struct rpc_task *task)
846 {
847         struct nfs4_session *session = nfs4_get_session(server);
848         int ret = 0;
849
850         if (!session)
851                 return nfs40_setup_sequence(server->nfs_client->cl_slot_tbl,
852                                             args, res, task);
853
854         dprintk("--> %s clp %p session %p sr_slot %u\n",
855                 __func__, session->clp, session, res->sr_slot ?
856                         res->sr_slot->slot_nr : NFS4_NO_SLOT);
857
858         ret = nfs41_setup_sequence(session, args, res, task);
859
860         dprintk("<-- %s status=%d\n", __func__, ret);
861         return ret;
862 }
863
864 static void nfs41_call_sync_prepare(struct rpc_task *task, void *calldata)
865 {
866         struct nfs4_call_sync_data *data = calldata;
867         struct nfs4_session *session = nfs4_get_session(data->seq_server);
868
869         dprintk("--> %s data->seq_server %p\n", __func__, data->seq_server);
870
871         nfs41_setup_sequence(session, data->seq_args, data->seq_res, task);
872 }
873
874 static void nfs41_call_sync_done(struct rpc_task *task, void *calldata)
875 {
876         struct nfs4_call_sync_data *data = calldata;
877
878         nfs41_sequence_done(task, data->seq_res);
879 }
880
881 static const struct rpc_call_ops nfs41_call_sync_ops = {
882         .rpc_call_prepare = nfs41_call_sync_prepare,
883         .rpc_call_done = nfs41_call_sync_done,
884 };
885
886 #else   /* !CONFIG_NFS_V4_1 */
887
888 static int nfs4_setup_sequence(const struct nfs_server *server,
889                                struct nfs4_sequence_args *args,
890                                struct nfs4_sequence_res *res,
891                                struct rpc_task *task)
892 {
893         return nfs40_setup_sequence(server->nfs_client->cl_slot_tbl,
894                                     args, res, task);
895 }
896
/*
 * Variant built when CONFIG_NFS_V4_1 is disabled: forwards to
 * nfs40_sequence_done() and returns its result.
 */
int nfs4_sequence_done(struct rpc_task *task,
		       struct nfs4_sequence_res *res)
{
	int status = nfs40_sequence_done(task, res);

	return status;
}
EXPORT_SYMBOL_GPL(nfs4_sequence_done);
903
904 #endif  /* !CONFIG_NFS_V4_1 */
905
906 static void nfs40_call_sync_prepare(struct rpc_task *task, void *calldata)
907 {
908         struct nfs4_call_sync_data *data = calldata;
909         nfs4_setup_sequence(data->seq_server,
910                                 data->seq_args, data->seq_res, task);
911 }
912
913 static void nfs40_call_sync_done(struct rpc_task *task, void *calldata)
914 {
915         struct nfs4_call_sync_data *data = calldata;
916         nfs4_sequence_done(task, data->seq_res);
917 }
918
919 static const struct rpc_call_ops nfs40_call_sync_ops = {
920         .rpc_call_prepare = nfs40_call_sync_prepare,
921         .rpc_call_done = nfs40_call_sync_done,
922 };
923
924 static int nfs4_call_sync_sequence(struct rpc_clnt *clnt,
925                                    struct nfs_server *server,
926                                    struct rpc_message *msg,
927                                    struct nfs4_sequence_args *args,
928                                    struct nfs4_sequence_res *res)
929 {
930         int ret;
931         struct rpc_task *task;
932         struct nfs_client *clp = server->nfs_client;
933         struct nfs4_call_sync_data data = {
934                 .seq_server = server,
935                 .seq_args = args,
936                 .seq_res = res,
937         };
938         struct rpc_task_setup task_setup = {
939                 .rpc_client = clnt,
940                 .rpc_message = msg,
941                 .callback_ops = clp->cl_mvops->call_sync_ops,
942                 .callback_data = &data
943         };
944
945         task = rpc_run_task(&task_setup);
946         if (IS_ERR(task))
947                 ret = PTR_ERR(task);
948         else {
949                 ret = task->tk_status;
950                 rpc_put_task(task);
951         }
952         return ret;
953 }
954
/*
 * Initialise the sequence arguments/results (passing @cache_reply to
 * nfs4_init_sequence()) and then issue @msg through
 * nfs4_call_sync_sequence(), returning its result.
 */
int nfs4_call_sync(struct rpc_clnt *clnt,
		   struct nfs_server *server,
		   struct rpc_message *msg,
		   struct nfs4_sequence_args *args,
		   struct nfs4_sequence_res *res,
		   int cache_reply)
{
	int status;

	nfs4_init_sequence(args, res, cache_reply);
	status = nfs4_call_sync_sequence(clnt, server, msg, args, res);
	return status;
}
965
966 static void update_changeattr(struct inode *dir, struct nfs4_change_info *cinfo)
967 {
968         struct nfs_inode *nfsi = NFS_I(dir);
969
970         spin_lock(&dir->i_lock);
971         nfsi->cache_validity |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA;
972         if (!cinfo->atomic || cinfo->before != dir->i_version)
973                 nfs_force_lookup_revalidate(dir);
974         dir->i_version = cinfo->after;
975         nfsi->attr_gencount = nfs_inc_attr_generation_counter();
976         nfs_fscache_invalidate(dir);
977         spin_unlock(&dir->i_lock);
978 }
979
980 struct nfs4_opendata {
981         struct kref kref;
982         struct nfs_openargs o_arg;
983         struct nfs_openres o_res;
984         struct nfs_open_confirmargs c_arg;
985         struct nfs_open_confirmres c_res;
986         struct nfs4_string owner_name;
987         struct nfs4_string group_name;
988         struct nfs4_label *a_label;
989         struct nfs_fattr f_attr;
990         struct nfs4_label *f_label;
991         struct dentry *dir;
992         struct dentry *dentry;
993         struct nfs4_state_owner *owner;
994         struct nfs4_state *state;
995         struct iattr attrs;
996         unsigned long timestamp;
997         unsigned int rpc_done : 1;
998         unsigned int file_created : 1;
999         unsigned int is_recover : 1;
1000         int rpc_status;
1001         int cancelled;
1002 };
1003
1004 static bool nfs4_clear_cap_atomic_open_v1(struct nfs_server *server,
1005                 int err, struct nfs4_exception *exception)
1006 {
1007         if (err != -EINVAL)
1008                 return false;
1009         if (!(server->caps & NFS_CAP_ATOMIC_OPEN_V1))
1010                 return false;
1011         server->caps &= ~NFS_CAP_ATOMIC_OPEN_V1;
1012         exception->retry = 1;
1013         return true;
1014 }
1015
1016 static u32
1017 nfs4_map_atomic_open_share(struct nfs_server *server,
1018                 fmode_t fmode, int openflags)
1019 {
1020         u32 res = 0;
1021
1022         switch (fmode & (FMODE_READ | FMODE_WRITE)) {
1023         case FMODE_READ:
1024                 res = NFS4_SHARE_ACCESS_READ;
1025                 break;
1026         case FMODE_WRITE:
1027                 res = NFS4_SHARE_ACCESS_WRITE;
1028                 break;
1029         case FMODE_READ|FMODE_WRITE:
1030                 res = NFS4_SHARE_ACCESS_BOTH;
1031         }
1032         if (!(server->caps & NFS_CAP_ATOMIC_OPEN_V1))
1033                 goto out;
1034         /* Want no delegation if we're using O_DIRECT */
1035         if (openflags & O_DIRECT)
1036                 res |= NFS4_SHARE_WANT_NO_DELEG;
1037 out:
1038         return res;
1039 }
1040
1041 static enum open_claim_type4
1042 nfs4_map_atomic_open_claim(struct nfs_server *server,
1043                 enum open_claim_type4 claim)
1044 {
1045         if (server->caps & NFS_CAP_ATOMIC_OPEN_V1)
1046                 return claim;
1047         switch (claim) {
1048         default:
1049                 return claim;
1050         case NFS4_OPEN_CLAIM_FH:
1051                 return NFS4_OPEN_CLAIM_NULL;
1052         case NFS4_OPEN_CLAIM_DELEG_CUR_FH:
1053                 return NFS4_OPEN_CLAIM_DELEGATE_CUR;
1054         case NFS4_OPEN_CLAIM_DELEG_PREV_FH:
1055                 return NFS4_OPEN_CLAIM_DELEGATE_PREV;
1056         }
1057 }
1058
1059 static void nfs4_init_opendata_res(struct nfs4_opendata *p)
1060 {
1061         p->o_res.f_attr = &p->f_attr;
1062         p->o_res.f_label = p->f_label;
1063         p->o_res.seqid = p->o_arg.seqid;
1064         p->c_res.seqid = p->c_arg.seqid;
1065         p->o_res.server = p->o_arg.server;
1066         p->o_res.access_request = p->o_arg.access;
1067         nfs_fattr_init(&p->f_attr);
1068         nfs_fattr_init_names(&p->f_attr, &p->owner_name, &p->group_name);
1069 }
1070
1071 static struct nfs4_opendata *nfs4_opendata_alloc(struct dentry *dentry,
1072                 struct nfs4_state_owner *sp, fmode_t fmode, int flags,
1073                 const struct iattr *attrs,
1074                 struct nfs4_label *label,
1075                 enum open_claim_type4 claim,
1076                 gfp_t gfp_mask)
1077 {
1078         struct dentry *parent = dget_parent(dentry);
1079         struct inode *dir = d_inode(parent);
1080         struct nfs_server *server = NFS_SERVER(dir);
1081         struct nfs_seqid *(*alloc_seqid)(struct nfs_seqid_counter *, gfp_t);
1082         struct nfs4_opendata *p;
1083
1084         p = kzalloc(sizeof(*p), gfp_mask);
1085         if (p == NULL)
1086                 goto err;
1087
1088         p->f_label = nfs4_label_alloc(server, gfp_mask);
1089         if (IS_ERR(p->f_label))
1090                 goto err_free_p;
1091
1092         p->a_label = nfs4_label_alloc(server, gfp_mask);
1093         if (IS_ERR(p->a_label))
1094                 goto err_free_f;
1095
1096         alloc_seqid = server->nfs_client->cl_mvops->alloc_seqid;
1097         p->o_arg.seqid = alloc_seqid(&sp->so_seqid, gfp_mask);
1098         if (IS_ERR(p->o_arg.seqid))
1099                 goto err_free_label;
1100         nfs_sb_active(dentry->d_sb);
1101         p->dentry = dget(dentry);
1102         p->dir = parent;
1103         p->owner = sp;
1104         atomic_inc(&sp->so_count);
1105         p->o_arg.open_flags = flags;
1106         p->o_arg.fmode = fmode & (FMODE_READ|FMODE_WRITE);
1107         p->o_arg.share_access = nfs4_map_atomic_open_share(server,
1108                         fmode, flags);
1109         /* don't put an ACCESS op in OPEN compound if O_EXCL, because ACCESS
1110          * will return permission denied for all bits until close */
1111         if (!(flags & O_EXCL)) {
1112                 /* ask server to check for all possible rights as results
1113                  * are cached */
1114                 p->o_arg.access = NFS4_ACCESS_READ | NFS4_ACCESS_MODIFY |
1115                                   NFS4_ACCESS_EXTEND | NFS4_ACCESS_EXECUTE;
1116         }
1117         p->o_arg.clientid = server->nfs_client->cl_clientid;
1118         p->o_arg.id.create_time = ktime_to_ns(sp->so_seqid.create_time);
1119         p->o_arg.id.uniquifier = sp->so_seqid.owner_id;
1120         p->o_arg.name = &dentry->d_name;
1121         p->o_arg.server = server;
1122         p->o_arg.bitmask = nfs4_bitmask(server, label);
1123         p->o_arg.open_bitmap = &nfs4_fattr_bitmap[0];
1124         p->o_arg.label = nfs4_label_copy(p->a_label, label);
1125         p->o_arg.claim = nfs4_map_atomic_open_claim(server, claim);
1126         switch (p->o_arg.claim) {
1127         case NFS4_OPEN_CLAIM_NULL:
1128         case NFS4_OPEN_CLAIM_DELEGATE_CUR:
1129         case NFS4_OPEN_CLAIM_DELEGATE_PREV:
1130                 p->o_arg.fh = NFS_FH(dir);
1131                 break;
1132         case NFS4_OPEN_CLAIM_PREVIOUS:
1133         case NFS4_OPEN_CLAIM_FH:
1134         case NFS4_OPEN_CLAIM_DELEG_CUR_FH:
1135         case NFS4_OPEN_CLAIM_DELEG_PREV_FH:
1136                 p->o_arg.fh = NFS_FH(d_inode(dentry));
1137         }
1138         if (attrs != NULL && attrs->ia_valid != 0) {
1139                 __u32 verf[2];
1140
1141                 p->o_arg.u.attrs = &p->attrs;
1142                 memcpy(&p->attrs, attrs, sizeof(p->attrs));
1143
1144                 verf[0] = jiffies;
1145                 verf[1] = current->pid;
1146                 memcpy(p->o_arg.u.verifier.data, verf,
1147                                 sizeof(p->o_arg.u.verifier.data));
1148         }
1149         p->c_arg.fh = &p->o_res.fh;
1150         p->c_arg.stateid = &p->o_res.stateid;
1151         p->c_arg.seqid = p->o_arg.seqid;
1152         nfs4_init_opendata_res(p);
1153         kref_init(&p->kref);
1154         return p;
1155
1156 err_free_label:
1157         nfs4_label_free(p->a_label);
1158 err_free_f:
1159         nfs4_label_free(p->f_label);
1160 err_free_p:
1161         kfree(p);
1162 err:
1163         dput(parent);
1164         return NULL;
1165 }
1166
1167 static void nfs4_opendata_free(struct kref *kref)
1168 {
1169         struct nfs4_opendata *p = container_of(kref,
1170                         struct nfs4_opendata, kref);
1171         struct super_block *sb = p->dentry->d_sb;
1172
1173         nfs_free_seqid(p->o_arg.seqid);
1174         if (p->state != NULL)
1175                 nfs4_put_open_state(p->state);
1176         nfs4_put_state_owner(p->owner);
1177
1178         nfs4_label_free(p->a_label);
1179         nfs4_label_free(p->f_label);
1180
1181         dput(p->dir);
1182         dput(p->dentry);
1183         nfs_sb_deactive(sb);
1184         nfs_fattr_free_names(&p->f_attr);
1185         kfree(p->f_attr.mdsthreshold);
1186         kfree(p);
1187 }
1188
1189 static void nfs4_opendata_put(struct nfs4_opendata *p)
1190 {
1191         if (p != NULL)
1192                 kref_put(&p->kref, nfs4_opendata_free);
1193 }
1194
/* Thin wrapper: returns the result of rpc_wait_for_completion_task(). */
static int nfs4_wait_for_completion_rpc_task(struct rpc_task *task)
{
	return rpc_wait_for_completion_task(task);
}
1202
1203 static bool nfs4_mode_match_open_stateid(struct nfs4_state *state,
1204                 fmode_t fmode)
1205 {
1206         switch(fmode & (FMODE_READ|FMODE_WRITE)) {
1207         case FMODE_READ|FMODE_WRITE:
1208                 return state->n_rdwr != 0;
1209         case FMODE_WRITE:
1210                 return state->n_wronly != 0;
1211         case FMODE_READ:
1212                 return state->n_rdonly != 0;
1213         }
1214         WARN_ON_ONCE(1);
1215         return false;
1216 }
1217
1218 static int can_open_cached(struct nfs4_state *state, fmode_t mode, int open_mode)
1219 {
1220         int ret = 0;
1221
1222         if (open_mode & (O_EXCL|O_TRUNC))
1223                 goto out;
1224         switch (mode & (FMODE_READ|FMODE_WRITE)) {
1225                 case FMODE_READ:
1226                         ret |= test_bit(NFS_O_RDONLY_STATE, &state->flags) != 0
1227                                 && state->n_rdonly != 0;
1228                         break;
1229                 case FMODE_WRITE:
1230                         ret |= test_bit(NFS_O_WRONLY_STATE, &state->flags) != 0
1231                                 && state->n_wronly != 0;
1232                         break;
1233                 case FMODE_READ|FMODE_WRITE:
1234                         ret |= test_bit(NFS_O_RDWR_STATE, &state->flags) != 0
1235                                 && state->n_rdwr != 0;
1236         }
1237 out:
1238         return ret;
1239 }
1240
1241 static int can_open_delegated(struct nfs_delegation *delegation, fmode_t fmode,
1242                 enum open_claim_type4 claim)
1243 {
1244         if (delegation == NULL)
1245                 return 0;
1246         if ((delegation->type & fmode) != fmode)
1247                 return 0;
1248         if (test_bit(NFS_DELEGATION_RETURNING, &delegation->flags))
1249                 return 0;
1250         switch (claim) {
1251         case NFS4_OPEN_CLAIM_NULL:
1252         case NFS4_OPEN_CLAIM_FH:
1253                 break;
1254         case NFS4_OPEN_CLAIM_PREVIOUS:
1255                 if (!test_bit(NFS_DELEGATION_NEED_RECLAIM, &delegation->flags))
1256                         break;
1257         default:
1258                 return 0;
1259         }
1260         nfs_mark_delegation_referenced(delegation);
1261         return 1;
1262 }
1263
1264 static void update_open_stateflags(struct nfs4_state *state, fmode_t fmode)
1265 {
1266         switch (fmode) {
1267                 case FMODE_WRITE:
1268                         state->n_wronly++;
1269                         break;
1270                 case FMODE_READ:
1271                         state->n_rdonly++;
1272                         break;
1273                 case FMODE_READ|FMODE_WRITE:
1274                         state->n_rdwr++;
1275         }
1276         nfs4_state_set_mode_locked(state, state->state | fmode);
1277 }
1278
1279 static void nfs_test_and_clear_all_open_stateid(struct nfs4_state *state)
1280 {
1281         struct nfs_client *clp = state->owner->so_server->nfs_client;
1282         bool need_recover = false;
1283
1284         if (test_and_clear_bit(NFS_O_RDONLY_STATE, &state->flags) && state->n_rdonly)
1285                 need_recover = true;
1286         if (test_and_clear_bit(NFS_O_WRONLY_STATE, &state->flags) && state->n_wronly)
1287                 need_recover = true;
1288         if (test_and_clear_bit(NFS_O_RDWR_STATE, &state->flags) && state->n_rdwr)
1289                 need_recover = true;
1290         if (need_recover)
1291                 nfs4_state_mark_reclaim_nograce(clp, state);
1292 }
1293
1294 static bool nfs_need_update_open_stateid(struct nfs4_state *state,
1295                 nfs4_stateid *stateid)
1296 {
1297         if (test_and_set_bit(NFS_OPEN_STATE, &state->flags) == 0)
1298                 return true;
1299         if (!nfs4_stateid_match_other(stateid, &state->open_stateid)) {
1300                 nfs_test_and_clear_all_open_stateid(state);
1301                 return true;
1302         }
1303         if (nfs4_stateid_is_newer(stateid, &state->open_stateid))
1304                 return true;
1305         return false;
1306 }
1307
1308 static void nfs_resync_open_stateid_locked(struct nfs4_state *state)
1309 {
1310         if (!(state->n_wronly || state->n_rdonly || state->n_rdwr))
1311                 return;
1312         if (state->n_wronly)
1313                 set_bit(NFS_O_WRONLY_STATE, &state->flags);
1314         if (state->n_rdonly)
1315                 set_bit(NFS_O_RDONLY_STATE, &state->flags);
1316         if (state->n_rdwr)
1317                 set_bit(NFS_O_RDWR_STATE, &state->flags);
1318         set_bit(NFS_OPEN_STATE, &state->flags);
1319 }
1320
1321 static void nfs_clear_open_stateid_locked(struct nfs4_state *state,
1322                 nfs4_stateid *arg_stateid,
1323                 nfs4_stateid *stateid, fmode_t fmode)
1324 {
1325         clear_bit(NFS_O_RDWR_STATE, &state->flags);
1326         switch (fmode & (FMODE_READ|FMODE_WRITE)) {
1327         case FMODE_WRITE:
1328                 clear_bit(NFS_O_RDONLY_STATE, &state->flags);
1329                 break;
1330         case FMODE_READ:
1331                 clear_bit(NFS_O_WRONLY_STATE, &state->flags);
1332                 break;
1333         case 0:
1334                 clear_bit(NFS_O_RDONLY_STATE, &state->flags);
1335                 clear_bit(NFS_O_WRONLY_STATE, &state->flags);
1336                 clear_bit(NFS_OPEN_STATE, &state->flags);
1337         }
1338         if (stateid == NULL)
1339                 return;
1340         /* Handle races with OPEN */
1341         if (!nfs4_stateid_match_other(arg_stateid, &state->open_stateid) ||
1342             (nfs4_stateid_match_other(stateid, &state->open_stateid) &&
1343             !nfs4_stateid_is_newer(stateid, &state->open_stateid))) {
1344                 nfs_resync_open_stateid_locked(state);
1345                 return;
1346         }
1347         if (test_bit(NFS_DELEGATED_STATE, &state->flags) == 0)
1348                 nfs4_stateid_copy(&state->stateid, stateid);
1349         nfs4_stateid_copy(&state->open_stateid, stateid);
1350 }
1351
1352 static void nfs_clear_open_stateid(struct nfs4_state *state,
1353         nfs4_stateid *arg_stateid,
1354         nfs4_stateid *stateid, fmode_t fmode)
1355 {
1356         write_seqlock(&state->seqlock);
1357         nfs_clear_open_stateid_locked(state, arg_stateid, stateid, fmode);
1358         write_sequnlock(&state->seqlock);
1359         if (test_bit(NFS_STATE_RECLAIM_NOGRACE, &state->flags))
1360                 nfs4_schedule_state_manager(state->owner->so_server->nfs_client);
1361 }
1362
1363 static void nfs_set_open_stateid_locked(struct nfs4_state *state, nfs4_stateid *stateid, fmode_t fmode)
1364 {
1365         switch (fmode) {
1366                 case FMODE_READ:
1367                         set_bit(NFS_O_RDONLY_STATE, &state->flags);
1368                         break;
1369                 case FMODE_WRITE:
1370                         set_bit(NFS_O_WRONLY_STATE, &state->flags);
1371                         break;
1372                 case FMODE_READ|FMODE_WRITE:
1373                         set_bit(NFS_O_RDWR_STATE, &state->flags);
1374         }
1375         if (!nfs_need_update_open_stateid(state, stateid))
1376                 return;
1377         if (test_bit(NFS_DELEGATED_STATE, &state->flags) == 0)
1378                 nfs4_stateid_copy(&state->stateid, stateid);
1379         nfs4_stateid_copy(&state->open_stateid, stateid);
1380 }
1381
1382 static void __update_open_stateid(struct nfs4_state *state, nfs4_stateid *open_stateid, const nfs4_stateid *deleg_stateid, fmode_t fmode)
1383 {
1384         /*
1385          * Protect the call to nfs4_state_set_mode_locked and
1386          * serialise the stateid update
1387          */
1388         write_seqlock(&state->seqlock);
1389         if (deleg_stateid != NULL) {
1390                 nfs4_stateid_copy(&state->stateid, deleg_stateid);
1391                 set_bit(NFS_DELEGATED_STATE, &state->flags);
1392         }
1393         if (open_stateid != NULL)
1394                 nfs_set_open_stateid_locked(state, open_stateid, fmode);
1395         write_sequnlock(&state->seqlock);
1396         spin_lock(&state->owner->so_lock);
1397         update_open_stateflags(state, fmode);
1398         spin_unlock(&state->owner->so_lock);
1399 }
1400
1401 static int update_open_stateid(struct nfs4_state *state, nfs4_stateid *open_stateid, nfs4_stateid *delegation, fmode_t fmode)
1402 {
1403         struct nfs_inode *nfsi = NFS_I(state->inode);
1404         struct nfs_delegation *deleg_cur;
1405         int ret = 0;
1406
1407         fmode &= (FMODE_READ|FMODE_WRITE);
1408
1409         rcu_read_lock();
1410         deleg_cur = rcu_dereference(nfsi->delegation);
1411         if (deleg_cur == NULL)
1412                 goto no_delegation;
1413
1414         spin_lock(&deleg_cur->lock);
1415         if (rcu_dereference(nfsi->delegation) != deleg_cur ||
1416            test_bit(NFS_DELEGATION_RETURNING, &deleg_cur->flags) ||
1417             (deleg_cur->type & fmode) != fmode)
1418                 goto no_delegation_unlock;
1419
1420         if (delegation == NULL)
1421                 delegation = &deleg_cur->stateid;
1422         else if (!nfs4_stateid_match(&deleg_cur->stateid, delegation))
1423                 goto no_delegation_unlock;
1424
1425         nfs_mark_delegation_referenced(deleg_cur);
1426         __update_open_stateid(state, open_stateid, &deleg_cur->stateid, fmode);
1427         ret = 1;
1428 no_delegation_unlock:
1429         spin_unlock(&deleg_cur->lock);
1430 no_delegation:
1431         rcu_read_unlock();
1432
1433         if (!ret && open_stateid != NULL) {
1434                 __update_open_stateid(state, open_stateid, NULL, fmode);
1435                 ret = 1;
1436         }
1437         if (test_bit(NFS_STATE_RECLAIM_NOGRACE, &state->flags))
1438                 nfs4_schedule_state_manager(state->owner->so_server->nfs_client);
1439
1440         return ret;
1441 }
1442
1443 static bool nfs4_update_lock_stateid(struct nfs4_lock_state *lsp,
1444                 const nfs4_stateid *stateid)
1445 {
1446         struct nfs4_state *state = lsp->ls_state;
1447         bool ret = false;
1448
1449         spin_lock(&state->state_lock);
1450         if (!nfs4_stateid_match_other(stateid, &lsp->ls_stateid))
1451                 goto out_noupdate;
1452         if (!nfs4_stateid_is_newer(stateid, &lsp->ls_stateid))
1453                 goto out_noupdate;
1454         nfs4_stateid_copy(&lsp->ls_stateid, stateid);
1455         ret = true;
1456 out_noupdate:
1457         spin_unlock(&state->state_lock);
1458         return ret;
1459 }
1460
1461 static void nfs4_return_incompatible_delegation(struct inode *inode, fmode_t fmode)
1462 {
1463         struct nfs_delegation *delegation;
1464
1465         rcu_read_lock();
1466         delegation = rcu_dereference(NFS_I(inode)->delegation);
1467         if (delegation == NULL || (delegation->type & fmode) == fmode) {
1468                 rcu_read_unlock();
1469                 return;
1470         }
1471         rcu_read_unlock();
1472         nfs4_inode_return_delegation(inode);
1473 }
1474
1475 static struct nfs4_state *nfs4_try_open_cached(struct nfs4_opendata *opendata)
1476 {
1477         struct nfs4_state *state = opendata->state;
1478         struct nfs_inode *nfsi = NFS_I(state->inode);
1479         struct nfs_delegation *delegation;
1480         int open_mode = opendata->o_arg.open_flags;
1481         fmode_t fmode = opendata->o_arg.fmode;
1482         enum open_claim_type4 claim = opendata->o_arg.claim;
1483         nfs4_stateid stateid;
1484         int ret = -EAGAIN;
1485
1486         for (;;) {
1487                 spin_lock(&state->owner->so_lock);
1488                 if (can_open_cached(state, fmode, open_mode)) {
1489                         update_open_stateflags(state, fmode);
1490                         spin_unlock(&state->owner->so_lock);
1491                         goto out_return_state;
1492                 }
1493                 spin_unlock(&state->owner->so_lock);
1494                 rcu_read_lock();
1495                 delegation = rcu_dereference(nfsi->delegation);
1496                 if (!can_open_delegated(delegation, fmode, claim)) {
1497                         rcu_read_unlock();
1498                         break;
1499                 }
1500                 /* Save the delegation */
1501                 nfs4_stateid_copy(&stateid, &delegation->stateid);
1502                 rcu_read_unlock();
1503                 nfs_release_seqid(opendata->o_arg.seqid);
1504                 if (!opendata->is_recover) {
1505                         ret = nfs_may_open(state->inode, state->owner->so_cred, open_mode);
1506                         if (ret != 0)
1507                                 goto out;
1508                 }
1509                 ret = -EAGAIN;
1510
1511                 /* Try to update the stateid using the delegation */
1512                 if (update_open_stateid(state, NULL, &stateid, fmode))
1513                         goto out_return_state;
1514         }
1515 out:
1516         return ERR_PTR(ret);
1517 out_return_state:
1518         atomic_inc(&state->count);
1519         return state;
1520 }
1521
1522 static void
1523 nfs4_opendata_check_deleg(struct nfs4_opendata *data, struct nfs4_state *state)
1524 {
1525         struct nfs_client *clp = NFS_SERVER(state->inode)->nfs_client;
1526         struct nfs_delegation *delegation;
1527         int delegation_flags = 0;
1528
1529         rcu_read_lock();
1530         delegation = rcu_dereference(NFS_I(state->inode)->delegation);
1531         if (delegation)
1532                 delegation_flags = delegation->flags;
1533         rcu_read_unlock();
1534         switch (data->o_arg.claim) {
1535         default:
1536                 break;
1537         case NFS4_OPEN_CLAIM_DELEGATE_CUR:
1538         case NFS4_OPEN_CLAIM_DELEG_CUR_FH:
1539                 pr_err_ratelimited("NFS: Broken NFSv4 server %s is "
1540                                    "returning a delegation for "
1541                                    "OPEN(CLAIM_DELEGATE_CUR)\n",
1542                                    clp->cl_hostname);
1543                 return;
1544         }
1545         if ((delegation_flags & 1UL<<NFS_DELEGATION_NEED_RECLAIM) == 0)
1546                 nfs_inode_set_delegation(state->inode,
1547                                          data->owner->so_cred,
1548                                          &data->o_res);
1549         else
1550                 nfs_inode_reclaim_delegation(state->inode,
1551                                              data->owner->so_cred,
1552                                              &data->o_res);
1553 }
1554
1555 /*
1556  * Check the inode attributes against the CLAIM_PREVIOUS returned attributes
1557  * and update the nfs4_state.
1558  */
1559 static struct nfs4_state *
1560 _nfs4_opendata_reclaim_to_nfs4_state(struct nfs4_opendata *data)
1561 {
1562         struct inode *inode = data->state->inode;
1563         struct nfs4_state *state = data->state;
1564         int ret;
1565
1566         if (!data->rpc_done) {
1567                 if (data->rpc_status) {
1568                         ret = data->rpc_status;
1569                         goto err;
1570                 }
1571                 /* cached opens have already been processed */
1572                 goto update;
1573         }
1574
1575         ret = nfs_refresh_inode(inode, &data->f_attr);
1576         if (ret)
1577                 goto err;
1578
1579         if (data->o_res.delegation_type != 0)
1580                 nfs4_opendata_check_deleg(data, state);
1581 update:
1582         update_open_stateid(state, &data->o_res.stateid, NULL,
1583                             data->o_arg.fmode);
1584         atomic_inc(&state->count);
1585
1586         return state;
1587 err:
1588         return ERR_PTR(ret);
1589
1590 }
1591
1592 static struct nfs4_state *
1593 _nfs4_opendata_to_nfs4_state(struct nfs4_opendata *data)
1594 {
1595         struct inode *inode;
1596         struct nfs4_state *state = NULL;
1597         int ret;
1598
1599         if (!data->rpc_done) {
1600                 state = nfs4_try_open_cached(data);
1601                 goto out;
1602         }
1603
1604         ret = -EAGAIN;
1605         if (!(data->f_attr.valid & NFS_ATTR_FATTR))
1606                 goto err;
1607         inode = nfs_fhget(data->dir->d_sb, &data->o_res.fh, &data->f_attr, data->f_label);
1608         ret = PTR_ERR(inode);
1609         if (IS_ERR(inode))
1610                 goto err;
1611         ret = -ENOMEM;
1612         state = nfs4_get_open_state(inode, data->owner);
1613         if (state == NULL)
1614                 goto err_put_inode;
1615         if (data->o_res.delegation_type != 0)
1616                 nfs4_opendata_check_deleg(data, state);
1617         update_open_stateid(state, &data->o_res.stateid, NULL,
1618                         data->o_arg.fmode);
1619         iput(inode);
1620 out:
1621         nfs_release_seqid(data->o_arg.seqid);
1622         return state;
1623 err_put_inode:
1624         iput(inode);
1625 err:
1626         return ERR_PTR(ret);
1627 }
1628
1629 static struct nfs4_state *
1630 nfs4_opendata_to_nfs4_state(struct nfs4_opendata *data)
1631 {
1632         if (data->o_arg.claim == NFS4_OPEN_CLAIM_PREVIOUS)
1633                 return _nfs4_opendata_reclaim_to_nfs4_state(data);
1634         return _nfs4_opendata_to_nfs4_state(data);
1635 }
1636
1637 static struct nfs_open_context *nfs4_state_find_open_context(struct nfs4_state *state)
1638 {
1639         struct nfs_inode *nfsi = NFS_I(state->inode);
1640         struct nfs_open_context *ctx;
1641
1642         spin_lock(&state->inode->i_lock);
1643         list_for_each_entry(ctx, &nfsi->open_files, list) {
1644                 if (ctx->state != state)
1645                         continue;
1646                 get_nfs_open_context(ctx);
1647                 spin_unlock(&state->inode->i_lock);
1648                 return ctx;
1649         }
1650         spin_unlock(&state->inode->i_lock);
1651         return ERR_PTR(-ENOENT);
1652 }
1653
1654 static struct nfs4_opendata *nfs4_open_recoverdata_alloc(struct nfs_open_context *ctx,
1655                 struct nfs4_state *state, enum open_claim_type4 claim)
1656 {
1657         struct nfs4_opendata *opendata;
1658
1659         opendata = nfs4_opendata_alloc(ctx->dentry, state->owner, 0, 0,
1660                         NULL, NULL, claim, GFP_NOFS);
1661         if (opendata == NULL)
1662                 return ERR_PTR(-ENOMEM);
1663         opendata->state = state;
1664         atomic_inc(&state->count);
1665         return opendata;
1666 }
1667
1668 static int nfs4_open_recover_helper(struct nfs4_opendata *opendata,
1669                 fmode_t fmode)
1670 {
1671         struct nfs4_state *newstate;
1672         int ret;
1673
1674         if (!nfs4_mode_match_open_stateid(opendata->state, fmode))
1675                 return 0;
1676         opendata->o_arg.open_flags = 0;
1677         opendata->o_arg.fmode = fmode;
1678         opendata->o_arg.share_access = nfs4_map_atomic_open_share(
1679                         NFS_SB(opendata->dentry->d_sb),
1680                         fmode, 0);
1681         memset(&opendata->o_res, 0, sizeof(opendata->o_res));
1682         memset(&opendata->c_res, 0, sizeof(opendata->c_res));
1683         nfs4_init_opendata_res(opendata);
1684         ret = _nfs4_recover_proc_open(opendata);
1685         if (ret != 0)
1686                 return ret; 
1687         newstate = nfs4_opendata_to_nfs4_state(opendata);
1688         if (IS_ERR(newstate))
1689                 return PTR_ERR(newstate);
1690         if (newstate != opendata->state)
1691                 ret = -ESTALE;
1692         nfs4_close_state(newstate, fmode);
1693         return ret;
1694 }
1695
1696 static int nfs4_open_recover(struct nfs4_opendata *opendata, struct nfs4_state *state)
1697 {
1698         int ret;
1699
1700         /* Don't trigger recovery in nfs_test_and_clear_all_open_stateid */
1701         clear_bit(NFS_O_RDWR_STATE, &state->flags);
1702         clear_bit(NFS_O_WRONLY_STATE, &state->flags);
1703         clear_bit(NFS_O_RDONLY_STATE, &state->flags);
1704         /* memory barrier prior to reading state->n_* */
1705         clear_bit(NFS_DELEGATED_STATE, &state->flags);
1706         clear_bit(NFS_OPEN_STATE, &state->flags);
1707         smp_rmb();
1708         ret = nfs4_open_recover_helper(opendata, FMODE_READ|FMODE_WRITE);
1709         if (ret != 0)
1710                 return ret;
1711         ret = nfs4_open_recover_helper(opendata, FMODE_WRITE);
1712         if (ret != 0)
1713                 return ret;
1714         ret = nfs4_open_recover_helper(opendata, FMODE_READ);
1715         if (ret != 0)
1716                 return ret;
1717         /*
1718          * We may have performed cached opens for all three recoveries.
1719          * Check if we need to update the current stateid.
1720          */
1721         if (test_bit(NFS_DELEGATED_STATE, &state->flags) == 0 &&
1722             !nfs4_stateid_match(&state->stateid, &state->open_stateid)) {
1723                 write_seqlock(&state->seqlock);
1724                 if (test_bit(NFS_DELEGATED_STATE, &state->flags) == 0)
1725                         nfs4_stateid_copy(&state->stateid, &state->open_stateid);
1726                 write_sequnlock(&state->seqlock);
1727         }
1728         return 0;
1729 }
1730
1731 /*
1732  * OPEN_RECLAIM:
1733  *      reclaim state on the server after a reboot.
1734  */
1735 static int _nfs4_do_open_reclaim(struct nfs_open_context *ctx, struct nfs4_state *state)
1736 {
1737         struct nfs_delegation *delegation;
1738         struct nfs4_opendata *opendata;
1739         fmode_t delegation_type = 0;
1740         int status;
1741
1742         opendata = nfs4_open_recoverdata_alloc(ctx, state,
1743                         NFS4_OPEN_CLAIM_PREVIOUS);
1744         if (IS_ERR(opendata))
1745                 return PTR_ERR(opendata);
1746         rcu_read_lock();
1747         delegation = rcu_dereference(NFS_I(state->inode)->delegation);
1748         if (delegation != NULL && test_bit(NFS_DELEGATION_NEED_RECLAIM, &delegation->flags) != 0)
1749                 delegation_type = delegation->type;
1750         rcu_read_unlock();
1751         opendata->o_arg.u.delegation_type = delegation_type;
1752         status = nfs4_open_recover(opendata, state);
1753         nfs4_opendata_put(opendata);
1754         return status;
1755 }
1756
1757 static int nfs4_do_open_reclaim(struct nfs_open_context *ctx, struct nfs4_state *state)
1758 {
1759         struct nfs_server *server = NFS_SERVER(state->inode);
1760         struct nfs4_exception exception = { };
1761         int err;
1762         do {
1763                 err = _nfs4_do_open_reclaim(ctx, state);
1764                 trace_nfs4_open_reclaim(ctx, 0, err);
1765                 if (nfs4_clear_cap_atomic_open_v1(server, err, &exception))
1766                         continue;
1767                 if (err != -NFS4ERR_DELAY)
1768                         break;
1769                 nfs4_handle_exception(server, err, &exception);
1770         } while (exception.retry);
1771         return err;
1772 }
1773
1774 static int nfs4_open_reclaim(struct nfs4_state_owner *sp, struct nfs4_state *state)
1775 {
1776         struct nfs_open_context *ctx;
1777         int ret;
1778
1779         ctx = nfs4_state_find_open_context(state);
1780         if (IS_ERR(ctx))
1781                 return -EAGAIN;
1782         ret = nfs4_do_open_reclaim(ctx, state);
1783         put_nfs_open_context(ctx);
1784         return ret;
1785 }
1786
1787 static int nfs4_handle_delegation_recall_error(struct nfs_server *server, struct nfs4_state *state, const nfs4_stateid *stateid, int err)
1788 {
1789         switch (err) {
1790                 default:
1791                         printk(KERN_ERR "NFS: %s: unhandled error "
1792                                         "%d.\n", __func__, err);
1793                 case 0:
1794                 case -ENOENT:
1795                 case -EAGAIN:
1796                 case -ESTALE:
1797                         break;
1798                 case -NFS4ERR_BADSESSION:
1799                 case -NFS4ERR_BADSLOT:
1800                 case -NFS4ERR_BAD_HIGH_SLOT:
1801                 case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION:
1802                 case -NFS4ERR_DEADSESSION:
1803                         set_bit(NFS_DELEGATED_STATE, &state->flags);
1804                         nfs4_schedule_session_recovery(server->nfs_client->cl_session, err);
1805                         return -EAGAIN;
1806                 case -NFS4ERR_STALE_CLIENTID:
1807                 case -NFS4ERR_STALE_STATEID:
1808                         set_bit(NFS_DELEGATED_STATE, &state->flags);
1809                 case -NFS4ERR_EXPIRED:
1810                         /* Don't recall a delegation if it was lost */
1811                         nfs4_schedule_lease_recovery(server->nfs_client);
1812                         return -EAGAIN;
1813                 case -NFS4ERR_MOVED:
1814                         nfs4_schedule_migration_recovery(server);
1815                         return -EAGAIN;
1816                 case -NFS4ERR_LEASE_MOVED:
1817                         nfs4_schedule_lease_moved_recovery(server->nfs_client);
1818                         return -EAGAIN;
1819                 case -NFS4ERR_DELEG_REVOKED:
1820                 case -NFS4ERR_ADMIN_REVOKED:
1821                 case -NFS4ERR_BAD_STATEID:
1822                 case -NFS4ERR_OPENMODE:
1823                         nfs_inode_find_state_and_recover(state->inode,
1824                                         stateid);
1825                         nfs4_schedule_stateid_recovery(server, state);
1826                         return -EAGAIN;
1827                 case -NFS4ERR_DELAY:
1828                 case -NFS4ERR_GRACE:
1829                         set_bit(NFS_DELEGATED_STATE, &state->flags);
1830                         ssleep(1);
1831                         return -EAGAIN;
1832                 case -ENOMEM:
1833                 case -NFS4ERR_DENIED:
1834                         /* kill_proc(fl->fl_pid, SIGLOST, 1); */
1835                         return 0;
1836         }
1837         return err;
1838 }
1839
1840 int nfs4_open_delegation_recall(struct nfs_open_context *ctx,
1841                 struct nfs4_state *state, const nfs4_stateid *stateid,
1842                 fmode_t type)
1843 {
1844         struct nfs_server *server = NFS_SERVER(state->inode);
1845         struct nfs4_opendata *opendata;
1846         int err = 0;
1847
1848         opendata = nfs4_open_recoverdata_alloc(ctx, state,
1849                         NFS4_OPEN_CLAIM_DELEG_CUR_FH);
1850         if (IS_ERR(opendata))
1851                 return PTR_ERR(opendata);
1852         nfs4_stateid_copy(&opendata->o_arg.u.delegation, stateid);
1853         write_seqlock(&state->seqlock);
1854         nfs4_stateid_copy(&state->stateid, &state->open_stateid);
1855         write_sequnlock(&state->seqlock);
1856         clear_bit(NFS_DELEGATED_STATE, &state->flags);
1857         switch (type & (FMODE_READ|FMODE_WRITE)) {
1858         case FMODE_READ|FMODE_WRITE:
1859         case FMODE_WRITE:
1860                 err = nfs4_open_recover_helper(opendata, FMODE_READ|FMODE_WRITE);
1861                 if (err)
1862                         break;
1863                 err = nfs4_open_recover_helper(opendata, FMODE_WRITE);
1864                 if (err)
1865                         break;
1866         case FMODE_READ:
1867                 err = nfs4_open_recover_helper(opendata, FMODE_READ);
1868         }
1869         nfs4_opendata_put(opendata);
1870         return nfs4_handle_delegation_recall_error(server, state, stateid, err);
1871 }
1872
1873 static void nfs4_open_confirm_prepare(struct rpc_task *task, void *calldata)
1874 {
1875         struct nfs4_opendata *data = calldata;
1876
1877         nfs40_setup_sequence(data->o_arg.server->nfs_client->cl_slot_tbl,
1878                              &data->c_arg.seq_args, &data->c_res.seq_res, task);
1879 }
1880
1881 static void nfs4_open_confirm_done(struct rpc_task *task, void *calldata)
1882 {
1883         struct nfs4_opendata *data = calldata;
1884
1885         nfs40_sequence_done(task, &data->c_res.seq_res);
1886
1887         data->rpc_status = task->tk_status;
1888         if (data->rpc_status == 0) {
1889                 nfs4_stateid_copy(&data->o_res.stateid, &data->c_res.stateid);
1890                 nfs_confirm_seqid(&data->owner->so_seqid, 0);
1891                 renew_lease(data->o_res.server, data->timestamp);
1892                 data->rpc_done = 1;
1893         }
1894 }
1895
1896 static void nfs4_open_confirm_release(void *calldata)
1897 {
1898         struct nfs4_opendata *data = calldata;
1899         struct nfs4_state *state = NULL;
1900
1901         /* If this request hasn't been cancelled, do nothing */
1902         if (data->cancelled == 0)
1903                 goto out_free;
1904         /* In case of error, no cleanup! */
1905         if (!data->rpc_done)
1906                 goto out_free;
1907         state = nfs4_opendata_to_nfs4_state(data);
1908         if (!IS_ERR(state))
1909                 nfs4_close_state(state, data->o_arg.fmode);
1910 out_free:
1911         nfs4_opendata_put(data);
1912 }
1913
1914 static const struct rpc_call_ops nfs4_open_confirm_ops = {
1915         .rpc_call_prepare = nfs4_open_confirm_prepare,
1916         .rpc_call_done = nfs4_open_confirm_done,
1917         .rpc_release = nfs4_open_confirm_release,
1918 };
1919
1920 /*
1921  * Note: On error, nfs4_proc_open_confirm will free the struct nfs4_opendata
1922  */
1923 static int _nfs4_proc_open_confirm(struct nfs4_opendata *data)
1924 {
1925         struct nfs_server *server = NFS_SERVER(d_inode(data->dir));
1926         struct rpc_task *task;
1927         struct  rpc_message msg = {
1928                 .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_CONFIRM],
1929                 .rpc_argp = &data->c_arg,
1930                 .rpc_resp = &data->c_res,
1931                 .rpc_cred = data->owner->so_cred,
1932         };
1933         struct rpc_task_setup task_setup_data = {
1934                 .rpc_client = server->client,
1935                 .rpc_message = &msg,
1936                 .callback_ops = &nfs4_open_confirm_ops,
1937                 .callback_data = data,
1938                 .workqueue = nfsiod_workqueue,
1939                 .flags = RPC_TASK_ASYNC,
1940         };
1941         int status;
1942
1943         nfs4_init_sequence(&data->c_arg.seq_args, &data->c_res.seq_res, 1);
1944         kref_get(&data->kref);
1945         data->rpc_done = 0;
1946         data->rpc_status = 0;
1947         data->timestamp = jiffies;
1948         if (data->is_recover)
1949                 nfs4_set_sequence_privileged(&data->c_arg.seq_args);
1950         task = rpc_run_task(&task_setup_data);
1951         if (IS_ERR(task))
1952                 return PTR_ERR(task);
1953         status = nfs4_wait_for_completion_rpc_task(task);
1954         if (status != 0) {
1955                 data->cancelled = 1;
1956                 smp_wmb();
1957         } else
1958                 status = data->rpc_status;
1959         rpc_put_task(task);
1960         return status;
1961 }
1962
1963 static void nfs4_open_prepare(struct rpc_task *task, void *calldata)
1964 {
1965         struct nfs4_opendata *data = calldata;
1966         struct nfs4_state_owner *sp = data->owner;
1967         struct nfs_client *clp = sp->so_server->nfs_client;
1968         enum open_claim_type4 claim = data->o_arg.claim;
1969
1970         if (nfs_wait_on_sequence(data->o_arg.seqid, task) != 0)
1971                 goto out_wait;
1972         /*
1973          * Check if we still need to send an OPEN call, or if we can use
1974          * a delegation instead.
1975          */
1976         if (data->state != NULL) {
1977                 struct nfs_delegation *delegation;
1978
1979                 if (can_open_cached(data->state, data->o_arg.fmode, data->o_arg.open_flags))
1980                         goto out_no_action;
1981                 rcu_read_lock();
1982                 delegation = rcu_dereference(NFS_I(data->state->inode)->delegation);
1983                 if (can_open_delegated(delegation, data->o_arg.fmode, claim))
1984                         goto unlock_no_action;
1985                 rcu_read_unlock();
1986         }
1987         /* Update client id. */
1988         data->o_arg.clientid = clp->cl_clientid;
1989         switch (claim) {
1990         default:
1991                 break;
1992         case NFS4_OPEN_CLAIM_PREVIOUS:
1993         case NFS4_OPEN_CLAIM_DELEG_CUR_FH:
1994         case NFS4_OPEN_CLAIM_DELEG_PREV_FH:
1995                 data->o_arg.open_bitmap = &nfs4_open_noattr_bitmap[0];
1996         case NFS4_OPEN_CLAIM_FH:
1997                 task->tk_msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_NOATTR];
1998                 nfs_copy_fh(&data->o_res.fh, data->o_arg.fh);
1999         }
2000         data->timestamp = jiffies;
2001         if (nfs4_setup_sequence(data->o_arg.server,
2002                                 &data->o_arg.seq_args,
2003                                 &data->o_res.seq_res,
2004                                 task) != 0)
2005                 nfs_release_seqid(data->o_arg.seqid);
2006
2007         /* Set the create mode (note dependency on the session type) */
2008         data->o_arg.createmode = NFS4_CREATE_UNCHECKED;
2009         if (data->o_arg.open_flags & O_EXCL) {
2010                 data->o_arg.createmode = NFS4_CREATE_EXCLUSIVE;
2011                 if (nfs4_has_persistent_session(clp))
2012                         data->o_arg.createmode = NFS4_CREATE_GUARDED;
2013                 else if (clp->cl_mvops->minor_version > 0)
2014                         data->o_arg.createmode = NFS4_CREATE_EXCLUSIVE4_1;
2015         }
2016         return;
2017 unlock_no_action:
2018         rcu_read_unlock();
2019 out_no_action:
2020         task->tk_action = NULL;
2021 out_wait:
2022         nfs4_sequence_done(task, &data->o_res.seq_res);
2023 }
2024
2025 static void nfs4_open_done(struct rpc_task *task, void *calldata)
2026 {
2027         struct nfs4_opendata *data = calldata;
2028
2029         data->rpc_status = task->tk_status;
2030
2031         if (!nfs4_sequence_done(task, &data->o_res.seq_res))
2032                 return;
2033
2034         if (task->tk_status == 0) {
2035                 if (data->o_res.f_attr->valid & NFS_ATTR_FATTR_TYPE) {
2036                         switch (data->o_res.f_attr->mode & S_IFMT) {
2037                         case S_IFREG:
2038                                 break;
2039                         case S_IFLNK:
2040                                 data->rpc_status = -ELOOP;
2041                                 break;
2042                         case S_IFDIR:
2043                                 data->rpc_status = -EISDIR;
2044                                 break;
2045                         default:
2046                                 data->rpc_status = -ENOTDIR;
2047                         }
2048                 }
2049                 renew_lease(data->o_res.server, data->timestamp);
2050                 if (!(data->o_res.rflags & NFS4_OPEN_RESULT_CONFIRM))
2051                         nfs_confirm_seqid(&data->owner->so_seqid, 0);
2052         }
2053         data->rpc_done = 1;
2054 }
2055
2056 static void nfs4_open_release(void *calldata)
2057 {
2058         struct nfs4_opendata *data = calldata;
2059         struct nfs4_state *state = NULL;
2060
2061         /* If this request hasn't been cancelled, do nothing */
2062         if (data->cancelled == 0)
2063                 goto out_free;
2064         /* In case of error, no cleanup! */
2065         if (data->rpc_status != 0 || !data->rpc_done)
2066                 goto out_free;
2067         /* In case we need an open_confirm, no cleanup! */
2068         if (data->o_res.rflags & NFS4_OPEN_RESULT_CONFIRM)
2069                 goto out_free;
2070         state = nfs4_opendata_to_nfs4_state(data);
2071         if (!IS_ERR(state))
2072                 nfs4_close_state(state, data->o_arg.fmode);
2073 out_free:
2074         nfs4_opendata_put(data);
2075 }
2076
2077 static const struct rpc_call_ops nfs4_open_ops = {
2078         .rpc_call_prepare = nfs4_open_prepare,
2079         .rpc_call_done = nfs4_open_done,
2080         .rpc_release = nfs4_open_release,
2081 };
2082
2083 static int nfs4_run_open_task(struct nfs4_opendata *data, int isrecover)
2084 {
2085         struct inode *dir = d_inode(data->dir);
2086         struct nfs_server *server = NFS_SERVER(dir);
2087         struct nfs_openargs *o_arg = &data->o_arg;
2088         struct nfs_openres *o_res = &data->o_res;
2089         struct rpc_task *task;
2090         struct rpc_message msg = {
2091                 .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN],
2092                 .rpc_argp = o_arg,
2093                 .rpc_resp = o_res,
2094                 .rpc_cred = data->owner->so_cred,
2095         };
2096         struct rpc_task_setup task_setup_data = {
2097                 .rpc_client = server->client,
2098                 .rpc_message = &msg,
2099                 .callback_ops = &nfs4_open_ops,
2100                 .callback_data = data,
2101                 .workqueue = nfsiod_workqueue,
2102                 .flags = RPC_TASK_ASYNC,
2103         };
2104         int status;
2105
2106         nfs4_init_sequence(&o_arg->seq_args, &o_res->seq_res, 1);
2107         kref_get(&data->kref);
2108         data->rpc_done = 0;
2109         data->rpc_status = 0;
2110         data->cancelled = 0;
2111         data->is_recover = 0;
2112         if (isrecover) {
2113                 nfs4_set_sequence_privileged(&o_arg->seq_args);
2114                 data->is_recover = 1;
2115         }
2116         task = rpc_run_task(&task_setup_data);
2117         if (IS_ERR(task))
2118                 return PTR_ERR(task);
2119         status = nfs4_wait_for_completion_rpc_task(task);
2120         if (status != 0) {
2121                 data->cancelled = 1;
2122                 smp_wmb();
2123         } else
2124                 status = data->rpc_status;
2125         rpc_put_task(task);
2126
2127         return status;
2128 }
2129
2130 static int _nfs4_recover_proc_open(struct nfs4_opendata *data)
2131 {
2132         struct inode *dir = d_inode(data->dir);
2133         struct nfs_openres *o_res = &data->o_res;
2134         int status;
2135
2136         status = nfs4_run_open_task(data, 1);
2137         if (status != 0 || !data->rpc_done)
2138                 return status;
2139
2140         nfs_fattr_map_and_free_names(NFS_SERVER(dir), &data->f_attr);
2141
2142         if (o_res->rflags & NFS4_OPEN_RESULT_CONFIRM) {
2143                 status = _nfs4_proc_open_confirm(data);
2144                 if (status != 0)
2145                         return status;
2146         }
2147
2148         return status;
2149 }
2150
2151 /*
2152  * Additional permission checks in order to distinguish between an
2153  * open for read, and an open for execute. This works around the
2154  * fact that NFSv4 OPEN treats read and execute permissions as being
2155  * the same.
2156  * Note that in the non-execute case, we want to turn off permission
2157  * checking if we just created a new file (POSIX open() semantics).
2158  */
2159 static int nfs4_opendata_access(struct rpc_cred *cred,
2160                                 struct nfs4_opendata *opendata,
2161                                 struct nfs4_state *state, fmode_t fmode,
2162                                 int openflags)
2163 {
2164         struct nfs_access_entry cache;
2165         u32 mask;
2166
2167         /* access call failed or for some reason the server doesn't
2168          * support any access modes -- defer access call until later */
2169         if (opendata->o_res.access_supported == 0)
2170                 return 0;
2171
2172         mask = 0;
2173         /*
2174          * Use openflags to check for exec, because fmode won't
2175          * always have FMODE_EXEC set when file open for exec.
2176          */
2177         if (openflags & __FMODE_EXEC) {
2178                 /* ONLY check for exec rights */
2179                 mask = MAY_EXEC;
2180         } else if ((fmode & FMODE_READ) && !opendata->file_created)
2181                 mask = MAY_READ;
2182
2183         cache.cred = cred;
2184         cache.jiffies = jiffies;
2185         nfs_access_set_mask(&cache, opendata->o_res.access_result);
2186         nfs_access_add_cache(state->inode, &cache);
2187
2188         if ((mask & ~cache.mask & (MAY_READ | MAY_EXEC)) == 0)
2189                 return 0;
2190
2191         /* even though OPEN succeeded, access is denied. Close the file */
2192         nfs4_close_state(state, fmode);
2193         return -EACCES;
2194 }
2195
2196 /*
2197  * Note: On error, nfs4_proc_open will free the struct nfs4_opendata
2198  */
2199 static int _nfs4_proc_open(struct nfs4_opendata *data)
2200 {
2201         struct inode *dir = d_inode(data->dir);
2202         struct nfs_server *server = NFS_SERVER(dir);
2203         struct nfs_openargs *o_arg = &data->o_arg;
2204         struct nfs_openres *o_res = &data->o_res;
2205         int status;
2206
2207         status = nfs4_run_open_task(data, 0);
2208         if (!data->rpc_done)
2209                 return status;
2210         if (status != 0) {
2211                 if (status == -NFS4ERR_BADNAME &&
2212                                 !(o_arg->open_flags & O_CREAT))
2213                         return -ENOENT;
2214                 return status;
2215         }
2216
2217         nfs_fattr_map_and_free_names(server, &data->f_attr);
2218
2219         if (o_arg->open_flags & O_CREAT) {
2220                 update_changeattr(dir, &o_res->cinfo);
2221                 if (o_arg->open_flags & O_EXCL)
2222                         data->file_created = 1;
2223                 else if (o_res->cinfo.before != o_res->cinfo.after)
2224                         data->file_created = 1;
2225         }
2226         if ((o_res->rflags & NFS4_OPEN_RESULT_LOCKTYPE_POSIX) == 0)
2227                 server->caps &= ~NFS_CAP_POSIX_LOCK;
2228         if(o_res->rflags & NFS4_OPEN_RESULT_CONFIRM) {
2229                 status = _nfs4_proc_open_confirm(data);
2230                 if (status != 0)
2231                         return status;
2232         }
2233         if (!(o_res->f_attr->valid & NFS_ATTR_FATTR))
2234                 nfs4_proc_getattr(server, &o_res->fh, o_res->f_attr, o_res->f_label);
2235         return 0;
2236 }
2237
2238 static int nfs4_recover_expired_lease(struct nfs_server *server)
2239 {
2240         return nfs4_client_recover_expired_lease(server->nfs_client);
2241 }
2242
2243 /*
2244  * OPEN_EXPIRED:
2245  *      reclaim state on the server after a network partition.
2246  *      Assumes caller holds the appropriate lock
2247  */
2248 static int _nfs4_open_expired(struct nfs_open_context *ctx, struct nfs4_state *state)
2249 {
2250         struct nfs4_opendata *opendata;
2251         int ret;
2252
2253         opendata = nfs4_open_recoverdata_alloc(ctx, state,
2254                         NFS4_OPEN_CLAIM_FH);
2255         if (IS_ERR(opendata))
2256                 return PTR_ERR(opendata);
2257         ret = nfs4_open_recover(opendata, state);
2258         if (ret == -ESTALE)
2259                 d_drop(ctx->dentry);
2260         nfs4_opendata_put(opendata);
2261         return ret;
2262 }
2263
2264 static int nfs4_do_open_expired(struct nfs_open_context *ctx, struct nfs4_state *state)
2265 {
2266         struct nfs_server *server = NFS_SERVER(state->inode);
2267         struct nfs4_exception exception = { };
2268         int err;
2269
2270         do {
2271                 err = _nfs4_open_expired(ctx, state);
2272                 trace_nfs4_open_expired(ctx, 0, err);
2273                 if (nfs4_clear_cap_atomic_open_v1(server, err, &exception))
2274                         continue;
2275                 switch (err) {
2276                 default:
2277                         goto out;
2278                 case -NFS4ERR_GRACE:
2279                 case -NFS4ERR_DELAY:
2280                         nfs4_handle_exception(server, err, &exception);
2281                         err = 0;
2282                 }
2283         } while (exception.retry);
2284 out:
2285         return err;
2286 }
2287
2288 static int nfs4_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *state)
2289 {
2290         struct nfs_open_context *ctx;
2291         int ret;
2292
2293         ctx = nfs4_state_find_open_context(state);
2294         if (IS_ERR(ctx))
2295                 return -EAGAIN;
2296         ret = nfs4_do_open_expired(ctx, state);
2297         put_nfs_open_context(ctx);
2298         return ret;
2299 }
2300
2301 static void nfs_finish_clear_delegation_stateid(struct nfs4_state *state)
2302 {
2303         nfs_remove_bad_delegation(state->inode);
2304         write_seqlock(&state->seqlock);
2305         nfs4_stateid_copy(&state->stateid, &state->open_stateid);
2306         write_sequnlock(&state->seqlock);
2307         clear_bit(NFS_DELEGATED_STATE, &state->flags);
2308 }
2309
2310 static void nfs40_clear_delegation_stateid(struct nfs4_state *state)
2311 {
2312         if (rcu_access_pointer(NFS_I(state->inode)->delegation) != NULL)
2313                 nfs_finish_clear_delegation_stateid(state);
2314 }
2315
/*
 * NFSv4.0 handler for expired opens.  Any delegation on the inode is
 * dropped first, then the opens are recovered.
 *
 * Returns the status of nfs4_open_expired().
 */
static int nfs40_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *state)
{
	int status;

	/* NFSv4.0 doesn't allow for delegation recovery on open expire */
	nfs40_clear_delegation_stateid(state);
	status = nfs4_open_expired(sp, state);
	return status;
}
2322
2323 #if defined(CONFIG_NFS_V4_1)
2324 static void nfs41_check_delegation_stateid(struct nfs4_state *state)
2325 {
2326         struct nfs_server *server = NFS_SERVER(state->inode);
2327         nfs4_stateid stateid;
2328         struct nfs_delegation *delegation;
2329         struct rpc_cred *cred;
2330         int status;
2331
2332         /* Get the delegation credential for use by test/free_stateid */
2333         rcu_read_lock();
2334         delegation = rcu_dereference(NFS_I(state->inode)->delegation);
2335         if (delegation == NULL) {
2336                 rcu_read_unlock();
2337                 return;
2338         }
2339
2340         nfs4_stateid_copy(&stateid, &delegation->stateid);
2341         cred = get_rpccred(delegation->cred);
2342         rcu_read_unlock();
2343         status = nfs41_test_stateid(server, &stateid, cred);
2344         trace_nfs4_test_delegation_stateid(state, NULL, status);
2345
2346         if (status != NFS_OK) {
2347                 /* Free the stateid unless the server explicitly
2348                  * informs us the stateid is unrecognized. */
2349                 if (status != -NFS4ERR_BAD_STATEID)
2350                         nfs41_free_stateid(server, &stateid, cred);
2351                 nfs_finish_clear_delegation_stateid(state);
2352         }
2353
2354         put_rpccred(cred);
2355 }
2356
2357 /**
2358  * nfs41_check_open_stateid - possibly free an open stateid
2359  *
2360  * @state: NFSv4 state for an inode
2361  *
2362  * Returns NFS_OK if recovery for this stateid is now finished.
2363  * Otherwise a negative NFS4ERR value is returned.
2364  */
2365 static int nfs41_check_open_stateid(struct nfs4_state *state)
2366 {
2367         struct nfs_server *server = NFS_SERVER(state->inode);
2368         nfs4_stateid *stateid = &state->open_stateid;
2369         struct rpc_cred *cred = state->owner->so_cred;
2370         int status;
2371
2372         /* If a state reset has been done, test_stateid is unneeded */
2373         if ((test_bit(NFS_O_RDONLY_STATE, &state->flags) == 0) &&
2374             (test_bit(NFS_O_WRONLY_STATE, &state->flags) == 0) &&
2375             (test_bit(NFS_O_RDWR_STATE, &state->flags) == 0))
2376                 return -NFS4ERR_BAD_STATEID;
2377
2378         status = nfs41_test_stateid(server, stateid, cred);
2379         trace_nfs4_test_open_stateid(state, NULL, status);
2380         if (status != NFS_OK) {
2381                 /* Free the stateid unless the server explicitly
2382                  * informs us the stateid is unrecognized. */
2383                 if (status != -NFS4ERR_BAD_STATEID)
2384                         nfs41_free_stateid(server, stateid, cred);
2385
2386                 clear_bit(NFS_O_RDONLY_STATE, &state->flags);
2387                 clear_bit(NFS_O_WRONLY_STATE, &state->flags);
2388                 clear_bit(NFS_O_RDWR_STATE, &state->flags);
2389                 clear_bit(NFS_OPEN_STATE, &state->flags);
2390         }
2391         return status;
2392 }
2393
2394 static int nfs41_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *state)
2395 {
2396         int status;
2397
2398         nfs41_check_delegation_stateid(state);
2399         status = nfs41_check_open_stateid(state);
2400         if (status != NFS_OK)
2401                 status = nfs4_open_expired(sp, state);
2402         return status;
2403 }
2404 #endif
2405
2406 /*
2407  * on an EXCLUSIVE create, the server should send back a bitmask with FATTR4-*
2408  * fields corresponding to attributes that were used to store the verifier.
2409  * Make sure we clobber those fields in the later setattr call
2410  */
2411 static inline void nfs4_exclusive_attrset(struct nfs4_opendata *opendata,
2412                                 struct iattr *sattr, struct nfs4_label **label)
2413 {
2414         const u32 *attrset = opendata->o_res.attrset;
2415
2416         if ((attrset[1] & FATTR4_WORD1_TIME_ACCESS) &&
2417             !(sattr->ia_valid & ATTR_ATIME_SET))
2418                 sattr->ia_valid |= ATTR_ATIME;
2419
2420         if ((attrset[1] & FATTR4_WORD1_TIME_MODIFY) &&
2421             !(sattr->ia_valid & ATTR_MTIME_SET))
2422                 sattr->ia_valid |= ATTR_MTIME;
2423
2424         /* Except MODE, it seems harmless of setting twice. */
2425         if ((attrset[1] & FATTR4_WORD1_MODE))
2426                 sattr->ia_valid &= ~ATTR_MODE;
2427
2428         if (attrset[2] & FATTR4_WORD2_SECURITY_LABEL)
2429                 *label = NULL;
2430 }
2431
/*
 * Issue the OPEN described by @opendata and, on success, store the
 * resulting nfs4_state in @ctx->state.
 *
 * @opendata: prepared open request; its owner and dentry are used here
 * @fmode:    open mode, passed on to nfs4_opendata_access()
 * @flags:    open flags, passed on to nfs4_opendata_access()
 * @ctx:      open context that receives the state (and possibly a new dentry)
 *
 * Returns 0 on success, otherwise the error reported by _nfs4_proc_open(),
 * nfs4_opendata_to_nfs4_state() or nfs4_opendata_access().
 */
static int _nfs4_open_and_get_state(struct nfs4_opendata *opendata,
                fmode_t fmode,
                int flags,
                struct nfs_open_context *ctx)
{
        struct nfs4_state_owner *sp = opendata->owner;
        struct nfs_server *server = sp->so_server;
        struct dentry *dentry;
        struct nfs4_state *state;
        unsigned int seq;
        int ret;

        /* Sampled now so that a change can be detected once ctx is attached */
        seq = raw_seqcount_begin(&sp->so_reclaim_seqcount);

        ret = _nfs4_proc_open(opendata);
        if (ret != 0)
                goto out;

        state = nfs4_opendata_to_nfs4_state(opendata);
        /* ret is only meaningful if state is an error pointer */
        ret = PTR_ERR(state);
        if (IS_ERR(state))
                goto out;
        if (server->caps & NFS_CAP_POSIX_LOCK)
                set_bit(NFS_STATE_POSIX_LOCKS, &state->flags);

        dentry = opendata->dentry;
        if (d_really_is_negative(dentry)) {
                /* FIXME: Is this d_drop() ever needed? */
                d_drop(dentry);
                dentry = d_add_unique(dentry, igrab(state->inode));
                if (dentry == NULL) {
                        /* No other dentry was handed back: keep using ours */
                        dentry = opendata->dentry;
                } else if (dentry != ctx->dentry) {
                        /* A different dentry came back: make ctx hold that one */
                        dput(ctx->dentry);
                        ctx->dentry = dget(dentry);
                }
                nfs_set_verifier(dentry,
                                nfs_save_change_attribute(d_inode(opendata->dir)));
        }

        ret = nfs4_opendata_access(sp->so_cred, opendata, state, fmode, flags);
        if (ret != 0)
                goto out;

        ctx->state = state;
        if (d_inode(dentry) == state->inode) {
                nfs_inode_attach_open_context(ctx);
                /* The reclaim sequence moved since the sample above */
                if (read_seqcount_retry(&sp->so_reclaim_seqcount, seq))
                        nfs4_schedule_stateid_recovery(server, state);
        }
out:
        return ret;
}
2485
/*
 * Perform a single OPEN attempt for the dentry in @ctx.
 *
 * @dir:    directory inode; only used to look up the nfs_server
 * @ctx:    open context; supplies dentry, credential and mode, and has its
 *          state set by _nfs4_open_and_get_state() on success
 * @flags:  open flags
 * @sattr:  attributes for the open/create; may be adjusted and re-sent via
 *          SETATTR after an exclusive create
 * @label:  security label to set, or NULL
 * @opened: if non-NULL, FILE_CREATED is OR-ed in when the file was created
 *
 * Returns 0 on success or a negative error.  The state itself is handed
 * back through @ctx->state, not through the return value.
 */
static int _nfs4_do_open(struct inode *dir,
                        struct nfs_open_context *ctx,
                        int flags,
                        struct iattr *sattr,
                        struct nfs4_label *label,
                        int *opened)
{
        struct nfs4_state_owner  *sp;
        struct nfs4_state     *state = NULL;
        struct nfs_server       *server = NFS_SERVER(dir);
        struct nfs4_opendata *opendata;
        struct dentry *dentry = ctx->dentry;
        struct rpc_cred *cred = ctx->cred;
        struct nfs4_threshold **ctx_th = &ctx->mdsthreshold;
        fmode_t fmode = ctx->mode & (FMODE_READ|FMODE_WRITE|FMODE_EXEC);
        enum open_claim_type4 claim = NFS4_OPEN_CLAIM_NULL;
        struct nfs4_label *olabel = NULL;
        int status;

        /* Protect against reboot recovery conflicts */
        status = -ENOMEM;
        sp = nfs4_get_state_owner(server, cred, GFP_KERNEL);
        if (sp == NULL) {
                dprintk("nfs4_do_open: nfs4_get_state_owner failed!\n");
                goto out_err;
        }
        status = nfs4_recover_expired_lease(server);
        if (status != 0)
                goto err_put_state_owner;
        if (d_really_is_positive(dentry))
                nfs4_return_incompatible_delegation(d_inode(dentry), fmode);
        /* Default error for the allocation failures handled below */
        status = -ENOMEM;
        /* The dentry already has an inode: use CLAIM_FH instead of CLAIM_NULL */
        if (d_really_is_positive(dentry))
                claim = NFS4_OPEN_CLAIM_FH;
        opendata = nfs4_opendata_alloc(dentry, sp, fmode, flags, sattr,
                        label, claim, GFP_KERNEL);
        if (opendata == NULL)
                goto err_put_state_owner;

        /* An output label is only allocated when the caller passed one in */
        if (label) {
                olabel = nfs4_label_alloc(server, GFP_KERNEL);
                if (IS_ERR(olabel)) {
                        status = PTR_ERR(olabel);
                        goto err_opendata_put;
                }
        }

        if (server->attr_bitmask[2] & FATTR4_WORD2_MDSTHRESHOLD) {
                if (!opendata->f_attr.mdsthreshold) {
                        opendata->f_attr.mdsthreshold = pnfs_mdsthreshold_alloc();
                        /* status is still -ENOMEM here */
                        if (!opendata->f_attr.mdsthreshold)
                                goto err_free_label;
                }
                opendata->o_arg.open_bitmap = &nfs4_pnfs_open_bitmap[0];
        }
        if (d_really_is_positive(dentry))
                opendata->state = nfs4_get_open_state(d_inode(dentry), sp);

        status = _nfs4_open_and_get_state(opendata, fmode, flags, ctx);
        if (status != 0)
                goto err_free_label;
        state = ctx->state;

        /*
         * O_CREAT|O_EXCL with a create mode other than GUARDED: follow up
         * with a SETATTR, after letting nfs4_exclusive_attrset() adjust
         * sattr and label.
         */
        if ((opendata->o_arg.open_flags & (O_CREAT|O_EXCL)) == (O_CREAT|O_EXCL) &&
            (opendata->o_arg.createmode != NFS4_CREATE_GUARDED)) {
                nfs4_exclusive_attrset(opendata, sattr, &label);

                nfs_fattr_init(opendata->o_res.f_attr);
                /*
                 * NOTE: a SETATTR failure is not propagated; the function
                 * still returns 0 below.
                 */
                status = nfs4_do_setattr(state->inode, cred,
                                opendata->o_res.f_attr, sattr,
                                state, label, olabel);
                if (status == 0) {
                        nfs_setattr_update_inode(state->inode, sattr,
                                        opendata->o_res.f_attr);
                        nfs_setsecurity(state->inode, opendata->o_res.f_attr, olabel);
                }
        }
        if (opened && opendata->file_created)
                *opened |= FILE_CREATED;

        /* Hand the threshold over to the context; opendata no longer refers to it */
        if (pnfs_use_threshold(ctx_th, opendata->f_attr.mdsthreshold, server)) {
                *ctx_th = opendata->f_attr.mdsthreshold;
                opendata->f_attr.mdsthreshold = NULL;
        }

        nfs4_label_free(olabel);

        nfs4_opendata_put(opendata);
        nfs4_put_state_owner(sp);
        return 0;
        /* Error unwinding: each label releases one more resource than the next */
err_free_label:
        nfs4_label_free(olabel);
err_opendata_put:
        nfs4_opendata_put(opendata);
err_put_state_owner:
        nfs4_put_state_owner(sp);
out_err:
        return status;
}
2588
2589
2590 static struct nfs4_state *nfs4_do_open(struct inode *dir,
2591                                         struct nfs_open_context *ctx,
2592                                         int flags,
2593                                         struct iattr *sattr,
2594                                         struct nfs4_label *label,
2595                                         int *opened)
2596 {
2597         struct nfs_server *server = NFS_SERVER(dir);
2598         struct nfs4_exception exception = { };
2599         struct nfs4_state *res;
2600         int status;
2601
2602         do {
2603                 status = _nfs4_do_open(dir, ctx, flags, sattr, label, opened);
2604                 res = ctx->state;
2605                 trace_nfs4_open_file(ctx, flags, status);
2606                 if (status == 0)
2607                         break;
2608                 /* NOTE: BAD_SEQID means the server and client disagree about the
2609                  * book-keeping w.r.t. state-changing operations
2610                  * (OPEN/CLOSE/LOCK/LOCKU...)
2611                  * It is actually a sign of a bug on the client or on the server.
2612                  *
2613                  * If we receive a BAD_SEQID error in the particular case of
2614                  * doing an OPEN, we assume that nfs_increment_open_seqid() will
2615                  * have unhashed the old state_owner for us, and that we can
2616                  * therefore safely retry using a new one. We should still warn
2617                  * the user though...
2618                  */
2619                 if (status == -NFS4ERR_BAD_SEQID) {
2620                         pr_warn_ratelimited("NFS: v4 server %s "
2621                                         " returned a bad sequence-id error!\n",
2622                                         NFS_SERVER(dir)->nfs_client->cl_hostname);
2623                         exception.retry = 1;
2624                         continue;
2625                 }
2626                 /*
2627                  * BAD_STATEID on OPEN means that the server cancelled our
2628                  * state before it received the OPEN_CONFIRM.
2629                  * Recover by retrying the request as per the discussion
2630                  * on Page 181 of RFC3530.
2631                  */
2632                 if (status == -NFS4ERR_BAD_STATEID) {
2633                         exception.retry = 1;
2634                         continue;
2635                 }
2636                 if (status == -EAGAIN) {
2637                         /* We must have found a delegation */
2638                         exception.retry = 1;
2639                         continue;
2640                 }
2641                 if (nfs4_clear_cap_atomic_open_v1(server, status, &exception))
2642                         continue;
2643                 res = ERR_PTR(nfs4_handle_exception(server,
2644                                         status, &exception));
2645         } while (exception.retry);
2646         return res;
2647 }
2648
2649 static int _nfs4_do_setattr(struct inode *inode, struct rpc_cred *cred,
2650                             struct nfs_fattr *fattr, struct iattr *sattr,
2651                             struct nfs4_state *state, struct nfs4_label *ilabel,
2652                             struct nfs4_label *olabel)
2653 {
2654         struct nfs_server *server = NFS_SERVER(inode);
2655         struct nfs_setattrargs  arg = {
2656                 .fh             = NFS_FH(inode),
2657                 .iap            = sattr,
2658                 .server         = server,
2659                 .bitmask = server->attr_bitmask,
2660                 .label          = ilabel,
2661         };
2662         struct nfs_setattrres  res = {
2663                 .fattr          = fattr,
2664                 .label          = olabel,
2665                 .server         = server,
2666         };
2667         struct rpc_message msg = {
2668                 .rpc_proc       = &nfs4_procedures[NFSPROC4_CLNT_SETATTR],
2669                 .rpc_argp       = &arg,
2670                 .rpc_resp       = &res,
2671                 .rpc_cred       = cred,
2672         };
2673         unsigned long timestamp = jiffies;
2674         fmode_t fmode;
2675         bool truncate;
2676         int status;
2677
2678         arg.bitmask = nfs4_bitmask(server, ilabel);
2679         if (ilabel)
2680                 arg.bitmask = nfs4_bitmask(server, olabel);
2681
2682         nfs_fattr_init(fattr);
2683
2684         /* Servers should only apply open mode checks for file size changes */
2685         truncate = (sattr->ia_valid & ATTR_SIZE) ? true : false;
2686         fmode = truncate ? FMODE_WRITE : FMODE_READ;
2687
2688         if (nfs4_copy_delegation_stateid(&arg.stateid, inode, fmode)) {
2689                 /* Use that stateid */
2690         } else if (truncate && state != NULL) {
2691                 struct nfs_lockowner lockowner = {
2692                         .l_owner = current->files,
2693                         .l_pid = current->tgid,
2694                 };
2695                 if (!nfs4_valid_open_stateid(state))
2696                         return -EBADF;
2697                 if (nfs4_select_rw_stateid(&arg.stateid, state, FMODE_WRITE,
2698                                 &lockowner) == -EIO)
2699                         return -EBADF;
2700         } else
2701                 nfs4_stateid_copy(&arg.stateid, &zero_stateid);
2702
2703         status = nfs4_call_sync(server->client, server, &msg, &arg.seq_args, &res.seq_res, 1);
2704         if (status == 0 && state != NULL)
2705                 renew_lease(server, timestamp);
2706         return status;
2707 }
2708
2709 static int nfs4_do_setattr(struct inode *inode, struct rpc_cred *cred,
2710                            struct nfs_fattr *fattr, struct iattr *sattr,
2711                            struct nfs4_state *state, struct nfs4_label *ilabel,
2712                            struct nfs4_label *olabel)
2713 {
2714         struct nfs_server *server = NFS_SERVER(inode);
2715         struct nfs4_exception exception = {
2716                 .state = state,
2717                 .inode = inode,
2718         };
2719         int err;
2720         do {
2721                 err = _nfs4_do_setattr(inode, cred, fattr, sattr, state, ilabel, olabel);
2722                 trace_nfs4_setattr(inode, err);
2723                 switch (err) {
2724                 case -NFS4ERR_OPENMODE:
2725                         if (!(sattr->ia_valid & ATTR_SIZE)) {
2726                                 pr_warn_once("NFSv4: server %s is incorrectly "
2727                                                 "applying open mode checks to "
2728                                                 "a SETATTR that is not "
2729                                                 "changing file size.\n",
2730                                                 server->nfs_client->cl_hostname);
2731                         }
2732                         if (state && !(state->state & FMODE_WRITE)) {
2733                                 err = -EBADF;
2734                                 if (sattr->ia_valid & ATTR_OPEN)
2735                                         err = -EACCES;
2736                                 goto out;
2737                         }
2738                 }
2739                 err = nfs4_handle_exception(server, err, &exception);
2740         } while (exception.retry);
2741 out:
2742         return err;
2743 }
2744
2745 static bool
2746 nfs4_wait_on_layoutreturn(struct inode *inode, struct rpc_task *task)
2747 {
2748         if (inode == NULL || !nfs_have_layout(inode))
2749                 return false;
2750
2751         return pnfs_wait_on_layoutreturn(inode, task);
2752 }
2753
/* Per-call data for the CLOSE/OPEN_DOWNGRADE RPC started by nfs4_do_close() */
struct nfs4_closedata {
        struct inode *inode;            /* state->inode at the time of the call */
        struct nfs4_state *state;       /* open state being closed or downgraded */
        struct nfs_closeargs arg;       /* RPC arguments (msg.rpc_argp) */
        struct nfs_closeres res;        /* RPC results (msg.rpc_resp) */
        struct nfs_fattr fattr;         /* storage that res.fattr points at */
        unsigned long timestamp;        /* jiffies taken in nfs4_close_prepare() */
        bool roc;                       /* result of nfs4_roc() for the inode */
        u32 roc_barrier;                /* filled in by pnfs_roc_get_barrier() */
};
2764
2765 static void nfs4_free_closedata(void *data)
2766 {
2767         struct nfs4_closedata *calldata = data;
2768         struct nfs4_state_owner *sp = calldata->state->owner;
2769         struct super_block *sb = calldata->state->inode->i_sb;
2770
2771         if (calldata->roc)
2772                 pnfs_roc_release(calldata->state->inode);
2773         nfs4_put_open_state(calldata->state);
2774         nfs_free_seqid(calldata->arg.seqid);
2775         nfs4_put_state_owner(sp);
2776         nfs_sb_deactive(sb);
2777         kfree(calldata);
2778 }
2779
/*
 * rpc_call_done callback of nfs4_close_ops: evaluates the result of the
 * CLOSE/OPEN_DOWNGRADE, restarts the call where required, and otherwise
 * updates the open stateid.
 */
static void nfs4_close_done(struct rpc_task *task, void *data)
{
        struct nfs4_closedata *calldata = data;
        struct nfs4_state *state = calldata->state;
        struct nfs_server *server = NFS_SERVER(calldata->inode);
        /* Stays NULL unless the call succeeded */
        nfs4_stateid *res_stateid = NULL;

        dprintk("%s: begin!\n", __func__);
        if (!nfs4_sequence_done(task, &calldata->res.seq_res))
                return;
        trace_nfs4_close(state, &calldata->arg, &calldata->res, task->tk_status);
        /* hmm. we are done with the inode, and in the process of freeing
         * the state_owner. we keep this around to process errors
         */
        switch (task->tk_status) {
                case 0:
                        res_stateid = &calldata->res.stateid;
                        if (calldata->roc)
                                pnfs_roc_set_barrier(state->inode,
                                                     calldata->roc_barrier);
                        renew_lease(server, calldata->timestamp);
                        break;
                case -NFS4ERR_ADMIN_REVOKED:
                case -NFS4ERR_STALE_STATEID:
                case -NFS4ERR_OLD_STATEID:
                case -NFS4ERR_BAD_STATEID:
                case -NFS4ERR_EXPIRED:
                        /*
                         * The stateid we sent no longer matches the current
                         * open stateid: redo the call from the prepare step.
                         */
                        if (!nfs4_stateid_match(&calldata->arg.stateid,
                                                &state->open_stateid)) {
                                rpc_restart_call_prepare(task);
                                goto out_release;
                        }
                        /* fmode == 0 is a full CLOSE (see nfs4_close_prepare) */
                        if (calldata->arg.fmode == 0)
                                break;
                        /* fall through: a downgrade uses the generic handling */
                default:
                        if (nfs4_async_handle_error(task, server, state, NULL) == -EAGAIN) {
                                rpc_restart_call_prepare(task);
                                goto out_release;
                        }
        }
        nfs_clear_open_stateid(state, &calldata->arg.stateid,
                        res_stateid, calldata->arg.fmode);
out_release:
        nfs_release_seqid(calldata->arg.seqid);
        nfs_refresh_inode(calldata->inode, calldata->res.fattr);
        dprintk("%s: done, ret = %d!\n", __func__, task->tk_status);
}
2827
/*
 * rpc_call_prepare callback of nfs4_close_ops: works out whether a CLOSE
 * or an OPEN_DOWNGRADE has to be sent (or nothing at all) and fills in the
 * call arguments accordingly.
 */
static void nfs4_close_prepare(struct rpc_task *task, void *data)
{
        struct nfs4_closedata *calldata = data;
        struct nfs4_state *state = calldata->state;
        struct inode *inode = calldata->inode;
        bool is_rdonly, is_wronly, is_rdwr;
        int call_close = 0;

        dprintk("%s: begin!\n", __func__);
        if (nfs_wait_on_sequence(calldata->arg.seqid, task) != 0)
                goto out_wait;

        /* Start as OPEN_DOWNGRADE; replaced by CLOSE below when fmode ends up 0 */
        task->tk_msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_DOWNGRADE];
        spin_lock(&state->owner->so_lock);
        is_rdwr = test_bit(NFS_O_RDWR_STATE, &state->flags);
        is_rdonly = test_bit(NFS_O_RDONLY_STATE, &state->flags);
        is_wronly = test_bit(NFS_O_WRONLY_STATE, &state->flags);
        nfs4_stateid_copy(&calldata->arg.stateid, &state->open_stateid);
        /* Calculate the change in open mode */
        calldata->arg.fmode = 0;
        if (state->n_rdwr == 0) {
                /*
                 * For each of read-only and write-only: a zero count with
                 * the state bit set requires a call; a non-zero count with
                 * the bit set keeps that mode in fmode.
                 */
                if (state->n_rdonly == 0)
                        call_close |= is_rdonly;
                else if (is_rdonly)
                        calldata->arg.fmode |= FMODE_READ;
                if (state->n_wronly == 0)
                        call_close |= is_wronly;
                else if (is_wronly)
                        calldata->arg.fmode |= FMODE_WRITE;
        } else if (is_rdwr)
                calldata->arg.fmode |= FMODE_READ|FMODE_WRITE;

        /* No mode is being kept: the read/write bit alone also requires a call */
        if (calldata->arg.fmode == 0)
                call_close |= is_rdwr;

        if (!nfs4_valid_open_stateid(state))
                call_close = 0;
        spin_unlock(&state->owner->so_lock);

        if (!call_close) {
                /* Note: exit _without_ calling nfs4_close_done */
                goto out_no_action;
        }

        if (nfs4_wait_on_layoutreturn(inode, task)) {
                nfs_release_seqid(calldata->arg.seqid);
                goto out_wait;
        }

        if (calldata->arg.fmode == 0)
                task->tk_msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_CLOSE];
        if (calldata->roc)
                pnfs_roc_get_barrier(inode, &calldata->roc_barrier);

        calldata->arg.share_access =
                nfs4_map_atomic_open_share(NFS_SERVER(inode),
                                calldata->arg.fmode, 0);

        nfs_fattr_init(calldata->res.fattr);
        /* Used by nfs4_close_done() for renew_lease() */
        calldata->timestamp = jiffies;
        if (nfs4_setup_sequence(NFS_SERVER(inode),
                                &calldata->arg.seq_args,
                                &calldata->res.seq_res,
                                task) != 0)
                nfs_release_seqid(calldata->arg.seqid);
        dprintk("%s: done!\n", __func__);
        return;
out_no_action:
        /* Clearing tk_action is what makes the task skip the RPC call */
        task->tk_action = NULL;
out_wait:
        nfs4_sequence_done(task, &calldata->res.seq_res);
}
2900
2901 static const struct rpc_call_ops nfs4_close_ops = {
2902         .rpc_call_prepare = nfs4_close_prepare,
2903         .rpc_call_done = nfs4_close_done,
2904         .rpc_release = nfs4_free_closedata,
2905 };
2906
2907 static bool nfs4_roc(struct inode *inode)
2908 {
2909         if (!nfs_have_layout(inode))
2910                 return false;
2911         return pnfs_roc(inode);
2912 }
2913
/*
 * It is possible for data to be read/written from a mem-mapped file
 * after the sys_close call (which hits the vfs layer as a flush).
 * This means that we can't safely call nfsv4 close on a file until
 * the inode is cleared. This in turn means that we are not good
 * NFSv4 citizens - we do not indicate to the server to update the file's
 * share state even when we are done with one of the three share
 * stateid's in the inode.
 *
 * @state:    open state to close or downgrade
 * @gfp_mask: allocation flags for the call data and the seqid
 * @wait:     if non-zero, wait for the RPC task to complete
 *
 * Returns 0 or the result of rpc_wait_for_completion_task() once the task
 * has been started, PTR_ERR() of the task if it could not be started, and
 * -ENOMEM if the call data or the seqid could not be allocated.
 *
 * The allocation-failure paths put @state and its owner here.  Once the
 * call data exists, that is left to nfs4_free_closedata(), the rpc_release
 * callback.
 *
 * NOTE: Caller must be holding the sp->so_owner semaphore!
 */
int nfs4_do_close(struct nfs4_state *state, gfp_t gfp_mask, int wait)
{
        struct nfs_server *server = NFS_SERVER(state->inode);
        struct nfs_seqid *(*alloc_seqid)(struct nfs_seqid_counter *, gfp_t);
        struct nfs4_closedata *calldata;
        struct nfs4_state_owner *sp = state->owner;
        struct rpc_task *task;
        struct rpc_message msg = {
                .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_CLOSE],
                .rpc_cred = state->owner->so_cred,
        };
        struct rpc_task_setup task_setup_data = {
                .rpc_client = server->client,
                .rpc_message = &msg,
                .callback_ops = &nfs4_close_ops,
                .workqueue = nfsiod_workqueue,
                .flags = RPC_TASK_ASYNC,
        };
        int status = -ENOMEM;

        /* May replace the rpc client and/or the message set up above */
        nfs4_state_protect(server->nfs_client, NFS_SP4_MACH_CRED_CLEANUP,
                &task_setup_data.rpc_client, &msg);

        calldata = kzalloc(sizeof(*calldata), gfp_mask);
        if (calldata == NULL)
                goto out;
        nfs4_init_sequence(&calldata->arg.seq_args, &calldata->res.seq_res, 1);
        calldata->inode = state->inode;
        calldata->state = state;
        calldata->arg.fh = NFS_FH(state->inode);
        /* Serialization for the sequence id */
        alloc_seqid = server->nfs_client->cl_mvops->alloc_seqid;
        calldata->arg.seqid = alloc_seqid(&state->owner->so_seqid, gfp_mask);
        /* status is still -ENOMEM on this path */
        if (IS_ERR(calldata->arg.seqid))
                goto out_free_calldata;
        calldata->arg.fmode = 0;
        calldata->arg.bitmask = server->cache_consistency_bitmask;
        calldata->res.fattr = &calldata->fattr;
        calldata->res.seqid = calldata->arg.seqid;
        calldata->res.server = server;
        calldata->roc = nfs4_roc(state->inode);
        /* Paired with nfs_sb_deactive() in nfs4_free_closedata() */
        nfs_sb_active(calldata->inode->i_sb);

        msg.rpc_argp = &calldata->arg;
        msg.rpc_resp = &calldata->res;
        task_setup_data.callback_data = calldata;
        task = rpc_run_task(&task_setup_data);
        /*
         * NOTE(review): calldata is not freed here; presumably
         * rpc_run_task() invokes rpc_release on failure -- confirm.
         */
        if (IS_ERR(task))
                return PTR_ERR(task);
        status = 0;
        if (wait)
                status = rpc_wait_for_completion_task(task);
        rpc_put_task(task);
        return status;
out_free_calldata:
        kfree(calldata);
out:
        nfs4_put_open_state(state);
        nfs4_put_state_owner(sp);
        return status;
}
2986
2987 static struct inode *
2988 nfs4_atomic_open(struct inode *dir, struct nfs_open_context *ctx,
2989                 int open_flags, struct iattr *attr, int *opened)
2990 {
2991         struct nfs4_state *state;
2992         struct nfs4_label l = {0, 0, 0, NULL}, *label = NULL;
2993
2994         label = nfs4_label_init_security(dir, ctx->dentry, attr, &l);
2995
2996         /* Protect against concurrent sillydeletes */
2997         state = nfs4_do_open(dir, ctx, open_flags, attr, label, opened);
2998
2999         nfs4_label_release_security(label);
3000
3001         if (IS_ERR(state))
3002                 return ERR_CAST(state);
3003         return state->inode;
3004 }
3005
3006 static void nfs4_close_context(struct nfs_open_context *ctx, int is_sync)
3007 {
3008         if (ctx->state == NULL)
3009                 return;
3010         if (is_sync)
3011                 nfs4_close_sync(ctx->state, ctx->mode);
3012         else
3013                 nfs4_close_state(ctx->state, ctx->mode);
3014 }
3015
/*
 * Masks applied to the attribute bitmaps reported by the server (see
 * _nfs4_server_capabilities()), one per minor version.  Each is built as
 * "2 * FLAG - 1", which selects FLAG and every bit below it -- assuming
 * FLAG is a single-bit constant (NOTE(review): confirm against the
 * FATTR4_* definitions).
 */
#define FATTR4_WORD1_NFS40_MASK (2*FATTR4_WORD1_MOUNTED_ON_FILEID - 1UL)
#define FATTR4_WORD2_NFS41_MASK (2*FATTR4_WORD2_SUPPATTR_EXCLCREAT - 1UL)
#define FATTR4_WORD2_NFS42_MASK (2*FATTR4_WORD2_SECURITY_LABEL - 1UL)
3019
/*
 * Query the server's capabilities for @fhandle and, if the call succeeds,
 * record them in @server: the attribute bitmasks, the NFS_CAP_* flags
 * derived from them, the ACL bitmask and the filehandle expire type.
 *
 * Returns the status of the RPC; @server is left untouched on failure.
 */
static int _nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle)
{
        u32 bitmask[3] = {}, minorversion = server->nfs_client->cl_minorversion;
        struct nfs4_server_caps_arg args = {
                .fhandle = fhandle,
                .bitmask = bitmask,
        };
        struct nfs4_server_caps_res res = {};
        struct rpc_message msg = {
                .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_SERVER_CAPS],
                .rpc_argp = &args,
                .rpc_resp = &res,
        };
        int status;

        bitmask[0] = FATTR4_WORD0_SUPPORTED_ATTRS |
                     FATTR4_WORD0_FH_EXPIRE_TYPE |
                     FATTR4_WORD0_LINK_SUPPORT |
                     FATTR4_WORD0_SYMLINK_SUPPORT |
                     FATTR4_WORD0_ACLSUPPORT;
        /* Only requested for a non-zero minor version */
        if (minorversion)
                bitmask[2] = FATTR4_WORD2_SUPPATTR_EXCLCREAT;

        status = nfs4_call_sync(server->client, server, &msg, &args.seq_args, &res.seq_res, 0);
        if (status == 0) {
                /* Sanity check the server answers */
                switch (minorversion) {
                case 0:
                        res.attr_bitmask[1] &= FATTR4_WORD1_NFS40_MASK;
                        res.attr_bitmask[2] = 0;
                        break;
                case 1:
                        res.attr_bitmask[2] &= FATTR4_WORD2_NFS41_MASK;
                        break;
                case 2:
                        /* Last case: no break needed; other versions are not masked */
                        res.attr_bitmask[2] &= FATTR4_WORD2_NFS42_MASK;
                }
                memcpy(server->attr_bitmask, res.attr_bitmask, sizeof(server->attr_bitmask));
                /* Clear every capability that is re-derived from the reply below */
                server->caps &= ~(NFS_CAP_ACLS|NFS_CAP_HARDLINKS|
                                NFS_CAP_SYMLINKS|NFS_CAP_FILEID|
                                NFS_CAP_MODE|NFS_CAP_NLINK|NFS_CAP_OWNER|
                                NFS_CAP_OWNER_GROUP|NFS_CAP_ATIME|
                                NFS_CAP_CTIME|NFS_CAP_MTIME|
                                NFS_CAP_SECURITY_LABEL);
                if (res.attr_bitmask[0] & FATTR4_WORD0_ACL &&
                                res.acl_bitmask & ACL4_SUPPORT_ALLOW_ACL)
                        server->caps |= NFS_CAP_ACLS;
                if (res.has_links != 0)
                        server->caps |= NFS_CAP_HARDLINKS;
                if (res.has_symlinks != 0)
                        server->caps |= NFS_CAP_SYMLINKS;
                if (res.attr_bitmask[0] & FATTR4_WORD0_FILEID)
                        server->caps |= NFS_CAP_FILEID;
                if (res.attr_bitmask[1] & FATTR4_WORD1_MODE)
                        server->caps |= NFS_CAP_MODE;
                if (res.attr_bitmask[1] & FATTR4_WORD1_NUMLINKS)
                        server->caps |= NFS_CAP_NLINK;
                if (res.attr_bitmask[1] & FATTR4_WORD1_OWNER)
                        server->caps |= NFS_CAP_OWNER;
                if (res.attr_bitmask[1] & FATTR4_WORD1_OWNER_GROUP)
                        server->caps |= NFS_CAP_OWNER_GROUP;
                if (res.attr_bitmask[1] & FATTR4_WORD1_TIME_ACCESS)
                        server->caps |= NFS_CAP_ATIME;
                if (res.attr_bitmask[1] & FATTR4_WORD1_TIME_METADATA)
                        server->caps |= NFS_CAP_CTIME;
                if (res.attr_bitmask[1] & FATTR4_WORD1_TIME_MODIFY)
                        server->caps |= NFS_CAP_MTIME;
#ifdef CONFIG_NFS_V4_SECURITY_LABEL
                if (res.attr_bitmask[2] & FATTR4_WORD2_SECURITY_LABEL)
                        server->caps |= NFS_CAP_SECURITY_LABEL;
#endif
                /*
                 * attr_bitmask_nl: same bitmap with the security label bit
                 * removed.
                 * NOTE(review): the copy is sized by attr_bitmask rather
                 * than attr_bitmask_nl; presumably both arrays have the
                 * same size -- confirm against struct nfs_server.
                 */
                memcpy(server->attr_bitmask_nl, res.attr_bitmask,
                                sizeof(server->attr_bitmask));
                server->attr_bitmask_nl[2] &= ~FATTR4_WORD2_SECURITY_LABEL;

                /* Cache consistency only keeps change, size, ctime and mtime */
                memcpy(server->cache_consistency_bitmask, res.attr_bitmask, sizeof(server->cache_consistency_bitmask));
                server->cache_consistency_bitmask[0] &= FATTR4_WORD0_CHANGE|FATTR4_WORD0_SIZE;
                server->cache_consistency_bitmask[1] &= FATTR4_WORD1_TIME_METADATA|FATTR4_WORD1_TIME_MODIFY;
                server->cache_consistency_bitmask[2] = 0;
                memcpy(server->exclcreat_bitmask, res.exclcreat_bitmask,
                        sizeof(server->exclcreat_bitmask));
                server->acl_bitmask = res.acl_bitmask;
                server->fh_expire_type = res.fh_expire_type;
        }

        return status;
}
3107
3108 int nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle)
3109 {
3110         struct nfs4_exception exception = { };
3111         int err;
3112         do {
3113                 err = nfs4_handle_exception(server,
3114                                 _nfs4_server_capabilities(server, fhandle),
3115                                 &exception);
3116         } while (exception.retry);
3117         return err;
3118 }
3119
3120 static int _nfs4_lookup_root(struct nfs_server *server, struct nfs_fh *fhandle,
3121                 struct nfs_fsinfo *info)
3122 {
3123         u32 bitmask[3];
3124         struct nfs4_lookup_root_arg args = {
3125                 .bitmask = bitmask,
3126         };
3127         struct nfs4_lookup_res res = {
3128                 .server = server,
3129                 .fattr = info->fattr,
3130                 .fh = fhandle,
3131         };
3132         struct rpc_message msg = {
3133                 .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LOOKUP_ROOT],
3134                 .rpc_argp = &args,
3135                 .rpc_resp = &res,
3136         };
3137
3138         bitmask[0] = nfs4_fattr_bitmap[0];
3139         bitmask[1] = nfs4_fattr_bitmap[1];
3140         /*
3141          * Process the label in the upcoming getfattr
3142          */
3143         bitmask[2] = nfs4_fattr_bitmap[2] & ~FATTR4_WORD2_SECURITY_LABEL;
3144
3145         nfs_fattr_init(info->fattr);
3146         return nfs4_call_sync(server->client, server, &msg, &args.seq_args, &res.seq_res, 0);
3147 }
3148
3149 static int nfs4_lookup_root(struct nfs_server *server, struct nfs_fh *fhandle,
3150                 struct nfs_fsinfo *info)
3151 {
3152         struct nfs4_exception exception = { };
3153         int err;
3154         do {
3155                 err = _nfs4_lookup_root(server, fhandle, info);
3156                 trace_nfs4_lookup_root(server, fhandle, info->fattr, err);
3157                 switch (err) {
3158                 case 0:
3159                 case -NFS4ERR_WRONGSEC:
3160                         goto out;
3161                 default:
3162                         err = nfs4_handle_exception(server, err, &exception);
3163                 }
3164         } while (exception.retry);
3165 out:
3166         return err;
3167 }
3168
3169 static int nfs4_lookup_root_sec(struct nfs_server *server, struct nfs_fh *fhandle,
3170                                 struct nfs_fsinfo *info, rpc_authflavor_t flavor)
3171 {
3172         struct rpc_auth_create_args auth_args = {
3173                 .pseudoflavor = flavor,
3174         };
3175         struct rpc_auth *auth;
3176         int ret;
3177
3178         auth = rpcauth_create(&auth_args, server->client);
3179         if (IS_ERR(auth)) {
3180                 ret = -EACCES;
3181                 goto out;
3182         }
3183         ret = nfs4_lookup_root(server, fhandle, info);
3184 out:
3185         return ret;
3186 }
3187
3188 /*
3189  * Retry pseudoroot lookup with various security flavors.  We do this when:
3190  *
3191  *   NFSv4.0: the PUTROOTFH operation returns NFS4ERR_WRONGSEC
3192  *   NFSv4.1: the server does not support the SECINFO_NO_NAME operation
3193  *
3194  * Returns zero on success, or a negative NFS4ERR value, or a
3195  * negative errno value.
3196  */
3197 static int nfs4_find_root_sec(struct nfs_server *server, struct nfs_fh *fhandle,
3198                               struct nfs_fsinfo *info)
3199 {
3200         /* Per 3530bis 15.33.5 */
3201         static const rpc_authflavor_t flav_array[] = {
3202                 RPC_AUTH_GSS_KRB5P,
3203                 RPC_AUTH_GSS_KRB5I,
3204                 RPC_AUTH_GSS_KRB5,
3205                 RPC_AUTH_UNIX,                  /* courtesy */
3206                 RPC_AUTH_NULL,
3207         };
3208         int status = -EPERM;
3209         size_t i;
3210
3211         if (server->auth_info.flavor_len > 0) {
3212                 /* try each flavor specified by user */
3213                 for (i = 0; i < server->auth_info.flavor_len; i++) {
3214                         status = nfs4_lookup_root_sec(server, fhandle, info,
3215                                                 server->auth_info.flavors[i]);
3216                         if (status == -NFS4ERR_WRONGSEC || status == -EACCES)
3217                                 continue;
3218                         break;
3219                 }
3220         } else {
3221                 /* no flavors specified by user, try default list */
3222                 for (i = 0; i < ARRAY_SIZE(flav_array); i++) {
3223                         status = nfs4_lookup_root_sec(server, fhandle, info,
3224                                                       flav_array[i]);
3225                         if (status == -NFS4ERR_WRONGSEC || status == -EACCES)
3226                                 continue;
3227                         break;
3228                 }
3229         }
3230
3231         /*
3232          * -EACCESS could mean that the user doesn't have correct permissions
3233          * to access the mount.  It could also mean that we tried to mount
3234          * with a gss auth flavor, but rpc.gssd isn't running.  Either way,
3235          * existing mount programs don't handle -EACCES very well so it should
3236          * be mapped to -EPERM instead.
3237          */
3238         if (status == -EACCES)
3239                 status = -EPERM;
3240         return status;
3241 }
3242
3243 static int nfs4_do_find_root_sec(struct nfs_server *server,
3244                 struct nfs_fh *fhandle, struct nfs_fsinfo *info)
3245 {
3246         int mv = server->nfs_client->cl_minorversion;
3247         return nfs_v4_minor_ops[mv]->find_root_sec(server, fhandle, info);
3248 }
3249
3250 /**
3251  * nfs4_proc_get_rootfh - get file handle for server's pseudoroot
3252  * @server: initialized nfs_server handle
3253  * @fhandle: we fill in the pseudo-fs root file handle
3254  * @info: we fill in an FSINFO struct
3255  * @auth_probe: probe the auth flavours
3256  *
3257  * Returns zero on success, or a negative errno.
3258  */
3259 int nfs4_proc_get_rootfh(struct nfs_server *server, struct nfs_fh *fhandle,
3260                          struct nfs_fsinfo *info,
3261                          bool auth_probe)
3262 {
3263         int status = 0;
3264
3265         if (!auth_probe)
3266                 status = nfs4_lookup_root(server, fhandle, info);
3267
3268         if (auth_probe || status == NFS4ERR_WRONGSEC)
3269                 status = nfs4_do_find_root_sec(server, fhandle, info);
3270
3271         if (status == 0)
3272                 status = nfs4_server_capabilities(server, fhandle);
3273         if (status == 0)
3274                 status = nfs4_do_fsinfo(server, fhandle, info);
3275
3276         return nfs4_map_errors(status);
3277 }
3278
3279 static int nfs4_proc_get_root(struct nfs_server *server, struct nfs_fh *mntfh,
3280                               struct nfs_fsinfo *info)
3281 {
3282         int error;
3283         struct nfs_fattr *fattr = info->fattr;
3284         struct nfs4_label *label = NULL;
3285
3286         error = nfs4_server_capabilities(server, mntfh);
3287         if (error < 0) {
3288                 dprintk("nfs4_get_root: getcaps error = %d\n", -error);
3289                 return error;
3290         }
3291
3292         label = nfs4_label_alloc(server, GFP_KERNEL);
3293         if (IS_ERR(label))
3294                 return PTR_ERR(label);
3295
3296         error = nfs4_proc_getattr(server, mntfh, fattr, label);
3297         if (error < 0) {
3298                 dprintk("nfs4_get_root: getattr error = %d\n", -error);
3299                 goto err_free_label;
3300         }
3301
3302         if (fattr->valid & NFS_ATTR_FATTR_FSID &&
3303             !nfs_fsid_equal(&server->fsid, &fattr->fsid))
3304                 memcpy(&server->fsid, &fattr->fsid, sizeof(server->fsid));
3305
3306 err_free_label:
3307         nfs4_label_free(label);
3308
3309         return error;
3310 }
3311
3312 /*
3313  * Get locations and (maybe) other attributes of a referral.
3314  * Note that we'll actually follow the referral later when
3315  * we detect fsid mismatch in inode revalidation
3316  */
3317 static int nfs4_get_referral(struct rpc_clnt *client, struct inode *dir,
3318                              const struct qstr *name, struct nfs_fattr *fattr,
3319                              struct nfs_fh *fhandle)
3320 {
3321         int status = -ENOMEM;
3322         struct page *page = NULL;
3323         struct nfs4_fs_locations *locations = NULL;
3324
3325         page = alloc_page(GFP_KERNEL);
3326         if (page == NULL)
3327                 goto out;
3328         locations = kmalloc(sizeof(struct nfs4_fs_locations), GFP_KERNEL);
3329         if (locations == NULL)
3330                 goto out;
3331
3332         status = nfs4_proc_fs_locations(client, dir, name, locations, page);
3333         if (status != 0)
3334                 goto out;
3335
3336         /*
3337          * If the fsid didn't change, this is a migration event, not a
3338          * referral.  Cause us to drop into the exception handler, which
3339          * will kick off migration recovery.
3340          */
3341         if (nfs_fsid_equal(&NFS_SERVER(dir)->fsid, &locations->fattr.fsid)) {
3342                 dprintk("%s: server did not return a different fsid for"
3343                         " a referral at %s\n", __func__, name->name);
3344                 status = -NFS4ERR_MOVED;
3345                 goto out;
3346         }
3347         /* Fixup attributes for the nfs_lookup() call to nfs_fhget() */
3348         nfs_fixup_referral_attributes(&locations->fattr);
3349
3350         /* replace the lookup nfs_fattr with the locations nfs_fattr */
3351         memcpy(fattr, &locations->fattr, sizeof(struct nfs_fattr));
3352         memset(fhandle, 0, sizeof(struct nfs_fh));
3353 out:
3354         if (page)
3355                 __free_page(page);
3356         kfree(locations);
3357         return status;
3358 }
3359
3360 static int _nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle,
3361                                 struct nfs_fattr *fattr, struct nfs4_label *label)
3362 {
3363         struct nfs4_getattr_arg args = {
3364                 .fh = fhandle,
3365                 .bitmask = server->attr_bitmask,
3366         };
3367         struct nfs4_getattr_res res = {
3368                 .fattr = fattr,
3369                 .label = label,
3370                 .server = server,
3371         };
3372         struct rpc_message msg = {
3373                 .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_GETATTR],
3374                 .rpc_argp = &args,
3375                 .rpc_resp = &res,
3376         };
3377
3378         args.bitmask = nfs4_bitmask(server, label);
3379
3380         nfs_fattr_init(fattr);
3381         return nfs4_call_sync(server->client, server, &msg, &args.seq_args, &res.seq_res, 0);
3382 }
3383
3384 static int nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle,
3385                                 struct nfs_fattr *fattr, struct nfs4_label *label)
3386 {
3387         struct nfs4_exception exception = { };
3388         int err;
3389         do {
3390                 err = _nfs4_proc_getattr(server, fhandle, fattr, label);
3391                 trace_nfs4_getattr(server, fhandle, fattr, err);
3392                 err = nfs4_handle_exception(server, err,
3393                                 &exception);
3394         } while (exception.retry);
3395         return err;
3396 }
3397
3398 /* 
3399  * The file is not closed if it is opened due to a request to change
3400  * the size of the file. The open call will not be needed once the
3401  * VFS layer lookup-intents are implemented.
3402  *
3403  * Close is called when the inode is destroyed.
3404  * If we haven't opened the file for O_WRONLY, we
3405  * need to in the size_change case to obtain a stateid.
3406  *
3407  * Got race?
3408  * Because OPEN is always done by name in nfsv4, it is
3409  * possible that we opened a different file by the same
3410  * name.  We can recognize this race condition, but we
3411  * can't do anything about it besides returning an error.
3412  *
3413  * This will be fixed with VFS changes (lookup-intent).
3414  */
3415 static int
3416 nfs4_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr,
3417                   struct iattr *sattr)
3418 {
3419         struct inode *inode = d_inode(dentry);
3420         struct rpc_cred *cred = NULL;
3421         struct nfs4_state *state =