]> git.kernelconcepts.de Git - karo-tx-linux.git/blob - fs/cifs/file.c
Merge remote-tracking branch 'ext4/dev'
[karo-tx-linux.git] / fs / cifs / file.c
1 /*
2  *   fs/cifs/file.c
3  *
4  *   vfs operations that deal with files
5  *
6  *   Copyright (C) International Business Machines  Corp., 2002,2010
7  *   Author(s): Steve French (sfrench@us.ibm.com)
8  *              Jeremy Allison (jra@samba.org)
9  *
10  *   This library is free software; you can redistribute it and/or modify
11  *   it under the terms of the GNU Lesser General Public License as published
12  *   by the Free Software Foundation; either version 2.1 of the License, or
13  *   (at your option) any later version.
14  *
15  *   This library is distributed in the hope that it will be useful,
16  *   but WITHOUT ANY WARRANTY; without even the implied warranty of
17  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
18  *   the GNU Lesser General Public License for more details.
19  *
20  *   You should have received a copy of the GNU Lesser General Public License
21  *   along with this library; if not, write to the Free Software
22  *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
23  */
24 #include <linux/fs.h>
25 #include <linux/backing-dev.h>
26 #include <linux/stat.h>
27 #include <linux/fcntl.h>
28 #include <linux/pagemap.h>
29 #include <linux/pagevec.h>
30 #include <linux/writeback.h>
31 #include <linux/task_io_accounting_ops.h>
32 #include <linux/delay.h>
33 #include <linux/mount.h>
34 #include <linux/slab.h>
35 #include <linux/swap.h>
36 #include <asm/div64.h>
37 #include "cifsfs.h"
38 #include "cifspdu.h"
39 #include "cifsglob.h"
40 #include "cifsproto.h"
41 #include "cifs_unicode.h"
42 #include "cifs_debug.h"
43 #include "cifs_fs_sb.h"
44 #include "fscache.h"
45
46
47 static inline int cifs_convert_flags(unsigned int flags)
48 {
49         if ((flags & O_ACCMODE) == O_RDONLY)
50                 return GENERIC_READ;
51         else if ((flags & O_ACCMODE) == O_WRONLY)
52                 return GENERIC_WRITE;
53         else if ((flags & O_ACCMODE) == O_RDWR) {
54                 /* GENERIC_ALL is too much permission to request
55                    can cause unnecessary access denied on create */
56                 /* return GENERIC_ALL; */
57                 return (GENERIC_READ | GENERIC_WRITE);
58         }
59
60         return (READ_CONTROL | FILE_WRITE_ATTRIBUTES | FILE_READ_ATTRIBUTES |
61                 FILE_WRITE_EA | FILE_APPEND_DATA | FILE_WRITE_DATA |
62                 FILE_READ_DATA);
63 }
64
65 static u32 cifs_posix_convert_flags(unsigned int flags)
66 {
67         u32 posix_flags = 0;
68
69         if ((flags & O_ACCMODE) == O_RDONLY)
70                 posix_flags = SMB_O_RDONLY;
71         else if ((flags & O_ACCMODE) == O_WRONLY)
72                 posix_flags = SMB_O_WRONLY;
73         else if ((flags & O_ACCMODE) == O_RDWR)
74                 posix_flags = SMB_O_RDWR;
75
76         if (flags & O_CREAT) {
77                 posix_flags |= SMB_O_CREAT;
78                 if (flags & O_EXCL)
79                         posix_flags |= SMB_O_EXCL;
80         } else if (flags & O_EXCL)
81                 cifs_dbg(FYI, "Application %s pid %d has incorrectly set O_EXCL flag but not O_CREAT on file open. Ignoring O_EXCL\n",
82                          current->comm, current->tgid);
83
84         if (flags & O_TRUNC)
85                 posix_flags |= SMB_O_TRUNC;
86         /* be safe and imply O_SYNC for O_DSYNC */
87         if (flags & O_DSYNC)
88                 posix_flags |= SMB_O_SYNC;
89         if (flags & O_DIRECTORY)
90                 posix_flags |= SMB_O_DIRECTORY;
91         if (flags & O_NOFOLLOW)
92                 posix_flags |= SMB_O_NOFOLLOW;
93         if (flags & O_DIRECT)
94                 posix_flags |= SMB_O_DIRECT;
95
96         return posix_flags;
97 }
98
99 static inline int cifs_get_disposition(unsigned int flags)
100 {
101         if ((flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))
102                 return FILE_CREATE;
103         else if ((flags & (O_CREAT | O_TRUNC)) == (O_CREAT | O_TRUNC))
104                 return FILE_OVERWRITE_IF;
105         else if ((flags & O_CREAT) == O_CREAT)
106                 return FILE_OPEN_IF;
107         else if ((flags & O_TRUNC) == O_TRUNC)
108                 return FILE_OVERWRITE;
109         else
110                 return FILE_OPEN;
111 }
112
/*
 * Open a file using the SMB POSIX protocol extension.
 *
 * On success *poplock holds the granted oplock and *pnetfid the new
 * network file id.  If @pinode is non-NULL the inode is created (or the
 * existing one refreshed) from the FILE_UNIX_BASIC_INFO the server
 * returned; when the server reports no type info (Type == -1) the
 * caller is expected to do the qpathinfo itself.
 *
 * Returns 0 on success or a negative errno.
 */
int cifs_posix_open(char *full_path, struct inode **pinode,
			struct super_block *sb, int mode, unsigned int f_flags,
			__u32 *poplock, __u16 *pnetfid, unsigned int xid)
{
	int rc;
	FILE_UNIX_BASIC_INFO *presp_data;
	__u32 posix_flags = 0;
	struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
	struct cifs_fattr fattr;
	struct tcon_link *tlink;
	struct cifs_tcon *tcon;

	cifs_dbg(FYI, "posix open %s\n", full_path);

	presp_data = kzalloc(sizeof(FILE_UNIX_BASIC_INFO), GFP_KERNEL);
	if (presp_data == NULL)
		return -ENOMEM;

	tlink = cifs_sb_tlink(cifs_sb);
	if (IS_ERR(tlink)) {
		rc = PTR_ERR(tlink);
		goto posix_open_ret;
	}

	tcon = tlink_tcon(tlink);
	/* apply the process umask before sending the mode to the server */
	mode &= ~current_umask();

	posix_flags = cifs_posix_convert_flags(f_flags);
	rc = CIFSPOSIXCreate(xid, tcon, posix_flags, mode, pnetfid, presp_data,
			     poplock, full_path, cifs_sb->local_nls,
			     cifs_remap(cifs_sb));
	/* tlink reference is only needed for the wire call above */
	cifs_put_tlink(tlink);

	if (rc)
		goto posix_open_ret;

	if (presp_data->Type == cpu_to_le32(-1))
		goto posix_open_ret; /* open ok, caller does qpathinfo */

	if (!pinode)
		goto posix_open_ret; /* caller does not need info */

	cifs_unix_basic_to_fattr(&fattr, presp_data, cifs_sb);

	/* get new inode and set it up */
	if (*pinode == NULL) {
		cifs_fill_uniqueid(sb, &fattr);
		*pinode = cifs_iget(sb, &fattr);
		if (!*pinode) {
			rc = -ENOMEM;
			goto posix_open_ret;
		}
	} else {
		/* refresh the existing inode from the server's attributes */
		cifs_fattr_to_inode(*pinode, &fattr);
	}

posix_open_ret:
	kfree(presp_data);
	return rc;
}
173
174 static int
175 cifs_nt_open(char *full_path, struct inode *inode, struct cifs_sb_info *cifs_sb,
176              struct cifs_tcon *tcon, unsigned int f_flags, __u32 *oplock,
177              struct cifs_fid *fid, unsigned int xid)
178 {
179         int rc;
180         int desired_access;
181         int disposition;
182         int create_options = CREATE_NOT_DIR;
183         FILE_ALL_INFO *buf;
184         struct TCP_Server_Info *server = tcon->ses->server;
185         struct cifs_open_parms oparms;
186
187         if (!server->ops->open)
188                 return -ENOSYS;
189
190         desired_access = cifs_convert_flags(f_flags);
191
192 /*********************************************************************
193  *  open flag mapping table:
194  *
195  *      POSIX Flag            CIFS Disposition
196  *      ----------            ----------------
197  *      O_CREAT               FILE_OPEN_IF
198  *      O_CREAT | O_EXCL      FILE_CREATE
199  *      O_CREAT | O_TRUNC     FILE_OVERWRITE_IF
200  *      O_TRUNC               FILE_OVERWRITE
201  *      none of the above     FILE_OPEN
202  *
203  *      Note that there is not a direct match between disposition
204  *      FILE_SUPERSEDE (ie create whether or not file exists although
205  *      O_CREAT | O_TRUNC is similar but truncates the existing
206  *      file rather than creating a new file as FILE_SUPERSEDE does
207  *      (which uses the attributes / metadata passed in on open call)
208  *?
209  *?  O_SYNC is a reasonable match to CIFS writethrough flag
210  *?  and the read write flags match reasonably.  O_LARGEFILE
211  *?  is irrelevant because largefile support is always used
212  *?  by this client. Flags O_APPEND, O_DIRECT, O_DIRECTORY,
213  *       O_FASYNC, O_NOFOLLOW, O_NONBLOCK need further investigation
214  *********************************************************************/
215
216         disposition = cifs_get_disposition(f_flags);
217
218         /* BB pass O_SYNC flag through on file attributes .. BB */
219
220         buf = kmalloc(sizeof(FILE_ALL_INFO), GFP_KERNEL);
221         if (!buf)
222                 return -ENOMEM;
223
224         if (backup_cred(cifs_sb))
225                 create_options |= CREATE_OPEN_BACKUP_INTENT;
226
227         oparms.tcon = tcon;
228         oparms.cifs_sb = cifs_sb;
229         oparms.desired_access = desired_access;
230         oparms.create_options = create_options;
231         oparms.disposition = disposition;
232         oparms.path = full_path;
233         oparms.fid = fid;
234         oparms.reconnect = false;
235
236         rc = server->ops->open(xid, &oparms, oplock, buf);
237
238         if (rc)
239                 goto out;
240
241         if (tcon->unix_ext)
242                 rc = cifs_get_inode_info_unix(&inode, full_path, inode->i_sb,
243                                               xid);
244         else
245                 rc = cifs_get_inode_info(&inode, full_path, buf, inode->i_sb,
246                                          xid, fid);
247
248 out:
249         kfree(buf);
250         return rc;
251 }
252
253 static bool
254 cifs_has_mand_locks(struct cifsInodeInfo *cinode)
255 {
256         struct cifs_fid_locks *cur;
257         bool has_locks = false;
258
259         down_read(&cinode->lock_sem);
260         list_for_each_entry(cur, &cinode->llist, llist) {
261                 if (!list_empty(&cur->locks)) {
262                         has_locks = true;
263                         break;
264                 }
265         }
266         up_read(&cinode->lock_sem);
267         return has_locks;
268 }
269
/*
 * Allocate and initialize the per-open cifsFileInfo for @file, link it
 * onto the tcon and inode open-file lists and store it in
 * file->private_data.
 *
 * Returns the new cifsFileInfo, or NULL on allocation failure (the
 * server handle described by @fid is NOT closed here - on failure the
 * caller owns cleanup, see cifs_open()).
 */
struct cifsFileInfo *
cifs_new_fileinfo(struct cifs_fid *fid, struct file *file,
		  struct tcon_link *tlink, __u32 oplock)
{
	struct dentry *dentry = file->f_path.dentry;
	struct inode *inode = d_inode(dentry);
	struct cifsInodeInfo *cinode = CIFS_I(inode);
	struct cifsFileInfo *cfile;
	struct cifs_fid_locks *fdlocks;
	struct cifs_tcon *tcon = tlink_tcon(tlink);
	struct TCP_Server_Info *server = tcon->ses->server;

	cfile = kzalloc(sizeof(struct cifsFileInfo), GFP_KERNEL);
	if (cfile == NULL)
		return cfile;

	fdlocks = kzalloc(sizeof(struct cifs_fid_locks), GFP_KERNEL);
	if (!fdlocks) {
		kfree(cfile);
		return NULL;
	}

	/* attach this open's brlock list to the inode's list of lists */
	INIT_LIST_HEAD(&fdlocks->locks);
	fdlocks->cfile = cfile;
	cfile->llist = fdlocks;
	down_write(&cinode->lock_sem);
	list_add(&fdlocks->llist, &cinode->llist);
	up_write(&cinode->lock_sem);

	cfile->count = 1;
	cfile->pid = current->tgid;
	cfile->uid = current_fsuid();
	cfile->dentry = dget(dentry);
	cfile->f_flags = file->f_flags;
	cfile->invalidHandle = false;
	cfile->tlink = cifs_get_tlink(tlink);
	INIT_WORK(&cfile->oplock_break, cifs_oplock_break);
	mutex_init(&cfile->fh_mutex);

	/* pin the superblock active while this open exists */
	cifs_sb_active(inode->i_sb);

	/*
	 * If the server returned a read oplock and we have mandatory brlocks,
	 * set oplock level to None.
	 */
	if (server->ops->is_read_op(oplock) && cifs_has_mand_locks(cinode)) {
		cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n");
		oplock = 0;
	}

	spin_lock(&cifs_file_list_lock);
	/*
	 * A lease/oplock break may have arrived while the open was in
	 * flight; prefer the level recorded in the pending open, then
	 * retire the pending-open entry.
	 */
	if (fid->pending_open->oplock != CIFS_OPLOCK_NO_CHANGE && oplock)
		oplock = fid->pending_open->oplock;
	list_del(&fid->pending_open->olist);

	/* set_fid may set purge_cache; it is checked after the lock drops */
	fid->purge_cache = false;
	server->ops->set_fid(cfile, fid, oplock);

	list_add(&cfile->tlist, &tcon->openFileList);
	/* if readable file instance put first in list*/
	if (file->f_mode & FMODE_READ)
		list_add(&cfile->flist, &cinode->openFileList);
	else
		list_add_tail(&cfile->flist, &cinode->openFileList);
	spin_unlock(&cifs_file_list_lock);

	if (fid->purge_cache)
		cifs_zap_mapping(inode);

	file->private_data = cfile;
	return cfile;
}
342
343 struct cifsFileInfo *
344 cifsFileInfo_get(struct cifsFileInfo *cifs_file)
345 {
346         spin_lock(&cifs_file_list_lock);
347         cifsFileInfo_get_locked(cifs_file);
348         spin_unlock(&cifs_file_list_lock);
349         return cifs_file;
350 }
351
352 /*
353  * Release a reference on the file private data. This may involve closing
354  * the filehandle out on the server. Must be called without holding
355  * cifs_file_list_lock.
356  */
357 void cifsFileInfo_put(struct cifsFileInfo *cifs_file)
358 {
359         struct inode *inode = d_inode(cifs_file->dentry);
360         struct cifs_tcon *tcon = tlink_tcon(cifs_file->tlink);
361         struct TCP_Server_Info *server = tcon->ses->server;
362         struct cifsInodeInfo *cifsi = CIFS_I(inode);
363         struct super_block *sb = inode->i_sb;
364         struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
365         struct cifsLockInfo *li, *tmp;
366         struct cifs_fid fid;
367         struct cifs_pending_open open;
368         bool oplock_break_cancelled;
369
370         spin_lock(&cifs_file_list_lock);
371         if (--cifs_file->count > 0) {
372                 spin_unlock(&cifs_file_list_lock);
373                 return;
374         }
375
376         if (server->ops->get_lease_key)
377                 server->ops->get_lease_key(inode, &fid);
378
379         /* store open in pending opens to make sure we don't miss lease break */
380         cifs_add_pending_open_locked(&fid, cifs_file->tlink, &open);
381
382         /* remove it from the lists */
383         list_del(&cifs_file->flist);
384         list_del(&cifs_file->tlist);
385
386         if (list_empty(&cifsi->openFileList)) {
387                 cifs_dbg(FYI, "closing last open instance for inode %p\n",
388                          d_inode(cifs_file->dentry));
389                 /*
390                  * In strict cache mode we need invalidate mapping on the last
391                  * close  because it may cause a error when we open this file
392                  * again and get at least level II oplock.
393                  */
394                 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO)
395                         set_bit(CIFS_INO_INVALID_MAPPING, &cifsi->flags);
396                 cifs_set_oplock_level(cifsi, 0);
397         }
398         spin_unlock(&cifs_file_list_lock);
399
400         oplock_break_cancelled = cancel_work_sync(&cifs_file->oplock_break);
401
402         if (!tcon->need_reconnect && !cifs_file->invalidHandle) {
403                 struct TCP_Server_Info *server = tcon->ses->server;
404                 unsigned int xid;
405
406                 xid = get_xid();
407                 if (server->ops->close)
408                         server->ops->close(xid, tcon, &cifs_file->fid);
409                 _free_xid(xid);
410         }
411
412         if (oplock_break_cancelled)
413                 cifs_done_oplock_break(cifsi);
414
415         cifs_del_pending_open(&open);
416
417         /*
418          * Delete any outstanding lock records. We'll lose them when the file
419          * is closed anyway.
420          */
421         down_write(&cifsi->lock_sem);
422         list_for_each_entry_safe(li, tmp, &cifs_file->llist->locks, llist) {
423                 list_del(&li->llist);
424                 cifs_del_lock_waiters(li);
425                 kfree(li);
426         }
427         list_del(&cifs_file->llist->llist);
428         kfree(cifs_file->llist);
429         up_write(&cifsi->lock_sem);
430
431         cifs_put_tlink(cifs_file->tlink);
432         dput(cifs_file->dentry);
433         cifs_sb_deactive(sb);
434         kfree(cifs_file);
435 }
436
/*
 * VFS ->open for regular files: open the file on the server (via the
 * POSIX extension when available, otherwise the NT path), create the
 * cifsFileInfo and hook up fscache.  Returns 0 or a negative errno.
 */
int cifs_open(struct inode *inode, struct file *file)

{
	int rc = -EACCES;
	unsigned int xid;
	__u32 oplock;
	struct cifs_sb_info *cifs_sb;
	struct TCP_Server_Info *server;
	struct cifs_tcon *tcon;
	struct tcon_link *tlink;
	struct cifsFileInfo *cfile = NULL;
	char *full_path = NULL;
	bool posix_open_ok = false;
	struct cifs_fid fid;
	struct cifs_pending_open open;

	xid = get_xid();

	cifs_sb = CIFS_SB(inode->i_sb);
	tlink = cifs_sb_tlink(cifs_sb);
	if (IS_ERR(tlink)) {
		free_xid(xid);
		return PTR_ERR(tlink);
	}
	tcon = tlink_tcon(tlink);
	server = tcon->ses->server;

	full_path = build_path_from_dentry(file->f_path.dentry);
	if (full_path == NULL) {
		rc = -ENOMEM;
		goto out;
	}

	cifs_dbg(FYI, "inode = 0x%p file flags are 0x%x for %s\n",
		 inode, file->f_flags, full_path);

	/* strict cache mode + O_DIRECT switches to the direct file ops */
	if (file->f_flags & O_DIRECT &&
	    cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO) {
		if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL)
			file->f_op = &cifs_file_direct_nobrl_ops;
		else
			file->f_op = &cifs_file_direct_ops;
	}

	if (server->oplocks)
		oplock = REQ_OPLOCK;
	else
		oplock = 0;

	/* try the POSIX open extension first when the server supports it */
	if (!tcon->broken_posix_open && tcon->unix_ext &&
	    cap_unix(tcon->ses) && (CIFS_UNIX_POSIX_PATH_OPS_CAP &
				le64_to_cpu(tcon->fsUnixInfo.Capability))) {
		/* can not refresh inode info since size could be stale */
		rc = cifs_posix_open(full_path, &inode, inode->i_sb,
				cifs_sb->mnt_file_mode /* ignored */,
				file->f_flags, &oplock, &fid.netfid, xid);
		if (rc == 0) {
			cifs_dbg(FYI, "posix open succeeded\n");
			posix_open_ok = true;
		} else if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) {
			if (tcon->ses->serverNOS)
				cifs_dbg(VFS, "server %s of type %s returned unexpected error on SMB posix open, disabling posix open support. Check if server update available.\n",
					 tcon->ses->serverName,
					 tcon->ses->serverNOS);
			tcon->broken_posix_open = true;
		} else if ((rc != -EIO) && (rc != -EREMOTE) &&
			 (rc != -EOPNOTSUPP)) /* path not found or net err */
			goto out;
		/*
		 * Else fallthrough to retry open the old way on network i/o
		 * or DFS errors.
		 */
	}

	if (server->ops->get_lease_key)
		server->ops->get_lease_key(inode, &fid);

	/* record the open so a lease break racing with it is not lost */
	cifs_add_pending_open(&fid, tlink, &open);

	if (!posix_open_ok) {
		/*
		 * NOTE(review): the lease key was already fetched just
		 * above; this second fetch looks redundant - confirm.
		 */
		if (server->ops->get_lease_key)
			server->ops->get_lease_key(inode, &fid);

		rc = cifs_nt_open(full_path, inode, cifs_sb, tcon,
				  file->f_flags, &oplock, &fid, xid);
		if (rc) {
			cifs_del_pending_open(&open);
			goto out;
		}
	}

	cfile = cifs_new_fileinfo(&fid, file, tlink, oplock);
	if (cfile == NULL) {
		/* fileinfo allocation failed - close the server handle */
		if (server->ops->close)
			server->ops->close(xid, tcon, &fid);
		cifs_del_pending_open(&open);
		rc = -ENOMEM;
		goto out;
	}

	cifs_fscache_set_inode_cookie(inode, file);

	if ((oplock & CIFS_CREATE_ACTION) && !posix_open_ok && tcon->unix_ext) {
		/*
		 * Time to set mode which we can not set earlier due to
		 * problems creating new read-only files.
		 */
		struct cifs_unix_set_info_args args = {
			.mode	= inode->i_mode,
			.uid	= INVALID_UID, /* no change */
			.gid	= INVALID_GID, /* no change */
			.ctime	= NO_CHANGE_64,
			.atime	= NO_CHANGE_64,
			.mtime	= NO_CHANGE_64,
			.device	= 0,
		};
		CIFSSMBUnixSetFileInfo(xid, tcon, &args, fid.netfid,
				       cfile->pid);
	}

out:
	kfree(full_path);
	free_xid(xid);
	cifs_put_tlink(tlink);
	return rc;
}
563
564 static int cifs_push_posix_locks(struct cifsFileInfo *cfile);
565
/*
 * Try to reacquire byte range locks that were released when session
 * to server was lost.
 */
static int
cifs_relock_file(struct cifsFileInfo *cfile)
{
	struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	int rc = 0;

	down_read(&cinode->lock_sem);
	if (cinode->can_cache_brlcks) {
		/* can cache locks - no need to relock */
		up_read(&cinode->lock_sem);
		return rc;
	}

	/*
	 * Push POSIX (fcntl-style) locks when the server advertises the
	 * capability and the mount did not disable them; otherwise push
	 * mandatory-style locks through the server ops.
	 */
	if (cap_unix(tcon->ses) &&
	    (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
	    ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
		rc = cifs_push_posix_locks(cfile);
	else
		rc = tcon->ses->server->ops->push_mand_locks(cfile);

	up_read(&cinode->lock_sem);
	return rc;
}
595
/*
 * Reopen an invalidated file handle (e.g. after reconnect).  When
 * @can_flush is true, dirty pages are written back and the inode
 * metadata refreshed from the server; otherwise only the handle is
 * re-established.  Returns 0 or a negative errno.
 */
static int
cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush)
{
	int rc = -EACCES;
	unsigned int xid;
	__u32 oplock;
	struct cifs_sb_info *cifs_sb;
	struct cifs_tcon *tcon;
	struct TCP_Server_Info *server;
	struct cifsInodeInfo *cinode;
	struct inode *inode;
	char *full_path = NULL;
	int desired_access;
	int disposition = FILE_OPEN;
	int create_options = CREATE_NOT_DIR;
	struct cifs_open_parms oparms;

	xid = get_xid();
	mutex_lock(&cfile->fh_mutex);
	if (!cfile->invalidHandle) {
		/* someone else already reopened it - nothing to do */
		mutex_unlock(&cfile->fh_mutex);
		rc = 0;
		free_xid(xid);
		return rc;
	}

	inode = d_inode(cfile->dentry);
	cifs_sb = CIFS_SB(inode->i_sb);
	tcon = tlink_tcon(cfile->tlink);
	server = tcon->ses->server;

	/*
	 * Can not grab rename sem here because various ops, including those
	 * that already have the rename sem can end up causing writepage to get
	 * called and if the server was down that means we end up here, and we
	 * can never tell if the caller already has the rename_sem.
	 */
	full_path = build_path_from_dentry(cfile->dentry);
	if (full_path == NULL) {
		rc = -ENOMEM;
		mutex_unlock(&cfile->fh_mutex);
		free_xid(xid);
		return rc;
	}

	cifs_dbg(FYI, "inode = 0x%p file flags 0x%x for %s\n",
		 inode, cfile->f_flags, full_path);

	if (tcon->ses->server->oplocks)
		oplock = REQ_OPLOCK;
	else
		oplock = 0;

	if (tcon->unix_ext && cap_unix(tcon->ses) &&
	    (CIFS_UNIX_POSIX_PATH_OPS_CAP &
				le64_to_cpu(tcon->fsUnixInfo.Capability))) {
		/*
		 * O_CREAT, O_EXCL and O_TRUNC already had their effect on the
		 * original open. Must mask them off for a reopen.
		 */
		unsigned int oflags = cfile->f_flags &
						~(O_CREAT | O_EXCL | O_TRUNC);

		rc = cifs_posix_open(full_path, NULL, inode->i_sb,
				     cifs_sb->mnt_file_mode /* ignored */,
				     oflags, &oplock, &cfile->fid.netfid, xid);
		if (rc == 0) {
			cifs_dbg(FYI, "posix reopen succeeded\n");
			/* reconnect=true so locks are pushed again below */
			oparms.reconnect = true;
			goto reopen_success;
		}
		/*
		 * fallthrough to retry open the old way on errors, especially
		 * in the reconnect path it is important to retry hard
		 */
	}

	desired_access = cifs_convert_flags(cfile->f_flags);

	if (backup_cred(cifs_sb))
		create_options |= CREATE_OPEN_BACKUP_INTENT;

	if (server->ops->get_lease_key)
		server->ops->get_lease_key(inode, &cfile->fid);

	oparms.tcon = tcon;
	oparms.cifs_sb = cifs_sb;
	oparms.desired_access = desired_access;
	oparms.create_options = create_options;
	oparms.disposition = disposition;
	oparms.path = full_path;
	oparms.fid = &cfile->fid;
	oparms.reconnect = true;

	/*
	 * Can not refresh inode by passing in file_info buf to be returned by
	 * ops->open and then calling get_inode_info with returned buf since
	 * file might have write behind data that needs to be flushed and server
	 * version of file size can be stale. If we knew for sure that inode was
	 * not dirty locally we could do this.
	 */
	rc = server->ops->open(xid, &oparms, &oplock, NULL);
	/*
	 * NOTE(review): ops->open can apparently clear oparms.reconnect
	 * (durable handle was reconnected) - confirm in the smb2 ops.
	 */
	if (rc == -ENOENT && oparms.reconnect == false) {
		/* durable handle timeout is expired - open the file again */
		rc = server->ops->open(xid, &oparms, &oplock, NULL);
		/* indicate that we need to relock the file */
		oparms.reconnect = true;
	}

	if (rc) {
		mutex_unlock(&cfile->fh_mutex);
		cifs_dbg(FYI, "cifs_reopen returned 0x%x\n", rc);
		cifs_dbg(FYI, "oplock: %d\n", oplock);
		goto reopen_error_exit;
	}

reopen_success:
	cfile->invalidHandle = false;
	mutex_unlock(&cfile->fh_mutex);
	cinode = CIFS_I(inode);

	if (can_flush) {
		rc = filemap_write_and_wait(inode->i_mapping);
		mapping_set_error(inode->i_mapping, rc);

		if (tcon->unix_ext)
			rc = cifs_get_inode_info_unix(&inode, full_path,
						      inode->i_sb, xid);
		else
			rc = cifs_get_inode_info(&inode, full_path, NULL,
						 inode->i_sb, xid, NULL);
	}
	/*
	 * Else we are writing out data to server already and could deadlock if
	 * we tried to flush data, and since we do not know if we have data that
	 * would invalidate the current end of file on the server we can not go
	 * to the server to get the new inode info.
	 */

	server->ops->set_fid(cfile, &cfile->fid, oplock);
	if (oparms.reconnect)
		cifs_relock_file(cfile);

reopen_error_exit:
	kfree(full_path);
	free_xid(xid);
	return rc;
}
744
745 int cifs_close(struct inode *inode, struct file *file)
746 {
747         if (file->private_data != NULL) {
748                 cifsFileInfo_put(file->private_data);
749                 file->private_data = NULL;
750         }
751
752         /* return code from the ->release op is always ignored */
753         return 0;
754 }
755
/*
 * VFS ->release for directories: close the search handle on the server
 * if still needed, free any buffered search results and the private
 * data.  Wire errors from the close are logged and then ignored.
 */
int cifs_closedir(struct inode *inode, struct file *file)
{
	int rc = 0;
	unsigned int xid;
	struct cifsFileInfo *cfile = file->private_data;
	struct cifs_tcon *tcon;
	struct TCP_Server_Info *server;
	char *buf;

	cifs_dbg(FYI, "Closedir inode = 0x%p\n", inode);

	if (cfile == NULL)
		return rc;

	xid = get_xid();
	tcon = tlink_tcon(cfile->tlink);
	server = tcon->ses->server;

	cifs_dbg(FYI, "Freeing private data in close dir\n");
	spin_lock(&cifs_file_list_lock);
	if (server->ops->dir_needs_close(cfile)) {
		/* mark invalid before dropping the lock and going on wire */
		cfile->invalidHandle = true;
		spin_unlock(&cifs_file_list_lock);
		if (server->ops->close_dir)
			rc = server->ops->close_dir(xid, tcon, &cfile->fid);
		else
			rc = -ENOSYS;
		cifs_dbg(FYI, "Closing uncompleted readdir with rc %d\n", rc);
		/* not much we can do if it fails anyway, ignore rc */
		rc = 0;
	} else
		spin_unlock(&cifs_file_list_lock);

	/* release any network buffer still held by the search state */
	buf = cfile->srch_inf.ntwrk_buf_start;
	if (buf) {
		cifs_dbg(FYI, "closedir free smb buf in srch struct\n");
		cfile->srch_inf.ntwrk_buf_start = NULL;
		if (cfile->srch_inf.smallBuf)
			cifs_small_buf_release(buf);
		else
			cifs_buf_release(buf);
	}

	cifs_put_tlink(cfile->tlink);
	kfree(file->private_data);
	file->private_data = NULL;
	/* BB can we lock the filestruct while this is going on? */
	free_xid(xid);
	return rc;
}
806
807 static struct cifsLockInfo *
808 cifs_lock_init(__u64 offset, __u64 length, __u8 type)
809 {
810         struct cifsLockInfo *lock =
811                 kmalloc(sizeof(struct cifsLockInfo), GFP_KERNEL);
812         if (!lock)
813                 return lock;
814         lock->offset = offset;
815         lock->length = length;
816         lock->type = type;
817         lock->pid = current->tgid;
818         INIT_LIST_HEAD(&lock->blist);
819         init_waitqueue_head(&lock->block_q);
820         return lock;
821 }
822
823 void
824 cifs_del_lock_waiters(struct cifsLockInfo *lock)
825 {
826         struct cifsLockInfo *li, *tmp;
827         list_for_each_entry_safe(li, tmp, &lock->blist, blist) {
828                 list_del_init(&li->blist);
829                 wake_up(&li->block_q);
830         }
831 }
832
/* Values for the @rw_check argument below: what operation triggers the scan */
#define CIFS_LOCK_OP    0
#define CIFS_READ_OP    1
#define CIFS_WRITE_OP   2

/*
 * Scan the locks held through one fid (@fdlocks) for a range overlapping
 * [@offset, @offset + @length) that conflicts with a @type lock requested
 * through @cfile.  @rw_check is CIFS_LOCK_OP, CIFS_READ_OP or CIFS_WRITE_OP.
 * On conflict, stores the conflicting lock in *@conf_lock (when non-NULL)
 * and returns true; returns false when no conflict is found.
 * Caller is expected to hold the inode's lock_sem.
 */
static bool
cifs_find_fid_lock_conflict(struct cifs_fid_locks *fdlocks, __u64 offset,
                            __u64 length, __u8 type, struct cifsFileInfo *cfile,
                            struct cifsLockInfo **conf_lock, int rw_check)
{
        struct cifsLockInfo *li;
        struct cifsFileInfo *cur_cfile = fdlocks->cfile;
        struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;

        list_for_each_entry(li, &fdlocks->locks, llist) {
                /* non-overlapping ranges can never conflict */
                if (offset + length <= li->offset ||
                    offset >= li->offset + li->length)
                        continue;
                if (rw_check != CIFS_LOCK_OP && current->tgid == li->pid &&
                    server->ops->compare_fids(cfile, cur_cfile)) {
                        /* shared lock prevents write op through the same fid */
                        if (!(li->type & server->vals->shared_lock_type) ||
                            rw_check != CIFS_WRITE_OP)
                                continue;
                }
                /* a shared request is compatible with our own shared locks */
                if ((type & server->vals->shared_lock_type) &&
                    ((server->ops->compare_fids(cfile, cur_cfile) &&
                     current->tgid == li->pid) || type == li->type))
                        continue;
                if (conf_lock)
                        *conf_lock = li;
                return true;
        }
        return false;
}
868
869 bool
870 cifs_find_lock_conflict(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
871                         __u8 type, struct cifsLockInfo **conf_lock,
872                         int rw_check)
873 {
874         bool rc = false;
875         struct cifs_fid_locks *cur;
876         struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
877
878         list_for_each_entry(cur, &cinode->llist, llist) {
879                 rc = cifs_find_fid_lock_conflict(cur, offset, length, type,
880                                                  cfile, conf_lock, rw_check);
881                 if (rc)
882                         break;
883         }
884
885         return rc;
886 }
887
888 /*
889  * Check if there is another lock that prevents us to set the lock (mandatory
890  * style). If such a lock exists, update the flock structure with its
891  * properties. Otherwise, set the flock type to F_UNLCK if we can cache brlocks
892  * or leave it the same if we can't. Returns 0 if we don't need to request to
893  * the server or 1 otherwise.
894  */
895 static int
896 cifs_lock_test(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
897                __u8 type, struct file_lock *flock)
898 {
899         int rc = 0;
900         struct cifsLockInfo *conf_lock;
901         struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
902         struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
903         bool exist;
904
905         down_read(&cinode->lock_sem);
906
907         exist = cifs_find_lock_conflict(cfile, offset, length, type,
908                                         &conf_lock, CIFS_LOCK_OP);
909         if (exist) {
910                 flock->fl_start = conf_lock->offset;
911                 flock->fl_end = conf_lock->offset + conf_lock->length - 1;
912                 flock->fl_pid = conf_lock->pid;
913                 if (conf_lock->type & server->vals->shared_lock_type)
914                         flock->fl_type = F_RDLCK;
915                 else
916                         flock->fl_type = F_WRLCK;
917         } else if (!cinode->can_cache_brlcks)
918                 rc = 1;
919         else
920                 flock->fl_type = F_UNLCK;
921
922         up_read(&cinode->lock_sem);
923         return rc;
924 }
925
/*
 * Unconditionally append @lock to @cfile's lock list.  Takes the inode's
 * lock_sem for writing to serialize with conflict scans and unlock paths.
 */
static void
cifs_lock_add(struct cifsFileInfo *cfile, struct cifsLockInfo *lock)
{
        struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
        down_write(&cinode->lock_sem);
        list_add_tail(&lock->llist, &cfile->llist->locks);
        up_write(&cinode->lock_sem);
}
934
935 /*
936  * Set the byte-range lock (mandatory style). Returns:
937  * 1) 0, if we set the lock and don't need to request to the server;
938  * 2) 1, if no locks prevent us but we need to request to the server;
939  * 3) -EACCESS, if there is a lock that prevents us and wait is false.
940  */
941 static int
942 cifs_lock_add_if(struct cifsFileInfo *cfile, struct cifsLockInfo *lock,
943                  bool wait)
944 {
945         struct cifsLockInfo *conf_lock;
946         struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
947         bool exist;
948         int rc = 0;
949
950 try_again:
951         exist = false;
952         down_write(&cinode->lock_sem);
953
954         exist = cifs_find_lock_conflict(cfile, lock->offset, lock->length,
955                                         lock->type, &conf_lock, CIFS_LOCK_OP);
956         if (!exist && cinode->can_cache_brlcks) {
957                 list_add_tail(&lock->llist, &cfile->llist->locks);
958                 up_write(&cinode->lock_sem);
959                 return rc;
960         }
961
962         if (!exist)
963                 rc = 1;
964         else if (!wait)
965                 rc = -EACCES;
966         else {
967                 list_add_tail(&lock->blist, &conf_lock->blist);
968                 up_write(&cinode->lock_sem);
969                 rc = wait_event_interruptible(lock->block_q,
970                                         (lock->blist.prev == &lock->blist) &&
971                                         (lock->blist.next == &lock->blist));
972                 if (!rc)
973                         goto try_again;
974                 down_write(&cinode->lock_sem);
975                 list_del_init(&lock->blist);
976         }
977
978         up_write(&cinode->lock_sem);
979         return rc;
980 }
981
982 /*
983  * Check if there is another lock that prevents us to set the lock (posix
984  * style). If such a lock exists, update the flock structure with its
985  * properties. Otherwise, set the flock type to F_UNLCK if we can cache brlocks
986  * or leave it the same if we can't. Returns 0 if we don't need to request to
987  * the server or 1 otherwise.
988  */
989 static int
990 cifs_posix_lock_test(struct file *file, struct file_lock *flock)
991 {
992         int rc = 0;
993         struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
994         unsigned char saved_type = flock->fl_type;
995
996         if ((flock->fl_flags & FL_POSIX) == 0)
997                 return 1;
998
999         down_read(&cinode->lock_sem);
1000         posix_test_lock(file, flock);
1001
1002         if (flock->fl_type == F_UNLCK && !cinode->can_cache_brlcks) {
1003                 flock->fl_type = saved_type;
1004                 rc = 1;
1005         }
1006
1007         up_read(&cinode->lock_sem);
1008         return rc;
1009 }
1010
1011 /*
1012  * Set the byte-range lock (posix style). Returns:
1013  * 1) 0, if we set the lock and don't need to request to the server;
1014  * 2) 1, if we need to request to the server;
1015  * 3) <0, if the error occurs while setting the lock.
1016  */
1017 static int
1018 cifs_posix_lock_set(struct file *file, struct file_lock *flock)
1019 {
1020         struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
1021         int rc = 1;
1022
1023         if ((flock->fl_flags & FL_POSIX) == 0)
1024                 return rc;
1025
1026 try_again:
1027         down_write(&cinode->lock_sem);
1028         if (!cinode->can_cache_brlcks) {
1029                 up_write(&cinode->lock_sem);
1030                 return rc;
1031         }
1032
1033         rc = posix_lock_file(file, flock, NULL);
1034         up_write(&cinode->lock_sem);
1035         if (rc == FILE_LOCK_DEFERRED) {
1036                 rc = wait_event_interruptible(flock->fl_wait, !flock->fl_next);
1037                 if (!rc)
1038                         goto try_again;
1039                 posix_unblock_lock(flock);
1040         }
1041         return rc;
1042 }
1043
1044 int
1045 cifs_push_mandatory_locks(struct cifsFileInfo *cfile)
1046 {
1047         unsigned int xid;
1048         int rc = 0, stored_rc;
1049         struct cifsLockInfo *li, *tmp;
1050         struct cifs_tcon *tcon;
1051         unsigned int num, max_num, max_buf;
1052         LOCKING_ANDX_RANGE *buf, *cur;
1053         int types[] = {LOCKING_ANDX_LARGE_FILES,
1054                        LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES};
1055         int i;
1056
1057         xid = get_xid();
1058         tcon = tlink_tcon(cfile->tlink);
1059
1060         /*
1061          * Accessing maxBuf is racy with cifs_reconnect - need to store value
1062          * and check it for zero before using.
1063          */
1064         max_buf = tcon->ses->server->maxBuf;
1065         if (!max_buf) {
1066                 free_xid(xid);
1067                 return -EINVAL;
1068         }
1069
1070         max_num = (max_buf - sizeof(struct smb_hdr)) /
1071                                                 sizeof(LOCKING_ANDX_RANGE);
1072         buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
1073         if (!buf) {
1074                 free_xid(xid);
1075                 return -ENOMEM;
1076         }
1077
1078         for (i = 0; i < 2; i++) {
1079                 cur = buf;
1080                 num = 0;
1081                 list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
1082                         if (li->type != types[i])
1083                                 continue;
1084                         cur->Pid = cpu_to_le16(li->pid);
1085                         cur->LengthLow = cpu_to_le32((u32)li->length);
1086                         cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
1087                         cur->OffsetLow = cpu_to_le32((u32)li->offset);
1088                         cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
1089                         if (++num == max_num) {
1090                                 stored_rc = cifs_lockv(xid, tcon,
1091                                                        cfile->fid.netfid,
1092                                                        (__u8)li->type, 0, num,
1093                                                        buf);
1094                                 if (stored_rc)
1095                                         rc = stored_rc;
1096                                 cur = buf;
1097                                 num = 0;
1098                         } else
1099                                 cur++;
1100                 }
1101
1102                 if (num) {
1103                         stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
1104                                                (__u8)types[i], 0, num, buf);
1105                         if (stored_rc)
1106                                 rc = stored_rc;
1107                 }
1108         }
1109
1110         kfree(buf);
1111         free_xid(xid);
1112         return rc;
1113 }
1114
/*
 * Snapshot of one POSIX lock, preallocated outside the flc_lock spinlock so
 * cifs_push_posix_locks() can send it to the server without sleeping while
 * holding the spinlock.
 */
struct lock_to_push {
        struct list_head llist; /* entry in the local locks_to_send list */
        __u64 offset;           /* start of the locked range */
        __u64 length;           /* length of the locked range */
        __u32 pid;              /* lock owner's pid (from fl_pid) */
        __u16 netfid;           /* file handle the lock was taken on */
        __u8 type;              /* CIFS_RDLCK or CIFS_WRLCK */
};
1123
1124 static int
1125 cifs_push_posix_locks(struct cifsFileInfo *cfile)
1126 {
1127         struct inode *inode = d_inode(cfile->dentry);
1128         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1129         struct file_lock *flock;
1130         struct file_lock_context *flctx = inode->i_flctx;
1131         unsigned int count = 0, i;
1132         int rc = 0, xid, type;
1133         struct list_head locks_to_send, *el;
1134         struct lock_to_push *lck, *tmp;
1135         __u64 length;
1136
1137         xid = get_xid();
1138
1139         if (!flctx)
1140                 goto out;
1141
1142         spin_lock(&flctx->flc_lock);
1143         list_for_each(el, &flctx->flc_posix) {
1144                 count++;
1145         }
1146         spin_unlock(&flctx->flc_lock);
1147
1148         INIT_LIST_HEAD(&locks_to_send);
1149
1150         /*
1151          * Allocating count locks is enough because no FL_POSIX locks can be
1152          * added to the list while we are holding cinode->lock_sem that
1153          * protects locking operations of this inode.
1154          */
1155         for (i = 0; i < count; i++) {
1156                 lck = kmalloc(sizeof(struct lock_to_push), GFP_KERNEL);
1157                 if (!lck) {
1158                         rc = -ENOMEM;
1159                         goto err_out;
1160                 }
1161                 list_add_tail(&lck->llist, &locks_to_send);
1162         }
1163
1164         el = locks_to_send.next;
1165         spin_lock(&flctx->flc_lock);
1166         list_for_each_entry(flock, &flctx->flc_posix, fl_list) {
1167                 if (el == &locks_to_send) {
1168                         /*
1169                          * The list ended. We don't have enough allocated
1170                          * structures - something is really wrong.
1171                          */
1172                         cifs_dbg(VFS, "Can't push all brlocks!\n");
1173                         break;
1174                 }
1175                 length = 1 + flock->fl_end - flock->fl_start;
1176                 if (flock->fl_type == F_RDLCK || flock->fl_type == F_SHLCK)
1177                         type = CIFS_RDLCK;
1178                 else
1179                         type = CIFS_WRLCK;
1180                 lck = list_entry(el, struct lock_to_push, llist);
1181                 lck->pid = flock->fl_pid;
1182                 lck->netfid = cfile->fid.netfid;
1183                 lck->length = length;
1184                 lck->type = type;
1185                 lck->offset = flock->fl_start;
1186         }
1187         spin_unlock(&flctx->flc_lock);
1188
1189         list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
1190                 int stored_rc;
1191
1192                 stored_rc = CIFSSMBPosixLock(xid, tcon, lck->netfid, lck->pid,
1193                                              lck->offset, lck->length, NULL,
1194                                              lck->type, 0);
1195                 if (stored_rc)
1196                         rc = stored_rc;
1197                 list_del(&lck->llist);
1198                 kfree(lck);
1199         }
1200
1201 out:
1202         free_xid(xid);
1203         return rc;
1204 err_out:
1205         list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
1206                 list_del(&lck->llist);
1207                 kfree(lck);
1208         }
1209         goto out;
1210 }
1211
/*
 * Send all locks cached while we held an oplock/lease to the server, using
 * POSIX lock calls when the server's unix extensions allow it (and the mount
 * does not forbid it) or mandatory LOCKING_ANDX calls otherwise.  Clears
 * can_cache_brlcks so subsequent lock requests go straight to the server.
 */
static int
cifs_push_locks(struct cifsFileInfo *cfile)
{
        struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
        struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
        struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
        int rc = 0;

        /* we are going to update can_cache_brlcks here - need a write access */
        down_write(&cinode->lock_sem);
        if (!cinode->can_cache_brlcks) {
                /* another task already pushed this inode's locks */
                up_write(&cinode->lock_sem);
                return rc;
        }

        if (cap_unix(tcon->ses) &&
            (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
            ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
                rc = cifs_push_posix_locks(cfile);
        else
                rc = tcon->ses->server->ops->push_mand_locks(cfile);

        cinode->can_cache_brlcks = false;
        up_write(&cinode->lock_sem);
        return rc;
}
1238
1239 static void
1240 cifs_read_flock(struct file_lock *flock, __u32 *type, int *lock, int *unlock,
1241                 bool *wait_flag, struct TCP_Server_Info *server)
1242 {
1243         if (flock->fl_flags & FL_POSIX)
1244                 cifs_dbg(FYI, "Posix\n");
1245         if (flock->fl_flags & FL_FLOCK)
1246                 cifs_dbg(FYI, "Flock\n");
1247         if (flock->fl_flags & FL_SLEEP) {
1248                 cifs_dbg(FYI, "Blocking lock\n");
1249                 *wait_flag = true;
1250         }
1251         if (flock->fl_flags & FL_ACCESS)
1252                 cifs_dbg(FYI, "Process suspended by mandatory locking - not implemented yet\n");
1253         if (flock->fl_flags & FL_LEASE)
1254                 cifs_dbg(FYI, "Lease on file - not implemented yet\n");
1255         if (flock->fl_flags &
1256             (~(FL_POSIX | FL_FLOCK | FL_SLEEP |
1257                FL_ACCESS | FL_LEASE | FL_CLOSE)))
1258                 cifs_dbg(FYI, "Unknown lock flags 0x%x\n", flock->fl_flags);
1259
1260         *type = server->vals->large_lock_type;
1261         if (flock->fl_type == F_WRLCK) {
1262                 cifs_dbg(FYI, "F_WRLCK\n");
1263                 *type |= server->vals->exclusive_lock_type;
1264                 *lock = 1;
1265         } else if (flock->fl_type == F_UNLCK) {
1266                 cifs_dbg(FYI, "F_UNLCK\n");
1267                 *type |= server->vals->unlock_lock_type;
1268                 *unlock = 1;
1269                 /* Check if unlock includes more than one lock range */
1270         } else if (flock->fl_type == F_RDLCK) {
1271                 cifs_dbg(FYI, "F_RDLCK\n");
1272                 *type |= server->vals->shared_lock_type;
1273                 *lock = 1;
1274         } else if (flock->fl_type == F_EXLCK) {
1275                 cifs_dbg(FYI, "F_EXLCK\n");
1276                 *type |= server->vals->exclusive_lock_type;
1277                 *lock = 1;
1278         } else if (flock->fl_type == F_SHLCK) {
1279                 cifs_dbg(FYI, "F_SHLCK\n");
1280                 *type |= server->vals->shared_lock_type;
1281                 *lock = 1;
1282         } else
1283                 cifs_dbg(FYI, "Unknown type of lock\n");
1284 }
1285
/*
 * Handle an F_GETLK request: determine whether @flock could be granted on
 * @cfile and, if not, fill @flock with the conflicting lock's properties.
 *
 * POSIX path: answer from the local cache via cifs_posix_lock_test() when
 * possible, otherwise query the server with CIFSSMBPosixLock().
 * Mandatory path: probe the server by setting and immediately releasing the
 * requested range; if the exclusive probe fails, retry with a shared probe
 * to distinguish read conflicts from write conflicts.
 */
static int
cifs_getlk(struct file *file, struct file_lock *flock, __u32 type,
           bool wait_flag, bool posix_lck, unsigned int xid)
{
        int rc = 0;
        __u64 length = 1 + flock->fl_end - flock->fl_start;
        struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
        struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
        struct TCP_Server_Info *server = tcon->ses->server;
        __u16 netfid = cfile->fid.netfid;

        if (posix_lck) {
                int posix_lock_type;

                /* rc == 0 means the query was answered locally */
                rc = cifs_posix_lock_test(file, flock);
                if (!rc)
                        return rc;

                if (type & server->vals->shared_lock_type)
                        posix_lock_type = CIFS_RDLCK;
                else
                        posix_lock_type = CIFS_WRLCK;
                rc = CIFSSMBPosixLock(xid, tcon, netfid, current->tgid,
                                      flock->fl_start, length, flock,
                                      posix_lock_type, wait_flag);
                return rc;
        }

        /* rc == 0 means the local lock list answered the query */
        rc = cifs_lock_test(cfile, flock->fl_start, length, type, flock);
        if (!rc)
                return rc;

        /* BB we could chain these into one lock request BB */
        rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length, type,
                                    1, 0, false);
        if (rc == 0) {
                /* probe succeeded - release it again, the range is free */
                rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
                                            type, 0, 1, false);
                flock->fl_type = F_UNLCK;
                if (rc != 0)
                        cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n",
                                 rc);
                return 0;
        }

        if (type & server->vals->shared_lock_type) {
                /* even a shared probe failed - must be a write conflict */
                flock->fl_type = F_WRLCK;
                return 0;
        }

        type &= ~server->vals->exclusive_lock_type;

        /* exclusive probe failed - retry shared to classify the conflict */
        rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
                                    type | server->vals->shared_lock_type,
                                    1, 0, false);
        if (rc == 0) {
                rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
                        type | server->vals->shared_lock_type, 0, 1, false);
                flock->fl_type = F_RDLCK;
                if (rc != 0)
                        cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n",
                                 rc);
        } else
                flock->fl_type = F_WRLCK;

        return 0;
}
1353
1354 void
1355 cifs_move_llist(struct list_head *source, struct list_head *dest)
1356 {
1357         struct list_head *li, *tmp;
1358         list_for_each_safe(li, tmp, source)
1359                 list_move(li, dest);
1360 }
1361
1362 void
1363 cifs_free_llist(struct list_head *llist)
1364 {
1365         struct cifsLockInfo *li, *tmp;
1366         list_for_each_entry_safe(li, tmp, llist, llist) {
1367                 cifs_del_lock_waiters(li);
1368                 list_del(&li->llist);
1369                 kfree(li);
1370         }
1371 }
1372
1373 int
1374 cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock,
1375                   unsigned int xid)
1376 {
1377         int rc = 0, stored_rc;
1378         int types[] = {LOCKING_ANDX_LARGE_FILES,
1379                        LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES};
1380         unsigned int i;
1381         unsigned int max_num, num, max_buf;
1382         LOCKING_ANDX_RANGE *buf, *cur;
1383         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1384         struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1385         struct cifsLockInfo *li, *tmp;
1386         __u64 length = 1 + flock->fl_end - flock->fl_start;
1387         struct list_head tmp_llist;
1388
1389         INIT_LIST_HEAD(&tmp_llist);
1390
1391         /*
1392          * Accessing maxBuf is racy with cifs_reconnect - need to store value
1393          * and check it for zero before using.
1394          */
1395         max_buf = tcon->ses->server->maxBuf;
1396         if (!max_buf)
1397                 return -EINVAL;
1398
1399         max_num = (max_buf - sizeof(struct smb_hdr)) /
1400                                                 sizeof(LOCKING_ANDX_RANGE);
1401         buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
1402         if (!buf)
1403                 return -ENOMEM;
1404
1405         down_write(&cinode->lock_sem);
1406         for (i = 0; i < 2; i++) {
1407                 cur = buf;
1408                 num = 0;
1409                 list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
1410                         if (flock->fl_start > li->offset ||
1411                             (flock->fl_start + length) <
1412                             (li->offset + li->length))
1413                                 continue;
1414                         if (current->tgid != li->pid)
1415                                 continue;
1416                         if (types[i] != li->type)
1417                                 continue;
1418                         if (cinode->can_cache_brlcks) {
1419                                 /*
1420                                  * We can cache brlock requests - simply remove
1421                                  * a lock from the file's list.
1422                                  */
1423                                 list_del(&li->llist);
1424                                 cifs_del_lock_waiters(li);
1425                                 kfree(li);
1426                                 continue;
1427                         }
1428                         cur->Pid = cpu_to_le16(li->pid);
1429                         cur->LengthLow = cpu_to_le32((u32)li->length);
1430                         cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
1431                         cur->OffsetLow = cpu_to_le32((u32)li->offset);
1432                         cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
1433                         /*
1434                          * We need to save a lock here to let us add it again to
1435                          * the file's list if the unlock range request fails on
1436                          * the server.
1437                          */
1438                         list_move(&li->llist, &tmp_llist);
1439                         if (++num == max_num) {
1440                                 stored_rc = cifs_lockv(xid, tcon,
1441                                                        cfile->fid.netfid,
1442                                                        li->type, num, 0, buf);
1443                                 if (stored_rc) {
1444                                         /*
1445                                          * We failed on the unlock range
1446                                          * request - add all locks from the tmp
1447                                          * list to the head of the file's list.
1448                                          */
1449                                         cifs_move_llist(&tmp_llist,
1450                                                         &cfile->llist->locks);
1451                                         rc = stored_rc;
1452                                 } else
1453                                         /*
1454                                          * The unlock range request succeed -
1455                                          * free the tmp list.
1456                                          */
1457                                         cifs_free_llist(&tmp_llist);
1458                                 cur = buf;
1459                                 num = 0;
1460                         } else
1461                                 cur++;
1462                 }
1463                 if (num) {
1464                         stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
1465                                                types[i], num, 0, buf);
1466                         if (stored_rc) {
1467                                 cifs_move_llist(&tmp_llist,
1468                                                 &cfile->llist->locks);
1469                                 rc = stored_rc;
1470                         } else
1471                                 cifs_free_llist(&tmp_llist);
1472                 }
1473         }
1474
1475         up_write(&cinode->lock_sem);
1476         kfree(buf);
1477         return rc;
1478 }
1479
/*
 * Handle F_SETLK/F_SETLKW (lock or unlock) for @file.  POSIX-capable mounts
 * go through CIFSSMBPosixLock(); mandatory-style mounts add the lock to the
 * local list (possibly waiting on a conflicting lock) and send it to the
 * server when brlocks cannot be cached.  On any path the VFS's own posix
 * lock state is kept in sync at "out".
 */
static int
cifs_setlk(struct file *file, struct file_lock *flock, __u32 type,
           bool wait_flag, bool posix_lck, int lock, int unlock,
           unsigned int xid)
{
        int rc = 0;
        __u64 length = 1 + flock->fl_end - flock->fl_start;
        struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
        struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
        struct TCP_Server_Info *server = tcon->ses->server;
        struct inode *inode = d_inode(cfile->dentry);

        if (posix_lck) {
                int posix_lock_type;

                /* rc == 0: handled from the local cache; rc < 0: error */
                rc = cifs_posix_lock_set(file, flock);
                if (!rc || rc < 0)
                        return rc;

                if (type & server->vals->shared_lock_type)
                        posix_lock_type = CIFS_RDLCK;
                else
                        posix_lock_type = CIFS_WRLCK;

                if (unlock == 1)
                        posix_lock_type = CIFS_UNLCK;

                rc = CIFSSMBPosixLock(xid, tcon, cfile->fid.netfid,
                                      current->tgid, flock->fl_start, length,
                                      NULL, posix_lock_type, wait_flag);
                goto out;
        }

        if (lock) {
                struct cifsLockInfo *lock;

                lock = cifs_lock_init(flock->fl_start, length, type);
                if (!lock)
                        return -ENOMEM;

                /* rc == 0: cached locally; rc == 1: must ask the server */
                rc = cifs_lock_add_if(cfile, lock, wait_flag);
                if (rc < 0) {
                        kfree(lock);
                        return rc;
                }
                if (!rc)
                        goto out;

                /*
                 * Windows 7 server can delay breaking lease from read to None
                 * if we set a byte-range lock on a file - break it explicitly
                 * before sending the lock to the server to be sure the next
                 * read won't conflict with non-overlapted locks due to
                 * pagereading.
                 */
                if (!CIFS_CACHE_WRITE(CIFS_I(inode)) &&
                                        CIFS_CACHE_READ(CIFS_I(inode))) {
                        cifs_zap_mapping(inode);
                        cifs_dbg(FYI, "Set no oplock for inode=%p due to mand locks\n",
                                 inode);
                        CIFS_I(inode)->oplock = 0;
                }

                rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
                                            type, 1, 0, wait_flag);
                if (rc) {
                        /* server refused - we own the lock struct, free it */
                        kfree(lock);
                        return rc;
                }

                /* server granted the lock - record it locally */
                cifs_lock_add(cfile, lock);
        } else if (unlock)
                rc = server->ops->mand_unlock_range(cfile, flock, xid);

out:
        if (flock->fl_flags & FL_POSIX && !rc)
                rc = posix_lock_file_wait(file, flock);
        return rc;
}
1559
/*
 * VFS ->lock handler for fcntl()-style byte-range locks.  Decides between
 * POSIX (CIFS Unix extensions) and Windows/mandatory lock semantics, then
 * dispatches GETLK queries to cifs_getlk() and SETLK/SETLKW requests to
 * cifs_setlk().  Returns 0 on success or a negative errno.
 */
int cifs_lock(struct file *file, int cmd, struct file_lock *flock)
{
	int rc, xid;
	int lock = 0, unlock = 0;
	bool wait_flag = false;
	bool posix_lck = false;
	struct cifs_sb_info *cifs_sb;
	struct cifs_tcon *tcon;
	struct cifsInodeInfo *cinode;
	struct cifsFileInfo *cfile;
	__u16 netfid;
	__u32 type;

	rc = -EACCES;
	xid = get_xid();

	cifs_dbg(FYI, "Lock parm: 0x%x flockflags: 0x%x flocktype: 0x%x start: %lld end: %lld\n",
		 cmd, flock->fl_flags, flock->fl_type,
		 flock->fl_start, flock->fl_end);

	cfile = (struct cifsFileInfo *)file->private_data;
	tcon = tlink_tcon(cfile->tlink);

	/* decode the VFS flock into CIFS lock type and lock/unlock intent */
	cifs_read_flock(flock, &type, &lock, &unlock, &wait_flag,
			tcon->ses->server);

	cifs_sb = CIFS_FILE_SB(file);
	netfid = cfile->fid.netfid;
	cinode = CIFS_I(file_inode(file));

	/*
	 * Use POSIX lock semantics only when the server advertises the Unix
	 * extensions fcntl capability and the mount did not opt out of POSIX
	 * byte-range locks (nobrl).
	 */
	if (cap_unix(tcon->ses) &&
	    (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
	    ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
		posix_lck = true;
	/*
	 * BB add code here to normalize offset and length to account for
	 * negative length which we can not accept over the wire.
	 */
	if (IS_GETLK(cmd)) {
		rc = cifs_getlk(file, flock, type, wait_flag, posix_lck, xid);
		free_xid(xid);
		return rc;
	}

	if (!lock && !unlock) {
		/*
		 * if no lock or unlock then nothing to do since we do not
		 * know what it is
		 */
		free_xid(xid);
		return -EOPNOTSUPP;
	}

	rc = cifs_setlk(file, flock, type, wait_flag, posix_lck, lock, unlock,
			xid);
	free_xid(xid);
	return rc;
}
1618
1619 /*
1620  * update the file size (if needed) after a write. Should be called with
1621  * the inode->i_lock held
1622  */
1623 void
1624 cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset,
1625                       unsigned int bytes_written)
1626 {
1627         loff_t end_of_write = offset + bytes_written;
1628
1629         if (end_of_write > cifsi->server_eof)
1630                 cifsi->server_eof = end_of_write;
1631 }
1632
/*
 * Synchronously write write_size bytes from write_data to the server at
 * *offset using the given open handle, looping until everything is sent.
 * Retries on -EAGAIN, reopening an invalidated handle first; updates
 * *offset, the cached server EOF and (if the file grew) i_size.  Returns
 * the number of bytes written, or a negative errno if nothing was written.
 */
static ssize_t
cifs_write(struct cifsFileInfo *open_file, __u32 pid, const char *write_data,
	   size_t write_size, loff_t *offset)
{
	int rc = 0;
	unsigned int bytes_written = 0;
	unsigned int total_written;
	struct cifs_sb_info *cifs_sb;
	struct cifs_tcon *tcon;
	struct TCP_Server_Info *server;
	unsigned int xid;
	struct dentry *dentry = open_file->dentry;
	struct cifsInodeInfo *cifsi = CIFS_I(d_inode(dentry));
	struct cifs_io_parms io_parms;

	cifs_sb = CIFS_SB(dentry->d_sb);

	cifs_dbg(FYI, "write %zd bytes to offset %lld of %pd\n",
		 write_size, *offset, dentry);

	tcon = tlink_tcon(open_file->tlink);
	server = tcon->ses->server;

	if (!server->ops->sync_write)
		return -ENOSYS;

	xid = get_xid();

	for (total_written = 0; write_size > total_written;
	     total_written += bytes_written) {
		rc = -EAGAIN;
		while (rc == -EAGAIN) {
			struct kvec iov[2];
			unsigned int len;

			if (open_file->invalidHandle) {
				/* we could deadlock if we called
				   filemap_fdatawait from here so tell
				   reopen_file not to flush data to
				   server now */
				rc = cifs_reopen_file(open_file, false);
				if (rc != 0)
					break;
			}

			/* cap each request at the server's retry write size */
			len = min(server->ops->wp_retry_size(d_inode(dentry)),
				  (unsigned int)write_size - total_written);
			/* iov[0] is reserved for smb header */
			iov[1].iov_base = (char *)write_data + total_written;
			iov[1].iov_len = len;
			io_parms.pid = pid;
			io_parms.tcon = tcon;
			io_parms.offset = *offset;
			io_parms.length = len;
			rc = server->ops->sync_write(xid, &open_file->fid,
					&io_parms, &bytes_written, iov, 1);
		}
		if (rc || (bytes_written == 0)) {
			/* return the partial count if any data already went
			   out; only return the error when nothing was sent */
			if (total_written)
				break;
			else {
				free_xid(xid);
				return rc;
			}
		} else {
			spin_lock(&d_inode(dentry)->i_lock);
			cifs_update_eof(cifsi, *offset, bytes_written);
			spin_unlock(&d_inode(dentry)->i_lock);
			*offset += bytes_written;
		}
	}

	cifs_stats_bytes_written(tcon, total_written);

	/* grow i_size under i_lock if this write extended the file */
	if (total_written > 0) {
		spin_lock(&d_inode(dentry)->i_lock);
		if (*offset > d_inode(dentry)->i_size)
			i_size_write(d_inode(dentry), *offset);
		spin_unlock(&d_inode(dentry)->i_lock);
	}
	mark_inode_dirty_sync(d_inode(dentry));
	free_xid(xid);
	return total_written;
}
1717
/*
 * Find an open, readable, valid handle for this inode and return it with
 * a reference held (caller must cifsFileInfo_put it), or NULL if none.
 * When fsuid_only is set on a multiuser mount, only handles opened by the
 * current fsuid are considered.
 */
struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode,
					bool fsuid_only)
{
	struct cifsFileInfo *open_file = NULL;
	struct cifs_sb_info *cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);

	/* only filter by fsuid on multiuser mounts */
	if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
		fsuid_only = false;

	spin_lock(&cifs_file_list_lock);
	/* we could simply get the first_list_entry since write-only entries
	   are always at the end of the list but since the first entry might
	   have a close pending, we go through the whole list */
	list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
		if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
			continue;
		if (OPEN_FMODE(open_file->f_flags) & FMODE_READ) {
			if (!open_file->invalidHandle) {
				/* found a good file */
				/* lock it so it will not be closed on us */
				cifsFileInfo_get_locked(open_file);
				spin_unlock(&cifs_file_list_lock);
				return open_file;
			} /* else might as well continue, and look for
			     another, or simply have the caller reopen it
			     again rather than trying to fix this handle */
		} else /* write only file */
			break; /* write only files are last so must be done */
	}
	spin_unlock(&cifs_file_list_lock);
	return NULL;
}
1751
/*
 * Find an open, writable handle for this inode and return it with a
 * reference held (caller must cifsFileInfo_put it), or NULL if none.
 * Prefers a valid handle belonging to the current tgid, then any valid
 * handle; as a last resort it tries to reopen an invalidated handle, up
 * to MAX_REOPEN_ATT times.
 */
struct cifsFileInfo *find_writable_file(struct cifsInodeInfo *cifs_inode,
					bool fsuid_only)
{
	struct cifsFileInfo *open_file, *inv_file = NULL;
	struct cifs_sb_info *cifs_sb;
	bool any_available = false;
	int rc;
	unsigned int refind = 0;

	/* Having a null inode here (because mapping->host was set to zero by
	the VFS or MM) should not happen but we had reports of an oops (due to
	it being zero) during stress testcases so we need to check for it */

	if (cifs_inode == NULL) {
		cifs_dbg(VFS, "Null inode passed to cifs_writeable_file\n");
		dump_stack();
		return NULL;
	}

	cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);

	/* only filter by fsuid on multiuser mounts */
	if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
		fsuid_only = false;

	spin_lock(&cifs_file_list_lock);
refind_writable:
	/* give up after too many failed reopen attempts */
	if (refind > MAX_REOPEN_ATT) {
		spin_unlock(&cifs_file_list_lock);
		return NULL;
	}
	list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
		/* first pass only considers handles owned by this tgid */
		if (!any_available && open_file->pid != current->tgid)
			continue;
		if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
			continue;
		if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
			if (!open_file->invalidHandle) {
				/* found a good writable file */
				cifsFileInfo_get_locked(open_file);
				spin_unlock(&cifs_file_list_lock);
				return open_file;
			} else {
				/* remember first invalid handle as fallback */
				if (!inv_file)
					inv_file = open_file;
			}
		}
	}
	/* couldn't find useable FH with same pid, try any available */
	if (!any_available) {
		any_available = true;
		goto refind_writable;
	}

	/* hold a reference on the invalid handle before dropping the lock */
	if (inv_file) {
		any_available = false;
		cifsFileInfo_get_locked(inv_file);
	}

	spin_unlock(&cifs_file_list_lock);

	if (inv_file) {
		rc = cifs_reopen_file(inv_file, false);
		if (!rc)
			return inv_file;
		else {
			/* reopen failed: push this handle to the list tail so
			   the retry scans other candidates first */
			spin_lock(&cifs_file_list_lock);
			list_move_tail(&inv_file->flist,
					&cifs_inode->openFileList);
			spin_unlock(&cifs_file_list_lock);
			cifsFileInfo_put(inv_file);
			spin_lock(&cifs_file_list_lock);
			++refind;
			inv_file = NULL;
			goto refind_writable;
		}
	}

	return NULL;
}
1832
/*
 * Write the byte range [from, to) of a page cache page to the server via
 * cifs_write(), using any writable handle on the inode.  Silently skips
 * ranges beyond i_size (racing truncate) and clamps the range so the file
 * is never extended.  Returns 0 on success or a negative errno.
 */
static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to)
{
	struct address_space *mapping = page->mapping;
	loff_t offset = (loff_t)page->index << PAGE_CACHE_SHIFT;
	char *write_data;
	int rc = -EFAULT;
	int bytes_written = 0;
	struct inode *inode;
	struct cifsFileInfo *open_file;

	if (!mapping || !mapping->host)
		return -EFAULT;

	inode = page->mapping->host;

	offset += (loff_t)from;
	write_data = kmap(page);
	write_data += from;

	/* sanity check the requested sub-page range */
	if ((to > PAGE_CACHE_SIZE) || (from > to)) {
		kunmap(page);
		return -EIO;
	}

	/* racing with truncate? */
	if (offset > mapping->host->i_size) {
		kunmap(page);
		return 0; /* don't care */
	}

	/* check to make sure that we are not extending the file */
	if (mapping->host->i_size - offset < (loff_t)to)
		to = (unsigned)(mapping->host->i_size - offset);

	open_file = find_writable_file(CIFS_I(mapping->host), false);
	if (open_file) {
		bytes_written = cifs_write(open_file, open_file->pid,
					   write_data, to - from, &offset);
		cifsFileInfo_put(open_file);
		/* Does mm or vfs already set times? */
		inode->i_atime = inode->i_mtime = current_fs_time(inode->i_sb);
		if ((bytes_written > 0) && (offset))
			rc = 0;
		else if (bytes_written < 0)
			rc = bytes_written;
	} else {
		cifs_dbg(FYI, "No writeable filehandles for inode\n");
		rc = -EIO;
	}

	kunmap(page);
	return rc;
}
1886
/*
 * Allocate a cifs_writedata able to hold up to tofind pages and fill its
 * page array with dirty pages from the mapping starting at *index (which
 * is advanced by find_get_pages_tag).  The number of pages collected is
 * returned via *found_pages; references on those pages are held.  Returns
 * NULL if the writedata allocation fails.
 */
static struct cifs_writedata *
wdata_alloc_and_fillpages(pgoff_t tofind, struct address_space *mapping,
			  pgoff_t end, pgoff_t *index,
			  unsigned int *found_pages)
{
	unsigned int nr_pages;
	struct page **pages;
	struct cifs_writedata *wdata;

	wdata = cifs_writedata_alloc((unsigned int)tofind,
				     cifs_writev_complete);
	if (!wdata)
		return NULL;

	/*
	 * find_get_pages_tag seems to return a max of 256 on each
	 * iteration, so we must call it several times in order to
	 * fill the array or the wsize is effectively limited to
	 * 256 * PAGE_CACHE_SIZE.
	 */
	*found_pages = 0;
	pages = wdata->pages;
	do {
		nr_pages = find_get_pages_tag(mapping, index,
					      PAGECACHE_TAG_DIRTY, tofind,
					      pages);
		*found_pages += nr_pages;
		tofind -= nr_pages;
		pages += nr_pages;
	} while (nr_pages && tofind && *index <= end);

	return wdata;
}
1920
/*
 * Lock and prepare a contiguous run of the found pages for writeback:
 * each kept page is locked, its dirty bit cleared, and PG_writeback set.
 * Stops at the first page that is non-contiguous, truncated, beyond the
 * range end, beyond i_size, or already under writeback.  Pages not kept
 * have their references dropped.  Returns the number of pages prepared;
 * *index, *next and *done are updated for the caller's scan loop.
 */
static unsigned int
wdata_prepare_pages(struct cifs_writedata *wdata, unsigned int found_pages,
		    struct address_space *mapping,
		    struct writeback_control *wbc,
		    pgoff_t end, pgoff_t *index, pgoff_t *next, bool *done)
{
	unsigned int nr_pages = 0, i;
	struct page *page;

	for (i = 0; i < found_pages; i++) {
		page = wdata->pages[i];
		/*
		 * At this point we hold neither mapping->tree_lock nor
		 * lock on the page itself: the page may be truncated or
		 * invalidated (changing page->mapping to NULL), or even
		 * swizzled back from swapper_space to tmpfs file
		 * mapping
		 */

		/* block on the first page; never block on later ones so a
		   held lock elsewhere can't stall the whole batch */
		if (nr_pages == 0)
			lock_page(page);
		else if (!trylock_page(page))
			break;

		if (unlikely(page->mapping != mapping)) {
			unlock_page(page);
			break;
		}

		if (!wbc->range_cyclic && page->index > end) {
			*done = true;
			unlock_page(page);
			break;
		}

		if (*next && (page->index != *next)) {
			/* Not next consecutive page */
			unlock_page(page);
			break;
		}

		if (wbc->sync_mode != WB_SYNC_NONE)
			wait_on_page_writeback(page);

		if (PageWriteback(page) ||
				!clear_page_dirty_for_io(page)) {
			unlock_page(page);
			break;
		}

		/*
		 * This actually clears the dirty bit in the radix tree.
		 * See cifs_writepage() for more commentary.
		 */
		set_page_writeback(page);
		if (page_offset(page) >= i_size_read(mapping->host)) {
			*done = true;
			unlock_page(page);
			end_page_writeback(page);
			break;
		}

		wdata->pages[i] = page;
		*next = page->index + 1;
		++nr_pages;
	}

	/* reset index to refind any pages skipped */
	if (nr_pages == 0)
		*index = wdata->pages[0]->index + 1;

	/* put any pages we aren't going to use */
	for (i = nr_pages; i < found_pages; i++) {
		page_cache_release(wdata->pages[i]);
		wdata->pages[i] = NULL;
	}

	return nr_pages;
}
2000
/*
 * Fill in the remaining cifs_writedata fields (offset, sizes, handle, pid)
 * and kick off the asynchronous write for nr_pages prepared pages.  The
 * final page may be partial, so tailsz is clamped to i_size.  All pages
 * are unlocked before returning.  Returns 0 on successful submission,
 * -EBADF if no writable handle exists, or the async_writev error.
 */
static int
wdata_send_pages(struct cifs_writedata *wdata, unsigned int nr_pages,
		 struct address_space *mapping, struct writeback_control *wbc)
{
	int rc = 0;
	struct TCP_Server_Info *server;
	unsigned int i;

	wdata->sync_mode = wbc->sync_mode;
	wdata->nr_pages = nr_pages;
	wdata->offset = page_offset(wdata->pages[0]);
	wdata->pagesz = PAGE_CACHE_SIZE;
	/* last page may extend past i_size; only write the valid tail */
	wdata->tailsz = min(i_size_read(mapping->host) -
			page_offset(wdata->pages[nr_pages - 1]),
			(loff_t)PAGE_CACHE_SIZE);
	wdata->bytes = ((nr_pages - 1) * PAGE_CACHE_SIZE) + wdata->tailsz;

	/* drop any stale handle reference before finding a fresh one */
	if (wdata->cfile != NULL)
		cifsFileInfo_put(wdata->cfile);
	wdata->cfile = find_writable_file(CIFS_I(mapping->host), false);
	if (!wdata->cfile) {
		cifs_dbg(VFS, "No writable handles for inode\n");
		rc = -EBADF;
	} else {
		wdata->pid = wdata->cfile->pid;
		server = tlink_tcon(wdata->cfile->tlink)->ses->server;
		rc = server->ops->async_writev(wdata, cifs_writedata_release);
	}

	for (i = 0; i < nr_pages; ++i)
		unlock_page(wdata->pages[i]);

	return rc;
}
2035
/*
 * address_space_operations ->writepages for cifs: scan the mapping for
 * runs of contiguous dirty pages and push each run to the server with one
 * async write, throttled by the server's MTU credits.  Falls back to
 * generic_writepages() (one page at a time) when wsize is smaller than a
 * page.  Retries a batch on -EAGAIN in WB_SYNC_ALL mode and wraps the
 * scan once for range_cyclic writeback.
 */
static int cifs_writepages(struct address_space *mapping,
			   struct writeback_control *wbc)
{
	struct cifs_sb_info *cifs_sb = CIFS_SB(mapping->host->i_sb);
	struct TCP_Server_Info *server;
	bool done = false, scanned = false, range_whole = false;
	pgoff_t end, index;
	struct cifs_writedata *wdata;
	int rc = 0;

	/*
	 * If wsize is smaller than the page cache size, default to writing
	 * one page at a time via cifs_writepage
	 */
	if (cifs_sb->wsize < PAGE_CACHE_SIZE)
		return generic_writepages(mapping, wbc);

	if (wbc->range_cyclic) {
		index = mapping->writeback_index; /* Start from prev offset */
		end = -1;
	} else {
		index = wbc->range_start >> PAGE_CACHE_SHIFT;
		end = wbc->range_end >> PAGE_CACHE_SHIFT;
		if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
			range_whole = true;
		scanned = true;
	}
	server = cifs_sb_master_tcon(cifs_sb)->ses->server;
retry:
	while (!done && index <= end) {
		unsigned int i, nr_pages, found_pages, wsize, credits;
		pgoff_t next = 0, tofind, saved_index = index;

		/* block until the server grants credits for up to wsize */
		rc = server->ops->wait_mtu_credits(server, cifs_sb->wsize,
						   &wsize, &credits);
		if (rc)
			break;

		tofind = min((wsize / PAGE_CACHE_SIZE) - 1, end - index) + 1;

		wdata = wdata_alloc_and_fillpages(tofind, mapping, end, &index,
						  &found_pages);
		if (!wdata) {
			rc = -ENOMEM;
			add_credits_and_wake_if(server, credits, 0);
			break;
		}

		if (found_pages == 0) {
			kref_put(&wdata->refcount, cifs_writedata_release);
			add_credits_and_wake_if(server, credits, 0);
			break;
		}

		nr_pages = wdata_prepare_pages(wdata, found_pages, mapping, wbc,
					       end, &index, &next, &done);

		/* nothing to write? */
		if (nr_pages == 0) {
			kref_put(&wdata->refcount, cifs_writedata_release);
			add_credits_and_wake_if(server, credits, 0);
			continue;
		}

		wdata->credits = credits;

		rc = wdata_send_pages(wdata, nr_pages, mapping, wbc);

		/* send failure -- clean up the mess */
		if (rc != 0) {
			add_credits_and_wake_if(server, wdata->credits, 0);
			for (i = 0; i < nr_pages; ++i) {
				if (rc == -EAGAIN)
					redirty_page_for_writepage(wbc,
							   wdata->pages[i]);
				else
					SetPageError(wdata->pages[i]);
				end_page_writeback(wdata->pages[i]);
				page_cache_release(wdata->pages[i]);
			}
			if (rc != -EAGAIN)
				mapping_set_error(mapping, rc);
		}
		kref_put(&wdata->refcount, cifs_writedata_release);

		/* data-integrity writeback must retry a transient failure */
		if (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN) {
			index = saved_index;
			continue;
		}

		wbc->nr_to_write -= nr_pages;
		if (wbc->nr_to_write <= 0)
			done = true;

		index = next;
	}

	if (!scanned && !done) {
		/*
		 * We hit the last page and there is more work to be done: wrap
		 * back to the start of the file
		 */
		scanned = true;
		index = 0;
		goto retry;
	}

	if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
		mapping->writeback_index = index;

	return rc;
}
2148
/*
 * Write one locked page to the server via cifs_partialpagewrite(),
 * maintaining the PG_writeback/PG_dirty protocol the VM expects.  Retries
 * forever on -EAGAIN for data-integrity (WB_SYNC_ALL) writeback and
 * redirties the page otherwise.  The caller keeps responsibility for
 * unlocking the page.
 */
static int
cifs_writepage_locked(struct page *page, struct writeback_control *wbc)
{
	int rc;
	unsigned int xid;

	xid = get_xid();
/* BB add check for wbc flags */
	page_cache_get(page);
	if (!PageUptodate(page))
		cifs_dbg(FYI, "ppw - page not up to date\n");

	/*
	 * Set the "writeback" flag, and clear "dirty" in the radix tree.
	 *
	 * A writepage() implementation always needs to do either this,
	 * or re-dirty the page with "redirty_page_for_writepage()" in
	 * the case of a failure.
	 *
	 * Just unlocking the page will cause the radix tree tag-bits
	 * to fail to update with the state of the page correctly.
	 */
	set_page_writeback(page);
retry_write:
	rc = cifs_partialpagewrite(page, 0, PAGE_CACHE_SIZE);
	if (rc == -EAGAIN && wbc->sync_mode == WB_SYNC_ALL)
		goto retry_write;
	else if (rc == -EAGAIN)
		redirty_page_for_writepage(wbc, page);
	else if (rc != 0)
		SetPageError(page);
	else
		SetPageUptodate(page);
	end_page_writeback(page);
	page_cache_release(page);
	free_xid(xid);
	return rc;
}
2187
/* ->writepage entry point: do the locked write, then release the page lock. */
static int cifs_writepage(struct page *page, struct writeback_control *wbc)
{
	int ret;

	ret = cifs_writepage_locked(page, wbc);
	unlock_page(page);
	return ret;
}
2194
/*
 * address_space_operations ->write_end for cifs.  If the page ended up
 * uptodate, just mark it dirty for later writeback; otherwise push the
 * copied bytes straight to the server with cifs_write() using this file's
 * handle.  Updates i_size when the write extended the file.  Returns the
 * number of bytes accepted or a negative errno.
 */
static int cifs_write_end(struct file *file, struct address_space *mapping,
			loff_t pos, unsigned len, unsigned copied,
			struct page *page, void *fsdata)
{
	int rc;
	struct inode *inode = mapping->host;
	struct cifsFileInfo *cfile = file->private_data;
	struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
	__u32 pid;

	/* honor forced pid forwarding (e.g. for servers doing lock checks) */
	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
		pid = cfile->pid;
	else
		pid = current->tgid;

	cifs_dbg(FYI, "write_end for page %p from pos %lld with %d bytes\n",
		 page, pos, copied);

	/* NOTE(review): PG_checked presumably set by write_begin to flag a
	   page expected to be fully overwritten -- confirm against
	   cifs_write_begin */
	if (PageChecked(page)) {
		if (copied == len)
			SetPageUptodate(page);
		ClearPageChecked(page);
	} else if (!PageUptodate(page) && copied == PAGE_CACHE_SIZE)
		SetPageUptodate(page);

	if (!PageUptodate(page)) {
		char *page_data;
		unsigned offset = pos & (PAGE_CACHE_SIZE - 1);
		unsigned int xid;

		xid = get_xid();
		/* this is probably better than directly calling
		   partialpage_write since in this function the file handle is
		   known which we might as well leverage */
		/* BB check if anything else missing out of ppw
		   such as updating last write time */
		page_data = kmap(page);
		rc = cifs_write(cfile, pid, page_data + offset, copied, &pos);
		/* if (rc < 0) should we set writebehind rc? */
		kunmap(page);

		free_xid(xid);
	} else {
		rc = copied;
		pos += copied;
		set_page_dirty(page);
	}

	/* grow i_size under i_lock if the write extended the file */
	if (rc > 0) {
		spin_lock(&inode->i_lock);
		if (pos > inode->i_size)
			i_size_write(inode, pos);
		spin_unlock(&inode->i_lock);
	}

	unlock_page(page);
	page_cache_release(page);

	return rc;
}
2255
/*
 * ->fsync for strict cache mode: flush dirty pages, zap the page cache if
 * we no longer hold a read (caching) lease/oplock, then ask the server to
 * flush the handle (unless the mount disables server-side sync).
 */
int cifs_strict_fsync(struct file *file, loff_t start, loff_t end,
		      int datasync)
{
	unsigned int xid;
	int rc = 0;
	struct cifs_tcon *tcon;
	struct TCP_Server_Info *server;
	struct cifsFileInfo *smbfile = file->private_data;
	struct inode *inode = file_inode(file);
	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);

	rc = filemap_write_and_wait_range(inode->i_mapping, start, end);
	if (rc)
		return rc;
	mutex_lock(&inode->i_mutex);

	xid = get_xid();

	cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n",
		 file, datasync);

	/* without a read caching grant, cached pages may be stale -- drop them */
	if (!CIFS_CACHE_READ(CIFS_I(inode))) {
		rc = cifs_zap_mapping(inode);
		if (rc) {
			cifs_dbg(FYI, "rc: %d during invalidate phase\n", rc);
			rc = 0; /* don't care about it in fsync */
		}
	}

	tcon = tlink_tcon(smbfile->tlink);
	if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
		server = tcon->ses->server;
		if (server->ops->flush)
			rc = server->ops->flush(xid, tcon, &smbfile->fid);
		else
			rc = -ENOSYS;
	}

	free_xid(xid);
	mutex_unlock(&inode->i_mutex);
	return rc;
}
2298
/*
 * ->fsync for non-strict cache modes: flush dirty pages for the range and
 * then request a server-side flush of the handle (unless the mount
 * disables server-side sync).  Unlike cifs_strict_fsync(), the page cache
 * is never invalidated here.
 */
int cifs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
{
	unsigned int xid;
	int rc = 0;
	struct cifs_tcon *tcon;
	struct TCP_Server_Info *server;
	struct cifsFileInfo *smbfile = file->private_data;
	struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file);
	struct inode *inode = file->f_mapping->host;

	rc = filemap_write_and_wait_range(inode->i_mapping, start, end);
	if (rc)
		return rc;
	mutex_lock(&inode->i_mutex);

	xid = get_xid();

	cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n",
		 file, datasync);

	tcon = tlink_tcon(smbfile->tlink);
	if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
		server = tcon->ses->server;
		if (server->ops->flush)
			rc = server->ops->flush(xid, tcon, &smbfile->fid);
		else
			rc = -ENOSYS;
	}

	free_xid(xid);
	mutex_unlock(&inode->i_mutex);
	return rc;
}
2332
2333 /*
2334  * As file closes, flush all cached write data for this inode checking
2335  * for write behind errors.
2336  */
2337 int cifs_flush(struct file *file, fl_owner_t id)
2338 {
2339         struct inode *inode = file_inode(file);
2340         int rc = 0;
2341
2342         if (file->f_mode & FMODE_WRITE)
2343                 rc = filemap_write_and_wait(inode->i_mapping);
2344
2345         cifs_dbg(FYI, "Flush inode %p file %p rc %d\n", inode, file, rc);
2346
2347         return rc;
2348 }
2349
2350 static int
2351 cifs_write_allocate_pages(struct page **pages, unsigned long num_pages)
2352 {
2353         int rc = 0;
2354         unsigned long i;
2355
2356         for (i = 0; i < num_pages; i++) {
2357                 pages[i] = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
2358                 if (!pages[i]) {
2359                         /*
2360                          * save number of pages we have already allocated and
2361                          * return with ENOMEM error
2362                          */
2363                         num_pages = i;
2364                         rc = -ENOMEM;
2365                         break;
2366                 }
2367         }
2368
2369         if (rc) {
2370                 for (i = 0; i < num_pages; i++)
2371                         put_page(pages[i]);
2372         }
2373         return rc;
2374 }
2375
2376 static inline
2377 size_t get_numpages(const size_t wsize, const size_t len, size_t *cur_len)
2378 {
2379         size_t num_pages;
2380         size_t clen;
2381
2382         clen = min_t(const size_t, len, wsize);
2383         num_pages = DIV_ROUND_UP(clen, PAGE_SIZE);
2384
2385         if (cur_len)
2386                 *cur_len = clen;
2387
2388         return num_pages;
2389 }
2390
2391 static void
2392 cifs_uncached_writedata_release(struct kref *refcount)
2393 {
2394         int i;
2395         struct cifs_writedata *wdata = container_of(refcount,
2396                                         struct cifs_writedata, refcount);
2397
2398         for (i = 0; i < wdata->nr_pages; i++)
2399                 put_page(wdata->pages[i]);
2400         cifs_writedata_release(refcount);
2401 }
2402
/* Completion work for an uncached write request. */
static void
cifs_uncached_writev_complete(struct work_struct *work)
{
	struct cifs_writedata *wdata = container_of(work,
					struct cifs_writedata, work);
	struct inode *inode = d_inode(wdata->cfile->dentry);
	struct cifsInodeInfo *cifsi = CIFS_I(inode);

	/*
	 * The write may have extended the file on the server; update the
	 * cached server EOF and, if needed, i_size under i_lock.
	 */
	spin_lock(&inode->i_lock);
	cifs_update_eof(cifsi, wdata->offset, wdata->bytes);
	if (cifsi->server_eof > inode->i_size)
		i_size_write(inode, cifsi->server_eof);
	spin_unlock(&inode->i_lock);

	/* Wake the issuer waiting on wdata->done ... */
	complete(&wdata->done);

	/* ... then drop the reference held for the in-flight request. */
	kref_put(&wdata->refcount, cifs_uncached_writedata_release);
}
2421
/*
 * Copy up to *len bytes from @from into wdata's pages, stopping early if
 * the iterator faults. On return *len holds the bytes actually copied and
 * *num_pages the number of pages used. Returns -EFAULT if nothing at all
 * could be copied.
 */
static int
wdata_fill_from_iovec(struct cifs_writedata *wdata, struct iov_iter *from,
		      size_t *len, unsigned long *num_pages)
{
	size_t save_len, copied, bytes, cur_len = *len;
	unsigned long i, nr_pages = *num_pages;

	save_len = cur_len;
	for (i = 0; i < nr_pages; i++) {
		bytes = min_t(const size_t, cur_len, PAGE_SIZE);
		copied = copy_page_from_iter(wdata->pages[i], 0, bytes, from);
		cur_len -= copied;
		/*
		 * If we didn't copy as much as we expected, then that
		 * may mean we trod into an unmapped area. Stop copying
		 * at that point. On the next pass through the big
		 * loop, we'll likely end up getting a zero-length
		 * write and bailing out of it.
		 */
		if (copied < bytes)
			break;
	}
	cur_len = save_len - cur_len;
	*len = cur_len;

	/*
	 * If we have no data to send, then that probably means that
	 * the copy above failed altogether. That's most likely because
	 * the address in the iovec was bogus. Return -EFAULT and let
	 * the caller free anything we allocated and bail out.
	 */
	if (!cur_len)
		return -EFAULT;

	/*
	 * i + 1 now represents the number of pages we actually used in
	 * the copy phase above.
	 *
	 * NOTE(review): when the loop runs to completion without breaking,
	 * i == nr_pages here, so this reports nr_pages + 1. That is harmless
	 * for the current caller (cifs_write_from_iter only uses *num_pages
	 * to trim unused trailing pages), but confirm before reusing this
	 * helper elsewhere.
	 */
	*num_pages = i + 1;
	return 0;
}
2463
/*
 * Split an uncached write into wsize-limited cifs_writedata requests: copy
 * the user data into freshly allocated pages and send each chunk with
 * ->async_writev(). Successfully issued requests are queued on @wdata_list
 * for the caller to wait on. A send failing with -EAGAIN is retried after
 * rewinding @from to the start of the failed chunk.
 */
static int
cifs_write_from_iter(loff_t offset, size_t len, struct iov_iter *from,
		     struct cifsFileInfo *open_file,
		     struct cifs_sb_info *cifs_sb, struct list_head *wdata_list)
{
	int rc = 0;
	size_t cur_len;
	unsigned long nr_pages, num_pages, i;
	struct cifs_writedata *wdata;
	struct iov_iter saved_from;
	loff_t saved_offset = offset;
	pid_t pid;
	struct TCP_Server_Info *server;

	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
		pid = open_file->pid;
	else
		pid = current->tgid;

	server = tlink_tcon(open_file->tlink)->ses->server;
	/* Keep a pristine copy of the iterator so retries can rewind it. */
	memcpy(&saved_from, from, sizeof(struct iov_iter));

	do {
		unsigned int wsize, credits;

		/* Block until the server grants credits for a wsize chunk. */
		rc = server->ops->wait_mtu_credits(server, cifs_sb->wsize,
						   &wsize, &credits);
		if (rc)
			break;

		nr_pages = get_numpages(wsize, len, &cur_len);
		wdata = cifs_writedata_alloc(nr_pages,
					     cifs_uncached_writev_complete);
		if (!wdata) {
			rc = -ENOMEM;
			add_credits_and_wake_if(server, credits, 0);
			break;
		}

		rc = cifs_write_allocate_pages(wdata->pages, nr_pages);
		if (rc) {
			kfree(wdata);
			add_credits_and_wake_if(server, credits, 0);
			break;
		}

		num_pages = nr_pages;
		rc = wdata_fill_from_iovec(wdata, from, &cur_len, &num_pages);
		if (rc) {
			for (i = 0; i < nr_pages; i++)
				put_page(wdata->pages[i]);
			kfree(wdata);
			add_credits_and_wake_if(server, credits, 0);
			break;
		}

		/*
		 * Bring nr_pages down to the number of pages we actually used,
		 * and free any pages that we didn't use.
		 */
		for ( ; nr_pages > num_pages; nr_pages--)
			put_page(wdata->pages[nr_pages - 1]);

		wdata->sync_mode = WB_SYNC_ALL;
		wdata->nr_pages = nr_pages;
		wdata->offset = (__u64)offset;
		wdata->cfile = cifsFileInfo_get(open_file);
		wdata->pid = pid;
		wdata->bytes = cur_len;
		wdata->pagesz = PAGE_SIZE;
		/* Last page may be partial; record its exact byte count. */
		wdata->tailsz = cur_len - ((nr_pages - 1) * PAGE_SIZE);
		wdata->credits = credits;

		if (!wdata->cfile->invalidHandle ||
		    !cifs_reopen_file(wdata->cfile, false))
			rc = server->ops->async_writev(wdata,
					cifs_uncached_writedata_release);
		if (rc) {
			add_credits_and_wake_if(server, wdata->credits, 0);
			kref_put(&wdata->refcount,
				 cifs_uncached_writedata_release);
			if (rc == -EAGAIN) {
				/*
				 * Rewind the iterator to the start of this
				 * chunk and retry the send.
				 */
				memcpy(from, &saved_from,
				       sizeof(struct iov_iter));
				iov_iter_advance(from, offset - saved_offset);
				continue;
			}
			break;
		}

		list_add_tail(&wdata->list, wdata_list);
		offset += cur_len;
		len -= cur_len;
	} while (len > 0);

	return rc;
}
2561
/*
 * Uncached write entry point: issue async write requests for the whole
 * range via cifs_write_from_iter(), then wait for the replies in offset
 * order, resending any chunk that completed with -EAGAIN. Returns the
 * number of bytes written, or an error if nothing was written.
 */
ssize_t cifs_user_writev(struct kiocb *iocb, struct iov_iter *from)
{
	struct file *file = iocb->ki_filp;
	ssize_t total_written = 0;
	struct cifsFileInfo *open_file;
	struct cifs_tcon *tcon;
	struct cifs_sb_info *cifs_sb;
	struct cifs_writedata *wdata, *tmp;
	struct list_head wdata_list;
	struct iov_iter saved_from;
	int rc;

	/*
	 * BB - optimize the way when signing is disabled. We can drop this
	 * extra memory-to-memory copying and use iovec buffers for constructing
	 * write request.
	 */

	rc = generic_write_checks(iocb, from);
	if (rc <= 0)
		return rc;

	INIT_LIST_HEAD(&wdata_list);
	cifs_sb = CIFS_FILE_SB(file);
	open_file = file->private_data;
	tcon = tlink_tcon(open_file->tlink);

	if (!tcon->ses->server->ops->async_writev)
		return -ENOSYS;

	/* Saved so resends can recreate an iterator at any chunk offset. */
	memcpy(&saved_from, from, sizeof(struct iov_iter));

	rc = cifs_write_from_iter(iocb->ki_pos, iov_iter_count(from), from,
				  open_file, cifs_sb, &wdata_list);

	/*
	 * If at least one write was successfully sent, then discard any rc
	 * value from the later writes. If the other write succeeds, then
	 * we'll end up returning whatever was written. If it fails, then
	 * we'll get a new rc value from that.
	 */
	if (!list_empty(&wdata_list))
		rc = 0;

	/*
	 * Wait for and collect replies for any successful sends in order of
	 * increasing offset. Once an error is hit or we get a fatal signal
	 * while waiting, then return without waiting for any more replies.
	 */
restart_loop:
	list_for_each_entry_safe(wdata, tmp, &wdata_list, list) {
		if (!rc) {
			/* FIXME: freezable too? */
			rc = wait_for_completion_killable(&wdata->done);
			if (rc)
				rc = -EINTR;
			else if (wdata->result)
				rc = wdata->result;
			else
				total_written += wdata->bytes;

			/* resend call if it's a retryable error */
			if (rc == -EAGAIN) {
				struct list_head tmp_list;
				struct iov_iter tmp_from;

				INIT_LIST_HEAD(&tmp_list);
				list_del_init(&wdata->list);

				/*
				 * Rebuild an iterator positioned at this
				 * chunk's offset within the original request.
				 */
				memcpy(&tmp_from, &saved_from,
				       sizeof(struct iov_iter));
				iov_iter_advance(&tmp_from,
						 wdata->offset - iocb->ki_pos);

				rc = cifs_write_from_iter(wdata->offset,
						wdata->bytes, &tmp_from,
						open_file, cifs_sb, &tmp_list);

				list_splice(&tmp_list, &wdata_list);

				kref_put(&wdata->refcount,
					 cifs_uncached_writedata_release);
				/* List changed under us; rescan from the top. */
				goto restart_loop;
			}
		}
		list_del_init(&wdata->list);
		kref_put(&wdata->refcount, cifs_uncached_writedata_release);
	}

	if (unlikely(!total_written))
		return rc;

	iocb->ki_pos += total_written;
	/* Cached pages for this range are now stale; force revalidation. */
	set_bit(CIFS_INO_INVALID_MAPPING, &CIFS_I(file_inode(file))->flags);
	cifs_stats_bytes_written(tcon, total_written);
	return total_written;
}
2659
/*
 * Write through the page cache while holding lock_sem shared, failing with
 * -EACCES if a mandatory byte-range lock on this file conflicts with the
 * range being written.
 */
static ssize_t
cifs_writev(struct kiocb *iocb, struct iov_iter *from)
{
	struct file *file = iocb->ki_filp;
	struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
	struct inode *inode = file->f_mapping->host;
	struct cifsInodeInfo *cinode = CIFS_I(inode);
	struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
	ssize_t rc;

	/*
	 * We need to hold the sem to be sure nobody modifies lock list
	 * with a brlock that prevents writing.
	 */
	down_read(&cinode->lock_sem);
	mutex_lock(&inode->i_mutex);

	rc = generic_write_checks(iocb, from);
	if (rc <= 0)
		goto out;

	/* Refuse the write if an exclusive lock covers any part of the range. */
	if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(from),
				     server->vals->exclusive_lock_type, NULL,
				     CIFS_WRITE_OP))
		rc = __generic_file_write_iter(iocb, from);
	else
		rc = -EACCES;
out:
	mutex_unlock(&inode->i_mutex);

	/* For O_SYNC-style writes, push the data and report any sync error. */
	if (rc > 0) {
		ssize_t err = generic_write_sync(file, iocb->ki_pos - rc, rc);
		if (err < 0)
			rc = err;
	}
	up_read(&cinode->lock_sem);
	return rc;
}
2698
/*
 * Strict-cache write entry point: pick between cached and uncached write
 * paths depending on oplock state and POSIX byte-range lock support.
 */
ssize_t
cifs_strict_writev(struct kiocb *iocb, struct iov_iter *from)
{
	struct inode *inode = file_inode(iocb->ki_filp);
	struct cifsInodeInfo *cinode = CIFS_I(inode);
	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
	struct cifsFileInfo *cfile = (struct cifsFileInfo *)
						iocb->ki_filp->private_data;
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	ssize_t written;

	/* cifs_get_writer() returns nonzero on failure; pass that back. */
	written = cifs_get_writer(cinode);
	if (written)
		return written;

	if (CIFS_CACHE_WRITE(cinode)) {
		/*
		 * With POSIX (unix extension) byte-range locks there are no
		 * mandatory-lock conflicts to check, so write via the cache.
		 */
		if (cap_unix(tcon->ses) &&
		(CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability))
		  && ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0)) {
			written = generic_file_write_iter(iocb, from);
			goto out;
		}
		written = cifs_writev(iocb, from);
		goto out;
	}
	/*
	 * For non-oplocked files in strict cache mode we need to write the data
	 * to the server exactly from the pos to pos+len-1 rather than flush all
	 * affected pages because it may cause a error with mandatory locks on
	 * these pages but not on the region from pos to ppos+len-1.
	 */
	written = cifs_user_writev(iocb, from);
	if (written > 0 && CIFS_CACHE_READ(cinode)) {
		/*
		 * Windows 7 server can delay breaking level2 oplock if a write
		 * request comes - break it on the client to prevent reading
		 * an old data.
		 */
		cifs_zap_mapping(inode);
		cifs_dbg(FYI, "Set no oplock for inode=%p after a write operation\n",
			 inode);
		cinode->oplock = 0;
	}
out:
	cifs_put_writer(cinode);
	return written;
}
2746
2747 static struct cifs_readdata *
2748 cifs_readdata_alloc(unsigned int nr_pages, work_func_t complete)
2749 {
2750         struct cifs_readdata *rdata;
2751
2752         rdata = kzalloc(sizeof(*rdata) + (sizeof(struct page *) * nr_pages),
2753                         GFP_KERNEL);
2754         if (rdata != NULL) {
2755                 kref_init(&rdata->refcount);
2756                 INIT_LIST_HEAD(&rdata->list);
2757                 init_completion(&rdata->done);
2758                 INIT_WORK(&rdata->work, complete);
2759         }
2760
2761         return rdata;
2762 }
2763
2764 void
2765 cifs_readdata_release(struct kref *refcount)
2766 {
2767         struct cifs_readdata *rdata = container_of(refcount,
2768                                         struct cifs_readdata, refcount);
2769
2770         if (rdata->cfile)
2771                 cifsFileInfo_put(rdata->cfile);
2772
2773         kfree(rdata);
2774 }
2775
2776 static int
2777 cifs_read_allocate_pages(struct cifs_readdata *rdata, unsigned int nr_pages)
2778 {
2779         int rc = 0;
2780         struct page *page;
2781         unsigned int i;
2782
2783         for (i = 0; i < nr_pages; i++) {
2784                 page = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
2785                 if (!page) {
2786                         rc = -ENOMEM;
2787                         break;
2788                 }
2789                 rdata->pages[i] = page;
2790         }
2791
2792         if (rc) {
2793                 for (i = 0; i < nr_pages; i++) {
2794                         put_page(rdata->pages[i]);
2795                         rdata->pages[i] = NULL;
2796                 }
2797         }
2798         return rc;
2799 }
2800
2801 static void
2802 cifs_uncached_readdata_release(struct kref *refcount)
2803 {
2804         struct cifs_readdata *rdata = container_of(refcount,
2805                                         struct cifs_readdata, refcount);
2806         unsigned int i;
2807
2808         for (i = 0; i < rdata->nr_pages; i++) {
2809                 put_page(rdata->pages[i]);
2810                 rdata->pages[i] = NULL;
2811         }
2812         cifs_readdata_release(refcount);
2813 }
2814
/**
 * cifs_readdata_to_iov - copy data from pages in response to an iovec
 * @rdata:      the readdata response with list of pages holding data
 * @iter:       destination for our data
 *
 * This function copies data from a list of pages in a readdata response into
 * an array of iovecs. It will first calculate where the data should go
 * based on the info in the readdata and then copy the data into that spot.
 * Returns 0 when all of rdata->got_bytes reached @iter, -EFAULT otherwise.
 */
static int
cifs_readdata_to_iov(struct cifs_readdata *rdata, struct iov_iter *iter)
{
	size_t remaining = rdata->got_bytes;
	unsigned int i;

	for (i = 0; i < rdata->nr_pages; i++) {
		struct page *page = rdata->pages[i];
		size_t copy = min_t(size_t, remaining, PAGE_SIZE);
		size_t written = copy_page_to_iter(page, 0, copy, iter);
		remaining -= written;
		/*
		 * A short copy while the iterator still has room means the
		 * destination faulted; stop rather than spin on it.
		 */
		if (written < copy && iov_iter_count(iter) > 0)
			break;
	}
	return remaining ? -EFAULT : 0;
}
2840
/*
 * Completion work for an uncached read: wake the waiter in
 * cifs_user_readv(), then drop the reference held for the request.
 */
static void
cifs_uncached_readv_complete(struct work_struct *work)
{
	struct cifs_readdata *rdata = container_of(work,
						struct cifs_readdata, work);

	complete(&rdata->done);
	kref_put(&rdata->refcount, cifs_uncached_readdata_release);
}
2850
/*
 * Receive @len bytes of read-response payload from the server socket into
 * rdata's pages, one page at a time. The final partial page is zero-filled
 * past the data and its size recorded in rdata->tailsz; pages beyond the
 * payload are released. Returns the bytes received if any were (unless the
 * connection aborted), otherwise the socket-read error.
 */
static int
cifs_uncached_read_into_pages(struct TCP_Server_Info *server,
			struct cifs_readdata *rdata, unsigned int len)
{
	int result = 0;
	unsigned int i;
	unsigned int nr_pages = rdata->nr_pages;
	struct kvec iov;

	rdata->got_bytes = 0;
	rdata->tailsz = PAGE_SIZE;
	for (i = 0; i < nr_pages; i++) {
		struct page *page = rdata->pages[i];

		if (len >= PAGE_SIZE) {
			/* enough data to fill the page */
			iov.iov_base = kmap(page);
			iov.iov_len = PAGE_SIZE;
			cifs_dbg(FYI, "%u: iov_base=%p iov_len=%zu\n",
				 i, iov.iov_base, iov.iov_len);
			len -= PAGE_SIZE;
		} else if (len > 0) {
			/* enough for partial page, fill and zero the rest */
			iov.iov_base = kmap(page);
			iov.iov_len = len;
			cifs_dbg(FYI, "%u: iov_base=%p iov_len=%zu\n",
				 i, iov.iov_base, iov.iov_len);
			memset(iov.iov_base + len, '\0', PAGE_SIZE - len);
			rdata->tailsz = len;
			len = 0;
		} else {
			/* no need to hold page hostage */
			rdata->pages[i] = NULL;
			rdata->nr_pages--;
			put_page(page);
			continue;
		}

		result = cifs_readv_from_socket(server, &iov, 1, iov.iov_len);
		kunmap(page);
		if (result < 0)
			break;

		rdata->got_bytes += result;
	}

	/* Partial data is still data — report it unless the link died. */
	return rdata->got_bytes > 0 && result != -ECONNABORTED ?
						rdata->got_bytes : result;
}
2900
2901 static int
2902 cifs_send_async_read(loff_t offset, size_t len, struct cifsFileInfo *open_file,
2903                      struct cifs_sb_info *cifs_sb, struct list_head *rdata_list)
2904 {
2905         struct cifs_readdata *rdata;
2906         unsigned int npages, rsize, credits;
2907         size_t cur_len;
2908         int rc;
2909         pid_t pid;
2910         struct TCP_Server_Info *server;
2911
2912         server = tlink_tcon(open_file->tlink)->ses->server;
2913
2914         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2915                 pid = open_file->pid;
2916         else
2917                 pid = current->tgid;
2918
2919         do {
2920                 rc = server->ops->wait_mtu_credits(server, cifs_sb->rsize,
2921                                                    &rsize, &credits);
2922                 if (rc)
2923                         break;
2924
2925                 cur_len = min_t(const size_t, len, rsize);
2926                 npages = DIV_ROUND_UP(cur_len, PAGE_SIZE);
2927
2928                 /* allocate a readdata struct */
2929                 rdata = cifs_readdata_alloc(npages,
2930                                             cifs_uncached_readv_complete);
2931                 if (!rdata) {
2932                         add_credits_and_wake_if(server, credits, 0);
2933                         rc = -ENOMEM;
2934                         break;
2935                 }
2936
2937                 rc = cifs_read_allocate_pages(rdata, npages);
2938                 if (rc)
2939                         goto error;
2940
2941                 rdata->cfile = cifsFileInfo_get(open_file);
2942                 rdata->nr_pages = npages;
2943                 rdata->offset = offset;
2944                 rdata->bytes = cur_len;
2945                 rdata->pid = pid;
2946                 rdata->pagesz = PAGE_SIZE;
2947                 rdata->read_into_pages = cifs_uncached_read_into_pages;
2948                 rdata->credits = credits;
2949
2950                 if (!rdata->cfile->invalidHandle ||
2951                     !cifs_reopen_file(rdata->cfile, true))
2952                         rc = server->ops->async_readv(rdata);
2953 error:
2954                 if (rc) {
2955                         add_credits_and_wake_if(server, rdata->credits, 0);
2956                         kref_put(&rdata->refcount,
2957                                  cifs_uncached_readdata_release);
2958                         if (rc == -EAGAIN)
2959                                 continue;
2960                         break;
2961                 }
2962
2963                 list_add_tail(&rdata->list, rdata_list);
2964                 offset += cur_len;
2965                 len -= cur_len;
2966         } while (len > 0);
2967
2968         return rc;
2969 }
2970
/*
 * Uncached read entry point: issue async read requests for the whole range
 * via cifs_send_async_read(), then collect the replies in offset order and
 * copy the data into @to. Chunks that complete with -EAGAIN are resent
 * starting from the bytes already received. Returns bytes read, or an
 * error if no data was transferred.
 */
ssize_t cifs_user_readv(struct kiocb *iocb, struct iov_iter *to)
{
	struct file *file = iocb->ki_filp;
	ssize_t rc;
	size_t len;
	ssize_t total_read = 0;
	loff_t offset = iocb->ki_pos;
	struct cifs_sb_info *cifs_sb;
	struct cifs_tcon *tcon;
	struct cifsFileInfo *open_file;
	struct cifs_readdata *rdata, *tmp;
	struct list_head rdata_list;

	len = iov_iter_count(to);
	if (!len)
		return 0;

	INIT_LIST_HEAD(&rdata_list);
	cifs_sb = CIFS_FILE_SB(file);
	open_file = file->private_data;
	tcon = tlink_tcon(open_file->tlink);

	if (!tcon->ses->server->ops->async_readv)
		return -ENOSYS;

	if ((file->f_flags & O_ACCMODE) == O_WRONLY)
		cifs_dbg(FYI, "attempting read on write only file instance\n");

	rc = cifs_send_async_read(offset, len, open_file, cifs_sb, &rdata_list);

	/* if at least one read request send succeeded, then reset rc */
	if (!list_empty(&rdata_list))
		rc = 0;

	len = iov_iter_count(to);
	/* the loop below should proceed in the order of increasing offsets */
again:
	list_for_each_entry_safe(rdata, tmp, &rdata_list, list) {
		if (!rc) {
			/* FIXME: freezable sleep too? */
			rc = wait_for_completion_killable(&rdata->done);
			if (rc)
				rc = -EINTR;
			else if (rdata->result == -EAGAIN) {
				/* resend call if it's a retryable error */
				struct list_head tmp_list;
				unsigned int got_bytes = rdata->got_bytes;

				list_del_init(&rdata->list);
				INIT_LIST_HEAD(&tmp_list);

				/*
				 * Got a part of data and then reconnect has
				 * happened -- fill the buffer and continue
				 * reading.
				 */
				if (got_bytes && got_bytes < rdata->bytes) {
					rc = cifs_readdata_to_iov(rdata, to);
					if (rc) {
						kref_put(&rdata->refcount,
						cifs_uncached_readdata_release);
						continue;
					}
				}

				/* Resend only the part not yet received. */
				rc = cifs_send_async_read(
						rdata->offset + got_bytes,
						rdata->bytes - got_bytes,
						rdata->cfile, cifs_sb,
						&tmp_list);

				list_splice(&tmp_list, &rdata_list);

				kref_put(&rdata->refcount,
					 cifs_uncached_readdata_release);
				/* List changed under us; rescan from the top. */
				goto again;
			} else if (rdata->result)
				rc = rdata->result;
			else
				rc = cifs_readdata_to_iov(rdata, to);

			/* if there was a short read -- discard anything left */
			if (rdata->got_bytes && rdata->got_bytes < rdata->bytes)
				rc = -ENODATA;
		}
		list_del_init(&rdata->list);
		kref_put(&rdata->refcount, cifs_uncached_readdata_release);
	}

	/* Bytes consumed from the iterator are bytes delivered to the caller. */
	total_read = len - iov_iter_count(to);

	cifs_stats_bytes_read(tcon, total_read);

	/* mask nodata case */
	if (rc == -ENODATA)
		rc = 0;

	if (total_read) {
		iocb->ki_pos += total_read;
		return total_read;
	}
	return rc;
}
3074
/*
 * Strict-cache read entry point: read via the page cache only when the
 * inode's read cache is valid (or POSIX byte-range locks apply); otherwise
 * go to the server, honouring mandatory byte-range locks.
 */
ssize_t
cifs_strict_readv(struct kiocb *iocb, struct iov_iter *to)
{
	struct inode *inode = file_inode(iocb->ki_filp);
	struct cifsInodeInfo *cinode = CIFS_I(inode);
	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
	struct cifsFileInfo *cfile = (struct cifsFileInfo *)
						iocb->ki_filp->private_data;
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	int rc = -EACCES;

	/*
	 * In strict cache mode we need to read from the server all the time
	 * if we don't have level II oplock because the server can delay mtime
	 * change - so we can't make a decision about inode invalidating.
	 * And we can also fail with pagereading if there are mandatory locks
	 * on pages affected by this read but not on the region from pos to
	 * pos+len-1.
	 */
	if (!CIFS_CACHE_READ(cinode))
		return cifs_user_readv(iocb, to);

	/* POSIX (unix extension) locks have no mandatory-lock conflicts. */
	if (cap_unix(tcon->ses) &&
	    (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
	    ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
		return generic_file_read_iter(iocb, to);

	/*
	 * We need to hold the sem to be sure nobody modifies lock list
	 * with a brlock that prevents reading.
	 */
	down_read(&cinode->lock_sem);
	if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(to),
				     tcon->ses->server->vals->shared_lock_type,
				     NULL, CIFS_READ_OP))
		rc = generic_file_read_iter(iocb, to);
	up_read(&cinode->lock_sem);
	return rc;
}
3114
/*
 * Synchronous read helper: pull up to @read_size bytes from the server
 * into @read_data starting at *@offset, using the open handle stashed
 * in file->private_data.  *@offset is advanced past each chunk read.
 *
 * Returns the total number of bytes read; if the very first chunk
 * fails, the negative errno (or 0 on immediate EOF) is returned
 * instead.  A later failure returns the short count.
 */
static ssize_t
cifs_read(struct file *file, char *read_data, size_t read_size, loff_t *offset)
{
        int rc = -EACCES;
        unsigned int bytes_read = 0;
        unsigned int total_read;
        unsigned int current_read_size;
        unsigned int rsize;
        struct cifs_sb_info *cifs_sb;
        struct cifs_tcon *tcon;
        struct TCP_Server_Info *server;
        unsigned int xid;
        char *cur_offset;
        struct cifsFileInfo *open_file;
        struct cifs_io_parms io_parms;
        int buf_type = CIFS_NO_BUFFER;
        __u32 pid;

        xid = get_xid();
        cifs_sb = CIFS_FILE_SB(file);

        /* FIXME: set up handlers for larger reads and/or convert to async */
        rsize = min_t(unsigned int, cifs_sb->rsize, CIFSMaxBufSize);

        if (file->private_data == NULL) {
                rc = -EBADF;
                free_xid(xid);
                return rc;
        }
        open_file = file->private_data;
        tcon = tlink_tcon(open_file->tlink);
        server = tcon->ses->server;

        /* this sync path depends entirely on the per-dialect sync_read op */
        if (!server->ops->sync_read) {
                free_xid(xid);
                return -ENOSYS;
        }

        /* forward the original opener's pid if the mount requested it */
        if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
                pid = open_file->pid;
        else
                pid = current->tgid;

        if ((file->f_flags & O_ACCMODE) == O_WRONLY)
                cifs_dbg(FYI, "attempting read on write only file instance\n");

        for (total_read = 0, cur_offset = read_data; read_size > total_read;
             total_read += bytes_read, cur_offset += bytes_read) {
                /* retry this chunk for as long as the server says -EAGAIN */
                do {
                        current_read_size = min_t(uint, read_size - total_read,
                                                  rsize);
                        /*
                         * For windows me and 9x we do not want to request more
                         * than it negotiated since it will refuse the read
                         * then.
                         */
                        if ((tcon->ses) && !(tcon->ses->capabilities &
                                tcon->ses->server->vals->cap_large_files)) {
                                current_read_size = min_t(uint,
                                        current_read_size, CIFSMaxBufSize);
                        }
                        /* the handle may have gone stale across a reconnect */
                        if (open_file->invalidHandle) {
                                rc = cifs_reopen_file(open_file, true);
                                if (rc != 0)
                                        break;
                        }
                        io_parms.pid = pid;
                        io_parms.tcon = tcon;
                        io_parms.offset = *offset;
                        io_parms.length = current_read_size;
                        rc = server->ops->sync_read(xid, &open_file->fid, &io_parms,
                                                    &bytes_read, &cur_offset,
                                                    &buf_type);
                } while (rc == -EAGAIN);

                if (rc || (bytes_read == 0)) {
                        /* error or EOF: report any data already transferred */
                        if (total_read) {
                                break;
                        } else {
                                free_xid(xid);
                                return rc;
                        }
                } else {
                        /*
                         * NOTE(review): this accounts the running total on
                         * every pass, not bytes_read, so multi-chunk reads
                         * inflate the byte stats -- confirm this is intended.
                         */
                        cifs_stats_bytes_read(tcon, total_read);
                        *offset += bytes_read;
                }
        }
        free_xid(xid);
        return total_read;
}
3205
3206 /*
3207  * If the page is mmap'ed into a process' page tables, then we need to make
3208  * sure that it doesn't change while being written back.
3209  */
3210 static int
3211 cifs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
3212 {
3213         struct page *page = vmf->page;
3214
3215         lock_page(page);
3216         return VM_FAULT_LOCKED;
3217 }
3218
/*
 * VM ops for cifs mmaps: faults are served from the page cache via
 * filemap_fault, and pages are locked via cifs_page_mkwrite before a
 * write fault proceeds.
 */
static const struct vm_operations_struct cifs_file_vm_ops = {
        .fault = filemap_fault,
        .map_pages = filemap_map_pages,
        .page_mkwrite = cifs_page_mkwrite,
};
3224
3225 int cifs_file_strict_mmap(struct file *file, struct vm_area_struct *vma)
3226 {
3227         int rc, xid;
3228         struct inode *inode = file_inode(file);
3229
3230         xid = get_xid();
3231
3232         if (!CIFS_CACHE_READ(CIFS_I(inode))) {
3233                 rc = cifs_zap_mapping(inode);
3234                 if (rc)
3235                         return rc;
3236         }
3237
3238         rc = generic_file_mmap(file, vma);
3239         if (rc == 0)
3240                 vma->vm_ops = &cifs_file_vm_ops;
3241         free_xid(xid);
3242         return rc;
3243 }
3244
3245 int cifs_file_mmap(struct file *file, struct vm_area_struct *vma)
3246 {
3247         int rc, xid;
3248
3249         xid = get_xid();
3250         rc = cifs_revalidate_file(file);
3251         if (rc) {
3252                 cifs_dbg(FYI, "Validation prior to mmap failed, error=%d\n",
3253                          rc);
3254                 free_xid(xid);
3255                 return rc;
3256         }
3257         rc = generic_file_mmap(file, vma);
3258         if (rc == 0)
3259                 vma->vm_ops = &cifs_file_vm_ops;
3260         free_xid(xid);
3261         return rc;
3262 }
3263
3264 static void
3265 cifs_readv_complete(struct work_struct *work)
3266 {
3267         unsigned int i, got_bytes;
3268         struct cifs_readdata *rdata = container_of(work,
3269                                                 struct cifs_readdata, work);
3270
3271         got_bytes = rdata->got_bytes;
3272         for (i = 0; i < rdata->nr_pages; i++) {
3273                 struct page *page = rdata->pages[i];
3274
3275                 lru_cache_add_file(page);
3276
3277                 if (rdata->result == 0 ||
3278                     (rdata->result == -EAGAIN && got_bytes)) {
3279                         flush_dcache_page(page);
3280                         SetPageUptodate(page);
3281                 }
3282
3283                 unlock_page(page);
3284
3285                 if (rdata->result == 0 ||
3286                     (rdata->result == -EAGAIN && got_bytes))
3287                         cifs_readpage_to_fscache(rdata->mapping->host, page);
3288
3289                 got_bytes -= min_t(unsigned int, PAGE_CACHE_SIZE, got_bytes);
3290
3291                 page_cache_release(page);
3292                 rdata->pages[i] = NULL;
3293         }
3294         kref_put(&rdata->refcount, cifs_readdata_release);
3295 }
3296
/*
 * Receive the payload of an async read reply straight into the pages of
 * @rdata.  @len is the number of payload bytes the server returned.
 *
 * Full pages are filled from the socket one at a time; the final
 * partial page (if any) is filled and zero-padded, with its data length
 * recorded in rdata->tailsz.  Pages beyond the returned data are
 * dropped from the request: zero-filled and marked uptodate when they
 * lie past the server's EOF (so the VFS stops re-requesting them),
 * simply released otherwise.
 *
 * Returns the number of bytes received, or a negative error when
 * nothing arrived or the connection was aborted.
 */
static int
cifs_readpages_read_into_pages(struct TCP_Server_Info *server,
                        struct cifs_readdata *rdata, unsigned int len)
{
        int result = 0;
        unsigned int i;
        u64 eof;
        pgoff_t eof_index;
        unsigned int nr_pages = rdata->nr_pages;
        struct kvec iov;

        /* determine the eof that the server (probably) has */
        eof = CIFS_I(rdata->mapping->host)->server_eof;
        eof_index = eof ? (eof - 1) >> PAGE_CACHE_SHIFT : 0;
        cifs_dbg(FYI, "eof=%llu eof_index=%lu\n", eof, eof_index);

        rdata->got_bytes = 0;
        rdata->tailsz = PAGE_CACHE_SIZE;
        for (i = 0; i < nr_pages; i++) {
                struct page *page = rdata->pages[i];

                if (len >= PAGE_CACHE_SIZE) {
                        /* enough data to fill the page */
                        iov.iov_base = kmap(page);
                        iov.iov_len = PAGE_CACHE_SIZE;
                        cifs_dbg(FYI, "%u: idx=%lu iov_base=%p iov_len=%zu\n",
                                 i, page->index, iov.iov_base, iov.iov_len);
                        len -= PAGE_CACHE_SIZE;
                } else if (len > 0) {
                        /* enough for partial page, fill and zero the rest */
                        iov.iov_base = kmap(page);
                        iov.iov_len = len;
                        cifs_dbg(FYI, "%u: idx=%lu iov_base=%p iov_len=%zu\n",
                                 i, page->index, iov.iov_base, iov.iov_len);
                        memset(iov.iov_base + len,
                                '\0', PAGE_CACHE_SIZE - len);
                        rdata->tailsz = len;
                        len = 0;
                } else if (page->index > eof_index) {
                        /*
                         * The VFS will not try to do readahead past the
                         * i_size, but it's possible that we have outstanding
                         * writes with gaps in the middle and the i_size hasn't
                         * caught up yet. Populate those with zeroed out pages
                         * to prevent the VFS from repeatedly attempting to
                         * fill them until the writes are flushed.
                         */
                        zero_user(page, 0, PAGE_CACHE_SIZE);
                        lru_cache_add_file(page);
                        flush_dcache_page(page);
                        SetPageUptodate(page);
                        unlock_page(page);
                        page_cache_release(page);
                        rdata->pages[i] = NULL;
                        rdata->nr_pages--;
                        continue;
                } else {
                        /* no need to hold page hostage */
                        lru_cache_add_file(page);
                        unlock_page(page);
                        page_cache_release(page);
                        rdata->pages[i] = NULL;
                        rdata->nr_pages--;
                        continue;
                }

                result = cifs_readv_from_socket(server, &iov, 1, iov.iov_len);
                kunmap(page);
                if (result < 0)
                        break;

                rdata->got_bytes += result;
        }

        /* report partial progress unless the connection died under us */
        return rdata->got_bytes > 0 && result != -ECONNABORTED ?
                                                rdata->got_bytes : result;
}
3374
/*
 * Peel a run of index-contiguous pages off the tail of @page_list (the
 * VFS supplies it in order of declining index), insert each into the
 * page cache already locked, and collect them on @tmplist.  The run
 * ends at an index discontinuity, when it would exceed @rsize bytes,
 * or when the list runs out.  On success *offset, *bytes and *nr_pages
 * describe the contiguous byte range the caller should request.
 * Fails only when the first page cannot be added to the page cache.
 */
static int
readpages_get_pages(struct address_space *mapping, struct list_head *page_list,
                    unsigned int rsize, struct list_head *tmplist,
                    unsigned int *nr_pages, loff_t *offset, unsigned int *bytes)
{
        struct page *page, *tpage;
        unsigned int expected_index;
        int rc;
        gfp_t gfp = GFP_KERNEL & mapping_gfp_mask(mapping);

        INIT_LIST_HEAD(tmplist);

        page = list_entry(page_list->prev, struct page, lru);

        /*
         * Lock the page and put it in the cache. Since no one else
         * should have access to this page, we're safe to simply set
         * PG_locked without checking it first.
         */
        __set_page_locked(page);
        rc = add_to_page_cache_locked(page, mapping,
                                      page->index, gfp);

        /* give up if we can't stick it in the cache */
        if (rc) {
                __clear_page_locked(page);
                return rc;
        }

        /* move first page to the tmplist */
        *offset = (loff_t)page->index << PAGE_CACHE_SHIFT;
        *bytes = PAGE_CACHE_SIZE;
        *nr_pages = 1;
        list_move_tail(&page->lru, tmplist);

        /* now try and add more pages onto the request */
        expected_index = page->index + 1;
        list_for_each_entry_safe_reverse(page, tpage, page_list, lru) {
                /* discontinuity ? */
                if (page->index != expected_index)
                        break;

                /* would this page push the read over the rsize? */
                if (*bytes + PAGE_CACHE_SIZE > rsize)
                        break;

                __set_page_locked(page);
                if (add_to_page_cache_locked(page, mapping, page->index, gfp)) {
                        __clear_page_locked(page);
                        break;
                }
                list_move_tail(&page->lru, tmplist);
                (*bytes) += PAGE_CACHE_SIZE;
                expected_index++;
                (*nr_pages)++;
        }
        return rc;
}
3433
/*
 * ->readpages() for cifs: batch the VFS readahead list into rsize-sized
 * async read requests.  Pages are offered to fscache first; whatever
 * remains is read from the server via ->async_readv, whose completion
 * work (cifs_readv_complete) unlocks and releases the pages.
 */
static int cifs_readpages(struct file *file, struct address_space *mapping,
        struct list_head *page_list, unsigned num_pages)
{
        int rc;
        struct list_head tmplist;
        struct cifsFileInfo *open_file = file->private_data;
        struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file);
        struct TCP_Server_Info *server;
        pid_t pid;

        /*
         * Reads as many pages as possible from fscache. Returns -ENOBUFS
         * immediately if the cookie is negative
         *
         * After this point, every page in the list might have PG_fscache set,
         * so we will need to clean that up off of every page we don't use.
         */
        rc = cifs_readpages_from_fscache(mapping->host, mapping, page_list,
                                         &num_pages);
        if (rc == 0)
                return rc;

        /* forward the original opener's pid if the mount requested it */
        if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
                pid = open_file->pid;
        else
                pid = current->tgid;

        rc = 0;
        server = tlink_tcon(open_file->tlink)->ses->server;

        cifs_dbg(FYI, "%s: file=%p mapping=%p num_pages=%u\n",
                 __func__, file, mapping, num_pages);

        /*
         * Start with the page at end of list and move it to private
         * list. Do the same with any following pages until we hit
         * the rsize limit, hit an index discontinuity, or run out of
         * pages. Issue the async read and then start the loop again
         * until the list is empty.
         *
         * Note that list order is important. The page_list is in
         * the order of declining indexes. When we put the pages in
         * the rdata->pages, then we want them in increasing order.
         */
        while (!list_empty(page_list)) {
                unsigned int i, nr_pages, bytes, rsize;
                loff_t offset;
                struct page *page, *tpage;
                struct cifs_readdata *rdata;
                unsigned credits;

                /* may block until the server grants credits for this read */
                rc = server->ops->wait_mtu_credits(server, cifs_sb->rsize,
                                                   &rsize, &credits);
                if (rc)
                        break;

                /*
                 * Give up immediately if rsize is too small to read an entire
                 * page. The VFS will fall back to readpage. We should never
                 * reach this point however since we set ra_pages to 0 when the
                 * rsize is smaller than a cache page.
                 */
                if (unlikely(rsize < PAGE_CACHE_SIZE)) {
                        add_credits_and_wake_if(server, credits, 0);
                        return 0;
                }

                rc = readpages_get_pages(mapping, page_list, rsize, &tmplist,
                                         &nr_pages, &offset, &bytes);
                if (rc) {
                        add_credits_and_wake_if(server, credits, 0);
                        break;
                }

                rdata = cifs_readdata_alloc(nr_pages, cifs_readv_complete);
                if (!rdata) {
                        /* best to give up if we're out of mem */
                        list_for_each_entry_safe(page, tpage, &tmplist, lru) {
                                list_del(&page->lru);
                                lru_cache_add_file(page);
                                unlock_page(page);
                                page_cache_release(page);
                        }
                        rc = -ENOMEM;
                        add_credits_and_wake_if(server, credits, 0);
                        break;
                }

                rdata->cfile = cifsFileInfo_get(open_file);
                rdata->mapping = mapping;
                rdata->offset = offset;
                rdata->bytes = bytes;
                rdata->pid = pid;
                rdata->pagesz = PAGE_CACHE_SIZE;
                rdata->read_into_pages = cifs_readpages_read_into_pages;
                rdata->credits = credits;

                list_for_each_entry_safe(page, tpage, &tmplist, lru) {
                        list_del(&page->lru);
                        rdata->pages[rdata->nr_pages++] = page;
                }

                /*
                 * NOTE(review): if the handle is invalid and the reopen
                 * fails, rc is left at 0 (from readpages_get_pages), so
                 * the error cleanup below is skipped while rdata's pages
                 * are still locked -- confirm this path is intended.
                 */
                if (!rdata->cfile->invalidHandle ||
                    !cifs_reopen_file(rdata->cfile, true))
                        rc = server->ops->async_readv(rdata);
                if (rc) {
                        add_credits_and_wake_if(server, rdata->credits, 0);
                        for (i = 0; i < rdata->nr_pages; i++) {
                                page = rdata->pages[i];
                                lru_cache_add_file(page);
                                unlock_page(page);
                                page_cache_release(page);
                        }
                        /* Fallback to the readpage in error/reconnect cases */
                        kref_put(&rdata->refcount, cifs_readdata_release);
                        break;
                }

                /* drop our reference; the completion work holds its own */
                kref_put(&rdata->refcount, cifs_readdata_release);
        }

        /* Any pages that have been shown to fscache but didn't get added to
         * the pagecache must be uncached before they get returned to the
         * allocator.
         */
        cifs_fscache_readpages_cancel(mapping->host, page_list);
        return rc;
}
3562
3563 /*
3564  * cifs_readpage_worker must be called with the page pinned
3565  */
3566 static int cifs_readpage_worker(struct file *file, struct page *page,
3567         loff_t *poffset)
3568 {
3569         char *read_data;
3570         int rc;
3571
3572         /* Is the page cached? */
3573         rc = cifs_readpage_from_fscache(file_inode(file), page);
3574         if (rc == 0)
3575                 goto read_complete;
3576
3577         read_data = kmap(page);
3578         /* for reads over a certain size could initiate async read ahead */
3579
3580         rc = cifs_read(file, read_data, PAGE_CACHE_SIZE, poffset);
3581
3582         if (rc < 0)
3583                 goto io_error;
3584         else
3585                 cifs_dbg(FYI, "Bytes read %d\n", rc);
3586
3587         file_inode(file)->i_atime =
3588                 current_fs_time(file_inode(file)->i_sb);
3589
3590         if (PAGE_CACHE_SIZE > rc)
3591                 memset(read_data + rc, 0, PAGE_CACHE_SIZE - rc);
3592
3593         flush_dcache_page(page);
3594         SetPageUptodate(page);
3595
3596         /* send this page to the cache */
3597         cifs_readpage_to_fscache(file_inode(file), page);
3598
3599         rc = 0;
3600
3601 io_error:
3602         kunmap(page);
3603         unlock_page(page);
3604
3605 read_complete:
3606         return rc;
3607 }
3608
3609 static int cifs_readpage(struct file *file, struct page *page)
3610 {
3611         loff_t offset = (loff_t)page->index << PAGE_CACHE_SHIFT;
3612         int rc = -EACCES;
3613         unsigned int xid;
3614
3615         xid = get_xid();
3616
3617         if (file->private_data == NULL) {
3618                 rc = -EBADF;
3619                 free_xid(xid);
3620                 return rc;
3621         }
3622
3623         cifs_dbg(FYI, "readpage %p at offset %d 0x%x\n",
3624                  page, (int)offset, (int)offset);
3625
3626         rc = cifs_readpage_worker(file, page, &offset);
3627
3628         free_xid(xid);
3629         return rc;
3630 }
3631
3632 static int is_inode_writable(struct cifsInodeInfo *cifs_inode)
3633 {
3634         struct cifsFileInfo *open_file;
3635
3636         spin_lock(&cifs_file_list_lock);
3637         list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
3638                 if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
3639                         spin_unlock(&cifs_file_list_lock);
3640                         return 1;
3641                 }
3642         }
3643         spin_unlock(&cifs_file_list_lock);
3644         return 0;
3645 }
3646
3647 /* We do not want to update the file size from server for inodes
3648    open for write - to avoid races with writepage extending
3649    the file - in the future we could consider allowing
3650    refreshing the inode only on increases in the file size
3651    but this is tricky to do without racing with writebehind
3652    page caching in the current Linux kernel design */
3653 bool is_size_safe_to_change(struct cifsInodeInfo *cifsInode, __u64 end_of_file)
3654 {
3655         if (!cifsInode)
3656                 return true;
3657
3658         if (is_inode_writable(cifsInode)) {
3659                 /* This inode is open for write at least once */
3660                 struct cifs_sb_info *cifs_sb;
3661
3662                 cifs_sb = CIFS_SB(cifsInode->vfs_inode.i_sb);
3663                 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO) {
3664                         /* since no page cache to corrupt on directio
3665                         we can change size safely */
3666                         return true;
3667                 }
3668
3669                 if (i_size_read(&cifsInode->vfs_inode) < end_of_file)
3670                         return true;
3671
3672                 return false;
3673         } else
3674                 return true;
3675 }
3676
/*
 * ->write_begin() for cifs: return a locked, pinned page covering @pos
 * in *pagep.  When the page is not uptodate and the write is partial,
 * either zero the untouched regions (if we hold read caching and the
 * write starts at/beyond EOF) or read the page in once via
 * cifs_readpage_worker and re-grab it.  A failed read-in is deliberately
 * not fatal: cifs_write_end falls back to a sync write when PG_uptodate
 * is unset.
 */
static int cifs_write_begin(struct file *file, struct address_space *mapping,
                        loff_t pos, unsigned len, unsigned flags,
                        struct page **pagep, void **fsdata)
{
        int oncethru = 0;       /* ensure the read-in retry happens only once */
        pgoff_t index = pos >> PAGE_CACHE_SHIFT;
        loff_t offset = pos & (PAGE_CACHE_SIZE - 1);
        loff_t page_start = pos & PAGE_MASK;
        loff_t i_size;
        struct page *page;
        int rc = 0;

        cifs_dbg(FYI, "write_begin from %lld len %d\n", (long long)pos, len);

start:
        page = grab_cache_page_write_begin(mapping, index, flags);
        if (!page) {
                rc = -ENOMEM;
                goto out;
        }

        if (PageUptodate(page))
                goto out;

        /*
         * If we write a full page it will be up to date, no need to read from
         * the server. If the write is short, we'll end up doing a sync write
         * instead.
         */
        if (len == PAGE_CACHE_SIZE)
                goto out;

        /*
         * optimize away the read when we have an oplock, and we're not
         * expecting to use any of the data we'd be reading in. That
         * is, when the page lies beyond the EOF, or straddles the EOF
         * and the write will cover all of the existing data.
         */
        if (CIFS_CACHE_READ(CIFS_I(mapping->host))) {
                i_size = i_size_read(mapping->host);
                if (page_start >= i_size ||
                    (offset == 0 && (pos + len) >= i_size)) {
                        zero_user_segments(page, 0, offset,
                                           offset + len,
                                           PAGE_CACHE_SIZE);
                        /*
                         * PageChecked means that the parts of the page
                         * to which we're not writing are considered up
                         * to date. Once the data is copied to the
                         * page, it can be set uptodate.
                         */
                        SetPageChecked(page);
                        goto out;
                }
        }

        if ((file->f_flags & O_ACCMODE) != O_WRONLY && !oncethru) {
                /*
                 * might as well read a page, it is fast enough. If we get
                 * an error, we don't need to return it. cifs_write_end will
                 * do a sync write instead since PG_uptodate isn't set.
                 */
                cifs_readpage_worker(file, page, &page_start);
                page_cache_release(page);
                oncethru = 1;
                goto start;
        } else {
                /* we could try using another file handle if there is one -
                   but how would we lock it to prevent close of that handle
                   racing with this read? In any case
                   this will be written out by write_end so is fine */
        }
out:
        *pagep = page;
        return rc;
}
3753
3754 static int cifs_release_page(struct page *page, gfp_t gfp)
3755 {
3756         if (PagePrivate(page))
3757                 return 0;
3758
3759         return cifs_fscache_release_page(page, gfp);
3760 }
3761
3762 static void cifs_invalidate_page(struct page *page, unsigned int offset,
3763                                  unsigned int length)
3764 {
3765         struct cifsInodeInfo *cifsi = CIFS_I(page->mapping->host);
3766
3767         if (offset == 0 && length == PAGE_CACHE_SIZE)
3768                 cifs_fscache_invalidate_page(page, &cifsi->vfs_inode);
3769 }
3770
3771 static int cifs_launder_page(struct page *page)
3772 {
3773         int rc = 0;
3774         loff_t range_start = page_offset(page);
3775         loff_t range_end = range_start + (loff_t)(PAGE_CACHE_SIZE - 1);
3776         struct writeback_control wbc = {
3777                 .sync_mode = WB_SYNC_ALL,
3778                 .nr_to_write = 0,
3779                 .range_start = range_start,
3780                 .range_end = range_end,
3781         };
3782
3783         cifs_dbg(FYI, "Launder page: %p\n", page);
3784
3785         if (clear_page_dirty_for_io(page))
3786                 rc = cifs_writepage_locked(page, &wbc);
3787
3788         cifs_fscache_invalidate_page(page, page->mapping->host);
3789         return rc;
3790 }
3791
/*
 * Work handler run when the server breaks our oplock/lease: wait for
 * pending writers to drain, downgrade the cached oplock state, flush
 * (and, if read caching was lost, invalidate) the page cache, re-push
 * byte-range locks, and finally acknowledge the break to the server
 * unless it was cancelled (e.g. by a reconnect).
 */
void cifs_oplock_break(struct work_struct *work)
{
        struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo,
                                                  oplock_break);
        struct inode *inode = d_inode(cfile->dentry);
        struct cifsInodeInfo *cinode = CIFS_I(inode);
        struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
        struct TCP_Server_Info *server = tcon->ses->server;
        int rc = 0;

        /* do not act on the break until in-flight writers have finished */
        wait_on_bit(&cinode->flags, CIFS_INODE_PENDING_WRITERS,
                        TASK_UNINTERRUPTIBLE);

        server->ops->downgrade_oplock(server, cinode,
                test_bit(CIFS_INODE_DOWNGRADE_OPLOCK_TO_L2, &cinode->flags));

        /* mandatory brlocks cannot coexist with level II read caching */
        if (!CIFS_CACHE_WRITE(cinode) && CIFS_CACHE_READ(cinode) &&
                                                cifs_has_mand_locks(cinode)) {
                cifs_dbg(FYI, "Reset oplock to None for inode=%p due to mand locks\n",
                         inode);
                cinode->oplock = 0;
        }

        if (inode && S_ISREG(inode->i_mode)) {
                /* propagate the break to any local leases as well */
                if (CIFS_CACHE_READ(cinode))
                        break_lease(inode, O_RDONLY);
                else
                        break_lease(inode, O_WRONLY);
                rc = filemap_fdatawrite(inode->i_mapping);
                if (!CIFS_CACHE_READ(cinode)) {
                        rc = filemap_fdatawait(inode->i_mapping);
                        mapping_set_error(inode->i_mapping, rc);
                        cifs_zap_mapping(inode);
                }
                cifs_dbg(FYI, "Oplock flush inode %p rc %d\n", inode, rc);
        }

        rc = cifs_push_locks(cfile);
        if (rc)
                cifs_dbg(VFS, "Push locks rc = %d\n", rc);

        /*
         * releasing stale oplock after recent reconnect of smb session using
         * a now incorrect file handle is not a data integrity issue but do
         * not bother sending an oplock release if session to server still is
         * disconnected since oplock already released by the server
         */
        if (!cfile->oplock_break_cancelled) {
                rc = tcon->ses->server->ops->oplock_response(tcon, &cfile->fid,
                                                             cinode);
                cifs_dbg(FYI, "Oplock release rc = %d\n", rc);
        }
        cifs_done_oplock_break(cinode);
}
3846
3847 /*
3848  * The presence of cifs_direct_io() in the address space ops vector
3849  * allowes open() O_DIRECT flags which would have failed otherwise.
3850  *
3851  * In the non-cached mode (mount with cache=none), we shunt off direct read and write requests
3852  * so this method should never be called.
3853  *
3854  * Direct IO is not yet supported in the cached mode. 
3855  */
/* Placeholder ->direct_IO: always rejects; see the comment above. */
static ssize_t
cifs_direct_io(struct kiocb *iocb, struct iov_iter *iter, loff_t pos)
{
        /*
         * FIXME
         * Eventually need to support direct IO for non forcedirectio mounts
         */
        return -EINVAL;
}
3865
3866
/* Address space operations for the normal (cached) cifs mount mode. */
const struct address_space_operations cifs_addr_ops = {
        .readpage = cifs_readpage,
        .readpages = cifs_readpages,
        .writepage = cifs_writepage,
        .writepages = cifs_writepages,
        .write_begin = cifs_write_begin,
        .write_end = cifs_write_end,
        .set_page_dirty = __set_page_dirty_nobuffers,
        .releasepage = cifs_release_page,
        /* present only so open(O_DIRECT) succeeds; always returns -EINVAL */
        .direct_IO = cifs_direct_io,
        .invalidatepage = cifs_invalidate_page,
        .launder_page = cifs_launder_page,
};
3880
3881 /*
3882  * cifs_readpages requires the server to support a buffer large enough to
3883  * contain the header plus one complete page of data.  Otherwise, we need
3884  * to leave cifs_readpages out of the address space operations.
3885  */
/* As cifs_addr_ops but without readpages (see comment above) or direct_IO. */
const struct address_space_operations cifs_addr_ops_smallbuf = {
        .readpage = cifs_readpage,
        .writepage = cifs_writepage,
        .writepages = cifs_writepages,
        .write_begin = cifs_write_begin,
        .write_end = cifs_write_end,
        .set_page_dirty = __set_page_dirty_nobuffers,
        .releasepage = cifs_release_page,
        .invalidatepage = cifs_invalidate_page,
        .launder_page = cifs_launder_page,
};