4 * vfs operations that deal with files
6 * Copyright (C) International Business Machines Corp., 2002,2010
7 * Author(s): Steve French (sfrench@us.ibm.com)
8 * Jeremy Allison (jra@samba.org)
10 * This library is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU Lesser General Public License as published
12 * by the Free Software Foundation; either version 2.1 of the License, or
13 * (at your option) any later version.
15 * This library is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
18 * the GNU Lesser General Public License for more details.
20 * You should have received a copy of the GNU Lesser General Public License
21 * along with this library; if not, write to the Free Software
22 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
25 #include <linux/backing-dev.h>
26 #include <linux/stat.h>
27 #include <linux/fcntl.h>
28 #include <linux/pagemap.h>
29 #include <linux/pagevec.h>
30 #include <linux/writeback.h>
31 #include <linux/task_io_accounting_ops.h>
32 #include <linux/delay.h>
33 #include <linux/mount.h>
34 #include <linux/slab.h>
35 #include <asm/div64.h>
39 #include "cifsproto.h"
40 #include "cifs_unicode.h"
41 #include "cifs_debug.h"
42 #include "cifs_fs_sb.h"
44 static inline int cifs_convert_flags(unsigned int flags)
46 if ((flags & O_ACCMODE) == O_RDONLY)
48 else if ((flags & O_ACCMODE) == O_WRONLY)
50 else if ((flags & O_ACCMODE) == O_RDWR) {
51 /* GENERIC_ALL is too much permission to request
52 can cause unnecessary access denied on create */
53 /* return GENERIC_ALL; */
54 return (GENERIC_READ | GENERIC_WRITE);
57 return (READ_CONTROL | FILE_WRITE_ATTRIBUTES | FILE_READ_ATTRIBUTES |
58 FILE_WRITE_EA | FILE_APPEND_DATA | FILE_WRITE_DATA |
62 static inline fmode_t cifs_posix_convert_flags(unsigned int flags)
64 fmode_t posix_flags = 0;
66 if ((flags & O_ACCMODE) == O_RDONLY)
67 posix_flags = FMODE_READ;
68 else if ((flags & O_ACCMODE) == O_WRONLY)
69 posix_flags = FMODE_WRITE;
70 else if ((flags & O_ACCMODE) == O_RDWR) {
71 /* GENERIC_ALL is too much permission to request
72 can cause unnecessary access denied on create */
73 /* return GENERIC_ALL; */
74 posix_flags = FMODE_READ | FMODE_WRITE;
76 /* can not map O_CREAT or O_EXCL or O_TRUNC flags when
77 reopening a file. They had their effect on the original open */
79 posix_flags |= (fmode_t)O_APPEND;
81 posix_flags |= (fmode_t)O_DSYNC;
83 posix_flags |= (fmode_t)__O_SYNC;
84 if (flags & O_DIRECTORY)
85 posix_flags |= (fmode_t)O_DIRECTORY;
86 if (flags & O_NOFOLLOW)
87 posix_flags |= (fmode_t)O_NOFOLLOW;
89 posix_flags |= (fmode_t)O_DIRECT;
94 static inline int cifs_get_disposition(unsigned int flags)
96 if ((flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))
98 else if ((flags & (O_CREAT | O_TRUNC)) == (O_CREAT | O_TRUNC))
99 return FILE_OVERWRITE_IF;
100 else if ((flags & O_CREAT) == O_CREAT)
102 else if ((flags & O_TRUNC) == O_TRUNC)
103 return FILE_OVERWRITE;
108 /* all arguments to this function must be checked for validity in caller */
/*
 * Post-open bookkeeping after a successful SMB POSIX open: unless we
 * already hold a read oplock for this inode, flush dirty cached pages
 * and invalidate the page cache, then record the caching rights the
 * server's oplock grant gives us.
 * NOTE(review): this excerpt is missing several original lines (opening
 * brace, locals, returns); comments below cover only visible statements.
 */
110 cifs_posix_open_inode_helper(struct inode *inode, struct file *file,
111 struct cifsInodeInfo *pCifsInode, __u32 oplock,
/* GlobalSMBSeslock guards the caching flags updated below */
115 write_lock(&GlobalSMBSeslock);
117 pCifsInode = CIFS_I(file->f_path.dentry->d_inode);
118 if (pCifsInode == NULL) {
119 write_unlock(&GlobalSMBSeslock);
/* a read oplock is already held: cached pages are known valid */
123 if (pCifsInode->clientCanCacheRead) {
124 /* we have the inode open somewhere else
125 no need to discard cache data */
126 goto psx_client_can_cache;
129 /* BB FIXME need to fix this check to move it earlier into posix_open
130 BB fIX following section BB FIXME */
132 /* if not oplocked, invalidate inode pages if mtime or file
134 /* temp = cifs_NTtimeToUnix(le64_to_cpu(buf->LastWriteTime));
135 if (timespec_equal(&file->f_path.dentry->d_inode->i_mtime, &temp) &&
136 (file->f_path.dentry->d_inode->i_size ==
137 (loff_t)le64_to_cpu(buf->EndOfFile))) {
138 cFYI(1, "inode unchanged on server");
/* write back dirty pages before discarding the cache contents */
140 if (file->f_path.dentry->d_inode->i_mapping) {
141 rc = filemap_write_and_wait(file->f_path.dentry->d_inode->i_mapping);
/* remember the flush failure so a later fsync/close can report it */
143 CIFS_I(file->f_path.dentry->d_inode)->write_behind_rc = rc;
145 cFYI(1, "invalidating remote inode since open detected it "
147 invalidate_remote_inode(file->f_path.dentry->d_inode);
150 psx_client_can_cache:
/* low nibble of the oplock word encodes the granted oplock level */
151 if ((oplock & 0xF) == OPLOCK_EXCLUSIVE) {
152 pCifsInode->clientCanCacheAll = true;
153 pCifsInode->clientCanCacheRead = true;
154 cFYI(1, "Exclusive Oplock granted on inode %p",
155 file->f_path.dentry->d_inode);
156 } else if ((oplock & 0xF) == OPLOCK_READ)
157 pCifsInode->clientCanCacheRead = true;
159 /* will have to change the unlock if we reenable the
160 filemap_fdatawrite (which does not seem necessary */
161 write_unlock(&GlobalSMBSeslock);
165 static struct cifsFileInfo *
/*
 * Find a cifsFileInfo already created for this process on the inode's
 * open-file list (one not yet bound to a struct file — created earlier,
 * per the comment below, by cifs_create) and attach it to @file via
 * file->private_data.
 * NOTE(review): some original lines (braces, return) are missing from
 * this excerpt.
 */
166 cifs_fill_filedata(struct file *file)
168 struct list_head *tmp;
169 struct cifsFileInfo *pCifsFile = NULL;
170 struct cifsInodeInfo *pCifsInode = NULL;
172 /* search inode for this file and fill in file->private_data */
173 pCifsInode = CIFS_I(file->f_path.dentry->d_inode);
/* list walk under the read lock; we only write per-entry fields */
174 read_lock(&GlobalSMBSeslock);
175 list_for_each(tmp, &pCifsInode->openFileList) {
176 pCifsFile = list_entry(tmp, struct cifsFileInfo, flist);
/* match an unbound entry belonging to the current thread group */
177 if ((pCifsFile->pfile == NULL) &&
178 (pCifsFile->pid == current->tgid)) {
179 /* mode set in cifs_create */
181 /* needed for writepage */
182 pCifsFile->pfile = file;
183 file->private_data = pCifsFile;
187 read_unlock(&GlobalSMBSeslock);
189 if (file->private_data != NULL) {
/* an O_CREAT|O_EXCL open should have left an entry; log if missing */
191 } else if ((file->f_flags & O_CREAT) && (file->f_flags & O_EXCL))
192 cERROR(1, "could not find file instance for "
193 "new file %p", file);
197 /* all arguments to this function must be checked for validity in caller */
/*
 * Post-open bookkeeping for a regular (non-POSIX) SMB open: unless a
 * read oplock is already held, compare the server's mtime/size from the
 * open response against the cached inode, flushing and invalidating
 * stale cached pages; refresh inode metadata from the server; finally
 * record the caching rights granted by the oplock.
 * NOTE(review): several original lines (braces, locals, returns, the
 * client_can_cache label) are missing from this excerpt; comments cover
 * only the visible statements.
 */
198 static inline int cifs_open_inode_helper(struct inode *inode, struct file *file,
199 struct cifsInodeInfo *pCifsInode, struct cifsFileInfo *pCifsFile,
200 struct cifsTconInfo *pTcon, int *oplock, FILE_ALL_INFO *buf,
201 char *full_path, int xid)
203 struct timespec temp;
/* read oplock already held: cached data is known good, skip revalidate */
206 if (pCifsInode->clientCanCacheRead) {
207 /* we have the inode open somewhere else
208 no need to discard cache data */
209 goto client_can_cache;
212 /* BB need same check in cifs_create too? */
213 /* if not oplocked, invalidate inode pages if mtime or file
/* compare server mtime and EOF from the open response with our cache */
215 temp = cifs_NTtimeToUnix(buf->LastWriteTime);
216 if (timespec_equal(&file->f_path.dentry->d_inode->i_mtime, &temp) &&
217 (file->f_path.dentry->d_inode->i_size ==
218 (loff_t)le64_to_cpu(buf->EndOfFile))) {
219 cFYI(1, "inode unchanged on server");
221 if (file->f_path.dentry->d_inode->i_mapping) {
222 /* BB no need to lock inode until after invalidate
223 since namei code should already have it locked? */
224 rc = filemap_write_and_wait(file->f_path.dentry->d_inode->i_mapping);
/* stash the flush failure so a later fsync/close can report it */
226 CIFS_I(file->f_path.dentry->d_inode)->write_behind_rc = rc;
228 cFYI(1, "invalidating remote inode since open detected it "
230 invalidate_remote_inode(file->f_path.dentry->d_inode);
/* refresh metadata: unix path info when supported, generic otherwise */
235 rc = cifs_get_inode_info_unix(&file->f_path.dentry->d_inode,
236 full_path, inode->i_sb, xid);
238 rc = cifs_get_inode_info(&file->f_path.dentry->d_inode,
239 full_path, buf, inode->i_sb, xid, NULL);
/* low nibble of the oplock word encodes the granted oplock level */
241 if ((*oplock & 0xF) == OPLOCK_EXCLUSIVE) {
242 pCifsInode->clientCanCacheAll = true;
243 pCifsInode->clientCanCacheRead = true;
244 cFYI(1, "Exclusive Oplock granted on inode %p",
245 file->f_path.dentry->d_inode);
246 } else if ((*oplock & 0xF) == OPLOCK_READ)
247 pCifsInode->clientCanCacheRead = true;
/*
 * VFS ->open handler for regular files on a CIFS mount.
 *
 * Tries, in order: (1) binding an already-created handle left by
 * cifs_create via cifs_fill_filedata; (2) the SMB POSIX open extension
 * when the server advertises it; (3) a standard NT-style SMBOpen, or a
 * legacy OpenX for pre-NT servers.  On success it allocates the
 * cifsFileInfo, revalidates cached inode data, and (for newly created
 * files) pushes the requested mode to the server.
 * NOTE(review): many original lines (braces, locals, returns, error
 * paths) are missing from this excerpt; comments cover only what is
 * visible.
 */
252 int cifs_open(struct inode *inode, struct file *file)
257 struct cifs_sb_info *cifs_sb;
258 struct cifsTconInfo *tcon;
259 struct cifsFileInfo *pCifsFile;
260 struct cifsInodeInfo *pCifsInode;
261 char *full_path = NULL;
265 FILE_ALL_INFO *buf = NULL;
269 cifs_sb = CIFS_SB(inode->i_sb);
270 tcon = cifs_sb->tcon;
272 pCifsInode = CIFS_I(file->f_path.dentry->d_inode);
/* reuse a handle pre-created for this process (e.g. by cifs_create) */
273 pCifsFile = cifs_fill_filedata(file);
280 full_path = build_path_from_dentry(file->f_path.dentry);
281 if (full_path == NULL) {
287 cFYI(1, "inode = 0x%p file flags are 0x%x for %s",
288 inode, file->f_flags, full_path);
/* prefer the SMB POSIX open extension when the server supports it */
295 if (!tcon->broken_posix_open && tcon->unix_ext &&
296 (tcon->ses->capabilities & CAP_UNIX) &&
297 (CIFS_UNIX_POSIX_PATH_OPS_CAP &
298 le64_to_cpu(tcon->fsUnixInfo.Capability))) {
299 int oflags = (int) cifs_posix_convert_flags(file->f_flags);
300 oflags |= SMB_O_CREAT;
301 /* can not refresh inode info since size could be stale */
302 rc = cifs_posix_open(full_path, &inode, inode->i_sb,
303 cifs_sb->mnt_file_mode /* ignored */,
304 oflags, &oplock, &netfid, xid);
306 cFYI(1, "posix open succeeded");
307 /* no need for special case handling of setting mode
308 on read only files needed here */
310 pCifsFile = cifs_new_fileinfo(inode, netfid, file,
/* could not track the handle locally: close it on the server again */
313 if (pCifsFile == NULL) {
314 CIFSSMBClose(xid, tcon, netfid);
318 file->private_data = pCifsFile;
320 cifs_posix_open_inode_helper(inode, file, pCifsInode,
/* server claims POSIX support but rejects the call: disable it */
323 } else if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) {
324 if (tcon->ses->serverNOS)
325 cERROR(1, "server %s of type %s returned"
326 " unexpected error on SMB posix open"
327 ", disabling posix open support."
328 " Check if server update available.",
329 tcon->ses->serverName,
330 tcon->ses->serverNOS);
331 tcon->broken_posix_open = true;
332 } else if ((rc != -EIO) && (rc != -EREMOTE) &&
333 (rc != -EOPNOTSUPP)) /* path not found or net err */
335 /* else fallthrough to retry open the old way on network i/o
339 desiredAccess = cifs_convert_flags(file->f_flags);
341 /*********************************************************************
342 * open flag mapping table:
344 * POSIX Flag CIFS Disposition
345 * ---------- ----------------
346 * O_CREAT FILE_OPEN_IF
347 * O_CREAT | O_EXCL FILE_CREATE
348 * O_CREAT | O_TRUNC FILE_OVERWRITE_IF
349 * O_TRUNC FILE_OVERWRITE
350 * none of the above FILE_OPEN
352 * Note that there is not a direct match between disposition
353 * FILE_SUPERSEDE (ie create whether or not file exists although
354 * O_CREAT | O_TRUNC is similar but truncates the existing
355 * file rather than creating a new file as FILE_SUPERSEDE does
356 * (which uses the attributes / metadata passed in on open call)
358 *? O_SYNC is a reasonable match to CIFS writethrough flag
359 *? and the read write flags match reasonably. O_LARGEFILE
360 *? is irrelevant because largefile support is always used
361 *? by this client. Flags O_APPEND, O_DIRECT, O_DIRECTORY,
362 * O_FASYNC, O_NOFOLLOW, O_NONBLOCK need further investigation
363 *********************************************************************/
365 disposition = cifs_get_disposition(file->f_flags);
367 /* BB pass O_SYNC flag through on file attributes .. BB */
369 /* Also refresh inode by passing in file_info buf returned by SMBOpen
370 and calling get_inode_info with returned buf (at least helps
371 non-Unix server case) */
373 /* BB we can not do this if this is the second open of a file
374 and the first handle has writebehind data, we might be
375 able to simply do a filemap_fdatawrite/filemap_fdatawait first */
/* buffer for the file metadata returned by the open response */
376 buf = kmalloc(sizeof(FILE_ALL_INFO), GFP_KERNEL);
382 if (cifs_sb->tcon->ses->capabilities & CAP_NT_SMBS)
383 rc = CIFSSMBOpen(xid, tcon, full_path, disposition,
384 desiredAccess, CREATE_NOT_DIR, &netfid, &oplock, buf,
385 cifs_sb->local_nls, cifs_sb->mnt_cifs_flags
386 & CIFS_MOUNT_MAP_SPECIAL_CHR);
388 rc = -EIO; /* no NT SMB support fall into legacy open below */
391 /* Old server, try legacy style OpenX */
392 rc = SMBLegacyOpen(xid, tcon, full_path, disposition,
393 desiredAccess, CREATE_NOT_DIR, &netfid, &oplock, buf,
394 cifs_sb->local_nls, cifs_sb->mnt_cifs_flags
395 & CIFS_MOUNT_MAP_SPECIAL_CHR);
398 cFYI(1, "cifs_open returned 0x%x", rc);
402 pCifsFile = cifs_new_fileinfo(inode, netfid, file, file->f_path.mnt,
404 file->private_data = pCifsFile;
405 if (file->private_data == NULL) {
/* revalidate cached pages / refresh inode using the open response */
410 rc = cifs_open_inode_helper(inode, file, pCifsInode, pCifsFile, tcon,
411 &oplock, buf, full_path, xid);
413 if (oplock & CIFS_CREATE_ACTION) {
414 /* time to set mode which we can not set earlier due to
415 problems creating new read-only files */
416 if (tcon->unix_ext) {
417 struct cifs_unix_set_info_args args = {
418 .mode = inode->i_mode,
421 .ctime = NO_CHANGE_64,
422 .atime = NO_CHANGE_64,
423 .mtime = NO_CHANGE_64,
426 CIFSSMBUnixSetPathInfo(xid, tcon, full_path, &args,
428 cifs_sb->mnt_cifs_flags &
429 CIFS_MOUNT_MAP_SPECIAL_CHR);
/* Try to reacquire byte range locks that were released when session */
/* to server was lost */
static int cifs_relock_file(struct cifsFileInfo *cifsFile)
{
	int rc = 0;

	/* BB list all locks open on this file and relock */

	return rc;
}
/*
 * Re-open a file whose server handle was invalidated (typically after a
 * session reconnect).  Serialized per-handle via fh_mutex.  Tries the
 * SMB POSIX open extension first, then falls back to a standard
 * SMBOpen.  If @can_flush, dirty pages are written back and inode
 * metadata refreshed before caching flags are recomputed from the new
 * oplock; finally byte-range locks are reacquired.
 * NOTE(review): many original lines (braces, locals, returns, error
 * paths) are missing from this excerpt; comments cover only what is
 * visible.
 */
451 static int cifs_reopen_file(struct file *file, bool can_flush)
456 struct cifs_sb_info *cifs_sb;
457 struct cifsTconInfo *tcon;
458 struct cifsFileInfo *pCifsFile;
459 struct cifsInodeInfo *pCifsInode;
461 char *full_path = NULL;
463 int disposition = FILE_OPEN;
466 if (file->private_data)
467 pCifsFile = (struct cifsFileInfo *)file->private_data;
/* fh_mutex serializes concurrent reopen attempts on this handle */
472 mutex_lock(&pCifsFile->fh_mutex);
/* someone else already reopened it while we waited for the mutex */
473 if (!pCifsFile->invalidHandle) {
474 mutex_unlock(&pCifsFile->fh_mutex);
480 if (file->f_path.dentry == NULL) {
481 cERROR(1, "no valid name if dentry freed");
484 goto reopen_error_exit;
487 inode = file->f_path.dentry->d_inode;
489 cERROR(1, "inode not valid");
492 goto reopen_error_exit;
495 cifs_sb = CIFS_SB(inode->i_sb);
496 tcon = cifs_sb->tcon;
498 /* can not grab rename sem here because various ops, including
499 those that already have the rename sem can end up causing writepage
500 to get called and if the server was down that means we end up here,
501 and we can never tell if the caller already has the rename_sem */
502 full_path = build_path_from_dentry(file->f_path.dentry);
503 if (full_path == NULL) {
506 mutex_unlock(&pCifsFile->fh_mutex);
511 cFYI(1, "inode = 0x%p file flags 0x%x for %s",
512 inode, file->f_flags, full_path);
/* prefer the SMB POSIX open extension when the server supports it */
519 if (tcon->unix_ext && (tcon->ses->capabilities & CAP_UNIX) &&
520 (CIFS_UNIX_POSIX_PATH_OPS_CAP &
521 le64_to_cpu(tcon->fsUnixInfo.Capability))) {
522 int oflags = (int) cifs_posix_convert_flags(file->f_flags);
523 /* can not refresh inode info since size could be stale */
524 rc = cifs_posix_open(full_path, NULL, inode->i_sb,
525 cifs_sb->mnt_file_mode /* ignored */,
526 oflags, &oplock, &netfid, xid);
528 cFYI(1, "posix reopen succeeded");
531 /* fallthrough to retry open the old way on errors, especially
532 in the reconnect path it is important to retry hard */
535 desiredAccess = cifs_convert_flags(file->f_flags);
537 /* Can not refresh inode by passing in file_info buf to be returned
538 by SMBOpen and then calling get_inode_info with returned buf
539 since file might have write behind data that needs to be flushed
540 and server version of file size can be stale. If we knew for sure
541 that inode was not dirty locally we could do this */
543 rc = CIFSSMBOpen(xid, tcon, full_path, disposition, desiredAccess,
544 CREATE_NOT_DIR, &netfid, &oplock, NULL,
545 cifs_sb->local_nls, cifs_sb->mnt_cifs_flags &
546 CIFS_MOUNT_MAP_SPECIAL_CHR);
548 mutex_unlock(&pCifsFile->fh_mutex);
549 cFYI(1, "cifs_open returned 0x%x", rc);
550 cFYI(1, "oplock: %d", oplock);
/* record the fresh server handle and mark it valid again */
553 pCifsFile->netfid = netfid;
554 pCifsFile->invalidHandle = false;
555 mutex_unlock(&pCifsFile->fh_mutex);
556 pCifsInode = CIFS_I(inode);
559 rc = filemap_write_and_wait(inode->i_mapping);
561 CIFS_I(inode)->write_behind_rc = rc;
562 /* temporarily disable caching while we
563 go to server to get inode info */
564 pCifsInode->clientCanCacheAll = false;
565 pCifsInode->clientCanCacheRead = false;
567 rc = cifs_get_inode_info_unix(&inode,
568 full_path, inode->i_sb, xid);
570 rc = cifs_get_inode_info(&inode,
571 full_path, NULL, inode->i_sb,
573 } /* else we are writing out data to server already
574 and could deadlock if we tried to flush data, and
575 since we do not know if we have data that would
576 invalidate the current end of file on the server
577 we can not go to the server to get the new inod
/* low nibble of the oplock word encodes the granted oplock level */
579 if ((oplock & 0xF) == OPLOCK_EXCLUSIVE) {
580 pCifsInode->clientCanCacheAll = true;
581 pCifsInode->clientCanCacheRead = true;
582 cFYI(1, "Exclusive Oplock granted on inode %p",
583 file->f_path.dentry->d_inode);
584 } else if ((oplock & 0xF) == OPLOCK_READ) {
585 pCifsInode->clientCanCacheRead = true;
586 pCifsInode->clientCanCacheAll = false;
588 pCifsInode->clientCanCacheRead = false;
589 pCifsInode->clientCanCacheAll = false;
/* re-establish byte-range locks lost with the old handle */
591 cifs_relock_file(pCifsFile);
/*
 * VFS ->release handler: close the server file handle, drop any stored
 * byte-range lock records, unlink the cifsFileInfo from the inode and
 * tcon lists, and drop its reference.  If this was the last open
 * instance of the inode, caching rights are revoked and any deferred
 * write-behind error is propagated as the return code.
 * NOTE(review): several original lines (braces, locals, waits, returns)
 * are missing from this excerpt; comments cover only visible statements.
 */
599 int cifs_close(struct inode *inode, struct file *file)
603 struct cifs_sb_info *cifs_sb;
604 struct cifsTconInfo *pTcon;
605 struct cifsFileInfo *pSMBFile =
606 (struct cifsFileInfo *)file->private_data;
610 cifs_sb = CIFS_SB(inode->i_sb);
611 pTcon = cifs_sb->tcon;
613 struct cifsLockInfo *li, *tmp;
614 write_lock(&GlobalSMBSeslock);
/* mark closing so concurrent users stop taking new references */
615 pSMBFile->closePend = true;
617 /* no sense reconnecting to close a file that is
619 if (!pTcon->need_reconnect) {
620 write_unlock(&GlobalSMBSeslock);
/* wait (bounded backoff) for other references to drain first */
622 while ((atomic_read(&pSMBFile->count) != 1)
623 && (timeout <= 2048)) {
624 /* Give write a better chance to get to
625 server ahead of the close. We do not
626 want to add a wait_q here as it would
627 increase the memory utilization as
628 the struct would be in each open file,
629 but this should give enough time to
631 cFYI(DBG2, "close delay, write pending");
635 if (!pTcon->need_reconnect &&
636 !pSMBFile->invalidHandle)
637 rc = CIFSSMBClose(xid, pTcon,
640 write_unlock(&GlobalSMBSeslock);
642 write_unlock(&GlobalSMBSeslock);
644 /* Delete any outstanding lock records.
645 We'll lose them when the file is closed anyway. */
646 mutex_lock(&pSMBFile->lock_mutex);
647 list_for_each_entry_safe(li, tmp, &pSMBFile->llist, llist) {
648 list_del(&li->llist);
651 mutex_unlock(&pSMBFile->lock_mutex);
/* unlink from the inode's and tcon's open-file lists */
653 write_lock(&GlobalSMBSeslock);
654 list_del(&pSMBFile->flist);
655 list_del(&pSMBFile->tlist);
656 write_unlock(&GlobalSMBSeslock);
657 cifsFileInfo_put(file->private_data);
658 file->private_data = NULL;
662 read_lock(&GlobalSMBSeslock);
663 if (list_empty(&(CIFS_I(inode)->openFileList))) {
664 cFYI(1, "closing last open instance for inode %p", inode);
665 /* if the file is not open we do not know if we can cache info
666 on this inode, much less write behind and read ahead */
667 CIFS_I(inode)->clientCanCacheRead = false;
668 CIFS_I(inode)->clientCanCacheAll = false;
670 read_unlock(&GlobalSMBSeslock);
/* surface any deferred write-behind error to the closer */
671 if ((rc == 0) && CIFS_I(inode)->write_behind_rc)
672 rc = CIFS_I(inode)->write_behind_rc;
/*
 * VFS ->release handler for directories: close any still-open server
 * search handle (uncompleted readdir), free the network buffer that
 * held search results, then free the private cifsFileInfo.
 * NOTE(review): some original lines (braces, locals, return) are
 * missing from this excerpt.
 */
677 int cifs_closedir(struct inode *inode, struct file *file)
681 struct cifsFileInfo *pCFileStruct =
682 (struct cifsFileInfo *)file->private_data;
685 cFYI(1, "Closedir inode = 0x%p", inode);
690 struct cifsTconInfo *pTcon;
691 struct cifs_sb_info *cifs_sb =
692 CIFS_SB(file->f_path.dentry->d_sb);
694 pTcon = cifs_sb->tcon;
696 cFYI(1, "Freeing private data in close dir");
697 write_lock(&GlobalSMBSeslock);
/* readdir stopped midway: the server search handle is still open */
698 if (!pCFileStruct->srch_inf.endOfSearch &&
699 !pCFileStruct->invalidHandle) {
700 pCFileStruct->invalidHandle = true;
701 write_unlock(&GlobalSMBSeslock);
702 rc = CIFSFindClose(xid, pTcon, pCFileStruct->netfid);
703 cFYI(1, "Closing uncompleted readdir with rc %d",
705 /* not much we can do if it fails anyway, ignore rc */
708 write_unlock(&GlobalSMBSeslock);
/* release the SMB response buffer holding cached search entries */
709 ptmp = pCFileStruct->srch_inf.ntwrk_buf_start;
711 cFYI(1, "closedir free smb buf in srch struct");
712 pCFileStruct->srch_inf.ntwrk_buf_start = NULL;
713 if (pCFileStruct->srch_inf.smallBuf)
714 cifs_small_buf_release(ptmp);
716 cifs_buf_release(ptmp);
718 kfree(file->private_data);
719 file->private_data = NULL;
721 /* BB can we lock the filestruct while this is going on? */
726 static int store_file_lock(struct cifsFileInfo *fid, __u64 len,
727 __u64 offset, __u8 lockType)
729 struct cifsLockInfo *li =
730 kmalloc(sizeof(struct cifsLockInfo), GFP_KERNEL);
736 mutex_lock(&fid->lock_mutex);
737 list_add(&li->llist, &fid->llist);
738 mutex_unlock(&fid->lock_mutex);
/*
 * VFS ->lock handler.  Translates a POSIX/flock request into either SMB
 * POSIX byte-range lock calls (when the server advertises
 * CIFS_UNIX_FCNTL_CAP and the mount did not disable them) or classic
 * Windows LockingAndX requests.  Handles F_GETLK probing (lock then
 * immediately unlock to test availability) and, for Windows-style
 * locks, keeps local records so unlocks can be matched to prior locks.
 * NOTE(review): many original lines (braces, locals, returns, else
 * branches) are missing from this excerpt; comments describe only the
 * statements visible here.
 */
742 int cifs_lock(struct file *file, int cmd, struct file_lock *pfLock)
748 bool wait_flag = false;
749 struct cifs_sb_info *cifs_sb;
750 struct cifsTconInfo *tcon;
752 __u8 lockType = LOCKING_ANDX_LARGE_FILES;
753 bool posix_locking = 0;
/* fl_end is inclusive, hence the +1 to get a byte count */
755 length = 1 + pfLock->fl_end - pfLock->fl_start;
759 cFYI(1, "Lock parm: 0x%x flockflags: "
760 "0x%x flocktype: 0x%x start: %lld end: %lld",
761 cmd, pfLock->fl_flags, pfLock->fl_type, pfLock->fl_start,
764 if (pfLock->fl_flags & FL_POSIX)
766 if (pfLock->fl_flags & FL_FLOCK)
768 if (pfLock->fl_flags & FL_SLEEP) {
769 cFYI(1, "Blocking lock");
772 if (pfLock->fl_flags & FL_ACCESS)
773 cFYI(1, "Process suspended by mandatory locking - "
774 "not implemented yet");
775 if (pfLock->fl_flags & FL_LEASE)
776 cFYI(1, "Lease on file - not implemented yet");
777 if (pfLock->fl_flags &
778 (~(FL_POSIX | FL_FLOCK | FL_SLEEP | FL_ACCESS | FL_LEASE)))
779 cFYI(1, "Unknown lock flags 0x%x", pfLock->fl_flags);
/* map the POSIX lock type to SMB lock flavor / lock-vs-unlock counts */
781 if (pfLock->fl_type == F_WRLCK) {
784 } else if (pfLock->fl_type == F_UNLCK) {
787 /* Check if unlock includes more than
789 } else if (pfLock->fl_type == F_RDLCK) {
791 lockType |= LOCKING_ANDX_SHARED_LOCK;
793 } else if (pfLock->fl_type == F_EXLCK) {
796 } else if (pfLock->fl_type == F_SHLCK) {
798 lockType |= LOCKING_ANDX_SHARED_LOCK;
801 cFYI(1, "Unknown type of lock");
803 cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
804 tcon = cifs_sb->tcon;
806 if (file->private_data == NULL) {
811 netfid = ((struct cifsFileInfo *)file->private_data)->netfid;
/* use POSIX brlock extension unless disabled by mount option */
813 if ((tcon->ses->capabilities & CAP_UNIX) &&
814 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
815 ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
817 /* BB add code here to normalize offset and length to
818 account for negative length which we can not accept over the
/* F_GETLK probe via the POSIX lock "get" operation */
823 if (lockType & LOCKING_ANDX_SHARED_LOCK)
824 posix_lock_type = CIFS_RDLCK;
826 posix_lock_type = CIFS_WRLCK;
827 rc = CIFSSMBPosixLock(xid, tcon, netfid, 1 /* get */,
829 posix_lock_type, wait_flag);
/* Windows-style F_GETLK: try to take the lock, then undo it */
834 /* BB we could chain these into one lock request BB */
835 rc = CIFSSMBLock(xid, tcon, netfid, length, pfLock->fl_start,
836 0, 1, lockType, 0 /* wait flag */ );
838 rc = CIFSSMBLock(xid, tcon, netfid, length,
839 pfLock->fl_start, 1 /* numUnlock */ ,
840 0 /* numLock */ , lockType,
842 pfLock->fl_type = F_UNLCK;
844 cERROR(1, "Error unlocking previously locked "
845 "range %d during test of lock", rc);
849 /* if rc == ERR_SHARING_VIOLATION ? */
/* exclusive probe failed: retry as shared to distinguish rd vs wr */
852 if (lockType & LOCKING_ANDX_SHARED_LOCK) {
853 pfLock->fl_type = F_WRLCK;
855 rc = CIFSSMBLock(xid, tcon, netfid, length,
856 pfLock->fl_start, 0, 1,
857 lockType | LOCKING_ANDX_SHARED_LOCK,
860 rc = CIFSSMBLock(xid, tcon, netfid,
861 length, pfLock->fl_start, 1, 0,
863 LOCKING_ANDX_SHARED_LOCK,
865 pfLock->fl_type = F_RDLCK;
867 cERROR(1, "Error unlocking "
868 "previously locked range %d "
869 "during test of lock", rc);
872 pfLock->fl_type = F_WRLCK;
882 if (!numLock && !numUnlock) {
883 /* if no lock or unlock then nothing
884 to do since we do not know what it is */
/* set path: POSIX brlock when available */
891 if (lockType & LOCKING_ANDX_SHARED_LOCK)
892 posix_lock_type = CIFS_RDLCK;
894 posix_lock_type = CIFS_WRLCK;
897 posix_lock_type = CIFS_UNLCK;
899 rc = CIFSSMBPosixLock(xid, tcon, netfid, 0 /* set */,
901 posix_lock_type, wait_flag);
903 struct cifsFileInfo *fid =
904 (struct cifsFileInfo *)file->private_data;
907 rc = CIFSSMBLock(xid, tcon, netfid, length,
909 0, numLock, lockType, wait_flag);
912 /* For Windows locks we must store them. */
913 rc = store_file_lock(fid, length,
914 pfLock->fl_start, lockType);
916 } else if (numUnlock) {
917 /* For each stored lock that this unlock overlaps
918 completely, unlock it. */
920 struct cifsLockInfo *li, *tmp;
923 mutex_lock(&fid->lock_mutex);
924 list_for_each_entry_safe(li, tmp, &fid->llist, llist) {
925 if (pfLock->fl_start <= li->offset &&
926 (pfLock->fl_start + length) >=
927 (li->offset + li->length)) {
928 stored_rc = CIFSSMBLock(xid, tcon,
930 li->length, li->offset,
931 1, 0, li->type, false);
935 list_del(&li->llist);
940 mutex_unlock(&fid->lock_mutex);
/* keep the local (VFS) posix lock state in sync with the server */
944 if (pfLock->fl_flags & FL_POSIX)
945 posix_lock_file_wait(file, pfLock);
951 * Set the timeout on write requests past EOF. For some servers (Windows)
952 * these calls can be very long.
954 * If we're writing >10M past the EOF we give a 180s timeout. Anything less
955 * than that gets a 45s timeout. Writes not past EOF get 15s timeouts.
956 * The 10M cutoff is totally arbitrary. A better scheme for this would be
957 * welcome if someone wants to suggest one.
959 * We may be able to do a better job with this if there were some way to
960 * declare that a file should be sparse.
963 cifs_write_timeout(struct cifsInodeInfo *cifsi, loff_t offset)
965 if (offset <= cifsi->server_eof)
967 else if (offset > (cifsi->server_eof + (10 * 1024 * 1024)))
968 return CIFS_VLONG_OP;
973 /* update the file size (if needed) after a write */
975 cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset,
976 unsigned int bytes_written)
978 loff_t end_of_write = offset + bytes_written;
980 if (end_of_write > cifsi->server_eof)
981 cifsi->server_eof = end_of_write;
/*
 * Write @write_size bytes from a user-space buffer to the file at
 * *@poffset, looping over wsize-limited CIFSSMBWrite calls.  Reopens
 * the server handle on -EAGAIN (handle invalidated by reconnect),
 * advances *poffset, updates the cached server EOF and local i_size,
 * and returns the number of bytes written.
 * NOTE(review): several original lines (braces, locals, error returns)
 * are missing from this excerpt; comments cover visible statements only.
 */
984 ssize_t cifs_user_write(struct file *file, const char __user *write_data,
985 size_t write_size, loff_t *poffset)
988 unsigned int bytes_written = 0;
989 unsigned int total_written;
990 struct cifs_sb_info *cifs_sb;
991 struct cifsTconInfo *pTcon;
993 struct cifsFileInfo *open_file;
994 struct cifsInodeInfo *cifsi = CIFS_I(file->f_path.dentry->d_inode);
996 cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
998 pTcon = cifs_sb->tcon;
1000 /* cFYI(1, " write %d bytes to offset %lld of %s", write_size,
1001 *poffset, file->f_path.dentry->d_name.name); */
1003 if (file->private_data == NULL)
1005 open_file = (struct cifsFileInfo *) file->private_data;
1007 rc = generic_write_checks(file, poffset, &write_size, 0);
/* longer server timeout when extending well past current EOF */
1013 long_op = cifs_write_timeout(cifsi, *poffset);
1014 for (total_written = 0; write_size > total_written;
1015 total_written += bytes_written) {
/* -EAGAIN means the handle went stale; reopen and retry the chunk */
1017 while (rc == -EAGAIN) {
1018 if (file->private_data == NULL) {
1019 /* file has been closed on us */
1021 /* if we have gotten here we have written some data
1022 and blocked, and the file has been freed on us while
1023 we blocked so return what we managed to write */
1024 return total_written;
1026 if (open_file->closePend) {
1029 return total_written;
1033 if (open_file->invalidHandle) {
1034 /* we could deadlock if we called
1035 filemap_fdatawait from here so tell
1036 reopen_file not to flush data to server
1038 rc = cifs_reopen_file(file, false);
/* each request writes at most one wsize-sized chunk */
1043 rc = CIFSSMBWrite(xid, pTcon,
1045 min_t(const int, cifs_sb->wsize,
1046 write_size - total_written),
1047 *poffset, &bytes_written,
1048 NULL, write_data + total_written, long_op);
1050 if (rc || (bytes_written == 0)) {
1058 cifs_update_eof(cifsi, *poffset, bytes_written);
1059 *poffset += bytes_written;
1061 long_op = CIFS_STD_OP; /* subsequent writes fast -
1062 15 seconds is plenty */
1065 cifs_stats_bytes_written(pTcon, total_written);
1067 /* since the write may have blocked check these pointers again */
1068 if ((file->f_path.dentry) && (file->f_path.dentry->d_inode)) {
1069 struct inode *inode = file->f_path.dentry->d_inode;
1070 /* Do not update local mtime - server will set its actual value on write
1071 * inode->i_ctime = inode->i_mtime =
1072 * current_fs_time(inode->i_sb);*/
1073 if (total_written > 0) {
/* grow the locally cached i_size under i_lock if we passed it */
1074 spin_lock(&inode->i_lock);
1075 if (*poffset > file->f_path.dentry->d_inode->i_size)
1076 i_size_write(file->f_path.dentry->d_inode,
1078 spin_unlock(&inode->i_lock);
1080 mark_inode_dirty_sync(file->f_path.dentry->d_inode);
1083 return total_written;
/*
 * Kernel-buffer variant of the write loop (used e.g. by page writeback
 * paths): like cifs_user_write but @write_data is a kernel pointer, and
 * when signing is not required it can use the iovec-based CIFSSMBWrite2
 * to avoid an extra copy.  Reopens the handle on -EAGAIN, updates the
 * cached server EOF and local i_size, returns bytes written.
 * NOTE(review): several original lines (braces, locals, error returns)
 * are missing from this excerpt; comments cover visible statements only.
 */
1086 static ssize_t cifs_write(struct file *file, const char *write_data,
1087 size_t write_size, loff_t *poffset)
1090 unsigned int bytes_written = 0;
1091 unsigned int total_written;
1092 struct cifs_sb_info *cifs_sb;
1093 struct cifsTconInfo *pTcon;
1095 struct cifsFileInfo *open_file;
1096 struct cifsInodeInfo *cifsi = CIFS_I(file->f_path.dentry->d_inode);
1098 cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
1100 pTcon = cifs_sb->tcon;
1102 cFYI(1, "write %zd bytes to offset %lld of %s", write_size,
1103 *poffset, file->f_path.dentry->d_name.name);
1105 if (file->private_data == NULL)
1107 open_file = (struct cifsFileInfo *)file->private_data;
/* longer server timeout when extending well past current EOF */
1111 long_op = cifs_write_timeout(cifsi, *poffset);
1112 for (total_written = 0; write_size > total_written;
1113 total_written += bytes_written) {
/* -EAGAIN means the handle went stale; reopen and retry the chunk */
1115 while (rc == -EAGAIN) {
1116 if (file->private_data == NULL) {
1117 /* file has been closed on us */
1119 /* if we have gotten here we have written some data
1120 and blocked, and the file has been freed on us
1121 while we blocked so return what we managed to
1123 return total_written;
1125 if (open_file->closePend) {
1128 return total_written;
1132 if (open_file->invalidHandle) {
1133 /* we could deadlock if we called
1134 filemap_fdatawait from here so tell
1135 reopen_file not to flush data to
1137 rc = cifs_reopen_file(file, false);
/* iovec write path (Write2) chosen based on signing/experimental */
1141 if (experimEnabled || (pTcon->ses->server &&
1142 ((pTcon->ses->server->secMode &
1143 (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED))
1148 len = min((size_t)cifs_sb->wsize,
1149 write_size - total_written);
1150 /* iov[0] is reserved for smb header */
1151 iov[1].iov_base = (char *)write_data +
1153 iov[1].iov_len = len;
1154 rc = CIFSSMBWrite2(xid, pTcon,
1155 open_file->netfid, len,
1156 *poffset, &bytes_written,
1159 rc = CIFSSMBWrite(xid, pTcon,
1161 min_t(const int, cifs_sb->wsize,
1162 write_size - total_written),
1163 *poffset, &bytes_written,
1164 write_data + total_written,
1167 if (rc || (bytes_written == 0)) {
1175 cifs_update_eof(cifsi, *poffset, bytes_written);
1176 *poffset += bytes_written;
1178 long_op = CIFS_STD_OP; /* subsequent writes fast -
1179 15 seconds is plenty */
1182 cifs_stats_bytes_written(pTcon, total_written);
1184 /* since the write may have blocked check these pointers again */
1185 if ((file->f_path.dentry) && (file->f_path.dentry->d_inode)) {
1186 /*BB We could make this contingent on superblock ATIME flag too */
1187 /* file->f_path.dentry->d_inode->i_ctime =
1188 file->f_path.dentry->d_inode->i_mtime = CURRENT_TIME;*/
1189 if (total_written > 0) {
/* grow the locally cached i_size under i_lock if we passed it */
1190 spin_lock(&file->f_path.dentry->d_inode->i_lock);
1191 if (*poffset > file->f_path.dentry->d_inode->i_size)
1192 i_size_write(file->f_path.dentry->d_inode,
1194 spin_unlock(&file->f_path.dentry->d_inode->i_lock);
1196 mark_inode_dirty_sync(file->f_path.dentry->d_inode);
1199 return total_written;
1202 #ifdef CONFIG_CIFS_EXPERIMENTAL
/*
 * Find an open file handle on this inode that is usable for reading
 * (opened O_RDWR or O_RDONLY, valid, and not pending close).  On
 * success the entry's refcount is bumped via cifsFileInfo_get so the
 * caller owns a reference; the walk holds GlobalSMBSeslock.
 * NOTE(review): some original lines (braces, returns) are missing from
 * this excerpt.
 */
1203 struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode)
1205 struct cifsFileInfo *open_file = NULL;
1207 read_lock(&GlobalSMBSeslock);
1208 /* we could simply get the first_list_entry since write-only entries
1209 are always at the end of the list but since the first entry might
1210 have a close pending, we go through the whole list */
1211 list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
1212 if (open_file->closePend)
1214 if (open_file->pfile && ((open_file->pfile->f_flags & O_RDWR) ||
1215 (open_file->pfile->f_flags & O_RDONLY))) {
1216 if (!open_file->invalidHandle) {
1217 /* found a good file */
1218 /* lock it so it will not be closed on us */
1219 cifsFileInfo_get(open_file);
1220 read_unlock(&GlobalSMBSeslock);
1222 } /* else might as well continue, and look for
1223 another, or simply have the caller reopen it
1224 again rather than trying to fix this handle */
1225 } else /* write only file */
1226 break; /* write only files are last so must be done */
1228 read_unlock(&GlobalSMBSeslock);
/*
 * Find an open file handle on this inode usable for writing (O_RDWR or
 * O_WRONLY, not pending close).  Prefers a handle owned by the current
 * thread group, then retries accepting any owner; if the best candidate
 * has an invalid server handle it attempts a reopen (dropping the lock
 * while doing so, restarting the scan if the list may have changed).
 * Returns the entry with a reference held via cifsFileInfo_get.
 * NOTE(review): some original lines (braces, labels, returns) are
 * missing from this excerpt; comments cover visible statements only.
 */
1233 struct cifsFileInfo *find_writable_file(struct cifsInodeInfo *cifs_inode)
1235 struct cifsFileInfo *open_file;
1236 bool any_available = false;
1239 /* Having a null inode here (because mapping->host was set to zero by
1240 the VFS or MM) should not happen but we had reports of on oops (due to
1241 it being zero) during stress testcases so we need to check for it */
1243 if (cifs_inode == NULL) {
1244 cERROR(1, "Null inode passed to cifs_writeable_file");
1249 read_lock(&GlobalSMBSeslock);
1251 list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
/* first pass only considers handles owned by this thread group */
1252 if (open_file->closePend ||
1253 (!any_available && open_file->pid != current->tgid))
1256 if (open_file->pfile &&
1257 ((open_file->pfile->f_flags & O_RDWR) ||
1258 (open_file->pfile->f_flags & O_WRONLY))) {
/* pin the entry before we may drop the lock below */
1259 cifsFileInfo_get(open_file);
1261 if (!open_file->invalidHandle) {
1262 /* found a good writable file */
1263 read_unlock(&GlobalSMBSeslock);
1267 read_unlock(&GlobalSMBSeslock);
1268 /* Had to unlock since following call can block */
1269 rc = cifs_reopen_file(open_file->pfile, false);
1271 if (!open_file->closePend)
1273 else { /* start over in case this was deleted */
1274 /* since the list could be modified */
1275 read_lock(&GlobalSMBSeslock);
1276 cifsFileInfo_put(open_file);
1277 goto refind_writable;
1281 /* if it fails, try another handle if possible -
1282 (we can not do this if closePending since
1283 loop could be modified - in which case we
1284 have to start at the beginning of the list
1285 again. Note that it would be bad
1286 to hold up writepages here (rather than
1287 in caller) with continuous retries */
1288 cFYI(1, "wp failed on reopen file");
1289 read_lock(&GlobalSMBSeslock);
1290 /* can not use this handle, no write
1291 pending on this one after all */
1292 cifsFileInfo_put(open_file);
1294 if (open_file->closePend) /* list could have changed */
1295 goto refind_writable;
1296 /* else we simply continue to the next entry. Thus
1297 we do not loop on reopen errors. If we
1298 can not reopen the file, for example if we
1299 reconnected to a server with another client
1300 racing to delete or lock the file we would not
1301 make progress if we restarted before the beginning
1302 of the loop here. */
1305 /* couldn't find useable FH with same pid, try any available */
1306 if (!any_available) {
1307 any_available = true;
1308 goto refind_writable;
1310 read_unlock(&GlobalSMBSeslock);
/*
 * cifs_partialpagewrite - write the byte range [from, to) of one page
 * to the server using any available writable handle for the inode.
 *
 * Clamps the range so it never extends the file (writepage must not
 * grow i_size), tolerates a race with truncate by returning 0 when the
 * page now lies past EOF, and logs when no writable handle exists.
 *
 * NOTE(review): extraction gaps — the kunmap/return paths and the
 * cifs_write() argument list continuation are missing from view.
 */
1314 static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to)
1316 struct address_space *mapping = page->mapping;
1317 loff_t offset = (loff_t)page->index << PAGE_CACHE_SHIFT;
1320 int bytes_written = 0;
1321 struct cifs_sb_info *cifs_sb;
1322 struct cifsTconInfo *pTcon;
1323 struct inode *inode;
1324 struct cifsFileInfo *open_file;
1326 if (!mapping || !mapping->host)
1329 inode = page->mapping->host;
1330 cifs_sb = CIFS_SB(inode->i_sb);
1331 pTcon = cifs_sb->tcon;
1333 offset += (loff_t)from;
1334 write_data = kmap(page);
1337 if ((to > PAGE_CACHE_SIZE) || (from > to)) {
1342 /* racing with truncate? */
1343 if (offset > mapping->host->i_size) {
1345 return 0; /* don't care */
1348 /* check to make sure that we are not extending the file */
1349 if (mapping->host->i_size - offset < (loff_t)to)
1350 to = (unsigned)(mapping->host->i_size - offset);
1352 open_file = find_writable_file(CIFS_I(mapping->host));
1354 bytes_written = cifs_write(open_file->pfile, write_data,
1356 cifsFileInfo_put(open_file);
1357 /* Does mm or vfs already set times? */
1358 inode->i_atime = inode->i_mtime = current_fs_time(inode->i_sb);
1359 if ((bytes_written > 0) && (offset))
1361 else if (bytes_written < 0)
1364 cFYI(1, "No writeable filehandles for inode");
/*
 * cifs_writepages - address_space writepages: batch dirty pages into a
 * kvec array (iov[0] reserved for the SMB header) and push up to wsize
 * bytes per CIFSSMBWrite2 call.
 *
 * Falls back to generic_writepages() (page-at-a-time cifs_writepage)
 * when wsize < one page, when the kvec allocation fails, or when packet
 * signing is enabled and the experimental path is off.  Pages must be
 * index-consecutive within one batch; on short/failed writes the error
 * is latched in mapping->flags (AS_ENOSPC / AS_EIO).
 *
 * NOTE(review): extraction gaps — loop-local declarations, several
 * brace/continue/unlock lines, the retry loop around CIFSSMBWrite2,
 * and the final return are missing from this view.
 */
1372 static int cifs_writepages(struct address_space *mapping,
1373 struct writeback_control *wbc)
1375 struct backing_dev_info *bdi = mapping->backing_dev_info;
1376 unsigned int bytes_to_write;
1377 unsigned int bytes_written;
1378 struct cifs_sb_info *cifs_sb;
1382 int range_whole = 0;
1389 struct cifsFileInfo *open_file;
1390 struct cifsInodeInfo *cifsi = CIFS_I(mapping->host);
1392 struct pagevec pvec;
1397 cifs_sb = CIFS_SB(mapping->host->i_sb);
1400 * If wsize is smaller than the page cache size, default to writing
1401 * one page at a time via cifs_writepage
1403 if (cifs_sb->wsize < PAGE_CACHE_SIZE)
1404 return generic_writepages(mapping, wbc);
1406 if ((cifs_sb->tcon->ses) && (cifs_sb->tcon->ses->server))
1407 if (cifs_sb->tcon->ses->server->secMode &
1408 (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED))
1409 if (!experimEnabled)
1410 return generic_writepages(mapping, wbc);
1412 iov = kmalloc(32 * sizeof(struct kvec), GFP_KERNEL);
1414 return generic_writepages(mapping, wbc);
1418 * BB: Is this meaningful for a non-block-device file system?
1419 * If it is, we should test it again after we do I/O
1421 if (wbc->nonblocking && bdi_write_congested(bdi)) {
1422 wbc->encountered_congestion = 1;
1429 pagevec_init(&pvec, 0);
1430 if (wbc->range_cyclic) {
1431 index = mapping->writeback_index; /* Start from prev offset */
1434 index = wbc->range_start >> PAGE_CACHE_SHIFT;
1435 end = wbc->range_end >> PAGE_CACHE_SHIFT;
1436 if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
1441 while (!done && (index <= end) &&
1442 (nr_pages = pagevec_lookup_tag(&pvec, mapping, &index,
1443 PAGECACHE_TAG_DIRTY,
1444 min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1))) {
1453 for (i = 0; i < nr_pages; i++) {
1454 page = pvec.pages[i];
1456 * At this point we hold neither mapping->tree_lock nor
1457 * lock on the page itself: the page may be truncated or
1458 * invalidated (changing page->mapping to NULL), or even
1459 * swizzled back from swapper_space to tmpfs file
1465 else if (!trylock_page(page))
1468 if (unlikely(page->mapping != mapping)) {
1473 if (!wbc->range_cyclic && page->index > end) {
1479 if (next && (page->index != next)) {
1480 /* Not next consecutive page */
1485 if (wbc->sync_mode != WB_SYNC_NONE)
1486 wait_on_page_writeback(page);
1488 if (PageWriteback(page) ||
1489 !clear_page_dirty_for_io(page)) {
1495 * This actually clears the dirty bit in the radix tree.
1496 * See cifs_writepage() for more commentary.
1498 set_page_writeback(page);
1500 if (page_offset(page) >= mapping->host->i_size) {
1503 end_page_writeback(page);
1508 * BB can we get rid of this? pages are held by pvec
1510 page_cache_get(page);
1512 len = min(mapping->host->i_size - page_offset(page),
1513 (loff_t)PAGE_CACHE_SIZE);
1515 /* reserve iov[0] for the smb header */
1517 iov[n_iov].iov_base = kmap(page);
1518 iov[n_iov].iov_len = len;
1519 bytes_to_write += len;
1523 offset = page_offset(page);
1525 next = page->index + 1;
1526 if (bytes_to_write + PAGE_CACHE_SIZE > cifs_sb->wsize)
1530 /* Search for a writable handle every time we call
1531 * CIFSSMBWrite2. We can't rely on the last handle
1532 * we used to still be valid
1534 open_file = find_writable_file(CIFS_I(mapping->host));
1536 cERROR(1, "No writable handles for inode");
1539 long_op = cifs_write_timeout(cifsi, offset);
1540 rc = CIFSSMBWrite2(xid, cifs_sb->tcon,
1542 bytes_to_write, offset,
1543 &bytes_written, iov, n_iov,
1545 cifsFileInfo_put(open_file);
1546 cifs_update_eof(cifsi, offset, bytes_written);
1548 if (rc || bytes_written < bytes_to_write) {
1549 cERROR(1, "Write2 ret %d, wrote %d",
1551 /* BB what if continued retry is
1552 requested via mount flags? */
1554 set_bit(AS_ENOSPC, &mapping->flags);
1556 set_bit(AS_EIO, &mapping->flags);
1558 cifs_stats_bytes_written(cifs_sb->tcon,
1562 for (i = 0; i < n_iov; i++) {
1563 page = pvec.pages[first + i];
1564 /* Should we also set page error on
1565 success rc but too little data written? */
1566 /* BB investigate retry logic on temporary
1567 server crash cases and how recovery works
1568 when page marked as error */
1573 end_page_writeback(page);
1574 page_cache_release(page);
1576 if ((wbc->nr_to_write -= n_iov) <= 0)
1580 /* Need to re-find the pages we skipped */
1581 index = pvec.pages[0]->index + 1;
1583 pagevec_release(&pvec);
1585 if (!scanned && !done) {
1587 * We hit the last page and there is more work to be done: wrap
1588 * back to the start of the file
1594 if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
1595 mapping->writeback_index = index;
/*
 * cifs_writepage - write one page via cifs_partialpagewrite().
 *
 * Takes a page reference, marks the page under writeback (which clears
 * the dirty tag in the radix tree), writes the full page, then ends
 * writeback and drops the reference.
 *
 * NOTE(review): extraction gaps — xid handling, the unlock_page call
 * and the return statement are missing from this view.
 */
1602 static int cifs_writepage(struct page *page, struct writeback_control *wbc)
1608 /* BB add check for wbc flags */
1609 page_cache_get(page);
1610 if (!PageUptodate(page))
1611 cFYI(1, "ppw - page not up to date");
1614 * Set the "writeback" flag, and clear "dirty" in the radix tree.
1616 * A writepage() implementation always needs to do either this,
1617 * or re-dirty the page with "redirty_page_for_writepage()" in
1618 * the case of a failure.
1620 * Just unlocking the page will cause the radix tree tag-bits
1621 * to fail to update with the state of the page correctly.
1623 set_page_writeback(page);
1624 rc = cifs_partialpagewrite(page, 0, PAGE_CACHE_SIZE);
1625 SetPageUptodate(page); /* BB add check for error and Clearuptodate? */
1627 end_page_writeback(page);
1628 page_cache_release(page);
/*
 * cifs_write_end - address_space write_end.
 *
 * If the page was marked PageChecked by write_begin (non-written parts
 * considered up to date), promote it to uptodate; a full-page copy also
 * makes it uptodate.  A partial copy into a non-uptodate page cannot be
 * cached safely, so it is written synchronously with cifs_write();
 * otherwise the page is simply dirtied.  i_size is advanced under
 * i_lock when the write extends the file.
 *
 * NOTE(review): extraction gaps — rc declaration, kunmap, unlock_page
 * and the return are missing from this view.
 */
1633 static int cifs_write_end(struct file *file, struct address_space *mapping,
1634 loff_t pos, unsigned len, unsigned copied,
1635 struct page *page, void *fsdata)
1638 struct inode *inode = mapping->host;
1640 cFYI(1, "write_end for page %p from pos %lld with %d bytes",
1643 if (PageChecked(page)) {
1645 SetPageUptodate(page);
1646 ClearPageChecked(page);
1647 } else if (!PageUptodate(page) && copied == PAGE_CACHE_SIZE)
1648 SetPageUptodate(page);
1650 if (!PageUptodate(page)) {
1652 unsigned offset = pos & (PAGE_CACHE_SIZE - 1);
1656 /* this is probably better than directly calling
1657 partialpage_write since in this function the file handle is
1658 known which we might as well leverage */
1659 /* BB check if anything else missing out of ppw
1660 such as updating last write time */
1661 page_data = kmap(page);
1662 rc = cifs_write(file, page_data + offset, copied, &pos);
1663 /* if (rc < 0) should we set writebehind rc? */
1670 set_page_dirty(page);
1674 spin_lock(&inode->i_lock);
1675 if (pos > inode->i_size)
1676 i_size_write(inode, pos);
1677 spin_unlock(&inode->i_lock);
1681 page_cache_release(page);
/*
 * cifs_fsync - flush dirty pages for the file, pick up any latched
 * write-behind error (write_behind_rc, which is then cleared), and
 * issue an SMB Flush to the server unless the mount disabled it
 * (CIFS_MOUNT_NOSSYNC).
 *
 * NOTE(review): extraction gaps — xid setup/teardown and the return
 * are missing from this view.
 */
1686 int cifs_fsync(struct file *file, int datasync)
1690 struct cifsTconInfo *tcon;
1691 struct cifsFileInfo *smbfile =
1692 (struct cifsFileInfo *)file->private_data;
1693 struct inode *inode = file->f_path.dentry->d_inode;
1697 cFYI(1, "Sync file - name: %s datasync: 0x%x",
1698 file->f_path.dentry->d_name.name, datasync);
1700 rc = filemap_write_and_wait(inode->i_mapping);
1702 rc = CIFS_I(inode)->write_behind_rc;
1703 CIFS_I(inode)->write_behind_rc = 0;
1704 tcon = CIFS_SB(inode->i_sb)->tcon;
1705 if (!rc && tcon && smbfile &&
1706 !(CIFS_SB(inode->i_sb)->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC))
1707 rc = CIFSSMBFlush(xid, tcon, smbfile->netfid);
1714 /* static void cifs_sync_page(struct page *page)
1716 struct address_space *mapping;
1717 struct inode *inode;
1718 unsigned long index = page->index;
1719 unsigned int rpages = 0;
1722 cFYI(1, "sync page %p", page);
1723 mapping = page->mapping;
1726 inode = mapping->host;
1730 /* fill in rpages then
1731 result = cifs_pagein_inode(inode, index, rpages); */ /* BB finish */
1733 /* cFYI(1, "rpages is %d for sync page of Index %ld", rpages, index);
1743 * As file closes, flush all cached write data for this inode checking
1744 * for write behind errors.
/*
 * cifs_flush - called on close(); start writeback of dirty pages and
 * report any pending write-behind error to the caller, clearing the
 * latched write_behind_rc afterwards.
 *
 * NOTE(review): extraction gaps — rc declaration, the success check
 * before picking up write_behind_rc, and the return are missing.
 */
1746 int cifs_flush(struct file *file, fl_owner_t id)
1748 struct inode *inode = file->f_path.dentry->d_inode;
1751 /* Rather than do the steps manually:
1752 lock the inode for writing
1753 loop through pages looking for write behind data (dirty pages)
1754 coalesce into contiguous 16K (or smaller) chunks to write to server
1755 send to server (prefer in parallel)
1756 deal with writebehind errors
1757 unlock inode for writing
1758 filemapfdatawrite appears easier for the time being */
1760 rc = filemap_fdatawrite(inode->i_mapping);
1761 /* reset wb rc if we were able to write out dirty pages */
1763 rc = CIFS_I(inode)->write_behind_rc;
1764 CIFS_I(inode)->write_behind_rc = 0;
1767 cFYI(1, "Flush inode %p file %p rc %d", inode, file, rc);
/*
 * cifs_user_read - read into a userspace buffer, bypassing the page
 * cache.  Loops issuing CIFSSMBRead calls (retrying with -EAGAIN after
 * reopening an invalidated handle), then copy_to_user()s the payload
 * out of the SMB response, skipping the RFC1001 length field plus the
 * server-reported DataOffset.  Response buffers are released according
 * to buf_type.  Advances *poffset by the bytes read.
 *
 * NOTE(review): extraction gaps — xid handling, several error-exit
 * lines and the final return are missing from this view.
 */
1772 ssize_t cifs_user_read(struct file *file, char __user *read_data,
1773 size_t read_size, loff_t *poffset)
1776 unsigned int bytes_read = 0;
1777 unsigned int total_read = 0;
1778 unsigned int current_read_size;
1779 struct cifs_sb_info *cifs_sb;
1780 struct cifsTconInfo *pTcon;
1782 struct cifsFileInfo *open_file;
1783 char *smb_read_data;
1784 char __user *current_offset;
1785 struct smb_com_read_rsp *pSMBr;
1788 cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
1789 pTcon = cifs_sb->tcon;
1791 if (file->private_data == NULL) {
1796 open_file = (struct cifsFileInfo *)file->private_data;
1798 if ((file->f_flags & O_ACCMODE) == O_WRONLY)
1799 cFYI(1, "attempting read on write only file instance");
1801 for (total_read = 0, current_offset = read_data;
1802 read_size > total_read;
1803 total_read += bytes_read, current_offset += bytes_read) {
1804 current_read_size = min_t(const int, read_size - total_read,
1807 smb_read_data = NULL;
1808 while (rc == -EAGAIN) {
1809 int buf_type = CIFS_NO_BUFFER;
1810 if ((open_file->invalidHandle) &&
1811 (!open_file->closePend)) {
1812 rc = cifs_reopen_file(file, true);
1816 rc = CIFSSMBRead(xid, pTcon,
1818 current_read_size, *poffset,
1819 &bytes_read, &smb_read_data,
1821 pSMBr = (struct smb_com_read_rsp *)smb_read_data;
1822 if (smb_read_data) {
1823 if (copy_to_user(current_offset,
1825 4 /* RFC1001 length field */ +
1826 le16_to_cpu(pSMBr->DataOffset),
1830 if (buf_type == CIFS_SMALL_BUFFER)
1831 cifs_small_buf_release(smb_read_data);
1832 else if (buf_type == CIFS_LARGE_BUFFER)
1833 cifs_buf_release(smb_read_data);
1834 smb_read_data = NULL;
1837 if (rc || (bytes_read == 0)) {
1845 cifs_stats_bytes_read(pTcon, bytes_read);
1846 *poffset += bytes_read;
/*
 * cifs_read - read into a kernel buffer (used by readpage paths).
 *
 * Like cifs_user_read() but the destination is a kernel pointer, so
 * CIFSSMBRead copies directly into current_offset instead of returning
 * a response buffer.  Caps each request for pre-NT servers lacking
 * CAP_LARGE_FILES to maxBuf minus header slack, retries on -EAGAIN
 * after reopening an invalidated handle, and advances *poffset.
 *
 * FIX(review): the CIFSSMBRead argument read "¤t_offset" — a
 * mis-encoded "&current_offset" (HTML entity "&curren;" rendered as the
 * currency sign and fused with the identifier).  Restored the intended
 * "&current_offset", matching the parallel out-pointer style of the
 * CIFSSMBRead call in cifs_user_read().
 *
 * NOTE(review): extraction gaps — xid handling, some error exits and
 * the final return are missing from this view.
 */
1854 static ssize_t cifs_read(struct file *file, char *read_data, size_t read_size,
1858 unsigned int bytes_read = 0;
1859 unsigned int total_read;
1860 unsigned int current_read_size;
1861 struct cifs_sb_info *cifs_sb;
1862 struct cifsTconInfo *pTcon;
1864 char *current_offset;
1865 struct cifsFileInfo *open_file;
1866 int buf_type = CIFS_NO_BUFFER;
1869 cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
1870 pTcon = cifs_sb->tcon;
1872 if (file->private_data == NULL) {
1877 open_file = (struct cifsFileInfo *)file->private_data;
1879 if ((file->f_flags & O_ACCMODE) == O_WRONLY)
1880 cFYI(1, "attempting read on write only file instance");
1882 for (total_read = 0, current_offset = read_data;
1883 read_size > total_read;
1884 total_read += bytes_read, current_offset += bytes_read) {
1885 current_read_size = min_t(const int, read_size - total_read,
1887 /* For windows me and 9x we do not want to request more
1888 than it negotiated since it will refuse the read then */
1890 !(pTcon->ses->capabilities & CAP_LARGE_FILES)) {
1891 current_read_size = min_t(const int, current_read_size,
1892 pTcon->ses->server->maxBuf - 128);
1895 while (rc == -EAGAIN) {
1896 if ((open_file->invalidHandle) &&
1897 (!open_file->closePend)) {
1898 rc = cifs_reopen_file(file, true);
1902 rc = CIFSSMBRead(xid, pTcon,
1904 current_read_size, *poffset,
1905 &bytes_read, &current_offset,
1908 if (rc || (bytes_read == 0)) {
1916 cifs_stats_bytes_read(pTcon, total_read);
1917 *poffset += bytes_read;
/*
 * cifs_file_mmap - revalidate cached data against the server before
 * handing the mapping to generic_file_mmap().  A failed revalidation
 * is logged; the mmap outcome comes from generic_file_mmap().
 *
 * NOTE(review): extraction gaps — xid handling and the return are
 * missing from this view.
 */
1924 int cifs_file_mmap(struct file *file, struct vm_area_struct *vma)
1929 rc = cifs_revalidate_file(file);
1931 cFYI(1, "Validation prior to mmap failed, error=%d", rc);
1935 rc = generic_file_mmap(file, vma);
/*
 * cifs_copy_cache_pages - distribute a contiguous read buffer across
 * the pages on a readahead list: pop each page, add it to the page
 * cache/LRU, memcpy one page's worth of data in (zero-filling the tail
 * of a final partial page), flush the dcache and mark it uptodate.
 * Pages that cannot be added to the page cache are skipped and their
 * data consumed anyway so the buffer walk stays aligned.
 *
 * NOTE(review): extraction gaps — some brace/unlock/SetPageError lines
 * are missing from this view.
 */
1941 static void cifs_copy_cache_pages(struct address_space *mapping,
1942 struct list_head *pages, int bytes_read, char *data)
1947 while (bytes_read > 0) {
1948 if (list_empty(pages))
1951 page = list_entry(pages->prev, struct page, lru);
1952 list_del(&page->lru);
1954 if (add_to_page_cache_lru(page, mapping, page->index,
1956 page_cache_release(page);
1957 cFYI(1, "Add page cache failed");
1958 data += PAGE_CACHE_SIZE;
1959 bytes_read -= PAGE_CACHE_SIZE;
1962 page_cache_release(page);
1964 target = kmap_atomic(page, KM_USER0);
1966 if (PAGE_CACHE_SIZE > bytes_read) {
1967 memcpy(target, data, bytes_read);
1968 /* zero the tail end of this partial page */
1969 memset(target + bytes_read, 0,
1970 PAGE_CACHE_SIZE - bytes_read);
1973 memcpy(target, data, PAGE_CACHE_SIZE);
1974 bytes_read -= PAGE_CACHE_SIZE;
1976 kunmap_atomic(target, KM_USER0);
1978 flush_dcache_page(page);
1979 SetPageUptodate(page);
1981 data += PAGE_CACHE_SIZE;
/*
 * cifs_readpages - address_space readpages: coalesce index-adjacent
 * pages from the readahead list into one large CIFSSMBRead (bounded by
 * rsize rounded down to a page multiple), then fan the response buffer
 * out to the pages via cifs_copy_cache_pages().  Retries with -EAGAIN
 * after reopening an invalidated handle; response buffers are released
 * per buf_type on every exit path.
 *
 * NOTE(review): extraction gaps — xid handling, contig_pages counting
 * details, loop-exit handling for remaining pages, and the return are
 * missing from this view.
 */
1986 static int cifs_readpages(struct file *file, struct address_space *mapping,
1987 struct list_head *page_list, unsigned num_pages)
1993 struct cifs_sb_info *cifs_sb;
1994 struct cifsTconInfo *pTcon;
1995 unsigned int bytes_read = 0;
1996 unsigned int read_size, i;
1997 char *smb_read_data = NULL;
1998 struct smb_com_read_rsp *pSMBr;
1999 struct cifsFileInfo *open_file;
2000 int buf_type = CIFS_NO_BUFFER;
2003 if (file->private_data == NULL) {
2008 open_file = (struct cifsFileInfo *)file->private_data;
2009 cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
2010 pTcon = cifs_sb->tcon;
2012 cFYI(DBG2, "rpages: num pages %d", num_pages);
2013 for (i = 0; i < num_pages; ) {
2014 unsigned contig_pages;
2015 struct page *tmp_page;
2016 unsigned long expected_index;
2018 if (list_empty(page_list))
2021 page = list_entry(page_list->prev, struct page, lru);
2022 offset = (loff_t)page->index << PAGE_CACHE_SHIFT;
2024 /* count adjacent pages that we will read into */
2027 list_entry(page_list->prev, struct page, lru)->index;
2028 list_for_each_entry_reverse(tmp_page, page_list, lru) {
2029 if (tmp_page->index == expected_index) {
2035 if (contig_pages + i > num_pages)
2036 contig_pages = num_pages - i;
2038 /* for reads over a certain size could initiate async
2041 read_size = contig_pages * PAGE_CACHE_SIZE;
2042 /* Read size needs to be in multiples of one page */
2043 read_size = min_t(const unsigned int, read_size,
2044 cifs_sb->rsize & PAGE_CACHE_MASK);
2045 cFYI(DBG2, "rpages: read size 0x%x contiguous pages %d",
2046 read_size, contig_pages);
2048 while (rc == -EAGAIN) {
2049 if ((open_file->invalidHandle) &&
2050 (!open_file->closePend)) {
2051 rc = cifs_reopen_file(file, true);
2056 rc = CIFSSMBRead(xid, pTcon,
2059 &bytes_read, &smb_read_data,
2061 /* BB more RC checks ? */
2062 if (rc == -EAGAIN) {
2063 if (smb_read_data) {
2064 if (buf_type == CIFS_SMALL_BUFFER)
2065 cifs_small_buf_release(smb_read_data);
2066 else if (buf_type == CIFS_LARGE_BUFFER)
2067 cifs_buf_release(smb_read_data);
2068 smb_read_data = NULL;
2072 if ((rc < 0) || (smb_read_data == NULL)) {
2073 cFYI(1, "Read error in readpages: %d", rc);
2075 } else if (bytes_read > 0) {
2076 task_io_account_read(bytes_read);
2077 pSMBr = (struct smb_com_read_rsp *)smb_read_data;
2078 cifs_copy_cache_pages(mapping, page_list, bytes_read,
2079 smb_read_data + 4 /* RFC1001 hdr */ +
2080 le16_to_cpu(pSMBr->DataOffset));
2082 i += bytes_read >> PAGE_CACHE_SHIFT;
2083 cifs_stats_bytes_read(pTcon, bytes_read);
2084 if ((bytes_read & PAGE_CACHE_MASK) != bytes_read) {
2085 i++; /* account for partial page */
2087 /* server copy of file can have smaller size
2089 /* BB do we need to verify this common case ?
2090 this case is ok - if we are at server EOF
2091 we will hit it on next read */
2096 cFYI(1, "No bytes read (%d) at offset %lld . "
2097 "Cleaning remaining pages from readahead list",
2098 bytes_read, offset);
2099 /* BB turn off caching and do new lookup on
2100 file size at server? */
2103 if (smb_read_data) {
2104 if (buf_type == CIFS_SMALL_BUFFER)
2105 cifs_small_buf_release(smb_read_data);
2106 else if (buf_type == CIFS_LARGE_BUFFER)
2107 cifs_buf_release(smb_read_data);
2108 smb_read_data = NULL;
2113 /* need to free smb_read_data buf before exit */
2114 if (smb_read_data) {
2115 if (buf_type == CIFS_SMALL_BUFFER)
2116 cifs_small_buf_release(smb_read_data);
2117 else if (buf_type == CIFS_LARGE_BUFFER)
2118 cifs_buf_release(smb_read_data);
2119 smb_read_data = NULL;
/*
 * cifs_readpage_worker - fill one page from the server via cifs_read(),
 * zero the tail beyond the bytes actually read, update atime, flush the
 * dcache and mark the page uptodate.  Holds an extra page reference
 * over the kmap'd read.
 *
 * NOTE(review): extraction gaps — error exit, kunmap and the return
 * are missing from this view.
 */
2126 static int cifs_readpage_worker(struct file *file, struct page *page,
2132 page_cache_get(page);
2133 read_data = kmap(page);
2134 /* for reads over a certain size could initiate async read ahead */
2136 rc = cifs_read(file, read_data, PAGE_CACHE_SIZE, poffset);
2141 cFYI(1, "Bytes read %d", rc);
2143 file->f_path.dentry->d_inode->i_atime =
2144 current_fs_time(file->f_path.dentry->d_inode->i_sb);
2146 if (PAGE_CACHE_SIZE > rc)
2147 memset(read_data + rc, 0, PAGE_CACHE_SIZE - rc);
2149 flush_dcache_page(page);
2150 SetPageUptodate(page);
2155 page_cache_release(page);
/*
 * cifs_readpage - address_space readpage: compute the file offset of
 * the page and delegate to cifs_readpage_worker().
 *
 * NOTE(review): extraction gaps — xid handling, the error path for a
 * NULL private_data, unlock_page and the return are missing here.
 */
2159 static int cifs_readpage(struct file *file, struct page *page)
2161 loff_t offset = (loff_t)page->index << PAGE_CACHE_SHIFT;
2167 if (file->private_data == NULL) {
2173 cFYI(1, "readpage %p at offset %d 0x%x\n",
2174 page, (int)offset, (int)offset);
2176 rc = cifs_readpage_worker(file, page, &offset);
/*
 * is_inode_writable - true if any cached open handle on the inode was
 * opened for writing (O_RDWR or O_WRONLY) and is not close-pending.
 * Scans the open-file list under GlobalSMBSeslock.
 *
 * NOTE(review): extraction gaps — the `continue`, the early `return 1`
 * and the trailing `return 0` are missing from this view.
 */
2184 static int is_inode_writable(struct cifsInodeInfo *cifs_inode)
2186 struct cifsFileInfo *open_file;
2188 read_lock(&GlobalSMBSeslock);
2189 list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
2190 if (open_file->closePend)
2192 if (open_file->pfile &&
2193 ((open_file->pfile->f_flags & O_RDWR) ||
2194 (open_file->pfile->f_flags & O_WRONLY))) {
2195 read_unlock(&GlobalSMBSeslock);
2199 read_unlock(&GlobalSMBSeslock);
2203 /* We do not want to update the file size from server for inodes
2204 open for write - to avoid races with writepage extending
2205 the file - in the future we could consider allowing
2206 refreshing the inode only on increases in the file size
2207 but this is tricky to do without racing with writebehind
2208 page caching in the current Linux kernel design */
/*
 * is_size_safe_to_change - whether the client may adopt the server's
 * end-of-file value.  When the inode is open for write, trusting the
 * server size races with local writeback — except on direct-I/O mounts
 * (no page cache to corrupt) or when the server size does not shrink
 * the locally known size.
 *
 * NOTE(review): extraction gaps — the explicit return statements are
 * missing from this view.
 */
2209 bool is_size_safe_to_change(struct cifsInodeInfo *cifsInode, __u64 end_of_file)
2214 if (is_inode_writable(cifsInode)) {
2215 /* This inode is open for write at least once */
2216 struct cifs_sb_info *cifs_sb;
2218 cifs_sb = CIFS_SB(cifsInode->vfs_inode.i_sb);
2219 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO) {
2220 /* since no page cache to corrupt on directio
2221 we can change size safely */
2225 if (i_size_read(&cifsInode->vfs_inode) < end_of_file)
/*
 * cifs_write_begin - address_space write_begin: grab/lock the target
 * page and decide whether it must be read from the server first.
 *
 * The read is skipped when the page is already uptodate, when the write
 * covers the whole page, or — with a read oplock — when the page lies
 * at/beyond EOF or the write covers all existing data (the non-written
 * parts are zeroed and the page marked PageChecked so write_end knows
 * they are considered up to date).  Otherwise, for readable opens, the
 * page is filled via cifs_readpage_worker(); read errors are ignored
 * since write_end falls back to a sync write when !PageUptodate.
 *
 * NOTE(review): extraction gaps — the ENOMEM path for a failed page
 * grab, *pagep assignment, goto labels and the return are missing.
 */
2233 static int cifs_write_begin(struct file *file, struct address_space *mapping,
2234 loff_t pos, unsigned len, unsigned flags,
2235 struct page **pagep, void **fsdata)
2237 pgoff_t index = pos >> PAGE_CACHE_SHIFT;
2238 loff_t offset = pos & (PAGE_CACHE_SIZE - 1);
2239 loff_t page_start = pos & PAGE_MASK;
2244 cFYI(1, "write_begin from %lld len %d", (long long)pos, len);
2246 page = grab_cache_page_write_begin(mapping, index, flags);
2252 if (PageUptodate(page))
2256 * If we write a full page it will be up to date, no need to read from
2257 * the server. If the write is short, we'll end up doing a sync write
2260 if (len == PAGE_CACHE_SIZE)
2264 * optimize away the read when we have an oplock, and we're not
2265 * expecting to use any of the data we'd be reading in. That
2266 * is, when the page lies beyond the EOF, or straddles the EOF
2267 * and the write will cover all of the existing data.
2269 if (CIFS_I(mapping->host)->clientCanCacheRead) {
2270 i_size = i_size_read(mapping->host);
2271 if (page_start >= i_size ||
2272 (offset == 0 && (pos + len) >= i_size)) {
2273 zero_user_segments(page, 0, offset,
2277 * PageChecked means that the parts of the page
2278 * to which we're not writing are considered up
2279 * to date. Once the data is copied to the
2280 * page, it can be set uptodate.
2282 SetPageChecked(page);
2287 if ((file->f_flags & O_ACCMODE) != O_WRONLY) {
2289 * might as well read a page, it is fast enough. If we get
2290 * an error, we don't need to return it. cifs_write_end will
2291 * do a sync write instead since PG_uptodate isn't set.
2293 cifs_readpage_worker(file, page, &page_start);
2295 /* we could try using another file handle if there is one -
2296 but how would we lock it to prevent close of that handle
2297 racing with this read? In any case
2298 this will be written out by write_end so is fine */
/*
 * cifs_oplock_break - slow_work handler run when the server revokes an
 * oplock: break the corresponding lease, flush dirty pages, and (if
 * the read cache is being lost) wait for writeback and invalidate the
 * cached data.  Any flush error is latched in write_behind_rc.  Finally
 * acknowledge the break with an oplock-release LOCKING_ANDX unless the
 * file is closing or the break was cancelled (e.g. after reconnect,
 * when the server already considers the oplock gone).
 *
 * NOTE(review): extraction gaps — rc/waitrc declarations and the
 * rc/waitrc combination logic are missing from this view.
 */
2306 cifs_oplock_break(struct slow_work *work)
2308 struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo,
2310 struct inode *inode = cfile->pInode;
2311 struct cifsInodeInfo *cinode = CIFS_I(inode);
2312 struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->mnt->mnt_sb);
2315 if (inode && S_ISREG(inode->i_mode)) {
2316 if (cinode->clientCanCacheRead)
2317 break_lease(inode, O_RDONLY);
2319 break_lease(inode, O_WRONLY);
2320 rc = filemap_fdatawrite(inode->i_mapping);
2321 if (cinode->clientCanCacheRead == 0) {
2322 waitrc = filemap_fdatawait(inode->i_mapping);
2323 invalidate_remote_inode(inode);
2328 cinode->write_behind_rc = rc;
2329 cFYI(1, "Oplock flush inode %p rc %d", inode, rc);
2333 * releasing stale oplock after recent reconnect of smb session using
2334 * a now incorrect file handle is not a data integrity issue but do
2335 * not bother sending an oplock release if session to server still is
2336 * disconnected since oplock already released by the server
2338 if (!cfile->closePend && !cfile->oplock_break_cancelled) {
2339 rc = CIFSSMBLock(0, cifs_sb->tcon, cfile->netfid, 0, 0, 0, 0,
2340 LOCKING_ANDX_OPLOCK_RELEASE, false);
2341 cFYI(1, "Oplock release rc = %d", rc);
/*
 * cifs_oplock_break_get - slow_work get_ref callback: pin the
 * cifsFileInfo while the oplock-break work item is queued/running.
 */
2346 cifs_oplock_break_get(struct slow_work *work)
2348 struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo,
2351 cifsFileInfo_get(cfile);
/*
 * cifs_oplock_break_put - slow_work put_ref callback: drop the
 * reference taken by cifs_oplock_break_get().
 */
2356 cifs_oplock_break_put(struct slow_work *work)
2358 struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo,
2361 cifsFileInfo_put(cfile);
/* slow_work operations for server-initiated oplock breaks. */
2364 const struct slow_work_ops cifs_oplock_break_ops = {
2365 .get_ref = cifs_oplock_break_get,
2366 .put_ref = cifs_oplock_break_put,
2367 .execute = cifs_oplock_break,
/*
 * Address-space operations used when the server buffer is large enough
 * for multi-page reads (readpages included); see cifs_addr_ops_smallbuf
 * for the small-buffer variant.
 */
2370 const struct address_space_operations cifs_addr_ops = {
2371 .readpage = cifs_readpage,
2372 .readpages = cifs_readpages,
2373 .writepage = cifs_writepage,
2374 .writepages = cifs_writepages,
2375 .write_begin = cifs_write_begin,
2376 .write_end = cifs_write_end,
2377 .set_page_dirty = __set_page_dirty_nobuffers,
2378 /* .sync_page = cifs_sync_page, */
2383 * cifs_readpages requires the server to support a buffer large enough to
2384 * contain the header plus one complete page of data. Otherwise, we need
2385 * to leave cifs_readpages out of the address space operations.
2387 const struct address_space_operations cifs_addr_ops_smallbuf = {
2388 .readpage = cifs_readpage,
2389 .writepage = cifs_writepage,
2390 .writepages = cifs_writepages,
2391 .write_begin = cifs_write_begin,
2392 .write_end = cifs_write_end,
2393 .set_page_dirty = __set_page_dirty_nobuffers,
2394 /* .sync_page = cifs_sync_page, */