Merge remote-tracking branch 'fsnotify/for-next'

author Stephen Rothwell <sfr@canb.auug.org.au>
Tue, 13 Dec 2011 04:51:20 +0000 (15:51 +1100)
committer Stephen Rothwell <sfr@canb.auug.org.au>
Tue, 13 Dec 2011 04:51:20 +0000 (15:51 +1100)
diff --combined fs/cachefiles/namei.c

index a0358c2189cb5aba26de002d93727dedd5b7a3fe,e33b9a00b3bbcd2b992014c1e321e2f03061b9f5..3f458310e2876b5b5ea0eaf9bd86500bae2e0520
--- 1/fs/cachefiles/namei.c
--- 2/fs/cachefiles/namei.c
+++ b/fs/cachefiles/namei.c
@@@ -13,7 -13,6 +13,6 @@@
   #include <linux/sched.h>
   #include <linux/file.h>
   #include <linux/fs.h>
- #include <linux/fsnotify.h>
   #include <linux/quotaops.h>
   #include <linux/xattr.h>
   #include <linux/mount.h>
@@@ -275,7 -274,6 +274,7 @@@ static int cachefiles_bury_object(struc
                                   bool preemptive)
   {
         struct dentry *grave, *trap;
+ +      struct path path, path_to_graveyard;
         char nbuffer[8 + 8 + 1];
         int ret;
   
@@@ -288,18 -286,10 +287,18 @@@
         /* non-directories can just be unlinked */
         if (!S_ISDIR(rep->d_inode->i_mode)) {
                 _debug("unlink stale object");
- -              ret = vfs_unlink(dir->d_inode, rep);
   
- -              if (preemptive)
- -                      cachefiles_mark_object_buried(cache, rep);
+ +              path.mnt = cache->mnt;
+ +              path.dentry = dir;
+ +              ret = security_path_unlink(&path, rep);
+ +              if (ret < 0) {
+ +                      cachefiles_io_error(cache, "Unlink security error");
+ +              } else {
+ +                      ret = vfs_unlink(dir->d_inode, rep);
+ +
+ +                      if (preemptive)
+ +                              cachefiles_mark_object_buried(cache, rep);
+ +              }
   
                 mutex_unlock(&dir->d_inode->i_mutex);
   
@@@ -388,23 -378,12 +387,23 @@@ try_again
         }
   
         /* attempt the rename */
- -      ret = vfs_rename(dir->d_inode, rep, cache->graveyard->d_inode, grave);
- -      if (ret != 0 && ret != -ENOMEM)
- -              cachefiles_io_error(cache, "Rename failed with error %d", ret);
+ +      path.mnt = cache->mnt;
+ +      path.dentry = dir;
+ +      path_to_graveyard.mnt = cache->mnt;
+ +      path_to_graveyard.dentry = cache->graveyard;
+ +      ret = security_path_rename(&path, rep, &path_to_graveyard, grave);
+ +      if (ret < 0) {
+ +              cachefiles_io_error(cache, "Rename security error %d", ret);
+ +      } else {
+ +              ret = vfs_rename(dir->d_inode, rep,
+ +                               cache->graveyard->d_inode, grave);
+ +              if (ret != 0 && ret != -ENOMEM)
+ +                      cachefiles_io_error(cache,
+ +                                          "Rename failed with error %d", ret);
   
- -      if (preemptive)
- -              cachefiles_mark_object_buried(cache, rep);
+ +              if (preemptive)
+ +                      cachefiles_mark_object_buried(cache, rep);
+ +      }
   
         unlock_rename(cache->graveyard, dir);
         dput(grave);
@@@ -468,7 -447,6 +467,7 @@@ int cachefiles_walk_to_object(struct ca
   {
         struct cachefiles_cache *cache;
         struct dentry *dir, *next = NULL;
+ +      struct path path;
         unsigned long start;
         const char *name;
         int ret, nlen;
@@@ -479,7 -457,6 +478,7 @@@
   
         cache = container_of(parent->fscache.cache,
                              struct cachefiles_cache, cache);
+ +      path.mnt = cache->mnt;
   
         ASSERT(parent->dentry);
         ASSERT(parent->dentry->d_inode);
@@@ -533,10 -510,6 +532,10 @@@ lookup_again
                         if (ret < 0)
                                 goto create_error;
   
+ +                      path.dentry = dir;
+ +                      ret = security_path_mkdir(&path, next, 0);
+ +                      if (ret < 0)
+ +                              goto create_error;
                         start = jiffies;
                         ret = vfs_mkdir(dir->d_inode, next, 0);
                         cachefiles_hist(cachefiles_mkdir_histogram, start);
@@@ -562,10 -535,6 +561,10 @@@
                         if (ret < 0)
                                 goto create_error;
   
+ +                      path.dentry = dir;
+ +                      ret = security_path_mknod(&path, next, S_IFREG, 0);
+ +                      if (ret < 0)
+ +                              goto create_error;
                         start = jiffies;
                         ret = vfs_create(dir->d_inode, next, S_IFREG, NULL);
                         cachefiles_hist(cachefiles_create_histogram, start);
@@@ -722,7 -691,6 +721,7 @@@ struct dentry *cachefiles_get_directory
   {
         struct dentry *subdir;
         unsigned long start;
+ +      struct path path;
         int ret;
   
         _enter(",,%s", dirname);
@@@ -750,11 -718,6 +749,11 @@@
   
                 _debug("attempt mkdir");
   
+ +              path.mnt = cache->mnt;
+ +              path.dentry = dir;
+ +              ret = security_path_mkdir(&path, subdir, 0700);
+ +              if (ret < 0)
+ +                      goto mkdir_error;
                 ret = vfs_mkdir(dir->d_inode, subdir, 0700);
                 if (ret < 0)
                         goto mkdir_error;
diff --combined fs/nfsd/vfs.c

index 7a2e442623c876a817411e7225ca29dd4c0090ae,5f30270ae644d7b64a870460fe2fb7378c23392c..66cb3b0b97c1ab7cd43010a20321029427085989
--- 1/fs/nfsd/vfs.c
--- 2/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@@ -1,3 -1,4 +1,3 @@@
- -#define MSNFS /* HACK HACK */
   /*
    * File operations used by nfsd. Some of these have been ripped from
    * other parts of the kernel because they weren't exported, others
@@@ -19,7 -20,6 +19,6 @@@
   #include <linux/fcntl.h>
   #include <linux/namei.h>
   #include <linux/delay.h>
- #include <linux/fsnotify.h>
   #include <linux/posix_acl_xattr.h>
   #include <linux/xattr.h>
   #include <linux/jhash.h>
@@@ -34,8 -34,8 +33,8 @@@
   #endif /* CONFIG_NFSD_V3 */
   
   #ifdef CONFIG_NFSD_V4
- -#include <linux/nfs4_acl.h>
- -#include <linux/nfsd_idmap.h>
+ +#include "acl.h"
+ +#include "idmap.h"
   #endif /* CONFIG_NFSD_V4 */
   
   #include "nfsd.h"
@@@ -87,9 -87,8 +86,9 @@@ nfsd_cross_mnt(struct svc_rqst *rqstp, 
                             .dentry = dget(dentry)};
         int err = 0;
   
- -      while (d_mountpoint(path.dentry) && follow_down(&path))
- -              ;
+ +      err = follow_down(&path);
+ +      if (err < 0)
+ +              goto out;
   
         exp2 = rqst_exp_get_by_name(rqstp, &path);
         if (IS_ERR(exp2)) {
@@@ -168,8 -167,6 +167,8 @@@ int nfsd_mountpoint(struct dentry *dent
   {
         if (d_mountpoint(dentry))
                 return 1;
+ +      if (nfsd4_is_junction(dentry))
+ +              return 1;
         if (!(exp->ex_flags & NFSEXP_V4ROOT))
                 return 0;
         return dentry->d_inode != NULL;
@@@ -183,10 -180,16 +182,10 @@@ nfsd_lookup_dentry(struct svc_rqst *rqs
         struct svc_export       *exp;
         struct dentry           *dparent;
         struct dentry           *dentry;
- -      __be32                  err;
         int                     host_err;
   
         dprintk("nfsd: nfsd_lookup(fh %s, %.*s)\n", SVCFH_fmt(fhp), len,name);
   
- -      /* Obtain dentry and export. */
- -      err = fh_verify(rqstp, fhp, S_IFDIR, NFSD_MAY_EXEC);
- -      if (err)
- -              return err;
- -
         dparent = fhp->fh_dentry;
         exp  = fhp->fh_export;
         exp_get(exp);
@@@ -250,9 -253,6 +249,9 @@@ nfsd_lookup(struct svc_rqst *rqstp, str
         struct dentry           *dentry;
         __be32 err;
   
+ +      err = fh_verify(rqstp, fhp, S_IFDIR, NFSD_MAY_EXEC);
+ +      if (err)
+ +              return err;
         err = nfsd_lookup_dentry(rqstp, fhp, name, len, &exp, &dentry);
         if (err)
                 return err;
@@@ -272,13 -272,6 +271,13 @@@ out
         return err;
   }
   
+ +static int nfsd_break_lease(struct inode *inode)
+ +{
+ +      if (!S_ISREG(inode->i_mode))
+ +              return 0;
+ +      return break_lease(inode, O_WRONLY | O_NONBLOCK);
+ +}
+ +
   /*
    * Commit metadata changes to stable storage.
    */
@@@ -381,6 -374,16 +380,6 @@@ nfsd_setattr(struct svc_rqst *rqstp, st
                                 goto out;
                 }
   
- -              /*
- -               * If we are changing the size of the file, then
- -               * we need to break all leases.
- -               */
- -              host_err = break_lease(inode, O_WRONLY | O_NONBLOCK);
- -              if (host_err == -EWOULDBLOCK)
- -                      host_err = -ETIMEDOUT;
- -              if (host_err) /* ENOMEM or EWOULDBLOCK */
- -                      goto out_nfserr;
- -
                 host_err = get_write_access(inode);
                 if (host_err)
                         goto out_nfserr;
@@@ -421,11 -424,7 +420,11 @@@
   
         err = nfserr_notsync;
         if (!check_guard || guardtime == inode->i_ctime.tv_sec) {
+ +              host_err = nfsd_break_lease(inode);
+ +              if (host_err)
+ +                      goto out_nfserr;
                 fh_lock(fhp);
+ +
                 host_err = notify_change(dentry, iap);
                 err = nfserrno(host_err);
                 fh_unlock(fhp);
@@@ -504,7 -503,7 +503,7 @@@ nfsd4_set_nfs4_acl(struct svc_rqst *rqs
         unsigned int flags = 0;
   
         /* Get inode */
- -      error = fh_verify(rqstp, fhp, 0 /* S_IFREG */, NFSD_MAY_SATTR);
+ +      error = fh_verify(rqstp, fhp, 0, NFSD_MAY_SATTR);
         if (error)
                 return error;
   
@@@ -594,22 -593,6 +593,22 @@@ nfsd4_get_nfs4_acl(struct svc_rqst *rqs
         return error;
   }
   
+ +#define NFSD_XATTR_JUNCTION_PREFIX XATTR_TRUSTED_PREFIX "junction."
+ +#define NFSD_XATTR_JUNCTION_TYPE NFSD_XATTR_JUNCTION_PREFIX "type"
+ +int nfsd4_is_junction(struct dentry *dentry)
+ +{
+ +      struct inode *inode = dentry->d_inode;
+ +
+ +      if (inode == NULL)
+ +              return 0;
+ +      if (inode->i_mode & S_IXUGO)
+ +              return 0;
+ +      if (!(inode->i_mode & S_ISVTX))
+ +              return 0;
+ +      if (vfs_getxattr(dentry, NFSD_XATTR_JUNCTION_TYPE, NULL, 0) <= 0)
+ +              return 0;
+ +      return 1;
+ +}
   #endif /* defined(CONFIG_NFSD_V4) */
   
   #ifdef CONFIG_NFSD_V3
@@@ -714,15 -697,7 +713,15 @@@ nfsd_access(struct svc_rqst *rqstp, str
   }
   #endif /* CONFIG_NFSD_V3 */
   
+ +static int nfsd_open_break_lease(struct inode *inode, int access)
+ +{
+ +      unsigned int mode;
   
+ +      if (access & NFSD_MAY_NOT_BREAK_LEASE)
+ +              return 0;
+ +      mode = (access & NFSD_MAY_WRITE) ? O_WRONLY : O_RDONLY;
+ +      return break_lease(inode, mode | O_NONBLOCK);
+ +}
   
   /*
    * Open an existing file or directory.
@@@ -770,7 -745,14 +769,7 @@@ nfsd_open(struct svc_rqst *rqstp, struc
         if (!inode->i_fop)
                 goto out;
   
- -      /*
- -       * Check to see if there are any leases on this file.
- -       * This may block while leases are broken.
- -       */
- -      if (!(access & NFSD_MAY_NOT_BREAK_LEASE))
- -              host_err = break_lease(inode, O_NONBLOCK | ((access & NFSD_MAY_WRITE) ? O_WRONLY : 0));
- -      if (host_err == -EWOULDBLOCK)
- -              host_err = -ETIMEDOUT;
+ +      host_err = nfsd_open_break_lease(inode, access);
         if (host_err) /* NOMEM or WOULDBLOCK */
                 goto out_nfserr;
   
@@@ -826,7 -808,7 +825,7 @@@ nfsd_get_raparms(dev_t dev, ino_t ino
                 if (ra->p_count == 0)
                         frap = rap;
         }
- -      depth = nfsdstats.ra_size*11/10;
+ +      depth = nfsdstats.ra_size;
         if (!frap) {    
                 spin_unlock(&rab->pb_lock);
                 return NULL;
@@@ -862,6 -844,11 +861,6 @@@ nfsd_splice_actor(struct pipe_inode_inf
         struct page **pp = rqstp->rq_respages + rqstp->rq_resused;
         struct page *page = buf->page;
         size_t size;
- -      int ret;
- -
- -      ret = buf->ops->confirm(pipe, buf);
- -      if (unlikely(ret))
- -              return ret;
   
         size = sd->len;
   
@@@ -891,15 -878,29 +890,15 @@@ static int nfsd_direct_splice_actor(str
         return __splice_from_pipe(pipe, sd, nfsd_splice_actor);
   }
   
- -static inline int svc_msnfs(struct svc_fh *ffhp)
- -{
- -#ifdef MSNFS
- -      return (ffhp->fh_export->ex_flags & NFSEXP_MSNFS);
- -#else
- -      return 0;
- -#endif
- -}
- -
   static __be32
   nfsd_vfs_read(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
                 loff_t offset, struct kvec *vec, int vlen, unsigned long *count)
   {
- -      struct inode *inode;
         mm_segment_t    oldfs;
         __be32          err;
         int             host_err;
   
         err = nfserr_perm;
- -      inode = file->f_path.dentry->d_inode;
- -
- -      if (svc_msnfs(fhp) && !lock_may_read(inode, offset, *count))
- -              goto out;
   
         if (file->f_op->splice_read && rqstp->rq_splice_ok) {
                 struct splice_desc sd = {
@@@ -922,9 -923,9 +921,8 @@@
                 nfsdstats.io_read += host_err;
                 *count = host_err;
                 err = 0;
-               fsnotify_access(file);
         } else 
                 err = nfserrno(host_err);
- -out:
         return err;
   }
   
@@@ -989,6 -990,14 +987,6 @@@ nfsd_vfs_write(struct svc_rqst *rqstp, 
         int                     stable = *stablep;
         int                     use_wgather;
   
- -#ifdef MSNFS
- -      err = nfserr_perm;
- -
- -      if ((fhp->fh_export->ex_flags & NFSEXP_MSNFS) &&
- -              (!lock_may_write(file->f_path.dentry->d_inode, offset, *cnt)))
- -              goto out;
- -#endif
- -
         dentry = file->f_path.dentry;
         inode = dentry->d_inode;
         exp   = fhp->fh_export;
@@@ -1024,7 -1033,6 +1022,6 @@@
                 goto out_nfserr;
         *cnt = host_err;
         nfsdstats.io_write += host_err;
-       fsnotify_modify(file);
   
         /* clear setuid/setgid flag after write */
         if (inode->i_mode & (S_ISUID | S_ISGID))
@@@ -1039,6 -1047,7 +1036,6 @@@ out_nfserr
                 err = 0;
         else
                 err = nfserrno(host_err);
- -out:
         return err;
   }
   
@@@ -1356,21 -1365,14 +1353,21 @@@ out_nfserr
   }
   
   #ifdef CONFIG_NFSD_V3
+ +
+ +static inline int nfsd_create_is_exclusive(int createmode)
+ +{
+ +      return createmode == NFS3_CREATE_EXCLUSIVE
+ +             || createmode == NFS4_CREATE_EXCLUSIVE4_1;
+ +}
+ +
   /*
- - * NFSv3 version of nfsd_create
+ + * NFSv3 and NFSv4 version of nfsd_create
    */
   __be32
- -nfsd_create_v3(struct svc_rqst *rqstp, struct svc_fh *fhp,
+ +do_nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
                 char *fname, int flen, struct iattr *iap,
                 struct svc_fh *resfhp, int createmode, u32 *verifier,
- -              int *truncp, int *created)
+ +              bool *truncp, bool *created)
   {
         struct dentry   *dentry, *dchild = NULL;
         struct inode    *dirp;
@@@ -1386,7 -1388,7 +1383,7 @@@
                 goto out;
         if (!(iap->ia_valid & ATTR_MODE))
                 iap->ia_mode = 0;
- -      err = fh_verify(rqstp, fhp, S_IFDIR, NFSD_MAY_CREATE);
+ +      err = fh_verify(rqstp, fhp, S_IFDIR, NFSD_MAY_EXEC);
         if (err)
                 goto out;
   
@@@ -1408,18 -1410,11 +1405,18 @@@
         if (IS_ERR(dchild))
                 goto out_nfserr;
   
+ +      /* If file doesn't exist, check for permissions to create one */
+ +      if (!dchild->d_inode) {
+ +              err = fh_verify(rqstp, fhp, S_IFDIR, NFSD_MAY_CREATE);
+ +              if (err)
+ +                      goto out;
+ +      }
+ +
         err = fh_compose(resfhp, fhp->fh_export, dchild, fhp);
         if (err)
                 goto out;
   
- -      if (createmode == NFS3_CREATE_EXCLUSIVE) {
+ +      if (nfsd_create_is_exclusive(createmode)) {
                 /* solaris7 gets confused (bugid 4218508) if these have
                  * the high bit set, so just clear the high bits. If this is
                  * ever changed to use different attrs for storing the
@@@ -1460,11 -1455,6 +1457,11 @@@
                             && dchild->d_inode->i_atime.tv_sec == v_atime
                             && dchild->d_inode->i_size  == 0 )
                                 break;
+ +              case NFS4_CREATE_EXCLUSIVE4_1:
+ +                      if (   dchild->d_inode->i_mtime.tv_sec == v_mtime
+ +                          && dchild->d_inode->i_atime.tv_sec == v_atime
+ +                          && dchild->d_inode->i_size  == 0 )
+ +                              goto set_attr;
                          /* fallthru */
                 case NFS3_CREATE_GUARDED:
                         err = nfserr_exist;
@@@ -1483,7 -1473,7 +1480,7 @@@
   
         nfsd_check_ignore_resizing(iap);
   
- -      if (createmode == NFS3_CREATE_EXCLUSIVE) {
+ +      if (nfsd_create_is_exclusive(createmode)) {
                 /* Cram the verifier into atime/mtime */
                 iap->ia_valid = ATTR_MTIME|ATTR_ATIME
                         | ATTR_MTIME_SET|ATTR_ATIME_SET;
@@@ -1650,12 -1640,10 +1647,12 @@@ nfsd_link(struct svc_rqst *rqstp, struc
         err = fh_verify(rqstp, ffhp, S_IFDIR, NFSD_MAY_CREATE);
         if (err)
                 goto out;
- -      err = fh_verify(rqstp, tfhp, -S_IFDIR, NFSD_MAY_NOP);
+ +      err = fh_verify(rqstp, tfhp, 0, NFSD_MAY_NOP);
         if (err)
                 goto out;
- -
+ +      err = nfserr_isdir;
+ +      if (S_ISDIR(tfhp->fh_dentry->d_inode->i_mode))
+ +              goto out;
         err = nfserr_perm;
         if (!len)
                 goto out;
@@@ -1679,14 -1667,6 +1676,14 @@@
                 err = nfserrno(host_err);
                 goto out_dput;
         }
+ +      err = nfserr_noent;
+ +      if (!dold->d_inode)
+ +              goto out_drop_write;
+ +      host_err = nfsd_break_lease(dold->d_inode);
+ +      if (host_err) {
+ +              err = nfserrno(host_err);
+ +              goto out_drop_write;
+ +      }
         host_err = vfs_link(dold, dirp, dnew);
         if (!host_err) {
                 err = nfserrno(commit_metadata(ffhp));
@@@ -1698,7 -1678,6 +1695,7 @@@
                 else
                         err = nfserrno(host_err);
         }
+ +out_drop_write:
         mnt_drop_write(tfhp->fh_export->ex_path.mnt);
   out_dput:
         dput(dnew);
@@@ -1773,6 -1752,13 +1770,6 @@@ nfsd_rename(struct svc_rqst *rqstp, str
         if (ndentry == trap)
                 goto out_dput_new;
   
- -      if (svc_msnfs(ffhp) &&
- -              ((atomic_read(&odentry->d_count) > 1)
- -               || (atomic_read(&ndentry->d_count) > 1))) {
- -                      host_err = -EPERM;
- -                      goto out_dput_new;
- -      }
- -
         host_err = -EXDEV;
         if (ffhp->fh_export->ex_path.mnt != tfhp->fh_export->ex_path.mnt)
                 goto out_dput_new;
@@@ -1780,22 -1766,15 +1777,22 @@@
         if (host_err)
                 goto out_dput_new;
   
+ +      host_err = nfsd_break_lease(odentry->d_inode);
+ +      if (host_err)
+ +              goto out_drop_write;
+ +      if (ndentry->d_inode) {
+ +              host_err = nfsd_break_lease(ndentry->d_inode);
+ +              if (host_err)
+ +                      goto out_drop_write;
+ +      }
         host_err = vfs_rename(fdir, odentry, tdir, ndentry);
         if (!host_err) {
                 host_err = commit_metadata(tfhp);
                 if (!host_err)
                         host_err = commit_metadata(ffhp);
         }
- -
+ +out_drop_write:
         mnt_drop_write(ffhp->fh_export->ex_path.mnt);
- -
    out_dput_new:
         dput(ndentry);
    out_dput_old:
@@@ -1856,22 -1835,26 +1853,22 @@@ nfsd_unlink(struct svc_rqst *rqstp, str
   
         host_err = mnt_want_write(fhp->fh_export->ex_path.mnt);
         if (host_err)
- -              goto out_nfserr;
+ +              goto out_put;
   
- -      if (type != S_IFDIR) { /* It's UNLINK */
- -#ifdef MSNFS
- -              if ((fhp->fh_export->ex_flags & NFSEXP_MSNFS) &&
- -                      (atomic_read(&rdentry->d_count) > 1)) {
- -                      host_err = -EPERM;
- -              } else
- -#endif
+ +      host_err = nfsd_break_lease(rdentry->d_inode);
+ +      if (host_err)
+ +              goto out_drop_write;
+ +      if (type != S_IFDIR)
                 host_err = vfs_unlink(dirp, rdentry);
- -      } else { /* It's RMDIR */
+ +      else
                 host_err = vfs_rmdir(dirp, rdentry);
- -      }
- -
- -      dput(rdentry);
- -
         if (!host_err)
                 host_err = commit_metadata(fhp);
- -
+ +out_drop_write:
         mnt_drop_write(fhp->fh_export->ex_path.mnt);
+ +out_put:
+ +      dput(rdentry);
+ +
   out_nfserr:
         err = nfserrno(host_err);
   out:
@@@ -2066,7 -2049,7 +2063,7 @@@ nfsd_permission(struct svc_rqst *rqstp
         struct inode    *inode = dentry->d_inode;
         int             err;
   
- -      if (acc == NFSD_MAY_NOP)
+ +      if ((acc & NFSD_MAY_MASK) == NFSD_MAY_NOP)
                 return 0;
   #if 0
         dprintk("nfsd: permission 0x%x%s%s%s%s%s%s%s mode 0%o%s%s%s\n",
@@@ -2134,8 -2117,7 +2131,8 @@@
   
         /* Allow read access to binaries even when mode 111 */
         if (err == -EACCES && S_ISREG(inode->i_mode) &&
- -          acc == (NFSD_MAY_READ | NFSD_MAY_OWNER_OVERRIDE))
+ +           (acc == (NFSD_MAY_READ | NFSD_MAY_OWNER_OVERRIDE) ||
+ +            acc == (NFSD_MAY_READ | NFSD_MAY_READ_IF_EXEC)))
                 err = inode_permission(inode, MAY_EXEC);
   
         return err? nfserrno(err) : 0;
diff --combined fs/notify/fanotify/fanotify_user.c

index 9fde1c00a29627b88f3023441a8fc840d2f415c6,2d4925b98bdb53104863662db3babb4c79c66bdd..3fe62cb9ad34b99fa0a4a8b569e5c6038c59b510
--- 1/fs/notify/fanotify/fanotify_user.c
--- 2/fs/notify/fanotify/fanotify_user.c
+++ b/fs/notify/fanotify/fanotify_user.c
@@@ -62,6 -62,7 +62,7 @@@ static int create_fd(struct fsnotify_gr
         struct dentry *dentry;
         struct vfsmount *mnt;
         struct file *new_file;
+       unsigned int flags;
   
         pr_debug("%s: group=%p event=%p\n", __func__, group, event);
   
@@@ -83,12 -84,22 +84,22 @@@
         mnt = mntget(event->path.mnt);
         /* it's possible this event was an overflow event.  in that case dentry and mnt
          * are NULL;  That's fine, just don't call dentry open */
-       if (dentry && mnt)
-               new_file = dentry_open(dentry, mnt,
-                                      group->fanotify_data.f_flags | FMODE_NONOTIFY,
-                                      current_cred());
-       else
+       if (dentry && mnt) {
+               flags = group->fanotify_data.f_flags;
+               new_file = dentry_open(dentry, mnt, flags, current_cred());
+               /*
+                * Attempt fallback to read-only access if writable was not possible
+                * in order to at least provide something to the listener.
+                */
+               if (IS_ERR(new_file) && group->fanotify_data.readonly_fallback) {
+                       flags &= ~O_ACCMODE;
+                       flags |= O_RDONLY;
+                       new_file = dentry_open(dentry, mnt, flags,
+                                              current_cred());
+               }
+       } else {
                 new_file = ERR_PTR(-EOVERFLOW);
+       }
         if (IS_ERR(new_file)) {
                 /*
                  * we still send an event even if we can't open the file.  this
@@@ -164,7 -175,7 +175,7 @@@ static int process_access_response(stru
                  fd, response);
         /*
          * make sure the response is valid, if invalid we do nothing and either
- -       * userspace can send a valid responce or we will clean it up after the
+ +       * userspace can send a valid response or we will clean it up after the
          * timeout
          */
         switch (response) {
@@@ -208,14 -219,6 +219,6 @@@ static int prepare_for_access_response(
         re->fd = fd;
   
         mutex_lock(&group->fanotify_data.access_mutex);
- 
-       if (atomic_read(&group->fanotify_data.bypass_perm)) {
-               mutex_unlock(&group->fanotify_data.access_mutex);
-               kmem_cache_free(fanotify_response_event_cache, re);
-               event->response = FAN_ALLOW;
-               return 0;
-       }
-               
         list_add_tail(&re->list, &group->fanotify_data.access_list);
         mutex_unlock(&group->fanotify_data.access_mutex);
   
@@@ -516,6 -519,7 +519,7 @@@ static __u32 fanotify_mark_remove_from_
                                             unsigned int flags)
   {
         __u32 oldmask;
+       int destroy_mark;
   
         spin_lock(&fsn_mark->lock);
         if (!(flags & FAN_MARK_IGNORED_MASK)) {
@@@ -525,9 -529,10 +529,10 @@@
                 oldmask = fsn_mark->ignored_mask;
                 fsnotify_set_mark_ignored_mask_locked(fsn_mark, (oldmask & ~mask));
         }
+       destroy_mark = (!fsn_mark->mask && !fsn_mark->ignored_mask);
         spin_unlock(&fsn_mark->lock);
   
-       if (!(oldmask & ~mask))
+       if (destroy_mark)
                 fsnotify_destroy_mark(fsn_mark);
   
         return mask & oldmask;
@@@ -539,17 -544,23 +544,23 @@@ static int fanotify_remove_vfsmount_mar
   {
         struct fsnotify_mark *fsn_mark = NULL;
         __u32 removed;
+       int ret;
   
+       mutex_lock(&group->mutex);
+       ret = -ENOENT;
         fsn_mark = fsnotify_find_vfsmount_mark(group, mnt);
         if (!fsn_mark)
-               return -ENOENT;
+               goto err;
   
         removed = fanotify_mark_remove_from_mask(fsn_mark, mask, flags);
         fsnotify_put_mark(fsn_mark);
         if (removed & mnt->mnt_fsnotify_mask)
                 fsnotify_recalc_vfsmount_mask(mnt);
+       ret = 0;
+ err:
+       mutex_unlock(&group->mutex);
   
-       return 0;
+       return ret;
   }
   
   static int fanotify_remove_inode_mark(struct fsnotify_group *group,
@@@ -558,18 -569,24 +569,24 @@@
   {
         struct fsnotify_mark *fsn_mark = NULL;
         __u32 removed;
+       int ret;
   
+       mutex_lock(&group->mutex);
+       ret = -ENOENT;
         fsn_mark = fsnotify_find_inode_mark(group, inode);
         if (!fsn_mark)
-               return -ENOENT;
+               goto err;
   
         removed = fanotify_mark_remove_from_mask(fsn_mark, mask, flags);
         /* matches the fsnotify_find_inode_mark() */
         fsnotify_put_mark(fsn_mark);
         if (removed & inode->i_fsnotify_mask)
                 fsnotify_recalc_inode_mask(inode);
+       ret = 0;
+ err:
+       mutex_unlock(&group->mutex);
   
-       return 0;
+       return ret;
   }
   
   static __u32 fanotify_mark_add_to_mask(struct fsnotify_mark *fsn_mark,
@@@ -605,28 -622,35 +622,35 @@@ static int fanotify_add_vfsmount_mark(s
   {
         struct fsnotify_mark *fsn_mark;
         __u32 added;
-       int ret = 0;
+       int ret;
   
+       mutex_lock(&group->mutex);
         fsn_mark = fsnotify_find_vfsmount_mark(group, mnt);
         if (!fsn_mark) {
-               if (atomic_read(&group->num_marks) > group->fanotify_data.max_marks)
-                       return -ENOSPC;
+               ret = -ENOSPC;
+               if (atomic_read(&group->num_marks) >
+                   group->fanotify_data.max_marks)
+                       goto err;
   
+               ret = -ENOMEM;
                 fsn_mark = kmem_cache_alloc(fanotify_mark_cache, GFP_KERNEL);
                 if (!fsn_mark)
-                       return -ENOMEM;
+                       goto err;
   
                 fsnotify_init_mark(fsn_mark, fanotify_free_mark);
                 ret = fsnotify_add_mark(fsn_mark, group, NULL, mnt, 0);
                 if (ret)
-                       goto err;
+                       goto err2;
         }
         added = fanotify_mark_add_to_mask(fsn_mark, mask, flags);
   
         if (added & ~mnt->mnt_fsnotify_mask)
                 fsnotify_recalc_vfsmount_mask(mnt);
- err:
+       ret = 0;
+ err2:
         fsnotify_put_mark(fsn_mark);
+ err:
+       mutex_unlock(&group->mutex);
         return ret;
   }
   
@@@ -636,7 -660,7 +660,7 @@@ static int fanotify_add_inode_mark(stru
   {
         struct fsnotify_mark *fsn_mark;
         __u32 added;
-       int ret = 0;
+       int ret;
   
         pr_debug("%s: group=%p inode=%p\n", __func__, group, inode);
   
@@@ -650,26 -674,33 +674,33 @@@
             (atomic_read(&inode->i_writecount) > 0))
                 return 0;
   
+       mutex_lock(&group->mutex);
         fsn_mark = fsnotify_find_inode_mark(group, inode);
         if (!fsn_mark) {
-               if (atomic_read(&group->num_marks) > group->fanotify_data.max_marks)
-                       return -ENOSPC;
+               ret = -ENOSPC;
+               if (atomic_read(&group->num_marks) >
+                   group->fanotify_data.max_marks)
+                       goto err;
   
+               ret = -ENOMEM;
                 fsn_mark = kmem_cache_alloc(fanotify_mark_cache, GFP_KERNEL);
                 if (!fsn_mark)
-                       return -ENOMEM;
+                       goto err;
   
                 fsnotify_init_mark(fsn_mark, fanotify_free_mark);
                 ret = fsnotify_add_mark(fsn_mark, group, inode, NULL, 0);
                 if (ret)
-                       goto err;
+                       goto err2;
         }
         added = fanotify_mark_add_to_mask(fsn_mark, mask, flags);
   
         if (added & ~inode->i_fsnotify_mask)
                 fsnotify_recalc_inode_mask(inode);
- err:
+       ret = 0;
+ err2:
         fsnotify_put_mark(fsn_mark);
+ err:
+       mutex_unlock(&group->mutex);
         return ret;
   }
   
@@@ -711,7 -742,7 +742,7 @@@ SYSCALL_DEFINE2(fanotify_init, unsigne
         group->fanotify_data.user = user;
         atomic_inc(&user->fanotify_listeners);
   
-       group->fanotify_data.f_flags = event_f_flags;
+       group->fanotify_data.f_flags = event_f_flags | FMODE_NONOTIFY;
   #ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS
         mutex_init(&group->fanotify_data.access_mutex);
         init_waitqueue_head(&group->fanotify_data.access_waitq);
@@@ -751,6 -782,14 +782,14 @@@
                 group->fanotify_data.max_marks = FANOTIFY_DEFAULT_MAX_MARKS;
         }
   
+       fd = -EINVAL;
+       if (flags & FAN_READONLY_FALLBACK) {
+               if ((event_f_flags & O_ACCMODE) == O_RDWR)
+                       group->fanotify_data.readonly_fallback = true;
+               else
+                       goto out_put_group;
+       }
+ 
         fd = anon_inode_getfd("[fanotify]", &fanotify_fops, group, f_flags);
         if (fd < 0)
                 goto out_put_group;
@@@ -876,7 -915,7 +915,7 @@@ SYSCALL_ALIAS(sys_fanotify_mark, SyS_fa
   #endif
   
   /*
- - * fanotify_user_setup - Our initialization function.  Note that we cannnot return
+ + * fanotify_user_setup - Our initialization function.  Note that we cannot return
    * error because we have compiled-in VFS hooks.  So an (unlikely) failure here
    * must result in panic().
    */
diff --combined fs/notify/group.c

index 63fc294a469268d06d9ae1cc9f3d19f3ba95f695,cc341d33f5c829a690f71645b24926165890cdb6..7558c54f2b51efc05b32e6e6c78be7397b3541b1
--- 1/fs/notify/group.c
--- 2/fs/notify/group.c
+++ b/fs/notify/group.c
@@@ -26,7 -26,7 +26,7 @@@
   #include <linux/fsnotify_backend.h>
   #include "fsnotify.h"
   
- -#include <asm/atomic.h>
+ +#include <linux/atomic.h>
   
   /*
    * Final freeing of a group
@@@ -90,6 -90,7 +90,7 @@@ struct fsnotify_group *fsnotify_alloc_g
          */
         atomic_set(&group->num_marks, 1);
   
+       mutex_init(&group->mutex);
         mutex_init(&group->notification_mutex);
         INIT_LIST_HEAD(&group->notification_list);
         init_waitqueue_head(&group->notification_waitq);
diff --combined fs/notify/mark.c

index e14587d55689dae3cedccd1d2afeb1242ab0947c,28b64eb03e333eb0aab9f774c95d10092d571dbc..9d08e2a2acd770cd182e1e44bbfa53dc543a9f93
--- 1/fs/notify/mark.c
--- 2/fs/notify/mark.c
+++ b/fs/notify/mark.c
@@@ -24,7 -24,7 +24,7 @@@
    * referencing this object.  The object typically will live inside the kernel
    * with a refcnt of 2, one for each list it is on (i_list, g_list).  Any task
    * which can find this object holding the appropriate locks, can take a reference
- - * and the object itself is guarenteed to survive until the reference is dropped.
+ + * and the object itself is guaranteed to survive until the reference is dropped.
    *
    * LOCKING:
    * There are 3 spinlocks involved with fsnotify inode marks and they MUST
@@@ -91,8 -91,9 +91,8 @@@
   #include <linux/slab.h>
   #include <linux/spinlock.h>
   #include <linux/srcu.h>
- -#include <linux/writeback.h> /* for inode_lock */
   
- -#include <asm/atomic.h>
+ +#include <linux/atomic.h>
   
   #include <linux/fsnotify_backend.h>
   #include "fsnotify.h"
@@@ -102,17 -103,6 +102,6 @@@ static DEFINE_SPINLOCK(destroy_lock)
   static LIST_HEAD(destroy_list);
   static DECLARE_WAIT_QUEUE_HEAD(destroy_waitq);
   
- void fsnotify_get_mark(struct fsnotify_mark *mark)
- {
-       atomic_inc(&mark->refcnt);
- }
- 
- void fsnotify_put_mark(struct fsnotify_mark *mark)
- {
-       if (atomic_dec_and_test(&mark->refcnt))
-               mark->free_mark(mark);
- }
- 
   /*
    * Any time a mark is getting freed we end up here.
    * The caller had better be holding a reference to this mark so we don't actually
@@@ -216,7 -206,7 +205,7 @@@ int fsnotify_add_mark(struct fsnotify_m
                       struct fsnotify_group *group, struct inode *inode,
                       struct vfsmount *mnt, int allow_dups)
   {
-       int ret = 0;
+       int ret;
   
         BUG_ON(inode && mnt);
         BUG_ON(!inode && !mnt);
@@@ -231,23 -221,20 +220,20 @@@
         spin_lock(&group->mark_lock);
   
         mark->flags |= FSNOTIFY_MARK_FLAG_ALIVE;
- 
         mark->group = group;
         list_add(&mark->g_list, &group->marks_list);
-       atomic_inc(&group->num_marks);
         fsnotify_get_mark(mark); /* for i_list and g_list */
+       atomic_inc(&group->num_marks);
   
-       if (inode) {
+       ret = 0;
+       if (inode)
                 ret = fsnotify_add_inode_mark(mark, group, inode, allow_dups);
-               if (ret)
-                       goto err;
-       } else if (mnt) {
+       else if (mnt)
                 ret = fsnotify_add_vfsmount_mark(mark, group, mnt, allow_dups);
-               if (ret)
-                       goto err;
-       } else {
+       else
                 BUG();
-       }
+       if (ret)
+               goto err;
   
         spin_unlock(&group->mark_lock);
   
@@@ -259,7 -246,7 +245,7 @@@
         if (inode)
                 __fsnotify_update_child_dentry_flags(inode);
   
-       return ret;
+       return 0;
   err:
         mark->flags &= ~FSNOTIFY_MARK_FLAG_ALIVE;
         list_del_init(&mark->g_list);
@@@ -345,6 -332,10 +331,10 @@@ static int fsnotify_mark_destroy(void *
   
                 synchronize_srcu(&fsnotify_mark_srcu);
   
+               /*
+                * at this point we cannot be found via the i_list or g_list so
+                * drop that reference.
+                */
                 list_for_each_entry_safe(mark, next, &private_destroy_list, destroy_list) {
                         list_del_init(&mark->destroy_list);
                         fsnotify_put_mark(mark);
diff --combined include/linux/fsnotify_backend.h

index 91d0e0a34ef3185a6051d8394cab63dfb76a04cb,6a3c66051bc85e58531768ab1ba53f0b9f164e60..226c791ff2e2261c9180db4d44ce87ae0b471c7a
--- 1/include/linux/fsnotify_backend.h
--- 2/include/linux/fsnotify_backend.h
+++ b/include/linux/fsnotify_backend.h
@@@ -16,7 -16,7 +16,7 @@@
   #include <linux/spinlock.h>
   #include <linux/types.h>
   
- -#include <asm/atomic.h>
+ +#include <linux/atomic.h>
   
   /*
    * IN_* from inotify.h lines up EXACTLY with FS_*, this is so we can easily
@@@ -125,6 -125,7 +125,7 @@@ struct fsnotify_group 
   
         const struct fsnotify_ops *ops; /* how this group handles things */
   
+       struct mutex mutex;
         /* needed to send notification to userspace */
         struct mutex notification_mutex;        /* protect the notification_list */
         struct list_head notification_list;     /* list of event_holder this group needs to send to userspace */
@@@ -168,6 -169,7 +169,7 @@@
                         wait_queue_head_t access_waitq;
                         atomic_t bypass_perm;
   #endif /* CONFIG_FANOTIFY_ACCESS_PERMISSIONS */
+                       bool readonly_fallback;
                         int f_flags;
                         unsigned int max_marks;
                         struct user_struct *user;
@@@ -329,15 -331,9 +331,15 @@@ static inline void __fsnotify_update_dc
   {
         struct dentry *parent;
   
- -      assert_spin_locked(&dcache_lock);
         assert_spin_locked(&dentry->d_lock);
   
+ +      /*
+ +       * Serialisation of setting PARENT_WATCHED on the dentries is provided
+ +       * by d_lock. If inotify_inode_watched changes after we have taken
+ +       * d_lock, the following __fsnotify_update_child_dentry_flags call will
+ +       * find our entry, so it will spin until we complete here, and update
+ +       * us with the new state.
+ +       */
         parent = dentry->d_parent;
         if (parent->d_inode && fsnotify_inode_watches_children(parent->d_inode))
                 dentry->d_flags |= DCACHE_FSNOTIFY_PARENT_WATCHED;
@@@ -347,12 -343,15 +349,12 @@@
   
   /*
    * fsnotify_d_instantiate - instantiate a dentry for inode
- - * Called with dcache_lock held.
    */
   static inline void __fsnotify_d_instantiate(struct dentry *dentry, struct inode *inode)
   {
         if (!inode)
                 return;
   
- -      assert_spin_locked(&dcache_lock);
- -
         spin_lock(&dentry->d_lock);
         __fsnotify_update_dcache_flags(dentry);
         spin_unlock(&dentry->d_lock);
@@@ -415,8 -414,6 +417,6 @@@ extern void fsnotify_clear_inode_marks_
   extern void fsnotify_clear_marks_by_group_flags(struct fsnotify_group *group, unsigned int flags);
   /* run all the marks in a group, and flag them to be freed */
   extern void fsnotify_clear_marks_by_group(struct fsnotify_group *group);
- extern void fsnotify_get_mark(struct fsnotify_mark *mark);
- extern void fsnotify_put_mark(struct fsnotify_mark *mark);
   extern void fsnotify_unmount_inodes(struct list_head *list);
   
   /* put here because inotify does some weird stuff when destroying watches */
@@@ -430,6 -427,16 +430,16 @@@ extern struct fsnotify_event *fsnotify_
   extern int fsnotify_replace_event(struct fsnotify_event_holder *old_holder,
                                   struct fsnotify_event *new_event);
   
+ static inline void fsnotify_get_mark(struct fsnotify_mark *mark)
+ {
+       atomic_inc(&mark->refcnt);
+ }
+ 
+ static inline void fsnotify_put_mark(struct fsnotify_mark *mark)
+ {
+       if (atomic_dec_and_test(&mark->refcnt))
+               mark->free_mark(mark);
+ }
   #else
   
   static inline int fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is,
author	Stephen Rothwell <sfr@canb.auug.org.au>
	Tue, 13 Dec 2011 04:51:20 +0000 (15:51 +1100)
committer	Stephen Rothwell <sfr@canb.auug.org.au>
	Tue, 13 Dec 2011 04:51:20 +0000 (15:51 +1100)
		1	2
fs/cachefiles/namei.c	patch \|	diff1 \|	diff2 \|	blob \| history
fs/nfsd/vfs.c	patch \|	diff1 \|	diff2 \|	blob \| history
fs/notify/fanotify/fanotify_user.c	patch \|	diff1 \|	diff2 \|	blob \| history
fs/notify/group.c	patch \|	diff1 \|	diff2 \|	blob \| history
fs/notify/mark.c	patch \|	diff1 \|	diff2 \|	blob \| history
include/linux/fsnotify_backend.h	patch \|	diff1 \|	diff2 \|	blob \| history