git.kernelconcepts.de Git - karo-tx-linux.git/blobdiff - fs/nfs/dir.c
NFS: Move v3 declarations out of internal.h
[karo-tx-linux.git] / fs / nfs / dir.c
index 4a3d4ef76127bc716028d3d9df25791d91ff76ce..36d921f0c6026c27170b565f46eb4e26999ea812 100644 (file)
@@ -988,9 +988,13 @@ EXPORT_SYMBOL_GPL(nfs_force_lookup_revalidate);
  * A check for whether or not the parent directory has changed.
  * In the case it has, we assume that the dentries are untrustworthy
  * and may need to be looked up again.
+ * If rcu_walk prevents us from performing a full check, return 0.
  */
-static int nfs_check_verifier(struct inode *dir, struct dentry *dentry)
+static int nfs_check_verifier(struct inode *dir, struct dentry *dentry,
+                             int rcu_walk)
 {
+       int ret;
+
        if (IS_ROOT(dentry))
                return 1;
        if (NFS_SERVER(dir)->flags & NFS_MOUNT_LOOKUP_CACHE_NONE)
@@ -998,7 +1002,11 @@ static int nfs_check_verifier(struct inode *dir, struct dentry *dentry)
        if (!nfs_verify_change_attribute(dir, dentry->d_time))
                return 0;
        /* Revalidate nfsi->cache_change_attribute before we declare a match */
-       if (nfs_revalidate_inode(NFS_SERVER(dir), dir) < 0)
+       if (rcu_walk)
+               ret = nfs_revalidate_inode_rcu(NFS_SERVER(dir), dir);
+       else
+               ret = nfs_revalidate_inode(NFS_SERVER(dir), dir);
+       if (ret < 0)
                return 0;
        if (!nfs_verify_change_attribute(dir, dentry->d_time))
                return 0;
@@ -1042,6 +1050,8 @@ int nfs_lookup_verify_inode(struct inode *inode, unsigned int flags)
 out:
        return (inode->i_nlink == 0) ? -ENOENT : 0;
 out_force:
+       if (flags & LOOKUP_RCU)
+               return -ECHILD;
        ret = __nfs_revalidate_inode(server, inode);
        if (ret != 0)
                return ret;
@@ -1054,6 +1064,9 @@ out_force:
  *
  * If parent mtime has changed, we revalidate, else we wait for a
  * period corresponding to the parent's attribute cache timeout value.
+ *
+ * If LOOKUP_RCU prevents us from performing a full check, return 1
+ * suggesting a reval is needed.
  */
 static inline
 int nfs_neg_need_reval(struct inode *dir, struct dentry *dentry,
@@ -1064,7 +1077,7 @@ int nfs_neg_need_reval(struct inode *dir, struct dentry *dentry,
                return 0;
        if (NFS_SERVER(dir)->flags & NFS_MOUNT_LOOKUP_CACHE_NONEG)
                return 1;
-       return !nfs_check_verifier(dir, dentry);
+       return !nfs_check_verifier(dir, dentry, flags & LOOKUP_RCU);
 }
 
 /*
@@ -1088,21 +1101,30 @@ static int nfs_lookup_revalidate(struct dentry *dentry, unsigned int flags)
        struct nfs4_label *label = NULL;
        int error;
 
-       if (flags & LOOKUP_RCU)
-               return -ECHILD;
-
-       parent = dget_parent(dentry);
-       dir = parent->d_inode;
+       if (flags & LOOKUP_RCU) {
+               parent = ACCESS_ONCE(dentry->d_parent);
+               dir = ACCESS_ONCE(parent->d_inode);
+               if (!dir)
+                       return -ECHILD;
+       } else {
+               parent = dget_parent(dentry);
+               dir = parent->d_inode;
+       }
        nfs_inc_stats(dir, NFSIOS_DENTRYREVALIDATE);
        inode = dentry->d_inode;
 
        if (!inode) {
-               if (nfs_neg_need_reval(dir, dentry, flags))
+               if (nfs_neg_need_reval(dir, dentry, flags)) {
+                       if (flags & LOOKUP_RCU)
+                               return -ECHILD;
                        goto out_bad;
+               }
                goto out_valid_noent;
        }
 
        if (is_bad_inode(inode)) {
+               if (flags & LOOKUP_RCU)
+                       return -ECHILD;
                dfprintk(LOOKUPCACHE, "%s: %pd2 has dud inode\n",
                                __func__, dentry);
                goto out_bad;
@@ -1112,12 +1134,20 @@ static int nfs_lookup_revalidate(struct dentry *dentry, unsigned int flags)
                goto out_set_verifier;
 
        /* Force a full look up iff the parent directory has changed */
-       if (!nfs_is_exclusive_create(dir, flags) && nfs_check_verifier(dir, dentry)) {
-               if (nfs_lookup_verify_inode(inode, flags))
+       if (!nfs_is_exclusive_create(dir, flags) &&
+           nfs_check_verifier(dir, dentry, flags & LOOKUP_RCU)) {
+
+               if (nfs_lookup_verify_inode(inode, flags)) {
+                       if (flags & LOOKUP_RCU)
+                               return -ECHILD;
                        goto out_zap_parent;
+               }
                goto out_valid;
        }
 
+       if (flags & LOOKUP_RCU)
+               return -ECHILD;
+
        if (NFS_STALE(inode))
                goto out_bad;
 
@@ -1153,13 +1183,18 @@ out_set_verifier:
        /* Success: notify readdir to use READDIRPLUS */
        nfs_advise_use_readdirplus(dir);
  out_valid_noent:
-       dput(parent);
+       if (flags & LOOKUP_RCU) {
+               if (parent != ACCESS_ONCE(dentry->d_parent))
+                       return -ECHILD;
+       } else
+               dput(parent);
        dfprintk(LOOKUPCACHE, "NFS: %s(%pd2) is valid\n",
                        __func__, dentry);
        return 1;
 out_zap_parent:
        nfs_zap_caches(dir);
  out_bad:
+       WARN_ON(flags & LOOKUP_RCU);
        nfs_free_fattr(fattr);
        nfs_free_fhandle(fhandle);
        nfs4_label_free(label);
@@ -1185,6 +1220,7 @@ out_zap_parent:
                        __func__, dentry);
        return 0;
 out_error:
+       WARN_ON(flags & LOOKUP_RCU);
        nfs_free_fattr(fattr);
        nfs_free_fhandle(fhandle);
        nfs4_label_free(label);
@@ -1529,14 +1565,9 @@ EXPORT_SYMBOL_GPL(nfs_atomic_open);
 
 static int nfs4_lookup_revalidate(struct dentry *dentry, unsigned int flags)
 {
-       struct dentry *parent = NULL;
        struct inode *inode;
-       struct inode *dir;
        int ret = 0;
 
-       if (flags & LOOKUP_RCU)
-               return -ECHILD;
-
        if (!(flags & LOOKUP_OPEN) || (flags & LOOKUP_DIRECTORY))
                goto no_open;
        if (d_mountpoint(dentry))
@@ -1545,34 +1576,47 @@ static int nfs4_lookup_revalidate(struct dentry *dentry, unsigned int flags)
                goto no_open;
 
        inode = dentry->d_inode;
-       parent = dget_parent(dentry);
-       dir = parent->d_inode;
 
        /* We can't create new files in nfs_open_revalidate(), so we
         * optimize away revalidation of negative dentries.
         */
        if (inode == NULL) {
+               struct dentry *parent;
+               struct inode *dir;
+
+               if (flags & LOOKUP_RCU) {
+                       parent = ACCESS_ONCE(dentry->d_parent);
+                       dir = ACCESS_ONCE(parent->d_inode);
+                       if (!dir)
+                               return -ECHILD;
+               } else {
+                       parent = dget_parent(dentry);
+                       dir = parent->d_inode;
+               }
                if (!nfs_neg_need_reval(dir, dentry, flags))
                        ret = 1;
+               else if (flags & LOOKUP_RCU)
+                       ret = -ECHILD;
+               if (!(flags & LOOKUP_RCU))
+                       dput(parent);
+               else if (parent != ACCESS_ONCE(dentry->d_parent))
+                       return -ECHILD;
                goto out;
        }
 
        /* NFS only supports OPEN on regular files */
        if (!S_ISREG(inode->i_mode))
-               goto no_open_dput;
+               goto no_open;
        /* We cannot do exclusive creation on a positive dentry */
        if (flags & LOOKUP_EXCL)
-               goto no_open_dput;
+               goto no_open;
 
        /* Let f_op->open() actually open (and revalidate) the file */
        ret = 1;
 
 out:
-       dput(parent);
        return ret;
 
-no_open_dput:
-       dput(parent);
 no_open:
        return nfs_lookup_revalidate(dentry, flags);
 }
@@ -2028,10 +2072,14 @@ static DEFINE_SPINLOCK(nfs_access_lru_lock);
 static LIST_HEAD(nfs_access_lru_list);
 static atomic_long_t nfs_access_nr_entries;
 
+static unsigned long nfs_access_max_cachesize = ULONG_MAX; /* cap on the total number of access cache entries; compared against nfs_access_nr_entries in nfs_access_cache_enforce_limit() */
+module_param(nfs_access_max_cachesize, ulong, 0644); /* registered as a ulong module parameter with mode 0644 */
+MODULE_PARM_DESC(nfs_access_max_cachesize, "NFS access maximum total cache length");
+
 static void nfs_access_free_entry(struct nfs_access_entry *entry)
 {
        put_rpccred(entry->cred);
-       kfree(entry);
+       kfree_rcu(entry, rcu_head);
        smp_mb__before_atomic();
        atomic_long_dec(&nfs_access_nr_entries);
        smp_mb__after_atomic();
@@ -2048,19 +2096,14 @@ static void nfs_access_free_list(struct list_head *head)
        }
 }
 
-unsigned long
-nfs_access_cache_scan(struct shrinker *shrink, struct shrink_control *sc)
+static unsigned long
+nfs_do_access_cache_scan(unsigned int nr_to_scan)
 {
        LIST_HEAD(head);
        struct nfs_inode *nfsi, *next;
        struct nfs_access_entry *cache;
-       int nr_to_scan = sc->nr_to_scan;
-       gfp_t gfp_mask = sc->gfp_mask;
        long freed = 0;
 
-       if ((gfp_mask & GFP_KERNEL) != GFP_KERNEL)
-               return SHRINK_STOP;
-
        spin_lock(&nfs_access_lru_lock);
        list_for_each_entry_safe(nfsi, next, &nfs_access_lru_list, access_cache_inode_lru) {
                struct inode *inode;
@@ -2093,12 +2136,40 @@ remove_lru_entry:
        return freed;
 }
 
+unsigned long
+nfs_access_cache_scan(struct shrinker *shrink, struct shrink_control *sc)
+{      /*
+        * Scan callback taking the shrinker/shrink_control pair: a thin
+        * wrapper that hands sc->nr_to_scan to nfs_do_access_cache_scan()
+        * and returns whatever that returns.  @shrink is not used here.
+        */
+       int nr_to_scan = sc->nr_to_scan;
+       gfp_t gfp_mask = sc->gfp_mask;
+       /* Return SHRINK_STOP without scanning unless every GFP_KERNEL bit is set in the caller's mask. */
+       if ((gfp_mask & GFP_KERNEL) != GFP_KERNEL)
+               return SHRINK_STOP;
+       return nfs_do_access_cache_scan(nr_to_scan);
+}
+
+
 unsigned long
 nfs_access_cache_count(struct shrinker *shrink, struct shrink_control *sc)
 {      /*
         * Count callback: reads the global nfs_access_nr_entries counter
         * and returns it after passing it through vfs_pressure_ratio().
         * Neither @shrink nor @sc is used.
         */
        return vfs_pressure_ratio(atomic_long_read(&nfs_access_nr_entries));
 }
 
+static void
+nfs_access_cache_enforce_limit(void)
+{      /*
+        * If the global entry count is above nfs_access_max_cachesize,
+        * call nfs_do_access_cache_scan() once with a scan count equal
+        * to the excess, clamped to 100.  The scan's return value is
+        * ignored.  Does nothing when the count is at or below the limit.
+        */
+       long nr_entries = atomic_long_read(&nfs_access_nr_entries);
+       unsigned long diff;
+       unsigned int nr_to_scan;
+       /* The < 0 test comes first so a negative count is never compared against the unsigned limit. */
+       if (nr_entries < 0 || nr_entries <= nfs_access_max_cachesize)
+               return;
+       nr_to_scan = 100;       /* upper bound on the scan count requested per call */
+       diff = nr_entries - nfs_access_max_cachesize;   /* non-zero: nr_entries > limit was established above */
+       if (diff < nr_to_scan)
+               nr_to_scan = diff;      /* fits in unsigned int: diff < 100 here */
+       nfs_do_access_cache_scan(nr_to_scan);
+}
+
 static void __nfs_access_zap_cache(struct nfs_inode *nfsi, struct list_head *head)
 {
        struct rb_root *root_node = &nfsi->access_cache;
@@ -2186,6 +2257,38 @@ out_zap:
        return -ENOENT;
 }
 
+static int nfs_access_get_cached_rcu(struct inode *inode, struct rpc_cred *cred, struct nfs_access_entry *res)
+{
+       /* Only check the most recently returned cache entry,
+        * but do it without locking.
+        *
+        * Looks solely at the tail of nfsi->access_cache_entry_lru, inside
+        * an rcu_read_lock()/rcu_read_unlock() section.
+        *
+        * On a hit, copies jiffies, cred and mask from the entry into @res
+        * and returns 0.  Returns -ECHILD when the inode's cache_validity
+        * has NFS_INO_INVALID_ACCESS set, the list is empty, the tail entry
+        * belongs to a different @cred, or the entry fails the age check
+        * below.
+        */
+       struct nfs_inode *nfsi = NFS_I(inode);
+       struct nfs_access_entry *cache;
+       int err = -ECHILD;      /* every early "goto out" reports a miss */
+       struct list_head *lh;
+
+       rcu_read_lock();
+       if (nfsi->cache_validity & NFS_INO_INVALID_ACCESS)
+               goto out;
+       lh = rcu_dereference(nfsi->access_cache_entry_lru.prev);        /* tail of the per-inode list */
+       cache = list_entry(lh, struct nfs_access_entry, lru);   /* not dereferenced until lh is known not to be the list head */
+       if (lh == &nfsi->access_cache_entry_lru ||      /* empty list: .prev is the head itself */
+           cred != cache->cred)
+               cache = NULL;
+       if (cache == NULL)
+               goto out;
+       if (!nfs_have_delegated_attributes(inode) &&    /* the age check is skipped when this returns true */
+           !time_in_range_open(jiffies, cache->jiffies, cache->jiffies + nfsi->attrtimeo))
+               goto out;
+       res->jiffies = cache->jiffies;  /* copy the fields out; no pointer to the entry is returned */
+       res->cred = cache->cred;
+       res->mask = cache->mask;
+       err = 0;
+out:
+       rcu_read_unlock();
+       return err;
+}
+
 static void nfs_access_add_rbtree(struct inode *inode, struct nfs_access_entry *set)
 {
        struct nfs_inode *nfsi = NFS_I(inode);
@@ -2229,6 +2332,11 @@ void nfs_access_add_cache(struct inode *inode, struct nfs_access_entry *set)
        cache->cred = get_rpccred(set->cred);
        cache->mask = set->mask;
 
+       /* The above field assignments must be visible
+        * before this item appears on the lru.  We cannot easily
+        * use rcu_assign_pointer, so just force the memory barrier.
+        */
+       smp_wmb();
        nfs_access_add_rbtree(inode, cache);
 
        /* Update accounting */
@@ -2244,6 +2352,7 @@ void nfs_access_add_cache(struct inode *inode, struct nfs_access_entry *set)
                                        &nfs_access_lru_list);
                spin_unlock(&nfs_access_lru_lock);
        }
+       nfs_access_cache_enforce_limit();
 }
 EXPORT_SYMBOL_GPL(nfs_access_add_cache);
 
@@ -2267,10 +2376,16 @@ static int nfs_do_access(struct inode *inode, struct rpc_cred *cred, int mask)
 
        trace_nfs_access_enter(inode);
 
-       status = nfs_access_get_cached(inode, cred, &cache);
+       status = nfs_access_get_cached_rcu(inode, cred, &cache);
+       if (status != 0)
+               status = nfs_access_get_cached(inode, cred, &cache);
        if (status == 0)
                goto out_cached;
 
+       status = -ECHILD;
+       if (mask & MAY_NOT_BLOCK)
+               goto out;
+
        /* Be clever: ask server to check for all possible rights */
        cache.mask = MAY_EXEC | MAY_WRITE | MAY_READ;
        cache.cred = cred;
@@ -2321,9 +2436,6 @@ int nfs_permission(struct inode *inode, int mask)
        struct rpc_cred *cred;
        int res = 0;
 
-       if (mask & MAY_NOT_BLOCK)
-               return -ECHILD;
-
        nfs_inc_stats(inode, NFSIOS_VFSACCESS);
 
        if ((mask & (MAY_READ | MAY_WRITE | MAY_EXEC)) == 0)
@@ -2350,12 +2462,23 @@ force_lookup:
        if (!NFS_PROTO(inode)->access)
                goto out_notsup;
 
-       cred = rpc_lookup_cred();
-       if (!IS_ERR(cred)) {
-               res = nfs_do_access(inode, cred, mask);
-               put_rpccred(cred);
-       } else
+       /* Always try fast lookups first */
+       rcu_read_lock();
+       cred = rpc_lookup_cred_nonblock();
+       if (!IS_ERR(cred))
+               res = nfs_do_access(inode, cred, mask|MAY_NOT_BLOCK);
+       else
                res = PTR_ERR(cred);
+       rcu_read_unlock();
+       if (res == -ECHILD && !(mask & MAY_NOT_BLOCK)) {
+               /* Fast lookup failed, try the slow way */
+               cred = rpc_lookup_cred();
+               if (!IS_ERR(cred)) {
+                       res = nfs_do_access(inode, cred, mask);
+                       put_rpccred(cred);
+               } else
+                       res = PTR_ERR(cred);
+       }
 out:
        if (!res && (mask & MAY_EXEC) && !execute_ok(inode))
                res = -EACCES;
@@ -2364,6 +2487,9 @@ out:
                inode->i_sb->s_id, inode->i_ino, mask, res);
        return res;
 out_notsup:
+       if (mask & MAY_NOT_BLOCK)
+               return -ECHILD;
+
        res = nfs_revalidate_inode(NFS_SERVER(inode), inode);
        if (res == 0)
                res = generic_permission(inode, mask);