git.kernelconcepts.de Git - karo-tx-linux.git/commitdiff
Merge tag 'nfsd-4.11' of git://linux-nfs.org/~bfields/linux
author Linus Torvalds <torvalds@linux-foundation.org>
Tue, 28 Feb 2017 23:39:09 +0000 (15:39 -0800)
committer Linus Torvalds <torvalds@linux-foundation.org>
Tue, 28 Feb 2017 23:39:09 +0000 (15:39 -0800)
Pull nfsd updates from Bruce Fields:
 "The nfsd update this round is mainly a lot of miscellaneous cleanups
  and bugfixes.

  A couple changes could theoretically break working setups on upgrade.
  I don't expect complaints in practice, but they seem worth calling out
  just in case:

   - NFS security labels are now off by default; a new security_label
     export flag reenables it per export. But, having them on by default
     is a disaster, as it generally only makes sense if all your clients
     and servers have similar enough selinux policies. Thanks to Jason
     Tibbitts for pointing this out.

   - NFSv4/UDP support is off. It was never really supported, and the
     spec explicitly forbids it. We only ever left it on out of
     laziness; thanks to Jeff Layton for finally fixing that"

* tag 'nfsd-4.11' of git://linux-nfs.org/~bfields/linux: (34 commits)
  nfsd: Fix display of the version string
  nfsd: fix configuration of supported minor versions
  sunrpc: don't register UDP port with rpcbind when version needs congestion control
  nfs/nfsd/sunrpc: enforce transport requirements for NFSv4
  sunrpc: flag transports as having congestion control
  sunrpc: turn bitfield flags in svc_version into bools
  nfsd: remove superfluous KERN_INFO
  nfsd: special case truncates some more
  nfsd: minor nfsd_setattr cleanup
  NFSD: Reserve adequate space for LOCKT operation
  NFSD: Get response size before operation for all RPCs
  nfsd/callback: Drop a useless data copy when comparing sessionid
  nfsd/callback: skip the callback tag
  nfsd/callback: Cleanup callback cred on shutdown
  nfsd/idmap: return nfserr_inval for 0-length names
  SUNRPC/Cache: Always treat the invalid cache as unexpired
  SUNRPC: Drop all entries from cache_detail when cache_purge()
  svcrdma: Poll CQs in "workqueue" mode
  svcrdma: Combine list fields in struct svc_rdma_op_ctxt
  svcrdma: Remove unused sc_dto_q field
  ...

33 files changed:
fs/lockd/svc.c
fs/nfs/callback_xdr.c
fs/nfsd/export.c
fs/nfsd/nfs2acl.c
fs/nfsd/nfs3acl.c
fs/nfsd/nfs3proc.c
fs/nfsd/nfs4callback.c
fs/nfsd/nfs4idmap.c
fs/nfsd/nfs4proc.c
fs/nfsd/nfs4state.c
fs/nfsd/nfs4xdr.c
fs/nfsd/nfsctl.c
fs/nfsd/nfsd.h
fs/nfsd/nfsproc.c
fs/nfsd/nfssvc.c
fs/nfsd/state.h
fs/nfsd/vfs.c
fs/nfsd/vfs.h
include/linux/sunrpc/cache.h
include/linux/sunrpc/rpc_rdma.h
include/linux/sunrpc/svc.h
include/linux/sunrpc/svc_rdma.h
include/linux/sunrpc/svc_xprt.h
include/uapi/linux/nfsd/export.h
net/sunrpc/auth_gss/svcauth_gss.c
net/sunrpc/cache.c
net/sunrpc/svc.c
net/sunrpc/svcsock.c
net/sunrpc/xprtrdma/svc_rdma_backchannel.c
net/sunrpc/xprtrdma/svc_rdma_marshal.c
net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
net/sunrpc/xprtrdma/svc_rdma_sendto.c
net/sunrpc/xprtrdma/svc_rdma_transport.c

index 1c13dd80744ff99cc0691476c3a2920eca9757cc..7e4ea3b9f4724f2b62f2aa7fe5d89844d07812cd 100644 (file)
@@ -322,6 +322,8 @@ static int lockd_inet6addr_event(struct notifier_block *this,
                dprintk("lockd_inet6addr_event: removed %pI6\n", &ifa->addr);
                sin6.sin6_family = AF_INET6;
                sin6.sin6_addr = ifa->addr;
+               if (ipv6_addr_type(&sin6.sin6_addr) & IPV6_ADDR_LINKLOCAL)
+                       sin6.sin6_scope_id = ifa->idev->dev->ifindex;
                svc_age_temp_xprts_now(nlmsvc_rqst->rq_server,
                        (struct sockaddr *)&sin6);
        }
index eb094c6011d85bb7ce7bd544a3d203defd50b03a..fd0284c1dc328b92520aa0c39b4ca2a4b9899915 100644 (file)
@@ -1083,7 +1083,8 @@ struct svc_version nfs4_callback_version1 = {
        .vs_proc = nfs4_callback_procedures1,
        .vs_xdrsize = NFS4_CALLBACK_XDRSIZE,
        .vs_dispatch = NULL,
-       .vs_hidden = 1,
+       .vs_hidden = true,
+       .vs_need_cong_ctrl = true,
 };
 
 struct svc_version nfs4_callback_version4 = {
@@ -1092,5 +1093,6 @@ struct svc_version nfs4_callback_version4 = {
        .vs_proc = nfs4_callback_procedures1,
        .vs_xdrsize = NFS4_CALLBACK_XDRSIZE,
        .vs_dispatch = NULL,
-       .vs_hidden = 1,
+       .vs_hidden = true,
+       .vs_need_cong_ctrl = true,
 };
index 43e109cc0ccc39e8293a7c8926bcb1c105951714..e71f11b1a180c4c0ff0d3ea30d21b568e3c11511 100644 (file)
@@ -1102,6 +1102,7 @@ static struct flags {
        { NFSEXP_NOAUTHNLM, {"insecure_locks", ""}},
        { NFSEXP_V4ROOT, {"v4root", ""}},
        { NFSEXP_PNFS, {"pnfs", ""}},
+       { NFSEXP_SECURITY_LABEL, {"security_label", ""}},
        { 0, {"", ""}}
 };
 
index d08cd88155c75278c4607f49c078622bf87ab5ee..838f90f3f890a00f0f0989e5c3abb79e20b273d0 100644 (file)
@@ -376,5 +376,4 @@ struct svc_version  nfsd_acl_version2 = {
                .vs_proc        = nfsd_acl_procedures2,
                .vs_dispatch    = nfsd_dispatch,
                .vs_xdrsize     = NFS3_SVC_XDRSIZE,
-               .vs_hidden      = 0,
 };
index 0c890347cde3d9559b0b0103c2c2c11825d51fae..dcb5f79076c0cb3cb12400575cb7e3d3cfa9e26d 100644 (file)
@@ -266,6 +266,5 @@ struct svc_version  nfsd_acl_version3 = {
                .vs_proc        = nfsd_acl_procedures3,
                .vs_dispatch    = nfsd_dispatch,
                .vs_xdrsize     = NFS3_SVC_XDRSIZE,
-               .vs_hidden      = 0,
 };
 
index d818e4ffd79f9acd01c5f08384cd99a1bfad7243..045c9081eabeb0242a0f60d49ec9177dc9c0c6f4 100644 (file)
@@ -193,11 +193,9 @@ nfsd3_proc_write(struct svc_rqst *rqstp, struct nfsd3_writeargs *argp,
 
        fh_copy(&resp->fh, &argp->fh);
        resp->committed = argp->stable;
-       nfserr = nfsd_write(rqstp, &resp->fh, NULL,
-                                  argp->offset,
-                                  rqstp->rq_vec, argp->vlen,
-                                  &cnt,
-                                  &resp->committed);
+       nfserr = nfsd_write(rqstp, &resp->fh, argp->offset,
+                               rqstp->rq_vec, argp->vlen,
+                               &cnt, resp->committed);
        resp->count = cnt;
        RETURN_STATUS(nfserr);
 }
index eb78109d666c1a4d8cc62fea22919f824ae93024..0274db6e65d0d6775d0b6c9c9e72e2f0c6a5fa57 100644 (file)
@@ -303,6 +303,7 @@ static int decode_cb_compound4res(struct xdr_stream *xdr,
        p = xdr_inline_decode(xdr, length + 4);
        if (unlikely(p == NULL))
                goto out_overflow;
+       p += XDR_QUADLEN(length);
        hdr->nops = be32_to_cpup(p);
        return 0;
 out_overflow:
@@ -396,13 +397,10 @@ static int decode_cb_sequence4resok(struct xdr_stream *xdr,
                                    struct nfsd4_callback *cb)
 {
        struct nfsd4_session *session = cb->cb_clp->cl_cb_session;
-       struct nfs4_sessionid id;
-       int status;
+       int status = -ESERVERFAULT;
        __be32 *p;
        u32 dummy;
 
-       status = -ESERVERFAULT;
-
        /*
         * If the server returns different values for sessionID, slotID or
         * sequence number, the server is looney tunes.
@@ -410,9 +408,8 @@ static int decode_cb_sequence4resok(struct xdr_stream *xdr,
        p = xdr_inline_decode(xdr, NFS4_MAX_SESSIONID_LEN + 4 + 4 + 4 + 4);
        if (unlikely(p == NULL))
                goto out_overflow;
-       memcpy(id.data, p, NFS4_MAX_SESSIONID_LEN);
-       if (memcmp(id.data, session->se_sessionid.data,
-                                       NFS4_MAX_SESSIONID_LEN) != 0) {
+
+       if (memcmp(p, session->se_sessionid.data, NFS4_MAX_SESSIONID_LEN)) {
                dprintk("NFS: %s Invalid session id\n", __func__);
                goto out;
        }
@@ -753,6 +750,14 @@ int set_callback_cred(void)
        return 0;
 }
 
+void cleanup_callback_cred(void)
+{
+       if (callback_cred) {
+               put_rpccred(callback_cred);
+               callback_cred = NULL;
+       }
+}
+
 static struct rpc_cred *get_backchannel_cred(struct nfs4_client *clp, struct rpc_clnt *client, struct nfsd4_session *ses)
 {
        if (clp->cl_minorversion == 0) {
index 5b20577dcdd233162d8030003758274d7619d038..6b9b6cca469f427fed55ec5d892141e38be23eb4 100644 (file)
@@ -628,6 +628,10 @@ nfsd_map_name_to_uid(struct svc_rqst *rqstp, const char *name, size_t namelen,
 {
        __be32 status;
        u32 id = -1;
+
+       if (name == NULL || namelen == 0)
+               return nfserr_inval;
+
        status = do_name_to_id(rqstp, IDMAP_TYPE_USER, name, namelen, &id);
        *uid = make_kuid(&init_user_ns, id);
        if (!uid_valid(*uid))
@@ -641,6 +645,10 @@ nfsd_map_name_to_gid(struct svc_rqst *rqstp, const char *name, size_t namelen,
 {
        __be32 status;
        u32 id = -1;
+
+       if (name == NULL || namelen == 0)
+               return nfserr_inval;
+
        status = do_name_to_id(rqstp, IDMAP_TYPE_GROUP, name, namelen, &id);
        *gid = make_kgid(&init_user_ns, id);
        if (!gid_valid(*gid))
index 74a6e573e061afa73fba49d8c65a09b7d470229d..cbeeda1e94a2fbbba61e2adeeb4f9ba89287eaf9 100644 (file)
@@ -95,11 +95,15 @@ check_attr_support(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
                   u32 *bmval, u32 *writable)
 {
        struct dentry *dentry = cstate->current_fh.fh_dentry;
+       struct svc_export *exp = cstate->current_fh.fh_export;
 
        if (!nfsd_attrs_supported(cstate->minorversion, bmval))
                return nfserr_attrnotsupp;
        if ((bmval[0] & FATTR4_WORD0_ACL) && !IS_POSIXACL(d_inode(dentry)))
                return nfserr_attrnotsupp;
+       if ((bmval[2] & FATTR4_WORD2_SECURITY_LABEL) &&
+                       !(exp->ex_flags & NFSEXP_SECURITY_LABEL))
+               return nfserr_attrnotsupp;
        if (writable && !bmval_is_subset(bmval, writable))
                return nfserr_inval;
        if (writable && (bmval[2] & FATTR4_WORD2_MODE_UMASK) &&
@@ -983,7 +987,7 @@ nfsd4_write(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 
        status = nfsd_vfs_write(rqstp, &cstate->current_fh, filp,
                                write->wr_offset, rqstp->rq_vec, nvecs, &cnt,
-                               &write->wr_how_written);
+                               write->wr_how_written);
        fput(filp);
 
        write->wr_bytes_written = cnt;
@@ -1838,6 +1842,12 @@ static inline u32 nfsd4_status_stateid_rsize(struct svc_rqst *rqstp, struct nfsd
        return (op_encode_hdr_size + op_encode_stateid_maxsz)* sizeof(__be32);
 }
 
+static inline u32 nfsd4_access_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
+{
+       /* ac_supported, ac_resp_access */
+       return (op_encode_hdr_size + 2)* sizeof(__be32);
+}
+
 static inline u32 nfsd4_commit_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
 {
        return (op_encode_hdr_size + op_encode_verifier_maxsz) * sizeof(__be32);
@@ -1892,6 +1902,11 @@ static inline u32 nfsd4_getattr_rsize(struct svc_rqst *rqstp,
        return ret;
 }
 
+static inline u32 nfsd4_getfh_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
+{
+       return (op_encode_hdr_size + 1) * sizeof(__be32) + NFS4_FHSIZE;
+}
+
 static inline u32 nfsd4_link_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
 {
        return (op_encode_hdr_size + op_encode_change_info_maxsz)
@@ -1933,6 +1948,11 @@ static inline u32 nfsd4_readdir_rsize(struct svc_rqst *rqstp, struct nfsd4_op *o
                XDR_QUADLEN(rlen)) * sizeof(__be32);
 }
 
+static inline u32 nfsd4_readlink_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
+{
+       return (op_encode_hdr_size + 1) * sizeof(__be32) + PAGE_SIZE;
+}
+
 static inline u32 nfsd4_remove_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
 {
        return (op_encode_hdr_size + op_encode_change_info_maxsz)
@@ -1952,11 +1972,23 @@ static inline u32 nfsd4_sequence_rsize(struct svc_rqst *rqstp,
                + XDR_QUADLEN(NFS4_MAX_SESSIONID_LEN) + 5) * sizeof(__be32);
 }
 
+static inline u32 nfsd4_test_stateid_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
+{
+       return (op_encode_hdr_size + 1 + op->u.test_stateid.ts_num_ids)
+               * sizeof(__be32);
+}
+
 static inline u32 nfsd4_setattr_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
 {
        return (op_encode_hdr_size + nfs4_fattr_bitmap_maxsz) * sizeof(__be32);
 }
 
+static inline u32 nfsd4_secinfo_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
+{
+       return (op_encode_hdr_size + RPC_AUTH_MAXFLAVOR *
+               (4 + XDR_QUADLEN(GSS_OID_MAX_LEN))) * sizeof(__be32);
+}
+
 static inline u32 nfsd4_setclientid_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
 {
        return (op_encode_hdr_size + 2 + XDR_QUADLEN(NFS4_VERIFIER_SIZE)) *
@@ -2011,6 +2043,19 @@ static inline u32 nfsd4_copy_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
 }
 
 #ifdef CONFIG_NFSD_PNFS
+static inline u32 nfsd4_getdeviceinfo_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
+{
+       u32 maxcount = 0, rlen = 0;
+
+       maxcount = svc_max_payload(rqstp);
+       rlen = min(op->u.getdeviceinfo.gd_maxcount, maxcount);
+
+       return (op_encode_hdr_size +
+               1 /* gd_layout_type*/ +
+               XDR_QUADLEN(rlen) +
+               2 /* gd_notify_types */) * sizeof(__be32);
+}
+
 /*
  * At this stage we don't really know what layout driver will handle the request,
  * so we need to define an arbitrary upper bound here.
@@ -2040,10 +2085,17 @@ static inline u32 nfsd4_layoutreturn_rsize(struct svc_rqst *rqstp, struct nfsd4_
 }
 #endif /* CONFIG_NFSD_PNFS */
 
+
+static inline u32 nfsd4_seek_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
+{
+       return (op_encode_hdr_size + 3) * sizeof(__be32);
+}
+
 static struct nfsd4_operation nfsd4_ops[] = {
        [OP_ACCESS] = {
                .op_func = (nfsd4op_func)nfsd4_access,
                .op_name = "OP_ACCESS",
+               .op_rsize_bop = (nfsd4op_rsize)nfsd4_access_rsize,
        },
        [OP_CLOSE] = {
                .op_func = (nfsd4op_func)nfsd4_close,
@@ -2081,6 +2133,7 @@ static struct nfsd4_operation nfsd4_ops[] = {
        [OP_GETFH] = {
                .op_func = (nfsd4op_func)nfsd4_getfh,
                .op_name = "OP_GETFH",
+               .op_rsize_bop = (nfsd4op_rsize)nfsd4_getfh_rsize,
        },
        [OP_LINK] = {
                .op_func = (nfsd4op_func)nfsd4_link,
@@ -2099,6 +2152,7 @@ static struct nfsd4_operation nfsd4_ops[] = {
        [OP_LOCKT] = {
                .op_func = (nfsd4op_func)nfsd4_lockt,
                .op_name = "OP_LOCKT",
+               .op_rsize_bop = (nfsd4op_rsize)nfsd4_lock_rsize,
        },
        [OP_LOCKU] = {
                .op_func = (nfsd4op_func)nfsd4_locku,
@@ -2111,15 +2165,18 @@ static struct nfsd4_operation nfsd4_ops[] = {
                .op_func = (nfsd4op_func)nfsd4_lookup,
                .op_flags = OP_HANDLES_WRONGSEC | OP_CLEAR_STATEID,
                .op_name = "OP_LOOKUP",
+               .op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize,
        },
        [OP_LOOKUPP] = {
                .op_func = (nfsd4op_func)nfsd4_lookupp,
                .op_flags = OP_HANDLES_WRONGSEC | OP_CLEAR_STATEID,
                .op_name = "OP_LOOKUPP",
+               .op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize,
        },
        [OP_NVERIFY] = {
                .op_func = (nfsd4op_func)nfsd4_nverify,
                .op_name = "OP_NVERIFY",
+               .op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize,
        },
        [OP_OPEN] = {
                .op_func = (nfsd4op_func)nfsd4_open,
@@ -2177,6 +2234,7 @@ static struct nfsd4_operation nfsd4_ops[] = {
        [OP_READLINK] = {
                .op_func = (nfsd4op_func)nfsd4_readlink,
                .op_name = "OP_READLINK",
+               .op_rsize_bop = (nfsd4op_rsize)nfsd4_readlink_rsize,
        },
        [OP_REMOVE] = {
                .op_func = (nfsd4op_func)nfsd4_remove,
@@ -2215,6 +2273,7 @@ static struct nfsd4_operation nfsd4_ops[] = {
                .op_func = (nfsd4op_func)nfsd4_secinfo,
                .op_flags = OP_HANDLES_WRONGSEC,
                .op_name = "OP_SECINFO",
+               .op_rsize_bop = (nfsd4op_rsize)nfsd4_secinfo_rsize,
        },
        [OP_SETATTR] = {
                .op_func = (nfsd4op_func)nfsd4_setattr,
@@ -2240,6 +2299,7 @@ static struct nfsd4_operation nfsd4_ops[] = {
        [OP_VERIFY] = {
                .op_func = (nfsd4op_func)nfsd4_verify,
                .op_name = "OP_VERIFY",
+               .op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize,
        },
        [OP_WRITE] = {
                .op_func = (nfsd4op_func)nfsd4_write,
@@ -2314,11 +2374,13 @@ static struct nfsd4_operation nfsd4_ops[] = {
                .op_func = (nfsd4op_func)nfsd4_secinfo_no_name,
                .op_flags = OP_HANDLES_WRONGSEC,
                .op_name = "OP_SECINFO_NO_NAME",
+               .op_rsize_bop = (nfsd4op_rsize)nfsd4_secinfo_rsize,
        },
        [OP_TEST_STATEID] = {
                .op_func = (nfsd4op_func)nfsd4_test_stateid,
                .op_flags = ALLOWED_WITHOUT_FH,
                .op_name = "OP_TEST_STATEID",
+               .op_rsize_bop = (nfsd4op_rsize)nfsd4_test_stateid_rsize,
        },
        [OP_FREE_STATEID] = {
                .op_func = (nfsd4op_func)nfsd4_free_stateid,
@@ -2332,6 +2394,7 @@ static struct nfsd4_operation nfsd4_ops[] = {
                .op_func = (nfsd4op_func)nfsd4_getdeviceinfo,
                .op_flags = ALLOWED_WITHOUT_FH,
                .op_name = "OP_GETDEVICEINFO",
+               .op_rsize_bop = (nfsd4op_rsize)nfsd4_getdeviceinfo_rsize,
        },
        [OP_LAYOUTGET] = {
                .op_func = (nfsd4op_func)nfsd4_layoutget,
@@ -2381,6 +2444,7 @@ static struct nfsd4_operation nfsd4_ops[] = {
        [OP_SEEK] = {
                .op_func = (nfsd4op_func)nfsd4_seek,
                .op_name = "OP_SEEK",
+               .op_rsize_bop = (nfsd4op_rsize)nfsd4_seek_rsize,
        },
 };
 
@@ -2425,14 +2489,11 @@ bool nfsd4_spo_must_allow(struct svc_rqst *rqstp)
 
 int nfsd4_max_reply(struct svc_rqst *rqstp, struct nfsd4_op *op)
 {
-       struct nfsd4_operation *opdesc;
-       nfsd4op_rsize estimator;
-
        if (op->opnum == OP_ILLEGAL)
                return op_encode_hdr_size * sizeof(__be32);
-       opdesc = OPDESC(op);
-       estimator = opdesc->op_rsize_bop;
-       return estimator ? estimator(rqstp, op) : PAGE_SIZE;
+
+       BUG_ON(OPDESC(op)->op_rsize_bop == NULL);
+       return OPDESC(op)->op_rsize_bop(rqstp, op);
 }
 
 void warn_on_nonidempotent_op(struct nfsd4_op *op)
@@ -2476,12 +2537,13 @@ static struct svc_procedure             nfsd_procedures4[2] = {
 };
 
 struct svc_version     nfsd_version4 = {
-               .vs_vers        = 4,
-               .vs_nproc       = 2,
-               .vs_proc        = nfsd_procedures4,
-               .vs_dispatch    = nfsd_dispatch,
-               .vs_xdrsize     = NFS4_SVC_XDRSIZE,
-               .vs_rpcb_optnl  = 1,
+       .vs_vers                = 4,
+       .vs_nproc               = 2,
+       .vs_proc                = nfsd_procedures4,
+       .vs_dispatch            = nfsd_dispatch,
+       .vs_xdrsize             = NFS4_SVC_XDRSIZE,
+       .vs_rpcb_optnl          = true,
+       .vs_need_cong_ctrl      = true,
 };
 
 /*
index a0dee8ae9f97f16a18e40ba19f8e84a45ad1a02b..e9ef50addddb4489534bc07f138cd8c321d9193d 100644 (file)
@@ -2281,7 +2281,7 @@ gen_callback(struct nfs4_client *clp, struct nfsd4_setclientid *se, struct svc_r
 out_err:
        conn->cb_addr.ss_family = AF_UNSPEC;
        conn->cb_addrlen = 0;
-       dprintk(KERN_INFO "NFSD: this client (clientid %08x/%08x) "
+       dprintk("NFSD: this client (clientid %08x/%08x) "
                "will not receive delegations\n",
                clp->cl_clientid.cl_boot, clp->cl_clientid.cl_id);
 
@@ -7012,23 +7012,24 @@ nfs4_state_start(void)
 
        ret = set_callback_cred();
        if (ret)
-               return -ENOMEM;
+               return ret;
+
        laundry_wq = alloc_workqueue("%s", WQ_UNBOUND, 0, "nfsd4");
        if (laundry_wq == NULL) {
                ret = -ENOMEM;
-               goto out_recovery;
+               goto out_cleanup_cred;
        }
        ret = nfsd4_create_callback_queue();
        if (ret)
                goto out_free_laundry;
 
        set_max_delegations();
-
        return 0;
 
 out_free_laundry:
        destroy_workqueue(laundry_wq);
-out_recovery:
+out_cleanup_cred:
+       cleanup_callback_cred();
        return ret;
 }
 
@@ -7086,6 +7087,7 @@ nfs4_state_shutdown(void)
 {
        destroy_workqueue(laundry_wq);
        nfsd4_destroy_callback_queue();
+       cleanup_callback_cred();
 }
 
 static void
index 8fae53ce21d16c8406ff01425d924eb044edee34..382c1fd05b4c8dfe2973d466bae01d6963eb7c43 100644 (file)
@@ -58,7 +58,7 @@
 
 #define NFSDDBG_FACILITY               NFSDDBG_XDR
 
-u32 nfsd_suppattrs[3][3] = {
+const u32 nfsd_suppattrs[3][3] = {
        {NFSD4_SUPPORTED_ATTRS_WORD0,
         NFSD4_SUPPORTED_ATTRS_WORD1,
         NFSD4_SUPPORTED_ATTRS_WORD2},
@@ -1250,7 +1250,7 @@ nfsd4_decode_write(struct nfsd4_compoundargs *argp, struct nfsd4_write *write)
        READ_BUF(16);
        p = xdr_decode_hyper(p, &write->wr_offset);
        write->wr_stable_how = be32_to_cpup(p++);
-       if (write->wr_stable_how > 2)
+       if (write->wr_stable_how > NFS_FILE_SYNC)
                goto xdr_error;
        write->wr_buflen = be32_to_cpup(p++);
 
@@ -1941,12 +1941,12 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp)
                } else
                        max_reply += nfsd4_max_reply(argp->rqstp, op);
                /*
-                * OP_LOCK may return a conflicting lock.  (Special case
-                * because it will just skip encoding this if it runs
-                * out of xdr buffer space, and it is the only operation
-                * that behaves this way.)
+                * OP_LOCK and OP_LOCKT may return a conflicting lock.
+                * (Special case because it will just skip encoding this
+                * if it runs out of xdr buffer space, and it is the only
+                * operation that behaves this way.)
                 */
-               if (op->opnum == OP_LOCK)
+               if (op->opnum == OP_LOCK || op->opnum == OP_LOCKT)
                        max_reply += NFS4_OPAQUE_LIMIT;
 
                if (op->status) {
@@ -1966,9 +1966,13 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp)
        DECODE_TAIL;
 }
 
-static __be32 *encode_change(__be32 *p, struct kstat *stat, struct inode *inode)
+static __be32 *encode_change(__be32 *p, struct kstat *stat, struct inode *inode,
+                            struct svc_export *exp)
 {
-       if (IS_I_VERSION(inode)) {
+       if (exp->ex_flags & NFSEXP_V4ROOT) {
+               *p++ = cpu_to_be32(convert_to_wallclock(exp->cd->flush_time));
+               *p++ = 0;
+       } else if (IS_I_VERSION(inode)) {
                p = xdr_encode_hyper(p, inode->i_version);
        } else {
                *p++ = cpu_to_be32(stat->ctime.tv_sec);
@@ -2417,8 +2421,11 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp,
 #ifdef CONFIG_NFSD_V4_SECURITY_LABEL
        if ((bmval2 & FATTR4_WORD2_SECURITY_LABEL) ||
             bmval0 & FATTR4_WORD0_SUPPORTED_ATTRS) {
-               err = security_inode_getsecctx(d_inode(dentry),
+               if (exp->ex_flags & NFSEXP_SECURITY_LABEL)
+                       err = security_inode_getsecctx(d_inode(dentry),
                                                &context, &contextlen);
+               else
+                       err = -EOPNOTSUPP;
                contextsupport = (err == 0);
                if (bmval2 & FATTR4_WORD2_SECURITY_LABEL) {
                        if (err == -EOPNOTSUPP)
@@ -2490,7 +2497,7 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp,
                p = xdr_reserve_space(xdr, 8);
                if (!p)
                        goto out_resource;
-               p = encode_change(p, &stat, d_inode(dentry));
+               p = encode_change(p, &stat, d_inode(dentry), exp);
        }
        if (bmval0 & FATTR4_WORD0_SIZE) {
                p = xdr_reserve_space(xdr, 8);
index f3b2f34b10a3f19cd018c9fd8d176dfae44ea70b..73e75ac905258c17bdc107c0c071e8d14df739f0 100644 (file)
@@ -536,6 +536,19 @@ out_free:
        return rv;
 }
 
+static ssize_t
+nfsd_print_version_support(char *buf, int remaining, const char *sep,
+               unsigned vers, unsigned minor)
+{
+       const char *format = (minor == 0) ? "%s%c%u" : "%s%c%u.%u";
+       bool supported = !!nfsd_vers(vers, NFSD_TEST);
+
+       if (vers == 4 && !nfsd_minorversion(minor, NFSD_TEST))
+               supported = false;
+       return snprintf(buf, remaining, format, sep,
+                       supported ? '+' : '-', vers, minor);
+}
+
 static ssize_t __write_versions(struct file *file, char *buf, size_t size)
 {
        char *mesg = buf;
@@ -561,6 +574,7 @@ static ssize_t __write_versions(struct file *file, char *buf, size_t size)
                len = qword_get(&mesg, vers, size);
                if (len <= 0) return -EINVAL;
                do {
+                       enum vers_op cmd;
                        sign = *vers;
                        if (sign == '+' || sign == '-')
                                num = simple_strtol((vers+1), &minorp, 0);
@@ -569,24 +583,22 @@ static ssize_t __write_versions(struct file *file, char *buf, size_t size)
                        if (*minorp == '.') {
                                if (num != 4)
                                        return -EINVAL;
-                               minor = simple_strtoul(minorp+1, NULL, 0);
-                               if (minor == 0)
-                                       return -EINVAL;
-                               if (nfsd_minorversion(minor, sign == '-' ?
-                                                    NFSD_CLEAR : NFSD_SET) < 0)
+                               if (kstrtouint(minorp+1, 0, &minor) < 0)
                                        return -EINVAL;
-                               goto next;
-                       }
+                       } else
+                               minor = 0;
+                       cmd = sign == '-' ? NFSD_CLEAR : NFSD_SET;
                        switch(num) {
                        case 2:
                        case 3:
-                       case 4:
-                               nfsd_vers(num, sign == '-' ? NFSD_CLEAR : NFSD_SET);
+                               nfsd_vers(num, cmd);
                                break;
+                       case 4:
+                               if (nfsd_minorversion(minor, cmd) >= 0)
+                                       break;
                        default:
                                return -EINVAL;
                        }
-               next:
                        vers += len + 1;
                } while ((len = qword_get(&mesg, vers, size)) > 0);
                /* If all get turned off, turn them back on, as
@@ -599,35 +611,23 @@ static ssize_t __write_versions(struct file *file, char *buf, size_t size)
        len = 0;
        sep = "";
        remaining = SIMPLE_TRANSACTION_LIMIT;
-       for (num=2 ; num <= 4 ; num++)
-               if (nfsd_vers(num, NFSD_AVAIL)) {
-                       len = snprintf(buf, remaining, "%s%c%d", sep,
-                                      nfsd_vers(num, NFSD_TEST)?'+':'-',
-                                      num);
-                       sep = " ";
-
-                       if (len >= remaining)
-                               break;
-                       remaining -= len;
-                       buf += len;
-                       tlen += len;
-               }
-       if (nfsd_vers(4, NFSD_AVAIL))
-               for (minor = 1; minor <= NFSD_SUPPORTED_MINOR_VERSION;
-                    minor++) {
-                       len = snprintf(buf, remaining, " %c4.%u",
-                                       (nfsd_vers(4, NFSD_TEST) &&
-                                        nfsd_minorversion(minor, NFSD_TEST)) ?
-                                               '+' : '-',
-                                       minor);
-
+       for (num=2 ; num <= 4 ; num++) {
+               if (!nfsd_vers(num, NFSD_AVAIL))
+                       continue;
+               minor = 0;
+               do {
+                       len = nfsd_print_version_support(buf, remaining,
+                                       sep, num, minor);
                        if (len >= remaining)
-                               break;
+                               goto out;
                        remaining -= len;
                        buf += len;
                        tlen += len;
-               }
-
+                       minor++;
+                       sep = " ";
+               } while (num == 4 && minor <= NFSD_SUPPORTED_MINOR_VERSION);
+       }
+out:
        len = snprintf(buf, remaining, "\n");
        if (len >= remaining)
                return -EINVAL;
index d74c8c44dc3536ffdd6a93b0cd340121233e5a95..d96606801d47ae6ee9927a2991263780c00840d7 100644 (file)
@@ -362,16 +362,16 @@ void              nfsd_lockd_shutdown(void);
        FATTR4_WORD2_MODE_UMASK | \
        NFSD4_2_SECURITY_ATTRS)
 
-extern u32 nfsd_suppattrs[3][3];
+extern const u32 nfsd_suppattrs[3][3];
 
-static inline bool bmval_is_subset(u32 *bm1, u32 *bm2)
+static inline bool bmval_is_subset(const u32 *bm1, const u32 *bm2)
 {
        return !((bm1[0] & ~bm2[0]) ||
                 (bm1[1] & ~bm2[1]) ||
                 (bm1[2] & ~bm2[2]));
 }
 
-static inline bool nfsd_attrs_supported(u32 minorversion, u32 *bmval)
+static inline bool nfsd_attrs_supported(u32 minorversion, const u32 *bmval)
 {
        return bmval_is_subset(bmval, nfsd_suppattrs[minorversion]);
 }
index 010aff5c5a79f2e91eaefaa671f77cafdf4c1cb5..fa82b7707e8531f9b7e8065391c3f54387c2740d 100644 (file)
@@ -204,18 +204,14 @@ nfsd_proc_write(struct svc_rqst *rqstp, struct nfsd_writeargs *argp,
                                        struct nfsd_attrstat  *resp)
 {
        __be32  nfserr;
-       int     stable = 1;
        unsigned long cnt = argp->len;
 
        dprintk("nfsd: WRITE    %s %d bytes at %d\n",
                SVCFH_fmt(&argp->fh),
                argp->len, argp->offset);
 
-       nfserr = nfsd_write(rqstp, fh_copy(&resp->fh, &argp->fh), NULL,
-                                  argp->offset,
-                                  rqstp->rq_vec, argp->vlen,
-                                  &cnt,
-                                  &stable);
+       nfserr = nfsd_write(rqstp, fh_copy(&resp->fh, &argp->fh), argp->offset,
+                               rqstp->rq_vec, argp->vlen, &cnt, NFS_DATA_SYNC);
        return nfsd_return_attrs(nfserr, resp);
 }
 
index e6bfd96734c006587bd1709d0df48818c2065789..efd66da992010ffe5aeb877e2e6f5ab0d850bced 100644 (file)
@@ -153,6 +153,18 @@ int nfsd_vers(int vers, enum vers_op change)
        return 0;
 }
 
+static void
+nfsd_adjust_nfsd_versions4(void)
+{
+       unsigned i;
+
+       for (i = 0; i <= NFSD_SUPPORTED_MINOR_VERSION; i++) {
+               if (nfsd_supported_minorversions[i])
+                       return;
+       }
+       nfsd_vers(4, NFSD_CLEAR);
+}
+
 int nfsd_minorversion(u32 minorversion, enum vers_op change)
 {
        if (minorversion > NFSD_SUPPORTED_MINOR_VERSION)
@@ -160,9 +172,11 @@ int nfsd_minorversion(u32 minorversion, enum vers_op change)
        switch(change) {
        case NFSD_SET:
                nfsd_supported_minorversions[minorversion] = true;
+               nfsd_vers(4, NFSD_SET);
                break;
        case NFSD_CLEAR:
                nfsd_supported_minorversions[minorversion] = false;
+               nfsd_adjust_nfsd_versions4();
                break;
        case NFSD_TEST:
                return nfsd_supported_minorversions[minorversion];
@@ -354,6 +368,8 @@ static int nfsd_inet6addr_event(struct notifier_block *this,
                dprintk("nfsd_inet6addr_event: removed %pI6\n", &ifa->addr);
                sin6.sin6_family = AF_INET6;
                sin6.sin6_addr = ifa->addr;
+               if (ipv6_addr_type(&sin6.sin6_addr) & IPV6_ADDR_LINKLOCAL)
+                       sin6.sin6_scope_id = ifa->idev->dev->ifindex;
                svc_age_temp_xprts_now(nn->nfsd_serv, (struct sockaddr *)&sin6);
        }
 
index 4516e8b7d776305d94fb89f86256ee3fc54dec27..005c911b34ac4553a2c02da05b4e5d975b660710 100644 (file)
@@ -615,6 +615,7 @@ extern struct nfs4_client_reclaim *nfsd4_find_reclaim_client(const char *recdir,
 extern __be32 nfs4_check_open_reclaim(clientid_t *clid,
                struct nfsd4_compound_state *cstate, struct nfsd_net *nn);
 extern int set_callback_cred(void);
+extern void cleanup_callback_cred(void);
 extern void nfsd4_probe_callback(struct nfs4_client *clp);
 extern void nfsd4_probe_callback_sync(struct nfs4_client *clp);
 extern void nfsd4_change_callback(struct nfs4_client *clp, struct nfs4_cb_conn *);
index 26c6fdb4bf67cf1e3e3a843e8e816d7a76eae265..19d50f600e8d48c6f493130076606a6213de258d 100644 (file)
@@ -377,7 +377,7 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap,
        __be32          err;
        int             host_err;
        bool            get_write_count;
-       int             size_change = 0;
+       bool            size_change = (iap->ia_valid & ATTR_SIZE);
 
        if (iap->ia_valid & (ATTR_ATIME | ATTR_MTIME | ATTR_SIZE))
                accmode |= NFSD_MAY_WRITE|NFSD_MAY_OWNER_OVERRIDE;
@@ -390,11 +390,11 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap,
        /* Get inode */
        err = fh_verify(rqstp, fhp, ftype, accmode);
        if (err)
-               goto out;
+               return err;
        if (get_write_count) {
                host_err = fh_want_write(fhp);
                if (host_err)
-                       return nfserrno(host_err);
+                       goto out;
        }
 
        dentry = fhp->fh_dentry;
@@ -405,20 +405,28 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap,
                iap->ia_valid &= ~ATTR_MODE;
 
        if (!iap->ia_valid)
-               goto out;
+               return 0;
 
        nfsd_sanitize_attrs(inode, iap);
 
+       if (check_guard && guardtime != inode->i_ctime.tv_sec)
+               return nfserr_notsync;
+
        /*
         * The size case is special, it changes the file in addition to the
-        * attributes.
+        * attributes, and file systems don't expect it to be mixed with
+        * "random" attribute changes.  We thus split out the size change
+        * into a separate call to ->setattr, and do the rest as a separate
+        * setattr call.
         */
-       if (iap->ia_valid & ATTR_SIZE) {
+       if (size_change) {
                err = nfsd_get_write_access(rqstp, fhp, iap);
                if (err)
-                       goto out;
-               size_change = 1;
+                       return err;
+       }
 
+       fh_lock(fhp);
+       if (size_change) {
                /*
                 * RFC5661, Section 18.30.4:
                 *   Changing the size of a file with SETATTR indirectly
@@ -426,29 +434,36 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap,
                 *
                 * (and similar for the older RFCs)
                 */
-               if (iap->ia_size != i_size_read(inode))
-                       iap->ia_valid |= ATTR_MTIME;
-       }
+               struct iattr size_attr = {
+                       .ia_valid       = ATTR_SIZE | ATTR_CTIME | ATTR_MTIME,
+                       .ia_size        = iap->ia_size,
+               };
 
-       iap->ia_valid |= ATTR_CTIME;
+               host_err = notify_change(dentry, &size_attr, NULL);
+               if (host_err)
+                       goto out_unlock;
+               iap->ia_valid &= ~ATTR_SIZE;
 
-       if (check_guard && guardtime != inode->i_ctime.tv_sec) {
-               err = nfserr_notsync;
-               goto out_put_write_access;
+               /*
+                * Avoid the additional setattr call below if the only other
+                * attribute that the client sends is the mtime, as we update
+                * it as part of the size change above.
+                */
+               if ((iap->ia_valid & ~ATTR_MTIME) == 0)
+                       goto out_unlock;
        }
 
-       fh_lock(fhp);
+       iap->ia_valid |= ATTR_CTIME;
        host_err = notify_change(dentry, iap, NULL);
-       fh_unlock(fhp);
-       err = nfserrno(host_err);
 
-out_put_write_access:
+out_unlock:
+       fh_unlock(fhp);
        if (size_change)
                put_write_access(inode);
-       if (!err)
-               err = nfserrno(commit_metadata(fhp));
 out:
-       return err;
+       if (!host_err)
+               host_err = commit_metadata(fhp);
+       return nfserrno(host_err);
 }
 
 #if defined(CONFIG_NFSD_V4)
@@ -940,14 +955,12 @@ static int wait_for_concurrent_writes(struct file *file)
 __be32
 nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
                                loff_t offset, struct kvec *vec, int vlen,
-                               unsigned long *cnt, int *stablep)
+                               unsigned long *cnt, int stable)
 {
        struct svc_export       *exp;
-       struct inode            *inode;
        mm_segment_t            oldfs;
        __be32                  err = 0;
        int                     host_err;
-       int                     stable = *stablep;
        int                     use_wgather;
        loff_t                  pos = offset;
        unsigned int            pflags = current->flags;
@@ -962,13 +975,11 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
                 */
                current->flags |= PF_LESS_THROTTLE;
 
-       inode = file_inode(file);
-       exp   = fhp->fh_export;
-
+       exp = fhp->fh_export;
        use_wgather = (rqstp->rq_vers == 2) && EX_WGATHER(exp);
 
        if (!EX_ISSYNC(exp))
-               stable = 0;
+               stable = NFS_UNSTABLE;
 
        if (stable && !use_wgather)
                flags |= RWF_SYNC;
@@ -1035,35 +1046,22 @@ __be32 nfsd_read(struct svc_rqst *rqstp, struct svc_fh *fhp,
  * N.B. After this call fhp needs an fh_put
  */
 __be32
-nfsd_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
-               loff_t offset, struct kvec *vec, int vlen, unsigned long *cnt,
-               int *stablep)
+nfsd_write(struct svc_rqst *rqstp, struct svc_fh *fhp, loff_t offset,
+          struct kvec *vec, int vlen, unsigned long *cnt, int stable)
 {
-       __be32                  err = 0;
+       struct file *file = NULL;
+       __be32 err = 0;
 
        trace_write_start(rqstp, fhp, offset, vlen);
 
-       if (file) {
-               err = nfsd_permission(rqstp, fhp->fh_export, fhp->fh_dentry,
-                               NFSD_MAY_WRITE|NFSD_MAY_OWNER_OVERRIDE);
-               if (err)
-                       goto out;
-               trace_write_opened(rqstp, fhp, offset, vlen);
-               err = nfsd_vfs_write(rqstp, fhp, file, offset, vec, vlen, cnt,
-                               stablep);
-               trace_write_io_done(rqstp, fhp, offset, vlen);
-       } else {
-               err = nfsd_open(rqstp, fhp, S_IFREG, NFSD_MAY_WRITE, &file);
-               if (err)
-                       goto out;
+       err = nfsd_open(rqstp, fhp, S_IFREG, NFSD_MAY_WRITE, &file);
+       if (err)
+               goto out;
 
-               trace_write_opened(rqstp, fhp, offset, vlen);
-               if (cnt)
-                       err = nfsd_vfs_write(rqstp, fhp, file, offset, vec, vlen,
-                                            cnt, stablep);
-               trace_write_io_done(rqstp, fhp, offset, vlen);
-               fput(file);
-       }
+       trace_write_opened(rqstp, fhp, offset, vlen);
+       err = nfsd_vfs_write(rqstp, fhp, file, offset, vec, vlen, cnt, stable);
+       trace_write_io_done(rqstp, fhp, offset, vlen);
+       fput(file);
 out:
        trace_write_done(rqstp, fhp, offset, vlen);
        return err;
index 0bf9e7bf5800af3855e3d93aaec194dcbea93ba6..db98c48c735aaae5a914a6e2073391ceab436a2f 100644 (file)
@@ -83,12 +83,12 @@ __be32              nfsd_readv(struct file *, loff_t, struct kvec *, int,
                                unsigned long *);
 __be32                 nfsd_read(struct svc_rqst *, struct svc_fh *,
                                loff_t, struct kvec *, int, unsigned long *);
-__be32                 nfsd_write(struct svc_rqst *, struct svc_fh *,struct file *,
-                               loff_t, struct kvec *,int, unsigned long *, int *);
+__be32                 nfsd_write(struct svc_rqst *, struct svc_fh *, loff_t,
+                               struct kvec *, int, unsigned long *, int);
 __be32         nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp,
                                struct file *file, loff_t offset,
                                struct kvec *vec, int vlen, unsigned long *cnt,
-                               int *stablep);
+                               int stable);
 __be32         nfsd_readlink(struct svc_rqst *, struct svc_fh *,
                                char *, int *);
 __be32         nfsd_symlink(struct svc_rqst *, struct svc_fh *,
index 8a511c0985aafe0a18722c9dd701cf8326dcad59..20d157a518a7dcb14763f5bfd7e317c0a6537387 100644 (file)
@@ -204,8 +204,11 @@ static inline void cache_put(struct cache_head *h, struct cache_detail *cd)
        kref_put(&h->ref, cd->cache_put);
 }
 
-static inline int cache_is_expired(struct cache_detail *detail, struct cache_head *h)
+static inline bool cache_is_expired(struct cache_detail *detail, struct cache_head *h)
 {
+       if (!test_bit(CACHE_VALID, &h->flags))
+               return false;
+
        return  (h->expiry_time < seconds_since_boot()) ||
                (detail->flush_time >= h->last_refresh);
 }
@@ -227,6 +230,7 @@ extern void sunrpc_destroy_cache_detail(struct cache_detail *cd);
 extern int sunrpc_cache_register_pipefs(struct dentry *parent, const char *,
                                        umode_t, struct cache_detail *);
 extern void sunrpc_cache_unregister_pipefs(struct cache_detail *);
+extern void sunrpc_cache_unhash(struct cache_detail *, struct cache_head *);
 
 /* Must store cache_detail in seq_file->private if using next three functions */
 extern void *cache_seq_start(struct seq_file *file, loff_t *pos);
index cfda6adcf33cfcf3c28e46066ec294c6d2902389..245fc59b73247d744682c128bfcae1270e146c26 100644 (file)
@@ -109,6 +109,15 @@ struct rpcrdma_msg {
        } rm_body;
 };
 
+/*
+ * XDR sizes, in quads
+ */
+enum {
+       rpcrdma_fixed_maxsz     = 4,
+       rpcrdma_segment_maxsz   = 4,
+       rpcrdma_readchunk_maxsz = 2 + rpcrdma_segment_maxsz,
+};
+
 /*
  * Smallest RPC/RDMA header: rm_xid through rm_type, then rm_nochunks
  */
index 7321ae933867566013a250623564d722d2800305..e770abeed32d7117c4f2d363f9d7370a60d2c55f 100644 (file)
@@ -400,10 +400,14 @@ struct svc_version {
        struct svc_procedure *  vs_proc;        /* per-procedure info */
        u32                     vs_xdrsize;     /* xdrsize needed for this version */
 
-       unsigned int            vs_hidden : 1,  /* Don't register with portmapper.
-                                                * Only used for nfsacl so far. */
-                               vs_rpcb_optnl:1;/* Don't care the result of register.
-                                                * Only used for nfsv4. */
+       /* Don't register with rpcbind */
+       bool                    vs_hidden;
+
+       /* Don't care if the rpcbind registration fails */
+       bool                    vs_rpcb_optnl;
+
+       /* Need xprt with congestion control */
+       bool                    vs_need_cong_ctrl;
 
        /* Override dispatch function (e.g. when caching replies).
         * A return value of 0 means drop the request. 
index 757fb963696c76b3ab24f754ff89424fb428fc79..b105f73e3ca26355b2ee8b32651b48526a945899 100644 (file)
@@ -70,7 +70,7 @@ extern atomic_t rdma_stat_sq_prod;
  * completes.
  */
 struct svc_rdma_op_ctxt {
-       struct list_head free;
+       struct list_head list;
        struct svc_rdma_op_ctxt *read_hdr;
        struct svc_rdma_fastreg_mr *frmr;
        int hdr_count;
@@ -78,7 +78,6 @@ struct svc_rdma_op_ctxt {
        struct ib_cqe cqe;
        struct ib_cqe reg_cqe;
        struct ib_cqe inv_cqe;
-       struct list_head dto_q;
        u32 byte_len;
        u32 position;
        struct svcxprt_rdma *xprt;
@@ -141,7 +140,8 @@ struct svcxprt_rdma {
        atomic_t             sc_sq_avail;       /* SQEs ready to be consumed */
        unsigned int         sc_sq_depth;       /* Depth of SQ */
        unsigned int         sc_rq_depth;       /* Depth of RQ */
-       u32                  sc_max_requests;   /* Forward credits */
+       __be32               sc_fc_credits;     /* Forward credits */
+       u32                  sc_max_requests;   /* Max requests */
        u32                  sc_max_bc_requests;/* Backward credits */
        int                  sc_max_req_size;   /* Size of each RQ WR buf */
 
@@ -171,7 +171,6 @@ struct svcxprt_rdma {
 
        wait_queue_head_t    sc_send_wait;      /* SQ exhaustion waitlist */
        unsigned long        sc_flags;
-       struct list_head     sc_dto_q;          /* DTO tasklet I/O pending Q */
        struct list_head     sc_read_complete_q;
        struct work_struct   sc_work;
 };
@@ -214,11 +213,7 @@ extern void svc_rdma_xdr_encode_write_list(struct rpcrdma_msg *, int);
 extern void svc_rdma_xdr_encode_reply_array(struct rpcrdma_write_array *, int);
 extern void svc_rdma_xdr_encode_array_chunk(struct rpcrdma_write_array *, int,
                                            __be32, __be64, u32);
-extern void svc_rdma_xdr_encode_reply_header(struct svcxprt_rdma *,
-                                            struct rpcrdma_msg *,
-                                            struct rpcrdma_msg *,
-                                            enum rpcrdma_proc);
-extern int svc_rdma_xdr_get_reply_hdr_len(struct rpcrdma_msg *);
+extern unsigned int svc_rdma_xdr_get_reply_hdr_len(__be32 *rdma_resp);
 
 /* svc_rdma_recvfrom.c */
 extern int svc_rdma_recvfrom(struct svc_rqst *);
index 7440290f64acd3694dfc5c17618c55f6253aae01..ddb7f94a9d06ecc48828b7b00230662b85768d64 100644 (file)
@@ -67,6 +67,7 @@ struct svc_xprt {
 #define XPT_CACHE_AUTH 11              /* cache auth info */
 #define XPT_LOCAL      12              /* connection from loopback interface */
 #define XPT_KILL_TEMP   13             /* call xpo_kill_temp_xprt before closing */
+#define XPT_CONG_CTRL  14              /* has congestion control */
 
        struct svc_serv         *xpt_server;    /* service for transport */
        atomic_t                xpt_reserved;   /* space on outq that is rsvd */
index 0df7bd5d2fb17cf4b9df3b300451cce13075d194..c3be256107c6421432e8a63b041306e224874c2e 100644 (file)
@@ -32,7 +32,8 @@
 #define NFSEXP_ASYNC           0x0010
 #define NFSEXP_GATHERED_WRITES 0x0020
 #define NFSEXP_NOREADDIRPLUS    0x0040
-/* 80 100 currently unused */
+#define NFSEXP_SECURITY_LABEL  0x0080
+/* 0x100 currently unused */
 #define NFSEXP_NOHIDE          0x0200
 #define NFSEXP_NOSUBTREECHECK  0x0400
 #define        NFSEXP_NOAUTHNLM        0x0800          /* Don't authenticate NLM requests - just trust */
@@ -53,7 +54,7 @@
 #define NFSEXP_PNFS            0x20000
 
 /* All flags that we claim to support.  (Note we don't support NOACL.) */
-#define NFSEXP_ALLFLAGS                0x3FE7F
+#define NFSEXP_ALLFLAGS                0x3FEFF
 
 /* The flags that may vary depending on security flavor: */
 #define NFSEXP_SECINFO_FLAGS   (NFSEXP_READONLY | NFSEXP_ROOTSQUASH \
index 1530825985221a1aeb5f77ee81f4251acdef9d96..a54a7a3d28f5300e7940769b1b3bc0b5daa7cfbb 100644 (file)
@@ -1489,8 +1489,8 @@ svcauth_gss_accept(struct svc_rqst *rqstp, __be32 *authp)
        case RPC_GSS_PROC_DESTROY:
                if (gss_write_verf(rqstp, rsci->mechctx, gc->gc_seq))
                        goto auth_err;
-               rsci->h.expiry_time = seconds_since_boot();
-               set_bit(CACHE_NEGATIVE, &rsci->h.flags);
+               /* Delete the entry from the cache_list and call cache_put */
+               sunrpc_cache_unhash(sn->rsc_cache, &rsci->h);
                if (resv->iov_len + 4 > PAGE_SIZE)
                        goto drop;
                svc_putnl(resv, RPC_SUCCESS);
index f39e3e11f9aa283698ced6a8ca92fed5f68140e5..d8639da06d9cd4815a407ef4dec4340bee68caf4 100644 (file)
@@ -362,11 +362,6 @@ void sunrpc_destroy_cache_detail(struct cache_detail *cd)
        cache_purge(cd);
        spin_lock(&cache_list_lock);
        write_lock(&cd->hash_lock);
-       if (cd->entries) {
-               write_unlock(&cd->hash_lock);
-               spin_unlock(&cache_list_lock);
-               goto out;
-       }
        if (current_detail == cd)
                current_detail = NULL;
        list_del_init(&cd->others);
@@ -376,9 +371,6 @@ void sunrpc_destroy_cache_detail(struct cache_detail *cd)
                /* module must be being unloaded so its safe to kill the worker */
                cancel_delayed_work_sync(&cache_cleaner);
        }
-       return;
-out:
-       printk(KERN_ERR "RPC: failed to unregister %s cache\n", cd->name);
 }
 EXPORT_SYMBOL_GPL(sunrpc_destroy_cache_detail);
 
@@ -497,13 +489,32 @@ EXPORT_SYMBOL_GPL(cache_flush);
 
 void cache_purge(struct cache_detail *detail)
 {
-       time_t now = seconds_since_boot();
-       if (detail->flush_time >= now)
-               now = detail->flush_time + 1;
-       /* 'now' is the maximum value any 'last_refresh' can have */
-       detail->flush_time = now;
-       detail->nextcheck = seconds_since_boot();
-       cache_flush();
+       struct cache_head *ch = NULL;
+       struct hlist_head *head = NULL;
+       struct hlist_node *tmp = NULL;
+       int i = 0;
+
+       write_lock(&detail->hash_lock);
+       if (!detail->entries) {
+               write_unlock(&detail->hash_lock);
+               return;
+       }
+
+       dprintk("RPC: %d entries in %s cache\n", detail->entries, detail->name);
+       for (i = 0; i < detail->hash_size; i++) {
+               head = &detail->hash_table[i];
+               hlist_for_each_entry_safe(ch, tmp, head, cache_list) {
+                       hlist_del_init(&ch->cache_list);
+                       detail->entries--;
+
+                       set_bit(CACHE_CLEANED, &ch->flags);
+                       write_unlock(&detail->hash_lock);
+                       cache_fresh_unlocked(ch, detail);
+                       cache_put(ch, detail);
+                       write_lock(&detail->hash_lock);
+               }
+       }
+       write_unlock(&detail->hash_lock);
 }
 EXPORT_SYMBOL_GPL(cache_purge);
 
@@ -1855,3 +1866,15 @@ void sunrpc_cache_unregister_pipefs(struct cache_detail *cd)
 }
 EXPORT_SYMBOL_GPL(sunrpc_cache_unregister_pipefs);
 
+void sunrpc_cache_unhash(struct cache_detail *cd, struct cache_head *h)
+{
+       write_lock(&cd->hash_lock);
+       if (!hlist_unhashed(&h->cache_list)){
+               hlist_del_init(&h->cache_list);
+               cd->entries--;
+               write_unlock(&cd->hash_lock);
+               cache_put(h, cd);
+       } else
+               write_unlock(&cd->hash_lock);
+}
+EXPORT_SYMBOL_GPL(sunrpc_cache_unhash);
index 2e22889a8837bd6d0dc3be1d1d6f8528829c0080..b94efd93d3e498a94bec4fee5eec8b9748052bdb 100644 (file)
@@ -385,7 +385,7 @@ static int svc_uses_rpcbind(struct svc_serv *serv)
                for (i = 0; i < progp->pg_nvers; i++) {
                        if (progp->pg_vers[i] == NULL)
                                continue;
-                       if (progp->pg_vers[i]->vs_hidden == 0)
+                       if (!progp->pg_vers[i]->vs_hidden)
                                return 1;
                }
        }
@@ -976,6 +976,13 @@ int svc_register(const struct svc_serv *serv, struct net *net,
                        if (vers->vs_hidden)
                                continue;
 
+                       /*
+                        * Don't register a UDP port if we need congestion
+                        * control.
+                        */
+                       if (vers->vs_need_cong_ctrl && proto == IPPROTO_UDP)
+                               continue;
+
                        error = __svc_register(net, progp->pg_name, progp->pg_prog,
                                                i, family, proto, port);
 
@@ -1169,6 +1176,21 @@ svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv)
          !(versp = progp->pg_vers[vers]))
                goto err_bad_vers;
 
+       /*
+        * Some protocol versions (namely NFSv4) require some form of
+        * congestion control.  (See RFC 7530 section 3.1 paragraph 2)
+        * In other words, UDP is not allowed. We mark those when setting
+        * up the svc_xprt, and verify that here.
+        *
+        * The spec is not very clear about what error should be returned
+        * when someone tries to access a server that is listening on UDP
+        * for lower versions. RPC_PROG_MISMATCH seems to be the closest
+        * fit.
+        */
+       if (versp->vs_need_cong_ctrl &&
+           !test_bit(XPT_CONG_CTRL, &rqstp->rq_xprt->xpt_flags))
+               goto err_bad_vers;
+
        procp = versp->vs_proc + proc;
        if (proc >= versp->vs_nproc || !procp->pc_func)
                goto err_bad_proc;
index d227d97f7ad4d3b3102b6c329dcfe925502fc471..8931e33b65412d7b8bbe8b3872e5f7d7b27d92d5 100644 (file)
@@ -1306,6 +1306,7 @@ static void svc_tcp_init(struct svc_sock *svsk, struct svc_serv *serv)
        svc_xprt_init(sock_net(svsk->sk_sock->sk), &svc_tcp_class,
                      &svsk->sk_xprt, serv);
        set_bit(XPT_CACHE_AUTH, &svsk->sk_xprt.xpt_flags);
+       set_bit(XPT_CONG_CTRL, &svsk->sk_xprt.xpt_flags);
        if (sk->sk_state == TCP_LISTEN) {
                dprintk("setting up TCP socket for listening\n");
                set_bit(XPT_LISTENER, &svsk->sk_xprt.xpt_flags);
index cb1e48e54eb1440181976a352229783f202f896d..ff1df40f0d261bc956f1af3410d8780f4c582b83 100644 (file)
@@ -201,19 +201,20 @@ rpcrdma_bc_send_request(struct svcxprt_rdma *rdma, struct rpc_rqst *rqst)
 {
        struct rpc_xprt *xprt = rqst->rq_xprt;
        struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
-       struct rpcrdma_msg *headerp = (struct rpcrdma_msg *)rqst->rq_buffer;
+       __be32 *p;
        int rc;
 
        /* Space in the send buffer for an RPC/RDMA header is reserved
         * via xprt->tsh_size.
         */
-       headerp->rm_xid = rqst->rq_xid;
-       headerp->rm_vers = rpcrdma_version;
-       headerp->rm_credit = cpu_to_be32(r_xprt->rx_buf.rb_bc_max_requests);
-       headerp->rm_type = rdma_msg;
-       headerp->rm_body.rm_chunks[0] = xdr_zero;
-       headerp->rm_body.rm_chunks[1] = xdr_zero;
-       headerp->rm_body.rm_chunks[2] = xdr_zero;
+       p = rqst->rq_buffer;
+       *p++ = rqst->rq_xid;
+       *p++ = rpcrdma_version;
+       *p++ = cpu_to_be32(r_xprt->rx_buf.rb_bc_max_requests);
+       *p++ = rdma_msg;
+       *p++ = xdr_zero;
+       *p++ = xdr_zero;
+       *p   = xdr_zero;
 
 #ifdef SVCRDMA_BACKCHANNEL_DEBUG
        pr_info("%s: %*ph\n", __func__, 64, rqst->rq_buffer);
index 0ba9887f3e22bab9a1e3e809df5c4e2c23a510fe..1c4aabf0f65772c13265421262feb030ab4a58ca 100644 (file)
@@ -1,4 +1,5 @@
 /*
+ * Copyright (c) 2016 Oracle. All rights reserved.
  * Copyright (c) 2005-2006 Network Appliance, Inc. All rights reserved.
  *
  * This software is available to you under a choice of one of two
 
 #define RPCDBG_FACILITY        RPCDBG_SVCXPRT
 
-/*
- * Decodes a read chunk list. The expected format is as follows:
- *    descrim  : xdr_one
- *    position : __be32 offset into XDR stream
- *    handle   : __be32 RKEY
- *    . . .
- *  end-of-list: xdr_zero
- */
-static __be32 *decode_read_list(__be32 *va, __be32 *vaend)
+static __be32 *xdr_check_read_list(__be32 *p, __be32 *end)
 {
-       struct rpcrdma_read_chunk *ch = (struct rpcrdma_read_chunk *)va;
+       __be32 *next;
 
-       while (ch->rc_discrim != xdr_zero) {
-               if (((unsigned long)ch + sizeof(struct rpcrdma_read_chunk)) >
-                   (unsigned long)vaend) {
-                       dprintk("svcrdma: vaend=%p, ch=%p\n", vaend, ch);
+       while (*p++ != xdr_zero) {
+               next = p + rpcrdma_readchunk_maxsz - 1;
+               if (next > end)
                        return NULL;
-               }
-               ch++;
+               p = next;
        }
-       return &ch->rc_position;
+       return p;
 }
 
-/*
- * Decodes a write chunk list. The expected format is as follows:
- *    descrim  : xdr_one
- *    nchunks  : <count>
- *       handle   : __be32 RKEY           ---+
- *       length   : __be32 <len of segment>  |
- *       offset   : remove va                + <count>
- *       . . .                               |
- *                                        ---+
- */
-static __be32 *decode_write_list(__be32 *va, __be32 *vaend)
+static __be32 *xdr_check_write_list(__be32 *p, __be32 *end)
 {
-       unsigned long start, end;
-       int nchunks;
-
-       struct rpcrdma_write_array *ary =
-               (struct rpcrdma_write_array *)va;
+       __be32 *next;
 
-       /* Check for not write-array */
-       if (ary->wc_discrim == xdr_zero)
-               return &ary->wc_nchunks;
-
-       if ((unsigned long)ary + sizeof(struct rpcrdma_write_array) >
-           (unsigned long)vaend) {
-               dprintk("svcrdma: ary=%p, vaend=%p\n", ary, vaend);
-               return NULL;
-       }
-       nchunks = be32_to_cpu(ary->wc_nchunks);
-
-       start = (unsigned long)&ary->wc_array[0];
-       end = (unsigned long)vaend;
-       if (nchunks < 0 ||
-           nchunks > (SIZE_MAX - start) / sizeof(struct rpcrdma_write_chunk) ||
-           (start + (sizeof(struct rpcrdma_write_chunk) * nchunks)) > end) {
-               dprintk("svcrdma: ary=%p, wc_nchunks=%d, vaend=%p\n",
-                       ary, nchunks, vaend);
-               return NULL;
+       while (*p++ != xdr_zero) {
+               next = p + 1 + be32_to_cpup(p) * rpcrdma_segment_maxsz;
+               if (next > end)
+                       return NULL;
+               p = next;
        }
-       /*
-        * rs_length is the 2nd 4B field in wc_target and taking its
-        * address skips the list terminator
-        */
-       return &ary->wc_array[nchunks].wc_target.rs_length;
+       return p;
 }
 
-static __be32 *decode_reply_array(__be32 *va, __be32 *vaend)
+static __be32 *xdr_check_reply_chunk(__be32 *p, __be32 *end)
 {
-       unsigned long start, end;
-       int nchunks;
-       struct rpcrdma_write_array *ary =
-               (struct rpcrdma_write_array *)va;
-
-       /* Check for no reply-array */
-       if (ary->wc_discrim == xdr_zero)
-               return &ary->wc_nchunks;
-
-       if ((unsigned long)ary + sizeof(struct rpcrdma_write_array) >
-           (unsigned long)vaend) {
-               dprintk("svcrdma: ary=%p, vaend=%p\n", ary, vaend);
-               return NULL;
-       }
-       nchunks = be32_to_cpu(ary->wc_nchunks);
-
-       start = (unsigned long)&ary->wc_array[0];
-       end = (unsigned long)vaend;
-       if (nchunks < 0 ||
-           nchunks > (SIZE_MAX - start) / sizeof(struct rpcrdma_write_chunk) ||
-           (start + (sizeof(struct rpcrdma_write_chunk) * nchunks)) > end) {
-               dprintk("svcrdma: ary=%p, wc_nchunks=%d, vaend=%p\n",
-                       ary, nchunks, vaend);
-               return NULL;
+       __be32 *next;
+
+       if (*p++ != xdr_zero) {
+               next = p + 1 + be32_to_cpup(p) * rpcrdma_segment_maxsz;
+               if (next > end)
+                       return NULL;
+               p = next;
        }
-       return (__be32 *)&ary->wc_array[nchunks];
+       return p;
 }
 
 /**
@@ -158,87 +100,71 @@ static __be32 *decode_reply_array(__be32 *va, __be32 *vaend)
  */
 int svc_rdma_xdr_decode_req(struct xdr_buf *rq_arg)
 {
-       struct rpcrdma_msg *rmsgp;
-       __be32 *va, *vaend;
-       unsigned int len;
-       u32 hdr_len;
+       __be32 *p, *end, *rdma_argp;
+       unsigned int hdr_len;
 
        /* Verify that there's enough bytes for header + something */
-       if (rq_arg->len <= RPCRDMA_HDRLEN_ERR) {
-               dprintk("svcrdma: header too short = %d\n",
-                       rq_arg->len);
-               return -EINVAL;
-       }
+       if (rq_arg->len <= RPCRDMA_HDRLEN_ERR)
+               goto out_short;
 
-       rmsgp = (struct rpcrdma_msg *)rq_arg->head[0].iov_base;
-       if (rmsgp->rm_vers != rpcrdma_version) {
-               dprintk("%s: bad version %u\n", __func__,
-                       be32_to_cpu(rmsgp->rm_vers));
-               return -EPROTONOSUPPORT;
-       }
+       rdma_argp = rq_arg->head[0].iov_base;
+       if (*(rdma_argp + 1) != rpcrdma_version)
+               goto out_version;
 
-       switch (be32_to_cpu(rmsgp->rm_type)) {
-       case RDMA_MSG:
-       case RDMA_NOMSG:
+       switch (*(rdma_argp + 3)) {
+       case rdma_msg:
+       case rdma_nomsg:
                break;
 
-       case RDMA_DONE:
-               /* Just drop it */
-               dprintk("svcrdma: dropping RDMA_DONE message\n");
-               return 0;
-
-       case RDMA_ERROR:
-               /* Possible if this is a backchannel reply.
-                * XXX: We should cancel this XID, though.
-                */
-               dprintk("svcrdma: dropping RDMA_ERROR message\n");
-               return 0;
-
-       case RDMA_MSGP:
-               /* Pull in the extra for the padded case, bump our pointer */
-               rmsgp->rm_body.rm_padded.rm_align =
-                       be32_to_cpu(rmsgp->rm_body.rm_padded.rm_align);
-               rmsgp->rm_body.rm_padded.rm_thresh =
-                       be32_to_cpu(rmsgp->rm_body.rm_padded.rm_thresh);
-
-               va = &rmsgp->rm_body.rm_padded.rm_pempty[4];
-               rq_arg->head[0].iov_base = va;
-               len = (u32)((unsigned long)va - (unsigned long)rmsgp);
-               rq_arg->head[0].iov_len -= len;
-               if (len > rq_arg->len)
-                       return -EINVAL;
-               return len;
-       default:
-               dprintk("svcrdma: bad rdma procedure (%u)\n",
-                       be32_to_cpu(rmsgp->rm_type));
-               return -EINVAL;
-       }
+       case rdma_done:
+               goto out_drop;
 
-       /* The chunk list may contain either a read chunk list or a write
-        * chunk list and a reply chunk list.
-        */
-       va = &rmsgp->rm_body.rm_chunks[0];
-       vaend = (__be32 *)((unsigned long)rmsgp + rq_arg->len);
-       va = decode_read_list(va, vaend);
-       if (!va) {
-               dprintk("svcrdma: failed to decode read list\n");
-               return -EINVAL;
-       }
-       va = decode_write_list(va, vaend);
-       if (!va) {
-               dprintk("svcrdma: failed to decode write list\n");
-               return -EINVAL;
-       }
-       va = decode_reply_array(va, vaend);
-       if (!va) {
-               dprintk("svcrdma: failed to decode reply chunk\n");
-               return -EINVAL;
+       case rdma_error:
+               goto out_drop;
+
+       default:
+               goto out_proc;
        }
 
-       rq_arg->head[0].iov_base = va;
-       hdr_len = (unsigned long)va - (unsigned long)rmsgp;
+       end = (__be32 *)((unsigned long)rdma_argp + rq_arg->len);
+       p = xdr_check_read_list(rdma_argp + 4, end);
+       if (!p)
+               goto out_inval;
+       p = xdr_check_write_list(p, end);
+       if (!p)
+               goto out_inval;
+       p = xdr_check_reply_chunk(p, end);
+       if (!p)
+               goto out_inval;
+       if (p > end)
+               goto out_inval;
+
+       rq_arg->head[0].iov_base = p;
+       hdr_len = (unsigned long)p - (unsigned long)rdma_argp;
        rq_arg->head[0].iov_len -= hdr_len;
        return hdr_len;
+
+out_short:
+       dprintk("svcrdma: header too short = %d\n", rq_arg->len);
+       return -EINVAL;
+
+out_version:
+       dprintk("svcrdma: bad xprt version: %u\n",
+               be32_to_cpup(rdma_argp + 1));
+       return -EPROTONOSUPPORT;
+
+out_drop:
+       dprintk("svcrdma: dropping RDMA_DONE/ERROR message\n");
+       return 0;
+
+out_proc:
+       dprintk("svcrdma: bad rdma procedure (%u)\n",
+               be32_to_cpup(rdma_argp + 3));
+       return -EINVAL;
+
+out_inval:
+       dprintk("svcrdma: failed to parse transport header\n");
+       return -EINVAL;
 }
 
 int svc_rdma_xdr_encode_error(struct svcxprt_rdma *xprt,
@@ -249,7 +175,7 @@ int svc_rdma_xdr_encode_error(struct svcxprt_rdma *xprt,
 
        *va++ = rmsgp->rm_xid;
        *va++ = rmsgp->rm_vers;
-       *va++ = cpu_to_be32(xprt->sc_max_requests);
+       *va++ = xprt->sc_fc_credits;
        *va++ = rdma_error;
        *va++ = cpu_to_be32(err);
        if (err == ERR_VERS) {
@@ -260,32 +186,35 @@ int svc_rdma_xdr_encode_error(struct svcxprt_rdma *xprt,
        return (int)((unsigned long)va - (unsigned long)startp);
 }
 
-int svc_rdma_xdr_get_reply_hdr_len(struct rpcrdma_msg *rmsgp)
+/**
+ * svc_rdma_xdr_get_reply_hdr_len - Get length of Reply transport header
+ * @rdma_resp: buffer containing Reply transport header
+ *
+ * Returns length of transport header, in bytes.
+ */
+unsigned int svc_rdma_xdr_get_reply_hdr_len(__be32 *rdma_resp)
 {
-       struct rpcrdma_write_array *wr_ary;
+       unsigned int nsegs;
+       __be32 *p;
 
-       /* There is no read-list in a reply */
+       p = rdma_resp;
 
-       /* skip write list */
-       wr_ary = (struct rpcrdma_write_array *)
-               &rmsgp->rm_body.rm_chunks[1];
-       if (wr_ary->wc_discrim)
-               wr_ary = (struct rpcrdma_write_array *)
-                       &wr_ary->wc_array[be32_to_cpu(wr_ary->wc_nchunks)].
-                       wc_target.rs_length;
-       else
-               wr_ary = (struct rpcrdma_write_array *)
-                       &wr_ary->wc_nchunks;
-
-       /* skip reply array */
-       if (wr_ary->wc_discrim)
-               wr_ary = (struct rpcrdma_write_array *)
-                       &wr_ary->wc_array[be32_to_cpu(wr_ary->wc_nchunks)];
-       else
-               wr_ary = (struct rpcrdma_write_array *)
-                       &wr_ary->wc_nchunks;
-
-       return (unsigned long) wr_ary - (unsigned long) rmsgp;
+       /* RPC-over-RDMA V1 replies never have a Read list. */
+       p += rpcrdma_fixed_maxsz + 1;
+
+       /* Skip Write list. */
+       while (*p++ != xdr_zero) {
+               nsegs = be32_to_cpup(p++);
+               p += nsegs * rpcrdma_segment_maxsz;
+       }
+
+       /* Skip Reply chunk. */
+       if (*p++ != xdr_zero) {
+               nsegs = be32_to_cpup(p++);
+               p += nsegs * rpcrdma_segment_maxsz;
+       }
+
+       return (unsigned long)p - (unsigned long)rdma_resp;
 }
 
 void svc_rdma_xdr_encode_write_list(struct rpcrdma_msg *rmsgp, int chunks)
@@ -326,19 +255,3 @@ void svc_rdma_xdr_encode_array_chunk(struct rpcrdma_write_array *ary,
        seg->rs_offset = rs_offset;
        seg->rs_length = cpu_to_be32(write_len);
 }
-
-void svc_rdma_xdr_encode_reply_header(struct svcxprt_rdma *xprt,
-                                 struct rpcrdma_msg *rdma_argp,
-                                 struct rpcrdma_msg *rdma_resp,
-                                 enum rpcrdma_proc rdma_type)
-{
-       rdma_resp->rm_xid = rdma_argp->rm_xid;
-       rdma_resp->rm_vers = rdma_argp->rm_vers;
-       rdma_resp->rm_credit = cpu_to_be32(xprt->sc_max_requests);
-       rdma_resp->rm_type = cpu_to_be32(rdma_type);
-
-       /* Encode <nul> chunks lists */
-       rdma_resp->rm_body.rm_chunks[0] = xdr_zero;
-       rdma_resp->rm_body.rm_chunks[1] = xdr_zero;
-       rdma_resp->rm_body.rm_chunks[2] = xdr_zero;
-}
index 172b537f8cfc942ef62574b74cff7ac5f421fba9..f7b2daf72a86582807798379ac3be336b061a958 100644 (file)
@@ -606,26 +606,24 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp)
 
        dprintk("svcrdma: rqstp=%p\n", rqstp);
 
-       spin_lock_bh(&rdma_xprt->sc_rq_dto_lock);
+       spin_lock(&rdma_xprt->sc_rq_dto_lock);
        if (!list_empty(&rdma_xprt->sc_read_complete_q)) {
-               ctxt = list_entry(rdma_xprt->sc_read_complete_q.next,
-                                 struct svc_rdma_op_ctxt,
-                                 dto_q);
-               list_del_init(&ctxt->dto_q);
-               spin_unlock_bh(&rdma_xprt->sc_rq_dto_lock);
+               ctxt = list_first_entry(&rdma_xprt->sc_read_complete_q,
+                                       struct svc_rdma_op_ctxt, list);
+               list_del(&ctxt->list);
+               spin_unlock(&rdma_xprt->sc_rq_dto_lock);
                rdma_read_complete(rqstp, ctxt);
                goto complete;
        } else if (!list_empty(&rdma_xprt->sc_rq_dto_q)) {
-               ctxt = list_entry(rdma_xprt->sc_rq_dto_q.next,
-                                 struct svc_rdma_op_ctxt,
-                                 dto_q);
-               list_del_init(&ctxt->dto_q);
+               ctxt = list_first_entry(&rdma_xprt->sc_rq_dto_q,
+                                       struct svc_rdma_op_ctxt, list);
+               list_del(&ctxt->list);
        } else {
                atomic_inc(&rdma_stat_rq_starve);
                clear_bit(XPT_DATA, &xprt->xpt_flags);
                ctxt = NULL;
        }
-       spin_unlock_bh(&rdma_xprt->sc_rq_dto_lock);
+       spin_unlock(&rdma_xprt->sc_rq_dto_lock);
        if (!ctxt) {
                /* This is the EAGAIN path. The svc_recv routine will
                 * return -EAGAIN, the nfsd thread will go to call into
index ad4d286a83c5195fe663dd581cd49a5c9f9a6166..515221b16d0956ea027e91985c89606c403d5109 100644 (file)
@@ -476,7 +476,8 @@ static int send_reply(struct svcxprt_rdma *rdma,
 
        /* Prepare the SGE for the RPCRDMA Header */
        ctxt->sge[0].lkey = rdma->sc_pd->local_dma_lkey;
-       ctxt->sge[0].length = svc_rdma_xdr_get_reply_hdr_len(rdma_resp);
+       ctxt->sge[0].length =
+           svc_rdma_xdr_get_reply_hdr_len((__be32 *)rdma_resp);
        ctxt->sge[0].addr =
            ib_dma_map_page(rdma->sc_cm_id->device, page, 0,
                            ctxt->sge[0].length, DMA_TO_DEVICE);
@@ -559,12 +560,12 @@ int svc_rdma_sendto(struct svc_rqst *rqstp)
        struct rpcrdma_msg *rdma_argp;
        struct rpcrdma_msg *rdma_resp;
        struct rpcrdma_write_array *wr_ary, *rp_ary;
-       enum rpcrdma_proc reply_type;
        int ret;
        int inline_bytes;
        struct page *res_page;
        struct svc_rdma_req_map *vec;
        u32 inv_rkey;
+       __be32 *p;
 
        dprintk("svcrdma: sending response for rqstp=%p\n", rqstp);
 
@@ -596,12 +597,17 @@ int svc_rdma_sendto(struct svc_rqst *rqstp)
        if (!res_page)
                goto err0;
        rdma_resp = page_address(res_page);
-       if (rp_ary)
-               reply_type = RDMA_NOMSG;
-       else
-               reply_type = RDMA_MSG;
-       svc_rdma_xdr_encode_reply_header(rdma, rdma_argp,
-                                        rdma_resp, reply_type);
+
+       p = &rdma_resp->rm_xid;
+       *p++ = rdma_argp->rm_xid;
+       *p++ = rdma_argp->rm_vers;
+       *p++ = rdma->sc_fc_credits;
+       *p++ = rp_ary ? rdma_nomsg : rdma_msg;
+
+       /* Start with empty chunks */
+       *p++ = xdr_zero;
+       *p++ = xdr_zero;
+       *p   = xdr_zero;
 
        /* Send any write-chunk data and build resp write-list */
        if (wr_ary) {
index 39652d390a9c60bc026199a7dcb5ef996bcd65ab..c13a5c35ce14d992515fa99e456976ed0cd1c382 100644 (file)
@@ -157,8 +157,7 @@ static struct svc_rdma_op_ctxt *alloc_ctxt(struct svcxprt_rdma *xprt,
        ctxt = kmalloc(sizeof(*ctxt), flags);
        if (ctxt) {
                ctxt->xprt = xprt;
-               INIT_LIST_HEAD(&ctxt->free);
-               INIT_LIST_HEAD(&ctxt->dto_q);
+               INIT_LIST_HEAD(&ctxt->list);
        }
        return ctxt;
 }
@@ -180,7 +179,7 @@ static bool svc_rdma_prealloc_ctxts(struct svcxprt_rdma *xprt)
                        dprintk("svcrdma: No memory for RDMA ctxt\n");
                        return false;
                }
-               list_add(&ctxt->free, &xprt->sc_ctxts);
+               list_add(&ctxt->list, &xprt->sc_ctxts);
        }
        return true;
 }
@@ -189,15 +188,15 @@ struct svc_rdma_op_ctxt *svc_rdma_get_context(struct svcxprt_rdma *xprt)
 {
        struct svc_rdma_op_ctxt *ctxt = NULL;
 
-       spin_lock_bh(&xprt->sc_ctxt_lock);
+       spin_lock(&xprt->sc_ctxt_lock);
        xprt->sc_ctxt_used++;
        if (list_empty(&xprt->sc_ctxts))
                goto out_empty;
 
        ctxt = list_first_entry(&xprt->sc_ctxts,
-                               struct svc_rdma_op_ctxt, free);
-       list_del_init(&ctxt->free);
-       spin_unlock_bh(&xprt->sc_ctxt_lock);
+                               struct svc_rdma_op_ctxt, list);
+       list_del(&ctxt->list);
+       spin_unlock(&xprt->sc_ctxt_lock);
 
 out:
        ctxt->count = 0;
@@ -209,15 +208,15 @@ out_empty:
        /* Either pre-allocation missed the mark, or send
         * queue accounting is broken.
         */
-       spin_unlock_bh(&xprt->sc_ctxt_lock);
+       spin_unlock(&xprt->sc_ctxt_lock);
 
        ctxt = alloc_ctxt(xprt, GFP_NOIO);
        if (ctxt)
                goto out;
 
-       spin_lock_bh(&xprt->sc_ctxt_lock);
+       spin_lock(&xprt->sc_ctxt_lock);
        xprt->sc_ctxt_used--;
-       spin_unlock_bh(&xprt->sc_ctxt_lock);
+       spin_unlock(&xprt->sc_ctxt_lock);
        WARN_ONCE(1, "svcrdma: empty RDMA ctxt list?\n");
        return NULL;
 }
@@ -254,10 +253,10 @@ void svc_rdma_put_context(struct svc_rdma_op_ctxt *ctxt, int free_pages)
                for (i = 0; i < ctxt->count; i++)
                        put_page(ctxt->pages[i]);
 
-       spin_lock_bh(&xprt->sc_ctxt_lock);
+       spin_lock(&xprt->sc_ctxt_lock);
        xprt->sc_ctxt_used--;
-       list_add(&ctxt->free, &xprt->sc_ctxts);
-       spin_unlock_bh(&xprt->sc_ctxt_lock);
+       list_add(&ctxt->list, &xprt->sc_ctxts);
+       spin_unlock(&xprt->sc_ctxt_lock);
 }
 
 static void svc_rdma_destroy_ctxts(struct svcxprt_rdma *xprt)
@@ -266,8 +265,8 @@ static void svc_rdma_destroy_ctxts(struct svcxprt_rdma *xprt)
                struct svc_rdma_op_ctxt *ctxt;
 
                ctxt = list_first_entry(&xprt->sc_ctxts,
-                                       struct svc_rdma_op_ctxt, free);
-               list_del(&ctxt->free);
+                                       struct svc_rdma_op_ctxt, list);
+               list_del(&ctxt->list);
                kfree(ctxt);
        }
 }
@@ -404,7 +403,7 @@ static void svc_rdma_wc_receive(struct ib_cq *cq, struct ib_wc *wc)
        /* All wc fields are now known to be valid */
        ctxt->byte_len = wc->byte_len;
        spin_lock(&xprt->sc_rq_dto_lock);
-       list_add_tail(&ctxt->dto_q, &xprt->sc_rq_dto_q);
+       list_add_tail(&ctxt->list, &xprt->sc_rq_dto_q);
        spin_unlock(&xprt->sc_rq_dto_lock);
 
        set_bit(XPT_DATA, &xprt->sc_xprt.xpt_flags);
@@ -525,7 +524,7 @@ void svc_rdma_wc_read(struct ib_cq *cq, struct ib_wc *wc)
 
                read_hdr = ctxt->read_hdr;
                spin_lock(&xprt->sc_rq_dto_lock);
-               list_add_tail(&read_hdr->dto_q,
+               list_add_tail(&read_hdr->list,
                              &xprt->sc_read_complete_q);
                spin_unlock(&xprt->sc_rq_dto_lock);
 
@@ -557,7 +556,6 @@ static struct svcxprt_rdma *rdma_create_xprt(struct svc_serv *serv,
                return NULL;
        svc_xprt_init(&init_net, &svc_rdma_class, &cma_xprt->sc_xprt, serv);
        INIT_LIST_HEAD(&cma_xprt->sc_accept_q);
-       INIT_LIST_HEAD(&cma_xprt->sc_dto_q);
        INIT_LIST_HEAD(&cma_xprt->sc_rq_dto_q);
        INIT_LIST_HEAD(&cma_xprt->sc_read_complete_q);
        INIT_LIST_HEAD(&cma_xprt->sc_frmr_q);
@@ -571,6 +569,14 @@ static struct svcxprt_rdma *rdma_create_xprt(struct svc_serv *serv,
        spin_lock_init(&cma_xprt->sc_ctxt_lock);
        spin_lock_init(&cma_xprt->sc_map_lock);
 
+       /*
+        * Note that setting XPT_CONG_CTRL asserts that the underlying
+        * transport has some form of congestion control (see RFC 7530
+        * section 3.1 paragraph 2). For now, we assume that all supported
+        * RDMA transports are suitable here.
+        */
+       set_bit(XPT_CONG_CTRL, &cma_xprt->sc_xprt.xpt_flags);
+
        if (listener)
                set_bit(XPT_LISTENER, &cma_xprt->sc_xprt.xpt_flags);
 
@@ -923,14 +929,14 @@ struct svc_rdma_fastreg_mr *svc_rdma_get_frmr(struct svcxprt_rdma *rdma)
 {
        struct svc_rdma_fastreg_mr *frmr = NULL;
 
-       spin_lock_bh(&rdma->sc_frmr_q_lock);
+       spin_lock(&rdma->sc_frmr_q_lock);
        if (!list_empty(&rdma->sc_frmr_q)) {
                frmr = list_entry(rdma->sc_frmr_q.next,
                                  struct svc_rdma_fastreg_mr, frmr_list);
                list_del_init(&frmr->frmr_list);
                frmr->sg_nents = 0;
        }
-       spin_unlock_bh(&rdma->sc_frmr_q_lock);
+       spin_unlock(&rdma->sc_frmr_q_lock);
        if (frmr)
                return frmr;
 
@@ -943,10 +949,10 @@ void svc_rdma_put_frmr(struct svcxprt_rdma *rdma,
        if (frmr) {
                ib_dma_unmap_sg(rdma->sc_cm_id->device,
                                frmr->sg, frmr->sg_nents, frmr->direction);
-               spin_lock_bh(&rdma->sc_frmr_q_lock);
+               spin_lock(&rdma->sc_frmr_q_lock);
                WARN_ON_ONCE(!list_empty(&frmr->frmr_list));
                list_add(&frmr->frmr_list, &rdma->sc_frmr_q);
-               spin_unlock_bh(&rdma->sc_frmr_q_lock);
+               spin_unlock(&rdma->sc_frmr_q_lock);
        }
 }
 
@@ -1002,6 +1008,7 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
        newxprt->sc_max_req_size = svcrdma_max_req_size;
        newxprt->sc_max_requests = min_t(u32, dev->attrs.max_qp_wr,
                                         svcrdma_max_requests);
+       newxprt->sc_fc_credits = cpu_to_be32(newxprt->sc_max_requests);
        newxprt->sc_max_bc_requests = min_t(u32, dev->attrs.max_qp_wr,
                                            svcrdma_max_bc_requests);
        newxprt->sc_rq_depth = newxprt->sc_max_requests +
@@ -1027,13 +1034,13 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
                goto errout;
        }
        newxprt->sc_sq_cq = ib_alloc_cq(dev, newxprt, newxprt->sc_sq_depth,
-                                       0, IB_POLL_SOFTIRQ);
+                                       0, IB_POLL_WORKQUEUE);
        if (IS_ERR(newxprt->sc_sq_cq)) {
                dprintk("svcrdma: error creating SQ CQ for connect request\n");
                goto errout;
        }
        newxprt->sc_rq_cq = ib_alloc_cq(dev, newxprt, newxprt->sc_rq_depth,
-                                       0, IB_POLL_SOFTIRQ);
+                                       0, IB_POLL_WORKQUEUE);
        if (IS_ERR(newxprt->sc_rq_cq)) {
                dprintk("svcrdma: error creating RQ CQ for connect request\n");
                goto errout;
@@ -1213,20 +1220,18 @@ static void __svc_rdma_free(struct work_struct *work)
         */
        while (!list_empty(&rdma->sc_read_complete_q)) {
                struct svc_rdma_op_ctxt *ctxt;
-               ctxt = list_entry(rdma->sc_read_complete_q.next,
-                                 struct svc_rdma_op_ctxt,
-                                 dto_q);
-               list_del_init(&ctxt->dto_q);
+               ctxt = list_first_entry(&rdma->sc_read_complete_q,
+                                       struct svc_rdma_op_ctxt, list);
+               list_del(&ctxt->list);
                svc_rdma_put_context(ctxt, 1);
        }
 
        /* Destroy queued, but not processed recv completions */
        while (!list_empty(&rdma->sc_rq_dto_q)) {
                struct svc_rdma_op_ctxt *ctxt;
-               ctxt = list_entry(rdma->sc_rq_dto_q.next,
-                                 struct svc_rdma_op_ctxt,
-                                 dto_q);
-               list_del_init(&ctxt->dto_q);
+               ctxt = list_first_entry(&rdma->sc_rq_dto_q,
+                                       struct svc_rdma_op_ctxt, list);
+               list_del(&ctxt->list);
                svc_rdma_put_context(ctxt, 1);
        }