]> git.kernelconcepts.de Git - karo-tx-linux.git/blobdiff - fs/nfs/nfs4proc.c
pNFS: Layoutreturn must free the layout after the layout-private data
[karo-tx-linux.git] / fs / nfs / nfs4proc.c
index 241da19b7da4a54a45b1a2bd6cae4cab8cab4dde..c5a5086696555c8039bb3953d8e50078e345c369 100644 (file)
@@ -94,7 +94,7 @@ static int nfs4_proc_getattr(struct nfs_server *, struct nfs_fh *, struct nfs_fa
 static int _nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fattr *fattr, struct nfs4_label *label);
 static int nfs4_do_setattr(struct inode *inode, struct rpc_cred *cred,
                            struct nfs_fattr *fattr, struct iattr *sattr,
-                           struct nfs4_state *state, struct nfs4_label *ilabel,
+                           struct nfs_open_context *ctx, struct nfs4_label *ilabel,
                            struct nfs4_label *olabel);
 #ifdef CONFIG_NFS_V4_1
 static int nfs41_test_stateid(struct nfs_server *, nfs4_stateid *,
@@ -226,7 +226,6 @@ static const u32 nfs4_pnfs_open_bitmap[3] = {
 
 static const u32 nfs4_open_noattr_bitmap[3] = {
        FATTR4_WORD0_TYPE
-       | FATTR4_WORD0_CHANGE
        | FATTR4_WORD0_FILEID,
 };
 
@@ -817,6 +816,10 @@ static int nfs41_sequence_process(struct rpc_task *task,
        case -NFS4ERR_SEQ_FALSE_RETRY:
                ++slot->seq_nr;
                goto retry_nowait;
+       case -NFS4ERR_DEADSESSION:
+       case -NFS4ERR_BADSESSION:
+               nfs4_schedule_session_recovery(session, res->sr_status);
+               goto retry_nowait;
        default:
                /* Just update the slot sequence no. */
                slot->seq_done = 1;
@@ -1221,6 +1224,7 @@ static struct nfs4_opendata *nfs4_opendata_alloc(struct dentry *dentry,
        atomic_inc(&sp->so_count);
        p->o_arg.open_flags = flags;
        p->o_arg.fmode = fmode & (FMODE_READ|FMODE_WRITE);
+       p->o_arg.claim = nfs4_map_atomic_open_claim(server, claim);
        p->o_arg.share_access = nfs4_map_atomic_open_share(server,
                        fmode, flags);
        /* don't put an ACCESS op in OPEN compound if O_EXCL, because ACCESS
@@ -1228,8 +1232,16 @@ static struct nfs4_opendata *nfs4_opendata_alloc(struct dentry *dentry,
        if (!(flags & O_EXCL)) {
                /* ask server to check for all possible rights as results
                 * are cached */
-               p->o_arg.access = NFS4_ACCESS_READ | NFS4_ACCESS_MODIFY |
-                                 NFS4_ACCESS_EXTEND | NFS4_ACCESS_EXECUTE;
+               switch (p->o_arg.claim) {
+               default:
+                       break;
+               case NFS4_OPEN_CLAIM_NULL:
+               case NFS4_OPEN_CLAIM_FH:
+                       p->o_arg.access = NFS4_ACCESS_READ |
+                               NFS4_ACCESS_MODIFY |
+                               NFS4_ACCESS_EXTEND |
+                               NFS4_ACCESS_EXECUTE;
+               }
        }
        p->o_arg.clientid = server->nfs_client->cl_clientid;
        p->o_arg.id.create_time = ktime_to_ns(sp->so_seqid.create_time);
@@ -1239,7 +1251,6 @@ static struct nfs4_opendata *nfs4_opendata_alloc(struct dentry *dentry,
        p->o_arg.bitmask = nfs4_bitmask(server, label);
        p->o_arg.open_bitmap = &nfs4_fattr_bitmap[0];
        p->o_arg.label = nfs4_label_copy(p->a_label, label);
-       p->o_arg.claim = nfs4_map_atomic_open_claim(server, claim);
        switch (p->o_arg.claim) {
        case NFS4_OPEN_CLAIM_NULL:
        case NFS4_OPEN_CLAIM_DELEGATE_CUR:
@@ -2819,7 +2830,7 @@ static int _nfs4_do_open(struct inode *dir,
                        nfs_fattr_init(opendata->o_res.f_attr);
                        status = nfs4_do_setattr(state->inode, cred,
                                        opendata->o_res.f_attr, sattr,
-                                       state, label, olabel);
+                                       ctx, label, olabel);
                        if (status == 0) {
                                nfs_setattr_update_inode(state->inode, sattr,
                                                opendata->o_res.f_attr);
@@ -2914,7 +2925,7 @@ static int _nfs4_do_setattr(struct inode *inode,
                            struct nfs_setattrargs *arg,
                            struct nfs_setattrres *res,
                            struct rpc_cred *cred,
-                           struct nfs4_state *state)
+                           struct nfs_open_context *ctx)
 {
        struct nfs_server *server = NFS_SERVER(inode);
         struct rpc_message msg = {
@@ -2937,15 +2948,17 @@ static int _nfs4_do_setattr(struct inode *inode,
 
        if (nfs4_copy_delegation_stateid(inode, fmode, &arg->stateid, &delegation_cred)) {
                /* Use that stateid */
-       } else if (truncate && state != NULL) {
-               struct nfs_lockowner lockowner = {
-                       .l_owner = current->files,
-                       .l_pid = current->tgid,
-               };
-               if (!nfs4_valid_open_stateid(state))
+       } else if (truncate && ctx != NULL) {
+               struct nfs_lock_context *l_ctx;
+               if (!nfs4_valid_open_stateid(ctx->state))
                        return -EBADF;
-               if (nfs4_select_rw_stateid(state, FMODE_WRITE, &lockowner,
-                               &arg->stateid, &delegation_cred) == -EIO)
+               l_ctx = nfs_get_lock_context(ctx);
+               if (IS_ERR(l_ctx))
+                       return PTR_ERR(l_ctx);
+               status = nfs4_select_rw_stateid(ctx->state, FMODE_WRITE, l_ctx,
+                                               &arg->stateid, &delegation_cred);
+               nfs_put_lock_context(l_ctx);
+               if (status == -EIO)
                        return -EBADF;
        } else
                nfs4_stateid_copy(&arg->stateid, &zero_stateid);
@@ -2955,7 +2968,7 @@ static int _nfs4_do_setattr(struct inode *inode,
        status = nfs4_call_sync(server->client, server, &msg, &arg->seq_args, &res->seq_res, 1);
 
        put_rpccred(delegation_cred);
-       if (status == 0 && state != NULL)
+       if (status == 0 && ctx != NULL)
                renew_lease(server, timestamp);
        trace_nfs4_setattr(inode, &arg->stateid, status);
        return status;
@@ -2963,10 +2976,11 @@ static int _nfs4_do_setattr(struct inode *inode,
 
 static int nfs4_do_setattr(struct inode *inode, struct rpc_cred *cred,
                           struct nfs_fattr *fattr, struct iattr *sattr,
-                          struct nfs4_state *state, struct nfs4_label *ilabel,
+                          struct nfs_open_context *ctx, struct nfs4_label *ilabel,
                           struct nfs4_label *olabel)
 {
        struct nfs_server *server = NFS_SERVER(inode);
+       struct nfs4_state *state = ctx ? ctx->state : NULL;
         struct nfs_setattrargs  arg = {
                 .fh             = NFS_FH(inode),
                 .iap            = sattr,
@@ -2991,7 +3005,7 @@ static int nfs4_do_setattr(struct inode *inode, struct rpc_cred *cred,
                arg.bitmask = nfs4_bitmask(server, olabel);
 
        do {
-               err = _nfs4_do_setattr(inode, &arg, &res, cred, state);
+               err = _nfs4_do_setattr(inode, &arg, &res, cred, ctx);
                switch (err) {
                case -NFS4ERR_OPENMODE:
                        if (!(sattr->ia_valid & ATTR_SIZE)) {
@@ -3028,10 +3042,15 @@ struct nfs4_closedata {
        struct nfs4_state *state;
        struct nfs_closeargs arg;
        struct nfs_closeres res;
+       struct {
+               struct nfs4_layoutreturn_args arg;
+               struct nfs4_layoutreturn_res res;
+               struct nfs4_xdr_opaque_data ld_private;
+               u32 roc_barrier;
+               bool roc;
+       } lr;
        struct nfs_fattr fattr;
        unsigned long timestamp;
-       bool roc;
-       u32 roc_barrier;
 };
 
 static void nfs4_free_closedata(void *data)
@@ -3040,8 +3059,9 @@ static void nfs4_free_closedata(void *data)
        struct nfs4_state_owner *sp = calldata->state->owner;
        struct super_block *sb = calldata->state->inode->i_sb;
 
-       if (calldata->roc)
-               pnfs_roc_release(calldata->state->inode);
+       if (calldata->lr.roc)
+               pnfs_roc_release(&calldata->lr.arg, &calldata->lr.res,
+                               calldata->res.lr_ret);
        nfs4_put_open_state(calldata->state);
        nfs_free_seqid(calldata->arg.seqid);
        nfs4_put_state_owner(sp);
@@ -3060,15 +3080,38 @@ static void nfs4_close_done(struct rpc_task *task, void *data)
        if (!nfs4_sequence_done(task, &calldata->res.seq_res))
                return;
        trace_nfs4_close(state, &calldata->arg, &calldata->res, task->tk_status);
+
+       /* Handle Layoutreturn errors */
+       if (calldata->arg.lr_args && task->tk_status != 0) {
+               switch (calldata->res.lr_ret) {
+               default:
+                       calldata->res.lr_ret = -NFS4ERR_NOMATCHING_LAYOUT;
+                       break;
+               case 0:
+                       calldata->arg.lr_args = NULL;
+                       calldata->res.lr_res = NULL;
+                       break;
+               case -NFS4ERR_ADMIN_REVOKED:
+               case -NFS4ERR_DELEG_REVOKED:
+               case -NFS4ERR_EXPIRED:
+               case -NFS4ERR_BAD_STATEID:
+               case -NFS4ERR_OLD_STATEID:
+               case -NFS4ERR_UNKNOWN_LAYOUTTYPE:
+               case -NFS4ERR_WRONG_CRED:
+                       calldata->arg.lr_args = NULL;
+                       calldata->res.lr_res = NULL;
+                       calldata->res.lr_ret = 0;
+                       rpc_restart_call_prepare(task);
+                       return;
+               }
+       }
+
         /* hmm. we are done with the inode, and in the process of freeing
         * the state_owner. we keep this around to process errors
         */
        switch (task->tk_status) {
                case 0:
                        res_stateid = &calldata->res.stateid;
-                       if (calldata->roc)
-                               pnfs_roc_set_barrier(state->inode,
-                                                    calldata->roc_barrier);
                        renew_lease(server, calldata->timestamp);
                        break;
                case -NFS4ERR_ADMIN_REVOKED:
@@ -3144,15 +3187,20 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data)
                goto out_no_action;
        }
 
-       if (nfs4_wait_on_layoutreturn(inode, task)) {
+       if (!calldata->lr.roc && nfs4_wait_on_layoutreturn(inode, task)) {
                nfs_release_seqid(calldata->arg.seqid);
                goto out_wait;
        }
 
-       if (calldata->arg.fmode == 0)
+       if (calldata->arg.fmode == 0) {
                task->tk_msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_CLOSE];
-       if (calldata->roc)
-               pnfs_roc_get_barrier(inode, &calldata->roc_barrier);
+
+               /* Close-to-open cache consistency revalidation */
+               if (!nfs4_have_delegation(inode, FMODE_READ))
+                       calldata->arg.bitmask = NFS_SERVER(inode)->cache_consistency_bitmask;
+               else
+                       calldata->arg.bitmask = NULL;
+       }
 
        calldata->arg.share_access =
                nfs4_map_atomic_open_share(NFS_SERVER(inode),
@@ -3179,13 +3227,6 @@ static const struct rpc_call_ops nfs4_close_ops = {
        .rpc_release = nfs4_free_closedata,
 };
 
-static bool nfs4_roc(struct inode *inode)
-{
-       if (!nfs_have_layout(inode))
-               return false;
-       return pnfs_roc(inode);
-}
-
 /* 
  * It is possible for data to be read/written from a mem-mapped file 
  * after the sys_close call (which hits the vfs layer as a flush).
@@ -3233,11 +3274,17 @@ int nfs4_do_close(struct nfs4_state *state, gfp_t gfp_mask, int wait)
        if (IS_ERR(calldata->arg.seqid))
                goto out_free_calldata;
        calldata->arg.fmode = 0;
-       calldata->arg.bitmask = server->cache_consistency_bitmask;
+       calldata->lr.arg.ld_private = &calldata->lr.ld_private;
        calldata->res.fattr = &calldata->fattr;
        calldata->res.seqid = calldata->arg.seqid;
        calldata->res.server = server;
-       calldata->roc = nfs4_roc(state->inode);
+       calldata->res.lr_ret = -NFS4ERR_NOMATCHING_LAYOUT;
+       calldata->lr.roc = pnfs_roc(state->inode,
+                       &calldata->lr.arg, &calldata->lr.res, msg.rpc_cred);
+       if (calldata->lr.roc) {
+               calldata->arg.lr_args = &calldata->lr.arg;
+               calldata->res.lr_res = &calldata->lr.res;
+       }
        nfs_sb_active(calldata->inode->i_sb);
 
        msg.rpc_argp = &calldata->arg;
@@ -3687,7 +3734,7 @@ nfs4_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr,
 {
        struct inode *inode = d_inode(dentry);
        struct rpc_cred *cred = NULL;
-       struct nfs4_state *state = NULL;
+       struct nfs_open_context *ctx = NULL;
        struct nfs4_label *label = NULL;
        int status;
 
@@ -3708,20 +3755,17 @@ nfs4_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr,
 
        /* Search for an existing open(O_WRITE) file */
        if (sattr->ia_valid & ATTR_FILE) {
-               struct nfs_open_context *ctx;
 
                ctx = nfs_file_open_context(sattr->ia_file);
-               if (ctx) {
+               if (ctx)
                        cred = ctx->cred;
-                       state = ctx->state;
-               }
        }
 
        label = nfs4_label_alloc(NFS_SERVER(inode), GFP_KERNEL);
        if (IS_ERR(label))
                return PTR_ERR(label);
 
-       status = nfs4_do_setattr(inode, cred, fattr, sattr, state, NULL, label);
+       status = nfs4_do_setattr(inode, cred, fattr, sattr, ctx, NULL, label);
        if (status == 0) {
                nfs_setattr_update_inode(inode, sattr, fattr);
                nfs_setsecurity(inode, fattr, label);
@@ -3971,7 +4015,7 @@ nfs4_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
        struct nfs4_state *state;
        int status = 0;
 
-       ctx = alloc_nfs_open_context(dentry, FMODE_READ);
+       ctx = alloc_nfs_open_context(dentry, FMODE_READ, NULL);
        if (IS_ERR(ctx))
                return PTR_ERR(ctx);
 
@@ -4541,11 +4585,7 @@ int nfs4_set_rw_stateid(nfs4_stateid *stateid,
                const struct nfs_lock_context *l_ctx,
                fmode_t fmode)
 {
-       const struct nfs_lockowner *lockowner = NULL;
-
-       if (l_ctx != NULL)
-               lockowner = &l_ctx->lockowner;
-       return nfs4_select_rw_stateid(ctx->state, fmode, lockowner, stateid, NULL);
+       return nfs4_select_rw_stateid(ctx->state, fmode, l_ctx, stateid, NULL);
 }
 EXPORT_SYMBOL_GPL(nfs4_set_rw_stateid);
 
@@ -5564,11 +5604,16 @@ struct nfs4_delegreturndata {
        struct nfs_fh fh;
        nfs4_stateid stateid;
        unsigned long timestamp;
+       struct {
+               struct nfs4_layoutreturn_args arg;
+               struct nfs4_layoutreturn_res res;
+               struct nfs4_xdr_opaque_data ld_private;
+               u32 roc_barrier;
+               bool roc;
+       } lr;
        struct nfs_fattr fattr;
        int rpc_status;
        struct inode *inode;
-       bool roc;
-       u32 roc_barrier;
 };
 
 static void nfs4_delegreturn_done(struct rpc_task *task, void *calldata)
@@ -5579,6 +5624,32 @@ static void nfs4_delegreturn_done(struct rpc_task *task, void *calldata)
                return;
 
        trace_nfs4_delegreturn_exit(&data->args, &data->res, task->tk_status);
+
+       /* Handle Layoutreturn errors */
+       if (data->args.lr_args && task->tk_status != 0) {
+               switch(data->res.lr_ret) {
+               default:
+                       data->res.lr_ret = -NFS4ERR_NOMATCHING_LAYOUT;
+                       break;
+               case 0:
+                       data->args.lr_args = NULL;
+                       data->res.lr_res = NULL;
+                       break;
+               case -NFS4ERR_ADMIN_REVOKED:
+               case -NFS4ERR_DELEG_REVOKED:
+               case -NFS4ERR_EXPIRED:
+               case -NFS4ERR_BAD_STATEID:
+               case -NFS4ERR_OLD_STATEID:
+               case -NFS4ERR_UNKNOWN_LAYOUTTYPE:
+               case -NFS4ERR_WRONG_CRED:
+                       data->args.lr_args = NULL;
+                       data->res.lr_res = NULL;
+                       data->res.lr_ret = 0;
+                       rpc_restart_call_prepare(task);
+                       return;
+               }
+       }
+
        switch (task->tk_status) {
        case 0:
                renew_lease(data->res.server, data->timestamp);
@@ -5602,8 +5673,6 @@ static void nfs4_delegreturn_done(struct rpc_task *task, void *calldata)
                }
        }
        data->rpc_status = task->tk_status;
-       if (data->roc && data->rpc_status == 0)
-               pnfs_roc_set_barrier(data->inode, data->roc_barrier);
 }
 
 static void nfs4_delegreturn_release(void *calldata)
@@ -5612,8 +5681,9 @@ static void nfs4_delegreturn_release(void *calldata)
        struct inode *inode = data->inode;
 
        if (inode) {
-               if (data->roc)
-                       pnfs_roc_release(inode);
+               if (data->lr.roc)
+                       pnfs_roc_release(&data->lr.arg, &data->lr.res,
+                                       data->res.lr_ret);
                nfs_iput_and_deactive(inode);
        }
        kfree(calldata);
@@ -5625,12 +5695,9 @@ static void nfs4_delegreturn_prepare(struct rpc_task *task, void *data)
 
        d_data = (struct nfs4_delegreturndata *)data;
 
-       if (nfs4_wait_on_layoutreturn(d_data->inode, task))
+       if (!d_data->lr.roc && nfs4_wait_on_layoutreturn(d_data->inode, task))
                return;
 
-       if (d_data->roc)
-               pnfs_roc_get_barrier(d_data->inode, &d_data->roc_barrier);
-
        nfs4_setup_sequence(d_data->res.server,
                        &d_data->args.seq_args,
                        &d_data->res.seq_res,
@@ -5676,12 +5743,22 @@ static int _nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, co
        nfs4_stateid_copy(&data->stateid, stateid);
        data->res.fattr = &data->fattr;
        data->res.server = server;
+       data->res.lr_ret = -NFS4ERR_NOMATCHING_LAYOUT;
+       data->lr.arg.ld_private = &data->lr.ld_private;
        nfs_fattr_init(data->res.fattr);
        data->timestamp = jiffies;
        data->rpc_status = 0;
+       data->lr.roc = pnfs_roc(inode, &data->lr.arg, &data->lr.res, cred);
        data->inode = nfs_igrab_and_active(inode);
-       if (data->inode)
-               data->roc = nfs4_roc(inode);
+       if (data->inode) {
+               if (data->lr.roc) {
+                       data->args.lr_args = &data->lr.arg;
+                       data->res.lr_res = &data->lr.res;
+               }
+       } else if (data->lr.roc) {
+               pnfs_roc_release(&data->lr.arg, &data->lr.res, 0);
+               data->lr.roc = false;
+       }
 
        task_setup_data.callback_data = data;
        msg.rpc_argp = &data->args;
@@ -8559,21 +8636,13 @@ static void nfs4_layoutreturn_release(void *calldata)
 {
        struct nfs4_layoutreturn *lrp = calldata;
        struct pnfs_layout_hdr *lo = lrp->args.layout;
-       LIST_HEAD(freeme);
 
        dprintk("--> %s\n", __func__);
-       spin_lock(&lo->plh_inode->i_lock);
-       if (lrp->res.lrs_present) {
-               pnfs_mark_matching_lsegs_invalid(lo, &freeme,
-                               &lrp->args.range,
-                               be32_to_cpu(lrp->args.stateid.seqid));
-               pnfs_set_layout_stateid(lo, &lrp->res.stateid, true);
-       } else
-               pnfs_mark_layout_stateid_invalid(lo, &freeme);
-       pnfs_clear_layoutreturn_waitbit(lo);
-       spin_unlock(&lo->plh_inode->i_lock);
+       pnfs_layoutreturn_free_lsegs(lo, &lrp->args.stateid, &lrp->args.range,
+                       lrp->res.lrs_present ? &lrp->res.stateid : NULL);
        nfs4_sequence_free_slot(&lrp->res.seq_res);
-       pnfs_free_lseg_list(&freeme);
+       if (lrp->ld_private.ops && lrp->ld_private.ops->free)
+               lrp->ld_private.ops->free(&lrp->ld_private);
        pnfs_put_layout_hdr(lrp->args.layout);
        nfs_iput_and_deactive(lrp->inode);
        kfree(calldata);