2 * linux/fs/nfs/blocklayout/blocklayoutdev.c
4 * Device operations for the pnfs nfs4 file layout driver.
6 * Copyright (c) 2006 The Regents of the University of Michigan.
9 * Andy Adamson <andros@citi.umich.edu>
10 * Fred Isaman <iisaman@umich.edu>
12 * permission is granted to use, copy, create derivative works and
13 * redistribute this software and such derivative works for any purpose,
14 * so long as the name of the university of michigan is not used in
15 * any advertising or publicity pertaining to the use or distribution
16 * of this software without specific, written prior authorization. if
17 * the above copyright notice or any other identification of the
18 * university of michigan is included in any copy of any portion of
19 * this software, then the disclaimer below must also be included.
21 * this software is provided as is, without representation from the
22 * university of michigan as to its fitness for any purpose, and without
23 * warranty by the university of michigan of any kind, either express
24 * or implied, including without limitation the implied warranties of
25 * merchantability and fitness for a particular purpose. the regents
26 * of the university of michigan shall not be liable for any damages,
27 * including special, indirect, incidental, or consequential damages,
28 * with respect to any claim arising out or in connection with the use
29 * of the software, even if it has been or is hereafter advised of the
30 * possibility of such damages.
32 #include <linux/module.h>
33 #include <linux/buffer_head.h> /* __bread */
35 #include <linux/genhd.h>
36 #include <linux/blkdev.h>
37 #include <linux/hash.h>
39 #include "blocklayout.h"
41 #define NFSDBG_FACILITY NFSDBG_PNFS_LD
/*
 * decode_sector_number - decode one XDR hyper and store it as a sector count.
 * @rp: in/out XDR cursor; replaced by the pointer xdr_decode_hyper() returns.
 * @sp: out; receives the decoded value shifted right by SECTOR_SHIFT.
 *
 * One path logs a KERN_WARNING "sector not aligned". Callers in this file
 * treat a negative return value as failure.
 *
 * NOTE(review): the declaration of s, the alignment test guarding the
 * warning and the return statements are on lines not shown in this listing.
 */
43 static int decode_sector_number(__be32 **rp, sector_t *sp)
/* Consume the value and advance the caller's cursor in one step. */
47 *rp = xdr_decode_hyper(*rp, &s);
49 printk(KERN_WARNING "NFS: %s: sector not aligned\n", __func__);
/* The decoded value is divided down to SECTOR_SHIFT-sized units. */
52 *sp = s >> SECTOR_SHIFT;
/*
 * bl_pipe_downcall - take a reply message written by userspace.
 * @filp: the file being written; the s_fs_info of its superblock is handed
 *        to net_generic() to locate the nfs_net instance.
 * @src:  user-space buffer holding the message.
 *
 * The message length (mlen, declared on a signature line not shown) is
 * compared against sizeof(struct bl_dev_msg), and the message is copied
 * into nn->bl_mount_reply.
 *
 * NOTE(review): the return statements for the length mismatch, the failed
 * copy and the success path are on lines not shown in this listing.
 */
56 ssize_t bl_pipe_downcall(struct file *filp, const char __user *src,
59 struct nfs_net *nn = net_generic(filp->f_dentry->d_sb->s_fs_info,
/* Only a message of exactly one struct bl_dev_msg passes this check. */
62 if (mlen != sizeof (struct bl_dev_msg))
/* The reply lands in the buffer that bl_alloc_deviceid_node() reads. */
65 if (copy_from_user(&nn->bl_mount_reply, src, mlen) != 0)
/*
 * bl_pipe_destroy_msg - wake the waiter associated with a pipe message.
 * @msg: the rpc_pipe_msg embedded in a struct bl_pipe_msg.
 *
 * Recovers the enclosing struct bl_pipe_msg with container_of() and calls
 * wake_up() on the wait queue it points to (bl_wq).
 *
 * NOTE(review): lines between the declaration and the wake_up() call are
 * not shown in this listing, so any condition guarding the wake-up cannot
 * be confirmed from here.
 */
73 void bl_pipe_destroy_msg(struct rpc_pipe_msg *msg)
75 struct bl_pipe_msg *bl_pipe_msg = container_of(msg, struct bl_pipe_msg, msg);
79 wake_up(bl_pipe_msg->bl_wq);
83 * Decodes pnfs_block_deviceaddr4 which is XDR encoded in dev->dev_addr_buf.
/*
 * bl_alloc_deviceid_node - resolve a block device through the userspace
 * daemon and wrap it in a device id node.
 * @server: supplies the network namespace (nfs_client->cl_net) and is passed
 *          to nfs4_init_deviceid_node().
 * @dev:    dev->pages and dev->mincount hold the payload forwarded to
 *          userspace; dev->dev_id identifies the node being created.
 *
 * gfp_mask (declared on a signature line not shown) is used for both
 * kzalloc() calls.
 *
 * Visible steps: build a pipe message made of a struct bl_msg_hdr followed
 * by dev->mincount payload bytes, queue it on nn->bl_device_pipe, wait on
 * nn->bl_wq, check the status in nn->bl_mount_reply, open the block device
 * named by reply->major / reply->minor, then allocate a struct
 * pnfs_block_dev and initialise its d_node.
 *
 * NOTE(review): this listing omits lines (error checks, gotos, the sleep
 * between the two task-state changes, cleanup and the return statement);
 * the comments below describe only what is shown.
 * NOTE(review): the reply buffer is the per-net nn->bl_mount_reply that
 * bl_pipe_downcall() also writes; confirm that callers are serialised.
 */
85 struct nfs4_deviceid_node *
86 bl_alloc_deviceid_node(struct nfs_server *server, struct pnfs_device *dev,
89 struct pnfs_block_dev *rv;
90 struct block_device *bd;
91 struct bl_pipe_msg bl_pipe_msg;
92 struct rpc_pipe_msg *msg = &bl_pipe_msg.msg;
93 struct bl_msg_hdr bl_msg = {
94 .type = BL_DEVICE_MOUNT,
95 .totallen = dev->mincount,
98 DECLARE_WAITQUEUE(wq, current);
99 int offset, len, i, rc;
100 struct net *net = server->nfs_client->cl_net;
101 struct nfs_net *nn = net_generic(net, nfs_net_id);
102 struct bl_dev_msg *reply = &nn->bl_mount_reply;
104 dprintk("%s CREATING PIPEFS MESSAGE\n", __func__);
/*
 * NOTE(review): dev_id.data is printed with %s; confirm it is a
 * NUL-terminated string rather than raw id bytes.
 */
105 dprintk("%s: deviceid: %s, mincount: %d\n", __func__, dev->dev_id.data,
108 bl_pipe_msg.bl_wq = &nn->bl_wq;
109 memset(msg, 0, sizeof(*msg));
/* One buffer holds the header plus dev->mincount bytes of payload. */
110 msg->data = kzalloc(sizeof(bl_msg) + dev->mincount, gfp_mask);
114 memcpy(msg->data, &bl_msg, sizeof(bl_msg));
115 dataptr = (uint8_t *) msg->data;
117 offset = sizeof(bl_msg);
/*
 * Copy the payload out of dev->pages one page at a time; the final chunk
 * is shortened to the remaining length. The initial value of len is set
 * on a line not shown.
 */
118 for (i = 0; len > 0; i++) {
119 memcpy(&dataptr[offset], page_address(dev->pages[i]),
120 len < PAGE_CACHE_SIZE ? len : PAGE_CACHE_SIZE);
121 len -= PAGE_CACHE_SIZE;
122 offset += PAGE_CACHE_SIZE;
124 msg->len = sizeof(bl_msg) + dev->mincount;
126 dprintk("%s CALLING USERSPACE DAEMON\n", __func__);
/*
 * The wait-queue entry is added before the upcall is queued, presumably
 * so that a reply arriving immediately cannot be missed.
 */
127 add_wait_queue(&nn->bl_wq, &wq);
128 rc = rpc_queue_upcall(nn->bl_device_pipe, msg);
/* Presumably the failure path for rc; the guarding test is not shown. */
130 remove_wait_queue(&nn->bl_wq, &wq);
134 set_current_state(TASK_UNINTERRUPTIBLE);
136 __set_current_state(TASK_RUNNING);
137 remove_wait_queue(&nn->bl_wq, &wq);
/* Any status other than BL_DEVICE_REQUEST_PROC is reported as a failure. */
139 if (reply->status != BL_DEVICE_REQUEST_PROC) {
140 printk(KERN_WARNING "%s failed to decode device: %d\n",
141 __func__, reply->status);
/* Open the device identified by the major/minor pair in the reply. */
145 bd = blkdev_get_by_dev(MKDEV(reply->major, reply->minor),
148 printk(KERN_WARNING "%s failed to open device %d:%d (%ld)\n",
149 __func__, reply->major, reply->minor,
154 rv = kzalloc(sizeof(*rv), gfp_mask);
158 nfs4_init_deviceid_node(&rv->d_node, server, &dev->dev_id);
161 dprintk("%s Created device %s with bd_block_size %u\n",
163 bd->bd_disk->disk_name,
/*
 * bl_free_deviceid_node - release the block device behind a device id node.
 * @d: the d_node member embedded in a struct pnfs_block_dev.
 *
 * Recovers the enclosing struct pnfs_block_dev, drops the FMODE_READ
 * reference on its block device with blkdev_put(), and calls
 * bl_dm_remove() with the device number.
 *
 * NOTE(review): dev->d_bdev->bd_dev is read after blkdev_put() has been
 * called on the same block_device; confirm the structure is still valid at
 * that point, or capture bd_dev before the put.
 * NOTE(review): the return type, any freeing of dev and the closing lines
 * are not shown in this listing.
 */
175 bl_free_deviceid_node(struct nfs4_deviceid_node *d)
177 struct pnfs_block_dev *dev =
178 container_of(d, struct pnfs_block_dev, d_node);
179 struct net *net = d->nfs_client->cl_net;
181 blkdev_put(dev->d_bdev, FMODE_READ);
182 bl_dm_remove(net, dev->d_bdev->bd_dev);
187 /* Tracks info needed to ensure extents in layout obey constraints of spec */
/*
 * start, inval and cowread are positions in sectors: the layoutget decoder
 * in this file initialises each of them to the layout range offset shifted
 * right by SECTOR_SHIFT, and verify_extent() advances them by extent
 * lengths as each extent is accepted.
 */
188 struct layout_verification {
189 u32 mode; /* R or RW */
190 u64 start; /* Expected start of next non-COW extent */
191 u64 inval; /* Start of INVAL coverage */
192 u64 cowread; /* End of COW read coverage */
/*
 * verify_extent - check one decoded extent against the running layout state.
 * @be: the extent to check; be_state, be_f_offset and be_length are read.
 * @lv: running state, updated as the extent is accepted.
 *
 * For an IOMODE_READ layout the extent state is tested against
 * PNFS_BLOCK_READWRITE_DATA and PNFS_BLOCK_INVALID_DATA, the file offset is
 * compared with lv->start, and lv->start is advanced by the extent length.
 * For other layouts the handling depends on be_state, as the branches below
 * show. The caller in this file treats a non-zero return as a verification
 * failure.
 *
 * NOTE(review): the statements executed when a check fails, and the return
 * statements, are on lines not shown in this listing; presumably each failed
 * check returns an error.
 */
195 /* Verify the extent meets the layout requirements of the pnfs-block draft,
198 static int verify_extent(struct pnfs_block_extent *be,
199 struct layout_verification *lv)
201 if (lv->mode == IOMODE_READ) {
202 if (be->be_state == PNFS_BLOCK_READWRITE_DATA ||
203 be->be_state == PNFS_BLOCK_INVALID_DATA)
205 if (be->be_f_offset != lv->start)
207 lv->start += be->be_length;
210 /* lv->mode == IOMODE_RW */
/*
 * Read-write data: the offset is compared with lv->start and any COW read
 * coverage reaching past lv->start is tested for; the extent then advances
 * lv->start and resets lv->inval to it.
 */
211 if (be->be_state == PNFS_BLOCK_READWRITE_DATA) {
212 if (be->be_f_offset != lv->start)
214 if (lv->cowread > lv->start)
216 lv->start += be->be_length;
217 lv->inval = lv->start;
/* Invalid data: advances lv->start but leaves lv->inval where it was. */
219 } else if (be->be_state == PNFS_BLOCK_INVALID_DATA) {
220 if (be->be_f_offset != lv->start)
222 lv->start += be->be_length;
/*
 * Read data inside a read-write layout: the offset is compared against
 * lv->start, lv->inval and lv->cowread before the two updates that follow.
 */
224 } else if (be->be_state == PNFS_BLOCK_READ_DATA) {
225 if (be->be_f_offset > lv->start)
227 if (be->be_f_offset < lv->inval)
229 if (be->be_f_offset < lv->cowread)
231 /* It looks like you might want to min this with lv->start,
232 * but you really don't.
234 lv->inval = lv->inval + be->be_length;
235 lv->cowread = be->be_f_offset + be->be_length;
241 /* XDR decode pnfs_block_layout4 structure */
/*
 * nfs4_blk_process_layoutget - decode the extents of a layoutget reply and
 * insert them into the layout's extent tree.
 * @lo:        layout header; converted with BLK_LO2EXT() and used for the
 *             inode and credential of the device id lookup.
 * @lgr:       layoutget result; supplies the encoded pages and the range
 *             (iomode, offset, length) the extents are checked against.
 * @gfp_flags: allocation flags for the scratch page and the device lookup.
 *
 * Visible steps: set up an XDR stream over lgr->layoutp with a scratch
 * page, read the extent count, decode each extent (device id, three
 * sector-converted values, state) into a staging list while checking it
 * with verify_extent(), confirm that the extents end exactly at
 * offset + length and that no COW read coverage extends past the end, then
 * move every staged extent into the tree with ext_tree_insert(). The tail
 * of the function releases the scratch page and drops device references
 * for extents left on the staging list.
 *
 * status starts as -EIO and is overwritten by the ext_tree_insert() result.
 *
 * NOTE(review): each extent is allocated with GFP_NOFS rather than the
 * gfp_flags argument used elsewhere in this function; confirm that is
 * intended.
 * NOTE(review): this listing omits lines (several declarations, NULL
 * checks, gotos, labels and the return), so error-path ordering cannot be
 * confirmed from here.
 */
243 nfs4_blk_process_layoutget(struct pnfs_layout_hdr *lo,
244 struct nfs4_layoutget_res *lgr, gfp_t gfp_flags)
246 struct pnfs_block_layout *bl = BLK_LO2EXT(lo);
247 int i, status = -EIO;
249 struct pnfs_block_extent *be = NULL, *save;
250 struct xdr_stream stream;
252 struct page *scratch;
/* All three positions start at the layout range offset, in sectors. */
254 struct layout_verification lv = {
255 .mode = lgr->range.iomode,
256 .start = lgr->range.offset >> SECTOR_SHIFT,
257 .inval = lgr->range.offset >> SECTOR_SHIFT,
258 .cowread = lgr->range.offset >> SECTOR_SHIFT,
262 dprintk("---> %s\n", __func__);
/* One page handed to the XDR stream as its scratch buffer. */
264 scratch = alloc_page(gfp_flags);
268 xdr_init_decode_pages(&stream, &buf, lgr->layoutp->pages, lgr->layoutp->len);
269 xdr_set_scratch_buffer(&stream, page_address(scratch), PAGE_SIZE);
/* The first 4 bytes of the body hold the number of extents. */
271 p = xdr_inline_decode(&stream, 4);
275 count = be32_to_cpup(p++);
277 dprintk("%s enter, number of extents %i\n", __func__, count);
/*
 * Each extent is a device id followed by 28 bytes: three 8-byte values
 * read by decode_sector_number() and a 4-byte state word.
 * NOTE(review): count comes from the decoded message; confirm the
 * multiplication cannot overflow for a hostile value.
 */
278 p = xdr_inline_decode(&stream, (28 + NFS4_DEVICEID4_SIZE) * count);
282 /* Decode individual extents, putting them in temporary
283 * staging area until whole layout is decoded to make error
286 for (i = 0; i < count; i++) {
287 struct nfs4_deviceid id;
289 be = kzalloc(sizeof(struct pnfs_block_extent), GFP_NOFS);
294 memcpy(&id, p, NFS4_DEVICEID4_SIZE);
295 p += XDR_QUADLEN(NFS4_DEVICEID4_SIZE);
298 nfs4_find_get_deviceid(NFS_SERVER(lo->plh_inode), &id,
299 lo->plh_lc_cred, gfp_flags);
303 /* The next three values are read in as bytes,
304 * but stored as 512-byte sector lengths
306 if (decode_sector_number(&p, &be->be_f_offset) < 0)
308 if (decode_sector_number(&p, &be->be_length) < 0)
310 if (decode_sector_number(&p, &be->be_v_offset) < 0)
312 be->be_state = be32_to_cpup(p++);
313 if (verify_extent(be, &lv)) {
314 dprintk("%s verify failed\n", __func__);
317 list_add_tail(&be->be_list, &extents);
319 if (lgr->range.offset + lgr->range.length !=
320 lv.start << SECTOR_SHIFT) {
321 dprintk("%s Final length mismatch\n", __func__);
325 if (lv.start < lv.cowread) {
326 dprintk("%s Final uncovered COW extent\n", __func__);
330 /* Extents decoded properly, now try to merge them in to
331 * existing layout extents.
333 list_for_each_entry_safe(be, save, &extents, be_list) {
334 list_del(&be->be_list);
336 status = ext_tree_insert(bl, be);
342 __free_page(scratch);
343 dprintk("%s returns %i\n", __func__, status);
347 nfs4_put_deviceid_node(be->be_device);
350 while (!list_empty(&extents)) {
351 be = list_first_entry(&extents, struct pnfs_block_extent,
353 list_del(&be->be_list);
354 nfs4_put_deviceid_node(be->be_device);