git.kernelconcepts.de Git - karo-tx-linux.git/blob - fs/nfs/blocklayout/dev.c
pnfs/blocklayout: in-kernel GETDEVICEINFO XDR parsing
[karo-tx-linux.git] / fs / nfs / blocklayout / dev.c
1 /*
2  * Copyright (c) 2014 Christoph Hellwig.
3  */
4 #include <linux/sunrpc/svc.h>
5 #include <linux/blkdev.h>
6 #include <linux/nfs4.h>
7 #include <linux/nfs_fs.h>
8 #include <linux/nfs_xdr.h>
9
10 #include "blocklayout.h"
11
12 #define NFSDBG_FACILITY         NFSDBG_PNFS_LD
13
14 static void
15 bl_free_device(struct pnfs_block_dev *dev)
16 {
17         if (dev->nr_children) {
18                 int i;
19
20                 for (i = 0; i < dev->nr_children; i++)
21                         bl_free_device(&dev->children[i]);
22                 kfree(dev->children);
23         } else {
24                 if (dev->bdev)
25                         blkdev_put(dev->bdev, FMODE_READ);
26         }
27 }
28
29 void
30 bl_free_deviceid_node(struct nfs4_deviceid_node *d)
31 {
32         struct pnfs_block_dev *dev =
33                 container_of(d, struct pnfs_block_dev, node);
34
35         bl_free_device(dev);
36         kfree(dev);
37 }
38
39 static int
40 nfs4_block_decode_volume(struct xdr_stream *xdr, struct pnfs_block_volume *b)
41 {
42         __be32 *p;
43         int i;
44
45         p = xdr_inline_decode(xdr, 4);
46         if (!p)
47                 return -EIO;
48         b->type = be32_to_cpup(p++);
49
50         switch (b->type) {
51         case PNFS_BLOCK_VOLUME_SIMPLE:
52                 p = xdr_inline_decode(xdr, 4);
53                 if (!p)
54                         return -EIO;
55                 b->simple.nr_sigs = be32_to_cpup(p++);
56                 if (!b->simple.nr_sigs) {
57                         dprintk("no signature\n");
58                         return -EIO;
59                 }
60
61                 b->simple.len = 4 + 4;
62                 for (i = 0; i < b->simple.nr_sigs; i++) {
63                         p = xdr_inline_decode(xdr, 8 + 4);
64                         if (!p)
65                                 return -EIO;
66                         p = xdr_decode_hyper(p, &b->simple.sigs[i].offset);
67                         b->simple.sigs[i].sig_len = be32_to_cpup(p++);
68
69                         p = xdr_inline_decode(xdr, b->simple.sigs[i].sig_len);
70                         if (!p)
71                                 return -EIO;
72                         memcpy(&b->simple.sigs[i].sig, p,
73                                 b->simple.sigs[i].sig_len);
74
75                         b->simple.len += 8 + 4 + b->simple.sigs[i].sig_len;
76                 }
77                 break;
78         case PNFS_BLOCK_VOLUME_SLICE:
79                 p = xdr_inline_decode(xdr, 8 + 8 + 4);
80                 if (!p)
81                         return -EIO;
82                 p = xdr_decode_hyper(p, &b->slice.start);
83                 p = xdr_decode_hyper(p, &b->slice.len);
84                 b->slice.volume = be32_to_cpup(p++);
85                 break;
86         case PNFS_BLOCK_VOLUME_CONCAT:
87                 p = xdr_inline_decode(xdr, 4);
88                 if (!p)
89                         return -EIO;
90                 b->concat.volumes_count = be32_to_cpup(p++);
91
92                 p = xdr_inline_decode(xdr, b->concat.volumes_count * 4);
93                 if (!p)
94                         return -EIO;
95                 for (i = 0; i < b->concat.volumes_count; i++)
96                         b->concat.volumes[i] = be32_to_cpup(p++);
97                 break;
98         case PNFS_BLOCK_VOLUME_STRIPE:
99                 p = xdr_inline_decode(xdr, 8 + 4);
100                 if (!p)
101                         return -EIO;
102                 p = xdr_decode_hyper(p, &b->stripe.chunk_size);
103                 b->stripe.volumes_count = be32_to_cpup(p++);
104
105                 p = xdr_inline_decode(xdr, b->stripe.volumes_count * 4);
106                 if (!p)
107                         return -EIO;
108                 for (i = 0; i < b->stripe.volumes_count; i++)
109                         b->stripe.volumes[i] = be32_to_cpup(p++);
110                 break;
111         default:
112                 dprintk("unknown volume type!\n");
113                 return -EIO;
114         }
115
116         return 0;
117 }
118
119 static bool bl_map_simple(struct pnfs_block_dev *dev, u64 offset,
120                 struct pnfs_block_dev_map *map)
121 {
122         map->start = dev->start;
123         map->len = dev->len;
124         map->disk_offset = dev->disk_offset;
125         map->bdev = dev->bdev;
126         return true;
127 }
128
129 static bool bl_map_concat(struct pnfs_block_dev *dev, u64 offset,
130                 struct pnfs_block_dev_map *map)
131 {
132         int i;
133
134         for (i = 0; i < dev->nr_children; i++) {
135                 struct pnfs_block_dev *child = &dev->children[i];
136
137                 if (child->start > offset ||
138                     child->start + child->len <= offset)
139                         continue;
140
141                 child->map(child, offset - child->start, map);
142                 return true;
143         }
144
145         dprintk("%s: ran off loop!\n", __func__);
146         return false;
147 }
148
149 static bool bl_map_stripe(struct pnfs_block_dev *dev, u64 offset,
150                 struct pnfs_block_dev_map *map)
151 {
152         struct pnfs_block_dev *child;
153         u64 chunk = (offset / dev->chunk_size);
154         int chunk_idx = chunk % dev->nr_children;
155         u64 disk_offset;
156
157         if (chunk_idx > dev->nr_children) {
158                 dprintk("%s: invalid chunk idx %d (%lld/%lld)\n",
159                         __func__, chunk_idx, offset, dev->chunk_size);
160                 /* error, should not happen */
161                 return false;
162         }
163
164         /* truncate offset to the beginning of the stripe */
165         offset = chunk * dev->chunk_size;
166
167         /* disk offset of the stripe */
168         disk_offset = offset / dev->nr_children;
169
170         child = &dev->children[chunk_idx];
171         child->map(child, disk_offset, map);
172
173         map->start += offset;
174         map->disk_offset += disk_offset;
175         map->len = dev->chunk_size;
176         return true;
177 }
178
179 static int
180 bl_parse_deviceid(struct nfs_server *server, struct pnfs_block_dev *d,
181                 struct pnfs_block_volume *volumes, int idx, gfp_t gfp_mask);
182
183
184 static int
185 bl_parse_simple(struct nfs_server *server, struct pnfs_block_dev *d,
186                 struct pnfs_block_volume *volumes, int idx, gfp_t gfp_mask)
187 {
188         struct pnfs_block_volume *v = &volumes[idx];
189         dev_t dev;
190
191         dev = bl_resolve_deviceid(server, v, gfp_mask);
192         if (!dev)
193                 return -EIO;
194
195         d->bdev = blkdev_get_by_dev(dev, FMODE_READ, NULL);
196         if (IS_ERR(d->bdev)) {
197                 printk(KERN_WARNING "pNFS: failed to open device %d:%d (%ld)\n",
198                         MAJOR(dev), MINOR(dev), PTR_ERR(d->bdev));
199                 return PTR_ERR(d->bdev);
200         }
201
202
203         d->len = i_size_read(d->bdev->bd_inode);
204         d->map = bl_map_simple;
205
206         printk(KERN_INFO "pNFS: using block device %s\n",
207                 d->bdev->bd_disk->disk_name);
208         return 0;
209 }
210
211 static int
212 bl_parse_slice(struct nfs_server *server, struct pnfs_block_dev *d,
213                 struct pnfs_block_volume *volumes, int idx, gfp_t gfp_mask)
214 {
215         struct pnfs_block_volume *v = &volumes[idx];
216         int ret;
217
218         ret = bl_parse_deviceid(server, d, volumes, v->slice.volume, gfp_mask);
219         if (ret)
220                 return ret;
221
222         d->disk_offset = v->slice.start;
223         d->len = v->slice.len;
224         return 0;
225 }
226
227 static int
228 bl_parse_concat(struct nfs_server *server, struct pnfs_block_dev *d,
229                 struct pnfs_block_volume *volumes, int idx, gfp_t gfp_mask)
230 {
231         struct pnfs_block_volume *v = &volumes[idx];
232         u64 len = 0;
233         int ret, i;
234
235         d->children = kcalloc(v->concat.volumes_count,
236                         sizeof(struct pnfs_block_dev), GFP_KERNEL);
237         if (!d->children)
238                 return -ENOMEM;
239
240         for (i = 0; i < v->concat.volumes_count; i++) {
241                 ret = bl_parse_deviceid(server, &d->children[i],
242                                 volumes, v->concat.volumes[i], gfp_mask);
243                 if (ret)
244                         return ret;
245
246                 d->nr_children++;
247                 d->children[i].start += len;
248                 len += d->children[i].len;
249         }
250
251         d->len = len;
252         d->map = bl_map_concat;
253         return 0;
254 }
255
256 static int
257 bl_parse_stripe(struct nfs_server *server, struct pnfs_block_dev *d,
258                 struct pnfs_block_volume *volumes, int idx, gfp_t gfp_mask)
259 {
260         struct pnfs_block_volume *v = &volumes[idx];
261         u64 len = 0;
262         int ret, i;
263
264         d->children = kcalloc(v->stripe.volumes_count,
265                         sizeof(struct pnfs_block_dev), GFP_KERNEL);
266         if (!d->children)
267                 return -ENOMEM;
268
269         for (i = 0; i < v->stripe.volumes_count; i++) {
270                 ret = bl_parse_deviceid(server, &d->children[i],
271                                 volumes, v->stripe.volumes[i], gfp_mask);
272                 if (ret)
273                         return ret;
274
275                 d->nr_children++;
276                 len += d->children[i].len;
277         }
278
279         d->len = len;
280         d->chunk_size = v->stripe.chunk_size;
281         d->map = bl_map_stripe;
282         return 0;
283 }
284
285 static int
286 bl_parse_deviceid(struct nfs_server *server, struct pnfs_block_dev *d,
287                 struct pnfs_block_volume *volumes, int idx, gfp_t gfp_mask)
288 {
289         switch (volumes[idx].type) {
290         case PNFS_BLOCK_VOLUME_SIMPLE:
291                 return bl_parse_simple(server, d, volumes, idx, gfp_mask);
292         case PNFS_BLOCK_VOLUME_SLICE:
293                 return bl_parse_slice(server, d, volumes, idx, gfp_mask);
294         case PNFS_BLOCK_VOLUME_CONCAT:
295                 return bl_parse_concat(server, d, volumes, idx, gfp_mask);
296         case PNFS_BLOCK_VOLUME_STRIPE:
297                 return bl_parse_stripe(server, d, volumes, idx, gfp_mask);
298         default:
299                 dprintk("unsupported volume type: %d\n", volumes[idx].type);
300                 return -EIO;
301         }
302 }
303
304 struct nfs4_deviceid_node *
305 bl_alloc_deviceid_node(struct nfs_server *server, struct pnfs_device *pdev,
306                 gfp_t gfp_mask)
307 {
308         struct nfs4_deviceid_node *node = NULL;
309         struct pnfs_block_volume *volumes;
310         struct pnfs_block_dev *top;
311         struct xdr_stream xdr;
312         struct xdr_buf buf;
313         struct page *scratch;
314         int nr_volumes, ret, i;
315         __be32 *p;
316
317         scratch = alloc_page(gfp_mask);
318         if (!scratch)
319                 goto out;
320
321         xdr_init_decode_pages(&xdr, &buf, pdev->pages, pdev->pglen);
322         xdr_set_scratch_buffer(&xdr, page_address(scratch), PAGE_SIZE);
323
324         p = xdr_inline_decode(&xdr, sizeof(__be32));
325         if (!p)
326                 goto out_free_scratch;
327         nr_volumes = be32_to_cpup(p++);
328
329         volumes = kcalloc(nr_volumes, sizeof(struct pnfs_block_volume),
330                           gfp_mask);
331         if (!volumes)
332                 goto out_free_scratch;
333
334         for (i = 0; i < nr_volumes; i++) {
335                 ret = nfs4_block_decode_volume(&xdr, &volumes[i]);
336                 if (ret < 0)
337                         goto out_free_volumes;
338         }
339
340         top = kzalloc(sizeof(*top), gfp_mask);
341         if (!top)
342                 goto out_free_volumes;
343
344         ret = bl_parse_deviceid(server, top, volumes, nr_volumes - 1, gfp_mask);
345         if (ret) {
346                 bl_free_device(top);
347                 kfree(top);
348                 goto out_free_volumes;
349         }
350
351         node = &top->node;
352         nfs4_init_deviceid_node(node, server, &pdev->dev_id);
353
354 out_free_volumes:
355         kfree(volumes);
356 out_free_scratch:
357         __free_page(scratch);
358 out:
359         return node;
360 }