nfs/blocklayout: Make sure the calculated signature length is aligned
[cascardo/linux.git] / fs / nfs / blocklayout / dev.c
1 /*
2  * Copyright (c) 2014-2016 Christoph Hellwig.
3  */
4 #include <linux/sunrpc/svc.h>
5 #include <linux/blkdev.h>
6 #include <linux/nfs4.h>
7 #include <linux/nfs_fs.h>
8 #include <linux/nfs_xdr.h>
9 #include <linux/pr.h>
10
11 #include "blocklayout.h"
12
13 #define NFSDBG_FACILITY         NFSDBG_PNFS_LD
14
15 static void
16 bl_free_device(struct pnfs_block_dev *dev)
17 {
18         if (dev->nr_children) {
19                 int i;
20
21                 for (i = 0; i < dev->nr_children; i++)
22                         bl_free_device(&dev->children[i]);
23                 kfree(dev->children);
24         } else {
25                 if (dev->pr_registered) {
26                         const struct pr_ops *ops =
27                                 dev->bdev->bd_disk->fops->pr_ops;
28                         int error;
29
30                         error = ops->pr_register(dev->bdev, dev->pr_key, 0,
31                                 false);
32                         if (error)
33                                 pr_err("failed to unregister PR key.\n");
34                 }
35
36                 if (dev->bdev)
37                         blkdev_put(dev->bdev, FMODE_READ | FMODE_WRITE);
38         }
39 }
40
41 void
42 bl_free_deviceid_node(struct nfs4_deviceid_node *d)
43 {
44         struct pnfs_block_dev *dev =
45                 container_of(d, struct pnfs_block_dev, node);
46
47         bl_free_device(dev);
48         kfree_rcu(dev, node.rcu);
49 }
50
51 static int
52 nfs4_block_decode_volume(struct xdr_stream *xdr, struct pnfs_block_volume *b)
53 {
54         __be32 *p;
55         int i;
56
57         p = xdr_inline_decode(xdr, 4);
58         if (!p)
59                 return -EIO;
60         b->type = be32_to_cpup(p++);
61
62         switch (b->type) {
63         case PNFS_BLOCK_VOLUME_SIMPLE:
64                 p = xdr_inline_decode(xdr, 4);
65                 if (!p)
66                         return -EIO;
67                 b->simple.nr_sigs = be32_to_cpup(p++);
68                 if (!b->simple.nr_sigs) {
69                         dprintk("no signature\n");
70                         return -EIO;
71                 }
72
73                 b->simple.len = 4 + 4;
74                 for (i = 0; i < b->simple.nr_sigs; i++) {
75                         p = xdr_inline_decode(xdr, 8 + 4);
76                         if (!p)
77                                 return -EIO;
78                         p = xdr_decode_hyper(p, &b->simple.sigs[i].offset);
79                         b->simple.sigs[i].sig_len = be32_to_cpup(p++);
80                         if (b->simple.sigs[i].sig_len > PNFS_BLOCK_UUID_LEN) {
81                                 pr_info("signature too long: %d\n",
82                                         b->simple.sigs[i].sig_len);
83                                 return -EIO;
84                         }
85
86                         p = xdr_inline_decode(xdr, b->simple.sigs[i].sig_len);
87                         if (!p)
88                                 return -EIO;
89                         memcpy(&b->simple.sigs[i].sig, p,
90                                 b->simple.sigs[i].sig_len);
91
92                         b->simple.len += 8 + 4 + \
93                                 (XDR_QUADLEN(b->simple.sigs[i].sig_len) << 2);
94                 }
95                 break;
96         case PNFS_BLOCK_VOLUME_SLICE:
97                 p = xdr_inline_decode(xdr, 8 + 8 + 4);
98                 if (!p)
99                         return -EIO;
100                 p = xdr_decode_hyper(p, &b->slice.start);
101                 p = xdr_decode_hyper(p, &b->slice.len);
102                 b->slice.volume = be32_to_cpup(p++);
103                 break;
104         case PNFS_BLOCK_VOLUME_CONCAT:
105                 p = xdr_inline_decode(xdr, 4);
106                 if (!p)
107                         return -EIO;
108                 b->concat.volumes_count = be32_to_cpup(p++);
109
110                 p = xdr_inline_decode(xdr, b->concat.volumes_count * 4);
111                 if (!p)
112                         return -EIO;
113                 for (i = 0; i < b->concat.volumes_count; i++)
114                         b->concat.volumes[i] = be32_to_cpup(p++);
115                 break;
116         case PNFS_BLOCK_VOLUME_STRIPE:
117                 p = xdr_inline_decode(xdr, 8 + 4);
118                 if (!p)
119                         return -EIO;
120                 p = xdr_decode_hyper(p, &b->stripe.chunk_size);
121                 b->stripe.volumes_count = be32_to_cpup(p++);
122
123                 p = xdr_inline_decode(xdr, b->stripe.volumes_count * 4);
124                 if (!p)
125                         return -EIO;
126                 for (i = 0; i < b->stripe.volumes_count; i++)
127                         b->stripe.volumes[i] = be32_to_cpup(p++);
128                 break;
129         case PNFS_BLOCK_VOLUME_SCSI:
130                 p = xdr_inline_decode(xdr, 4 + 4 + 4);
131                 if (!p)
132                         return -EIO;
133                 b->scsi.code_set = be32_to_cpup(p++);
134                 b->scsi.designator_type = be32_to_cpup(p++);
135                 b->scsi.designator_len = be32_to_cpup(p++);
136                 p = xdr_inline_decode(xdr, b->scsi.designator_len);
137                 if (!p)
138                         return -EIO;
139                 if (b->scsi.designator_len > 256)
140                         return -EIO;
141                 memcpy(&b->scsi.designator, p, b->scsi.designator_len);
142                 p = xdr_inline_decode(xdr, 8);
143                 if (!p)
144                         return -EIO;
145                 p = xdr_decode_hyper(p, &b->scsi.pr_key);
146                 break;
147         default:
148                 dprintk("unknown volume type!\n");
149                 return -EIO;
150         }
151
152         return 0;
153 }
154
155 static bool bl_map_simple(struct pnfs_block_dev *dev, u64 offset,
156                 struct pnfs_block_dev_map *map)
157 {
158         map->start = dev->start;
159         map->len = dev->len;
160         map->disk_offset = dev->disk_offset;
161         map->bdev = dev->bdev;
162         return true;
163 }
164
165 static bool bl_map_concat(struct pnfs_block_dev *dev, u64 offset,
166                 struct pnfs_block_dev_map *map)
167 {
168         int i;
169
170         for (i = 0; i < dev->nr_children; i++) {
171                 struct pnfs_block_dev *child = &dev->children[i];
172
173                 if (child->start > offset ||
174                     child->start + child->len <= offset)
175                         continue;
176
177                 child->map(child, offset - child->start, map);
178                 return true;
179         }
180
181         dprintk("%s: ran off loop!\n", __func__);
182         return false;
183 }
184
185 static bool bl_map_stripe(struct pnfs_block_dev *dev, u64 offset,
186                 struct pnfs_block_dev_map *map)
187 {
188         struct pnfs_block_dev *child;
189         u64 chunk;
190         u32 chunk_idx;
191         u64 disk_offset;
192
193         chunk = div_u64(offset, dev->chunk_size);
194         div_u64_rem(chunk, dev->nr_children, &chunk_idx);
195
196         if (chunk_idx > dev->nr_children) {
197                 dprintk("%s: invalid chunk idx %d (%lld/%lld)\n",
198                         __func__, chunk_idx, offset, dev->chunk_size);
199                 /* error, should not happen */
200                 return false;
201         }
202
203         /* truncate offset to the beginning of the stripe */
204         offset = chunk * dev->chunk_size;
205
206         /* disk offset of the stripe */
207         disk_offset = div_u64(offset, dev->nr_children);
208
209         child = &dev->children[chunk_idx];
210         child->map(child, disk_offset, map);
211
212         map->start += offset;
213         map->disk_offset += disk_offset;
214         map->len = dev->chunk_size;
215         return true;
216 }
217
218 static int
219 bl_parse_deviceid(struct nfs_server *server, struct pnfs_block_dev *d,
220                 struct pnfs_block_volume *volumes, int idx, gfp_t gfp_mask);
221
222
223 static int
224 bl_parse_simple(struct nfs_server *server, struct pnfs_block_dev *d,
225                 struct pnfs_block_volume *volumes, int idx, gfp_t gfp_mask)
226 {
227         struct pnfs_block_volume *v = &volumes[idx];
228         dev_t dev;
229
230         dev = bl_resolve_deviceid(server, v, gfp_mask);
231         if (!dev)
232                 return -EIO;
233
234         d->bdev = blkdev_get_by_dev(dev, FMODE_READ | FMODE_WRITE, NULL);
235         if (IS_ERR(d->bdev)) {
236                 printk(KERN_WARNING "pNFS: failed to open device %d:%d (%ld)\n",
237                         MAJOR(dev), MINOR(dev), PTR_ERR(d->bdev));
238                 return PTR_ERR(d->bdev);
239         }
240
241
242         d->len = i_size_read(d->bdev->bd_inode);
243         d->map = bl_map_simple;
244
245         printk(KERN_INFO "pNFS: using block device %s\n",
246                 d->bdev->bd_disk->disk_name);
247         return 0;
248 }
249
250 static bool
251 bl_validate_designator(struct pnfs_block_volume *v)
252 {
253         switch (v->scsi.designator_type) {
254         case PS_DESIGNATOR_EUI64:
255                 if (v->scsi.code_set != PS_CODE_SET_BINARY)
256                         return false;
257
258                 if (v->scsi.designator_len != 8 &&
259                     v->scsi.designator_len != 10 &&
260                     v->scsi.designator_len != 16)
261                         return false;
262
263                 return true;
264         case PS_DESIGNATOR_NAA:
265                 if (v->scsi.code_set != PS_CODE_SET_BINARY)
266                         return false;
267
268                 if (v->scsi.designator_len != 8 &&
269                     v->scsi.designator_len != 16)
270                         return false;
271
272                 return true;
273         case PS_DESIGNATOR_T10:
274         case PS_DESIGNATOR_NAME:
275                 pr_err("pNFS: unsupported designator "
276                         "(code set %d, type %d, len %d.\n",
277                         v->scsi.code_set,
278                         v->scsi.designator_type,
279                         v->scsi.designator_len);
280                 return false;
281         default:
282                 pr_err("pNFS: invalid designator "
283                         "(code set %d, type %d, len %d.\n",
284                         v->scsi.code_set,
285                         v->scsi.designator_type,
286                         v->scsi.designator_len);
287                 return false;
288         }
289 }
290
291 /*
292  * Try to open the udev path for the WWN.  At least on Debian the udev
293  * by-id path will always point to the dm-multipath device if one exists.
294  */
295 static struct block_device *
296 bl_open_udev_path(struct pnfs_block_volume *v)
297 {
298         struct block_device *bdev;
299         const char *devname;
300
301         devname = kasprintf(GFP_KERNEL, "/dev/disk/by-id/wwn-0x%*phN",
302                                 v->scsi.designator_len, v->scsi.designator);
303         if (!devname)
304                 return ERR_PTR(-ENOMEM);
305
306         bdev = blkdev_get_by_path(devname, FMODE_READ | FMODE_WRITE, NULL);
307         if (IS_ERR(bdev)) {
308                 pr_warn("pNFS: failed to open device %s (%ld)\n",
309                         devname, PTR_ERR(bdev));
310         }
311
312         kfree(devname);
313         return bdev;
314 }
315
316 /*
317  * Try to open the RH/Fedora specific dm-mpath udev path for this WWN, as the
318  * wwn- links will only point to the first discovered SCSI device there.
319  */
320 static struct block_device *
321 bl_open_dm_mpath_udev_path(struct pnfs_block_volume *v)
322 {
323         struct block_device *bdev;
324         const char *devname;
325
326         devname = kasprintf(GFP_KERNEL,
327                         "/dev/disk/by-id/dm-uuid-mpath-%d%*phN",
328                         v->scsi.designator_type,
329                         v->scsi.designator_len, v->scsi.designator);
330         if (!devname)
331                 return ERR_PTR(-ENOMEM);
332
333         bdev = blkdev_get_by_path(devname, FMODE_READ | FMODE_WRITE, NULL);
334         kfree(devname);
335         return bdev;
336 }
337
338 static int
339 bl_parse_scsi(struct nfs_server *server, struct pnfs_block_dev *d,
340                 struct pnfs_block_volume *volumes, int idx, gfp_t gfp_mask)
341 {
342         struct pnfs_block_volume *v = &volumes[idx];
343         const struct pr_ops *ops;
344         int error;
345
346         if (!bl_validate_designator(v))
347                 return -EINVAL;
348
349         d->bdev = bl_open_dm_mpath_udev_path(v);
350         if (IS_ERR(d->bdev))
351                 d->bdev = bl_open_udev_path(v);
352         if (IS_ERR(d->bdev))
353                 return PTR_ERR(d->bdev);
354
355         d->len = i_size_read(d->bdev->bd_inode);
356         d->map = bl_map_simple;
357         d->pr_key = v->scsi.pr_key;
358
359         pr_info("pNFS: using block device %s (reservation key 0x%llx)\n",
360                 d->bdev->bd_disk->disk_name, d->pr_key);
361
362         ops = d->bdev->bd_disk->fops->pr_ops;
363         if (!ops) {
364                 pr_err("pNFS: block device %s does not support reservations.",
365                                 d->bdev->bd_disk->disk_name);
366                 error = -EINVAL;
367                 goto out_blkdev_put;
368         }
369
370         error = ops->pr_register(d->bdev, 0, d->pr_key, true);
371         if (error) {
372                 pr_err("pNFS: failed to register key for block device %s.",
373                                 d->bdev->bd_disk->disk_name);
374                 goto out_blkdev_put;
375         }
376
377         d->pr_registered = true;
378         return 0;
379
380 out_blkdev_put:
381         blkdev_put(d->bdev, FMODE_READ | FMODE_WRITE);
382         return error;
383 }
384
385 static int
386 bl_parse_slice(struct nfs_server *server, struct pnfs_block_dev *d,
387                 struct pnfs_block_volume *volumes, int idx, gfp_t gfp_mask)
388 {
389         struct pnfs_block_volume *v = &volumes[idx];
390         int ret;
391
392         ret = bl_parse_deviceid(server, d, volumes, v->slice.volume, gfp_mask);
393         if (ret)
394                 return ret;
395
396         d->disk_offset = v->slice.start;
397         d->len = v->slice.len;
398         return 0;
399 }
400
401 static int
402 bl_parse_concat(struct nfs_server *server, struct pnfs_block_dev *d,
403                 struct pnfs_block_volume *volumes, int idx, gfp_t gfp_mask)
404 {
405         struct pnfs_block_volume *v = &volumes[idx];
406         u64 len = 0;
407         int ret, i;
408
409         d->children = kcalloc(v->concat.volumes_count,
410                         sizeof(struct pnfs_block_dev), GFP_KERNEL);
411         if (!d->children)
412                 return -ENOMEM;
413
414         for (i = 0; i < v->concat.volumes_count; i++) {
415                 ret = bl_parse_deviceid(server, &d->children[i],
416                                 volumes, v->concat.volumes[i], gfp_mask);
417                 if (ret)
418                         return ret;
419
420                 d->nr_children++;
421                 d->children[i].start += len;
422                 len += d->children[i].len;
423         }
424
425         d->len = len;
426         d->map = bl_map_concat;
427         return 0;
428 }
429
430 static int
431 bl_parse_stripe(struct nfs_server *server, struct pnfs_block_dev *d,
432                 struct pnfs_block_volume *volumes, int idx, gfp_t gfp_mask)
433 {
434         struct pnfs_block_volume *v = &volumes[idx];
435         u64 len = 0;
436         int ret, i;
437
438         d->children = kcalloc(v->stripe.volumes_count,
439                         sizeof(struct pnfs_block_dev), GFP_KERNEL);
440         if (!d->children)
441                 return -ENOMEM;
442
443         for (i = 0; i < v->stripe.volumes_count; i++) {
444                 ret = bl_parse_deviceid(server, &d->children[i],
445                                 volumes, v->stripe.volumes[i], gfp_mask);
446                 if (ret)
447                         return ret;
448
449                 d->nr_children++;
450                 len += d->children[i].len;
451         }
452
453         d->len = len;
454         d->chunk_size = v->stripe.chunk_size;
455         d->map = bl_map_stripe;
456         return 0;
457 }
458
459 static int
460 bl_parse_deviceid(struct nfs_server *server, struct pnfs_block_dev *d,
461                 struct pnfs_block_volume *volumes, int idx, gfp_t gfp_mask)
462 {
463         switch (volumes[idx].type) {
464         case PNFS_BLOCK_VOLUME_SIMPLE:
465                 return bl_parse_simple(server, d, volumes, idx, gfp_mask);
466         case PNFS_BLOCK_VOLUME_SLICE:
467                 return bl_parse_slice(server, d, volumes, idx, gfp_mask);
468         case PNFS_BLOCK_VOLUME_CONCAT:
469                 return bl_parse_concat(server, d, volumes, idx, gfp_mask);
470         case PNFS_BLOCK_VOLUME_STRIPE:
471                 return bl_parse_stripe(server, d, volumes, idx, gfp_mask);
472         case PNFS_BLOCK_VOLUME_SCSI:
473                 return bl_parse_scsi(server, d, volumes, idx, gfp_mask);
474         default:
475                 dprintk("unsupported volume type: %d\n", volumes[idx].type);
476                 return -EIO;
477         }
478 }
479
480 struct nfs4_deviceid_node *
481 bl_alloc_deviceid_node(struct nfs_server *server, struct pnfs_device *pdev,
482                 gfp_t gfp_mask)
483 {
484         struct nfs4_deviceid_node *node = NULL;
485         struct pnfs_block_volume *volumes;
486         struct pnfs_block_dev *top;
487         struct xdr_stream xdr;
488         struct xdr_buf buf;
489         struct page *scratch;
490         int nr_volumes, ret, i;
491         __be32 *p;
492
493         scratch = alloc_page(gfp_mask);
494         if (!scratch)
495                 goto out;
496
497         xdr_init_decode_pages(&xdr, &buf, pdev->pages, pdev->pglen);
498         xdr_set_scratch_buffer(&xdr, page_address(scratch), PAGE_SIZE);
499
500         p = xdr_inline_decode(&xdr, sizeof(__be32));
501         if (!p)
502                 goto out_free_scratch;
503         nr_volumes = be32_to_cpup(p++);
504
505         volumes = kcalloc(nr_volumes, sizeof(struct pnfs_block_volume),
506                           gfp_mask);
507         if (!volumes)
508                 goto out_free_scratch;
509
510         for (i = 0; i < nr_volumes; i++) {
511                 ret = nfs4_block_decode_volume(&xdr, &volumes[i]);
512                 if (ret < 0)
513                         goto out_free_volumes;
514         }
515
516         top = kzalloc(sizeof(*top), gfp_mask);
517         if (!top)
518                 goto out_free_volumes;
519
520         ret = bl_parse_deviceid(server, top, volumes, nr_volumes - 1, gfp_mask);
521         if (ret) {
522                 bl_free_device(top);
523                 kfree(top);
524                 goto out_free_volumes;
525         }
526
527         node = &top->node;
528         nfs4_init_deviceid_node(node, server, &pdev->dev_id);
529
530 out_free_volumes:
531         kfree(volumes);
532 out_free_scratch:
533         __free_page(scratch);
534 out:
535         return node;
536 }