drivers/nvme/host/core.c  (cascardo/linux.git @ 45fd6cca4a27eb611dffdf247b4a58d30a409adb)
1 /*
2  * NVM Express device driver
3  * Copyright (c) 2011-2014, Intel Corporation.
4  *
5  * This program is free software; you can redistribute it and/or modify it
6  * under the terms and conditions of the GNU General Public License,
7  * version 2, as published by the Free Software Foundation.
8  *
9  * This program is distributed in the hope it will be useful, but WITHOUT
10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
12  * more details.
13  */
14
15 #include <linux/blkdev.h>
16 #include <linux/blk-mq.h>
17 #include <linux/delay.h>
18 #include <linux/errno.h>
19 #include <linux/hdreg.h>
20 #include <linux/kernel.h>
21 #include <linux/module.h>
22 #include <linux/list_sort.h>
23 #include <linux/slab.h>
24 #include <linux/types.h>
25 #include <linux/pr.h>
26 #include <linux/ptrace.h>
27 #include <linux/nvme_ioctl.h>
28 #include <linux/t10-pi.h>
29 #include <scsi/sg.h>
30 #include <asm/unaligned.h>
31
32 #include "nvme.h"
33
34 #define NVME_MINORS             (1U << MINORBITS)
35
36 unsigned char admin_timeout = 60;
37 module_param(admin_timeout, byte, 0644);
38 MODULE_PARM_DESC(admin_timeout, "timeout in seconds for admin commands");
39 EXPORT_SYMBOL_GPL(admin_timeout);
40
41 unsigned char nvme_io_timeout = 30;
42 module_param_named(io_timeout, nvme_io_timeout, byte, 0644);
43 MODULE_PARM_DESC(io_timeout, "timeout in seconds for I/O");
44 EXPORT_SYMBOL_GPL(nvme_io_timeout);
45
46 unsigned char shutdown_timeout = 5;
47 module_param(shutdown_timeout, byte, 0644);
48 MODULE_PARM_DESC(shutdown_timeout, "timeout in seconds for controller shutdown");
49
50 static int nvme_major;
51 module_param(nvme_major, int, 0);
52
53 static int nvme_char_major;
54 module_param(nvme_char_major, int, 0);
55
56 static LIST_HEAD(nvme_ctrl_list);
57 static DEFINE_SPINLOCK(dev_list_lock);
58
59 static struct class *nvme_class;
60
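/*
 * blk-mq tag iterator callback used when a controller is being reset or torn
 * down: complete an already-started request with an abort status.  NVME_SC_DNR
 * is added when the queue is dying so the command will not be retried.
 */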
61 void nvme_cancel_request(struct request *req, void *data, bool reserved)
62 {
63         int status;
64
65         if (!blk_mq_request_started(req))
66                 return;
67
68         dev_dbg_ratelimited(((struct nvme_ctrl *) data)->device,
69                                 "Cancelling I/O %d", req->tag);
70
71         status = NVME_SC_ABORT_REQ;
72         if (blk_queue_dying(req->q))
73                 status |= NVME_SC_DNR;
74         blk_mq_complete_request(req, status);
75 }
76 EXPORT_SYMBOL_GPL(nvme_cancel_request);
77
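/*
 * Controller state machine.  Only the transitions listed below (for example
 * RESETTING -> LIVE, LIVE -> DELETING, DELETING -> DEAD) are permitted; the
 * transition is validated under ctrl->lock and the return value tells the
 * caller whether the new state was applied.
 */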
78 bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl,
79                 enum nvme_ctrl_state new_state)
80 {
81         enum nvme_ctrl_state old_state;
82         bool changed = false;
83         spin_lock_irq(&ctrl->lock);
84         old_state = ctrl->state;
85         switch (new_state) {
86         case NVME_CTRL_LIVE:
87                 switch (old_state) {
88                 case NVME_CTRL_RESETTING:
89                         changed = true;
90                         /* FALLTHRU */
91                 default:
92                         break;
93                 }
94                 break;
95         case NVME_CTRL_RESETTING:
96                 switch (old_state) {
97                 case NVME_CTRL_NEW:
98                 case NVME_CTRL_LIVE:
99                         changed = true;
100                         /* FALLTHRU */
101                 default:
102                         break;
103                 }
104                 break;
105         case NVME_CTRL_DELETING:
106                 switch (old_state) {
107                 case NVME_CTRL_LIVE:
108                 case NVME_CTRL_RESETTING:
109                         changed = true;
110                         /* FALLTHRU */
111                 default:
112                         break;
113                 }
114                 break;
115         case NVME_CTRL_DEAD:
116                 switch (old_state) {
117                 case NVME_CTRL_DELETING:
118                         changed = true;
119                         /* FALLTHRU */
120                 default:
121                         break;
122                 }
123                 break;
124         default:
125                 break;
126         }
127         if (changed)
128                 ctrl->state = new_state;
129
130         spin_unlock_irq(&ctrl->lock);
131
132         return changed;
133 }
134 EXPORT_SYMBOL_GPL(nvme_change_ctrl_state);
135
136 static void nvme_free_ns(struct kref *kref)
137 {
138         struct nvme_ns *ns = container_of(kref, struct nvme_ns, kref);
139
140         if (ns->type == NVME_NS_LIGHTNVM)
141                 nvme_nvm_unregister(ns->queue, ns->disk->disk_name);
142
143         spin_lock(&dev_list_lock);
144         ns->disk->private_data = NULL;
145         spin_unlock(&dev_list_lock);
146
147         put_disk(ns->disk);
148         ida_simple_remove(&ns->ctrl->ns_ida, ns->instance);
149         nvme_put_ctrl(ns->ctrl);
150         kfree(ns);
151 }
152
153 static void nvme_put_ns(struct nvme_ns *ns)
154 {
155         kref_put(&ns->kref, nvme_free_ns);
156 }
157
158 static struct nvme_ns *nvme_get_ns_from_disk(struct gendisk *disk)
159 {
160         struct nvme_ns *ns;
161
162         spin_lock(&dev_list_lock);
163         ns = disk->private_data;
164         if (ns) {
165                 if (!kref_get_unless_zero(&ns->kref))
166                         goto fail;
167                 if (!try_module_get(ns->ctrl->ops->module))
168                         goto fail_put_ns;
169         }
170         spin_unlock(&dev_list_lock);
171
172         return ns;
173
174 fail_put_ns:
175         kref_put(&ns->kref, nvme_free_ns);
176 fail:
177         spin_unlock(&dev_list_lock);
178         return NULL;
179 }
180
181 void nvme_requeue_req(struct request *req)
182 {
183         unsigned long flags;
184
185         blk_mq_requeue_request(req);
186         spin_lock_irqsave(req->q->queue_lock, flags);
187         if (!blk_queue_stopped(req->q))
188                 blk_mq_kick_requeue_list(req->q);
189         spin_unlock_irqrestore(req->q->queue_lock, flags);
190 }
191 EXPORT_SYMBOL_GPL(nvme_requeue_req);
192
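/*
 * Allocate a block layer request for an NVMe passthrough command.  The request
 * is marked REQ_TYPE_DRV_PRIV so nvme_setup_cmd() copies the command verbatim,
 * and no bio or data length is attached yet; callers map kernel or user
 * buffers onto the request afterwards.
 */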
193 struct request *nvme_alloc_request(struct request_queue *q,
194                 struct nvme_command *cmd, unsigned int flags)
195 {
196         struct request *req;
197
198         req = blk_mq_alloc_request(q, nvme_is_write(cmd), flags);
199         if (IS_ERR(req))
200                 return req;
201
202         req->cmd_type = REQ_TYPE_DRV_PRIV;
203         req->cmd_flags |= REQ_FAILFAST_DRIVER;
204         req->__data_len = 0;
205         req->__sector = (sector_t) -1;
206         req->bio = req->biotail = NULL;
207
208         req->cmd = (unsigned char *)cmd;
209         req->cmd_len = sizeof(struct nvme_command);
210
211         return req;
212 }
213 EXPORT_SYMBOL_GPL(nvme_alloc_request);
214
215 static inline void nvme_setup_flush(struct nvme_ns *ns,
216                 struct nvme_command *cmnd)
217 {
218         memset(cmnd, 0, sizeof(*cmnd));
219         cmnd->common.opcode = nvme_cmd_flush;
220         cmnd->common.nsid = cpu_to_le32(ns->ns_id);
221 }
222
223 static inline int nvme_setup_discard(struct nvme_ns *ns, struct request *req,
224                 struct nvme_command *cmnd)
225 {
226         struct nvme_dsm_range *range;
227         struct page *page;
228         int offset;
229         unsigned int nr_bytes = blk_rq_bytes(req);
230
231         range = kmalloc(sizeof(*range), GFP_ATOMIC);
232         if (!range)
233                 return BLK_MQ_RQ_QUEUE_BUSY;
234
235         range->cattr = cpu_to_le32(0);
236         range->nlb = cpu_to_le32(nr_bytes >> ns->lba_shift);
237         range->slba = cpu_to_le64(nvme_block_nr(ns, blk_rq_pos(req)));
238
239         memset(cmnd, 0, sizeof(*cmnd));
240         cmnd->dsm.opcode = nvme_cmd_dsm;
241         cmnd->dsm.nsid = cpu_to_le32(ns->ns_id);
242         cmnd->dsm.nr = 0;
243         cmnd->dsm.attributes = cpu_to_le32(NVME_DSMGMT_AD);
244
245         req->completion_data = range;
246         page = virt_to_page(range);
247         offset = offset_in_page(range);
248         blk_add_request_payload(req, page, offset, sizeof(*range));
249
250         /*
251          * we set __data_len back to the size of the area to be discarded
252          * on disk. This allows us to report completion on the full amount
253          * of blocks described by the request.
254          */
255         req->__data_len = nr_bytes;
256
257         return 0;
258 }
259
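/*
 * Build a read/write command from the block request.  The starting LBA and
 * length are converted from 512-byte sectors to device blocks (the length
 * field is zero's based), and protection information and DSM hints are derived
 * from the namespace format and the request flags.
 */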
260 static inline void nvme_setup_rw(struct nvme_ns *ns, struct request *req,
261                 struct nvme_command *cmnd)
262 {
263         u16 control = 0;
264         u32 dsmgmt = 0;
265
266         if (req->cmd_flags & REQ_FUA)
267                 control |= NVME_RW_FUA;
268         if (req->cmd_flags & (REQ_FAILFAST_DEV | REQ_RAHEAD))
269                 control |= NVME_RW_LR;
270
271         if (req->cmd_flags & REQ_RAHEAD)
272                 dsmgmt |= NVME_RW_DSM_FREQ_PREFETCH;
273
274         memset(cmnd, 0, sizeof(*cmnd));
275         cmnd->rw.opcode = (rq_data_dir(req) ? nvme_cmd_write : nvme_cmd_read);
276         cmnd->rw.command_id = req->tag;
277         cmnd->rw.nsid = cpu_to_le32(ns->ns_id);
278         cmnd->rw.slba = cpu_to_le64(nvme_block_nr(ns, blk_rq_pos(req)));
279         cmnd->rw.length = cpu_to_le16((blk_rq_bytes(req) >> ns->lba_shift) - 1);
280
281         if (ns->ms) {
282                 switch (ns->pi_type) {
283                 case NVME_NS_DPS_PI_TYPE3:
284                         control |= NVME_RW_PRINFO_PRCHK_GUARD;
285                         break;
286                 case NVME_NS_DPS_PI_TYPE1:
287                 case NVME_NS_DPS_PI_TYPE2:
288                         control |= NVME_RW_PRINFO_PRCHK_GUARD |
289                                         NVME_RW_PRINFO_PRCHK_REF;
290                         cmnd->rw.reftag = cpu_to_le32(
291                                         nvme_block_nr(ns, blk_rq_pos(req)));
292                         break;
293                 }
294                 if (!blk_integrity_rq(req))
295                         control |= NVME_RW_PRINFO_PRACT;
296         }
297
298         cmnd->rw.control = cpu_to_le16(control);
299         cmnd->rw.dsmgmt = cpu_to_le32(dsmgmt);
300 }
301
302 int nvme_setup_cmd(struct nvme_ns *ns, struct request *req,
303                 struct nvme_command *cmd)
304 {
305         int ret = 0;
306
307         if (req->cmd_type == REQ_TYPE_DRV_PRIV)
308                 memcpy(cmd, req->cmd, sizeof(*cmd));
309         else if (req_op(req) == REQ_OP_FLUSH)
310                 nvme_setup_flush(ns, cmd);
311         else if (req_op(req) == REQ_OP_DISCARD)
312                 ret = nvme_setup_discard(ns, req, cmd);
313         else
314                 nvme_setup_rw(ns, req, cmd);
315
316         return ret;
317 }
318 EXPORT_SYMBOL_GPL(nvme_setup_cmd);
319
320 /*
321  * Returns 0 on success.  If the result is negative, it's a Linux error code;
322  * if the result is positive, it's an NVM Express status code
323  */
324 int __nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd,
325                 struct nvme_completion *cqe, void *buffer, unsigned bufflen,
326                 unsigned timeout)
327 {
328         struct request *req;
329         int ret;
330
331         req = nvme_alloc_request(q, cmd, 0);
332         if (IS_ERR(req))
333                 return PTR_ERR(req);
334
335         req->timeout = timeout ? timeout : ADMIN_TIMEOUT;
336         req->special = cqe;
337
338         if (buffer && bufflen) {
339                 ret = blk_rq_map_kern(q, req, buffer, bufflen, GFP_KERNEL);
340                 if (ret)
341                         goto out;
342         }
343
344         blk_execute_rq(req->q, NULL, req, 0);
345         ret = req->errors;
346  out:
347         blk_mq_free_request(req);
348         return ret;
349 }
350
351 int nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd,
352                 void *buffer, unsigned bufflen)
353 {
354         return __nvme_submit_sync_cmd(q, cmd, NULL, buffer, bufflen, 0);
355 }
356 EXPORT_SYMBOL_GPL(nvme_submit_sync_cmd);
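/*
 * Illustrative sketch of a nvme_submit_sync_cmd() caller (not code from this
 * file; "buf"/"buf_len" and the opcode are placeholders):
 *
 *	struct nvme_command c = { };
 *	int error;
 *
 *	c.common.opcode = 0xc0;		// hypothetical vendor-specific opcode
 *	c.common.nsid = cpu_to_le32(0);
 *	error = nvme_submit_sync_cmd(ctrl->admin_q, &c, buf, buf_len);
 *
 * A zero return means success; negative values are Linux errnos and positive
 * values are NVMe status codes, as documented above __nvme_submit_sync_cmd().
 */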
357
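/*
 * Like __nvme_submit_sync_cmd(), but the data buffer (and an optional separate
 * metadata buffer) lives in user space: data is mapped with blk_rq_map_user()
 * and metadata is bounced through a kernel allocation attached as a bio
 * integrity payload.  This backs the passthrough ioctls below.
 */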
358 int __nvme_submit_user_cmd(struct request_queue *q, struct nvme_command *cmd,
359                 void __user *ubuffer, unsigned bufflen,
360                 void __user *meta_buffer, unsigned meta_len, u32 meta_seed,
361                 u32 *result, unsigned timeout)
362 {
363         bool write = nvme_is_write(cmd);
364         struct nvme_completion cqe;
365         struct nvme_ns *ns = q->queuedata;
366         struct gendisk *disk = ns ? ns->disk : NULL;
367         struct request *req;
368         struct bio *bio = NULL;
369         void *meta = NULL;
370         int ret;
371
372         req = nvme_alloc_request(q, cmd, 0);
373         if (IS_ERR(req))
374                 return PTR_ERR(req);
375
376         req->timeout = timeout ? timeout : ADMIN_TIMEOUT;
377         req->special = &cqe;
378
379         if (ubuffer && bufflen) {
380                 ret = blk_rq_map_user(q, req, NULL, ubuffer, bufflen,
381                                 GFP_KERNEL);
382                 if (ret)
383                         goto out;
384                 bio = req->bio;
385
386                 if (!disk)
387                         goto submit;
388                 bio->bi_bdev = bdget_disk(disk, 0);
389                 if (!bio->bi_bdev) {
390                         ret = -ENODEV;
391                         goto out_unmap;
392                 }
393
394                 if (meta_buffer && meta_len) {
395                         struct bio_integrity_payload *bip;
396
397                         meta = kmalloc(meta_len, GFP_KERNEL);
398                         if (!meta) {
399                                 ret = -ENOMEM;
400                                 goto out_unmap;
401                         }
402
403                         if (write) {
404                                 if (copy_from_user(meta, meta_buffer,
405                                                 meta_len)) {
406                                         ret = -EFAULT;
407                                         goto out_free_meta;
408                                 }
409                         }
410
411                         bip = bio_integrity_alloc(bio, GFP_KERNEL, 1);
412                         if (IS_ERR(bip)) {
413                                 ret = PTR_ERR(bip);
414                                 goto out_free_meta;
415                         }
416
417                         bip->bip_iter.bi_size = meta_len;
418                         bip->bip_iter.bi_sector = meta_seed;
419
420                         ret = bio_integrity_add_page(bio, virt_to_page(meta),
421                                         meta_len, offset_in_page(meta));
422                         if (ret != meta_len) {
423                                 ret = -ENOMEM;
424                                 goto out_free_meta;
425                         }
426                 }
427         }
428  submit:
429         blk_execute_rq(req->q, disk, req, 0);
430         ret = req->errors;
431         if (result)
432                 *result = le32_to_cpu(cqe.result);
433         if (meta && !ret && !write) {
434                 if (copy_to_user(meta_buffer, meta, meta_len))
435                         ret = -EFAULT;
436         }
437  out_free_meta:
438         kfree(meta);
439  out_unmap:
440         if (bio) {
441                 if (disk && bio->bi_bdev)
442                         bdput(bio->bi_bdev);
443                 blk_rq_unmap_user(bio);
444         }
445  out:
446         blk_mq_free_request(req);
447         return ret;
448 }
449
450 int nvme_submit_user_cmd(struct request_queue *q, struct nvme_command *cmd,
451                 void __user *ubuffer, unsigned bufflen, u32 *result,
452                 unsigned timeout)
453 {
454         return __nvme_submit_user_cmd(q, cmd, ubuffer, bufflen, NULL, 0, 0,
455                         result, timeout);
456 }
457
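/*
 * Read the 4KB Identify Controller data structure (CNS 01h).  On success the
 * caller owns *id and must kfree() it.
 */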
458 int nvme_identify_ctrl(struct nvme_ctrl *dev, struct nvme_id_ctrl **id)
459 {
460         struct nvme_command c = { };
461         int error;
462
463         /* gcc-4.4.4 (at least) has issues with initializers and anon unions */
464         c.identify.opcode = nvme_admin_identify;
465         c.identify.cns = cpu_to_le32(1);
466
467         *id = kmalloc(sizeof(struct nvme_id_ctrl), GFP_KERNEL);
468         if (!*id)
469                 return -ENOMEM;
470
471         error = nvme_submit_sync_cmd(dev->admin_q, &c, *id,
472                         sizeof(struct nvme_id_ctrl));
473         if (error)
474                 kfree(*id);
475         return error;
476 }
477
478 static int nvme_identify_ns_list(struct nvme_ctrl *dev, unsigned nsid, __le32 *ns_list)
479 {
480         struct nvme_command c = { };
481
482         c.identify.opcode = nvme_admin_identify;
483         c.identify.cns = cpu_to_le32(2);
484         c.identify.nsid = cpu_to_le32(nsid);
485         return nvme_submit_sync_cmd(dev->admin_q, &c, ns_list, 0x1000);
486 }
487
488 int nvme_identify_ns(struct nvme_ctrl *dev, unsigned nsid,
489                 struct nvme_id_ns **id)
490 {
491         struct nvme_command c = { };
492         int error;
493
494         /* gcc-4.4.4 (at least) has issues with initializers and anon unions */
495         c.identify.opcode = nvme_admin_identify;
496         c.identify.nsid = cpu_to_le32(nsid);
497
498         *id = kmalloc(sizeof(struct nvme_id_ns), GFP_KERNEL);
499         if (!*id)
500                 return -ENOMEM;
501
502         error = nvme_submit_sync_cmd(dev->admin_q, &c, *id,
503                         sizeof(struct nvme_id_ns));
504         if (error)
505                 kfree(*id);
506         return error;
507 }
508
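/*
 * Issue a Get Features admin command; the feature-specific value is returned
 * through *result from Dword 0 of the completion entry.
 */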
509 int nvme_get_features(struct nvme_ctrl *dev, unsigned fid, unsigned nsid,
510                                         dma_addr_t dma_addr, u32 *result)
511 {
512         struct nvme_command c;
513         struct nvme_completion cqe;
514         int ret;
515
516         memset(&c, 0, sizeof(c));
517         c.features.opcode = nvme_admin_get_features;
518         c.features.nsid = cpu_to_le32(nsid);
519         c.features.prp1 = cpu_to_le64(dma_addr);
520         c.features.fid = cpu_to_le32(fid);
521
522         ret = __nvme_submit_sync_cmd(dev->admin_q, &c, &cqe, NULL, 0, 0);
523         if (ret >= 0)
524                 *result = le32_to_cpu(cqe.result);
525         return ret;
526 }
527
528 int nvme_set_features(struct nvme_ctrl *dev, unsigned fid, unsigned dword11,
529                                         dma_addr_t dma_addr, u32 *result)
530 {
531         struct nvme_command c;
532         struct nvme_completion cqe;
533         int ret;
534
535         memset(&c, 0, sizeof(c));
536         c.features.opcode = nvme_admin_set_features;
537         c.features.prp1 = cpu_to_le64(dma_addr);
538         c.features.fid = cpu_to_le32(fid);
539         c.features.dword11 = cpu_to_le32(dword11);
540
541         ret = __nvme_submit_sync_cmd(dev->admin_q, &c, &cqe, NULL, 0, 0);
542         if (ret >= 0)
543                 *result = le32_to_cpu(cqe.result);
544         return ret;
545 }
546
547 int nvme_get_log_page(struct nvme_ctrl *dev, struct nvme_smart_log **log)
548 {
549         struct nvme_command c = { };
550         int error;
551
552         c.common.opcode = nvme_admin_get_log_page;
553         c.common.nsid = cpu_to_le32(0xFFFFFFFF);
554         c.common.cdw10[0] = cpu_to_le32(
555                         (((sizeof(struct nvme_smart_log) / 4) - 1) << 16) |
556                          NVME_LOG_SMART);
557
558         *log = kmalloc(sizeof(struct nvme_smart_log), GFP_KERNEL);
559         if (!*log)
560                 return -ENOMEM;
561
562         error = nvme_submit_sync_cmd(dev->admin_q, &c, *log,
563                         sizeof(struct nvme_smart_log));
564         if (error)
565                 kfree(*log);
566         return error;
567 }
568
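/*
 * Negotiate the number of I/O queues via the Number of Queues feature.  Both
 * the requested and returned counts are zero's based, with submission queues
 * in the low 16 bits and completion queues in the high 16 bits.  For example,
 * asking for 8 queues sends 0x00070007; if the controller answers 0x00030005,
 * min(5, 3) + 1 = 4 I/O queues are usable.
 */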
569 int nvme_set_queue_count(struct nvme_ctrl *ctrl, int *count)
570 {
571         u32 q_count = (*count - 1) | ((*count - 1) << 16);
572         u32 result;
573         int status, nr_io_queues;
574
575         status = nvme_set_features(ctrl, NVME_FEAT_NUM_QUEUES, q_count, 0,
576                         &result);
577         if (status)
578                 return status;
579
580         nr_io_queues = min(result & 0xffff, result >> 16) + 1;
581         *count = min(*count, nr_io_queues);
582         return 0;
583 }
584 EXPORT_SYMBOL_GPL(nvme_set_queue_count);
585
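/*
 * NVME_IOCTL_SUBMIT_IO: submit a single read, write or compare described by a
 * struct nvme_user_io.  Only those three opcodes are accepted; the data (and,
 * for separate-metadata formats, the metadata buffer) comes from user space
 * and the block count is zero's based.
 */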
586 static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio)
587 {
588         struct nvme_user_io io;
589         struct nvme_command c;
590         unsigned length, meta_len;
591         void __user *metadata;
592
593         if (copy_from_user(&io, uio, sizeof(io)))
594                 return -EFAULT;
595         if (io.flags)
596                 return -EINVAL;
597
598         switch (io.opcode) {
599         case nvme_cmd_write:
600         case nvme_cmd_read:
601         case nvme_cmd_compare:
602                 break;
603         default:
604                 return -EINVAL;
605         }
606
607         length = (io.nblocks + 1) << ns->lba_shift;
608         meta_len = (io.nblocks + 1) * ns->ms;
609         metadata = (void __user *)(uintptr_t)io.metadata;
610
611         if (ns->ext) {
612                 length += meta_len;
613                 meta_len = 0;
614         } else if (meta_len) {
615                 if ((io.metadata & 3) || !io.metadata)
616                         return -EINVAL;
617         }
618
619         memset(&c, 0, sizeof(c));
620         c.rw.opcode = io.opcode;
621         c.rw.flags = io.flags;
622         c.rw.nsid = cpu_to_le32(ns->ns_id);
623         c.rw.slba = cpu_to_le64(io.slba);
624         c.rw.length = cpu_to_le16(io.nblocks);
625         c.rw.control = cpu_to_le16(io.control);
626         c.rw.dsmgmt = cpu_to_le32(io.dsmgmt);
627         c.rw.reftag = cpu_to_le32(io.reftag);
628         c.rw.apptag = cpu_to_le16(io.apptag);
629         c.rw.appmask = cpu_to_le16(io.appmask);
630
631         return __nvme_submit_user_cmd(ns->queue, &c,
632                         (void __user *)(uintptr_t)io.addr, length,
633                         metadata, meta_len, io.slba, NULL, 0);
634 }
635
636 static int nvme_user_cmd(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
637                         struct nvme_passthru_cmd __user *ucmd)
638 {
639         struct nvme_passthru_cmd cmd;
640         struct nvme_command c;
641         unsigned timeout = 0;
642         int status;
643
644         if (!capable(CAP_SYS_ADMIN))
645                 return -EACCES;
646         if (copy_from_user(&cmd, ucmd, sizeof(cmd)))
647                 return -EFAULT;
648         if (cmd.flags)
649                 return -EINVAL;
650
651         memset(&c, 0, sizeof(c));
652         c.common.opcode = cmd.opcode;
653         c.common.flags = cmd.flags;
654         c.common.nsid = cpu_to_le32(cmd.nsid);
655         c.common.cdw2[0] = cpu_to_le32(cmd.cdw2);
656         c.common.cdw2[1] = cpu_to_le32(cmd.cdw3);
657         c.common.cdw10[0] = cpu_to_le32(cmd.cdw10);
658         c.common.cdw10[1] = cpu_to_le32(cmd.cdw11);
659         c.common.cdw10[2] = cpu_to_le32(cmd.cdw12);
660         c.common.cdw10[3] = cpu_to_le32(cmd.cdw13);
661         c.common.cdw10[4] = cpu_to_le32(cmd.cdw14);
662         c.common.cdw10[5] = cpu_to_le32(cmd.cdw15);
663
664         if (cmd.timeout_ms)
665                 timeout = msecs_to_jiffies(cmd.timeout_ms);
666
667         status = nvme_submit_user_cmd(ns ? ns->queue : ctrl->admin_q, &c,
668                         (void __user *)(uintptr_t)cmd.addr, cmd.data_len,
669                         &cmd.result, timeout);
670         if (status >= 0) {
671                 if (put_user(cmd.result, &ucmd->result))
672                         return -EFAULT;
673         }
674
675         return status;
676 }
677
678 static int nvme_ioctl(struct block_device *bdev, fmode_t mode,
679                 unsigned int cmd, unsigned long arg)
680 {
681         struct nvme_ns *ns = bdev->bd_disk->private_data;
682
683         switch (cmd) {
684         case NVME_IOCTL_ID:
685                 force_successful_syscall_return();
686                 return ns->ns_id;
687         case NVME_IOCTL_ADMIN_CMD:
688                 return nvme_user_cmd(ns->ctrl, NULL, (void __user *)arg);
689         case NVME_IOCTL_IO_CMD:
690                 return nvme_user_cmd(ns->ctrl, ns, (void __user *)arg);
691         case NVME_IOCTL_SUBMIT_IO:
692                 return nvme_submit_io(ns, (void __user *)arg);
693 #ifdef CONFIG_BLK_DEV_NVME_SCSI
694         case SG_GET_VERSION_NUM:
695                 return nvme_sg_get_version_num((void __user *)arg);
696         case SG_IO:
697                 return nvme_sg_io(ns, (void __user *)arg);
698 #endif
699         default:
700                 return -ENOTTY;
701         }
702 }
703
704 #ifdef CONFIG_COMPAT
705 static int nvme_compat_ioctl(struct block_device *bdev, fmode_t mode,
706                         unsigned int cmd, unsigned long arg)
707 {
708         switch (cmd) {
709         case SG_IO:
710                 return -ENOIOCTLCMD;
711         }
712         return nvme_ioctl(bdev, mode, cmd, arg);
713 }
714 #else
715 #define nvme_compat_ioctl       NULL
716 #endif
717
718 static int nvme_open(struct block_device *bdev, fmode_t mode)
719 {
720         return nvme_get_ns_from_disk(bdev->bd_disk) ? 0 : -ENXIO;
721 }
722
723 static void nvme_release(struct gendisk *disk, fmode_t mode)
724 {
725         struct nvme_ns *ns = disk->private_data;
726
727         module_put(ns->ctrl->ops->module);
728         nvme_put_ns(ns);
729 }
730
731 static int nvme_getgeo(struct block_device *bdev, struct hd_geometry *geo)
732 {
733         /* some standard values */
734         geo->heads = 1 << 6;
735         geo->sectors = 1 << 5;
736         geo->cylinders = get_capacity(bdev->bd_disk) >> 11;
737         return 0;
738 }
739
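/*
 * Register a T10 protection information profile with the block layer so that
 * PI-formatted namespaces get their guard/reference tags generated and checked.
 * Compiles to a no-op stub without CONFIG_BLK_DEV_INTEGRITY.
 */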
740 #ifdef CONFIG_BLK_DEV_INTEGRITY
741 static void nvme_init_integrity(struct nvme_ns *ns)
742 {
743         struct blk_integrity integrity = { };
744
745         switch (ns->pi_type) {
746         case NVME_NS_DPS_PI_TYPE3:
747                 integrity.profile = &t10_pi_type3_crc;
748                 integrity.tag_size = sizeof(u16) + sizeof(u32);
749                 integrity.flags |= BLK_INTEGRITY_DEVICE_CAPABLE;
750                 break;
751         case NVME_NS_DPS_PI_TYPE1:
752         case NVME_NS_DPS_PI_TYPE2:
753                 integrity.profile = &t10_pi_type1_crc;
754                 integrity.tag_size = sizeof(u16);
755                 integrity.flags |= BLK_INTEGRITY_DEVICE_CAPABLE;
756                 break;
757         default:
758                 integrity.profile = NULL;
759                 break;
760         }
761         integrity.tuple_size = ns->ms;
762         blk_integrity_register(ns->disk, &integrity);
763         blk_queue_max_integrity_segments(ns->queue, 1);
764 }
765 #else
766 static void nvme_init_integrity(struct nvme_ns *ns)
767 {
768 }
769 #endif /* CONFIG_BLK_DEV_INTEGRITY */
770
771 static void nvme_config_discard(struct nvme_ns *ns)
772 {
773         struct nvme_ctrl *ctrl = ns->ctrl;
774         u32 logical_block_size = queue_logical_block_size(ns->queue);
775
776         if (ctrl->quirks & NVME_QUIRK_DISCARD_ZEROES)
777                 ns->queue->limits.discard_zeroes_data = 1;
778         else
779                 ns->queue->limits.discard_zeroes_data = 0;
780
781         ns->queue->limits.discard_alignment = logical_block_size;
782         ns->queue->limits.discard_granularity = logical_block_size;
783         blk_queue_max_discard_sectors(ns->queue, UINT_MAX);
784         queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, ns->queue);
785 }
786
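/*
 * Re-read the Identify Namespace data and bring the gendisk in sync with it:
 * logical block size, metadata and protection settings, capacity and discard
 * support, plus the EUI-64/NGUID identifiers on NVMe 1.1/1.2 controllers.
 * LightNVM-capable namespaces are handed over to the LightNVM subsystem.
 */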
787 static int nvme_revalidate_disk(struct gendisk *disk)
788 {
789         struct nvme_ns *ns = disk->private_data;
790         struct nvme_id_ns *id;
791         u8 lbaf, pi_type;
792         u16 old_ms;
793         unsigned short bs;
794
795         if (test_bit(NVME_NS_DEAD, &ns->flags)) {
796                 set_capacity(disk, 0);
797                 return -ENODEV;
798         }
799         if (nvme_identify_ns(ns->ctrl, ns->ns_id, &id)) {
800                 dev_warn(disk_to_dev(ns->disk), "%s: Identify failure\n",
801                                 __func__);
802                 return -ENODEV;
803         }
804         if (id->ncap == 0) {
805                 kfree(id);
806                 return -ENODEV;
807         }
808
809         if (nvme_nvm_ns_supported(ns, id) && ns->type != NVME_NS_LIGHTNVM) {
810                 if (nvme_nvm_register(ns->queue, disk->disk_name)) {
811                         dev_warn(disk_to_dev(ns->disk),
812                                 "%s: LightNVM init failure\n", __func__);
813                         kfree(id);
814                         return -ENODEV;
815                 }
816                 ns->type = NVME_NS_LIGHTNVM;
817         }
818
819         if (ns->ctrl->vs >= NVME_VS(1, 1))
820                 memcpy(ns->eui, id->eui64, sizeof(ns->eui));
821         if (ns->ctrl->vs >= NVME_VS(1, 2))
822                 memcpy(ns->uuid, id->nguid, sizeof(ns->uuid));
823
824         old_ms = ns->ms;
825         lbaf = id->flbas & NVME_NS_FLBAS_LBA_MASK;
826         ns->lba_shift = id->lbaf[lbaf].ds;
827         ns->ms = le16_to_cpu(id->lbaf[lbaf].ms);
828         ns->ext = ns->ms && (id->flbas & NVME_NS_FLBAS_META_EXT);
829
830         /*
831          * If identify did not report an LBA size, fall back to a 512 byte block
832          * size so the block layer is usable before read/write fails for 0 capacity.
833          */
834         if (ns->lba_shift == 0)
835                 ns->lba_shift = 9;
836         bs = 1 << ns->lba_shift;
837         /* XXX: PI is only supported when the metadata size equals the t10 pi tuple size */
838         pi_type = ns->ms == sizeof(struct t10_pi_tuple) ?
839                                         id->dps & NVME_NS_DPS_PI_MASK : 0;
840
841         blk_mq_freeze_queue(disk->queue);
842         if (blk_get_integrity(disk) && (ns->pi_type != pi_type ||
843                                 ns->ms != old_ms ||
844                                 bs != queue_logical_block_size(disk->queue) ||
845                                 (ns->ms && ns->ext)))
846                 blk_integrity_unregister(disk);
847
848         ns->pi_type = pi_type;
849         blk_queue_logical_block_size(ns->queue, bs);
850
851         if (ns->ms && !blk_get_integrity(disk) && !ns->ext)
852                 nvme_init_integrity(ns);
853         if (ns->ms && !(ns->ms == 8 && ns->pi_type) && !blk_get_integrity(disk))
854                 set_capacity(disk, 0);
855         else
856                 set_capacity(disk, le64_to_cpup(&id->nsze) << (ns->lba_shift - 9));
857
858         if (ns->ctrl->oncs & NVME_CTRL_ONCS_DSM)
859                 nvme_config_discard(ns);
860         blk_mq_unfreeze_queue(disk->queue);
861
862         kfree(id);
863         return 0;
864 }
865
866 static char nvme_pr_type(enum pr_type type)
867 {
868         switch (type) {
869         case PR_WRITE_EXCLUSIVE:
870                 return 1;
871         case PR_EXCLUSIVE_ACCESS:
872                 return 2;
873         case PR_WRITE_EXCLUSIVE_REG_ONLY:
874                 return 3;
875         case PR_EXCLUSIVE_ACCESS_REG_ONLY:
876                 return 4;
877         case PR_WRITE_EXCLUSIVE_ALL_REGS:
878                 return 5;
879         case PR_EXCLUSIVE_ACCESS_ALL_REGS:
880                 return 6;
881         default:
882                 return 0;
883         }
884 }
885
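/*
 * Persistent reservation support: each block layer pr_ops operation is
 * translated into the corresponding NVMe reservation command.  cdw10 carries
 * the action in its low bits, the "ignore existing key" flag in bit 3 and the
 * reservation type in bits 15:8; the current and new keys travel in a 16-byte
 * data payload.
 */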
886 static int nvme_pr_command(struct block_device *bdev, u32 cdw10,
887                                 u64 key, u64 sa_key, u8 op)
888 {
889         struct nvme_ns *ns = bdev->bd_disk->private_data;
890         struct nvme_command c;
891         u8 data[16] = { 0, };
892
893         put_unaligned_le64(key, &data[0]);
894         put_unaligned_le64(sa_key, &data[8]);
895
896         memset(&c, 0, sizeof(c));
897         c.common.opcode = op;
898         c.common.nsid = cpu_to_le32(ns->ns_id);
899         c.common.cdw10[0] = cpu_to_le32(cdw10);
900
901         return nvme_submit_sync_cmd(ns->queue, &c, data, 16);
902 }
903
904 static int nvme_pr_register(struct block_device *bdev, u64 old,
905                 u64 new, unsigned flags)
906 {
907         u32 cdw10;
908
909         if (flags & ~PR_FL_IGNORE_KEY)
910                 return -EOPNOTSUPP;
911
912         cdw10 = old ? 2 : 0;
913         cdw10 |= (flags & PR_FL_IGNORE_KEY) ? 1 << 3 : 0;
914         cdw10 |= (1 << 30) | (1 << 31); /* PTPL=1 */
915         return nvme_pr_command(bdev, cdw10, old, new, nvme_cmd_resv_register);
916 }
917
918 static int nvme_pr_reserve(struct block_device *bdev, u64 key,
919                 enum pr_type type, unsigned flags)
920 {
921         u32 cdw10;
922
923         if (flags & ~PR_FL_IGNORE_KEY)
924                 return -EOPNOTSUPP;
925
926         cdw10 = nvme_pr_type(type) << 8;
927         cdw10 |= ((flags & PR_FL_IGNORE_KEY) ? 1 << 3 : 0);
928         return nvme_pr_command(bdev, cdw10, key, 0, nvme_cmd_resv_acquire);
929 }
930
931 static int nvme_pr_preempt(struct block_device *bdev, u64 old, u64 new,
932                 enum pr_type type, bool abort)
933 {
934         u32 cdw10 = (nvme_pr_type(type) << 8) | (abort ? 2 : 1);
935         return nvme_pr_command(bdev, cdw10, old, new, nvme_cmd_resv_acquire);
936 }
937
938 static int nvme_pr_clear(struct block_device *bdev, u64 key)
939 {
940         u32 cdw10 = 1 | (key ? 1 << 3 : 0);
941         return nvme_pr_command(bdev, cdw10, key, 0, nvme_cmd_resv_register);
942 }
943
944 static int nvme_pr_release(struct block_device *bdev, u64 key, enum pr_type type)
945 {
946         u32 cdw10 = (nvme_pr_type(type) << 8) | (key ? 1 << 3 : 0);
947         return nvme_pr_command(bdev, cdw10, key, 0, nvme_cmd_resv_release);
948 }
949
950 static const struct pr_ops nvme_pr_ops = {
951         .pr_register    = nvme_pr_register,
952         .pr_reserve     = nvme_pr_reserve,
953         .pr_release     = nvme_pr_release,
954         .pr_preempt     = nvme_pr_preempt,
955         .pr_clear       = nvme_pr_clear,
956 };
957
958 static const struct block_device_operations nvme_fops = {
959         .owner          = THIS_MODULE,
960         .ioctl          = nvme_ioctl,
961         .compat_ioctl   = nvme_compat_ioctl,
962         .open           = nvme_open,
963         .release        = nvme_release,
964         .getgeo         = nvme_getgeo,
965         .revalidate_disk= nvme_revalidate_disk,
966         .pr_ops         = &nvme_pr_ops,
967 };
968
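/*
 * Poll CSTS.RDY until it reaches the expected state.  The timeout is derived
 * from CAP.TO, which is specified in units of 500 milliseconds.
 */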
969 static int nvme_wait_ready(struct nvme_ctrl *ctrl, u64 cap, bool enabled)
970 {
971         unsigned long timeout =
972                 ((NVME_CAP_TIMEOUT(cap) + 1) * HZ / 2) + jiffies;
973         u32 csts, bit = enabled ? NVME_CSTS_RDY : 0;
974         int ret;
975
976         while ((ret = ctrl->ops->reg_read32(ctrl, NVME_REG_CSTS, &csts)) == 0) {
977                 if ((csts & NVME_CSTS_RDY) == bit)
978                         break;
979
980                 msleep(100);
981                 if (fatal_signal_pending(current))
982                         return -EINTR;
983                 if (time_after(jiffies, timeout)) {
984                         dev_err(ctrl->device,
985                                 "Device not ready; aborting %s\n", enabled ?
986                                                 "initialisation" : "reset");
987                         return -ENODEV;
988                 }
989         }
990
991         return ret;
992 }
993
994 /*
995  * If the device has been passed off to us in an enabled state, just clear
996  * the enabled bit.  The spec says we should set the 'shutdown notification
997  * bits', but doing so may cause the device to complete commands to the
998  * admin queue ... and we don't know what memory that might be pointing at!
999  */
1000 int nvme_disable_ctrl(struct nvme_ctrl *ctrl, u64 cap)
1001 {
1002         int ret;
1003
1004         ctrl->ctrl_config &= ~NVME_CC_SHN_MASK;
1005         ctrl->ctrl_config &= ~NVME_CC_ENABLE;
1006
1007         ret = ctrl->ops->reg_write32(ctrl, NVME_REG_CC, ctrl->ctrl_config);
1008         if (ret)
1009                 return ret;
1010         return nvme_wait_ready(ctrl, cap, false);
1011 }
1012 EXPORT_SYMBOL_GPL(nvme_disable_ctrl);
1013
1014 int nvme_enable_ctrl(struct nvme_ctrl *ctrl, u64 cap)
1015 {
1016         /*
1017          * Default to a 4K page size, with the intention to update this
1018          * path in the future to accommodate architectures with differing
1019          * kernel and IO page sizes.
1020          */
1021         unsigned dev_page_min = NVME_CAP_MPSMIN(cap) + 12, page_shift = 12;
1022         int ret;
1023
1024         if (page_shift < dev_page_min) {
1025                 dev_err(ctrl->device,
1026                         "Minimum device page size %u too large for host (%u)\n",
1027                         1 << dev_page_min, 1 << page_shift);
1028                 return -ENODEV;
1029         }
1030
1031         ctrl->page_size = 1 << page_shift;
1032
1033         ctrl->ctrl_config = NVME_CC_CSS_NVM;
1034         ctrl->ctrl_config |= (page_shift - 12) << NVME_CC_MPS_SHIFT;
1035         ctrl->ctrl_config |= NVME_CC_ARB_RR | NVME_CC_SHN_NONE;
1036         ctrl->ctrl_config |= NVME_CC_IOSQES | NVME_CC_IOCQES;
1037         ctrl->ctrl_config |= NVME_CC_ENABLE;
1038
1039         ret = ctrl->ops->reg_write32(ctrl, NVME_REG_CC, ctrl->ctrl_config);
1040         if (ret)
1041                 return ret;
1042         return nvme_wait_ready(ctrl, cap, true);
1043 }
1044 EXPORT_SYMBOL_GPL(nvme_enable_ctrl);
1045
1046 int nvme_shutdown_ctrl(struct nvme_ctrl *ctrl)
1047 {
1048         unsigned long timeout = SHUTDOWN_TIMEOUT + jiffies;
1049         u32 csts;
1050         int ret;
1051
1052         ctrl->ctrl_config &= ~NVME_CC_SHN_MASK;
1053         ctrl->ctrl_config |= NVME_CC_SHN_NORMAL;
1054
1055         ret = ctrl->ops->reg_write32(ctrl, NVME_REG_CC, ctrl->ctrl_config);
1056         if (ret)
1057                 return ret;
1058
1059         while ((ret = ctrl->ops->reg_read32(ctrl, NVME_REG_CSTS, &csts)) == 0) {
1060                 if ((csts & NVME_CSTS_SHST_MASK) == NVME_CSTS_SHST_CMPLT)
1061                         break;
1062
1063                 msleep(100);
1064                 if (fatal_signal_pending(current))
1065                         return -EINTR;
1066                 if (time_after(jiffies, timeout)) {
1067                         dev_err(ctrl->device,
1068                                 "Device shutdown incomplete; abort shutdown\n");
1069                         return -ENODEV;
1070                 }
1071         }
1072
1073         return ret;
1074 }
1075 EXPORT_SYMBOL_GPL(nvme_shutdown_ctrl);
1076
1077 static void nvme_set_queue_limits(struct nvme_ctrl *ctrl,
1078                 struct request_queue *q)
1079 {
1080         bool vwc = false;
1081
1082         if (ctrl->max_hw_sectors) {
1083                 u32 max_segments =
1084                         (ctrl->max_hw_sectors / (ctrl->page_size >> 9)) + 1;
1085
1086                 blk_queue_max_hw_sectors(q, ctrl->max_hw_sectors);
1087                 blk_queue_max_segments(q, min_t(u32, max_segments, USHRT_MAX));
1088         }
1089         if (ctrl->stripe_size)
1090                 blk_queue_chunk_sectors(q, ctrl->stripe_size >> 9);
1091         blk_queue_virt_boundary(q, ctrl->page_size - 1);
1092         if (ctrl->vwc & NVME_CTRL_VWC_PRESENT)
1093                 vwc = true;
1094         blk_queue_write_cache(q, vwc, vwc);
1095 }
1096
1097 /*
1098  * Initialize the cached copies of the Identify data and various controller
1099  * registers in our nvme_ctrl structure.  This should be called as soon as
1100  * the admin queue is fully up and running.
1101  */
1102 int nvme_init_identify(struct nvme_ctrl *ctrl)
1103 {
1104         struct nvme_id_ctrl *id;
1105         u64 cap;
1106         int ret, page_shift;
1107         u32 max_hw_sectors;
1108
1109         ret = ctrl->ops->reg_read32(ctrl, NVME_REG_VS, &ctrl->vs);
1110         if (ret) {
1111                 dev_err(ctrl->device, "Reading VS failed (%d)\n", ret);
1112                 return ret;
1113         }
1114
1115         ret = ctrl->ops->reg_read64(ctrl, NVME_REG_CAP, &cap);
1116         if (ret) {
1117                 dev_err(ctrl->device, "Reading CAP failed (%d)\n", ret);
1118                 return ret;
1119         }
1120         page_shift = NVME_CAP_MPSMIN(cap) + 12;
1121
1122         if (ctrl->vs >= NVME_VS(1, 1))
1123                 ctrl->subsystem = NVME_CAP_NSSRC(cap);
1124
1125         ret = nvme_identify_ctrl(ctrl, &id);
1126         if (ret) {
1127                 dev_err(ctrl->device, "Identify Controller failed (%d)\n", ret);
1128                 return -EIO;
1129         }
1130
1131         ctrl->vid = le16_to_cpu(id->vid);
1132         ctrl->oncs = le16_to_cpup(&id->oncs);
1133         atomic_set(&ctrl->abort_limit, id->acl + 1);
1134         ctrl->vwc = id->vwc;
1135         ctrl->cntlid = le16_to_cpup(&id->cntlid);
1136         memcpy(ctrl->serial, id->sn, sizeof(id->sn));
1137         memcpy(ctrl->model, id->mn, sizeof(id->mn));
1138         memcpy(ctrl->firmware_rev, id->fr, sizeof(id->fr));
1139         if (id->mdts)
1140                 max_hw_sectors = 1 << (id->mdts + page_shift - 9);
1141         else
1142                 max_hw_sectors = UINT_MAX;
1143         ctrl->max_hw_sectors =
1144                 min_not_zero(ctrl->max_hw_sectors, max_hw_sectors);
1145
1146         if ((ctrl->quirks & NVME_QUIRK_STRIPE_SIZE) && id->vs[3]) {
1147                 unsigned int max_hw_sectors;
1148
1149                 ctrl->stripe_size = 1 << (id->vs[3] + page_shift);
1150                 max_hw_sectors = ctrl->stripe_size >> (page_shift - 9);
1151                 if (ctrl->max_hw_sectors) {
1152                         ctrl->max_hw_sectors = min(max_hw_sectors,
1153                                                         ctrl->max_hw_sectors);
1154                 } else {
1155                         ctrl->max_hw_sectors = max_hw_sectors;
1156                 }
1157         }
1158
1159         nvme_set_queue_limits(ctrl, ctrl->admin_q);
1160
1161         kfree(id);
1162         return 0;
1163 }
1164 EXPORT_SYMBOL_GPL(nvme_init_identify);
1165
1166 static int nvme_dev_open(struct inode *inode, struct file *file)
1167 {
1168         struct nvme_ctrl *ctrl;
1169         int instance = iminor(inode);
1170         int ret = -ENODEV;
1171
1172         spin_lock(&dev_list_lock);
1173         list_for_each_entry(ctrl, &nvme_ctrl_list, node) {
1174                 if (ctrl->instance != instance)
1175                         continue;
1176
1177                 if (!ctrl->admin_q) {
1178                         ret = -EWOULDBLOCK;
1179                         break;
1180                 }
1181                 if (!kref_get_unless_zero(&ctrl->kref))
1182                         break;
1183                 file->private_data = ctrl;
1184                 ret = 0;
1185                 break;
1186         }
1187         spin_unlock(&dev_list_lock);
1188
1189         return ret;
1190 }
1191
1192 static int nvme_dev_release(struct inode *inode, struct file *file)
1193 {
1194         nvme_put_ctrl(file->private_data);
1195         return 0;
1196 }
1197
1198 static int nvme_dev_user_cmd(struct nvme_ctrl *ctrl, void __user *argp)
1199 {
1200         struct nvme_ns *ns;
1201         int ret;
1202
1203         mutex_lock(&ctrl->namespaces_mutex);
1204         if (list_empty(&ctrl->namespaces)) {
1205                 ret = -ENOTTY;
1206                 goto out_unlock;
1207         }
1208
1209         ns = list_first_entry(&ctrl->namespaces, struct nvme_ns, list);
1210         if (ns != list_last_entry(&ctrl->namespaces, struct nvme_ns, list)) {
1211                 dev_warn(ctrl->device,
1212                         "NVME_IOCTL_IO_CMD not supported when multiple namespaces present!\n");
1213                 ret = -EINVAL;
1214                 goto out_unlock;
1215         }
1216
1217         dev_warn(ctrl->device,
1218                 "using deprecated NVME_IOCTL_IO_CMD ioctl on the char device!\n");
1219         kref_get(&ns->kref);
1220         mutex_unlock(&ctrl->namespaces_mutex);
1221
1222         ret = nvme_user_cmd(ctrl, ns, argp);
1223         nvme_put_ns(ns);
1224         return ret;
1225
1226 out_unlock:
1227         mutex_unlock(&ctrl->namespaces_mutex);
1228         return ret;
1229 }
1230
1231 static long nvme_dev_ioctl(struct file *file, unsigned int cmd,
1232                 unsigned long arg)
1233 {
1234         struct nvme_ctrl *ctrl = file->private_data;
1235         void __user *argp = (void __user *)arg;
1236
1237         switch (cmd) {
1238         case NVME_IOCTL_ADMIN_CMD:
1239                 return nvme_user_cmd(ctrl, NULL, argp);
1240         case NVME_IOCTL_IO_CMD:
1241                 return nvme_dev_user_cmd(ctrl, argp);
1242         case NVME_IOCTL_RESET:
1243                 dev_warn(ctrl->device, "resetting controller\n");
1244                 return ctrl->ops->reset_ctrl(ctrl);
1245         case NVME_IOCTL_SUBSYS_RESET:
1246                 return nvme_reset_subsystem(ctrl);
1247         case NVME_IOCTL_RESCAN:
1248                 nvme_queue_scan(ctrl);
1249                 return 0;
1250         default:
1251                 return -ENOTTY;
1252         }
1253 }
1254
1255 static const struct file_operations nvme_dev_fops = {
1256         .owner          = THIS_MODULE,
1257         .open           = nvme_dev_open,
1258         .release        = nvme_dev_release,
1259         .unlocked_ioctl = nvme_dev_ioctl,
1260         .compat_ioctl   = nvme_dev_ioctl,
1261 };
1262
1263 static ssize_t nvme_sysfs_reset(struct device *dev,
1264                                 struct device_attribute *attr, const char *buf,
1265                                 size_t count)
1266 {
1267         struct nvme_ctrl *ctrl = dev_get_drvdata(dev);
1268         int ret;
1269
1270         ret = ctrl->ops->reset_ctrl(ctrl);
1271         if (ret < 0)
1272                 return ret;
1273         return count;
1274 }
1275 static DEVICE_ATTR(reset_controller, S_IWUSR, NULL, nvme_sysfs_reset);
1276
1277 static ssize_t nvme_sysfs_rescan(struct device *dev,
1278                                 struct device_attribute *attr, const char *buf,
1279                                 size_t count)
1280 {
1281         struct nvme_ctrl *ctrl = dev_get_drvdata(dev);
1282
1283         nvme_queue_scan(ctrl);
1284         return count;
1285 }
1286 static DEVICE_ATTR(rescan_controller, S_IWUSR, NULL, nvme_sysfs_rescan);
1287
1288 static ssize_t wwid_show(struct device *dev, struct device_attribute *attr,
1289                                                                 char *buf)
1290 {
1291         struct nvme_ns *ns = dev_to_disk(dev)->private_data;
1292         struct nvme_ctrl *ctrl = ns->ctrl;
1293         int serial_len = sizeof(ctrl->serial);
1294         int model_len = sizeof(ctrl->model);
1295
1296         if (memchr_inv(ns->uuid, 0, sizeof(ns->uuid)))
1297                 return sprintf(buf, "eui.%16phN\n", ns->uuid);
1298
1299         if (memchr_inv(ns->eui, 0, sizeof(ns->eui)))
1300                 return sprintf(buf, "eui.%8phN\n", ns->eui);
1301
1302         while (ctrl->serial[serial_len - 1] == ' ')
1303                 serial_len--;
1304         while (ctrl->model[model_len - 1] == ' ')
1305                 model_len--;
1306
1307         return sprintf(buf, "nvme.%04x-%*phN-%*phN-%08x\n", ctrl->vid,
1308                 serial_len, ctrl->serial, model_len, ctrl->model, ns->ns_id);
1309 }
1310 static DEVICE_ATTR(wwid, S_IRUGO, wwid_show, NULL);
1311
1312 static ssize_t uuid_show(struct device *dev, struct device_attribute *attr,
1313                                                                 char *buf)
1314 {
1315         struct nvme_ns *ns = dev_to_disk(dev)->private_data;
1316         return sprintf(buf, "%pU\n", ns->uuid);
1317 }
1318 static DEVICE_ATTR(uuid, S_IRUGO, uuid_show, NULL);
1319
1320 static ssize_t eui_show(struct device *dev, struct device_attribute *attr,
1321                                                                 char *buf)
1322 {
1323         struct nvme_ns *ns = dev_to_disk(dev)->private_data;
1324         return sprintf(buf, "%8phd\n", ns->eui);
1325 }
1326 static DEVICE_ATTR(eui, S_IRUGO, eui_show, NULL);
1327
1328 static ssize_t nsid_show(struct device *dev, struct device_attribute *attr,
1329                                                                 char *buf)
1330 {
1331         struct nvme_ns *ns = dev_to_disk(dev)->private_data;
1332         return sprintf(buf, "%d\n", ns->ns_id);
1333 }
1334 static DEVICE_ATTR(nsid, S_IRUGO, nsid_show, NULL);
1335
1336 static struct attribute *nvme_ns_attrs[] = {
1337         &dev_attr_wwid.attr,
1338         &dev_attr_uuid.attr,
1339         &dev_attr_eui.attr,
1340         &dev_attr_nsid.attr,
1341         NULL,
1342 };
1343
1344 static umode_t nvme_attrs_are_visible(struct kobject *kobj,
1345                 struct attribute *a, int n)
1346 {
1347         struct device *dev = container_of(kobj, struct device, kobj);
1348         struct nvme_ns *ns = dev_to_disk(dev)->private_data;
1349
1350         if (a == &dev_attr_uuid.attr) {
1351                 if (!memchr_inv(ns->uuid, 0, sizeof(ns->uuid)))
1352                         return 0;
1353         }
1354         if (a == &dev_attr_eui.attr) {
1355                 if (!memchr_inv(ns->eui, 0, sizeof(ns->eui)))
1356                         return 0;
1357         }
1358         return a->mode;
1359 }
1360
1361 static const struct attribute_group nvme_ns_attr_group = {
1362         .attrs          = nvme_ns_attrs,
1363         .is_visible     = nvme_attrs_are_visible,
1364 };
1365
1366 #define nvme_show_str_function(field)                                           \
1367 static ssize_t  field##_show(struct device *dev,                                \
1368                             struct device_attribute *attr, char *buf)           \
1369 {                                                                               \
1370         struct nvme_ctrl *ctrl = dev_get_drvdata(dev);                          \
1371         return sprintf(buf, "%.*s\n", (int)sizeof(ctrl->field), ctrl->field);   \
1372 }                                                                               \
1373 static DEVICE_ATTR(field, S_IRUGO, field##_show, NULL);
1374
1375 #define nvme_show_int_function(field)                                           \
1376 static ssize_t  field##_show(struct device *dev,                                \
1377                             struct device_attribute *attr, char *buf)           \
1378 {                                                                               \
1379         struct nvme_ctrl *ctrl = dev_get_drvdata(dev);                          \
1380         return sprintf(buf, "%d\n", ctrl->field);       \
1381 }                                                                               \
1382 static DEVICE_ATTR(field, S_IRUGO, field##_show, NULL);
1383
1384 nvme_show_str_function(model);
1385 nvme_show_str_function(serial);
1386 nvme_show_str_function(firmware_rev);
1387 nvme_show_int_function(cntlid);
1388
1389 static struct attribute *nvme_dev_attrs[] = {
1390         &dev_attr_reset_controller.attr,
1391         &dev_attr_rescan_controller.attr,
1392         &dev_attr_model.attr,
1393         &dev_attr_serial.attr,
1394         &dev_attr_firmware_rev.attr,
1395         &dev_attr_cntlid.attr,
1396         NULL
1397 };
1398
1399 static struct attribute_group nvme_dev_attrs_group = {
1400         .attrs = nvme_dev_attrs,
1401 };
1402
1403 static const struct attribute_group *nvme_dev_attr_groups[] = {
1404         &nvme_dev_attrs_group,
1405         NULL,
1406 };
1407
1408 static int ns_cmp(void *priv, struct list_head *a, struct list_head *b)
1409 {
1410         struct nvme_ns *nsa = container_of(a, struct nvme_ns, list);
1411         struct nvme_ns *nsb = container_of(b, struct nvme_ns, list);
1412
1413         return nsa->ns_id - nsb->ns_id;
1414 }
1415
1416 static struct nvme_ns *nvme_find_ns(struct nvme_ctrl *ctrl, unsigned nsid)
1417 {
1418         struct nvme_ns *ns;
1419
1420         lockdep_assert_held(&ctrl->namespaces_mutex);
1421
1422         list_for_each_entry(ns, &ctrl->namespaces, list) {
1423                 if (ns->ns_id == nsid)
1424                         return ns;
1425                 if (ns->ns_id > nsid)
1426                         break;
1427         }
1428         return NULL;
1429 }
1430
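/*
 * Create a new namespace: allocate a blk-mq request queue on the controller's
 * I/O tagset and a gendisk named nvme<ctrl instance>n<ns instance>, validate
 * it against the Identify Namespace data, then register the disk and its
 * identification sysfs group unless the namespace is claimed by LightNVM.
 */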
1431 static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)
1432 {
1433         struct nvme_ns *ns;
1434         struct gendisk *disk;
1435         int node = dev_to_node(ctrl->dev);
1436
1437         lockdep_assert_held(&ctrl->namespaces_mutex);
1438
1439         ns = kzalloc_node(sizeof(*ns), GFP_KERNEL, node);
1440         if (!ns)
1441                 return;
1442
1443         ns->instance = ida_simple_get(&ctrl->ns_ida, 1, 0, GFP_KERNEL);
1444         if (ns->instance < 0)
1445                 goto out_free_ns;
1446
1447         ns->queue = blk_mq_init_queue(ctrl->tagset);
1448         if (IS_ERR(ns->queue))
1449                 goto out_release_instance;
1450         queue_flag_set_unlocked(QUEUE_FLAG_NONROT, ns->queue);
1451         ns->queue->queuedata = ns;
1452         ns->ctrl = ctrl;
1453
1454         disk = alloc_disk_node(0, node);
1455         if (!disk)
1456                 goto out_free_queue;
1457
1458         kref_init(&ns->kref);
1459         ns->ns_id = nsid;
1460         ns->disk = disk;
1461         ns->lba_shift = 9; /* default to 512 byte blocks until the disk is validated */
1462
1463
1464         blk_queue_logical_block_size(ns->queue, 1 << ns->lba_shift);
1465         nvme_set_queue_limits(ctrl, ns->queue);
1466
1467         disk->major = nvme_major;
1468         disk->first_minor = 0;
1469         disk->fops = &nvme_fops;
1470         disk->private_data = ns;
1471         disk->queue = ns->queue;
1472         disk->driverfs_dev = ctrl->device;
1473         disk->flags = GENHD_FL_EXT_DEVT;
1474         sprintf(disk->disk_name, "nvme%dn%d", ctrl->instance, ns->instance);
1475
1476         if (nvme_revalidate_disk(ns->disk))
1477                 goto out_free_disk;
1478
1479         list_add_tail_rcu(&ns->list, &ctrl->namespaces);
1480         kref_get(&ctrl->kref);
1481         if (ns->type == NVME_NS_LIGHTNVM)
1482                 return;
1483
1484         add_disk(ns->disk);
1485         if (sysfs_create_group(&disk_to_dev(ns->disk)->kobj,
1486                                         &nvme_ns_attr_group))
1487                 pr_warn("%s: failed to create sysfs group for identification\n",
1488                         ns->disk->disk_name);
1489         return;
1490  out_free_disk:
1491         kfree(disk);
1492  out_free_queue:
1493         blk_cleanup_queue(ns->queue);
1494  out_release_instance:
1495         ida_simple_remove(&ctrl->ns_ida, ns->instance);
1496  out_free_ns:
1497         kfree(ns);
1498 }
1499
1500 static void nvme_ns_remove(struct nvme_ns *ns)
1501 {
1502         lockdep_assert_held(&ns->ctrl->namespaces_mutex);
1503
1504         if (test_and_set_bit(NVME_NS_REMOVING, &ns->flags))
1505                 return;
1506
1507         if (ns->disk->flags & GENHD_FL_UP) {
1508                 if (blk_get_integrity(ns->disk))
1509                         blk_integrity_unregister(ns->disk);
1510                 sysfs_remove_group(&disk_to_dev(ns->disk)->kobj,
1511                                         &nvme_ns_attr_group);
1512                 del_gendisk(ns->disk);
1513                 blk_mq_abort_requeue_list(ns->queue);
1514                 blk_cleanup_queue(ns->queue);
1515         }
1516         list_del_init(&ns->list);
1517         synchronize_rcu();
1518         nvme_put_ns(ns);
1519 }
1520
1521 static void nvme_validate_ns(struct nvme_ctrl *ctrl, unsigned nsid)
1522 {
1523         struct nvme_ns *ns;
1524
1525         ns = nvme_find_ns(ctrl, nsid);
1526         if (ns) {
1527                 if (revalidate_disk(ns->disk))
1528                         nvme_ns_remove(ns);
1529         } else
1530                 nvme_alloc_ns(ctrl, nsid);
1531 }
1532
1533 static void nvme_remove_invalid_namespaces(struct nvme_ctrl *ctrl,
1534                                         unsigned nsid)
1535 {
1536         struct nvme_ns *ns, *next;
1537
1538         list_for_each_entry_safe(ns, next, &ctrl->namespaces, list) {
1539                 if (ns->ns_id > nsid)
1540                         nvme_ns_remove(ns);
1541         }
1542 }
1543
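/*
 * Namespace scanning for NVMe 1.1+ controllers: walk the Identify CNS 02h
 * active namespace ID lists (1024 entries per 4KB page), validating every
 * reported namespace and removing previously known namespaces whose IDs are
 * no longer reported.
 */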
1544 static int nvme_scan_ns_list(struct nvme_ctrl *ctrl, unsigned nn)
1545 {
1546         struct nvme_ns *ns;
1547         __le32 *ns_list;
1548         unsigned i, j, nsid, prev = 0, num_lists = DIV_ROUND_UP(nn, 1024);
1549         int ret = 0;
1550
1551         ns_list = kzalloc(0x1000, GFP_KERNEL);
1552         if (!ns_list)
1553                 return -ENOMEM;
1554
1555         for (i = 0; i < num_lists; i++) {
1556                 ret = nvme_identify_ns_list(ctrl, prev, ns_list);
1557                 if (ret)
1558                         goto free;
1559
1560                 for (j = 0; j < min(nn, 1024U); j++) {
1561                         nsid = le32_to_cpu(ns_list[j]);
1562                         if (!nsid)
1563                                 goto out;
1564
1565                         nvme_validate_ns(ctrl, nsid);
1566
1567                         while (++prev < nsid) {
1568                                 ns = nvme_find_ns(ctrl, prev);
1569                                 if (ns)
1570                                         nvme_ns_remove(ns);
1571                         }
1572                 }
1573                 nn -= j;
1574         }
1575  out:
1576         nvme_remove_invalid_namespaces(ctrl, prev);
1577  free:
1578         kfree(ns_list);
1579         return ret;
1580 }
1581
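/*
 * Fallback scan for controllers without namespace list support: probe every
 * NSID from 1 to the reported namespace count and remove anything above it.
 */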
1582 static void nvme_scan_ns_sequential(struct nvme_ctrl *ctrl, unsigned nn)
1583 {
1584         unsigned i;
1585
1586         lockdep_assert_held(&ctrl->namespaces_mutex);
1587
1588         for (i = 1; i <= nn; i++)
1589                 nvme_validate_ns(ctrl, i);
1590
1591         nvme_remove_invalid_namespaces(ctrl, nn);
1592 }
1593
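/*
 * Controller-wide namespace scan, run from the scan_work item.  Prefer the
 * namespace list scan on NVMe 1.1+ controllers without the IDENTIFY_CNS
 * quirk, fall back to the sequential scan otherwise, then re-sort the
 * controller's namespace list and invoke the transport's post_scan hook if
 * one is provided.
 */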
1594 static void nvme_scan_work(struct work_struct *work)
1595 {
1596         struct nvme_ctrl *ctrl =
1597                 container_of(work, struct nvme_ctrl, scan_work);
1598         struct nvme_id_ctrl *id;
1599         unsigned nn;
1600
1601         if (ctrl->state != NVME_CTRL_LIVE)
1602                 return;
1603
1604         if (nvme_identify_ctrl(ctrl, &id))
1605                 return;
1606
1607         mutex_lock(&ctrl->namespaces_mutex);
1608         nn = le32_to_cpu(id->nn);
1609         if (ctrl->vs >= NVME_VS(1, 1) &&
1610             !(ctrl->quirks & NVME_QUIRK_IDENTIFY_CNS)) {
1611                 if (!nvme_scan_ns_list(ctrl, nn))
1612                         goto done;
1613         }
1614         nvme_scan_ns_sequential(ctrl, nn);
1615  done:
1616         list_sort(NULL, &ctrl->namespaces, ns_cmp);
1617         mutex_unlock(&ctrl->namespaces_mutex);
1618         kfree(id);
1619
1620         if (ctrl->ops->post_scan)
1621                 ctrl->ops->post_scan(ctrl);
1622 }
1623
1624 void nvme_queue_scan(struct nvme_ctrl *ctrl)
1625 {
1626         /*
1627          * Do not queue new scan work when a controller is reset during
1628          * removal.
1629          */
1630         if (ctrl->state == NVME_CTRL_LIVE)
1631                 schedule_work(&ctrl->scan_work);
1632 }
1633 EXPORT_SYMBOL_GPL(nvme_queue_scan);
1634
1635 void nvme_remove_namespaces(struct nvme_ctrl *ctrl)
1636 {
1637         struct nvme_ns *ns, *next;
1638
1639         /*
1640          * The dead state indicates that the controller was not gracefully
1641          * disconnected. In that case, we won't be able to flush any data while
1642          * removing the namespaces' disks; fail all the queues now to avoid
1643          * potentially having to clean up the failed sync later.
1644          */
1645         if (ctrl->state == NVME_CTRL_DEAD)
1646                 nvme_kill_queues(ctrl);
1647
1648         mutex_lock(&ctrl->namespaces_mutex);
1649         list_for_each_entry_safe(ns, next, &ctrl->namespaces, list)
1650                 nvme_ns_remove(ns);
1651         mutex_unlock(&ctrl->namespaces_mutex);
1652 }
1653 EXPORT_SYMBOL_GPL(nvme_remove_namespaces);
1654
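/*
 * Resubmit Asynchronous Event Requests until the per-controller event limit
 * is exhausted.  ctrl->lock is released around the ->submit_async_event()
 * callout into the transport.
 */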
1655 static void nvme_async_event_work(struct work_struct *work)
1656 {
1657         struct nvme_ctrl *ctrl =
1658                 container_of(work, struct nvme_ctrl, async_event_work);
1659
1660         spin_lock_irq(&ctrl->lock);
1661         while (ctrl->event_limit > 0) {
1662                 int aer_idx = --ctrl->event_limit;
1663
1664                 spin_unlock_irq(&ctrl->lock);
1665                 ctrl->ops->submit_async_event(ctrl, aer_idx);
1666                 spin_lock_irq(&ctrl->lock);
1667         }
1668         spin_unlock_irq(&ctrl->lock);
1669 }
1670
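/*
 * Handle an Asynchronous Event completion from the transport: replenish the
 * AER on success or abort, and on a namespace-changed notice kick off a
 * rescan.  Other event results are only logged.
 */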
1671 void nvme_complete_async_event(struct nvme_ctrl *ctrl,
1672                 struct nvme_completion *cqe)
1673 {
1674         u16 status = le16_to_cpu(cqe->status) >> 1;
1675         u32 result = le32_to_cpu(cqe->result);
1676
1677         if (status == NVME_SC_SUCCESS || status == NVME_SC_ABORT_REQ) {
1678                 ++ctrl->event_limit;
1679                 schedule_work(&ctrl->async_event_work);
1680         }
1681
1682         if (status != NVME_SC_SUCCESS)
1683                 return;
1684
1685         switch (result & 0xff07) {
1686         case NVME_AER_NOTICE_NS_CHANGED:
1687                 dev_info(ctrl->device, "rescanning\n");
1688                 nvme_queue_scan(ctrl);
1689                 break;
1690         default:
1691                 dev_warn(ctrl->device, "async event result %08x\n", result);
1692         }
1693 }
1694 EXPORT_SYMBOL_GPL(nvme_complete_async_event);
1695
1696 void nvme_queue_async_events(struct nvme_ctrl *ctrl)
1697 {
1698         ctrl->event_limit = NVME_NR_AERS;
1699         schedule_work(&ctrl->async_event_work);
1700 }
1701 EXPORT_SYMBOL_GPL(nvme_queue_async_events);
1702
1703 static DEFINE_IDA(nvme_instance_ida);
1704
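/*
 * Allocate a unique controller instance number from nvme_instance_ida,
 * retrying while the IDA needs memory preloaded.
 */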
1705 static int nvme_set_instance(struct nvme_ctrl *ctrl)
1706 {
1707         int instance, error;
1708
1709         do {
1710                 if (!ida_pre_get(&nvme_instance_ida, GFP_KERNEL))
1711                         return -ENODEV;
1712
1713                 spin_lock(&dev_list_lock);
1714                 error = ida_get_new(&nvme_instance_ida, &instance);
1715                 spin_unlock(&dev_list_lock);
1716         } while (error == -EAGAIN);
1717
1718         if (error)
1719                 return -ENODEV;
1720
1721         ctrl->instance = instance;
1722         return 0;
1723 }
1724
1725 static void nvme_release_instance(struct nvme_ctrl *ctrl)
1726 {
1727         spin_lock(&dev_list_lock);
1728         ida_remove(&nvme_instance_ida, ctrl->instance);
1729         spin_unlock(&dev_list_lock);
1730 }
1731
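/*
 * Undo nvme_init_ctrl(): flush outstanding async-event and scan work, remove
 * all namespaces, destroy the character device node and unlink the
 * controller from the global controller list.
 */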
1732 void nvme_uninit_ctrl(struct nvme_ctrl *ctrl)
1733 {
1734         flush_work(&ctrl->async_event_work);
1735         flush_work(&ctrl->scan_work);
1736         nvme_remove_namespaces(ctrl);
1737
1738         device_destroy(nvme_class, MKDEV(nvme_char_major, ctrl->instance));
1739
1740         spin_lock(&dev_list_lock);
1741         list_del(&ctrl->node);
1742         spin_unlock(&dev_list_lock);
1743 }
1744 EXPORT_SYMBOL_GPL(nvme_uninit_ctrl);
1745
1746 static void nvme_free_ctrl(struct kref *kref)
1747 {
1748         struct nvme_ctrl *ctrl = container_of(kref, struct nvme_ctrl, kref);
1749
1750         put_device(ctrl->device);
1751         nvme_release_instance(ctrl);
1752         ida_destroy(&ctrl->ns_ida);
1753
1754         ctrl->ops->free_ctrl(ctrl);
1755 }
1756
1757 void nvme_put_ctrl(struct nvme_ctrl *ctrl)
1758 {
1759         kref_put(&ctrl->kref, nvme_free_ctrl);
1760 }
1761 EXPORT_SYMBOL_GPL(nvme_put_ctrl);
1762
1763 /*
1764  * Initialize an NVMe controller structure.  This needs to be called during
1765  * the earliest initialization so that we have the initialized structure
1766  * around during probing.
1767  */
1768 int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev,
1769                 const struct nvme_ctrl_ops *ops, unsigned long quirks)
1770 {
1771         int ret;
1772
1773         ctrl->state = NVME_CTRL_NEW;
1774         spin_lock_init(&ctrl->lock);
1775         INIT_LIST_HEAD(&ctrl->namespaces);
1776         mutex_init(&ctrl->namespaces_mutex);
1777         kref_init(&ctrl->kref);
1778         ctrl->dev = dev;
1779         ctrl->ops = ops;
1780         ctrl->quirks = quirks;
1781         INIT_WORK(&ctrl->scan_work, nvme_scan_work);
1782         INIT_WORK(&ctrl->async_event_work, nvme_async_event_work);
1783
1784         ret = nvme_set_instance(ctrl);
1785         if (ret)
1786                 goto out;
1787
1788         ctrl->device = device_create_with_groups(nvme_class, ctrl->dev,
1789                                 MKDEV(nvme_char_major, ctrl->instance),
1790                                 ctrl, nvme_dev_attr_groups,
1791                                 "nvme%d", ctrl->instance);
1792         if (IS_ERR(ctrl->device)) {
1793                 ret = PTR_ERR(ctrl->device);
1794                 goto out_release_instance;
1795         }
1796         get_device(ctrl->device);
1797         ida_init(&ctrl->ns_ida);
1798
1799         spin_lock(&dev_list_lock);
1800         list_add_tail(&ctrl->node, &nvme_ctrl_list);
1801         spin_unlock(&dev_list_lock);
1802
1803         return 0;
1804 out_release_instance:
1805         nvme_release_instance(ctrl);
1806 out:
1807         return ret;
1808 }
1809 EXPORT_SYMBOL_GPL(nvme_init_ctrl);
1810
1811 /**
1812  * nvme_kill_queues - end all namespace queues
1813  * @ctrl: the dead controller whose namespace queues need to be ended
1814  *
1815  * Call this function when the driver determines it is unable to get the
1816  * controller into a state capable of servicing I/O.
1817  */
1818 void nvme_kill_queues(struct nvme_ctrl *ctrl)
1819 {
1820         struct nvme_ns *ns;
1821
1822         rcu_read_lock();
1823         list_for_each_entry_rcu(ns, &ctrl->namespaces, list) {
1824                 if (!kref_get_unless_zero(&ns->kref))
1825                         continue;
1826
1827                 /*
1828                  * Revalidating a dead namespace sets capacity to 0. This will
1829                  * end buffered writers dirtying pages that can't be synced.
1830                  */
1831                 if (!test_and_set_bit(NVME_NS_DEAD, &ns->flags))
1832                         revalidate_disk(ns->disk);
1833
1834                 blk_set_queue_dying(ns->queue);
1835                 blk_mq_abort_requeue_list(ns->queue);
1836                 blk_mq_start_stopped_hw_queues(ns->queue, true);
1837
1838                 nvme_put_ns(ns);
1839         }
1840         rcu_read_unlock();
1841 }
1842 EXPORT_SYMBOL_GPL(nvme_kill_queues);
1843
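/*
 * Quiesce I/O on all namespace queues: mark each request queue stopped,
 * cancel pending requeue work and stop the blk-mq hardware queues.
 * nvme_start_queues() reverses this.
 */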
1844 void nvme_stop_queues(struct nvme_ctrl *ctrl)
1845 {
1846         struct nvme_ns *ns;
1847
1848         rcu_read_lock();
1849         list_for_each_entry_rcu(ns, &ctrl->namespaces, list) {
1850                 spin_lock_irq(ns->queue->queue_lock);
1851                 queue_flag_set(QUEUE_FLAG_STOPPED, ns->queue);
1852                 spin_unlock_irq(ns->queue->queue_lock);
1853
1854                 blk_mq_cancel_requeue_work(ns->queue);
1855                 blk_mq_stop_hw_queues(ns->queue);
1856         }
1857         rcu_read_unlock();
1858 }
1859 EXPORT_SYMBOL_GPL(nvme_stop_queues);
1860
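/*
 * Restart I/O on all namespace queues: clear the stopped flag, restart the
 * stopped hardware queues and kick the requeue list.
 */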
1861 void nvme_start_queues(struct nvme_ctrl *ctrl)
1862 {
1863         struct nvme_ns *ns;
1864
1865         rcu_read_lock();
1866         list_for_each_entry_rcu(ns, &ctrl->namespaces, list) {
1867                 queue_flag_clear_unlocked(QUEUE_FLAG_STOPPED, ns->queue);
1868                 blk_mq_start_stopped_hw_queues(ns->queue, true);
1869                 blk_mq_kick_requeue_list(ns->queue);
1870         }
1871         rcu_read_unlock();
1872 }
1873 EXPORT_SYMBOL_GPL(nvme_start_queues);
1874
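/*
 * Module init: register the block major, the character device major used for
 * the per-controller nodes, and the "nvme" device class.  A major of zero is
 * allocated dynamically and recorded from the registration result.
 */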
1875 int __init nvme_core_init(void)
1876 {
1877         int result;
1878
1879         result = register_blkdev(nvme_major, "nvme");
1880         if (result < 0)
1881                 return result;
1882         else if (result > 0)
1883                 nvme_major = result;
1884
1885         result = __register_chrdev(nvme_char_major, 0, NVME_MINORS, "nvme",
1886                                                         &nvme_dev_fops);
1887         if (result < 0)
1888                 goto unregister_blkdev;
1889         else if (result > 0)
1890                 nvme_char_major = result;
1891
1892         nvme_class = class_create(THIS_MODULE, "nvme");
1893         if (IS_ERR(nvme_class)) {
1894                 result = PTR_ERR(nvme_class);
1895                 goto unregister_chrdev;
1896         }
1897
1898         return 0;
1899
1900  unregister_chrdev:
1901         __unregister_chrdev(nvme_char_major, 0, NVME_MINORS, "nvme");
1902  unregister_blkdev:
1903         unregister_blkdev(nvme_major, "nvme");
1904         return result;
1905 }
1906
1907 void nvme_core_exit(void)
1908 {
1909         class_destroy(nvme_class);
1910         __unregister_chrdev(nvme_char_major, 0, NVME_MINORS, "nvme");
1911         unregister_blkdev(nvme_major, "nvme");
1912 }
1913
1914 MODULE_LICENSE("GPL");
1915 MODULE_VERSION("1.0");
1916 module_init(nvme_core_init);
1917 module_exit(nvme_core_exit);