libnvdimm, region: update nd_region_available_dpa() for multi-pmem support
[cascardo/linux.git] / drivers / nvdimm / dimm_devs.c
1 /*
2  * Copyright(c) 2013-2015 Intel Corporation. All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or modify
5  * it under the terms of version 2 of the GNU General Public License as
6  * published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful, but
9  * WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  */
13 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
14 #include <linux/vmalloc.h>
15 #include <linux/device.h>
16 #include <linux/ndctl.h>
17 #include <linux/slab.h>
18 #include <linux/io.h>
19 #include <linux/fs.h>
20 #include <linux/mm.h>
21 #include "nd-core.h"
22 #include "label.h"
23 #include "nd.h"
24
25 static DEFINE_IDA(dimm_ida);
26
27 /*
28  * Retrieve bus and dimm handle and return if this bus supports
29  * get_config_data commands
30  */
31 int nvdimm_check_config_data(struct device *dev)
32 {
33         struct nvdimm *nvdimm = to_nvdimm(dev);
34
35         if (!nvdimm->cmd_mask ||
36             !test_bit(ND_CMD_GET_CONFIG_DATA, &nvdimm->cmd_mask)) {
37                 if (nvdimm->flags & NDD_ALIASING)
38                         return -ENXIO;
39                 else
40                         return -ENOTTY;
41         }
42
43         return 0;
44 }
45
46 static int validate_dimm(struct nvdimm_drvdata *ndd)
47 {
48         int rc;
49
50         if (!ndd)
51                 return -EINVAL;
52
53         rc = nvdimm_check_config_data(ndd->dev);
54         if (rc)
55                 dev_dbg(ndd->dev, "%pf: %s error: %d\n",
56                                 __builtin_return_address(0), __func__, rc);
57         return rc;
58 }
59
60 /**
61  * nvdimm_init_nsarea - determine the geometry of a dimm's namespace area
62  * @nvdimm: dimm to initialize
63  */
64 int nvdimm_init_nsarea(struct nvdimm_drvdata *ndd)
65 {
66         struct nd_cmd_get_config_size *cmd = &ndd->nsarea;
67         struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(ndd->dev);
68         struct nvdimm_bus_descriptor *nd_desc;
69         int rc = validate_dimm(ndd);
70
71         if (rc)
72                 return rc;
73
74         if (cmd->config_size)
75                 return 0; /* already valid */
76
77         memset(cmd, 0, sizeof(*cmd));
78         nd_desc = nvdimm_bus->nd_desc;
79         return nd_desc->ndctl(nd_desc, to_nvdimm(ndd->dev),
80                         ND_CMD_GET_CONFIG_SIZE, cmd, sizeof(*cmd), NULL);
81 }
82
83 int nvdimm_init_config_data(struct nvdimm_drvdata *ndd)
84 {
85         struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(ndd->dev);
86         struct nd_cmd_get_config_data_hdr *cmd;
87         struct nvdimm_bus_descriptor *nd_desc;
88         int rc = validate_dimm(ndd);
89         u32 max_cmd_size, config_size;
90         size_t offset;
91
92         if (rc)
93                 return rc;
94
95         if (ndd->data)
96                 return 0;
97
98         if (ndd->nsarea.status || ndd->nsarea.max_xfer == 0
99                         || ndd->nsarea.config_size < ND_LABEL_MIN_SIZE) {
100                 dev_dbg(ndd->dev, "failed to init config data area: (%d:%d)\n",
101                                 ndd->nsarea.max_xfer, ndd->nsarea.config_size);
102                 return -ENXIO;
103         }
104
105         ndd->data = kmalloc(ndd->nsarea.config_size, GFP_KERNEL);
106         if (!ndd->data)
107                 ndd->data = vmalloc(ndd->nsarea.config_size);
108
109         if (!ndd->data)
110                 return -ENOMEM;
111
112         max_cmd_size = min_t(u32, PAGE_SIZE, ndd->nsarea.max_xfer);
113         cmd = kzalloc(max_cmd_size + sizeof(*cmd), GFP_KERNEL);
114         if (!cmd)
115                 return -ENOMEM;
116
117         nd_desc = nvdimm_bus->nd_desc;
118         for (config_size = ndd->nsarea.config_size, offset = 0;
119                         config_size; config_size -= cmd->in_length,
120                         offset += cmd->in_length) {
121                 cmd->in_length = min(config_size, max_cmd_size);
122                 cmd->in_offset = offset;
123                 rc = nd_desc->ndctl(nd_desc, to_nvdimm(ndd->dev),
124                                 ND_CMD_GET_CONFIG_DATA, cmd,
125                                 cmd->in_length + sizeof(*cmd), NULL);
126                 if (rc || cmd->status) {
127                         rc = -ENXIO;
128                         break;
129                 }
130                 memcpy(ndd->data + offset, cmd->out_buf, cmd->in_length);
131         }
132         dev_dbg(ndd->dev, "%s: len: %zu rc: %d\n", __func__, offset, rc);
133         kfree(cmd);
134
135         return rc;
136 }
137
138 int nvdimm_set_config_data(struct nvdimm_drvdata *ndd, size_t offset,
139                 void *buf, size_t len)
140 {
141         int rc = validate_dimm(ndd);
142         size_t max_cmd_size, buf_offset;
143         struct nd_cmd_set_config_hdr *cmd;
144         struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(ndd->dev);
145         struct nvdimm_bus_descriptor *nd_desc = nvdimm_bus->nd_desc;
146
147         if (rc)
148                 return rc;
149
150         if (!ndd->data)
151                 return -ENXIO;
152
153         if (offset + len > ndd->nsarea.config_size)
154                 return -ENXIO;
155
156         max_cmd_size = min_t(u32, PAGE_SIZE, len);
157         max_cmd_size = min_t(u32, max_cmd_size, ndd->nsarea.max_xfer);
158         cmd = kzalloc(max_cmd_size + sizeof(*cmd) + sizeof(u32), GFP_KERNEL);
159         if (!cmd)
160                 return -ENOMEM;
161
162         for (buf_offset = 0; len; len -= cmd->in_length,
163                         buf_offset += cmd->in_length) {
164                 size_t cmd_size;
165                 u32 *status;
166
167                 cmd->in_offset = offset + buf_offset;
168                 cmd->in_length = min(max_cmd_size, len);
169                 memcpy(cmd->in_buf, buf + buf_offset, cmd->in_length);
170
171                 /* status is output in the last 4-bytes of the command buffer */
172                 cmd_size = sizeof(*cmd) + cmd->in_length + sizeof(u32);
173                 status = ((void *) cmd) + cmd_size - sizeof(u32);
174
175                 rc = nd_desc->ndctl(nd_desc, to_nvdimm(ndd->dev),
176                                 ND_CMD_SET_CONFIG_DATA, cmd, cmd_size, NULL);
177                 if (rc || *status) {
178                         rc = rc ? rc : -ENXIO;
179                         break;
180                 }
181         }
182         kfree(cmd);
183
184         return rc;
185 }
186
187 static void nvdimm_release(struct device *dev)
188 {
189         struct nvdimm *nvdimm = to_nvdimm(dev);
190
191         ida_simple_remove(&dimm_ida, nvdimm->id);
192         kfree(nvdimm);
193 }
194
195 static struct device_type nvdimm_device_type = {
196         .name = "nvdimm",
197         .release = nvdimm_release,
198 };
199
200 bool is_nvdimm(struct device *dev)
201 {
202         return dev->type == &nvdimm_device_type;
203 }
204
205 struct nvdimm *to_nvdimm(struct device *dev)
206 {
207         struct nvdimm *nvdimm = container_of(dev, struct nvdimm, dev);
208
209         WARN_ON(!is_nvdimm(dev));
210         return nvdimm;
211 }
212 EXPORT_SYMBOL_GPL(to_nvdimm);
213
214 struct nvdimm *nd_blk_region_to_dimm(struct nd_blk_region *ndbr)
215 {
216         struct nd_region *nd_region = &ndbr->nd_region;
217         struct nd_mapping *nd_mapping = &nd_region->mapping[0];
218
219         return nd_mapping->nvdimm;
220 }
221 EXPORT_SYMBOL_GPL(nd_blk_region_to_dimm);
222
223 struct nvdimm_drvdata *to_ndd(struct nd_mapping *nd_mapping)
224 {
225         struct nvdimm *nvdimm = nd_mapping->nvdimm;
226
227         WARN_ON_ONCE(!is_nvdimm_bus_locked(&nvdimm->dev));
228
229         return dev_get_drvdata(&nvdimm->dev);
230 }
231 EXPORT_SYMBOL(to_ndd);
232
233 void nvdimm_drvdata_release(struct kref *kref)
234 {
235         struct nvdimm_drvdata *ndd = container_of(kref, typeof(*ndd), kref);
236         struct device *dev = ndd->dev;
237         struct resource *res, *_r;
238
239         dev_dbg(dev, "%s\n", __func__);
240
241         nvdimm_bus_lock(dev);
242         for_each_dpa_resource_safe(ndd, res, _r)
243                 nvdimm_free_dpa(ndd, res);
244         nvdimm_bus_unlock(dev);
245
246         kvfree(ndd->data);
247         kfree(ndd);
248         put_device(dev);
249 }
250
251 void get_ndd(struct nvdimm_drvdata *ndd)
252 {
253         kref_get(&ndd->kref);
254 }
255
256 void put_ndd(struct nvdimm_drvdata *ndd)
257 {
258         if (ndd)
259                 kref_put(&ndd->kref, nvdimm_drvdata_release);
260 }
261
262 const char *nvdimm_name(struct nvdimm *nvdimm)
263 {
264         return dev_name(&nvdimm->dev);
265 }
266 EXPORT_SYMBOL_GPL(nvdimm_name);
267
268 struct kobject *nvdimm_kobj(struct nvdimm *nvdimm)
269 {
270         return &nvdimm->dev.kobj;
271 }
272 EXPORT_SYMBOL_GPL(nvdimm_kobj);
273
274 unsigned long nvdimm_cmd_mask(struct nvdimm *nvdimm)
275 {
276         return nvdimm->cmd_mask;
277 }
278 EXPORT_SYMBOL_GPL(nvdimm_cmd_mask);
279
280 void *nvdimm_provider_data(struct nvdimm *nvdimm)
281 {
282         if (nvdimm)
283                 return nvdimm->provider_data;
284         return NULL;
285 }
286 EXPORT_SYMBOL_GPL(nvdimm_provider_data);
287
288 static ssize_t commands_show(struct device *dev,
289                 struct device_attribute *attr, char *buf)
290 {
291         struct nvdimm *nvdimm = to_nvdimm(dev);
292         int cmd, len = 0;
293
294         if (!nvdimm->cmd_mask)
295                 return sprintf(buf, "\n");
296
297         for_each_set_bit(cmd, &nvdimm->cmd_mask, BITS_PER_LONG)
298                 len += sprintf(buf + len, "%s ", nvdimm_cmd_name(cmd));
299         len += sprintf(buf + len, "\n");
300         return len;
301 }
302 static DEVICE_ATTR_RO(commands);
303
304 static ssize_t state_show(struct device *dev, struct device_attribute *attr,
305                 char *buf)
306 {
307         struct nvdimm *nvdimm = to_nvdimm(dev);
308
309         /*
310          * The state may be in the process of changing, userspace should
311          * quiesce probing if it wants a static answer
312          */
313         nvdimm_bus_lock(dev);
314         nvdimm_bus_unlock(dev);
315         return sprintf(buf, "%s\n", atomic_read(&nvdimm->busy)
316                         ? "active" : "idle");
317 }
318 static DEVICE_ATTR_RO(state);
319
320 static ssize_t available_slots_show(struct device *dev,
321                 struct device_attribute *attr, char *buf)
322 {
323         struct nvdimm_drvdata *ndd = dev_get_drvdata(dev);
324         ssize_t rc;
325         u32 nfree;
326
327         if (!ndd)
328                 return -ENXIO;
329
330         nvdimm_bus_lock(dev);
331         nfree = nd_label_nfree(ndd);
332         if (nfree - 1 > nfree) {
333                 dev_WARN_ONCE(dev, 1, "we ate our last label?\n");
334                 nfree = 0;
335         } else
336                 nfree--;
337         rc = sprintf(buf, "%d\n", nfree);
338         nvdimm_bus_unlock(dev);
339         return rc;
340 }
341 static DEVICE_ATTR_RO(available_slots);
342
343 static struct attribute *nvdimm_attributes[] = {
344         &dev_attr_state.attr,
345         &dev_attr_commands.attr,
346         &dev_attr_available_slots.attr,
347         NULL,
348 };
349
350 struct attribute_group nvdimm_attribute_group = {
351         .attrs = nvdimm_attributes,
352 };
353 EXPORT_SYMBOL_GPL(nvdimm_attribute_group);
354
355 struct nvdimm *nvdimm_create(struct nvdimm_bus *nvdimm_bus, void *provider_data,
356                 const struct attribute_group **groups, unsigned long flags,
357                 unsigned long cmd_mask, int num_flush,
358                 struct resource *flush_wpq)
359 {
360         struct nvdimm *nvdimm = kzalloc(sizeof(*nvdimm), GFP_KERNEL);
361         struct device *dev;
362
363         if (!nvdimm)
364                 return NULL;
365
366         nvdimm->id = ida_simple_get(&dimm_ida, 0, 0, GFP_KERNEL);
367         if (nvdimm->id < 0) {
368                 kfree(nvdimm);
369                 return NULL;
370         }
371         nvdimm->provider_data = provider_data;
372         nvdimm->flags = flags;
373         nvdimm->cmd_mask = cmd_mask;
374         nvdimm->num_flush = num_flush;
375         nvdimm->flush_wpq = flush_wpq;
376         atomic_set(&nvdimm->busy, 0);
377         dev = &nvdimm->dev;
378         dev_set_name(dev, "nmem%d", nvdimm->id);
379         dev->parent = &nvdimm_bus->dev;
380         dev->type = &nvdimm_device_type;
381         dev->devt = MKDEV(nvdimm_major, nvdimm->id);
382         dev->groups = groups;
383         nd_device_register(dev);
384
385         return nvdimm;
386 }
387 EXPORT_SYMBOL_GPL(nvdimm_create);
388
389 struct blk_alloc_info {
390         struct nd_mapping *nd_mapping;
391         resource_size_t available, busy;
392         struct resource *res;
393 };
394
395 static int alias_dpa_busy(struct device *dev, void *data)
396 {
397         resource_size_t map_end, blk_start, new, busy;
398         struct blk_alloc_info *info = data;
399         struct nd_mapping *nd_mapping;
400         struct nd_region *nd_region;
401         struct nvdimm_drvdata *ndd;
402         struct resource *res;
403         int i;
404
405         if (!is_nd_pmem(dev))
406                 return 0;
407
408         nd_region = to_nd_region(dev);
409         for (i = 0; i < nd_region->ndr_mappings; i++) {
410                 nd_mapping  = &nd_region->mapping[i];
411                 if (nd_mapping->nvdimm == info->nd_mapping->nvdimm)
412                         break;
413         }
414
415         if (i >= nd_region->ndr_mappings)
416                 return 0;
417
418         ndd = to_ndd(nd_mapping);
419         map_end = nd_mapping->start + nd_mapping->size - 1;
420         blk_start = nd_mapping->start;
421  retry:
422         /*
423          * Find the free dpa from the end of the last pmem allocation to
424          * the end of the interleave-set mapping that is not already
425          * covered by a blk allocation.
426          */
427         busy = 0;
428         for_each_dpa_resource(ndd, res) {
429                 if ((res->start >= blk_start && res->start < map_end)
430                                 || (res->end >= blk_start
431                                         && res->end <= map_end)) {
432                         if (strncmp(res->name, "pmem", 4) == 0) {
433                                 new = max(blk_start, min(map_end + 1,
434                                                         res->end + 1));
435                                 if (new != blk_start) {
436                                         blk_start = new;
437                                         goto retry;
438                                 }
439                         } else
440                                 busy += min(map_end, res->end)
441                                         - max(nd_mapping->start, res->start) + 1;
442                 } else if (nd_mapping->start > res->start
443                                 && map_end < res->end) {
444                         /* total eclipse of the PMEM region mapping */
445                         busy += nd_mapping->size;
446                         break;
447                 }
448         }
449
450         info->available -= blk_start - nd_mapping->start + busy;
451         return 0;
452 }
453
454 static int blk_dpa_busy(struct device *dev, void *data)
455 {
456         struct blk_alloc_info *info = data;
457         struct nd_mapping *nd_mapping;
458         struct nd_region *nd_region;
459         resource_size_t map_end;
460         int i;
461
462         if (!is_nd_pmem(dev))
463                 return 0;
464
465         nd_region = to_nd_region(dev);
466         for (i = 0; i < nd_region->ndr_mappings; i++) {
467                 nd_mapping  = &nd_region->mapping[i];
468                 if (nd_mapping->nvdimm == info->nd_mapping->nvdimm)
469                         break;
470         }
471
472         if (i >= nd_region->ndr_mappings)
473                 return 0;
474
475         map_end = nd_mapping->start + nd_mapping->size - 1;
476         if (info->res->start >= nd_mapping->start
477                         && info->res->start < map_end) {
478                 if (info->res->end <= map_end) {
479                         info->busy = 0;
480                         return 1;
481                 } else {
482                         info->busy -= info->res->end - map_end;
483                         return 0;
484                 }
485         } else if (info->res->end >= nd_mapping->start
486                         && info->res->end <= map_end) {
487                 info->busy -= nd_mapping->start - info->res->start;
488                 return 0;
489         } else {
490                 info->busy -= nd_mapping->size;
491                 return 0;
492         }
493 }
494
495 /**
496  * nd_blk_available_dpa - account the unused dpa of BLK region
497  * @nd_mapping: container of dpa-resource-root + labels
498  *
499  * Unlike PMEM, BLK namespaces can occupy discontiguous DPA ranges, but
500  * we arrange for them to never start at an lower dpa than the last
501  * PMEM allocation in an aliased region.
502  */
503 resource_size_t nd_blk_available_dpa(struct nd_region *nd_region)
504 {
505         struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(&nd_region->dev);
506         struct nd_mapping *nd_mapping = &nd_region->mapping[0];
507         struct nvdimm_drvdata *ndd = to_ndd(nd_mapping);
508         struct blk_alloc_info info = {
509                 .nd_mapping = nd_mapping,
510                 .available = nd_mapping->size,
511         };
512         struct resource *res;
513
514         if (!ndd)
515                 return 0;
516
517         device_for_each_child(&nvdimm_bus->dev, &info, alias_dpa_busy);
518
519         /* now account for busy blk allocations in unaliased dpa */
520         for_each_dpa_resource(ndd, res) {
521                 if (strncmp(res->name, "blk", 3) != 0)
522                         continue;
523
524                 info.res = res;
525                 info.busy = resource_size(res);
526                 device_for_each_child(&nvdimm_bus->dev, &info, blk_dpa_busy);
527                 info.available -= info.busy;
528         }
529
530         return info.available;
531 }
532
533 /**
534  * nd_pmem_available_dpa - for the given dimm+region account unallocated dpa
535  * @nd_mapping: container of dpa-resource-root + labels
536  * @nd_region: constrain available space check to this reference region
537  * @overlap: calculate available space assuming this level of overlap
538  *
539  * Validate that a PMEM label, if present, aligns with the start of an
540  * interleave set and truncate the available size at the lowest BLK
541  * overlap point.
542  *
543  * The expectation is that this routine is called multiple times as it
544  * probes for the largest BLK encroachment for any single member DIMM of
545  * the interleave set.  Once that value is determined the PMEM-limit for
546  * the set can be established.
547  */
548 resource_size_t nd_pmem_available_dpa(struct nd_region *nd_region,
549                 struct nd_mapping *nd_mapping, resource_size_t *overlap)
550 {
551         resource_size_t map_start, map_end, busy = 0, available, blk_start;
552         struct nvdimm_drvdata *ndd = to_ndd(nd_mapping);
553         struct resource *res;
554         const char *reason;
555
556         if (!ndd)
557                 return 0;
558
559         map_start = nd_mapping->start;
560         map_end = map_start + nd_mapping->size - 1;
561         blk_start = max(map_start, map_end + 1 - *overlap);
562         for_each_dpa_resource(ndd, res) {
563                 if (res->start >= map_start && res->start < map_end) {
564                         if (strncmp(res->name, "blk", 3) == 0)
565                                 blk_start = min(blk_start,
566                                                 max(map_start, res->start));
567                         else if (res->end > map_end) {
568                                 reason = "misaligned to iset";
569                                 goto err;
570                         } else
571                                 busy += resource_size(res);
572                 } else if (res->end >= map_start && res->end <= map_end) {
573                         if (strncmp(res->name, "blk", 3) == 0) {
574                                 /*
575                                  * If a BLK allocation overlaps the start of
576                                  * PMEM the entire interleave set may now only
577                                  * be used for BLK.
578                                  */
579                                 blk_start = map_start;
580                         } else
581                                 busy += resource_size(res);
582                 } else if (map_start > res->start && map_start < res->end) {
583                         /* total eclipse of the mapping */
584                         busy += nd_mapping->size;
585                         blk_start = map_start;
586                 }
587         }
588
589         *overlap = map_end + 1 - blk_start;
590         available = blk_start - map_start;
591         if (busy < available)
592                 return available - busy;
593         return 0;
594
595  err:
596         nd_dbg_dpa(nd_region, ndd, res, "%s\n", reason);
597         return 0;
598 }
599
600 void nvdimm_free_dpa(struct nvdimm_drvdata *ndd, struct resource *res)
601 {
602         WARN_ON_ONCE(!is_nvdimm_bus_locked(ndd->dev));
603         kfree(res->name);
604         __release_region(&ndd->dpa, res->start, resource_size(res));
605 }
606
607 struct resource *nvdimm_allocate_dpa(struct nvdimm_drvdata *ndd,
608                 struct nd_label_id *label_id, resource_size_t start,
609                 resource_size_t n)
610 {
611         char *name = kmemdup(label_id, sizeof(*label_id), GFP_KERNEL);
612         struct resource *res;
613
614         if (!name)
615                 return NULL;
616
617         WARN_ON_ONCE(!is_nvdimm_bus_locked(ndd->dev));
618         res = __request_region(&ndd->dpa, start, n, name, 0);
619         if (!res)
620                 kfree(name);
621         return res;
622 }
623
624 /**
625  * nvdimm_allocated_dpa - sum up the dpa currently allocated to this label_id
626  * @nvdimm: container of dpa-resource-root + labels
627  * @label_id: dpa resource name of the form {pmem|blk}-<human readable uuid>
628  */
629 resource_size_t nvdimm_allocated_dpa(struct nvdimm_drvdata *ndd,
630                 struct nd_label_id *label_id)
631 {
632         resource_size_t allocated = 0;
633         struct resource *res;
634
635         for_each_dpa_resource(ndd, res)
636                 if (strcmp(res->name, label_id->id) == 0)
637                         allocated += resource_size(res);
638
639         return allocated;
640 }
641
/*
 * device_for_each_child() callback: increment the int counter behind @c
 * for every child that is an nvdimm.  Always returns 0.
 */
static int count_dimms(struct device *dev, void *c)
{
	int *counter = c;

	if (!is_nvdimm(dev))
		return 0;

	*counter += 1;
	return 0;
}
650
651 int nvdimm_bus_check_dimm_count(struct nvdimm_bus *nvdimm_bus, int dimm_count)
652 {
653         int count = 0;
654         /* Flush any possible dimm registration failures */
655         nd_synchronize();
656
657         device_for_each_child(&nvdimm_bus->dev, &count, count_dimms);
658         dev_dbg(&nvdimm_bus->dev, "%s: count: %d\n", __func__, count);
659         if (count != dimm_count)
660                 return -ENXIO;
661         return 0;
662 }
663 EXPORT_SYMBOL_GPL(nvdimm_bus_check_dimm_count);
664
665 void __exit nvdimm_devs_exit(void)
666 {
667         ida_destroy(&dimm_ida);
668 }