nfit: do an ARS scrub on hitting a latent media error
[cascardo/linux.git] / drivers / acpi / nfit / core.c
1 /*
2  * Copyright(c) 2013-2015 Intel Corporation. All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or modify
5  * it under the terms of version 2 of the GNU General Public License as
6  * published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful, but
9  * WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  */
13 #include <linux/list_sort.h>
14 #include <linux/libnvdimm.h>
15 #include <linux/module.h>
16 #include <linux/mutex.h>
17 #include <linux/ndctl.h>
18 #include <linux/sysfs.h>
19 #include <linux/delay.h>
20 #include <linux/list.h>
21 #include <linux/acpi.h>
22 #include <linux/sort.h>
23 #include <linux/pmem.h>
24 #include <linux/io.h>
25 #include <linux/nd.h>
26 #include <asm/cacheflush.h>
27 #include "nfit.h"
28
29 /*
30  * For readq() and writeq() on 32-bit builds, the hi-lo, lo-hi order is
31  * irrelevant.
32  */
33 #include <linux/io-64-nonatomic-hi-lo.h>
34
35 static bool force_enable_dimms;
36 module_param(force_enable_dimms, bool, S_IRUGO|S_IWUSR);
37 MODULE_PARM_DESC(force_enable_dimms, "Ignore _STA (ACPI DIMM device) status");
38
39 static unsigned int scrub_timeout = NFIT_ARS_TIMEOUT;
40 module_param(scrub_timeout, uint, S_IRUGO|S_IWUSR);
41 MODULE_PARM_DESC(scrub_timeout, "Initial scrub timeout in seconds");
42
43 /* after three payloads of overflow, it's dead jim */
44 static unsigned int scrub_overflow_abort = 3;
45 module_param(scrub_overflow_abort, uint, S_IRUGO|S_IWUSR);
46 MODULE_PARM_DESC(scrub_overflow_abort,
47                 "Number of times we overflow ARS results before abort");
48
49 static bool disable_vendor_specific;
50 module_param(disable_vendor_specific, bool, S_IRUGO);
51 MODULE_PARM_DESC(disable_vendor_specific,
52                 "Limit commands to the publicly specified set\n");
53
54 LIST_HEAD(acpi_descs);
55 DEFINE_MUTEX(acpi_desc_lock);
56
57 static struct workqueue_struct *nfit_wq;
58
59 struct nfit_table_prev {
60         struct list_head spas;
61         struct list_head memdevs;
62         struct list_head dcrs;
63         struct list_head bdws;
64         struct list_head idts;
65         struct list_head flushes;
66 };
67
68 static u8 nfit_uuid[NFIT_UUID_MAX][16];
69
70 const u8 *to_nfit_uuid(enum nfit_uuids id)
71 {
72         return nfit_uuid[id];
73 }
74 EXPORT_SYMBOL(to_nfit_uuid);
75
76 static struct acpi_nfit_desc *to_acpi_nfit_desc(
77                 struct nvdimm_bus_descriptor *nd_desc)
78 {
79         return container_of(nd_desc, struct acpi_nfit_desc, nd_desc);
80 }
81
82 static struct acpi_device *to_acpi_dev(struct acpi_nfit_desc *acpi_desc)
83 {
84         struct nvdimm_bus_descriptor *nd_desc = &acpi_desc->nd_desc;
85
86         /*
87          * If provider == 'ACPI.NFIT' we can assume 'dev' is a struct
88          * acpi_device.
89          */
90         if (!nd_desc->provider_name
91                         || strcmp(nd_desc->provider_name, "ACPI.NFIT") != 0)
92                 return NULL;
93
94         return to_acpi_device(acpi_desc->dev);
95 }
96
97 static int xlat_status(void *buf, unsigned int cmd)
98 {
99         struct nd_cmd_clear_error *clear_err;
100         struct nd_cmd_ars_status *ars_status;
101         struct nd_cmd_ars_start *ars_start;
102         struct nd_cmd_ars_cap *ars_cap;
103         u16 flags;
104
105         switch (cmd) {
106         case ND_CMD_ARS_CAP:
107                 ars_cap = buf;
108                 if ((ars_cap->status & 0xffff) == NFIT_ARS_CAP_NONE)
109                         return -ENOTTY;
110
111                 /* Command failed */
112                 if (ars_cap->status & 0xffff)
113                         return -EIO;
114
115                 /* No supported scan types for this range */
116                 flags = ND_ARS_PERSISTENT | ND_ARS_VOLATILE;
117                 if ((ars_cap->status >> 16 & flags) == 0)
118                         return -ENOTTY;
119                 break;
120         case ND_CMD_ARS_START:
121                 ars_start = buf;
122                 /* ARS is in progress */
123                 if ((ars_start->status & 0xffff) == NFIT_ARS_START_BUSY)
124                         return -EBUSY;
125
126                 /* Command failed */
127                 if (ars_start->status & 0xffff)
128                         return -EIO;
129                 break;
130         case ND_CMD_ARS_STATUS:
131                 ars_status = buf;
132                 /* Command failed */
133                 if (ars_status->status & 0xffff)
134                         return -EIO;
135                 /* Check extended status (Upper two bytes) */
136                 if (ars_status->status == NFIT_ARS_STATUS_DONE)
137                         return 0;
138
139                 /* ARS is in progress */
140                 if (ars_status->status == NFIT_ARS_STATUS_BUSY)
141                         return -EBUSY;
142
143                 /* No ARS performed for the current boot */
144                 if (ars_status->status == NFIT_ARS_STATUS_NONE)
145                         return -EAGAIN;
146
147                 /*
148                  * ARS interrupted, either we overflowed or some other
149                  * agent wants the scan to stop.  If we didn't overflow
150                  * then just continue with the returned results.
151                  */
152                 if (ars_status->status == NFIT_ARS_STATUS_INTR) {
153                         if (ars_status->flags & NFIT_ARS_F_OVERFLOW)
154                                 return -ENOSPC;
155                         return 0;
156                 }
157
158                 /* Unknown status */
159                 if (ars_status->status >> 16)
160                         return -EIO;
161                 break;
162         case ND_CMD_CLEAR_ERROR:
163                 clear_err = buf;
164                 if (clear_err->status & 0xffff)
165                         return -EIO;
166                 if (!clear_err->cleared)
167                         return -EIO;
168                 if (clear_err->length > clear_err->cleared)
169                         return clear_err->cleared;
170                 break;
171         default:
172                 break;
173         }
174
175         return 0;
176 }
177
178 static int acpi_nfit_ctl(struct nvdimm_bus_descriptor *nd_desc,
179                 struct nvdimm *nvdimm, unsigned int cmd, void *buf,
180                 unsigned int buf_len, int *cmd_rc)
181 {
182         struct acpi_nfit_desc *acpi_desc = to_acpi_nfit_desc(nd_desc);
183         union acpi_object in_obj, in_buf, *out_obj;
184         const struct nd_cmd_desc *desc = NULL;
185         struct device *dev = acpi_desc->dev;
186         struct nd_cmd_pkg *call_pkg = NULL;
187         const char *cmd_name, *dimm_name;
188         unsigned long cmd_mask, dsm_mask;
189         acpi_handle handle;
190         unsigned int func;
191         const u8 *uuid;
192         u32 offset;
193         int rc, i;
194
195         func = cmd;
196         if (cmd == ND_CMD_CALL) {
197                 call_pkg = buf;
198                 func = call_pkg->nd_command;
199         }
200
201         if (nvdimm) {
202                 struct nfit_mem *nfit_mem = nvdimm_provider_data(nvdimm);
203                 struct acpi_device *adev = nfit_mem->adev;
204
205                 if (!adev)
206                         return -ENOTTY;
207                 if (call_pkg && nfit_mem->family != call_pkg->nd_family)
208                         return -ENOTTY;
209
210                 dimm_name = nvdimm_name(nvdimm);
211                 cmd_name = nvdimm_cmd_name(cmd);
212                 cmd_mask = nvdimm_cmd_mask(nvdimm);
213                 dsm_mask = nfit_mem->dsm_mask;
214                 desc = nd_cmd_dimm_desc(cmd);
215                 uuid = to_nfit_uuid(nfit_mem->family);
216                 handle = adev->handle;
217         } else {
218                 struct acpi_device *adev = to_acpi_dev(acpi_desc);
219
220                 cmd_name = nvdimm_bus_cmd_name(cmd);
221                 cmd_mask = nd_desc->cmd_mask;
222                 dsm_mask = cmd_mask;
223                 desc = nd_cmd_bus_desc(cmd);
224                 uuid = to_nfit_uuid(NFIT_DEV_BUS);
225                 handle = adev->handle;
226                 dimm_name = "bus";
227         }
228
229         if (!desc || (cmd && (desc->out_num + desc->in_num == 0)))
230                 return -ENOTTY;
231
232         if (!test_bit(cmd, &cmd_mask) || !test_bit(func, &dsm_mask))
233                 return -ENOTTY;
234
235         in_obj.type = ACPI_TYPE_PACKAGE;
236         in_obj.package.count = 1;
237         in_obj.package.elements = &in_buf;
238         in_buf.type = ACPI_TYPE_BUFFER;
239         in_buf.buffer.pointer = buf;
240         in_buf.buffer.length = 0;
241
242         /* libnvdimm has already validated the input envelope */
243         for (i = 0; i < desc->in_num; i++)
244                 in_buf.buffer.length += nd_cmd_in_size(nvdimm, cmd, desc,
245                                 i, buf);
246
247         if (call_pkg) {
248                 /* skip over package wrapper */
249                 in_buf.buffer.pointer = (void *) &call_pkg->nd_payload;
250                 in_buf.buffer.length = call_pkg->nd_size_in;
251         }
252
253         if (IS_ENABLED(CONFIG_ACPI_NFIT_DEBUG)) {
254                 dev_dbg(dev, "%s:%s cmd: %d: func: %d input length: %d\n",
255                                 __func__, dimm_name, cmd, func,
256                                 in_buf.buffer.length);
257                 print_hex_dump_debug("nvdimm in  ", DUMP_PREFIX_OFFSET, 4, 4,
258                         in_buf.buffer.pointer,
259                         min_t(u32, 256, in_buf.buffer.length), true);
260         }
261
262         out_obj = acpi_evaluate_dsm(handle, uuid, 1, func, &in_obj);
263         if (!out_obj) {
264                 dev_dbg(dev, "%s:%s _DSM failed cmd: %s\n", __func__, dimm_name,
265                                 cmd_name);
266                 return -EINVAL;
267         }
268
269         if (call_pkg) {
270                 call_pkg->nd_fw_size = out_obj->buffer.length;
271                 memcpy(call_pkg->nd_payload + call_pkg->nd_size_in,
272                         out_obj->buffer.pointer,
273                         min(call_pkg->nd_fw_size, call_pkg->nd_size_out));
274
275                 ACPI_FREE(out_obj);
276                 /*
277                  * Need to support FW function w/o known size in advance.
278                  * Caller can determine required size based upon nd_fw_size.
279                  * If we return an error (like elsewhere) then caller wouldn't
280                  * be able to rely upon data returned to make calculation.
281                  */
282                 return 0;
283         }
284
285         if (out_obj->package.type != ACPI_TYPE_BUFFER) {
286                 dev_dbg(dev, "%s:%s unexpected output object type cmd: %s type: %d\n",
287                                 __func__, dimm_name, cmd_name, out_obj->type);
288                 rc = -EINVAL;
289                 goto out;
290         }
291
292         if (IS_ENABLED(CONFIG_ACPI_NFIT_DEBUG)) {
293                 dev_dbg(dev, "%s:%s cmd: %s output length: %d\n", __func__,
294                                 dimm_name, cmd_name, out_obj->buffer.length);
295                 print_hex_dump_debug(cmd_name, DUMP_PREFIX_OFFSET, 4,
296                                 4, out_obj->buffer.pointer, min_t(u32, 128,
297                                         out_obj->buffer.length), true);
298         }
299
300         for (i = 0, offset = 0; i < desc->out_num; i++) {
301                 u32 out_size = nd_cmd_out_size(nvdimm, cmd, desc, i, buf,
302                                 (u32 *) out_obj->buffer.pointer);
303
304                 if (offset + out_size > out_obj->buffer.length) {
305                         dev_dbg(dev, "%s:%s output object underflow cmd: %s field: %d\n",
306                                         __func__, dimm_name, cmd_name, i);
307                         break;
308                 }
309
310                 if (in_buf.buffer.length + offset + out_size > buf_len) {
311                         dev_dbg(dev, "%s:%s output overrun cmd: %s field: %d\n",
312                                         __func__, dimm_name, cmd_name, i);
313                         rc = -ENXIO;
314                         goto out;
315                 }
316                 memcpy(buf + in_buf.buffer.length + offset,
317                                 out_obj->buffer.pointer + offset, out_size);
318                 offset += out_size;
319         }
320         if (offset + in_buf.buffer.length < buf_len) {
321                 if (i >= 1) {
322                         /*
323                          * status valid, return the number of bytes left
324                          * unfilled in the output buffer
325                          */
326                         rc = buf_len - offset - in_buf.buffer.length;
327                         if (cmd_rc)
328                                 *cmd_rc = xlat_status(buf, cmd);
329                 } else {
330                         dev_err(dev, "%s:%s underrun cmd: %s buf_len: %d out_len: %d\n",
331                                         __func__, dimm_name, cmd_name, buf_len,
332                                         offset);
333                         rc = -ENXIO;
334                 }
335         } else {
336                 rc = 0;
337                 if (cmd_rc)
338                         *cmd_rc = xlat_status(buf, cmd);
339         }
340
341  out:
342         ACPI_FREE(out_obj);
343
344         return rc;
345 }
346
347 static const char *spa_type_name(u16 type)
348 {
349         static const char *to_name[] = {
350                 [NFIT_SPA_VOLATILE] = "volatile",
351                 [NFIT_SPA_PM] = "pmem",
352                 [NFIT_SPA_DCR] = "dimm-control-region",
353                 [NFIT_SPA_BDW] = "block-data-window",
354                 [NFIT_SPA_VDISK] = "volatile-disk",
355                 [NFIT_SPA_VCD] = "volatile-cd",
356                 [NFIT_SPA_PDISK] = "persistent-disk",
357                 [NFIT_SPA_PCD] = "persistent-cd",
358
359         };
360
361         if (type > NFIT_SPA_PCD)
362                 return "unknown";
363
364         return to_name[type];
365 }
366
367 int nfit_spa_type(struct acpi_nfit_system_address *spa)
368 {
369         int i;
370
371         for (i = 0; i < NFIT_UUID_MAX; i++)
372                 if (memcmp(to_nfit_uuid(i), spa->range_guid, 16) == 0)
373                         return i;
374         return -1;
375 }
376
377 static bool add_spa(struct acpi_nfit_desc *acpi_desc,
378                 struct nfit_table_prev *prev,
379                 struct acpi_nfit_system_address *spa)
380 {
381         struct device *dev = acpi_desc->dev;
382         struct nfit_spa *nfit_spa;
383
384         if (spa->header.length != sizeof(*spa))
385                 return false;
386
387         list_for_each_entry(nfit_spa, &prev->spas, list) {
388                 if (memcmp(nfit_spa->spa, spa, sizeof(*spa)) == 0) {
389                         list_move_tail(&nfit_spa->list, &acpi_desc->spas);
390                         return true;
391                 }
392         }
393
394         nfit_spa = devm_kzalloc(dev, sizeof(*nfit_spa) + sizeof(*spa),
395                         GFP_KERNEL);
396         if (!nfit_spa)
397                 return false;
398         INIT_LIST_HEAD(&nfit_spa->list);
399         memcpy(nfit_spa->spa, spa, sizeof(*spa));
400         list_add_tail(&nfit_spa->list, &acpi_desc->spas);
401         dev_dbg(dev, "%s: spa index: %d type: %s\n", __func__,
402                         spa->range_index,
403                         spa_type_name(nfit_spa_type(spa)));
404         return true;
405 }
406
407 static bool add_memdev(struct acpi_nfit_desc *acpi_desc,
408                 struct nfit_table_prev *prev,
409                 struct acpi_nfit_memory_map *memdev)
410 {
411         struct device *dev = acpi_desc->dev;
412         struct nfit_memdev *nfit_memdev;
413
414         if (memdev->header.length != sizeof(*memdev))
415                 return false;
416
417         list_for_each_entry(nfit_memdev, &prev->memdevs, list)
418                 if (memcmp(nfit_memdev->memdev, memdev, sizeof(*memdev)) == 0) {
419                         list_move_tail(&nfit_memdev->list, &acpi_desc->memdevs);
420                         return true;
421                 }
422
423         nfit_memdev = devm_kzalloc(dev, sizeof(*nfit_memdev) + sizeof(*memdev),
424                         GFP_KERNEL);
425         if (!nfit_memdev)
426                 return false;
427         INIT_LIST_HEAD(&nfit_memdev->list);
428         memcpy(nfit_memdev->memdev, memdev, sizeof(*memdev));
429         list_add_tail(&nfit_memdev->list, &acpi_desc->memdevs);
430         dev_dbg(dev, "%s: memdev handle: %#x spa: %d dcr: %d\n",
431                         __func__, memdev->device_handle, memdev->range_index,
432                         memdev->region_index);
433         return true;
434 }
435
436 /*
437  * An implementation may provide a truncated control region if no block windows
438  * are defined.
439  */
440 static size_t sizeof_dcr(struct acpi_nfit_control_region *dcr)
441 {
442         if (dcr->header.length < offsetof(struct acpi_nfit_control_region,
443                                 window_size))
444                 return 0;
445         if (dcr->windows)
446                 return sizeof(*dcr);
447         return offsetof(struct acpi_nfit_control_region, window_size);
448 }
449
450 static bool add_dcr(struct acpi_nfit_desc *acpi_desc,
451                 struct nfit_table_prev *prev,
452                 struct acpi_nfit_control_region *dcr)
453 {
454         struct device *dev = acpi_desc->dev;
455         struct nfit_dcr *nfit_dcr;
456
457         if (!sizeof_dcr(dcr))
458                 return false;
459
460         list_for_each_entry(nfit_dcr, &prev->dcrs, list)
461                 if (memcmp(nfit_dcr->dcr, dcr, sizeof_dcr(dcr)) == 0) {
462                         list_move_tail(&nfit_dcr->list, &acpi_desc->dcrs);
463                         return true;
464                 }
465
466         nfit_dcr = devm_kzalloc(dev, sizeof(*nfit_dcr) + sizeof(*dcr),
467                         GFP_KERNEL);
468         if (!nfit_dcr)
469                 return false;
470         INIT_LIST_HEAD(&nfit_dcr->list);
471         memcpy(nfit_dcr->dcr, dcr, sizeof_dcr(dcr));
472         list_add_tail(&nfit_dcr->list, &acpi_desc->dcrs);
473         dev_dbg(dev, "%s: dcr index: %d windows: %d\n", __func__,
474                         dcr->region_index, dcr->windows);
475         return true;
476 }
477
478 static bool add_bdw(struct acpi_nfit_desc *acpi_desc,
479                 struct nfit_table_prev *prev,
480                 struct acpi_nfit_data_region *bdw)
481 {
482         struct device *dev = acpi_desc->dev;
483         struct nfit_bdw *nfit_bdw;
484
485         if (bdw->header.length != sizeof(*bdw))
486                 return false;
487         list_for_each_entry(nfit_bdw, &prev->bdws, list)
488                 if (memcmp(nfit_bdw->bdw, bdw, sizeof(*bdw)) == 0) {
489                         list_move_tail(&nfit_bdw->list, &acpi_desc->bdws);
490                         return true;
491                 }
492
493         nfit_bdw = devm_kzalloc(dev, sizeof(*nfit_bdw) + sizeof(*bdw),
494                         GFP_KERNEL);
495         if (!nfit_bdw)
496                 return false;
497         INIT_LIST_HEAD(&nfit_bdw->list);
498         memcpy(nfit_bdw->bdw, bdw, sizeof(*bdw));
499         list_add_tail(&nfit_bdw->list, &acpi_desc->bdws);
500         dev_dbg(dev, "%s: bdw dcr: %d windows: %d\n", __func__,
501                         bdw->region_index, bdw->windows);
502         return true;
503 }
504
505 static size_t sizeof_idt(struct acpi_nfit_interleave *idt)
506 {
507         if (idt->header.length < sizeof(*idt))
508                 return 0;
509         return sizeof(*idt) + sizeof(u32) * (idt->line_count - 1);
510 }
511
512 static bool add_idt(struct acpi_nfit_desc *acpi_desc,
513                 struct nfit_table_prev *prev,
514                 struct acpi_nfit_interleave *idt)
515 {
516         struct device *dev = acpi_desc->dev;
517         struct nfit_idt *nfit_idt;
518
519         if (!sizeof_idt(idt))
520                 return false;
521
522         list_for_each_entry(nfit_idt, &prev->idts, list) {
523                 if (sizeof_idt(nfit_idt->idt) != sizeof_idt(idt))
524                         continue;
525
526                 if (memcmp(nfit_idt->idt, idt, sizeof_idt(idt)) == 0) {
527                         list_move_tail(&nfit_idt->list, &acpi_desc->idts);
528                         return true;
529                 }
530         }
531
532         nfit_idt = devm_kzalloc(dev, sizeof(*nfit_idt) + sizeof_idt(idt),
533                         GFP_KERNEL);
534         if (!nfit_idt)
535                 return false;
536         INIT_LIST_HEAD(&nfit_idt->list);
537         memcpy(nfit_idt->idt, idt, sizeof_idt(idt));
538         list_add_tail(&nfit_idt->list, &acpi_desc->idts);
539         dev_dbg(dev, "%s: idt index: %d num_lines: %d\n", __func__,
540                         idt->interleave_index, idt->line_count);
541         return true;
542 }
543
544 static size_t sizeof_flush(struct acpi_nfit_flush_address *flush)
545 {
546         if (flush->header.length < sizeof(*flush))
547                 return 0;
548         return sizeof(*flush) + sizeof(u64) * (flush->hint_count - 1);
549 }
550
551 static bool add_flush(struct acpi_nfit_desc *acpi_desc,
552                 struct nfit_table_prev *prev,
553                 struct acpi_nfit_flush_address *flush)
554 {
555         struct device *dev = acpi_desc->dev;
556         struct nfit_flush *nfit_flush;
557
558         if (!sizeof_flush(flush))
559                 return false;
560
561         list_for_each_entry(nfit_flush, &prev->flushes, list) {
562                 if (sizeof_flush(nfit_flush->flush) != sizeof_flush(flush))
563                         continue;
564
565                 if (memcmp(nfit_flush->flush, flush,
566                                         sizeof_flush(flush)) == 0) {
567                         list_move_tail(&nfit_flush->list, &acpi_desc->flushes);
568                         return true;
569                 }
570         }
571
572         nfit_flush = devm_kzalloc(dev, sizeof(*nfit_flush)
573                         + sizeof_flush(flush), GFP_KERNEL);
574         if (!nfit_flush)
575                 return false;
576         INIT_LIST_HEAD(&nfit_flush->list);
577         memcpy(nfit_flush->flush, flush, sizeof_flush(flush));
578         list_add_tail(&nfit_flush->list, &acpi_desc->flushes);
579         dev_dbg(dev, "%s: nfit_flush handle: %d hint_count: %d\n", __func__,
580                         flush->device_handle, flush->hint_count);
581         return true;
582 }
583
584 static void *add_table(struct acpi_nfit_desc *acpi_desc,
585                 struct nfit_table_prev *prev, void *table, const void *end)
586 {
587         struct device *dev = acpi_desc->dev;
588         struct acpi_nfit_header *hdr;
589         void *err = ERR_PTR(-ENOMEM);
590
591         if (table >= end)
592                 return NULL;
593
594         hdr = table;
595         if (!hdr->length) {
596                 dev_warn(dev, "found a zero length table '%d' parsing nfit\n",
597                         hdr->type);
598                 return NULL;
599         }
600
601         switch (hdr->type) {
602         case ACPI_NFIT_TYPE_SYSTEM_ADDRESS:
603                 if (!add_spa(acpi_desc, prev, table))
604                         return err;
605                 break;
606         case ACPI_NFIT_TYPE_MEMORY_MAP:
607                 if (!add_memdev(acpi_desc, prev, table))
608                         return err;
609                 break;
610         case ACPI_NFIT_TYPE_CONTROL_REGION:
611                 if (!add_dcr(acpi_desc, prev, table))
612                         return err;
613                 break;
614         case ACPI_NFIT_TYPE_DATA_REGION:
615                 if (!add_bdw(acpi_desc, prev, table))
616                         return err;
617                 break;
618         case ACPI_NFIT_TYPE_INTERLEAVE:
619                 if (!add_idt(acpi_desc, prev, table))
620                         return err;
621                 break;
622         case ACPI_NFIT_TYPE_FLUSH_ADDRESS:
623                 if (!add_flush(acpi_desc, prev, table))
624                         return err;
625                 break;
626         case ACPI_NFIT_TYPE_SMBIOS:
627                 dev_dbg(dev, "%s: smbios\n", __func__);
628                 break;
629         default:
630                 dev_err(dev, "unknown table '%d' parsing nfit\n", hdr->type);
631                 break;
632         }
633
634         return table + hdr->length;
635 }
636
637 static void nfit_mem_find_spa_bdw(struct acpi_nfit_desc *acpi_desc,
638                 struct nfit_mem *nfit_mem)
639 {
640         u32 device_handle = __to_nfit_memdev(nfit_mem)->device_handle;
641         u16 dcr = nfit_mem->dcr->region_index;
642         struct nfit_spa *nfit_spa;
643
644         list_for_each_entry(nfit_spa, &acpi_desc->spas, list) {
645                 u16 range_index = nfit_spa->spa->range_index;
646                 int type = nfit_spa_type(nfit_spa->spa);
647                 struct nfit_memdev *nfit_memdev;
648
649                 if (type != NFIT_SPA_BDW)
650                         continue;
651
652                 list_for_each_entry(nfit_memdev, &acpi_desc->memdevs, list) {
653                         if (nfit_memdev->memdev->range_index != range_index)
654                                 continue;
655                         if (nfit_memdev->memdev->device_handle != device_handle)
656                                 continue;
657                         if (nfit_memdev->memdev->region_index != dcr)
658                                 continue;
659
660                         nfit_mem->spa_bdw = nfit_spa->spa;
661                         return;
662                 }
663         }
664
665         dev_dbg(acpi_desc->dev, "SPA-BDW not found for SPA-DCR %d\n",
666                         nfit_mem->spa_dcr->range_index);
667         nfit_mem->bdw = NULL;
668 }
669
670 static void nfit_mem_init_bdw(struct acpi_nfit_desc *acpi_desc,
671                 struct nfit_mem *nfit_mem, struct acpi_nfit_system_address *spa)
672 {
673         u16 dcr = __to_nfit_memdev(nfit_mem)->region_index;
674         struct nfit_memdev *nfit_memdev;
675         struct nfit_bdw *nfit_bdw;
676         struct nfit_idt *nfit_idt;
677         u16 idt_idx, range_index;
678
679         list_for_each_entry(nfit_bdw, &acpi_desc->bdws, list) {
680                 if (nfit_bdw->bdw->region_index != dcr)
681                         continue;
682                 nfit_mem->bdw = nfit_bdw->bdw;
683                 break;
684         }
685
686         if (!nfit_mem->bdw)
687                 return;
688
689         nfit_mem_find_spa_bdw(acpi_desc, nfit_mem);
690
691         if (!nfit_mem->spa_bdw)
692                 return;
693
694         range_index = nfit_mem->spa_bdw->range_index;
695         list_for_each_entry(nfit_memdev, &acpi_desc->memdevs, list) {
696                 if (nfit_memdev->memdev->range_index != range_index ||
697                                 nfit_memdev->memdev->region_index != dcr)
698                         continue;
699                 nfit_mem->memdev_bdw = nfit_memdev->memdev;
700                 idt_idx = nfit_memdev->memdev->interleave_index;
701                 list_for_each_entry(nfit_idt, &acpi_desc->idts, list) {
702                         if (nfit_idt->idt->interleave_index != idt_idx)
703                                 continue;
704                         nfit_mem->idt_bdw = nfit_idt->idt;
705                         break;
706                 }
707                 break;
708         }
709 }
710
711 static int nfit_mem_dcr_init(struct acpi_nfit_desc *acpi_desc,
712                 struct acpi_nfit_system_address *spa)
713 {
714         struct nfit_mem *nfit_mem, *found;
715         struct nfit_memdev *nfit_memdev;
716         int type = nfit_spa_type(spa);
717
718         switch (type) {
719         case NFIT_SPA_DCR:
720         case NFIT_SPA_PM:
721                 break;
722         default:
723                 return 0;
724         }
725
726         list_for_each_entry(nfit_memdev, &acpi_desc->memdevs, list) {
727                 struct nfit_flush *nfit_flush;
728                 struct nfit_dcr *nfit_dcr;
729                 u32 device_handle;
730                 u16 dcr;
731
732                 if (nfit_memdev->memdev->range_index != spa->range_index)
733                         continue;
734                 found = NULL;
735                 dcr = nfit_memdev->memdev->region_index;
736                 device_handle = nfit_memdev->memdev->device_handle;
737                 list_for_each_entry(nfit_mem, &acpi_desc->dimms, list)
738                         if (__to_nfit_memdev(nfit_mem)->device_handle
739                                         == device_handle) {
740                                 found = nfit_mem;
741                                 break;
742                         }
743
744                 if (found)
745                         nfit_mem = found;
746                 else {
747                         nfit_mem = devm_kzalloc(acpi_desc->dev,
748                                         sizeof(*nfit_mem), GFP_KERNEL);
749                         if (!nfit_mem)
750                                 return -ENOMEM;
751                         INIT_LIST_HEAD(&nfit_mem->list);
752                         nfit_mem->acpi_desc = acpi_desc;
753                         list_add(&nfit_mem->list, &acpi_desc->dimms);
754                 }
755
756                 list_for_each_entry(nfit_dcr, &acpi_desc->dcrs, list) {
757                         if (nfit_dcr->dcr->region_index != dcr)
758                                 continue;
759                         /*
760                          * Record the control region for the dimm.  For
761                          * the ACPI 6.1 case, where there are separate
762                          * control regions for the pmem vs blk
763                          * interfaces, be sure to record the extended
764                          * blk details.
765                          */
766                         if (!nfit_mem->dcr)
767                                 nfit_mem->dcr = nfit_dcr->dcr;
768                         else if (nfit_mem->dcr->windows == 0
769                                         && nfit_dcr->dcr->windows)
770                                 nfit_mem->dcr = nfit_dcr->dcr;
771                         break;
772                 }
773
774                 list_for_each_entry(nfit_flush, &acpi_desc->flushes, list) {
775                         struct acpi_nfit_flush_address *flush;
776                         u16 i;
777
778                         if (nfit_flush->flush->device_handle != device_handle)
779                                 continue;
780                         nfit_mem->nfit_flush = nfit_flush;
781                         flush = nfit_flush->flush;
782                         nfit_mem->flush_wpq = devm_kzalloc(acpi_desc->dev,
783                                         flush->hint_count
784                                         * sizeof(struct resource), GFP_KERNEL);
785                         if (!nfit_mem->flush_wpq)
786                                 return -ENOMEM;
787                         for (i = 0; i < flush->hint_count; i++) {
788                                 struct resource *res = &nfit_mem->flush_wpq[i];
789
790                                 res->start = flush->hint_address[i];
791                                 res->end = res->start + 8 - 1;
792                         }
793                         break;
794                 }
795
796                 if (dcr && !nfit_mem->dcr) {
797                         dev_err(acpi_desc->dev, "SPA %d missing DCR %d\n",
798                                         spa->range_index, dcr);
799                         return -ENODEV;
800                 }
801
802                 if (type == NFIT_SPA_DCR) {
803                         struct nfit_idt *nfit_idt;
804                         u16 idt_idx;
805
806                         /* multiple dimms may share a SPA when interleaved */
807                         nfit_mem->spa_dcr = spa;
808                         nfit_mem->memdev_dcr = nfit_memdev->memdev;
809                         idt_idx = nfit_memdev->memdev->interleave_index;
810                         list_for_each_entry(nfit_idt, &acpi_desc->idts, list) {
811                                 if (nfit_idt->idt->interleave_index != idt_idx)
812                                         continue;
813                                 nfit_mem->idt_dcr = nfit_idt->idt;
814                                 break;
815                         }
816                         nfit_mem_init_bdw(acpi_desc, nfit_mem, spa);
817                 } else {
818                         /*
819                          * A single dimm may belong to multiple SPA-PM
820                          * ranges, record at least one in addition to
821                          * any SPA-DCR range.
822                          */
823                         nfit_mem->memdev_pmem = nfit_memdev->memdev;
824                 }
825         }
826
827         return 0;
828 }
829
830 static int nfit_mem_cmp(void *priv, struct list_head *_a, struct list_head *_b)
831 {
832         struct nfit_mem *a = container_of(_a, typeof(*a), list);
833         struct nfit_mem *b = container_of(_b, typeof(*b), list);
834         u32 handleA, handleB;
835
836         handleA = __to_nfit_memdev(a)->device_handle;
837         handleB = __to_nfit_memdev(b)->device_handle;
838         if (handleA < handleB)
839                 return -1;
840         else if (handleA > handleB)
841                 return 1;
842         return 0;
843 }
844
845 static int nfit_mem_init(struct acpi_nfit_desc *acpi_desc)
846 {
847         struct nfit_spa *nfit_spa;
848
849         /*
850          * For each SPA-DCR or SPA-PMEM address range find its
851          * corresponding MEMDEV(s).  From each MEMDEV find the
852          * corresponding DCR.  Then, if we're operating on a SPA-DCR,
853          * try to find a SPA-BDW and a corresponding BDW that references
854          * the DCR.  Throw it all into an nfit_mem object.  Note, that
855          * BDWs are optional.
856          */
857         list_for_each_entry(nfit_spa, &acpi_desc->spas, list) {
858                 int rc;
859
860                 rc = nfit_mem_dcr_init(acpi_desc, nfit_spa->spa);
861                 if (rc)
862                         return rc;
863         }
864
865         list_sort(NULL, &acpi_desc->dimms, nfit_mem_cmp);
866
867         return 0;
868 }
869
870 static ssize_t revision_show(struct device *dev,
871                 struct device_attribute *attr, char *buf)
872 {
873         struct nvdimm_bus *nvdimm_bus = to_nvdimm_bus(dev);
874         struct nvdimm_bus_descriptor *nd_desc = to_nd_desc(nvdimm_bus);
875         struct acpi_nfit_desc *acpi_desc = to_acpi_desc(nd_desc);
876
877         return sprintf(buf, "%d\n", acpi_desc->acpi_header.revision);
878 }
879 static DEVICE_ATTR_RO(revision);
880
881 /*
882  * This shows the number of full Address Range Scrubs that have been
883  * completed since driver load time. Userspace can wait on this using
884  * select/poll etc. A '+' at the end indicates an ARS is in progress
885  */
886 static ssize_t scrub_show(struct device *dev,
887                 struct device_attribute *attr, char *buf)
888 {
889         struct nvdimm_bus_descriptor *nd_desc;
890         ssize_t rc = -ENXIO;
891
892         device_lock(dev);
893         nd_desc = dev_get_drvdata(dev);
894         if (nd_desc) {
895                 struct acpi_nfit_desc *acpi_desc = to_acpi_desc(nd_desc);
896
897                 rc = sprintf(buf, "%d%s", acpi_desc->scrub_count,
898                                 (work_busy(&acpi_desc->work)) ? "+\n" : "\n");
899         }
900         device_unlock(dev);
901         return rc;
902 }
903
904 static ssize_t scrub_store(struct device *dev,
905                 struct device_attribute *attr, const char *buf, size_t size)
906 {
907         struct nvdimm_bus_descriptor *nd_desc;
908         ssize_t rc;
909         long val;
910
911         rc = kstrtol(buf, 0, &val);
912         if (rc)
913                 return rc;
914         if (val != 1)
915                 return -EINVAL;
916
917         device_lock(dev);
918         nd_desc = dev_get_drvdata(dev);
919         if (nd_desc) {
920                 struct acpi_nfit_desc *acpi_desc = to_acpi_desc(nd_desc);
921
922                 rc = acpi_nfit_ars_rescan(acpi_desc);
923         }
924         device_unlock(dev);
925         if (rc)
926                 return rc;
927         return size;
928 }
929 static DEVICE_ATTR_RW(scrub);
930
931 static bool ars_supported(struct nvdimm_bus *nvdimm_bus)
932 {
933         struct nvdimm_bus_descriptor *nd_desc = to_nd_desc(nvdimm_bus);
934         const unsigned long mask = 1 << ND_CMD_ARS_CAP | 1 << ND_CMD_ARS_START
935                 | 1 << ND_CMD_ARS_STATUS;
936
937         return (nd_desc->cmd_mask & mask) == mask;
938 }
939
940 static umode_t nfit_visible(struct kobject *kobj, struct attribute *a, int n)
941 {
942         struct device *dev = container_of(kobj, struct device, kobj);
943         struct nvdimm_bus *nvdimm_bus = to_nvdimm_bus(dev);
944
945         if (a == &dev_attr_scrub.attr && !ars_supported(nvdimm_bus))
946                 return 0;
947         return a->mode;
948 }
949
/* Attributes published in the bus-level "nfit" sysfs group. */
950 static struct attribute *acpi_nfit_attributes[] = {
951         &dev_attr_revision.attr,
952         &dev_attr_scrub.attr,
953         NULL,
954 };
955
/*
 * Bus-level "nfit" group; nfit_visible() decides per attribute whether
 * it is exposed.
 */
956 static struct attribute_group acpi_nfit_attribute_group = {
957         .name = "nfit",
958         .attrs = acpi_nfit_attributes,
959         .is_visible = nfit_visible,
960 };
961
/* NULL-terminated list: the generic nvdimm bus group plus the "nfit" group. */
962 static const struct attribute_group *acpi_nfit_attribute_groups[] = {
963         &nvdimm_bus_attribute_group,
964         &acpi_nfit_attribute_group,
965         NULL,
966 };
967
/*
 * Resolve an nvdimm device to its NFIT memory-map entry by way of the
 * nfit_mem stored as the nvdimm's provider data.
 */
static struct acpi_nfit_memory_map *to_nfit_memdev(struct device *dev)
{
	struct nfit_mem *nfit_mem = nvdimm_provider_data(to_nvdimm(dev));

	return __to_nfit_memdev(nfit_mem);
}
975
976 static struct acpi_nfit_control_region *to_nfit_dcr(struct device *dev)
977 {
978         struct nvdimm *nvdimm = to_nvdimm(dev);
979         struct nfit_mem *nfit_mem = nvdimm_provider_data(nvdimm);
980
981         return nfit_mem->dcr;
982 }
983
984 static ssize_t handle_show(struct device *dev,
985                 struct device_attribute *attr, char *buf)
986 {
987         struct acpi_nfit_memory_map *memdev = to_nfit_memdev(dev);
988
989         return sprintf(buf, "%#x\n", memdev->device_handle);
990 }
991 static DEVICE_ATTR_RO(handle);
992
993 static ssize_t phys_id_show(struct device *dev,
994                 struct device_attribute *attr, char *buf)
995 {
996         struct acpi_nfit_memory_map *memdev = to_nfit_memdev(dev);
997
998         return sprintf(buf, "%#x\n", memdev->physical_id);
999 }
1000 static DEVICE_ATTR_RO(phys_id);
1001
1002 static ssize_t vendor_show(struct device *dev,
1003                 struct device_attribute *attr, char *buf)
1004 {
1005         struct acpi_nfit_control_region *dcr = to_nfit_dcr(dev);
1006
1007         return sprintf(buf, "0x%04x\n", be16_to_cpu(dcr->vendor_id));
1008 }
1009 static DEVICE_ATTR_RO(vendor);
1010
1011 static ssize_t rev_id_show(struct device *dev,
1012                 struct device_attribute *attr, char *buf)
1013 {
1014         struct acpi_nfit_control_region *dcr = to_nfit_dcr(dev);
1015
1016         return sprintf(buf, "0x%04x\n", be16_to_cpu(dcr->revision_id));
1017 }
1018 static DEVICE_ATTR_RO(rev_id);
1019
1020 static ssize_t device_show(struct device *dev,
1021                 struct device_attribute *attr, char *buf)
1022 {
1023         struct acpi_nfit_control_region *dcr = to_nfit_dcr(dev);
1024
1025         return sprintf(buf, "0x%04x\n", be16_to_cpu(dcr->device_id));
1026 }
1027 static DEVICE_ATTR_RO(device);
1028
1029 static ssize_t subsystem_vendor_show(struct device *dev,
1030                 struct device_attribute *attr, char *buf)
1031 {
1032         struct acpi_nfit_control_region *dcr = to_nfit_dcr(dev);
1033
1034         return sprintf(buf, "0x%04x\n", be16_to_cpu(dcr->subsystem_vendor_id));
1035 }
1036 static DEVICE_ATTR_RO(subsystem_vendor);
1037
1038 static ssize_t subsystem_rev_id_show(struct device *dev,
1039                 struct device_attribute *attr, char *buf)
1040 {
1041         struct acpi_nfit_control_region *dcr = to_nfit_dcr(dev);
1042
1043         return sprintf(buf, "0x%04x\n",
1044                         be16_to_cpu(dcr->subsystem_revision_id));
1045 }
1046 static DEVICE_ATTR_RO(subsystem_rev_id);
1047
1048 static ssize_t subsystem_device_show(struct device *dev,
1049                 struct device_attribute *attr, char *buf)
1050 {
1051         struct acpi_nfit_control_region *dcr = to_nfit_dcr(dev);
1052
1053         return sprintf(buf, "0x%04x\n", be16_to_cpu(dcr->subsystem_device_id));
1054 }
1055 static DEVICE_ATTR_RO(subsystem_device);
1056
1057 static int num_nvdimm_formats(struct nvdimm *nvdimm)
1058 {
1059         struct nfit_mem *nfit_mem = nvdimm_provider_data(nvdimm);
1060         int formats = 0;
1061
1062         if (nfit_mem->memdev_pmem)
1063                 formats++;
1064         if (nfit_mem->memdev_bdw)
1065                 formats++;
1066         return formats;
1067 }
1068
1069 static ssize_t format_show(struct device *dev,
1070                 struct device_attribute *attr, char *buf)
1071 {
1072         struct acpi_nfit_control_region *dcr = to_nfit_dcr(dev);
1073
1074         return sprintf(buf, "0x%04x\n", be16_to_cpu(dcr->code));
1075 }
1076 static DEVICE_ATTR_RO(format);
1077
1078 static ssize_t format1_show(struct device *dev,
1079                 struct device_attribute *attr, char *buf)
1080 {
1081         u32 handle;
1082         ssize_t rc = -ENXIO;
1083         struct nfit_mem *nfit_mem;
1084         struct nfit_memdev *nfit_memdev;
1085         struct acpi_nfit_desc *acpi_desc;
1086         struct nvdimm *nvdimm = to_nvdimm(dev);
1087         struct acpi_nfit_control_region *dcr = to_nfit_dcr(dev);
1088
1089         nfit_mem = nvdimm_provider_data(nvdimm);
1090         acpi_desc = nfit_mem->acpi_desc;
1091         handle = to_nfit_memdev(dev)->device_handle;
1092
1093         /* assumes DIMMs have at most 2 published interface codes */
1094         mutex_lock(&acpi_desc->init_mutex);
1095         list_for_each_entry(nfit_memdev, &acpi_desc->memdevs, list) {
1096                 struct acpi_nfit_memory_map *memdev = nfit_memdev->memdev;
1097                 struct nfit_dcr *nfit_dcr;
1098
1099                 if (memdev->device_handle != handle)
1100                         continue;
1101
1102                 list_for_each_entry(nfit_dcr, &acpi_desc->dcrs, list) {
1103                         if (nfit_dcr->dcr->region_index != memdev->region_index)
1104                                 continue;
1105                         if (nfit_dcr->dcr->code == dcr->code)
1106                                 continue;
1107                         rc = sprintf(buf, "%#x\n",
1108                                         be16_to_cpu(nfit_dcr->dcr->code));
1109                         break;
1110                 }
1111                 if (rc != ENXIO)
1112                         break;
1113         }
1114         mutex_unlock(&acpi_desc->init_mutex);
1115         return rc;
1116 }
1117 static DEVICE_ATTR_RO(format1);
1118
1119 static ssize_t formats_show(struct device *dev,
1120                 struct device_attribute *attr, char *buf)
1121 {
1122         struct nvdimm *nvdimm = to_nvdimm(dev);
1123
1124         return sprintf(buf, "%d\n", num_nvdimm_formats(nvdimm));
1125 }
1126 static DEVICE_ATTR_RO(formats);
1127
1128 static ssize_t serial_show(struct device *dev,
1129                 struct device_attribute *attr, char *buf)
1130 {
1131         struct acpi_nfit_control_region *dcr = to_nfit_dcr(dev);
1132
1133         return sprintf(buf, "0x%08x\n", be32_to_cpu(dcr->serial_number));
1134 }
1135 static DEVICE_ATTR_RO(serial);
1136
1137 static ssize_t family_show(struct device *dev,
1138                 struct device_attribute *attr, char *buf)
1139 {
1140         struct nvdimm *nvdimm = to_nvdimm(dev);
1141         struct nfit_mem *nfit_mem = nvdimm_provider_data(nvdimm);
1142
1143         if (nfit_mem->family < 0)
1144                 return -ENXIO;
1145         return sprintf(buf, "%d\n", nfit_mem->family);
1146 }
1147 static DEVICE_ATTR_RO(family);
1148
1149 static ssize_t dsm_mask_show(struct device *dev,
1150                 struct device_attribute *attr, char *buf)
1151 {
1152         struct nvdimm *nvdimm = to_nvdimm(dev);
1153         struct nfit_mem *nfit_mem = nvdimm_provider_data(nvdimm);
1154
1155         if (nfit_mem->family < 0)
1156                 return -ENXIO;
1157         return sprintf(buf, "%#lx\n", nfit_mem->dsm_mask);
1158 }
1159 static DEVICE_ATTR_RO(dsm_mask);
1160
1161 static ssize_t flags_show(struct device *dev,
1162                 struct device_attribute *attr, char *buf)
1163 {
1164         u16 flags = to_nfit_memdev(dev)->flags;
1165
1166         return sprintf(buf, "%s%s%s%s%s\n",
1167                 flags & ACPI_NFIT_MEM_SAVE_FAILED ? "save_fail " : "",
1168                 flags & ACPI_NFIT_MEM_RESTORE_FAILED ? "restore_fail " : "",
1169                 flags & ACPI_NFIT_MEM_FLUSH_FAILED ? "flush_fail " : "",
1170                 flags & ACPI_NFIT_MEM_NOT_ARMED ? "not_armed " : "",
1171                 flags & ACPI_NFIT_MEM_HEALTH_OBSERVED ? "smart_event " : "");
1172 }
1173 static DEVICE_ATTR_RO(flags);
1174
1175 static ssize_t id_show(struct device *dev,
1176                 struct device_attribute *attr, char *buf)
1177 {
1178         struct acpi_nfit_control_region *dcr = to_nfit_dcr(dev);
1179
1180         if (dcr->valid_fields & ACPI_NFIT_CONTROL_MFG_INFO_VALID)
1181                 return sprintf(buf, "%04x-%02x-%04x-%08x\n",
1182                                 be16_to_cpu(dcr->vendor_id),
1183                                 dcr->manufacturing_location,
1184                                 be16_to_cpu(dcr->manufacturing_date),
1185                                 be32_to_cpu(dcr->serial_number));
1186         else
1187                 return sprintf(buf, "%04x-%08x\n",
1188                                 be16_to_cpu(dcr->vendor_id),
1189                                 be32_to_cpu(dcr->serial_number));
1190 }
1191 static DEVICE_ATTR_RO(id);
1192
/*
 * Per-DIMM attributes published in the DIMM "nfit" sysfs group.  Most
 * of the show routines dereference the DIMM's control region.
 */
1193 static struct attribute *acpi_nfit_dimm_attributes[] = {
1194         &dev_attr_handle.attr,
1195         &dev_attr_phys_id.attr,
1196         &dev_attr_vendor.attr,
1197         &dev_attr_device.attr,
1198         &dev_attr_rev_id.attr,
1199         &dev_attr_subsystem_vendor.attr,
1200         &dev_attr_subsystem_device.attr,
1201         &dev_attr_subsystem_rev_id.attr,
1202         &dev_attr_format.attr,
1203         &dev_attr_formats.attr,
1204         &dev_attr_format1.attr,
1205         &dev_attr_serial.attr,
1206         &dev_attr_flags.attr,
1207         &dev_attr_id.attr,
1208         &dev_attr_family.attr,
1209         &dev_attr_dsm_mask.attr,
1210         NULL,
1211 };
1212
1213 static umode_t acpi_nfit_dimm_attr_visible(struct kobject *kobj,
1214                 struct attribute *a, int n)
1215 {
1216         struct device *dev = container_of(kobj, struct device, kobj);
1217         struct nvdimm *nvdimm = to_nvdimm(dev);
1218
1219         if (!to_nfit_dcr(dev))
1220                 return 0;
1221         if (a == &dev_attr_format1.attr && num_nvdimm_formats(nvdimm) <= 1)
1222                 return 0;
1223         return a->mode;
1224 }
1225
/*
 * DIMM-level "nfit" group; acpi_nfit_dimm_attr_visible() decides per
 * attribute whether it is exposed.
 */
1226 static struct attribute_group acpi_nfit_dimm_attribute_group = {
1227         .name = "nfit",
1228         .attrs = acpi_nfit_dimm_attributes,
1229         .is_visible = acpi_nfit_dimm_attr_visible,
1230 };
1231
/*
 * NULL-terminated group list handed to nvdimm_create() by
 * acpi_nfit_register_dimms().
 */
1232 static const struct attribute_group *acpi_nfit_dimm_attribute_groups[] = {
1233         &nvdimm_attribute_group,
1234         &nd_device_attribute_group,
1235         &acpi_nfit_dimm_attribute_group,
1236         NULL,
1237 };
1238
1239 static struct nvdimm *acpi_nfit_dimm_by_handle(struct acpi_nfit_desc *acpi_desc,
1240                 u32 device_handle)
1241 {
1242         struct nfit_mem *nfit_mem;
1243
1244         list_for_each_entry(nfit_mem, &acpi_desc->dimms, list)
1245                 if (__to_nfit_memdev(nfit_mem)->device_handle == device_handle)
1246                         return nfit_mem->nvdimm;
1247
1248         return NULL;
1249 }
1250
1251 static int acpi_nfit_add_dimm(struct acpi_nfit_desc *acpi_desc,
1252                 struct nfit_mem *nfit_mem, u32 device_handle)
1253 {
1254         struct acpi_device *adev, *adev_dimm;
1255         struct device *dev = acpi_desc->dev;
1256         unsigned long dsm_mask;
1257         const u8 *uuid;
1258         int i;
1259
1260         /* nfit test assumes 1:1 relationship between commands and dsms */
1261         nfit_mem->dsm_mask = acpi_desc->dimm_cmd_force_en;
1262         nfit_mem->family = NVDIMM_FAMILY_INTEL;
1263         adev = to_acpi_dev(acpi_desc);
1264         if (!adev)
1265                 return 0;
1266
1267         adev_dimm = acpi_find_child_device(adev, device_handle, false);
1268         nfit_mem->adev = adev_dimm;
1269         if (!adev_dimm) {
1270                 dev_err(dev, "no ACPI.NFIT device with _ADR %#x, disabling...\n",
1271                                 device_handle);
1272                 return force_enable_dimms ? 0 : -ENODEV;
1273         }
1274
1275         /*
1276          * Until standardization materializes we need to consider 4
1277          * different command sets.  Note, that checking for function0 (bit0)
1278          * tells us if any commands are reachable through this uuid.
1279          */
1280         for (i = NVDIMM_FAMILY_INTEL; i <= NVDIMM_FAMILY_MSFT; i++)
1281                 if (acpi_check_dsm(adev_dimm->handle, to_nfit_uuid(i), 1, 1))
1282                         break;
1283
1284         /* limit the supported commands to those that are publicly documented */
1285         nfit_mem->family = i;
1286         if (nfit_mem->family == NVDIMM_FAMILY_INTEL) {
1287                 dsm_mask = 0x3fe;
1288                 if (disable_vendor_specific)
1289                         dsm_mask &= ~(1 << ND_CMD_VENDOR);
1290         } else if (nfit_mem->family == NVDIMM_FAMILY_HPE1) {
1291                 dsm_mask = 0x1c3c76;
1292         } else if (nfit_mem->family == NVDIMM_FAMILY_HPE2) {
1293                 dsm_mask = 0x1fe;
1294                 if (disable_vendor_specific)
1295                         dsm_mask &= ~(1 << 8);
1296         } else if (nfit_mem->family == NVDIMM_FAMILY_MSFT) {
1297                 dsm_mask = 0xffffffff;
1298         } else {
1299                 dev_err(dev, "unknown dimm command family\n");
1300                 nfit_mem->family = -1;
1301                 return force_enable_dimms ? 0 : -ENODEV;
1302         }
1303
1304         uuid = to_nfit_uuid(nfit_mem->family);
1305         for_each_set_bit(i, &dsm_mask, BITS_PER_LONG)
1306                 if (acpi_check_dsm(adev_dimm->handle, uuid, 1, 1ULL << i))
1307                         set_bit(i, &nfit_mem->dsm_mask);
1308
1309         return 0;
1310 }
1311
1312 static int acpi_nfit_register_dimms(struct acpi_nfit_desc *acpi_desc)
1313 {
1314         struct nfit_mem *nfit_mem;
1315         int dimm_count = 0;
1316
1317         list_for_each_entry(nfit_mem, &acpi_desc->dimms, list) {
1318                 struct acpi_nfit_flush_address *flush;
1319                 unsigned long flags = 0, cmd_mask;
1320                 struct nvdimm *nvdimm;
1321                 u32 device_handle;
1322                 u16 mem_flags;
1323                 int rc;
1324
1325                 device_handle = __to_nfit_memdev(nfit_mem)->device_handle;
1326                 nvdimm = acpi_nfit_dimm_by_handle(acpi_desc, device_handle);
1327                 if (nvdimm) {
1328                         dimm_count++;
1329                         continue;
1330                 }
1331
1332                 if (nfit_mem->bdw && nfit_mem->memdev_pmem)
1333                         flags |= NDD_ALIASING;
1334
1335                 mem_flags = __to_nfit_memdev(nfit_mem)->flags;
1336                 if (mem_flags & ACPI_NFIT_MEM_NOT_ARMED)
1337                         flags |= NDD_UNARMED;
1338
1339                 rc = acpi_nfit_add_dimm(acpi_desc, nfit_mem, device_handle);
1340                 if (rc)
1341                         continue;
1342
1343                 /*
1344                  * TODO: provide translation for non-NVDIMM_FAMILY_INTEL
1345                  * devices (i.e. from nd_cmd to acpi_dsm) to standardize the
1346                  * userspace interface.
1347                  */
1348                 cmd_mask = 1UL << ND_CMD_CALL;
1349                 if (nfit_mem->family == NVDIMM_FAMILY_INTEL)
1350                         cmd_mask |= nfit_mem->dsm_mask;
1351
1352                 flush = nfit_mem->nfit_flush ? nfit_mem->nfit_flush->flush
1353                         : NULL;
1354                 nvdimm = nvdimm_create(acpi_desc->nvdimm_bus, nfit_mem,
1355                                 acpi_nfit_dimm_attribute_groups,
1356                                 flags, cmd_mask, flush ? flush->hint_count : 0,
1357                                 nfit_mem->flush_wpq);
1358                 if (!nvdimm)
1359                         return -ENOMEM;
1360
1361                 nfit_mem->nvdimm = nvdimm;
1362                 dimm_count++;
1363
1364                 if ((mem_flags & ACPI_NFIT_MEM_FAILED_MASK) == 0)
1365                         continue;
1366
1367                 dev_info(acpi_desc->dev, "%s flags:%s%s%s%s\n",
1368                                 nvdimm_name(nvdimm),
1369                   mem_flags & ACPI_NFIT_MEM_SAVE_FAILED ? " save_fail" : "",
1370                   mem_flags & ACPI_NFIT_MEM_RESTORE_FAILED ? " restore_fail":"",
1371                   mem_flags & ACPI_NFIT_MEM_FLUSH_FAILED ? " flush_fail" : "",
1372                   mem_flags & ACPI_NFIT_MEM_NOT_ARMED ? " not_armed" : "");
1373
1374         }
1375
1376         return nvdimm_bus_check_dimm_count(acpi_desc->nvdimm_bus, dimm_count);
1377 }
1378
1379 static void acpi_nfit_init_dsms(struct acpi_nfit_desc *acpi_desc)
1380 {
1381         struct nvdimm_bus_descriptor *nd_desc = &acpi_desc->nd_desc;
1382         const u8 *uuid = to_nfit_uuid(NFIT_DEV_BUS);
1383         struct acpi_device *adev;
1384         int i;
1385
1386         nd_desc->cmd_mask = acpi_desc->bus_cmd_force_en;
1387         adev = to_acpi_dev(acpi_desc);
1388         if (!adev)
1389                 return;
1390
1391         for (i = ND_CMD_ARS_CAP; i <= ND_CMD_CLEAR_ERROR; i++)
1392                 if (acpi_check_dsm(adev->handle, uuid, 1, 1ULL << i))
1393                         set_bit(i, &nd_desc->cmd_mask);
1394 }
1395
1396 static ssize_t range_index_show(struct device *dev,
1397                 struct device_attribute *attr, char *buf)
1398 {
1399         struct nd_region *nd_region = to_nd_region(dev);
1400         struct nfit_spa *nfit_spa = nd_region_provider_data(nd_region);
1401
1402         return sprintf(buf, "%d\n", nfit_spa->spa->range_index);
1403 }
1404 static DEVICE_ATTR_RO(range_index);
1405
/* Attributes published in the region-level "nfit" sysfs group. */
1406 static struct attribute *acpi_nfit_region_attributes[] = {
1407         &dev_attr_range_index.attr,
1408         NULL,
1409 };
1410
/* Region-level "nfit" group; no is_visible hook, so always shown. */
1411 static struct attribute_group acpi_nfit_region_attribute_group = {
1412         .name = "nfit",
1413         .attrs = acpi_nfit_region_attributes,
1414 };
1415
/* NULL-terminated list: the generic region groups plus the "nfit" group. */
1416 static const struct attribute_group *acpi_nfit_region_attribute_groups[] = {
1417         &nd_region_attribute_group,
1418         &nd_mapping_attribute_group,
1419         &nd_device_attribute_group,
1420         &nd_numa_attribute_group,
1421         &acpi_nfit_region_attribute_group,
1422         NULL,
1423 };
1424
1425 /* enough info to uniquely specify an interleave set */
/*
 * Holds one trailing nfit_set_info_map per region mapping; the
 * allocation size comes from sizeof_nfit_set_info().
 * acpi_nfit_init_interleave_set() fills region_offset and
 * serial_number, sorts the entries and checksums the whole buffer with
 * nd_fletcher64() to form the interleave-set cookie.  Because the raw
 * bytes are checksummed, the layout (including the pad field) feeds
 * the cookie value.
 */
1426 struct nfit_set_info {
1427         struct nfit_set_info_map {
1428                 u64 region_offset;
1429                 u32 serial_number;
1430                 u32 pad;
1431         } mapping[0];
1432 };
1433
1434 static size_t sizeof_nfit_set_info(int num_mappings)
1435 {
1436         return sizeof(struct nfit_set_info)
1437                 + num_mappings * sizeof(struct nfit_set_info_map);
1438 }
1439
1440 static int cmp_map(const void *m0, const void *m1)
1441 {
1442         const struct nfit_set_info_map *map0 = m0;
1443         const struct nfit_set_info_map *map1 = m1;
1444
1445         return memcmp(&map0->region_offset, &map1->region_offset,
1446                         sizeof(u64));
1447 }
1448
1449 /* Retrieve the nth entry referencing this spa */
1450 static struct acpi_nfit_memory_map *memdev_from_spa(
1451                 struct acpi_nfit_desc *acpi_desc, u16 range_index, int n)
1452 {
1453         struct nfit_memdev *nfit_memdev;
1454
1455         list_for_each_entry(nfit_memdev, &acpi_desc->memdevs, list)
1456                 if (nfit_memdev->memdev->range_index == range_index)
1457                         if (n-- == 0)
1458                                 return nfit_memdev->memdev;
1459         return NULL;
1460 }
1461
1462 static int acpi_nfit_init_interleave_set(struct acpi_nfit_desc *acpi_desc,
1463                 struct nd_region_desc *ndr_desc,
1464                 struct acpi_nfit_system_address *spa)
1465 {
1466         int i, spa_type = nfit_spa_type(spa);
1467         struct device *dev = acpi_desc->dev;
1468         struct nd_interleave_set *nd_set;
1469         u16 nr = ndr_desc->num_mappings;
1470         struct nfit_set_info *info;
1471
1472         if (spa_type == NFIT_SPA_PM || spa_type == NFIT_SPA_VOLATILE)
1473                 /* pass */;
1474         else
1475                 return 0;
1476
1477         nd_set = devm_kzalloc(dev, sizeof(*nd_set), GFP_KERNEL);
1478         if (!nd_set)
1479                 return -ENOMEM;
1480
1481         info = devm_kzalloc(dev, sizeof_nfit_set_info(nr), GFP_KERNEL);
1482         if (!info)
1483                 return -ENOMEM;
1484         for (i = 0; i < nr; i++) {
1485                 struct nd_mapping *nd_mapping = &ndr_desc->nd_mapping[i];
1486                 struct nfit_set_info_map *map = &info->mapping[i];
1487                 struct nvdimm *nvdimm = nd_mapping->nvdimm;
1488                 struct nfit_mem *nfit_mem = nvdimm_provider_data(nvdimm);
1489                 struct acpi_nfit_memory_map *memdev = memdev_from_spa(acpi_desc,
1490                                 spa->range_index, i);
1491
1492                 if (!memdev || !nfit_mem->dcr) {
1493                         dev_err(dev, "%s: failed to find DCR\n", __func__);
1494                         return -ENODEV;
1495                 }
1496
1497                 map->region_offset = memdev->region_offset;
1498                 map->serial_number = nfit_mem->dcr->serial_number;
1499         }
1500
1501         sort(&info->mapping[0], nr, sizeof(struct nfit_set_info_map),
1502                         cmp_map, NULL);
1503         nd_set->cookie = nd_fletcher64(info, sizeof_nfit_set_info(nr), 0);
1504         ndr_desc->nd_set = nd_set;
1505         devm_kfree(dev, info);
1506
1507         return 0;
1508 }
1509
1510 static u64 to_interleave_offset(u64 offset, struct nfit_blk_mmio *mmio)
1511 {
1512         struct acpi_nfit_interleave *idt = mmio->idt;
1513         u32 sub_line_offset, line_index, line_offset;
1514         u64 line_no, table_skip_count, table_offset;
1515
1516         line_no = div_u64_rem(offset, mmio->line_size, &sub_line_offset);
1517         table_skip_count = div_u64_rem(line_no, mmio->num_lines, &line_index);
1518         line_offset = idt->line_offset[line_index]
1519                 * mmio->line_size;
1520         table_offset = table_skip_count * mmio->table_size;
1521
1522         return mmio->base_offset + line_offset + table_offset + sub_line_offset;
1523 }
1524
1525 static u32 read_blk_stat(struct nfit_blk *nfit_blk, unsigned int bw)
1526 {
1527         struct nfit_blk_mmio *mmio = &nfit_blk->mmio[DCR];
1528         u64 offset = nfit_blk->stat_offset + mmio->size * bw;
1529
1530         if (mmio->num_lines)
1531                 offset = to_interleave_offset(offset, mmio);
1532
1533         return readl(mmio->addr.base + offset);
1534 }
1535
1536 static void write_blk_ctl(struct nfit_blk *nfit_blk, unsigned int bw,
1537                 resource_size_t dpa, unsigned int len, unsigned int write)
1538 {
1539         u64 cmd, offset;
1540         struct nfit_blk_mmio *mmio = &nfit_blk->mmio[DCR];
1541
1542         enum {
1543                 BCW_OFFSET_MASK = (1ULL << 48)-1,
1544                 BCW_LEN_SHIFT = 48,
1545                 BCW_LEN_MASK = (1ULL << 8) - 1,
1546                 BCW_CMD_SHIFT = 56,
1547         };
1548
1549         cmd = (dpa >> L1_CACHE_SHIFT) & BCW_OFFSET_MASK;
1550         len = len >> L1_CACHE_SHIFT;
1551         cmd |= ((u64) len & BCW_LEN_MASK) << BCW_LEN_SHIFT;
1552         cmd |= ((u64) write) << BCW_CMD_SHIFT;
1553
1554         offset = nfit_blk->cmd_offset + mmio->size * bw;
1555         if (mmio->num_lines)
1556                 offset = to_interleave_offset(offset, mmio);
1557
1558         writeq(cmd, mmio->addr.base + offset);
1559         nvdimm_flush(nfit_blk->nd_region);
1560
1561         if (nfit_blk->dimm_flags & NFIT_BLK_DCR_LATCH)
1562                 readq(mmio->addr.base + offset);
1563 }
1564
1565 static int acpi_nfit_blk_single_io(struct nfit_blk *nfit_blk,
1566                 resource_size_t dpa, void *iobuf, size_t len, int rw,
1567                 unsigned int lane)
1568 {
1569         struct nfit_blk_mmio *mmio = &nfit_blk->mmio[BDW];
1570         unsigned int copied = 0;
1571         u64 base_offset;
1572         int rc;
1573
1574         base_offset = nfit_blk->bdw_offset + dpa % L1_CACHE_BYTES
1575                 + lane * mmio->size;
1576         write_blk_ctl(nfit_blk, lane, dpa, len, rw);
1577         while (len) {
1578                 unsigned int c;
1579                 u64 offset;
1580
1581                 if (mmio->num_lines) {
1582                         u32 line_offset;
1583
1584                         offset = to_interleave_offset(base_offset + copied,
1585                                         mmio);
1586                         div_u64_rem(offset, mmio->line_size, &line_offset);
1587                         c = min_t(size_t, len, mmio->line_size - line_offset);
1588                 } else {
1589                         offset = base_offset + nfit_blk->bdw_offset;
1590                         c = len;
1591                 }
1592
1593                 if (rw)
1594                         memcpy_to_pmem(mmio->addr.aperture + offset,
1595                                         iobuf + copied, c);
1596                 else {
1597                         if (nfit_blk->dimm_flags & NFIT_BLK_READ_FLUSH)
1598                                 mmio_flush_range((void __force *)
1599                                         mmio->addr.aperture + offset, c);
1600
1601                         memcpy_from_pmem(iobuf + copied,
1602                                         mmio->addr.aperture + offset, c);
1603                 }
1604
1605                 copied += c;
1606                 len -= c;
1607         }
1608
1609         if (rw)
1610                 nvdimm_flush(nfit_blk->nd_region);
1611
1612         rc = read_blk_stat(nfit_blk, lane) ? -EIO : 0;
1613         return rc;
1614 }
1615
1616 static int acpi_nfit_blk_region_do_io(struct nd_blk_region *ndbr,
1617                 resource_size_t dpa, void *iobuf, u64 len, int rw)
1618 {
1619         struct nfit_blk *nfit_blk = nd_blk_region_provider_data(ndbr);
1620         struct nfit_blk_mmio *mmio = &nfit_blk->mmio[BDW];
1621         struct nd_region *nd_region = nfit_blk->nd_region;
1622         unsigned int lane, copied = 0;
1623         int rc = 0;
1624
1625         lane = nd_region_acquire_lane(nd_region);
1626         while (len) {
1627                 u64 c = min(len, mmio->size);
1628
1629                 rc = acpi_nfit_blk_single_io(nfit_blk, dpa + copied,
1630                                 iobuf + copied, c, rw, lane);
1631                 if (rc)
1632                         break;
1633
1634                 copied += c;
1635                 len -= c;
1636         }
1637         nd_region_release_lane(nd_region, lane);
1638
1639         return rc;
1640 }
1641
1642 static int nfit_blk_init_interleave(struct nfit_blk_mmio *mmio,
1643                 struct acpi_nfit_interleave *idt, u16 interleave_ways)
1644 {
1645         if (idt) {
1646                 mmio->num_lines = idt->line_count;
1647                 mmio->line_size = idt->line_size;
1648                 if (interleave_ways == 0)
1649                         return -ENXIO;
1650                 mmio->table_size = mmio->num_lines * interleave_ways
1651                         * mmio->line_size;
1652         }
1653
1654         return 0;
1655 }
1656
1657 static int acpi_nfit_blk_get_flags(struct nvdimm_bus_descriptor *nd_desc,
1658                 struct nvdimm *nvdimm, struct nfit_blk *nfit_blk)
1659 {
1660         struct nd_cmd_dimm_flags flags;
1661         int rc;
1662
1663         memset(&flags, 0, sizeof(flags));
1664         rc = nd_desc->ndctl(nd_desc, nvdimm, ND_CMD_DIMM_FLAGS, &flags,
1665                         sizeof(flags), NULL);
1666
1667         if (rc >= 0 && flags.status == 0)
1668                 nfit_blk->dimm_flags = flags.flags;
1669         else if (rc == -ENOTTY) {
1670                 /* fall back to a conservative default */
1671                 nfit_blk->dimm_flags = NFIT_BLK_DCR_LATCH | NFIT_BLK_READ_FLUSH;
1672                 rc = 0;
1673         } else
1674                 rc = -ENXIO;
1675
1676         return rc;
1677 }
1678
1679 static int acpi_nfit_blk_region_enable(struct nvdimm_bus *nvdimm_bus,
1680                 struct device *dev)
1681 {
1682         struct nvdimm_bus_descriptor *nd_desc = to_nd_desc(nvdimm_bus);
1683         struct nd_blk_region *ndbr = to_nd_blk_region(dev);
1684         struct nfit_blk_mmio *mmio;
1685         struct nfit_blk *nfit_blk;
1686         struct nfit_mem *nfit_mem;
1687         struct nvdimm *nvdimm;
1688         int rc;
1689
1690         nvdimm = nd_blk_region_to_dimm(ndbr);
1691         nfit_mem = nvdimm_provider_data(nvdimm);
1692         if (!nfit_mem || !nfit_mem->dcr || !nfit_mem->bdw) {
1693                 dev_dbg(dev, "%s: missing%s%s%s\n", __func__,
1694                                 nfit_mem ? "" : " nfit_mem",
1695                                 (nfit_mem && nfit_mem->dcr) ? "" : " dcr",
1696                                 (nfit_mem && nfit_mem->bdw) ? "" : " bdw");
1697                 return -ENXIO;
1698         }
1699
1700         nfit_blk = devm_kzalloc(dev, sizeof(*nfit_blk), GFP_KERNEL);
1701         if (!nfit_blk)
1702                 return -ENOMEM;
1703         nd_blk_region_set_provider_data(ndbr, nfit_blk);
1704         nfit_blk->nd_region = to_nd_region(dev);
1705
1706         /* map block aperture memory */
1707         nfit_blk->bdw_offset = nfit_mem->bdw->offset;
1708         mmio = &nfit_blk->mmio[BDW];
1709         mmio->addr.base = devm_nvdimm_memremap(dev, nfit_mem->spa_bdw->address,
1710                         nfit_mem->spa_bdw->length, ARCH_MEMREMAP_PMEM);
1711         if (!mmio->addr.base) {
1712                 dev_dbg(dev, "%s: %s failed to map bdw\n", __func__,
1713                                 nvdimm_name(nvdimm));
1714                 return -ENOMEM;
1715         }
1716         mmio->size = nfit_mem->bdw->size;
1717         mmio->base_offset = nfit_mem->memdev_bdw->region_offset;
1718         mmio->idt = nfit_mem->idt_bdw;
1719         mmio->spa = nfit_mem->spa_bdw;
1720         rc = nfit_blk_init_interleave(mmio, nfit_mem->idt_bdw,
1721                         nfit_mem->memdev_bdw->interleave_ways);
1722         if (rc) {
1723                 dev_dbg(dev, "%s: %s failed to init bdw interleave\n",
1724                                 __func__, nvdimm_name(nvdimm));
1725                 return rc;
1726         }
1727
1728         /* map block control memory */
1729         nfit_blk->cmd_offset = nfit_mem->dcr->command_offset;
1730         nfit_blk->stat_offset = nfit_mem->dcr->status_offset;
1731         mmio = &nfit_blk->mmio[DCR];
1732         mmio->addr.base = devm_nvdimm_ioremap(dev, nfit_mem->spa_dcr->address,
1733                         nfit_mem->spa_dcr->length);
1734         if (!mmio->addr.base) {
1735                 dev_dbg(dev, "%s: %s failed to map dcr\n", __func__,
1736                                 nvdimm_name(nvdimm));
1737                 return -ENOMEM;
1738         }
1739         mmio->size = nfit_mem->dcr->window_size;
1740         mmio->base_offset = nfit_mem->memdev_dcr->region_offset;
1741         mmio->idt = nfit_mem->idt_dcr;
1742         mmio->spa = nfit_mem->spa_dcr;
1743         rc = nfit_blk_init_interleave(mmio, nfit_mem->idt_dcr,
1744                         nfit_mem->memdev_dcr->interleave_ways);
1745         if (rc) {
1746                 dev_dbg(dev, "%s: %s failed to init dcr interleave\n",
1747                                 __func__, nvdimm_name(nvdimm));
1748                 return rc;
1749         }
1750
1751         rc = acpi_nfit_blk_get_flags(nd_desc, nvdimm, nfit_blk);
1752         if (rc < 0) {
1753                 dev_dbg(dev, "%s: %s failed get DIMM flags\n",
1754                                 __func__, nvdimm_name(nvdimm));
1755                 return rc;
1756         }
1757
1758         if (nvdimm_has_flush(nfit_blk->nd_region) < 0)
1759                 dev_warn(dev, "unable to guarantee persistence of writes\n");
1760
1761         if (mmio->line_size == 0)
1762                 return 0;
1763
1764         if ((u32) nfit_blk->cmd_offset % mmio->line_size
1765                         + 8 > mmio->line_size) {
1766                 dev_dbg(dev, "cmd_offset crosses interleave boundary\n");
1767                 return -ENXIO;
1768         } else if ((u32) nfit_blk->stat_offset % mmio->line_size
1769                         + 8 > mmio->line_size) {
1770                 dev_dbg(dev, "stat_offset crosses interleave boundary\n");
1771                 return -ENXIO;
1772         }
1773
1774         return 0;
1775 }
1776
1777 static int ars_get_cap(struct acpi_nfit_desc *acpi_desc,
1778                 struct nd_cmd_ars_cap *cmd, struct nfit_spa *nfit_spa)
1779 {
1780         struct nvdimm_bus_descriptor *nd_desc = &acpi_desc->nd_desc;
1781         struct acpi_nfit_system_address *spa = nfit_spa->spa;
1782         int cmd_rc, rc;
1783
1784         cmd->address = spa->address;
1785         cmd->length = spa->length;
1786         rc = nd_desc->ndctl(nd_desc, NULL, ND_CMD_ARS_CAP, cmd,
1787                         sizeof(*cmd), &cmd_rc);
1788         if (rc < 0)
1789                 return rc;
1790         return cmd_rc;
1791 }
1792
1793 static int ars_start(struct acpi_nfit_desc *acpi_desc, struct nfit_spa *nfit_spa)
1794 {
1795         int rc;
1796         int cmd_rc;
1797         struct nd_cmd_ars_start ars_start;
1798         struct acpi_nfit_system_address *spa = nfit_spa->spa;
1799         struct nvdimm_bus_descriptor *nd_desc = &acpi_desc->nd_desc;
1800
1801         memset(&ars_start, 0, sizeof(ars_start));
1802         ars_start.address = spa->address;
1803         ars_start.length = spa->length;
1804         if (nfit_spa_type(spa) == NFIT_SPA_PM)
1805                 ars_start.type = ND_ARS_PERSISTENT;
1806         else if (nfit_spa_type(spa) == NFIT_SPA_VOLATILE)
1807                 ars_start.type = ND_ARS_VOLATILE;
1808         else
1809                 return -ENOTTY;
1810
1811         rc = nd_desc->ndctl(nd_desc, NULL, ND_CMD_ARS_START, &ars_start,
1812                         sizeof(ars_start), &cmd_rc);
1813
1814         if (rc < 0)
1815                 return rc;
1816         return cmd_rc;
1817 }
1818
1819 static int ars_continue(struct acpi_nfit_desc *acpi_desc)
1820 {
1821         int rc, cmd_rc;
1822         struct nd_cmd_ars_start ars_start;
1823         struct nvdimm_bus_descriptor *nd_desc = &acpi_desc->nd_desc;
1824         struct nd_cmd_ars_status *ars_status = acpi_desc->ars_status;
1825
1826         memset(&ars_start, 0, sizeof(ars_start));
1827         ars_start.address = ars_status->restart_address;
1828         ars_start.length = ars_status->restart_length;
1829         ars_start.type = ars_status->type;
1830         rc = nd_desc->ndctl(nd_desc, NULL, ND_CMD_ARS_START, &ars_start,
1831                         sizeof(ars_start), &cmd_rc);
1832         if (rc < 0)
1833                 return rc;
1834         return cmd_rc;
1835 }
1836
1837 static int ars_get_status(struct acpi_nfit_desc *acpi_desc)
1838 {
1839         struct nvdimm_bus_descriptor *nd_desc = &acpi_desc->nd_desc;
1840         struct nd_cmd_ars_status *ars_status = acpi_desc->ars_status;
1841         int rc, cmd_rc;
1842
1843         rc = nd_desc->ndctl(nd_desc, NULL, ND_CMD_ARS_STATUS, ars_status,
1844                         acpi_desc->ars_status_size, &cmd_rc);
1845         if (rc < 0)
1846                 return rc;
1847         return cmd_rc;
1848 }
1849
1850 static int ars_status_process_records(struct nvdimm_bus *nvdimm_bus,
1851                 struct nd_cmd_ars_status *ars_status)
1852 {
1853         int rc;
1854         u32 i;
1855
1856         for (i = 0; i < ars_status->num_records; i++) {
1857                 rc = nvdimm_bus_add_poison(nvdimm_bus,
1858                                 ars_status->records[i].err_address,
1859                                 ars_status->records[i].length);
1860                 if (rc)
1861                         return rc;
1862         }
1863
1864         return 0;
1865 }
1866
/* devm action callback: @data is the struct resource to remove */
static void acpi_nfit_remove_resource(void *data)
{
	remove_resource(data);
}
1873
1874 static int acpi_nfit_insert_resource(struct acpi_nfit_desc *acpi_desc,
1875                 struct nd_region_desc *ndr_desc)
1876 {
1877         struct resource *res, *nd_res = ndr_desc->res;
1878         int is_pmem, ret;
1879
1880         /* No operation if the region is already registered as PMEM */
1881         is_pmem = region_intersects(nd_res->start, resource_size(nd_res),
1882                                 IORESOURCE_MEM, IORES_DESC_PERSISTENT_MEMORY);
1883         if (is_pmem == REGION_INTERSECTS)
1884                 return 0;
1885
1886         res = devm_kzalloc(acpi_desc->dev, sizeof(*res), GFP_KERNEL);
1887         if (!res)
1888                 return -ENOMEM;
1889
1890         res->name = "Persistent Memory";
1891         res->start = nd_res->start;
1892         res->end = nd_res->end;
1893         res->flags = IORESOURCE_MEM;
1894         res->desc = IORES_DESC_PERSISTENT_MEMORY;
1895
1896         ret = insert_resource(&iomem_resource, res);
1897         if (ret)
1898                 return ret;
1899
1900         ret = devm_add_action_or_reset(acpi_desc->dev,
1901                                         acpi_nfit_remove_resource,
1902                                         res);
1903         if (ret)
1904                 return ret;
1905
1906         return 0;
1907 }
1908
1909 static int acpi_nfit_init_mapping(struct acpi_nfit_desc *acpi_desc,
1910                 struct nd_mapping *nd_mapping, struct nd_region_desc *ndr_desc,
1911                 struct acpi_nfit_memory_map *memdev,
1912                 struct nfit_spa *nfit_spa)
1913 {
1914         struct nvdimm *nvdimm = acpi_nfit_dimm_by_handle(acpi_desc,
1915                         memdev->device_handle);
1916         struct acpi_nfit_system_address *spa = nfit_spa->spa;
1917         struct nd_blk_region_desc *ndbr_desc;
1918         struct nfit_mem *nfit_mem;
1919         int blk_valid = 0;
1920
1921         if (!nvdimm) {
1922                 dev_err(acpi_desc->dev, "spa%d dimm: %#x not found\n",
1923                                 spa->range_index, memdev->device_handle);
1924                 return -ENODEV;
1925         }
1926
1927         nd_mapping->nvdimm = nvdimm;
1928         switch (nfit_spa_type(spa)) {
1929         case NFIT_SPA_PM:
1930         case NFIT_SPA_VOLATILE:
1931                 nd_mapping->start = memdev->address;
1932                 nd_mapping->size = memdev->region_size;
1933                 break;
1934         case NFIT_SPA_DCR:
1935                 nfit_mem = nvdimm_provider_data(nvdimm);
1936                 if (!nfit_mem || !nfit_mem->bdw) {
1937                         dev_dbg(acpi_desc->dev, "spa%d %s missing bdw\n",
1938                                         spa->range_index, nvdimm_name(nvdimm));
1939                 } else {
1940                         nd_mapping->size = nfit_mem->bdw->capacity;
1941                         nd_mapping->start = nfit_mem->bdw->start_address;
1942                         ndr_desc->num_lanes = nfit_mem->bdw->windows;
1943                         blk_valid = 1;
1944                 }
1945
1946                 ndr_desc->nd_mapping = nd_mapping;
1947                 ndr_desc->num_mappings = blk_valid;
1948                 ndbr_desc = to_blk_region_desc(ndr_desc);
1949                 ndbr_desc->enable = acpi_nfit_blk_region_enable;
1950                 ndbr_desc->do_io = acpi_desc->blk_do_io;
1951                 nfit_spa->nd_region = nvdimm_blk_region_create(acpi_desc->nvdimm_bus,
1952                                 ndr_desc);
1953                 if (!nfit_spa->nd_region)
1954                         return -ENOMEM;
1955                 break;
1956         }
1957
1958         return 0;
1959 }
1960
1961 static bool nfit_spa_is_virtual(struct acpi_nfit_system_address *spa)
1962 {
1963         return (nfit_spa_type(spa) == NFIT_SPA_VDISK ||
1964                 nfit_spa_type(spa) == NFIT_SPA_VCD   ||
1965                 nfit_spa_type(spa) == NFIT_SPA_PDISK ||
1966                 nfit_spa_type(spa) == NFIT_SPA_PCD);
1967 }
1968
1969 static int acpi_nfit_register_region(struct acpi_nfit_desc *acpi_desc,
1970                 struct nfit_spa *nfit_spa)
1971 {
1972         static struct nd_mapping nd_mappings[ND_MAX_MAPPINGS];
1973         struct acpi_nfit_system_address *spa = nfit_spa->spa;
1974         struct nd_blk_region_desc ndbr_desc;
1975         struct nd_region_desc *ndr_desc;
1976         struct nfit_memdev *nfit_memdev;
1977         struct nvdimm_bus *nvdimm_bus;
1978         struct resource res;
1979         int count = 0, rc;
1980
1981         if (nfit_spa->nd_region)
1982                 return 0;
1983
1984         if (spa->range_index == 0 && !nfit_spa_is_virtual(spa)) {
1985                 dev_dbg(acpi_desc->dev, "%s: detected invalid spa index\n",
1986                                 __func__);
1987                 return 0;
1988         }
1989
1990         memset(&res, 0, sizeof(res));
1991         memset(&nd_mappings, 0, sizeof(nd_mappings));
1992         memset(&ndbr_desc, 0, sizeof(ndbr_desc));
1993         res.start = spa->address;
1994         res.end = res.start + spa->length - 1;
1995         ndr_desc = &ndbr_desc.ndr_desc;
1996         ndr_desc->res = &res;
1997         ndr_desc->provider_data = nfit_spa;
1998         ndr_desc->attr_groups = acpi_nfit_region_attribute_groups;
1999         if (spa->flags & ACPI_NFIT_PROXIMITY_VALID)
2000                 ndr_desc->numa_node = acpi_map_pxm_to_online_node(
2001                                                 spa->proximity_domain);
2002         else
2003                 ndr_desc->numa_node = NUMA_NO_NODE;
2004
2005         list_for_each_entry(nfit_memdev, &acpi_desc->memdevs, list) {
2006                 struct acpi_nfit_memory_map *memdev = nfit_memdev->memdev;
2007                 struct nd_mapping *nd_mapping;
2008
2009                 if (memdev->range_index != spa->range_index)
2010                         continue;
2011                 if (count >= ND_MAX_MAPPINGS) {
2012                         dev_err(acpi_desc->dev, "spa%d exceeds max mappings %d\n",
2013                                         spa->range_index, ND_MAX_MAPPINGS);
2014                         return -ENXIO;
2015                 }
2016                 nd_mapping = &nd_mappings[count++];
2017                 rc = acpi_nfit_init_mapping(acpi_desc, nd_mapping, ndr_desc,
2018                                 memdev, nfit_spa);
2019                 if (rc)
2020                         goto out;
2021         }
2022
2023         ndr_desc->nd_mapping = nd_mappings;
2024         ndr_desc->num_mappings = count;
2025         rc = acpi_nfit_init_interleave_set(acpi_desc, ndr_desc, spa);
2026         if (rc)
2027                 goto out;
2028
2029         nvdimm_bus = acpi_desc->nvdimm_bus;
2030         if (nfit_spa_type(spa) == NFIT_SPA_PM) {
2031                 rc = acpi_nfit_insert_resource(acpi_desc, ndr_desc);
2032                 if (rc) {
2033                         dev_warn(acpi_desc->dev,
2034                                 "failed to insert pmem resource to iomem: %d\n",
2035                                 rc);
2036                         goto out;
2037                 }
2038
2039                 nfit_spa->nd_region = nvdimm_pmem_region_create(nvdimm_bus,
2040                                 ndr_desc);
2041                 if (!nfit_spa->nd_region)
2042                         rc = -ENOMEM;
2043         } else if (nfit_spa_type(spa) == NFIT_SPA_VOLATILE) {
2044                 nfit_spa->nd_region = nvdimm_volatile_region_create(nvdimm_bus,
2045                                 ndr_desc);
2046                 if (!nfit_spa->nd_region)
2047                         rc = -ENOMEM;
2048         } else if (nfit_spa_is_virtual(spa)) {
2049                 nfit_spa->nd_region = nvdimm_pmem_region_create(nvdimm_bus,
2050                                 ndr_desc);
2051                 if (!nfit_spa->nd_region)
2052                         rc = -ENOMEM;
2053         }
2054
2055  out:
2056         if (rc)
2057                 dev_err(acpi_desc->dev, "failed to register spa range %d\n",
2058                                 nfit_spa->spa->range_index);
2059         return rc;
2060 }
2061
2062 static int ars_status_alloc(struct acpi_nfit_desc *acpi_desc,
2063                 u32 max_ars)
2064 {
2065         struct device *dev = acpi_desc->dev;
2066         struct nd_cmd_ars_status *ars_status;
2067
2068         if (acpi_desc->ars_status && acpi_desc->ars_status_size >= max_ars) {
2069                 memset(acpi_desc->ars_status, 0, acpi_desc->ars_status_size);
2070                 return 0;
2071         }
2072
2073         if (acpi_desc->ars_status)
2074                 devm_kfree(dev, acpi_desc->ars_status);
2075         acpi_desc->ars_status = NULL;
2076         ars_status = devm_kzalloc(dev, max_ars, GFP_KERNEL);
2077         if (!ars_status)
2078                 return -ENOMEM;
2079         acpi_desc->ars_status = ars_status;
2080         acpi_desc->ars_status_size = max_ars;
2081         return 0;
2082 }
2083
2084 static int acpi_nfit_query_poison(struct acpi_nfit_desc *acpi_desc,
2085                 struct nfit_spa *nfit_spa)
2086 {
2087         struct acpi_nfit_system_address *spa = nfit_spa->spa;
2088         int rc;
2089
2090         if (!nfit_spa->max_ars) {
2091                 struct nd_cmd_ars_cap ars_cap;
2092
2093                 memset(&ars_cap, 0, sizeof(ars_cap));
2094                 rc = ars_get_cap(acpi_desc, &ars_cap, nfit_spa);
2095                 if (rc < 0)
2096                         return rc;
2097                 nfit_spa->max_ars = ars_cap.max_ars_out;
2098                 nfit_spa->clear_err_unit = ars_cap.clear_err_unit;
2099                 /* check that the supported scrub types match the spa type */
2100                 if (nfit_spa_type(spa) == NFIT_SPA_VOLATILE &&
2101                                 ((ars_cap.status >> 16) & ND_ARS_VOLATILE) == 0)
2102                         return -ENOTTY;
2103                 else if (nfit_spa_type(spa) == NFIT_SPA_PM &&
2104                                 ((ars_cap.status >> 16) & ND_ARS_PERSISTENT) == 0)
2105                         return -ENOTTY;
2106         }
2107
2108         if (ars_status_alloc(acpi_desc, nfit_spa->max_ars))
2109                 return -ENOMEM;
2110
2111         rc = ars_get_status(acpi_desc);
2112         if (rc < 0 && rc != -ENOSPC)
2113                 return rc;
2114
2115         if (ars_status_process_records(acpi_desc->nvdimm_bus,
2116                                 acpi_desc->ars_status))
2117                 return -ENOMEM;
2118
2119         return 0;
2120 }
2121
2122 static void acpi_nfit_async_scrub(struct acpi_nfit_desc *acpi_desc,
2123                 struct nfit_spa *nfit_spa)
2124 {
2125         struct acpi_nfit_system_address *spa = nfit_spa->spa;
2126         unsigned int overflow_retry = scrub_overflow_abort;
2127         u64 init_ars_start = 0, init_ars_len = 0;
2128         struct device *dev = acpi_desc->dev;
2129         unsigned int tmo = scrub_timeout;
2130         int rc;
2131
2132         if (!nfit_spa->ars_required || !nfit_spa->nd_region)
2133                 return;
2134
2135         rc = ars_start(acpi_desc, nfit_spa);
2136         /*
2137          * If we timed out the initial scan we'll still be busy here,
2138          * and will wait another timeout before giving up permanently.
2139          */
2140         if (rc < 0 && rc != -EBUSY)
2141                 return;
2142
2143         do {
2144                 u64 ars_start, ars_len;
2145
2146                 if (acpi_desc->cancel)
2147                         break;
2148                 rc = acpi_nfit_query_poison(acpi_desc, nfit_spa);
2149                 if (rc == -ENOTTY)
2150                         break;
2151                 if (rc == -EBUSY && !tmo) {
2152                         dev_warn(dev, "range %d ars timeout, aborting\n",
2153                                         spa->range_index);
2154                         break;
2155                 }
2156
2157                 if (rc == -EBUSY) {
2158                         /*
2159                          * Note, entries may be appended to the list
2160                          * while the lock is dropped, but the workqueue
2161                          * being active prevents entries being deleted /
2162                          * freed.
2163                          */
2164                         mutex_unlock(&acpi_desc->init_mutex);
2165                         ssleep(1);
2166                         tmo--;
2167                         mutex_lock(&acpi_desc->init_mutex);
2168                         continue;
2169                 }
2170
2171                 /* we got some results, but there are more pending... */
2172                 if (rc == -ENOSPC && overflow_retry--) {
2173                         if (!init_ars_len) {
2174                                 init_ars_len = acpi_desc->ars_status->length;
2175                                 init_ars_start = acpi_desc->ars_status->address;
2176                         }
2177                         rc = ars_continue(acpi_desc);
2178                 }
2179
2180                 if (rc < 0) {
2181                         dev_warn(dev, "range %d ars continuation failed\n",
2182                                         spa->range_index);
2183                         break;
2184                 }
2185
2186                 if (init_ars_len) {
2187                         ars_start = init_ars_start;
2188                         ars_len = init_ars_len;
2189                 } else {
2190                         ars_start = acpi_desc->ars_status->address;
2191                         ars_len = acpi_desc->ars_status->length;
2192                 }
2193                 dev_dbg(dev, "spa range: %d ars from %#llx + %#llx complete\n",
2194                                 spa->range_index, ars_start, ars_len);
2195                 /* notify the region about new poison entries */
2196                 nvdimm_region_notify(nfit_spa->nd_region,
2197                                 NVDIMM_REVALIDATE_POISON);
2198                 break;
2199         } while (1);
2200 }
2201
2202 static void acpi_nfit_scrub(struct work_struct *work)
2203 {
2204         struct device *dev;
2205         u64 init_scrub_length = 0;
2206         struct nfit_spa *nfit_spa;
2207         u64 init_scrub_address = 0;
2208         bool init_ars_done = false;
2209         struct acpi_nfit_desc *acpi_desc;
2210         unsigned int tmo = scrub_timeout;
2211         unsigned int overflow_retry = scrub_overflow_abort;
2212
2213         acpi_desc = container_of(work, typeof(*acpi_desc), work);
2214         dev = acpi_desc->dev;
2215
2216         /*
2217          * We scrub in 2 phases.  The first phase waits for any platform
2218          * firmware initiated scrubs to complete and then we go search for the
2219          * affected spa regions to mark them scanned.  In the second phase we
2220          * initiate a directed scrub for every range that was not scrubbed in
2221          * phase 1. If we're called for a 'rescan', we harmlessly pass through
2222          * the first phase, but really only care about running phase 2, where
2223          * regions can be notified of new poison.
2224          */
2225
2226         /* process platform firmware initiated scrubs */
2227  retry:
2228         mutex_lock(&acpi_desc->init_mutex);
2229         list_for_each_entry(nfit_spa, &acpi_desc->spas, list) {
2230                 struct nd_cmd_ars_status *ars_status;
2231                 struct acpi_nfit_system_address *spa;
2232                 u64 ars_start, ars_len;
2233                 int rc;
2234
2235                 if (acpi_desc->cancel)
2236                         break;
2237
2238                 if (nfit_spa->nd_region)
2239                         continue;
2240
2241                 if (init_ars_done) {
2242                         /*
2243                          * No need to re-query, we're now just
2244                          * reconciling all the ranges covered by the
2245                          * initial scrub
2246                          */
2247                         rc = 0;
2248                 } else
2249                         rc = acpi_nfit_query_poison(acpi_desc, nfit_spa);
2250
2251                 if (rc == -ENOTTY) {
2252                         /* no ars capability, just register spa and move on */
2253                         acpi_nfit_register_region(acpi_desc, nfit_spa);
2254                         continue;
2255                 }
2256
2257                 if (rc == -EBUSY && !tmo) {
2258                         /* fallthrough to directed scrub in phase 2 */
2259                         dev_warn(dev, "timeout awaiting ars results, continuing...\n");
2260                         break;
2261                 } else if (rc == -EBUSY) {
2262                         mutex_unlock(&acpi_desc->init_mutex);
2263                         ssleep(1);
2264                         tmo--;
2265                         goto retry;
2266                 }
2267
2268                 /* we got some results, but there are more pending... */
2269                 if (rc == -ENOSPC && overflow_retry--) {
2270                         ars_status = acpi_desc->ars_status;
2271                         /*
2272                          * Record the original scrub range, so that we
2273                          * can recall all the ranges impacted by the
2274                          * initial scrub.
2275                          */
2276                         if (!init_scrub_length) {
2277                                 init_scrub_length = ars_status->length;
2278                                 init_scrub_address = ars_status->address;
2279                         }
2280                         rc = ars_continue(acpi_desc);
2281                         if (rc == 0) {
2282                                 mutex_unlock(&acpi_desc->init_mutex);
2283                                 goto retry;
2284                         }
2285                 }
2286
2287                 if (rc < 0) {
2288                         /*
2289                          * Initial scrub failed, we'll give it one more
2290                          * try below...
2291                          */
2292                         break;
2293                 }
2294
2295                 /* We got some final results, record completed ranges */
2296                 ars_status = acpi_desc->ars_status;
2297                 if (init_scrub_length) {
2298                         ars_start = init_scrub_address;
2299                         ars_len = ars_start + init_scrub_length;
2300                 } else {
2301                         ars_start = ars_status->address;
2302                         ars_len = ars_status->length;
2303                 }
2304                 spa = nfit_spa->spa;
2305
2306                 if (!init_ars_done) {
2307                         init_ars_done = true;
2308                         dev_dbg(dev, "init scrub %#llx + %#llx complete\n",
2309                                         ars_start, ars_len);
2310                 }
2311                 if (ars_start <= spa->address && ars_start + ars_len
2312                                 >= spa->address + spa->length)
2313                         acpi_nfit_register_region(acpi_desc, nfit_spa);
2314         }
2315
2316         /*
2317          * For all the ranges not covered by an initial scrub we still
2318          * want to see if there are errors, but it's ok to discover them
2319          * asynchronously.
2320          */
2321         list_for_each_entry(nfit_spa, &acpi_desc->spas, list) {
2322                 /*
2323                  * Flag all the ranges that still need scrubbing, but
2324                  * register them now to make data available.
2325                  */
2326                 if (!nfit_spa->nd_region) {
2327                         nfit_spa->ars_required = 1;
2328                         acpi_nfit_register_region(acpi_desc, nfit_spa);
2329                 }
2330         }
2331
2332         list_for_each_entry(nfit_spa, &acpi_desc->spas, list)
2333                 acpi_nfit_async_scrub(acpi_desc, nfit_spa);
2334         acpi_desc->scrub_count++;
2335         if (acpi_desc->scrub_count_state)
2336                 sysfs_notify_dirent(acpi_desc->scrub_count_state);
2337         mutex_unlock(&acpi_desc->init_mutex);
2338 }
2339
2340 static int acpi_nfit_register_regions(struct acpi_nfit_desc *acpi_desc)
2341 {
2342         struct nfit_spa *nfit_spa;
2343         int rc;
2344
2345         list_for_each_entry(nfit_spa, &acpi_desc->spas, list)
2346                 if (nfit_spa_type(nfit_spa->spa) == NFIT_SPA_DCR) {
2347                         /* BLK regions don't need to wait for ars results */
2348                         rc = acpi_nfit_register_region(acpi_desc, nfit_spa);
2349                         if (rc)
2350                                 return rc;
2351                 }
2352
2353         queue_work(nfit_wq, &acpi_desc->work);
2354         return 0;
2355 }
2356
2357 static int acpi_nfit_check_deletions(struct acpi_nfit_desc *acpi_desc,
2358                 struct nfit_table_prev *prev)
2359 {
2360         struct device *dev = acpi_desc->dev;
2361
2362         if (!list_empty(&prev->spas) ||
2363                         !list_empty(&prev->memdevs) ||
2364                         !list_empty(&prev->dcrs) ||
2365                         !list_empty(&prev->bdws) ||
2366                         !list_empty(&prev->idts) ||
2367                         !list_empty(&prev->flushes)) {
2368                 dev_err(dev, "new nfit deletes entries (unsupported)\n");
2369                 return -ENXIO;
2370         }
2371         return 0;
2372 }
2373
2374 static int acpi_nfit_desc_init_scrub_attr(struct acpi_nfit_desc *acpi_desc)
2375 {
2376         struct device *dev = acpi_desc->dev;
2377         struct kernfs_node *nfit;
2378         struct device *bus_dev;
2379
2380         if (!ars_supported(acpi_desc->nvdimm_bus))
2381                 return 0;
2382
2383         bus_dev = to_nvdimm_bus_dev(acpi_desc->nvdimm_bus);
2384         nfit = sysfs_get_dirent(bus_dev->kobj.sd, "nfit");
2385         if (!nfit) {
2386                 dev_err(dev, "sysfs_get_dirent 'nfit' failed\n");
2387                 return -ENODEV;
2388         }
2389         acpi_desc->scrub_count_state = sysfs_get_dirent(nfit, "scrub");
2390         sysfs_put(nfit);
2391         if (!acpi_desc->scrub_count_state) {
2392                 dev_err(dev, "sysfs_get_dirent 'scrub' failed\n");
2393                 return -ENODEV;
2394         }
2395
2396         return 0;
2397 }
2398
2399 static void acpi_nfit_destruct(void *data)
2400 {
2401         struct acpi_nfit_desc *acpi_desc = data;
2402         struct device *bus_dev = to_nvdimm_bus_dev(acpi_desc->nvdimm_bus);
2403
2404         /*
2405          * Destruct under acpi_desc_lock so that nfit_handle_mce does not
2406          * race teardown
2407          */
2408         mutex_lock(&acpi_desc_lock);
2409         acpi_desc->cancel = 1;
2410         /*
2411          * Bounce the nvdimm bus lock to make sure any in-flight
2412          * acpi_nfit_ars_rescan() submissions have had a chance to
2413          * either submit or see ->cancel set.
2414          */
2415         device_lock(bus_dev);
2416         device_unlock(bus_dev);
2417
2418         flush_workqueue(nfit_wq);
2419         if (acpi_desc->scrub_count_state)
2420                 sysfs_put(acpi_desc->scrub_count_state);
2421         nvdimm_bus_unregister(acpi_desc->nvdimm_bus);
2422         acpi_desc->nvdimm_bus = NULL;
2423         list_del(&acpi_desc->list);
2424         mutex_unlock(&acpi_desc_lock);
2425 }
2426
2427 int acpi_nfit_init(struct acpi_nfit_desc *acpi_desc, void *data, acpi_size sz)
2428 {
2429         struct device *dev = acpi_desc->dev;
2430         struct nfit_table_prev prev;
2431         const void *end;
2432         int rc;
2433
2434         if (!acpi_desc->nvdimm_bus) {
2435                 acpi_nfit_init_dsms(acpi_desc);
2436
2437                 acpi_desc->nvdimm_bus = nvdimm_bus_register(dev,
2438                                 &acpi_desc->nd_desc);
2439                 if (!acpi_desc->nvdimm_bus)
2440                         return -ENOMEM;
2441
2442                 rc = devm_add_action_or_reset(dev, acpi_nfit_destruct,
2443                                 acpi_desc);
2444                 if (rc)
2445                         return rc;
2446
2447                 rc = acpi_nfit_desc_init_scrub_attr(acpi_desc);
2448                 if (rc)
2449                         return rc;
2450
2451                 /* register this acpi_desc for mce notifications */
2452                 mutex_lock(&acpi_desc_lock);
2453                 list_add_tail(&acpi_desc->list, &acpi_descs);
2454                 mutex_unlock(&acpi_desc_lock);
2455         }
2456
2457         mutex_lock(&acpi_desc->init_mutex);
2458
2459         INIT_LIST_HEAD(&prev.spas);
2460         INIT_LIST_HEAD(&prev.memdevs);
2461         INIT_LIST_HEAD(&prev.dcrs);
2462         INIT_LIST_HEAD(&prev.bdws);
2463         INIT_LIST_HEAD(&prev.idts);
2464         INIT_LIST_HEAD(&prev.flushes);
2465
2466         list_cut_position(&prev.spas, &acpi_desc->spas,
2467                                 acpi_desc->spas.prev);
2468         list_cut_position(&prev.memdevs, &acpi_desc->memdevs,
2469                                 acpi_desc->memdevs.prev);
2470         list_cut_position(&prev.dcrs, &acpi_desc->dcrs,
2471                                 acpi_desc->dcrs.prev);
2472         list_cut_position(&prev.bdws, &acpi_desc->bdws,
2473                                 acpi_desc->bdws.prev);
2474         list_cut_position(&prev.idts, &acpi_desc->idts,
2475                                 acpi_desc->idts.prev);
2476         list_cut_position(&prev.flushes, &acpi_desc->flushes,
2477                                 acpi_desc->flushes.prev);
2478
2479         end = data + sz;
2480         while (!IS_ERR_OR_NULL(data))
2481                 data = add_table(acpi_desc, &prev, data, end);
2482
2483         if (IS_ERR(data)) {
2484                 dev_dbg(dev, "%s: nfit table parsing error: %ld\n", __func__,
2485                                 PTR_ERR(data));
2486                 rc = PTR_ERR(data);
2487                 goto out_unlock;
2488         }
2489
2490         rc = acpi_nfit_check_deletions(acpi_desc, &prev);
2491         if (rc)
2492                 goto out_unlock;
2493
2494         rc = nfit_mem_init(acpi_desc);
2495         if (rc)
2496                 goto out_unlock;
2497
2498         rc = acpi_nfit_register_dimms(acpi_desc);
2499         if (rc)
2500                 goto out_unlock;
2501
2502         rc = acpi_nfit_register_regions(acpi_desc);
2503
2504  out_unlock:
2505         mutex_unlock(&acpi_desc->init_mutex);
2506         return rc;
2507 }
2508 EXPORT_SYMBOL_GPL(acpi_nfit_init);
2509
2510 struct acpi_nfit_flush_work {
2511         struct work_struct work;
2512         struct completion cmp;
2513 };
2514
2515 static void flush_probe(struct work_struct *work)
2516 {
2517         struct acpi_nfit_flush_work *flush;
2518
2519         flush = container_of(work, typeof(*flush), work);
2520         complete(&flush->cmp);
2521 }
2522
2523 static int acpi_nfit_flush_probe(struct nvdimm_bus_descriptor *nd_desc)
2524 {
2525         struct acpi_nfit_desc *acpi_desc = to_acpi_nfit_desc(nd_desc);
2526         struct device *dev = acpi_desc->dev;
2527         struct acpi_nfit_flush_work flush;
2528
2529         /* bounce the device lock to flush acpi_nfit_add / acpi_nfit_notify */
2530         device_lock(dev);
2531         device_unlock(dev);
2532
2533         /*
2534          * Scrub work could take 10s of seconds, userspace may give up so we
2535          * need to be interruptible while waiting.
2536          */
2537         INIT_WORK_ONSTACK(&flush.work, flush_probe);
2538         COMPLETION_INITIALIZER_ONSTACK(flush.cmp);
2539         queue_work(nfit_wq, &flush.work);
2540         return wait_for_completion_interruptible(&flush.cmp);
2541 }
2542
2543 static int acpi_nfit_clear_to_send(struct nvdimm_bus_descriptor *nd_desc,
2544                 struct nvdimm *nvdimm, unsigned int cmd)
2545 {
2546         struct acpi_nfit_desc *acpi_desc = to_acpi_nfit_desc(nd_desc);
2547
2548         if (nvdimm)
2549                 return 0;
2550         if (cmd != ND_CMD_ARS_START)
2551                 return 0;
2552
2553         /*
2554          * The kernel and userspace may race to initiate a scrub, but
2555          * the scrub thread is prepared to lose that initial race.  It
2556          * just needs guarantees that any ars it initiates are not
2557          * interrupted by any intervening start reqeusts from userspace.
2558          */
2559         if (work_busy(&acpi_desc->work))
2560                 return -EBUSY;
2561
2562         return 0;
2563 }
2564
2565 int acpi_nfit_ars_rescan(struct acpi_nfit_desc *acpi_desc)
2566 {
2567         struct device *dev = acpi_desc->dev;
2568         struct nfit_spa *nfit_spa;
2569
2570         if (work_busy(&acpi_desc->work))
2571                 return -EBUSY;
2572
2573         if (acpi_desc->cancel)
2574                 return 0;
2575
2576         mutex_lock(&acpi_desc->init_mutex);
2577         list_for_each_entry(nfit_spa, &acpi_desc->spas, list) {
2578                 struct acpi_nfit_system_address *spa = nfit_spa->spa;
2579
2580                 if (nfit_spa_type(spa) != NFIT_SPA_PM)
2581                         continue;
2582
2583                 nfit_spa->ars_required = 1;
2584         }
2585         queue_work(nfit_wq, &acpi_desc->work);
2586         dev_dbg(dev, "%s: ars_scan triggered\n", __func__);
2587         mutex_unlock(&acpi_desc->init_mutex);
2588
2589         return 0;
2590 }
2591
2592 void acpi_nfit_desc_init(struct acpi_nfit_desc *acpi_desc, struct device *dev)
2593 {
2594         struct nvdimm_bus_descriptor *nd_desc;
2595
2596         dev_set_drvdata(dev, acpi_desc);
2597         acpi_desc->dev = dev;
2598         acpi_desc->blk_do_io = acpi_nfit_blk_region_do_io;
2599         nd_desc = &acpi_desc->nd_desc;
2600         nd_desc->provider_name = "ACPI.NFIT";
2601         nd_desc->module = THIS_MODULE;
2602         nd_desc->ndctl = acpi_nfit_ctl;
2603         nd_desc->flush_probe = acpi_nfit_flush_probe;
2604         nd_desc->clear_to_send = acpi_nfit_clear_to_send;
2605         nd_desc->attr_groups = acpi_nfit_attribute_groups;
2606
2607         INIT_LIST_HEAD(&acpi_desc->spas);
2608         INIT_LIST_HEAD(&acpi_desc->dcrs);
2609         INIT_LIST_HEAD(&acpi_desc->bdws);
2610         INIT_LIST_HEAD(&acpi_desc->idts);
2611         INIT_LIST_HEAD(&acpi_desc->flushes);
2612         INIT_LIST_HEAD(&acpi_desc->memdevs);
2613         INIT_LIST_HEAD(&acpi_desc->dimms);
2614         INIT_LIST_HEAD(&acpi_desc->list);
2615         mutex_init(&acpi_desc->init_mutex);
2616         INIT_WORK(&acpi_desc->work, acpi_nfit_scrub);
2617 }
2618 EXPORT_SYMBOL_GPL(acpi_nfit_desc_init);
2619
2620 static int acpi_nfit_add(struct acpi_device *adev)
2621 {
2622         struct acpi_buffer buf = { ACPI_ALLOCATE_BUFFER, NULL };
2623         struct acpi_nfit_desc *acpi_desc;
2624         struct device *dev = &adev->dev;
2625         struct acpi_table_header *tbl;
2626         acpi_status status = AE_OK;
2627         acpi_size sz;
2628         int rc = 0;
2629
2630         status = acpi_get_table_with_size(ACPI_SIG_NFIT, 0, &tbl, &sz);
2631         if (ACPI_FAILURE(status)) {
2632                 /* This is ok, we could have an nvdimm hotplugged later */
2633                 dev_dbg(dev, "failed to find NFIT at startup\n");
2634                 return 0;
2635         }
2636
2637         acpi_desc = devm_kzalloc(dev, sizeof(*acpi_desc), GFP_KERNEL);
2638         if (!acpi_desc)
2639                 return -ENOMEM;
2640         acpi_nfit_desc_init(acpi_desc, &adev->dev);
2641
2642         /* Save the acpi header for exporting the revision via sysfs */
2643         acpi_desc->acpi_header = *tbl;
2644
2645         /* Evaluate _FIT and override with that if present */
2646         status = acpi_evaluate_object(adev->handle, "_FIT", NULL, &buf);
2647         if (ACPI_SUCCESS(status) && buf.length > 0) {
2648                 union acpi_object *obj = buf.pointer;
2649
2650                 if (obj->type == ACPI_TYPE_BUFFER)
2651                         rc = acpi_nfit_init(acpi_desc, obj->buffer.pointer,
2652                                         obj->buffer.length);
2653                 else
2654                         dev_dbg(dev, "%s invalid type %d, ignoring _FIT\n",
2655                                  __func__, (int) obj->type);
2656                 kfree(buf.pointer);
2657         } else
2658                 /* skip over the lead-in header table */
2659                 rc = acpi_nfit_init(acpi_desc, (void *) tbl
2660                                 + sizeof(struct acpi_table_nfit),
2661                                 sz - sizeof(struct acpi_table_nfit));
2662         return rc;
2663 }
2664
/*
 * ACPI driver ->remove() callback.
 *
 * Intentionally empty: teardown is performed by acpi_nfit_destruct(),
 * which acpi_nfit_init() installs as a devm action.  Always returns 0.
 */
static int acpi_nfit_remove(struct acpi_device *adev)
{
	return 0;
}
2670
2671 static void acpi_nfit_notify(struct acpi_device *adev, u32 event)
2672 {
2673         struct acpi_nfit_desc *acpi_desc = dev_get_drvdata(&adev->dev);
2674         struct acpi_buffer buf = { ACPI_ALLOCATE_BUFFER, NULL };
2675         struct device *dev = &adev->dev;
2676         union acpi_object *obj;
2677         acpi_status status;
2678         int ret;
2679
2680         dev_dbg(dev, "%s: event: %d\n", __func__, event);
2681
2682         device_lock(dev);
2683         if (!dev->driver) {
2684                 /* dev->driver may be null if we're being removed */
2685                 dev_dbg(dev, "%s: no driver found for dev\n", __func__);
2686                 goto out_unlock;
2687         }
2688
2689         if (!acpi_desc) {
2690                 acpi_desc = devm_kzalloc(dev, sizeof(*acpi_desc), GFP_KERNEL);
2691                 if (!acpi_desc)
2692                         goto out_unlock;
2693                 acpi_nfit_desc_init(acpi_desc, &adev->dev);
2694         } else {
2695                 /*
2696                  * Finish previous registration before considering new
2697                  * regions.
2698                  */
2699                 flush_workqueue(nfit_wq);
2700         }
2701
2702         /* Evaluate _FIT */
2703         status = acpi_evaluate_object(adev->handle, "_FIT", NULL, &buf);
2704         if (ACPI_FAILURE(status)) {
2705                 dev_err(dev, "failed to evaluate _FIT\n");
2706                 goto out_unlock;
2707         }
2708
2709         obj = buf.pointer;
2710         if (obj->type == ACPI_TYPE_BUFFER) {
2711                 ret = acpi_nfit_init(acpi_desc, obj->buffer.pointer,
2712                                 obj->buffer.length);
2713                 if (ret)
2714                         dev_err(dev, "failed to merge updated NFIT\n");
2715         } else
2716                 dev_err(dev, "Invalid _FIT\n");
2717         kfree(buf.pointer);
2718
2719  out_unlock:
2720         device_unlock(dev);
2721 }
2722
2723 static const struct acpi_device_id acpi_nfit_ids[] = {
2724         { "ACPI0012", 0 },
2725         { "", 0 },
2726 };
2727 MODULE_DEVICE_TABLE(acpi, acpi_nfit_ids);
2728
2729 static struct acpi_driver acpi_nfit_driver = {
2730         .name = KBUILD_MODNAME,
2731         .ids = acpi_nfit_ids,
2732         .ops = {
2733                 .add = acpi_nfit_add,
2734                 .remove = acpi_nfit_remove,
2735                 .notify = acpi_nfit_notify,
2736         },
2737 };
2738
2739 static __init int nfit_init(void)
2740 {
2741         BUILD_BUG_ON(sizeof(struct acpi_table_nfit) != 40);
2742         BUILD_BUG_ON(sizeof(struct acpi_nfit_system_address) != 56);
2743         BUILD_BUG_ON(sizeof(struct acpi_nfit_memory_map) != 48);
2744         BUILD_BUG_ON(sizeof(struct acpi_nfit_interleave) != 20);
2745         BUILD_BUG_ON(sizeof(struct acpi_nfit_smbios) != 9);
2746         BUILD_BUG_ON(sizeof(struct acpi_nfit_control_region) != 80);
2747         BUILD_BUG_ON(sizeof(struct acpi_nfit_data_region) != 40);
2748
2749         acpi_str_to_uuid(UUID_VOLATILE_MEMORY, nfit_uuid[NFIT_SPA_VOLATILE]);
2750         acpi_str_to_uuid(UUID_PERSISTENT_MEMORY, nfit_uuid[NFIT_SPA_PM]);
2751         acpi_str_to_uuid(UUID_CONTROL_REGION, nfit_uuid[NFIT_SPA_DCR]);
2752         acpi_str_to_uuid(UUID_DATA_REGION, nfit_uuid[NFIT_SPA_BDW]);
2753         acpi_str_to_uuid(UUID_VOLATILE_VIRTUAL_DISK, nfit_uuid[NFIT_SPA_VDISK]);
2754         acpi_str_to_uuid(UUID_VOLATILE_VIRTUAL_CD, nfit_uuid[NFIT_SPA_VCD]);
2755         acpi_str_to_uuid(UUID_PERSISTENT_VIRTUAL_DISK, nfit_uuid[NFIT_SPA_PDISK]);
2756         acpi_str_to_uuid(UUID_PERSISTENT_VIRTUAL_CD, nfit_uuid[NFIT_SPA_PCD]);
2757         acpi_str_to_uuid(UUID_NFIT_BUS, nfit_uuid[NFIT_DEV_BUS]);
2758         acpi_str_to_uuid(UUID_NFIT_DIMM, nfit_uuid[NFIT_DEV_DIMM]);
2759         acpi_str_to_uuid(UUID_NFIT_DIMM_N_HPE1, nfit_uuid[NFIT_DEV_DIMM_N_HPE1]);
2760         acpi_str_to_uuid(UUID_NFIT_DIMM_N_HPE2, nfit_uuid[NFIT_DEV_DIMM_N_HPE2]);
2761         acpi_str_to_uuid(UUID_NFIT_DIMM_N_MSFT, nfit_uuid[NFIT_DEV_DIMM_N_MSFT]);
2762
2763         nfit_wq = create_singlethread_workqueue("nfit");
2764         if (!nfit_wq)
2765                 return -ENOMEM;
2766
2767         nfit_mce_register();
2768
2769         return acpi_bus_register_driver(&acpi_nfit_driver);
2770 }
2771
2772 static __exit void nfit_exit(void)
2773 {
2774         nfit_mce_unregister();
2775         acpi_bus_unregister_driver(&acpi_nfit_driver);
2776         destroy_workqueue(nfit_wq);
2777         WARN_ON(!list_empty(&acpi_descs));
2778 }
2779
2780 module_init(nfit_init);
2781 module_exit(nfit_exit);
2782 MODULE_LICENSE("GPL v2");
2783 MODULE_AUTHOR("Intel Corporation");