Merge tag 'driver-core-4.9-rc3' of git://git.kernel.org/pub/scm/linux/kernel/git...
[cascardo/linux.git] / drivers / nvdimm / namespace_devs.c
index fbcadc7..abe5c6b 100644 (file)
@@ -12,6 +12,7 @@
  */
 #include <linux/module.h>
 #include <linux/device.h>
+#include <linux/sort.h>
 #include <linux/slab.h>
 #include <linux/pmem.h>
 #include <linux/list.h>
@@ -29,7 +30,10 @@ static void namespace_io_release(struct device *dev)
 static void namespace_pmem_release(struct device *dev)
 {
        struct nd_namespace_pmem *nspm = to_nd_namespace_pmem(dev);
+       struct nd_region *nd_region = to_nd_region(dev->parent);
 
+       if (nspm->id >= 0)
+               ida_simple_remove(&nd_region->ns_ida, nspm->id);
        kfree(nspm->alt_name);
        kfree(nspm->uuid);
        kfree(nspm);
@@ -63,17 +67,17 @@ static struct device_type namespace_blk_device_type = {
        .release = namespace_blk_release,
 };
 
-static bool is_namespace_pmem(struct device *dev)
+static bool is_namespace_pmem(const struct device *dev)
 {
        return dev ? dev->type == &namespace_pmem_device_type : false;
 }
 
-static bool is_namespace_blk(struct device *dev)
+static bool is_namespace_blk(const struct device *dev)
 {
        return dev ? dev->type == &namespace_blk_device_type : false;
 }
 
-static bool is_namespace_io(struct device *dev)
+static bool is_namespace_io(const struct device *dev)
 {
        return dev ? dev->type == &namespace_io_device_type : false;
 }
@@ -169,7 +173,21 @@ const char *nvdimm_namespace_disk_name(struct nd_namespace_common *ndns,
                suffix = "s";
 
        if (is_namespace_pmem(&ndns->dev) || is_namespace_io(&ndns->dev)) {
-               sprintf(name, "pmem%d%s", nd_region->id, suffix ? suffix : "");
+               int nsidx = 0;
+
+               if (is_namespace_pmem(&ndns->dev)) {
+                       struct nd_namespace_pmem *nspm;
+
+                       nspm = to_nd_namespace_pmem(&ndns->dev);
+                       nsidx = nspm->id;
+               }
+
+               if (nsidx)
+                       sprintf(name, "pmem%d.%d%s", nd_region->id, nsidx,
+                                       suffix ? suffix : "");
+               else
+                       sprintf(name, "pmem%d%s", nd_region->id,
+                                       suffix ? suffix : "");
        } else if (is_namespace_blk(&ndns->dev)) {
                struct nd_namespace_blk *nsblk;
 
@@ -511,19 +529,68 @@ static resource_size_t init_dpa_allocation(struct nd_label_id *label_id,
        return rc ? n : 0;
 }
 
-static bool space_valid(bool is_pmem, bool is_reserve,
-               struct nd_label_id *label_id, struct resource *res)
+
+/**
+ * space_valid() - validate free dpa space against constraints
+ * @nd_region: hosting region of the free space
+ * @ndd: dimm device data for debug
+ * @label_id: namespace id to allocate space
+ * @prev: potential allocation that precedes free space
+ * @next: allocation that follows the given free space range
+ * @exist: first allocation with same id in the mapping
+ * @n: range that must be satisfied for pmem allocations
+ * @valid: free space range to validate
+ *
+ * BLK-space is valid as long as it does not precede a PMEM
+ * allocation in a given region. PMEM-space must be contiguous
+ * and adjacent to an existing allocation (if one
+ * exists).  If reserving PMEM any space is valid.
+ */
+static void space_valid(struct nd_region *nd_region, struct nvdimm_drvdata *ndd,
+               struct nd_label_id *label_id, struct resource *prev,
+               struct resource *next, struct resource *exist,
+               resource_size_t n, struct resource *valid)
 {
-       /*
-        * For BLK-space any space is valid, for PMEM-space, it must be
-        * contiguous with an existing allocation unless we are
-        * reserving pmem.
-        */
-       if (is_reserve || !is_pmem)
-               return true;
-       if (!res || strcmp(res->name, label_id->id) == 0)
-               return true;
-       return false;
+       bool is_reserve = strcmp(label_id->id, "pmem-reserve") == 0;
+       bool is_pmem = strncmp(label_id->id, "pmem", 4) == 0;
+
+       if (valid->start >= valid->end)
+               goto invalid;
+
+       if (is_reserve)
+               return;
+
+       if (!is_pmem) {
+               struct nd_mapping *nd_mapping = &nd_region->mapping[0];
+               struct nvdimm_bus *nvdimm_bus;
+               struct blk_alloc_info info = {
+                       .nd_mapping = nd_mapping,
+                       .available = nd_mapping->size,
+                       .res = valid,
+               };
+
+               WARN_ON(!is_nd_blk(&nd_region->dev));
+               nvdimm_bus = walk_to_nvdimm_bus(&nd_region->dev);
+               device_for_each_child(&nvdimm_bus->dev, &info, alias_dpa_busy);
+               return;
+       }
+
+       /* allocation needs to be contiguous, so this is all or nothing */
+       if (resource_size(valid) < n)
+               goto invalid;
+
+       /* we've got all the space we need and no existing allocation */
+       if (!exist)
+               return;
+
+       /* allocation needs to be contiguous with the existing namespace */
+       if (valid->start == exist->end + 1
+                       || valid->end == exist->start - 1)
+               return;
+
+ invalid:
+       /* truncate @valid size to 0 */
+       valid->end = valid->start - 1;
 }
 
 enum alloc_loc {
@@ -535,18 +602,24 @@ static resource_size_t scan_allocate(struct nd_region *nd_region,
                resource_size_t n)
 {
        resource_size_t mapping_end = nd_mapping->start + nd_mapping->size - 1;
-       bool is_reserve = strcmp(label_id->id, "pmem-reserve") == 0;
        bool is_pmem = strncmp(label_id->id, "pmem", 4) == 0;
        struct nvdimm_drvdata *ndd = to_ndd(nd_mapping);
+       struct resource *res, *exist = NULL, valid;
        const resource_size_t to_allocate = n;
-       struct resource *res;
        int first;
 
+       for_each_dpa_resource(ndd, res)
+               if (strcmp(label_id->id, res->name) == 0)
+                       exist = res;
+
+       valid.start = nd_mapping->start;
+       valid.end = mapping_end;
+       valid.name = "free space";
  retry:
        first = 0;
        for_each_dpa_resource(ndd, res) {
-               resource_size_t allocate, available = 0, free_start, free_end;
                struct resource *next = res->sibling, *new_res = NULL;
+               resource_size_t allocate, available = 0;
                enum alloc_loc loc = ALLOC_ERR;
                const char *action;
                int rc = 0;
@@ -559,32 +632,35 @@ static resource_size_t scan_allocate(struct nd_region *nd_region,
 
                /* space at the beginning of the mapping */
                if (!first++ && res->start > nd_mapping->start) {
-                       free_start = nd_mapping->start;
-                       available = res->start - free_start;
-                       if (space_valid(is_pmem, is_reserve, label_id, NULL))
+                       valid.start = nd_mapping->start;
+                       valid.end = res->start - 1;
+                       space_valid(nd_region, ndd, label_id, NULL, next, exist,
+                                       to_allocate, &valid);
+                       available = resource_size(&valid);
+                       if (available)
                                loc = ALLOC_BEFORE;
                }
 
                /* space between allocations */
                if (!loc && next) {
-                       free_start = res->start + resource_size(res);
-                       free_end = min(mapping_end, next->start - 1);
-                       if (space_valid(is_pmem, is_reserve, label_id, res)
-                                       && free_start < free_end) {
-                               available = free_end + 1 - free_start;
+                       valid.start = res->start + resource_size(res);
+                       valid.end = min(mapping_end, next->start - 1);
+                       space_valid(nd_region, ndd, label_id, res, next, exist,
+                                       to_allocate, &valid);
+                       available = resource_size(&valid);
+                       if (available)
                                loc = ALLOC_MID;
-                       }
                }
 
                /* space at the end of the mapping */
                if (!loc && !next) {
-                       free_start = res->start + resource_size(res);
-                       free_end = mapping_end;
-                       if (space_valid(is_pmem, is_reserve, label_id, res)
-                                       && free_start < free_end) {
-                               available = free_end + 1 - free_start;
+                       valid.start = res->start + resource_size(res);
+                       valid.end = mapping_end;
+                       space_valid(nd_region, ndd, label_id, res, next, exist,
+                                       to_allocate, &valid);
+                       available = resource_size(&valid);
+                       if (available)
                                loc = ALLOC_AFTER;
-                       }
                }
 
                if (!loc || !available)
@@ -594,8 +670,6 @@ static resource_size_t scan_allocate(struct nd_region *nd_region,
                case ALLOC_BEFORE:
                        if (strcmp(res->name, label_id->id) == 0) {
                                /* adjust current resource up */
-                               if (is_pmem && !is_reserve)
-                                       return n;
                                rc = adjust_resource(res, res->start - allocate,
                                                resource_size(res) + allocate);
                                action = "cur grow up";
@@ -605,8 +679,6 @@ static resource_size_t scan_allocate(struct nd_region *nd_region,
                case ALLOC_MID:
                        if (strcmp(next->name, label_id->id) == 0) {
                                /* adjust next resource up */
-                               if (is_pmem && !is_reserve)
-                                       return n;
                                rc = adjust_resource(next, next->start
                                                - allocate, resource_size(next)
                                                + allocate);
@@ -630,12 +702,10 @@ static resource_size_t scan_allocate(struct nd_region *nd_region,
                if (strcmp(action, "allocate") == 0) {
                        /* BLK allocate bottom up */
                        if (!is_pmem)
-                               free_start += available - allocate;
-                       else if (!is_reserve && free_start != nd_mapping->start)
-                               return n;
+                               valid.start += available - allocate;
 
                        new_res = nvdimm_allocate_dpa(ndd, label_id,
-                                       free_start, allocate);
+                                       valid.start, allocate);
                        if (!new_res)
                                rc = -EBUSY;
                } else if (strcmp(action, "grow down") == 0) {
@@ -833,13 +903,45 @@ static int grow_dpa_allocation(struct nd_region *nd_region,
        return 0;
 }
 
-static void nd_namespace_pmem_set_size(struct nd_region *nd_region,
+static void nd_namespace_pmem_set_resource(struct nd_region *nd_region,
                struct nd_namespace_pmem *nspm, resource_size_t size)
 {
        struct resource *res = &nspm->nsio.res;
+       resource_size_t offset = 0;
 
-       res->start = nd_region->ndr_start;
-       res->end = nd_region->ndr_start + size - 1;
+       if (size && !nspm->uuid) {
+               WARN_ON_ONCE(1);
+               size = 0;
+       }
+
+       if (size && nspm->uuid) {
+               struct nd_mapping *nd_mapping = &nd_region->mapping[0];
+               struct nvdimm_drvdata *ndd = to_ndd(nd_mapping);
+               struct nd_label_id label_id;
+               struct resource *res;
+
+               if (!ndd) {
+                       size = 0;
+                       goto out;
+               }
+
+               nd_label_gen_id(&label_id, nspm->uuid, 0);
+
+               /* calculate a spa offset from the dpa allocation offset */
+               for_each_dpa_resource(ndd, res)
+                       if (strcmp(res->name, label_id.id) == 0) {
+                               offset = (res->start - nd_mapping->start)
+                                       * nd_region->ndr_mappings;
+                               goto out;
+                       }
+
+               WARN_ON_ONCE(1);
+               size = 0;
+       }
+
+ out:
+       res->start = nd_region->ndr_start + offset;
+       res->end = res->start + size - 1;
 }
 
 static bool uuid_not_set(const u8 *uuid, struct device *dev, const char *where)
@@ -930,7 +1032,7 @@ static ssize_t __size_store(struct device *dev, unsigned long long val)
        if (is_namespace_pmem(dev)) {
                struct nd_namespace_pmem *nspm = to_nd_namespace_pmem(dev);
 
-               nd_namespace_pmem_set_size(nd_region, nspm,
+               nd_namespace_pmem_set_resource(nd_region, nspm,
                                val * nd_region->ndr_mappings);
        } else if (is_namespace_blk(dev)) {
                struct nd_namespace_blk *nsblk = to_nd_namespace_blk(dev);
@@ -1546,6 +1648,7 @@ static int select_pmem_id(struct nd_region *nd_region, u8 *pmem_id)
 
        for (i = 0; i < nd_region->ndr_mappings; i++) {
                struct nd_mapping *nd_mapping = &nd_region->mapping[i];
+               struct nvdimm_drvdata *ndd = to_ndd(nd_mapping);
                struct nd_namespace_label *nd_label = NULL;
                u64 hw_start, hw_end, pmem_start, pmem_end;
                struct nd_label_ent *label_ent;
@@ -1573,10 +1676,14 @@ static int select_pmem_id(struct nd_region *nd_region, u8 *pmem_id)
                hw_end = hw_start + nd_mapping->size;
                pmem_start = __le64_to_cpu(nd_label->dpa);
                pmem_end = pmem_start + __le64_to_cpu(nd_label->rawsize);
-               if (pmem_start == hw_start && pmem_end <= hw_end)
+               if (pmem_start >= hw_start && pmem_start < hw_end
+                               && pmem_end <= hw_end && pmem_end > hw_start)
                        /* pass */;
-               else
+               else {
+                       dev_dbg(&nd_region->dev, "%s invalid label for %pUb\n",
+                                       dev_name(ndd->dev), nd_label->uuid);
                        return -EINVAL;
+               }
 
                /* move recently validated label to the front of the list */
                list_move(&label_ent->list, &nd_mapping->labels);
@@ -1618,6 +1725,7 @@ struct device *create_namespace_pmem(struct nd_region *nd_region,
        if (!nspm)
                return ERR_PTR(-ENOMEM);
 
+       nspm->id = -1;
        dev = &nspm->nsio.common.dev;
        dev->type = &namespace_pmem_device_type;
        dev->parent = &nd_region->dev;
@@ -1629,11 +1737,15 @@ struct device *create_namespace_pmem(struct nd_region *nd_region,
                if (!has_uuid_at_pos(nd_region, nd_label->uuid, cookie, i))
                        break;
        if (i < nd_region->ndr_mappings) {
+               struct nvdimm_drvdata *ndd = to_ndd(&nd_region->mapping[i]);
+
                /*
                 * Give up if we don't find an instance of a uuid at each
                 * position (from 0 to nd_region->ndr_mappings - 1), or if we
                 * find a dimm with two instances of the same uuid.
                 */
+               dev_err(&nd_region->dev, "%s missing label for %pUb\n",
+                               dev_name(ndd->dev), nd_label->uuid);
                rc = -EINVAL;
                goto err;
        }
@@ -1679,7 +1791,7 @@ struct device *create_namespace_pmem(struct nd_region *nd_region,
                goto err;
        }
 
-       nd_namespace_pmem_set_size(nd_region, nspm, size);
+       nd_namespace_pmem_set_resource(nd_region, nspm, size);
 
        return dev;
  err:
@@ -1748,16 +1860,58 @@ static struct device *nd_namespace_blk_create(struct nd_region *nd_region)
        return &nsblk->common.dev;
 }
 
-void nd_region_create_blk_seed(struct nd_region *nd_region)
+static struct device *nd_namespace_pmem_create(struct nd_region *nd_region)
+{
+       struct nd_namespace_pmem *nspm;
+       struct resource *res;
+       struct device *dev;
+
+       if (!is_nd_pmem(&nd_region->dev))
+               return NULL;
+
+       nspm = kzalloc(sizeof(*nspm), GFP_KERNEL);
+       if (!nspm)
+               return NULL;
+
+       dev = &nspm->nsio.common.dev;
+       dev->type = &namespace_pmem_device_type;
+       dev->parent = &nd_region->dev;
+       res = &nspm->nsio.res;
+       res->name = dev_name(&nd_region->dev);
+       res->flags = IORESOURCE_MEM;
+
+       nspm->id = ida_simple_get(&nd_region->ns_ida, 0, 0, GFP_KERNEL);
+       if (nspm->id < 0) {
+               kfree(nspm);
+               return NULL;
+       }
+       dev_set_name(dev, "namespace%d.%d", nd_region->id, nspm->id);
+       dev->parent = &nd_region->dev;
+       dev->groups = nd_namespace_attribute_groups;
+       nd_namespace_pmem_set_resource(nd_region, nspm, 0);
+
+       return dev;
+}
+
+void nd_region_create_ns_seed(struct nd_region *nd_region)
 {
        WARN_ON(!is_nvdimm_bus_locked(&nd_region->dev));
-       nd_region->ns_seed = nd_namespace_blk_create(nd_region);
+
+       if (nd_region_to_nstype(nd_region) == ND_DEVICE_NAMESPACE_IO)
+               return;
+
+       if (is_nd_blk(&nd_region->dev))
+               nd_region->ns_seed = nd_namespace_blk_create(nd_region);
+       else
+               nd_region->ns_seed = nd_namespace_pmem_create(nd_region);
+
        /*
         * Seed creation failures are not fatal, provisioning is simply
         * disabled until memory becomes available
         */
        if (!nd_region->ns_seed)
-               dev_err(&nd_region->dev, "failed to create blk namespace\n");
+               dev_err(&nd_region->dev, "failed to create %s namespace\n",
+                               is_nd_blk(&nd_region->dev) ? "blk" : "pmem");
        else
                nd_device_register(nd_region->ns_seed);
 }
@@ -1874,12 +2028,38 @@ struct device *create_namespace_blk(struct nd_region *nd_region,
        return ERR_PTR(-ENXIO);
 }
 
+static int cmp_dpa(const void *a, const void *b)
+{
+       const struct device *dev_a = *(const struct device **) a;
+       const struct device *dev_b = *(const struct device **) b;
+       struct nd_namespace_blk *nsblk_a, *nsblk_b;
+       struct nd_namespace_pmem *nspm_a, *nspm_b;
+
+       if (is_namespace_io(dev_a))
+               return 0;
+
+       if (is_namespace_blk(dev_a)) {
+               nsblk_a = to_nd_namespace_blk(dev_a);
+               nsblk_b = to_nd_namespace_blk(dev_b);
+
+               return memcmp(&nsblk_a->res[0]->start, &nsblk_b->res[0]->start,
+                               sizeof(resource_size_t));
+       }
+
+       nspm_a = to_nd_namespace_pmem(dev_a);
+       nspm_b = to_nd_namespace_pmem(dev_b);
+
+       return memcmp(&nspm_a->nsio.res.start, &nspm_b->nsio.res.start,
+                       sizeof(resource_size_t));
+}
+
 static struct device **scan_labels(struct nd_region *nd_region)
 {
-       struct nd_mapping *nd_mapping = &nd_region->mapping[0];
+       int i, count = 0;
        struct device *dev, **devs = NULL;
        struct nd_label_ent *label_ent, *e;
-       int i, count = 0;
+       struct nd_mapping *nd_mapping = &nd_region->mapping[0];
+       resource_size_t map_end = nd_mapping->start + nd_mapping->size - 1;
 
        /* "safe" because create_namespace_pmem() might list_move() label_ent */
        list_for_each_entry_safe(label_ent, e, &nd_mapping->labels, list) {
@@ -1896,6 +2076,10 @@ static struct device **scan_labels(struct nd_region *nd_region)
                else
                        continue;
 
+               /* skip labels that describe extents outside of the region */
+               if (nd_label->dpa < nd_mapping->start || nd_label->dpa > map_end)
+                       continue;
+
                i = add_namespace_resource(nd_region, nd_label, devs, count);
                if (i < 0)
                        goto err;
@@ -1928,9 +2112,6 @@ static struct device **scan_labels(struct nd_region *nd_region)
                                }
                        } else
                                devs[count++] = dev;
-
-                       /* we only expect one valid pmem label set per region */
-                       break;
                }
        }
 
@@ -1961,35 +2142,48 @@ static struct device **scan_labels(struct nd_region *nd_region)
                                goto err;
                        dev = &nspm->nsio.common.dev;
                        dev->type = &namespace_pmem_device_type;
-                       nd_namespace_pmem_set_size(nd_region, nspm, 0);
+                       nd_namespace_pmem_set_resource(nd_region, nspm, 0);
                }
                dev->parent = &nd_region->dev;
                devs[count++] = dev;
        } else if (is_nd_pmem(&nd_region->dev)) {
                /* clean unselected labels */
                for (i = 0; i < nd_region->ndr_mappings; i++) {
+                       struct list_head *l, *e;
+                       LIST_HEAD(list);
+                       int j;
+
                        nd_mapping = &nd_region->mapping[i];
                        if (list_empty(&nd_mapping->labels)) {
                                WARN_ON(1);
                                continue;
                        }
-                       label_ent = list_first_entry(&nd_mapping->labels,
-                                       typeof(*label_ent), list);
-                       list_del(&label_ent->list);
+
+                       j = count;
+                       list_for_each_safe(l, e, &nd_mapping->labels) {
+                               if (!j--)
+                                       break;
+                               list_move_tail(l, &list);
+                       }
                        nd_mapping_free_labels(nd_mapping);
-                       list_add(&label_ent->list, &nd_mapping->labels);
+                       list_splice_init(&list, &nd_mapping->labels);
                }
        }
 
+       if (count > 1)
+               sort(devs, count, sizeof(struct device *), cmp_dpa, NULL);
+
        return devs;
 
  err:
-       for (i = 0; devs[i]; i++)
-               if (is_nd_blk(&nd_region->dev))
-                       namespace_blk_release(devs[i]);
-               else
-                       namespace_pmem_release(devs[i]);
-       kfree(devs);
+       if (devs) {
+               for (i = 0; devs[i]; i++)
+                       if (is_nd_blk(&nd_region->dev))
+                               namespace_blk_release(devs[i]);
+                       else
+                               namespace_pmem_release(devs[i]);
+               kfree(devs);
+       }
        return NULL;
 }
 
@@ -2117,6 +2311,13 @@ int nd_region_register_namespaces(struct nd_region *nd_region, int *err)
                        id = ida_simple_get(&nd_region->ns_ida, 0, 0,
                                        GFP_KERNEL);
                        nsblk->id = id;
+               } else if (type == ND_DEVICE_NAMESPACE_PMEM) {
+                       struct nd_namespace_pmem *nspm;
+
+                       nspm = to_nd_namespace_pmem(dev);
+                       id = ida_simple_get(&nd_region->ns_ida, 0, 0,
+                                       GFP_KERNEL);
+                       nspm->id = id;
                } else
                        id = i;