iommu/amd: Use trylock to acquire bitmap_lock
diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
index 8b2be1e..eed355c 100644
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -35,6 +35,7 @@
 #include <linux/msi.h>
 #include <linux/dma-contiguous.h>
 #include <linux/irqdomain.h>
+#include <linux/percpu.h>
 #include <asm/irq_remapping.h>
 #include <asm/io_apic.h>
 #include <asm/apic.h>
@@ -114,6 +115,45 @@ struct kmem_cache *amd_iommu_irq_cache;
 static void update_domain(struct protection_domain *domain);
 static int protection_domain_init(struct protection_domain *domain);
 
+/*
+ * For dynamic growth the aperture size is split into ranges of 128MB of
+ * DMA address space each. This struct represents one such range.
+ */
+struct aperture_range {
+
+       spinlock_t bitmap_lock;
+
+       /* address allocation bitmap */
+       unsigned long *bitmap;
+       unsigned long offset;
+       unsigned long next_bit;
+
+       /*
+        * Array of PTE pages for the aperture. In this array we save all the
+        * leaf pages of the domain page table used for the aperture. This way
+        * we don't need to walk the page table to find a specific PTE. We can
+        * just calculate its address in constant time.
+        */
+       u64 *pte_pages[64];
+};
+
+/*
+ * Data container for a dma_ops specific protection domain
+ */
+struct dma_ops_domain {
+       /* generic protection domain information */
+       struct protection_domain domain;
+
+       /* size of the aperture for the mappings */
+       unsigned long aperture_size;
+
+       /* aperture index we start searching for free addresses */
+       u32 __percpu *next_index;
+
+       /* address space relevant data */
+       struct aperture_range *aperture[APERTURE_MAX_RANGES];
+};
+
 /****************************************************************************
  *
  * Helper functions
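
The pte_pages[] array above is what makes that comment concrete: once the
leaf page for a DMA address is known, the PTE can be located with plain
arithmetic instead of a page-table walk. A minimal userspace sketch of that
arithmetic, with constants mirroring the kernel's and a hypothetical
lookup_pte() helper (it assumes the leaf page has been populated):

#include <stdint.h>
#include <stddef.h>

#define PAGE_SHIFT		12
#define APERTURE_RANGE_SHIFT	27	/* 128 MB per range */
#define APERTURE_RANGE_SIZE	(1UL << APERTURE_RANGE_SHIFT)
#define APERTURE_MAX_RANGES	32

struct aperture_range {
	uint64_t *pte_pages[64];	/* leaf page-table pages */
	/* bitmap_lock, bitmap, offset, next_bit elided */
};

static uint64_t *lookup_pte(struct aperture_range *ap[], uint64_t dma_addr)
{
	unsigned long range = dma_addr >> APERTURE_RANGE_SHIFT;
	unsigned long pfn   = (dma_addr & (APERTURE_RANGE_SIZE - 1)) >> PAGE_SHIFT;

	if (range >= APERTURE_MAX_RANGES || !ap[range])
		return NULL;

	/* 64 leaf pages per range, 512 PTEs per 4 KB leaf page */
	return &ap[range]->pte_pages[pfn / 512][pfn % 512];
}
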
@@ -1167,11 +1207,21 @@ static u64 *alloc_pte(struct protection_domain *domain,
        end_lvl = PAGE_SIZE_LEVEL(page_size);
 
        while (level > end_lvl) {
-               if (!IOMMU_PTE_PRESENT(*pte)) {
+               u64 __pte, __npte;
+
+               __pte = *pte;
+
+               if (!IOMMU_PTE_PRESENT(__pte)) {
                        page = (u64 *)get_zeroed_page(gfp);
                        if (!page)
                                return NULL;
-                       *pte = PM_LEVEL_PDE(level, virt_to_phys(page));
+
+                       __npte = PM_LEVEL_PDE(level, virt_to_phys(page));
+
+                       if (cmpxchg64(pte, __pte, __npte)) {
+                               free_page((unsigned long)page);
+                               continue;
+                       }
                }
 
                /* No level skipping support yet */
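
The cmpxchg64() here turns installing a page-directory entry into a
lock-free operation: every contender prepares its own zeroed page, but only
the first compare-and-swap wins; losers free their page and retry, now
seeing the entry as present. A compilable userspace model of the same
pattern, using C11 atomics in place of cmpxchg64() and an invented
present-bit encoding:

#include <stdatomic.h>
#include <stdint.h>
#include <stdlib.h>

static uint64_t *install_level(_Atomic uint64_t *slot)
{
	for (;;) {
		uint64_t old = atomic_load(slot);

		if (old & 1)		/* "present" bit already set */
			return (uint64_t *)(uintptr_t)(old & ~1ULL);

		uint64_t *page = calloc(512, sizeof(uint64_t));
		if (!page)
			return NULL;

		uint64_t new = (uint64_t)(uintptr_t)page | 1;

		/* Succeeds only if nobody installed a page meanwhile. */
		if (atomic_compare_exchange_strong(slot, &old, new))
			return page;

		free(page);		/* lost the race: retry */
	}
}

As in the patch, the `if (cmpxchg64(...))` test relies on a non-present
entry reading as zero, so any non-zero return value means another CPU won
the race.
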
@@ -1376,8 +1426,10 @@ static int alloc_new_range(struct dma_ops_domain *dma_dom,
                           bool populate, gfp_t gfp)
 {
        int index = dma_dom->aperture_size >> APERTURE_RANGE_SHIFT;
-       struct amd_iommu *iommu;
        unsigned long i, old_size, pte_pgsize;
+       struct aperture_range *range;
+       struct amd_iommu *iommu;
+       unsigned long flags;
 
 #ifdef CONFIG_IOMMU_STRESS
        populate = false;
@@ -1386,15 +1438,17 @@ static int alloc_new_range(struct dma_ops_domain *dma_dom,
        if (index >= APERTURE_MAX_RANGES)
                return -ENOMEM;
 
-       dma_dom->aperture[index] = kzalloc(sizeof(struct aperture_range), gfp);
-       if (!dma_dom->aperture[index])
+       range = kzalloc(sizeof(struct aperture_range), gfp);
+       if (!range)
                return -ENOMEM;
 
-       dma_dom->aperture[index]->bitmap = (void *)get_zeroed_page(gfp);
-       if (!dma_dom->aperture[index]->bitmap)
+       range->bitmap = (void *)get_zeroed_page(gfp);
+       if (!range->bitmap)
                goto out_free;
 
-       dma_dom->aperture[index]->offset = dma_dom->aperture_size;
+       range->offset = dma_dom->aperture_size;
+
+       spin_lock_init(&range->bitmap_lock);
 
        if (populate) {
                unsigned long address = dma_dom->aperture_size;
@@ -1407,14 +1461,20 @@ static int alloc_new_range(struct dma_ops_domain *dma_dom,
                        if (!pte)
                                goto out_free;
 
-                       dma_dom->aperture[index]->pte_pages[i] = pte_page;
+                       range->pte_pages[i] = pte_page;
 
                        address += APERTURE_RANGE_SIZE / 64;
                }
        }
 
-       old_size                = dma_dom->aperture_size;
-       dma_dom->aperture_size += APERTURE_RANGE_SIZE;
+       spin_lock_irqsave(&dma_dom->domain.lock, flags);
+
+       /* First take the bitmap_lock and then publish the range */
+       spin_lock(&range->bitmap_lock);
+
+       old_size                 = dma_dom->aperture_size;
+       dma_dom->aperture[index] = range;
+       dma_dom->aperture_size  += APERTURE_RANGE_SIZE;
 
        /* Reserve address range used for MSI messages */
        if (old_size < MSI_ADDR_BASE_LO &&
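
The ordering that comment demands is load-bearing: dma_ops_area_alloc()
below dereferences an aperture pointer only after finding it non-NULL and
then takes bitmap_lock, so publishing the range with its lock already held
guarantees no CPU can search the bitmap before the MSI and unity-map
reservations that follow are in place. A userspace sketch of the
publish-under-lock idiom, with init_reserved_regions() standing in for
those reservations:

#include <pthread.h>
#include <stdatomic.h>

struct range {
	pthread_spinlock_t lock;
	unsigned long bitmap[4];
};

extern void init_reserved_regions(struct range *r);

static void publish_range(struct range *_Atomic *slot, struct range *r)
{
	pthread_spin_init(&r->lock, PTHREAD_PROCESS_PRIVATE);
	pthread_spin_lock(&r->lock);

	/* Publish while locked: anyone who finds the range must wait. */
	atomic_store(slot, r);

	init_reserved_regions(r);	/* e.g. reserve the MSI window */

	pthread_spin_unlock(&r->lock);	/* now safe to allocate from */
}
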
@@ -1461,62 +1521,123 @@ static int alloc_new_range(struct dma_ops_domain *dma_dom,
 
        update_domain(&dma_dom->domain);
 
+       spin_unlock(&range->bitmap_lock);
+
+       spin_unlock_irqrestore(&dma_dom->domain.lock, flags);
+
        return 0;
 
 out_free:
        update_domain(&dma_dom->domain);
 
-       free_page((unsigned long)dma_dom->aperture[index]->bitmap);
+       free_page((unsigned long)range->bitmap);
 
-       kfree(dma_dom->aperture[index]);
-       dma_dom->aperture[index] = NULL;
+       kfree(range);
 
        return -ENOMEM;
 }
 
+static dma_addr_t dma_ops_aperture_alloc(struct dma_ops_domain *dom,
+                                        struct aperture_range *range,
+                                        unsigned long pages,
+                                        unsigned long dma_mask,
+                                        unsigned long boundary_size,
+                                        unsigned long align_mask,
+                                        bool trylock)
+{
+       unsigned long offset, limit, flags;
+       dma_addr_t address;
+       bool flush = false;
+
+       offset = range->offset >> PAGE_SHIFT;
+       limit  = iommu_device_max_index(APERTURE_RANGE_PAGES, offset,
+                                       dma_mask >> PAGE_SHIFT);
+
+       if (trylock) {
+               if (!spin_trylock_irqsave(&range->bitmap_lock, flags))
+                       return -1;
+       } else {
+               spin_lock_irqsave(&range->bitmap_lock, flags);
+       }
+
+       address = iommu_area_alloc(range->bitmap, limit, range->next_bit,
+                                  pages, offset, boundary_size, align_mask);
+       if (address == -1) {
+               /* Nothing found, retry one time */
+               address = iommu_area_alloc(range->bitmap, limit,
+                                          0, pages, offset, boundary_size,
+                                          align_mask);
+               flush = true;
+       }
+
+       if (address != -1)
+               range->next_bit = address + pages;
+
+       spin_unlock_irqrestore(&range->bitmap_lock, flags);
+
+       if (flush) {
+               domain_flush_tlb(&dom->domain);
+               domain_flush_complete(&dom->domain);
+       }
+
+       return address;
+}
+
 static unsigned long dma_ops_area_alloc(struct device *dev,
                                        struct dma_ops_domain *dom,
                                        unsigned int pages,
                                        unsigned long align_mask,
-                                       u64 dma_mask,
-                                       unsigned long start)
+                                       u64 dma_mask)
 {
-       unsigned long next_bit = dom->next_address % APERTURE_RANGE_SIZE;
-       int max_index = dom->aperture_size >> APERTURE_RANGE_SHIFT;
-       int i = start >> APERTURE_RANGE_SHIFT;
        unsigned long boundary_size, mask;
        unsigned long address = -1;
-       unsigned long limit;
+       bool first = true;
+       u32 start, i;
 
-       next_bit >>= PAGE_SHIFT;
+       preempt_disable();
 
        mask = dma_get_seg_boundary(dev);
 
+again:
+       start = this_cpu_read(*dom->next_index);
+
+       /* Sanity check - is it really necessary? */
+       if (unlikely(start > APERTURE_MAX_RANGES)) {
+               start = 0;
+               this_cpu_write(*dom->next_index, 0);
+       }
+
        boundary_size = mask + 1 ? ALIGN(mask + 1, PAGE_SIZE) >> PAGE_SHIFT :
                                   1UL << (BITS_PER_LONG - PAGE_SHIFT);
 
-       for (;i < max_index; ++i) {
-               unsigned long offset = dom->aperture[i]->offset >> PAGE_SHIFT;
+       for (i = 0; i < APERTURE_MAX_RANGES; ++i) {
+               struct aperture_range *range;
+               int index;
 
-               if (dom->aperture[i]->offset >= dma_mask)
-                       break;
+               index = (start + i) % APERTURE_MAX_RANGES;
 
-               limit = iommu_device_max_index(APERTURE_RANGE_PAGES, offset,
-                                              dma_mask >> PAGE_SHIFT);
+               range = dom->aperture[index];
 
-               address = iommu_area_alloc(dom->aperture[i]->bitmap,
-                                          limit, next_bit, pages, 0,
-                                           boundary_size, align_mask);
+               if (!range || range->offset >= dma_mask)
+                       continue;
+
+               address = dma_ops_aperture_alloc(dom, range, pages,
+                                                dma_mask, boundary_size,
+                                                align_mask, first);
                if (address != -1) {
-                       address = dom->aperture[i]->offset +
-                                 (address << PAGE_SHIFT);
-                       dom->next_address = address + (pages << PAGE_SHIFT);
+                       address = range->offset + (address << PAGE_SHIFT);
+                       this_cpu_write(*dom->next_index, index);
                        break;
                }
+       }
 
-               next_bit = 0;
+       if (address == -1 && first) {
+               first = false;
+               goto again;
        }
 
+       preempt_enable();
+
        return address;
 }
 
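
This function is the subject line's trylock scheme in action: on the first
pass every bitmap_lock is merely tried, so a CPU whose preferred range is
contended moves on to the next range instead of spinning, and only when a
whole pass comes up empty does the retry pass block on the locks. A
condensed userspace model of the two passes, with alloc_from_range()
standing in for the locked bitmap search done by dma_ops_aperture_alloc():

#include <pthread.h>
#include <stdbool.h>

#define NR_RANGES 32

struct range {
	pthread_spinlock_t lock;
	/* allocation bitmap elided */
};

extern long alloc_from_range(struct range *r, unsigned long pages);

static long area_alloc(struct range *ranges[], unsigned start,
		       unsigned long pages)
{
	for (int pass = 0; pass < 2; pass++) {
		bool trylock = (pass == 0);	/* first pass must not spin */

		for (unsigned i = 0; i < NR_RANGES; i++) {
			struct range *r = ranges[(start + i) % NR_RANGES];
			long addr;

			if (!r)
				continue;

			if (trylock) {
				if (pthread_spin_trylock(&r->lock))
					continue;	/* contended: skip */
			} else {
				pthread_spin_lock(&r->lock);
			}

			addr = alloc_from_range(r, pages);
			pthread_spin_unlock(&r->lock);

			if (addr != -1)
				return addr;
		}
	}

	return -1;
}
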
@@ -1526,21 +1647,14 @@ static unsigned long dma_ops_alloc_addresses(struct device *dev,
                                             unsigned long align_mask,
                                             u64 dma_mask)
 {
-       unsigned long address;
-
-#ifdef CONFIG_IOMMU_STRESS
-       dom->next_address = 0;
-       dom->need_flush = true;
-#endif
+       unsigned long address = -1;
 
-       address = dma_ops_area_alloc(dev, dom, pages, align_mask,
-                                    dma_mask, dom->next_address);
+       while (address == -1) {
+               address = dma_ops_area_alloc(dev, dom, pages,
+                                            align_mask, dma_mask);
 
-       if (address == -1) {
-               dom->next_address = 0;
-               address = dma_ops_area_alloc(dev, dom, pages, align_mask,
-                                            dma_mask, 0);
-               dom->need_flush = true;
+               if (address == -1 && alloc_new_range(dom, false, GFP_ATOMIC))
+                       break;
        }
 
        if (unlikely(address == -1))
@@ -1562,6 +1676,7 @@ static void dma_ops_free_addresses(struct dma_ops_domain *dom,
 {
        unsigned i = address >> APERTURE_RANGE_SHIFT;
        struct aperture_range *range = dom->aperture[i];
+       unsigned long flags;
 
        BUG_ON(i >= APERTURE_MAX_RANGES || range == NULL);
 
@@ -1570,12 +1685,18 @@ static void dma_ops_free_addresses(struct dma_ops_domain *dom,
                return;
 #endif
 
-       if (address >= dom->next_address)
-               dom->need_flush = true;
+       if (amd_iommu_unmap_flush) {
+               domain_flush_tlb(&dom->domain);
+               domain_flush_complete(&dom->domain);
+       }
 
        address = (address % APERTURE_RANGE_SIZE) >> PAGE_SHIFT;
 
+       spin_lock_irqsave(&range->bitmap_lock, flags);
+       if (address + pages > range->next_bit)
+               range->next_bit = address + pages;
        bitmap_clear(range->bitmap, address, pages);
+       spin_unlock_irqrestore(&range->bitmap_lock, flags);
 
 }
 
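
Bumping next_bit forward on free is what keeps the lazy-flush scheme sound:
freed addresses land behind the search cursor, so they are only considered
again after the allocator wraps around to bit 0, and the wrap path in
dma_ops_aperture_alloc() above flushes the TLB before handing them out.
Both halves of that invariant in one userspace sketch, with search_bitmap()
and tlb_flush() as placeholders:

struct range {
	unsigned long next_bit;
	/* bitmap and bitmap_lock elided */
};

extern long search_bitmap(struct range *r, unsigned long start,
			  unsigned long pages);
extern void tlb_flush(void);

static long lazy_alloc(struct range *r, unsigned long pages)
{
	long addr = search_bitmap(r, r->next_bit, pages);

	if (addr == -1) {
		/* Wrapped around: flush before old addresses recycle. */
		addr = search_bitmap(r, 0, pages);
		tlb_flush();
	}
	if (addr != -1)
		r->next_bit = addr + pages;

	return addr;
}

static void lazy_free(struct range *r, unsigned long addr,
		      unsigned long pages)
{
	/* Keep freed addresses behind the cursor until the next wrap. */
	if (addr + pages > r->next_bit)
		r->next_bit = addr + pages;
	/* bitmap_clear(r->bitmap, addr, pages) would follow here */
}
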
@@ -1755,6 +1876,8 @@ static void dma_ops_domain_free(struct dma_ops_domain *dom)
        if (!dom)
                return;
 
+       free_percpu(dom->next_index);
+
        del_domain_from_list(&dom->domain);
 
        free_pagetable(&dom->domain);
@@ -1777,6 +1900,7 @@ static void dma_ops_domain_free(struct dma_ops_domain *dom)
 static struct dma_ops_domain *dma_ops_domain_alloc(void)
 {
        struct dma_ops_domain *dma_dom;
+       int cpu;
 
        dma_dom = kzalloc(sizeof(struct dma_ops_domain), GFP_KERNEL);
        if (!dma_dom)
@@ -1785,6 +1909,10 @@ static struct dma_ops_domain *dma_ops_domain_alloc(void)
        if (protection_domain_init(&dma_dom->domain))
                goto free_dma_dom;
 
+       dma_dom->next_index = alloc_percpu(u32);
+       if (!dma_dom->next_index)
+               goto free_dma_dom;
+
        dma_dom->domain.mode = PAGE_MODE_2_LEVEL;
        dma_dom->domain.pt_root = (void *)get_zeroed_page(GFP_KERNEL);
        dma_dom->domain.flags = PD_DMA_OPS_MASK;
@@ -1792,8 +1920,6 @@ static struct dma_ops_domain *dma_ops_domain_alloc(void)
        if (!dma_dom->domain.pt_root)
                goto free_dma_dom;
 
-       dma_dom->need_flush = false;
-
        add_domain_to_list(&dma_dom->domain);
 
        if (alloc_new_range(dma_dom, true, GFP_KERNEL))
@@ -1804,8 +1930,9 @@ static struct dma_ops_domain *dma_ops_domain_alloc(void)
         * a valid dma-address. So we can use 0 as error value
         */
        dma_dom->aperture[0]->bitmap[0] = 1;
-       dma_dom->next_address = 0;
 
+       for_each_possible_cpu(cpu)
+               *per_cpu_ptr(dma_dom->next_index, cpu) = 0;
 
        return dma_dom;
 
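
The per-CPU next_index replaces the old global dom->next_address as the
search hint, so different CPUs tend to start in different aperture ranges
and stop fighting over one bitmap. A rough userspace analogue, with a
per-thread variable in place of alloc_percpu(), this_cpu_read() and
this_cpu_write():

#include <stdint.h>

#define NR_RANGES 32

static _Thread_local uint32_t next_index;	/* ~ *dom->next_index */

static uint32_t pick_start_range(void)
{
	uint32_t start = next_index;

	/* Mirror the patch's sanity check against a stale value. */
	if (start > NR_RANGES)
		start = next_index = 0;

	return start;
}

static void record_hit(uint32_t index)
{
	/* Remember where allocation last succeeded, as the patch does
	 * with this_cpu_write(*dom->next_index, index). */
	next_index = index;
}
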
@@ -2328,7 +2455,7 @@ static dma_addr_t dma_ops_domain_map(struct dma_ops_domain *dom,
        else if (direction == DMA_BIDIRECTIONAL)
                __pte |= IOMMU_PTE_IR | IOMMU_PTE_IW;
 
-       WARN_ON(*pte);
+       WARN_ON_ONCE(*pte);
 
        *pte = __pte;
 
@@ -2357,7 +2484,7 @@ static void dma_ops_domain_unmap(struct dma_ops_domain *dom,
 
        pte += PM_LEVEL_INDEX(0, address);
 
-       WARN_ON(!*pte);
+       WARN_ON_ONCE(!*pte);
 
        *pte = 0ULL;
 }
@@ -2393,26 +2520,11 @@ static dma_addr_t __map_single(struct device *dev,
        if (align)
                align_mask = (1UL << get_order(size)) - 1;
 
-retry:
        address = dma_ops_alloc_addresses(dev, dma_dom, pages, align_mask,
                                          dma_mask);
-       if (unlikely(address == DMA_ERROR_CODE)) {
-               /*
-                * setting next_address here will let the address
-                * allocator only scan the new allocated range in the
-                * first run. This is a small optimization.
-                */
-               dma_dom->next_address = dma_dom->aperture_size;
 
-               if (alloc_new_range(dma_dom, false, GFP_ATOMIC))
-                       goto out;
-
-               /*
-                * aperture was successfully enlarged by 128 MB, try
-                * allocation again
-                */
-               goto retry;
-       }
+       if (address == DMA_ERROR_CODE)
+               goto out;
 
        start = address;
        for (i = 0; i < pages; ++i) {
@@ -2427,11 +2539,10 @@ retry:
 
        ADD_STATS_COUNTER(alloced_io_mem, size);
 
-       if (unlikely(dma_dom->need_flush && !amd_iommu_unmap_flush)) {
-               domain_flush_tlb(&dma_dom->domain);
-               dma_dom->need_flush = false;
-       } else if (unlikely(amd_iommu_np_cache))
+       if (unlikely(amd_iommu_np_cache)) {
                domain_flush_pages(&dma_dom->domain, address, size);
+               domain_flush_complete(&dma_dom->domain);
+       }
 
 out:
        return address;
@@ -2478,11 +2589,6 @@ static void __unmap_single(struct dma_ops_domain *dma_dom,
        SUB_STATS_COUNTER(alloced_io_mem, size);
 
        dma_ops_free_addresses(dma_dom, dma_addr, pages);
-
-       if (amd_iommu_unmap_flush || dma_dom->need_flush) {
-               domain_flush_pages(&dma_dom->domain, flush_addr, size);
-               dma_dom->need_flush = false;
-       }
 }
 
 /*
@@ -2493,11 +2599,9 @@ static dma_addr_t map_page(struct device *dev, struct page *page,
                           enum dma_data_direction dir,
                           struct dma_attrs *attrs)
 {
-       unsigned long flags;
+       phys_addr_t paddr = page_to_phys(page) + offset;
        struct protection_domain *domain;
-       dma_addr_t addr;
        u64 dma_mask;
-       phys_addr_t paddr = page_to_phys(page) + offset;
 
        INC_STATS_COUNTER(cnt_map_single);
 
@@ -2509,19 +2613,8 @@ static dma_addr_t map_page(struct device *dev, struct page *page,
 
        dma_mask = *dev->dma_mask;
 
-       spin_lock_irqsave(&domain->lock, flags);
-
-       addr = __map_single(dev, domain->priv, paddr, size, dir, false,
+       return __map_single(dev, domain->priv, paddr, size, dir, false,
                            dma_mask);
-       if (addr == DMA_ERROR_CODE)
-               goto out;
-
-       domain_flush_complete(domain);
-
-out:
-       spin_unlock_irqrestore(&domain->lock, flags);
-
-       return addr;
 }
 
 /*
@@ -2530,7 +2623,6 @@ out:
 static void unmap_page(struct device *dev, dma_addr_t dma_addr, size_t size,
                       enum dma_data_direction dir, struct dma_attrs *attrs)
 {
-       unsigned long flags;
        struct protection_domain *domain;
 
        INC_STATS_COUNTER(cnt_unmap_single);
@@ -2539,13 +2631,7 @@ static void unmap_page(struct device *dev, dma_addr_t dma_addr, size_t size,
        if (IS_ERR(domain))
                return;
 
-       spin_lock_irqsave(&domain->lock, flags);
-
        __unmap_single(domain->priv, dma_addr, size, dir);
-
-       domain_flush_complete(domain);
-
-       spin_unlock_irqrestore(&domain->lock, flags);
 }
 
 /*
@@ -2556,7 +2642,6 @@ static int map_sg(struct device *dev, struct scatterlist *sglist,
                  int nelems, enum dma_data_direction dir,
                  struct dma_attrs *attrs)
 {
-       unsigned long flags;
        struct protection_domain *domain;
        int i;
        struct scatterlist *s;
@@ -2572,8 +2657,6 @@ static int map_sg(struct device *dev, struct scatterlist *sglist,
 
        dma_mask = *dev->dma_mask;
 
-       spin_lock_irqsave(&domain->lock, flags);
-
        for_each_sg(sglist, s, nelems, i) {
                paddr = sg_phys(s);
 
@@ -2588,12 +2671,8 @@ static int map_sg(struct device *dev, struct scatterlist *sglist,
                        goto unmap;
        }
 
-       domain_flush_complete(domain);
-
-out:
-       spin_unlock_irqrestore(&domain->lock, flags);
-
        return mapped_elems;
+
 unmap:
        for_each_sg(sglist, s, mapped_elems, i) {
                if (s->dma_address)
@@ -2602,9 +2681,7 @@ unmap:
                s->dma_address = s->dma_length = 0;
        }
 
-       mapped_elems = 0;
-
-       goto out;
+       return 0;
 }
 
 /*
@@ -2615,7 +2692,6 @@ static void unmap_sg(struct device *dev, struct scatterlist *sglist,
                     int nelems, enum dma_data_direction dir,
                     struct dma_attrs *attrs)
 {
-       unsigned long flags;
        struct protection_domain *domain;
        struct scatterlist *s;
        int i;
@@ -2626,17 +2702,11 @@ static void unmap_sg(struct device *dev, struct scatterlist *sglist,
        if (IS_ERR(domain))
                return;
 
-       spin_lock_irqsave(&domain->lock, flags);
-
        for_each_sg(sglist, s, nelems, i) {
                __unmap_single(domain->priv, s->dma_address,
                               s->dma_length, dir);
                s->dma_address = s->dma_length = 0;
        }
-
-       domain_flush_complete(domain);
-
-       spin_unlock_irqrestore(&domain->lock, flags);
 }
 
 /*
@@ -2648,7 +2718,6 @@ static void *alloc_coherent(struct device *dev, size_t size,
 {
        u64 dma_mask = dev->coherent_dma_mask;
        struct protection_domain *domain;
-       unsigned long flags;
        struct page *page;
 
        INC_STATS_COUNTER(cnt_alloc_coherent);
@@ -2680,19 +2749,11 @@ static void *alloc_coherent(struct device *dev, size_t size,
        if (!dma_mask)
                dma_mask = *dev->dma_mask;
 
-       spin_lock_irqsave(&domain->lock, flags);
-
        *dma_addr = __map_single(dev, domain->priv, page_to_phys(page),
                                 size, DMA_BIDIRECTIONAL, true, dma_mask);
 
-       if (*dma_addr == DMA_ERROR_CODE) {
-               spin_unlock_irqrestore(&domain->lock, flags);
+       if (*dma_addr == DMA_ERROR_CODE)
                goto out_free;
-       }
-
-       domain_flush_complete(domain);
-
-       spin_unlock_irqrestore(&domain->lock, flags);
 
        return page_address(page);
 
@@ -2712,7 +2773,6 @@ static void free_coherent(struct device *dev, size_t size,
                          struct dma_attrs *attrs)
 {
        struct protection_domain *domain;
-       unsigned long flags;
        struct page *page;
 
        INC_STATS_COUNTER(cnt_free_coherent);
@@ -2724,14 +2784,8 @@ static void free_coherent(struct device *dev, size_t size,
        if (IS_ERR(domain))
                goto free_mem;
 
-       spin_lock_irqsave(&domain->lock, flags);
-
        __unmap_single(domain->priv, dma_addr, size, DMA_BIDIRECTIONAL);
 
-       domain_flush_complete(domain);
-
-       spin_unlock_irqrestore(&domain->lock, flags);
-
 free_mem:
        if (!dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT))
                __free_pages(page, get_order(size));