1 /*
2  * Copyright © 2006-2014 Intel Corporation.
3  *
4  * This program is free software; you can redistribute it and/or modify it
5  * under the terms and conditions of the GNU General Public License,
6  * version 2, as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope it will be useful, but WITHOUT
9  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
11  * more details.
12  *
13  * Authors: David Woodhouse <dwmw2@infradead.org>,
14  *          Ashok Raj <ashok.raj@intel.com>,
15  *          Shaohua Li <shaohua.li@intel.com>,
16  *          Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>,
17  *          Fenghua Yu <fenghua.yu@intel.com>
18  *          Joerg Roedel <jroedel@suse.de>
19  */
20
21 #define pr_fmt(fmt)     "DMAR: " fmt
22
23 #include <linux/init.h>
24 #include <linux/bitmap.h>
25 #include <linux/debugfs.h>
26 #include <linux/export.h>
27 #include <linux/slab.h>
28 #include <linux/irq.h>
29 #include <linux/interrupt.h>
30 #include <linux/spinlock.h>
31 #include <linux/pci.h>
32 #include <linux/dmar.h>
33 #include <linux/dma-mapping.h>
34 #include <linux/mempool.h>
35 #include <linux/memory.h>
36 #include <linux/timer.h>
37 #include <linux/iova.h>
38 #include <linux/iommu.h>
39 #include <linux/intel-iommu.h>
40 #include <linux/syscore_ops.h>
41 #include <linux/tboot.h>
42 #include <linux/dmi.h>
43 #include <linux/pci-ats.h>
44 #include <linux/memblock.h>
45 #include <linux/dma-contiguous.h>
46 #include <linux/crash_dump.h>
47 #include <asm/irq_remapping.h>
48 #include <asm/cacheflush.h>
49 #include <asm/iommu.h>
50
51 #include "irq_remapping.h"
52
53 #define ROOT_SIZE               VTD_PAGE_SIZE
54 #define CONTEXT_SIZE            VTD_PAGE_SIZE
55
56 #define IS_GFX_DEVICE(pdev) ((pdev->class >> 16) == PCI_BASE_CLASS_DISPLAY)
57 #define IS_USB_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_SERIAL_USB)
58 #define IS_ISA_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA)
59 #define IS_AZALIA(pdev) ((pdev)->vendor == 0x8086 && (pdev)->device == 0x3a3e)
60
61 #define IOAPIC_RANGE_START      (0xfee00000)
62 #define IOAPIC_RANGE_END        (0xfeefffff)
63 #define IOVA_START_ADDR         (0x1000)
64
65 #define DEFAULT_DOMAIN_ADDRESS_WIDTH 48
66
67 #define MAX_AGAW_WIDTH 64
68 #define MAX_AGAW_PFN_WIDTH      (MAX_AGAW_WIDTH - VTD_PAGE_SHIFT)
69
70 #define __DOMAIN_MAX_PFN(gaw)  ((((uint64_t)1) << (gaw-VTD_PAGE_SHIFT)) - 1)
71 #define __DOMAIN_MAX_ADDR(gaw) ((((uint64_t)1) << gaw) - 1)
72
73 /* We limit DOMAIN_MAX_PFN to fit in an unsigned long, and DOMAIN_MAX_ADDR
74    to match. That way, we can use 'unsigned long' for PFNs with impunity. */
75 #define DOMAIN_MAX_PFN(gaw)     ((unsigned long) min_t(uint64_t, \
76                                 __DOMAIN_MAX_PFN(gaw), (unsigned long)-1))
77 #define DOMAIN_MAX_ADDR(gaw)    (((uint64_t)__DOMAIN_MAX_PFN(gaw)) << VTD_PAGE_SHIFT)
78
79 /* IO virtual address start page frame number */
80 #define IOVA_START_PFN          (1)
81
82 #define IOVA_PFN(addr)          ((addr) >> PAGE_SHIFT)
83 #define DMA_32BIT_PFN           IOVA_PFN(DMA_BIT_MASK(32))
84 #define DMA_64BIT_PFN           IOVA_PFN(DMA_BIT_MASK(64))
85
86 /* page table handling */
87 #define LEVEL_STRIDE            (9)
88 #define LEVEL_MASK              (((u64)1 << LEVEL_STRIDE) - 1)
89
90 /*
91  * This bitmap is used to advertise the page sizes our hardware supports
92  * to the IOMMU core, which will then use this information to split
93  * physically contiguous memory regions it is mapping into page sizes
94  * that we support.
95  *
96  * Traditionally the IOMMU core just handed us the mappings directly,
97  * after making sure the size is a power-of-two multiple of 4KiB and that
98  * the mapping has natural alignment.
99  *
100  * To retain this behavior, we currently advertise that we support
101  * all page sizes that are a power-of-two multiple of 4KiB.
102  *
103  * If at some point we'd like to utilize the IOMMU core's new behavior,
104  * we could change this to advertise the real page sizes we support.
105  */
106 #define INTEL_IOMMU_PGSIZES     (~0xFFFUL)
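/*
 * For illustration: ~0xFFFUL sets every bit from bit 12 upwards, so this
 * advertises 4KiB, 8KiB, 16KiB, ... i.e. every power-of-two size of at
 * least 4KiB, matching the behaviour described above.
 */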
107
108 static inline int agaw_to_level(int agaw)
109 {
110         return agaw + 2;
111 }
112
113 static inline int agaw_to_width(int agaw)
114 {
115         return min_t(int, 30 + agaw * LEVEL_STRIDE, MAX_AGAW_WIDTH);
116 }
117
118 static inline int width_to_agaw(int width)
119 {
120         return DIV_ROUND_UP(width - 30, LEVEL_STRIDE);
121 }
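
/*
 * Worked example: with the default 48-bit domain address width,
 * width_to_agaw(48) = DIV_ROUND_UP(48 - 30, 9) = 2,
 * agaw_to_level(2) = 4 (a four-level page table), and
 * agaw_to_width(2) = min(30 + 2 * 9, 64) = 48.
 */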
122
123 static inline unsigned int level_to_offset_bits(int level)
124 {
125         return (level - 1) * LEVEL_STRIDE;
126 }
127
128 static inline int pfn_level_offset(unsigned long pfn, int level)
129 {
130         return (pfn >> level_to_offset_bits(level)) & LEVEL_MASK;
131 }
132
133 static inline unsigned long level_mask(int level)
134 {
135         return -1UL << level_to_offset_bits(level);
136 }
137
138 static inline unsigned long level_size(int level)
139 {
140         return 1UL << level_to_offset_bits(level);
141 }
142
143 static inline unsigned long align_to_level(unsigned long pfn, int level)
144 {
145         return (pfn + level_size(level) - 1) & level_mask(level);
146 }
147
148 static inline unsigned long lvl_to_nr_pages(unsigned int lvl)
149 {
150         return  1 << min_t(int, (lvl - 1) * LEVEL_STRIDE, MAX_AGAW_PFN_WIDTH);
151 }
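
/*
 * For example: lvl_to_nr_pages(1) == 1 (a 4KiB page),
 * lvl_to_nr_pages(2) == 512 (a 2MiB superpage) and
 * lvl_to_nr_pages(3) == 262144 (a 1GiB superpage).
 */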
152
153 /* VT-d pages must always be _smaller_ than MM pages. Otherwise things
154    are never going to work. */
155 static inline unsigned long dma_to_mm_pfn(unsigned long dma_pfn)
156 {
157         return dma_pfn >> (PAGE_SHIFT - VTD_PAGE_SHIFT);
158 }
159
160 static inline unsigned long mm_to_dma_pfn(unsigned long mm_pfn)
161 {
162         return mm_pfn << (PAGE_SHIFT - VTD_PAGE_SHIFT);
163 }
164 static inline unsigned long page_to_dma_pfn(struct page *pg)
165 {
166         return mm_to_dma_pfn(page_to_pfn(pg));
167 }
168 static inline unsigned long virt_to_dma_pfn(void *p)
169 {
170         return page_to_dma_pfn(virt_to_page(p));
171 }
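
/*
 * Example: with 4KiB kernel pages PAGE_SHIFT == VTD_PAGE_SHIFT, so mm and
 * dma pfns are identical; on a kernel built with 64KiB pages each mm pfn
 * corresponds to 16 consecutive dma (VT-d) pfns.
 */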
172
173 /* global iommu list, set NULL for ignored DMAR units */
174 static struct intel_iommu **g_iommus;
175
176 static void __init check_tylersburg_isoch(void);
177 static int rwbf_quirk;
178
179 /*
180  * Set to 1 to panic the kernel if VT-d cannot be enabled successfully
181  * (used when the kernel is launched with TXT).
182  */
183 static int force_on = 0;
184
185 /*
186  * 0: Present
187  * 1-11: Reserved
188  * 12-63: Context Ptr (12 - (haw-1))
189  * 64-127: Reserved
190  */
191 struct root_entry {
192         u64     lo;
193         u64     hi;
194 };
195 #define ROOT_ENTRY_NR (VTD_PAGE_SIZE/sizeof(struct root_entry))
196
197 /*
198  * Take a root_entry and return the Lower Context Table Pointer (LCTP)
199  * if marked present.
200  */
201 static phys_addr_t root_entry_lctp(struct root_entry *re)
202 {
203         if (!(re->lo & 1))
204                 return 0;
205
206         return re->lo & VTD_PAGE_MASK;
207 }
208
209 /*
210  * Take a root_entry and return the Upper Context Table Pointer (UCTP)
211  * if marked present.
212  */
213 static phys_addr_t root_entry_uctp(struct root_entry *re)
214 {
215         if (!(re->hi & 1))
216                 return 0;
217
218         return re->hi & VTD_PAGE_MASK;
219 }
220 /*
221  * low 64 bits:
222  * 0: present
223  * 1: fault processing disable
224  * 2-3: translation type
225  * 12-63: address space root
226  * high 64 bits:
227  * 0-2: address width
228  * 3-6: avail
229  * 8-23: domain id
230  */
231 struct context_entry {
232         u64 lo;
233         u64 hi;
234 };
235
236 static inline void context_clear_pasid_enable(struct context_entry *context)
237 {
238         context->lo &= ~(1ULL << 11);
239 }
240
241 static inline bool context_pasid_enabled(struct context_entry *context)
242 {
243         return !!(context->lo & (1ULL << 11));
244 }
245
246 static inline void context_set_copied(struct context_entry *context)
247 {
248         context->hi |= (1ull << 3);
249 }
250
251 static inline bool context_copied(struct context_entry *context)
252 {
253         return !!(context->hi & (1ULL << 3));
254 }
255
256 static inline bool __context_present(struct context_entry *context)
257 {
258         return (context->lo & 1);
259 }
260
261 static inline bool context_present(struct context_entry *context)
262 {
263         return context_pasid_enabled(context) ?
264              __context_present(context) :
265              __context_present(context) && !context_copied(context);
266 }
267
268 static inline void context_set_present(struct context_entry *context)
269 {
270         context->lo |= 1;
271 }
272
273 static inline void context_set_fault_enable(struct context_entry *context)
274 {
275         context->lo &= (((u64)-1) << 2) | 1;
276 }
277
278 static inline void context_set_translation_type(struct context_entry *context,
279                                                 unsigned long value)
280 {
281         context->lo &= (((u64)-1) << 4) | 3;
282         context->lo |= (value & 3) << 2;
283 }
284
285 static inline void context_set_address_root(struct context_entry *context,
286                                             unsigned long value)
287 {
288         context->lo &= ~VTD_PAGE_MASK;
289         context->lo |= value & VTD_PAGE_MASK;
290 }
291
292 static inline void context_set_address_width(struct context_entry *context,
293                                              unsigned long value)
294 {
295         context->hi |= value & 7;
296 }
297
298 static inline void context_set_domain_id(struct context_entry *context,
299                                          unsigned long value)
300 {
301         context->hi |= (value & ((1 << 16) - 1)) << 8;
302 }
303
304 static inline int context_domain_id(struct context_entry *c)
305 {
306         return((c->hi >> 8) & 0xffff);
307 }
308
309 static inline void context_clear_entry(struct context_entry *context)
310 {
311         context->lo = 0;
312         context->hi = 0;
313 }
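
/*
 * Illustrative sketch only: the real context programming is done by the
 * context-mapping code later in this file, but a context entry is
 * typically populated with the helpers above roughly as follows:
 *
 *	context_clear_entry(context);
 *	context_set_domain_id(context, did);
 *	context_set_address_width(context, agaw);
 *	context_set_address_root(context, virt_to_phys(domain->pgd));
 *	context_set_translation_type(context, CONTEXT_TT_MULTI_LEVEL);
 *	context_set_fault_enable(context);
 *	context_set_present(context);
 */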
314
315 /*
316  * 0: readable
317  * 1: writable
318  * 2-6: reserved
319  * 7: super page
320  * 8-10: available
321  * 11: snoop behavior
322  * 12-63: Host physical address
323  */
324 struct dma_pte {
325         u64 val;
326 };
327
328 static inline void dma_clear_pte(struct dma_pte *pte)
329 {
330         pte->val = 0;
331 }
332
333 static inline u64 dma_pte_addr(struct dma_pte *pte)
334 {
335 #ifdef CONFIG_64BIT
336         return pte->val & VTD_PAGE_MASK;
337 #else
338         /* Must have a full atomic 64-bit read */
339         return  __cmpxchg64(&pte->val, 0ULL, 0ULL) & VTD_PAGE_MASK;
340 #endif
341 }
342
343 static inline bool dma_pte_present(struct dma_pte *pte)
344 {
345         return (pte->val & 3) != 0;
346 }
347
348 static inline bool dma_pte_superpage(struct dma_pte *pte)
349 {
350         return (pte->val & DMA_PTE_LARGE_PAGE);
351 }
352
353 static inline int first_pte_in_page(struct dma_pte *pte)
354 {
355         return !((unsigned long)pte & ~VTD_PAGE_MASK);
356 }
357
358 /*
359  * This domain is a static identity mapping domain.
360  *      1. This domain creates a static 1:1 mapping to all usable memory.
361  *      2. It maps to each iommu if successful.
362  *      3. Each iommu maps to this domain if successful.
363  */
364 static struct dmar_domain *si_domain;
365 static int hw_pass_through = 1;
366
367 /*
368  * Domain represents a virtual machine; more than one device
369  * across iommus may be owned by one domain, e.g. a KVM guest.
370  */
371 #define DOMAIN_FLAG_VIRTUAL_MACHINE     (1 << 0)
372
373 /* si_domain contains multiple devices */
374 #define DOMAIN_FLAG_STATIC_IDENTITY     (1 << 1)
375
376 #define for_each_domain_iommu(idx, domain)                      \
377         for (idx = 0; idx < g_num_of_iommus; idx++)             \
378                 if (domain->iommu_refcnt[idx])
379
380 struct dmar_domain {
381         int     nid;                    /* node id */
382
383         unsigned        iommu_refcnt[DMAR_UNITS_SUPPORTED];
384                                         /* Refcount of devices per iommu */
385
386
387         u16             iommu_did[DMAR_UNITS_SUPPORTED];
388                                         /* Domain ids per IOMMU. Use u16 since
389                                          * domain ids are 16 bit wide according
390                                          * to VT-d spec, section 9.3 */
391
392         struct list_head devices;       /* all devices' list */
393         struct iova_domain iovad;       /* iova's that belong to this domain */
394
395         struct dma_pte  *pgd;           /* virtual address */
396         int             gaw;            /* max guest address width */
397
398         /* adjusted guest address width, 0 is level 2 30-bit */
399         int             agaw;
400
401         int             flags;          /* flags to find out type of domain */
402
403         int             iommu_coherency;/* indicate coherency of iommu access */
404         int             iommu_snooping; /* indicate snooping control feature*/
405         int             iommu_count;    /* reference count of iommu */
406         int             iommu_superpage;/* Level of superpages supported:
407                                            0 == 4KiB (no superpages), 1 == 2MiB,
408                                            2 == 1GiB, 3 == 512GiB, 4 == 1TiB */
409         u64             max_addr;       /* maximum mapped address */
410
411         struct iommu_domain domain;     /* generic domain data structure for
412                                            iommu core */
413 };
414
415 /* PCI domain-device relationship */
416 struct device_domain_info {
417         struct list_head link;  /* link to domain siblings */
418         struct list_head global; /* link to global list */
419         u8 bus;                 /* PCI bus number */
420         u8 devfn;               /* PCI devfn number */
421         u8 pasid_supported:3;
422         u8 pasid_enabled:1;
423         u8 pri_supported:1;
424         u8 pri_enabled:1;
425         u8 ats_supported:1;
426         u8 ats_enabled:1;
427         u8 ats_qdep;
428         struct device *dev; /* it's NULL for PCIe-to-PCI bridge */
429         struct intel_iommu *iommu; /* IOMMU used by this device */
430         struct dmar_domain *domain; /* pointer to domain */
431 };
432
433 struct dmar_rmrr_unit {
434         struct list_head list;          /* list of rmrr units   */
435         struct acpi_dmar_header *hdr;   /* ACPI header          */
436         u64     base_address;           /* reserved base address*/
437         u64     end_address;            /* reserved end address */
438         struct dmar_dev_scope *devices; /* target devices */
439         int     devices_cnt;            /* target device count */
440 };
441
442 struct dmar_atsr_unit {
443         struct list_head list;          /* list of ATSR units */
444         struct acpi_dmar_header *hdr;   /* ACPI header */
445         struct dmar_dev_scope *devices; /* target devices */
446         int devices_cnt;                /* target device count */
447         u8 include_all:1;               /* include all ports */
448 };
449
450 static LIST_HEAD(dmar_atsr_units);
451 static LIST_HEAD(dmar_rmrr_units);
452
453 #define for_each_rmrr_units(rmrr) \
454         list_for_each_entry(rmrr, &dmar_rmrr_units, list)
455
456 static void flush_unmaps_timeout(unsigned long data);
457
458 static DEFINE_TIMER(unmap_timer,  flush_unmaps_timeout, 0, 0);
459
460 #define HIGH_WATER_MARK 250
461 struct deferred_flush_tables {
462         int next;
463         struct iova *iova[HIGH_WATER_MARK];
464         struct dmar_domain *domain[HIGH_WATER_MARK];
465         struct page *freelist[HIGH_WATER_MARK];
466 };
467
468 static struct deferred_flush_tables *deferred_flush;
469
470 /* number of registered intel_iommus; used to size the g_iommus array */
471 static int g_num_of_iommus;
472
473 static DEFINE_SPINLOCK(async_umap_flush_lock);
474 static LIST_HEAD(unmaps_to_do);
475
476 static int timer_on;
477 static long list_size;
478
479 static void domain_exit(struct dmar_domain *domain);
480 static void domain_remove_dev_info(struct dmar_domain *domain);
481 static void dmar_remove_one_dev_info(struct dmar_domain *domain,
482                                      struct device *dev);
483 static void __dmar_remove_one_dev_info(struct device_domain_info *info);
484 static void domain_context_clear(struct intel_iommu *iommu,
485                                  struct device *dev);
486 static int domain_detach_iommu(struct dmar_domain *domain,
487                                struct intel_iommu *iommu);
488
489 #ifdef CONFIG_INTEL_IOMMU_DEFAULT_ON
490 int dmar_disabled = 0;
491 #else
492 int dmar_disabled = 1;
493 #endif /*CONFIG_INTEL_IOMMU_DEFAULT_ON*/
494
495 int intel_iommu_enabled = 0;
496 EXPORT_SYMBOL_GPL(intel_iommu_enabled);
497
498 static int dmar_map_gfx = 1;
499 static int dmar_forcedac;
500 static int intel_iommu_strict;
501 static int intel_iommu_superpage = 1;
502 static int intel_iommu_ecs = 1;
503 static int intel_iommu_pasid28;
504 static int iommu_identity_mapping;
505
506 #define IDENTMAP_ALL            1
507 #define IDENTMAP_GFX            2
508 #define IDENTMAP_AZALIA         4
509
510 /* Broadwell and Skylake have broken ECS support — normal so-called "second
511  * level" translation of DMA requests-without-PASID doesn't actually happen
512  * unless you also set the NESTE bit in an extended context-entry. Which of
513  * course means that SVM doesn't work because it's trying to do nested
514  * translation of the physical addresses it finds in the process page tables,
515  * through the IOVA->phys mapping found in the "second level" page tables.
516  *
517  * The VT-d specification was retroactively changed to change the definition
518  * of the capability bits and pretend that Broadwell/Skylake never happened...
519  * but unfortunately the wrong bit was changed. It's ECS which is broken, but
520  * for some reason it was the PASID capability bit which was redefined (from
521  * bit 28 on BDW/SKL to bit 40 in future).
522  *
523  * So our test for ECS needs to eschew those implementations which set the old
524  * PASID capability bit 28, since those are the ones on which ECS is broken.
525  * Unless we are working around the 'pasid28' limitations, that is, by putting
526  * the device into passthrough mode for normal DMA and thus masking the bug.
527  */
528 #define ecs_enabled(iommu) (intel_iommu_ecs && ecap_ecs(iommu->ecap) && \
529                             (intel_iommu_pasid28 || !ecap_broken_pasid(iommu->ecap)))
530 /* PASID support is thus enabled if ECS is enabled and *either* of the old
531  * or new capability bits is set. */
532 #define pasid_enabled(iommu) (ecs_enabled(iommu) &&                     \
533                               (ecap_pasid(iommu->ecap) || ecap_broken_pasid(iommu->ecap)))
534
535 int intel_iommu_gfx_mapped;
536 EXPORT_SYMBOL_GPL(intel_iommu_gfx_mapped);
537
538 #define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1))
539 static DEFINE_SPINLOCK(device_domain_lock);
540 static LIST_HEAD(device_domain_list);
541
542 static const struct iommu_ops intel_iommu_ops;
543
544 static bool translation_pre_enabled(struct intel_iommu *iommu)
545 {
546         return (iommu->flags & VTD_FLAG_TRANS_PRE_ENABLED);
547 }
548
549 static void clear_translation_pre_enabled(struct intel_iommu *iommu)
550 {
551         iommu->flags &= ~VTD_FLAG_TRANS_PRE_ENABLED;
552 }
553
554 static void init_translation_status(struct intel_iommu *iommu)
555 {
556         u32 gsts;
557
558         gsts = readl(iommu->reg + DMAR_GSTS_REG);
559         if (gsts & DMA_GSTS_TES)
560                 iommu->flags |= VTD_FLAG_TRANS_PRE_ENABLED;
561 }
562
563 /* Convert a generic 'struct iommu_domain' to the private 'struct dmar_domain' */
564 static struct dmar_domain *to_dmar_domain(struct iommu_domain *dom)
565 {
566         return container_of(dom, struct dmar_domain, domain);
567 }
568
569 static int __init intel_iommu_setup(char *str)
570 {
571         if (!str)
572                 return -EINVAL;
573         while (*str) {
574                 if (!strncmp(str, "on", 2)) {
575                         dmar_disabled = 0;
576                         pr_info("IOMMU enabled\n");
577                 } else if (!strncmp(str, "off", 3)) {
578                         dmar_disabled = 1;
579                         pr_info("IOMMU disabled\n");
580                 } else if (!strncmp(str, "igfx_off", 8)) {
581                         dmar_map_gfx = 0;
582                         pr_info("Disable GFX device mapping\n");
583                 } else if (!strncmp(str, "forcedac", 8)) {
584                         pr_info("Forcing DAC for PCI devices\n");
585                         dmar_forcedac = 1;
586                 } else if (!strncmp(str, "strict", 6)) {
587                         pr_info("Disable batched IOTLB flush\n");
588                         intel_iommu_strict = 1;
589                 } else if (!strncmp(str, "sp_off", 6)) {
590                         pr_info("Disable supported super page\n");
591                         intel_iommu_superpage = 0;
592                 } else if (!strncmp(str, "ecs_off", 7)) {
593                         printk(KERN_INFO
594                                 "Intel-IOMMU: disable extended context table support\n");
595                         intel_iommu_ecs = 0;
596                 } else if (!strncmp(str, "pasid28", 7)) {
597                         printk(KERN_INFO
598                                 "Intel-IOMMU: enable pre-production PASID support\n");
599                         intel_iommu_pasid28 = 1;
600                         iommu_identity_mapping |= IDENTMAP_GFX;
601                 }
602
603                 str += strcspn(str, ",");
604                 while (*str == ',')
605                         str++;
606         }
607         return 0;
608 }
609 __setup("intel_iommu=", intel_iommu_setup);
610
611 static struct kmem_cache *iommu_domain_cache;
612 static struct kmem_cache *iommu_devinfo_cache;
613
614 static struct dmar_domain* get_iommu_domain(struct intel_iommu *iommu, u16 did)
615 {
616         struct dmar_domain **domains;
617         int idx = did >> 8;
618
619         domains = iommu->domains[idx];
620         if (!domains)
621                 return NULL;
622
623         return domains[did & 0xff];
624 }
625
626 static void set_iommu_domain(struct intel_iommu *iommu, u16 did,
627                              struct dmar_domain *domain)
628 {
629         struct dmar_domain **domains;
630         int idx = did >> 8;
631
632         if (!iommu->domains[idx]) {
633                 size_t size = 256 * sizeof(struct dmar_domain *);
634                 iommu->domains[idx] = kzalloc(size, GFP_ATOMIC);
635         }
636
637         domains = iommu->domains[idx];
638         if (WARN_ON(!domains))
639                 return;
640         else
641                 domains[did & 0xff] = domain;
642 }
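
/*
 * Example of the two-level lookup above: domain id 0x1234 lives in
 * iommu->domains[0x12][0x34], i.e. the table page is selected by the
 * high byte and the slot by the low byte of the domain id.
 */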
643
644 static inline void *alloc_pgtable_page(int node)
645 {
646         struct page *page;
647         void *vaddr = NULL;
648
649         page = alloc_pages_node(node, GFP_ATOMIC | __GFP_ZERO, 0);
650         if (page)
651                 vaddr = page_address(page);
652         return vaddr;
653 }
654
655 static inline void free_pgtable_page(void *vaddr)
656 {
657         free_page((unsigned long)vaddr);
658 }
659
660 static inline void *alloc_domain_mem(void)
661 {
662         return kmem_cache_alloc(iommu_domain_cache, GFP_ATOMIC);
663 }
664
665 static void free_domain_mem(void *vaddr)
666 {
667         kmem_cache_free(iommu_domain_cache, vaddr);
668 }
669
670 static inline void * alloc_devinfo_mem(void)
671 {
672         return kmem_cache_alloc(iommu_devinfo_cache, GFP_ATOMIC);
673 }
674
675 static inline void free_devinfo_mem(void *vaddr)
676 {
677         kmem_cache_free(iommu_devinfo_cache, vaddr);
678 }
679
680 static inline int domain_type_is_vm(struct dmar_domain *domain)
681 {
682         return domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE;
683 }
684
685 static inline int domain_type_is_si(struct dmar_domain *domain)
686 {
687         return domain->flags & DOMAIN_FLAG_STATIC_IDENTITY;
688 }
689
690 static inline int domain_type_is_vm_or_si(struct dmar_domain *domain)
691 {
692         return domain->flags & (DOMAIN_FLAG_VIRTUAL_MACHINE |
693                                 DOMAIN_FLAG_STATIC_IDENTITY);
694 }
695
696 static inline int domain_pfn_supported(struct dmar_domain *domain,
697                                        unsigned long pfn)
698 {
699         int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
700
701         return !(addr_width < BITS_PER_LONG && pfn >> addr_width);
702 }
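
/*
 * Worked example: for agaw 2 (48-bit address width), addr_width is
 * 48 - 12 = 36, so any pfn below 1 << 36 (i.e. any IOVA below 256TiB)
 * is supported by the domain's page tables.
 */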
703
704 static int __iommu_calculate_agaw(struct intel_iommu *iommu, int max_gaw)
705 {
706         unsigned long sagaw;
707         int agaw = -1;
708
709         sagaw = cap_sagaw(iommu->cap);
710         for (agaw = width_to_agaw(max_gaw);
711              agaw >= 0; agaw--) {
712                 if (test_bit(agaw, &sagaw))
713                         break;
714         }
715
716         return agaw;
717 }
718
719 /*
720  * Calculate max SAGAW for each iommu.
721  */
722 int iommu_calculate_max_sagaw(struct intel_iommu *iommu)
723 {
724         return __iommu_calculate_agaw(iommu, MAX_AGAW_WIDTH);
725 }
726
727 /*
728  * calculate agaw for each iommu.
729  * "SAGAW" may be different across iommus; use a default agaw and
730  * fall back to a smaller supported agaw for iommus that don't support it.
731  */
732 int iommu_calculate_agaw(struct intel_iommu *iommu)
733 {
734         return __iommu_calculate_agaw(iommu, DEFAULT_DOMAIN_ADDRESS_WIDTH);
735 }
736
737 /* This function only returns a single iommu in a domain */
738 static struct intel_iommu *domain_get_iommu(struct dmar_domain *domain)
739 {
740         int iommu_id;
741
742         /* si_domain and vm domain should not get here. */
743         BUG_ON(domain_type_is_vm_or_si(domain));
744         for_each_domain_iommu(iommu_id, domain)
745                 break;
746
747         if (iommu_id < 0 || iommu_id >= g_num_of_iommus)
748                 return NULL;
749
750         return g_iommus[iommu_id];
751 }
752
753 static void domain_update_iommu_coherency(struct dmar_domain *domain)
754 {
755         struct dmar_drhd_unit *drhd;
756         struct intel_iommu *iommu;
757         bool found = false;
758         int i;
759
760         domain->iommu_coherency = 1;
761
762         for_each_domain_iommu(i, domain) {
763                 found = true;
764                 if (!ecap_coherent(g_iommus[i]->ecap)) {
765                         domain->iommu_coherency = 0;
766                         break;
767                 }
768         }
769         if (found)
770                 return;
771
772         /* No hardware attached; use lowest common denominator */
773         rcu_read_lock();
774         for_each_active_iommu(iommu, drhd) {
775                 if (!ecap_coherent(iommu->ecap)) {
776                         domain->iommu_coherency = 0;
777                         break;
778                 }
779         }
780         rcu_read_unlock();
781 }
782
783 static int domain_update_iommu_snooping(struct intel_iommu *skip)
784 {
785         struct dmar_drhd_unit *drhd;
786         struct intel_iommu *iommu;
787         int ret = 1;
788
789         rcu_read_lock();
790         for_each_active_iommu(iommu, drhd) {
791                 if (iommu != skip) {
792                         if (!ecap_sc_support(iommu->ecap)) {
793                                 ret = 0;
794                                 break;
795                         }
796                 }
797         }
798         rcu_read_unlock();
799
800         return ret;
801 }
802
803 static int domain_update_iommu_superpage(struct intel_iommu *skip)
804 {
805         struct dmar_drhd_unit *drhd;
806         struct intel_iommu *iommu;
807         int mask = 0xf;
808
809         if (!intel_iommu_superpage) {
810                 return 0;
811         }
812
813         /* set iommu_superpage to the smallest common denominator */
814         rcu_read_lock();
815         for_each_active_iommu(iommu, drhd) {
816                 if (iommu != skip) {
817                         mask &= cap_super_page_val(iommu->cap);
818                         if (!mask)
819                                 break;
820                 }
821         }
822         rcu_read_unlock();
823
824         return fls(mask);
825 }
826
827 /* Some capabilities may be different across iommus */
828 static void domain_update_iommu_cap(struct dmar_domain *domain)
829 {
830         domain_update_iommu_coherency(domain);
831         domain->iommu_snooping = domain_update_iommu_snooping(NULL);
832         domain->iommu_superpage = domain_update_iommu_superpage(NULL);
833 }
834
835 static inline struct context_entry *iommu_context_addr(struct intel_iommu *iommu,
836                                                        u8 bus, u8 devfn, int alloc)
837 {
838         struct root_entry *root = &iommu->root_entry[bus];
839         struct context_entry *context;
840         u64 *entry;
841
842         entry = &root->lo;
843         if (ecs_enabled(iommu)) {
844                 if (devfn >= 0x80) {
845                         devfn -= 0x80;
846                         entry = &root->hi;
847                 }
848                 devfn *= 2;
849         }
850         if (*entry & 1)
851                 context = phys_to_virt(*entry & VTD_PAGE_MASK);
852         else {
853                 unsigned long phy_addr;
854                 if (!alloc)
855                         return NULL;
856
857                 context = alloc_pgtable_page(iommu->node);
858                 if (!context)
859                         return NULL;
860
861                 __iommu_flush_cache(iommu, (void *)context, CONTEXT_SIZE);
862                 phy_addr = virt_to_phys((void *)context);
863                 *entry = phy_addr | 1;
864                 __iommu_flush_cache(iommu, entry, sizeof(*entry));
865         }
866         return &context[devfn];
867 }
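
/*
 * Example of the ECS layout handled above: extended context entries are
 * twice the size of legacy ones, so devfn 0x83 is looked up via the upper
 * half of the root entry (root->hi) at index (0x83 - 0x80) * 2 == 6.
 */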
868
869 static int iommu_dummy(struct device *dev)
870 {
871         return dev->archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO;
872 }
873
874 static struct intel_iommu *device_to_iommu(struct device *dev, u8 *bus, u8 *devfn)
875 {
876         struct dmar_drhd_unit *drhd = NULL;
877         struct intel_iommu *iommu;
878         struct device *tmp;
879         struct pci_dev *ptmp, *pdev = NULL;
880         u16 segment = 0;
881         int i;
882
883         if (iommu_dummy(dev))
884                 return NULL;
885
886         if (dev_is_pci(dev)) {
887                 pdev = to_pci_dev(dev);
888                 segment = pci_domain_nr(pdev->bus);
889         } else if (has_acpi_companion(dev))
890                 dev = &ACPI_COMPANION(dev)->dev;
891
892         rcu_read_lock();
893         for_each_active_iommu(iommu, drhd) {
894                 if (pdev && segment != drhd->segment)
895                         continue;
896
897                 for_each_active_dev_scope(drhd->devices,
898                                           drhd->devices_cnt, i, tmp) {
899                         if (tmp == dev) {
900                                 *bus = drhd->devices[i].bus;
901                                 *devfn = drhd->devices[i].devfn;
902                                 goto out;
903                         }
904
905                         if (!pdev || !dev_is_pci(tmp))
906                                 continue;
907
908                         ptmp = to_pci_dev(tmp);
909                         if (ptmp->subordinate &&
910                             ptmp->subordinate->number <= pdev->bus->number &&
911                             ptmp->subordinate->busn_res.end >= pdev->bus->number)
912                                 goto got_pdev;
913                 }
914
915                 if (pdev && drhd->include_all) {
916                 got_pdev:
917                         *bus = pdev->bus->number;
918                         *devfn = pdev->devfn;
919                         goto out;
920                 }
921         }
922         iommu = NULL;
923  out:
924         rcu_read_unlock();
925
926         return iommu;
927 }
928
929 static void domain_flush_cache(struct dmar_domain *domain,
930                                void *addr, int size)
931 {
932         if (!domain->iommu_coherency)
933                 clflush_cache_range(addr, size);
934 }
935
936 static int device_context_mapped(struct intel_iommu *iommu, u8 bus, u8 devfn)
937 {
938         struct context_entry *context;
939         int ret = 0;
940         unsigned long flags;
941
942         spin_lock_irqsave(&iommu->lock, flags);
943         context = iommu_context_addr(iommu, bus, devfn, 0);
944         if (context)
945                 ret = context_present(context);
946         spin_unlock_irqrestore(&iommu->lock, flags);
947         return ret;
948 }
949
950 static void clear_context_table(struct intel_iommu *iommu, u8 bus, u8 devfn)
951 {
952         struct context_entry *context;
953         unsigned long flags;
954
955         spin_lock_irqsave(&iommu->lock, flags);
956         context = iommu_context_addr(iommu, bus, devfn, 0);
957         if (context) {
958                 context_clear_entry(context);
959                 __iommu_flush_cache(iommu, context, sizeof(*context));
960         }
961         spin_unlock_irqrestore(&iommu->lock, flags);
962 }
963
964 static void free_context_table(struct intel_iommu *iommu)
965 {
966         int i;
967         unsigned long flags;
968         struct context_entry *context;
969
970         spin_lock_irqsave(&iommu->lock, flags);
971         if (!iommu->root_entry) {
972                 goto out;
973         }
974         for (i = 0; i < ROOT_ENTRY_NR; i++) {
975                 context = iommu_context_addr(iommu, i, 0, 0);
976                 if (context)
977                         free_pgtable_page(context);
978
979                 if (!ecs_enabled(iommu))
980                         continue;
981
982                 context = iommu_context_addr(iommu, i, 0x80, 0);
983                 if (context)
984                         free_pgtable_page(context);
985
986         }
987         free_pgtable_page(iommu->root_entry);
988         iommu->root_entry = NULL;
989 out:
990         spin_unlock_irqrestore(&iommu->lock, flags);
991 }
992
993 static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain,
994                                       unsigned long pfn, int *target_level)
995 {
996         struct dma_pte *parent, *pte = NULL;
997         int level = agaw_to_level(domain->agaw);
998         int offset;
999
1000         BUG_ON(!domain->pgd);
1001
1002         if (!domain_pfn_supported(domain, pfn))
1003                 /* Address beyond IOMMU's addressing capabilities. */
1004                 return NULL;
1005
1006         parent = domain->pgd;
1007
1008         while (1) {
1009                 void *tmp_page;
1010
1011                 offset = pfn_level_offset(pfn, level);
1012                 pte = &parent[offset];
1013                 if (!*target_level && (dma_pte_superpage(pte) || !dma_pte_present(pte)))
1014                         break;
1015                 if (level == *target_level)
1016                         break;
1017
1018                 if (!dma_pte_present(pte)) {
1019                         uint64_t pteval;
1020
1021                         tmp_page = alloc_pgtable_page(domain->nid);
1022
1023                         if (!tmp_page)
1024                                 return NULL;
1025
1026                         domain_flush_cache(domain, tmp_page, VTD_PAGE_SIZE);
1027                         pteval = ((uint64_t)virt_to_dma_pfn(tmp_page) << VTD_PAGE_SHIFT) | DMA_PTE_READ | DMA_PTE_WRITE;
1028                         if (cmpxchg64(&pte->val, 0ULL, pteval))
1029                                 /* Someone else set it while we were thinking; use theirs. */
1030                                 free_pgtable_page(tmp_page);
1031                         else
1032                                 domain_flush_cache(domain, pte, sizeof(*pte));
1033                 }
1034                 if (level == 1)
1035                         break;
1036
1037                 parent = phys_to_virt(dma_pte_addr(pte));
1038                 level--;
1039         }
1040
1041         if (!*target_level)
1042                 *target_level = level;
1043
1044         return pte;
1045 }
1046
1047
1048 /* return address's pte at specific level */
1049 static struct dma_pte *dma_pfn_level_pte(struct dmar_domain *domain,
1050                                          unsigned long pfn,
1051                                          int level, int *large_page)
1052 {
1053         struct dma_pte *parent, *pte = NULL;
1054         int total = agaw_to_level(domain->agaw);
1055         int offset;
1056
1057         parent = domain->pgd;
1058         while (level <= total) {
1059                 offset = pfn_level_offset(pfn, total);
1060                 pte = &parent[offset];
1061                 if (level == total)
1062                         return pte;
1063
1064                 if (!dma_pte_present(pte)) {
1065                         *large_page = total;
1066                         break;
1067                 }
1068
1069                 if (dma_pte_superpage(pte)) {
1070                         *large_page = total;
1071                         return pte;
1072                 }
1073
1074                 parent = phys_to_virt(dma_pte_addr(pte));
1075                 total--;
1076         }
1077         return NULL;
1078 }
1079
1080 /* clear last level pte; a TLB flush should follow */
1081 static void dma_pte_clear_range(struct dmar_domain *domain,
1082                                 unsigned long start_pfn,
1083                                 unsigned long last_pfn)
1084 {
1085         unsigned int large_page = 1;
1086         struct dma_pte *first_pte, *pte;
1087
1088         BUG_ON(!domain_pfn_supported(domain, start_pfn));
1089         BUG_ON(!domain_pfn_supported(domain, last_pfn));
1090         BUG_ON(start_pfn > last_pfn);
1091
1092         /* we don't need lock here; nobody else touches the iova range */
1093         do {
1094                 large_page = 1;
1095                 first_pte = pte = dma_pfn_level_pte(domain, start_pfn, 1, &large_page);
1096                 if (!pte) {
1097                         start_pfn = align_to_level(start_pfn + 1, large_page + 1);
1098                         continue;
1099                 }
1100                 do {
1101                         dma_clear_pte(pte);
1102                         start_pfn += lvl_to_nr_pages(large_page);
1103                         pte++;
1104                 } while (start_pfn <= last_pfn && !first_pte_in_page(pte));
1105
1106                 domain_flush_cache(domain, first_pte,
1107                                    (void *)pte - (void *)first_pte);
1108
1109         } while (start_pfn && start_pfn <= last_pfn);
1110 }
1111
1112 static void dma_pte_free_level(struct dmar_domain *domain, int level,
1113                                struct dma_pte *pte, unsigned long pfn,
1114                                unsigned long start_pfn, unsigned long last_pfn)
1115 {
1116         pfn = max(start_pfn, pfn);
1117         pte = &pte[pfn_level_offset(pfn, level)];
1118
1119         do {
1120                 unsigned long level_pfn;
1121                 struct dma_pte *level_pte;
1122
1123                 if (!dma_pte_present(pte) || dma_pte_superpage(pte))
1124                         goto next;
1125
1126                 level_pfn = pfn & level_mask(level);
1127                 level_pte = phys_to_virt(dma_pte_addr(pte));
1128
1129                 if (level > 2)
1130                         dma_pte_free_level(domain, level - 1, level_pte,
1131                                            level_pfn, start_pfn, last_pfn);
1132
1133                 /* If range covers entire pagetable, free it */
1134                 if (!(start_pfn > level_pfn ||
1135                       last_pfn < level_pfn + level_size(level) - 1)) {
1136                         dma_clear_pte(pte);
1137                         domain_flush_cache(domain, pte, sizeof(*pte));
1138                         free_pgtable_page(level_pte);
1139                 }
1140 next:
1141                 pfn += level_size(level);
1142         } while (!first_pte_in_page(++pte) && pfn <= last_pfn);
1143 }
1144
1145 /* free page table pages. last level pte should already be cleared */
1146 static void dma_pte_free_pagetable(struct dmar_domain *domain,
1147                                    unsigned long start_pfn,
1148                                    unsigned long last_pfn)
1149 {
1150         BUG_ON(!domain_pfn_supported(domain, start_pfn));
1151         BUG_ON(!domain_pfn_supported(domain, last_pfn));
1152         BUG_ON(start_pfn > last_pfn);
1153
1154         dma_pte_clear_range(domain, start_pfn, last_pfn);
1155
1156         /* We don't need lock here; nobody else touches the iova range */
1157         dma_pte_free_level(domain, agaw_to_level(domain->agaw),
1158                            domain->pgd, 0, start_pfn, last_pfn);
1159
1160         /* free pgd */
1161         if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) {
1162                 free_pgtable_page(domain->pgd);
1163                 domain->pgd = NULL;
1164         }
1165 }
1166
1167 /* When a page at a given level is being unlinked from its parent, we don't
1168    need to *modify* it at all. All we need to do is make a list of all the
1169    pages which can be freed just as soon as we've flushed the IOTLB and we
1170    know the hardware page-walk will no longer touch them.
1171    The 'pte' argument is the *parent* PTE, pointing to the page that is to
1172    be freed. */
1173 static struct page *dma_pte_list_pagetables(struct dmar_domain *domain,
1174                                             int level, struct dma_pte *pte,
1175                                             struct page *freelist)
1176 {
1177         struct page *pg;
1178
1179         pg = pfn_to_page(dma_pte_addr(pte) >> PAGE_SHIFT);
1180         pg->freelist = freelist;
1181         freelist = pg;
1182
1183         if (level == 1)
1184                 return freelist;
1185
1186         pte = page_address(pg);
1187         do {
1188                 if (dma_pte_present(pte) && !dma_pte_superpage(pte))
1189                         freelist = dma_pte_list_pagetables(domain, level - 1,
1190                                                            pte, freelist);
1191                 pte++;
1192         } while (!first_pte_in_page(pte));
1193
1194         return freelist;
1195 }
1196
1197 static struct page *dma_pte_clear_level(struct dmar_domain *domain, int level,
1198                                         struct dma_pte *pte, unsigned long pfn,
1199                                         unsigned long start_pfn,
1200                                         unsigned long last_pfn,
1201                                         struct page *freelist)
1202 {
1203         struct dma_pte *first_pte = NULL, *last_pte = NULL;
1204
1205         pfn = max(start_pfn, pfn);
1206         pte = &pte[pfn_level_offset(pfn, level)];
1207
1208         do {
1209                 unsigned long level_pfn;
1210
1211                 if (!dma_pte_present(pte))
1212                         goto next;
1213
1214                 level_pfn = pfn & level_mask(level);
1215
1216                 /* If range covers entire pagetable, free it */
1217                 if (start_pfn <= level_pfn &&
1218                     last_pfn >= level_pfn + level_size(level) - 1) {
1219                         /* These subordinate page tables are going away entirely. Don't
1220                            bother to clear them; we're just going to *free* them. */
1221                         if (level > 1 && !dma_pte_superpage(pte))
1222                                 freelist = dma_pte_list_pagetables(domain, level - 1, pte, freelist);
1223
1224                         dma_clear_pte(pte);
1225                         if (!first_pte)
1226                                 first_pte = pte;
1227                         last_pte = pte;
1228                 } else if (level > 1) {
1229                         /* Recurse down into a level that isn't *entirely* obsolete */
1230                         freelist = dma_pte_clear_level(domain, level - 1,
1231                                                        phys_to_virt(dma_pte_addr(pte)),
1232                                                        level_pfn, start_pfn, last_pfn,
1233                                                        freelist);
1234                 }
1235 next:
1236                 pfn += level_size(level);
1237         } while (!first_pte_in_page(++pte) && pfn <= last_pfn);
1238
1239         if (first_pte)
1240                 domain_flush_cache(domain, first_pte,
1241                                    (void *)++last_pte - (void *)first_pte);
1242
1243         return freelist;
1244 }
1245
1246 /* We can't just free the pages because the IOMMU may still be walking
1247    the page tables, and may have cached the intermediate levels. The
1248    pages can only be freed after the IOTLB flush has been done. */
1249 static struct page *domain_unmap(struct dmar_domain *domain,
1250                                  unsigned long start_pfn,
1251                                  unsigned long last_pfn)
1252 {
1253         struct page *freelist = NULL;
1254
1255         BUG_ON(!domain_pfn_supported(domain, start_pfn));
1256         BUG_ON(!domain_pfn_supported(domain, last_pfn));
1257         BUG_ON(start_pfn > last_pfn);
1258
1259         /* we don't need lock here; nobody else touches the iova range */
1260         freelist = dma_pte_clear_level(domain, agaw_to_level(domain->agaw),
1261                                        domain->pgd, 0, start_pfn, last_pfn, NULL);
1262
1263         /* free pgd */
1264         if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) {
1265                 struct page *pgd_page = virt_to_page(domain->pgd);
1266                 pgd_page->freelist = freelist;
1267                 freelist = pgd_page;
1268
1269                 domain->pgd = NULL;
1270         }
1271
1272         return freelist;
1273 }
1274
1275 static void dma_free_pagelist(struct page *freelist)
1276 {
1277         struct page *pg;
1278
1279         while ((pg = freelist)) {
1280                 freelist = pg->freelist;
1281                 free_pgtable_page(page_address(pg));
1282         }
1283 }
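
/*
 * Sketch of the intended usage (the unmap paths later in this file follow
 * this pattern): pages on the freelist may only be released once the
 * IOTLB has been flushed, e.g.
 *
 *	freelist = domain_unmap(domain, start_pfn, last_pfn);
 *	iommu_flush_iotlb_psi(iommu, domain, start_pfn,
 *			      last_pfn - start_pfn + 1, 0, 0);
 *	dma_free_pagelist(freelist);
 */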
1284
1285 /* iommu handling */
1286 static int iommu_alloc_root_entry(struct intel_iommu *iommu)
1287 {
1288         struct root_entry *root;
1289         unsigned long flags;
1290
1291         root = (struct root_entry *)alloc_pgtable_page(iommu->node);
1292         if (!root) {
1293                 pr_err("Allocating root entry for %s failed\n",
1294                         iommu->name);
1295                 return -ENOMEM;
1296         }
1297
1298         __iommu_flush_cache(iommu, root, ROOT_SIZE);
1299
1300         spin_lock_irqsave(&iommu->lock, flags);
1301         iommu->root_entry = root;
1302         spin_unlock_irqrestore(&iommu->lock, flags);
1303
1304         return 0;
1305 }
1306
1307 static void iommu_set_root_entry(struct intel_iommu *iommu)
1308 {
1309         u64 addr;
1310         u32 sts;
1311         unsigned long flag;
1312
1313         addr = virt_to_phys(iommu->root_entry);
1314         if (ecs_enabled(iommu))
1315                 addr |= DMA_RTADDR_RTT;
1316
1317         raw_spin_lock_irqsave(&iommu->register_lock, flag);
1318         dmar_writeq(iommu->reg + DMAR_RTADDR_REG, addr);
1319
1320         writel(iommu->gcmd | DMA_GCMD_SRTP, iommu->reg + DMAR_GCMD_REG);
1321
1322         /* Make sure hardware completes it */
1323         IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
1324                       readl, (sts & DMA_GSTS_RTPS), sts);
1325
1326         raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1327 }
1328
1329 static void iommu_flush_write_buffer(struct intel_iommu *iommu)
1330 {
1331         u32 val;
1332         unsigned long flag;
1333
1334         if (!rwbf_quirk && !cap_rwbf(iommu->cap))
1335                 return;
1336
1337         raw_spin_lock_irqsave(&iommu->register_lock, flag);
1338         writel(iommu->gcmd | DMA_GCMD_WBF, iommu->reg + DMAR_GCMD_REG);
1339
1340         /* Make sure hardware completes it */
1341         IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
1342                       readl, (!(val & DMA_GSTS_WBFS)), val);
1343
1344         raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1345 }
1346
1347 /* return value determines whether we need a write buffer flush */
1348 static void __iommu_flush_context(struct intel_iommu *iommu,
1349                                   u16 did, u16 source_id, u8 function_mask,
1350                                   u64 type)
1351 {
1352         u64 val = 0;
1353         unsigned long flag;
1354
1355         switch (type) {
1356         case DMA_CCMD_GLOBAL_INVL:
1357                 val = DMA_CCMD_GLOBAL_INVL;
1358                 break;
1359         case DMA_CCMD_DOMAIN_INVL:
1360                 val = DMA_CCMD_DOMAIN_INVL|DMA_CCMD_DID(did);
1361                 break;
1362         case DMA_CCMD_DEVICE_INVL:
1363                 val = DMA_CCMD_DEVICE_INVL|DMA_CCMD_DID(did)
1364                         | DMA_CCMD_SID(source_id) | DMA_CCMD_FM(function_mask);
1365                 break;
1366         default:
1367                 BUG();
1368         }
1369         val |= DMA_CCMD_ICC;
1370
1371         raw_spin_lock_irqsave(&iommu->register_lock, flag);
1372         dmar_writeq(iommu->reg + DMAR_CCMD_REG, val);
1373
1374         /* Make sure hardware completes it */
1375         IOMMU_WAIT_OP(iommu, DMAR_CCMD_REG,
1376                 dmar_readq, (!(val & DMA_CCMD_ICC)), val);
1377
1378         raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1379 }
1380
1381 /* return value determines whether we need a write buffer flush */
1382 static void __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did,
1383                                 u64 addr, unsigned int size_order, u64 type)
1384 {
1385         int tlb_offset = ecap_iotlb_offset(iommu->ecap);
1386         u64 val = 0, val_iva = 0;
1387         unsigned long flag;
1388
1389         switch (type) {
1390         case DMA_TLB_GLOBAL_FLUSH:
1391                 /* global flush doesn't need set IVA_REG */
1392                 val = DMA_TLB_GLOBAL_FLUSH|DMA_TLB_IVT;
1393                 break;
1394         case DMA_TLB_DSI_FLUSH:
1395                 val = DMA_TLB_DSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
1396                 break;
1397         case DMA_TLB_PSI_FLUSH:
1398                 val = DMA_TLB_PSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
1399                 /* IH bit is passed in as part of address */
1400                 val_iva = size_order | addr;
1401                 break;
1402         default:
1403                 BUG();
1404         }
1405         /* Note: set drain read/write */
1406 #if 0
1407         /*
1408          * This is probably just to be extra safe. It looks like we can
1409          * ignore it without any impact.
1410          */
1411         if (cap_read_drain(iommu->cap))
1412                 val |= DMA_TLB_READ_DRAIN;
1413 #endif
1414         if (cap_write_drain(iommu->cap))
1415                 val |= DMA_TLB_WRITE_DRAIN;
1416
1417         raw_spin_lock_irqsave(&iommu->register_lock, flag);
1418         /* Note: Only uses first TLB reg currently */
1419         if (val_iva)
1420                 dmar_writeq(iommu->reg + tlb_offset, val_iva);
1421         dmar_writeq(iommu->reg + tlb_offset + 8, val);
1422
1423         /* Make sure hardware completes it */
1424         IOMMU_WAIT_OP(iommu, tlb_offset + 8,
1425                 dmar_readq, (!(val & DMA_TLB_IVT)), val);
1426
1427         raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1428
1429         /* check IOTLB invalidation granularity */
1430         if (DMA_TLB_IAIG(val) == 0)
1431                 pr_err("Flush IOTLB failed\n");
1432         if (DMA_TLB_IAIG(val) != DMA_TLB_IIRG(type))
1433                 pr_debug("TLB flush request %Lx, actual %Lx\n",
1434                         (unsigned long long)DMA_TLB_IIRG(type),
1435                         (unsigned long long)DMA_TLB_IAIG(val));
1436 }
1437
1438 static struct device_domain_info *
1439 iommu_support_dev_iotlb(struct dmar_domain *domain, struct intel_iommu *iommu,
1440                          u8 bus, u8 devfn)
1441 {
1442         struct device_domain_info *info;
1443
1444         assert_spin_locked(&device_domain_lock);
1445
1446         if (!iommu->qi)
1447                 return NULL;
1448
1449         list_for_each_entry(info, &domain->devices, link)
1450                 if (info->iommu == iommu && info->bus == bus &&
1451                     info->devfn == devfn) {
1452                         if (info->ats_supported && info->dev)
1453                                 return info;
1454                         break;
1455                 }
1456
1457         return NULL;
1458 }
1459
1460 static void iommu_enable_dev_iotlb(struct device_domain_info *info)
1461 {
1462         struct pci_dev *pdev;
1463
1464         if (!info || !dev_is_pci(info->dev))
1465                 return;
1466
1467         pdev = to_pci_dev(info->dev);
1468
1469 #ifdef CONFIG_INTEL_IOMMU_SVM
1470         /* The PCIe spec, in its wisdom, declares that the behaviour of
1471            the device if you enable PASID support after ATS support is
1472            undefined. So always enable PASID support on devices which
1473            have it, even if we can't yet know if we're ever going to
1474            use it. */
1475         if (info->pasid_supported && !pci_enable_pasid(pdev, info->pasid_supported & ~1))
1476                 info->pasid_enabled = 1;
1477
1478         if (info->pri_supported && !pci_reset_pri(pdev) && !pci_enable_pri(pdev, 32))
1479                 info->pri_enabled = 1;
1480 #endif
1481         if (info->ats_supported && !pci_enable_ats(pdev, VTD_PAGE_SHIFT)) {
1482                 info->ats_enabled = 1;
1483                 info->ats_qdep = pci_ats_queue_depth(pdev);
1484         }
1485 }
1486
1487 static void iommu_disable_dev_iotlb(struct device_domain_info *info)
1488 {
1489         struct pci_dev *pdev;
1490
1491         if (!dev_is_pci(info->dev))
1492                 return;
1493
1494         pdev = to_pci_dev(info->dev);
1495
1496         if (info->ats_enabled) {
1497                 pci_disable_ats(pdev);
1498                 info->ats_enabled = 0;
1499         }
1500 #ifdef CONFIG_INTEL_IOMMU_SVM
1501         if (info->pri_enabled) {
1502                 pci_disable_pri(pdev);
1503                 info->pri_enabled = 0;
1504         }
1505         if (info->pasid_enabled) {
1506                 pci_disable_pasid(pdev);
1507                 info->pasid_enabled = 0;
1508         }
1509 #endif
1510 }
1511
1512 static void iommu_flush_dev_iotlb(struct dmar_domain *domain,
1513                                   u64 addr, unsigned mask)
1514 {
1515         u16 sid, qdep;
1516         unsigned long flags;
1517         struct device_domain_info *info;
1518
1519         spin_lock_irqsave(&device_domain_lock, flags);
1520         list_for_each_entry(info, &domain->devices, link) {
1521                 if (!info->ats_enabled)
1522                         continue;
1523
1524                 sid = info->bus << 8 | info->devfn;
1525                 qdep = info->ats_qdep;
1526                 qi_flush_dev_iotlb(info->iommu, sid, qdep, addr, mask);
1527         }
1528         spin_unlock_irqrestore(&device_domain_lock, flags);
1529 }
1530
1531 static void iommu_flush_iotlb_psi(struct intel_iommu *iommu,
1532                                   struct dmar_domain *domain,
1533                                   unsigned long pfn, unsigned int pages,
1534                                   int ih, int map)
1535 {
1536         unsigned int mask = ilog2(__roundup_pow_of_two(pages));
1537         uint64_t addr = (uint64_t)pfn << VTD_PAGE_SHIFT;
1538         u16 did = domain->iommu_did[iommu->seq_id];
1539
1540         BUG_ON(pages == 0);
1541
1542         if (ih)
1543                 ih = 1 << 6;
1544         /*
1545          * Fall back to domain selective flush if no PSI support or the size is
1546          * too big.
1547          * PSI requires page size to be 2 ^ x, and the base address is naturally
1548          * aligned to the size
1549          */
1550         if (!cap_pgsel_inv(iommu->cap) || mask > cap_max_amask_val(iommu->cap))
1551                 iommu->flush.flush_iotlb(iommu, did, 0, 0,
1552                                                 DMA_TLB_DSI_FLUSH);
1553         else
1554                 iommu->flush.flush_iotlb(iommu, did, addr | ih, mask,
1555                                                 DMA_TLB_PSI_FLUSH);
1556
1557         /*
1558          * In caching mode, changes of pages from non-present to present require
1559          * flush. However, device IOTLB doesn't need to be flushed in this case.
1560          */
1561         if (!cap_caching_mode(iommu->cap) || !map)
1562                 iommu_flush_dev_iotlb(get_iommu_domain(iommu, did),
1563                                       addr, mask);
1564 }
1565
1566 static void iommu_disable_protect_mem_regions(struct intel_iommu *iommu)
1567 {
1568         u32 pmen;
1569         unsigned long flags;
1570
1571         raw_spin_lock_irqsave(&iommu->register_lock, flags);
1572         pmen = readl(iommu->reg + DMAR_PMEN_REG);
1573         pmen &= ~DMA_PMEN_EPM;
1574         writel(pmen, iommu->reg + DMAR_PMEN_REG);
1575
1576         /* wait for the protected region status bit to clear */
1577         IOMMU_WAIT_OP(iommu, DMAR_PMEN_REG,
1578                 readl, !(pmen & DMA_PMEN_PRS), pmen);
1579
1580         raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
1581 }
1582
1583 static void iommu_enable_translation(struct intel_iommu *iommu)
1584 {
1585         u32 sts;
1586         unsigned long flags;
1587
1588         raw_spin_lock_irqsave(&iommu->register_lock, flags);
1589         iommu->gcmd |= DMA_GCMD_TE;
1590         writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
1591
1592         /* Make sure hardware completes it */
1593         IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
1594                       readl, (sts & DMA_GSTS_TES), sts);
1595
1596         raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
1597 }
1598
1599 static void iommu_disable_translation(struct intel_iommu *iommu)
1600 {
1601         u32 sts;
1602         unsigned long flag;
1603
1604         raw_spin_lock_irqsave(&iommu->register_lock, flag);
1605         iommu->gcmd &= ~DMA_GCMD_TE;
1606         writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
1607
1608         /* Make sure hardware completes it */
1609         IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
1610                       readl, (!(sts & DMA_GSTS_TES)), sts);
1611
1612         raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1613 }
1614
1615
1616 static int iommu_init_domains(struct intel_iommu *iommu)
1617 {
1618         u32 ndomains, nlongs;
1619         size_t size;
1620
1621         ndomains = cap_ndoms(iommu->cap);
1622         pr_debug("%s: Number of Domains supported <%d>\n",
1623                  iommu->name, ndomains);
1624         nlongs = BITS_TO_LONGS(ndomains);
1625
1626         spin_lock_init(&iommu->lock);
1627
1628         iommu->domain_ids = kcalloc(nlongs, sizeof(unsigned long), GFP_KERNEL);
1629         if (!iommu->domain_ids) {
1630                 pr_err("%s: Allocating domain id array failed\n",
1631                        iommu->name);
1632                 return -ENOMEM;
1633         }
1634
1635         size = ((ndomains >> 8) + 1) * sizeof(struct dmar_domain **);
1636         iommu->domains = kzalloc(size, GFP_KERNEL);
1637
1638         if (iommu->domains) {
1639                 size = 256 * sizeof(struct dmar_domain *);
1640                 iommu->domains[0] = kzalloc(size, GFP_KERNEL);
1641         }
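        /*
         * iommu->domains is a two-level table: (ndomains >> 8) + 1 first
         * level slots, each of which can point at a page of 256 domain
         * pointers. Only the first second-level page is allocated here;
         * the remaining pages are presumably allocated on demand when a
         * domain id in their range is first used.
         */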
1642
1643         if (!iommu->domains || !iommu->domains[0]) {
1644                 pr_err("%s: Allocating domain array failed\n",
1645                        iommu->name);
1646                 kfree(iommu->domain_ids);
1647                 kfree(iommu->domains);
1648                 iommu->domain_ids = NULL;
1649                 iommu->domains    = NULL;
1650                 return -ENOMEM;
1651         }
1652
1653
1654
1655         /*
1656          * If Caching mode is set, then invalid translations are tagged
1657          * with domain-id 0, hence we need to pre-allocate it. We also
1658          * use domain-id 0 as a marker for non-allocated domain-id, so
1659          * make sure it is not used for a real domain.
1660          */
1661         set_bit(0, iommu->domain_ids);
1662
1663         return 0;
1664 }
1665
1666 static void disable_dmar_iommu(struct intel_iommu *iommu)
1667 {
1668         struct device_domain_info *info, *tmp;
1669         unsigned long flags;
1670
1671         if (!iommu->domains || !iommu->domain_ids)
1672                 return;
1673
1674         spin_lock_irqsave(&device_domain_lock, flags);
1675         list_for_each_entry_safe(info, tmp, &device_domain_list, global) {
1676                 struct dmar_domain *domain;
1677
1678                 if (info->iommu != iommu)
1679                         continue;
1680
1681                 if (!info->dev || !info->domain)
1682                         continue;
1683
1684                 domain = info->domain;
1685
1686                 dmar_remove_one_dev_info(domain, info->dev);
1687
1688                 if (!domain_type_is_vm_or_si(domain))
1689                         domain_exit(domain);
1690         }
1691         spin_unlock_irqrestore(&device_domain_lock, flags);
1692
1693         if (iommu->gcmd & DMA_GCMD_TE)
1694                 iommu_disable_translation(iommu);
1695 }
1696
1697 static void free_dmar_iommu(struct intel_iommu *iommu)
1698 {
1699         if ((iommu->domains) && (iommu->domain_ids)) {
1700                 int elems = (cap_ndoms(iommu->cap) >> 8) + 1;
1701                 int i;
1702
1703                 for (i = 0; i < elems; i++)
1704                         kfree(iommu->domains[i]);
1705                 kfree(iommu->domains);
1706                 kfree(iommu->domain_ids);
1707                 iommu->domains = NULL;
1708                 iommu->domain_ids = NULL;
1709         }
1710
1711         g_iommus[iommu->seq_id] = NULL;
1712
1713         /* free context mapping */
1714         free_context_table(iommu);
1715
1716 #ifdef CONFIG_INTEL_IOMMU_SVM
1717         if (pasid_enabled(iommu)) {
1718                 if (ecap_prs(iommu->ecap))
1719                         intel_svm_finish_prq(iommu);
1720                 intel_svm_free_pasid_tables(iommu);
1721         }
1722 #endif
1723 }
1724
1725 static struct dmar_domain *alloc_domain(int flags)
1726 {
1727         struct dmar_domain *domain;
1728
1729         domain = alloc_domain_mem();
1730         if (!domain)
1731                 return NULL;
1732
1733         memset(domain, 0, sizeof(*domain));
1734         domain->nid = -1;
1735         domain->flags = flags;
1736         INIT_LIST_HEAD(&domain->devices);
1737
1738         return domain;
1739 }
1740
1741 /* Must be called with iommu->lock */
1742 static int domain_attach_iommu(struct dmar_domain *domain,
1743                                struct intel_iommu *iommu)
1744 {
1745         unsigned long ndomains;
1746         int num;
1747
1748         assert_spin_locked(&device_domain_lock);
1749         assert_spin_locked(&iommu->lock);
1750
1751         domain->iommu_refcnt[iommu->seq_id] += 1;
1752         domain->iommu_count += 1;
1753         if (domain->iommu_refcnt[iommu->seq_id] == 1) {
1754                 ndomains = cap_ndoms(iommu->cap);
1755                 num      = find_first_zero_bit(iommu->domain_ids, ndomains);
1756
1757                 if (num >= ndomains) {
1758                         pr_err("%s: No free domain ids\n", iommu->name);
1759                         domain->iommu_refcnt[iommu->seq_id] -= 1;
1760                         domain->iommu_count -= 1;
1761                         return -ENOSPC;
1762                 }
1763
1764                 set_bit(num, iommu->domain_ids);
1765                 set_iommu_domain(iommu, num, domain);
1766
1767                 domain->iommu_did[iommu->seq_id] = num;
1768                 domain->nid                      = iommu->node;
1769
1770                 domain_update_iommu_cap(domain);
1771         }
1772
1773         return 0;
1774 }
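/*
 * domain_attach_iommu() above and domain_detach_iommu() below keep a
 * per-IOMMU reference count for the domain: the first attach on a given
 * IOMMU allocates a domain id on it, and the last detach releases that
 * id again.
 */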
1775
1776 static int domain_detach_iommu(struct dmar_domain *domain,
1777                                struct intel_iommu *iommu)
1778 {
1779         int num, count = INT_MAX;
1780
1781         assert_spin_locked(&device_domain_lock);
1782         assert_spin_locked(&iommu->lock);
1783
1784         domain->iommu_refcnt[iommu->seq_id] -= 1;
1785         count = --domain->iommu_count;
1786         if (domain->iommu_refcnt[iommu->seq_id] == 0) {
1787                 num = domain->iommu_did[iommu->seq_id];
1788                 clear_bit(num, iommu->domain_ids);
1789                 set_iommu_domain(iommu, num, NULL);
1790
1791                 domain_update_iommu_cap(domain);
1792                 domain->iommu_did[iommu->seq_id] = 0;
1793         }
1794
1795         return count;
1796 }
1797
1798 static struct iova_domain reserved_iova_list;
1799 static struct lock_class_key reserved_rbtree_key;
1800
1801 static int dmar_init_reserved_ranges(void)
1802 {
1803         struct pci_dev *pdev = NULL;
1804         struct iova *iova;
1805         int i;
1806
1807         init_iova_domain(&reserved_iova_list, VTD_PAGE_SIZE, IOVA_START_PFN,
1808                         DMA_32BIT_PFN);
1809
1810         lockdep_set_class(&reserved_iova_list.iova_rbtree_lock,
1811                 &reserved_rbtree_key);
1812
1813         /* IOAPIC ranges shouldn't be accessed by DMA */
1814         iova = reserve_iova(&reserved_iova_list, IOVA_PFN(IOAPIC_RANGE_START),
1815                 IOVA_PFN(IOAPIC_RANGE_END));
1816         if (!iova) {
1817                 pr_err("Reserve IOAPIC range failed\n");
1818                 return -ENODEV;
1819         }
1820
1821         /* Reserve all PCI MMIO to avoid peer-to-peer access */
1822         for_each_pci_dev(pdev) {
1823                 struct resource *r;
1824
1825                 for (i = 0; i < PCI_NUM_RESOURCES; i++) {
1826                         r = &pdev->resource[i];
1827                         if (!r->flags || !(r->flags & IORESOURCE_MEM))
1828                                 continue;
1829                         iova = reserve_iova(&reserved_iova_list,
1830                                             IOVA_PFN(r->start),
1831                                             IOVA_PFN(r->end));
1832                         if (!iova) {
1833                                 pr_err("Reserve iova failed\n");
1834                                 return -ENODEV;
1835                         }
1836                 }
1837         }
1838         return 0;
1839 }
1840
1841 static void domain_reserve_special_ranges(struct dmar_domain *domain)
1842 {
1843         copy_reserved_iova(&reserved_iova_list, &domain->iovad);
1844 }
1845
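/*
 * Round the guest address width up to an adjusted width that matches whole
 * page-table levels (9 bits per level above the 12-bit page offset). For
 * example, 39 and 48 map to themselves, while 40 rounds up to 48. The
 * result is capped at 64 bits.
 */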
1846 static inline int guestwidth_to_adjustwidth(int gaw)
1847 {
1848         int agaw;
1849         int r = (gaw - 12) % 9;
1850
1851         if (r == 0)
1852                 agaw = gaw;
1853         else
1854                 agaw = gaw + 9 - r;
1855         if (agaw > 64)
1856                 agaw = 64;
1857         return agaw;
1858 }
1859
1860 static int domain_init(struct dmar_domain *domain, struct intel_iommu *iommu,
1861                        int guest_width)
1862 {
1863         int adjust_width, agaw;
1864         unsigned long sagaw;
1865
1866         init_iova_domain(&domain->iovad, VTD_PAGE_SIZE, IOVA_START_PFN,
1867                         DMA_32BIT_PFN);
1868         domain_reserve_special_ranges(domain);
1869
1870         /* calculate AGAW */
1871         if (guest_width > cap_mgaw(iommu->cap))
1872                 guest_width = cap_mgaw(iommu->cap);
1873         domain->gaw = guest_width;
1874         adjust_width = guestwidth_to_adjustwidth(guest_width);
1875         agaw = width_to_agaw(adjust_width);
1876         sagaw = cap_sagaw(iommu->cap);
1877         if (!test_bit(agaw, &sagaw)) {
1878                 /* hardware doesn't support it, choose a bigger one */
1879                 pr_debug("Hardware doesn't support agaw %d\n", agaw);
1880                 agaw = find_next_bit(&sagaw, 5, agaw);
1881                 if (agaw >= 5)
1882                         return -ENODEV;
1883         }
1884         domain->agaw = agaw;
1885
1886         if (ecap_coherent(iommu->ecap))
1887                 domain->iommu_coherency = 1;
1888         else
1889                 domain->iommu_coherency = 0;
1890
1891         if (ecap_sc_support(iommu->ecap))
1892                 domain->iommu_snooping = 1;
1893         else
1894                 domain->iommu_snooping = 0;
1895
1896         if (intel_iommu_superpage)
1897                 domain->iommu_superpage = fls(cap_super_page_val(iommu->cap));
1898         else
1899                 domain->iommu_superpage = 0;
1900
1901         domain->nid = iommu->node;
1902
1903         /* always allocate the top pgd */
1904         domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid);
1905         if (!domain->pgd)
1906                 return -ENOMEM;
1907         __iommu_flush_cache(iommu, domain->pgd, PAGE_SIZE);
1908         return 0;
1909 }
1910
1911 static void domain_exit(struct dmar_domain *domain)
1912 {
1913         struct page *freelist = NULL;
1914
1915         /* Domain 0 is reserved, so don't process it */
1916         if (!domain)
1917                 return;
1918
1919         /* Flush any lazy unmaps that may reference this domain */
1920         if (!intel_iommu_strict)
1921                 flush_unmaps_timeout(0);
1922
1923         /* Remove associated devices and clear attached or cached domains */
1924         rcu_read_lock();
1925         domain_remove_dev_info(domain);
1926         rcu_read_unlock();
1927
1928         /* destroy iovas */
1929         put_iova_domain(&domain->iovad);
1930
1931         freelist = domain_unmap(domain, 0, DOMAIN_MAX_PFN(domain->gaw));
1932
1933         dma_free_pagelist(freelist);
1934
1935         free_domain_mem(domain);
1936 }
1937
1938 static int domain_context_mapping_one(struct dmar_domain *domain,
1939                                       struct intel_iommu *iommu,
1940                                       u8 bus, u8 devfn)
1941 {
1942         u16 did = domain->iommu_did[iommu->seq_id];
1943         int translation = CONTEXT_TT_MULTI_LEVEL;
1944         struct device_domain_info *info = NULL;
1945         struct context_entry *context;
1946         unsigned long flags;
1947         struct dma_pte *pgd;
1948         int ret, agaw;
1949
1950         WARN_ON(did == 0);
1951
1952         if (hw_pass_through && domain_type_is_si(domain))
1953                 translation = CONTEXT_TT_PASS_THROUGH;
1954
1955         pr_debug("Set context mapping for %02x:%02x.%d\n",
1956                 bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
1957
1958         BUG_ON(!domain->pgd);
1959
1960         spin_lock_irqsave(&device_domain_lock, flags);
1961         spin_lock(&iommu->lock);
1962
1963         ret = -ENOMEM;
1964         context = iommu_context_addr(iommu, bus, devfn, 1);
1965         if (!context)
1966                 goto out_unlock;
1967
1968         ret = 0;
1969         if (context_present(context))
1970                 goto out_unlock;
1971
1972         pgd = domain->pgd;
1973
1974         context_clear_entry(context);
1975         context_set_domain_id(context, did);
1976
1977         /*
1978          * Skip top levels of page tables for an IOMMU which has a smaller
1979          * agaw than the default.  Unnecessary for PT mode.
1980          */
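        /*
         * For example, if the domain was built with 4-level tables (48-bit
         * agaw) but this IOMMU only supports 3 levels (39-bit), the loop
         * below descends one level and that lower table becomes the address
         * root programmed into the context entry.
         */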
1981         if (translation != CONTEXT_TT_PASS_THROUGH) {
1982                 for (agaw = domain->agaw; agaw != iommu->agaw; agaw--) {
1983                         ret = -ENOMEM;
1984                         pgd = phys_to_virt(dma_pte_addr(pgd));
1985                         if (!dma_pte_present(pgd))
1986                                 goto out_unlock;
1987                 }
1988
1989                 info = iommu_support_dev_iotlb(domain, iommu, bus, devfn);
1990                 if (info && info->ats_supported)
1991                         translation = CONTEXT_TT_DEV_IOTLB;
1992                 else
1993                         translation = CONTEXT_TT_MULTI_LEVEL;
1994
1995                 context_set_address_root(context, virt_to_phys(pgd));
1996                 context_set_address_width(context, iommu->agaw);
1997         } else {
1998                 /*
1999                  * In pass through mode, AW must be programmed to
2000                  * indicate the largest AGAW value supported by
2001                  * hardware. And ASR is ignored by hardware.
2002                  */
2003                 context_set_address_width(context, iommu->msagaw);
2004         }
2005
2006         context_set_translation_type(context, translation);
2007         context_set_fault_enable(context);
2008         context_set_present(context);
2009         domain_flush_cache(domain, context, sizeof(*context));
2010
2011         /*
2012          * It's a non-present to present mapping. If hardware doesn't cache
2013          * non-present entries we only need to flush the write-buffer. If it
2014          * _does_ cache non-present entries, then it does so in the special
2015          * domain #0, which we have to flush:
2016          */
2017         if (cap_caching_mode(iommu->cap)) {
2018                 iommu->flush.flush_context(iommu, 0,
2019                                            (((u16)bus) << 8) | devfn,
2020                                            DMA_CCMD_MASK_NOBIT,
2021                                            DMA_CCMD_DEVICE_INVL);
2022                 iommu->flush.flush_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH);
2023         } else {
2024                 iommu_flush_write_buffer(iommu);
2025         }
2026         iommu_enable_dev_iotlb(info);
2027
2028         ret = 0;
2029
2030 out_unlock:
2031         spin_unlock(&iommu->lock);
2032         spin_unlock_irqrestore(&device_domain_lock, flags);
2033
2034         return ret;
2035 }
2036
2037 struct domain_context_mapping_data {
2038         struct dmar_domain *domain;
2039         struct intel_iommu *iommu;
2040 };
2041
2042 static int domain_context_mapping_cb(struct pci_dev *pdev,
2043                                      u16 alias, void *opaque)
2044 {
2045         struct domain_context_mapping_data *data = opaque;
2046
2047         return domain_context_mapping_one(data->domain, data->iommu,
2048                                           PCI_BUS_NUM(alias), alias & 0xff);
2049 }
2050
2051 static int
2052 domain_context_mapping(struct dmar_domain *domain, struct device *dev)
2053 {
2054         struct intel_iommu *iommu;
2055         u8 bus, devfn;
2056         struct domain_context_mapping_data data;
2057
2058         iommu = device_to_iommu(dev, &bus, &devfn);
2059         if (!iommu)
2060                 return -ENODEV;
2061
2062         if (!dev_is_pci(dev))
2063                 return domain_context_mapping_one(domain, iommu, bus, devfn);
2064
2065         data.domain = domain;
2066         data.iommu = iommu;
2067
2068         return pci_for_each_dma_alias(to_pci_dev(dev),
2069                                       &domain_context_mapping_cb, &data);
2070 }
2071
2072 static int domain_context_mapped_cb(struct pci_dev *pdev,
2073                                     u16 alias, void *opaque)
2074 {
2075         struct intel_iommu *iommu = opaque;
2076
2077         return !device_context_mapped(iommu, PCI_BUS_NUM(alias), alias & 0xff);
2078 }
2079
2080 static int domain_context_mapped(struct device *dev)
2081 {
2082         struct intel_iommu *iommu;
2083         u8 bus, devfn;
2084
2085         iommu = device_to_iommu(dev, &bus, &devfn);
2086         if (!iommu)
2087                 return -ENODEV;
2088
2089         if (!dev_is_pci(dev))
2090                 return device_context_mapped(iommu, bus, devfn);
2091
2092         return !pci_for_each_dma_alias(to_pci_dev(dev),
2093                                        domain_context_mapped_cb, iommu);
2094 }
2095
2096 /* Returns a number of VTD pages, but aligned to MM page size */
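/*
 * For example, with 4KiB pages, host_addr offset 0x200 and size 0x1000
 * straddle two pages, so 2 is returned.
 */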
2097 static inline unsigned long aligned_nrpages(unsigned long host_addr,
2098                                             size_t size)
2099 {
2100         host_addr &= ~PAGE_MASK;
2101         return PAGE_ALIGN(host_addr + size) >> VTD_PAGE_SHIFT;
2102 }
2103
2104 /* Return largest possible superpage level for a given mapping */
2105 static inline int hardware_largepage_caps(struct dmar_domain *domain,
2106                                           unsigned long iov_pfn,
2107                                           unsigned long phy_pfn,
2108                                           unsigned long pages)
2109 {
2110         int support, level = 1;
2111         unsigned long pfnmerge;
2112
2113         support = domain->iommu_superpage;
2114
2115         /* To use a large page, the virtual *and* physical addresses
2116            must be aligned to 2MiB/1GiB/etc. Lower bits set in either
2117            of them will mean we have to use smaller pages. So just
2118            merge them and check both at once. */
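        /*
         * For example, if both PFNs are aligned to 512 pages (2MiB) and at
         * least 512 pages are being mapped, level 2 is returned, provided
         * the hardware advertises 2MiB superpage support.
         */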
2119         pfnmerge = iov_pfn | phy_pfn;
2120
2121         while (support && !(pfnmerge & ~VTD_STRIDE_MASK)) {
2122                 pages >>= VTD_STRIDE_SHIFT;
2123                 if (!pages)
2124                         break;
2125                 pfnmerge >>= VTD_STRIDE_SHIFT;
2126                 level++;
2127                 support--;
2128         }
2129         return level;
2130 }
2131
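/*
 * Core mapping helper: maps nr_pages of IOVA starting at iov_pfn either
 * from a scatterlist (when sg is non-NULL) or from a contiguous physical
 * range starting at phys_pfn, using the largest superpage size that the
 * alignment and length of each chunk allow.
 */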
2132 static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
2133                             struct scatterlist *sg, unsigned long phys_pfn,
2134                             unsigned long nr_pages, int prot)
2135 {
2136         struct dma_pte *first_pte = NULL, *pte = NULL;
2137         phys_addr_t uninitialized_var(pteval);
2138         unsigned long sg_res = 0;
2139         unsigned int largepage_lvl = 0;
2140         unsigned long lvl_pages = 0;
2141
2142         BUG_ON(!domain_pfn_supported(domain, iov_pfn + nr_pages - 1));
2143
2144         if ((prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0)
2145                 return -EINVAL;
2146
2147         prot &= DMA_PTE_READ | DMA_PTE_WRITE | DMA_PTE_SNP;
2148
2149         if (!sg) {
2150                 sg_res = nr_pages;
2151                 pteval = ((phys_addr_t)phys_pfn << VTD_PAGE_SHIFT) | prot;
2152         }
2153
2154         while (nr_pages > 0) {
2155                 uint64_t tmp;
2156
2157                 if (!sg_res) {
2158                         sg_res = aligned_nrpages(sg->offset, sg->length);
2159                         sg->dma_address = ((dma_addr_t)iov_pfn << VTD_PAGE_SHIFT) + sg->offset;
2160                         sg->dma_length = sg->length;
2161                         pteval = (sg_phys(sg) & PAGE_MASK) | prot;
2162                         phys_pfn = pteval >> VTD_PAGE_SHIFT;
2163                 }
2164
2165                 if (!pte) {
2166                         largepage_lvl = hardware_largepage_caps(domain, iov_pfn, phys_pfn, sg_res);
2167
2168                         first_pte = pte = pfn_to_dma_pte(domain, iov_pfn, &largepage_lvl);
2169                         if (!pte)
2170                                 return -ENOMEM;
2171                         /* It is a large page */
2172                         if (largepage_lvl > 1) {
2173                                 unsigned long nr_superpages, end_pfn;
2174
2175                                 pteval |= DMA_PTE_LARGE_PAGE;
2176                                 lvl_pages = lvl_to_nr_pages(largepage_lvl);
2177
2178                                 nr_superpages = sg_res / lvl_pages;
2179                                 end_pfn = iov_pfn + nr_superpages * lvl_pages - 1;
2180
2181                                 /*
2182                                  * Ensure that old small page tables are
2183                                  * removed to make room for superpage(s).
2184                                  */
2185                                 dma_pte_free_pagetable(domain, iov_pfn, end_pfn);
2186                         } else {
2187                                 pteval &= ~(uint64_t)DMA_PTE_LARGE_PAGE;
2188                         }
2189
2190                 }
2191                 /* We don't need a lock here; nobody else
2192                  * touches this iova range.
2193                  */
2194                 tmp = cmpxchg64_local(&pte->val, 0ULL, pteval);
2195                 if (tmp) {
2196                         static int dumps = 5;
2197                         pr_crit("ERROR: DMA PTE for vPFN 0x%lx already set (to %llx not %llx)\n",
2198                                 iov_pfn, tmp, (unsigned long long)pteval);
2199                         if (dumps) {
2200                                 dumps--;
2201                                 debug_dma_dump_mappings(NULL);
2202                         }
2203                         WARN_ON(1);
2204                 }
2205
2206                 lvl_pages = lvl_to_nr_pages(largepage_lvl);
2207
2208                 BUG_ON(nr_pages < lvl_pages);
2209                 BUG_ON(sg_res < lvl_pages);
2210
2211                 nr_pages -= lvl_pages;
2212                 iov_pfn += lvl_pages;
2213                 phys_pfn += lvl_pages;
2214                 pteval += lvl_pages * VTD_PAGE_SIZE;
2215                 sg_res -= lvl_pages;
2216
2217                 /* If the next PTE would be the first in a new page, then we
2218                    need to flush the cache on the entries we've just written.
2219                    And then we'll need to recalculate 'pte', so clear it and
2220                    let it get set again in the if (!pte) block above.
2221
2222                    If we're done (!nr_pages) we need to flush the cache too.
2223
2224                    Also if we've been setting superpages, we may need to
2225                    recalculate 'pte' and switch back to smaller pages for the
2226                    end of the mapping, if the trailing size is not enough to
2227                    use another superpage (i.e. sg_res < lvl_pages). */
2228                 pte++;
2229                 if (!nr_pages || first_pte_in_page(pte) ||
2230                     (largepage_lvl > 1 && sg_res < lvl_pages)) {
2231                         domain_flush_cache(domain, first_pte,
2232                                            (void *)pte - (void *)first_pte);
2233                         pte = NULL;
2234                 }
2235
2236                 if (!sg_res && nr_pages)
2237                         sg = sg_next(sg);
2238         }
2239         return 0;
2240 }
2241
2242 static inline int domain_sg_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
2243                                     struct scatterlist *sg, unsigned long nr_pages,
2244                                     int prot)
2245 {
2246         return __domain_mapping(domain, iov_pfn, sg, 0, nr_pages, prot);
2247 }
2248
2249 static inline int domain_pfn_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
2250                                      unsigned long phys_pfn, unsigned long nr_pages,
2251                                      int prot)
2252 {
2253         return __domain_mapping(domain, iov_pfn, NULL, phys_pfn, nr_pages, prot);
2254 }
2255
2256 static void domain_context_clear_one(struct intel_iommu *iommu, u8 bus, u8 devfn)
2257 {
2258         if (!iommu)
2259                 return;
2260
2261         clear_context_table(iommu, bus, devfn);
2262         iommu->flush.flush_context(iommu, 0, 0, 0,
2263                                            DMA_CCMD_GLOBAL_INVL);
2264         iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
2265 }
2266
2267 static inline void unlink_domain_info(struct device_domain_info *info)
2268 {
2269         assert_spin_locked(&device_domain_lock);
2270         list_del(&info->link);
2271         list_del(&info->global);
2272         if (info->dev)
2273                 info->dev->archdata.iommu = NULL;
2274 }
2275
2276 static void domain_remove_dev_info(struct dmar_domain *domain)
2277 {
2278         struct device_domain_info *info, *tmp;
2279         unsigned long flags;
2280
2281         spin_lock_irqsave(&device_domain_lock, flags);
2282         list_for_each_entry_safe(info, tmp, &domain->devices, link)
2283                 __dmar_remove_one_dev_info(info);
2284         spin_unlock_irqrestore(&device_domain_lock, flags);
2285 }
2286
2287 /*
2288  * find_domain
2289  * Note: we use struct device->archdata.iommu to store the info
2290  */
2291 static struct dmar_domain *find_domain(struct device *dev)
2292 {
2293         struct device_domain_info *info;
2294
2295         /* No lock here, assumes no domain exit in normal case */
2296         info = dev->archdata.iommu;
2297         if (info)
2298                 return info->domain;
2299         return NULL;
2300 }
2301
2302 static inline struct device_domain_info *
2303 dmar_search_domain_by_dev_info(int segment, int bus, int devfn)
2304 {
2305         struct device_domain_info *info;
2306
2307         list_for_each_entry(info, &device_domain_list, global)
2308                 if (info->iommu->segment == segment && info->bus == bus &&
2309                     info->devfn == devfn)
2310                         return info;
2311
2312         return NULL;
2313 }
2314
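/*
 * Bind a device (or a bus/devfn DMA alias with dev == NULL) to a domain:
 * allocate and fill the device_domain_info, probe ATS/PASID/PRI
 * capabilities, attach the domain to this IOMMU and set up the context
 * entry. If the device or its alias is already bound, the existing domain
 * is returned instead and the caller must free the domain it passed in.
 */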
2315 static struct dmar_domain *dmar_insert_one_dev_info(struct intel_iommu *iommu,
2316                                                     int bus, int devfn,
2317                                                     struct device *dev,
2318                                                     struct dmar_domain *domain)
2319 {
2320         struct dmar_domain *found = NULL;
2321         struct device_domain_info *info;
2322         unsigned long flags;
2323         int ret;
2324
2325         info = alloc_devinfo_mem();
2326         if (!info)
2327                 return NULL;
2328
2329         info->bus = bus;
2330         info->devfn = devfn;
2331         info->ats_supported = info->pasid_supported = info->pri_supported = 0;
2332         info->ats_enabled = info->pasid_enabled = info->pri_enabled = 0;
2333         info->ats_qdep = 0;
2334         info->dev = dev;
2335         info->domain = domain;
2336         info->iommu = iommu;
2337
2338         if (dev && dev_is_pci(dev)) {
2339                 struct pci_dev *pdev = to_pci_dev(info->dev);
2340
2341                 if (ecap_dev_iotlb_support(iommu->ecap) &&
2342                     pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_ATS) &&
2343                     dmar_find_matched_atsr_unit(pdev))
2344                         info->ats_supported = 1;
2345
2346                 if (ecs_enabled(iommu)) {
2347                         if (pasid_enabled(iommu)) {
2348                                 int features = pci_pasid_features(pdev);
2349                                 if (features >= 0)
2350                                         info->pasid_supported = features | 1;
2351                         }
2352
2353                         if (info->ats_supported && ecap_prs(iommu->ecap) &&
2354                             pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PRI))
2355                                 info->pri_supported = 1;
2356                 }
2357         }
2358
2359         spin_lock_irqsave(&device_domain_lock, flags);
2360         if (dev)
2361                 found = find_domain(dev);
2362
2363         if (!found) {
2364                 struct device_domain_info *info2;
2365                 info2 = dmar_search_domain_by_dev_info(iommu->segment, bus, devfn);
2366                 if (info2) {
2367                         found      = info2->domain;
2368                         info2->dev = dev;
2369                 }
2370         }
2371
2372         if (found) {
2373                 spin_unlock_irqrestore(&device_domain_lock, flags);
2374                 free_devinfo_mem(info);
2375                 /* Caller must free the original domain */
2376                 return found;
2377         }
2378
2379         spin_lock(&iommu->lock);
2380         ret = domain_attach_iommu(domain, iommu);
2381         spin_unlock(&iommu->lock);
2382
2383         if (ret) {
2384                 spin_unlock_irqrestore(&device_domain_lock, flags);
2385                 free_devinfo_mem(info);
2386                 return NULL;
2387         }
2388
2389         list_add(&info->link, &domain->devices);
2390         list_add(&info->global, &device_domain_list);
2391         if (dev)
2392                 dev->archdata.iommu = info;
2393         spin_unlock_irqrestore(&device_domain_lock, flags);
2394
2395         if (dev && domain_context_mapping(domain, dev)) {
2396                 pr_err("Domain context map for %s failed\n", dev_name(dev));
2397                 dmar_remove_one_dev_info(domain, dev);
2398                 return NULL;
2399         }
2400
2401         return domain;
2402 }
2403
2404 static int get_last_alias(struct pci_dev *pdev, u16 alias, void *opaque)
2405 {
2406         *(u16 *)opaque = alias;
2407         return 0;
2408 }
2409
2410 /* domain is initialized */
2411 static struct dmar_domain *get_domain_for_dev(struct device *dev, int gaw)
2412 {
2413         struct device_domain_info *info = NULL;
2414         struct dmar_domain *domain, *tmp;
2415         struct intel_iommu *iommu;
2416         u16 req_id, dma_alias;
2417         unsigned long flags;
2418         u8 bus, devfn;
2419
2420         domain = find_domain(dev);
2421         if (domain)
2422                 return domain;
2423
2424         iommu = device_to_iommu(dev, &bus, &devfn);
2425         if (!iommu)
2426                 return NULL;
2427
2428         req_id = ((u16)bus << 8) | devfn;
2429
2430         if (dev_is_pci(dev)) {
2431                 struct pci_dev *pdev = to_pci_dev(dev);
2432
2433                 pci_for_each_dma_alias(pdev, get_last_alias, &dma_alias);
2434
2435                 spin_lock_irqsave(&device_domain_lock, flags);
2436                 info = dmar_search_domain_by_dev_info(pci_domain_nr(pdev->bus),
2437                                                       PCI_BUS_NUM(dma_alias),
2438                                                       dma_alias & 0xff);
2439                 if (info) {
2440                         iommu = info->iommu;
2441                         domain = info->domain;
2442                 }
2443                 spin_unlock_irqrestore(&device_domain_lock, flags);
2444
2445                 /* DMA alias already has a domain, use it */
2446                 if (info)
2447                         goto found_domain;
2448         }
2449
2450         /* Allocate and initialize new domain for the device */
2451         domain = alloc_domain(0);
2452         if (!domain)
2453                 return NULL;
2454         if (domain_init(domain, iommu, gaw)) {
2455                 domain_exit(domain);
2456                 return NULL;
2457         }
2458
2459         /* register PCI DMA alias device */
2460         if (dev_is_pci(dev) && req_id != dma_alias) {
2461                 tmp = dmar_insert_one_dev_info(iommu, PCI_BUS_NUM(dma_alias),
2462                                                dma_alias & 0xff, NULL, domain);
2463
2464                 if (!tmp || tmp != domain) {
2465                         domain_exit(domain);
2466                         domain = tmp;
2467                 }
2468
2469                 if (!domain)
2470                         return NULL;
2471         }
2472
2473 found_domain:
2474         tmp = dmar_insert_one_dev_info(iommu, bus, devfn, dev, domain);
2475
2476         if (!tmp || tmp != domain) {
2477                 domain_exit(domain);
2478                 domain = tmp;
2479         }
2480
2481         return domain;
2482 }
2483
2484 static int iommu_domain_identity_map(struct dmar_domain *domain,
2485                                      unsigned long long start,
2486                                      unsigned long long end)
2487 {
2488         unsigned long first_vpfn = start >> VTD_PAGE_SHIFT;
2489         unsigned long last_vpfn = end >> VTD_PAGE_SHIFT;
2490
2491         if (!reserve_iova(&domain->iovad, dma_to_mm_pfn(first_vpfn),
2492                           dma_to_mm_pfn(last_vpfn))) {
2493                 pr_err("Reserving iova failed\n");
2494                 return -ENOMEM;
2495         }
2496
2497         pr_debug("Mapping reserved region %llx-%llx\n", start, end);
2498         /*
2499          * RMRR range might have overlap with physical memory range,
2500          * clear it first
2501          */
2502         dma_pte_clear_range(domain, first_vpfn, last_vpfn);
2503
2504         return domain_pfn_mapping(domain, first_vpfn, first_vpfn,
2505                                   last_vpfn - first_vpfn + 1,
2506                                   DMA_PTE_READ|DMA_PTE_WRITE);
2507 }
2508
2509 static int iommu_prepare_identity_map(struct device *dev,
2510                                       unsigned long long start,
2511                                       unsigned long long end)
2512 {
2513         struct dmar_domain *domain;
2514         int ret;
2515
2516         domain = get_domain_for_dev(dev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
2517         if (!domain)
2518                 return -ENOMEM;
2519
2520         /* For _hardware_ passthrough, don't bother. But for software
2521            passthrough, we do it anyway -- it may indicate a memory
2522            range which is reserved in E820 and so didn't get set
2523            up to start with in si_domain */
2524         if (domain == si_domain && hw_pass_through) {
2525                 pr_warn("Ignoring identity map for HW passthrough device %s [0x%Lx - 0x%Lx]\n",
2526                         dev_name(dev), start, end);
2527                 return 0;
2528         }
2529
2530         pr_info("Setting identity map for device %s [0x%Lx - 0x%Lx]\n",
2531                 dev_name(dev), start, end);
2532
2533         if (end < start) {
2534                 WARN(1, "Your BIOS is broken; RMRR ends before it starts!\n"
2535                         "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
2536                         dmi_get_system_info(DMI_BIOS_VENDOR),
2537                         dmi_get_system_info(DMI_BIOS_VERSION),
2538                         dmi_get_system_info(DMI_PRODUCT_VERSION));
2539                 ret = -EIO;
2540                 goto error;
2541         }
2542
2543         if (end >> agaw_to_width(domain->agaw)) {
2544                 WARN(1, "Your BIOS is broken; RMRR exceeds permitted address width (%d bits)\n"
2545                      "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
2546                      agaw_to_width(domain->agaw),
2547                      dmi_get_system_info(DMI_BIOS_VENDOR),
2548                      dmi_get_system_info(DMI_BIOS_VERSION),
2549                      dmi_get_system_info(DMI_PRODUCT_VERSION));
2550                 ret = -EIO;
2551                 goto error;
2552         }
2553
2554         ret = iommu_domain_identity_map(domain, start, end);
2555         if (ret)
2556                 goto error;
2557
2558         return 0;
2559
2560  error:
2561         domain_exit(domain);
2562         return ret;
2563 }
2564
2565 static inline int iommu_prepare_rmrr_dev(struct dmar_rmrr_unit *rmrr,
2566                                          struct device *dev)
2567 {
2568         if (dev->archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
2569                 return 0;
2570         return iommu_prepare_identity_map(dev, rmrr->base_address,
2571                                           rmrr->end_address);
2572 }
2573
2574 #ifdef CONFIG_INTEL_IOMMU_FLOPPY_WA
2575 static inline void iommu_prepare_isa(void)
2576 {
2577         struct pci_dev *pdev;
2578         int ret;
2579
2580         pdev = pci_get_class(PCI_CLASS_BRIDGE_ISA << 8, NULL);
2581         if (!pdev)
2582                 return;
2583
2584         pr_info("Prepare 0-16MiB unity mapping for LPC\n");
2585         ret = iommu_prepare_identity_map(&pdev->dev, 0, 16*1024*1024 - 1);
2586
2587         if (ret)
2588                 pr_err("Failed to create 0-16MiB identity map - floppy might not work\n");
2589
2590         pci_dev_put(pdev);
2591 }
2592 #else
2593 static inline void iommu_prepare_isa(void)
2594 {
2595         return;
2596 }
2597 #endif /* !CONFIG_INTEL_IOMMU_FLOPPY_WA */
2598
2599 static int md_domain_init(struct dmar_domain *domain, int guest_width);
2600
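/*
 * Set up the static identity (si) domain: a single domain that 1:1 maps
 * every usable physical memory range, used for devices that get identity
 * ("passthrough") mappings. With hardware pass-through (hw != 0) no page
 * tables are needed, so the memory ranges are not mapped.
 */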
2601 static int __init si_domain_init(int hw)
2602 {
2603         int nid, ret = 0;
2604
2605         si_domain = alloc_domain(DOMAIN_FLAG_STATIC_IDENTITY);
2606         if (!si_domain)
2607                 return -EFAULT;
2608
2609         if (md_domain_init(si_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
2610                 domain_exit(si_domain);
2611                 return -EFAULT;
2612         }
2613
2614         pr_debug("Identity mapping domain allocated\n");
2615
2616         if (hw)
2617                 return 0;
2618
2619         for_each_online_node(nid) {
2620                 unsigned long start_pfn, end_pfn;
2621                 int i;
2622
2623                 for_each_mem_pfn_range(i, nid, &start_pfn, &end_pfn, NULL) {
2624                         ret = iommu_domain_identity_map(si_domain,
2625                                         PFN_PHYS(start_pfn), PFN_PHYS(end_pfn));
2626                         if (ret)
2627                                 return ret;
2628                 }
2629         }
2630
2631         return 0;
2632 }
2633
2634 static int identity_mapping(struct device *dev)
2635 {
2636         struct device_domain_info *info;
2637
2638         if (likely(!iommu_identity_mapping))
2639                 return 0;
2640
2641         info = dev->archdata.iommu;
2642         if (info && info != DUMMY_DEVICE_DOMAIN_INFO)
2643                 return (info->domain == si_domain);
2644
2645         return 0;
2646 }
2647
2648 static int domain_add_dev_info(struct dmar_domain *domain, struct device *dev)
2649 {
2650         struct dmar_domain *ndomain;
2651         struct intel_iommu *iommu;
2652         u8 bus, devfn;
2653
2654         iommu = device_to_iommu(dev, &bus, &devfn);
2655         if (!iommu)
2656                 return -ENODEV;
2657
2658         ndomain = dmar_insert_one_dev_info(iommu, bus, devfn, dev, domain);
2659         if (ndomain != domain)
2660                 return -EBUSY;
2661
2662         return 0;
2663 }
2664
2665 static bool device_has_rmrr(struct device *dev)
2666 {
2667         struct dmar_rmrr_unit *rmrr;
2668         struct device *tmp;
2669         int i;
2670
2671         rcu_read_lock();
2672         for_each_rmrr_units(rmrr) {
2673                 /*
2674                  * Return TRUE if this RMRR contains the device that
2675                  * is passed in.
2676                  */
2677                 for_each_active_dev_scope(rmrr->devices,
2678                                           rmrr->devices_cnt, i, tmp)
2679                         if (tmp == dev) {
2680                                 rcu_read_unlock();
2681                                 return true;
2682                         }
2683         }
2684         rcu_read_unlock();
2685         return false;
2686 }
2687
2688 /*
2689  * There are a couple cases where we need to restrict the functionality of
2690  * devices associated with RMRRs.  The first is when evaluating a device for
2691  * identity mapping because problems exist when devices are moved in and out
2692  * of domains and their respective RMRR information is lost.  This means that
2693  * a device with associated RMRRs will never be in a "passthrough" domain.
2694  * The second is use of the device through the IOMMU API.  This interface
2695  * expects to have full control of the IOVA space for the device.  We cannot
2696  * satisfy both the requirement that RMRR access is maintained and have an
2697  * unencumbered IOVA space.  We also have no ability to quiesce the device's
2698  * use of the RMRR space or even inform the IOMMU API user of the restriction.
2699  * We therefore prevent devices associated with an RMRR from participating in
2700  * the IOMMU API, which eliminates them from device assignment.
2701  *
2702  * In both cases we assume that PCI USB devices with RMRRs have them largely
2703  * for historical reasons and that the RMRR space is not actively used post
2704  * boot.  This exclusion may change if vendors begin to abuse it.
2705  *
2706  * The same exception is made for graphics devices, with the requirement that
2707  * any use of the RMRR regions will be torn down before assigning the device
2708  * to a guest.
2709  */
2710 static bool device_is_rmrr_locked(struct device *dev)
2711 {
2712         if (!device_has_rmrr(dev))
2713                 return false;
2714
2715         if (dev_is_pci(dev)) {
2716                 struct pci_dev *pdev = to_pci_dev(dev);
2717
2718                 if (IS_USB_DEVICE(pdev) || IS_GFX_DEVICE(pdev))
2719                         return false;
2720         }
2721
2722         return true;
2723 }
2724
2725 static int iommu_should_identity_map(struct device *dev, int startup)
2726 {
2727
2728         if (dev_is_pci(dev)) {
2729                 struct pci_dev *pdev = to_pci_dev(dev);
2730
2731                 if (device_is_rmrr_locked(dev))
2732                         return 0;
2733
2734                 if ((iommu_identity_mapping & IDENTMAP_AZALIA) && IS_AZALIA(pdev))
2735                         return 1;
2736
2737                 if ((iommu_identity_mapping & IDENTMAP_GFX) && IS_GFX_DEVICE(pdev))
2738                         return 1;
2739
2740                 if (!(iommu_identity_mapping & IDENTMAP_ALL))
2741                         return 0;
2742
2743                 /*
2744                  * We want to start off with all devices in the 1:1 domain, and
2745                  * take them out later if we find they can't access all of memory.
2746                  *
2747                  * However, we can't do this for PCI devices behind bridges,
2748                  * because all PCI devices behind the same bridge will end up
2749                  * with the same source-id on their transactions.
2750                  *
2751                  * Practically speaking, we can't change things around for these
2752                  * devices at run-time, because we can't be sure there'll be no
2753                  * DMA transactions in flight for any of their siblings.
2754                  *
2755                  * So PCI devices (unless they're on the root bus) as well as
2756                  * their parent PCI-PCI or PCIe-PCI bridges must be left _out_ of
2757                  * the 1:1 domain, just in _case_ one of their siblings turns out
2758                  * not to be able to map all of memory.
2759                  */
2760                 if (!pci_is_pcie(pdev)) {
2761                         if (!pci_is_root_bus(pdev->bus))
2762                                 return 0;
2763                         if (pdev->class >> 8 == PCI_CLASS_BRIDGE_PCI)
2764                                 return 0;
2765                 } else if (pci_pcie_type(pdev) == PCI_EXP_TYPE_PCI_BRIDGE)
2766                         return 0;
2767         } else {
2768                 if (device_has_rmrr(dev))
2769                         return 0;
2770         }
2771
2772         /*
2773          * At boot time, we don't yet know if devices will be 64-bit capable.
2774          * Assume that they will — if they turn out not to be, then we can
2775          * take them out of the 1:1 domain later.
2776          */
2777         if (!startup) {
2778                 /*
2779                  * If the device's dma_mask is less than the system's memory
2780                  * size then this is not a candidate for identity mapping.
2781                  */
2782                 u64 dma_mask = *dev->dma_mask;
2783
2784                 if (dev->coherent_dma_mask &&
2785                     dev->coherent_dma_mask < dma_mask)
2786                         dma_mask = dev->coherent_dma_mask;
2787
2788                 return dma_mask >= dma_get_required_mask(dev);
2789         }
2790
2791         return 1;
2792 }
2793
2794 static int __init dev_prepare_static_identity_mapping(struct device *dev, int hw)
2795 {
2796         int ret;
2797
2798         if (!iommu_should_identity_map(dev, 1))
2799                 return 0;
2800
2801         ret = domain_add_dev_info(si_domain, dev);
2802         if (!ret)
2803                 pr_info("%s identity mapping for device %s\n",
2804                         hw ? "Hardware" : "Software", dev_name(dev));
2805         else if (ret == -ENODEV)
2806                 /* device not associated with an iommu */
2807                 ret = 0;
2808
2809         return ret;
2810 }
2811
2812
2813 static int __init iommu_prepare_static_identity_mapping(int hw)
2814 {
2815         struct pci_dev *pdev = NULL;
2816         struct dmar_drhd_unit *drhd;
2817         struct intel_iommu *iommu;
2818         struct device *dev;
2819         int i;
2820         int ret = 0;
2821
2822         for_each_pci_dev(pdev) {
2823                 ret = dev_prepare_static_identity_mapping(&pdev->dev, hw);
2824                 if (ret)
2825                         return ret;
2826         }
2827
2828         for_each_active_iommu(iommu, drhd)
2829                 for_each_active_dev_scope(drhd->devices, drhd->devices_cnt, i, dev) {
2830                         struct acpi_device_physical_node *pn;
2831                         struct acpi_device *adev;
2832
2833                         if (dev->bus != &acpi_bus_type)
2834                                 continue;
2835
2836                         adev = to_acpi_device(dev);
2837                         mutex_lock(&adev->physical_node_lock);
2838                         list_for_each_entry(pn, &adev->physical_node_list, node) {
2839                                 ret = dev_prepare_static_identity_mapping(pn->dev, hw);
2840                                 if (ret)
2841                                         break;
2842                         }
2843                         mutex_unlock(&adev->physical_node_lock);
2844                         if (ret)
2845                                 return ret;
2846                 }
2847
2848         return 0;
2849 }
2850
2851 static void intel_iommu_init_qi(struct intel_iommu *iommu)
2852 {
2853         /*
2854          * Start from a sane iommu hardware state.
2855          * If queued invalidation was already initialized by us
2856          * (for example, while enabling interrupt-remapping) then
2857          * things are already rolling from a sane state.
2858          */
2859         if (!iommu->qi) {
2860                 /*
2861                  * Clear any previous faults.
2862                  */
2863                 dmar_fault(-1, iommu);
2864                 /*
2865                  * Disable queued invalidation if supported and already enabled
2866                  * before OS handover.
2867                  */
2868                 dmar_disable_qi(iommu);
2869         }
2870
2871         if (dmar_enable_qi(iommu)) {
2872                 /*
2873                  * Queued Invalidate not enabled, use Register Based Invalidate
2874                  */
2875                 iommu->flush.flush_context = __iommu_flush_context;
2876                 iommu->flush.flush_iotlb = __iommu_flush_iotlb;
2877                 pr_info("%s: Using Register based invalidation\n",
2878                         iommu->name);
2879         } else {
2880                 iommu->flush.flush_context = qi_flush_context;
2881                 iommu->flush.flush_iotlb = qi_flush_iotlb;
2882                 pr_info("%s: Using Queued invalidation\n", iommu->name);
2883         }
2884 }
2885
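/*
 * Copy one bus worth of context entries from the previous kernel's tables
 * (see copy_translation_tables() below). With extended context support each
 * entry is twice as large, so a bus needs two new table pages: devfns
 * 0x00-0x7f come from the lower context-table pointer of the old root entry
 * and 0x80-0xff from the upper one, hence the doubled indices.
 */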
2886 static int copy_context_table(struct intel_iommu *iommu,
2887                               struct root_entry __iomem *old_re,
2888                               struct context_entry **tbl,
2889                               int bus, bool ext)
2890 {
2891         int tbl_idx, pos = 0, idx, devfn, ret = 0, did;
2892         struct context_entry __iomem *old_ce = NULL;
2893         struct context_entry *new_ce = NULL, ce;
2894         struct root_entry re;
2895         phys_addr_t old_ce_phys;
2896
2897         tbl_idx = ext ? bus * 2 : bus;
2898         memcpy_fromio(&re, old_re, sizeof(re));
2899
2900         for (devfn = 0; devfn < 256; devfn++) {
2901                 /* First calculate the correct index */
2902                 idx = (ext ? devfn * 2 : devfn) % 256;
2903
2904                 if (idx == 0) {
2905                         /* First save what we may have and clean up */
2906                         if (new_ce) {
2907                                 tbl[tbl_idx] = new_ce;
2908                                 __iommu_flush_cache(iommu, new_ce,
2909                                                     VTD_PAGE_SIZE);
2910                                 pos = 1;
2911                         }
2912
2913                         if (old_ce)
2914                                 iounmap(old_ce);
2915
2916                         ret = 0;
2917                         if (devfn < 0x80)
2918                                 old_ce_phys = root_entry_lctp(&re);
2919                         else
2920                                 old_ce_phys = root_entry_uctp(&re);
2921
2922                         if (!old_ce_phys) {
2923                                 if (ext && devfn == 0) {
2924                                         /* No LCTP, try UCTP */
2925                                         devfn = 0x7f;
2926                                         continue;
2927                                 } else {
2928                                         goto out;
2929                                 }
2930                         }
2931
2932                         ret = -ENOMEM;
2933                         old_ce = ioremap_cache(old_ce_phys, PAGE_SIZE);
2934                         if (!old_ce)
2935                                 goto out;
2936
2937                         new_ce = alloc_pgtable_page(iommu->node);
2938                         if (!new_ce)
2939                                 goto out_unmap;
2940
2941                         ret = 0;
2942                 }
2943
2944                 /* Now copy the context entry */
2945                 memcpy_fromio(&ce, old_ce + idx, sizeof(ce));
2946
2947                 if (!__context_present(&ce))
2948                         continue;
2949
2950                 did = context_domain_id(&ce);
2951                 if (did >= 0 && did < cap_ndoms(iommu->cap))
2952                         set_bit(did, iommu->domain_ids);
2953
2954                 /*
2955                  * We need a marker for copied context entries. This
2956                  * marker needs to work for the old format as well as
2957                  * for extended context entries.
2958                  *
2959                  * Bit 67 of the context entry is used. In the old
2960                  * format this bit is available to software, in the
2961                  * extended format it is the PGE bit, but PGE is ignored
2962                  * by HW if PASIDs are disabled (and thus still
2963                  * available).
2964                  *
2965                  * So disable PASIDs first and then mark the entry
2966                  * copied. This means that we don't copy PASID
2967                  * translations from the old kernel, but this is fine as
2968                  * faults there are not fatal.
2969                  */
2970                 context_clear_pasid_enable(&ce);
2971                 context_set_copied(&ce);
2972
2973                 new_ce[idx] = ce;
2974         }
2975
2976         tbl[tbl_idx + pos] = new_ce;
2977
2978         __iommu_flush_cache(iommu, new_ce, VTD_PAGE_SIZE);
2979
2980 out_unmap:
2981         iounmap(old_ce);
2982
2983 out:
2984         return ret;
2985 }
2986
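/*
 * Copy the root and context tables that a previous kernel (for example the
 * one a kdump kernel is replacing) left programmed in hardware, so that DMA
 * set up by the old kernel can keep working during the handover. The tables
 * are copied rather than reused in place.
 */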
2987 static int copy_translation_tables(struct intel_iommu *iommu)
2988 {
2989         struct root_entry __iomem *old_rt;
2990         struct context_entry **ctxt_tbls;
2991         phys_addr_t old_rt_phys;
2992         int ctxt_table_entries;
2993         unsigned long flags;
2994         u64 rtaddr_reg;
2995         int bus, ret;
2996         bool new_ext, ext;
2997
2998         rtaddr_reg = dmar_readq(iommu->reg + DMAR_RTADDR_REG);
2999         ext        = !!(rtaddr_reg & DMA_RTADDR_RTT);
3000         new_ext    = !!ecap_ecs(iommu->ecap);
3001
3002         /*
3003          * The RTT bit can only be changed when translation is disabled,
3004          * but disabling translation means to open a window for data
3005          * corruption. So bail out and don't copy anything if we would
3006          * have to change the bit.
3007          */
3008         if (new_ext != ext)
3009                 return -EINVAL;
3010
3011         old_rt_phys = rtaddr_reg & VTD_PAGE_MASK;
3012         if (!old_rt_phys)
3013                 return -EINVAL;
3014
3015         old_rt = ioremap_cache(old_rt_phys, PAGE_SIZE);
3016         if (!old_rt)
3017                 return -ENOMEM;
3018
3019         /* This is too big for the stack - allocate it from slab */
3020         ctxt_table_entries = ext ? 512 : 256;
3021         ret = -ENOMEM;
3022         ctxt_tbls = kzalloc(ctxt_table_entries * sizeof(void *), GFP_KERNEL);
3023         if (!ctxt_tbls)
3024                 goto out_unmap;
3025
3026         for (bus = 0; bus < 256; bus++) {
3027                 ret = copy_context_table(iommu, &old_rt[bus],
3028                                          ctxt_tbls, bus, ext);
3029                 if (ret) {
3030                         pr_err("%s: Failed to copy context table for bus %d\n",
3031                                 iommu->name, bus);
3032                         continue;
3033                 }
3034         }
3035
3036         spin_lock_irqsave(&iommu->lock, flags);
3037
3038         /* Context tables are copied, now write them to the root_entry table */
3039         for (bus = 0; bus < 256; bus++) {
3040                 int idx = ext ? bus * 2 : bus;
3041                 u64 val;
3042
3043                 if (ctxt_tbls[idx]) {
3044                         val = virt_to_phys(ctxt_tbls[idx]) | 1;
3045                         iommu->root_entry[bus].lo = val;
3046                 }
3047
3048                 if (!ext || !ctxt_tbls[idx + 1])
3049                         continue;
3050
3051                 val = virt_to_phys(ctxt_tbls[idx + 1]) | 1;
3052                 iommu->root_entry[bus].hi = val;
3053         }
3054
3055         spin_unlock_irqrestore(&iommu->lock, flags);
3056
3057         kfree(ctxt_tbls);
3058
3059         __iommu_flush_cache(iommu, iommu->root_entry, PAGE_SIZE);
3060
3061         ret = 0;
3062
3063 out_unmap:
3064         iounmap(old_rt);
3065
3066         return ret;
3067 }
3068
3069 static int __init init_dmars(void)
3070 {
3071         struct dmar_drhd_unit *drhd;
3072         struct dmar_rmrr_unit *rmrr;
3073         bool copied_tables = false;
3074         struct device *dev;
3075         struct intel_iommu *iommu;
3076         int i, ret;
3077
3078         /*
3079          * for each drhd
3080          *    allocate root
3081          *    initialize and program root entry to not present
3082          * endfor
3083          */
3084         for_each_drhd_unit(drhd) {
3085                 /*
3086                  * No lock needed: this is only incremented in the
3087                  * single-threaded kernel __init code path; all other
3088                  * accesses are read only.
3089                  */
3090                 if (g_num_of_iommus < DMAR_UNITS_SUPPORTED) {
3091                         g_num_of_iommus++;
3092                         continue;
3093                 }
3094                 pr_err_once("Exceeded %d IOMMUs\n", DMAR_UNITS_SUPPORTED);
3095         }
3096
3097         /* Preallocate enough resources for IOMMU hot-addition */
3098         if (g_num_of_iommus < DMAR_UNITS_SUPPORTED)
3099                 g_num_of_iommus = DMAR_UNITS_SUPPORTED;
3100
3101         g_iommus = kcalloc(g_num_of_iommus, sizeof(struct intel_iommu *),
3102                         GFP_KERNEL);
3103         if (!g_iommus) {
3104                 pr_err("Allocating global iommu array failed\n");
3105                 ret = -ENOMEM;
3106                 goto error;
3107         }
3108
3109         deferred_flush = kzalloc(g_num_of_iommus *
3110                 sizeof(struct deferred_flush_tables), GFP_KERNEL);
3111         if (!deferred_flush) {
3112                 ret = -ENOMEM;
3113                 goto free_g_iommus;
3114         }
3115
3116         for_each_active_iommu(iommu, drhd) {
3117                 g_iommus[iommu->seq_id] = iommu;
3118
3119                 intel_iommu_init_qi(iommu);
3120
3121                 ret = iommu_init_domains(iommu);
3122                 if (ret)
3123                         goto free_iommu;
3124
3125                 init_translation_status(iommu);
3126
3127                 if (translation_pre_enabled(iommu) && !is_kdump_kernel()) {
3128                         iommu_disable_translation(iommu);
3129                         clear_translation_pre_enabled(iommu);
3130                         pr_warn("Translation was enabled for %s but we are not in kdump mode\n",
3131                                 iommu->name);
3132                 }
3133
3134                 /*
3135                  * TBD:
3136                  * we could share the same root & context tables
3137                  * among all IOMMUs. Need to split this out later.
3138                  */
3139                 ret = iommu_alloc_root_entry(iommu);
3140                 if (ret)
3141                         goto free_iommu;
3142
3143                 if (translation_pre_enabled(iommu)) {
3144                         pr_info("Translation already enabled - trying to copy translation structures\n");
3145
3146                         ret = copy_translation_tables(iommu);
3147                         if (ret) {
3148                                 /*
3149                                  * We found the IOMMU with translation
3150                                  * enabled - but failed to copy over the
3151                                  * old root-entry table. Try to proceed
3152                                  * by disabling translation now and
3153                                  * allocating a clean root-entry table.
3154                                  * This might cause DMAR faults, but
3155                                  * probably the dump will still succeed.
3156                                  */
3157                                 pr_err("Failed to copy translation tables from previous kernel for %s\n",
3158                                        iommu->name);
3159                                 iommu_disable_translation(iommu);
3160                                 clear_translation_pre_enabled(iommu);
3161                         } else {
3162                                 pr_info("Copied translation tables from previous kernel for %s\n",
3163                                         iommu->name);
3164                                 copied_tables = true;
3165                         }
3166                 }
3167
3168                 iommu_flush_write_buffer(iommu);
3169                 iommu_set_root_entry(iommu);
3170                 iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL);
3171                 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
3172
3173                 if (!ecap_pass_through(iommu->ecap))
3174                         hw_pass_through = 0;
3175 #ifdef CONFIG_INTEL_IOMMU_SVM
3176                 if (pasid_enabled(iommu))
3177                         intel_svm_alloc_pasid_tables(iommu);
3178 #endif
3179         }
3180
3181         if (iommu_pass_through)
3182                 iommu_identity_mapping |= IDENTMAP_ALL;
3183
3184 #ifdef CONFIG_INTEL_IOMMU_BROKEN_GFX_WA
3185         iommu_identity_mapping |= IDENTMAP_GFX;
3186 #endif
3187
3188         if (iommu_identity_mapping) {
3189                 ret = si_domain_init(hw_pass_through);
3190                 if (ret)
3191                         goto free_iommu;
3192         }
3193
3194         check_tylersburg_isoch();
3195
3196         /*
3197          * If we copied translations from a previous kernel in the kdump
3198          * case, we cannot assign the devices to domains now, as that
3199          * would eliminate the old mappings. So skip this part and defer
3200          * the assignment to device driver initialization time.
3201          */
3202         if (copied_tables)
3203                 goto domains_done;
3204
3205         /*
3206          * If pass-through is not set or not enabled, set up context entries
3207          * for identity mappings for RMRR, GFX and ISA, possibly falling back
3208          * to the static identity mapping if iommu_identity_mapping is set.
3209          */
3210         if (iommu_identity_mapping) {
3211                 ret = iommu_prepare_static_identity_mapping(hw_pass_through);
3212                 if (ret) {
3213                         pr_crit("Failed to setup IOMMU pass-through\n");
3214                         goto free_iommu;
3215                 }
3216         }
3217         /*
3218          * For each rmrr
3219          *   for each dev attached to rmrr
3220          *   do
3221          *     locate drhd for dev, alloc domain for dev
3222          *     allocate free domain
3223          *     allocate page table entries for rmrr
3224          *     if context not allocated for bus
3225          *           allocate and init context
3226          *           set present in root table for this bus
3227          *     init context with domain, translation etc
3228          *    endfor
3229          * endfor
3230          */
3231         pr_info("Setting RMRR:\n");
3232         for_each_rmrr_units(rmrr) {
3233                 /* Some BIOSes list non-existent devices in the DMAR table. */
3234                 for_each_active_dev_scope(rmrr->devices, rmrr->devices_cnt,
3235                                           i, dev) {
3236                         ret = iommu_prepare_rmrr_dev(rmrr, dev);
3237                         if (ret)
3238                                 pr_err("Mapping reserved region failed\n");
3239                 }
3240         }
3241
3242         iommu_prepare_isa();
3243
3244 domains_done:
3245
3246         /*
3247          * for each drhd
3248          *   enable fault log
3249          *   global invalidate context cache
3250          *   global invalidate iotlb
3251          *   enable translation
3252          */
3253         for_each_iommu(iommu, drhd) {
3254                 if (drhd->ignored) {
3255                         /*
3256                          * we always have to disable PMRs or DMA may fail on
3257                          * this device
3258                          */
3259                         if (force_on)
3260                                 iommu_disable_protect_mem_regions(iommu);
3261                         continue;
3262                 }
3263
3264                 iommu_flush_write_buffer(iommu);
3265
3266 #ifdef CONFIG_INTEL_IOMMU_SVM
3267                 if (pasid_enabled(iommu) && ecap_prs(iommu->ecap)) {
3268                         ret = intel_svm_enable_prq(iommu);
3269                         if (ret)
3270                                 goto free_iommu;
3271                 }
3272 #endif
3273                 ret = dmar_set_interrupt(iommu);
3274                 if (ret)
3275                         goto free_iommu;
3276
3277                 if (!translation_pre_enabled(iommu))
3278                         iommu_enable_translation(iommu);
3279
3280                 iommu_disable_protect_mem_regions(iommu);
3281         }
3282
3283         return 0;
3284
3285 free_iommu:
3286         for_each_active_iommu(iommu, drhd) {
3287                 disable_dmar_iommu(iommu);
3288                 free_dmar_iommu(iommu);
3289         }
3290         kfree(deferred_flush);
3291 free_g_iommus:
3292         kfree(g_iommus);
3293 error:
3294         return ret;
3295 }
3296
3297 /* This takes a number of _MM_ pages, not VTD pages */
3298 static struct iova *intel_alloc_iova(struct device *dev,
3299                                      struct dmar_domain *domain,
3300                                      unsigned long nrpages, uint64_t dma_mask)
3301 {
3302         struct iova *iova = NULL;
3303
3304         /* Restrict dma_mask to the width that the iommu can handle */
3305         dma_mask = min_t(uint64_t, DOMAIN_MAX_ADDR(domain->gaw), dma_mask);
3306         /* Ensure we reserve the whole size-aligned region */
3307         nrpages = __roundup_pow_of_two(nrpages);
3308
3309         if (!dmar_forcedac && dma_mask > DMA_BIT_MASK(32)) {
3310                 /*
3311                  * First try to allocate an IO virtual address in
3312                  * DMA_BIT_MASK(32); if that fails, try allocating
3313                  * from the higher range.
3314                  */
3315                 iova = alloc_iova(&domain->iovad, nrpages,
3316                                   IOVA_PFN(DMA_BIT_MASK(32)), 1);
3317                 if (iova)
3318                         return iova;
3319         }
3320         iova = alloc_iova(&domain->iovad, nrpages, IOVA_PFN(dma_mask), 1);
3321         if (unlikely(!iova)) {
3322                 pr_err("Allocating %ld-page iova for %s failed\n",
3323                        nrpages, dev_name(dev));
3324                 return NULL;
3325         }
3326
3327         return iova;
3328 }
3329
3330 static struct dmar_domain *__get_valid_domain_for_dev(struct device *dev)
3331 {
3332         struct dmar_domain *domain;
3333
3334         domain = get_domain_for_dev(dev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
3335         if (!domain) {
3336                 pr_err("Allocating domain for %s failed\n",
3337                        dev_name(dev));
3338                 return NULL;
3339         }
3340
3341         return domain;
3342 }
3343
3344 static inline struct dmar_domain *get_valid_domain_for_dev(struct device *dev)
3345 {
3346         struct device_domain_info *info;
3347
3348         /* No lock here, assumes no domain exit in normal case */
3349         info = dev->archdata.iommu;
3350         if (likely(info))
3351                 return info->domain;
3352
3353         return __get_valid_domain_for_dev(dev);
3354 }
3355
3356 /* Check if the dev needs to go through the non-identity map and unmap process. */
3357 static int iommu_no_mapping(struct device *dev)
3358 {
3359         int found;
3360
3361         if (iommu_dummy(dev))
3362                 return 1;
3363
3364         if (!iommu_identity_mapping)
3365                 return 0;
3366
3367         found = identity_mapping(dev);
3368         if (found) {
3369                 if (iommu_should_identity_map(dev, 0))
3370                         return 1;
3371                 else {
3372                         /*
3373                          * The 32 bit DMA device is removed from si_domain;
3374                          * fall back to non-identity mapping.
3375                          */
3376                         dmar_remove_one_dev_info(si_domain, dev);
3377                         pr_info("32bit %s uses non-identity mapping\n",
3378                                 dev_name(dev));
3379                         return 0;
3380                 }
3381         } else {
3382                 /*
3383                  * A 64 bit DMA device detached from a VM is put back into
3384                  * si_domain for identity mapping.
3385                  */
3386                 if (iommu_should_identity_map(dev, 0)) {
3387                         int ret;
3388                         ret = domain_add_dev_info(si_domain, dev);
3389                         if (!ret) {
3390                                 pr_info("64bit %s uses identity mapping\n",
3391                                         dev_name(dev));
3392                                 return 1;
3393                         }
3394                 }
3395         }
3396
3397         return 0;
3398 }
3399
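     /*
      * Core map routine: allocate an IOVA covering the page-aligned region,
      * install page-table entries with the requested read/write permissions
      * and return the resulting bus (DMA) address, or 0 on failure.
      */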
3400 static dma_addr_t __intel_map_single(struct device *dev, phys_addr_t paddr,
3401                                      size_t size, int dir, u64 dma_mask)
3402 {
3403         struct dmar_domain *domain;
3404         phys_addr_t start_paddr;
3405         struct iova *iova;
3406         int prot = 0;
3407         int ret;
3408         struct intel_iommu *iommu;
3409         unsigned long paddr_pfn = paddr >> PAGE_SHIFT;
3410
3411         BUG_ON(dir == DMA_NONE);
3412
3413         if (iommu_no_mapping(dev))
3414                 return paddr;
3415
3416         domain = get_valid_domain_for_dev(dev);
3417         if (!domain)
3418                 return 0;
3419
3420         iommu = domain_get_iommu(domain);
3421         size = aligned_nrpages(paddr, size);
3422
3423         iova = intel_alloc_iova(dev, domain, dma_to_mm_pfn(size), dma_mask);
3424         if (!iova)
3425                 goto error;
3426
3427         /*
3428          * Check if DMAR supports zero-length reads on write-only
3429          * mappings.
3430          */
3431         if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
3432                         !cap_zlr(iommu->cap))
3433                 prot |= DMA_PTE_READ;
3434         if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
3435                 prot |= DMA_PTE_WRITE;
3436         /*
3437          * paddr .. (paddr + size) might cover only part of a page, so map the
3438          * whole page.  Note: if two parts of one page are mapped separately,
3439          * we might end up with two guest addresses mapping to the same host
3440          * paddr, but this is not a big problem.
3441          */
3442         ret = domain_pfn_mapping(domain, mm_to_dma_pfn(iova->pfn_lo),
3443                                  mm_to_dma_pfn(paddr_pfn), size, prot);
3444         if (ret)
3445                 goto error;
3446
3447         /* It's a non-present to present mapping. Only flush the IOTLB if in caching mode */
3448         if (cap_caching_mode(iommu->cap))
3449                 iommu_flush_iotlb_psi(iommu, domain,
3450                                       mm_to_dma_pfn(iova->pfn_lo),
3451                                       size, 0, 1);
3452         else
3453                 iommu_flush_write_buffer(iommu);
3454
3455         start_paddr = (phys_addr_t)iova->pfn_lo << PAGE_SHIFT;
3456         start_paddr += paddr & ~PAGE_MASK;
3457         return start_paddr;
3458
3459 error:
3460         if (iova)
3461                 __free_iova(&domain->iovad, iova);
3462         pr_err("Device %s request: %zx@%llx dir %d --- failed\n",
3463                 dev_name(dev), size, (unsigned long long)paddr, dir);
3464         return 0;
3465 }
3466
3467 static dma_addr_t intel_map_page(struct device *dev, struct page *page,
3468                                  unsigned long offset, size_t size,
3469                                  enum dma_data_direction dir,
3470                                  struct dma_attrs *attrs)
3471 {
3472         return __intel_map_single(dev, page_to_phys(page) + offset, size,
3473                                   dir, *dev->dma_mask);
3474 }
3475
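     /*
      * Flush the IOTLBs of all IOMMUs and release every IOVA and page-table
      * freelist that has been queued for deferred unmapping.
      */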
3476 static void flush_unmaps(void)
3477 {
3478         int i, j;
3479
3480         timer_on = 0;
3481
3482         /* just flush them all */
3483         for (i = 0; i < g_num_of_iommus; i++) {
3484                 struct intel_iommu *iommu = g_iommus[i];
3485                 if (!iommu)
3486                         continue;
3487
3488                 if (!deferred_flush[i].next)
3489                         continue;
3490
3491                 /* In caching mode, global flushes make emulation expensive */
3492                 if (!cap_caching_mode(iommu->cap))
3493                         iommu->flush.flush_iotlb(iommu, 0, 0, 0,
3494                                          DMA_TLB_GLOBAL_FLUSH);
3495                 for (j = 0; j < deferred_flush[i].next; j++) {
3496                         unsigned long mask;
3497                         struct iova *iova = deferred_flush[i].iova[j];
3498                         struct dmar_domain *domain = deferred_flush[i].domain[j];
3499
3500                         /* On real hardware multiple invalidations are expensive */
3501                         if (cap_caching_mode(iommu->cap))
3502                                 iommu_flush_iotlb_psi(iommu, domain,
3503                                         iova->pfn_lo, iova_size(iova),
3504                                         !deferred_flush[i].freelist[j], 0);
3505                         else {
3506                                 mask = ilog2(mm_to_dma_pfn(iova_size(iova)));
3507                                 iommu_flush_dev_iotlb(deferred_flush[i].domain[j],
3508                                                 (uint64_t)iova->pfn_lo << PAGE_SHIFT, mask);
3509                         }
3510                         __free_iova(&deferred_flush[i].domain[j]->iovad, iova);
3511                         if (deferred_flush[i].freelist[j])
3512                                 dma_free_pagelist(deferred_flush[i].freelist[j]);
3513                 }
3514                 deferred_flush[i].next = 0;
3515         }
3516
3517         list_size = 0;
3518 }
3519
3520 static void flush_unmaps_timeout(unsigned long data)
3521 {
3522         unsigned long flags;
3523
3524         spin_lock_irqsave(&async_umap_flush_lock, flags);
3525         flush_unmaps();
3526         spin_unlock_irqrestore(&async_umap_flush_lock, flags);
3527 }
3528
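     /*
      * Queue an IOVA (and its page-table freelist) for deferred freeing. The
      * actual IOTLB flush happens in flush_unmaps(), either when the queue
      * reaches its high-water mark or when the unmap_timer fires.
      */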
3529 static void add_unmap(struct dmar_domain *dom, struct iova *iova, struct page *freelist)
3530 {
3531         unsigned long flags;
3532         int next, iommu_id;
3533         struct intel_iommu *iommu;
3534
3535         spin_lock_irqsave(&async_umap_flush_lock, flags);
3536         if (list_size == HIGH_WATER_MARK)
3537                 flush_unmaps();
3538
3539         iommu = domain_get_iommu(dom);
3540         iommu_id = iommu->seq_id;
3541
3542         next = deferred_flush[iommu_id].next;
3543         deferred_flush[iommu_id].domain[next] = dom;
3544         deferred_flush[iommu_id].iova[next] = iova;
3545         deferred_flush[iommu_id].freelist[next] = freelist;
3546         deferred_flush[iommu_id].next++;
3547
3548         if (!timer_on) {
3549                 mod_timer(&unmap_timer, jiffies + msecs_to_jiffies(10));
3550                 timer_on = 1;
3551         }
3552         list_size++;
3553         spin_unlock_irqrestore(&async_umap_flush_lock, flags);
3554 }
3555
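     /*
      * Tear down the mapping at dev_addr: unmap the page-table range and
      * either flush the IOTLB immediately (intel_iommu_strict) or defer the
      * flush and IOVA release via add_unmap().
      */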
3556 static void intel_unmap(struct device *dev, dma_addr_t dev_addr)
3557 {
3558         struct dmar_domain *domain;
3559         unsigned long start_pfn, last_pfn;
3560         struct iova *iova;
3561         struct intel_iommu *iommu;
3562         struct page *freelist;
3563
3564         if (iommu_no_mapping(dev))
3565                 return;
3566
3567         domain = find_domain(dev);
3568         BUG_ON(!domain);
3569
3570         iommu = domain_get_iommu(domain);
3571
3572         iova = find_iova(&domain->iovad, IOVA_PFN(dev_addr));
3573         if (WARN_ONCE(!iova, "Driver unmaps unmatched page at PFN %llx\n",
3574                       (unsigned long long)dev_addr))
3575                 return;
3576
3577         start_pfn = mm_to_dma_pfn(iova->pfn_lo);
3578         last_pfn = mm_to_dma_pfn(iova->pfn_hi + 1) - 1;
3579
3580         pr_debug("Device %s unmapping: pfn %lx-%lx\n",
3581                  dev_name(dev), start_pfn, last_pfn);
3582
3583         freelist = domain_unmap(domain, start_pfn, last_pfn);
3584
3585         if (intel_iommu_strict) {
3586                 iommu_flush_iotlb_psi(iommu, domain, start_pfn,
3587                                       last_pfn - start_pfn + 1, !freelist, 0);
3588                 /* free iova */
3589                 __free_iova(&domain->iovad, iova);
3590                 dma_free_pagelist(freelist);
3591         } else {
3592                 add_unmap(domain, iova, freelist);
3593                 /*
3594                  * Queue up the release of the unmap to save the roughly 1/6th
3595                  * of the CPU time otherwise spent on the iotlb flush operation.
3596                  */
3597         }
3598 }
3599
3600 static void intel_unmap_page(struct device *dev, dma_addr_t dev_addr,
3601                              size_t size, enum dma_data_direction dir,
3602                              struct dma_attrs *attrs)
3603 {
3604         intel_unmap(dev, dev_addr);
3605 }
3606
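     /*
      * Coherent allocation: prefer CMA when the caller may sleep, fall back
      * to alloc_pages(), zero the buffer and map it bidirectionally.
      */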
3607 static void *intel_alloc_coherent(struct device *dev, size_t size,
3608                                   dma_addr_t *dma_handle, gfp_t flags,
3609                                   struct dma_attrs *attrs)
3610 {
3611         struct page *page = NULL;
3612         int order;
3613
3614         size = PAGE_ALIGN(size);
3615         order = get_order(size);
3616
3617         if (!iommu_no_mapping(dev))
3618                 flags &= ~(GFP_DMA | GFP_DMA32);
3619         else if (dev->coherent_dma_mask < dma_get_required_mask(dev)) {
3620                 if (dev->coherent_dma_mask < DMA_BIT_MASK(32))
3621                         flags |= GFP_DMA;
3622                 else
3623                         flags |= GFP_DMA32;
3624         }
3625
3626         if (flags & __GFP_WAIT) {
3627                 unsigned int count = size >> PAGE_SHIFT;
3628
3629                 page = dma_alloc_from_contiguous(dev, count, order);
3630                 if (page && iommu_no_mapping(dev) &&
3631                     page_to_phys(page) + size > dev->coherent_dma_mask) {
3632                         dma_release_from_contiguous(dev, page, count);
3633                         page = NULL;
3634                 }
3635         }
3636
3637         if (!page)
3638                 page = alloc_pages(flags, order);
3639         if (!page)
3640                 return NULL;
3641         memset(page_address(page), 0, size);
3642
3643         *dma_handle = __intel_map_single(dev, page_to_phys(page), size,
3644                                          DMA_BIDIRECTIONAL,
3645                                          dev->coherent_dma_mask);
3646         if (*dma_handle)
3647                 return page_address(page);
3648         if (!dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT))
3649                 __free_pages(page, order);
3650
3651         return NULL;
3652 }
3653
3654 static void intel_free_coherent(struct device *dev, size_t size, void *vaddr,
3655                                 dma_addr_t dma_handle, struct dma_attrs *attrs)
3656 {
3657         int order;
3658         struct page *page = virt_to_page(vaddr);
3659
3660         size = PAGE_ALIGN(size);
3661         order = get_order(size);
3662
3663         intel_unmap(dev, dma_handle);
3664         if (!dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT))
3665                 __free_pages(page, order);
3666 }
3667
3668 static void intel_unmap_sg(struct device *dev, struct scatterlist *sglist,
3669                            int nelems, enum dma_data_direction dir,
3670                            struct dma_attrs *attrs)
3671 {
3672         intel_unmap(dev, sglist[0].dma_address);
3673 }
3674
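     /* Pass-through path: DMA addresses are simply the physical addresses. */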
3675 static int intel_nontranslate_map_sg(struct device *hddev,
3676         struct scatterlist *sglist, int nelems, int dir)
3677 {
3678         int i;
3679         struct scatterlist *sg;
3680
3681         for_each_sg(sglist, sg, nelems, i) {
3682                 BUG_ON(!sg_page(sg));
3683                 sg->dma_address = sg_phys(sg);
3684                 sg->dma_length = sg->length;
3685         }
3686         return nelems;
3687 }
3688
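     /*
      * Map a scatterlist: a single IOVA allocation covers the whole list, and
      * domain_sg_mapping() then maps each segment contiguously into that range.
      */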
3689 static int intel_map_sg(struct device *dev, struct scatterlist *sglist, int nelems,
3690                         enum dma_data_direction dir, struct dma_attrs *attrs)
3691 {
3692         int i;
3693         struct dmar_domain *domain;
3694         size_t size = 0;
3695         int prot = 0;
3696         struct iova *iova = NULL;
3697         int ret;
3698         struct scatterlist *sg;
3699         unsigned long start_vpfn;
3700         struct intel_iommu *iommu;
3701
3702         BUG_ON(dir == DMA_NONE);
3703         if (iommu_no_mapping(dev))
3704                 return intel_nontranslate_map_sg(dev, sglist, nelems, dir);
3705
3706         domain = get_valid_domain_for_dev(dev);
3707         if (!domain)
3708                 return 0;
3709
3710         iommu = domain_get_iommu(domain);
3711
3712         for_each_sg(sglist, sg, nelems, i)
3713                 size += aligned_nrpages(sg->offset, sg->length);
3714
3715         iova = intel_alloc_iova(dev, domain, dma_to_mm_pfn(size),
3716                                 *dev->dma_mask);
3717         if (!iova) {
3718                 sglist->dma_length = 0;
3719                 return 0;
3720         }
3721
3722         /*
3723          * Check if DMAR supports zero-length reads on write-only
3724          * mappings.
3725          */
3726         if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
3727                         !cap_zlr(iommu->cap))
3728                 prot |= DMA_PTE_READ;
3729         if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
3730                 prot |= DMA_PTE_WRITE;
3731
3732         start_vpfn = mm_to_dma_pfn(iova->pfn_lo);
3733
3734         ret = domain_sg_mapping(domain, start_vpfn, sglist, size, prot);
3735         if (unlikely(ret)) {
3736                 dma_pte_free_pagetable(domain, start_vpfn,
3737                                        start_vpfn + size - 1);
3738                 __free_iova(&domain->iovad, iova);
3739                 return 0;
3740         }
3741
3742         /* It's a non-present to present mapping. Only flush the IOTLB if in caching mode */
3743         if (cap_caching_mode(iommu->cap))
3744                 iommu_flush_iotlb_psi(iommu, domain, start_vpfn, size, 0, 1);
3745         else
3746                 iommu_flush_write_buffer(iommu);
3747
3748         return nelems;
3749 }
3750
3751 static int intel_mapping_error(struct device *dev, dma_addr_t dma_addr)
3752 {
3753         return !dma_addr;
3754 }
3755
3756 struct dma_map_ops intel_dma_ops = {
3757         .alloc = intel_alloc_coherent,
3758         .free = intel_free_coherent,
3759         .map_sg = intel_map_sg,
3760         .unmap_sg = intel_unmap_sg,
3761         .map_page = intel_map_page,
3762         .unmap_page = intel_unmap_page,
3763         .mapping_error = intel_mapping_error,
3764 };
3765
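     /*
      * Illustrative sketch only (not part of the driver): once intel_dma_ops is
      * installed as a device's dma_map_ops, an ordinary driver call such as
      *
      *      dma_addr_t dma = dma_map_page(dev, page, 0, PAGE_SIZE, DMA_TO_DEVICE);
      *
      * is routed to intel_map_page()/__intel_map_single() above, and the matching
      * dma_unmap_page(dev, dma, PAGE_SIZE, DMA_TO_DEVICE) ends up in
      * intel_unmap_page().
      */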
3766 static inline int iommu_domain_cache_init(void)
3767 {
3768         int ret = 0;
3769
3770         iommu_domain_cache = kmem_cache_create("iommu_domain",
3771                                          sizeof(struct dmar_domain),
3772                                          0,
3773                                          SLAB_HWCACHE_ALIGN,
3775                                          NULL);
3776         if (!iommu_domain_cache) {
3777                 pr_err("Couldn't create iommu_domain cache\n");
3778                 ret = -ENOMEM;
3779         }
3780
3781         return ret;
3782 }
3783
3784 static inline int iommu_devinfo_cache_init(void)
3785 {
3786         int ret = 0;
3787
3788         iommu_devinfo_cache = kmem_cache_create("iommu_devinfo",
3789                                          sizeof(struct device_domain_info),
3790                                          0,
3791                                          SLAB_HWCACHE_ALIGN,
3792                                          NULL);
3793         if (!iommu_devinfo_cache) {
3794                 pr_err("Couldn't create devinfo cache\n");
3795                 ret = -ENOMEM;
3796         }
3797
3798         return ret;
3799 }
3800
3801 static int __init iommu_init_mempool(void)
3802 {
3803         int ret;
3804         ret = iova_cache_get();
3805         if (ret)
3806                 return ret;
3807
3808         ret = iommu_domain_cache_init();
3809         if (ret)
3810                 goto domain_error;
3811
3812         ret = iommu_devinfo_cache_init();
3813         if (!ret)
3814                 return ret;
3815
3816         kmem_cache_destroy(iommu_domain_cache);
3817 domain_error:
3818         iova_cache_put();
3819
3820         return -ENOMEM;
3821 }
3822
3823 static void __init iommu_exit_mempool(void)
3824 {
3825         kmem_cache_destroy(iommu_devinfo_cache);
3826         kmem_cache_destroy(iommu_domain_cache);
3827         iova_cache_put();
3828 }
3829
3830 static void quirk_ioat_snb_local_iommu(struct pci_dev *pdev)
3831 {
3832         struct dmar_drhd_unit *drhd;
3833         u32 vtbar;
3834         int rc;
3835
3836         /* We know that this device on this chipset has its own IOMMU.
3837          * If we find it under a different IOMMU, then the BIOS is lying
3838          * to us. Hope that the IOMMU for this device is actually
3839          * disabled, and it needs no translation...
3840          */
3841         rc = pci_bus_read_config_dword(pdev->bus, PCI_DEVFN(0, 0), 0xb0, &vtbar);
3842         if (rc) {
3843                 /* "can't" happen */
3844                 dev_info(&pdev->dev, "failed to run vt-d quirk\n");
3845                 return;
3846         }
3847         vtbar &= 0xffff0000;
3848
3849         /* we know that this iommu should be at offset 0xa000 from vtbar */
3850         drhd = dmar_find_matched_drhd_unit(pdev);
3851         if (WARN_TAINT_ONCE(!drhd || drhd->reg_base_addr - vtbar != 0xa000,
3852                             TAINT_FIRMWARE_WORKAROUND,
3853                             "BIOS assigned incorrect VT-d unit for Intel(R) QuickData Technology device\n"))
3854                 pdev->dev.archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
3855 }
3856 DECLARE_PCI_FIXUP_ENABLE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB, quirk_ioat_snb_local_iommu);
3857
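     /*
      * Mark DRHD units that cover no devices (or only graphics devices, when
      * dmar_map_gfx is disabled) as ignored so no translation is set up for them.
      */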
3858 static void __init init_no_remapping_devices(void)
3859 {
3860         struct dmar_drhd_unit *drhd;
3861         struct device *dev;
3862         int i;
3863
3864         for_each_drhd_unit(drhd) {
3865                 if (!drhd->include_all) {
3866                         for_each_active_dev_scope(drhd->devices,
3867                                                   drhd->devices_cnt, i, dev)
3868                                 break;
3869                         /* ignore DMAR unit if no devices exist */
3870                         if (i == drhd->devices_cnt)
3871                                 drhd->ignored = 1;
3872                 }
3873         }
3874
3875         for_each_active_drhd_unit(drhd) {
3876                 if (drhd->include_all)
3877                         continue;
3878
3879                 for_each_active_dev_scope(drhd->devices,
3880                                           drhd->devices_cnt, i, dev)
3881                         if (!dev_is_pci(dev) || !IS_GFX_DEVICE(to_pci_dev(dev)))
3882                                 break;
3883                 if (i < drhd->devices_cnt)
3884                         continue;
3885
3886                 /* This IOMMU has *only* gfx devices. Either bypass it or
3887                    set the gfx_mapped flag, as appropriate */
3888                 if (dmar_map_gfx) {
3889                         intel_iommu_gfx_mapped = 1;
3890                 } else {
3891                         drhd->ignored = 1;
3892                         for_each_active_dev_scope(drhd->devices,
3893                                                   drhd->devices_cnt, i, dev)
3894                                 dev->archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
3895                 }
3896         }
3897 }
3898
3899 #ifdef CONFIG_SUSPEND
3900 static int init_iommu_hw(void)
3901 {
3902         struct dmar_drhd_unit *drhd;
3903         struct intel_iommu *iommu = NULL;
3904
3905         for_each_active_iommu(iommu, drhd)
3906                 if (iommu->qi)
3907                         dmar_reenable_qi(iommu);
3908
3909         for_each_iommu(iommu, drhd) {
3910                 if (drhd->ignored) {
3911                         /*
3912                          * we always have to disable PMRs or DMA may fail on
3913                          * this device
3914                          */
3915                         if (force_on)
3916                                 iommu_disable_protect_mem_regions(iommu);
3917                         continue;
3918                 }
3919
3920                 iommu_flush_write_buffer(iommu);
3921
3922                 iommu_set_root_entry(iommu);
3923
3924                 iommu->flush.flush_context(iommu, 0, 0, 0,
3925                                            DMA_CCMD_GLOBAL_INVL);
3926                 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
3927                 iommu_enable_translation(iommu);
3928                 iommu_disable_protect_mem_regions(iommu);
3929         }
3930
3931         return 0;
3932 }
3933
3934 static void iommu_flush_all(void)
3935 {
3936         struct dmar_drhd_unit *drhd;
3937         struct intel_iommu *iommu;
3938
3939         for_each_active_iommu(iommu, drhd) {
3940                 iommu->flush.flush_context(iommu, 0, 0, 0,
3941                                            DMA_CCMD_GLOBAL_INVL);
3942                 iommu->flush.flush_iotlb(iommu, 0, 0, 0,
3943                                          DMA_TLB_GLOBAL_FLUSH);
3944         }
3945 }
3946
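     /*
      * Flush everything, disable translation and save the fault-event registers
      * of every active IOMMU before suspend; iommu_resume() restores them.
      */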
3947 static int iommu_suspend(void)
3948 {
3949         struct dmar_drhd_unit *drhd;
3950         struct intel_iommu *iommu = NULL;
3951         unsigned long flag;
3952
3953         for_each_active_iommu(iommu, drhd) {
3954                 iommu->iommu_state = kzalloc(sizeof(u32) * MAX_SR_DMAR_REGS,
3955                                                  GFP_ATOMIC);
3956                 if (!iommu->iommu_state)
3957                         goto nomem;
3958         }
3959
3960         iommu_flush_all();
3961
3962         for_each_active_iommu(iommu, drhd) {
3963                 iommu_disable_translation(iommu);
3964
3965                 raw_spin_lock_irqsave(&iommu->register_lock, flag);
3966
3967                 iommu->iommu_state[SR_DMAR_FECTL_REG] =
3968                         readl(iommu->reg + DMAR_FECTL_REG);
3969                 iommu->iommu_state[SR_DMAR_FEDATA_REG] =
3970                         readl(iommu->reg + DMAR_FEDATA_REG);
3971                 iommu->iommu_state[SR_DMAR_FEADDR_REG] =
3972                         readl(iommu->reg + DMAR_FEADDR_REG);
3973                 iommu->iommu_state[SR_DMAR_FEUADDR_REG] =
3974                         readl(iommu->reg + DMAR_FEUADDR_REG);
3975
3976                 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
3977         }
3978         return 0;
3979
3980 nomem:
3981         for_each_active_iommu(iommu, drhd)
3982                 kfree(iommu->iommu_state);
3983
3984         return -ENOMEM;
3985 }
3986
3987 static void iommu_resume(void)
3988 {
3989         struct dmar_drhd_unit *drhd;
3990         struct intel_iommu *iommu = NULL;
3991         unsigned long flag;
3992
3993         if (init_iommu_hw()) {
3994                 if (force_on)
3995                         panic("tboot: IOMMU setup failed, DMAR can not resume!\n");
3996                 else
3997                         WARN(1, "IOMMU setup failed, DMAR can not resume!\n");
3998                 return;
3999         }
4000
4001         for_each_active_iommu(iommu, drhd) {
4002
4003                 raw_spin_lock_irqsave(&iommu->register_lock, flag);
4004
4005                 writel(iommu->iommu_state[SR_DMAR_FECTL_REG],
4006                         iommu->reg + DMAR_FECTL_REG);
4007                 writel(iommu->iommu_state[SR_DMAR_FEDATA_REG],
4008                         iommu->reg + DMAR_FEDATA_REG);
4009                 writel(iommu->iommu_state[SR_DMAR_FEADDR_REG],
4010                         iommu->reg + DMAR_FEADDR_REG);
4011                 writel(iommu->iommu_state[SR_DMAR_FEUADDR_REG],
4012                         iommu->reg + DMAR_FEUADDR_REG);
4013
4014                 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
4015         }
4016
4017         for_each_active_iommu(iommu, drhd)
4018                 kfree(iommu->iommu_state);
4019 }
4020
4021 static struct syscore_ops iommu_syscore_ops = {
4022         .resume         = iommu_resume,
4023         .suspend        = iommu_suspend,
4024 };
4025
4026 static void __init init_iommu_pm_ops(void)
4027 {
4028         register_syscore_ops(&iommu_syscore_ops);
4029 }
4030
4031 #else
4032 static inline void init_iommu_pm_ops(void) {}
4033 #endif  /* CONFIG_SUSPEND */
4034
4035
4036 int __init dmar_parse_one_rmrr(struct acpi_dmar_header *header, void *arg)
4037 {
4038         struct acpi_dmar_reserved_memory *rmrr;
4039         struct dmar_rmrr_unit *rmrru;
4040
4041         rmrru = kzalloc(sizeof(*rmrru), GFP_KERNEL);
4042         if (!rmrru)
4043                 return -ENOMEM;
4044
4045         rmrru->hdr = header;
4046         rmrr = (struct acpi_dmar_reserved_memory *)header;
4047         rmrru->base_address = rmrr->base_address;
4048         rmrru->end_address = rmrr->end_address;
4049         rmrru->devices = dmar_alloc_dev_scope((void *)(rmrr + 1),
4050                                 ((void *)rmrr) + rmrr->header.length,
4051                                 &rmrru->devices_cnt);
4052         if (rmrru->devices_cnt && rmrru->devices == NULL) {
4053                 kfree(rmrru);
4054                 return -ENOMEM;
4055         }
4056
4057         list_add(&rmrru->list, &dmar_rmrr_units);
4058
4059         return 0;
4060 }
4061
4062 static struct dmar_atsr_unit *dmar_find_atsr(struct acpi_dmar_atsr *atsr)
4063 {
4064         struct dmar_atsr_unit *atsru;
4065         struct acpi_dmar_atsr *tmp;
4066
4067         list_for_each_entry_rcu(atsru, &dmar_atsr_units, list) {
4068                 tmp = (struct acpi_dmar_atsr *)atsru->hdr;
4069                 if (atsr->segment != tmp->segment)
4070                         continue;
4071                 if (atsr->header.length != tmp->header.length)
4072                         continue;
4073                 if (memcmp(atsr, tmp, atsr->header.length) == 0)
4074                         return atsru;
4075         }
4076
4077         return NULL;
4078 }
4079
4080 int dmar_parse_one_atsr(struct acpi_dmar_header *hdr, void *arg)
4081 {
4082         struct acpi_dmar_atsr *atsr;
4083         struct dmar_atsr_unit *atsru;
4084
4085         if (system_state != SYSTEM_BOOTING && !intel_iommu_enabled)
4086                 return 0;
4087
4088         atsr = container_of(hdr, struct acpi_dmar_atsr, header);
4089         atsru = dmar_find_atsr(atsr);
4090         if (atsru)
4091                 return 0;
4092
4093         atsru = kzalloc(sizeof(*atsru) + hdr->length, GFP_KERNEL);
4094         if (!atsru)
4095                 return -ENOMEM;
4096
4097         /*
4098          * If memory is allocated from slab by ACPI _DSM method, we need to
4099          * copy the memory content because the memory buffer will be freed
4100          * on return.
4101          */
4102         atsru->hdr = (void *)(atsru + 1);
4103         memcpy(atsru->hdr, hdr, hdr->length);
4104         atsru->include_all = atsr->flags & 0x1;
4105         if (!atsru->include_all) {
4106                 atsru->devices = dmar_alloc_dev_scope((void *)(atsr + 1),
4107                                 (void *)atsr + atsr->header.length,
4108                                 &atsru->devices_cnt);
4109                 if (atsru->devices_cnt && atsru->devices == NULL) {
4110                         kfree(atsru);
4111                         return -ENOMEM;
4112                 }
4113         }
4114
4115         list_add_rcu(&atsru->list, &dmar_atsr_units);
4116
4117         return 0;
4118 }
4119
4120 static void intel_iommu_free_atsr(struct dmar_atsr_unit *atsru)
4121 {
4122         dmar_free_dev_scope(&atsru->devices, &atsru->devices_cnt);
4123         kfree(atsru);
4124 }
4125
4126 int dmar_release_one_atsr(struct acpi_dmar_header *hdr, void *arg)
4127 {
4128         struct acpi_dmar_atsr *atsr;
4129         struct dmar_atsr_unit *atsru;
4130
4131         atsr = container_of(hdr, struct acpi_dmar_atsr, header);
4132         atsru = dmar_find_atsr(atsr);
4133         if (atsru) {
4134                 list_del_rcu(&atsru->list);
4135                 synchronize_rcu();
4136                 intel_iommu_free_atsr(atsru);
4137         }
4138
4139         return 0;
4140 }
4141
4142 int dmar_check_one_atsr(struct acpi_dmar_header *hdr, void *arg)
4143 {
4144         int i;
4145         struct device *dev;
4146         struct acpi_dmar_atsr *atsr;
4147         struct dmar_atsr_unit *atsru;
4148
4149         atsr = container_of(hdr, struct acpi_dmar_atsr, header);
4150         atsru = dmar_find_atsr(atsr);
4151         if (!atsru)
4152                 return 0;
4153
4154         if (!atsru->include_all && atsru->devices && atsru->devices_cnt)
4155                 for_each_active_dev_scope(atsru->devices, atsru->devices_cnt,
4156                                           i, dev)
4157                         return -EBUSY;
4158
4159         return 0;
4160 }
4161
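     /*
      * Bring up a hot-added DMAR unit: check the required capabilities,
      * allocate its domain and root-entry structures and enable translation.
      */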
4162 static int intel_iommu_add(struct dmar_drhd_unit *dmaru)
4163 {
4164         int sp, ret = 0;
4165         struct intel_iommu *iommu = dmaru->iommu;
4166
4167         if (g_iommus[iommu->seq_id])
4168                 return 0;
4169
4170         if (hw_pass_through && !ecap_pass_through(iommu->ecap)) {
4171                 pr_warn("%s: Doesn't support hardware pass through.\n",
4172                         iommu->name);
4173                 return -ENXIO;
4174         }
4175         if (!ecap_sc_support(iommu->ecap) &&
4176             domain_update_iommu_snooping(iommu)) {
4177                 pr_warn("%s: Doesn't support snooping.\n",
4178                         iommu->name);
4179                 return -ENXIO;
4180         }
4181         sp = domain_update_iommu_superpage(iommu) - 1;
4182         if (sp >= 0 && !(cap_super_page_val(iommu->cap) & (1 << sp))) {
4183                 pr_warn("%s: Doesn't support large page.\n",
4184                         iommu->name);
4185                 return -ENXIO;
4186         }
4187
4188         /*
4189          * Disable translation if already enabled prior to OS handover.
4190          */
4191         if (iommu->gcmd & DMA_GCMD_TE)
4192                 iommu_disable_translation(iommu);
4193
4194         g_iommus[iommu->seq_id] = iommu;
4195         ret = iommu_init_domains(iommu);
4196         if (ret == 0)
4197                 ret = iommu_alloc_root_entry(iommu);
4198         if (ret)
4199                 goto out;
4200
4201 #ifdef CONFIG_INTEL_IOMMU_SVM
4202         if (pasid_enabled(iommu))
4203                 intel_svm_alloc_pasid_tables(iommu);
4204 #endif
4205
4206         if (dmaru->ignored) {
4207                 /*
4208                  * we always have to disable PMRs or DMA may fail on this device
4209                  */
4210                 if (force_on)
4211                         iommu_disable_protect_mem_regions(iommu);
4212                 return 0;
4213         }
4214
4215         intel_iommu_init_qi(iommu);
4216         iommu_flush_write_buffer(iommu);
4217
4218 #ifdef CONFIG_INTEL_IOMMU_SVM
4219         if (pasid_enabled(iommu) && ecap_prs(iommu->ecap)) {
4220                 ret = intel_svm_enable_prq(iommu);
4221                 if (ret)
4222                         goto disable_iommu;
4223         }
4224 #endif
4225         ret = dmar_set_interrupt(iommu);
4226         if (ret)
4227                 goto disable_iommu;
4228
4229         iommu_set_root_entry(iommu);
4230         iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL);
4231         iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
4232         iommu_enable_translation(iommu);
4233
4234         iommu_disable_protect_mem_regions(iommu);
4235         return 0;
4236
4237 disable_iommu:
4238         disable_dmar_iommu(iommu);
4239 out:
4240         free_dmar_iommu(iommu);
4241         return ret;
4242 }
4243
4244 int dmar_iommu_hotplug(struct dmar_drhd_unit *dmaru, bool insert)
4245 {
4246         int ret = 0;
4247         struct intel_iommu *iommu = dmaru->iommu;
4248
4249         if (!intel_iommu_enabled)
4250                 return 0;
4251         if (iommu == NULL)
4252                 return -EINVAL;
4253
4254         if (insert) {
4255                 ret = intel_iommu_add(dmaru);
4256         } else {
4257                 disable_dmar_iommu(iommu);
4258                 free_dmar_iommu(iommu);
4259         }
4260
4261         return ret;
4262 }
4263
4264 static void intel_iommu_free_dmars(void)
4265 {
4266         struct dmar_rmrr_unit *rmrru, *rmrr_n;
4267         struct dmar_atsr_unit *atsru, *atsr_n;
4268
4269         list_for_each_entry_safe(rmrru, rmrr_n, &dmar_rmrr_units, list) {
4270                 list_del(&rmrru->list);
4271                 dmar_free_dev_scope(&rmrru->devices, &rmrru->devices_cnt);
4272                 kfree(rmrru);
4273         }
4274
4275         list_for_each_entry_safe(atsru, atsr_n, &dmar_atsr_units, list) {
4276                 list_del(&atsru->list);
4277                 intel_iommu_free_atsr(atsru);
4278         }
4279 }
4280
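     /*
      * Decide whether ATS may be used for @dev by walking up to its root port
      * and matching that port against the ATSR units from the DMAR table.
      */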
4281 int dmar_find_matched_atsr_unit(struct pci_dev *dev)
4282 {
4283         int i, ret = 1;
4284         struct pci_bus *bus;
4285         struct pci_dev *bridge = NULL;
4286         struct device *tmp;
4287         struct acpi_dmar_atsr *atsr;
4288         struct dmar_atsr_unit *atsru;
4289
4290         dev = pci_physfn(dev);
4291         for (bus = dev->bus; bus; bus = bus->parent) {
4292                 bridge = bus->self;
4293                 /* If it's an integrated device, allow ATS */
4294                 if (!bridge)
4295                         return 1;
4296                 /* Connected via non-PCIe: no ATS */
4297                 if (!pci_is_pcie(bridge) ||
4298                     pci_pcie_type(bridge) == PCI_EXP_TYPE_PCI_BRIDGE)
4299                         return 0;
4300                 /* If we found the root port, look it up in the ATSR */
4301                 if (pci_pcie_type(bridge) == PCI_EXP_TYPE_ROOT_PORT)
4302                         break;
4303         }
4304
4305         rcu_read_lock();
4306         list_for_each_entry_rcu(atsru, &dmar_atsr_units, list) {
4307                 atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header);
4308                 if (atsr->segment != pci_domain_nr(dev->bus))
4309                         continue;
4310
4311                 for_each_dev_scope(atsru->devices, atsru->devices_cnt, i, tmp)
4312                         if (tmp == &bridge->dev)
4313                                 goto out;
4314
4315                 if (atsru->include_all)
4316                         goto out;
4317         }
4318         ret = 0;
4319 out:
4320         rcu_read_unlock();
4321
4322         return ret;
4323 }
4324
4325 int dmar_iommu_notify_scope_dev(struct dmar_pci_notify_info *info)
4326 {
4327         int ret = 0;
4328         struct dmar_rmrr_unit *rmrru;
4329         struct dmar_atsr_unit *atsru;
4330         struct acpi_dmar_atsr *atsr;
4331         struct acpi_dmar_reserved_memory *rmrr;
4332
4333         if (!intel_iommu_enabled && system_state != SYSTEM_BOOTING)
4334                 return 0;
4335
4336         list_for_each_entry(rmrru, &dmar_rmrr_units, list) {
4337                 rmrr = container_of(rmrru->hdr,
4338                                     struct acpi_dmar_reserved_memory, header);
4339                 if (info->event == BUS_NOTIFY_ADD_DEVICE) {
4340                         ret = dmar_insert_dev_scope(info, (void *)(rmrr + 1),
4341                                 ((void *)rmrr) + rmrr->header.length,
4342                                 rmrr->segment, rmrru->devices,
4343                                 rmrru->devices_cnt);
4344                         if (ret < 0)
4345                                 return ret;
4346                 } else if (info->event == BUS_NOTIFY_DEL_DEVICE) {
4347                         dmar_remove_dev_scope(info, rmrr->segment,
4348                                 rmrru->devices, rmrru->devices_cnt);
4349                 }
4350         }
4351
4352         list_for_each_entry(atsru, &dmar_atsr_units, list) {
4353                 if (atsru->include_all)
4354                         continue;
4355
4356                 atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header);
4357                 if (info->event == BUS_NOTIFY_ADD_DEVICE) {
4358                         ret = dmar_insert_dev_scope(info, (void *)(atsr + 1),
4359                                         (void *)atsr + atsr->header.length,
4360                                         atsr->segment, atsru->devices,
4361                                         atsru->devices_cnt);
4362                         if (ret > 0)
4363                                 break;
4364                         else if (ret < 0)
4365                                 return ret;
4366                 } else if (info->event == BUS_NOTIFY_DEL_DEVICE) {
4367                         if (dmar_remove_dev_scope(info, atsr->segment,
4368                                         atsru->devices, atsru->devices_cnt))
4369                                 break;
4370                 }
4371         }
4372
4373         return 0;
4374 }
4375
4376 /*
4377  * Here we only respond to action of unbound device from driver.
4378  * Here we only respond to a device being unbound from its driver.
4379  *
4380  * A newly added device is not attached to its DMAR domain here yet. That
4381  * happens when the device is mapped to an iova.
4382 static int device_notifier(struct notifier_block *nb,
4383                                   unsigned long action, void *data)
4384 {
4385         struct device *dev = data;
4386         struct dmar_domain *domain;
4387
4388         if (iommu_dummy(dev))
4389                 return 0;
4390
4391         if (action != BUS_NOTIFY_REMOVED_DEVICE)
4392                 return 0;
4393
4394         domain = find_domain(dev);
4395         if (!domain)
4396                 return 0;
4397
4398         dmar_remove_one_dev_info(domain, dev);
4399         if (!domain_type_is_vm_or_si(domain) && list_empty(&domain->devices))
4400                 domain_exit(domain);
4401
4402         return 0;
4403 }
4404
4405 static struct notifier_block device_nb = {
4406         .notifier_call = device_notifier,
4407 };
4408
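     /*
      * Keep the static identity map (si_domain) in sync with memory hotplug:
      * map new ranges when they come online and unmap them again on offline.
      */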
4409 static int intel_iommu_memory_notifier(struct notifier_block *nb,
4410                                        unsigned long val, void *v)
4411 {
4412         struct memory_notify *mhp = v;
4413         unsigned long long start, end;
4414         unsigned long start_vpfn, last_vpfn;
4415
4416         switch (val) {
4417         case MEM_GOING_ONLINE:
4418                 start = mhp->start_pfn << PAGE_SHIFT;
4419                 end = ((mhp->start_pfn + mhp->nr_pages) << PAGE_SHIFT) - 1;
4420                 if (iommu_domain_identity_map(si_domain, start, end)) {
4421                         pr_warn("Failed to build identity map for [%llx-%llx]\n",
4422                                 start, end);
4423                         return NOTIFY_BAD;
4424                 }
4425                 break;
4426
4427         case MEM_OFFLINE:
4428         case MEM_CANCEL_ONLINE:
4429                 start_vpfn = mm_to_dma_pfn(mhp->start_pfn);
4430                 last_vpfn = mm_to_dma_pfn(mhp->start_pfn + mhp->nr_pages - 1);
4431                 while (start_vpfn <= last_vpfn) {
4432                         struct iova *iova;
4433                         struct dmar_drhd_unit *drhd;
4434                         struct intel_iommu *iommu;
4435                         struct page *freelist;
4436
4437                         iova = find_iova(&si_domain->iovad, start_vpfn);
4438                         if (iova == NULL) {
4439                                 pr_debug("Failed to get IOVA for PFN %lx\n",
4440                                          start_vpfn);
4441                                 break;
4442                         }
4443
4444                         iova = split_and_remove_iova(&si_domain->iovad, iova,
4445                                                      start_vpfn, last_vpfn);
4446                         if (iova == NULL) {
4447                                 pr_warn("Failed to split IOVA PFN [%lx-%lx]\n",
4448                                         start_vpfn, last_vpfn);
4449                                 return NOTIFY_BAD;
4450                         }
4451
4452                         freelist = domain_unmap(si_domain, iova->pfn_lo,
4453                                                iova->pfn_hi);
4454
4455                         rcu_read_lock();
4456                         for_each_active_iommu(iommu, drhd)
4457                                 iommu_flush_iotlb_psi(iommu, si_domain,
4458                                         iova->pfn_lo, iova_size(iova),
4459                                         !freelist, 0);
4460                         rcu_read_unlock();
4461                         dma_free_pagelist(freelist);
4462
4463                         start_vpfn = iova->pfn_hi + 1;
4464                         free_iova_mem(iova);
4465                 }
4466                 break;
4467         }
4468
4469         return NOTIFY_OK;
4470 }
4471
4472 static struct notifier_block intel_iommu_memory_nb = {
4473         .notifier_call = intel_iommu_memory_notifier,
4474         .priority = 0
4475 };
4476
4477
4478 static ssize_t intel_iommu_show_version(struct device *dev,
4479                                         struct device_attribute *attr,
4480                                         char *buf)
4481 {
4482         struct intel_iommu *iommu = dev_get_drvdata(dev);
4483         u32 ver = readl(iommu->reg + DMAR_VER_REG);
4484         return sprintf(buf, "%d:%d\n",
4485                        DMAR_VER_MAJOR(ver), DMAR_VER_MINOR(ver));
4486 }
4487 static DEVICE_ATTR(version, S_IRUGO, intel_iommu_show_version, NULL);
4488
4489 static ssize_t intel_iommu_show_address(struct device *dev,
4490                                         struct device_attribute *attr,
4491                                         char *buf)
4492 {
4493         struct intel_iommu *iommu = dev_get_drvdata(dev);
4494         return sprintf(buf, "%llx\n", iommu->reg_phys);
4495 }
4496 static DEVICE_ATTR(address, S_IRUGO, intel_iommu_show_address, NULL);
4497
4498 static ssize_t intel_iommu_show_cap(struct device *dev,
4499                                     struct device_attribute *attr,
4500                                     char *buf)
4501 {
4502         struct intel_iommu *iommu = dev_get_drvdata(dev);
4503         return sprintf(buf, "%llx\n", iommu->cap);
4504 }
4505 static DEVICE_ATTR(cap, S_IRUGO, intel_iommu_show_cap, NULL);
4506
4507 static ssize_t intel_iommu_show_ecap(struct device *dev,
4508                                     struct device_attribute *attr,
4509                                     char *buf)
4510 {
4511         struct intel_iommu *iommu = dev_get_drvdata(dev);
4512         return sprintf(buf, "%llx\n", iommu->ecap);
4513 }
4514 static DEVICE_ATTR(ecap, S_IRUGO, intel_iommu_show_ecap, NULL);
4515
4516 static ssize_t intel_iommu_show_ndoms(struct device *dev,
4517                                       struct device_attribute *attr,
4518                                       char *buf)
4519 {
4520         struct intel_iommu *iommu = dev_get_drvdata(dev);
4521         return sprintf(buf, "%lu\n", cap_ndoms(iommu->cap));
4522 }
4523 static DEVICE_ATTR(domains_supported, S_IRUGO, intel_iommu_show_ndoms, NULL);
4524
4525 static ssize_t intel_iommu_show_ndoms_used(struct device *dev,
4526                                            struct device_attribute *attr,
4527                                            char *buf)
4528 {
4529         struct intel_iommu *iommu = dev_get_drvdata(dev);
4530         return sprintf(buf, "%d\n", bitmap_weight(iommu->domain_ids,
4531                                                   cap_ndoms(iommu->cap)));
4532 }
4533 static DEVICE_ATTR(domains_used, S_IRUGO, intel_iommu_show_ndoms_used, NULL);
4534
4535 static struct attribute *intel_iommu_attrs[] = {
4536         &dev_attr_version.attr,
4537         &dev_attr_address.attr,
4538         &dev_attr_cap.attr,
4539         &dev_attr_ecap.attr,
4540         &dev_attr_domains_supported.attr,
4541         &dev_attr_domains_used.attr,
4542         NULL,
4543 };
4544
4545 static struct attribute_group intel_iommu_group = {
4546         .name = "intel-iommu",
4547         .attrs = intel_iommu_attrs,
4548 };
4549
4550 const struct attribute_group *intel_iommu_groups[] = {
4551         &intel_iommu_group,
4552         NULL,
4553 };
4554
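/*
 * Driver entry point: parse the DMAR table and device scopes, reserve the
 * special IOVA ranges, program the DMAR units via init_dmars(), switch the
 * DMA API over to intel_dma_ops, and register the PM, bus and memory-hotplug
 * hooks.  Failures panic when VT-d was forced on for a TXT/tboot launch.
 */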
4555 int __init intel_iommu_init(void)
4556 {
4557         int ret = -ENODEV;
4558         struct dmar_drhd_unit *drhd;
4559         struct intel_iommu *iommu;
4560
4561         /* VT-d is required for a TXT/tboot launch, so enforce that */
4562         force_on = tboot_force_iommu();
4563
4564         if (iommu_init_mempool()) {
4565                 if (force_on)
4566                         panic("tboot: Failed to initialize iommu memory\n");
4567                 return -ENOMEM;
4568         }
4569
4570         down_write(&dmar_global_lock);
4571         if (dmar_table_init()) {
4572                 if (force_on)
4573                         panic("tboot: Failed to initialize DMAR table\n");
4574                 goto out_free_dmar;
4575         }
4576
4577         if (dmar_dev_scope_init() < 0) {
4578                 if (force_on)
4579                         panic("tboot: Failed to initialize DMAR device scope\n");
4580                 goto out_free_dmar;
4581         }
4582
4583         if (no_iommu || dmar_disabled)
4584                 goto out_free_dmar;
4585
4586         if (list_empty(&dmar_rmrr_units))
4587                 pr_info("No RMRR found\n");
4588
4589         if (list_empty(&dmar_atsr_units))
4590                 pr_info("No ATSR found\n");
4591
4592         if (dmar_init_reserved_ranges()) {
4593                 if (force_on)
4594                         panic("tboot: Failed to reserve iommu ranges\n");
4595                 goto out_free_reserved_range;
4596         }
4597
4598         init_no_remapping_devices();
4599
4600         ret = init_dmars();
4601         if (ret) {
4602                 if (force_on)
4603                         panic("tboot: Failed to initialize DMARs\n");
4604                 pr_err("Initialization failed\n");
4605                 goto out_free_reserved_range;
4606         }
4607         up_write(&dmar_global_lock);
4608         pr_info("Intel(R) Virtualization Technology for Directed I/O\n");
4609
4610         init_timer(&unmap_timer);
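
        /*
         * The hardware IOMMU now provides the DMA mappings, so the swiotlb
         * bounce buffers are not needed; route the DMA API through
         * intel_dma_ops instead.
         */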
4611 #ifdef CONFIG_SWIOTLB
4612         swiotlb = 0;
4613 #endif
4614         dma_ops = &intel_dma_ops;
4615
4616         init_iommu_pm_ops();
4617
4618         for_each_active_iommu(iommu, drhd)
4619                 iommu->iommu_dev = iommu_device_create(NULL, iommu,
4620                                                        intel_iommu_groups,
4621                                                        "%s", iommu->name);
4622
4623         bus_set_iommu(&pci_bus_type, &intel_iommu_ops);
4624         bus_register_notifier(&pci_bus_type, &device_nb);
4625         if (si_domain && !hw_pass_through)
4626                 register_memory_notifier(&intel_iommu_memory_nb);
4627
4628         intel_iommu_enabled = 1;
4629
4630         return 0;
4631
4632 out_free_reserved_range:
4633         put_iova_domain(&reserved_iova_list);
4634 out_free_dmar:
4635         intel_iommu_free_dmars();
4636         up_write(&dmar_global_lock);
4637         iommu_exit_mempool();
4638         return ret;
4639 }
4640
4641 static int domain_context_clear_one_cb(struct pci_dev *pdev, u16 alias, void *opaque)
4642 {
4643         struct intel_iommu *iommu = opaque;
4644
4645         domain_context_clear_one(iommu, PCI_BUS_NUM(alias), alias & 0xff);
4646         return 0;
4647 }
4648
4649 /*
4650  * NB - intel-iommu lacks any sort of reference counting for the users of
4651  * dependent devices.  If multiple endpoints have intersecting dependent
4652  * devices, unbinding the driver from any one of them will possibly leave
4653  * the others unable to operate.
4654  */
4655 static void domain_context_clear(struct intel_iommu *iommu, struct device *dev)
4656 {
4657         if (!iommu || !dev || !dev_is_pci(dev))
4658                 return;
4659
4660         pci_for_each_dma_alias(to_pci_dev(dev), &domain_context_clear_one_cb, iommu);
4661 }
4662
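/*
 * Tear down one device's translation state: disable its dev-IOTLB, clear its
 * context entries, unlink it from its domain and drop the domain's reference
 * on the IOMMU.  Caller must hold device_domain_lock.
 */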
4663 static void __dmar_remove_one_dev_info(struct device_domain_info *info)
4664 {
4665         struct intel_iommu *iommu;
4666         unsigned long flags;
4667
4668         assert_spin_locked(&device_domain_lock);
4669
4670         if (WARN_ON(!info))
4671                 return;
4672
4673         iommu = info->iommu;
4674
4675         if (info->dev) {
4676                 iommu_disable_dev_iotlb(info);
4677                 domain_context_clear(iommu, info->dev);
4678         }
4679
4680         unlink_domain_info(info);
4681
4682         spin_lock_irqsave(&iommu->lock, flags);
4683         domain_detach_iommu(info->domain, iommu);
4684         spin_unlock_irqrestore(&iommu->lock, flags);
4685
4686         free_devinfo_mem(info);
4687 }
4688
4689 static void dmar_remove_one_dev_info(struct dmar_domain *domain,
4690                                      struct device *dev)
4691 {
4692         struct device_domain_info *info;
4693         unsigned long flags;
4694
4695         spin_lock_irqsave(&device_domain_lock, flags);
4696         info = dev->archdata.iommu;
4697         __dmar_remove_one_dev_info(info);
4698         spin_unlock_irqrestore(&device_domain_lock, flags);
4699 }
4700
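/*
 * Minimal domain setup for domains created through the IOMMU API
 * (intel_iommu_domain_alloc()): initialize the IOVA allocator, derive the
 * AGAW from the requested guest address width and allocate the top-level
 * page directory.  The iommu_* capability flags are cleared here and
 * recomputed by domain_update_iommu_cap() in the caller.
 */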
4701 static int md_domain_init(struct dmar_domain *domain, int guest_width)
4702 {
4703         int adjust_width;
4704
4705         init_iova_domain(&domain->iovad, VTD_PAGE_SIZE, IOVA_START_PFN,
4706                         DMA_32BIT_PFN);
4707         domain_reserve_special_ranges(domain);
4708
4709         /* calculate AGAW */
4710         domain->gaw = guest_width;
4711         adjust_width = guestwidth_to_adjustwidth(guest_width);
4712         domain->agaw = width_to_agaw(adjust_width);
4713
4714         domain->iommu_coherency = 0;
4715         domain->iommu_snooping = 0;
4716         domain->iommu_superpage = 0;
4717         domain->max_addr = 0;
4718
4719         /* always allocate the top pgd */
4720         domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid);
4721         if (!domain->pgd)
4722                 return -ENOMEM;
4723         domain_flush_cache(domain, domain->pgd, PAGE_SIZE);
4724         return 0;
4725 }
4726
4727 static struct iommu_domain *intel_iommu_domain_alloc(unsigned type)
4728 {
4729         struct dmar_domain *dmar_domain;
4730         struct iommu_domain *domain;
4731
4732         if (type != IOMMU_DOMAIN_UNMANAGED)
4733                 return NULL;
4734
4735         dmar_domain = alloc_domain(DOMAIN_FLAG_VIRTUAL_MACHINE);
4736         if (!dmar_domain) {
4737                 pr_err("Can't allocate dmar_domain\n");
4738                 return NULL;
4739         }
4740         if (md_domain_init(dmar_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
4741                 pr_err("Domain initialization failed\n");
4742                 domain_exit(dmar_domain);
4743                 return NULL;
4744         }
4745         domain_update_iommu_cap(dmar_domain);
4746
4747         domain = &dmar_domain->domain;
4748         domain->geometry.aperture_start = 0;
4749         domain->geometry.aperture_end   = __DOMAIN_MAX_ADDR(dmar_domain->gaw);
4750         domain->geometry.force_aperture = true;
4751
4752         return domain;
4753 }
4754
4755 static void intel_iommu_domain_free(struct iommu_domain *domain)
4756 {
4757         domain_exit(to_dmar_domain(domain));
4758 }
4759
4760 static int intel_iommu_attach_device(struct iommu_domain *domain,
4761                                      struct device *dev)
4762 {
4763         struct dmar_domain *dmar_domain = to_dmar_domain(domain);
4764         struct intel_iommu *iommu;
4765         int addr_width;
4766         u8 bus, devfn;
4767
4768         if (device_is_rmrr_locked(dev)) {
4769                 dev_warn(dev, "Device is ineligible for IOMMU domain attach due to platform RMRR requirement.  Contact your platform vendor.\n");
4770                 return -EPERM;
4771         }
4772
4773         /* normally dev is not mapped */
4774         if (unlikely(domain_context_mapped(dev))) {
4775                 struct dmar_domain *old_domain;
4776
4777                 old_domain = find_domain(dev);
4778                 if (old_domain) {
4779                         rcu_read_lock();
4780                         dmar_remove_one_dev_info(old_domain, dev);
4781                         rcu_read_unlock();
4782
4783                         if (!domain_type_is_vm_or_si(old_domain) &&
4784                              list_empty(&old_domain->devices))
4785                                 domain_exit(old_domain);
4786                 }
4787         }
4788
4789         iommu = device_to_iommu(dev, &bus, &devfn);
4790         if (!iommu)
4791                 return -ENODEV;
4792
4793         /* check if this iommu agaw is sufficient for max mapped address */
4794         addr_width = agaw_to_width(iommu->agaw);
4795         if (addr_width > cap_mgaw(iommu->cap))
4796                 addr_width = cap_mgaw(iommu->cap);
4797
4798         if (dmar_domain->max_addr > (1LL << addr_width)) {
4799                 pr_err("%s: iommu width (%d) is not sufficient for the mapped address (%llx)\n",
4800                        __func__, addr_width,
4801                        dmar_domain->max_addr);
4802                 return -EFAULT;
4803         }
4804         dmar_domain->gaw = addr_width;
4805
4806         /*
4807          * Knock out extra levels of page tables if necessary
4808          */
4809         while (iommu->agaw < dmar_domain->agaw) {
4810                 struct dma_pte *pte;
4811
4812                 pte = dmar_domain->pgd;
4813                 if (dma_pte_present(pte)) {
4814                         dmar_domain->pgd = (struct dma_pte *)
4815                                 phys_to_virt(dma_pte_addr(pte));
4816                         free_pgtable_page(pte);
4817                 }
4818                 dmar_domain->agaw--;
4819         }
4820
4821         return domain_add_dev_info(dmar_domain, dev);
4822 }
4823
4824 static void intel_iommu_detach_device(struct iommu_domain *domain,
4825                                       struct device *dev)
4826 {
4827         dmar_remove_one_dev_info(to_dmar_domain(domain), dev);
4828 }
4829
4830 static int intel_iommu_map(struct iommu_domain *domain,
4831                            unsigned long iova, phys_addr_t hpa,
4832                            size_t size, int iommu_prot)
4833 {
4834         struct dmar_domain *dmar_domain = to_dmar_domain(domain);
4835         u64 max_addr;
4836         int prot = 0;
4837         int ret;
4838
4839         if (iommu_prot & IOMMU_READ)
4840                 prot |= DMA_PTE_READ;
4841         if (iommu_prot & IOMMU_WRITE)
4842                 prot |= DMA_PTE_WRITE;
4843         if ((iommu_prot & IOMMU_CACHE) && dmar_domain->iommu_snooping)
4844                 prot |= DMA_PTE_SNP;
4845
4846         max_addr = iova + size;
4847         if (dmar_domain->max_addr < max_addr) {
4848                 u64 end;
4849
4850                 /* check if minimum agaw is sufficient for mapped address */
4851                 end = __DOMAIN_MAX_ADDR(dmar_domain->gaw) + 1;
4852                 if (end < max_addr) {
4853                         pr_err("%s: iommu width (%d) is not sufficient for the mapped address (%llx)\n",
4854                                __func__, dmar_domain->gaw,
4855                                max_addr);
4856                         return -EFAULT;
4857                 }
4858                 dmar_domain->max_addr = max_addr;
4859         }
4860         /* Round up size to next multiple of PAGE_SIZE, if it and
4861            the low bits of hpa would take us onto the next page */
4862         size = aligned_nrpages(hpa, size);
4863         ret = domain_pfn_mapping(dmar_domain, iova >> VTD_PAGE_SHIFT,
4864                                  hpa >> VTD_PAGE_SHIFT, size, prot);
4865         return ret;
4866 }
4867
4868 static size_t intel_iommu_unmap(struct iommu_domain *domain,
4869                                 unsigned long iova, size_t size)
4870 {
4871         struct dmar_domain *dmar_domain = to_dmar_domain(domain);
4872         struct page *freelist = NULL;
4873         struct intel_iommu *iommu;
4874         unsigned long start_pfn, last_pfn;
4875         unsigned int npages;
4876         int iommu_id, level = 0;
4877
4878         /* Cope with horrid API which requires us to unmap more than the
4879            size argument if it happens to be a large-page mapping. */
4880         BUG_ON(!pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &level));
4881
4882         if (size < VTD_PAGE_SIZE << level_to_offset_bits(level))
4883                 size = VTD_PAGE_SIZE << level_to_offset_bits(level);
4884
4885         start_pfn = iova >> VTD_PAGE_SHIFT;
4886         last_pfn = (iova + size - 1) >> VTD_PAGE_SHIFT;
4887
4888         freelist = domain_unmap(dmar_domain, start_pfn, last_pfn);
4889
4890         npages = last_pfn - start_pfn + 1;
4891
4892         for_each_domain_iommu(iommu_id, dmar_domain) {
4893                 iommu = g_iommus[iommu_id];
4894
4895                 iommu_flush_iotlb_psi(iommu, dmar_domain, start_pfn,
4896                                       npages, !freelist, 0);
4897         }
4898
4899         dma_free_pagelist(freelist);
4900
4901         if (dmar_domain->max_addr == iova + size)
4902                 dmar_domain->max_addr = iova;
4903
4904         return size;
4905 }
4906
4907 static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain,
4908                                             dma_addr_t iova)
4909 {
4910         struct dmar_domain *dmar_domain = to_dmar_domain(domain);
4911         struct dma_pte *pte;
4912         int level = 0;
4913         u64 phys = 0;
4914
4915         pte = pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &level);
4916         if (pte)
4917                 phys = dma_pte_addr(pte);
4918
4919         return phys;
4920 }
4921
4922 static bool intel_iommu_capable(enum iommu_cap cap)
4923 {
4924         if (cap == IOMMU_CAP_CACHE_COHERENCY)
4925                 return domain_update_iommu_snooping(NULL) == 1;
4926         if (cap == IOMMU_CAP_INTR_REMAP)
4927                 return irq_remapping_enabled == 1;
4928
4929         return false;
4930 }
4931
4932 static int intel_iommu_add_device(struct device *dev)
4933 {
4934         struct intel_iommu *iommu;
4935         struct iommu_group *group;
4936         u8 bus, devfn;
4937
4938         iommu = device_to_iommu(dev, &bus, &devfn);
4939         if (!iommu)
4940                 return -ENODEV;
4941
4942         iommu_device_link(iommu->iommu_dev, dev);
4943
4944         group = iommu_group_get_for_dev(dev);
4945
4946         if (IS_ERR(group))
4947                 return PTR_ERR(group);
4948
4949         iommu_group_put(group);
4950         return 0;
4951 }
4952
4953 static void intel_iommu_remove_device(struct device *dev)
4954 {
4955         struct intel_iommu *iommu;
4956         u8 bus, devfn;
4957
4958         iommu = device_to_iommu(dev, &bus, &devfn);
4959         if (!iommu)
4960                 return;
4961
4962         iommu_group_remove_device(dev);
4963
4964         iommu_device_unlink(iommu->iommu_dev, dev);
4965 }
4966
4967 #ifdef CONFIG_INTEL_IOMMU_SVM
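/*
 * Prepare a device's extended context entry for SVM use: point it at the
 * PASID and PASID-state tables, pick a PASID-capable translation type, and
 * record the domain/source IDs and dev-IOTLB parameters in the intel_svm_dev.
 */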
4968 int intel_iommu_enable_pasid(struct intel_iommu *iommu, struct intel_svm_dev *sdev)
4969 {
4970         struct device_domain_info *info;
4971         struct context_entry *context;
4972         struct dmar_domain *domain;
4973         unsigned long flags;
4974         u64 ctx_lo;
4975         int ret;
4976
4977         domain = get_valid_domain_for_dev(sdev->dev);
4978         if (!domain)
4979                 return -EINVAL;
4980
4981         spin_lock_irqsave(&device_domain_lock, flags);
4982         spin_lock(&iommu->lock);
4983
4984         ret = -EINVAL;
4985         info = sdev->dev->archdata.iommu;
4986         if (!info || !info->pasid_supported)
4987                 goto out;
4988
4989         context = iommu_context_addr(iommu, info->bus, info->devfn, 0);
4990         if (WARN_ON(!context))
4991                 goto out;
4992
4993         ctx_lo = context[0].lo;
4994
4995         sdev->did = domain->iommu_did[iommu->seq_id];
4996         sdev->sid = PCI_DEVID(info->bus, info->devfn);
4997
4998         if (!(ctx_lo & CONTEXT_PASIDE)) {
4999                 context[1].hi = (u64)virt_to_phys(iommu->pasid_state_table);
5000                 context[1].lo = (u64)virt_to_phys(iommu->pasid_table) | ecap_pss(iommu->ecap);
5001                 wmb();
5002                 /* CONTEXT_TT_MULTI_LEVEL and CONTEXT_TT_DEV_IOTLB are both
5003                  * extended to permit requests-with-PASID if the PASIDE bit
5004                  * is set, which makes sense. For CONTEXT_TT_PASS_THROUGH,
5005                  * however, the PASIDE bit is ignored and requests-with-PASID
5006                  * are unconditionally blocked, which makes less sense.
5007                  * So convert from CONTEXT_TT_PASS_THROUGH to one of the new
5008                  * "guest mode" translation types depending on whether ATS
5009                  * is available or not. Annoyingly, we can't use the new
5010                  * modes *unless* PASIDE is set. */
5011                 if ((ctx_lo & CONTEXT_TT_MASK) == (CONTEXT_TT_PASS_THROUGH << 2)) {
5012                         ctx_lo &= ~CONTEXT_TT_MASK;
5013                         if (info->ats_supported)
5014                                 ctx_lo |= CONTEXT_TT_PT_PASID_DEV_IOTLB << 2;
5015                         else
5016                                 ctx_lo |= CONTEXT_TT_PT_PASID << 2;
5017                 }
5018                 ctx_lo |= CONTEXT_PASIDE;
5019                 if (iommu->pasid_state_table)
5020                         ctx_lo |= CONTEXT_DINVE;
5021                 if (info->pri_supported)
5022                         ctx_lo |= CONTEXT_PRS;
5023                 context[0].lo = ctx_lo;
5024                 wmb();
5025                 iommu->flush.flush_context(iommu, sdev->did, sdev->sid,
5026                                            DMA_CCMD_MASK_NOBIT,
5027                                            DMA_CCMD_DEVICE_INVL);
5028         }
5029
5030         /* Enable PASID support in the device, if it wasn't already */
5031         if (!info->pasid_enabled)
5032                 iommu_enable_dev_iotlb(info);
5033
5034         if (info->ats_enabled) {
5035                 sdev->dev_iotlb = 1;
5036                 sdev->qdep = info->ats_qdep;
5037                 if (sdev->qdep >= QI_DEV_EIOTLB_MAX_INVS)
5038                         sdev->qdep = 0;
5039         }
5040         ret = 0;
5041
5042  out:
5043         spin_unlock(&iommu->lock);
5044         spin_unlock_irqrestore(&device_domain_lock, flags);
5045
5046         return ret;
5047 }
5048
5049 struct intel_iommu *intel_svm_device_to_iommu(struct device *dev)
5050 {
5051         struct intel_iommu *iommu;
5052         u8 bus, devfn;
5053
5054         if (iommu_dummy(dev)) {
5055                 dev_warn(dev,
5056                          "No IOMMU translation for device; cannot enable SVM\n");
5057                 return NULL;
5058         }
5059
5060         iommu = device_to_iommu(dev, &bus, &devfn);
5061         if (!iommu) {
5062                 dev_err(dev, "No IOMMU for device; cannot enable SVM\n");
5063                 return NULL;
5064         }
5065
5066         if (!iommu->pasid_table) {
5067                 dev_err(dev, "PASID not enabled on IOMMU; cannot enable SVM\n");
5068                 return NULL;
5069         }
5070
5071         return iommu;
5072 }
5073 #endif /* CONFIG_INTEL_IOMMU_SVM */
5074
5075 static const struct iommu_ops intel_iommu_ops = {
5076         .capable        = intel_iommu_capable,
5077         .domain_alloc   = intel_iommu_domain_alloc,
5078         .domain_free    = intel_iommu_domain_free,
5079         .attach_dev     = intel_iommu_attach_device,
5080         .detach_dev     = intel_iommu_detach_device,
5081         .map            = intel_iommu_map,
5082         .unmap          = intel_iommu_unmap,
5083         .map_sg         = default_iommu_map_sg,
5084         .iova_to_phys   = intel_iommu_iova_to_phys,
5085         .add_device     = intel_iommu_add_device,
5086         .remove_device  = intel_iommu_remove_device,
5087         .pgsize_bitmap  = INTEL_IOMMU_PGSIZES,
5088 };
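
/*
 * Illustrative only: other kernel code normally reaches these callbacks
 * through the generic IOMMU API rather than through intel_iommu_ops
 * directly, along the lines of:
 *
 *        struct iommu_domain *dom = iommu_domain_alloc(&pci_bus_type);
 *
 *        if (dom && !iommu_attach_device(dom, &pdev->dev))
 *                iommu_map(dom, iova, phys, size, IOMMU_READ | IOMMU_WRITE);
 *
 * which ends up in intel_iommu_domain_alloc(), intel_iommu_attach_device()
 * and intel_iommu_map() above.
 */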
5089
5090 static void quirk_iommu_g4x_gfx(struct pci_dev *dev)
5091 {
5092         /* G4x/GM45 integrated gfx dmar support is totally busted. */
5093         pr_info("Disabling IOMMU for graphics on this chipset\n");
5094         dmar_map_gfx = 0;
5095 }
5096
5097 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_g4x_gfx);
5098 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e00, quirk_iommu_g4x_gfx);
5099 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e10, quirk_iommu_g4x_gfx);
5100 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e20, quirk_iommu_g4x_gfx);
5101 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e30, quirk_iommu_g4x_gfx);
5102 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e40, quirk_iommu_g4x_gfx);
5103 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e90, quirk_iommu_g4x_gfx);
5104
5105 static void quirk_iommu_rwbf(struct pci_dev *dev)
5106 {
5107         /*
5108          * Mobile 4 Series Chipset neglects to set RWBF capability,
5109          * but needs it. Same seems to hold for the desktop versions.
5110          */
5111         pr_info("Forcing write-buffer flush capability\n");
5112         rwbf_quirk = 1;
5113 }
5114
5115 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_rwbf);
5116 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e00, quirk_iommu_rwbf);
5117 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e10, quirk_iommu_rwbf);
5118 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e20, quirk_iommu_rwbf);
5119 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e30, quirk_iommu_rwbf);
5120 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e40, quirk_iommu_rwbf);
5121 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e90, quirk_iommu_rwbf);
5122
5123 #define GGC 0x52
5124 #define GGC_MEMORY_SIZE_MASK    (0xf << 8)
5125 #define GGC_MEMORY_SIZE_NONE    (0x0 << 8)
5126 #define GGC_MEMORY_SIZE_1M      (0x1 << 8)
5127 #define GGC_MEMORY_SIZE_2M      (0x3 << 8)
5128 #define GGC_MEMORY_VT_ENABLED   (0x8 << 8)
5129 #define GGC_MEMORY_SIZE_2M_VT   (0x9 << 8)
5130 #define GGC_MEMORY_SIZE_3M_VT   (0xa << 8)
5131 #define GGC_MEMORY_SIZE_4M_VT   (0xb << 8)
5132
5133 static void quirk_calpella_no_shadow_gtt(struct pci_dev *dev)
5134 {
5135         unsigned short ggc;
5136
5137         if (pci_read_config_word(dev, GGC, &ggc))
5138                 return;
5139
5140         if (!(ggc & GGC_MEMORY_VT_ENABLED)) {
5141                 pr_info("BIOS has allocated no shadow GTT; disabling IOMMU for graphics\n");
5142                 dmar_map_gfx = 0;
5143         } else if (dmar_map_gfx) {
5144                 /* we have to ensure the gfx device is idle before we flush */
5145                 pr_info("Disabling batched IOTLB flush on Ironlake\n");
5146                 intel_iommu_strict = 1;
5147         }
5148 }
5149 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0040, quirk_calpella_no_shadow_gtt);
5150 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0044, quirk_calpella_no_shadow_gtt);
5151 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0062, quirk_calpella_no_shadow_gtt);
5152 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x006a, quirk_calpella_no_shadow_gtt);
5153
5154 /* On Tylersburg chipsets, some BIOSes have been known to enable the
5155    ISOCH DMAR unit for the Azalia sound device, but not give it any
5156    TLB entries, which causes it to deadlock. Check for that.  We do
5157    this in a function called from init_dmars(), instead of in a PCI
5158    quirk, because we don't want to print the obnoxious "BIOS broken"
5159    message if VT-d is actually disabled.
5160 */
5161 static void __init check_tylersburg_isoch(void)
5162 {
5163         struct pci_dev *pdev;
5164         uint32_t vtisochctrl;
5165
5166         /* If there's no Azalia in the system anyway, forget it. */
5167         pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x3a3e, NULL);
5168         if (!pdev)
5169                 return;
5170         pci_dev_put(pdev);
5171
5172         /* System Management Registers. Might be hidden, in which case
5173            we can't do the sanity check. But that's OK, because the
5174            known-broken BIOSes _don't_ actually hide it, so far. */
5175         pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x342e, NULL);
5176         if (!pdev)
5177                 return;
5178
5179         if (pci_read_config_dword(pdev, 0x188, &vtisochctrl)) {
5180                 pci_dev_put(pdev);
5181                 return;
5182         }
5183
5184         pci_dev_put(pdev);
5185
5186         /* If Azalia DMA is routed to the non-isoch DMAR unit, fine. */
5187         if (vtisochctrl & 1)
5188                 return;
5189
5190         /* Drop all bits other than the number of TLB entries */
5191         vtisochctrl &= 0x1c;
5192
5193         /* If we have the recommended number of TLB entries (16), fine. */
5194         if (vtisochctrl == 0x10)
5195                 return;
5196
5197         /* Zero TLB entries? You get to ride the short bus to school. */
5198         if (!vtisochctrl) {
5199                 WARN(1, "Your BIOS is broken; DMA routed to ISOCH DMAR unit but no TLB space.\n"
5200                      "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
5201                      dmi_get_system_info(DMI_BIOS_VENDOR),
5202                      dmi_get_system_info(DMI_BIOS_VERSION),
5203                      dmi_get_system_info(DMI_PRODUCT_VERSION));
5204                 iommu_identity_mapping |= IDENTMAP_AZALIA;
5205                 return;
5206         }
5207
5208         pr_warn("Recommended TLB entry count for ISOCH unit is 16; your BIOS set %d\n",
5209                vtisochctrl);
5210 }