iommu/arm-smmu: Support DMA-API domains
[cascardo/linux.git] drivers/iommu/arm-smmu-v3.c
1 /*
2  * IOMMU API for ARM architected SMMUv3 implementations.
3  *
4  * This program is free software; you can redistribute it and/or modify
5  * it under the terms of the GNU General Public License version 2 as
6  * published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11  * GNU General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public License
14  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
15  *
16  * Copyright (C) 2015 ARM Limited
17  *
18  * Author: Will Deacon <will.deacon@arm.com>
19  *
20  * This driver is powered by bad coffee and bombay mix.
21  */
22
23 #include <linux/delay.h>
24 #include <linux/dma-iommu.h>
25 #include <linux/err.h>
26 #include <linux/interrupt.h>
27 #include <linux/iommu.h>
28 #include <linux/iopoll.h>
29 #include <linux/module.h>
30 #include <linux/msi.h>
31 #include <linux/of.h>
32 #include <linux/of_address.h>
33 #include <linux/of_platform.h>
34 #include <linux/pci.h>
35 #include <linux/platform_device.h>
36
37 #include "io-pgtable.h"
38
39 /* MMIO registers */
40 #define ARM_SMMU_IDR0                   0x0
41 #define IDR0_ST_LVL_SHIFT               27
42 #define IDR0_ST_LVL_MASK                0x3
43 #define IDR0_ST_LVL_2LVL                (1 << IDR0_ST_LVL_SHIFT)
44 #define IDR0_STALL_MODEL_SHIFT          24
45 #define IDR0_STALL_MODEL_MASK           0x3
46 #define IDR0_STALL_MODEL_STALL          (0 << IDR0_STALL_MODEL_SHIFT)
47 #define IDR0_STALL_MODEL_FORCE          (2 << IDR0_STALL_MODEL_SHIFT)
48 #define IDR0_TTENDIAN_SHIFT             21
49 #define IDR0_TTENDIAN_MASK              0x3
50 #define IDR0_TTENDIAN_LE                (2 << IDR0_TTENDIAN_SHIFT)
51 #define IDR0_TTENDIAN_BE                (3 << IDR0_TTENDIAN_SHIFT)
52 #define IDR0_TTENDIAN_MIXED             (0 << IDR0_TTENDIAN_SHIFT)
53 #define IDR0_CD2L                       (1 << 19)
54 #define IDR0_VMID16                     (1 << 18)
55 #define IDR0_PRI                        (1 << 16)
56 #define IDR0_SEV                        (1 << 14)
57 #define IDR0_MSI                        (1 << 13)
58 #define IDR0_ASID16                     (1 << 12)
59 #define IDR0_ATS                        (1 << 10)
60 #define IDR0_HYP                        (1 << 9)
61 #define IDR0_COHACC                     (1 << 4)
62 #define IDR0_TTF_SHIFT                  2
63 #define IDR0_TTF_MASK                   0x3
64 #define IDR0_TTF_AARCH64                (2 << IDR0_TTF_SHIFT)
65 #define IDR0_TTF_AARCH32_64             (3 << IDR0_TTF_SHIFT)
66 #define IDR0_S1P                        (1 << 1)
67 #define IDR0_S2P                        (1 << 0)
68
69 #define ARM_SMMU_IDR1                   0x4
70 #define IDR1_TABLES_PRESET              (1 << 30)
71 #define IDR1_QUEUES_PRESET              (1 << 29)
72 #define IDR1_REL                        (1 << 28)
73 #define IDR1_CMDQ_SHIFT                 21
74 #define IDR1_CMDQ_MASK                  0x1f
75 #define IDR1_EVTQ_SHIFT                 16
76 #define IDR1_EVTQ_MASK                  0x1f
77 #define IDR1_PRIQ_SHIFT                 11
78 #define IDR1_PRIQ_MASK                  0x1f
79 #define IDR1_SSID_SHIFT                 6
80 #define IDR1_SSID_MASK                  0x1f
81 #define IDR1_SID_SHIFT                  0
82 #define IDR1_SID_MASK                   0x3f
83
84 #define ARM_SMMU_IDR5                   0x14
85 #define IDR5_STALL_MAX_SHIFT            16
86 #define IDR5_STALL_MAX_MASK             0xffff
87 #define IDR5_GRAN64K                    (1 << 6)
88 #define IDR5_GRAN16K                    (1 << 5)
89 #define IDR5_GRAN4K                     (1 << 4)
90 #define IDR5_OAS_SHIFT                  0
91 #define IDR5_OAS_MASK                   0x7
92 #define IDR5_OAS_32_BIT                 (0 << IDR5_OAS_SHIFT)
93 #define IDR5_OAS_36_BIT                 (1 << IDR5_OAS_SHIFT)
94 #define IDR5_OAS_40_BIT                 (2 << IDR5_OAS_SHIFT)
95 #define IDR5_OAS_42_BIT                 (3 << IDR5_OAS_SHIFT)
96 #define IDR5_OAS_44_BIT                 (4 << IDR5_OAS_SHIFT)
97 #define IDR5_OAS_48_BIT                 (5 << IDR5_OAS_SHIFT)
98
99 #define ARM_SMMU_CR0                    0x20
100 #define CR0_CMDQEN                      (1 << 3)
101 #define CR0_EVTQEN                      (1 << 2)
102 #define CR0_PRIQEN                      (1 << 1)
103 #define CR0_SMMUEN                      (1 << 0)
104
105 #define ARM_SMMU_CR0ACK                 0x24
106
107 #define ARM_SMMU_CR1                    0x28
108 #define CR1_SH_NSH                      0
109 #define CR1_SH_OSH                      2
110 #define CR1_SH_ISH                      3
111 #define CR1_CACHE_NC                    0
112 #define CR1_CACHE_WB                    1
113 #define CR1_CACHE_WT                    2
114 #define CR1_TABLE_SH_SHIFT              10
115 #define CR1_TABLE_OC_SHIFT              8
116 #define CR1_TABLE_IC_SHIFT              6
117 #define CR1_QUEUE_SH_SHIFT              4
118 #define CR1_QUEUE_OC_SHIFT              2
119 #define CR1_QUEUE_IC_SHIFT              0
120
121 #define ARM_SMMU_CR2                    0x2c
122 #define CR2_PTM                         (1 << 2)
123 #define CR2_RECINVSID                   (1 << 1)
124 #define CR2_E2H                         (1 << 0)
125
126 #define ARM_SMMU_IRQ_CTRL               0x50
127 #define IRQ_CTRL_EVTQ_IRQEN             (1 << 2)
128 #define IRQ_CTRL_PRIQ_IRQEN             (1 << 1)
129 #define IRQ_CTRL_GERROR_IRQEN           (1 << 0)
130
131 #define ARM_SMMU_IRQ_CTRLACK            0x54
132
133 #define ARM_SMMU_GERROR                 0x60
134 #define GERROR_SFM_ERR                  (1 << 8)
135 #define GERROR_MSI_GERROR_ABT_ERR       (1 << 7)
136 #define GERROR_MSI_PRIQ_ABT_ERR         (1 << 6)
137 #define GERROR_MSI_EVTQ_ABT_ERR         (1 << 5)
138 #define GERROR_MSI_CMDQ_ABT_ERR         (1 << 4)
139 #define GERROR_PRIQ_ABT_ERR             (1 << 3)
140 #define GERROR_EVTQ_ABT_ERR             (1 << 2)
141 #define GERROR_CMDQ_ERR                 (1 << 0)
142 #define GERROR_ERR_MASK                 0xfd
143
144 #define ARM_SMMU_GERRORN                0x64
145
146 #define ARM_SMMU_GERROR_IRQ_CFG0        0x68
147 #define ARM_SMMU_GERROR_IRQ_CFG1        0x70
148 #define ARM_SMMU_GERROR_IRQ_CFG2        0x74
149
150 #define ARM_SMMU_STRTAB_BASE            0x80
151 #define STRTAB_BASE_RA                  (1UL << 62)
152 #define STRTAB_BASE_ADDR_SHIFT          6
153 #define STRTAB_BASE_ADDR_MASK           0x3ffffffffffUL
154
155 #define ARM_SMMU_STRTAB_BASE_CFG        0x88
156 #define STRTAB_BASE_CFG_LOG2SIZE_SHIFT  0
157 #define STRTAB_BASE_CFG_LOG2SIZE_MASK   0x3f
158 #define STRTAB_BASE_CFG_SPLIT_SHIFT     6
159 #define STRTAB_BASE_CFG_SPLIT_MASK      0x1f
160 #define STRTAB_BASE_CFG_FMT_SHIFT       16
161 #define STRTAB_BASE_CFG_FMT_MASK        0x3
162 #define STRTAB_BASE_CFG_FMT_LINEAR      (0 << STRTAB_BASE_CFG_FMT_SHIFT)
163 #define STRTAB_BASE_CFG_FMT_2LVL        (1 << STRTAB_BASE_CFG_FMT_SHIFT)
164
165 #define ARM_SMMU_CMDQ_BASE              0x90
166 #define ARM_SMMU_CMDQ_PROD              0x98
167 #define ARM_SMMU_CMDQ_CONS              0x9c
168
169 #define ARM_SMMU_EVTQ_BASE              0xa0
170 #define ARM_SMMU_EVTQ_PROD              0x100a8
171 #define ARM_SMMU_EVTQ_CONS              0x100ac
172 #define ARM_SMMU_EVTQ_IRQ_CFG0          0xb0
173 #define ARM_SMMU_EVTQ_IRQ_CFG1          0xb8
174 #define ARM_SMMU_EVTQ_IRQ_CFG2          0xbc
175
176 #define ARM_SMMU_PRIQ_BASE              0xc0
177 #define ARM_SMMU_PRIQ_PROD              0x100c8
178 #define ARM_SMMU_PRIQ_CONS              0x100cc
179 #define ARM_SMMU_PRIQ_IRQ_CFG0          0xd0
180 #define ARM_SMMU_PRIQ_IRQ_CFG1          0xd8
181 #define ARM_SMMU_PRIQ_IRQ_CFG2          0xdc
182
183 /* Common MSI config fields */
184 #define MSI_CFG0_ADDR_SHIFT             2
185 #define MSI_CFG0_ADDR_MASK              0x3fffffffffffUL
186 #define MSI_CFG2_SH_SHIFT               4
187 #define MSI_CFG2_SH_NSH                 (0UL << MSI_CFG2_SH_SHIFT)
188 #define MSI_CFG2_SH_OSH                 (2UL << MSI_CFG2_SH_SHIFT)
189 #define MSI_CFG2_SH_ISH                 (3UL << MSI_CFG2_SH_SHIFT)
190 #define MSI_CFG2_MEMATTR_SHIFT          0
191 #define MSI_CFG2_MEMATTR_DEVICE_nGnRE   (0x1 << MSI_CFG2_MEMATTR_SHIFT)
192
193 #define Q_IDX(q, p)                     ((p) & ((1 << (q)->max_n_shift) - 1))
194 #define Q_WRP(q, p)                     ((p) & (1 << (q)->max_n_shift))
195 #define Q_OVERFLOW_FLAG                 (1 << 31)
196 #define Q_OVF(q, p)                     ((p) & Q_OVERFLOW_FLAG)
197 #define Q_ENT(q, p)                     ((q)->base +                    \
198                                          Q_IDX(q, p) * (q)->ent_dwords)
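/*
 * For example, with max_n_shift == 8 the queue has 256 slots: bits [7:0] of
 * prod/cons are the index (Q_IDX), bit 8 is the wrap flag (Q_WRP) and bit 31
 * is the overflow flag (Q_OVF).  Because the increment helpers below operate
 * on (Q_WRP | Q_IDX), stepping past the last index toggles the wrap flag
 * automatically:
 *
 *	prod == 0x0ff: Q_IDX = 0xff, Q_WRP = 0
 *	prod +  1    : 0x100 -> Q_IDX = 0x00, Q_WRP set
 *
 * The queue is full when the indices match but the wrap flags differ, and
 * empty when both match (see queue_full()/queue_empty() below).
 */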
199
200 #define Q_BASE_RWA                      (1UL << 62)
201 #define Q_BASE_ADDR_SHIFT               5
202 #define Q_BASE_ADDR_MASK                0xfffffffffffUL
203 #define Q_BASE_LOG2SIZE_SHIFT           0
204 #define Q_BASE_LOG2SIZE_MASK            0x1fUL
205
206 /*
207  * Stream table.
208  *
209  * Linear: Enough to cover 1 << IDR1.SIDSIZE entries
210  * 2lvl: 128k L1 entries,
211  *       256 lazy entries per table (each table covers a PCI bus)
212  */
213 #define STRTAB_L1_SZ_SHIFT              20
214 #define STRTAB_SPLIT                    8
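/*
 * For example, with STRTAB_SPLIT == 8, StreamID 0xabcd selects level-1
 * descriptor 0xab (sid >> STRTAB_SPLIT) and STE 0xcd within the 256-entry
 * level-2 table that the descriptor points to.  The level-2 tables are
 * allocated lazily, in arm_smmu_init_l2_strtab() below.
 */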
215
216 #define STRTAB_L1_DESC_DWORDS           1
217 #define STRTAB_L1_DESC_SPAN_SHIFT       0
218 #define STRTAB_L1_DESC_SPAN_MASK        0x1fUL
219 #define STRTAB_L1_DESC_L2PTR_SHIFT      6
220 #define STRTAB_L1_DESC_L2PTR_MASK       0x3ffffffffffUL
221
222 #define STRTAB_STE_DWORDS               8
223 #define STRTAB_STE_0_V                  (1UL << 0)
224 #define STRTAB_STE_0_CFG_SHIFT          1
225 #define STRTAB_STE_0_CFG_MASK           0x7UL
226 #define STRTAB_STE_0_CFG_ABORT          (0UL << STRTAB_STE_0_CFG_SHIFT)
227 #define STRTAB_STE_0_CFG_BYPASS         (4UL << STRTAB_STE_0_CFG_SHIFT)
228 #define STRTAB_STE_0_CFG_S1_TRANS       (5UL << STRTAB_STE_0_CFG_SHIFT)
229 #define STRTAB_STE_0_CFG_S2_TRANS       (6UL << STRTAB_STE_0_CFG_SHIFT)
230
231 #define STRTAB_STE_0_S1FMT_SHIFT        4
232 #define STRTAB_STE_0_S1FMT_LINEAR       (0UL << STRTAB_STE_0_S1FMT_SHIFT)
233 #define STRTAB_STE_0_S1CTXPTR_SHIFT     6
234 #define STRTAB_STE_0_S1CTXPTR_MASK      0x3ffffffffffUL
235 #define STRTAB_STE_0_S1CDMAX_SHIFT      59
236 #define STRTAB_STE_0_S1CDMAX_MASK       0x1fUL
237
238 #define STRTAB_STE_1_S1C_CACHE_NC       0UL
239 #define STRTAB_STE_1_S1C_CACHE_WBRA     1UL
240 #define STRTAB_STE_1_S1C_CACHE_WT       2UL
241 #define STRTAB_STE_1_S1C_CACHE_WB       3UL
242 #define STRTAB_STE_1_S1C_SH_NSH         0UL
243 #define STRTAB_STE_1_S1C_SH_OSH         2UL
244 #define STRTAB_STE_1_S1C_SH_ISH         3UL
245 #define STRTAB_STE_1_S1CIR_SHIFT        2
246 #define STRTAB_STE_1_S1COR_SHIFT        4
247 #define STRTAB_STE_1_S1CSH_SHIFT        6
248
249 #define STRTAB_STE_1_S1STALLD           (1UL << 27)
250
251 #define STRTAB_STE_1_EATS_ABT           0UL
252 #define STRTAB_STE_1_EATS_TRANS         1UL
253 #define STRTAB_STE_1_EATS_S1CHK         2UL
254 #define STRTAB_STE_1_EATS_SHIFT         28
255
256 #define STRTAB_STE_1_STRW_NSEL1         0UL
257 #define STRTAB_STE_1_STRW_EL2           2UL
258 #define STRTAB_STE_1_STRW_SHIFT         30
259
260 #define STRTAB_STE_1_SHCFG_INCOMING     1UL
261 #define STRTAB_STE_1_SHCFG_SHIFT        44
262
263 #define STRTAB_STE_2_S2VMID_SHIFT       0
264 #define STRTAB_STE_2_S2VMID_MASK        0xffffUL
265 #define STRTAB_STE_2_VTCR_SHIFT         32
266 #define STRTAB_STE_2_VTCR_MASK          0x7ffffUL
267 #define STRTAB_STE_2_S2AA64             (1UL << 51)
268 #define STRTAB_STE_2_S2ENDI             (1UL << 52)
269 #define STRTAB_STE_2_S2PTW              (1UL << 54)
270 #define STRTAB_STE_2_S2R                (1UL << 58)
271
272 #define STRTAB_STE_3_S2TTB_SHIFT        4
273 #define STRTAB_STE_3_S2TTB_MASK         0xfffffffffffUL
274
275 /* Context descriptor (stage-1 only) */
276 #define CTXDESC_CD_DWORDS               8
277 #define CTXDESC_CD_0_TCR_T0SZ_SHIFT     0
278 #define ARM64_TCR_T0SZ_SHIFT            0
279 #define ARM64_TCR_T0SZ_MASK             0x1fUL
280 #define CTXDESC_CD_0_TCR_TG0_SHIFT      6
281 #define ARM64_TCR_TG0_SHIFT             14
282 #define ARM64_TCR_TG0_MASK              0x3UL
283 #define CTXDESC_CD_0_TCR_IRGN0_SHIFT    8
284 #define ARM64_TCR_IRGN0_SHIFT           8
285 #define ARM64_TCR_IRGN0_MASK            0x3UL
286 #define CTXDESC_CD_0_TCR_ORGN0_SHIFT    10
287 #define ARM64_TCR_ORGN0_SHIFT           10
288 #define ARM64_TCR_ORGN0_MASK            0x3UL
289 #define CTXDESC_CD_0_TCR_SH0_SHIFT      12
290 #define ARM64_TCR_SH0_SHIFT             12
291 #define ARM64_TCR_SH0_MASK              0x3UL
292 #define CTXDESC_CD_0_TCR_EPD0_SHIFT     14
293 #define ARM64_TCR_EPD0_SHIFT            7
294 #define ARM64_TCR_EPD0_MASK             0x1UL
295 #define CTXDESC_CD_0_TCR_EPD1_SHIFT     30
296 #define ARM64_TCR_EPD1_SHIFT            23
297 #define ARM64_TCR_EPD1_MASK             0x1UL
298
299 #define CTXDESC_CD_0_ENDI               (1UL << 15)
300 #define CTXDESC_CD_0_V                  (1UL << 31)
301
302 #define CTXDESC_CD_0_TCR_IPS_SHIFT      32
303 #define ARM64_TCR_IPS_SHIFT             32
304 #define ARM64_TCR_IPS_MASK              0x7UL
305 #define CTXDESC_CD_0_TCR_TBI0_SHIFT     38
306 #define ARM64_TCR_TBI0_SHIFT            37
307 #define ARM64_TCR_TBI0_MASK             0x1UL
308
309 #define CTXDESC_CD_0_AA64               (1UL << 41)
310 #define CTXDESC_CD_0_R                  (1UL << 45)
311 #define CTXDESC_CD_0_A                  (1UL << 46)
312 #define CTXDESC_CD_0_ASET_SHIFT         47
313 #define CTXDESC_CD_0_ASET_SHARED        (0UL << CTXDESC_CD_0_ASET_SHIFT)
314 #define CTXDESC_CD_0_ASET_PRIVATE       (1UL << CTXDESC_CD_0_ASET_SHIFT)
315 #define CTXDESC_CD_0_ASID_SHIFT         48
316 #define CTXDESC_CD_0_ASID_MASK          0xffffUL
317
318 #define CTXDESC_CD_1_TTB0_SHIFT         4
319 #define CTXDESC_CD_1_TTB0_MASK          0xfffffffffffUL
320
321 #define CTXDESC_CD_3_MAIR_SHIFT         0
322
323 /* Convert between AArch64 (CPU) TCR format and SMMU CD format */
324 #define ARM_SMMU_TCR2CD(tcr, fld)                                       \
325         (((tcr) >> ARM64_TCR_##fld##_SHIFT & ARM64_TCR_##fld##_MASK)    \
326          << CTXDESC_CD_0_TCR_##fld##_SHIFT)
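/*
 * For example, ARM_SMMU_TCR2CD(tcr, TG0) expands to
 *
 *	((tcr) >> 14 & 0x3) << 6
 *
 * i.e. it extracts TCR_EL1.TG0 from its CPU position (bits [15:14]) and
 * re-packs it where the context descriptor expects it (bits [7:6]).
 */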
327
328 /* Command queue */
329 #define CMDQ_ENT_DWORDS                 2
330 #define CMDQ_MAX_SZ_SHIFT               8
331
332 #define CMDQ_ERR_SHIFT                  24
333 #define CMDQ_ERR_MASK                   0x7f
334 #define CMDQ_ERR_CERROR_NONE_IDX        0
335 #define CMDQ_ERR_CERROR_ILL_IDX         1
336 #define CMDQ_ERR_CERROR_ABT_IDX         2
337
338 #define CMDQ_0_OP_SHIFT                 0
339 #define CMDQ_0_OP_MASK                  0xffUL
340 #define CMDQ_0_SSV                      (1UL << 11)
341
342 #define CMDQ_PREFETCH_0_SID_SHIFT       32
343 #define CMDQ_PREFETCH_1_SIZE_SHIFT      0
344 #define CMDQ_PREFETCH_1_ADDR_MASK       ~0xfffUL
345
346 #define CMDQ_CFGI_0_SID_SHIFT           32
347 #define CMDQ_CFGI_0_SID_MASK            0xffffffffUL
348 #define CMDQ_CFGI_1_LEAF                (1UL << 0)
349 #define CMDQ_CFGI_1_RANGE_SHIFT         0
350 #define CMDQ_CFGI_1_RANGE_MASK          0x1fUL
351
352 #define CMDQ_TLBI_0_VMID_SHIFT          32
353 #define CMDQ_TLBI_0_ASID_SHIFT          48
354 #define CMDQ_TLBI_1_LEAF                (1UL << 0)
355 #define CMDQ_TLBI_1_VA_MASK             ~0xfffUL
356 #define CMDQ_TLBI_1_IPA_MASK            0xfffffffff000UL
357
358 #define CMDQ_PRI_0_SSID_SHIFT           12
359 #define CMDQ_PRI_0_SSID_MASK            0xfffffUL
360 #define CMDQ_PRI_0_SID_SHIFT            32
361 #define CMDQ_PRI_0_SID_MASK             0xffffffffUL
362 #define CMDQ_PRI_1_GRPID_SHIFT          0
363 #define CMDQ_PRI_1_GRPID_MASK           0x1ffUL
364 #define CMDQ_PRI_1_RESP_SHIFT           12
365 #define CMDQ_PRI_1_RESP_DENY            (0UL << CMDQ_PRI_1_RESP_SHIFT)
366 #define CMDQ_PRI_1_RESP_FAIL            (1UL << CMDQ_PRI_1_RESP_SHIFT)
367 #define CMDQ_PRI_1_RESP_SUCC            (2UL << CMDQ_PRI_1_RESP_SHIFT)
368
369 #define CMDQ_SYNC_0_CS_SHIFT            12
370 #define CMDQ_SYNC_0_CS_NONE             (0UL << CMDQ_SYNC_0_CS_SHIFT)
371 #define CMDQ_SYNC_0_CS_SEV              (2UL << CMDQ_SYNC_0_CS_SHIFT)
372
373 /* Event queue */
374 #define EVTQ_ENT_DWORDS                 4
375 #define EVTQ_MAX_SZ_SHIFT               7
376
377 #define EVTQ_0_ID_SHIFT                 0
378 #define EVTQ_0_ID_MASK                  0xffUL
379
380 /* PRI queue */
381 #define PRIQ_ENT_DWORDS                 2
382 #define PRIQ_MAX_SZ_SHIFT               8
383
384 #define PRIQ_0_SID_SHIFT                0
385 #define PRIQ_0_SID_MASK                 0xffffffffUL
386 #define PRIQ_0_SSID_SHIFT               32
387 #define PRIQ_0_SSID_MASK                0xfffffUL
388 #define PRIQ_0_PERM_PRIV                (1UL << 58)
389 #define PRIQ_0_PERM_EXEC                (1UL << 59)
390 #define PRIQ_0_PERM_READ                (1UL << 60)
391 #define PRIQ_0_PERM_WRITE               (1UL << 61)
392 #define PRIQ_0_PRG_LAST                 (1UL << 62)
393 #define PRIQ_0_SSID_V                   (1UL << 63)
394
395 #define PRIQ_1_PRG_IDX_SHIFT            0
396 #define PRIQ_1_PRG_IDX_MASK             0x1ffUL
397 #define PRIQ_1_ADDR_SHIFT               12
398 #define PRIQ_1_ADDR_MASK                0xfffffffffffffUL
399
400 /* High-level queue structures */
401 #define ARM_SMMU_POLL_TIMEOUT_US        100
402
403 static bool disable_bypass;
404 module_param_named(disable_bypass, disable_bypass, bool, S_IRUGO);
405 MODULE_PARM_DESC(disable_bypass,
406         "Disable bypass streams such that incoming transactions from devices that are not attached to an iommu domain will report an abort back to the device and will not be allowed to pass through the SMMU.");
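/*
 * When built in, this can typically be set on the kernel command line, e.g.
 * "arm-smmu-v3.disable_bypass=1" (dashes and underscores are interchangeable
 * in parameter names).
 */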
407
408 enum pri_resp {
409         PRI_RESP_DENY,
410         PRI_RESP_FAIL,
411         PRI_RESP_SUCC,
412 };
413
414 enum arm_smmu_msi_index {
415         EVTQ_MSI_INDEX,
416         GERROR_MSI_INDEX,
417         PRIQ_MSI_INDEX,
418         ARM_SMMU_MAX_MSIS,
419 };
420
421 static phys_addr_t arm_smmu_msi_cfg[ARM_SMMU_MAX_MSIS][3] = {
422         [EVTQ_MSI_INDEX] = {
423                 ARM_SMMU_EVTQ_IRQ_CFG0,
424                 ARM_SMMU_EVTQ_IRQ_CFG1,
425                 ARM_SMMU_EVTQ_IRQ_CFG2,
426         },
427         [GERROR_MSI_INDEX] = {
428                 ARM_SMMU_GERROR_IRQ_CFG0,
429                 ARM_SMMU_GERROR_IRQ_CFG1,
430                 ARM_SMMU_GERROR_IRQ_CFG2,
431         },
432         [PRIQ_MSI_INDEX] = {
433                 ARM_SMMU_PRIQ_IRQ_CFG0,
434                 ARM_SMMU_PRIQ_IRQ_CFG1,
435                 ARM_SMMU_PRIQ_IRQ_CFG2,
436         },
437 };
438
439 struct arm_smmu_cmdq_ent {
440         /* Common fields */
441         u8                              opcode;
442         bool                            substream_valid;
443
444         /* Command-specific fields */
445         union {
446                 #define CMDQ_OP_PREFETCH_CFG    0x1
447                 struct {
448                         u32                     sid;
449                         u8                      size;
450                         u64                     addr;
451                 } prefetch;
452
453                 #define CMDQ_OP_CFGI_STE        0x3
454                 #define CMDQ_OP_CFGI_ALL        0x4
455                 struct {
456                         u32                     sid;
457                         union {
458                                 bool            leaf;
459                                 u8              span;
460                         };
461                 } cfgi;
462
463                 #define CMDQ_OP_TLBI_NH_ASID    0x11
464                 #define CMDQ_OP_TLBI_NH_VA      0x12
465                 #define CMDQ_OP_TLBI_EL2_ALL    0x20
466                 #define CMDQ_OP_TLBI_S12_VMALL  0x28
467                 #define CMDQ_OP_TLBI_S2_IPA     0x2a
468                 #define CMDQ_OP_TLBI_NSNH_ALL   0x30
469                 struct {
470                         u16                     asid;
471                         u16                     vmid;
472                         bool                    leaf;
473                         u64                     addr;
474                 } tlbi;
475
476                 #define CMDQ_OP_PRI_RESP        0x41
477                 struct {
478                         u32                     sid;
479                         u32                     ssid;
480                         u16                     grpid;
481                         enum pri_resp           resp;
482                 } pri;
483
484                 #define CMDQ_OP_CMD_SYNC        0x46
485         };
486 };
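/*
 * A command is built by filling in the relevant members and handing the
 * entry to arm_smmu_cmdq_build_cmd(), which packs it into the two 64-bit
 * words of a queue slot.  A rough sketch for a leaf VA invalidation:
 *
 *	struct arm_smmu_cmdq_ent ent = {
 *		.opcode	= CMDQ_OP_TLBI_NH_VA,
 *		.tlbi	= {
 *			.asid	= asid,
 *			.addr	= iova,
 *			.leaf	= true,
 *		},
 *	};
 *
 *	arm_smmu_cmdq_issue_cmd(smmu, &ent);
 *
 * arm_smmu_tlb_inv_range_nosync() below does much the same for ranges.
 */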
487
488 struct arm_smmu_queue {
489         int                             irq; /* Wired interrupt */
490
491         __le64                          *base;
492         dma_addr_t                      base_dma;
493         u64                             q_base;
494
495         size_t                          ent_dwords;
496         u32                             max_n_shift;
497         u32                             prod;
498         u32                             cons;
499
500         u32 __iomem                     *prod_reg;
501         u32 __iomem                     *cons_reg;
502 };
503
504 struct arm_smmu_cmdq {
505         struct arm_smmu_queue           q;
506         spinlock_t                      lock;
507 };
508
509 struct arm_smmu_evtq {
510         struct arm_smmu_queue           q;
511         u32                             max_stalls;
512 };
513
514 struct arm_smmu_priq {
515         struct arm_smmu_queue           q;
516 };
517
518 /* High-level stream table and context descriptor structures */
519 struct arm_smmu_strtab_l1_desc {
520         u8                              span;
521
522         __le64                          *l2ptr;
523         dma_addr_t                      l2ptr_dma;
524 };
525
526 struct arm_smmu_s1_cfg {
527         __le64                          *cdptr;
528         dma_addr_t                      cdptr_dma;
529
530         struct arm_smmu_ctx_desc {
531                 u16     asid;
532                 u64     ttbr;
533                 u64     tcr;
534                 u64     mair;
535         }                               cd;
536 };
537
538 struct arm_smmu_s2_cfg {
539         u16                             vmid;
540         u64                             vttbr;
541         u64                             vtcr;
542 };
543
544 struct arm_smmu_strtab_ent {
545         bool                            valid;
546
547         bool                            bypass; /* Overrides s1/s2 config */
548         struct arm_smmu_s1_cfg          *s1_cfg;
549         struct arm_smmu_s2_cfg          *s2_cfg;
550 };
551
552 struct arm_smmu_strtab_cfg {
553         __le64                          *strtab;
554         dma_addr_t                      strtab_dma;
555         struct arm_smmu_strtab_l1_desc  *l1_desc;
556         unsigned int                    num_l1_ents;
557
558         u64                             strtab_base;
559         u32                             strtab_base_cfg;
560 };
561
562 /* An SMMUv3 instance */
563 struct arm_smmu_device {
564         struct device                   *dev;
565         void __iomem                    *base;
566
567 #define ARM_SMMU_FEAT_2_LVL_STRTAB      (1 << 0)
568 #define ARM_SMMU_FEAT_2_LVL_CDTAB       (1 << 1)
569 #define ARM_SMMU_FEAT_TT_LE             (1 << 2)
570 #define ARM_SMMU_FEAT_TT_BE             (1 << 3)
571 #define ARM_SMMU_FEAT_PRI               (1 << 4)
572 #define ARM_SMMU_FEAT_ATS               (1 << 5)
573 #define ARM_SMMU_FEAT_SEV               (1 << 6)
574 #define ARM_SMMU_FEAT_MSI               (1 << 7)
575 #define ARM_SMMU_FEAT_COHERENCY         (1 << 8)
576 #define ARM_SMMU_FEAT_TRANS_S1          (1 << 9)
577 #define ARM_SMMU_FEAT_TRANS_S2          (1 << 10)
578 #define ARM_SMMU_FEAT_STALLS            (1 << 11)
579 #define ARM_SMMU_FEAT_HYP               (1 << 12)
580         u32                             features;
581
582 #define ARM_SMMU_OPT_SKIP_PREFETCH      (1 << 0)
583         u32                             options;
584
585         struct arm_smmu_cmdq            cmdq;
586         struct arm_smmu_evtq            evtq;
587         struct arm_smmu_priq            priq;
588
589         int                             gerr_irq;
590
591         unsigned long                   ias; /* IPA */
592         unsigned long                   oas; /* PA */
593
594 #define ARM_SMMU_MAX_ASIDS              (1 << 16)
595         unsigned int                    asid_bits;
596         DECLARE_BITMAP(asid_map, ARM_SMMU_MAX_ASIDS);
597
598 #define ARM_SMMU_MAX_VMIDS              (1 << 16)
599         unsigned int                    vmid_bits;
600         DECLARE_BITMAP(vmid_map, ARM_SMMU_MAX_VMIDS);
601
602         unsigned int                    ssid_bits;
603         unsigned int                    sid_bits;
604
605         struct arm_smmu_strtab_cfg      strtab_cfg;
606 };
607
608 /* SMMU private data for an IOMMU group */
609 struct arm_smmu_group {
610         struct arm_smmu_device          *smmu;
611         struct arm_smmu_domain          *domain;
612         int                             num_sids;
613         u32                             *sids;
614         struct arm_smmu_strtab_ent      ste;
615 };
616
617 /* SMMU private data for an IOMMU domain */
618 enum arm_smmu_domain_stage {
619         ARM_SMMU_DOMAIN_S1 = 0,
620         ARM_SMMU_DOMAIN_S2,
621         ARM_SMMU_DOMAIN_NESTED,
622 };
623
624 struct arm_smmu_domain {
625         struct arm_smmu_device          *smmu;
626         struct mutex                    init_mutex; /* Protects smmu pointer */
627
628         struct io_pgtable_ops           *pgtbl_ops;
629         spinlock_t                      pgtbl_lock;
630
631         enum arm_smmu_domain_stage      stage;
632         union {
633                 struct arm_smmu_s1_cfg  s1_cfg;
634                 struct arm_smmu_s2_cfg  s2_cfg;
635         };
636
637         struct iommu_domain             domain;
638 };
639
640 struct arm_smmu_option_prop {
641         u32 opt;
642         const char *prop;
643 };
644
645 static struct arm_smmu_option_prop arm_smmu_options[] = {
646         { ARM_SMMU_OPT_SKIP_PREFETCH, "hisilicon,broken-prefetch-cmd" },
647         { 0, NULL},
648 };
649
650 static struct arm_smmu_domain *to_smmu_domain(struct iommu_domain *dom)
651 {
652         return container_of(dom, struct arm_smmu_domain, domain);
653 }
654
655 static void parse_driver_options(struct arm_smmu_device *smmu)
656 {
657         int i = 0;
658
659         do {
660                 if (of_property_read_bool(smmu->dev->of_node,
661                                                 arm_smmu_options[i].prop)) {
662                         smmu->options |= arm_smmu_options[i].opt;
663                         dev_notice(smmu->dev, "option %s\n",
664                                 arm_smmu_options[i].prop);
665                 }
666         } while (arm_smmu_options[++i].opt);
667 }
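/*
 * The options are matched against properties on the SMMU's firmware node, so
 * a devicetree fragment along these lines (illustrative only) would set
 * ARM_SMMU_OPT_SKIP_PREFETCH:
 *
 *	smmu@2b400000 {
 *		compatible = "arm,smmu-v3";
 *		...
 *		hisilicon,broken-prefetch-cmd;
 *	};
 */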
668
669 /* Low-level queue manipulation functions */
670 static bool queue_full(struct arm_smmu_queue *q)
671 {
672         return Q_IDX(q, q->prod) == Q_IDX(q, q->cons) &&
673                Q_WRP(q, q->prod) != Q_WRP(q, q->cons);
674 }
675
676 static bool queue_empty(struct arm_smmu_queue *q)
677 {
678         return Q_IDX(q, q->prod) == Q_IDX(q, q->cons) &&
679                Q_WRP(q, q->prod) == Q_WRP(q, q->cons);
680 }
681
682 static void queue_sync_cons(struct arm_smmu_queue *q)
683 {
684         q->cons = readl_relaxed(q->cons_reg);
685 }
686
687 static void queue_inc_cons(struct arm_smmu_queue *q)
688 {
689         u32 cons = (Q_WRP(q, q->cons) | Q_IDX(q, q->cons)) + 1;
690
691         q->cons = Q_OVF(q, q->cons) | Q_WRP(q, cons) | Q_IDX(q, cons);
692         writel(q->cons, q->cons_reg);
693 }
694
695 static int queue_sync_prod(struct arm_smmu_queue *q)
696 {
697         int ret = 0;
698         u32 prod = readl_relaxed(q->prod_reg);
699
700         if (Q_OVF(q, prod) != Q_OVF(q, q->prod))
701                 ret = -EOVERFLOW;
702
703         q->prod = prod;
704         return ret;
705 }
706
707 static void queue_inc_prod(struct arm_smmu_queue *q)
708 {
709         u32 prod = (Q_WRP(q, q->prod) | Q_IDX(q, q->prod)) + 1;
710
711         q->prod = Q_OVF(q, q->prod) | Q_WRP(q, prod) | Q_IDX(q, prod);
712         writel(q->prod, q->prod_reg);
713 }
714
715 static bool __queue_cons_before(struct arm_smmu_queue *q, u32 until)
716 {
717         if (Q_WRP(q, q->cons) == Q_WRP(q, until))
718                 return Q_IDX(q, q->cons) < Q_IDX(q, until);
719
720         return Q_IDX(q, q->cons) >= Q_IDX(q, until);
721 }
722
723 static int queue_poll_cons(struct arm_smmu_queue *q, u32 until, bool wfe)
724 {
725         ktime_t timeout = ktime_add_us(ktime_get(), ARM_SMMU_POLL_TIMEOUT_US);
726
727         while (queue_sync_cons(q), __queue_cons_before(q, until)) {
728                 if (ktime_compare(ktime_get(), timeout) > 0)
729                         return -ETIMEDOUT;
730
731                 if (wfe) {
732                         wfe();
733                 } else {
734                         cpu_relax();
735                         udelay(1);
736                 }
737         }
738
739         return 0;
740 }
741
742 static void queue_write(__le64 *dst, u64 *src, size_t n_dwords)
743 {
744         int i;
745
746         for (i = 0; i < n_dwords; ++i)
747                 *dst++ = cpu_to_le64(*src++);
748 }
749
750 static int queue_insert_raw(struct arm_smmu_queue *q, u64 *ent)
751 {
752         if (queue_full(q))
753                 return -ENOSPC;
754
755         queue_write(Q_ENT(q, q->prod), ent, q->ent_dwords);
756         queue_inc_prod(q);
757         return 0;
758 }
759
760 static void queue_read(__le64 *dst, u64 *src, size_t n_dwords)
761 {
762         int i;
763
764         for (i = 0; i < n_dwords; ++i)
765                 *dst++ = le64_to_cpu(*src++);
766 }
767
768 static int queue_remove_raw(struct arm_smmu_queue *q, u64 *ent)
769 {
770         if (queue_empty(q))
771                 return -EAGAIN;
772
773         queue_read(ent, Q_ENT(q, q->cons), q->ent_dwords);
774         queue_inc_cons(q);
775         return 0;
776 }
777
778 /* High-level queue accessors */
779 static int arm_smmu_cmdq_build_cmd(u64 *cmd, struct arm_smmu_cmdq_ent *ent)
780 {
781         memset(cmd, 0, CMDQ_ENT_DWORDS << 3);
782         cmd[0] |= (ent->opcode & CMDQ_0_OP_MASK) << CMDQ_0_OP_SHIFT;
783
784         switch (ent->opcode) {
785         case CMDQ_OP_TLBI_EL2_ALL:
786         case CMDQ_OP_TLBI_NSNH_ALL:
787                 break;
788         case CMDQ_OP_PREFETCH_CFG:
789                 cmd[0] |= (u64)ent->prefetch.sid << CMDQ_PREFETCH_0_SID_SHIFT;
790                 cmd[1] |= ent->prefetch.size << CMDQ_PREFETCH_1_SIZE_SHIFT;
791                 cmd[1] |= ent->prefetch.addr & CMDQ_PREFETCH_1_ADDR_MASK;
792                 break;
793         case CMDQ_OP_CFGI_STE:
794                 cmd[0] |= (u64)ent->cfgi.sid << CMDQ_CFGI_0_SID_SHIFT;
795                 cmd[1] |= ent->cfgi.leaf ? CMDQ_CFGI_1_LEAF : 0;
796                 break;
797         case CMDQ_OP_CFGI_ALL:
798                 /* Cover the entire SID range */
799                 cmd[1] |= CMDQ_CFGI_1_RANGE_MASK << CMDQ_CFGI_1_RANGE_SHIFT;
800                 break;
801         case CMDQ_OP_TLBI_NH_VA:
802                 cmd[0] |= (u64)ent->tlbi.asid << CMDQ_TLBI_0_ASID_SHIFT;
803                 cmd[1] |= ent->tlbi.leaf ? CMDQ_TLBI_1_LEAF : 0;
804                 cmd[1] |= ent->tlbi.addr & CMDQ_TLBI_1_VA_MASK;
805                 break;
806         case CMDQ_OP_TLBI_S2_IPA:
807                 cmd[0] |= (u64)ent->tlbi.vmid << CMDQ_TLBI_0_VMID_SHIFT;
808                 cmd[1] |= ent->tlbi.leaf ? CMDQ_TLBI_1_LEAF : 0;
809                 cmd[1] |= ent->tlbi.addr & CMDQ_TLBI_1_IPA_MASK;
810                 break;
811         case CMDQ_OP_TLBI_NH_ASID:
812                 cmd[0] |= (u64)ent->tlbi.asid << CMDQ_TLBI_0_ASID_SHIFT;
813                 /* Fallthrough */
814         case CMDQ_OP_TLBI_S12_VMALL:
815                 cmd[0] |= (u64)ent->tlbi.vmid << CMDQ_TLBI_0_VMID_SHIFT;
816                 break;
817         case CMDQ_OP_PRI_RESP:
818                 cmd[0] |= ent->substream_valid ? CMDQ_0_SSV : 0;
819                 cmd[0] |= ent->pri.ssid << CMDQ_PRI_0_SSID_SHIFT;
820                 cmd[0] |= (u64)ent->pri.sid << CMDQ_PRI_0_SID_SHIFT;
821                 cmd[1] |= ent->pri.grpid << CMDQ_PRI_1_GRPID_SHIFT;
822                 switch (ent->pri.resp) {
823                 case PRI_RESP_DENY:
824                         cmd[1] |= CMDQ_PRI_1_RESP_DENY;
825                         break;
826                 case PRI_RESP_FAIL:
827                         cmd[1] |= CMDQ_PRI_1_RESP_FAIL;
828                         break;
829                 case PRI_RESP_SUCC:
830                         cmd[1] |= CMDQ_PRI_1_RESP_SUCC;
831                         break;
832                 default:
833                         return -EINVAL;
834                 }
835                 break;
836         case CMDQ_OP_CMD_SYNC:
837                 cmd[0] |= CMDQ_SYNC_0_CS_SEV;
838                 break;
839         default:
840                 return -ENOENT;
841         }
842
843         return 0;
844 }
845
846 static void arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu)
847 {
848         static const char *cerror_str[] = {
849                 [CMDQ_ERR_CERROR_NONE_IDX]      = "No error",
850                 [CMDQ_ERR_CERROR_ILL_IDX]       = "Illegal command",
851                 [CMDQ_ERR_CERROR_ABT_IDX]       = "Abort on command fetch",
852         };
853
854         int i;
855         u64 cmd[CMDQ_ENT_DWORDS];
856         struct arm_smmu_queue *q = &smmu->cmdq.q;
857         u32 cons = readl_relaxed(q->cons_reg);
858         u32 idx = cons >> CMDQ_ERR_SHIFT & CMDQ_ERR_MASK;
859         struct arm_smmu_cmdq_ent cmd_sync = {
860                 .opcode = CMDQ_OP_CMD_SYNC,
861         };
862
863         dev_err(smmu->dev, "CMDQ error (cons 0x%08x): %s\n", cons,
864                 idx < ARRAY_SIZE(cerror_str) ?  cerror_str[idx] : "Unknown");
865
866         switch (idx) {
867         case CMDQ_ERR_CERROR_ABT_IDX:
868                 dev_err(smmu->dev, "retrying command fetch\n");
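		/* Fall through - nothing to skip for a fetch abort */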
869         case CMDQ_ERR_CERROR_NONE_IDX:
870                 return;
871         case CMDQ_ERR_CERROR_ILL_IDX:
872                 /* Fallthrough */
873         default:
874                 break;
875         }
876
877         /*
878          * We may have concurrent producers, so we need to be careful
879          * not to touch any of the shadow cmdq state.
880          */
881         queue_read(cmd, Q_ENT(q, cons), q->ent_dwords);
882         dev_err(smmu->dev, "skipping command in error state:\n");
883         for (i = 0; i < ARRAY_SIZE(cmd); ++i)
884                 dev_err(smmu->dev, "\t0x%016llx\n", (unsigned long long)cmd[i]);
885
886         /* Convert the erroneous command into a CMD_SYNC */
887         if (arm_smmu_cmdq_build_cmd(cmd, &cmd_sync)) {
888                 dev_err(smmu->dev, "failed to convert to CMD_SYNC\n");
889                 return;
890         }
891
892         queue_write(Q_ENT(q, cons), cmd, q->ent_dwords);
893 }
894
895 static void arm_smmu_cmdq_issue_cmd(struct arm_smmu_device *smmu,
896                                     struct arm_smmu_cmdq_ent *ent)
897 {
898         u32 until;
899         u64 cmd[CMDQ_ENT_DWORDS];
900         bool wfe = !!(smmu->features & ARM_SMMU_FEAT_SEV);
901         struct arm_smmu_queue *q = &smmu->cmdq.q;
902
903         if (arm_smmu_cmdq_build_cmd(cmd, ent)) {
904                 dev_warn(smmu->dev, "ignoring unknown CMDQ opcode 0x%x\n",
905                          ent->opcode);
906                 return;
907         }
908
909         spin_lock(&smmu->cmdq.lock);
910         while (until = q->prod + 1, queue_insert_raw(q, cmd) == -ENOSPC) {
911                 /*
912                  * Keep the queue locked, otherwise the producer could wrap
913                  * twice and we could see a future consumer pointer that looks
914                  * like it's behind us.
915                  */
916                 if (queue_poll_cons(q, until, wfe))
917                         dev_err_ratelimited(smmu->dev, "CMDQ timeout\n");
918         }
919
920         if (ent->opcode == CMDQ_OP_CMD_SYNC && queue_poll_cons(q, until, wfe))
921                 dev_err_ratelimited(smmu->dev, "CMD_SYNC timeout\n");
922         spin_unlock(&smmu->cmdq.lock);
923 }
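/*
 * Callers that need completion follow their command with a CMDQ_OP_CMD_SYNC;
 * as above, only the CMD_SYNC's consumption is waited for.  See
 * arm_smmu_sync_ste_for_sid() below for the typical pattern.
 */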
924
925 /* Context descriptor manipulation functions */
926 static u64 arm_smmu_cpu_tcr_to_cd(u64 tcr)
927 {
928         u64 val = 0;
929
930         /* Repack the TCR. Just care about TTBR0 for now */
931         val |= ARM_SMMU_TCR2CD(tcr, T0SZ);
932         val |= ARM_SMMU_TCR2CD(tcr, TG0);
933         val |= ARM_SMMU_TCR2CD(tcr, IRGN0);
934         val |= ARM_SMMU_TCR2CD(tcr, ORGN0);
935         val |= ARM_SMMU_TCR2CD(tcr, SH0);
936         val |= ARM_SMMU_TCR2CD(tcr, EPD0);
937         val |= ARM_SMMU_TCR2CD(tcr, EPD1);
938         val |= ARM_SMMU_TCR2CD(tcr, IPS);
939         val |= ARM_SMMU_TCR2CD(tcr, TBI0);
940
941         return val;
942 }
943
944 static void arm_smmu_write_ctx_desc(struct arm_smmu_device *smmu,
945                                     struct arm_smmu_s1_cfg *cfg)
946 {
947         u64 val;
948
949         /*
950          * We don't need to issue any invalidation here, as we'll invalidate
951          * the STE when installing the new entry anyway.
952          */
953         val = arm_smmu_cpu_tcr_to_cd(cfg->cd.tcr) |
954 #ifdef __BIG_ENDIAN
955               CTXDESC_CD_0_ENDI |
956 #endif
957               CTXDESC_CD_0_R | CTXDESC_CD_0_A | CTXDESC_CD_0_ASET_PRIVATE |
958               CTXDESC_CD_0_AA64 | (u64)cfg->cd.asid << CTXDESC_CD_0_ASID_SHIFT |
959               CTXDESC_CD_0_V;
960         cfg->cdptr[0] = cpu_to_le64(val);
961
962         val = cfg->cd.ttbr & CTXDESC_CD_1_TTB0_MASK << CTXDESC_CD_1_TTB0_SHIFT;
963         cfg->cdptr[1] = cpu_to_le64(val);
964
965         cfg->cdptr[3] = cpu_to_le64(cfg->cd.mair << CTXDESC_CD_3_MAIR_SHIFT);
966 }
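/*
 * The resulting descriptor: dword 0 carries the repacked TCR fields, the
 * ASID and the V/R/A/ASET/AA64 (and, on big-endian, ENDI) flags; dword 1
 * the TTB0 pointer; dword 3 the MAIR value.  The remaining dwords are left
 * at zero from the coherent allocation, since only TTBR0 is used here.
 */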
967
968 /* Stream table manipulation functions */
969 static void
970 arm_smmu_write_strtab_l1_desc(__le64 *dst, struct arm_smmu_strtab_l1_desc *desc)
971 {
972         u64 val = 0;
973
974         val |= (desc->span & STRTAB_L1_DESC_SPAN_MASK)
975                 << STRTAB_L1_DESC_SPAN_SHIFT;
976         val |= desc->l2ptr_dma &
977                STRTAB_L1_DESC_L2PTR_MASK << STRTAB_L1_DESC_L2PTR_SHIFT;
978
979         *dst = cpu_to_le64(val);
980 }
981
982 static void arm_smmu_sync_ste_for_sid(struct arm_smmu_device *smmu, u32 sid)
983 {
984         struct arm_smmu_cmdq_ent cmd = {
985                 .opcode = CMDQ_OP_CFGI_STE,
986                 .cfgi   = {
987                         .sid    = sid,
988                         .leaf   = true,
989                 },
990         };
991
992         arm_smmu_cmdq_issue_cmd(smmu, &cmd);
993         cmd.opcode = CMDQ_OP_CMD_SYNC;
994         arm_smmu_cmdq_issue_cmd(smmu, &cmd);
995 }
996
997 static void arm_smmu_write_strtab_ent(struct arm_smmu_device *smmu, u32 sid,
998                                       __le64 *dst, struct arm_smmu_strtab_ent *ste)
999 {
1000         /*
1001          * This is hideously complicated, but we only really care about
1002          * three cases at the moment:
1003          *
1004          * 1. Invalid (all zero) -> bypass  (init)
1005          * 2. Bypass -> translation (attach)
1006          * 3. Translation -> bypass (detach)
1007          *
1008          * Given that we can't update the STE atomically and the SMMU
1009          * doesn't read the thing in a defined order, that leaves us
1010          * with the following maintenance requirements:
1011          *
1012          * 1. Update Config, return (init time STEs aren't live)
1013          * 2. Write everything apart from dword 0, sync, write dword 0, sync
1014          * 3. Update Config, sync
1015          */
1016         u64 val = le64_to_cpu(dst[0]);
1017         bool ste_live = false;
1018         struct arm_smmu_cmdq_ent prefetch_cmd = {
1019                 .opcode         = CMDQ_OP_PREFETCH_CFG,
1020                 .prefetch       = {
1021                         .sid    = sid,
1022                 },
1023         };
1024
1025         if (val & STRTAB_STE_0_V) {
1026                 u64 cfg;
1027
1028                 cfg = val & STRTAB_STE_0_CFG_MASK << STRTAB_STE_0_CFG_SHIFT;
1029                 switch (cfg) {
1030                 case STRTAB_STE_0_CFG_BYPASS:
1031                         break;
1032                 case STRTAB_STE_0_CFG_S1_TRANS:
1033                 case STRTAB_STE_0_CFG_S2_TRANS:
1034                         ste_live = true;
1035                         break;
1036                 default:
1037                         BUG(); /* STE corruption */
1038                 }
1039         }
1040
1041         /* Nuke the existing Config, as we're going to rewrite it */
1042         val &= ~(STRTAB_STE_0_CFG_MASK << STRTAB_STE_0_CFG_SHIFT);
1043
1044         if (ste->valid)
1045                 val |= STRTAB_STE_0_V;
1046         else
1047                 val &= ~STRTAB_STE_0_V;
1048
1049         if (ste->bypass) {
1050                 val |= disable_bypass ? STRTAB_STE_0_CFG_ABORT
1051                                       : STRTAB_STE_0_CFG_BYPASS;
1052                 dst[0] = cpu_to_le64(val);
1053                 dst[1] = cpu_to_le64(STRTAB_STE_1_SHCFG_INCOMING
1054                          << STRTAB_STE_1_SHCFG_SHIFT);
1055                 dst[2] = 0; /* Nuke the VMID */
1056                 if (ste_live)
1057                         arm_smmu_sync_ste_for_sid(smmu, sid);
1058                 return;
1059         }
1060
1061         if (ste->s1_cfg) {
1062                 BUG_ON(ste_live);
1063                 dst[1] = cpu_to_le64(
1064                          STRTAB_STE_1_S1C_CACHE_WBRA
1065                          << STRTAB_STE_1_S1CIR_SHIFT |
1066                          STRTAB_STE_1_S1C_CACHE_WBRA
1067                          << STRTAB_STE_1_S1COR_SHIFT |
1068                          STRTAB_STE_1_S1C_SH_ISH << STRTAB_STE_1_S1CSH_SHIFT |
1069 #ifdef CONFIG_PCI_ATS
1070                          STRTAB_STE_1_EATS_TRANS << STRTAB_STE_1_EATS_SHIFT |
1071 #endif
1072                          STRTAB_STE_1_STRW_NSEL1 << STRTAB_STE_1_STRW_SHIFT);
1073
1074                 if (smmu->features & ARM_SMMU_FEAT_STALLS)
1075                         dst[1] |= cpu_to_le64(STRTAB_STE_1_S1STALLD);
1076
1077                 val |= (ste->s1_cfg->cdptr_dma & STRTAB_STE_0_S1CTXPTR_MASK
1078                         << STRTAB_STE_0_S1CTXPTR_SHIFT) |
1079                         STRTAB_STE_0_CFG_S1_TRANS;
1080
1081         }
1082
1083         if (ste->s2_cfg) {
1084                 BUG_ON(ste_live);
1085                 dst[2] = cpu_to_le64(
1086                          ste->s2_cfg->vmid << STRTAB_STE_2_S2VMID_SHIFT |
1087                          (ste->s2_cfg->vtcr & STRTAB_STE_2_VTCR_MASK)
1088                           << STRTAB_STE_2_VTCR_SHIFT |
1089 #ifdef __BIG_ENDIAN
1090                          STRTAB_STE_2_S2ENDI |
1091 #endif
1092                          STRTAB_STE_2_S2PTW | STRTAB_STE_2_S2AA64 |
1093                          STRTAB_STE_2_S2R);
1094
1095                 dst[3] = cpu_to_le64(ste->s2_cfg->vttbr &
1096                          STRTAB_STE_3_S2TTB_MASK << STRTAB_STE_3_S2TTB_SHIFT);
1097
1098                 val |= STRTAB_STE_0_CFG_S2_TRANS;
1099         }
1100
1101         arm_smmu_sync_ste_for_sid(smmu, sid);
1102         dst[0] = cpu_to_le64(val);
1103         arm_smmu_sync_ste_for_sid(smmu, sid);
1104
1105         /* It's likely that we'll want to use the new STE soon */
1106         if (!(smmu->options & ARM_SMMU_OPT_SKIP_PREFETCH))
1107                 arm_smmu_cmdq_issue_cmd(smmu, &prefetch_cmd);
1108 }
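/*
 * Note the ordering for case (2) above (bypass -> translation): dwords 1-3
 * are written first, a CFGI_STE + CMD_SYNC flushes the SMMU's view of them,
 * and only then is dword 0 rewritten with the new Config and V bit, followed
 * by a second invalidation.  The SMMU can therefore never see a valid STE
 * whose remaining dwords are stale.
 */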
1109
1110 static void arm_smmu_init_bypass_stes(u64 *strtab, unsigned int nent)
1111 {
1112         unsigned int i;
1113         struct arm_smmu_strtab_ent ste = {
1114                 .valid  = true,
1115                 .bypass = true,
1116         };
1117
1118         for (i = 0; i < nent; ++i) {
1119                 arm_smmu_write_strtab_ent(NULL, -1, strtab, &ste);
1120                 strtab += STRTAB_STE_DWORDS;
1121         }
1122 }
1123
1124 static int arm_smmu_init_l2_strtab(struct arm_smmu_device *smmu, u32 sid)
1125 {
1126         size_t size;
1127         void *strtab;
1128         struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
1129         struct arm_smmu_strtab_l1_desc *desc = &cfg->l1_desc[sid >> STRTAB_SPLIT];
1130
1131         if (desc->l2ptr)
1132                 return 0;
1133
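	/* STRTAB_SPLIT + log2(STRTAB_STE_DWORDS) + 3: 256 STEs of 64 bytes, 16KiB */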
1134         size = 1 << (STRTAB_SPLIT + ilog2(STRTAB_STE_DWORDS) + 3);
1135         strtab = &cfg->strtab[(sid >> STRTAB_SPLIT) * STRTAB_L1_DESC_DWORDS];
1136
1137         desc->span = STRTAB_SPLIT + 1;
1138         desc->l2ptr = dmam_alloc_coherent(smmu->dev, size, &desc->l2ptr_dma,
1139                                           GFP_KERNEL | __GFP_ZERO);
1140         if (!desc->l2ptr) {
1141                 dev_err(smmu->dev,
1142                         "failed to allocate l2 stream table for SID %u\n",
1143                         sid);
1144                 return -ENOMEM;
1145         }
1146
1147         arm_smmu_init_bypass_stes(desc->l2ptr, 1 << STRTAB_SPLIT);
1148         arm_smmu_write_strtab_l1_desc(strtab, desc);
1149         return 0;
1150 }
1151
1152 /* IRQ and event handlers */
1153 static irqreturn_t arm_smmu_evtq_thread(int irq, void *dev)
1154 {
1155         int i;
1156         struct arm_smmu_device *smmu = dev;
1157         struct arm_smmu_queue *q = &smmu->evtq.q;
1158         u64 evt[EVTQ_ENT_DWORDS];
1159
1160         while (!queue_remove_raw(q, evt)) {
1161                 u8 id = evt[0] >> EVTQ_0_ID_SHIFT & EVTQ_0_ID_MASK;
1162
1163                 dev_info(smmu->dev, "event 0x%02x received:\n", id);
1164                 for (i = 0; i < ARRAY_SIZE(evt); ++i)
1165                         dev_info(smmu->dev, "\t0x%016llx\n",
1166                                  (unsigned long long)evt[i]);
1167         }
1168
1169         /* Sync our overflow flag, as we believe we're up to speed */
1170         q->cons = Q_OVF(q, q->prod) | Q_WRP(q, q->cons) | Q_IDX(q, q->cons);
1171         return IRQ_HANDLED;
1172 }
1173
1174 static irqreturn_t arm_smmu_evtq_handler(int irq, void *dev)
1175 {
1176         irqreturn_t ret = IRQ_WAKE_THREAD;
1177         struct arm_smmu_device *smmu = dev;
1178         struct arm_smmu_queue *q = &smmu->evtq.q;
1179
1180         /*
1181          * Not much we can do on overflow, so scream and pretend we're
1182          * trying harder.
1183          */
1184         if (queue_sync_prod(q) == -EOVERFLOW)
1185                 dev_err(smmu->dev, "EVTQ overflow detected -- events lost\n");
1186         else if (queue_empty(q))
1187                 ret = IRQ_NONE;
1188
1189         return ret;
1190 }
1191
1192 static irqreturn_t arm_smmu_priq_thread(int irq, void *dev)
1193 {
1194         struct arm_smmu_device *smmu = dev;
1195         struct arm_smmu_queue *q = &smmu->priq.q;
1196         u64 evt[PRIQ_ENT_DWORDS];
1197
1198         while (!queue_remove_raw(q, evt)) {
1199                 u32 sid, ssid;
1200                 u16 grpid;
1201                 bool ssv, last;
1202
1203                 sid = evt[0] >> PRIQ_0_SID_SHIFT & PRIQ_0_SID_MASK;
1204                 ssv = evt[0] & PRIQ_0_SSID_V;
1205                 ssid = ssv ? evt[0] >> PRIQ_0_SSID_SHIFT & PRIQ_0_SSID_MASK : 0;
1206                 last = evt[0] & PRIQ_0_PRG_LAST;
1207                 grpid = evt[1] >> PRIQ_1_PRG_IDX_SHIFT & PRIQ_1_PRG_IDX_MASK;
1208
1209                 dev_info(smmu->dev, "unexpected PRI request received:\n");
1210                 dev_info(smmu->dev,
1211                          "\tsid 0x%08x.0x%05x: [%u%s] %sprivileged %s%s%s access at iova 0x%016llx\n",
1212                          sid, ssid, grpid, last ? "L" : "",
1213                          evt[0] & PRIQ_0_PERM_PRIV ? "" : "un",
1214                          evt[0] & PRIQ_0_PERM_READ ? "R" : "",
1215                          evt[0] & PRIQ_0_PERM_WRITE ? "W" : "",
1216                          evt[0] & PRIQ_0_PERM_EXEC ? "X" : "",
1217                          evt[1] & PRIQ_1_ADDR_MASK << PRIQ_1_ADDR_SHIFT);
1218
1219                 if (last) {
1220                         struct arm_smmu_cmdq_ent cmd = {
1221                                 .opcode                 = CMDQ_OP_PRI_RESP,
1222                                 .substream_valid        = ssv,
1223                                 .pri                    = {
1224                                         .sid    = sid,
1225                                         .ssid   = ssid,
1226                                         .grpid  = grpid,
1227                                         .resp   = PRI_RESP_DENY,
1228                                 },
1229                         };
1230
1231                         arm_smmu_cmdq_issue_cmd(smmu, &cmd);
1232                 }
1233         }
1234
1235         /* Sync our overflow flag, as we believe we're up to speed */
1236         q->cons = Q_OVF(q, q->prod) | Q_WRP(q, q->cons) | Q_IDX(q, q->cons);
1237         return IRQ_HANDLED;
1238 }
1239
1240 static irqreturn_t arm_smmu_priq_handler(int irq, void *dev)
1241 {
1242         irqreturn_t ret = IRQ_WAKE_THREAD;
1243         struct arm_smmu_device *smmu = dev;
1244         struct arm_smmu_queue *q = &smmu->priq.q;
1245
1246         /* PRIQ overflow indicates a programming error */
1247         if (queue_sync_prod(q) == -EOVERFLOW)
1248                 dev_err(smmu->dev, "PRIQ overflow detected -- requests lost\n");
1249         else if (queue_empty(q))
1250                 ret = IRQ_NONE;
1251
1252         return ret;
1253 }
1254
1255 static irqreturn_t arm_smmu_cmdq_sync_handler(int irq, void *dev)
1256 {
1257         /* We don't actually use CMD_SYNC interrupts for anything */
1258         return IRQ_HANDLED;
1259 }
1260
1261 static int arm_smmu_device_disable(struct arm_smmu_device *smmu);
1262
1263 static irqreturn_t arm_smmu_gerror_handler(int irq, void *dev)
1264 {
1265         u32 gerror, gerrorn, active;
1266         struct arm_smmu_device *smmu = dev;
1267
1268         gerror = readl_relaxed(smmu->base + ARM_SMMU_GERROR);
1269         gerrorn = readl_relaxed(smmu->base + ARM_SMMU_GERRORN);
1270
1271         active = gerror ^ gerrorn;
1272         if (!(active & GERROR_ERR_MASK))
1273                 return IRQ_NONE; /* No errors pending */
1274
1275         dev_warn(smmu->dev,
1276                  "unexpected global error reported (0x%08x), this could be serious\n",
1277                  active);
1278
1279         if (active & GERROR_SFM_ERR) {
1280                 dev_err(smmu->dev, "device has entered Service Failure Mode!\n");
1281                 arm_smmu_device_disable(smmu);
1282         }
1283
1284         if (active & GERROR_MSI_GERROR_ABT_ERR)
1285                 dev_warn(smmu->dev, "GERROR MSI write aborted\n");
1286
1287         if (active & GERROR_MSI_PRIQ_ABT_ERR) {
1288                 dev_warn(smmu->dev, "PRIQ MSI write aborted\n");
1289                 arm_smmu_priq_handler(irq, smmu->dev);
1290         }
1291
1292         if (active & GERROR_MSI_EVTQ_ABT_ERR) {
1293                 dev_warn(smmu->dev, "EVTQ MSI write aborted\n");
1294                 arm_smmu_evtq_handler(irq, smmu->dev);
1295         }
1296
1297         if (active & GERROR_MSI_CMDQ_ABT_ERR) {
1298                 dev_warn(smmu->dev, "CMDQ MSI write aborted\n");
1299                 arm_smmu_cmdq_sync_handler(irq, smmu->dev);
1300         }
1301
1302         if (active & GERROR_PRIQ_ABT_ERR)
1303                 dev_err(smmu->dev, "PRIQ write aborted -- events may have been lost\n");
1304
1305         if (active & GERROR_EVTQ_ABT_ERR)
1306                 dev_err(smmu->dev, "EVTQ write aborted -- events may have been lost\n");
1307
1308         if (active & GERROR_CMDQ_ERR)
1309                 arm_smmu_cmdq_skip_err(smmu);
1310
1311         writel(gerror, smmu->base + ARM_SMMU_GERRORN);
1312         return IRQ_HANDLED;
1313 }
1314
1315 /* IO_PGTABLE API */
1316 static void __arm_smmu_tlb_sync(struct arm_smmu_device *smmu)
1317 {
1318         struct arm_smmu_cmdq_ent cmd;
1319
1320         cmd.opcode = CMDQ_OP_CMD_SYNC;
1321         arm_smmu_cmdq_issue_cmd(smmu, &cmd);
1322 }
1323
1324 static void arm_smmu_tlb_sync(void *cookie)
1325 {
1326         struct arm_smmu_domain *smmu_domain = cookie;
1327         __arm_smmu_tlb_sync(smmu_domain->smmu);
1328 }
1329
1330 static void arm_smmu_tlb_inv_context(void *cookie)
1331 {
1332         struct arm_smmu_domain *smmu_domain = cookie;
1333         struct arm_smmu_device *smmu = smmu_domain->smmu;
1334         struct arm_smmu_cmdq_ent cmd;
1335
1336         if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
1337                 cmd.opcode      = CMDQ_OP_TLBI_NH_ASID;
1338                 cmd.tlbi.asid   = smmu_domain->s1_cfg.cd.asid;
1339                 cmd.tlbi.vmid   = 0;
1340         } else {
1341                 cmd.opcode      = CMDQ_OP_TLBI_S12_VMALL;
1342                 cmd.tlbi.vmid   = smmu_domain->s2_cfg.vmid;
1343         }
1344
1345         arm_smmu_cmdq_issue_cmd(smmu, &cmd);
1346         __arm_smmu_tlb_sync(smmu);
1347 }
1348
1349 static void arm_smmu_tlb_inv_range_nosync(unsigned long iova, size_t size,
1350                                           size_t granule, bool leaf, void *cookie)
1351 {
1352         struct arm_smmu_domain *smmu_domain = cookie;
1353         struct arm_smmu_device *smmu = smmu_domain->smmu;
1354         struct arm_smmu_cmdq_ent cmd = {
1355                 .tlbi = {
1356                         .leaf   = leaf,
1357                         .addr   = iova,
1358                 },
1359         };
1360
1361         if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
1362                 cmd.opcode      = CMDQ_OP_TLBI_NH_VA;
1363                 cmd.tlbi.asid   = smmu_domain->s1_cfg.cd.asid;
1364         } else {
1365                 cmd.opcode      = CMDQ_OP_TLBI_S2_IPA;
1366                 cmd.tlbi.vmid   = smmu_domain->s2_cfg.vmid;
1367         }
1368
1369         do {
1370                 arm_smmu_cmdq_issue_cmd(smmu, &cmd);
1371                 cmd.tlbi.addr += granule;
1372         } while (size -= granule);
1373 }
1374
1375 static struct iommu_gather_ops arm_smmu_gather_ops = {
1376         .tlb_flush_all  = arm_smmu_tlb_inv_context,
1377         .tlb_add_flush  = arm_smmu_tlb_inv_range_nosync,
1378         .tlb_sync       = arm_smmu_tlb_sync,
1379 };
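/*
 * These callbacks are driven by the io-pgtable code: tlb_add_flush is called
 * for each range being invalidated (the helper above then issues one TLBI
 * command per granule within that range) and tlb_sync waits for the queued
 * invalidations to complete; tlb_flush_all drops the whole ASID/VMID, e.g.
 * when tearing down a domain.
 */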
1380
1381 /* IOMMU API */
1382 static bool arm_smmu_capable(enum iommu_cap cap)
1383 {
1384         switch (cap) {
1385         case IOMMU_CAP_CACHE_COHERENCY:
1386                 return true;
1387         case IOMMU_CAP_INTR_REMAP:
1388                 return true; /* MSIs are just memory writes */
1389         case IOMMU_CAP_NOEXEC:
1390                 return true;
1391         default:
1392                 return false;
1393         }
1394 }
1395
1396 static struct iommu_domain *arm_smmu_domain_alloc(unsigned type)
1397 {
1398         struct arm_smmu_domain *smmu_domain;
1399
1400         if (type != IOMMU_DOMAIN_UNMANAGED && type != IOMMU_DOMAIN_DMA)
1401                 return NULL;
1402
1403         /*
1404          * Allocate the domain and initialise some of its data structures.
1405          * We can't really do anything meaningful until we've added a
1406          * master.
1407          */
1408         smmu_domain = kzalloc(sizeof(*smmu_domain), GFP_KERNEL);
1409         if (!smmu_domain)
1410                 return NULL;
1411
1412         if (type == IOMMU_DOMAIN_DMA &&
1413             iommu_get_dma_cookie(&smmu_domain->domain)) {
1414                 kfree(smmu_domain);
1415                 return NULL;
1416         }
1417
1418         mutex_init(&smmu_domain->init_mutex);
1419         spin_lock_init(&smmu_domain->pgtbl_lock);
1420         return &smmu_domain->domain;
1421 }
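/*
 * For IOMMU_DOMAIN_DMA domains, the cookie allocated above is what the
 * common iommu-dma layer later uses to hold its IOVA allocator, so that the
 * DMA API can be backed transparently by this SMMU.
 */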
1422
1423 static int arm_smmu_bitmap_alloc(unsigned long *map, int span)
1424 {
1425         int idx, size = 1 << span;
1426
1427         do {
1428                 idx = find_first_zero_bit(map, size);
1429                 if (idx == size)
1430                         return -ENOSPC;
1431         } while (test_and_set_bit(idx, map));
1432
1433         return idx;
1434 }
1435
1436 static void arm_smmu_bitmap_free(unsigned long *map, int idx)
1437 {
1438         clear_bit(idx, map);
1439 }
1440
1441 static void arm_smmu_domain_free(struct iommu_domain *domain)
1442 {
1443         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1444         struct arm_smmu_device *smmu = smmu_domain->smmu;
1445
1446         iommu_put_dma_cookie(domain);
1447         free_io_pgtable_ops(smmu_domain->pgtbl_ops);
1448
1449         /* Free the CD and ASID, if we allocated them */
1450         if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
1451                 struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg;
1452
1453                 if (cfg->cdptr) {
1454                         dmam_free_coherent(smmu_domain->smmu->dev,
1455                                            CTXDESC_CD_DWORDS << 3,
1456                                            cfg->cdptr,
1457                                            cfg->cdptr_dma);
1458
1459                         arm_smmu_bitmap_free(smmu->asid_map, cfg->cd.asid);
1460                 }
1461         } else {
1462                 struct arm_smmu_s2_cfg *cfg = &smmu_domain->s2_cfg;
1463                 if (cfg->vmid)
1464                         arm_smmu_bitmap_free(smmu->vmid_map, cfg->vmid);
1465         }
1466
1467         kfree(smmu_domain);
1468 }
1469
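/*
 * Stage-1 finalisation: grab a free ASID, allocate a (single) context
 * descriptor and capture the TTBR/TCR/MAIR from the io-pgtable config so
 * that arm_smmu_write_ctx_desc() can install them later.
 */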
1470 static int arm_smmu_domain_finalise_s1(struct arm_smmu_domain *smmu_domain,
1471                                        struct io_pgtable_cfg *pgtbl_cfg)
1472 {
1473         int ret;
1474         int asid;
1475         struct arm_smmu_device *smmu = smmu_domain->smmu;
1476         struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg;
1477
1478         asid = arm_smmu_bitmap_alloc(smmu->asid_map, smmu->asid_bits);
1479         if (IS_ERR_VALUE(asid))
1480                 return asid;
1481
1482         cfg->cdptr = dmam_alloc_coherent(smmu->dev, CTXDESC_CD_DWORDS << 3,
1483                                          &cfg->cdptr_dma,
1484                                          GFP_KERNEL | __GFP_ZERO);
1485         if (!cfg->cdptr) {
1486                 dev_warn(smmu->dev, "failed to allocate context descriptor\n");
1487                 ret = -ENOMEM;
1488                 goto out_free_asid;
1489         }
1490
1491         cfg->cd.asid    = (u16)asid;
1492         cfg->cd.ttbr    = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[0];
1493         cfg->cd.tcr     = pgtbl_cfg->arm_lpae_s1_cfg.tcr;
1494         cfg->cd.mair    = pgtbl_cfg->arm_lpae_s1_cfg.mair[0];
1495         return 0;
1496
1497 out_free_asid:
1498         arm_smmu_bitmap_free(smmu->asid_map, asid);
1499         return ret;
1500 }
1501
1502 static int arm_smmu_domain_finalise_s2(struct arm_smmu_domain *smmu_domain,
1503                                        struct io_pgtable_cfg *pgtbl_cfg)
1504 {
1505         int vmid;
1506         struct arm_smmu_device *smmu = smmu_domain->smmu;
1507         struct arm_smmu_s2_cfg *cfg = &smmu_domain->s2_cfg;
1508
1509         vmid = arm_smmu_bitmap_alloc(smmu->vmid_map, smmu->vmid_bits);
1510         if (IS_ERR_VALUE(vmid))
1511                 return vmid;
1512
1513         cfg->vmid       = (u16)vmid;
1514         cfg->vttbr      = pgtbl_cfg->arm_lpae_s2_cfg.vttbr;
1515         cfg->vtcr       = pgtbl_cfg->arm_lpae_s2_cfg.vtcr;
1516         return 0;
1517 }
1518
1519 static struct iommu_ops arm_smmu_ops;
1520
1521 static int arm_smmu_domain_finalise(struct iommu_domain *domain)
1522 {
1523         int ret;
1524         unsigned long ias, oas;
1525         enum io_pgtable_fmt fmt;
1526         struct io_pgtable_cfg pgtbl_cfg;
1527         struct io_pgtable_ops *pgtbl_ops;
1528         int (*finalise_stage_fn)(struct arm_smmu_domain *,
1529                                  struct io_pgtable_cfg *);
1530         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1531         struct arm_smmu_device *smmu = smmu_domain->smmu;
1532
1533         /* Restrict the stage to what we can actually support */
1534         if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S1))
1535                 smmu_domain->stage = ARM_SMMU_DOMAIN_S2;
1536         if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S2))
1537                 smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
1538
1539         switch (smmu_domain->stage) {
1540         case ARM_SMMU_DOMAIN_S1:
1541                 ias = VA_BITS;
1542                 oas = smmu->ias;
1543                 fmt = ARM_64_LPAE_S1;
1544                 finalise_stage_fn = arm_smmu_domain_finalise_s1;
1545                 break;
1546         case ARM_SMMU_DOMAIN_NESTED:
1547         case ARM_SMMU_DOMAIN_S2:
1548                 ias = smmu->ias;
1549                 oas = smmu->oas;
1550                 fmt = ARM_64_LPAE_S2;
1551                 finalise_stage_fn = arm_smmu_domain_finalise_s2;
1552                 break;
1553         default:
1554                 return -EINVAL;
1555         }
1556
1557         pgtbl_cfg = (struct io_pgtable_cfg) {
1558                 .pgsize_bitmap  = arm_smmu_ops.pgsize_bitmap,
1559                 .ias            = ias,
1560                 .oas            = oas,
1561                 .tlb            = &arm_smmu_gather_ops,
1562                 .iommu_dev      = smmu->dev,
1563         };
1564
1565         pgtbl_ops = alloc_io_pgtable_ops(fmt, &pgtbl_cfg, smmu_domain);
1566         if (!pgtbl_ops)
1567                 return -ENOMEM;
1568
1569         arm_smmu_ops.pgsize_bitmap = pgtbl_cfg.pgsize_bitmap;
1570         smmu_domain->pgtbl_ops = pgtbl_ops;
1571
1572         ret = finalise_stage_fn(smmu_domain, &pgtbl_cfg);
1573         if (IS_ERR_VALUE(ret))
1574                 free_io_pgtable_ops(pgtbl_ops);
1575
1576         return ret;
1577 }
1578
1579 static struct arm_smmu_group *arm_smmu_group_get(struct device *dev)
1580 {
1581         struct iommu_group *group;
1582         struct arm_smmu_group *smmu_group;
1583
1584         group = iommu_group_get(dev);
1585         if (!group)
1586                 return NULL;
1587
1588         smmu_group = iommu_group_get_iommudata(group);
1589         iommu_group_put(group);
1590         return smmu_group;
1591 }
1592
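/*
 * Return a pointer to the STE for @sid, either via the two-level walk
 * (L1 descriptor then L2 table) or as a simple offset into the linear
 * stream table.
 */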
1593 static __le64 *arm_smmu_get_step_for_sid(struct arm_smmu_device *smmu, u32 sid)
1594 {
1595         __le64 *step;
1596         struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
1597
1598         if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB) {
1599                 struct arm_smmu_strtab_l1_desc *l1_desc;
1600                 int idx;
1601
1602                 /* Two-level walk */
1603                 idx = (sid >> STRTAB_SPLIT) * STRTAB_L1_DESC_DWORDS;
1604                 l1_desc = &cfg->l1_desc[idx];
1605                 idx = (sid & ((1 << STRTAB_SPLIT) - 1)) * STRTAB_STE_DWORDS;
1606                 step = &l1_desc->l2ptr[idx];
1607         } else {
1608                 /* Simple linear lookup */
1609                 step = &cfg->strtab[sid * STRTAB_STE_DWORDS];
1610         }
1611
1612         return step;
1613 }
1614
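/*
 * Point the group's STE at the domain's stage-1 or stage-2 config and
 * rewrite the stream table entry for every SID in the group.
 */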
1615 static int arm_smmu_install_ste_for_group(struct arm_smmu_group *smmu_group)
1616 {
1617         int i;
1618         struct arm_smmu_domain *smmu_domain = smmu_group->domain;
1619         struct arm_smmu_strtab_ent *ste = &smmu_group->ste;
1620         struct arm_smmu_device *smmu = smmu_group->smmu;
1621
1622         if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
1623                 ste->s1_cfg = &smmu_domain->s1_cfg;
1624                 ste->s2_cfg = NULL;
1625                 arm_smmu_write_ctx_desc(smmu, ste->s1_cfg);
1626         } else {
1627                 ste->s1_cfg = NULL;
1628                 ste->s2_cfg = &smmu_domain->s2_cfg;
1629         }
1630
1631         for (i = 0; i < smmu_group->num_sids; ++i) {
1632                 u32 sid = smmu_group->sids[i];
1633                 __le64 *step = arm_smmu_get_step_for_sid(smmu, sid);
1634
1635                 arm_smmu_write_strtab_ent(smmu, sid, step, ste);
1636         }
1637
1638         return 0;
1639 }
1640
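/*
 * Attach a device's group to @domain: finalise the domain against this SMMU
 * on first attach, then install translating (non-bypass) STEs for all of
 * the group's SIDs.
 */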
1641 static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
1642 {
1643         int ret = 0;
1644         struct arm_smmu_device *smmu;
1645         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1646         struct arm_smmu_group *smmu_group = arm_smmu_group_get(dev);
1647
1648         if (!smmu_group)
1649                 return -ENOENT;
1650
1651         /* Already attached to a different domain? */
1652         if (smmu_group->domain && smmu_group->domain != smmu_domain)
1653                 return -EEXIST;
1654
1655         smmu = smmu_group->smmu;
1656         mutex_lock(&smmu_domain->init_mutex);
1657
1658         if (!smmu_domain->smmu) {
1659                 smmu_domain->smmu = smmu;
1660                 ret = arm_smmu_domain_finalise(domain);
1661                 if (ret) {
1662                         smmu_domain->smmu = NULL;
1663                         goto out_unlock;
1664                 }
1665         } else if (smmu_domain->smmu != smmu) {
1666                 dev_err(dev,
1667                         "cannot attach to SMMU %s (upstream of %s)\n",
1668                         dev_name(smmu_domain->smmu->dev),
1669                         dev_name(smmu->dev));
1670                 ret = -ENXIO;
1671                 goto out_unlock;
1672         }
1673
1674         /* Group already attached to this domain? */
1675         if (smmu_group->domain)
1676                 goto out_unlock;
1677
1678         smmu_group->domain      = smmu_domain;
1679         smmu_group->ste.bypass  = false;
1680
1681         ret = arm_smmu_install_ste_for_group(smmu_group);
1682         if (IS_ERR_VALUE(ret))
1683                 smmu_group->domain = NULL;
1684
1685 out_unlock:
1686         mutex_unlock(&smmu_domain->init_mutex);
1687         return ret;
1688 }
1689
1690 static void arm_smmu_detach_dev(struct iommu_domain *domain, struct device *dev)
1691 {
1692         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1693         struct arm_smmu_group *smmu_group = arm_smmu_group_get(dev);
1694
1695         BUG_ON(!smmu_domain);
1696         BUG_ON(!smmu_group);
1697
1698         mutex_lock(&smmu_domain->init_mutex);
1699         BUG_ON(smmu_group->domain != smmu_domain);
1700
1701         smmu_group->ste.bypass = true;
1702         if (IS_ERR_VALUE(arm_smmu_install_ste_for_group(smmu_group)))
1703                 dev_warn(dev, "failed to install bypass STE\n");
1704
1705         smmu_group->domain = NULL;
1706         mutex_unlock(&smmu_domain->init_mutex);
1707 }
1708
1709 static int arm_smmu_map(struct iommu_domain *domain, unsigned long iova,
1710                         phys_addr_t paddr, size_t size, int prot)
1711 {
1712         int ret;
1713         unsigned long flags;
1714         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1715         struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;
1716
1717         if (!ops)
1718                 return -ENODEV;
1719
1720         spin_lock_irqsave(&smmu_domain->pgtbl_lock, flags);
1721         ret = ops->map(ops, iova, paddr, size, prot);
1722         spin_unlock_irqrestore(&smmu_domain->pgtbl_lock, flags);
1723         return ret;
1724 }
1725
1726 static size_t
1727 arm_smmu_unmap(struct iommu_domain *domain, unsigned long iova, size_t size)
1728 {
1729         size_t ret;
1730         unsigned long flags;
1731         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1732         struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;
1733
1734         if (!ops)
1735                 return 0;
1736
1737         spin_lock_irqsave(&smmu_domain->pgtbl_lock, flags);
1738         ret = ops->unmap(ops, iova, size);
1739         spin_unlock_irqrestore(&smmu_domain->pgtbl_lock, flags);
1740         return ret;
1741 }
1742
1743 static phys_addr_t
1744 arm_smmu_iova_to_phys(struct iommu_domain *domain, dma_addr_t iova)
1745 {
1746         phys_addr_t ret;
1747         unsigned long flags;
1748         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1749         struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;
1750
1751         if (!ops)
1752                 return 0;
1753
1754         spin_lock_irqsave(&smmu_domain->pgtbl_lock, flags);
1755         ret = ops->iova_to_phys(ops, iova);
1756         spin_unlock_irqrestore(&smmu_domain->pgtbl_lock, flags);
1757
1758         return ret;
1759 }
1760
1761 static int __arm_smmu_get_pci_sid(struct pci_dev *pdev, u16 alias, void *sidp)
1762 {
1763         *(u32 *)sidp = alias;
1764         return 0; /* Continue walking */
1765 }
1766
1767 static void __arm_smmu_release_pci_iommudata(void *data)
1768 {
1769         kfree(data);
1770 }
1771
1772 static struct arm_smmu_device *arm_smmu_get_for_pci_dev(struct pci_dev *pdev)
1773 {
1774         struct device_node *of_node;
1775         struct platform_device *smmu_pdev;
1776         struct arm_smmu_device *smmu = NULL;
1777         struct pci_bus *bus = pdev->bus;
1778
1779         /* Walk up to the root bus */
1780         while (!pci_is_root_bus(bus))
1781                 bus = bus->parent;
1782
1783         /* Follow the "iommus" phandle from the host controller */
1784         of_node = of_parse_phandle(bus->bridge->parent->of_node, "iommus", 0);
1785         if (!of_node)
1786                 return NULL;
1787
1788         /* See if we can find an SMMU corresponding to the phandle */
1789         smmu_pdev = of_find_device_by_node(of_node);
1790         if (smmu_pdev)
1791                 smmu = platform_get_drvdata(smmu_pdev);
1792
1793         of_node_put(of_node);
1794         return smmu;
1795 }
1796
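/*
 * With a 2-level stream table, each L1 descriptor fans out to
 * 1 << STRTAB_SPLIT L2 entries, so scale the SID limit accordingly.
 */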
1797 static bool arm_smmu_sid_in_range(struct arm_smmu_device *smmu, u32 sid)
1798 {
1799         unsigned long limit = smmu->strtab_cfg.num_l1_ents;
1800
1801         if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB)
1802                 limit *= 1UL << STRTAB_SPLIT;
1803
1804         return sid < limit;
1805 }
1806
1807 static int arm_smmu_add_device(struct device *dev)
1808 {
1809         int i, ret;
1810         u32 sid, *sids;
1811         struct pci_dev *pdev;
1812         struct iommu_group *group;
1813         struct arm_smmu_group *smmu_group;
1814         struct arm_smmu_device *smmu;
1815
1816         /* We only support PCI, for now */
1817         if (!dev_is_pci(dev))
1818                 return -ENODEV;
1819
1820         pdev = to_pci_dev(dev);
1821         group = iommu_group_get_for_dev(dev);
1822         if (IS_ERR(group))
1823                 return PTR_ERR(group);
1824
1825         smmu_group = iommu_group_get_iommudata(group);
1826         if (!smmu_group) {
1827                 smmu = arm_smmu_get_for_pci_dev(pdev);
1828                 if (!smmu) {
1829                         ret = -ENOENT;
1830                         goto out_remove_dev;
1831                 }
1832
1833                 smmu_group = kzalloc(sizeof(*smmu_group), GFP_KERNEL);
1834                 if (!smmu_group) {
1835                         ret = -ENOMEM;
1836                         goto out_remove_dev;
1837                 }
1838
1839                 smmu_group->ste.valid   = true;
1840                 smmu_group->smmu        = smmu;
1841                 iommu_group_set_iommudata(group, smmu_group,
1842                                           __arm_smmu_release_pci_iommudata);
1843         } else {
1844                 smmu = smmu_group->smmu;
1845         }
1846
1847         /* Assume SID == RID until firmware tells us otherwise */
1848         pci_for_each_dma_alias(pdev, __arm_smmu_get_pci_sid, &sid);
1849         for (i = 0; i < smmu_group->num_sids; ++i) {
1850                 /* If we already know about this SID, then we're done */
1851                 if (smmu_group->sids[i] == sid)
1852                         goto out_put_group;
1853         }
1854
1855         /* Check the SID is in range of the SMMU and our stream table */
1856         if (!arm_smmu_sid_in_range(smmu, sid)) {
1857                 ret = -ERANGE;
1858                 goto out_remove_dev;
1859         }
1860
1861         /* Ensure l2 strtab is initialised */
1862         if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB) {
1863                 ret = arm_smmu_init_l2_strtab(smmu, sid);
1864                 if (ret)
1865                         goto out_remove_dev;
1866         }
1867
1868         /* Resize the SID array for the group */
1869         smmu_group->num_sids++;
1870         sids = krealloc(smmu_group->sids, smmu_group->num_sids * sizeof(*sids),
1871                         GFP_KERNEL);
1872         if (!sids) {
1873                 smmu_group->num_sids--;
1874                 ret = -ENOMEM;
1875                 goto out_remove_dev;
1876         }
1877
1878         /* Add the new SID */
1879         sids[smmu_group->num_sids - 1] = sid;
1880         smmu_group->sids = sids;
1881
1882 out_put_group:
1883         iommu_group_put(group);
1884         return 0;
1885
1886 out_remove_dev:
1887         iommu_group_remove_device(dev);
1888         iommu_group_put(group);
1889         return ret;
1890 }
1891
1892 static void arm_smmu_remove_device(struct device *dev)
1893 {
1894         iommu_group_remove_device(dev);
1895 }
1896
1897 static int arm_smmu_domain_get_attr(struct iommu_domain *domain,
1898                                     enum iommu_attr attr, void *data)
1899 {
1900         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1901
1902         switch (attr) {
1903         case DOMAIN_ATTR_NESTING:
1904                 *(int *)data = (smmu_domain->stage == ARM_SMMU_DOMAIN_NESTED);
1905                 return 0;
1906         default:
1907                 return -ENODEV;
1908         }
1909 }
1910
1911 static int arm_smmu_domain_set_attr(struct iommu_domain *domain,
1912                                     enum iommu_attr attr, void *data)
1913 {
1914         int ret = 0;
1915         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1916
1917         mutex_lock(&smmu_domain->init_mutex);
1918
1919         switch (attr) {
1920         case DOMAIN_ATTR_NESTING:
1921                 if (smmu_domain->smmu) {
1922                         ret = -EPERM;
1923                         goto out_unlock;
1924                 }
1925
1926                 if (*(int *)data)
1927                         smmu_domain->stage = ARM_SMMU_DOMAIN_NESTED;
1928                 else
1929                         smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
1930
1931                 break;
1932         default:
1933                 ret = -ENODEV;
1934         }
1935
1936 out_unlock:
1937         mutex_unlock(&smmu_domain->init_mutex);
1938         return ret;
1939 }
1940
1941 static struct iommu_ops arm_smmu_ops = {
1942         .capable                = arm_smmu_capable,
1943         .domain_alloc           = arm_smmu_domain_alloc,
1944         .domain_free            = arm_smmu_domain_free,
1945         .attach_dev             = arm_smmu_attach_dev,
1946         .detach_dev             = arm_smmu_detach_dev,
1947         .map                    = arm_smmu_map,
1948         .unmap                  = arm_smmu_unmap,
1949         .iova_to_phys           = arm_smmu_iova_to_phys,
1950         .add_device             = arm_smmu_add_device,
1951         .remove_device          = arm_smmu_remove_device,
1952         .device_group           = pci_device_group,
1953         .domain_get_attr        = arm_smmu_domain_get_attr,
1954         .domain_set_attr        = arm_smmu_domain_set_attr,
1955         .pgsize_bitmap          = -1UL, /* Restricted during device attach */
1956 };
1957
1958 /* Probing and initialisation functions */
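/*
 * Allocate the memory backing one queue and precompute the Q_BASE register
 * value (RWA hint, base address, log2 size); the base/prod/cons registers
 * themselves are programmed later, in arm_smmu_device_reset().
 */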
1959 static int arm_smmu_init_one_queue(struct arm_smmu_device *smmu,
1960                                    struct arm_smmu_queue *q,
1961                                    unsigned long prod_off,
1962                                    unsigned long cons_off,
1963                                    size_t dwords)
1964 {
1965         size_t qsz = ((1 << q->max_n_shift) * dwords) << 3;
1966
1967         q->base = dmam_alloc_coherent(smmu->dev, qsz, &q->base_dma, GFP_KERNEL);
1968         if (!q->base) {
1969                 dev_err(smmu->dev, "failed to allocate queue (0x%zx bytes)\n",
1970                         qsz);
1971                 return -ENOMEM;
1972         }
1973
1974         q->prod_reg     = smmu->base + prod_off;
1975         q->cons_reg     = smmu->base + cons_off;
1976         q->ent_dwords   = dwords;
1977
1978         q->q_base  = Q_BASE_RWA;
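        /*
         * Note that '<<' binds tighter than '&': base_dma is masked with
         * the ADDR field mask shifted into position.
         */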
1979         q->q_base |= q->base_dma & Q_BASE_ADDR_MASK << Q_BASE_ADDR_SHIFT;
1980         q->q_base |= (q->max_n_shift & Q_BASE_LOG2SIZE_MASK)
1981                      << Q_BASE_LOG2SIZE_SHIFT;
1982
1983         q->prod = q->cons = 0;
1984         return 0;
1985 }
1986
1987 static int arm_smmu_init_queues(struct arm_smmu_device *smmu)
1988 {
1989         int ret;
1990
1991         /* cmdq */
1992         spin_lock_init(&smmu->cmdq.lock);
1993         ret = arm_smmu_init_one_queue(smmu, &smmu->cmdq.q, ARM_SMMU_CMDQ_PROD,
1994                                       ARM_SMMU_CMDQ_CONS, CMDQ_ENT_DWORDS);
1995         if (ret)
1996                 return ret;
1997
1998         /* evtq */
1999         ret = arm_smmu_init_one_queue(smmu, &smmu->evtq.q, ARM_SMMU_EVTQ_PROD,
2000                                       ARM_SMMU_EVTQ_CONS, EVTQ_ENT_DWORDS);
2001         if (ret)
2002                 return ret;
2003
2004         /* priq */
2005         if (!(smmu->features & ARM_SMMU_FEAT_PRI))
2006                 return 0;
2007
2008         return arm_smmu_init_one_queue(smmu, &smmu->priq.q, ARM_SMMU_PRIQ_PROD,
2009                                        ARM_SMMU_PRIQ_CONS, PRIQ_ENT_DWORDS);
2010 }
2011
2012 static int arm_smmu_init_l1_strtab(struct arm_smmu_device *smmu)
2013 {
2014         unsigned int i;
2015         struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
2016         size_t size = sizeof(*cfg->l1_desc) * cfg->num_l1_ents;
2017         void *strtab = smmu->strtab_cfg.strtab;
2018
2019         cfg->l1_desc = devm_kzalloc(smmu->dev, size, GFP_KERNEL);
2020         if (!cfg->l1_desc) {
2021                 dev_err(smmu->dev, "failed to allocate l1 stream table desc\n");
2022                 return -ENOMEM;
2023         }
2024
2025         for (i = 0; i < cfg->num_l1_ents; ++i) {
2026                 arm_smmu_write_strtab_l1_desc(strtab, &cfg->l1_desc[i]);
2027                 strtab += STRTAB_L1_DESC_DWORDS << 3;
2028         }
2029
2030         return 0;
2031 }
2032
2033 static int arm_smmu_init_strtab_2lvl(struct arm_smmu_device *smmu)
2034 {
2035         void *strtab;
2036         u64 reg;
2037         u32 size, l1size;
2038         struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
2039
2040         /*
2041          * If we can resolve everything with a single L2 table, then we
2042          * just need a single L1 descriptor. Otherwise, calculate the L1
2043          * size, capped to the SIDSIZE.
2044          */
2045         if (smmu->sid_bits < STRTAB_SPLIT) {
2046                 size = 0;
2047         } else {
2048                 size = STRTAB_L1_SZ_SHIFT - (ilog2(STRTAB_L1_DESC_DWORDS) + 3);
2049                 size = min(size, smmu->sid_bits - STRTAB_SPLIT);
2050         }
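        /*
         * e.g. an implementation with sid_bits == STRTAB_SPLIT + 2 only
         * needs four L1 descriptors, each pointing at an L2 table of
         * 2^STRTAB_SPLIT STEs.
         */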
2051         cfg->num_l1_ents = 1 << size;
2052
2053         size += STRTAB_SPLIT;
2054         if (size < smmu->sid_bits)
2055                 dev_warn(smmu->dev,
2056                          "2-level strtab only covers %u/%u bits of SID\n",
2057                          size, smmu->sid_bits);
2058
2059         l1size = cfg->num_l1_ents * (STRTAB_L1_DESC_DWORDS << 3);
2060         strtab = dmam_alloc_coherent(smmu->dev, l1size, &cfg->strtab_dma,
2061                                      GFP_KERNEL | __GFP_ZERO);
2062         if (!strtab) {
2063                 dev_err(smmu->dev,
2064                         "failed to allocate l1 stream table (%u bytes)\n",
2065                         l1size);
2066                 return -ENOMEM;
2067         }
2068         cfg->strtab = strtab;
2069
2070         /* Configure strtab_base_cfg for 2 levels */
2071         reg  = STRTAB_BASE_CFG_FMT_2LVL;
2072         reg |= (size & STRTAB_BASE_CFG_LOG2SIZE_MASK)
2073                 << STRTAB_BASE_CFG_LOG2SIZE_SHIFT;
2074         reg |= (STRTAB_SPLIT & STRTAB_BASE_CFG_SPLIT_MASK)
2075                 << STRTAB_BASE_CFG_SPLIT_SHIFT;
2076         cfg->strtab_base_cfg = reg;
2077
2078         return arm_smmu_init_l1_strtab(smmu);
2079 }
2080
2081 static int arm_smmu_init_strtab_linear(struct arm_smmu_device *smmu)
2082 {
2083         void *strtab;
2084         u64 reg;
2085         u32 size;
2086         struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
2087
2088         size = (1 << smmu->sid_bits) * (STRTAB_STE_DWORDS << 3);
2089         strtab = dmam_alloc_coherent(smmu->dev, size, &cfg->strtab_dma,
2090                                      GFP_KERNEL | __GFP_ZERO);
2091         if (!strtab) {
2092                 dev_err(smmu->dev,
2093                         "failed to allocate linear stream table (%u bytes)\n",
2094                         size);
2095                 return -ENOMEM;
2096         }
2097         cfg->strtab = strtab;
2098         cfg->num_l1_ents = 1 << smmu->sid_bits;
2099
2100         /* Configure strtab_base_cfg for a linear table covering all SIDs */
2101         reg  = STRTAB_BASE_CFG_FMT_LINEAR;
2102         reg |= (smmu->sid_bits & STRTAB_BASE_CFG_LOG2SIZE_MASK)
2103                 << STRTAB_BASE_CFG_LOG2SIZE_SHIFT;
2104         cfg->strtab_base_cfg = reg;
2105
2106         arm_smmu_init_bypass_stes(strtab, cfg->num_l1_ents);
2107         return 0;
2108 }
2109
2110 static int arm_smmu_init_strtab(struct arm_smmu_device *smmu)
2111 {
2112         u64 reg;
2113         int ret;
2114
2115         if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB)
2116                 ret = arm_smmu_init_strtab_2lvl(smmu);
2117         else
2118                 ret = arm_smmu_init_strtab_linear(smmu);
2119
2120         if (ret)
2121                 return ret;
2122
2123         /* Set the strtab base address */
2124         reg  = smmu->strtab_cfg.strtab_dma &
2125                STRTAB_BASE_ADDR_MASK << STRTAB_BASE_ADDR_SHIFT;
2126         reg |= STRTAB_BASE_RA;
2127         smmu->strtab_cfg.strtab_base = reg;
2128
2129         /* Allocate the first VMID for stage-2 bypass STEs */
2130         set_bit(0, smmu->vmid_map);
2131         return 0;
2132 }
2133
2134 static int arm_smmu_init_structures(struct arm_smmu_device *smmu)
2135 {
2136         int ret;
2137
2138         ret = arm_smmu_init_queues(smmu);
2139         if (ret)
2140                 return ret;
2141
2142         return arm_smmu_init_strtab(smmu);
2143 }
2144
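/* Write a register and poll its ACK register until the new value is reflected */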
2145 static int arm_smmu_write_reg_sync(struct arm_smmu_device *smmu, u32 val,
2146                                    unsigned int reg_off, unsigned int ack_off)
2147 {
2148         u32 reg;
2149
2150         writel_relaxed(val, smmu->base + reg_off);
2151         return readl_relaxed_poll_timeout(smmu->base + ack_off, reg, reg == val,
2152                                           1, ARM_SMMU_POLL_TIMEOUT_US);
2153 }
2154
2155 static void arm_smmu_free_msis(void *data)
2156 {
2157         struct device *dev = data;
2158         platform_msi_domain_free_irqs(dev);
2159 }
2160
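/*
 * platform-MSI write_msg callback: program the doorbell address, payload and
 * memory attributes into the IRQ_CFG0/1/2 registers selected for this MSI
 * index by arm_smmu_msi_cfg[].
 */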
2161 static void arm_smmu_write_msi_msg(struct msi_desc *desc, struct msi_msg *msg)
2162 {
2163         phys_addr_t doorbell;
2164         struct device *dev = msi_desc_to_dev(desc);
2165         struct arm_smmu_device *smmu = dev_get_drvdata(dev);
2166         phys_addr_t *cfg = arm_smmu_msi_cfg[desc->platform.msi_index];
2167
2168         doorbell = (((u64)msg->address_hi) << 32) | msg->address_lo;
2169         doorbell &= MSI_CFG0_ADDR_MASK << MSI_CFG0_ADDR_SHIFT;
2170
2171         writeq_relaxed(doorbell, smmu->base + cfg[0]);
2172         writel_relaxed(msg->data, smmu->base + cfg[1]);
2173         writel_relaxed(MSI_CFG2_MEMATTR_DEVICE_nGnRE, smmu->base + cfg[2]);
2174 }
2175
2176 static void arm_smmu_setup_msis(struct arm_smmu_device *smmu)
2177 {
2178         struct msi_desc *desc;
2179         int ret, nvec = ARM_SMMU_MAX_MSIS;
2180         struct device *dev = smmu->dev;
2181
2182         /* Clear the MSI address regs */
2183         writeq_relaxed(0, smmu->base + ARM_SMMU_GERROR_IRQ_CFG0);
2184         writeq_relaxed(0, smmu->base + ARM_SMMU_EVTQ_IRQ_CFG0);
2185
2186         if (smmu->features & ARM_SMMU_FEAT_PRI)
2187                 writeq_relaxed(0, smmu->base + ARM_SMMU_PRIQ_IRQ_CFG0);
2188         else
2189                 nvec--;
2190
2191         if (!(smmu->features & ARM_SMMU_FEAT_MSI))
2192                 return;
2193
2194         /* Allocate MSIs for evtq, gerror and priq. Ignore cmdq */
2195         ret = platform_msi_domain_alloc_irqs(dev, nvec, arm_smmu_write_msi_msg);
2196         if (ret) {
2197                 dev_warn(dev, "failed to allocate MSIs\n");
2198                 return;
2199         }
2200
2201         for_each_msi_entry(desc, dev) {
2202                 switch (desc->platform.msi_index) {
2203                 case EVTQ_MSI_INDEX:
2204                         smmu->evtq.q.irq = desc->irq;
2205                         break;
2206                 case GERROR_MSI_INDEX:
2207                         smmu->gerr_irq = desc->irq;
2208                         break;
2209                 case PRIQ_MSI_INDEX:
2210                         smmu->priq.q.irq = desc->irq;
2211                         break;
2212                 default:        /* Unknown */
2213                         continue;
2214                 }
2215         }
2216
2217         /* Add callback to free MSIs on teardown */
2218         devm_add_action(dev, arm_smmu_free_msis, dev);
2219 }
2220
2221 static int arm_smmu_setup_irqs(struct arm_smmu_device *smmu)
2222 {
2223         int ret, irq;
2224         u32 irqen_flags = IRQ_CTRL_EVTQ_IRQEN | IRQ_CTRL_GERROR_IRQEN;
2225
2226         /* Disable IRQs first */
2227         ret = arm_smmu_write_reg_sync(smmu, 0, ARM_SMMU_IRQ_CTRL,
2228                                       ARM_SMMU_IRQ_CTRLACK);
2229         if (ret) {
2230                 dev_err(smmu->dev, "failed to disable irqs\n");
2231                 return ret;
2232         }
2233
2234         arm_smmu_setup_msis(smmu);
2235
2236         /* Request interrupt lines */
2237         irq = smmu->evtq.q.irq;
2238         if (irq) {
2239                 ret = devm_request_threaded_irq(smmu->dev, irq,
2240                                                 arm_smmu_evtq_handler,
2241                                                 arm_smmu_evtq_thread,
2242                                                 0, "arm-smmu-v3-evtq", smmu);
2243                 if (IS_ERR_VALUE(ret))
2244                         dev_warn(smmu->dev, "failed to enable evtq irq\n");
2245         }
2246
2247         irq = smmu->cmdq.q.irq;
2248         if (irq) {
2249                 ret = devm_request_irq(smmu->dev, irq,
2250                                        arm_smmu_cmdq_sync_handler, 0,
2251                                        "arm-smmu-v3-cmdq-sync", smmu);
2252                 if (IS_ERR_VALUE(ret))
2253                         dev_warn(smmu->dev, "failed to enable cmdq-sync irq\n");
2254         }
2255
2256         irq = smmu->gerr_irq;
2257         if (irq) {
2258                 ret = devm_request_irq(smmu->dev, irq, arm_smmu_gerror_handler,
2259                                        0, "arm-smmu-v3-gerror", smmu);
2260                 if (IS_ERR_VALUE(ret))
2261                         dev_warn(smmu->dev, "failed to enable gerror irq\n");
2262         }
2263
2264         if (smmu->features & ARM_SMMU_FEAT_PRI) {
2265                 irq = smmu->priq.q.irq;
2266                 if (irq) {
2267                         ret = devm_request_threaded_irq(smmu->dev, irq,
2268                                                         arm_smmu_priq_handler,
2269                                                         arm_smmu_priq_thread,
2270                                                         0, "arm-smmu-v3-priq",
2271                                                         smmu);
2272                         if (IS_ERR_VALUE(ret))
2273                                 dev_warn(smmu->dev,
2274                                          "failed to enable priq irq\n");
2275                         else
2276                                 irqen_flags |= IRQ_CTRL_PRIQ_IRQEN;
2277                 }
2278         }
2279
2280         /* Enable interrupt generation on the SMMU */
2281         ret = arm_smmu_write_reg_sync(smmu, irqen_flags,
2282                                       ARM_SMMU_IRQ_CTRL, ARM_SMMU_IRQ_CTRLACK);
2283         if (ret)
2284                 dev_warn(smmu->dev, "failed to enable irqs\n");
2285
2286         return 0;
2287 }
2288
2289 static int arm_smmu_device_disable(struct arm_smmu_device *smmu)
2290 {
2291         int ret;
2292
2293         ret = arm_smmu_write_reg_sync(smmu, 0, ARM_SMMU_CR0, ARM_SMMU_CR0ACK);
2294         if (ret)
2295                 dev_err(smmu->dev, "failed to clear cr0\n");
2296
2297         return ret;
2298 }
2299
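/*
 * Full reset/bring-up sequence: disable the SMMU, program table and queue
 * attributes and base registers, enable the command queue and use it to
 * invalidate cached configuration and TLBs, enable the event/PRI queues,
 * set up IRQs and finally set SMMUEN.
 */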
2300 static int arm_smmu_device_reset(struct arm_smmu_device *smmu)
2301 {
2302         int ret;
2303         u32 reg, enables;
2304         struct arm_smmu_cmdq_ent cmd;
2305
2306         /* Clear CR0 and sync (disables SMMU and queue processing) */
2307         reg = readl_relaxed(smmu->base + ARM_SMMU_CR0);
2308         if (reg & CR0_SMMUEN)
2309                 dev_warn(smmu->dev, "SMMU currently enabled! Resetting...\n");
2310
2311         ret = arm_smmu_device_disable(smmu);
2312         if (ret)
2313                 return ret;
2314
2315         /* CR1 (table and queue memory attributes) */
2316         reg = (CR1_SH_ISH << CR1_TABLE_SH_SHIFT) |
2317               (CR1_CACHE_WB << CR1_TABLE_OC_SHIFT) |
2318               (CR1_CACHE_WB << CR1_TABLE_IC_SHIFT) |
2319               (CR1_SH_ISH << CR1_QUEUE_SH_SHIFT) |
2320               (CR1_CACHE_WB << CR1_QUEUE_OC_SHIFT) |
2321               (CR1_CACHE_WB << CR1_QUEUE_IC_SHIFT);
2322         writel_relaxed(reg, smmu->base + ARM_SMMU_CR1);
2323
2324         /* CR2 (random crap) */
2325         reg = CR2_PTM | CR2_RECINVSID | CR2_E2H;
2326         writel_relaxed(reg, smmu->base + ARM_SMMU_CR2);
2327
2328         /* Stream table */
2329         writeq_relaxed(smmu->strtab_cfg.strtab_base,
2330                        smmu->base + ARM_SMMU_STRTAB_BASE);
2331         writel_relaxed(smmu->strtab_cfg.strtab_base_cfg,
2332                        smmu->base + ARM_SMMU_STRTAB_BASE_CFG);
2333
2334         /* Command queue */
2335         writeq_relaxed(smmu->cmdq.q.q_base, smmu->base + ARM_SMMU_CMDQ_BASE);
2336         writel_relaxed(smmu->cmdq.q.prod, smmu->base + ARM_SMMU_CMDQ_PROD);
2337         writel_relaxed(smmu->cmdq.q.cons, smmu->base + ARM_SMMU_CMDQ_CONS);
2338
2339         enables = CR0_CMDQEN;
2340         ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
2341                                       ARM_SMMU_CR0ACK);
2342         if (ret) {
2343                 dev_err(smmu->dev, "failed to enable command queue\n");
2344                 return ret;
2345         }
2346
2347         /* Invalidate any cached configuration */
2348         cmd.opcode = CMDQ_OP_CFGI_ALL;
2349         arm_smmu_cmdq_issue_cmd(smmu, &cmd);
2350         cmd.opcode = CMDQ_OP_CMD_SYNC;
2351         arm_smmu_cmdq_issue_cmd(smmu, &cmd);
2352
2353         /* Invalidate any stale TLB entries */
2354         if (smmu->features & ARM_SMMU_FEAT_HYP) {
2355                 cmd.opcode = CMDQ_OP_TLBI_EL2_ALL;
2356                 arm_smmu_cmdq_issue_cmd(smmu, &cmd);
2357         }
2358
2359         cmd.opcode = CMDQ_OP_TLBI_NSNH_ALL;
2360         arm_smmu_cmdq_issue_cmd(smmu, &cmd);
2361         cmd.opcode = CMDQ_OP_CMD_SYNC;
2362         arm_smmu_cmdq_issue_cmd(smmu, &cmd);
2363
2364         /* Event queue */
2365         writeq_relaxed(smmu->evtq.q.q_base, smmu->base + ARM_SMMU_EVTQ_BASE);
2366         writel_relaxed(smmu->evtq.q.prod, smmu->base + ARM_SMMU_EVTQ_PROD);
2367         writel_relaxed(smmu->evtq.q.cons, smmu->base + ARM_SMMU_EVTQ_CONS);
2368
2369         enables |= CR0_EVTQEN;
2370         ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
2371                                       ARM_SMMU_CR0ACK);
2372         if (ret) {
2373                 dev_err(smmu->dev, "failed to enable event queue\n");
2374                 return ret;
2375         }
2376
2377         /* PRI queue */
2378         if (smmu->features & ARM_SMMU_FEAT_PRI) {
2379                 writeq_relaxed(smmu->priq.q.q_base,
2380                                smmu->base + ARM_SMMU_PRIQ_BASE);
2381                 writel_relaxed(smmu->priq.q.prod,
2382                                smmu->base + ARM_SMMU_PRIQ_PROD);
2383                 writel_relaxed(smmu->priq.q.cons,
2384                                smmu->base + ARM_SMMU_PRIQ_CONS);
2385
2386                 enables |= CR0_PRIQEN;
2387                 ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
2388                                               ARM_SMMU_CR0ACK);
2389                 if (ret) {
2390                         dev_err(smmu->dev, "failed to enable PRI queue\n");
2391                         return ret;
2392                 }
2393         }
2394
2395         ret = arm_smmu_setup_irqs(smmu);
2396         if (ret) {
2397                 dev_err(smmu->dev, "failed to setup irqs\n");
2398                 return ret;
2399         }
2400
2401         /* Enable the SMMU interface */
2402         enables |= CR0_SMMUEN;
2403         ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
2404                                       ARM_SMMU_CR0ACK);
2405         if (ret) {
2406                 dev_err(smmu->dev, "failed to enable SMMU interface\n");
2407                 return ret;
2408         }
2409
2410         return 0;
2411 }
2412
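/*
 * Read the IDR0/IDR1/IDR5 registers to discover the features, queue sizes,
 * SID/SSID/ASID/VMID widths, page sizes and address sizes supported by this
 * implementation.
 */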
2413 static int arm_smmu_device_probe(struct arm_smmu_device *smmu)
2414 {
2415         u32 reg;
2416         bool coherent;
2417         unsigned long pgsize_bitmap = 0;
2418
2419         /* IDR0 */
2420         reg = readl_relaxed(smmu->base + ARM_SMMU_IDR0);
2421
2422         /* 2-level structures */
2423         if ((reg & IDR0_ST_LVL_MASK << IDR0_ST_LVL_SHIFT) == IDR0_ST_LVL_2LVL)
2424                 smmu->features |= ARM_SMMU_FEAT_2_LVL_STRTAB;
2425
2426         if (reg & IDR0_CD2L)
2427                 smmu->features |= ARM_SMMU_FEAT_2_LVL_CDTAB;
2428
2429         /*
2430          * Translation table endianness.
2431          * We currently require the same endianness as the CPU, but this
2432          * could be changed later by adding a new IO_PGTABLE_QUIRK.
2433          */
2434         switch (reg & IDR0_TTENDIAN_MASK << IDR0_TTENDIAN_SHIFT) {
2435         case IDR0_TTENDIAN_MIXED:
2436                 smmu->features |= ARM_SMMU_FEAT_TT_LE | ARM_SMMU_FEAT_TT_BE;
2437                 break;
2438 #ifdef __BIG_ENDIAN
2439         case IDR0_TTENDIAN_BE:
2440                 smmu->features |= ARM_SMMU_FEAT_TT_BE;
2441                 break;
2442 #else
2443         case IDR0_TTENDIAN_LE:
2444                 smmu->features |= ARM_SMMU_FEAT_TT_LE;
2445                 break;
2446 #endif
2447         default:
2448                 dev_err(smmu->dev, "unknown/unsupported TT endianness!\n");
2449                 return -ENXIO;
2450         }
2451
2452         /* Boolean feature flags */
2453         if (IS_ENABLED(CONFIG_PCI_PRI) && reg & IDR0_PRI)
2454                 smmu->features |= ARM_SMMU_FEAT_PRI;
2455
2456         if (IS_ENABLED(CONFIG_PCI_ATS) && reg & IDR0_ATS)
2457                 smmu->features |= ARM_SMMU_FEAT_ATS;
2458
2459         if (reg & IDR0_SEV)
2460                 smmu->features |= ARM_SMMU_FEAT_SEV;
2461
2462         if (reg & IDR0_MSI)
2463                 smmu->features |= ARM_SMMU_FEAT_MSI;
2464
2465         if (reg & IDR0_HYP)
2466                 smmu->features |= ARM_SMMU_FEAT_HYP;
2467
2468         /*
2469          * The dma-coherent property is used in preference to the ID
2470          * register, but warn on mismatch.
2471          */
2472         coherent = of_dma_is_coherent(smmu->dev->of_node);
2473         if (coherent)
2474                 smmu->features |= ARM_SMMU_FEAT_COHERENCY;
2475
2476         if (!!(reg & IDR0_COHACC) != coherent)
2477                 dev_warn(smmu->dev, "IDR0.COHACC overridden by dma-coherent property (%s)\n",
2478                          coherent ? "true" : "false");
2479
2480         switch (reg & IDR0_STALL_MODEL_MASK << IDR0_STALL_MODEL_SHIFT) {
2481         case IDR0_STALL_MODEL_STALL:
2482                 /* Fallthrough */
2483         case IDR0_STALL_MODEL_FORCE:
2484                 smmu->features |= ARM_SMMU_FEAT_STALLS;
2485         }
2486
2487         if (reg & IDR0_S1P)
2488                 smmu->features |= ARM_SMMU_FEAT_TRANS_S1;
2489
2490         if (reg & IDR0_S2P)
2491                 smmu->features |= ARM_SMMU_FEAT_TRANS_S2;
2492
2493         if (!(reg & (IDR0_S1P | IDR0_S2P))) {
2494                 dev_err(smmu->dev, "no translation support!\n");
2495                 return -ENXIO;
2496         }
2497
2498         /* We only support the AArch64 table format at present */
2499         switch (reg & IDR0_TTF_MASK << IDR0_TTF_SHIFT) {
2500         case IDR0_TTF_AARCH32_64:
2501                 smmu->ias = 40;
2502                 /* Fallthrough */
2503         case IDR0_TTF_AARCH64:
2504                 break;
2505         default:
2506                 dev_err(smmu->dev, "AArch64 table format not supported!\n");
2507                 return -ENXIO;
2508         }
2509
2510         /* ASID/VMID sizes */
2511         smmu->asid_bits = reg & IDR0_ASID16 ? 16 : 8;
2512         smmu->vmid_bits = reg & IDR0_VMID16 ? 16 : 8;
2513
2514         /* IDR1 */
2515         reg = readl_relaxed(smmu->base + ARM_SMMU_IDR1);
2516         if (reg & (IDR1_TABLES_PRESET | IDR1_QUEUES_PRESET | IDR1_REL)) {
2517                 dev_err(smmu->dev, "embedded implementation not supported\n");
2518                 return -ENXIO;
2519         }
2520
2521         /* Queue sizes, capped at 4k */
2522         smmu->cmdq.q.max_n_shift = min((u32)CMDQ_MAX_SZ_SHIFT,
2523                                        reg >> IDR1_CMDQ_SHIFT & IDR1_CMDQ_MASK);
2524         if (!smmu->cmdq.q.max_n_shift) {
2525                 /* Odd alignment restrictions on the base, so ignore for now */
2526                 dev_err(smmu->dev, "unit-length command queue not supported\n");
2527                 return -ENXIO;
2528         }
2529
2530         smmu->evtq.q.max_n_shift = min((u32)EVTQ_MAX_SZ_SHIFT,
2531                                        reg >> IDR1_EVTQ_SHIFT & IDR1_EVTQ_MASK);
2532         smmu->priq.q.max_n_shift = min((u32)PRIQ_MAX_SZ_SHIFT,
2533                                        reg >> IDR1_PRIQ_SHIFT & IDR1_PRIQ_MASK);
2534
2535         /* SID/SSID sizes */
2536         smmu->ssid_bits = reg >> IDR1_SSID_SHIFT & IDR1_SSID_MASK;
2537         smmu->sid_bits = reg >> IDR1_SID_SHIFT & IDR1_SID_MASK;
2538
2539         /* IDR5 */
2540         reg = readl_relaxed(smmu->base + ARM_SMMU_IDR5);
2541
2542         /* Maximum number of outstanding stalls */
2543         smmu->evtq.max_stalls = reg >> IDR5_STALL_MAX_SHIFT
2544                                 & IDR5_STALL_MAX_MASK;
2545
2546         /* Page sizes */
2547         if (reg & IDR5_GRAN64K)
2548                 pgsize_bitmap |= SZ_64K | SZ_512M;
2549         if (reg & IDR5_GRAN16K)
2550                 pgsize_bitmap |= SZ_16K | SZ_32M;
2551         if (reg & IDR5_GRAN4K)
2552                 pgsize_bitmap |= SZ_4K | SZ_2M | SZ_1G;
2553
2554         arm_smmu_ops.pgsize_bitmap &= pgsize_bitmap;
2555
2556         /* Output address size */
2557         switch (reg & IDR5_OAS_MASK << IDR5_OAS_SHIFT) {
2558         case IDR5_OAS_32_BIT:
2559                 smmu->oas = 32;
2560                 break;
2561         case IDR5_OAS_36_BIT:
2562                 smmu->oas = 36;
2563                 break;
2564         case IDR5_OAS_40_BIT:
2565                 smmu->oas = 40;
2566                 break;
2567         case IDR5_OAS_42_BIT:
2568                 smmu->oas = 42;
2569                 break;
2570         case IDR5_OAS_44_BIT:
2571                 smmu->oas = 44;
2572                 break;
2573         default:
2574                 dev_info(smmu->dev,
2575                         "unknown output address size. Truncating to 48-bit\n");
2576                 /* Fallthrough */
2577         case IDR5_OAS_48_BIT:
2578                 smmu->oas = 48;
2579         }
2580
2581         /* Set the DMA mask for our table walker */
2582         if (dma_set_mask_and_coherent(smmu->dev, DMA_BIT_MASK(smmu->oas)))
2583                 dev_warn(smmu->dev,
2584                          "failed to set DMA mask for table walker\n");
2585
2586         smmu->ias = max(smmu->ias, smmu->oas);
2587
2588         dev_info(smmu->dev, "ias %lu-bit, oas %lu-bit (features 0x%08x)\n",
2589                  smmu->ias, smmu->oas, smmu->features);
2590         return 0;
2591 }
2592
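/*
 * Purely illustrative sketch (not the authoritative binding) of a DT node
 * this probe routine can consume; the unit address, reg and interrupt
 * specifiers below are placeholders:
 *
 *	smmu@deadbeef {
 *		compatible = "arm,smmu-v3";
 *		reg = <0xdeadbeef 0x20000>;
 *		interrupts = <...>, <...>, <...>, <...>;
 *		interrupt-names = "eventq", "priq", "cmdq-sync", "gerror";
 *		dma-coherent;
 *	};
 *
 * As far as this routine is concerned, the MMIO region must span at least
 * SZ_128K, while "dma-coherent" and each of the named interrupts are
 * optional.
 */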
2593 static int arm_smmu_device_dt_probe(struct platform_device *pdev)
2594 {
2595         int irq, ret;
2596         struct resource *res;
2597         struct arm_smmu_device *smmu;
2598         struct device *dev = &pdev->dev;
2599
2600         smmu = devm_kzalloc(dev, sizeof(*smmu), GFP_KERNEL);
2601         if (!smmu) {
2602                 dev_err(dev, "failed to allocate arm_smmu_device\n");
2603                 return -ENOMEM;
2604         }
2605         smmu->dev = dev;
2606
2607         /* Base address */
2608         res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
2609         if (resource_size(res) + 1 < SZ_128K) {
2610                 dev_err(dev, "MMIO region too small (%pr)\n", res);
2611                 return -EINVAL;
2612         }
2613
2614         smmu->base = devm_ioremap_resource(dev, res);
2615         if (IS_ERR(smmu->base))
2616                 return PTR_ERR(smmu->base);
2617
2618         /* Interrupt lines */
2619         irq = platform_get_irq_byname(pdev, "eventq");
2620         if (irq > 0)
2621                 smmu->evtq.q.irq = irq;
2622
2623         irq = platform_get_irq_byname(pdev, "priq");
2624         if (irq > 0)
2625                 smmu->priq.q.irq = irq;
2626
2627         irq = platform_get_irq_byname(pdev, "cmdq-sync");
2628         if (irq > 0)
2629                 smmu->cmdq.q.irq = irq;
2630
2631         irq = platform_get_irq_byname(pdev, "gerror");
2632         if (irq > 0)
2633                 smmu->gerr_irq = irq;
2634
2635         parse_driver_options(smmu);
2636
2637         /* Probe the h/w */
2638         ret = arm_smmu_device_probe(smmu);
2639         if (ret)
2640                 return ret;
2641
2642         /* Initialise in-memory data structures */
2643         ret = arm_smmu_init_structures(smmu);
2644         if (ret)
2645                 return ret;
2646
2647         /* Record our private device structure */
2648         platform_set_drvdata(pdev, smmu);
2649
2650         /* Reset the device */
2651         return arm_smmu_device_reset(smmu);
2652 }
2653
2654 static int arm_smmu_device_remove(struct platform_device *pdev)
2655 {
2656         struct arm_smmu_device *smmu = platform_get_drvdata(pdev);
2657
2658         arm_smmu_device_disable(smmu);
2659         return 0;
2660 }
2661
2662 static struct of_device_id arm_smmu_of_match[] = {
2663         { .compatible = "arm,smmu-v3", },
2664         { },
2665 };
2666 MODULE_DEVICE_TABLE(of, arm_smmu_of_match);
2667
2668 static struct platform_driver arm_smmu_driver = {
2669         .driver = {
2670                 .name           = "arm-smmu-v3",
2671                 .of_match_table = of_match_ptr(arm_smmu_of_match),
2672         },
2673         .probe  = arm_smmu_device_dt_probe,
2674         .remove = arm_smmu_device_remove,
2675 };
2676
2677 static int __init arm_smmu_init(void)
2678 {
2679         struct device_node *np;
2680         int ret;
2681
2682         np = of_find_matching_node(NULL, arm_smmu_of_match);
2683         if (!np)
2684                 return 0;
2685
2686         of_node_put(np);
2687
2688         ret = platform_driver_register(&arm_smmu_driver);
2689         if (ret)
2690                 return ret;
2691
2692         return bus_set_iommu(&pci_bus_type, &arm_smmu_ops);
2693 }
2694
2695 static void __exit arm_smmu_exit(void)
2696 {
2697         return platform_driver_unregister(&arm_smmu_driver);
2698 }
2699
2700 subsys_initcall(arm_smmu_init);
2701 module_exit(arm_smmu_exit);
2702
2703 MODULE_DESCRIPTION("IOMMU API for ARM architected SMMUv3 implementations");
2704 MODULE_AUTHOR("Will Deacon <will.deacon@arm.com>");
2705 MODULE_LICENSE("GPL v2");