drivers/iommu/arm-smmu.c (karo-tx-linux.git blob, "iommu/arm-smmu: Intelligent SMR allocation")
1 /*
2  * IOMMU API for ARM architected SMMU implementations.
3  *
4  * This program is free software; you can redistribute it and/or modify
5  * it under the terms of the GNU General Public License version 2 as
6  * published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11  * GNU General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public License
14  * along with this program; if not, write to the Free Software
15  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
16  *
17  * Copyright (C) 2013 ARM Limited
18  *
19  * Author: Will Deacon <will.deacon@arm.com>
20  *
21  * This driver currently supports:
22  *      - SMMUv1 and v2 implementations
23  *      - Stream-matching and stream-indexing
24  *      - v7/v8 long-descriptor format
25  *      - Non-secure access to the SMMU
26  *      - Context fault reporting
27  */
28
29 #define pr_fmt(fmt) "arm-smmu: " fmt
30
31 #include <linux/atomic.h>
32 #include <linux/delay.h>
33 #include <linux/dma-iommu.h>
34 #include <linux/dma-mapping.h>
35 #include <linux/err.h>
36 #include <linux/interrupt.h>
37 #include <linux/io.h>
38 #include <linux/io-64-nonatomic-hi-lo.h>
39 #include <linux/iommu.h>
40 #include <linux/iopoll.h>
41 #include <linux/module.h>
42 #include <linux/of.h>
43 #include <linux/of_address.h>
44 #include <linux/of_device.h>
45 #include <linux/pci.h>
46 #include <linux/platform_device.h>
47 #include <linux/slab.h>
48 #include <linux/spinlock.h>
49
50 #include <linux/amba/bus.h>
51
52 #include "io-pgtable.h"
53
54 /* Maximum number of stream IDs assigned to a single device */
55 #define MAX_MASTER_STREAMIDS            128
56
57 /* Maximum number of context banks per SMMU */
58 #define ARM_SMMU_MAX_CBS                128
59
60 /* SMMU global address space */
61 #define ARM_SMMU_GR0(smmu)              ((smmu)->base)
62 #define ARM_SMMU_GR1(smmu)              ((smmu)->base + (1 << (smmu)->pgshift))
63
64 /*
65  * SMMU global address space with conditional offset to access secure
66  * aliases of non-secure registers (e.g. nsCR0: 0x400, nsGFSR: 0x448,
67  * nsGFSYNR0: 0x450)
68  */
69 #define ARM_SMMU_GR0_NS(smmu)                                           \
70         ((smmu)->base +                                                 \
71                 ((smmu->options & ARM_SMMU_OPT_SECURE_CFG_ACCESS)       \
72                         ? 0x400 : 0))
73
74 /*
75  * Some 64-bit registers only make sense to write atomically, but in such
76  * cases all the data relevant to AArch32 formats lies within the lower word,
77  * therefore this actually makes more sense than it might first appear.
78  */
79 #ifdef CONFIG_64BIT
80 #define smmu_write_atomic_lq            writeq_relaxed
81 #else
82 #define smmu_write_atomic_lq            writel_relaxed
83 #endif
84
85 /* Configuration registers */
86 #define ARM_SMMU_GR0_sCR0               0x0
87 #define sCR0_CLIENTPD                   (1 << 0)
88 #define sCR0_GFRE                       (1 << 1)
89 #define sCR0_GFIE                       (1 << 2)
90 #define sCR0_GCFGFRE                    (1 << 4)
91 #define sCR0_GCFGFIE                    (1 << 5)
92 #define sCR0_USFCFG                     (1 << 10)
93 #define sCR0_VMIDPNE                    (1 << 11)
94 #define sCR0_PTM                        (1 << 12)
95 #define sCR0_FB                         (1 << 13)
96 #define sCR0_VMID16EN                   (1 << 31)
97 #define sCR0_BSU_SHIFT                  14
98 #define sCR0_BSU_MASK                   0x3
99
100 /* Auxiliary Configuration register */
101 #define ARM_SMMU_GR0_sACR               0x10
102
103 /* Identification registers */
104 #define ARM_SMMU_GR0_ID0                0x20
105 #define ARM_SMMU_GR0_ID1                0x24
106 #define ARM_SMMU_GR0_ID2                0x28
107 #define ARM_SMMU_GR0_ID3                0x2c
108 #define ARM_SMMU_GR0_ID4                0x30
109 #define ARM_SMMU_GR0_ID5                0x34
110 #define ARM_SMMU_GR0_ID6                0x38
111 #define ARM_SMMU_GR0_ID7                0x3c
112 #define ARM_SMMU_GR0_sGFSR              0x48
113 #define ARM_SMMU_GR0_sGFSYNR0           0x50
114 #define ARM_SMMU_GR0_sGFSYNR1           0x54
115 #define ARM_SMMU_GR0_sGFSYNR2           0x58
116
117 #define ID0_S1TS                        (1 << 30)
118 #define ID0_S2TS                        (1 << 29)
119 #define ID0_NTS                         (1 << 28)
120 #define ID0_SMS                         (1 << 27)
121 #define ID0_ATOSNS                      (1 << 26)
122 #define ID0_PTFS_NO_AARCH32             (1 << 25)
123 #define ID0_PTFS_NO_AARCH32S            (1 << 24)
124 #define ID0_CTTW                        (1 << 14)
125 #define ID0_NUMIRPT_SHIFT               16
126 #define ID0_NUMIRPT_MASK                0xff
127 #define ID0_NUMSIDB_SHIFT               9
128 #define ID0_NUMSIDB_MASK                0xf
129 #define ID0_NUMSMRG_SHIFT               0
130 #define ID0_NUMSMRG_MASK                0xff
131
132 #define ID1_PAGESIZE                    (1 << 31)
133 #define ID1_NUMPAGENDXB_SHIFT           28
134 #define ID1_NUMPAGENDXB_MASK            7
135 #define ID1_NUMS2CB_SHIFT               16
136 #define ID1_NUMS2CB_MASK                0xff
137 #define ID1_NUMCB_SHIFT                 0
138 #define ID1_NUMCB_MASK                  0xff
139
140 #define ID2_OAS_SHIFT                   4
141 #define ID2_OAS_MASK                    0xf
142 #define ID2_IAS_SHIFT                   0
143 #define ID2_IAS_MASK                    0xf
144 #define ID2_UBS_SHIFT                   8
145 #define ID2_UBS_MASK                    0xf
146 #define ID2_PTFS_4K                     (1 << 12)
147 #define ID2_PTFS_16K                    (1 << 13)
148 #define ID2_PTFS_64K                    (1 << 14)
149 #define ID2_VMID16                      (1 << 15)
150
151 #define ID7_MAJOR_SHIFT                 4
152 #define ID7_MAJOR_MASK                  0xf
153
154 /* Global TLB invalidation */
155 #define ARM_SMMU_GR0_TLBIVMID           0x64
156 #define ARM_SMMU_GR0_TLBIALLNSNH        0x68
157 #define ARM_SMMU_GR0_TLBIALLH           0x6c
158 #define ARM_SMMU_GR0_sTLBGSYNC          0x70
159 #define ARM_SMMU_GR0_sTLBGSTATUS        0x74
160 #define sTLBGSTATUS_GSACTIVE            (1 << 0)
161 #define TLB_LOOP_TIMEOUT                1000000 /* 1s! */
162
163 /* Stream mapping registers */
164 #define ARM_SMMU_GR0_SMR(n)             (0x800 + ((n) << 2))
165 #define SMR_VALID                       (1 << 31)
166 #define SMR_MASK_SHIFT                  16
167 #define SMR_ID_SHIFT                    0
168
169 #define ARM_SMMU_GR0_S2CR(n)            (0xc00 + ((n) << 2))
170 #define S2CR_CBNDX_SHIFT                0
171 #define S2CR_CBNDX_MASK                 0xff
172 #define S2CR_TYPE_SHIFT                 16
173 #define S2CR_TYPE_MASK                  0x3
174 enum arm_smmu_s2cr_type {
175         S2CR_TYPE_TRANS,
176         S2CR_TYPE_BYPASS,
177         S2CR_TYPE_FAULT,
178 };
179
180 #define S2CR_PRIVCFG_SHIFT              24
181 #define S2CR_PRIVCFG_MASK               0x3
182 enum arm_smmu_s2cr_privcfg {
183         S2CR_PRIVCFG_DEFAULT,
184         S2CR_PRIVCFG_DIPAN,
185         S2CR_PRIVCFG_UNPRIV,
186         S2CR_PRIVCFG_PRIV,
187 };
188
189 /* Context bank attribute registers */
190 #define ARM_SMMU_GR1_CBAR(n)            (0x0 + ((n) << 2))
191 #define CBAR_VMID_SHIFT                 0
192 #define CBAR_VMID_MASK                  0xff
193 #define CBAR_S1_BPSHCFG_SHIFT           8
194 #define CBAR_S1_BPSHCFG_MASK            3
195 #define CBAR_S1_BPSHCFG_NSH             3
196 #define CBAR_S1_MEMATTR_SHIFT           12
197 #define CBAR_S1_MEMATTR_MASK            0xf
198 #define CBAR_S1_MEMATTR_WB              0xf
199 #define CBAR_TYPE_SHIFT                 16
200 #define CBAR_TYPE_MASK                  0x3
201 #define CBAR_TYPE_S2_TRANS              (0 << CBAR_TYPE_SHIFT)
202 #define CBAR_TYPE_S1_TRANS_S2_BYPASS    (1 << CBAR_TYPE_SHIFT)
203 #define CBAR_TYPE_S1_TRANS_S2_FAULT     (2 << CBAR_TYPE_SHIFT)
204 #define CBAR_TYPE_S1_TRANS_S2_TRANS     (3 << CBAR_TYPE_SHIFT)
205 #define CBAR_IRPTNDX_SHIFT              24
206 #define CBAR_IRPTNDX_MASK               0xff
207
208 #define ARM_SMMU_GR1_CBA2R(n)           (0x800 + ((n) << 2))
209 #define CBA2R_RW64_32BIT                (0 << 0)
210 #define CBA2R_RW64_64BIT                (1 << 0)
211 #define CBA2R_VMID_SHIFT                16
212 #define CBA2R_VMID_MASK                 0xffff
213
214 /* Translation context bank */
215 #define ARM_SMMU_CB_BASE(smmu)          ((smmu)->base + ((smmu)->size >> 1))
216 #define ARM_SMMU_CB(smmu, n)            ((n) * (1 << (smmu)->pgshift))
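/*
 * Context bank n's registers therefore live in the upper half of the
 * SMMU's address space, one translation-unit page per bank:
 * cb_base = ARM_SMMU_CB_BASE(smmu) + ARM_SMMU_CB(smmu, n).
 */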
217
218 #define ARM_SMMU_CB_SCTLR               0x0
219 #define ARM_SMMU_CB_ACTLR               0x4
220 #define ARM_SMMU_CB_RESUME              0x8
221 #define ARM_SMMU_CB_TTBCR2              0x10
222 #define ARM_SMMU_CB_TTBR0               0x20
223 #define ARM_SMMU_CB_TTBR1               0x28
224 #define ARM_SMMU_CB_TTBCR               0x30
225 #define ARM_SMMU_CB_CONTEXTIDR          0x34
226 #define ARM_SMMU_CB_S1_MAIR0            0x38
227 #define ARM_SMMU_CB_S1_MAIR1            0x3c
228 #define ARM_SMMU_CB_PAR                 0x50
229 #define ARM_SMMU_CB_FSR                 0x58
230 #define ARM_SMMU_CB_FAR                 0x60
231 #define ARM_SMMU_CB_FSYNR0              0x68
232 #define ARM_SMMU_CB_S1_TLBIVA           0x600
233 #define ARM_SMMU_CB_S1_TLBIASID         0x610
234 #define ARM_SMMU_CB_S1_TLBIVAL          0x620
235 #define ARM_SMMU_CB_S2_TLBIIPAS2        0x630
236 #define ARM_SMMU_CB_S2_TLBIIPAS2L       0x638
237 #define ARM_SMMU_CB_ATS1PR              0x800
238 #define ARM_SMMU_CB_ATSR                0x8f0
239
240 #define SCTLR_S1_ASIDPNE                (1 << 12)
241 #define SCTLR_CFCFG                     (1 << 7)
242 #define SCTLR_CFIE                      (1 << 6)
243 #define SCTLR_CFRE                      (1 << 5)
244 #define SCTLR_E                         (1 << 4)
245 #define SCTLR_AFE                       (1 << 2)
246 #define SCTLR_TRE                       (1 << 1)
247 #define SCTLR_M                         (1 << 0)
248
249 #define ARM_MMU500_ACTLR_CPRE           (1 << 1)
250
251 #define ARM_MMU500_ACR_CACHE_LOCK       (1 << 26)
252
253 #define CB_PAR_F                        (1 << 0)
254
255 #define ATSR_ACTIVE                     (1 << 0)
256
257 #define RESUME_RETRY                    (0 << 0)
258 #define RESUME_TERMINATE                (1 << 0)
259
260 #define TTBCR2_SEP_SHIFT                15
261 #define TTBCR2_SEP_UPSTREAM             (0x7 << TTBCR2_SEP_SHIFT)
262
263 #define TTBRn_ASID_SHIFT                48
264
265 #define FSR_MULTI                       (1 << 31)
266 #define FSR_SS                          (1 << 30)
267 #define FSR_UUT                         (1 << 8)
268 #define FSR_ASF                         (1 << 7)
269 #define FSR_TLBLKF                      (1 << 6)
270 #define FSR_TLBMCF                      (1 << 5)
271 #define FSR_EF                          (1 << 4)
272 #define FSR_PF                          (1 << 3)
273 #define FSR_AFF                         (1 << 2)
274 #define FSR_TF                          (1 << 1)
275
276 #define FSR_IGN                         (FSR_AFF | FSR_ASF | \
277                                          FSR_TLBMCF | FSR_TLBLKF)
278 #define FSR_FAULT                       (FSR_MULTI | FSR_SS | FSR_UUT | \
279                                          FSR_EF | FSR_PF | FSR_TF | FSR_IGN)
280
281 #define FSYNR0_WNR                      (1 << 4)
282
283 static int force_stage;
284 module_param(force_stage, int, S_IRUGO);
285 MODULE_PARM_DESC(force_stage,
286         "Force SMMU mappings to be installed at a particular stage of translation. A value of '1' or '2' forces the corresponding stage. All other values are ignored (i.e. no stage is forced). Note that selecting a specific stage will disable support for nested translation.");
287 static bool disable_bypass;
288 module_param(disable_bypass, bool, S_IRUGO);
289 MODULE_PARM_DESC(disable_bypass,
290         "Disable bypass streams such that incoming transactions from devices that are not attached to an iommu domain will report an abort back to the device and will not be allowed to pass through the SMMU.");
291
292 enum arm_smmu_arch_version {
293         ARM_SMMU_V1,
294         ARM_SMMU_V1_64K,
295         ARM_SMMU_V2,
296 };
297
298 enum arm_smmu_implementation {
299         GENERIC_SMMU,
300         ARM_MMU500,
301         CAVIUM_SMMUV2,
302 };
303
304 struct arm_smmu_s2cr {
305         struct iommu_group              *group;
306         int                             count;
307         enum arm_smmu_s2cr_type         type;
308         enum arm_smmu_s2cr_privcfg      privcfg;
309         u8                              cbndx;
310 };
311
312 #define s2cr_init_val (struct arm_smmu_s2cr){                           \
313         .type = disable_bypass ? S2CR_TYPE_FAULT : S2CR_TYPE_BYPASS,    \
314 }
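/*
 * s2cr_init_val is the state an unused stream-to-context entry is reset
 * to when its last user goes away: bypass by default, or fault if the
 * disable_bypass module parameter is set.
 */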
315
316 struct arm_smmu_smr {
317         u16                             mask;
318         u16                             id;
319         bool                            valid;
320 };
321
322 struct arm_smmu_master_cfg {
323         struct arm_smmu_device          *smmu;
324         int                             num_streamids;
325         u16                             streamids[MAX_MASTER_STREAMIDS];
326         s16                             smendx[MAX_MASTER_STREAMIDS];
327 };
328 #define INVALID_SMENDX                  -1
329 #define for_each_cfg_sme(cfg, i, idx) \
330         for (i = 0; idx = cfg->smendx[i], i < cfg->num_streamids; ++i)
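/*
 * Note the comma operator in the loop condition above: idx is loaded
 * from cfg->smendx[i] before the bounds test on every iteration, so a
 * rough (illustrative) expansion is:
 *
 *	for (i = 0; ; ++i) {
 *		idx = cfg->smendx[i];
 *		if (!(i < cfg->num_streamids))
 *			break;
 *		... loop body ...
 *	}
 */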
331
332 struct arm_smmu_device {
333         struct device                   *dev;
334
335         void __iomem                    *base;
336         unsigned long                   size;
337         unsigned long                   pgshift;
338
339 #define ARM_SMMU_FEAT_COHERENT_WALK     (1 << 0)
340 #define ARM_SMMU_FEAT_STREAM_MATCH      (1 << 1)
341 #define ARM_SMMU_FEAT_TRANS_S1          (1 << 2)
342 #define ARM_SMMU_FEAT_TRANS_S2          (1 << 3)
343 #define ARM_SMMU_FEAT_TRANS_NESTED      (1 << 4)
344 #define ARM_SMMU_FEAT_TRANS_OPS         (1 << 5)
345 #define ARM_SMMU_FEAT_VMID16            (1 << 6)
346 #define ARM_SMMU_FEAT_FMT_AARCH64_4K    (1 << 7)
347 #define ARM_SMMU_FEAT_FMT_AARCH64_16K   (1 << 8)
348 #define ARM_SMMU_FEAT_FMT_AARCH64_64K   (1 << 9)
349 #define ARM_SMMU_FEAT_FMT_AARCH32_L     (1 << 10)
350 #define ARM_SMMU_FEAT_FMT_AARCH32_S     (1 << 11)
351         u32                             features;
352
353 #define ARM_SMMU_OPT_SECURE_CFG_ACCESS (1 << 0)
354         u32                             options;
355         enum arm_smmu_arch_version      version;
356         enum arm_smmu_implementation    model;
357
358         u32                             num_context_banks;
359         u32                             num_s2_context_banks;
360         DECLARE_BITMAP(context_map, ARM_SMMU_MAX_CBS);
361         atomic_t                        irptndx;
362
363         u32                             num_mapping_groups;
364         u16                             streamid_mask;
365         u16                             smr_mask_mask;
366         struct arm_smmu_smr             *smrs;
367         struct arm_smmu_s2cr            *s2crs;
368         struct mutex                    stream_map_mutex;
369
370         unsigned long                   va_size;
371         unsigned long                   ipa_size;
372         unsigned long                   pa_size;
373         unsigned long                   pgsize_bitmap;
374
375         u32                             num_global_irqs;
376         u32                             num_context_irqs;
377         unsigned int                    *irqs;
378
379         u32                             cavium_id_base; /* Specific to Cavium */
380 };
381
382 enum arm_smmu_context_fmt {
383         ARM_SMMU_CTX_FMT_NONE,
384         ARM_SMMU_CTX_FMT_AARCH64,
385         ARM_SMMU_CTX_FMT_AARCH32_L,
386         ARM_SMMU_CTX_FMT_AARCH32_S,
387 };
388
389 struct arm_smmu_cfg {
390         u8                              cbndx;
391         u8                              irptndx;
392         u32                             cbar;
393         enum arm_smmu_context_fmt       fmt;
394 };
395 #define INVALID_IRPTNDX                 0xff
396
397 #define ARM_SMMU_CB_ASID(smmu, cfg) ((u16)(smmu)->cavium_id_base + (cfg)->cbndx)
398 #define ARM_SMMU_CB_VMID(smmu, cfg) ((u16)(smmu)->cavium_id_base + (cfg)->cbndx + 1)
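/*
 * cavium_id_base is zero except on Cavium implementations, where the
 * ASID and VMID namespaces are effectively shared between SMMU
 * instances; offsetting each SMMU's contexts by a per-SMMU base
 * (derived from cavium_smmu_context_count below) keeps them from
 * colliding.
 */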
399
400 enum arm_smmu_domain_stage {
401         ARM_SMMU_DOMAIN_S1 = 0,
402         ARM_SMMU_DOMAIN_S2,
403         ARM_SMMU_DOMAIN_NESTED,
404 };
405
406 struct arm_smmu_domain {
407         struct arm_smmu_device          *smmu;
408         struct io_pgtable_ops           *pgtbl_ops;
409         spinlock_t                      pgtbl_lock;
410         struct arm_smmu_cfg             cfg;
411         enum arm_smmu_domain_stage      stage;
412         struct mutex                    init_mutex; /* Protects smmu pointer */
413         struct iommu_domain             domain;
414 };
415
416 struct arm_smmu_option_prop {
417         u32 opt;
418         const char *prop;
419 };
420
421 static atomic_t cavium_smmu_context_count = ATOMIC_INIT(0);
422
423 static struct arm_smmu_option_prop arm_smmu_options[] = {
424         { ARM_SMMU_OPT_SECURE_CFG_ACCESS, "calxeda,smmu-secure-config-access" },
425         { 0, NULL},
426 };
427
428 static struct arm_smmu_domain *to_smmu_domain(struct iommu_domain *dom)
429 {
430         return container_of(dom, struct arm_smmu_domain, domain);
431 }
432
433 static void parse_driver_options(struct arm_smmu_device *smmu)
434 {
435         int i = 0;
436
437         do {
438                 if (of_property_read_bool(smmu->dev->of_node,
439                                                 arm_smmu_options[i].prop)) {
440                         smmu->options |= arm_smmu_options[i].opt;
441                         dev_notice(smmu->dev, "option %s\n",
442                                 arm_smmu_options[i].prop);
443                 }
444         } while (arm_smmu_options[++i].opt);
445 }
446
447 static struct device_node *dev_get_dev_node(struct device *dev)
448 {
449         if (dev_is_pci(dev)) {
450                 struct pci_bus *bus = to_pci_dev(dev)->bus;
451
452                 while (!pci_is_root_bus(bus))
453                         bus = bus->parent;
454                 return of_node_get(bus->bridge->parent->of_node);
455         }
456
457         return of_node_get(dev->of_node);
458 }
459
460 static int __arm_smmu_get_pci_sid(struct pci_dev *pdev, u16 alias, void *data)
461 {
462         *((__be32 *)data) = cpu_to_be32(alias);
463         return 0; /* Continue walking */
464 }
465
466 static int __find_legacy_master_phandle(struct device *dev, void *data)
467 {
468         struct of_phandle_iterator *it = *(void **)data;
469         struct device_node *np = it->node;
470         int err;
471
472         of_for_each_phandle(it, err, dev->of_node, "mmu-masters",
473                             "#stream-id-cells", 0)
474                 if (it->node == np) {
475                         *(void **)data = dev;
476                         return 1;
477                 }
478         it->node = np;
479         return err == -ENOENT ? 0 : err;
480 }
481
482 static struct platform_driver arm_smmu_driver;
483
484 static int arm_smmu_register_legacy_master(struct device *dev)
485 {
486         struct arm_smmu_device *smmu;
487         struct arm_smmu_master_cfg *cfg;
488         struct device_node *np;
489         struct of_phandle_iterator it;
490         void *data = &it;
491         __be32 pci_sid;
492         int err;
493
494         np = dev_get_dev_node(dev);
495         if (!np || !of_find_property(np, "#stream-id-cells", NULL)) {
496                 of_node_put(np);
497                 return -ENODEV;
498         }
499
500         it.node = np;
501         err = driver_for_each_device(&arm_smmu_driver.driver, NULL, &data,
502                                      __find_legacy_master_phandle);
503         of_node_put(np);
504         if (err == 0)
505                 return -ENODEV;
506         if (err < 0)
507                 return err;
508
509         smmu = dev_get_drvdata(data);
510
511         if (it.cur_count > MAX_MASTER_STREAMIDS) {
512                 dev_err(smmu->dev,
513                         "reached maximum number (%d) of stream IDs for master device %s\n",
514                         MAX_MASTER_STREAMIDS, dev_name(dev));
515                 return -ENOSPC;
516         }
517         if (dev_is_pci(dev)) {
518                 /* "mmu-masters" assumes Stream ID == Requester ID */
519                 pci_for_each_dma_alias(to_pci_dev(dev), __arm_smmu_get_pci_sid,
520                                        &pci_sid);
521                 it.cur = &pci_sid;
522                 it.cur_count = 1;
523         }
524
525         cfg = kzalloc(sizeof(*cfg), GFP_KERNEL);
526         if (!cfg)
527                 return -ENOMEM;
528
529         cfg->smmu = smmu;
530         dev->archdata.iommu = cfg;
531
532         while (it.cur_count--)
533                 cfg->streamids[cfg->num_streamids++] = be32_to_cpup(it.cur++);
534
535         return 0;
536 }
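/*
 * The legacy binding parsed above looks roughly like the (purely
 * illustrative) snippet below: the SMMU node lists each master's
 * phandle followed by its stream IDs, and "#stream-id-cells" in the
 * master's node gives the number of IDs.
 *
 *	smmu {
 *		compatible = "arm,smmu-v2";
 *		...
 *		mmu-masters = <&dma0 0xd01d 0xd01e>,
 *			      <&dma1 0xd11c>;
 *	};
 *
 *	dma0: dma-controller@0 {
 *		...
 *		#stream-id-cells = <2>;
 *	};
 */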
537
538 static int __arm_smmu_alloc_bitmap(unsigned long *map, int start, int end)
539 {
540         int idx;
541
542         do {
543                 idx = find_next_zero_bit(map, end, start);
544                 if (idx == end)
545                         return -ENOSPC;
546         } while (test_and_set_bit(idx, map));
547
548         return idx;
549 }
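/*
 * The bitmap allocator above is lock-free: if another CPU claims the
 * bit between find_next_zero_bit() and test_and_set_bit(), the latter
 * returns non-zero and we simply search again from 'start'.
 */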
550
551 static void __arm_smmu_free_bitmap(unsigned long *map, int idx)
552 {
553         clear_bit(idx, map);
554 }
555
556 /* Wait for any pending TLB invalidations to complete */
557 static void __arm_smmu_tlb_sync(struct arm_smmu_device *smmu)
558 {
559         int count = 0;
560         void __iomem *gr0_base = ARM_SMMU_GR0(smmu);
561
562         writel_relaxed(0, gr0_base + ARM_SMMU_GR0_sTLBGSYNC);
563         while (readl_relaxed(gr0_base + ARM_SMMU_GR0_sTLBGSTATUS)
564                & sTLBGSTATUS_GSACTIVE) {
565                 cpu_relax();
566                 if (++count == TLB_LOOP_TIMEOUT) {
567                         dev_err_ratelimited(smmu->dev,
568                         "TLB sync timed out -- SMMU may be deadlocked\n");
569                         return;
570                 }
571                 udelay(1);
572         }
573 }
574
575 static void arm_smmu_tlb_sync(void *cookie)
576 {
577         struct arm_smmu_domain *smmu_domain = cookie;
578         __arm_smmu_tlb_sync(smmu_domain->smmu);
579 }
580
581 static void arm_smmu_tlb_inv_context(void *cookie)
582 {
583         struct arm_smmu_domain *smmu_domain = cookie;
584         struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
585         struct arm_smmu_device *smmu = smmu_domain->smmu;
586         bool stage1 = cfg->cbar != CBAR_TYPE_S2_TRANS;
587         void __iomem *base;
588
589         if (stage1) {
590                 base = ARM_SMMU_CB_BASE(smmu) + ARM_SMMU_CB(smmu, cfg->cbndx);
591                 writel_relaxed(ARM_SMMU_CB_ASID(smmu, cfg),
592                                base + ARM_SMMU_CB_S1_TLBIASID);
593         } else {
594                 base = ARM_SMMU_GR0(smmu);
595                 writel_relaxed(ARM_SMMU_CB_VMID(smmu, cfg),
596                                base + ARM_SMMU_GR0_TLBIVMID);
597         }
598
599         __arm_smmu_tlb_sync(smmu);
600 }
601
602 static void arm_smmu_tlb_inv_range_nosync(unsigned long iova, size_t size,
603                                           size_t granule, bool leaf, void *cookie)
604 {
605         struct arm_smmu_domain *smmu_domain = cookie;
606         struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
607         struct arm_smmu_device *smmu = smmu_domain->smmu;
608         bool stage1 = cfg->cbar != CBAR_TYPE_S2_TRANS;
609         void __iomem *reg;
610
611         if (stage1) {
612                 reg = ARM_SMMU_CB_BASE(smmu) + ARM_SMMU_CB(smmu, cfg->cbndx);
613                 reg += leaf ? ARM_SMMU_CB_S1_TLBIVAL : ARM_SMMU_CB_S1_TLBIVA;
614
615                 if (cfg->fmt != ARM_SMMU_CTX_FMT_AARCH64) {
616                         iova &= ~12UL;
617                         iova |= ARM_SMMU_CB_ASID(smmu, cfg);
618                         do {
619                                 writel_relaxed(iova, reg);
620                                 iova += granule;
621                         } while (size -= granule);
622                 } else {
623                         iova >>= 12;
624                         iova |= (u64)ARM_SMMU_CB_ASID(smmu, cfg) << 48;
625                         do {
626                                 writeq_relaxed(iova, reg);
627                                 iova += granule >> 12;
628                         } while (size -= granule);
629                 }
630         } else if (smmu->version == ARM_SMMU_V2) {
631                 reg = ARM_SMMU_CB_BASE(smmu) + ARM_SMMU_CB(smmu, cfg->cbndx);
632                 reg += leaf ? ARM_SMMU_CB_S2_TLBIIPAS2L :
633                               ARM_SMMU_CB_S2_TLBIIPAS2;
634                 iova >>= 12;
635                 do {
636                         smmu_write_atomic_lq(iova, reg);
637                         iova += granule >> 12;
638                 } while (size -= granule);
639         } else {
640                 reg = ARM_SMMU_GR0(smmu) + ARM_SMMU_GR0_TLBIVMID;
641                 writel_relaxed(ARM_SMMU_CB_VMID(smmu, cfg), reg);
642         }
643 }
644
645 static struct iommu_gather_ops arm_smmu_gather_ops = {
646         .tlb_flush_all  = arm_smmu_tlb_inv_context,
647         .tlb_add_flush  = arm_smmu_tlb_inv_range_nosync,
648         .tlb_sync       = arm_smmu_tlb_sync,
649 };
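/*
 * These callbacks are driven by the io-pgtable code: tlb_add_flush()
 * queues invalidation of a range being unmapped, tlb_sync() waits for
 * outstanding invalidations to complete, and tlb_flush_all()
 * invalidates the whole context.
 */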
650
651 static irqreturn_t arm_smmu_context_fault(int irq, void *dev)
652 {
653         u32 fsr, fsynr;
654         unsigned long iova;
655         struct iommu_domain *domain = dev;
656         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
657         struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
658         struct arm_smmu_device *smmu = smmu_domain->smmu;
659         void __iomem *cb_base;
660
661         cb_base = ARM_SMMU_CB_BASE(smmu) + ARM_SMMU_CB(smmu, cfg->cbndx);
662         fsr = readl_relaxed(cb_base + ARM_SMMU_CB_FSR);
663
664         if (!(fsr & FSR_FAULT))
665                 return IRQ_NONE;
666
667         fsynr = readl_relaxed(cb_base + ARM_SMMU_CB_FSYNR0);
668         iova = readq_relaxed(cb_base + ARM_SMMU_CB_FAR);
669
670         dev_err_ratelimited(smmu->dev,
671         "Unhandled context fault: fsr=0x%x, iova=0x%08lx, fsynr=0x%x, cb=%d\n",
672                             fsr, iova, fsynr, cfg->cbndx);
673
674         writel(fsr, cb_base + ARM_SMMU_CB_FSR);
675         return IRQ_HANDLED;
676 }
677
678 static irqreturn_t arm_smmu_global_fault(int irq, void *dev)
679 {
680         u32 gfsr, gfsynr0, gfsynr1, gfsynr2;
681         struct arm_smmu_device *smmu = dev;
682         void __iomem *gr0_base = ARM_SMMU_GR0_NS(smmu);
683
684         gfsr = readl_relaxed(gr0_base + ARM_SMMU_GR0_sGFSR);
685         gfsynr0 = readl_relaxed(gr0_base + ARM_SMMU_GR0_sGFSYNR0);
686         gfsynr1 = readl_relaxed(gr0_base + ARM_SMMU_GR0_sGFSYNR1);
687         gfsynr2 = readl_relaxed(gr0_base + ARM_SMMU_GR0_sGFSYNR2);
688
689         if (!gfsr)
690                 return IRQ_NONE;
691
692         dev_err_ratelimited(smmu->dev,
693                 "Unexpected global fault, this could be serious\n");
694         dev_err_ratelimited(smmu->dev,
695                 "\tGFSR 0x%08x, GFSYNR0 0x%08x, GFSYNR1 0x%08x, GFSYNR2 0x%08x\n",
696                 gfsr, gfsynr0, gfsynr1, gfsynr2);
697
698         writel(gfsr, gr0_base + ARM_SMMU_GR0_sGFSR);
699         return IRQ_HANDLED;
700 }
701
702 static void arm_smmu_init_context_bank(struct arm_smmu_domain *smmu_domain,
703                                        struct io_pgtable_cfg *pgtbl_cfg)
704 {
705         u32 reg, reg2;
706         u64 reg64;
707         bool stage1;
708         struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
709         struct arm_smmu_device *smmu = smmu_domain->smmu;
710         void __iomem *cb_base, *gr1_base;
711
712         gr1_base = ARM_SMMU_GR1(smmu);
713         stage1 = cfg->cbar != CBAR_TYPE_S2_TRANS;
714         cb_base = ARM_SMMU_CB_BASE(smmu) + ARM_SMMU_CB(smmu, cfg->cbndx);
715
716         if (smmu->version > ARM_SMMU_V1) {
717                 if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64)
718                         reg = CBA2R_RW64_64BIT;
719                 else
720                         reg = CBA2R_RW64_32BIT;
721                 /* 16-bit VMIDs live in CBA2R */
722                 if (smmu->features & ARM_SMMU_FEAT_VMID16)
723                         reg |= ARM_SMMU_CB_VMID(smmu, cfg) << CBA2R_VMID_SHIFT;
724
725                 writel_relaxed(reg, gr1_base + ARM_SMMU_GR1_CBA2R(cfg->cbndx));
726         }
727
728         /* CBAR */
729         reg = cfg->cbar;
730         if (smmu->version < ARM_SMMU_V2)
731                 reg |= cfg->irptndx << CBAR_IRPTNDX_SHIFT;
732
733         /*
734          * Use the weakest shareability/memory types, so they are
735          * overridden by the ttbcr/pte.
736          */
737         if (stage1) {
738                 reg |= (CBAR_S1_BPSHCFG_NSH << CBAR_S1_BPSHCFG_SHIFT) |
739                         (CBAR_S1_MEMATTR_WB << CBAR_S1_MEMATTR_SHIFT);
740         } else if (!(smmu->features & ARM_SMMU_FEAT_VMID16)) {
741                 /* 8-bit VMIDs live in CBAR */
742                 reg |= ARM_SMMU_CB_VMID(smmu, cfg) << CBAR_VMID_SHIFT;
743         }
744         writel_relaxed(reg, gr1_base + ARM_SMMU_GR1_CBAR(cfg->cbndx));
745
746         /* TTBRs */
747         if (stage1) {
748                 u16 asid = ARM_SMMU_CB_ASID(smmu, cfg);
749
750                 if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) {
751                         reg = pgtbl_cfg->arm_v7s_cfg.ttbr[0];
752                         writel_relaxed(reg, cb_base + ARM_SMMU_CB_TTBR0);
753                         reg = pgtbl_cfg->arm_v7s_cfg.ttbr[1];
754                         writel_relaxed(reg, cb_base + ARM_SMMU_CB_TTBR1);
755                         writel_relaxed(asid, cb_base + ARM_SMMU_CB_CONTEXTIDR);
756                 } else {
757                         reg64 = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[0];
758                         reg64 |= (u64)asid << TTBRn_ASID_SHIFT;
759                         writeq_relaxed(reg64, cb_base + ARM_SMMU_CB_TTBR0);
760                         reg64 = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[1];
761                         reg64 |= (u64)asid << TTBRn_ASID_SHIFT;
762                         writeq_relaxed(reg64, cb_base + ARM_SMMU_CB_TTBR1);
763                 }
764         } else {
765                 reg64 = pgtbl_cfg->arm_lpae_s2_cfg.vttbr;
766                 writeq_relaxed(reg64, cb_base + ARM_SMMU_CB_TTBR0);
767         }
768
769         /* TTBCR */
770         if (stage1) {
771                 if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) {
772                         reg = pgtbl_cfg->arm_v7s_cfg.tcr;
773                         reg2 = 0;
774                 } else {
775                         reg = pgtbl_cfg->arm_lpae_s1_cfg.tcr;
776                         reg2 = pgtbl_cfg->arm_lpae_s1_cfg.tcr >> 32;
777                         reg2 |= TTBCR2_SEP_UPSTREAM;
778                 }
779                 if (smmu->version > ARM_SMMU_V1)
780                         writel_relaxed(reg2, cb_base + ARM_SMMU_CB_TTBCR2);
781         } else {
782                 reg = pgtbl_cfg->arm_lpae_s2_cfg.vtcr;
783         }
784         writel_relaxed(reg, cb_base + ARM_SMMU_CB_TTBCR);
785
786         /* MAIRs (stage-1 only) */
787         if (stage1) {
788                 if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) {
789                         reg = pgtbl_cfg->arm_v7s_cfg.prrr;
790                         reg2 = pgtbl_cfg->arm_v7s_cfg.nmrr;
791                 } else {
792                         reg = pgtbl_cfg->arm_lpae_s1_cfg.mair[0];
793                         reg2 = pgtbl_cfg->arm_lpae_s1_cfg.mair[1];
794                 }
795                 writel_relaxed(reg, cb_base + ARM_SMMU_CB_S1_MAIR0);
796                 writel_relaxed(reg2, cb_base + ARM_SMMU_CB_S1_MAIR1);
797         }
798
799         /* SCTLR */
800         reg = SCTLR_CFIE | SCTLR_CFRE | SCTLR_AFE | SCTLR_TRE | SCTLR_M;
801         if (stage1)
802                 reg |= SCTLR_S1_ASIDPNE;
803 #ifdef __BIG_ENDIAN
804         reg |= SCTLR_E;
805 #endif
806         writel_relaxed(reg, cb_base + ARM_SMMU_CB_SCTLR);
807 }
808
809 static int arm_smmu_init_domain_context(struct iommu_domain *domain,
810                                         struct arm_smmu_device *smmu)
811 {
812         int irq, start, ret = 0;
813         unsigned long ias, oas;
814         struct io_pgtable_ops *pgtbl_ops;
815         struct io_pgtable_cfg pgtbl_cfg;
816         enum io_pgtable_fmt fmt;
817         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
818         struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
819
820         mutex_lock(&smmu_domain->init_mutex);
821         if (smmu_domain->smmu)
822                 goto out_unlock;
823
824         /* We're bypassing these SIDs, so don't allocate an actual context */
825         if (domain->type == IOMMU_DOMAIN_DMA) {
826                 smmu_domain->smmu = smmu;
827                 goto out_unlock;
828         }
829
830         /*
831          * Mapping the requested stage onto what we support is surprisingly
832          * complicated, mainly because the spec allows S1+S2 SMMUs without
833          * support for nested translation. That means we end up with the
834          * following table:
835          *
836          * Requested        Supported        Actual
837          *     S1               N              S1
838          *     S1             S1+S2            S1
839          *     S1               S2             S2
840          *     S1               S1             S1
841          *     N                N              N
842          *     N              S1+S2            S2
843          *     N                S2             S2
844          *     N                S1             S1
845          *
846          * Note that you can't actually request stage-2 mappings.
847          */
848         if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S1))
849                 smmu_domain->stage = ARM_SMMU_DOMAIN_S2;
850         if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S2))
851                 smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
852
853         /*
854          * Choosing a suitable context format is even more fiddly. Until we
855          * grow some way for the caller to express a preference, and/or move
856          * the decision into the io-pgtable code where it arguably belongs,
857          * just aim for the closest thing to the rest of the system, and hope
858          * that the hardware isn't esoteric enough that we can't assume AArch64
859          * support to be a superset of AArch32 support...
860          */
861         if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH32_L)
862                 cfg->fmt = ARM_SMMU_CTX_FMT_AARCH32_L;
863         if (IS_ENABLED(CONFIG_IOMMU_IO_PGTABLE_ARMV7S) &&
864             !IS_ENABLED(CONFIG_64BIT) && !IS_ENABLED(CONFIG_ARM_LPAE) &&
865             (smmu->features & ARM_SMMU_FEAT_FMT_AARCH32_S) &&
866             (smmu_domain->stage == ARM_SMMU_DOMAIN_S1))
867                 cfg->fmt = ARM_SMMU_CTX_FMT_AARCH32_S;
868         if ((IS_ENABLED(CONFIG_64BIT) || cfg->fmt == ARM_SMMU_CTX_FMT_NONE) &&
869             (smmu->features & (ARM_SMMU_FEAT_FMT_AARCH64_64K |
870                                ARM_SMMU_FEAT_FMT_AARCH64_16K |
871                                ARM_SMMU_FEAT_FMT_AARCH64_4K)))
872                 cfg->fmt = ARM_SMMU_CTX_FMT_AARCH64;
873
874         if (cfg->fmt == ARM_SMMU_CTX_FMT_NONE) {
875                 ret = -EINVAL;
876                 goto out_unlock;
877         }
878
879         switch (smmu_domain->stage) {
880         case ARM_SMMU_DOMAIN_S1:
881                 cfg->cbar = CBAR_TYPE_S1_TRANS_S2_BYPASS;
882                 start = smmu->num_s2_context_banks;
883                 ias = smmu->va_size;
884                 oas = smmu->ipa_size;
885                 if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64) {
886                         fmt = ARM_64_LPAE_S1;
887                 } else if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_L) {
888                         fmt = ARM_32_LPAE_S1;
889                         ias = min(ias, 32UL);
890                         oas = min(oas, 40UL);
891                 } else {
892                         fmt = ARM_V7S;
893                         ias = min(ias, 32UL);
894                         oas = min(oas, 32UL);
895                 }
896                 break;
897         case ARM_SMMU_DOMAIN_NESTED:
898                 /*
899                  * We will likely want to change this if/when KVM gets
900                  * involved.
901                  */
902         case ARM_SMMU_DOMAIN_S2:
903                 cfg->cbar = CBAR_TYPE_S2_TRANS;
904                 start = 0;
905                 ias = smmu->ipa_size;
906                 oas = smmu->pa_size;
907                 if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64) {
908                         fmt = ARM_64_LPAE_S2;
909                 } else {
910                         fmt = ARM_32_LPAE_S2;
911                         ias = min(ias, 40UL);
912                         oas = min(oas, 40UL);
913                 }
914                 break;
915         default:
916                 ret = -EINVAL;
917                 goto out_unlock;
918         }
919
920         ret = __arm_smmu_alloc_bitmap(smmu->context_map, start,
921                                       smmu->num_context_banks);
922         if (ret < 0)
923                 goto out_unlock;
924
925         cfg->cbndx = ret;
926         if (smmu->version < ARM_SMMU_V2) {
927                 cfg->irptndx = atomic_inc_return(&smmu->irptndx);
928                 cfg->irptndx %= smmu->num_context_irqs;
929         } else {
930                 cfg->irptndx = cfg->cbndx;
931         }
932
933         pgtbl_cfg = (struct io_pgtable_cfg) {
934                 .pgsize_bitmap  = smmu->pgsize_bitmap,
935                 .ias            = ias,
936                 .oas            = oas,
937                 .tlb            = &arm_smmu_gather_ops,
938                 .iommu_dev      = smmu->dev,
939         };
940
941         smmu_domain->smmu = smmu;
942         pgtbl_ops = alloc_io_pgtable_ops(fmt, &pgtbl_cfg, smmu_domain);
943         if (!pgtbl_ops) {
944                 ret = -ENOMEM;
945                 goto out_clear_smmu;
946         }
947
948         /* Update the domain's page sizes to reflect the page table format */
949         domain->pgsize_bitmap = pgtbl_cfg.pgsize_bitmap;
950
951         /* Initialise the context bank with our page table cfg */
952         arm_smmu_init_context_bank(smmu_domain, &pgtbl_cfg);
953
954         /*
955          * Request context fault interrupt. Do this last to avoid the
956          * handler seeing a half-initialised domain state.
957          */
958         irq = smmu->irqs[smmu->num_global_irqs + cfg->irptndx];
959         ret = devm_request_irq(smmu->dev, irq, arm_smmu_context_fault,
960                                IRQF_SHARED, "arm-smmu-context-fault", domain);
961         if (ret < 0) {
962                 dev_err(smmu->dev, "failed to request context IRQ %d (%u)\n",
963                         cfg->irptndx, irq);
964                 cfg->irptndx = INVALID_IRPTNDX;
965         }
966
967         mutex_unlock(&smmu_domain->init_mutex);
968
969         /* Publish page table ops for map/unmap */
970         smmu_domain->pgtbl_ops = pgtbl_ops;
971         return 0;
972
973 out_clear_smmu:
974         smmu_domain->smmu = NULL;
975 out_unlock:
976         mutex_unlock(&smmu_domain->init_mutex);
977         return ret;
978 }
979
980 static void arm_smmu_destroy_domain_context(struct iommu_domain *domain)
981 {
982         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
983         struct arm_smmu_device *smmu = smmu_domain->smmu;
984         struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
985         void __iomem *cb_base;
986         int irq;
987
988         if (!smmu || domain->type == IOMMU_DOMAIN_DMA)
989                 return;
990
991         /*
 992          * Disable the context bank and free the page tables before
 993          * releasing the context bank itself.
994          */
995         cb_base = ARM_SMMU_CB_BASE(smmu) + ARM_SMMU_CB(smmu, cfg->cbndx);
996         writel_relaxed(0, cb_base + ARM_SMMU_CB_SCTLR);
997
998         if (cfg->irptndx != INVALID_IRPTNDX) {
999                 irq = smmu->irqs[smmu->num_global_irqs + cfg->irptndx];
1000                 devm_free_irq(smmu->dev, irq, domain);
1001         }
1002
1003         free_io_pgtable_ops(smmu_domain->pgtbl_ops);
1004         __arm_smmu_free_bitmap(smmu->context_map, cfg->cbndx);
1005 }
1006
1007 static struct iommu_domain *arm_smmu_domain_alloc(unsigned type)
1008 {
1009         struct arm_smmu_domain *smmu_domain;
1010
1011         if (type != IOMMU_DOMAIN_UNMANAGED && type != IOMMU_DOMAIN_DMA)
1012                 return NULL;
1013         /*
1014          * Allocate the domain and initialise some of its data structures.
1015          * We can't really do anything meaningful until we've added a
1016          * master.
1017          */
1018         smmu_domain = kzalloc(sizeof(*smmu_domain), GFP_KERNEL);
1019         if (!smmu_domain)
1020                 return NULL;
1021
1022         if (type == IOMMU_DOMAIN_DMA &&
1023             iommu_get_dma_cookie(&smmu_domain->domain)) {
1024                 kfree(smmu_domain);
1025                 return NULL;
1026         }
1027
1028         mutex_init(&smmu_domain->init_mutex);
1029         spin_lock_init(&smmu_domain->pgtbl_lock);
1030
1031         return &smmu_domain->domain;
1032 }
1033
1034 static void arm_smmu_domain_free(struct iommu_domain *domain)
1035 {
1036         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1037
1038         /*
1039          * Free the domain resources. We assume that all devices have
1040          * already been detached.
1041          */
1042         iommu_put_dma_cookie(domain);
1043         arm_smmu_destroy_domain_context(domain);
1044         kfree(smmu_domain);
1045 }
1046
1047 static void arm_smmu_write_smr(struct arm_smmu_device *smmu, int idx)
1048 {
1049         struct arm_smmu_smr *smr = smmu->smrs + idx;
1050         u32 reg = smr->id << SMR_ID_SHIFT | smr->mask << SMR_MASK_SHIFT;
1051
1052         if (smr->valid)
1053                 reg |= SMR_VALID;
1054         writel_relaxed(reg, ARM_SMMU_GR0(smmu) + ARM_SMMU_GR0_SMR(idx));
1055 }
1056
1057 static void arm_smmu_write_s2cr(struct arm_smmu_device *smmu, int idx)
1058 {
1059         struct arm_smmu_s2cr *s2cr = smmu->s2crs + idx;
1060         u32 reg = (s2cr->type & S2CR_TYPE_MASK) << S2CR_TYPE_SHIFT |
1061                   (s2cr->cbndx & S2CR_CBNDX_MASK) << S2CR_CBNDX_SHIFT |
1062                   (s2cr->privcfg & S2CR_PRIVCFG_MASK) << S2CR_PRIVCFG_SHIFT;
1063
1064         writel_relaxed(reg, ARM_SMMU_GR0(smmu) + ARM_SMMU_GR0_S2CR(idx));
1065 }
1066
1067 static void arm_smmu_write_sme(struct arm_smmu_device *smmu, int idx)
1068 {
1069         arm_smmu_write_s2cr(smmu, idx);
1070         if (smmu->smrs)
1071                 arm_smmu_write_smr(smmu, idx);
1072 }
1073
1074 static int arm_smmu_find_sme(struct arm_smmu_device *smmu, u16 id, u16 mask)
1075 {
1076         struct arm_smmu_smr *smrs = smmu->smrs;
1077         int i, free_idx = -ENOSPC;
1078
1079         /* Stream indexing is blissfully easy */
1080         if (!smrs)
1081                 return id;
1082
1083         /* Validating SMRs is... less so */
1084         for (i = 0; i < smmu->num_mapping_groups; ++i) {
1085                 if (!smrs[i].valid) {
1086                         /*
1087                          * Note the first free entry we come across, which
1088                          * we'll claim in the end if nothing else matches.
1089                          */
1090                         if (free_idx < 0)
1091                                 free_idx = i;
1092                         continue;
1093                 }
1094                 /*
1095                  * If the new entry is _entirely_ matched by an existing entry,
1096                  * then reuse that, with the guarantee that there also cannot
1097                  * be any subsequent conflicting entries. In normal use we'd
1098                  * expect simply identical entries for this case, but there's
1099                  * no harm in accommodating the generalisation.
1100                  */
1101                 if ((mask & smrs[i].mask) == mask &&
1102                     !((id ^ smrs[i].id) & ~smrs[i].mask))
1103                         return i;
1104                 /*
1105                  * If the new entry has any other overlap with an existing one,
1106                  * though, then there always exists at least one stream ID
1107                  * which would cause a conflict, and we can't allow that risk.
1108                  */
1109                 if (!((id ^ smrs[i].id) & ~(smrs[i].mask | mask)))
1110                         return -EINVAL;
1111         }
1112
1113         return free_idx;
1114 }
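/*
 * For illustration, with made-up values: an existing SMR of id 0x400,
 * mask 0x00f matches stream IDs 0x400-0x40f (mask bits are "don't
 * care"). A new entry of id 0x404, mask 0x003 matches 0x404-0x407,
 * which is entirely contained, so that existing index is reused. A new
 * entry of id 0x400, mask 0x0ff would match 0x400-0x4ff, overlapping
 * the existing entry without being contained by it, and is rejected
 * with -EINVAL.
 */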
1115
1116 static bool arm_smmu_free_sme(struct arm_smmu_device *smmu, int idx)
1117 {
1118         if (--smmu->s2crs[idx].count)
1119                 return false;
1120
1121         smmu->s2crs[idx] = s2cr_init_val;
1122         if (smmu->smrs)
1123                 smmu->smrs[idx].valid = false;
1124
1125         return true;
1126 }
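/*
 * arm_smmu_free_sme() drops one reference on a stream map entry and
 * returns true only when the last user goes away; the caller is then
 * expected to push the reset (bypass/fault) state out to the hardware,
 * as arm_smmu_master_free_smes() does.
 */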
1127
1128 static int arm_smmu_master_alloc_smes(struct device *dev)
1129 {
1130         struct arm_smmu_master_cfg *cfg = dev->archdata.iommu;
1131         struct arm_smmu_device *smmu = cfg->smmu;
1132         struct arm_smmu_smr *smrs = smmu->smrs;
1133         struct iommu_group *group;
1134         int i, idx, ret;
1135
1136         mutex_lock(&smmu->stream_map_mutex);
1137         /* Figure out a viable stream map entry allocation */
1138         for_each_cfg_sme(cfg, i, idx) {
1139                 if (idx != INVALID_SMENDX) {
1140                         ret = -EEXIST;
1141                         goto out_err;
1142                 }
1143
1144                 ret = arm_smmu_find_sme(smmu, cfg->streamids[i], 0);
1145                 if (ret < 0)
1146                         goto out_err;
1147
1148                 idx = ret;
1149                 if (smrs && smmu->s2crs[idx].count == 0) {
1150                         smrs[idx].id = cfg->streamids[i];
1151                         smrs[idx].mask = 0; /* We don't currently share SMRs */
1152                         smrs[idx].valid = true;
1153                 }
1154                 smmu->s2crs[idx].count++;
1155                 cfg->smendx[i] = (s16)idx;
1156         }
1157
1158         group = iommu_group_get_for_dev(dev);
1159         if (!group)
1160                 group = ERR_PTR(-ENOMEM);
1161         if (IS_ERR(group)) {
1162                 ret = PTR_ERR(group);
1163                 goto out_err;
1164         }
1165         iommu_group_put(group);
1166
1167         /* It worked! Now, poke the actual hardware */
1168         for_each_cfg_sme(cfg, i, idx) {
1169                 arm_smmu_write_sme(smmu, idx);
1170                 smmu->s2crs[idx].group = group;
1171         }
1172
1173         mutex_unlock(&smmu->stream_map_mutex);
1174         return 0;
1175
1176 out_err:
1177         while (i--) {
1178                 arm_smmu_free_sme(smmu, cfg->smendx[i]);
1179                 cfg->smendx[i] = INVALID_SMENDX;
1180         }
1181         mutex_unlock(&smmu->stream_map_mutex);
1182         return ret;
1183 }
1184
1185 static void arm_smmu_master_free_smes(struct arm_smmu_master_cfg *cfg)
1186 {
1187         struct arm_smmu_device *smmu = cfg->smmu;
1188         int i, idx;
1189
1190         mutex_lock(&smmu->stream_map_mutex);
1191         for_each_cfg_sme(cfg, i, idx) {
1192                 if (arm_smmu_free_sme(smmu, idx))
1193                         arm_smmu_write_sme(smmu, idx);
1194                 cfg->smendx[i] = INVALID_SMENDX;
1195         }
1196         mutex_unlock(&smmu->stream_map_mutex);
1197 }
1198
1199 static int arm_smmu_domain_add_master(struct arm_smmu_domain *smmu_domain,
1200                                       struct arm_smmu_master_cfg *cfg)
1201 {
1202         struct arm_smmu_device *smmu = smmu_domain->smmu;
1203         struct arm_smmu_s2cr *s2cr = smmu->s2crs;
1204         enum arm_smmu_s2cr_type type = S2CR_TYPE_TRANS;
1205         u8 cbndx = smmu_domain->cfg.cbndx;
1206         int i, idx;
1207
1208         /*
1209          * FIXME: This won't be needed once we have IOMMU-backed DMA ops
1210          * for all devices behind the SMMU. Note that we need to take
1211          * care configuring SMRs for devices that are both a platform_device
1212          * and a PCI device (i.e. a PCI host controller).
1213          */
1214         if (smmu_domain->domain.type == IOMMU_DOMAIN_DMA)
1215                 type = S2CR_TYPE_BYPASS;
1216
1217         for_each_cfg_sme(cfg, i, idx) {
1218                 if (type == s2cr[idx].type && cbndx == s2cr[idx].cbndx)
1219                         continue;
1220
1221                 s2cr[idx].type = type;
1222                 s2cr[idx].privcfg = S2CR_PRIVCFG_UNPRIV;
1223                 s2cr[idx].cbndx = cbndx;
1224                 arm_smmu_write_s2cr(smmu, idx);
1225         }
1226         return 0;
1227 }
1228
1229 static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
1230 {
1231         int ret;
1232         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1233         struct arm_smmu_master_cfg *cfg = dev->archdata.iommu;
1234
1235         if (!cfg) {
1236                 dev_err(dev, "cannot attach to SMMU, is it on the same bus?\n");
1237                 return -ENXIO;
1238         }
1239
1240         /* Ensure that the domain is finalised */
1241         ret = arm_smmu_init_domain_context(domain, cfg->smmu);
1242         if (ret < 0)
1243                 return ret;
1244
1245         /*
1246          * Sanity check the domain. We don't support domains across
1247          * different SMMUs.
1248          */
1249         if (smmu_domain->smmu != cfg->smmu) {
1250                 dev_err(dev,
1251                         "cannot attach to SMMU %s whilst already attached to domain on SMMU %s\n",
1252                         dev_name(smmu_domain->smmu->dev), dev_name(cfg->smmu->dev));
1253                 return -EINVAL;
1254         }
1255
1256         /* Looks ok, so add the device to the domain */
1257         return arm_smmu_domain_add_master(smmu_domain, cfg);
1258 }
1259
1260 static int arm_smmu_map(struct iommu_domain *domain, unsigned long iova,
1261                         phys_addr_t paddr, size_t size, int prot)
1262 {
1263         int ret;
1264         unsigned long flags;
1265         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1266         struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;
1267
1268         if (!ops)
1269                 return -ENODEV;
1270
1271         spin_lock_irqsave(&smmu_domain->pgtbl_lock, flags);
1272         ret = ops->map(ops, iova, paddr, size, prot);
1273         spin_unlock_irqrestore(&smmu_domain->pgtbl_lock, flags);
1274         return ret;
1275 }
1276
1277 static size_t arm_smmu_unmap(struct iommu_domain *domain, unsigned long iova,
1278                              size_t size)
1279 {
1280         size_t ret;
1281         unsigned long flags;
1282         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1283         struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;
1284
1285         if (!ops)
1286                 return 0;
1287
1288         spin_lock_irqsave(&smmu_domain->pgtbl_lock, flags);
1289         ret = ops->unmap(ops, iova, size);
1290         spin_unlock_irqrestore(&smmu_domain->pgtbl_lock, flags);
1291         return ret;
1292 }
1293
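/*
 * Hardware-assisted iova-to-phys: write the page-aligned VA to ATS1PR
 * to start a stage-1 translation, poll ATSR until the walk completes,
 * then read the resulting physical address (or fault indication) from
 * PAR. On timeout we fall back to a software walk of the page tables.
 */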
1294 static phys_addr_t arm_smmu_iova_to_phys_hard(struct iommu_domain *domain,
1295                                               dma_addr_t iova)
1296 {
1297         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1298         struct arm_smmu_device *smmu = smmu_domain->smmu;
1299         struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
1300         struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;
1301         struct device *dev = smmu->dev;
1302         void __iomem *cb_base;
1303         u32 tmp;
1304         u64 phys;
1305         unsigned long va;
1306
1307         cb_base = ARM_SMMU_CB_BASE(smmu) + ARM_SMMU_CB(smmu, cfg->cbndx);
1308
1309         /* ATS1 registers can only be written atomically */
1310         va = iova & ~0xfffUL;
1311         if (smmu->version == ARM_SMMU_V2)
1312                 smmu_write_atomic_lq(va, cb_base + ARM_SMMU_CB_ATS1PR);
1313         else /* Register is only 32-bit in v1 */
1314                 writel_relaxed(va, cb_base + ARM_SMMU_CB_ATS1PR);
1315
1316         if (readl_poll_timeout_atomic(cb_base + ARM_SMMU_CB_ATSR, tmp,
1317                                       !(tmp & ATSR_ACTIVE), 5, 50)) {
1318                 dev_err(dev,
1319                         "iova to phys timed out on %pad. Falling back to software table walk.\n",
1320                         &iova);
1321                 return ops->iova_to_phys(ops, iova);
1322         }
1323
1324         phys = readq_relaxed(cb_base + ARM_SMMU_CB_PAR);
1325         if (phys & CB_PAR_F) {
1326                 dev_err(dev, "translation fault!\n");
1327                 dev_err(dev, "PAR = 0x%llx\n", phys);
1328                 return 0;
1329         }
1330
1331         return (phys & GENMASK_ULL(39, 12)) | (iova & 0xfff);
1332 }
1333
1334 static phys_addr_t arm_smmu_iova_to_phys(struct iommu_domain *domain,
1335                                         dma_addr_t iova)
1336 {
1337         phys_addr_t ret;
1338         unsigned long flags;
1339         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1340         struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;
1341
1342         if (!ops)
1343                 return 0;
1344
1345         spin_lock_irqsave(&smmu_domain->pgtbl_lock, flags);
1346         if (smmu_domain->smmu->features & ARM_SMMU_FEAT_TRANS_OPS &&
1347                         smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
1348                 ret = arm_smmu_iova_to_phys_hard(domain, iova);
1349         } else {
1350                 ret = ops->iova_to_phys(ops, iova);
1351         }
1352
1353         spin_unlock_irqrestore(&smmu_domain->pgtbl_lock, flags);
1354
1355         return ret;
1356 }
1357
1358 static bool arm_smmu_capable(enum iommu_cap cap)
1359 {
1360         switch (cap) {
1361         case IOMMU_CAP_CACHE_COHERENCY:
1362                 /*
1363                  * Return true here as the SMMU can always send out coherent
1364                  * requests.
1365                  */
1366                 return true;
1367         case IOMMU_CAP_INTR_REMAP:
1368                 return true; /* MSIs are just memory writes */
1369         case IOMMU_CAP_NOEXEC:
1370                 return true;
1371         default:
1372                 return false;
1373         }
1374 }
1375
1376 static int arm_smmu_add_device(struct device *dev)
1377 {
1378         struct arm_smmu_master_cfg *cfg;
1379         int i, ret;
1380
1381         ret = arm_smmu_register_legacy_master(dev);
1382         cfg = dev->archdata.iommu;
1383         if (ret)
1384                 goto out_free;
1385
1386         ret = -EINVAL;
1387         for (i = 0; i < cfg->num_streamids; i++) {
1388                 u16 sid = cfg->streamids[i];
1389
1390                 if (sid & ~cfg->smmu->streamid_mask) {
1391                         dev_err(dev, "stream ID 0x%x out of range for SMMU (0x%x)\n",
1392                                 sid, cfg->smmu->streamid_mask);
1393                         goto out_free;
1394                 }
1395                 cfg->smendx[i] = INVALID_SMENDX;
1396         }
1397
1398         ret = arm_smmu_master_alloc_smes(dev);
1399         if (!ret)
1400                 return ret;
1401
1402 out_free:
1403         kfree(cfg);
1404         dev->archdata.iommu = NULL;
1405         return ret;
1406 }
1407
1408 static void arm_smmu_remove_device(struct device *dev)
1409 {
1410         struct arm_smmu_master_cfg *cfg = dev->archdata.iommu;
1411
1412         if (!cfg)
1413                 return;
1414
1415         arm_smmu_master_free_smes(cfg);
1416         iommu_group_remove_device(dev);
1417         kfree(cfg);
1418         dev->archdata.iommu = NULL;
1419 }
1420
1421 static struct iommu_group *arm_smmu_device_group(struct device *dev)
1422 {
1423         struct arm_smmu_master_cfg *cfg = dev->archdata.iommu;
1424         struct arm_smmu_device *smmu = cfg->smmu;
1425         struct iommu_group *group = NULL;
1426         int i, idx;
1427
1428         for_each_cfg_sme(cfg, i, idx) {
1429                 if (group && smmu->s2crs[idx].group &&
1430                     group != smmu->s2crs[idx].group)
1431                         return ERR_PTR(-EINVAL);
1432
1433                 group = smmu->s2crs[idx].group;
1434         }
1435
1436         if (group)
1437                 return group;
1438
1439         if (dev_is_pci(dev))
1440                 group = pci_device_group(dev);
1441         else
1442                 group = generic_device_group(dev);
1443
1444         return group;
1445 }
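
     /*
      * In other words: if any of this master's stream map entries already
      * have a group recorded against them, that group is reused; entries
      * pointing at different groups are a conflict and are rejected with
      * -EINVAL above. Otherwise a fresh PCI or generic group is allocated.
      */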
1446
1447 static int arm_smmu_domain_get_attr(struct iommu_domain *domain,
1448                                     enum iommu_attr attr, void *data)
1449 {
1450         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1451
1452         switch (attr) {
1453         case DOMAIN_ATTR_NESTING:
1454                 *(int *)data = (smmu_domain->stage == ARM_SMMU_DOMAIN_NESTED);
1455                 return 0;
1456         default:
1457                 return -ENODEV;
1458         }
1459 }
1460
1461 static int arm_smmu_domain_set_attr(struct iommu_domain *domain,
1462                                     enum iommu_attr attr, void *data)
1463 {
1464         int ret = 0;
1465         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1466
1467         mutex_lock(&smmu_domain->init_mutex);
1468
1469         switch (attr) {
1470         case DOMAIN_ATTR_NESTING:
1471                 if (smmu_domain->smmu) {
1472                         ret = -EPERM;
1473                         goto out_unlock;
1474                 }
1475
1476                 if (*(int *)data)
1477                         smmu_domain->stage = ARM_SMMU_DOMAIN_NESTED;
1478                 else
1479                         smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
1480
1481                 break;
1482         default:
1483                 ret = -ENODEV;
1484         }
1485
1486 out_unlock:
1487         mutex_unlock(&smmu_domain->init_mutex);
1488         return ret;
1489 }
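
     /*
      * Illustrative use (not part of this driver): a caller wanting nested
      * translation would set the attribute before the first attach, e.g.
      *
      *     int nesting = 1;
      *     err = iommu_domain_set_attr(domain, DOMAIN_ATTR_NESTING, &nesting);
      *
      * Once the domain has been attached (smmu_domain->smmu is set), the
      * request is refused with -EPERM.
      */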
1490
1491 static struct iommu_ops arm_smmu_ops = {
1492         .capable                = arm_smmu_capable,
1493         .domain_alloc           = arm_smmu_domain_alloc,
1494         .domain_free            = arm_smmu_domain_free,
1495         .attach_dev             = arm_smmu_attach_dev,
1496         .map                    = arm_smmu_map,
1497         .unmap                  = arm_smmu_unmap,
1498         .map_sg                 = default_iommu_map_sg,
1499         .iova_to_phys           = arm_smmu_iova_to_phys,
1500         .add_device             = arm_smmu_add_device,
1501         .remove_device          = arm_smmu_remove_device,
1502         .device_group           = arm_smmu_device_group,
1503         .domain_get_attr        = arm_smmu_domain_get_attr,
1504         .domain_set_attr        = arm_smmu_domain_set_attr,
1505         .pgsize_bitmap          = -1UL, /* Restricted during device attach */
1506 };
1507
1508 static void arm_smmu_device_reset(struct arm_smmu_device *smmu)
1509 {
1510         void __iomem *gr0_base = ARM_SMMU_GR0(smmu);
1511         void __iomem *cb_base;
1512         int i;
1513         u32 reg, major;
1514
1515         /* clear global FSR */
1516         reg = readl_relaxed(ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sGFSR);
1517         writel(reg, ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sGFSR);
1518
1519         /*
1520          * Reset stream mapping groups: Initial values mark all SMRn as
1521          * invalid and all S2CRn as bypass unless overridden.
1522          */
1523         for (i = 0; i < smmu->num_mapping_groups; ++i)
1524                 arm_smmu_write_sme(smmu, i);
1525
1526         /*
1527          * Before clearing ARM_MMU500_ACTLR_CPRE, we must first clear
1528          * the CACHE_LOCK bit of sACR, which is only present in
1529          * MMU-500r2 onwards.
1530          */
1531         reg = readl_relaxed(gr0_base + ARM_SMMU_GR0_ID7);
1532         major = (reg >> ID7_MAJOR_SHIFT) & ID7_MAJOR_MASK;
1533         if ((smmu->model == ARM_MMU500) && (major >= 2)) {
1534                 reg = readl_relaxed(gr0_base + ARM_SMMU_GR0_sACR);
1535                 reg &= ~ARM_MMU500_ACR_CACHE_LOCK;
1536                 writel_relaxed(reg, gr0_base + ARM_SMMU_GR0_sACR);
1537         }
1538
1539         /* Make sure all context banks are disabled and clear CB_FSR  */
1540         for (i = 0; i < smmu->num_context_banks; ++i) {
1541                 cb_base = ARM_SMMU_CB_BASE(smmu) + ARM_SMMU_CB(smmu, i);
1542                 writel_relaxed(0, cb_base + ARM_SMMU_CB_SCTLR);
1543                 writel_relaxed(FSR_FAULT, cb_base + ARM_SMMU_CB_FSR);
1544                 /*
1545                  * Disable MMU-500's not-particularly-beneficial next-page
1546                  * prefetcher for the sake of errata #841119 and #826419.
1547                  */
1548                 if (smmu->model == ARM_MMU500) {
1549                         reg = readl_relaxed(cb_base + ARM_SMMU_CB_ACTLR);
1550                         reg &= ~ARM_MMU500_ACTLR_CPRE;
1551                         writel_relaxed(reg, cb_base + ARM_SMMU_CB_ACTLR);
1552                 }
1553         }
1554
1555         /* Invalidate the TLB, just in case */
1556         writel_relaxed(0, gr0_base + ARM_SMMU_GR0_TLBIALLH);
1557         writel_relaxed(0, gr0_base + ARM_SMMU_GR0_TLBIALLNSNH);
1558
1559         reg = readl_relaxed(ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sCR0);
1560
1561         /* Enable fault reporting */
1562         reg |= (sCR0_GFRE | sCR0_GFIE | sCR0_GCFGFRE | sCR0_GCFGFIE);
1563
1564         /* Disable TLB broadcasting. */
1565         reg |= (sCR0_VMIDPNE | sCR0_PTM);
1566
1567         /* Enable client access, handling unmatched streams as appropriate */
1568         reg &= ~sCR0_CLIENTPD;
1569         if (disable_bypass)
1570                 reg |= sCR0_USFCFG;
1571         else
1572                 reg &= ~sCR0_USFCFG;
1573
1574         /* Disable forced broadcasting */
1575         reg &= ~sCR0_FB;
1576
1577         /* Don't upgrade barriers */
1578         reg &= ~(sCR0_BSU_MASK << sCR0_BSU_SHIFT);
1579
1580         if (smmu->features & ARM_SMMU_FEAT_VMID16)
1581                 reg |= sCR0_VMID16EN;
1582
1583         /* Push the button */
1584         __arm_smmu_tlb_sync(smmu);
1585         writel(reg, ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sCR0);
1586 }
1587
1588 static int arm_smmu_id_size_to_bits(int size)
1589 {
1590         switch (size) {
1591         case 0:
1592                 return 32;
1593         case 1:
1594                 return 36;
1595         case 2:
1596                 return 40;
1597         case 3:
1598                 return 42;
1599         case 4:
1600                 return 44;
1601         case 5:
1602         default:
1603                 return 48;
1604         }
1605 }
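
     /*
      * For example, a size field of 2 (as read from ID2) decodes to a 40-bit
      * address, and anything above 5 falls through to the 48-bit default.
      */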
1606
1607 static int arm_smmu_device_cfg_probe(struct arm_smmu_device *smmu)
1608 {
1609         unsigned long size;
1610         void __iomem *gr0_base = ARM_SMMU_GR0(smmu);
1611         u32 id;
1612         bool cttw_dt, cttw_reg;
1613         int i;
1614
1615         dev_notice(smmu->dev, "probing hardware configuration...\n");
1616         dev_notice(smmu->dev, "SMMUv%d with:\n",
1617                         smmu->version == ARM_SMMU_V2 ? 2 : 1);
1618
1619         /* ID0 */
1620         id = readl_relaxed(gr0_base + ARM_SMMU_GR0_ID0);
1621
1622         /* Restrict available stages based on module parameter */
1623         if (force_stage == 1)
1624                 id &= ~(ID0_S2TS | ID0_NTS);
1625         else if (force_stage == 2)
1626                 id &= ~(ID0_S1TS | ID0_NTS);
1627
1628         if (id & ID0_S1TS) {
1629                 smmu->features |= ARM_SMMU_FEAT_TRANS_S1;
1630                 dev_notice(smmu->dev, "\tstage 1 translation\n");
1631         }
1632
1633         if (id & ID0_S2TS) {
1634                 smmu->features |= ARM_SMMU_FEAT_TRANS_S2;
1635                 dev_notice(smmu->dev, "\tstage 2 translation\n");
1636         }
1637
1638         if (id & ID0_NTS) {
1639                 smmu->features |= ARM_SMMU_FEAT_TRANS_NESTED;
1640                 dev_notice(smmu->dev, "\tnested translation\n");
1641         }
1642
1643         if (!(smmu->features &
1644                 (ARM_SMMU_FEAT_TRANS_S1 | ARM_SMMU_FEAT_TRANS_S2))) {
1645                 dev_err(smmu->dev, "\tno translation support!\n");
1646                 return -ENODEV;
1647         }
1648
1649         if ((id & ID0_S1TS) &&
1650                 ((smmu->version < ARM_SMMU_V2) || !(id & ID0_ATOSNS))) {
1651                 smmu->features |= ARM_SMMU_FEAT_TRANS_OPS;
1652                 dev_notice(smmu->dev, "\taddress translation ops\n");
1653         }
1654
1655         /*
1656          * In order for DMA API calls to work properly, we must defer to what
1657          * the DT says about coherency, regardless of what the hardware claims.
1658          * Fortunately, this also opens up a workaround for systems where the
1659          * ID register value has ended up configured incorrectly.
1660          */
1661         cttw_dt = of_dma_is_coherent(smmu->dev->of_node);
1662         cttw_reg = !!(id & ID0_CTTW);
1663         if (cttw_dt)
1664                 smmu->features |= ARM_SMMU_FEAT_COHERENT_WALK;
1665         if (cttw_dt || cttw_reg)
1666                 dev_notice(smmu->dev, "\t%scoherent table walk\n",
1667                            cttw_dt ? "" : "non-");
1668         if (cttw_dt != cttw_reg)
1669                 dev_notice(smmu->dev,
1670                            "\t(IDR0.CTTW overridden by dma-coherent property)\n");
1671
1672         /* Max. number of entries we have for stream matching/indexing */
1673         size = 1 << ((id >> ID0_NUMSIDB_SHIFT) & ID0_NUMSIDB_MASK);
1674         smmu->streamid_mask = size - 1;
1675         if (id & ID0_SMS) {
1676                 u32 smr;
1677
1678                 smmu->features |= ARM_SMMU_FEAT_STREAM_MATCH;
1679                 size = (id >> ID0_NUMSMRG_SHIFT) & ID0_NUMSMRG_MASK;
1680                 if (size == 0) {
1681                         dev_err(smmu->dev,
1682                                 "stream-matching supported, but no SMRs present!\n");
1683                         return -ENODEV;
1684                 }
1685
1686                 /*
1687                  * SMR.ID bits may not be preserved if the corresponding MASK
1688                  * bits are set, so check each one separately. We can reject
1689                  * masters later if they try to claim IDs outside these masks.
1690                  */
1691                 smr = smmu->streamid_mask << SMR_ID_SHIFT;
1692                 writel_relaxed(smr, gr0_base + ARM_SMMU_GR0_SMR(0));
1693                 smr = readl_relaxed(gr0_base + ARM_SMMU_GR0_SMR(0));
1694                 smmu->streamid_mask = smr >> SMR_ID_SHIFT;
1695
1696                 smr = smmu->streamid_mask << SMR_MASK_SHIFT;
1697                 writel_relaxed(smr, gr0_base + ARM_SMMU_GR0_SMR(0));
1698                 smr = readl_relaxed(gr0_base + ARM_SMMU_GR0_SMR(0));
1699                 smmu->smr_mask_mask = smr >> SMR_MASK_SHIFT;
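
                     /*
                      * Worked example (illustrative numbers): if only 15 ID
                      * bits are writable in the SMR, the all-ones ID pattern
                      * reads back as 0x7fff and streamid_mask shrinks
                      * accordingly; the MASK field is probed the same way to
                      * derive smr_mask_mask.
                      */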
1700
1701                 /* Zero-initialised to mark as invalid */
1702                 smmu->smrs = devm_kcalloc(smmu->dev, size, sizeof(*smmu->smrs),
1703                                           GFP_KERNEL);
1704                 if (!smmu->smrs)
1705                         return -ENOMEM;
1706
1707                 dev_notice(smmu->dev,
1708                            "\tstream matching with %lu register groups, mask 0x%x\n",
1709                            size, smmu->smr_mask_mask);
1710         }
1711         /* s2cr->type == 0 means translation, so initialise explicitly */
1712         smmu->s2crs = devm_kmalloc_array(smmu->dev, size, sizeof(*smmu->s2crs),
1713                                          GFP_KERNEL);
1714         if (!smmu->s2crs)
1715                 return -ENOMEM;
1716         for (i = 0; i < size; i++)
1717                 smmu->s2crs[i] = s2cr_init_val;
1718
1719         smmu->num_mapping_groups = size;
1720         mutex_init(&smmu->stream_map_mutex);
1721
1722         if (smmu->version < ARM_SMMU_V2 || !(id & ID0_PTFS_NO_AARCH32)) {
1723                 smmu->features |= ARM_SMMU_FEAT_FMT_AARCH32_L;
1724                 if (!(id & ID0_PTFS_NO_AARCH32S))
1725                         smmu->features |= ARM_SMMU_FEAT_FMT_AARCH32_S;
1726         }
1727
1728         /* ID1 */
1729         id = readl_relaxed(gr0_base + ARM_SMMU_GR0_ID1);
1730         smmu->pgshift = (id & ID1_PAGESIZE) ? 16 : 12;
1731
1732         /* Check for size mismatch of SMMU address space from mapped region */
1733         size = 1 << (((id >> ID1_NUMPAGENDXB_SHIFT) & ID1_NUMPAGENDXB_MASK) + 1);
1734         size *= 2 << smmu->pgshift;
1735         if (smmu->size != size)
1736                 dev_warn(smmu->dev,
1737                         "SMMU address space size (0x%lx) differs from mapped region size (0x%lx)!\n",
1738                         size, smmu->size);
1739
1740         smmu->num_s2_context_banks = (id >> ID1_NUMS2CB_SHIFT) & ID1_NUMS2CB_MASK;
1741         smmu->num_context_banks = (id >> ID1_NUMCB_SHIFT) & ID1_NUMCB_MASK;
1742         if (smmu->num_s2_context_banks > smmu->num_context_banks) {
1743                 dev_err(smmu->dev, "impossible number of S2 context banks!\n");
1744                 return -ENODEV;
1745         }
1746         dev_notice(smmu->dev, "\t%u context banks (%u stage-2 only)\n",
1747                    smmu->num_context_banks, smmu->num_s2_context_banks);
1748         /*
1749          * Cavium CN88xx erratum #27704.
1750          * Ensure ASID and VMID allocation is unique across all SMMUs in
1751          * the system.
1752          */
1753         if (smmu->model == CAVIUM_SMMUV2) {
1754                 smmu->cavium_id_base =
1755                         atomic_add_return(smmu->num_context_banks,
1756                                           &cavium_smmu_context_count);
1757                 smmu->cavium_id_base -= smmu->num_context_banks;
1758         }
1759
1760         /* ID2 */
1761         id = readl_relaxed(gr0_base + ARM_SMMU_GR0_ID2);
1762         size = arm_smmu_id_size_to_bits((id >> ID2_IAS_SHIFT) & ID2_IAS_MASK);
1763         smmu->ipa_size = size;
1764
1765         /* The output mask is also applied for bypass */
1766         size = arm_smmu_id_size_to_bits((id >> ID2_OAS_SHIFT) & ID2_OAS_MASK);
1767         smmu->pa_size = size;
1768
1769         if (id & ID2_VMID16)
1770                 smmu->features |= ARM_SMMU_FEAT_VMID16;
1771
1772         /*
1773          * What the page table walker can address actually depends on which
1774          * descriptor format is in use, but since a) we don't know that yet,
1775          * and b) it can vary per context bank, this will have to do...
1776          */
1777         if (dma_set_mask_and_coherent(smmu->dev, DMA_BIT_MASK(size)))
1778                 dev_warn(smmu->dev,
1779                          "failed to set DMA mask for table walker\n");
1780
1781         if (smmu->version < ARM_SMMU_V2) {
1782                 smmu->va_size = smmu->ipa_size;
1783                 if (smmu->version == ARM_SMMU_V1_64K)
1784                         smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_64K;
1785         } else {
1786                 size = (id >> ID2_UBS_SHIFT) & ID2_UBS_MASK;
1787                 smmu->va_size = arm_smmu_id_size_to_bits(size);
1788                 if (id & ID2_PTFS_4K)
1789                         smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_4K;
1790                 if (id & ID2_PTFS_16K)
1791                         smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_16K;
1792                 if (id & ID2_PTFS_64K)
1793                         smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_64K;
1794         }
1795
1796         /* Now we've corralled the various formats, what'll it do? */
1797         if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH32_S)
1798                 smmu->pgsize_bitmap |= SZ_4K | SZ_64K | SZ_1M | SZ_16M;
1799         if (smmu->features &
1800             (ARM_SMMU_FEAT_FMT_AARCH32_L | ARM_SMMU_FEAT_FMT_AARCH64_4K))
1801                 smmu->pgsize_bitmap |= SZ_4K | SZ_2M | SZ_1G;
1802         if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH64_16K)
1803                 smmu->pgsize_bitmap |= SZ_16K | SZ_32M;
1804         if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH64_64K)
1805                 smmu->pgsize_bitmap |= SZ_64K | SZ_512M;
1806
1807         if (arm_smmu_ops.pgsize_bitmap == -1UL)
1808                 arm_smmu_ops.pgsize_bitmap = smmu->pgsize_bitmap;
1809         else
1810                 arm_smmu_ops.pgsize_bitmap |= smmu->pgsize_bitmap;
1811         dev_notice(smmu->dev, "\tSupported page sizes: 0x%08lx\n",
1812                    smmu->pgsize_bitmap);
1813
1814
1815         if (smmu->features & ARM_SMMU_FEAT_TRANS_S1)
1816                 dev_notice(smmu->dev, "\tStage-1: %lu-bit VA -> %lu-bit IPA\n",
1817                            smmu->va_size, smmu->ipa_size);
1818
1819         if (smmu->features & ARM_SMMU_FEAT_TRANS_S2)
1820                 dev_notice(smmu->dev, "\tStage-2: %lu-bit IPA -> %lu-bit PA\n",
1821                            smmu->ipa_size, smmu->pa_size);
1822
1823         return 0;
1824 }
1825
1826 struct arm_smmu_match_data {
1827         enum arm_smmu_arch_version version;
1828         enum arm_smmu_implementation model;
1829 };
1830
1831 #define ARM_SMMU_MATCH_DATA(name, ver, imp)     \
1832 static struct arm_smmu_match_data name = { .version = ver, .model = imp }
1833
1834 ARM_SMMU_MATCH_DATA(smmu_generic_v1, ARM_SMMU_V1, GENERIC_SMMU);
1835 ARM_SMMU_MATCH_DATA(smmu_generic_v2, ARM_SMMU_V2, GENERIC_SMMU);
1836 ARM_SMMU_MATCH_DATA(arm_mmu401, ARM_SMMU_V1_64K, GENERIC_SMMU);
1837 ARM_SMMU_MATCH_DATA(arm_mmu500, ARM_SMMU_V2, ARM_MMU500);
1838 ARM_SMMU_MATCH_DATA(cavium_smmuv2, ARM_SMMU_V2, CAVIUM_SMMUV2);
1839
1840 static const struct of_device_id arm_smmu_of_match[] = {
1841         { .compatible = "arm,smmu-v1", .data = &smmu_generic_v1 },
1842         { .compatible = "arm,smmu-v2", .data = &smmu_generic_v2 },
1843         { .compatible = "arm,mmu-400", .data = &smmu_generic_v1 },
1844         { .compatible = "arm,mmu-401", .data = &arm_mmu401 },
1845         { .compatible = "arm,mmu-500", .data = &arm_mmu500 },
1846         { .compatible = "cavium,smmu-v2", .data = &cavium_smmuv2 },
1847         { },
1848 };
1849 MODULE_DEVICE_TABLE(of, arm_smmu_of_match);
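
     /*
      * Minimal sketch of a matching device-tree node (addresses and
      * interrupt specifiers below are placeholders, not real hardware):
      *
      *     smmu@2b400000 {
      *             compatible = "arm,mmu-500";
      *             reg = <0x2b400000 0x10000>;
      *             #global-interrupts = <1>;
      *             interrupts = <0 73 4>, <0 74 4>, <0 75 4>;
      *     };
      */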
1850
1851 static int arm_smmu_device_dt_probe(struct platform_device *pdev)
1852 {
1853         const struct arm_smmu_match_data *data;
1854         struct resource *res;
1855         struct arm_smmu_device *smmu;
1856         struct device *dev = &pdev->dev;
1857         int num_irqs, i, err;
1858
1859         smmu = devm_kzalloc(dev, sizeof(*smmu), GFP_KERNEL);
1860         if (!smmu) {
1861                 dev_err(dev, "failed to allocate arm_smmu_device\n");
1862                 return -ENOMEM;
1863         }
1864         smmu->dev = dev;
1865
1866         data = of_device_get_match_data(dev);
1867         smmu->version = data->version;
1868         smmu->model = data->model;
1869
1870         res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
1871         smmu->base = devm_ioremap_resource(dev, res);
1872         if (IS_ERR(smmu->base))
1873                 return PTR_ERR(smmu->base);
1874         smmu->size = resource_size(res);
1875
1876         if (of_property_read_u32(dev->of_node, "#global-interrupts",
1877                                  &smmu->num_global_irqs)) {
1878                 dev_err(dev, "missing #global-interrupts property\n");
1879                 return -ENODEV;
1880         }
1881
1882         num_irqs = 0;
1883         while ((res = platform_get_resource(pdev, IORESOURCE_IRQ, num_irqs))) {
1884                 num_irqs++;
1885                 if (num_irqs > smmu->num_global_irqs)
1886                         smmu->num_context_irqs++;
1887         }
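
             /*
              * For example, with "#global-interrupts = <1>" and three
              * interrupts in total, irqs[0] will be requested as the global
              * fault IRQ below and the remaining two are counted as context
              * bank interrupts.
              */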
1888
1889         if (!smmu->num_context_irqs) {
1890                 dev_err(dev, "found %d interrupts but expected at least %d\n",
1891                         num_irqs, smmu->num_global_irqs + 1);
1892                 return -ENODEV;
1893         }
1894
1895         smmu->irqs = devm_kzalloc(dev, sizeof(*smmu->irqs) * num_irqs,
1896                                   GFP_KERNEL);
1897         if (!smmu->irqs) {
1898                 dev_err(dev, "failed to allocate %d irqs\n", num_irqs);
1899                 return -ENOMEM;
1900         }
1901
1902         for (i = 0; i < num_irqs; ++i) {
1903                 int irq = platform_get_irq(pdev, i);
1904
1905                 if (irq < 0) {
1906                         dev_err(dev, "failed to get irq index %d\n", i);
1907                         return -ENODEV;
1908                 }
1909                 smmu->irqs[i] = irq;
1910         }
1911
1912         err = arm_smmu_device_cfg_probe(smmu);
1913         if (err)
1914                 return err;
1915
1916         parse_driver_options(smmu);
1917
1918         if (smmu->version == ARM_SMMU_V2 &&
1919             smmu->num_context_banks != smmu->num_context_irqs) {
1920                 dev_err(dev,
1921                         "found only %d context interrupt(s) but %d required\n",
1922                         smmu->num_context_irqs, smmu->num_context_banks);
1923                 return -ENODEV;
1924         }
1925
1926         for (i = 0; i < smmu->num_global_irqs; ++i) {
1927                 err = devm_request_irq(smmu->dev, smmu->irqs[i],
1928                                        arm_smmu_global_fault,
1929                                        IRQF_SHARED,
1930                                        "arm-smmu global fault",
1931                                        smmu);
1932                 if (err) {
1933                         dev_err(dev, "failed to request global IRQ %d (%u)\n",
1934                                 i, smmu->irqs[i]);
1935                         return err;
1936                 }
1937         }
1938
1939         platform_set_drvdata(pdev, smmu);
1940         arm_smmu_device_reset(smmu);
1941         return 0;
1942 }
1943
1944 static int arm_smmu_device_remove(struct platform_device *pdev)
1945 {
1946         struct arm_smmu_device *smmu = platform_get_drvdata(pdev);
1947
1948         if (!smmu)
1949                 return -ENODEV;
1950
1951         if (!bitmap_empty(smmu->context_map, ARM_SMMU_MAX_CBS))
1952                 dev_err(&pdev->dev, "removing device with active domains!\n");
1953
1954         /* Turn the thing off */
1955         writel(sCR0_CLIENTPD, ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sCR0);
1956         return 0;
1957 }
1958
1959 static struct platform_driver arm_smmu_driver = {
1960         .driver = {
1961                 .name           = "arm-smmu",
1962                 .of_match_table = of_match_ptr(arm_smmu_of_match),
1963         },
1964         .probe  = arm_smmu_device_dt_probe,
1965         .remove = arm_smmu_device_remove,
1966 };
1967
1968 static int __init arm_smmu_init(void)
1969 {
1970         struct device_node *np;
1971         int ret;
1972
1973         /*
1974          * Play nice with systems that don't have an ARM SMMU by checking that
1975          * an ARM SMMU exists in the system before proceeding with the driver
1976          * and IOMMU bus operation registration.
1977          */
1978         np = of_find_matching_node(NULL, arm_smmu_of_match);
1979         if (!np)
1980                 return 0;
1981
1982         of_node_put(np);
1983
1984         ret = platform_driver_register(&arm_smmu_driver);
1985         if (ret)
1986                 return ret;
1987
1988         /* Oh, for a proper bus abstraction */
1989         if (!iommu_present(&platform_bus_type))
1990                 bus_set_iommu(&platform_bus_type, &arm_smmu_ops);
1991
1992 #ifdef CONFIG_ARM_AMBA
1993         if (!iommu_present(&amba_bustype))
1994                 bus_set_iommu(&amba_bustype, &arm_smmu_ops);
1995 #endif
1996
1997 #ifdef CONFIG_PCI
1998         if (!iommu_present(&pci_bus_type)) {
1999                 pci_request_acs();
2000                 bus_set_iommu(&pci_bus_type, &arm_smmu_ops);
2001         }
2002 #endif
2003
2004         return 0;
2005 }
2006
2007 static void __exit arm_smmu_exit(void)
2008 {
2009         platform_driver_unregister(&arm_smmu_driver);
2010 }
2011
2012 subsys_initcall(arm_smmu_init);
2013 module_exit(arm_smmu_exit);
2014
2015 MODULE_DESCRIPTION("IOMMU API for ARM architected SMMU implementations");
2016 MODULE_AUTHOR("Will Deacon <will.deacon@arm.com>");
2017 MODULE_LICENSE("GPL v2");