/*
 * IOMMU API for ARM architected SMMU implementations.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 *
 * Copyright (C) 2013 ARM Limited
 *
 * Author: Will Deacon <will.deacon@arm.com>
 *
 * This driver currently supports:
 *	- SMMUv1 and v2 implementations
 *	- Stream-matching and stream-indexing
 *	- v7/v8 long-descriptor format
 *	- Non-secure access to the SMMU
 *	- Context fault reporting
 */

#define pr_fmt(fmt) "arm-smmu: " fmt

#include <linux/atomic.h>
#include <linux/delay.h>
#include <linux/dma-iommu.h>
#include <linux/dma-mapping.h>
#include <linux/err.h>
#include <linux/interrupt.h>
#include <linux/io.h>
#include <linux/io-64-nonatomic-hi-lo.h>
#include <linux/iommu.h>
#include <linux/iopoll.h>
#include <linux/module.h>
#include <linux/of.h>
#include <linux/of_address.h>
#include <linux/of_device.h>
#include <linux/pci.h>
#include <linux/platform_device.h>
#include <linux/slab.h>
#include <linux/spinlock.h>

#include <linux/amba/bus.h>

#include "io-pgtable.h"

/* Maximum number of stream IDs assigned to a single device */
#define MAX_MASTER_STREAMIDS 128

/* Maximum number of context banks per SMMU */
#define ARM_SMMU_MAX_CBS 128

/* SMMU global address space */
#define ARM_SMMU_GR0(smmu) ((smmu)->base)
#define ARM_SMMU_GR1(smmu) ((smmu)->base + (1 << (smmu)->pgshift))

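/*
 * Illustrative note (not part of the original source): pgshift is derived
 * from SMMU_IDR1.PAGESIZE in arm_smmu_device_cfg_probe(), so with 4KB
 * translation units (pgshift == 12) GR1 starts at base + 0x1000, and with
 * 64KB units (pgshift == 16) it starts at base + 0x10000.
 */
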
/*
 * SMMU global address space with conditional offset to access secure
 * aliases of non-secure registers (e.g. nsCR0: 0x400, nsGFSR: 0x448,
 * nsGFSYNR0: 0x450)
 */
#define ARM_SMMU_GR0_NS(smmu)						\
	((smmu)->base +							\
		((smmu->options & ARM_SMMU_OPT_SECURE_CFG_ACCESS)	\
			? 0x400 : 0))

/*
 * Some 64-bit registers only make sense to write atomically, but in such
 * cases all the data relevant to AArch32 formats lies within the lower word,
 * therefore this actually makes more sense than it might first appear.
 */
#ifdef CONFIG_64BIT
#define smmu_write_atomic_lq writeq_relaxed
#else
#define smmu_write_atomic_lq writel_relaxed
#endif

/* Configuration registers */
#define ARM_SMMU_GR0_sCR0 0x0
#define sCR0_CLIENTPD (1 << 0)
#define sCR0_GFRE (1 << 1)
#define sCR0_GFIE (1 << 2)
#define sCR0_GCFGFRE (1 << 4)
#define sCR0_GCFGFIE (1 << 5)
#define sCR0_USFCFG (1 << 10)
#define sCR0_VMIDPNE (1 << 11)
#define sCR0_PTM (1 << 12)
#define sCR0_FB (1 << 13)
#define sCR0_VMID16EN (1 << 31)
#define sCR0_BSU_SHIFT 14
#define sCR0_BSU_MASK 0x3

/* Auxiliary Configuration register */
#define ARM_SMMU_GR0_sACR 0x10

/* Identification registers */
#define ARM_SMMU_GR0_ID0 0x20
#define ARM_SMMU_GR0_ID1 0x24
#define ARM_SMMU_GR0_ID2 0x28
#define ARM_SMMU_GR0_ID3 0x2c
#define ARM_SMMU_GR0_ID4 0x30
#define ARM_SMMU_GR0_ID5 0x34
#define ARM_SMMU_GR0_ID6 0x38
#define ARM_SMMU_GR0_ID7 0x3c
#define ARM_SMMU_GR0_sGFSR 0x48
#define ARM_SMMU_GR0_sGFSYNR0 0x50
#define ARM_SMMU_GR0_sGFSYNR1 0x54
#define ARM_SMMU_GR0_sGFSYNR2 0x58

#define ID0_S1TS (1 << 30)
#define ID0_S2TS (1 << 29)
#define ID0_NTS (1 << 28)
#define ID0_SMS (1 << 27)
#define ID0_ATOSNS (1 << 26)
#define ID0_PTFS_NO_AARCH32 (1 << 25)
#define ID0_PTFS_NO_AARCH32S (1 << 24)
#define ID0_CTTW (1 << 14)
#define ID0_NUMIRPT_SHIFT 16
#define ID0_NUMIRPT_MASK 0xff
#define ID0_NUMSIDB_SHIFT 9
#define ID0_NUMSIDB_MASK 0xf
#define ID0_NUMSMRG_SHIFT 0
#define ID0_NUMSMRG_MASK 0xff

#define ID1_PAGESIZE (1 << 31)
#define ID1_NUMPAGENDXB_SHIFT 28
#define ID1_NUMPAGENDXB_MASK 7
#define ID1_NUMS2CB_SHIFT 16
#define ID1_NUMS2CB_MASK 0xff
#define ID1_NUMCB_SHIFT 0
#define ID1_NUMCB_MASK 0xff

#define ID2_OAS_SHIFT 4
#define ID2_OAS_MASK 0xf
#define ID2_IAS_SHIFT 0
#define ID2_IAS_MASK 0xf
#define ID2_UBS_SHIFT 8
#define ID2_UBS_MASK 0xf
#define ID2_PTFS_4K (1 << 12)
#define ID2_PTFS_16K (1 << 13)
#define ID2_PTFS_64K (1 << 14)
#define ID2_VMID16 (1 << 15)

#define ID7_MAJOR_SHIFT 4
#define ID7_MAJOR_MASK 0xf

/* Global TLB invalidation */
#define ARM_SMMU_GR0_TLBIVMID 0x64
#define ARM_SMMU_GR0_TLBIALLNSNH 0x68
#define ARM_SMMU_GR0_TLBIALLH 0x6c
#define ARM_SMMU_GR0_sTLBGSYNC 0x70
#define ARM_SMMU_GR0_sTLBGSTATUS 0x74
#define sTLBGSTATUS_GSACTIVE (1 << 0)
#define TLB_LOOP_TIMEOUT 1000000 /* 1s! */

/* Stream mapping registers */
#define ARM_SMMU_GR0_SMR(n) (0x800 + ((n) << 2))
#define SMR_VALID (1 << 31)
#define SMR_MASK_SHIFT 16
#define SMR_ID_SHIFT 0

#define ARM_SMMU_GR0_S2CR(n) (0xc00 + ((n) << 2))
#define S2CR_CBNDX_SHIFT 0
#define S2CR_CBNDX_MASK 0xff
#define S2CR_TYPE_SHIFT 16
#define S2CR_TYPE_MASK 0x3
enum arm_smmu_s2cr_type {
	S2CR_TYPE_TRANS,
	S2CR_TYPE_BYPASS,
	S2CR_TYPE_FAULT,
};

#define S2CR_PRIVCFG_SHIFT 24
#define S2CR_PRIVCFG_MASK 0x3
enum arm_smmu_s2cr_privcfg {
	S2CR_PRIVCFG_DEFAULT,
	S2CR_PRIVCFG_DEFPRIV,
	S2CR_PRIVCFG_UNPRIV,
	S2CR_PRIVCFG_PRIV,
};

/* Context bank attribute registers */
#define ARM_SMMU_GR1_CBAR(n) (0x0 + ((n) << 2))
#define CBAR_VMID_SHIFT 0
#define CBAR_VMID_MASK 0xff
#define CBAR_S1_BPSHCFG_SHIFT 8
#define CBAR_S1_BPSHCFG_MASK 3
#define CBAR_S1_BPSHCFG_NSH 3
#define CBAR_S1_MEMATTR_SHIFT 12
#define CBAR_S1_MEMATTR_MASK 0xf
#define CBAR_S1_MEMATTR_WB 0xf
#define CBAR_TYPE_SHIFT 16
#define CBAR_TYPE_MASK 0x3
#define CBAR_TYPE_S2_TRANS (0 << CBAR_TYPE_SHIFT)
#define CBAR_TYPE_S1_TRANS_S2_BYPASS (1 << CBAR_TYPE_SHIFT)
#define CBAR_TYPE_S1_TRANS_S2_FAULT (2 << CBAR_TYPE_SHIFT)
#define CBAR_TYPE_S1_TRANS_S2_TRANS (3 << CBAR_TYPE_SHIFT)
#define CBAR_IRPTNDX_SHIFT 24
#define CBAR_IRPTNDX_MASK 0xff

#define ARM_SMMU_GR1_CBA2R(n) (0x800 + ((n) << 2))
#define CBA2R_RW64_32BIT (0 << 0)
#define CBA2R_RW64_64BIT (1 << 0)
#define CBA2R_VMID_SHIFT 16
#define CBA2R_VMID_MASK 0xffff

/* Translation context bank */
#define ARM_SMMU_CB_BASE(smmu) ((smmu)->base + ((smmu)->size >> 1))
#define ARM_SMMU_CB(smmu, n) ((n) * (1 << (smmu)->pgshift))

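/*
 * Illustrative example (not part of the original source): for an SMMU with
 * pgshift == 12 whose register region is 128KB, ARM_SMMU_CB_BASE() is
 * base + 0x10000 and the registers of context bank n start a further
 * ARM_SMMU_CB(smmu, n) == n * 0x1000 bytes beyond that.
 */
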
#define ARM_SMMU_CB_SCTLR 0x0
#define ARM_SMMU_CB_ACTLR 0x4
#define ARM_SMMU_CB_RESUME 0x8
#define ARM_SMMU_CB_TTBCR2 0x10
#define ARM_SMMU_CB_TTBR0 0x20
#define ARM_SMMU_CB_TTBR1 0x28
#define ARM_SMMU_CB_TTBCR 0x30
#define ARM_SMMU_CB_CONTEXTIDR 0x34
#define ARM_SMMU_CB_S1_MAIR0 0x38
#define ARM_SMMU_CB_S1_MAIR1 0x3c
#define ARM_SMMU_CB_PAR 0x50
#define ARM_SMMU_CB_FSR 0x58
#define ARM_SMMU_CB_FAR 0x60
#define ARM_SMMU_CB_FSYNR0 0x68
#define ARM_SMMU_CB_S1_TLBIVA 0x600
#define ARM_SMMU_CB_S1_TLBIASID 0x610
#define ARM_SMMU_CB_S1_TLBIVAL 0x620
#define ARM_SMMU_CB_S2_TLBIIPAS2 0x630
#define ARM_SMMU_CB_S2_TLBIIPAS2L 0x638
#define ARM_SMMU_CB_ATS1PR 0x800
#define ARM_SMMU_CB_ATSR 0x8f0

#define SCTLR_S1_ASIDPNE (1 << 12)
#define SCTLR_CFCFG (1 << 7)
#define SCTLR_CFIE (1 << 6)
#define SCTLR_CFRE (1 << 5)
#define SCTLR_E (1 << 4)
#define SCTLR_AFE (1 << 2)
#define SCTLR_TRE (1 << 1)
#define SCTLR_M (1 << 0)

#define ARM_MMU500_ACTLR_CPRE (1 << 1)

#define ARM_MMU500_ACR_CACHE_LOCK (1 << 26)

#define CB_PAR_F (1 << 0)

#define ATSR_ACTIVE (1 << 0)

#define RESUME_RETRY (0 << 0)
#define RESUME_TERMINATE (1 << 0)

#define TTBCR2_SEP_SHIFT 15
#define TTBCR2_SEP_UPSTREAM (0x7 << TTBCR2_SEP_SHIFT)

#define TTBRn_ASID_SHIFT 48

#define FSR_MULTI (1 << 31)
#define FSR_SS (1 << 30)
#define FSR_UUT (1 << 8)
#define FSR_ASF (1 << 7)
#define FSR_TLBLKF (1 << 6)
#define FSR_TLBMCF (1 << 5)
#define FSR_EF (1 << 4)
#define FSR_PF (1 << 3)
#define FSR_AFF (1 << 2)
#define FSR_TF (1 << 1)

#define FSR_IGN (FSR_AFF | FSR_ASF | \
		 FSR_TLBMCF | FSR_TLBLKF)
#define FSR_FAULT (FSR_MULTI | FSR_SS | FSR_UUT | \
		   FSR_EF | FSR_PF | FSR_TF | FSR_IGN)

#define FSYNR0_WNR (1 << 4)

static int force_stage;
module_param(force_stage, int, S_IRUGO);
MODULE_PARM_DESC(force_stage,
	"Force SMMU mappings to be installed at a particular stage of translation. A value of '1' or '2' forces the corresponding stage. All other values are ignored (i.e. no stage is forced). Note that selecting a specific stage will disable support for nested translation.");
static bool disable_bypass;
module_param(disable_bypass, bool, S_IRUGO);
MODULE_PARM_DESC(disable_bypass,
	"Disable bypass streams such that incoming transactions from devices that are not attached to an iommu domain will report an abort back to the device and will not be allowed to pass through the SMMU.");

enum arm_smmu_arch_version {
	ARM_SMMU_V1,
	ARM_SMMU_V1_64K,
	ARM_SMMU_V2,
};

enum arm_smmu_implementation {
	GENERIC_SMMU,
	ARM_MMU500,
	CAVIUM_SMMUV2,
};

struct arm_smmu_s2cr {
	enum arm_smmu_s2cr_type type;
	enum arm_smmu_s2cr_privcfg privcfg;
	u8 cbndx;
};

#define s2cr_init_val (struct arm_smmu_s2cr){				\
	.type = disable_bypass ? S2CR_TYPE_FAULT : S2CR_TYPE_BYPASS,	\
}

struct arm_smmu_smr {
	u16 mask;
	u16 id;
	bool valid;
};

struct arm_smmu_master_cfg {
	struct arm_smmu_device *smmu;
	int num_streamids;
	u16 streamids[MAX_MASTER_STREAMIDS];
	s16 smendx[MAX_MASTER_STREAMIDS];
};
#define INVALID_SMENDX -1

struct arm_smmu_device {
	struct device *dev;

	void __iomem *base;
	unsigned long size;
	unsigned long pgshift;

#define ARM_SMMU_FEAT_COHERENT_WALK (1 << 0)
#define ARM_SMMU_FEAT_STREAM_MATCH (1 << 1)
#define ARM_SMMU_FEAT_TRANS_S1 (1 << 2)
#define ARM_SMMU_FEAT_TRANS_S2 (1 << 3)
#define ARM_SMMU_FEAT_TRANS_NESTED (1 << 4)
#define ARM_SMMU_FEAT_TRANS_OPS (1 << 5)
#define ARM_SMMU_FEAT_VMID16 (1 << 6)
#define ARM_SMMU_FEAT_FMT_AARCH64_4K (1 << 7)
#define ARM_SMMU_FEAT_FMT_AARCH64_16K (1 << 8)
#define ARM_SMMU_FEAT_FMT_AARCH64_64K (1 << 9)
#define ARM_SMMU_FEAT_FMT_AARCH32_L (1 << 10)
#define ARM_SMMU_FEAT_FMT_AARCH32_S (1 << 11)
	u32 features;

#define ARM_SMMU_OPT_SECURE_CFG_ACCESS (1 << 0)
	u32 options;
	enum arm_smmu_arch_version version;
	enum arm_smmu_implementation model;

	u32 num_context_banks;
	u32 num_s2_context_banks;
	DECLARE_BITMAP(context_map, ARM_SMMU_MAX_CBS);
	atomic_t irptndx;

	u32 num_mapping_groups;
	u16 streamid_mask;
	u16 smr_mask_mask;
	struct arm_smmu_smr *smrs;
	struct arm_smmu_s2cr *s2crs;

	unsigned long va_size;
	unsigned long ipa_size;
	unsigned long pa_size;
	unsigned long pgsize_bitmap;

	u32 num_global_irqs;
	u32 num_context_irqs;
	unsigned int *irqs;

	u32 cavium_id_base; /* Specific to Cavium */
};

enum arm_smmu_context_fmt {
	ARM_SMMU_CTX_FMT_NONE,
	ARM_SMMU_CTX_FMT_AARCH64,
	ARM_SMMU_CTX_FMT_AARCH32_L,
	ARM_SMMU_CTX_FMT_AARCH32_S,
};

struct arm_smmu_cfg {
	u8 cbndx;
	u8 irptndx;
	u32 cbar;
	enum arm_smmu_context_fmt fmt;
};
#define INVALID_IRPTNDX 0xff

#define ARM_SMMU_CB_ASID(smmu, cfg) ((u16)(smmu)->cavium_id_base + (cfg)->cbndx)
#define ARM_SMMU_CB_VMID(smmu, cfg) ((u16)(smmu)->cavium_id_base + (cfg)->cbndx + 1)

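/*
 * Illustrative example (not part of the original source): with the Cavium
 * erratum workaround in arm_smmu_device_cfg_probe(), a second SMMU whose
 * cavium_id_base is 128 gives its context bank 5 ASID 133 and VMID 134, so
 * no two context banks system-wide share an ASID or VMID.
 */
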
enum arm_smmu_domain_stage {
	ARM_SMMU_DOMAIN_S1 = 0,
	ARM_SMMU_DOMAIN_S2,
	ARM_SMMU_DOMAIN_NESTED,
};

struct arm_smmu_domain {
	struct arm_smmu_device *smmu;
	struct io_pgtable_ops *pgtbl_ops;
	spinlock_t pgtbl_lock;
	struct arm_smmu_cfg cfg;
	enum arm_smmu_domain_stage stage;
	struct mutex init_mutex; /* Protects smmu pointer */
	struct iommu_domain domain;
};

struct arm_smmu_option_prop {
	u32 opt;
	const char *prop;
};

static atomic_t cavium_smmu_context_count = ATOMIC_INIT(0);

static struct arm_smmu_option_prop arm_smmu_options[] = {
	{ ARM_SMMU_OPT_SECURE_CFG_ACCESS, "calxeda,smmu-secure-config-access" },
	{ 0, NULL},
};

static struct arm_smmu_domain *to_smmu_domain(struct iommu_domain *dom)
{
	return container_of(dom, struct arm_smmu_domain, domain);
}

static void parse_driver_options(struct arm_smmu_device *smmu)
{
	int i = 0;

	do {
		if (of_property_read_bool(smmu->dev->of_node,
					  arm_smmu_options[i].prop)) {
			smmu->options |= arm_smmu_options[i].opt;
			dev_notice(smmu->dev, "option %s\n",
				   arm_smmu_options[i].prop);
		}
	} while (arm_smmu_options[++i].opt);
}

static struct device_node *dev_get_dev_node(struct device *dev)
{
	if (dev_is_pci(dev)) {
		struct pci_bus *bus = to_pci_dev(dev)->bus;

		while (!pci_is_root_bus(bus))
			bus = bus->parent;
		return of_node_get(bus->bridge->parent->of_node);
	}

	return of_node_get(dev->of_node);
}

static int __arm_smmu_get_pci_sid(struct pci_dev *pdev, u16 alias, void *data)
{
	*((__be32 *)data) = cpu_to_be32(alias);
	return 0; /* Continue walking */
}

static int __find_legacy_master_phandle(struct device *dev, void *data)
{
	struct of_phandle_iterator *it = *(void **)data;
	struct device_node *np = it->node;
	int err;

	of_for_each_phandle(it, err, dev->of_node, "mmu-masters",
			    "#stream-id-cells", 0)
		if (it->node == np) {
			*(void **)data = dev;
			return 1;
		}
	it->node = np;
	return err == -ENOENT ? 0 : err;
}

static struct platform_driver arm_smmu_driver;

static int arm_smmu_register_legacy_master(struct device *dev)
{
	struct arm_smmu_device *smmu;
	struct arm_smmu_master_cfg *cfg;
	struct device_node *np;
	struct of_phandle_iterator it;
	void *data = &it;
	__be32 pci_sid;
	int err;

	np = dev_get_dev_node(dev);
	if (!np || !of_find_property(np, "#stream-id-cells", NULL)) {
		of_node_put(np);
		return -ENODEV;
	}

	it.node = np;
	err = driver_for_each_device(&arm_smmu_driver.driver, NULL, &data,
				     __find_legacy_master_phandle);
	of_node_put(np);
	if (err == 0)
		return -ENODEV;
	if (err < 0)
		return err;

	smmu = dev_get_drvdata(data);

	if (it.cur_count > MAX_MASTER_STREAMIDS) {
		dev_err(smmu->dev,
			"reached maximum number (%d) of stream IDs for master device %s\n",
			MAX_MASTER_STREAMIDS, dev_name(dev));
		return -ENOSPC;
	}

	if (dev_is_pci(dev)) {
		/* "mmu-masters" assumes Stream ID == Requester ID */
		pci_for_each_dma_alias(to_pci_dev(dev), __arm_smmu_get_pci_sid,
				       &pci_sid);
		it.cur = &pci_sid;
		it.cur_count = 1;
	}

	cfg = kzalloc(sizeof(*cfg), GFP_KERNEL);
	if (!cfg)
		return -ENOMEM;

	cfg->smmu = smmu;
	dev->archdata.iommu = cfg;

	while (it.cur_count--)
		cfg->streamids[cfg->num_streamids++] = be32_to_cpup(it.cur++);

	return 0;
}

static int __arm_smmu_alloc_bitmap(unsigned long *map, int start, int end)
{
	int idx;

	do {
		idx = find_next_zero_bit(map, end, start);
		if (idx == end)
			return -ENOSPC;
	} while (test_and_set_bit(idx, map));

	return idx;
}

static void __arm_smmu_free_bitmap(unsigned long *map, int idx)
{
	clear_bit(idx, map);
}

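/*
 * Illustrative usage (not part of the original source): context banks are
 * carved out of smmu->context_map with these helpers, e.g.
 *
 *	int cbndx = __arm_smmu_alloc_bitmap(smmu->context_map, start,
 *					    smmu->num_context_banks);
 *	if (cbndx >= 0)
 *		__arm_smmu_free_bitmap(smmu->context_map, cbndx);
 *
 * The test_and_set_bit() loop makes allocation safe against concurrent
 * callers without taking a lock.
 */
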
/* Wait for any pending TLB invalidations to complete */
static void __arm_smmu_tlb_sync(struct arm_smmu_device *smmu)
{
	int count = 0;
	void __iomem *gr0_base = ARM_SMMU_GR0(smmu);

	writel_relaxed(0, gr0_base + ARM_SMMU_GR0_sTLBGSYNC);
	while (readl_relaxed(gr0_base + ARM_SMMU_GR0_sTLBGSTATUS)
	       & sTLBGSTATUS_GSACTIVE) {
		cpu_relax();
		if (++count == TLB_LOOP_TIMEOUT) {
			dev_err_ratelimited(smmu->dev,
			"TLB sync timed out -- SMMU may be deadlocked\n");
			return;
		}
		udelay(1);
	}
}

static void arm_smmu_tlb_sync(void *cookie)
{
	struct arm_smmu_domain *smmu_domain = cookie;
	__arm_smmu_tlb_sync(smmu_domain->smmu);
}

static void arm_smmu_tlb_inv_context(void *cookie)
{
	struct arm_smmu_domain *smmu_domain = cookie;
	struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
	struct arm_smmu_device *smmu = smmu_domain->smmu;
	bool stage1 = cfg->cbar != CBAR_TYPE_S2_TRANS;
	void __iomem *base;

	if (stage1) {
		base = ARM_SMMU_CB_BASE(smmu) + ARM_SMMU_CB(smmu, cfg->cbndx);
		writel_relaxed(ARM_SMMU_CB_ASID(smmu, cfg),
			       base + ARM_SMMU_CB_S1_TLBIASID);
	} else {
		base = ARM_SMMU_GR0(smmu);
		writel_relaxed(ARM_SMMU_CB_VMID(smmu, cfg),
			       base + ARM_SMMU_GR0_TLBIVMID);
	}

	__arm_smmu_tlb_sync(smmu);
}

static void arm_smmu_tlb_inv_range_nosync(unsigned long iova, size_t size,
					  size_t granule, bool leaf, void *cookie)
{
	struct arm_smmu_domain *smmu_domain = cookie;
	struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
	struct arm_smmu_device *smmu = smmu_domain->smmu;
	bool stage1 = cfg->cbar != CBAR_TYPE_S2_TRANS;
	void __iomem *reg;

	if (stage1) {
		reg = ARM_SMMU_CB_BASE(smmu) + ARM_SMMU_CB(smmu, cfg->cbndx);
		reg += leaf ? ARM_SMMU_CB_S1_TLBIVAL : ARM_SMMU_CB_S1_TLBIVA;

		if (cfg->fmt != ARM_SMMU_CTX_FMT_AARCH64) {
			iova &= ~0xfffUL;
			iova |= ARM_SMMU_CB_ASID(smmu, cfg);
			do {
				writel_relaxed(iova, reg);
				iova += granule;
			} while (size -= granule);
		} else {
			iova >>= 12;
			iova |= (u64)ARM_SMMU_CB_ASID(smmu, cfg) << 48;
			do {
				writeq_relaxed(iova, reg);
				iova += granule >> 12;
			} while (size -= granule);
		}
	} else if (smmu->version == ARM_SMMU_V2) {
		reg = ARM_SMMU_CB_BASE(smmu) + ARM_SMMU_CB(smmu, cfg->cbndx);
		reg += leaf ? ARM_SMMU_CB_S2_TLBIIPAS2L :
			      ARM_SMMU_CB_S2_TLBIIPAS2;
		iova >>= 12;
		do {
			smmu_write_atomic_lq(iova, reg);
			iova += granule >> 12;
		} while (size -= granule);
	} else {
		reg = ARM_SMMU_GR0(smmu) + ARM_SMMU_GR0_TLBIVMID;
		writel_relaxed(ARM_SMMU_CB_VMID(smmu, cfg), reg);
	}
}

static struct iommu_gather_ops arm_smmu_gather_ops = {
	.tlb_flush_all	= arm_smmu_tlb_inv_context,
	.tlb_add_flush	= arm_smmu_tlb_inv_range_nosync,
	.tlb_sync	= arm_smmu_tlb_sync,
};

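/*
 * Illustrative note (not part of the original source): the io-pgtable
 * library calls back into these ops while it updates the page tables; e.g.
 * unmapping a leaf entry leads to tlb_add_flush(iova, size, granule, true,
 * cookie) followed eventually by tlb_sync(cookie), where cookie is the
 * arm_smmu_domain passed to alloc_io_pgtable_ops() later in this file.
 */
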
static irqreturn_t arm_smmu_context_fault(int irq, void *dev)
{
	u32 fsr, fsynr;
	unsigned long iova;
	struct iommu_domain *domain = dev;
	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
	struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
	struct arm_smmu_device *smmu = smmu_domain->smmu;
	void __iomem *cb_base;

	cb_base = ARM_SMMU_CB_BASE(smmu) + ARM_SMMU_CB(smmu, cfg->cbndx);
	fsr = readl_relaxed(cb_base + ARM_SMMU_CB_FSR);

	if (!(fsr & FSR_FAULT))
		return IRQ_NONE;

	fsynr = readl_relaxed(cb_base + ARM_SMMU_CB_FSYNR0);
	iova = readq_relaxed(cb_base + ARM_SMMU_CB_FAR);

	dev_err_ratelimited(smmu->dev,
	"Unhandled context fault: fsr=0x%x, iova=0x%08lx, fsynr=0x%x, cb=%d\n",
			    fsr, iova, fsynr, cfg->cbndx);

	writel(fsr, cb_base + ARM_SMMU_CB_FSR);
	return IRQ_HANDLED;
}

static irqreturn_t arm_smmu_global_fault(int irq, void *dev)
{
	u32 gfsr, gfsynr0, gfsynr1, gfsynr2;
	struct arm_smmu_device *smmu = dev;
	void __iomem *gr0_base = ARM_SMMU_GR0_NS(smmu);

	gfsr = readl_relaxed(gr0_base + ARM_SMMU_GR0_sGFSR);
	gfsynr0 = readl_relaxed(gr0_base + ARM_SMMU_GR0_sGFSYNR0);
	gfsynr1 = readl_relaxed(gr0_base + ARM_SMMU_GR0_sGFSYNR1);
	gfsynr2 = readl_relaxed(gr0_base + ARM_SMMU_GR0_sGFSYNR2);

	if (!gfsr)
		return IRQ_NONE;

	dev_err_ratelimited(smmu->dev,
		"Unexpected global fault, this could be serious\n");
	dev_err_ratelimited(smmu->dev,
		"\tGFSR 0x%08x, GFSYNR0 0x%08x, GFSYNR1 0x%08x, GFSYNR2 0x%08x\n",
		gfsr, gfsynr0, gfsynr1, gfsynr2);

	writel(gfsr, gr0_base + ARM_SMMU_GR0_sGFSR);
	return IRQ_HANDLED;
}

static void arm_smmu_init_context_bank(struct arm_smmu_domain *smmu_domain,
				       struct io_pgtable_cfg *pgtbl_cfg)
{
	u32 reg, reg2;
	u64 reg64;
	bool stage1;
	struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
	struct arm_smmu_device *smmu = smmu_domain->smmu;
	void __iomem *cb_base, *gr1_base;

	gr1_base = ARM_SMMU_GR1(smmu);
	stage1 = cfg->cbar != CBAR_TYPE_S2_TRANS;
	cb_base = ARM_SMMU_CB_BASE(smmu) + ARM_SMMU_CB(smmu, cfg->cbndx);

	if (smmu->version > ARM_SMMU_V1) {
		if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64)
			reg = CBA2R_RW64_64BIT;
		else
			reg = CBA2R_RW64_32BIT;
		/* 16-bit VMIDs live in CBA2R */
		if (smmu->features & ARM_SMMU_FEAT_VMID16)
			reg |= ARM_SMMU_CB_VMID(smmu, cfg) << CBA2R_VMID_SHIFT;

		writel_relaxed(reg, gr1_base + ARM_SMMU_GR1_CBA2R(cfg->cbndx));
	}

	/* CBAR */
	reg = cfg->cbar;
	if (smmu->version < ARM_SMMU_V2)
		reg |= cfg->irptndx << CBAR_IRPTNDX_SHIFT;

	/*
	 * Use the weakest shareability/memory types, so they are
	 * overridden by the ttbcr/pte.
	 */
	if (stage1) {
		reg |= (CBAR_S1_BPSHCFG_NSH << CBAR_S1_BPSHCFG_SHIFT) |
			(CBAR_S1_MEMATTR_WB << CBAR_S1_MEMATTR_SHIFT);
	} else if (!(smmu->features & ARM_SMMU_FEAT_VMID16)) {
		/* 8-bit VMIDs live in CBAR */
		reg |= ARM_SMMU_CB_VMID(smmu, cfg) << CBAR_VMID_SHIFT;
	}
	writel_relaxed(reg, gr1_base + ARM_SMMU_GR1_CBAR(cfg->cbndx));

	/* TTBRs */
	if (stage1) {
		u16 asid = ARM_SMMU_CB_ASID(smmu, cfg);

		if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) {
			reg = pgtbl_cfg->arm_v7s_cfg.ttbr[0];
			writel_relaxed(reg, cb_base + ARM_SMMU_CB_TTBR0);
			reg = pgtbl_cfg->arm_v7s_cfg.ttbr[1];
			writel_relaxed(reg, cb_base + ARM_SMMU_CB_TTBR1);
			writel_relaxed(asid, cb_base + ARM_SMMU_CB_CONTEXTIDR);
		} else {
			reg64 = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[0];
			reg64 |= (u64)asid << TTBRn_ASID_SHIFT;
			writeq_relaxed(reg64, cb_base + ARM_SMMU_CB_TTBR0);
			reg64 = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[1];
			reg64 |= (u64)asid << TTBRn_ASID_SHIFT;
			writeq_relaxed(reg64, cb_base + ARM_SMMU_CB_TTBR1);
		}
	} else {
		reg64 = pgtbl_cfg->arm_lpae_s2_cfg.vttbr;
		writeq_relaxed(reg64, cb_base + ARM_SMMU_CB_TTBR0);
	}

	/* TTBCR */
	if (stage1) {
		if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) {
			reg = pgtbl_cfg->arm_v7s_cfg.tcr;
			reg2 = 0;
		} else {
			reg = pgtbl_cfg->arm_lpae_s1_cfg.tcr;
			reg2 = pgtbl_cfg->arm_lpae_s1_cfg.tcr >> 32;
			reg2 |= TTBCR2_SEP_UPSTREAM;
		}
		if (smmu->version > ARM_SMMU_V1)
			writel_relaxed(reg2, cb_base + ARM_SMMU_CB_TTBCR2);
	} else {
		reg = pgtbl_cfg->arm_lpae_s2_cfg.vtcr;
	}
	writel_relaxed(reg, cb_base + ARM_SMMU_CB_TTBCR);

	/* MAIRs (stage-1 only) */
	if (stage1) {
		if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) {
			reg = pgtbl_cfg->arm_v7s_cfg.prrr;
			reg2 = pgtbl_cfg->arm_v7s_cfg.nmrr;
		} else {
			reg = pgtbl_cfg->arm_lpae_s1_cfg.mair[0];
			reg2 = pgtbl_cfg->arm_lpae_s1_cfg.mair[1];
		}
		writel_relaxed(reg, cb_base + ARM_SMMU_CB_S1_MAIR0);
		writel_relaxed(reg2, cb_base + ARM_SMMU_CB_S1_MAIR1);
	}

	/* SCTLR */
	reg = SCTLR_CFIE | SCTLR_CFRE | SCTLR_AFE | SCTLR_TRE | SCTLR_M;
	if (stage1)
		reg |= SCTLR_S1_ASIDPNE;
#ifdef __BIG_ENDIAN
	reg |= SCTLR_E;
#endif
	writel_relaxed(reg, cb_base + ARM_SMMU_CB_SCTLR);
}

static int arm_smmu_init_domain_context(struct iommu_domain *domain,
					struct arm_smmu_device *smmu)
{
	int irq, start, ret = 0;
	unsigned long ias, oas;
	struct io_pgtable_ops *pgtbl_ops;
	struct io_pgtable_cfg pgtbl_cfg;
	enum io_pgtable_fmt fmt;
	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
	struct arm_smmu_cfg *cfg = &smmu_domain->cfg;

	mutex_lock(&smmu_domain->init_mutex);
	if (smmu_domain->smmu)
		goto out_unlock;

	/* We're bypassing these SIDs, so don't allocate an actual context */
	if (domain->type == IOMMU_DOMAIN_DMA) {
		smmu_domain->smmu = smmu;
		goto out_unlock;
	}

	/*
	 * Mapping the requested stage onto what we support is surprisingly
	 * complicated, mainly because the spec allows S1+S2 SMMUs without
	 * support for nested translation. That means we end up with the
	 * following table:
	 *
	 * Requested        Supported        Actual
	 *     S1               N              S1
	 *     S1             S1+S2            S1
	 *     S1               S2             S2
	 *     S1               S1             S1
	 *     N                N              N
	 *     N              S1+S2            S2
	 *     N                S2             S2
	 *     N                S1             S1
	 *
	 * Note that you can't actually request stage-2 mappings.
	 */
	if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S1))
		smmu_domain->stage = ARM_SMMU_DOMAIN_S2;
	if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S2))
		smmu_domain->stage = ARM_SMMU_DOMAIN_S1;

	/*
	 * Choosing a suitable context format is even more fiddly. Until we
	 * grow some way for the caller to express a preference, and/or move
	 * the decision into the io-pgtable code where it arguably belongs,
	 * just aim for the closest thing to the rest of the system, and hope
	 * that the hardware isn't esoteric enough that we can't assume AArch64
	 * support to be a superset of AArch32 support...
	 */
	if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH32_L)
		cfg->fmt = ARM_SMMU_CTX_FMT_AARCH32_L;
	if (IS_ENABLED(CONFIG_IOMMU_IO_PGTABLE_ARMV7S) &&
	    !IS_ENABLED(CONFIG_64BIT) && !IS_ENABLED(CONFIG_ARM_LPAE) &&
	    (smmu->features & ARM_SMMU_FEAT_FMT_AARCH32_S) &&
	    (smmu_domain->stage == ARM_SMMU_DOMAIN_S1))
		cfg->fmt = ARM_SMMU_CTX_FMT_AARCH32_S;
	if ((IS_ENABLED(CONFIG_64BIT) || cfg->fmt == ARM_SMMU_CTX_FMT_NONE) &&
	    (smmu->features & (ARM_SMMU_FEAT_FMT_AARCH64_64K |
			       ARM_SMMU_FEAT_FMT_AARCH64_16K |
			       ARM_SMMU_FEAT_FMT_AARCH64_4K)))
		cfg->fmt = ARM_SMMU_CTX_FMT_AARCH64;

	if (cfg->fmt == ARM_SMMU_CTX_FMT_NONE) {
		ret = -EINVAL;
		goto out_unlock;
	}

	switch (smmu_domain->stage) {
	case ARM_SMMU_DOMAIN_S1:
		cfg->cbar = CBAR_TYPE_S1_TRANS_S2_BYPASS;
		start = smmu->num_s2_context_banks;
		ias = smmu->va_size;
		oas = smmu->ipa_size;
		if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64) {
			fmt = ARM_64_LPAE_S1;
		} else if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_L) {
			fmt = ARM_32_LPAE_S1;
			ias = min(ias, 32UL);
			oas = min(oas, 40UL);
		} else {
			fmt = ARM_V7S;
			ias = min(ias, 32UL);
			oas = min(oas, 32UL);
		}
		break;
	case ARM_SMMU_DOMAIN_NESTED:
		/*
		 * We will likely want to change this if/when KVM gets
		 * involved.
		 */
	case ARM_SMMU_DOMAIN_S2:
		cfg->cbar = CBAR_TYPE_S2_TRANS;
		start = 0;
		ias = smmu->ipa_size;
		oas = smmu->pa_size;
		if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64) {
			fmt = ARM_64_LPAE_S2;
		} else {
			fmt = ARM_32_LPAE_S2;
			ias = min(ias, 40UL);
			oas = min(oas, 40UL);
		}
		break;
	default:
		ret = -EINVAL;
		goto out_unlock;
	}

	ret = __arm_smmu_alloc_bitmap(smmu->context_map, start,
				      smmu->num_context_banks);
	if (ret < 0)
		goto out_unlock;

	cfg->cbndx = ret;
	if (smmu->version < ARM_SMMU_V2) {
		cfg->irptndx = atomic_inc_return(&smmu->irptndx);
		cfg->irptndx %= smmu->num_context_irqs;
	} else {
		cfg->irptndx = cfg->cbndx;
	}

	pgtbl_cfg = (struct io_pgtable_cfg) {
		.pgsize_bitmap	= smmu->pgsize_bitmap,
		.ias		= ias,
		.oas		= oas,
		.tlb		= &arm_smmu_gather_ops,
		.iommu_dev	= smmu->dev,
	};

	smmu_domain->smmu = smmu;
	pgtbl_ops = alloc_io_pgtable_ops(fmt, &pgtbl_cfg, smmu_domain);
	if (!pgtbl_ops) {
		ret = -ENOMEM;
		goto out_clear_smmu;
	}

	/* Update the domain's page sizes to reflect the page table format */
	domain->pgsize_bitmap = pgtbl_cfg.pgsize_bitmap;

	/* Initialise the context bank with our page table cfg */
	arm_smmu_init_context_bank(smmu_domain, &pgtbl_cfg);

	/*
	 * Request context fault interrupt. Do this last to avoid the
	 * handler seeing a half-initialised domain state.
	 */
	irq = smmu->irqs[smmu->num_global_irqs + cfg->irptndx];
	ret = devm_request_irq(smmu->dev, irq, arm_smmu_context_fault,
			       IRQF_SHARED, "arm-smmu-context-fault", domain);
	if (ret < 0) {
		dev_err(smmu->dev, "failed to request context IRQ %d (%u)\n",
			cfg->irptndx, irq);
		cfg->irptndx = INVALID_IRPTNDX;
	}

	mutex_unlock(&smmu_domain->init_mutex);

	/* Publish page table ops for map/unmap */
	smmu_domain->pgtbl_ops = pgtbl_ops;
	return 0;

out_clear_smmu:
	smmu_domain->smmu = NULL;
out_unlock:
	mutex_unlock(&smmu_domain->init_mutex);
	return ret;
}

static void arm_smmu_destroy_domain_context(struct iommu_domain *domain)
{
	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
	struct arm_smmu_device *smmu = smmu_domain->smmu;
	struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
	void __iomem *cb_base;
	int irq;

	if (!smmu || domain->type == IOMMU_DOMAIN_DMA)
		return;

	/*
	 * Disable the context bank and free the page tables before freeing
	 * it.
	 */
	cb_base = ARM_SMMU_CB_BASE(smmu) + ARM_SMMU_CB(smmu, cfg->cbndx);
	writel_relaxed(0, cb_base + ARM_SMMU_CB_SCTLR);

	if (cfg->irptndx != INVALID_IRPTNDX) {
		irq = smmu->irqs[smmu->num_global_irqs + cfg->irptndx];
		devm_free_irq(smmu->dev, irq, domain);
	}

	free_io_pgtable_ops(smmu_domain->pgtbl_ops);
	__arm_smmu_free_bitmap(smmu->context_map, cfg->cbndx);
}

static struct iommu_domain *arm_smmu_domain_alloc(unsigned type)
{
	struct arm_smmu_domain *smmu_domain;

	if (type != IOMMU_DOMAIN_UNMANAGED && type != IOMMU_DOMAIN_DMA)
		return NULL;
	/*
	 * Allocate the domain and initialise some of its data structures.
	 * We can't really do anything meaningful until we've added a
	 * master.
	 */
	smmu_domain = kzalloc(sizeof(*smmu_domain), GFP_KERNEL);
	if (!smmu_domain)
		return NULL;

	if (type == IOMMU_DOMAIN_DMA &&
	    iommu_get_dma_cookie(&smmu_domain->domain)) {
		kfree(smmu_domain);
		return NULL;
	}

	mutex_init(&smmu_domain->init_mutex);
	spin_lock_init(&smmu_domain->pgtbl_lock);

	return &smmu_domain->domain;
}

static void arm_smmu_domain_free(struct iommu_domain *domain)
{
	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);

	/*
	 * Free the domain resources. We assume that all devices have
	 * already been detached.
	 */
	iommu_put_dma_cookie(domain);
	arm_smmu_destroy_domain_context(domain);
	kfree(smmu_domain);
}

static int arm_smmu_alloc_smr(struct arm_smmu_device *smmu)
{
	int i;

	for (i = 0; i < smmu->num_mapping_groups; i++)
		if (!cmpxchg(&smmu->smrs[i].valid, false, true))
			return i;

	return INVALID_SMENDX;
}

static void arm_smmu_free_smr(struct arm_smmu_device *smmu, int idx)
{
	writel_relaxed(~SMR_VALID, ARM_SMMU_GR0(smmu) + ARM_SMMU_GR0_SMR(idx));
	WRITE_ONCE(smmu->smrs[idx].valid, false);
}

static void arm_smmu_write_smr(struct arm_smmu_device *smmu, int idx)
{
	struct arm_smmu_smr *smr = smmu->smrs + idx;
	u32 reg = smr->id << SMR_ID_SHIFT | smr->mask << SMR_MASK_SHIFT;

	if (smr->valid)
		reg |= SMR_VALID;
	writel_relaxed(reg, ARM_SMMU_GR0(smmu) + ARM_SMMU_GR0_SMR(idx));
}

static void arm_smmu_write_s2cr(struct arm_smmu_device *smmu, int idx)
{
	struct arm_smmu_s2cr *s2cr = smmu->s2crs + idx;
	u32 reg = (s2cr->type & S2CR_TYPE_MASK) << S2CR_TYPE_SHIFT |
		  (s2cr->cbndx & S2CR_CBNDX_MASK) << S2CR_CBNDX_SHIFT |
		  (s2cr->privcfg & S2CR_PRIVCFG_MASK) << S2CR_PRIVCFG_SHIFT;

	writel_relaxed(reg, ARM_SMMU_GR0(smmu) + ARM_SMMU_GR0_S2CR(idx));
}

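/*
 * Illustrative example (not part of the original source): steering a stream
 * to context bank 3 as S2CR_TYPE_TRANS with S2CR_PRIVCFG_UNPRIV encodes as
 * (0 << S2CR_TYPE_SHIFT) | (3 << S2CR_CBNDX_SHIFT) |
 * (S2CR_PRIVCFG_UNPRIV << S2CR_PRIVCFG_SHIFT), i.e. 0x02000003 assuming the
 * enum values above (TRANS == 0, UNPRIV == 2).
 */
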
static void arm_smmu_write_sme(struct arm_smmu_device *smmu, int idx)
{
	arm_smmu_write_s2cr(smmu, idx);
	if (smmu->smrs)
		arm_smmu_write_smr(smmu, idx);
}

static int arm_smmu_master_alloc_smes(struct arm_smmu_device *smmu,
				      struct arm_smmu_master_cfg *cfg)
{
	struct arm_smmu_smr *smrs = smmu->smrs;
	int i, idx;

	/* Allocate the SMRs on the SMMU */
	for (i = 0; i < cfg->num_streamids; ++i) {
		if (cfg->smendx[i] != INVALID_SMENDX)
			return -EEXIST;

		/* ...except on stream indexing hardware, of course */
		if (!smrs) {
			cfg->smendx[i] = cfg->streamids[i];
			continue;
		}

		idx = arm_smmu_alloc_smr(smmu);
		if (idx < 0) {
			dev_err(smmu->dev, "failed to allocate free SMR\n");
			goto err_free_smrs;
		}
		cfg->smendx[i] = idx;

		smrs[idx].id = cfg->streamids[i];
		smrs[idx].mask = 0; /* We don't currently share SMRs */
	}

	if (!smrs)
		return 0;

	/* It worked! Now, poke the actual hardware */
	for (i = 0; i < cfg->num_streamids; ++i)
		arm_smmu_write_smr(smmu, cfg->smendx[i]);

	return 0;

err_free_smrs:
	while (i--) {
		arm_smmu_free_smr(smmu, cfg->smendx[i]);
		cfg->smendx[i] = INVALID_SMENDX;
	}
	return -ENOSPC;
}

static void arm_smmu_master_free_smes(struct arm_smmu_master_cfg *cfg)
{
	struct arm_smmu_device *smmu = cfg->smmu;
	int i;

	/*
	 * We *must* clear the S2CR first, because freeing the SMR means
	 * that it can be re-allocated immediately.
	 */
	for (i = 0; i < cfg->num_streamids; ++i) {
		int idx = cfg->smendx[i];

		/* An IOMMU group is torn down by the first device to be removed */
		if (idx == INVALID_SMENDX)
			return;

		smmu->s2crs[idx] = s2cr_init_val;
		arm_smmu_write_s2cr(smmu, idx);
	}
	/* Sync S2CR updates before touching anything else */
	__iowmb();

	/* Invalidate the SMRs before freeing back to the allocator */
	for (i = 0; i < cfg->num_streamids; ++i) {
		if (smmu->smrs)
			arm_smmu_free_smr(smmu, cfg->smendx[i]);

		cfg->smendx[i] = INVALID_SMENDX;
	}
}

static int arm_smmu_domain_add_master(struct arm_smmu_domain *smmu_domain,
				      struct arm_smmu_master_cfg *cfg)
{
	int i, ret = 0;
	struct arm_smmu_device *smmu = smmu_domain->smmu;
	struct arm_smmu_s2cr *s2cr = smmu->s2crs;
	enum arm_smmu_s2cr_type type = S2CR_TYPE_TRANS;
	u8 cbndx = smmu_domain->cfg.cbndx;

	if (cfg->smendx[0] == INVALID_SMENDX)
		ret = arm_smmu_master_alloc_smes(smmu, cfg);
	if (ret)
		return ret;

	/*
	 * FIXME: This won't be needed once we have IOMMU-backed DMA ops
	 * for all devices behind the SMMU. Note that we need to take
	 * care configuring SMRs for devices that are both a platform_device
	 * and a PCI device (i.e. a PCI host controller).
	 */
	if (smmu_domain->domain.type == IOMMU_DOMAIN_DMA)
		type = S2CR_TYPE_BYPASS;

	for (i = 0; i < cfg->num_streamids; ++i) {
		int idx = cfg->smendx[i];

		/* Devices in an IOMMU group may already be configured */
		if (type == s2cr[idx].type && cbndx == s2cr[idx].cbndx)
			break;

		s2cr[idx].type = type;
		s2cr[idx].privcfg = S2CR_PRIVCFG_UNPRIV;
		s2cr[idx].cbndx = cbndx;
		arm_smmu_write_s2cr(smmu, idx);
	}
	return 0;
}

static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
{
	int ret;
	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
	struct arm_smmu_master_cfg *cfg = dev->archdata.iommu;

	if (!cfg) {
		dev_err(dev, "cannot attach to SMMU, is it on the same bus?\n");
		return -ENXIO;
	}

	/* Ensure that the domain is finalised */
	ret = arm_smmu_init_domain_context(domain, cfg->smmu);
	if (ret < 0)
		return ret;

	/*
	 * Sanity check the domain. We don't support domains across
	 * different SMMUs.
	 */
	if (smmu_domain->smmu != cfg->smmu) {
		dev_err(dev,
			"cannot attach to SMMU %s whilst already attached to domain on SMMU %s\n",
			dev_name(smmu_domain->smmu->dev), dev_name(cfg->smmu->dev));
		return -EINVAL;
	}

	/* Looks ok, so add the device to the domain */
	return arm_smmu_domain_add_master(smmu_domain, cfg);
}

static int arm_smmu_map(struct iommu_domain *domain, unsigned long iova,
			phys_addr_t paddr, size_t size, int prot)
{
	int ret;
	unsigned long flags;
	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
	struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;

	if (!ops)
		return -ENODEV;

	spin_lock_irqsave(&smmu_domain->pgtbl_lock, flags);
	ret = ops->map(ops, iova, paddr, size, prot);
	spin_unlock_irqrestore(&smmu_domain->pgtbl_lock, flags);
	return ret;
}

static size_t arm_smmu_unmap(struct iommu_domain *domain, unsigned long iova,
			     size_t size)
{
	size_t ret;
	unsigned long flags;
	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
	struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;

	if (!ops)
		return 0;

	spin_lock_irqsave(&smmu_domain->pgtbl_lock, flags);
	ret = ops->unmap(ops, iova, size);
	spin_unlock_irqrestore(&smmu_domain->pgtbl_lock, flags);
	return ret;
}

static phys_addr_t arm_smmu_iova_to_phys_hard(struct iommu_domain *domain,
					      dma_addr_t iova)
{
	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
	struct arm_smmu_device *smmu = smmu_domain->smmu;
	struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
	struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;
	struct device *dev = smmu->dev;
	void __iomem *cb_base;
	u32 tmp;
	u64 phys;
	unsigned long va;

	cb_base = ARM_SMMU_CB_BASE(smmu) + ARM_SMMU_CB(smmu, cfg->cbndx);

	/* ATS1 registers can only be written atomically */
	va = iova & ~0xfffUL;
	if (smmu->version == ARM_SMMU_V2)
		smmu_write_atomic_lq(va, cb_base + ARM_SMMU_CB_ATS1PR);
	else /* Register is only 32-bit in v1 */
		writel_relaxed(va, cb_base + ARM_SMMU_CB_ATS1PR);

	if (readl_poll_timeout_atomic(cb_base + ARM_SMMU_CB_ATSR, tmp,
				      !(tmp & ATSR_ACTIVE), 5, 50)) {
		dev_err(dev,
			"iova to phys timed out on %pad. Falling back to software table walk.\n",
			&iova);
		return ops->iova_to_phys(ops, iova);
	}

	phys = readq_relaxed(cb_base + ARM_SMMU_CB_PAR);
	if (phys & CB_PAR_F) {
		dev_err(dev, "translation fault!\n");
		dev_err(dev, "PAR = 0x%llx\n", phys);
		return 0;
	}

	return (phys & GENMASK_ULL(39, 12)) | (iova & 0xfff);
}

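/*
 * Illustrative note (not part of the original source): the hardware walk
 * above should yield the same result as the software walk
 * ops->iova_to_phys(ops, iova), but exercises the SMMU's ATS1PR
 * address-translation operation instead of reading the page tables from
 * the CPU.
 */
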
static phys_addr_t arm_smmu_iova_to_phys(struct iommu_domain *domain,
					 dma_addr_t iova)
{
	phys_addr_t ret;
	unsigned long flags;
	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
	struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;

	if (!ops)
		return 0;

	spin_lock_irqsave(&smmu_domain->pgtbl_lock, flags);
	if (smmu_domain->smmu->features & ARM_SMMU_FEAT_TRANS_OPS &&
	    smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
		ret = arm_smmu_iova_to_phys_hard(domain, iova);
	} else {
		ret = ops->iova_to_phys(ops, iova);
	}
	spin_unlock_irqrestore(&smmu_domain->pgtbl_lock, flags);

	return ret;
}

static bool arm_smmu_capable(enum iommu_cap cap)
{
	switch (cap) {
	case IOMMU_CAP_CACHE_COHERENCY:
		/*
		 * Return true here as the SMMU can always send out coherent
		 * requests.
		 */
		return true;
	case IOMMU_CAP_INTR_REMAP:
		return true; /* MSIs are just memory writes */
	case IOMMU_CAP_NOEXEC:
		return true;
	default:
		return false;
	}
}

static int arm_smmu_add_device(struct device *dev)
{
	struct arm_smmu_master_cfg *cfg;
	struct iommu_group *group;
	int i, ret;

	ret = arm_smmu_register_legacy_master(dev);
	cfg = dev->archdata.iommu;
	if (ret)
		goto out_free;

	ret = -EINVAL;
	for (i = 0; i < cfg->num_streamids; i++) {
		u16 sid = cfg->streamids[i];

		if (sid & ~cfg->smmu->streamid_mask) {
			dev_err(dev, "stream ID 0x%x out of range for SMMU (0x%x)\n",
				sid, cfg->smmu->streamid_mask);
			goto out_free;
		}
		cfg->smendx[i] = INVALID_SMENDX;
	}

	group = iommu_group_get_for_dev(dev);
	if (IS_ERR(group)) {
		ret = PTR_ERR(group);
		goto out_free;
	}
	iommu_group_put(group);
	return 0;

out_free:
	kfree(cfg);
	dev->archdata.iommu = NULL;
	return ret;
}

static void arm_smmu_remove_device(struct device *dev)
{
	struct arm_smmu_master_cfg *cfg = dev->archdata.iommu;

	if (!cfg)
		return;

	arm_smmu_master_free_smes(cfg);
	iommu_group_remove_device(dev);
	kfree(cfg);
	dev->archdata.iommu = NULL;
}

static struct iommu_group *arm_smmu_device_group(struct device *dev)
{
	struct iommu_group *group;

	if (dev_is_pci(dev))
		group = pci_device_group(dev);
	else
		group = generic_device_group(dev);

	return group;
}

static int arm_smmu_domain_get_attr(struct iommu_domain *domain,
				    enum iommu_attr attr, void *data)
{
	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);

	switch (attr) {
	case DOMAIN_ATTR_NESTING:
		*(int *)data = (smmu_domain->stage == ARM_SMMU_DOMAIN_NESTED);
		return 0;
	default:
		return -ENODEV;
	}
}

static int arm_smmu_domain_set_attr(struct iommu_domain *domain,
				    enum iommu_attr attr, void *data)
{
	int ret = 0;
	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);

	mutex_lock(&smmu_domain->init_mutex);

	switch (attr) {
	case DOMAIN_ATTR_NESTING:
		if (smmu_domain->smmu) {
			ret = -EPERM;
			goto out_unlock;
		}

		if (*(int *)data)
			smmu_domain->stage = ARM_SMMU_DOMAIN_NESTED;
		else
			smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
		break;
	default:
		ret = -ENODEV;
	}

out_unlock:
	mutex_unlock(&smmu_domain->init_mutex);
	return ret;
}

static struct iommu_ops arm_smmu_ops = {
	.capable		= arm_smmu_capable,
	.domain_alloc		= arm_smmu_domain_alloc,
	.domain_free		= arm_smmu_domain_free,
	.attach_dev		= arm_smmu_attach_dev,
	.map			= arm_smmu_map,
	.unmap			= arm_smmu_unmap,
	.map_sg			= default_iommu_map_sg,
	.iova_to_phys		= arm_smmu_iova_to_phys,
	.add_device		= arm_smmu_add_device,
	.remove_device		= arm_smmu_remove_device,
	.device_group		= arm_smmu_device_group,
	.domain_get_attr	= arm_smmu_domain_get_attr,
	.domain_set_attr	= arm_smmu_domain_set_attr,
	.pgsize_bitmap		= -1UL, /* Restricted during device attach */
};

static void arm_smmu_device_reset(struct arm_smmu_device *smmu)
{
	void __iomem *gr0_base = ARM_SMMU_GR0(smmu);
	void __iomem *cb_base;
	int i;
	u32 reg, major;

	/* clear global FSR */
	reg = readl_relaxed(ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sGFSR);
	writel(reg, ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sGFSR);

	/*
	 * Reset stream mapping groups: Initial values mark all SMRn as
	 * invalid and all S2CRn as bypass unless overridden.
	 */
	for (i = 0; i < smmu->num_mapping_groups; ++i)
		arm_smmu_write_sme(smmu, i);

	/*
	 * Before clearing ARM_MMU500_ACTLR_CPRE, need to
	 * clear CACHE_LOCK bit of ACR first. And, CACHE_LOCK
	 * bit is only present in MMU-500r2 onwards.
	 */
	reg = readl_relaxed(gr0_base + ARM_SMMU_GR0_ID7);
	major = (reg >> ID7_MAJOR_SHIFT) & ID7_MAJOR_MASK;
	if ((smmu->model == ARM_MMU500) && (major >= 2)) {
		reg = readl_relaxed(gr0_base + ARM_SMMU_GR0_sACR);
		reg &= ~ARM_MMU500_ACR_CACHE_LOCK;
		writel_relaxed(reg, gr0_base + ARM_SMMU_GR0_sACR);
	}

	/* Make sure all context banks are disabled and clear CB_FSR */
	for (i = 0; i < smmu->num_context_banks; ++i) {
		cb_base = ARM_SMMU_CB_BASE(smmu) + ARM_SMMU_CB(smmu, i);
		writel_relaxed(0, cb_base + ARM_SMMU_CB_SCTLR);
		writel_relaxed(FSR_FAULT, cb_base + ARM_SMMU_CB_FSR);
		/*
		 * Disable MMU-500's not-particularly-beneficial next-page
		 * prefetcher for the sake of errata #841119 and #826419.
		 */
		if (smmu->model == ARM_MMU500) {
			reg = readl_relaxed(cb_base + ARM_SMMU_CB_ACTLR);
			reg &= ~ARM_MMU500_ACTLR_CPRE;
			writel_relaxed(reg, cb_base + ARM_SMMU_CB_ACTLR);
		}
	}

	/* Invalidate the TLB, just in case */
	writel_relaxed(0, gr0_base + ARM_SMMU_GR0_TLBIALLH);
	writel_relaxed(0, gr0_base + ARM_SMMU_GR0_TLBIALLNSNH);

	reg = readl_relaxed(ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sCR0);

	/* Enable fault reporting */
	reg |= (sCR0_GFRE | sCR0_GFIE | sCR0_GCFGFRE | sCR0_GCFGFIE);

	/* Disable TLB broadcasting. */
	reg |= (sCR0_VMIDPNE | sCR0_PTM);

	/* Enable client access, handling unmatched streams as appropriate */
	reg &= ~sCR0_CLIENTPD;
	if (disable_bypass)
		reg |= sCR0_USFCFG;
	else
		reg &= ~sCR0_USFCFG;

	/* Disable forced broadcasting */
	reg &= ~sCR0_FB;

	/* Don't upgrade barriers */
	reg &= ~(sCR0_BSU_MASK << sCR0_BSU_SHIFT);

	if (smmu->features & ARM_SMMU_FEAT_VMID16)
		reg |= sCR0_VMID16EN;

	/* Push the button */
	__arm_smmu_tlb_sync(smmu);
	writel(reg, ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sCR0);
}

static int arm_smmu_id_size_to_bits(int size)
{
	switch (size) {
	case 0:
		return 32;
	case 1:
		return 36;
	case 2:
		return 40;
	case 3:
		return 42;
	case 4:
		return 44;
	case 5:
	default:
		return 48;
	}
}

static int arm_smmu_device_cfg_probe(struct arm_smmu_device *smmu)
{
	unsigned long size;
	void __iomem *gr0_base = ARM_SMMU_GR0(smmu);
	u32 id;
	bool cttw_dt, cttw_reg;
	int i;

	dev_notice(smmu->dev, "probing hardware configuration...\n");
	dev_notice(smmu->dev, "SMMUv%d with:\n",
		   smmu->version == ARM_SMMU_V2 ? 2 : 1);

	/* ID0 */
	id = readl_relaxed(gr0_base + ARM_SMMU_GR0_ID0);

	/* Restrict available stages based on module parameter */
	if (force_stage == 1)
		id &= ~(ID0_S2TS | ID0_NTS);
	else if (force_stage == 2)
		id &= ~(ID0_S1TS | ID0_NTS);

	if (id & ID0_S1TS) {
		smmu->features |= ARM_SMMU_FEAT_TRANS_S1;
		dev_notice(smmu->dev, "\tstage 1 translation\n");
	}

	if (id & ID0_S2TS) {
		smmu->features |= ARM_SMMU_FEAT_TRANS_S2;
		dev_notice(smmu->dev, "\tstage 2 translation\n");
	}

	if (id & ID0_NTS) {
		smmu->features |= ARM_SMMU_FEAT_TRANS_NESTED;
		dev_notice(smmu->dev, "\tnested translation\n");
	}

	if (!(smmu->features &
	      (ARM_SMMU_FEAT_TRANS_S1 | ARM_SMMU_FEAT_TRANS_S2))) {
		dev_err(smmu->dev, "\tno translation support!\n");
		return -ENODEV;
	}

	if ((id & ID0_S1TS) &&
	    ((smmu->version < ARM_SMMU_V2) || !(id & ID0_ATOSNS))) {
		smmu->features |= ARM_SMMU_FEAT_TRANS_OPS;
		dev_notice(smmu->dev, "\taddress translation ops\n");
	}

	/*
	 * In order for DMA API calls to work properly, we must defer to what
	 * the DT says about coherency, regardless of what the hardware claims.
	 * Fortunately, this also opens up a workaround for systems where the
	 * ID register value has ended up configured incorrectly.
	 */
	cttw_dt = of_dma_is_coherent(smmu->dev->of_node);
	cttw_reg = !!(id & ID0_CTTW);
	if (cttw_dt)
		smmu->features |= ARM_SMMU_FEAT_COHERENT_WALK;
	if (cttw_dt || cttw_reg)
		dev_notice(smmu->dev, "\t%scoherent table walk\n",
			   cttw_dt ? "" : "non-");
	if (cttw_dt != cttw_reg)
		dev_notice(smmu->dev,
			   "\t(IDR0.CTTW overridden by dma-coherent property)\n");

	/* Max. number of entries we have for stream matching/indexing */
	size = 1 << ((id >> ID0_NUMSIDB_SHIFT) & ID0_NUMSIDB_MASK);
	smmu->streamid_mask = size - 1;
	if (id & ID0_SMS) {
		u32 smr;

		smmu->features |= ARM_SMMU_FEAT_STREAM_MATCH;
		size = (id >> ID0_NUMSMRG_SHIFT) & ID0_NUMSMRG_MASK;
		if (size == 0) {
			dev_err(smmu->dev,
				"stream-matching supported, but no SMRs present!\n");
			return -ENODEV;
		}

		/*
		 * SMR.ID bits may not be preserved if the corresponding MASK
		 * bits are set, so check each one separately. We can reject
		 * masters later if they try to claim IDs outside these masks.
		 */
		smr = smmu->streamid_mask << SMR_ID_SHIFT;
		writel_relaxed(smr, gr0_base + ARM_SMMU_GR0_SMR(0));
		smr = readl_relaxed(gr0_base + ARM_SMMU_GR0_SMR(0));
		smmu->streamid_mask = smr >> SMR_ID_SHIFT;

		smr = smmu->streamid_mask << SMR_MASK_SHIFT;
		writel_relaxed(smr, gr0_base + ARM_SMMU_GR0_SMR(0));
		smr = readl_relaxed(gr0_base + ARM_SMMU_GR0_SMR(0));
		smmu->smr_mask_mask = smr >> SMR_MASK_SHIFT;

		/* Zero-initialised to mark as invalid */
		smmu->smrs = devm_kcalloc(smmu->dev, size, sizeof(*smmu->smrs),
					  GFP_KERNEL);
		if (!smmu->smrs)
			return -ENOMEM;

		dev_notice(smmu->dev,
			   "\tstream matching with %lu register groups, mask 0x%x",
			   size, smmu->smr_mask_mask);
	}
	/* s2cr->type == 0 means translation, so initialise explicitly */
	smmu->s2crs = devm_kmalloc_array(smmu->dev, size, sizeof(*smmu->s2crs),
					 GFP_KERNEL);
	if (!smmu->s2crs)
		return -ENOMEM;
	for (i = 0; i < size; i++)
		smmu->s2crs[i] = s2cr_init_val;

	smmu->num_mapping_groups = size;

	if (smmu->version < ARM_SMMU_V2 || !(id & ID0_PTFS_NO_AARCH32)) {
		smmu->features |= ARM_SMMU_FEAT_FMT_AARCH32_L;
		if (!(id & ID0_PTFS_NO_AARCH32S))
			smmu->features |= ARM_SMMU_FEAT_FMT_AARCH32_S;
	}

	/* ID1 */
	id = readl_relaxed(gr0_base + ARM_SMMU_GR0_ID1);
	smmu->pgshift = (id & ID1_PAGESIZE) ? 16 : 12;

	/* Check for size mismatch of SMMU address space from mapped region */
	size = 1 << (((id >> ID1_NUMPAGENDXB_SHIFT) & ID1_NUMPAGENDXB_MASK) + 1);
	size *= 2 << smmu->pgshift;
	if (smmu->size != size)
		dev_warn(smmu->dev,
			 "SMMU address space size (0x%lx) differs from mapped region size (0x%lx)!\n",
			 size, smmu->size);

	smmu->num_s2_context_banks = (id >> ID1_NUMS2CB_SHIFT) & ID1_NUMS2CB_MASK;
	smmu->num_context_banks = (id >> ID1_NUMCB_SHIFT) & ID1_NUMCB_MASK;
	if (smmu->num_s2_context_banks > smmu->num_context_banks) {
		dev_err(smmu->dev, "impossible number of S2 context banks!\n");
		return -ENODEV;
	}
	dev_notice(smmu->dev, "\t%u context banks (%u stage-2 only)\n",
		   smmu->num_context_banks, smmu->num_s2_context_banks);
	/*
	 * Cavium CN88xx erratum #27704.
	 * Ensure ASID and VMID allocation is unique across all SMMUs in
	 * the system.
	 */
	if (smmu->model == CAVIUM_SMMUV2) {
		smmu->cavium_id_base =
			atomic_add_return(smmu->num_context_banks,
					  &cavium_smmu_context_count);
		smmu->cavium_id_base -= smmu->num_context_banks;
	}

	/* ID2 */
	id = readl_relaxed(gr0_base + ARM_SMMU_GR0_ID2);
	size = arm_smmu_id_size_to_bits((id >> ID2_IAS_SHIFT) & ID2_IAS_MASK);
	smmu->ipa_size = size;

	/* The output mask is also applied for bypass */
	size = arm_smmu_id_size_to_bits((id >> ID2_OAS_SHIFT) & ID2_OAS_MASK);
	smmu->pa_size = size;

	if (id & ID2_VMID16)
		smmu->features |= ARM_SMMU_FEAT_VMID16;

	/*
	 * What the page table walker can address actually depends on which
	 * descriptor format is in use, but since a) we don't know that yet,
	 * and b) it can vary per context bank, this will have to do...
	 */
	if (dma_set_mask_and_coherent(smmu->dev, DMA_BIT_MASK(size)))
		dev_warn(smmu->dev,
			 "failed to set DMA mask for table walker\n");

	if (smmu->version < ARM_SMMU_V2) {
		smmu->va_size = smmu->ipa_size;
		if (smmu->version == ARM_SMMU_V1_64K)
			smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_64K;
	} else {
		size = (id >> ID2_UBS_SHIFT) & ID2_UBS_MASK;
		smmu->va_size = arm_smmu_id_size_to_bits(size);
		if (id & ID2_PTFS_4K)
			smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_4K;
		if (id & ID2_PTFS_16K)
			smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_16K;
		if (id & ID2_PTFS_64K)
			smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_64K;
	}

	/* Now we've corralled the various formats, what'll it do? */
	if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH32_S)
		smmu->pgsize_bitmap |= SZ_4K | SZ_64K | SZ_1M | SZ_16M;
	if (smmu->features &
	    (ARM_SMMU_FEAT_FMT_AARCH32_L | ARM_SMMU_FEAT_FMT_AARCH64_4K))
		smmu->pgsize_bitmap |= SZ_4K | SZ_2M | SZ_1G;
	if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH64_16K)
		smmu->pgsize_bitmap |= SZ_16K | SZ_32M;
	if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH64_64K)
		smmu->pgsize_bitmap |= SZ_64K | SZ_512M;

	if (arm_smmu_ops.pgsize_bitmap == -1UL)
		arm_smmu_ops.pgsize_bitmap = smmu->pgsize_bitmap;
	else
		arm_smmu_ops.pgsize_bitmap |= smmu->pgsize_bitmap;
	dev_notice(smmu->dev, "\tSupported page sizes: 0x%08lx\n",
		   smmu->pgsize_bitmap);

	if (smmu->features & ARM_SMMU_FEAT_TRANS_S1)
		dev_notice(smmu->dev, "\tStage-1: %lu-bit VA -> %lu-bit IPA\n",
			   smmu->va_size, smmu->ipa_size);

	if (smmu->features & ARM_SMMU_FEAT_TRANS_S2)
		dev_notice(smmu->dev, "\tStage-2: %lu-bit IPA -> %lu-bit PA\n",
			   smmu->ipa_size, smmu->pa_size);

	return 0;
}

struct arm_smmu_match_data {
	enum arm_smmu_arch_version version;
	enum arm_smmu_implementation model;
};

#define ARM_SMMU_MATCH_DATA(name, ver, imp)	\
static struct arm_smmu_match_data name = { .version = ver, .model = imp }

ARM_SMMU_MATCH_DATA(smmu_generic_v1, ARM_SMMU_V1, GENERIC_SMMU);
ARM_SMMU_MATCH_DATA(smmu_generic_v2, ARM_SMMU_V2, GENERIC_SMMU);
ARM_SMMU_MATCH_DATA(arm_mmu401, ARM_SMMU_V1_64K, GENERIC_SMMU);
ARM_SMMU_MATCH_DATA(arm_mmu500, ARM_SMMU_V2, ARM_MMU500);
ARM_SMMU_MATCH_DATA(cavium_smmuv2, ARM_SMMU_V2, CAVIUM_SMMUV2);

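/*
 * Illustrative expansion (not part of the original source):
 * ARM_SMMU_MATCH_DATA(arm_mmu500, ARM_SMMU_V2, ARM_MMU500); expands to
 *
 *	static struct arm_smmu_match_data arm_mmu500 = {
 *		.version = ARM_SMMU_V2, .model = ARM_MMU500 };
 *
 * giving each compatible string below its own version/model pairing.
 */
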
static const struct of_device_id arm_smmu_of_match[] = {
	{ .compatible = "arm,smmu-v1", .data = &smmu_generic_v1 },
	{ .compatible = "arm,smmu-v2", .data = &smmu_generic_v2 },
	{ .compatible = "arm,mmu-400", .data = &smmu_generic_v1 },
	{ .compatible = "arm,mmu-401", .data = &arm_mmu401 },
	{ .compatible = "arm,mmu-500", .data = &arm_mmu500 },
	{ .compatible = "cavium,smmu-v2", .data = &cavium_smmuv2 },
	{ },
};
MODULE_DEVICE_TABLE(of, arm_smmu_of_match);

static int arm_smmu_device_dt_probe(struct platform_device *pdev)
{
	const struct arm_smmu_match_data *data;
	struct resource *res;
	struct arm_smmu_device *smmu;
	struct device *dev = &pdev->dev;
	int num_irqs, i, err;

	smmu = devm_kzalloc(dev, sizeof(*smmu), GFP_KERNEL);
	if (!smmu) {
		dev_err(dev, "failed to allocate arm_smmu_device\n");
		return -ENOMEM;
	}
	smmu->dev = dev;

	data = of_device_get_match_data(dev);
	smmu->version = data->version;
	smmu->model = data->model;

	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
	smmu->base = devm_ioremap_resource(dev, res);
	if (IS_ERR(smmu->base))
		return PTR_ERR(smmu->base);
	smmu->size = resource_size(res);

	if (of_property_read_u32(dev->of_node, "#global-interrupts",
				 &smmu->num_global_irqs)) {
		dev_err(dev, "missing #global-interrupts property\n");
		return -ENODEV;
	}

	num_irqs = 0;
	while ((res = platform_get_resource(pdev, IORESOURCE_IRQ, num_irqs))) {
		num_irqs++;
		if (num_irqs > smmu->num_global_irqs)
			smmu->num_context_irqs++;
	}

	if (!smmu->num_context_irqs) {
		dev_err(dev, "found %d interrupts but expected at least %d\n",
			num_irqs, smmu->num_global_irqs + 1);
		return -ENODEV;
	}

	smmu->irqs = devm_kzalloc(dev, sizeof(*smmu->irqs) * num_irqs,
				  GFP_KERNEL);
	if (!smmu->irqs) {
		dev_err(dev, "failed to allocate %d irqs\n", num_irqs);
		return -ENOMEM;
	}

	for (i = 0; i < num_irqs; ++i) {
		int irq = platform_get_irq(pdev, i);

		if (irq < 0) {
			dev_err(dev, "failed to get irq index %d\n", i);
			return -ENODEV;
		}
		smmu->irqs[i] = irq;
	}

	err = arm_smmu_device_cfg_probe(smmu);
	if (err)
		return err;

	parse_driver_options(smmu);

	if (smmu->version == ARM_SMMU_V2 &&
	    smmu->num_context_banks != smmu->num_context_irqs) {
		dev_err(dev,
			"found only %d context interrupt(s) but %d required\n",
			smmu->num_context_irqs, smmu->num_context_banks);
		return -ENODEV;
	}

	for (i = 0; i < smmu->num_global_irqs; ++i) {
		err = devm_request_irq(smmu->dev, smmu->irqs[i],
				       arm_smmu_global_fault,
				       IRQF_SHARED,
				       "arm-smmu global fault",
				       smmu);
		if (err) {
			dev_err(dev, "failed to request global IRQ %d (%u)\n",
				i, smmu->irqs[i]);
			return err;
		}
	}

	platform_set_drvdata(pdev, smmu);
	arm_smmu_device_reset(smmu);
	return 0;
}

static int arm_smmu_device_remove(struct platform_device *pdev)
{
	struct arm_smmu_device *smmu = platform_get_drvdata(pdev);

	if (!smmu)
		return -ENODEV;

	if (!bitmap_empty(smmu->context_map, ARM_SMMU_MAX_CBS))
		dev_err(&pdev->dev, "removing device with active domains!\n");

	/* Turn the thing off */
	writel(sCR0_CLIENTPD, ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sCR0);
	return 0;
}

static struct platform_driver arm_smmu_driver = {
	.driver	= {
		.name		= "arm-smmu",
		.of_match_table	= of_match_ptr(arm_smmu_of_match),
	},
	.probe	= arm_smmu_device_dt_probe,
	.remove	= arm_smmu_device_remove,
};

static int __init arm_smmu_init(void)
{
	struct device_node *np;
	int ret;

	/*
	 * Play nice with systems that don't have an ARM SMMU by checking that
	 * an ARM SMMU exists in the system before proceeding with the driver
	 * and IOMMU bus operation registration.
	 */
	np = of_find_matching_node(NULL, arm_smmu_of_match);
	if (!np)
		return 0;

	of_node_put(np);

	ret = platform_driver_register(&arm_smmu_driver);
	if (ret)
		return ret;

	/* Oh, for a proper bus abstraction */
	if (!iommu_present(&platform_bus_type))
		bus_set_iommu(&platform_bus_type, &arm_smmu_ops);

#ifdef CONFIG_ARM_AMBA
	if (!iommu_present(&amba_bustype))
		bus_set_iommu(&amba_bustype, &arm_smmu_ops);
#endif

#ifdef CONFIG_PCI
	if (!iommu_present(&pci_bus_type)) {
		pci_request_acs();
		bus_set_iommu(&pci_bus_type, &arm_smmu_ops);
	}
#endif

	return 0;
}

static void __exit arm_smmu_exit(void)
{
	return platform_driver_unregister(&arm_smmu_driver);
}

subsys_initcall(arm_smmu_init);
module_exit(arm_smmu_exit);

MODULE_DESCRIPTION("IOMMU API for ARM architected SMMU implementations");
MODULE_AUTHOR("Will Deacon <will.deacon@arm.com>");
MODULE_LICENSE("GPL v2");