/*
 * IOMMU API for ARM architected SMMU implementations.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
 *
 * Copyright (C) 2013 ARM Limited
 *
 * Author: Will Deacon <will.deacon@arm.com>
 *
 * This driver currently supports:
 *	- SMMUv1 and v2 implementations
 *	- Stream-matching and stream-indexing
 *	- v7/v8 long-descriptor format
 *	- Non-secure access to the SMMU
 *	- Context fault reporting
 */
#define pr_fmt(fmt) "arm-smmu: " fmt

#include <linux/atomic.h>
#include <linux/delay.h>
#include <linux/dma-iommu.h>
#include <linux/dma-mapping.h>
#include <linux/err.h>
#include <linux/interrupt.h>
#include <linux/io.h>
#include <linux/io-64-nonatomic-hi-lo.h>
#include <linux/iommu.h>
#include <linux/iopoll.h>
#include <linux/module.h>
#include <linux/of.h>
#include <linux/of_address.h>
#include <linux/of_device.h>
#include <linux/pci.h>
#include <linux/platform_device.h>
#include <linux/slab.h>
#include <linux/spinlock.h>

#include <linux/amba/bus.h>

#include "io-pgtable.h"
/* Maximum number of stream IDs assigned to a single device */
#define MAX_MASTER_STREAMIDS		128

/* Maximum number of context banks per SMMU */
#define ARM_SMMU_MAX_CBS		128

/* SMMU global address space */
#define ARM_SMMU_GR0(smmu)		((smmu)->base)
#define ARM_SMMU_GR1(smmu)		((smmu)->base + (1 << (smmu)->pgshift))
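
/*
 * Layout sketch (derived from the two macros above): GR0 occupies the first
 * translation-unit-sized page of the SMMU region and GR1 the second, so with
 * the minimum 4K translation unit (pgshift == 12) GR1 starts at base + 0x1000,
 * and at base + 0x10000 for a 64K unit (pgshift == 16).
 */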
/*
 * SMMU global address space with conditional offset to access secure
 * aliases of non-secure registers (e.g. nsCR0: 0x400, nsGFSR: 0x448,
 * nsGFSYNR0: 0x450)
 */
#define ARM_SMMU_GR0_NS(smmu)						\
	((ARM_SMMU_GR0(smmu)) +						\
		((smmu->options & ARM_SMMU_OPT_SECURE_CFG_ACCESS)	\
			? 0x400 : 0))

/*
 * Some 64-bit registers only make sense to write atomically, but in such
 * cases all the data relevant to AArch32 formats lies within the lower word,
 * therefore this actually makes more sense than it might first appear.
 */
#ifdef CONFIG_64BIT
#define smmu_write_atomic_lq		writeq_relaxed
#else
#define smmu_write_atomic_lq		writel_relaxed
#endif
/* Configuration registers */
#define ARM_SMMU_GR0_sCR0		0x0
#define sCR0_CLIENTPD			(1 << 0)
#define sCR0_GFRE			(1 << 1)
#define sCR0_GFIE			(1 << 2)
#define sCR0_GCFGFRE			(1 << 4)
#define sCR0_GCFGFIE			(1 << 5)
#define sCR0_USFCFG			(1 << 10)
#define sCR0_VMIDPNE			(1 << 11)
#define sCR0_PTM			(1 << 12)
#define sCR0_FB				(1 << 13)
#define sCR0_VMID16EN			(1 << 31)
#define sCR0_BSU_SHIFT			14
#define sCR0_BSU_MASK			0x3

/* Auxiliary Configuration register */
#define ARM_SMMU_GR0_sACR		0x10

/* Identification registers */
#define ARM_SMMU_GR0_ID0		0x20
#define ARM_SMMU_GR0_ID1		0x24
#define ARM_SMMU_GR0_ID2		0x28
#define ARM_SMMU_GR0_ID3		0x2c
#define ARM_SMMU_GR0_ID4		0x30
#define ARM_SMMU_GR0_ID5		0x34
#define ARM_SMMU_GR0_ID6		0x38
#define ARM_SMMU_GR0_ID7		0x3c
#define ARM_SMMU_GR0_sGFSR		0x48
#define ARM_SMMU_GR0_sGFSYNR0		0x50
#define ARM_SMMU_GR0_sGFSYNR1		0x54
#define ARM_SMMU_GR0_sGFSYNR2		0x58

#define ID0_S1TS			(1 << 30)
#define ID0_S2TS			(1 << 29)
#define ID0_NTS				(1 << 28)
#define ID0_SMS				(1 << 27)
#define ID0_ATOSNS			(1 << 26)
#define ID0_PTFS_NO_AARCH32		(1 << 25)
#define ID0_PTFS_NO_AARCH32S		(1 << 24)
#define ID0_CTTW			(1 << 14)
#define ID0_NUMIRPT_SHIFT		16
#define ID0_NUMIRPT_MASK		0xff
#define ID0_NUMSIDB_SHIFT		9
#define ID0_NUMSIDB_MASK		0xf
#define ID0_NUMSMRG_SHIFT		0
#define ID0_NUMSMRG_MASK		0xff

#define ID1_PAGESIZE			(1 << 31)
#define ID1_NUMPAGENDXB_SHIFT		28
#define ID1_NUMPAGENDXB_MASK		7
#define ID1_NUMS2CB_SHIFT		16
#define ID1_NUMS2CB_MASK		0xff
#define ID1_NUMCB_SHIFT			0
#define ID1_NUMCB_MASK			0xff

#define ID2_OAS_SHIFT			4
#define ID2_OAS_MASK			0xf
#define ID2_IAS_SHIFT			0
#define ID2_IAS_MASK			0xf
#define ID2_UBS_SHIFT			8
#define ID2_UBS_MASK			0xf
#define ID2_PTFS_4K			(1 << 12)
#define ID2_PTFS_16K			(1 << 13)
#define ID2_PTFS_64K			(1 << 14)
#define ID2_VMID16			(1 << 15)

#define ID7_MAJOR_SHIFT			4
#define ID7_MAJOR_MASK			0xf

/* Global TLB invalidation */
#define ARM_SMMU_GR0_TLBIVMID		0x64
#define ARM_SMMU_GR0_TLBIALLNSNH	0x68
#define ARM_SMMU_GR0_TLBIALLH		0x6c
#define ARM_SMMU_GR0_sTLBGSYNC		0x70
#define ARM_SMMU_GR0_sTLBGSTATUS	0x74
#define sTLBGSTATUS_GSACTIVE		(1 << 0)
#define TLB_LOOP_TIMEOUT		1000000	/* 1s! */

/* Stream mapping registers */
#define ARM_SMMU_GR0_SMR(n)		(0x800 + ((n) << 2))
#define SMR_VALID			(1 << 31)
#define SMR_MASK_SHIFT			16
#define SMR_ID_SHIFT			0
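
/*
 * Worked example (see arm_smmu_write_smr()): an SMR that matches stream
 * ID 0x42 exactly, with no bits masked, would be programmed as
 *
 *	SMR_VALID | (0 << SMR_MASK_SHIFT) | (0x42 << SMR_ID_SHIFT)
 *
 * i.e. 0x80000042. A set MASK bit makes the corresponding ID bit a
 * "don't care" for matching purposes.
 */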
#define ARM_SMMU_GR0_S2CR(n)		(0xc00 + ((n) << 2))
#define S2CR_CBNDX_SHIFT		0
#define S2CR_CBNDX_MASK			0xff
#define S2CR_TYPE_SHIFT			16
#define S2CR_TYPE_MASK			0x3
enum arm_smmu_s2cr_type {
	S2CR_TYPE_TRANS,
	S2CR_TYPE_BYPASS,
	S2CR_TYPE_FAULT,
};

#define S2CR_PRIVCFG_SHIFT		24
#define S2CR_PRIVCFG_MASK		0x3
enum arm_smmu_s2cr_privcfg {
	S2CR_PRIVCFG_DEFAULT,
	S2CR_PRIVCFG_DIPAN,
	S2CR_PRIVCFG_UNPRIV,
	S2CR_PRIVCFG_PRIV,
};
/* Context bank attribute registers */
#define ARM_SMMU_GR1_CBAR(n)		(0x0 + ((n) << 2))
#define CBAR_VMID_SHIFT			0
#define CBAR_VMID_MASK			0xff
#define CBAR_S1_BPSHCFG_SHIFT		8
#define CBAR_S1_BPSHCFG_MASK		3
#define CBAR_S1_BPSHCFG_NSH		3
#define CBAR_S1_MEMATTR_SHIFT		12
#define CBAR_S1_MEMATTR_MASK		0xf
#define CBAR_S1_MEMATTR_WB		0xf
#define CBAR_TYPE_SHIFT			16
#define CBAR_TYPE_MASK			0x3
#define CBAR_TYPE_S2_TRANS		(0 << CBAR_TYPE_SHIFT)
#define CBAR_TYPE_S1_TRANS_S2_BYPASS	(1 << CBAR_TYPE_SHIFT)
#define CBAR_TYPE_S1_TRANS_S2_FAULT	(2 << CBAR_TYPE_SHIFT)
#define CBAR_TYPE_S1_TRANS_S2_TRANS	(3 << CBAR_TYPE_SHIFT)
#define CBAR_IRPTNDX_SHIFT		24
#define CBAR_IRPTNDX_MASK		0xff

#define ARM_SMMU_GR1_CBA2R(n)		(0x800 + ((n) << 2))
#define CBA2R_RW64_32BIT		(0 << 0)
#define CBA2R_RW64_64BIT		(1 << 0)
#define CBA2R_VMID_SHIFT		16
#define CBA2R_VMID_MASK			0xffff

/* Translation context bank */
#define ARM_SMMU_CB_BASE(smmu)		((smmu)->base + ((smmu)->size >> 1))
#define ARM_SMMU_CB(smmu, n)		((n) * (1 << (smmu)->pgshift))
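
/*
 * Example (following the two macros above): for an SMMU with a 64K global
 * address space (size = 0x10000) and a 4K translation unit (pgshift == 12),
 * the context banks occupy the top half of the region, so bank n lives at
 * base + 0x8000 + (n << 12).
 */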
#define ARM_SMMU_CB_SCTLR		0x0
#define ARM_SMMU_CB_ACTLR		0x4
#define ARM_SMMU_CB_RESUME		0x8
#define ARM_SMMU_CB_TTBCR2		0x10
#define ARM_SMMU_CB_TTBR0		0x20
#define ARM_SMMU_CB_TTBR1		0x28
#define ARM_SMMU_CB_TTBCR		0x30
#define ARM_SMMU_CB_CONTEXTIDR		0x34
#define ARM_SMMU_CB_S1_MAIR0		0x38
#define ARM_SMMU_CB_S1_MAIR1		0x3c
#define ARM_SMMU_CB_PAR			0x50
#define ARM_SMMU_CB_FSR			0x58
#define ARM_SMMU_CB_FAR			0x60
#define ARM_SMMU_CB_FSYNR0		0x68
#define ARM_SMMU_CB_S1_TLBIVA		0x600
#define ARM_SMMU_CB_S1_TLBIASID		0x610
#define ARM_SMMU_CB_S1_TLBIVAL		0x620
#define ARM_SMMU_CB_S2_TLBIIPAS2	0x630
#define ARM_SMMU_CB_S2_TLBIIPAS2L	0x638
#define ARM_SMMU_CB_ATS1PR		0x800
#define ARM_SMMU_CB_ATSR		0x8f0

#define SCTLR_S1_ASIDPNE		(1 << 12)
#define SCTLR_CFCFG			(1 << 7)
#define SCTLR_CFIE			(1 << 6)
#define SCTLR_CFRE			(1 << 5)
#define SCTLR_E				(1 << 4)
#define SCTLR_AFE			(1 << 2)
#define SCTLR_TRE			(1 << 1)
#define SCTLR_M				(1 << 0)

#define ARM_MMU500_ACTLR_CPRE		(1 << 1)

#define ARM_MMU500_ACR_CACHE_LOCK	(1 << 26)

#define CB_PAR_F			(1 << 0)

#define ATSR_ACTIVE			(1 << 0)

#define RESUME_RETRY			(0 << 0)
#define RESUME_TERMINATE		(1 << 0)

#define TTBCR2_SEP_SHIFT		15
#define TTBCR2_SEP_UPSTREAM		(0x7 << TTBCR2_SEP_SHIFT)

#define TTBRn_ASID_SHIFT		48

#define FSR_MULTI			(1 << 31)
#define FSR_SS				(1 << 30)
#define FSR_UUT				(1 << 8)
#define FSR_ASF				(1 << 7)
#define FSR_TLBLKF			(1 << 6)
#define FSR_TLBMCF			(1 << 5)
#define FSR_EF				(1 << 4)
#define FSR_PF				(1 << 3)
#define FSR_AFF				(1 << 2)
#define FSR_TF				(1 << 1)

#define FSR_IGN				(FSR_AFF | FSR_ASF | \
					 FSR_TLBMCF | FSR_TLBLKF)
#define FSR_FAULT			(FSR_MULTI | FSR_SS | FSR_UUT | \
					 FSR_EF | FSR_PF | FSR_TF | FSR_IGN)

#define FSYNR0_WNR			(1 << 4)
static int force_stage;
module_param(force_stage, int, S_IRUGO);
MODULE_PARM_DESC(force_stage,
	"Force SMMU mappings to be installed at a particular stage of translation. A value of '1' or '2' forces the corresponding stage. All other values are ignored (i.e. no stage is forced). Note that selecting a specific stage will disable support for nested translation.");
static bool disable_bypass;
module_param(disable_bypass, bool, S_IRUGO);
MODULE_PARM_DESC(disable_bypass,
	"Disable bypass streams such that incoming transactions from devices that are not attached to an iommu domain will report an abort back to the device and will not be allowed to pass through the SMMU.");
enum arm_smmu_arch_version {
	ARM_SMMU_V1,
	ARM_SMMU_V1_64K,
	ARM_SMMU_V2,
};

enum arm_smmu_implementation {
	GENERIC_SMMU,
	ARM_MMU500,
	CAVIUM_SMMUV2,
};

struct arm_smmu_s2cr {
	enum arm_smmu_s2cr_type		type;
	enum arm_smmu_s2cr_privcfg	privcfg;
	u8				cbndx;
};

#define s2cr_init_val (struct arm_smmu_s2cr){				\
	.type = disable_bypass ? S2CR_TYPE_FAULT : S2CR_TYPE_BYPASS,	\
}

struct arm_smmu_smr {
	u16				mask;
	u16				id;
	bool				valid;
};

struct arm_smmu_master_cfg {
	struct arm_smmu_device		*smmu;
	int				num_streamids;
	u16				streamids[MAX_MASTER_STREAMIDS];
	s16				smendx[MAX_MASTER_STREAMIDS];
};
#define INVALID_SMENDX			-1
#define for_each_cfg_sme(cfg, i, idx) \
	for (i = 0; idx = cfg->smendx[i], i < cfg->num_streamids; ++i)
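
/*
 * Usage sketch: for_each_cfg_sme() yields both the position i within
 * cfg->streamids[]/cfg->smendx[] and the Stream Map Entry index idx for
 * that stream. Since idx may still be INVALID_SMENDX, callers typically
 * check it first:
 *
 *	for_each_cfg_sme(cfg, i, idx) {
 *		if (idx == INVALID_SMENDX)
 *			continue;
 *		...
 *	}
 */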
struct arm_smmu_device {
	struct device			*dev;

	void __iomem			*base;
	unsigned long			size;
	unsigned long			pgshift;

#define ARM_SMMU_FEAT_COHERENT_WALK	(1 << 0)
#define ARM_SMMU_FEAT_STREAM_MATCH	(1 << 1)
#define ARM_SMMU_FEAT_TRANS_S1		(1 << 2)
#define ARM_SMMU_FEAT_TRANS_S2		(1 << 3)
#define ARM_SMMU_FEAT_TRANS_NESTED	(1 << 4)
#define ARM_SMMU_FEAT_TRANS_OPS		(1 << 5)
#define ARM_SMMU_FEAT_VMID16		(1 << 6)
#define ARM_SMMU_FEAT_FMT_AARCH64_4K	(1 << 7)
#define ARM_SMMU_FEAT_FMT_AARCH64_16K	(1 << 8)
#define ARM_SMMU_FEAT_FMT_AARCH64_64K	(1 << 9)
#define ARM_SMMU_FEAT_FMT_AARCH32_L	(1 << 10)
#define ARM_SMMU_FEAT_FMT_AARCH32_S	(1 << 11)
	u32				features;

#define ARM_SMMU_OPT_SECURE_CFG_ACCESS (1 << 0)
	u32				options;
	enum arm_smmu_arch_version	version;
	enum arm_smmu_implementation	model;

	u32				num_context_banks;
	u32				num_s2_context_banks;
	DECLARE_BITMAP(context_map, ARM_SMMU_MAX_CBS);
	atomic_t			irptndx;

	u32				num_mapping_groups;
	u16				streamid_mask;
	u16				smr_mask_mask;
	struct arm_smmu_smr		*smrs;
	struct arm_smmu_s2cr		*s2crs;

	unsigned long			va_size;
	unsigned long			ipa_size;
	unsigned long			pa_size;
	unsigned long			pgsize_bitmap;

	u32				num_global_irqs;
	u32				num_context_irqs;
	unsigned int			*irqs;

	u32				cavium_id_base; /* Specific to Cavium */
};
enum arm_smmu_context_fmt {
	ARM_SMMU_CTX_FMT_NONE,
	ARM_SMMU_CTX_FMT_AARCH64,
	ARM_SMMU_CTX_FMT_AARCH32_L,
	ARM_SMMU_CTX_FMT_AARCH32_S,
};

struct arm_smmu_cfg {
	u8				cbndx;
	u8				irptndx;
	u32				cbar;
	enum arm_smmu_context_fmt	fmt;
};
#define INVALID_IRPTNDX			0xff

#define ARM_SMMU_CB_ASID(smmu, cfg) ((u16)(smmu)->cavium_id_base + (cfg)->cbndx)
#define ARM_SMMU_CB_VMID(smmu, cfg) ((u16)(smmu)->cavium_id_base + (cfg)->cbndx + 1)
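
/*
 * Worked example: cavium_id_base is 0 except on Cavium CN88xx parts (see the
 * erratum #27704 handling in arm_smmu_device_cfg_probe()), so context bank 3
 * normally gets ASID 3 and VMID 4; on a Cavium SMMU with cavium_id_base = 8
 * it would get ASID 11 and VMID 12 instead, keeping the ASID/VMID spaces
 * unique across SMMU instances.
 */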
enum arm_smmu_domain_stage {
	ARM_SMMU_DOMAIN_S1 = 0,
	ARM_SMMU_DOMAIN_S2,
	ARM_SMMU_DOMAIN_NESTED,
};

struct arm_smmu_domain {
	struct arm_smmu_device		*smmu;
	struct io_pgtable_ops		*pgtbl_ops;
	spinlock_t			pgtbl_lock;
	struct arm_smmu_cfg		cfg;
	enum arm_smmu_domain_stage	stage;
	struct mutex			init_mutex; /* Protects smmu pointer */
	struct iommu_domain		domain;
};

struct arm_smmu_option_prop {
	u32 opt;
	const char *prop;
};

static atomic_t cavium_smmu_context_count = ATOMIC_INIT(0);

static struct arm_smmu_option_prop arm_smmu_options[] = {
	{ ARM_SMMU_OPT_SECURE_CFG_ACCESS, "calxeda,smmu-secure-config-access" },
	{ 0, NULL},
};
static struct arm_smmu_domain *to_smmu_domain(struct iommu_domain *dom)
{
	return container_of(dom, struct arm_smmu_domain, domain);
}

static void parse_driver_options(struct arm_smmu_device *smmu)
{
	int i = 0;

	do {
		if (of_property_read_bool(smmu->dev->of_node,
						arm_smmu_options[i].prop)) {
			smmu->options |= arm_smmu_options[i].opt;
			dev_notice(smmu->dev, "option %s\n",
				arm_smmu_options[i].prop);
		}
	} while (arm_smmu_options[++i].opt);
}
static struct device_node *dev_get_dev_node(struct device *dev)
{
	if (dev_is_pci(dev)) {
		struct pci_bus *bus = to_pci_dev(dev)->bus;

		while (!pci_is_root_bus(bus))
			bus = bus->parent;
		return of_node_get(bus->bridge->parent->of_node);
	}

	return of_node_get(dev->of_node);
}

static int __arm_smmu_get_pci_sid(struct pci_dev *pdev, u16 alias, void *data)
{
	*((__be32 *)data) = cpu_to_be32(alias);
	return 0; /* Continue walking */
}
static int __find_legacy_master_phandle(struct device *dev, void *data)
{
	struct of_phandle_iterator *it = *(void **)data;
	struct device_node *np = it->node;
	int err;

	of_for_each_phandle(it, err, dev->of_node, "mmu-masters",
			    "#stream-id-cells", 0)
		if (it->node == np) {
			*(void **)data = dev;
			return 1;
		}
	it->node = np;
	return err == -ENOENT ? 0 : err;
}
static struct platform_driver arm_smmu_driver;
static int arm_smmu_register_legacy_master(struct device *dev)
{
	struct arm_smmu_device *smmu;
	struct arm_smmu_master_cfg *cfg;
	struct device_node *np;
	struct of_phandle_iterator it;
	void *data = &it;
	__be32 pci_sid;
	int err;

	np = dev_get_dev_node(dev);
	if (!np || !of_find_property(np, "#stream-id-cells", NULL)) {
		of_node_put(np);
		return -ENODEV;
	}

	it.node = np;
	err = driver_for_each_device(&arm_smmu_driver.driver, NULL, &data,
				     __find_legacy_master_phandle);
	of_node_put(np);
	if (err == 0)
		return -ENODEV;
	if (err < 0)
		return err;

	smmu = dev_get_drvdata(data);

	if (it.cur_count > MAX_MASTER_STREAMIDS) {
		dev_err(smmu->dev,
			"reached maximum number (%d) of stream IDs for master device %s\n",
			MAX_MASTER_STREAMIDS, dev_name(dev));
		return -ENOSPC;
	}
	if (dev_is_pci(dev)) {
		/* "mmu-masters" assumes Stream ID == Requester ID */
		pci_for_each_dma_alias(to_pci_dev(dev), __arm_smmu_get_pci_sid,
				       &pci_sid);
		it.cur = &pci_sid;
		it.cur_count = 1;
	}

	cfg = kzalloc(sizeof(*cfg), GFP_KERNEL);
	if (!cfg)
		return -ENOMEM;

	cfg->smmu = smmu;
	dev->archdata.iommu = cfg;

	while (it.cur_count--)
		cfg->streamids[cfg->num_streamids++] = be32_to_cpup(it.cur++);

	return 0;
}
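
/*
 * Illustrative DT fragment for the legacy "mmu-masters" binding parsed above
 * (node names and stream ID values are examples only): the SMMU node lists
 * each master alongside its stream IDs, and each master node carries
 * #stream-id-cells to say how many IDs follow its phandle:
 *
 *	smmu {
 *		...
 *		mmu-masters = <&dma0 0xd01d 0xd01e>;
 *	};
 *
 *	dma0: dma-controller {
 *		...
 *		#stream-id-cells = <2>;
 *	};
 */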
static int __arm_smmu_alloc_bitmap(unsigned long *map, int start, int end)
{
	int idx;

	do {
		idx = find_next_zero_bit(map, end, start);
		if (idx == end)
			return -ENOSPC;
	} while (test_and_set_bit(idx, map));

	return idx;
}

static void __arm_smmu_free_bitmap(unsigned long *map, int idx)
{
	clear_bit(idx, map);
}
/* Wait for any pending TLB invalidations to complete */
static void __arm_smmu_tlb_sync(struct arm_smmu_device *smmu)
{
	int count = 0;
	void __iomem *gr0_base = ARM_SMMU_GR0(smmu);

	writel_relaxed(0, gr0_base + ARM_SMMU_GR0_sTLBGSYNC);
	while (readl_relaxed(gr0_base + ARM_SMMU_GR0_sTLBGSTATUS)
	       & sTLBGSTATUS_GSACTIVE) {
		cpu_relax();
		if (++count == TLB_LOOP_TIMEOUT) {
			dev_err_ratelimited(smmu->dev,
			"TLB sync timed out -- SMMU may be deadlocked\n");
			return;
		}
		udelay(1);
	}
}

static void arm_smmu_tlb_sync(void *cookie)
{
	struct arm_smmu_domain *smmu_domain = cookie;
	__arm_smmu_tlb_sync(smmu_domain->smmu);
}
static void arm_smmu_tlb_inv_context(void *cookie)
{
	struct arm_smmu_domain *smmu_domain = cookie;
	struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
	struct arm_smmu_device *smmu = smmu_domain->smmu;
	bool stage1 = cfg->cbar != CBAR_TYPE_S2_TRANS;
	void __iomem *base;

	if (stage1) {
		base = ARM_SMMU_CB_BASE(smmu) + ARM_SMMU_CB(smmu, cfg->cbndx);
		writel_relaxed(ARM_SMMU_CB_ASID(smmu, cfg),
			       base + ARM_SMMU_CB_S1_TLBIASID);
	} else {
		base = ARM_SMMU_GR0(smmu);
		writel_relaxed(ARM_SMMU_CB_VMID(smmu, cfg),
			       base + ARM_SMMU_GR0_TLBIVMID);
	}

	__arm_smmu_tlb_sync(smmu);
}
static void arm_smmu_tlb_inv_range_nosync(unsigned long iova, size_t size,
					  size_t granule, bool leaf, void *cookie)
{
	struct arm_smmu_domain *smmu_domain = cookie;
	struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
	struct arm_smmu_device *smmu = smmu_domain->smmu;
	bool stage1 = cfg->cbar != CBAR_TYPE_S2_TRANS;
	void __iomem *reg;

	if (stage1) {
		reg = ARM_SMMU_CB_BASE(smmu) + ARM_SMMU_CB(smmu, cfg->cbndx);
		reg += leaf ? ARM_SMMU_CB_S1_TLBIVAL : ARM_SMMU_CB_S1_TLBIVA;

		if (cfg->fmt != ARM_SMMU_CTX_FMT_AARCH64) {
			iova &= ~12UL;
			iova |= ARM_SMMU_CB_ASID(smmu, cfg);
			do {
				writel_relaxed(iova, reg);
				iova += granule;
			} while (size -= granule);
		} else {
			iova >>= 12;
			iova |= (u64)ARM_SMMU_CB_ASID(smmu, cfg) << 48;
			do {
				writeq_relaxed(iova, reg);
				iova += granule >> 12;
			} while (size -= granule);
		}
	} else if (smmu->version == ARM_SMMU_V2) {
		reg = ARM_SMMU_CB_BASE(smmu) + ARM_SMMU_CB(smmu, cfg->cbndx);
		reg += leaf ? ARM_SMMU_CB_S2_TLBIIPAS2L :
			      ARM_SMMU_CB_S2_TLBIIPAS2;
		iova >>= 12;
		do {
			smmu_write_atomic_lq(iova, reg);
			iova += granule >> 12;
		} while (size -= granule);
	} else {
		reg = ARM_SMMU_GR0(smmu) + ARM_SMMU_GR0_TLBIVMID;
		writel_relaxed(ARM_SMMU_CB_VMID(smmu, cfg), reg);
	}
}
static struct iommu_gather_ops arm_smmu_gather_ops = {
	.tlb_flush_all	= arm_smmu_tlb_inv_context,
	.tlb_add_flush	= arm_smmu_tlb_inv_range_nosync,
	.tlb_sync	= arm_smmu_tlb_sync,
};
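
/*
 * These callbacks are invoked by the io-pgtable library: tlb_add_flush
 * queues invalidations for ranges being unmapped and tlb_sync waits for
 * them to complete, so the SMMU never keeps stale walks to freed tables.
 */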
static irqreturn_t arm_smmu_context_fault(int irq, void *dev)
{
	u32 fsr, fsynr;
	unsigned long iova;
	struct iommu_domain *domain = dev;
	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
	struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
	struct arm_smmu_device *smmu = smmu_domain->smmu;
	void __iomem *cb_base;

	cb_base = ARM_SMMU_CB_BASE(smmu) + ARM_SMMU_CB(smmu, cfg->cbndx);
	fsr = readl_relaxed(cb_base + ARM_SMMU_CB_FSR);

	if (!(fsr & FSR_FAULT))
		return IRQ_NONE;

	fsynr = readl_relaxed(cb_base + ARM_SMMU_CB_FSYNR0);
	iova = readq_relaxed(cb_base + ARM_SMMU_CB_FAR);

	dev_err_ratelimited(smmu->dev,
	"Unhandled context fault: fsr=0x%x, iova=0x%08lx, fsynr=0x%x, cb=%d\n",
			    fsr, iova, fsynr, cfg->cbndx);

	writel(fsr, cb_base + ARM_SMMU_CB_FSR);
	return IRQ_HANDLED;
}
static irqreturn_t arm_smmu_global_fault(int irq, void *dev)
{
	u32 gfsr, gfsynr0, gfsynr1, gfsynr2;
	struct arm_smmu_device *smmu = dev;
	void __iomem *gr0_base = ARM_SMMU_GR0_NS(smmu);

	gfsr = readl_relaxed(gr0_base + ARM_SMMU_GR0_sGFSR);
	gfsynr0 = readl_relaxed(gr0_base + ARM_SMMU_GR0_sGFSYNR0);
	gfsynr1 = readl_relaxed(gr0_base + ARM_SMMU_GR0_sGFSYNR1);
	gfsynr2 = readl_relaxed(gr0_base + ARM_SMMU_GR0_sGFSYNR2);

	if (!gfsr)
		return IRQ_NONE;

	dev_err_ratelimited(smmu->dev,
		"Unexpected global fault, this could be serious\n");
	dev_err_ratelimited(smmu->dev,
		"\tGFSR 0x%08x, GFSYNR0 0x%08x, GFSYNR1 0x%08x, GFSYNR2 0x%08x\n",
		gfsr, gfsynr0, gfsynr1, gfsynr2);

	writel(gfsr, gr0_base + ARM_SMMU_GR0_sGFSR);
	return IRQ_HANDLED;
}
static void arm_smmu_init_context_bank(struct arm_smmu_domain *smmu_domain,
				       struct io_pgtable_cfg *pgtbl_cfg)
{
	u32 reg, reg2;
	u64 reg64;
	bool stage1;
	struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
	struct arm_smmu_device *smmu = smmu_domain->smmu;
	void __iomem *cb_base, *gr1_base;

	gr1_base = ARM_SMMU_GR1(smmu);
	stage1 = cfg->cbar != CBAR_TYPE_S2_TRANS;
	cb_base = ARM_SMMU_CB_BASE(smmu) + ARM_SMMU_CB(smmu, cfg->cbndx);

	if (smmu->version > ARM_SMMU_V1) {
		if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64)
			reg = CBA2R_RW64_64BIT;
		else
			reg = CBA2R_RW64_32BIT;
		/* 16-bit VMIDs live in CBA2R */
		if (smmu->features & ARM_SMMU_FEAT_VMID16)
			reg |= ARM_SMMU_CB_VMID(smmu, cfg) << CBA2R_VMID_SHIFT;

		writel_relaxed(reg, gr1_base + ARM_SMMU_GR1_CBA2R(cfg->cbndx));
	}

	/* CBAR */
	reg = cfg->cbar;
	if (smmu->version < ARM_SMMU_V2)
		reg |= cfg->irptndx << CBAR_IRPTNDX_SHIFT;

	/*
	 * Use the weakest shareability/memory types, so they are
	 * overridden by the ttbcr/pte.
	 */
	if (stage1) {
		reg |= (CBAR_S1_BPSHCFG_NSH << CBAR_S1_BPSHCFG_SHIFT) |
			(CBAR_S1_MEMATTR_WB << CBAR_S1_MEMATTR_SHIFT);
	} else if (!(smmu->features & ARM_SMMU_FEAT_VMID16)) {
		/* 8-bit VMIDs live in CBAR */
		reg |= ARM_SMMU_CB_VMID(smmu, cfg) << CBAR_VMID_SHIFT;
	}
	writel_relaxed(reg, gr1_base + ARM_SMMU_GR1_CBAR(cfg->cbndx));

	/* TTBRs */
	if (stage1) {
		u16 asid = ARM_SMMU_CB_ASID(smmu, cfg);

		if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) {
			reg = pgtbl_cfg->arm_v7s_cfg.ttbr[0];
			writel_relaxed(reg, cb_base + ARM_SMMU_CB_TTBR0);
			reg = pgtbl_cfg->arm_v7s_cfg.ttbr[1];
			writel_relaxed(reg, cb_base + ARM_SMMU_CB_TTBR1);
			writel_relaxed(asid, cb_base + ARM_SMMU_CB_CONTEXTIDR);
		} else {
			reg64 = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[0];
			reg64 |= (u64)asid << TTBRn_ASID_SHIFT;
			writeq_relaxed(reg64, cb_base + ARM_SMMU_CB_TTBR0);
			reg64 = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[1];
			reg64 |= (u64)asid << TTBRn_ASID_SHIFT;
			writeq_relaxed(reg64, cb_base + ARM_SMMU_CB_TTBR1);
		}
	} else {
		reg64 = pgtbl_cfg->arm_lpae_s2_cfg.vttbr;
		writeq_relaxed(reg64, cb_base + ARM_SMMU_CB_TTBR0);
	}

	/* TTBCR */
	if (stage1) {
		if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) {
			reg = pgtbl_cfg->arm_v7s_cfg.tcr;
			reg2 = 0;
		} else {
			reg = pgtbl_cfg->arm_lpae_s1_cfg.tcr;
			reg2 = pgtbl_cfg->arm_lpae_s1_cfg.tcr >> 32;
			reg2 |= TTBCR2_SEP_UPSTREAM;
		}
		if (smmu->version > ARM_SMMU_V1)
			writel_relaxed(reg2, cb_base + ARM_SMMU_CB_TTBCR2);
	} else {
		reg = pgtbl_cfg->arm_lpae_s2_cfg.vtcr;
	}
	writel_relaxed(reg, cb_base + ARM_SMMU_CB_TTBCR);

	/* MAIRs (stage-1 only) */
	if (stage1) {
		if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) {
			reg = pgtbl_cfg->arm_v7s_cfg.prrr;
			reg2 = pgtbl_cfg->arm_v7s_cfg.nmrr;
		} else {
			reg = pgtbl_cfg->arm_lpae_s1_cfg.mair[0];
			reg2 = pgtbl_cfg->arm_lpae_s1_cfg.mair[1];
		}
		writel_relaxed(reg, cb_base + ARM_SMMU_CB_S1_MAIR0);
		writel_relaxed(reg2, cb_base + ARM_SMMU_CB_S1_MAIR1);
	}

	/* SCTLR */
	reg = SCTLR_CFIE | SCTLR_CFRE | SCTLR_AFE | SCTLR_TRE | SCTLR_M;
	if (stage1)
		reg |= SCTLR_S1_ASIDPNE;
#ifdef __BIG_ENDIAN
	reg |= SCTLR_E;
#endif
	writel_relaxed(reg, cb_base + ARM_SMMU_CB_SCTLR);
}
static int arm_smmu_init_domain_context(struct iommu_domain *domain,
					struct arm_smmu_device *smmu)
{
	int irq, start, ret = 0;
	unsigned long ias, oas;
	struct io_pgtable_ops *pgtbl_ops;
	struct io_pgtable_cfg pgtbl_cfg;
	enum io_pgtable_fmt fmt;
	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
	struct arm_smmu_cfg *cfg = &smmu_domain->cfg;

	mutex_lock(&smmu_domain->init_mutex);
	if (smmu_domain->smmu)
		goto out_unlock;

	/* We're bypassing these SIDs, so don't allocate an actual context */
	if (domain->type == IOMMU_DOMAIN_DMA) {
		smmu_domain->smmu = smmu;
		goto out_unlock;
	}

	/*
	 * Mapping the requested stage onto what we support is surprisingly
	 * complicated, mainly because the spec allows S1+S2 SMMUs without
	 * support for nested translation. That means we end up with the
	 * following table:
	 *
	 * Requested        Supported        Actual
	 *     S1               N              S1
	 *     S1             S1+S2            S1
	 *     S1               S2             S2
	 *     S1               S1             S1
	 *     N                N              N
	 *     N              S1+S2            N
	 *     N                S2             S2
	 *     N                S1             S1
	 *
	 * Note that you can't actually request stage-2 mappings.
	 */
	if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S1))
		smmu_domain->stage = ARM_SMMU_DOMAIN_S2;
	if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S2))
		smmu_domain->stage = ARM_SMMU_DOMAIN_S1;

	/*
	 * Choosing a suitable context format is even more fiddly. Until we
	 * grow some way for the caller to express a preference, and/or move
	 * the decision into the io-pgtable code where it arguably belongs,
	 * just aim for the closest thing to the rest of the system, and hope
	 * that the hardware isn't esoteric enough that we can't assume AArch64
	 * support to be a superset of AArch32 support...
	 */
	if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH32_L)
		cfg->fmt = ARM_SMMU_CTX_FMT_AARCH32_L;
	if (IS_ENABLED(CONFIG_IOMMU_IO_PGTABLE_ARMV7S) &&
	    !IS_ENABLED(CONFIG_64BIT) && !IS_ENABLED(CONFIG_ARM_LPAE) &&
	    (smmu->features & ARM_SMMU_FEAT_FMT_AARCH32_S) &&
	    (smmu_domain->stage == ARM_SMMU_DOMAIN_S1))
		cfg->fmt = ARM_SMMU_CTX_FMT_AARCH32_S;
	if ((IS_ENABLED(CONFIG_64BIT) || cfg->fmt == ARM_SMMU_CTX_FMT_NONE) &&
	    (smmu->features & (ARM_SMMU_FEAT_FMT_AARCH64_64K |
			       ARM_SMMU_FEAT_FMT_AARCH64_16K |
			       ARM_SMMU_FEAT_FMT_AARCH64_4K)))
		cfg->fmt = ARM_SMMU_CTX_FMT_AARCH64;

	if (cfg->fmt == ARM_SMMU_CTX_FMT_NONE) {
		ret = -EINVAL;
		goto out_unlock;
	}

	switch (smmu_domain->stage) {
	case ARM_SMMU_DOMAIN_S1:
		cfg->cbar = CBAR_TYPE_S1_TRANS_S2_BYPASS;
		start = smmu->num_s2_context_banks;
		ias = smmu->va_size;
		oas = smmu->ipa_size;
		if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64) {
			fmt = ARM_64_LPAE_S1;
		} else if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_L) {
			fmt = ARM_32_LPAE_S1;
			ias = min(ias, 32UL);
			oas = min(oas, 40UL);
		} else {
			fmt = ARM_V7S;
			ias = min(ias, 32UL);
			oas = min(oas, 32UL);
		}
		break;
	case ARM_SMMU_DOMAIN_NESTED:
		/*
		 * We will likely want to change this if/when KVM gets
		 * involved.
		 */
	case ARM_SMMU_DOMAIN_S2:
		cfg->cbar = CBAR_TYPE_S2_TRANS;
		start = 0;
		ias = smmu->ipa_size;
		oas = smmu->pa_size;
		if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64) {
			fmt = ARM_64_LPAE_S2;
		} else {
			fmt = ARM_32_LPAE_S2;
			ias = min(ias, 40UL);
			oas = min(oas, 40UL);
		}
		break;
	default:
		ret = -EINVAL;
		goto out_unlock;
	}

	ret = __arm_smmu_alloc_bitmap(smmu->context_map, start,
				      smmu->num_context_banks);
	if (ret < 0)
		goto out_unlock;

	cfg->cbndx = ret;
	if (smmu->version < ARM_SMMU_V2) {
		cfg->irptndx = atomic_inc_return(&smmu->irptndx);
		cfg->irptndx %= smmu->num_context_irqs;
	} else {
		cfg->irptndx = cfg->cbndx;
	}

	pgtbl_cfg = (struct io_pgtable_cfg) {
		.pgsize_bitmap	= smmu->pgsize_bitmap,
		.ias		= ias,
		.oas		= oas,
		.tlb		= &arm_smmu_gather_ops,
		.iommu_dev	= smmu->dev,
	};

	smmu_domain->smmu = smmu;
	pgtbl_ops = alloc_io_pgtable_ops(fmt, &pgtbl_cfg, smmu_domain);
	if (!pgtbl_ops) {
		ret = -ENOMEM;
		goto out_clear_smmu;
	}

	/* Update the domain's page sizes to reflect the page table format */
	domain->pgsize_bitmap = pgtbl_cfg.pgsize_bitmap;

	/* Initialise the context bank with our page table cfg */
	arm_smmu_init_context_bank(smmu_domain, &pgtbl_cfg);

	/*
	 * Request context fault interrupt. Do this last to avoid the
	 * handler seeing a half-initialised domain state.
	 */
	irq = smmu->irqs[smmu->num_global_irqs + cfg->irptndx];
	ret = devm_request_irq(smmu->dev, irq, arm_smmu_context_fault,
			       IRQF_SHARED, "arm-smmu-context-fault", domain);
	if (ret < 0) {
		dev_err(smmu->dev, "failed to request context IRQ %d (%u)\n",
			cfg->irptndx, irq);
		cfg->irptndx = INVALID_IRPTNDX;
	}

	mutex_unlock(&smmu_domain->init_mutex);

	/* Publish page table ops for map/unmap */
	smmu_domain->pgtbl_ops = pgtbl_ops;
	return 0;

out_clear_smmu:
	smmu_domain->smmu = NULL;
out_unlock:
	mutex_unlock(&smmu_domain->init_mutex);
	return ret;
}
static void arm_smmu_destroy_domain_context(struct iommu_domain *domain)
{
	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
	struct arm_smmu_device *smmu = smmu_domain->smmu;
	struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
	void __iomem *cb_base;
	int irq;

	if (!smmu || domain->type == IOMMU_DOMAIN_DMA)
		return;

	/*
	 * Disable the context bank and free the page tables before freeing
	 * it.
	 */
	cb_base = ARM_SMMU_CB_BASE(smmu) + ARM_SMMU_CB(smmu, cfg->cbndx);
	writel_relaxed(0, cb_base + ARM_SMMU_CB_SCTLR);

	if (cfg->irptndx != INVALID_IRPTNDX) {
		irq = smmu->irqs[smmu->num_global_irqs + cfg->irptndx];
		devm_free_irq(smmu->dev, irq, domain);
	}

	free_io_pgtable_ops(smmu_domain->pgtbl_ops);
	__arm_smmu_free_bitmap(smmu->context_map, cfg->cbndx);
}
static struct iommu_domain *arm_smmu_domain_alloc(unsigned type)
{
	struct arm_smmu_domain *smmu_domain;

	if (type != IOMMU_DOMAIN_UNMANAGED && type != IOMMU_DOMAIN_DMA)
		return NULL;
	/*
	 * Allocate the domain and initialise some of its data structures.
	 * We can't really do anything meaningful until we've added a
	 * master.
	 */
	smmu_domain = kzalloc(sizeof(*smmu_domain), GFP_KERNEL);
	if (!smmu_domain)
		return NULL;

	if (type == IOMMU_DOMAIN_DMA &&
	    iommu_get_dma_cookie(&smmu_domain->domain)) {
		kfree(smmu_domain);
		return NULL;
	}

	mutex_init(&smmu_domain->init_mutex);
	spin_lock_init(&smmu_domain->pgtbl_lock);

	return &smmu_domain->domain;
}
static void arm_smmu_domain_free(struct iommu_domain *domain)
{
	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);

	/*
	 * Free the domain resources. We assume that all devices have
	 * already been detached.
	 */
	iommu_put_dma_cookie(domain);
	arm_smmu_destroy_domain_context(domain);
	kfree(smmu_domain);
}
static int arm_smmu_alloc_smr(struct arm_smmu_device *smmu)
{
	int i;

	for (i = 0; i < smmu->num_mapping_groups; i++)
		if (!cmpxchg(&smmu->smrs[i].valid, false, true))
			return i;

	return INVALID_SMENDX;
}

static void arm_smmu_free_smr(struct arm_smmu_device *smmu, int idx)
{
	writel_relaxed(~SMR_VALID, ARM_SMMU_GR0(smmu) + ARM_SMMU_GR0_SMR(idx));
	WRITE_ONCE(smmu->smrs[idx].valid, false);
}

static void arm_smmu_write_smr(struct arm_smmu_device *smmu, int idx)
{
	struct arm_smmu_smr *smr = smmu->smrs + idx;
	u32 reg = smr->id << SMR_ID_SHIFT | smr->mask << SMR_MASK_SHIFT;

	if (smr->valid)
		reg |= SMR_VALID;
	writel_relaxed(reg, ARM_SMMU_GR0(smmu) + ARM_SMMU_GR0_SMR(idx));
}

static void arm_smmu_write_s2cr(struct arm_smmu_device *smmu, int idx)
{
	struct arm_smmu_s2cr *s2cr = smmu->s2crs + idx;
	u32 reg = (s2cr->type & S2CR_TYPE_MASK) << S2CR_TYPE_SHIFT |
		  (s2cr->cbndx & S2CR_CBNDX_MASK) << S2CR_CBNDX_SHIFT |
		  (s2cr->privcfg & S2CR_PRIVCFG_MASK) << S2CR_PRIVCFG_SHIFT;

	writel_relaxed(reg, ARM_SMMU_GR0(smmu) + ARM_SMMU_GR0_S2CR(idx));
}

static void arm_smmu_write_sme(struct arm_smmu_device *smmu, int idx)
{
	arm_smmu_write_s2cr(smmu, idx);
	if (smmu->smrs)
		arm_smmu_write_smr(smmu, idx);
}
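
/*
 * Note the ordering: arm_smmu_write_sme() programs the S2CR before
 * (re)writing the SMR, so an incoming transaction can never match an SMR
 * whose S2CR still holds stale state.
 */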
static int arm_smmu_master_alloc_smes(struct arm_smmu_device *smmu,
				      struct arm_smmu_master_cfg *cfg)
{
	struct arm_smmu_smr *smrs = smmu->smrs;
	int i, idx;

	/* Allocate the SMRs on the SMMU */
	for_each_cfg_sme(cfg, i, idx) {
		if (idx != INVALID_SMENDX)
			return -EEXIST;

		/* ...except on stream indexing hardware, of course */
		if (!smrs) {
			cfg->smendx[i] = cfg->streamids[i];
			continue;
		}

		idx = arm_smmu_alloc_smr(smmu);
		if (idx < 0) {
			dev_err(smmu->dev, "failed to allocate free SMR\n");
			goto err_free_smrs;
		}
		cfg->smendx[i] = idx;

		smrs[idx].id = cfg->streamids[i];
		smrs[idx].mask = 0; /* We don't currently share SMRs */
	}

	if (!smrs)
		return 0;

	/* It worked! Now, poke the actual hardware */
	for_each_cfg_sme(cfg, i, idx)
		arm_smmu_write_smr(smmu, idx);

	return 0;

err_free_smrs:
	while (i--) {
		arm_smmu_free_smr(smmu, cfg->smendx[i]);
		cfg->smendx[i] = INVALID_SMENDX;
	}
	return -ENOSPC;
}
static void arm_smmu_master_free_smes(struct arm_smmu_master_cfg *cfg)
{
	struct arm_smmu_device *smmu = cfg->smmu;
	int i, idx;

	/*
	 * We *must* clear the S2CR first, because freeing the SMR means
	 * that it can be re-allocated immediately.
	 */
	for_each_cfg_sme(cfg, i, idx) {
		/* An IOMMU group is torn down by the first device to be removed */
		if (idx == INVALID_SMENDX)
			return;

		smmu->s2crs[idx] = s2cr_init_val;
		arm_smmu_write_s2cr(smmu, idx);
	}
	/* Sync S2CR updates before touching anything else */
	__iowmb();

	/* Invalidate the SMRs before freeing back to the allocator */
	for_each_cfg_sme(cfg, i, idx) {
		if (smmu->smrs)
			arm_smmu_free_smr(smmu, idx);

		cfg->smendx[i] = INVALID_SMENDX;
	}
}
static int arm_smmu_domain_add_master(struct arm_smmu_domain *smmu_domain,
				      struct arm_smmu_master_cfg *cfg)
{
	int i, idx, ret = 0;
	struct arm_smmu_device *smmu = smmu_domain->smmu;
	struct arm_smmu_s2cr *s2cr = smmu->s2crs;
	enum arm_smmu_s2cr_type type = S2CR_TYPE_TRANS;
	u8 cbndx = smmu_domain->cfg.cbndx;

	if (cfg->smendx[0] == INVALID_SMENDX)
		ret = arm_smmu_master_alloc_smes(smmu, cfg);
	if (ret)
		return ret;

	/*
	 * FIXME: This won't be needed once we have IOMMU-backed DMA ops
	 * for all devices behind the SMMU. Note that we need to take
	 * care configuring SMRs for devices that are both a platform_device
	 * and a PCI device (i.e. a PCI host controller).
	 */
	if (smmu_domain->domain.type == IOMMU_DOMAIN_DMA)
		type = S2CR_TYPE_BYPASS;

	for_each_cfg_sme(cfg, i, idx) {
		/* Devices in an IOMMU group may already be configured */
		if (type == s2cr[idx].type && cbndx == s2cr[idx].cbndx)
			break;

		s2cr[idx].type = type;
		s2cr[idx].privcfg = S2CR_PRIVCFG_UNPRIV;
		s2cr[idx].cbndx = cbndx;
		arm_smmu_write_s2cr(smmu, idx);
	}
	return 0;
}
static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
{
	int ret;
	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
	struct arm_smmu_master_cfg *cfg = dev->archdata.iommu;

	if (!cfg) {
		dev_err(dev, "cannot attach to SMMU, is it on the same bus?\n");
		return -ENXIO;
	}

	/* Ensure that the domain is finalised */
	ret = arm_smmu_init_domain_context(domain, cfg->smmu);
	if (ret < 0)
		return ret;

	/*
	 * Sanity check the domain. We don't support domains across
	 * different SMMUs.
	 */
	if (smmu_domain->smmu != cfg->smmu) {
		dev_err(dev,
			"cannot attach to SMMU %s whilst already attached to domain on SMMU %s\n",
			dev_name(smmu_domain->smmu->dev), dev_name(cfg->smmu->dev));
		return -EINVAL;
	}

	/* Looks ok, so add the device to the domain */
	return arm_smmu_domain_add_master(smmu_domain, cfg);
}
static int arm_smmu_map(struct iommu_domain *domain, unsigned long iova,
			phys_addr_t paddr, size_t size, int prot)
{
	int ret;
	unsigned long flags;
	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
	struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;

	if (!ops)
		return -ENODEV;

	spin_lock_irqsave(&smmu_domain->pgtbl_lock, flags);
	ret = ops->map(ops, iova, paddr, size, prot);
	spin_unlock_irqrestore(&smmu_domain->pgtbl_lock, flags);
	return ret;
}

static size_t arm_smmu_unmap(struct iommu_domain *domain, unsigned long iova,
			     size_t size)
{
	size_t ret;
	unsigned long flags;
	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
	struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;

	if (!ops)
		return 0;

	spin_lock_irqsave(&smmu_domain->pgtbl_lock, flags);
	ret = ops->unmap(ops, iova, size);
	spin_unlock_irqrestore(&smmu_domain->pgtbl_lock, flags);
	return ret;
}
static phys_addr_t arm_smmu_iova_to_phys_hard(struct iommu_domain *domain,
					      dma_addr_t iova)
{
	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
	struct arm_smmu_device *smmu = smmu_domain->smmu;
	struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
	struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;
	struct device *dev = smmu->dev;
	void __iomem *cb_base;
	u32 tmp;
	u64 phys;
	unsigned long va;

	cb_base = ARM_SMMU_CB_BASE(smmu) + ARM_SMMU_CB(smmu, cfg->cbndx);

	/* ATS1 registers can only be written atomically */
	va = iova & ~0xfffUL;
	if (smmu->version == ARM_SMMU_V2)
		smmu_write_atomic_lq(va, cb_base + ARM_SMMU_CB_ATS1PR);
	else /* Register is only 32-bit in v1 */
		writel_relaxed(va, cb_base + ARM_SMMU_CB_ATS1PR);

	if (readl_poll_timeout_atomic(cb_base + ARM_SMMU_CB_ATSR, tmp,
				      !(tmp & ATSR_ACTIVE), 5, 50)) {
		dev_err(dev,
			"iova to phys timed out on %pad. Falling back to software table walk.\n",
			&iova);
		return ops->iova_to_phys(ops, iova);
	}

	phys = readq_relaxed(cb_base + ARM_SMMU_CB_PAR);
	if (phys & CB_PAR_F) {
		dev_err(dev, "translation fault!\n");
		dev_err(dev, "PAR = 0x%llx\n", phys);
		return 0;
	}

	return (phys & GENMASK_ULL(39, 12)) | (iova & 0xfff);
}
static phys_addr_t arm_smmu_iova_to_phys(struct iommu_domain *domain,
					 dma_addr_t iova)
{
	phys_addr_t ret;
	unsigned long flags;
	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
	struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;

	if (!ops)
		return 0;

	spin_lock_irqsave(&smmu_domain->pgtbl_lock, flags);
	if (smmu_domain->smmu->features & ARM_SMMU_FEAT_TRANS_OPS &&
			smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
		ret = arm_smmu_iova_to_phys_hard(domain, iova);
	} else {
		ret = ops->iova_to_phys(ops, iova);
	}

	spin_unlock_irqrestore(&smmu_domain->pgtbl_lock, flags);

	return ret;
}
static bool arm_smmu_capable(enum iommu_cap cap)
{
	switch (cap) {
	case IOMMU_CAP_CACHE_COHERENCY:
		/*
		 * Return true here as the SMMU can always send out coherent
		 * requests.
		 */
		return true;
	case IOMMU_CAP_INTR_REMAP:
		return true; /* MSIs are just memory writes */
	case IOMMU_CAP_NOEXEC:
		return true;
	default:
		return false;
	}
}
static int arm_smmu_add_device(struct device *dev)
{
	struct arm_smmu_master_cfg *cfg;
	struct iommu_group *group;
	int i, ret;

	ret = arm_smmu_register_legacy_master(dev);
	cfg = dev->archdata.iommu;
	if (ret)
		goto out_free;

	ret = -EINVAL;
	for (i = 0; i < cfg->num_streamids; i++) {
		u16 sid = cfg->streamids[i];

		if (sid & ~cfg->smmu->streamid_mask) {
			dev_err(dev, "stream ID 0x%x out of range for SMMU (0x%x)\n",
				sid, cfg->smmu->streamid_mask);
			goto out_free;
		}
		cfg->smendx[i] = INVALID_SMENDX;
	}

	group = iommu_group_get_for_dev(dev);
	if (IS_ERR(group)) {
		ret = PTR_ERR(group);
		goto out_free;
	}
	iommu_group_put(group);
	return 0;

out_free:
	kfree(cfg);
	dev->archdata.iommu = NULL;
	return ret;
}
static void arm_smmu_remove_device(struct device *dev)
{
	struct arm_smmu_master_cfg *cfg = dev->archdata.iommu;

	if (!cfg)
		return;

	arm_smmu_master_free_smes(cfg);
	iommu_group_remove_device(dev);
	kfree(cfg);
	dev->archdata.iommu = NULL;
}
static struct iommu_group *arm_smmu_device_group(struct device *dev)
{
	struct iommu_group *group;

	if (dev_is_pci(dev))
		group = pci_device_group(dev);
	else
		group = generic_device_group(dev);

	return group;
}
static int arm_smmu_domain_get_attr(struct iommu_domain *domain,
				    enum iommu_attr attr, void *data)
{
	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);

	switch (attr) {
	case DOMAIN_ATTR_NESTING:
		*(int *)data = (smmu_domain->stage == ARM_SMMU_DOMAIN_NESTED);
		return 0;
	default:
		return -ENODEV;
	}
}

static int arm_smmu_domain_set_attr(struct iommu_domain *domain,
				    enum iommu_attr attr, void *data)
{
	int ret = 0;
	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);

	mutex_lock(&smmu_domain->init_mutex);

	switch (attr) {
	case DOMAIN_ATTR_NESTING:
		if (smmu_domain->smmu) {
			ret = -EPERM;
			goto out_unlock;
		}

		if (*(int *)data)
			smmu_domain->stage = ARM_SMMU_DOMAIN_NESTED;
		else
			smmu_domain->stage = ARM_SMMU_DOMAIN_S1;

		break;
	default:
		ret = -ENODEV;
	}

out_unlock:
	mutex_unlock(&smmu_domain->init_mutex);
	return ret;
}
static struct iommu_ops arm_smmu_ops = {
	.capable		= arm_smmu_capable,
	.domain_alloc		= arm_smmu_domain_alloc,
	.domain_free		= arm_smmu_domain_free,
	.attach_dev		= arm_smmu_attach_dev,
	.map			= arm_smmu_map,
	.unmap			= arm_smmu_unmap,
	.map_sg			= default_iommu_map_sg,
	.iova_to_phys		= arm_smmu_iova_to_phys,
	.add_device		= arm_smmu_add_device,
	.remove_device		= arm_smmu_remove_device,
	.device_group		= arm_smmu_device_group,
	.domain_get_attr	= arm_smmu_domain_get_attr,
	.domain_set_attr	= arm_smmu_domain_set_attr,
	.pgsize_bitmap		= -1UL, /* Restricted during device attach */
};
static void arm_smmu_device_reset(struct arm_smmu_device *smmu)
{
	void __iomem *gr0_base = ARM_SMMU_GR0(smmu);
	void __iomem *cb_base;
	int i;
	u32 reg, major;

	/* clear global FSR */
	reg = readl_relaxed(ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sGFSR);
	writel(reg, ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sGFSR);

	/*
	 * Reset stream mapping groups: Initial values mark all SMRn as
	 * invalid and all S2CRn as bypass unless overridden.
	 */
	for (i = 0; i < smmu->num_mapping_groups; ++i)
		arm_smmu_write_sme(smmu, i);

	/*
	 * Before clearing ARM_MMU500_ACTLR_CPRE, need to
	 * clear CACHE_LOCK bit of ACR first. And, CACHE_LOCK
	 * bit is only present in MMU-500r2 onwards.
	 */
	reg = readl_relaxed(gr0_base + ARM_SMMU_GR0_ID7);
	major = (reg >> ID7_MAJOR_SHIFT) & ID7_MAJOR_MASK;
	if ((smmu->model == ARM_MMU500) && (major >= 2)) {
		reg = readl_relaxed(gr0_base + ARM_SMMU_GR0_sACR);
		reg &= ~ARM_MMU500_ACR_CACHE_LOCK;
		writel_relaxed(reg, gr0_base + ARM_SMMU_GR0_sACR);
	}

	/* Make sure all context banks are disabled and clear CB_FSR */
	for (i = 0; i < smmu->num_context_banks; ++i) {
		cb_base = ARM_SMMU_CB_BASE(smmu) + ARM_SMMU_CB(smmu, i);
		writel_relaxed(0, cb_base + ARM_SMMU_CB_SCTLR);
		writel_relaxed(FSR_FAULT, cb_base + ARM_SMMU_CB_FSR);
		/*
		 * Disable MMU-500's not-particularly-beneficial next-page
		 * prefetcher for the sake of errata #841119 and #826419.
		 */
		if (smmu->model == ARM_MMU500) {
			reg = readl_relaxed(cb_base + ARM_SMMU_CB_ACTLR);
			reg &= ~ARM_MMU500_ACTLR_CPRE;
			writel_relaxed(reg, cb_base + ARM_SMMU_CB_ACTLR);
		}
	}

	/* Invalidate the TLB, just in case */
	writel_relaxed(0, gr0_base + ARM_SMMU_GR0_TLBIALLH);
	writel_relaxed(0, gr0_base + ARM_SMMU_GR0_TLBIALLNSNH);

	reg = readl_relaxed(ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sCR0);

	/* Enable fault reporting */
	reg |= (sCR0_GFRE | sCR0_GFIE | sCR0_GCFGFRE | sCR0_GCFGFIE);

	/* Disable TLB broadcasting. */
	reg |= (sCR0_VMIDPNE | sCR0_PTM);

	/* Enable client access, handling unmatched streams as appropriate */
	reg &= ~sCR0_CLIENTPD;
	if (disable_bypass)
		reg |= sCR0_USFCFG;
	else
		reg &= ~sCR0_USFCFG;

	/* Disable forced broadcasting */
	reg &= ~sCR0_FB;

	/* Don't upgrade barriers */
	reg &= ~(sCR0_BSU_MASK << sCR0_BSU_SHIFT);

	if (smmu->features & ARM_SMMU_FEAT_VMID16)
		reg |= sCR0_VMID16EN;

	/* Push the button */
	__arm_smmu_tlb_sync(smmu);
	writel(reg, ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sCR0);
}
static int arm_smmu_id_size_to_bits(int size)
{
	switch (size) {
	case 0:
		return 32;
	case 1:
		return 36;
	case 2:
		return 40;
	case 3:
		return 42;
	case 4:
		return 44;
	case 5:
	default:
		return 48;
	}
}
static int arm_smmu_device_cfg_probe(struct arm_smmu_device *smmu)
{
	unsigned long size;
	void __iomem *gr0_base = ARM_SMMU_GR0(smmu);
	u32 id;
	bool cttw_dt, cttw_reg;
	int i;

	dev_notice(smmu->dev, "probing hardware configuration...\n");
	dev_notice(smmu->dev, "SMMUv%d with:\n",
			smmu->version == ARM_SMMU_V2 ? 2 : 1);

	/* ID0 */
	id = readl_relaxed(gr0_base + ARM_SMMU_GR0_ID0);

	/* Restrict available stages based on module parameter */
	if (force_stage == 1)
		id &= ~(ID0_S2TS | ID0_NTS);
	else if (force_stage == 2)
		id &= ~(ID0_S1TS | ID0_NTS);

	if (id & ID0_S1TS) {
		smmu->features |= ARM_SMMU_FEAT_TRANS_S1;
		dev_notice(smmu->dev, "\tstage 1 translation\n");
	}

	if (id & ID0_S2TS) {
		smmu->features |= ARM_SMMU_FEAT_TRANS_S2;
		dev_notice(smmu->dev, "\tstage 2 translation\n");
	}

	if (id & ID0_NTS) {
		smmu->features |= ARM_SMMU_FEAT_TRANS_NESTED;
		dev_notice(smmu->dev, "\tnested translation\n");
	}

	if (!(smmu->features &
		(ARM_SMMU_FEAT_TRANS_S1 | ARM_SMMU_FEAT_TRANS_S2))) {
		dev_err(smmu->dev, "\tno translation support!\n");
		return -ENODEV;
	}

	if ((id & ID0_S1TS) &&
		((smmu->version < ARM_SMMU_V2) || !(id & ID0_ATOSNS))) {
		smmu->features |= ARM_SMMU_FEAT_TRANS_OPS;
		dev_notice(smmu->dev, "\taddress translation ops\n");
	}

	/*
	 * In order for DMA API calls to work properly, we must defer to what
	 * the DT says about coherency, regardless of what the hardware claims.
	 * Fortunately, this also opens up a workaround for systems where the
	 * ID register value has ended up configured incorrectly.
	 */
	cttw_dt = of_dma_is_coherent(smmu->dev->of_node);
	cttw_reg = !!(id & ID0_CTTW);
	if (cttw_dt)
		smmu->features |= ARM_SMMU_FEAT_COHERENT_WALK;
	if (cttw_dt || cttw_reg)
		dev_notice(smmu->dev, "\t%scoherent table walk\n",
			   cttw_dt ? "" : "non-");
	if (cttw_dt != cttw_reg)
		dev_notice(smmu->dev,
			   "\t(IDR0.CTTW overridden by dma-coherent property)\n");

	/* Max. number of entries we have for stream matching/indexing */
	size = 1 << ((id >> ID0_NUMSIDB_SHIFT) & ID0_NUMSIDB_MASK);
	smmu->streamid_mask = size - 1;
	if (id & ID0_SMS) {
		u32 smr;

		smmu->features |= ARM_SMMU_FEAT_STREAM_MATCH;
		size = (id >> ID0_NUMSMRG_SHIFT) & ID0_NUMSMRG_MASK;
		if (size == 0) {
			dev_err(smmu->dev,
				"stream-matching supported, but no SMRs present!\n");
			return -ENODEV;
		}

		/*
		 * SMR.ID bits may not be preserved if the corresponding MASK
		 * bits are set, so check each one separately. We can reject
		 * masters later if they try to claim IDs outside these masks.
		 */
		smr = smmu->streamid_mask << SMR_ID_SHIFT;
		writel_relaxed(smr, gr0_base + ARM_SMMU_GR0_SMR(0));
		smr = readl_relaxed(gr0_base + ARM_SMMU_GR0_SMR(0));
		smmu->streamid_mask = smr >> SMR_ID_SHIFT;

		smr = smmu->streamid_mask << SMR_MASK_SHIFT;
		writel_relaxed(smr, gr0_base + ARM_SMMU_GR0_SMR(0));
		smr = readl_relaxed(gr0_base + ARM_SMMU_GR0_SMR(0));
		smmu->smr_mask_mask = smr >> SMR_MASK_SHIFT;

		/* Zero-initialised to mark as invalid */
		smmu->smrs = devm_kcalloc(smmu->dev, size, sizeof(*smmu->smrs),
					  GFP_KERNEL);
		if (!smmu->smrs)
			return -ENOMEM;

		dev_notice(smmu->dev,
			   "\tstream matching with %lu register groups, mask 0x%x",
			   size, smmu->smr_mask_mask);
	}
	/* s2cr->type == 0 means translation, so initialise explicitly */
	smmu->s2crs = devm_kmalloc_array(smmu->dev, size, sizeof(*smmu->s2crs),
					 GFP_KERNEL);
	if (!smmu->s2crs)
		return -ENOMEM;
	for (i = 0; i < size; i++)
		smmu->s2crs[i] = s2cr_init_val;

	smmu->num_mapping_groups = size;

	if (smmu->version < ARM_SMMU_V2 || !(id & ID0_PTFS_NO_AARCH32)) {
		smmu->features |= ARM_SMMU_FEAT_FMT_AARCH32_L;
		if (!(id & ID0_PTFS_NO_AARCH32S))
			smmu->features |= ARM_SMMU_FEAT_FMT_AARCH32_S;
	}

	/* ID1 */
	id = readl_relaxed(gr0_base + ARM_SMMU_GR0_ID1);
	smmu->pgshift = (id & ID1_PAGESIZE) ? 16 : 12;

	/* Check for size mismatch of SMMU address space from mapped region */
	size = 1 << (((id >> ID1_NUMPAGENDXB_SHIFT) & ID1_NUMPAGENDXB_MASK) + 1);
	size *= 2 << smmu->pgshift;
	if (smmu->size != size)
		dev_warn(smmu->dev,
			"SMMU address space size (0x%lx) differs from mapped region size (0x%lx)!\n",
			size, smmu->size);

	smmu->num_s2_context_banks = (id >> ID1_NUMS2CB_SHIFT) & ID1_NUMS2CB_MASK;
	smmu->num_context_banks = (id >> ID1_NUMCB_SHIFT) & ID1_NUMCB_MASK;
	if (smmu->num_s2_context_banks > smmu->num_context_banks) {
		dev_err(smmu->dev, "impossible number of S2 context banks!\n");
		return -ENODEV;
	}
	dev_notice(smmu->dev, "\t%u context banks (%u stage-2 only)\n",
		   smmu->num_context_banks, smmu->num_s2_context_banks);
	/*
	 * Cavium CN88xx erratum #27704.
	 * Ensure ASID and VMID allocation is unique across all SMMUs in
	 * the system.
	 */
	if (smmu->model == CAVIUM_SMMUV2) {
		smmu->cavium_id_base =
			atomic_add_return(smmu->num_context_banks,
					  &cavium_smmu_context_count);
		smmu->cavium_id_base -= smmu->num_context_banks;
	}

	/* ID2 */
	id = readl_relaxed(gr0_base + ARM_SMMU_GR0_ID2);
	size = arm_smmu_id_size_to_bits((id >> ID2_IAS_SHIFT) & ID2_IAS_MASK);
	smmu->ipa_size = size;

	/* The output mask is also applied for bypass */
	size = arm_smmu_id_size_to_bits((id >> ID2_OAS_SHIFT) & ID2_OAS_MASK);
	smmu->pa_size = size;

	if (id & ID2_VMID16)
		smmu->features |= ARM_SMMU_FEAT_VMID16;

	/*
	 * What the page table walker can address actually depends on which
	 * descriptor format is in use, but since a) we don't know that yet,
	 * and b) it can vary per context bank, this will have to do...
	 */
	if (dma_set_mask_and_coherent(smmu->dev, DMA_BIT_MASK(size)))
		dev_warn(smmu->dev,
			 "failed to set DMA mask for table walker\n");

	if (smmu->version < ARM_SMMU_V2) {
		smmu->va_size = smmu->ipa_size;
		if (smmu->version == ARM_SMMU_V1_64K)
			smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_64K;
	} else {
		size = (id >> ID2_UBS_SHIFT) & ID2_UBS_MASK;
		smmu->va_size = arm_smmu_id_size_to_bits(size);
		if (id & ID2_PTFS_4K)
			smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_4K;
		if (id & ID2_PTFS_16K)
			smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_16K;
		if (id & ID2_PTFS_64K)
			smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_64K;
	}

	/* Now we've corralled the various formats, what'll it do? */
	if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH32_S)
		smmu->pgsize_bitmap |= SZ_4K | SZ_64K | SZ_1M | SZ_16M;
	if (smmu->features &
	    (ARM_SMMU_FEAT_FMT_AARCH32_L | ARM_SMMU_FEAT_FMT_AARCH64_4K))
		smmu->pgsize_bitmap |= SZ_4K | SZ_2M | SZ_1G;
	if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH64_16K)
		smmu->pgsize_bitmap |= SZ_16K | SZ_32M;
	if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH64_64K)
		smmu->pgsize_bitmap |= SZ_64K | SZ_512M;

	if (arm_smmu_ops.pgsize_bitmap == -1UL)
		arm_smmu_ops.pgsize_bitmap = smmu->pgsize_bitmap;
	else
		arm_smmu_ops.pgsize_bitmap |= smmu->pgsize_bitmap;
	dev_notice(smmu->dev, "\tSupported page sizes: 0x%08lx\n",
		   smmu->pgsize_bitmap);

	if (smmu->features & ARM_SMMU_FEAT_TRANS_S1)
		dev_notice(smmu->dev, "\tStage-1: %lu-bit VA -> %lu-bit IPA\n",
			   smmu->va_size, smmu->ipa_size);

	if (smmu->features & ARM_SMMU_FEAT_TRANS_S2)
		dev_notice(smmu->dev, "\tStage-2: %lu-bit IPA -> %lu-bit PA\n",
			   smmu->ipa_size, smmu->pa_size);

	return 0;
}
struct arm_smmu_match_data {
	enum arm_smmu_arch_version version;
	enum arm_smmu_implementation model;
};

#define ARM_SMMU_MATCH_DATA(name, ver, imp)	\
static struct arm_smmu_match_data name = { .version = ver, .model = imp }

ARM_SMMU_MATCH_DATA(smmu_generic_v1, ARM_SMMU_V1, GENERIC_SMMU);
ARM_SMMU_MATCH_DATA(smmu_generic_v2, ARM_SMMU_V2, GENERIC_SMMU);
ARM_SMMU_MATCH_DATA(arm_mmu401, ARM_SMMU_V1_64K, GENERIC_SMMU);
ARM_SMMU_MATCH_DATA(arm_mmu500, ARM_SMMU_V2, ARM_MMU500);
ARM_SMMU_MATCH_DATA(cavium_smmuv2, ARM_SMMU_V2, CAVIUM_SMMUV2);
static const struct of_device_id arm_smmu_of_match[] = {
	{ .compatible = "arm,smmu-v1", .data = &smmu_generic_v1 },
	{ .compatible = "arm,smmu-v2", .data = &smmu_generic_v2 },
	{ .compatible = "arm,mmu-400", .data = &smmu_generic_v1 },
	{ .compatible = "arm,mmu-401", .data = &arm_mmu401 },
	{ .compatible = "arm,mmu-500", .data = &arm_mmu500 },
	{ .compatible = "cavium,smmu-v2", .data = &cavium_smmuv2 },
	{ },
};
MODULE_DEVICE_TABLE(of, arm_smmu_of_match);
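
/*
 * Illustrative SMMU node matched by the table above (addresses and
 * interrupt specifiers are examples only); the interrupts list carries the
 * global fault interrupt(s) first, followed by the context interrupts:
 *
 *	smmu@2b400000 {
 *		compatible = "arm,mmu-500";
 *		reg = <0x2b400000 0x10000>;
 *		#global-interrupts = <1>;
 *		interrupts = <0 229 4>,
 *			     <0 230 4>;
 *	};
 */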
static int arm_smmu_device_dt_probe(struct platform_device *pdev)
{
	const struct arm_smmu_match_data *data;
	struct resource *res;
	struct arm_smmu_device *smmu;
	struct device *dev = &pdev->dev;
	int num_irqs, i, err;

	smmu = devm_kzalloc(dev, sizeof(*smmu), GFP_KERNEL);
	if (!smmu) {
		dev_err(dev, "failed to allocate arm_smmu_device\n");
		return -ENOMEM;
	}
	smmu->dev = dev;

	data = of_device_get_match_data(dev);
	smmu->version = data->version;
	smmu->model = data->model;

	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
	smmu->base = devm_ioremap_resource(dev, res);
	if (IS_ERR(smmu->base))
		return PTR_ERR(smmu->base);
	smmu->size = resource_size(res);

	if (of_property_read_u32(dev->of_node, "#global-interrupts",
				 &smmu->num_global_irqs)) {
		dev_err(dev, "missing #global-interrupts property\n");
		return -ENODEV;
	}

	num_irqs = 0;
	while ((res = platform_get_resource(pdev, IORESOURCE_IRQ, num_irqs))) {
		num_irqs++;
		if (num_irqs > smmu->num_global_irqs)
			smmu->num_context_irqs++;
	}

	if (!smmu->num_context_irqs) {
		dev_err(dev, "found %d interrupts but expected at least %d\n",
			num_irqs, smmu->num_global_irqs + 1);
		return -ENODEV;
	}

	smmu->irqs = devm_kzalloc(dev, sizeof(*smmu->irqs) * num_irqs,
				  GFP_KERNEL);
	if (!smmu->irqs) {
		dev_err(dev, "failed to allocate %d irqs\n", num_irqs);
		return -ENOMEM;
	}

	for (i = 0; i < num_irqs; ++i) {
		int irq = platform_get_irq(pdev, i);

		if (irq < 0) {
			dev_err(dev, "failed to get irq index %d\n", i);
			return -ENODEV;
		}
		smmu->irqs[i] = irq;
	}

	err = arm_smmu_device_cfg_probe(smmu);
	if (err)
		return err;

	parse_driver_options(smmu);

	if (smmu->version == ARM_SMMU_V2 &&
	    smmu->num_context_banks != smmu->num_context_irqs) {
		dev_err(dev,
			"found only %d context interrupt(s) but %d required\n",
			smmu->num_context_irqs, smmu->num_context_banks);
		return -ENODEV;
	}

	for (i = 0; i < smmu->num_global_irqs; ++i) {
		err = devm_request_irq(smmu->dev, smmu->irqs[i],
				       arm_smmu_global_fault,
				       IRQF_SHARED,
				       "arm-smmu global fault",
				       smmu);
		if (err) {
			dev_err(dev, "failed to request global IRQ %d (%u)\n",
				i, smmu->irqs[i]);
			return err;
		}
	}

	platform_set_drvdata(pdev, smmu);
	arm_smmu_device_reset(smmu);
	return 0;
}
static int arm_smmu_device_remove(struct platform_device *pdev)
{
	struct arm_smmu_device *smmu = platform_get_drvdata(pdev);

	if (!smmu)
		return -ENODEV;

	if (!bitmap_empty(smmu->context_map, ARM_SMMU_MAX_CBS))
		dev_err(&pdev->dev, "removing device with active domains!\n");

	/* Turn the thing off */
	writel(sCR0_CLIENTPD, ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sCR0);
	return 0;
}
static struct platform_driver arm_smmu_driver = {
	.driver	= {
		.name		= "arm-smmu",
		.of_match_table	= of_match_ptr(arm_smmu_of_match),
	},
	.probe	= arm_smmu_device_dt_probe,
	.remove	= arm_smmu_device_remove,
};
static int __init arm_smmu_init(void)
{
	struct device_node *np;
	int ret;

	/*
	 * Play nice with systems that don't have an ARM SMMU by checking that
	 * an ARM SMMU exists in the system before proceeding with the driver
	 * and IOMMU bus operation registration.
	 */
	np = of_find_matching_node(NULL, arm_smmu_of_match);
	if (!np)
		return 0;

	of_node_put(np);

	ret = platform_driver_register(&arm_smmu_driver);
	if (ret)
		return ret;

	/* Oh, for a proper bus abstraction */
	if (!iommu_present(&platform_bus_type))
		bus_set_iommu(&platform_bus_type, &arm_smmu_ops);

#ifdef CONFIG_ARM_AMBA
	if (!iommu_present(&amba_bustype))
		bus_set_iommu(&amba_bustype, &arm_smmu_ops);
#endif

#ifdef CONFIG_PCI
	if (!iommu_present(&pci_bus_type)) {
		pci_request_acs();
		bus_set_iommu(&pci_bus_type, &arm_smmu_ops);
	}
#endif

	return 0;
}
static void __exit arm_smmu_exit(void)
{
	return platform_driver_unregister(&arm_smmu_driver);
}

subsys_initcall(arm_smmu_init);
module_exit(arm_smmu_exit);

MODULE_DESCRIPTION("IOMMU API for ARM architected SMMU implementations");
MODULE_AUTHOR("Will Deacon <will.deacon@arm.com>");
MODULE_LICENSE("GPL v2");