Merge tag 'iommu-updates-v4.9' of git://git.kernel.org/pub/scm/linux/kernel/git/joro...
author Linus Torvalds <torvalds@linux-foundation.org>
Tue, 11 Oct 2016 19:52:41 +0000 (12:52 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Tue, 11 Oct 2016 19:52:41 +0000 (12:52 -0700)
Pull IOMMU updates from Joerg Roedel:

 - support for interrupt virtualization in the AMD IOMMU driver. These
   patches were shared with the KVM tree and are already merged through
   that tree.

 - generic DT-binding support for the ARM-SMMU driver. With this the
   driver now makes use of the generic DMA-API code. This also required
   some changes outside of the IOMMU code, but these are acked by the
   respective maintainers.

 - more cleanups and fixes all over the place.

* tag 'iommu-updates-v4.9' of git://git.kernel.org/pub/scm/linux/kernel/git/joro/iommu: (40 commits)
  iommu/amd: No need to wait iommu completion if no dte irq entry change
  iommu/amd: Free domain id when free a domain of struct dma_ops_domain
  iommu/amd: Use standard bitmap operation to set bitmap
  iommu/amd: Clean up the cmpxchg64 invocation
  iommu/io-pgtable-arm: Check for v7s-incapable systems
  iommu/dma: Avoid PCI host bridge windows
  iommu/dma: Add support for mapping MSIs
  iommu/arm-smmu: Set domain geometry
  iommu/arm-smmu: Wire up generic configuration support
  Docs: dt: document ARM SMMU generic binding usage
  iommu/arm-smmu: Convert to iommu_fwspec
  iommu/arm-smmu: Intelligent SMR allocation
  iommu/arm-smmu: Add a stream map entry iterator
  iommu/arm-smmu: Streamline SMMU data lookups
  iommu/arm-smmu: Refactor mmu-masters handling
  iommu/arm-smmu: Keep track of S2CR state
  iommu/arm-smmu: Consolidate stream map entry state
  iommu/arm-smmu: Handle stream IDs more dynamically
  iommu/arm-smmu: Set PRIVCFG in stage 1 STEs
  iommu/arm-smmu: Support non-PCI devices with SMMUv3
  ...

28 files changed:
Documentation/devicetree/bindings/iommu/arm,smmu-v3.txt
Documentation/devicetree/bindings/iommu/arm,smmu.txt
Documentation/devicetree/bindings/pci/pci-iommu.txt [new file with mode: 0644]
arch/arm64/mm/dma-mapping.c
arch/x86/configs/x86_64_defconfig
drivers/gpu/drm/exynos/exynos_drm_iommu.h
drivers/iommu/Kconfig
drivers/iommu/amd_iommu.c
drivers/iommu/amd_iommu_init.c
drivers/iommu/amd_iommu_proto.h
drivers/iommu/arm-smmu-v3.c
drivers/iommu/arm-smmu.c
drivers/iommu/dma-iommu.c
drivers/iommu/exynos-iommu.c
drivers/iommu/intel-iommu.c
drivers/iommu/io-pgtable-arm-v7s.c
drivers/iommu/iommu.c
drivers/iommu/ipmmu-vmsa.c
drivers/iommu/of_iommu.c
drivers/irqchip/irq-gic-v2m.c
drivers/irqchip/irq-gic-v3-its.c
drivers/of/irq.c
drivers/of/of_pci.c
include/dt-bindings/memory/mt2701-larb-port.h
include/linux/device.h
include/linux/dma-iommu.h
include/linux/iommu.h
include/linux/of_pci.h

diff --git a/Documentation/devicetree/bindings/iommu/arm,smmu-v3.txt b/Documentation/devicetree/bindings/iommu/arm,smmu-v3.txt
index 7b94c88cf2ee7341e9e7fd39f2352c6eda451a24..be57550e14e487a797c62b485870d9a728d5c731 100644 (file)
@@ -27,6 +27,12 @@ the PCIe specification.
                       * "cmdq-sync" - CMD_SYNC complete
                       * "gerror"    - Global Error activated
 
+- #iommu-cells      : See the generic IOMMU binding described in
+                        devicetree/bindings/pci/pci-iommu.txt
+                      for details. For SMMUv3, must be 1, with each cell
+                      describing a single stream ID. All possible stream
+                      IDs which a device may emit must be described.
+
 ** SMMUv3 optional properties:
 
 - dma-coherent      : Present if DMA operations made by the SMMU (page
@@ -54,6 +60,6 @@ the PCIe specification.
                              <GIC_SPI 79 IRQ_TYPE_EDGE_RISING>;
                 interrupt-names = "eventq", "priq", "cmdq-sync", "gerror";
                 dma-coherent;
-                #iommu-cells = <0>;
+                #iommu-cells = <1>;
                 msi-parent = <&its 0xff0000>;
         };
diff --git a/Documentation/devicetree/bindings/iommu/arm,smmu.txt b/Documentation/devicetree/bindings/iommu/arm,smmu.txt
index 19fe6f2c83f61c07dd6e5f32ced3921f1804e8a4..e862d148520557acf60c8015aabadf54de669c42 100644 (file)
@@ -35,12 +35,16 @@ conditions.
                   interrupt per context bank. In the case of a single,
                   combined interrupt, it must be listed multiple times.
 
-- mmu-masters   : A list of phandles to device nodes representing bus
-                  masters for which the SMMU can provide a translation
-                  and their corresponding StreamIDs (see example below).
-                  Each device node linked from this list must have a
-                  "#stream-id-cells" property, indicating the number of
-                  StreamIDs associated with it.
+- #iommu-cells  : See Documentation/devicetree/bindings/iommu/iommu.txt
+                  for details. With a value of 1, each "iommus" entry
+                  represents a distinct stream ID emitted by that device
+                  into the relevant SMMU.
+
+                  SMMUs with stream matching support and complex masters
+                  may use a value of 2, where the second cell represents
+                  an SMR mask to combine with the ID in the first cell.
+                  Care must be taken to ensure the set of matched IDs
+                  does not result in conflicts.
 
 ** System MMU optional properties:
 
@@ -56,9 +60,20 @@ conditions.
                   aliases of secure registers have to be used during
                   SMMU configuration.
 
-Example:
+** Deprecated properties:
+
+- mmu-masters (deprecated in favour of the generic "iommus" binding) :
+                  A list of phandles to device nodes representing bus
+                  masters for which the SMMU can provide a translation
+                  and their corresponding Stream IDs. Each device node
+                  linked from this list must have a "#stream-id-cells"
+                  property, indicating the number of Stream ID
+                  arguments associated with its phandle.
 
-        smmu {
+** Examples:
+
+        /* SMMU with stream matching or stream indexing */
+        smmu1: iommu {
                 compatible = "arm,smmu-v1";
                 reg = <0xba5e0000 0x10000>;
                 #global-interrupts = <2>;
@@ -68,11 +83,29 @@ Example:
                              <0 35 4>,
                              <0 36 4>,
                              <0 37 4>;
+                #iommu-cells = <1>;
+        };
+
+        /* device with two stream IDs, 0 and 7 */
+        master1 {
+                iommus = <&smmu1 0>,
+                         <&smmu1 7>;
+        };
+
+
+        /* SMMU with stream matching */
+        smmu2: iommu {
+                ...
+                #iommu-cells = <2>;
+        };
+
+        /* device with stream IDs 0 and 7 */
+        master2 {
+                iommus = <&smmu2 0 0>,
+                         <&smmu2 7 0>;
+        };
 
-                /*
-                 * Two DMA controllers, the first with two StreamIDs (0xd01d
-                 * and 0xd01e) and the second with only one (0xd11c).
-                 */
-                mmu-masters = <&dma0 0xd01d 0xd01e>,
-                              <&dma1 0xd11c>;
+        /* device with stream IDs 1, 17, 33 and 49 */
+        master3 {
+                iommus = <&smmu2 1 0x30>;
         };
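
As a cross-check of the two-cell encoding above, here is a minimal standalone
sketch (plain C, not part of the patch) of the SMR match rule it implies: set
bits in the mask cell are "don't care" bits during stream ID comparison, so
master3's <&smmu2 1 0x30> matches IDs 1, 17, 33 and 49.

#include <stdint.h>
#include <stdio.h>

/* Illustrative only: does a stream ID match an (id, mask) SMR pair? */
static int smr_matches(uint16_t stream, uint16_t id, uint16_t mask)
{
	return (stream & ~mask) == (id & ~mask);
}

int main(void)
{
	const uint16_t id = 1, mask = 0x30;	/* master3 above */
	uint32_t s;

	for (s = 0; s < 0x80; s++)	/* small ID space for the demo */
		if (smr_matches(s, id, mask))
			printf("matches stream ID %u\n", s);
	return 0;	/* prints 1, 17, 33 and 49 */
}
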
diff --git a/Documentation/devicetree/bindings/pci/pci-iommu.txt b/Documentation/devicetree/bindings/pci/pci-iommu.txt
new file mode 100644 (file)
index 0000000..56c8296
--- /dev/null
@@ -0,0 +1,171 @@
+This document describes the generic device tree binding for describing the
+relationship between PCI(e) devices and IOMMU(s).
+
+Each PCI(e) device under a root complex is uniquely identified by its Requester
+ID (AKA RID). A Requester ID is a triplet of a Bus number, Device number, and
+Function number.
+
+For the purpose of this document, when treated as a numeric value, a RID is
+formatted such that:
+
+* Bits [15:8] are the Bus number.
+* Bits [7:3] are the Device number.
+* Bits [2:0] are the Function number.
+* Any other bits required for padding must be zero.
+
+IOMMUs may distinguish PCI devices through sideband data derived from the
+Requester ID. While a given PCI device can only master through one IOMMU, a
+root complex may split masters across a set of IOMMUs (e.g. with one IOMMU per
+bus).
+
+The generic 'iommus' property is insufficient to describe this relationship,
+and a mechanism is required to map from a PCI device to its IOMMU and sideband
+data.
+
+For generic IOMMU bindings, see
+Documentation/devicetree/bindings/iommu/iommu.txt.
+
+
+PCI root complex
+================
+
+Optional properties
+-------------------
+
+- iommu-map: Maps a Requester ID to an IOMMU and associated iommu-specifier
+  data.
+
+  The property is an arbitrary number of tuples of
+  (rid-base,iommu,iommu-base,length).
+
+  Any RID r in the interval [rid-base, rid-base + length) is associated with
+  the listed IOMMU, with the iommu-specifier (r - rid-base + iommu-base).
+
+- iommu-map-mask: A mask to be applied to each Requester ID prior to being
+  mapped to an iommu-specifier per the iommu-map property.
+
+
+Example (1)
+===========
+
+/ {
+       #address-cells = <1>;
+       #size-cells = <1>;
+
+       iommu: iommu@a {
+               reg = <0xa 0x1>;
+               compatible = "vendor,some-iommu";
+               #iommu-cells = <1>;
+       };
+
+       pci: pci@f {
+               reg = <0xf 0x1>;
+               compatible = "vendor,pcie-root-complex";
+               device_type = "pci";
+
+               /*
+                * The sideband data provided to the IOMMU is the RID,
+                * identity-mapped.
+                */
+               iommu-map = <0x0 &iommu 0x0 0x10000>;
+       };
+};
+
+
+Example (2)
+===========
+
+/ {
+       #address-cells = <1>;
+       #size-cells = <1>;
+
+       iommu: iommu@a {
+               reg = <0xa 0x1>;
+               compatible = "vendor,some-iommu";
+               #iommu-cells = <1>;
+       };
+
+       pci: pci@f {
+               reg = <0xf 0x1>;
+               compatible = "vendor,pcie-root-complex";
+               device_type = "pci";
+
+               /*
+                * The sideband data provided to the IOMMU is the RID with the
+                * function bits masked out.
+                */
+               iommu-map = <0x0 &iommu 0x0 0x10000>;
+               iommu-map-mask = <0xfff8>;
+       };
+};
+
+
+Example (3)
+===========
+
+/ {
+       #address-cells = <1>;
+       #size-cells = <1>;
+
+       iommu: iommu@a {
+               reg = <0xa 0x1>;
+               compatible = "vendor,some-iommu";
+               #iommu-cells = <1>;
+       };
+
+       pci: pci@f {
+               reg = <0xf 0x1>;
+               compatible = "vendor,pcie-root-complex";
+               device_type = "pci";
+
+               /*
+                * The sideband data provided to the IOMMU is the RID,
+                * but the high bits of the bus number are flipped.
+                */
+               iommu-map = <0x0000 &iommu 0x8000 0x8000>,
+                           <0x8000 &iommu 0x0000 0x8000>;
+       };
+};
+
+
+Example (4)
+===========
+
+/ {
+       #address-cells = <1>;
+       #size-cells = <1>;
+
+       iommu_a: iommu@a {
+               reg = <0xa 0x1>;
+               compatible = "vendor,some-iommu";
+               #iommu-cells = <1>;
+       };
+
+       iommu_b: iommu@b {
+               reg = <0xb 0x1>;
+               compatible = "vendor,some-iommu";
+               #iommu-cells = <1>;
+       };
+
+       iommu_c: iommu@c {
+               reg = <0xc 0x1>;
+               compatible = "vendor,some-iommu";
+               #iommu-cells = <1>;
+       };
+
+       pci: pci@f {
+               reg = <0xf 0x1>;
+               compatible = "vendor,pcie-root-complex";
+               device_type = "pci";
+
+               /*
+                * Devices with bus number 0-127 are mastered via IOMMU
+                * a, with sideband data being RID[14:0].
+                * Devices with bus number 128-255 are mastered via
+                * IOMMU b, with sideband data being RID[14:0].
+                * No devices master via IOMMU c.
+                */
+               iommu-map = <0x0000 &iommu_a 0x0000 0x8000>,
+                           <0x8000 &iommu_b 0x0000 0x8000>;
+       };
+};
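
The iommu-map lookup above is simple arithmetic, and can be modelled in a few
lines of standalone C. This sketch uses made-up names and is not the kernel
implementation (the in-tree map walker lives under drivers/of/, which this
merge also touches); it just demonstrates the mask-then-translate rule the
binding defines.

#include <stdint.h>
#include <stdio.h>

/* One (rid-base, iommu, iommu-base, length) tuple from an iommu-map. */
struct iommu_map_entry {
	uint32_t rid_base;
	const char *iommu;	/* stands in for the phandle target */
	uint32_t iommu_base;
	uint32_t length;
};

/* Mask the RID, find the covering tuple, derive the iommu-specifier. */
static int map_rid(const struct iommu_map_entry *map, int nr_entries,
		   uint32_t rid, uint32_t map_mask,
		   const char **iommu, uint32_t *spec)
{
	uint32_t masked = rid & map_mask;
	int i;

	for (i = 0; i < nr_entries; i++) {
		if (masked >= map[i].rid_base &&
		    masked < map[i].rid_base + map[i].length) {
			*iommu = map[i].iommu;
			*spec = masked - map[i].rid_base + map[i].iommu_base;
			return 0;
		}
	}
	return -1;	/* RID not covered by the map */
}

int main(void)
{
	/* Example (2) above: identity map with the function bits masked. */
	const struct iommu_map_entry map[] = {
		{ 0x0, "iommu", 0x0, 0x10000 },
	};
	const char *iommu;
	uint32_t spec;

	/* Bus 0x12, device 0x03, function 5 gives RID 0x121d. */
	if (!map_rid(map, 1, 0x121d, 0xfff8, &iommu, &spec))
		printf("%s, specifier 0x%x\n", iommu, spec);	/* 0x1218 */
	return 0;
}
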
diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c
index bdacead5b8024916e7425718ea3170e9e6ffd1ba..3f74d0d98de63322b15b387cb967009db17079ff 100644 (file)
@@ -828,7 +828,7 @@ static bool do_iommu_attach(struct device *dev, const struct iommu_ops *ops,
         * then the IOMMU core will have already configured a group for this
         * device, and allocated the default domain for that group.
         */
-       if (!domain || iommu_dma_init_domain(domain, dma_base, size)) {
+       if (!domain || iommu_dma_init_domain(domain, dma_base, size, dev)) {
                pr_warn("Failed to set up IOMMU for device %s; retaining platform DMA ops\n",
                        dev_name(dev));
                return false;
diff --git a/arch/x86/configs/x86_64_defconfig b/arch/x86/configs/x86_64_defconfig
index d28bdabcc87ee5dea26088451ebcadae59e38779..7ef4a099defcda7f2d4e70fb7b3edec77361f2ec 100644 (file)
@@ -255,7 +255,6 @@ CONFIG_RTC_CLASS=y
 CONFIG_DMADEVICES=y
 CONFIG_EEEPC_LAPTOP=y
 CONFIG_AMD_IOMMU=y
-CONFIG_AMD_IOMMU_STATS=y
 CONFIG_INTEL_IOMMU=y
 # CONFIG_INTEL_IOMMU_DEFAULT_ON is not set
 CONFIG_EFI_VARS=y
diff --git a/drivers/gpu/drm/exynos/exynos_drm_iommu.h b/drivers/gpu/drm/exynos/exynos_drm_iommu.h
index c8de4913fdbe80a3962ebca2744236f523e768b0..87f6b5672e1193a1df76bf3c09eb69abfd966e55 100644 (file)
@@ -66,7 +66,7 @@ static inline int __exynos_iommu_create_mapping(struct exynos_drm_private *priv,
        if (ret)
                goto free_domain;
 
-       ret = iommu_dma_init_domain(domain, start, size);
+       ret = iommu_dma_init_domain(domain, start, size, NULL);
        if (ret)
                goto put_cookie;
 
diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig
index d432ca828472c5bff3830e172ce20bff5d84cb9f..8ee54d71c7eb3ad1e2a43f14068e75939e0dd077 100644 (file)
@@ -309,7 +309,7 @@ config ARM_SMMU
 
 config ARM_SMMU_V3
        bool "ARM Ltd. System MMU Version 3 (SMMUv3) Support"
-       depends on ARM64 && PCI
+       depends on ARM64
        select IOMMU_API
        select IOMMU_IO_PGTABLE_LPAE
        select GENERIC_MSI_IRQ_DOMAIN
diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
index 58fa8cc0262bcc9de042042e447d6bd0902e204d..754595ee11b68afe32e8432d40337482111dcdac 100644 (file)
@@ -103,7 +103,7 @@ struct flush_queue {
        struct flush_queue_entry *entries;
 };
 
-DEFINE_PER_CPU(struct flush_queue, flush_queue);
+static DEFINE_PER_CPU(struct flush_queue, flush_queue);
 
 static atomic_t queue_timer_on;
 static struct timer_list queue_timer;
@@ -1361,7 +1361,8 @@ static u64 *alloc_pte(struct protection_domain *domain,
 
                        __npte = PM_LEVEL_PDE(level, virt_to_phys(page));
 
-                       if (cmpxchg64(pte, __pte, __npte)) {
+                       /* pte could have been changed somewhere. */
+                       if (cmpxchg64(pte, __pte, __npte) != __pte) {
                                free_page((unsigned long)page);
                                continue;
                        }
@@ -1741,6 +1742,9 @@ static void dma_ops_domain_free(struct dma_ops_domain *dom)
 
        free_pagetable(&dom->domain);
 
+       if (dom->domain.id)
+               domain_id_free(dom->domain.id);
+
        kfree(dom);
 }
 
@@ -3649,7 +3653,7 @@ static struct irq_remap_table *get_irq_table(u16 devid, bool ioapic)
 
        table = irq_lookup_table[devid];
        if (table)
-               goto out;
+               goto out_unlock;
 
        alias = amd_iommu_alias_table[devid];
        table = irq_lookup_table[alias];
@@ -3663,7 +3667,7 @@ static struct irq_remap_table *get_irq_table(u16 devid, bool ioapic)
        /* Nothing there yet, allocate new irq remapping table */
        table = kzalloc(sizeof(*table), GFP_ATOMIC);
        if (!table)
-               goto out;
+               goto out_unlock;
 
        /* Initialize table spin-lock */
        spin_lock_init(&table->lock);
@@ -3676,7 +3680,7 @@ static struct irq_remap_table *get_irq_table(u16 devid, bool ioapic)
        if (!table->table) {
                kfree(table);
                table = NULL;
-               goto out;
+               goto out_unlock;
        }
 
        if (!AMD_IOMMU_GUEST_IR_GA(amd_iommu_guest_ir))
@@ -4153,6 +4157,7 @@ static int irq_remapping_alloc(struct irq_domain *domain, unsigned int virq,
        }
        if (index < 0) {
                pr_warn("Failed to allocate IRTE\n");
+               ret = index;
                goto out_free_parent;
        }
 
diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c
index cd1713631a4ad53d3f2d1e59312da2d71222f671..157e93421fb81667c21acc43aaa790f092595395 100644 (file)
@@ -20,6 +20,7 @@
 #include <linux/pci.h>
 #include <linux/acpi.h>
 #include <linux/list.h>
+#include <linux/bitmap.h>
 #include <linux/slab.h>
 #include <linux/syscore_ops.h>
 #include <linux/interrupt.h>
@@ -2285,7 +2286,7 @@ static int __init early_amd_iommu_init(void)
         * never allocate domain 0 because its used as the non-allocated and
         * error value placeholder
         */
-       amd_iommu_pd_alloc_bitmap[0] = 1;
+       __set_bit(0, amd_iommu_pd_alloc_bitmap);
 
        spin_lock_init(&amd_iommu_pd_lock);
 
diff --git a/drivers/iommu/amd_iommu_proto.h b/drivers/iommu/amd_iommu_proto.h
index faa3b4895cf09705cc6e1c9f8f29a4872dd3ef50..7eb60c15c5826c83c594b47bd43316c3985e5bd5 100644 (file)
@@ -79,12 +79,6 @@ static inline int amd_iommu_create_irq_domain(struct amd_iommu *iommu)
 extern int amd_iommu_complete_ppr(struct pci_dev *pdev, int pasid,
                                  int status, int tag);
 
-#ifndef CONFIG_AMD_IOMMU_STATS
-
-static inline void amd_iommu_stats_init(void) { }
-
-#endif /* !CONFIG_AMD_IOMMU_STATS */
-
 static inline bool is_rd890_iommu(struct pci_dev *pdev)
 {
        return (pdev->vendor == PCI_VENDOR_ID_ATI) &&
diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c
index 641e887613193c76e7afb48a81c82b1fd732cc2d..15c01c3cd540b6b0416002ca39c8a72951b3cc75 100644 (file)
 #include <linux/msi.h>
 #include <linux/of.h>
 #include <linux/of_address.h>
+#include <linux/of_iommu.h>
 #include <linux/of_platform.h>
 #include <linux/pci.h>
 #include <linux/platform_device.h>
 
+#include <linux/amba/bus.h>
+
 #include "io-pgtable.h"
 
 /* MMIO registers */
 #define CR2_RECINVSID                  (1 << 1)
 #define CR2_E2H                                (1 << 0)
 
+#define ARM_SMMU_GBPA                  0x44
+#define GBPA_ABORT                     (1 << 20)
+#define GBPA_UPDATE                    (1 << 31)
+
 #define ARM_SMMU_IRQ_CTRL              0x50
 #define IRQ_CTRL_EVTQ_IRQEN            (1 << 2)
 #define IRQ_CTRL_PRIQ_IRQEN            (1 << 1)
 #define STRTAB_STE_1_SHCFG_INCOMING    1UL
 #define STRTAB_STE_1_SHCFG_SHIFT       44
 
+#define STRTAB_STE_1_PRIVCFG_UNPRIV    2UL
+#define STRTAB_STE_1_PRIVCFG_SHIFT     48
+
 #define STRTAB_STE_2_S2VMID_SHIFT      0
 #define STRTAB_STE_2_S2VMID_MASK       0xffffUL
 #define STRTAB_STE_2_VTCR_SHIFT                32
@@ -606,12 +616,9 @@ struct arm_smmu_device {
        struct arm_smmu_strtab_cfg      strtab_cfg;
 };
 
-/* SMMU private data for an IOMMU group */
-struct arm_smmu_group {
+/* SMMU private data for each master */
+struct arm_smmu_master_data {
        struct arm_smmu_device          *smmu;
-       struct arm_smmu_domain          *domain;
-       int                             num_sids;
-       u32                             *sids;
        struct arm_smmu_strtab_ent      ste;
 };
 
@@ -713,19 +720,15 @@ static void queue_inc_prod(struct arm_smmu_queue *q)
        writel(q->prod, q->prod_reg);
 }
 
-static bool __queue_cons_before(struct arm_smmu_queue *q, u32 until)
-{
-       if (Q_WRP(q, q->cons) == Q_WRP(q, until))
-               return Q_IDX(q, q->cons) < Q_IDX(q, until);
-
-       return Q_IDX(q, q->cons) >= Q_IDX(q, until);
-}
-
-static int queue_poll_cons(struct arm_smmu_queue *q, u32 until, bool wfe)
+/*
+ * Wait for the SMMU to consume items. If drain is true, wait until the queue
+ * is empty. Otherwise, wait until there is at least one free slot.
+ */
+static int queue_poll_cons(struct arm_smmu_queue *q, bool drain, bool wfe)
 {
        ktime_t timeout = ktime_add_us(ktime_get(), ARM_SMMU_POLL_TIMEOUT_US);
 
-       while (queue_sync_cons(q), __queue_cons_before(q, until)) {
+       while (queue_sync_cons(q), (drain ? !queue_empty(q) : queue_full(q))) {
                if (ktime_compare(ktime_get(), timeout) > 0)
                        return -ETIMEDOUT;
 
@@ -896,8 +899,8 @@ static void arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu)
 static void arm_smmu_cmdq_issue_cmd(struct arm_smmu_device *smmu,
                                    struct arm_smmu_cmdq_ent *ent)
 {
-       u32 until;
        u64 cmd[CMDQ_ENT_DWORDS];
+       unsigned long flags;
        bool wfe = !!(smmu->features & ARM_SMMU_FEAT_SEV);
        struct arm_smmu_queue *q = &smmu->cmdq.q;
 
@@ -907,20 +910,15 @@ static void arm_smmu_cmdq_issue_cmd(struct arm_smmu_device *smmu,
                return;
        }
 
-       spin_lock(&smmu->cmdq.lock);
-       while (until = q->prod + 1, queue_insert_raw(q, cmd) == -ENOSPC) {
-               /*
-                * Keep the queue locked, otherwise the producer could wrap
-                * twice and we could see a future consumer pointer that looks
-                * like it's behind us.
-                */
-               if (queue_poll_cons(q, until, wfe))
+       spin_lock_irqsave(&smmu->cmdq.lock, flags);
+       while (queue_insert_raw(q, cmd) == -ENOSPC) {
+               if (queue_poll_cons(q, false, wfe))
                        dev_err_ratelimited(smmu->dev, "CMDQ timeout\n");
        }
 
-       if (ent->opcode == CMDQ_OP_CMD_SYNC && queue_poll_cons(q, until, wfe))
+       if (ent->opcode == CMDQ_OP_CMD_SYNC && queue_poll_cons(q, true, wfe))
                dev_err_ratelimited(smmu->dev, "CMD_SYNC timeout\n");
-       spin_unlock(&smmu->cmdq.lock);
+       spin_unlock_irqrestore(&smmu->cmdq.lock, flags);
 }
 
 /* Context descriptor manipulation functions */
@@ -1073,7 +1071,9 @@ static void arm_smmu_write_strtab_ent(struct arm_smmu_device *smmu, u32 sid,
 #ifdef CONFIG_PCI_ATS
                         STRTAB_STE_1_EATS_TRANS << STRTAB_STE_1_EATS_SHIFT |
 #endif
-                        STRTAB_STE_1_STRW_NSEL1 << STRTAB_STE_1_STRW_SHIFT);
+                        STRTAB_STE_1_STRW_NSEL1 << STRTAB_STE_1_STRW_SHIFT |
+                        STRTAB_STE_1_PRIVCFG_UNPRIV <<
+                        STRTAB_STE_1_PRIVCFG_SHIFT);
 
                if (smmu->features & ARM_SMMU_FEAT_STALLS)
                        dst[1] |= cpu_to_le64(STRTAB_STE_1_S1STALLD);
@@ -1161,36 +1161,66 @@ static irqreturn_t arm_smmu_evtq_thread(int irq, void *dev)
        struct arm_smmu_queue *q = &smmu->evtq.q;
        u64 evt[EVTQ_ENT_DWORDS];
 
-       while (!queue_remove_raw(q, evt)) {
-               u8 id = evt[0] >> EVTQ_0_ID_SHIFT & EVTQ_0_ID_MASK;
+       do {
+               while (!queue_remove_raw(q, evt)) {
+                       u8 id = evt[0] >> EVTQ_0_ID_SHIFT & EVTQ_0_ID_MASK;
 
-               dev_info(smmu->dev, "event 0x%02x received:\n", id);
-               for (i = 0; i < ARRAY_SIZE(evt); ++i)
-                       dev_info(smmu->dev, "\t0x%016llx\n",
-                                (unsigned long long)evt[i]);
-       }
+                       dev_info(smmu->dev, "event 0x%02x received:\n", id);
+                       for (i = 0; i < ARRAY_SIZE(evt); ++i)
+                               dev_info(smmu->dev, "\t0x%016llx\n",
+                                        (unsigned long long)evt[i]);
+
+               }
+
+               /*
+                * Not much we can do on overflow, so scream and pretend we're
+                * trying harder.
+                */
+               if (queue_sync_prod(q) == -EOVERFLOW)
+                       dev_err(smmu->dev, "EVTQ overflow detected -- events lost\n");
+       } while (!queue_empty(q));
 
        /* Sync our overflow flag, as we believe we're up to speed */
        q->cons = Q_OVF(q, q->prod) | Q_WRP(q, q->cons) | Q_IDX(q, q->cons);
        return IRQ_HANDLED;
 }
 
-static irqreturn_t arm_smmu_evtq_handler(int irq, void *dev)
-{
-       irqreturn_t ret = IRQ_WAKE_THREAD;
-       struct arm_smmu_device *smmu = dev;
-       struct arm_smmu_queue *q = &smmu->evtq.q;
+static void arm_smmu_handle_ppr(struct arm_smmu_device *smmu, u64 *evt)
+{
+       u32 sid, ssid;
+       u16 grpid;
+       bool ssv, last;
+
+       sid = evt[0] >> PRIQ_0_SID_SHIFT & PRIQ_0_SID_MASK;
+       ssv = evt[0] & PRIQ_0_SSID_V;
+       ssid = ssv ? evt[0] >> PRIQ_0_SSID_SHIFT & PRIQ_0_SSID_MASK : 0;
+       last = evt[0] & PRIQ_0_PRG_LAST;
+       grpid = evt[1] >> PRIQ_1_PRG_IDX_SHIFT & PRIQ_1_PRG_IDX_MASK;
+
+       dev_info(smmu->dev, "unexpected PRI request received:\n");
+       dev_info(smmu->dev,
+                "\tsid 0x%08x.0x%05x: [%u%s] %sprivileged %s%s%s access at iova 0x%016llx\n",
+                sid, ssid, grpid, last ? "L" : "",
+                evt[0] & PRIQ_0_PERM_PRIV ? "" : "un",
+                evt[0] & PRIQ_0_PERM_READ ? "R" : "",
+                evt[0] & PRIQ_0_PERM_WRITE ? "W" : "",
+                evt[0] & PRIQ_0_PERM_EXEC ? "X" : "",
+                evt[1] & PRIQ_1_ADDR_MASK << PRIQ_1_ADDR_SHIFT);
+
+       if (last) {
+               struct arm_smmu_cmdq_ent cmd = {
+                       .opcode                 = CMDQ_OP_PRI_RESP,
+                       .substream_valid        = ssv,
+                       .pri                    = {
+                               .sid    = sid,
+                               .ssid   = ssid,
+                               .grpid  = grpid,
+                               .resp   = PRI_RESP_DENY,
+                       },
+               };
 
-       /*
-        * Not much we can do on overflow, so scream and pretend we're
-        * trying harder.
-        */
-       if (queue_sync_prod(q) == -EOVERFLOW)
-               dev_err(smmu->dev, "EVTQ overflow detected -- events lost\n");
-       else if (queue_empty(q))
-               ret = IRQ_NONE;
-
-       return ret;
+               arm_smmu_cmdq_issue_cmd(smmu, &cmd);
+       }
 }
 
 static irqreturn_t arm_smmu_priq_thread(int irq, void *dev)
@@ -1199,63 +1229,19 @@ static irqreturn_t arm_smmu_priq_thread(int irq, void *dev)
        struct arm_smmu_queue *q = &smmu->priq.q;
        u64 evt[PRIQ_ENT_DWORDS];
 
-       while (!queue_remove_raw(q, evt)) {
-               u32 sid, ssid;
-               u16 grpid;
-               bool ssv, last;
+       do {
+               while (!queue_remove_raw(q, evt))
+                       arm_smmu_handle_ppr(smmu, evt);
 
-               sid = evt[0] >> PRIQ_0_SID_SHIFT & PRIQ_0_SID_MASK;
-               ssv = evt[0] & PRIQ_0_SSID_V;
-               ssid = ssv ? evt[0] >> PRIQ_0_SSID_SHIFT & PRIQ_0_SSID_MASK : 0;
-               last = evt[0] & PRIQ_0_PRG_LAST;
-               grpid = evt[1] >> PRIQ_1_PRG_IDX_SHIFT & PRIQ_1_PRG_IDX_MASK;
-
-               dev_info(smmu->dev, "unexpected PRI request received:\n");
-               dev_info(smmu->dev,
-                        "\tsid 0x%08x.0x%05x: [%u%s] %sprivileged %s%s%s access at iova 0x%016llx\n",
-                        sid, ssid, grpid, last ? "L" : "",
-                        evt[0] & PRIQ_0_PERM_PRIV ? "" : "un",
-                        evt[0] & PRIQ_0_PERM_READ ? "R" : "",
-                        evt[0] & PRIQ_0_PERM_WRITE ? "W" : "",
-                        evt[0] & PRIQ_0_PERM_EXEC ? "X" : "",
-                        evt[1] & PRIQ_1_ADDR_MASK << PRIQ_1_ADDR_SHIFT);
-
-               if (last) {
-                       struct arm_smmu_cmdq_ent cmd = {
-                               .opcode                 = CMDQ_OP_PRI_RESP,
-                               .substream_valid        = ssv,
-                               .pri                    = {
-                                       .sid    = sid,
-                                       .ssid   = ssid,
-                                       .grpid  = grpid,
-                                       .resp   = PRI_RESP_DENY,
-                               },
-                       };
-
-                       arm_smmu_cmdq_issue_cmd(smmu, &cmd);
-               }
-       }
+               if (queue_sync_prod(q) == -EOVERFLOW)
+                       dev_err(smmu->dev, "PRIQ overflow detected -- requests lost\n");
+       } while (!queue_empty(q));
 
        /* Sync our overflow flag, as we believe we're up to speed */
        q->cons = Q_OVF(q, q->prod) | Q_WRP(q, q->cons) | Q_IDX(q, q->cons);
        return IRQ_HANDLED;
 }
 
-static irqreturn_t arm_smmu_priq_handler(int irq, void *dev)
-{
-       irqreturn_t ret = IRQ_WAKE_THREAD;
-       struct arm_smmu_device *smmu = dev;
-       struct arm_smmu_queue *q = &smmu->priq.q;
-
-       /* PRIQ overflow indicates a programming error */
-       if (queue_sync_prod(q) == -EOVERFLOW)
-               dev_err(smmu->dev, "PRIQ overflow detected -- requests lost\n");
-       else if (queue_empty(q))
-               ret = IRQ_NONE;
-
-       return ret;
-}
-
 static irqreturn_t arm_smmu_cmdq_sync_handler(int irq, void *dev)
 {
        /* We don't actually use CMD_SYNC interrupts for anything */
@@ -1288,15 +1274,11 @@ static irqreturn_t arm_smmu_gerror_handler(int irq, void *dev)
        if (active & GERROR_MSI_GERROR_ABT_ERR)
                dev_warn(smmu->dev, "GERROR MSI write aborted\n");
 
-       if (active & GERROR_MSI_PRIQ_ABT_ERR) {
+       if (active & GERROR_MSI_PRIQ_ABT_ERR)
                dev_warn(smmu->dev, "PRIQ MSI write aborted\n");
-               arm_smmu_priq_handler(irq, smmu->dev);
-       }
 
-       if (active & GERROR_MSI_EVTQ_ABT_ERR) {
+       if (active & GERROR_MSI_EVTQ_ABT_ERR)
                dev_warn(smmu->dev, "EVTQ MSI write aborted\n");
-               arm_smmu_evtq_handler(irq, smmu->dev);
-       }
 
        if (active & GERROR_MSI_CMDQ_ABT_ERR) {
                dev_warn(smmu->dev, "CMDQ MSI write aborted\n");
@@ -1569,6 +1551,8 @@ static int arm_smmu_domain_finalise(struct iommu_domain *domain)
                return -ENOMEM;
 
        domain->pgsize_bitmap = pgtbl_cfg.pgsize_bitmap;
+       domain->geometry.aperture_end = (1UL << ias) - 1;
+       domain->geometry.force_aperture = true;
        smmu_domain->pgtbl_ops = pgtbl_ops;
 
        ret = finalise_stage_fn(smmu_domain, &pgtbl_cfg);
@@ -1578,20 +1562,6 @@ static int arm_smmu_domain_finalise(struct iommu_domain *domain)
        return ret;
 }
 
-static struct arm_smmu_group *arm_smmu_group_get(struct device *dev)
-{
-       struct iommu_group *group;
-       struct arm_smmu_group *smmu_group;
-
-       group = iommu_group_get(dev);
-       if (!group)
-               return NULL;
-
-       smmu_group = iommu_group_get_iommudata(group);
-       iommu_group_put(group);
-       return smmu_group;
-}
-
 static __le64 *arm_smmu_get_step_for_sid(struct arm_smmu_device *smmu, u32 sid)
 {
        __le64 *step;
@@ -1614,27 +1584,17 @@ static __le64 *arm_smmu_get_step_for_sid(struct arm_smmu_device *smmu, u32 sid)
        return step;
 }
 
-static int arm_smmu_install_ste_for_group(struct arm_smmu_group *smmu_group)
+static int arm_smmu_install_ste_for_dev(struct iommu_fwspec *fwspec)
 {
        int i;
-       struct arm_smmu_domain *smmu_domain = smmu_group->domain;
-       struct arm_smmu_strtab_ent *ste = &smmu_group->ste;
-       struct arm_smmu_device *smmu = smmu_group->smmu;
+       struct arm_smmu_master_data *master = fwspec->iommu_priv;
+       struct arm_smmu_device *smmu = master->smmu;
 
-       if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
-               ste->s1_cfg = &smmu_domain->s1_cfg;
-               ste->s2_cfg = NULL;
-               arm_smmu_write_ctx_desc(smmu, ste->s1_cfg);
-       } else {
-               ste->s1_cfg = NULL;
-               ste->s2_cfg = &smmu_domain->s2_cfg;
-       }
-
-       for (i = 0; i < smmu_group->num_sids; ++i) {
-               u32 sid = smmu_group->sids[i];
+       for (i = 0; i < fwspec->num_ids; ++i) {
+               u32 sid = fwspec->ids[i];
                __le64 *step = arm_smmu_get_step_for_sid(smmu, sid);
 
-               arm_smmu_write_strtab_ent(smmu, sid, step, ste);
+               arm_smmu_write_strtab_ent(smmu, sid, step, &master->ste);
        }
 
        return 0;
@@ -1642,13 +1602,11 @@ static int arm_smmu_install_ste_for_group(struct arm_smmu_group *smmu_group)
 
 static void arm_smmu_detach_dev(struct device *dev)
 {
-       struct arm_smmu_group *smmu_group = arm_smmu_group_get(dev);
+       struct arm_smmu_master_data *master = dev->iommu_fwspec->iommu_priv;
 
-       smmu_group->ste.bypass = true;
-       if (arm_smmu_install_ste_for_group(smmu_group) < 0)
+       master->ste.bypass = true;
+       if (arm_smmu_install_ste_for_dev(dev->iommu_fwspec) < 0)
                dev_warn(dev, "failed to install bypass STE\n");
-
-       smmu_group->domain = NULL;
 }
 
 static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
@@ -1656,16 +1614,20 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
        int ret = 0;
        struct arm_smmu_device *smmu;
        struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
-       struct arm_smmu_group *smmu_group = arm_smmu_group_get(dev);
+       struct arm_smmu_master_data *master;
+       struct arm_smmu_strtab_ent *ste;
 
-       if (!smmu_group)
+       if (!dev->iommu_fwspec)
                return -ENOENT;
 
+       master = dev->iommu_fwspec->iommu_priv;
+       smmu = master->smmu;
+       ste = &master->ste;
+
        /* Already attached to a different domain? */
-       if (smmu_group->domain && smmu_group->domain != smmu_domain)
+       if (!ste->bypass)
                arm_smmu_detach_dev(dev);
 
-       smmu = smmu_group->smmu;
        mutex_lock(&smmu_domain->init_mutex);
 
        if (!smmu_domain->smmu) {
@@ -1684,21 +1646,21 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
                goto out_unlock;
        }
 
-       /* Group already attached to this domain? */
-       if (smmu_group->domain)
-               goto out_unlock;
-
-       smmu_group->domain      = smmu_domain;
+       ste->bypass = false;
+       ste->valid = true;
 
-       /*
-        * FIXME: This should always be "false" once we have IOMMU-backed
-        * DMA ops for all devices behind the SMMU.
-        */
-       smmu_group->ste.bypass  = domain->type == IOMMU_DOMAIN_DMA;
+       if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
+               ste->s1_cfg = &smmu_domain->s1_cfg;
+               ste->s2_cfg = NULL;
+               arm_smmu_write_ctx_desc(smmu, ste->s1_cfg);
+       } else {
+               ste->s1_cfg = NULL;
+               ste->s2_cfg = &smmu_domain->s2_cfg;
+       }
 
-       ret = arm_smmu_install_ste_for_group(smmu_group);
+       ret = arm_smmu_install_ste_for_dev(dev->iommu_fwspec);
        if (ret < 0)
-               smmu_group->domain = NULL;
+               ste->valid = false;
 
 out_unlock:
        mutex_unlock(&smmu_domain->init_mutex);
@@ -1757,40 +1719,19 @@ arm_smmu_iova_to_phys(struct iommu_domain *domain, dma_addr_t iova)
        return ret;
 }
 
-static int __arm_smmu_get_pci_sid(struct pci_dev *pdev, u16 alias, void *sidp)
-{
-       *(u32 *)sidp = alias;
-       return 0; /* Continue walking */
-}
+static struct platform_driver arm_smmu_driver;
 
-static void __arm_smmu_release_pci_iommudata(void *data)
+static int arm_smmu_match_node(struct device *dev, void *data)
 {
-       kfree(data);
+       return dev->of_node == data;
 }
 
-static struct arm_smmu_device *arm_smmu_get_for_pci_dev(struct pci_dev *pdev)
+static struct arm_smmu_device *arm_smmu_get_by_node(struct device_node *np)
 {
-       struct device_node *of_node;
-       struct platform_device *smmu_pdev;
-       struct arm_smmu_device *smmu = NULL;
-       struct pci_bus *bus = pdev->bus;
-
-       /* Walk up to the root bus */
-       while (!pci_is_root_bus(bus))
-               bus = bus->parent;
-
-       /* Follow the "iommus" phandle from the host controller */
-       of_node = of_parse_phandle(bus->bridge->parent->of_node, "iommus", 0);
-       if (!of_node)
-               return NULL;
-
-       /* See if we can find an SMMU corresponding to the phandle */
-       smmu_pdev = of_find_device_by_node(of_node);
-       if (smmu_pdev)
-               smmu = platform_get_drvdata(smmu_pdev);
-
-       of_node_put(of_node);
-       return smmu;
+       struct device *dev = driver_find_device(&arm_smmu_driver.driver, NULL,
+                                               np, arm_smmu_match_node);
+       put_device(dev);
+       return dev ? dev_get_drvdata(dev) : NULL;
 }
 
 static bool arm_smmu_sid_in_range(struct arm_smmu_device *smmu, u32 sid)
@@ -1803,94 +1744,91 @@ static bool arm_smmu_sid_in_range(struct arm_smmu_device *smmu, u32 sid)
        return sid < limit;
 }
 
+static struct iommu_ops arm_smmu_ops;
+
 static int arm_smmu_add_device(struct device *dev)
 {
        int i, ret;
-       u32 sid, *sids;
-       struct pci_dev *pdev;
-       struct iommu_group *group;
-       struct arm_smmu_group *smmu_group;
        struct arm_smmu_device *smmu;
+       struct arm_smmu_master_data *master;
+       struct iommu_fwspec *fwspec = dev->iommu_fwspec;
+       struct iommu_group *group;
 
-       /* We only support PCI, for now */
-       if (!dev_is_pci(dev))
+       if (!fwspec || fwspec->ops != &arm_smmu_ops)
                return -ENODEV;
-
-       pdev = to_pci_dev(dev);
-       group = iommu_group_get_for_dev(dev);
-       if (IS_ERR(group))
-               return PTR_ERR(group);
-
-       smmu_group = iommu_group_get_iommudata(group);
-       if (!smmu_group) {
-               smmu = arm_smmu_get_for_pci_dev(pdev);
-               if (!smmu) {
-                       ret = -ENOENT;
-                       goto out_remove_dev;
-               }
-
-               smmu_group = kzalloc(sizeof(*smmu_group), GFP_KERNEL);
-               if (!smmu_group) {
-                       ret = -ENOMEM;
-                       goto out_remove_dev;
-               }
-
-               smmu_group->ste.valid   = true;
-               smmu_group->smmu        = smmu;
-               iommu_group_set_iommudata(group, smmu_group,
-                                         __arm_smmu_release_pci_iommudata);
+       /*
+        * We _can_ actually withstand dodgy bus code re-calling add_device()
+        * without an intervening remove_device()/of_xlate() sequence, but
+        * we're not going to do so quietly...
+        */
+       if (WARN_ON_ONCE(fwspec->iommu_priv)) {
+               master = fwspec->iommu_priv;
+               smmu = master->smmu;
        } else {
-               smmu = smmu_group->smmu;
-       }
+               smmu = arm_smmu_get_by_node(to_of_node(fwspec->iommu_fwnode));
+               if (!smmu)
+                       return -ENODEV;
+               master = kzalloc(sizeof(*master), GFP_KERNEL);
+               if (!master)
+                       return -ENOMEM;
 
-       /* Assume SID == RID until firmware tells us otherwise */
-       pci_for_each_dma_alias(pdev, __arm_smmu_get_pci_sid, &sid);
-       for (i = 0; i < smmu_group->num_sids; ++i) {
-               /* If we already know about this SID, then we're done */
-               if (smmu_group->sids[i] == sid)
-                       goto out_put_group;
+               master->smmu = smmu;
+               fwspec->iommu_priv = master;
        }
 
-       /* Check the SID is in range of the SMMU and our stream table */
-       if (!arm_smmu_sid_in_range(smmu, sid)) {
-               ret = -ERANGE;
-               goto out_remove_dev;
-       }
+       /* Check the SIDs are in range of the SMMU and our stream table */
+       for (i = 0; i < fwspec->num_ids; i++) {
+               u32 sid = fwspec->ids[i];
 
-       /* Ensure l2 strtab is initialised */
-       if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB) {
-               ret = arm_smmu_init_l2_strtab(smmu, sid);
-               if (ret)
-                       goto out_remove_dev;
-       }
+               if (!arm_smmu_sid_in_range(smmu, sid))
+                       return -ERANGE;
 
-       /* Resize the SID array for the group */
-       smmu_group->num_sids++;
-       sids = krealloc(smmu_group->sids, smmu_group->num_sids * sizeof(*sids),
-                       GFP_KERNEL);
-       if (!sids) {
-               smmu_group->num_sids--;
-               ret = -ENOMEM;
-               goto out_remove_dev;
+               /* Ensure l2 strtab is initialised */
+               if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB) {
+                       ret = arm_smmu_init_l2_strtab(smmu, sid);
+                       if (ret)
+                               return ret;
+               }
        }
 
-       /* Add the new SID */
-       sids[smmu_group->num_sids - 1] = sid;
-       smmu_group->sids = sids;
-
-out_put_group:
-       iommu_group_put(group);
-       return 0;
+       group = iommu_group_get_for_dev(dev);
+       if (!IS_ERR(group))
+               iommu_group_put(group);
 
-out_remove_dev:
-       iommu_group_remove_device(dev);
-       iommu_group_put(group);
-       return ret;
+       return PTR_ERR_OR_ZERO(group);
 }
 
 static void arm_smmu_remove_device(struct device *dev)
 {
+       struct iommu_fwspec *fwspec = dev->iommu_fwspec;
+       struct arm_smmu_master_data *master;
+
+       if (!fwspec || fwspec->ops != &arm_smmu_ops)
+               return;
+
+       master = fwspec->iommu_priv;
+       if (master && master->ste.valid)
+               arm_smmu_detach_dev(dev);
        iommu_group_remove_device(dev);
+       kfree(master);
+       iommu_fwspec_free(dev);
+}
+
+static struct iommu_group *arm_smmu_device_group(struct device *dev)
+{
+       struct iommu_group *group;
+
+       /*
+        * We don't support devices sharing stream IDs other than PCI RID
+        * aliases, since the necessary ID-to-device lookup becomes rather
+        * impractical given a potential sparse 32-bit stream ID space.
+        */
+       if (dev_is_pci(dev))
+               group = pci_device_group(dev);
+       else
+               group = generic_device_group(dev);
+
+       return group;
 }
 
 static int arm_smmu_domain_get_attr(struct iommu_domain *domain,
@@ -1937,6 +1875,11 @@ out_unlock:
        return ret;
 }
 
+static int arm_smmu_of_xlate(struct device *dev, struct of_phandle_args *args)
+{
+       return iommu_fwspec_add_ids(dev, args->args, 1);
+}
+
 static struct iommu_ops arm_smmu_ops = {
        .capable                = arm_smmu_capable,
        .domain_alloc           = arm_smmu_domain_alloc,
@@ -1948,9 +1891,10 @@ static struct iommu_ops arm_smmu_ops = {
        .iova_to_phys           = arm_smmu_iova_to_phys,
        .add_device             = arm_smmu_add_device,
        .remove_device          = arm_smmu_remove_device,
-       .device_group           = pci_device_group,
+       .device_group           = arm_smmu_device_group,
        .domain_get_attr        = arm_smmu_domain_get_attr,
        .domain_set_attr        = arm_smmu_domain_set_attr,
+       .of_xlate               = arm_smmu_of_xlate,
        .pgsize_bitmap          = -1UL, /* Restricted during device attach */
 };
 
@@ -2151,6 +2095,24 @@ static int arm_smmu_write_reg_sync(struct arm_smmu_device *smmu, u32 val,
                                          1, ARM_SMMU_POLL_TIMEOUT_US);
 }
 
+/* GBPA is "special" */
+static int arm_smmu_update_gbpa(struct arm_smmu_device *smmu, u32 set, u32 clr)
+{
+       int ret;
+       u32 reg, __iomem *gbpa = smmu->base + ARM_SMMU_GBPA;
+
+       ret = readl_relaxed_poll_timeout(gbpa, reg, !(reg & GBPA_UPDATE),
+                                        1, ARM_SMMU_POLL_TIMEOUT_US);
+       if (ret)
+               return ret;
+
+       reg &= ~clr;
+       reg |= set;
+       writel_relaxed(reg | GBPA_UPDATE, gbpa);
+       return readl_relaxed_poll_timeout(gbpa, reg, !(reg & GBPA_UPDATE),
+                                         1, ARM_SMMU_POLL_TIMEOUT_US);
+}
+
 static void arm_smmu_free_msis(void *data)
 {
        struct device *dev = data;
@@ -2235,10 +2197,10 @@ static int arm_smmu_setup_irqs(struct arm_smmu_device *smmu)
        /* Request interrupt lines */
        irq = smmu->evtq.q.irq;
        if (irq) {
-               ret = devm_request_threaded_irq(smmu->dev, irq,
-                                               arm_smmu_evtq_handler,
+               ret = devm_request_threaded_irq(smmu->dev, irq, NULL,
                                                arm_smmu_evtq_thread,
-                                               0, "arm-smmu-v3-evtq", smmu);
+                                               IRQF_ONESHOT,
+                                               "arm-smmu-v3-evtq", smmu);
                if (ret < 0)
                        dev_warn(smmu->dev, "failed to enable evtq irq\n");
        }
@@ -2263,10 +2225,10 @@ static int arm_smmu_setup_irqs(struct arm_smmu_device *smmu)
        if (smmu->features & ARM_SMMU_FEAT_PRI) {
                irq = smmu->priq.q.irq;
                if (irq) {
-                       ret = devm_request_threaded_irq(smmu->dev, irq,
-                                                       arm_smmu_priq_handler,
+                       ret = devm_request_threaded_irq(smmu->dev, irq, NULL,
                                                        arm_smmu_priq_thread,
-                                                       0, "arm-smmu-v3-priq",
+                                                       IRQF_ONESHOT,
+                                                       "arm-smmu-v3-priq",
                                                        smmu);
                        if (ret < 0)
                                dev_warn(smmu->dev,
@@ -2296,7 +2258,7 @@ static int arm_smmu_device_disable(struct arm_smmu_device *smmu)
        return ret;
 }
 
-static int arm_smmu_device_reset(struct arm_smmu_device *smmu)
+static int arm_smmu_device_reset(struct arm_smmu_device *smmu, bool bypass)
 {
        int ret;
        u32 reg, enables;
@@ -2397,8 +2359,17 @@ static int arm_smmu_device_reset(struct arm_smmu_device *smmu)
                return ret;
        }
 
-       /* Enable the SMMU interface */
-       enables |= CR0_SMMUEN;
+
+       /* Enable the SMMU interface, or ensure bypass */
+       if (!bypass || disable_bypass) {
+               enables |= CR0_SMMUEN;
+       } else {
+               ret = arm_smmu_update_gbpa(smmu, 0, GBPA_ABORT);
+               if (ret) {
+                       dev_err(smmu->dev, "GBPA not responding to update\n");
+                       return ret;
+               }
+       }
        ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
                                      ARM_SMMU_CR0ACK);
        if (ret) {
@@ -2597,6 +2568,15 @@ static int arm_smmu_device_dt_probe(struct platform_device *pdev)
        struct resource *res;
        struct arm_smmu_device *smmu;
        struct device *dev = &pdev->dev;
+       bool bypass = true;
+       u32 cells;
+
+       if (of_property_read_u32(dev->of_node, "#iommu-cells", &cells))
+               dev_err(dev, "missing #iommu-cells property\n");
+       else if (cells != 1)
+               dev_err(dev, "invalid #iommu-cells value (%d)\n", cells);
+       else
+               bypass = false;
 
        smmu = devm_kzalloc(dev, sizeof(*smmu), GFP_KERNEL);
        if (!smmu) {
@@ -2649,7 +2629,24 @@ static int arm_smmu_device_dt_probe(struct platform_device *pdev)
        platform_set_drvdata(pdev, smmu);
 
        /* Reset the device */
-       return arm_smmu_device_reset(smmu);
+       ret = arm_smmu_device_reset(smmu, bypass);
+       if (ret)
+               return ret;
+
+       /* And we're up. Go go go! */
+       of_iommu_set_ops(dev->of_node, &arm_smmu_ops);
+#ifdef CONFIG_PCI
+       pci_request_acs();
+       ret = bus_set_iommu(&pci_bus_type, &arm_smmu_ops);
+       if (ret)
+               return ret;
+#endif
+#ifdef CONFIG_ARM_AMBA
+       ret = bus_set_iommu(&amba_bustype, &arm_smmu_ops);
+       if (ret)
+               return ret;
+#endif
+       return bus_set_iommu(&platform_bus_type, &arm_smmu_ops);
 }
 
 static int arm_smmu_device_remove(struct platform_device *pdev)
@@ -2677,22 +2674,14 @@ static struct platform_driver arm_smmu_driver = {
 
 static int __init arm_smmu_init(void)
 {
-       struct device_node *np;
-       int ret;
-
-       np = of_find_matching_node(NULL, arm_smmu_of_match);
-       if (!np)
-               return 0;
-
-       of_node_put(np);
-
-       ret = platform_driver_register(&arm_smmu_driver);
-       if (ret)
-               return ret;
-
-       pci_request_acs();
+       static bool registered;
+       int ret = 0;
 
-       return bus_set_iommu(&pci_bus_type, &arm_smmu_ops);
+       if (!registered) {
+               ret = platform_driver_register(&arm_smmu_driver);
+               registered = !ret;
+       }
+       return ret;
 }
 
 static void __exit arm_smmu_exit(void)
@@ -2703,6 +2692,20 @@ static void __exit arm_smmu_exit(void)
 subsys_initcall(arm_smmu_init);
 module_exit(arm_smmu_exit);
 
+static int __init arm_smmu_of_init(struct device_node *np)
+{
+       int ret = arm_smmu_init();
+
+       if (ret)
+               return ret;
+
+       if (!of_platform_device_create(np, NULL, platform_bus_type.dev_root))
+               return -ENODEV;
+
+       return 0;
+}
+IOMMU_OF_DECLARE(arm_smmuv3, "arm,smmu-v3", arm_smmu_of_init);
+
 MODULE_DESCRIPTION("IOMMU API for ARM architected SMMUv3 implementations");
 MODULE_AUTHOR("Will Deacon <will.deacon@arm.com>");
 MODULE_LICENSE("GPL v2");
diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c
index 2db74ebc324060683aa39fc51a94b49c60b5daab..c841eb7a1a7417af301e6c51a9ba464d05b1472a 100644 (file)
@@ -28,6 +28,7 @@
 
 #define pr_fmt(fmt) "arm-smmu: " fmt
 
+#include <linux/atomic.h>
 #include <linux/delay.h>
 #include <linux/dma-iommu.h>
 #include <linux/dma-mapping.h>
@@ -40,6 +41,8 @@
 #include <linux/module.h>
 #include <linux/of.h>
 #include <linux/of_address.h>
+#include <linux/of_device.h>
+#include <linux/of_iommu.h>
 #include <linux/pci.h>
 #include <linux/platform_device.h>
 #include <linux/slab.h>
 
 #include "io-pgtable.h"
 
-/* Maximum number of stream IDs assigned to a single device */
-#define MAX_MASTER_STREAMIDS           128
-
 /* Maximum number of context banks per SMMU */
 #define ARM_SMMU_MAX_CBS               128
 
-/* Maximum number of mapping groups per SMMU */
-#define ARM_SMMU_MAX_SMRS              128
-
 /* SMMU global address space */
 #define ARM_SMMU_GR0(smmu)             ((smmu)->base)
 #define ARM_SMMU_GR1(smmu)             ((smmu)->base + (1 << (smmu)->pgshift))
 #define ARM_SMMU_GR0_SMR(n)            (0x800 + ((n) << 2))
 #define SMR_VALID                      (1 << 31)
 #define SMR_MASK_SHIFT                 16
-#define SMR_MASK_MASK                  0x7fff
 #define SMR_ID_SHIFT                   0
-#define SMR_ID_MASK                    0x7fff
 
 #define ARM_SMMU_GR0_S2CR(n)           (0xc00 + ((n) << 2))
 #define S2CR_CBNDX_SHIFT               0
 #define S2CR_CBNDX_MASK                        0xff
 #define S2CR_TYPE_SHIFT                        16
 #define S2CR_TYPE_MASK                 0x3
-#define S2CR_TYPE_TRANS                        (0 << S2CR_TYPE_SHIFT)
-#define S2CR_TYPE_BYPASS               (1 << S2CR_TYPE_SHIFT)
-#define S2CR_TYPE_FAULT                        (2 << S2CR_TYPE_SHIFT)
+enum arm_smmu_s2cr_type {
+       S2CR_TYPE_TRANS,
+       S2CR_TYPE_BYPASS,
+       S2CR_TYPE_FAULT,
+};
 
 #define S2CR_PRIVCFG_SHIFT             24
-#define S2CR_PRIVCFG_UNPRIV            (2 << S2CR_PRIVCFG_SHIFT)
+#define S2CR_PRIVCFG_MASK              0x3
+enum arm_smmu_s2cr_privcfg {
+       S2CR_PRIVCFG_DEFAULT,
+       S2CR_PRIVCFG_DIPAN,
+       S2CR_PRIVCFG_UNPRIV,
+       S2CR_PRIVCFG_PRIV,
+};
 
 /* Context bank attribute registers */
 #define ARM_SMMU_GR1_CBAR(n)           (0x0 + ((n) << 2))
 #define ARM_SMMU_CB_TTBR0              0x20
 #define ARM_SMMU_CB_TTBR1              0x28
 #define ARM_SMMU_CB_TTBCR              0x30
+#define ARM_SMMU_CB_CONTEXTIDR         0x34
 #define ARM_SMMU_CB_S1_MAIR0           0x38
 #define ARM_SMMU_CB_S1_MAIR1           0x3c
 #define ARM_SMMU_CB_PAR                        0x50
 #define SCTLR_AFE                      (1 << 2)
 #define SCTLR_TRE                      (1 << 1)
 #define SCTLR_M                                (1 << 0)
-#define SCTLR_EAE_SBOP                 (SCTLR_AFE | SCTLR_TRE)
 
 #define ARM_MMU500_ACTLR_CPRE          (1 << 1)
 
@@ -296,23 +299,33 @@ enum arm_smmu_implementation {
        CAVIUM_SMMUV2,
 };
 
+struct arm_smmu_s2cr {
+       struct iommu_group              *group;
+       int                             count;
+       enum arm_smmu_s2cr_type         type;
+       enum arm_smmu_s2cr_privcfg      privcfg;
+       u8                              cbndx;
+};
+
+#define s2cr_init_val (struct arm_smmu_s2cr){                          \
+       .type = disable_bypass ? S2CR_TYPE_FAULT : S2CR_TYPE_BYPASS,    \
+}
+
 struct arm_smmu_smr {
-       u8                              idx;
        u16                             mask;
        u16                             id;
+       bool                            valid;
 };
 
 struct arm_smmu_master_cfg {
-       int                             num_streamids;
-       u16                             streamids[MAX_MASTER_STREAMIDS];
-       struct arm_smmu_smr             *smrs;
-};
-
-struct arm_smmu_master {
-       struct device_node              *of_node;
-       struct rb_node                  node;
-       struct arm_smmu_master_cfg      cfg;
+       struct arm_smmu_device          *smmu;
+       s16                             smendx[];
 };
+#define INVALID_SMENDX                 -1
+#define __fwspec_cfg(fw) ((struct arm_smmu_master_cfg *)fw->iommu_priv)
+#define fwspec_smmu(fw)  (__fwspec_cfg(fw)->smmu)
+#define for_each_cfg_sme(fw, i, idx) \
+       for (i = 0; idx = __fwspec_cfg(fw)->smendx[i], i < fw->num_ids; ++i)
 
 struct arm_smmu_device {
        struct device                   *dev;
@@ -346,7 +359,11 @@ struct arm_smmu_device {
        atomic_t                        irptndx;
 
        u32                             num_mapping_groups;
-       DECLARE_BITMAP(smr_map, ARM_SMMU_MAX_SMRS);
+       u16                             streamid_mask;
+       u16                             smr_mask_mask;
+       struct arm_smmu_smr             *smrs;
+       struct arm_smmu_s2cr            *s2crs;
+       struct mutex                    stream_map_mutex;
 
        unsigned long                   va_size;
        unsigned long                   ipa_size;
@@ -357,9 +374,6 @@ struct arm_smmu_device {
        u32                             num_context_irqs;
        unsigned int                    *irqs;
 
-       struct list_head                list;
-       struct rb_root                  masters;
-
        u32                             cavium_id_base; /* Specific to Cavium */
 };
 
@@ -397,15 +411,6 @@ struct arm_smmu_domain {
        struct iommu_domain             domain;
 };
 
-struct arm_smmu_phandle_args {
-       struct device_node *np;
-       int args_count;
-       uint32_t args[MAX_MASTER_STREAMIDS];
-};
-
-static DEFINE_SPINLOCK(arm_smmu_devices_lock);
-static LIST_HEAD(arm_smmu_devices);
-
 struct arm_smmu_option_prop {
        u32 opt;
        const char *prop;
@@ -413,6 +418,8 @@ struct arm_smmu_option_prop {
 
 static atomic_t cavium_smmu_context_count = ATOMIC_INIT(0);
 
+static bool using_legacy_binding, using_generic_binding;
+
 static struct arm_smmu_option_prop arm_smmu_options[] = {
        { ARM_SMMU_OPT_SECURE_CFG_ACCESS, "calxeda,smmu-secure-config-access" },
        { 0, NULL},
@@ -444,131 +451,86 @@ static struct device_node *dev_get_dev_node(struct device *dev)
 
                while (!pci_is_root_bus(bus))
                        bus = bus->parent;
-               return bus->bridge->parent->of_node;
+               return of_node_get(bus->bridge->parent->of_node);
        }
 
-       return dev->of_node;
+       return of_node_get(dev->of_node);
 }
 
-static struct arm_smmu_master *find_smmu_master(struct arm_smmu_device *smmu,
-                                               struct device_node *dev_node)
+static int __arm_smmu_get_pci_sid(struct pci_dev *pdev, u16 alias, void *data)
 {
-       struct rb_node *node = smmu->masters.rb_node;
-
-       while (node) {
-               struct arm_smmu_master *master;
-
-               master = container_of(node, struct arm_smmu_master, node);
-
-               if (dev_node < master->of_node)
-                       node = node->rb_left;
-               else if (dev_node > master->of_node)
-                       node = node->rb_right;
-               else
-                       return master;
-       }
-
-       return NULL;
+       *((__be32 *)data) = cpu_to_be32(alias);
+       return 0; /* Continue walking */
 }
 
-static struct arm_smmu_master_cfg *
-find_smmu_master_cfg(struct device *dev)
+static int __find_legacy_master_phandle(struct device *dev, void *data)
 {
-       struct arm_smmu_master_cfg *cfg = NULL;
-       struct iommu_group *group = iommu_group_get(dev);
-
-       if (group) {
-               cfg = iommu_group_get_iommudata(group);
-               iommu_group_put(group);
-       }
-
-       return cfg;
+       struct of_phandle_iterator *it = *(void **)data;
+       struct device_node *np = it->node;
+       int err;
+
+       of_for_each_phandle(it, err, dev->of_node, "mmu-masters",
+                           "#stream-id-cells", 0)
+               if (it->node == np) {
+                       *(void **)data = dev;
+                       return 1;
+               }
+       it->node = np;
+       return err == -ENOENT ? 0 : err;
 }
 
-static int insert_smmu_master(struct arm_smmu_device *smmu,
-                             struct arm_smmu_master *master)
+static struct platform_driver arm_smmu_driver;
+static struct iommu_ops arm_smmu_ops;
+
+static int arm_smmu_register_legacy_master(struct device *dev,
+                                          struct arm_smmu_device **smmu)
 {
-       struct rb_node **new, *parent;
-
-       new = &smmu->masters.rb_node;
-       parent = NULL;
-       while (*new) {
-               struct arm_smmu_master *this
-                       = container_of(*new, struct arm_smmu_master, node);
-
-               parent = *new;
-               if (master->of_node < this->of_node)
-                       new = &((*new)->rb_left);
-               else if (master->of_node > this->of_node)
-                       new = &((*new)->rb_right);
-               else
-                       return -EEXIST;
+       struct device *smmu_dev;
+       struct device_node *np;
+       struct of_phandle_iterator it;
+       void *data = &it;
+       u32 *sids;
+       __be32 pci_sid;
+       int err;
+
+       np = dev_get_dev_node(dev);
+       if (!np || !of_find_property(np, "#stream-id-cells", NULL)) {
+               of_node_put(np);
+               return -ENODEV;
        }
 
-       rb_link_node(&master->node, parent, new);
-       rb_insert_color(&master->node, &smmu->masters);
-       return 0;
-}
-
-static int register_smmu_master(struct arm_smmu_device *smmu,
-                               struct device *dev,
-                               struct arm_smmu_phandle_args *masterspec)
-{
-       int i;
-       struct arm_smmu_master *master;
+       it.node = np;
+       err = driver_for_each_device(&arm_smmu_driver.driver, NULL, &data,
+                                    __find_legacy_master_phandle);
+       smmu_dev = data;
+       of_node_put(np);
+       if (err == 0)
+               return -ENODEV;
+       if (err < 0)
+               return err;
 
-       master = find_smmu_master(smmu, masterspec->np);
-       if (master) {
-               dev_err(dev,
-                       "rejecting multiple registrations for master device %s\n",
-                       masterspec->np->name);
-               return -EBUSY;
+       if (dev_is_pci(dev)) {
+               /* "mmu-masters" assumes Stream ID == Requester ID */
+               pci_for_each_dma_alias(to_pci_dev(dev), __arm_smmu_get_pci_sid,
+                                      &pci_sid);
+               it.cur = &pci_sid;
+               it.cur_count = 1;
        }
 
-       if (masterspec->args_count > MAX_MASTER_STREAMIDS) {
-               dev_err(dev,
-                       "reached maximum number (%d) of stream IDs for master device %s\n",
-                       MAX_MASTER_STREAMIDS, masterspec->np->name);
-               return -ENOSPC;
-       }
+       err = iommu_fwspec_init(dev, &smmu_dev->of_node->fwnode,
+                               &arm_smmu_ops);
+       if (err)
+               return err;
 
-       master = devm_kzalloc(dev, sizeof(*master), GFP_KERNEL);
-       if (!master)
+       sids = kcalloc(it.cur_count, sizeof(*sids), GFP_KERNEL);
+       if (!sids)
                return -ENOMEM;
 
-       master->of_node                 = masterspec->np;
-       master->cfg.num_streamids       = masterspec->args_count;
-
-       for (i = 0; i < master->cfg.num_streamids; ++i) {
-               u16 streamid = masterspec->args[i];
-
-               if (!(smmu->features & ARM_SMMU_FEAT_STREAM_MATCH) &&
-                    (streamid >= smmu->num_mapping_groups)) {
-                       dev_err(dev,
-                               "stream ID for master device %s greater than maximum allowed (%d)\n",
-                               masterspec->np->name, smmu->num_mapping_groups);
-                       return -ERANGE;
-               }
-               master->cfg.streamids[i] = streamid;
-       }
-       return insert_smmu_master(smmu, master);
-}
-
-static struct arm_smmu_device *find_smmu_for_device(struct device *dev)
-{
-       struct arm_smmu_device *smmu;
-       struct arm_smmu_master *master = NULL;
-       struct device_node *dev_node = dev_get_dev_node(dev);
-
-       spin_lock(&arm_smmu_devices_lock);
-       list_for_each_entry(smmu, &arm_smmu_devices, list) {
-               master = find_smmu_master(smmu, dev_node);
-               if (master)
-                       break;
-       }
-       spin_unlock(&arm_smmu_devices_lock);
-
-       return master ? smmu : NULL;
+       *smmu = dev_get_drvdata(smmu_dev);
+       of_phandle_iterator_args(&it, sids, it.cur_count);
+       err = iommu_fwspec_add_ids(dev, sids, it.cur_count);
+       kfree(sids);
+       return err;
 }
 
 static int __arm_smmu_alloc_bitmap(unsigned long *map, int start, int end)
@@ -738,7 +700,7 @@ static irqreturn_t arm_smmu_global_fault(int irq, void *dev)
 static void arm_smmu_init_context_bank(struct arm_smmu_domain *smmu_domain,
                                       struct io_pgtable_cfg *pgtbl_cfg)
 {
-       u32 reg;
+       u32 reg, reg2;
        u64 reg64;
        bool stage1;
        struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
@@ -781,14 +743,22 @@ static void arm_smmu_init_context_bank(struct arm_smmu_domain *smmu_domain,
 
        /* TTBRs */
        if (stage1) {
-               reg64 = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[0];
-
-               reg64 |= ((u64)ARM_SMMU_CB_ASID(smmu, cfg)) << TTBRn_ASID_SHIFT;
-               writeq_relaxed(reg64, cb_base + ARM_SMMU_CB_TTBR0);
-
-               reg64 = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[1];
-               reg64 |= ((u64)ARM_SMMU_CB_ASID(smmu, cfg)) << TTBRn_ASID_SHIFT;
-               writeq_relaxed(reg64, cb_base + ARM_SMMU_CB_TTBR1);
+               u16 asid = ARM_SMMU_CB_ASID(smmu, cfg);
+
+               if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) {
+                       reg = pgtbl_cfg->arm_v7s_cfg.ttbr[0];
+                       writel_relaxed(reg, cb_base + ARM_SMMU_CB_TTBR0);
+                       reg = pgtbl_cfg->arm_v7s_cfg.ttbr[1];
+                       writel_relaxed(reg, cb_base + ARM_SMMU_CB_TTBR1);
+                       writel_relaxed(asid, cb_base + ARM_SMMU_CB_CONTEXTIDR);
+               } else {
+                       reg64 = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[0];
+                       reg64 |= (u64)asid << TTBRn_ASID_SHIFT;
+                       writeq_relaxed(reg64, cb_base + ARM_SMMU_CB_TTBR0);
+                       reg64 = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[1];
+                       reg64 |= (u64)asid << TTBRn_ASID_SHIFT;
+                       writeq_relaxed(reg64, cb_base + ARM_SMMU_CB_TTBR1);
+               }
        } else {
                reg64 = pgtbl_cfg->arm_lpae_s2_cfg.vttbr;
                writeq_relaxed(reg64, cb_base + ARM_SMMU_CB_TTBR0);
@@ -796,28 +766,36 @@ static void arm_smmu_init_context_bank(struct arm_smmu_domain *smmu_domain,
 
        /* TTBCR */
        if (stage1) {
-               reg = pgtbl_cfg->arm_lpae_s1_cfg.tcr;
-               writel_relaxed(reg, cb_base + ARM_SMMU_CB_TTBCR);
-               if (smmu->version > ARM_SMMU_V1) {
-                       reg = pgtbl_cfg->arm_lpae_s1_cfg.tcr >> 32;
-                       reg |= TTBCR2_SEP_UPSTREAM;
-                       writel_relaxed(reg, cb_base + ARM_SMMU_CB_TTBCR2);
+               if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) {
+                       reg = pgtbl_cfg->arm_v7s_cfg.tcr;
+                       reg2 = 0;
+               } else {
+                       reg = pgtbl_cfg->arm_lpae_s1_cfg.tcr;
+                       reg2 = pgtbl_cfg->arm_lpae_s1_cfg.tcr >> 32;
+                       reg2 |= TTBCR2_SEP_UPSTREAM;
                }
+               if (smmu->version > ARM_SMMU_V1)
+                       writel_relaxed(reg2, cb_base + ARM_SMMU_CB_TTBCR2);
        } else {
                reg = pgtbl_cfg->arm_lpae_s2_cfg.vtcr;
-               writel_relaxed(reg, cb_base + ARM_SMMU_CB_TTBCR);
        }
+       writel_relaxed(reg, cb_base + ARM_SMMU_CB_TTBCR);
 
        /* MAIRs (stage-1 only) */
        if (stage1) {
-               reg = pgtbl_cfg->arm_lpae_s1_cfg.mair[0];
+               if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) {
+                       reg = pgtbl_cfg->arm_v7s_cfg.prrr;
+                       reg2 = pgtbl_cfg->arm_v7s_cfg.nmrr;
+               } else {
+                       reg = pgtbl_cfg->arm_lpae_s1_cfg.mair[0];
+                       reg2 = pgtbl_cfg->arm_lpae_s1_cfg.mair[1];
+               }
                writel_relaxed(reg, cb_base + ARM_SMMU_CB_S1_MAIR0);
-               reg = pgtbl_cfg->arm_lpae_s1_cfg.mair[1];
-               writel_relaxed(reg, cb_base + ARM_SMMU_CB_S1_MAIR1);
+               writel_relaxed(reg2, cb_base + ARM_SMMU_CB_S1_MAIR1);
        }
 
        /* SCTLR */
-       reg = SCTLR_CFIE | SCTLR_CFRE | SCTLR_M | SCTLR_EAE_SBOP;
+       reg = SCTLR_CFIE | SCTLR_CFRE | SCTLR_AFE | SCTLR_TRE | SCTLR_M;
        if (stage1)
                reg |= SCTLR_S1_ASIDPNE;
 #ifdef __BIG_ENDIAN
@@ -841,12 +819,6 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain,
        if (smmu_domain->smmu)
                goto out_unlock;
 
-       /* We're bypassing these SIDs, so don't allocate an actual context */
-       if (domain->type == IOMMU_DOMAIN_DMA) {
-               smmu_domain->smmu = smmu;
-               goto out_unlock;
-       }
-
        /*
         * Mapping the requested stage onto what we support is surprisingly
         * complicated, mainly because the spec allows S1+S2 SMMUs without
@@ -880,6 +852,11 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain,
         */
        if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH32_L)
                cfg->fmt = ARM_SMMU_CTX_FMT_AARCH32_L;
+       if (IS_ENABLED(CONFIG_IOMMU_IO_PGTABLE_ARMV7S) &&
+           !IS_ENABLED(CONFIG_64BIT) && !IS_ENABLED(CONFIG_ARM_LPAE) &&
+           (smmu->features & ARM_SMMU_FEAT_FMT_AARCH32_S) &&
+           (smmu_domain->stage == ARM_SMMU_DOMAIN_S1))
+               cfg->fmt = ARM_SMMU_CTX_FMT_AARCH32_S;
        if ((IS_ENABLED(CONFIG_64BIT) || cfg->fmt == ARM_SMMU_CTX_FMT_NONE) &&
            (smmu->features & (ARM_SMMU_FEAT_FMT_AARCH64_64K |
                               ARM_SMMU_FEAT_FMT_AARCH64_16K |
@@ -899,10 +876,14 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain,
                oas = smmu->ipa_size;
                if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64) {
                        fmt = ARM_64_LPAE_S1;
-               } else {
+               } else if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_L) {
                        fmt = ARM_32_LPAE_S1;
                        ias = min(ias, 32UL);
                        oas = min(oas, 40UL);
+               } else {
+                       fmt = ARM_V7S;
+                       ias = min(ias, 32UL);
+                       oas = min(oas, 32UL);
                }
                break;
        case ARM_SMMU_DOMAIN_NESTED:
@@ -958,6 +939,8 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain,
 
        /* Update the domain's page sizes to reflect the page table format */
        domain->pgsize_bitmap = pgtbl_cfg.pgsize_bitmap;
+       domain->geometry.aperture_end = (1UL << ias) - 1;
+       domain->geometry.force_aperture = true;
 
        /* Initialise the context bank with our page table cfg */
        arm_smmu_init_context_bank(smmu_domain, &pgtbl_cfg);
@@ -996,7 +979,7 @@ static void arm_smmu_destroy_domain_context(struct iommu_domain *domain)
        void __iomem *cb_base;
        int irq;
 
-       if (!smmu || domain->type == IOMMU_DOMAIN_DMA)
+       if (!smmu)
                return;
 
        /*
@@ -1030,8 +1013,8 @@ static struct iommu_domain *arm_smmu_domain_alloc(unsigned type)
        if (!smmu_domain)
                return NULL;
 
-       if (type == IOMMU_DOMAIN_DMA &&
-           iommu_get_dma_cookie(&smmu_domain->domain)) {
+       if (type == IOMMU_DOMAIN_DMA && (using_legacy_binding ||
+           iommu_get_dma_cookie(&smmu_domain->domain))) {
                kfree(smmu_domain);
                return NULL;
        }
@@ -1055,162 +1038,197 @@ static void arm_smmu_domain_free(struct iommu_domain *domain)
        kfree(smmu_domain);
 }
 
-static int arm_smmu_master_configure_smrs(struct arm_smmu_device *smmu,
-                                         struct arm_smmu_master_cfg *cfg)
+static void arm_smmu_write_smr(struct arm_smmu_device *smmu, int idx)
 {
-       int i;
-       struct arm_smmu_smr *smrs;
-       void __iomem *gr0_base = ARM_SMMU_GR0(smmu);
+       struct arm_smmu_smr *smr = smmu->smrs + idx;
+       u32 reg = smr->id << SMR_ID_SHIFT | smr->mask << SMR_MASK_SHIFT;
 
-       if (!(smmu->features & ARM_SMMU_FEAT_STREAM_MATCH))
-               return 0;
+       if (smr->valid)
+               reg |= SMR_VALID;
+       writel_relaxed(reg, ARM_SMMU_GR0(smmu) + ARM_SMMU_GR0_SMR(idx));
+}
 
-       if (cfg->smrs)
-               return -EEXIST;
+static void arm_smmu_write_s2cr(struct arm_smmu_device *smmu, int idx)
+{
+       struct arm_smmu_s2cr *s2cr = smmu->s2crs + idx;
+       u32 reg = (s2cr->type & S2CR_TYPE_MASK) << S2CR_TYPE_SHIFT |
+                 (s2cr->cbndx & S2CR_CBNDX_MASK) << S2CR_CBNDX_SHIFT |
+                 (s2cr->privcfg & S2CR_PRIVCFG_MASK) << S2CR_PRIVCFG_SHIFT;
 
-       smrs = kmalloc_array(cfg->num_streamids, sizeof(*smrs), GFP_KERNEL);
-       if (!smrs) {
-               dev_err(smmu->dev, "failed to allocate %d SMRs\n",
-                       cfg->num_streamids);
-               return -ENOMEM;
-       }
+       writel_relaxed(reg, ARM_SMMU_GR0(smmu) + ARM_SMMU_GR0_S2CR(idx));
+}
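
arm_smmu_write_s2cr() composes the register image with plain shift-and-mask arithmetic. A standalone sketch of the same packing; the field layout below is illustrative only, since the real S2CR_* constants are defined earlier in arm-smmu.c and are not part of this hunk:

    #include <stdint.h>
    #include <stdio.h>

    /* Assumed field positions, for illustration only */
    #define S2CR_CBNDX_SHIFT    0
    #define S2CR_CBNDX_MASK     0xff
    #define S2CR_TYPE_SHIFT     16
    #define S2CR_TYPE_MASK      0x3
    #define S2CR_PRIVCFG_SHIFT  24
    #define S2CR_PRIVCFG_MASK   0x3

    static uint32_t pack_s2cr(uint32_t type, uint32_t cbndx, uint32_t privcfg)
    {
        /* Mask each field before shifting so a stray value cannot
         * spill into its neighbours. */
        return (type & S2CR_TYPE_MASK) << S2CR_TYPE_SHIFT |
               (cbndx & S2CR_CBNDX_MASK) << S2CR_CBNDX_SHIFT |
               (privcfg & S2CR_PRIVCFG_MASK) << S2CR_PRIVCFG_SHIFT;
    }

    int main(void)
    {
        /* e.g. a translation-type entry aimed at context bank 5 */
        printf("S2CR = 0x%08x\n", pack_s2cr(1, 5, 0));
        return 0;
    }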
 
-       /* Allocate the SMRs on the SMMU */
-       for (i = 0; i < cfg->num_streamids; ++i) {
-               int idx = __arm_smmu_alloc_bitmap(smmu->smr_map, 0,
-                                                 smmu->num_mapping_groups);
-               if (idx < 0) {
-                       dev_err(smmu->dev, "failed to allocate free SMR\n");
-                       goto err_free_smrs;
-               }
+static void arm_smmu_write_sme(struct arm_smmu_device *smmu, int idx)
+{
+       arm_smmu_write_s2cr(smmu, idx);
+       if (smmu->smrs)
+               arm_smmu_write_smr(smmu, idx);
+}
 
-               smrs[i] = (struct arm_smmu_smr) {
-                       .idx    = idx,
-                       .mask   = 0, /* We don't currently share SMRs */
-                       .id     = cfg->streamids[i],
-               };
-       }
+static int arm_smmu_find_sme(struct arm_smmu_device *smmu, u16 id, u16 mask)
+{
+       struct arm_smmu_smr *smrs = smmu->smrs;
+       int i, free_idx = -ENOSPC;
 
-       /* It worked! Now, poke the actual hardware */
-       for (i = 0; i < cfg->num_streamids; ++i) {
-               u32 reg = SMR_VALID | smrs[i].id << SMR_ID_SHIFT |
-                         smrs[i].mask << SMR_MASK_SHIFT;
-               writel_relaxed(reg, gr0_base + ARM_SMMU_GR0_SMR(smrs[i].idx));
-       }
+       /* Stream indexing is blissfully easy */
+       if (!smrs)
+               return id;
 
-       cfg->smrs = smrs;
-       return 0;
+       /* Validating SMRs is... less so */
+       for (i = 0; i < smmu->num_mapping_groups; ++i) {
+               if (!smrs[i].valid) {
+                       /*
+                        * Note the first free entry we come across, which
+                        * we'll claim in the end if nothing else matches.
+                        */
+                       if (free_idx < 0)
+                               free_idx = i;
+                       continue;
+               }
+               /*
+                * If the new entry is _entirely_ matched by an existing entry,
+                * then reuse that, with the guarantee that there also cannot
+                * be any subsequent conflicting entries. In normal use we'd
+                * expect simply identical entries for this case, but there's
+                * no harm in accommodating the generalisation.
+                */
+               if ((mask & smrs[i].mask) == mask &&
+                   !((id ^ smrs[i].id) & ~smrs[i].mask))
+                       return i;
+               /*
+                * If the new entry has any other overlap with an existing one,
+                * though, then there always exists at least one stream ID
+                * which would cause a conflict, and we can't allow that risk.
+                */
+               if (!((id ^ smrs[i].id) & ~(smrs[i].mask | mask)))
+                       return -EINVAL;
+       }
 
-err_free_smrs:
-       while (--i >= 0)
-               __arm_smmu_free_bitmap(smmu->smr_map, smrs[i].idx);
-       kfree(smrs);
-       return -ENOSPC;
+       return free_idx;
 }
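
The two bit tests above are the heart of the new SMR allocator: an entry (id, mask) matches stream s exactly when ((s ^ id) & ~mask) == 0, so containment and overlap reduce to mask algebra. A standalone sketch of the same checks, with made-up stream IDs:

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    static bool covers(uint16_t id, uint16_t mask,         /* existing entry */
                       uint16_t new_id, uint16_t new_mask) /* candidate */
    {
        /* Existing entry matches every stream the candidate would */
        return (new_mask & mask) == new_mask && !((new_id ^ id) & ~mask);
    }

    static bool conflicts(uint16_t id, uint16_t mask,
                          uint16_t new_id, uint16_t new_mask)
    {
        /* At least one stream ID would satisfy both entries */
        return !((new_id ^ id) & ~(mask | new_mask));
    }

    int main(void)
    {
        /* (0x40, 0x0f) matches streams 0x40..0x4f */
        printf("%d\n", covers(0x40, 0x0f, 0x42, 0x03));    /* 1: subset */
        printf("%d\n", covers(0x40, 0x0f, 0x4c, 0x13));    /* 0: not contained */
        printf("%d\n", conflicts(0x40, 0x0f, 0x4c, 0x13)); /* 1: partial overlap */
        printf("%d\n", conflicts(0x40, 0x0f, 0x80, 0x07)); /* 0: disjoint */
        return 0;
    }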
 
-static void arm_smmu_master_free_smrs(struct arm_smmu_device *smmu,
-                                     struct arm_smmu_master_cfg *cfg)
+static bool arm_smmu_free_sme(struct arm_smmu_device *smmu, int idx)
 {
-       int i;
-       void __iomem *gr0_base = ARM_SMMU_GR0(smmu);
-       struct arm_smmu_smr *smrs = cfg->smrs;
-
-       if (!smrs)
-               return;
-
-       /* Invalidate the SMRs before freeing back to the allocator */
-       for (i = 0; i < cfg->num_streamids; ++i) {
-               u8 idx = smrs[i].idx;
+       if (--smmu->s2crs[idx].count)
+               return false;
 
-               writel_relaxed(~SMR_VALID, gr0_base + ARM_SMMU_GR0_SMR(idx));
-               __arm_smmu_free_bitmap(smmu->smr_map, idx);
-       }
+       smmu->s2crs[idx] = s2cr_init_val;
+       if (smmu->smrs)
+               smmu->smrs[idx].valid = false;
 
-       cfg->smrs = NULL;
-       kfree(smrs);
+       return true;
 }
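
arm_smmu_free_sme() is a classic shared-resource release: the entry is reference-counted because several masters may have been folded onto one stream map entry, and only the final put invalidates it; the caller rewrites the hardware only when this returns true, as arm_smmu_master_free_smes() below shows. A toy model of the contract:

    #include <stdbool.h>
    #include <stdio.h>

    struct sme { int count; bool valid; };

    static bool free_sme(struct sme *e)
    {
        if (--e->count)
            return false;   /* other masters still reference it */
        e->valid = false;   /* last user: tear the entry down */
        return true;
    }

    int main(void)
    {
        struct sme e = { .count = 2, .valid = true };

        printf("%d\n", free_sme(&e)); /* 0: still in use */
        printf("%d\n", free_sme(&e)); /* 1: invalidate and write back */
        return 0;
    }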
 
-static int arm_smmu_domain_add_master(struct arm_smmu_domain *smmu_domain,
-                                     struct arm_smmu_master_cfg *cfg)
+static int arm_smmu_master_alloc_smes(struct device *dev)
 {
-       int i, ret;
-       struct arm_smmu_device *smmu = smmu_domain->smmu;
-       void __iomem *gr0_base = ARM_SMMU_GR0(smmu);
+       struct iommu_fwspec *fwspec = dev->iommu_fwspec;
+       struct arm_smmu_master_cfg *cfg = fwspec->iommu_priv;
+       struct arm_smmu_device *smmu = cfg->smmu;
+       struct arm_smmu_smr *smrs = smmu->smrs;
+       struct iommu_group *group;
+       int i, idx, ret;
 
-       /*
-        * FIXME: This won't be needed once we have IOMMU-backed DMA ops
-        * for all devices behind the SMMU. Note that we need to take
-        * care configuring SMRs for devices both a platform_device and
-        * and a PCI device (i.e. a PCI host controller)
-        */
-       if (smmu_domain->domain.type == IOMMU_DOMAIN_DMA)
-               return 0;
+       mutex_lock(&smmu->stream_map_mutex);
+       /* Figure out a viable stream map entry allocation */
+       for_each_cfg_sme(fwspec, i, idx) {
+               u16 sid = fwspec->ids[i];
+               u16 mask = fwspec->ids[i] >> SMR_MASK_SHIFT;
 
-       /* Devices in an IOMMU group may already be configured */
-       ret = arm_smmu_master_configure_smrs(smmu, cfg);
-       if (ret)
-               return ret == -EEXIST ? 0 : ret;
+               if (idx != INVALID_SMENDX) {
+                       ret = -EEXIST;
+                       goto out_err;
+               }
 
-       for (i = 0; i < cfg->num_streamids; ++i) {
-               u32 idx, s2cr;
+               ret = arm_smmu_find_sme(smmu, sid, mask);
+               if (ret < 0)
+                       goto out_err;
+
+               idx = ret;
+               if (smrs && smmu->s2crs[idx].count == 0) {
+                       smrs[idx].id = sid;
+                       smrs[idx].mask = mask;
+                       smrs[idx].valid = true;
+               }
+               smmu->s2crs[idx].count++;
+               cfg->smendx[i] = (s16)idx;
+       }
 
-               idx = cfg->smrs ? cfg->smrs[i].idx : cfg->streamids[i];
-               s2cr = S2CR_TYPE_TRANS | S2CR_PRIVCFG_UNPRIV |
-                      (smmu_domain->cfg.cbndx << S2CR_CBNDX_SHIFT);
-               writel_relaxed(s2cr, gr0_base + ARM_SMMU_GR0_S2CR(idx));
+       group = iommu_group_get_for_dev(dev);
+       if (!group)
+               group = ERR_PTR(-ENOMEM);
+       if (IS_ERR(group)) {
+               ret = PTR_ERR(group);
+               goto out_err;
        }
+       iommu_group_put(group);
 
+       /* It worked! Now, poke the actual hardware */
+       for_each_cfg_sme(fwspec, i, idx) {
+               arm_smmu_write_sme(smmu, idx);
+               smmu->s2crs[idx].group = group;
+       }
+
+       mutex_unlock(&smmu->stream_map_mutex);
        return 0;
+
+out_err:
+       while (i--) {
+               arm_smmu_free_sme(smmu, cfg->smendx[i]);
+               cfg->smendx[i] = INVALID_SMENDX;
+       }
+       mutex_unlock(&smmu->stream_map_mutex);
+       return ret;
 }
 
-static void arm_smmu_domain_remove_master(struct arm_smmu_domain *smmu_domain,
-                                         struct arm_smmu_master_cfg *cfg)
+static void arm_smmu_master_free_smes(struct iommu_fwspec *fwspec)
 {
-       int i;
-       struct arm_smmu_device *smmu = smmu_domain->smmu;
-       void __iomem *gr0_base = ARM_SMMU_GR0(smmu);
-
-       /* An IOMMU group is torn down by the first device to be removed */
-       if ((smmu->features & ARM_SMMU_FEAT_STREAM_MATCH) && !cfg->smrs)
-               return;
+       struct arm_smmu_device *smmu = fwspec_smmu(fwspec);
+       struct arm_smmu_master_cfg *cfg = fwspec->iommu_priv;
+       int i, idx;
 
-       /*
-        * We *must* clear the S2CR first, because freeing the SMR means
-        * that it can be re-allocated immediately.
-        */
-       for (i = 0; i < cfg->num_streamids; ++i) {
-               u32 idx = cfg->smrs ? cfg->smrs[i].idx : cfg->streamids[i];
-               u32 reg = disable_bypass ? S2CR_TYPE_FAULT : S2CR_TYPE_BYPASS;
-
-               writel_relaxed(reg, gr0_base + ARM_SMMU_GR0_S2CR(idx));
+       mutex_lock(&smmu->stream_map_mutex);
+       for_each_cfg_sme(fwspec, i, idx) {
+               if (arm_smmu_free_sme(smmu, idx))
+                       arm_smmu_write_sme(smmu, idx);
+               cfg->smendx[i] = INVALID_SMENDX;
        }
-
-       arm_smmu_master_free_smrs(smmu, cfg);
+       mutex_unlock(&smmu->stream_map_mutex);
 }
 
-static void arm_smmu_detach_dev(struct device *dev,
-                               struct arm_smmu_master_cfg *cfg)
+static int arm_smmu_domain_add_master(struct arm_smmu_domain *smmu_domain,
+                                     struct iommu_fwspec *fwspec)
 {
-       struct iommu_domain *domain = dev->archdata.iommu;
-       struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
+       struct arm_smmu_device *smmu = smmu_domain->smmu;
+       struct arm_smmu_s2cr *s2cr = smmu->s2crs;
+       enum arm_smmu_s2cr_type type = S2CR_TYPE_TRANS;
+       u8 cbndx = smmu_domain->cfg.cbndx;
+       int i, idx;
+
+       for_each_cfg_sme(fwspec, i, idx) {
+               if (type == s2cr[idx].type && cbndx == s2cr[idx].cbndx)
+                       continue;
 
-       dev->archdata.iommu = NULL;
-       arm_smmu_domain_remove_master(smmu_domain, cfg);
+               s2cr[idx].type = type;
+               s2cr[idx].privcfg = S2CR_PRIVCFG_UNPRIV;
+               s2cr[idx].cbndx = cbndx;
+               arm_smmu_write_s2cr(smmu, idx);
+       }
+       return 0;
 }
 
 static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
 {
        int ret;
-       struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
+       struct iommu_fwspec *fwspec = dev->iommu_fwspec;
        struct arm_smmu_device *smmu;
-       struct arm_smmu_master_cfg *cfg;
+       struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
 
-       smmu = find_smmu_for_device(dev);
-       if (!smmu) {
+       if (!fwspec || fwspec->ops != &arm_smmu_ops) {
                dev_err(dev, "cannot attach to SMMU, is it on the same bus?\n");
                return -ENXIO;
        }
 
+       smmu = fwspec_smmu(fwspec);
        /* Ensure that the domain is finalised */
        ret = arm_smmu_init_domain_context(domain, smmu);
        if (ret < 0)
@@ -1228,18 +1246,7 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
        }
 
        /* Looks ok, so add the device to the domain */
-       cfg = find_smmu_master_cfg(dev);
-       if (!cfg)
-               return -ENODEV;
-
-       /* Detach the dev from its current domain */
-       if (dev->archdata.iommu)
-               arm_smmu_detach_dev(dev, cfg);
-
-       ret = arm_smmu_domain_add_master(smmu_domain, cfg);
-       if (!ret)
-               dev->archdata.iommu = domain;
-       return ret;
+       return arm_smmu_domain_add_master(smmu_domain, fwspec);
 }
 
 static int arm_smmu_map(struct iommu_domain *domain, unsigned long iova,
@@ -1358,110 +1365,113 @@ static bool arm_smmu_capable(enum iommu_cap cap)
        }
 }
 
-static int __arm_smmu_get_pci_sid(struct pci_dev *pdev, u16 alias, void *data)
+static int arm_smmu_match_node(struct device *dev, void *data)
 {
-       *((u16 *)data) = alias;
-       return 0; /* Continue walking */
+       return dev->of_node == data;
 }
 
-static void __arm_smmu_release_pci_iommudata(void *data)
+static struct arm_smmu_device *arm_smmu_get_by_node(struct device_node *np)
 {
-       kfree(data);
+       struct device *dev = driver_find_device(&arm_smmu_driver.driver, NULL,
+                                               np, arm_smmu_match_node);
+       put_device(dev);
+       return dev ? dev_get_drvdata(dev) : NULL;
 }
 
-static int arm_smmu_init_pci_device(struct pci_dev *pdev,
-                                   struct iommu_group *group)
+static int arm_smmu_add_device(struct device *dev)
 {
+       struct arm_smmu_device *smmu;
        struct arm_smmu_master_cfg *cfg;
-       u16 sid;
-       int i;
-
-       cfg = iommu_group_get_iommudata(group);
-       if (!cfg) {
-               cfg = kzalloc(sizeof(*cfg), GFP_KERNEL);
-               if (!cfg)
-                       return -ENOMEM;
+       struct iommu_fwspec *fwspec = dev->iommu_fwspec;
+       int i, ret;
 
-               iommu_group_set_iommudata(group, cfg,
-                                         __arm_smmu_release_pci_iommudata);
+       if (using_legacy_binding) {
+               ret = arm_smmu_register_legacy_master(dev, &smmu);
+               fwspec = dev->iommu_fwspec;
+               if (ret)
+                       goto out_free;
+       } else if (fwspec) {
+               smmu = arm_smmu_get_by_node(to_of_node(fwspec->iommu_fwnode));
+       } else {
+               return -ENODEV;
        }
 
-       if (cfg->num_streamids >= MAX_MASTER_STREAMIDS)
-               return -ENOSPC;
+       ret = -EINVAL;
+       for (i = 0; i < fwspec->num_ids; i++) {
+               u16 sid = fwspec->ids[i];
+               u16 mask = fwspec->ids[i] >> SMR_MASK_SHIFT;
 
-       /*
-        * Assume Stream ID == Requester ID for now.
-        * We need a way to describe the ID mappings in FDT.
-        */
-       pci_for_each_dma_alias(pdev, __arm_smmu_get_pci_sid, &sid);
-       for (i = 0; i < cfg->num_streamids; ++i)
-               if (cfg->streamids[i] == sid)
-                       break;
-
-       /* Avoid duplicate SIDs, as this can lead to SMR conflicts */
-       if (i == cfg->num_streamids)
-               cfg->streamids[cfg->num_streamids++] = sid;
-
-       return 0;
-}
-
-static int arm_smmu_init_platform_device(struct device *dev,
-                                        struct iommu_group *group)
-{
-       struct arm_smmu_device *smmu = find_smmu_for_device(dev);
-       struct arm_smmu_master *master;
+               if (sid & ~smmu->streamid_mask) {
+                       dev_err(dev, "stream ID 0x%x out of range for SMMU (0x%x)\n",
+                               sid, smmu->streamid_mask);
+                       goto out_free;
+               }
+               if (mask & ~smmu->smr_mask_mask) {
+                       dev_err(dev, "SMR mask 0x%x out of range for SMMU (0x%x)\n",
+                               mask, smmu->smr_mask_mask);
+                       goto out_free;
+               }
+       }
 
-       if (!smmu)
-               return -ENODEV;
+       ret = -ENOMEM;
+       cfg = kzalloc(offsetof(struct arm_smmu_master_cfg, smendx[i]),
+                     GFP_KERNEL);
+       if (!cfg)
+               goto out_free;
 
-       master = find_smmu_master(smmu, dev->of_node);
-       if (!master)
-               return -ENODEV;
+       cfg->smmu = smmu;
+       fwspec->iommu_priv = cfg;
+       while (i--)
+               cfg->smendx[i] = INVALID_SMENDX;
 
-       iommu_group_set_iommudata(group, &master->cfg, NULL);
+       ret = arm_smmu_master_alloc_smes(dev);
+       if (ret)
+               goto out_free;
 
        return 0;
-}
 
-static int arm_smmu_add_device(struct device *dev)
-{
-       struct iommu_group *group;
-
-       group = iommu_group_get_for_dev(dev);
-       if (IS_ERR(group))
-               return PTR_ERR(group);
-
-       iommu_group_put(group);
-       return 0;
+out_free:
+       if (fwspec)
+               kfree(fwspec->iommu_priv);
+       iommu_fwspec_free(dev);
+       return ret;
 }
 
 static void arm_smmu_remove_device(struct device *dev)
 {
+       struct iommu_fwspec *fwspec = dev->iommu_fwspec;
+
+       if (!fwspec || fwspec->ops != &arm_smmu_ops)
+               return;
+
+       arm_smmu_master_free_smes(fwspec);
        iommu_group_remove_device(dev);
+       kfree(fwspec->iommu_priv);
+       iommu_fwspec_free(dev);
 }
 
 static struct iommu_group *arm_smmu_device_group(struct device *dev)
 {
-       struct iommu_group *group;
-       int ret;
+       struct iommu_fwspec *fwspec = dev->iommu_fwspec;
+       struct arm_smmu_device *smmu = fwspec_smmu(fwspec);
+       struct iommu_group *group = NULL;
+       int i, idx;
 
-       if (dev_is_pci(dev))
-               group = pci_device_group(dev);
-       else
-               group = generic_device_group(dev);
+       for_each_cfg_sme(fwspec, i, idx) {
+               if (group && smmu->s2crs[idx].group &&
+                   group != smmu->s2crs[idx].group)
+                       return ERR_PTR(-EINVAL);
+
+               group = smmu->s2crs[idx].group;
+       }
 
-       if (IS_ERR(group))
+       if (group)
                return group;
 
        if (dev_is_pci(dev))
-               ret = arm_smmu_init_pci_device(to_pci_dev(dev), group);
+               group = pci_device_group(dev);
        else
-               ret = arm_smmu_init_platform_device(dev, group);
-
-       if (ret) {
-               iommu_group_put(group);
-               group = ERR_PTR(ret);
-       }
+               group = generic_device_group(dev);
 
        return group;
 }
@@ -1510,6 +1520,19 @@ out_unlock:
        return ret;
 }
 
+static int arm_smmu_of_xlate(struct device *dev, struct of_phandle_args *args)
+{
+       u32 fwid = 0;
+
+       if (args->args_count > 0)
+               fwid |= (u16)args->args[0];
+
+       if (args->args_count > 1)
+               fwid |= (u16)args->args[1] << SMR_MASK_SHIFT;
+
+       return iommu_fwspec_add_ids(dev, &fwid, 1);
+}
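
arm_smmu_of_xlate() folds the optional second cell (an SMR mask) into the upper half of a single 32-bit firmware ID, and arm_smmu_add_device() above splits the halves back out with SMR_MASK_SHIFT. Assuming a shift of 16 (the constant itself is defined elsewhere in the driver), the round trip looks like this:

    #include <stdint.h>
    #include <stdio.h>

    #define SMR_MASK_SHIFT 16 /* assumed value, for illustration */

    int main(void)
    {
        uint32_t cells[] = { 0x123, 0xf0 }; /* hypothetical DT cells: SID, mask */
        uint32_t fwid = 0;

        fwid |= (uint16_t)cells[0];                             /* stream ID */
        fwid |= (uint32_t)(uint16_t)cells[1] << SMR_MASK_SHIFT; /* SMR mask */

        /* arm_smmu_add_device() recovers the halves the same way: */
        printf("sid = 0x%x, mask = 0x%x\n",
               (uint16_t)fwid, (uint16_t)(fwid >> SMR_MASK_SHIFT));
        return 0;
    }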
+
 static struct iommu_ops arm_smmu_ops = {
        .capable                = arm_smmu_capable,
        .domain_alloc           = arm_smmu_domain_alloc,
@@ -1524,6 +1547,7 @@ static struct iommu_ops arm_smmu_ops = {
        .device_group           = arm_smmu_device_group,
        .domain_get_attr        = arm_smmu_domain_get_attr,
        .domain_set_attr        = arm_smmu_domain_set_attr,
+       .of_xlate               = arm_smmu_of_xlate,
        .pgsize_bitmap          = -1UL, /* Restricted during device attach */
 };
 
@@ -1531,19 +1555,19 @@ static void arm_smmu_device_reset(struct arm_smmu_device *smmu)
 {
        void __iomem *gr0_base = ARM_SMMU_GR0(smmu);
        void __iomem *cb_base;
-       int i = 0;
+       int i;
        u32 reg, major;
 
        /* clear global FSR */
        reg = readl_relaxed(ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sGFSR);
        writel(reg, ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sGFSR);
 
-       /* Mark all SMRn as invalid and all S2CRn as bypass unless overridden */
-       reg = disable_bypass ? S2CR_TYPE_FAULT : S2CR_TYPE_BYPASS;
-       for (i = 0; i < smmu->num_mapping_groups; ++i) {
-               writel_relaxed(0, gr0_base + ARM_SMMU_GR0_SMR(i));
-               writel_relaxed(reg, gr0_base + ARM_SMMU_GR0_S2CR(i));
-       }
+       /*
+        * Reset stream mapping groups: Initial values mark all SMRn as
+        * invalid and all S2CRn as bypass unless overridden.
+        */
+       for (i = 0; i < smmu->num_mapping_groups; ++i)
+               arm_smmu_write_sme(smmu, i);
 
        /*
         * Before clearing ARM_MMU500_ACTLR_CPRE, need to
@@ -1632,6 +1656,7 @@ static int arm_smmu_device_cfg_probe(struct arm_smmu_device *smmu)
        void __iomem *gr0_base = ARM_SMMU_GR0(smmu);
        u32 id;
        bool cttw_dt, cttw_reg;
+       int i;
 
        dev_notice(smmu->dev, "probing hardware configuration...\n");
        dev_notice(smmu->dev, "SMMUv%d with:\n",
@@ -1690,39 +1715,55 @@ static int arm_smmu_device_cfg_probe(struct arm_smmu_device *smmu)
                dev_notice(smmu->dev,
                           "\t(IDR0.CTTW overridden by dma-coherent property)\n");
 
+       /* Max. number of entries we have for stream matching/indexing */
+       size = 1 << ((id >> ID0_NUMSIDB_SHIFT) & ID0_NUMSIDB_MASK);
+       smmu->streamid_mask = size - 1;
        if (id & ID0_SMS) {
-               u32 smr, sid, mask;
+               u32 smr;
 
                smmu->features |= ARM_SMMU_FEAT_STREAM_MATCH;
-               smmu->num_mapping_groups = (id >> ID0_NUMSMRG_SHIFT) &
-                                          ID0_NUMSMRG_MASK;
-               if (smmu->num_mapping_groups == 0) {
+               size = (id >> ID0_NUMSMRG_SHIFT) & ID0_NUMSMRG_MASK;
+               if (size == 0) {
                        dev_err(smmu->dev,
                                "stream-matching supported, but no SMRs present!\n");
                        return -ENODEV;
                }
 
-               smr = SMR_MASK_MASK << SMR_MASK_SHIFT;
-               smr |= (SMR_ID_MASK << SMR_ID_SHIFT);
+               /*
+                * SMR.ID bits may not be preserved if the corresponding MASK
+                * bits are set, so check each one separately. We can reject
+                * masters later if they try to claim IDs outside these masks.
+                */
+               smr = smmu->streamid_mask << SMR_ID_SHIFT;
                writel_relaxed(smr, gr0_base + ARM_SMMU_GR0_SMR(0));
                smr = readl_relaxed(gr0_base + ARM_SMMU_GR0_SMR(0));
+               smmu->streamid_mask = smr >> SMR_ID_SHIFT;
 
-               mask = (smr >> SMR_MASK_SHIFT) & SMR_MASK_MASK;
-               sid = (smr >> SMR_ID_SHIFT) & SMR_ID_MASK;
-               if ((mask & sid) != sid) {
-                       dev_err(smmu->dev,
-                               "SMR mask bits (0x%x) insufficient for ID field (0x%x)\n",
-                               mask, sid);
-                       return -ENODEV;
-               }
+               smr = smmu->streamid_mask << SMR_MASK_SHIFT;
+               writel_relaxed(smr, gr0_base + ARM_SMMU_GR0_SMR(0));
+               smr = readl_relaxed(gr0_base + ARM_SMMU_GR0_SMR(0));
+               smmu->smr_mask_mask = smr >> SMR_MASK_SHIFT;
+
+               /* Zero-initialised to mark as invalid */
+               smmu->smrs = devm_kcalloc(smmu->dev, size, sizeof(*smmu->smrs),
+                                         GFP_KERNEL);
+               if (!smmu->smrs)
+                       return -ENOMEM;
 
                dev_notice(smmu->dev,
-                          "\tstream matching with %u register groups, mask 0x%x",
-                          smmu->num_mapping_groups, mask);
-       } else {
-               smmu->num_mapping_groups = (id >> ID0_NUMSIDB_SHIFT) &
-                                          ID0_NUMSIDB_MASK;
+                          "\tstream matching with %lu register groups, mask 0x%x",
+                          size, smmu->smr_mask_mask);
        }
+       /* s2cr->type == 0 means translation, so initialise explicitly */
+       smmu->s2crs = devm_kmalloc_array(smmu->dev, size, sizeof(*smmu->s2crs),
+                                        GFP_KERNEL);
+       if (!smmu->s2crs)
+               return -ENOMEM;
+       for (i = 0; i < size; i++)
+               smmu->s2crs[i] = s2cr_init_val;
+
+       smmu->num_mapping_groups = size;
+       mutex_init(&smmu->stream_map_mutex);
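
The SMR probing sequence above uses the write-all-ones-and-read-back idiom: whatever bits survive the round trip are the bits the hardware actually implements, which is why the ID and MASK fields are probed separately rather than together as before. A sketch against a mock register that only implements 10 bits per field (field positions assumed for illustration):

    #include <stdint.h>
    #include <stdio.h>

    #define SMR_ID_SHIFT    0  /* assumed layout, for illustration */
    #define SMR_MASK_SHIFT  16

    /* Mock SMR whose ID and MASK fields each implement only 10 bits */
    static uint32_t smr_reg;
    static void smr_write(uint32_t v) { smr_reg = v & 0x03ff03ff; }
    static uint32_t smr_read(void)    { return smr_reg; }

    int main(void)
    {
        uint16_t streamid_mask = 0x7fff; /* optimistic guess from ID0.NUMSIDB */
        uint16_t smr_mask_mask;

        /* Probe the ID field: only implemented bits survive */
        smr_write((uint32_t)streamid_mask << SMR_ID_SHIFT);
        streamid_mask = smr_read() >> SMR_ID_SHIFT;

        /* Then probe the MASK field separately */
        smr_write((uint32_t)streamid_mask << SMR_MASK_SHIFT);
        smr_mask_mask = smr_read() >> SMR_MASK_SHIFT;

        printf("streamid_mask = 0x%x, smr_mask_mask = 0x%x\n",
               streamid_mask, smr_mask_mask); /* 0x3ff, 0x3ff */
        return 0;
    }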
 
        if (smmu->version < ARM_SMMU_V2 || !(id & ID0_PTFS_NO_AARCH32)) {
                smmu->features |= ARM_SMMU_FEAT_FMT_AARCH32_L;
@@ -1855,15 +1896,24 @@ MODULE_DEVICE_TABLE(of, arm_smmu_of_match);
 
 static int arm_smmu_device_dt_probe(struct platform_device *pdev)
 {
-       const struct of_device_id *of_id;
        const struct arm_smmu_match_data *data;
        struct resource *res;
        struct arm_smmu_device *smmu;
        struct device *dev = &pdev->dev;
-       struct rb_node *node;
-       struct of_phandle_iterator it;
-       struct arm_smmu_phandle_args *masterspec;
        int num_irqs, i, err;
+       bool legacy_binding;
+
+       legacy_binding = of_find_property(dev->of_node, "mmu-masters", NULL);
+       if (legacy_binding && !using_generic_binding) {
+               if (!using_legacy_binding)
+                       pr_notice("deprecated \"mmu-masters\" DT property in use; DMA API support unavailable\n");
+               using_legacy_binding = true;
+       } else if (!legacy_binding && !using_legacy_binding) {
+               using_generic_binding = true;
+       } else {
+               dev_err(dev, "not probing due to mismatched DT properties\n");
+               return -ENODEV;
+       }
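
The probe-time arbitration above means the first SMMU to probe fixes the binding style for the whole system: the legacy "mmu-masters" property and the generic binding cannot be mixed, and a mismatched device simply refuses to probe. A compact model of the same state machine:

    #include <errno.h>
    #include <stdbool.h>
    #include <stdio.h>

    static bool using_legacy_binding, using_generic_binding;

    /* Returns 0 if this SMMU may probe, -ENODEV on a mixed system */
    static int check_binding(bool legacy_binding)
    {
        if (legacy_binding && !using_generic_binding) {
            using_legacy_binding = true;
            return 0;
        } else if (!legacy_binding && !using_legacy_binding) {
            using_generic_binding = true;
            return 0;
        }
        return -ENODEV;
    }

    int main(void)
    {
        printf("%d\n", check_binding(true));  /* first SMMU: legacy, ok */
        printf("%d\n", check_binding(false)); /* second: generic, refused */
        return 0;
    }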
 
        smmu = devm_kzalloc(dev, sizeof(*smmu), GFP_KERNEL);
        if (!smmu) {
@@ -1872,8 +1922,7 @@ static int arm_smmu_device_dt_probe(struct platform_device *pdev)
        }
        smmu->dev = dev;
 
-       of_id = of_match_node(arm_smmu_of_match, dev->of_node);
-       data = of_id->data;
+       data = of_device_get_match_data(dev);
        smmu->version = data->version;
        smmu->model = data->model;
 
@@ -1923,37 +1972,6 @@ static int arm_smmu_device_dt_probe(struct platform_device *pdev)
        if (err)
                return err;
 
-       i = 0;
-       smmu->masters = RB_ROOT;
-
-       err = -ENOMEM;
-       /* No need to zero the memory for masterspec */
-       masterspec = kmalloc(sizeof(*masterspec), GFP_KERNEL);
-       if (!masterspec)
-               goto out_put_masters;
-
-       of_for_each_phandle(&it, err, dev->of_node,
-                           "mmu-masters", "#stream-id-cells", 0) {
-               int count = of_phandle_iterator_args(&it, masterspec->args,
-                                                    MAX_MASTER_STREAMIDS);
-               masterspec->np          = of_node_get(it.node);
-               masterspec->args_count  = count;
-
-               err = register_smmu_master(smmu, dev, masterspec);
-               if (err) {
-                       dev_err(dev, "failed to add master %s\n",
-                               masterspec->np->name);
-                       kfree(masterspec);
-                       goto out_put_masters;
-               }
-
-               i++;
-       }
-
-       dev_notice(dev, "registered %d master devices\n", i);
-
-       kfree(masterspec);
-
        parse_driver_options(smmu);
 
        if (smmu->version == ARM_SMMU_V2 &&
@@ -1961,8 +1979,7 @@ static int arm_smmu_device_dt_probe(struct platform_device *pdev)
                dev_err(dev,
                        "found only %d context interrupt(s) but %d required\n",
                        smmu->num_context_irqs, smmu->num_context_banks);
-               err = -ENODEV;
-               goto out_put_masters;
+               return -ENODEV;
        }
 
        for (i = 0; i < smmu->num_global_irqs; ++i) {
@@ -1974,59 +1991,39 @@ static int arm_smmu_device_dt_probe(struct platform_device *pdev)
                if (err) {
                        dev_err(dev, "failed to request global IRQ %d (%u)\n",
                                i, smmu->irqs[i]);
-                       goto out_put_masters;
+                       return err;
                }
        }
 
-       INIT_LIST_HEAD(&smmu->list);
-       spin_lock(&arm_smmu_devices_lock);
-       list_add(&smmu->list, &arm_smmu_devices);
-       spin_unlock(&arm_smmu_devices_lock);
-
+       of_iommu_set_ops(dev->of_node, &arm_smmu_ops);
+       platform_set_drvdata(pdev, smmu);
        arm_smmu_device_reset(smmu);
-       return 0;
 
-out_put_masters:
-       for (node = rb_first(&smmu->masters); node; node = rb_next(node)) {
-               struct arm_smmu_master *master
-                       = container_of(node, struct arm_smmu_master, node);
-               of_node_put(master->of_node);
+       /* Oh, for a proper bus abstraction */
+       if (!iommu_present(&platform_bus_type))
+               bus_set_iommu(&platform_bus_type, &arm_smmu_ops);
+#ifdef CONFIG_ARM_AMBA
+       if (!iommu_present(&amba_bustype))
+               bus_set_iommu(&amba_bustype, &arm_smmu_ops);
+#endif
+#ifdef CONFIG_PCI
+       if (!iommu_present(&pci_bus_type)) {
+               pci_request_acs();
+               bus_set_iommu(&pci_bus_type, &arm_smmu_ops);
        }
-
-       return err;
+#endif
+       return 0;
 }
 
 static int arm_smmu_device_remove(struct platform_device *pdev)
 {
-       int i;
-       struct device *dev = &pdev->dev;
-       struct arm_smmu_device *curr, *smmu = NULL;
-       struct rb_node *node;
-
-       spin_lock(&arm_smmu_devices_lock);
-       list_for_each_entry(curr, &arm_smmu_devices, list) {
-               if (curr->dev == dev) {
-                       smmu = curr;
-                       list_del(&smmu->list);
-                       break;
-               }
-       }
-       spin_unlock(&arm_smmu_devices_lock);
+       struct arm_smmu_device *smmu = platform_get_drvdata(pdev);
 
        if (!smmu)
                return -ENODEV;
 
-       for (node = rb_first(&smmu->masters); node; node = rb_next(node)) {
-               struct arm_smmu_master *master
-                       = container_of(node, struct arm_smmu_master, node);
-               of_node_put(master->of_node);
-       }
-
        if (!bitmap_empty(smmu->context_map, ARM_SMMU_MAX_CBS))
-               dev_err(dev, "removing device with active domains!\n");
-
-       for (i = 0; i < smmu->num_global_irqs; ++i)
-               devm_free_irq(smmu->dev, smmu->irqs[i], smmu);
+               dev_err(&pdev->dev, "removing device with active domains!\n");
 
        /* Turn the thing off */
        writel(sCR0_CLIENTPD, ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sCR0);
@@ -2044,41 +2041,14 @@ static struct platform_driver arm_smmu_driver = {
 
 static int __init arm_smmu_init(void)
 {
-       struct device_node *np;
-       int ret;
-
-       /*
-        * Play nice with systems that don't have an ARM SMMU by checking that
-        * an ARM SMMU exists in the system before proceeding with the driver
-        * and IOMMU bus operation registration.
-        */
-       np = of_find_matching_node(NULL, arm_smmu_of_match);
-       if (!np)
-               return 0;
-
-       of_node_put(np);
-
-       ret = platform_driver_register(&arm_smmu_driver);
-       if (ret)
-               return ret;
-
-       /* Oh, for a proper bus abstraction */
-       if (!iommu_present(&platform_bus_type))
-               bus_set_iommu(&platform_bus_type, &arm_smmu_ops);
-
-#ifdef CONFIG_ARM_AMBA
-       if (!iommu_present(&amba_bustype))
-               bus_set_iommu(&amba_bustype, &arm_smmu_ops);
-#endif
+       static bool registered;
+       int ret = 0;
 
-#ifdef CONFIG_PCI
-       if (!iommu_present(&pci_bus_type)) {
-               pci_request_acs();
-               bus_set_iommu(&pci_bus_type, &arm_smmu_ops);
+       if (!registered) {
+               ret = platform_driver_register(&arm_smmu_driver);
+               registered = !ret;
        }
-#endif
-
-       return 0;
+       return ret;
 }
 
 static void __exit arm_smmu_exit(void)
@@ -2089,6 +2059,25 @@ static void __exit arm_smmu_exit(void)
 subsys_initcall(arm_smmu_init);
 module_exit(arm_smmu_exit);
 
+static int __init arm_smmu_of_init(struct device_node *np)
+{
+       int ret = arm_smmu_init();
+
+       if (ret)
+               return ret;
+
+       if (!of_platform_device_create(np, NULL, platform_bus_type.dev_root))
+               return -ENODEV;
+
+       return 0;
+}
+IOMMU_OF_DECLARE(arm_smmuv1, "arm,smmu-v1", arm_smmu_of_init);
+IOMMU_OF_DECLARE(arm_smmuv2, "arm,smmu-v2", arm_smmu_of_init);
+IOMMU_OF_DECLARE(arm_mmu400, "arm,mmu-400", arm_smmu_of_init);
+IOMMU_OF_DECLARE(arm_mmu401, "arm,mmu-401", arm_smmu_of_init);
+IOMMU_OF_DECLARE(arm_mmu500, "arm,mmu-500", arm_smmu_of_init);
+IOMMU_OF_DECLARE(cavium_smmuv2, "cavium,smmu-v2", arm_smmu_of_init);
+
 MODULE_DESCRIPTION("IOMMU API for ARM architected SMMU implementations");
 MODULE_AUTHOR("Will Deacon <will.deacon@arm.com>");
 MODULE_LICENSE("GPL v2");
index 00c8a08d56e722349c64eff91b9ae9bb5a59a0bf..c5ab8667e6f2e6e4c8390e298cc1a27be78e6bfb 100644 (file)
 #include <linux/huge_mm.h>
 #include <linux/iommu.h>
 #include <linux/iova.h>
+#include <linux/irq.h>
 #include <linux/mm.h>
+#include <linux/pci.h>
 #include <linux/scatterlist.h>
 #include <linux/vmalloc.h>
 
+struct iommu_dma_msi_page {
+       struct list_head        list;
+       dma_addr_t              iova;
+       phys_addr_t             phys;
+};
+
+struct iommu_dma_cookie {
+       struct iova_domain      iovad;
+       struct list_head        msi_page_list;
+       spinlock_t              msi_lock;
+};
+
+static inline struct iova_domain *cookie_iovad(struct iommu_domain *domain)
+{
+       return &((struct iommu_dma_cookie *)domain->iova_cookie)->iovad;
+}
+
 int iommu_dma_init(void)
 {
        return iova_cache_get();
@@ -43,15 +62,19 @@ int iommu_dma_init(void)
  */
 int iommu_get_dma_cookie(struct iommu_domain *domain)
 {
-       struct iova_domain *iovad;
+       struct iommu_dma_cookie *cookie;
 
        if (domain->iova_cookie)
                return -EEXIST;
 
-       iovad = kzalloc(sizeof(*iovad), GFP_KERNEL);
-       domain->iova_cookie = iovad;
+       cookie = kzalloc(sizeof(*cookie), GFP_KERNEL);
+       if (!cookie)
+               return -ENOMEM;
 
-       return iovad ? 0 : -ENOMEM;
+       spin_lock_init(&cookie->msi_lock);
+       INIT_LIST_HEAD(&cookie->msi_page_list);
+       domain->iova_cookie = cookie;
+       return 0;
 }
 EXPORT_SYMBOL(iommu_get_dma_cookie);
 
@@ -63,32 +86,58 @@ EXPORT_SYMBOL(iommu_get_dma_cookie);
  */
 void iommu_put_dma_cookie(struct iommu_domain *domain)
 {
-       struct iova_domain *iovad = domain->iova_cookie;
+       struct iommu_dma_cookie *cookie = domain->iova_cookie;
+       struct iommu_dma_msi_page *msi, *tmp;
 
-       if (!iovad)
+       if (!cookie)
                return;
 
-       if (iovad->granule)
-               put_iova_domain(iovad);
-       kfree(iovad);
+       if (cookie->iovad.granule)
+               put_iova_domain(&cookie->iovad);
+
+       list_for_each_entry_safe(msi, tmp, &cookie->msi_page_list, list) {
+               list_del(&msi->list);
+               kfree(msi);
+       }
+       kfree(cookie);
        domain->iova_cookie = NULL;
 }
 EXPORT_SYMBOL(iommu_put_dma_cookie);
 
+static void iova_reserve_pci_windows(struct pci_dev *dev,
+               struct iova_domain *iovad)
+{
+       struct pci_host_bridge *bridge = pci_find_host_bridge(dev->bus);
+       struct resource_entry *window;
+       unsigned long lo, hi;
+
+       resource_list_for_each_entry(window, &bridge->windows) {
+               if (resource_type(window->res) != IORESOURCE_MEM &&
+                   resource_type(window->res) != IORESOURCE_IO)
+                       continue;
+
+               lo = iova_pfn(iovad, window->res->start - window->offset);
+               hi = iova_pfn(iovad, window->res->end - window->offset);
+               reserve_iova(iovad, lo, hi);
+       }
+}
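
iova_reserve_pci_windows() subtracts each window's CPU-to-bus offset before reserving, so it is the bus addresses, not the CPU addresses, that get carved out of the IOVA space. With hypothetical numbers:

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        /* Hypothetical bridge window: CPU 0x4_4000_0000..0x4_7fff_ffff,
         * translated to bus addresses by a 0x4_0000_0000 offset. */
        uint64_t granule   = 4096; /* IOVA page size */
        uint64_t res_start = 0x440000000ULL;
        uint64_t res_end   = 0x47fffffffULL;
        uint64_t offset    = 0x400000000ULL;

        uint64_t lo = (res_start - offset) / granule;
        uint64_t hi = (res_end   - offset) / granule;

        /* reserve_iova(iovad, lo, hi) would then pin these PFNs */
        printf("reserve IOVA PFNs 0x%llx..0x%llx\n",
               (unsigned long long)lo, (unsigned long long)hi);
        return 0;
    }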
+
 /**
  * iommu_dma_init_domain - Initialise a DMA mapping domain
  * @domain: IOMMU domain previously prepared by iommu_get_dma_cookie()
  * @base: IOVA at which the mappable address space starts
  * @size: Size of IOVA space
+ * @dev: Device the domain is being initialised for
  *
  * @base and @size should be exact multiples of IOMMU page granularity to
  * avoid rounding surprises. If necessary, we reserve the page at address 0
  * to ensure it is an invalid IOVA. It is safe to reinitialise a domain, but
  * any change which could make prior IOVAs invalid will fail.
  */
-int iommu_dma_init_domain(struct iommu_domain *domain, dma_addr_t base, u64 size)
+int iommu_dma_init_domain(struct iommu_domain *domain, dma_addr_t base,
+               u64 size, struct device *dev)
 {
-       struct iova_domain *iovad = domain->iova_cookie;
+       struct iova_domain *iovad = cookie_iovad(domain);
        unsigned long order, base_pfn, end_pfn;
 
        if (!iovad)
@@ -124,6 +173,8 @@ int iommu_dma_init_domain(struct iommu_domain *domain, dma_addr_t base, u64 size
                iovad->dma_32bit_pfn = end_pfn;
        } else {
                init_iova_domain(iovad, 1UL << order, base_pfn, end_pfn);
+               if (dev && dev_is_pci(dev))
+                       iova_reserve_pci_windows(to_pci_dev(dev), iovad);
        }
        return 0;
 }
@@ -155,7 +206,7 @@ int dma_direction_to_prot(enum dma_data_direction dir, bool coherent)
 static struct iova *__alloc_iova(struct iommu_domain *domain, size_t size,
                dma_addr_t dma_limit)
 {
-       struct iova_domain *iovad = domain->iova_cookie;
+       struct iova_domain *iovad = cookie_iovad(domain);
        unsigned long shift = iova_shift(iovad);
        unsigned long length = iova_align(iovad, size) >> shift;
 
@@ -171,7 +222,7 @@ static struct iova *__alloc_iova(struct iommu_domain *domain, size_t size,
 /* The IOVA allocator knows what we mapped, so just unmap whatever that was */
 static void __iommu_dma_unmap(struct iommu_domain *domain, dma_addr_t dma_addr)
 {
-       struct iova_domain *iovad = domain->iova_cookie;
+       struct iova_domain *iovad = cookie_iovad(domain);
        unsigned long shift = iova_shift(iovad);
        unsigned long pfn = dma_addr >> shift;
        struct iova *iova = find_iova(iovad, pfn);
@@ -294,7 +345,7 @@ struct page **iommu_dma_alloc(struct device *dev, size_t size, gfp_t gfp,
                void (*flush_page)(struct device *, const void *, phys_addr_t))
 {
        struct iommu_domain *domain = iommu_get_domain_for_dev(dev);
-       struct iova_domain *iovad = domain->iova_cookie;
+       struct iova_domain *iovad = cookie_iovad(domain);
        struct iova *iova;
        struct page **pages;
        struct sg_table sgt;
@@ -386,7 +437,7 @@ dma_addr_t iommu_dma_map_page(struct device *dev, struct page *page,
 {
        dma_addr_t dma_addr;
        struct iommu_domain *domain = iommu_get_domain_for_dev(dev);
-       struct iova_domain *iovad = domain->iova_cookie;
+       struct iova_domain *iovad = cookie_iovad(domain);
        phys_addr_t phys = page_to_phys(page) + offset;
        size_t iova_off = iova_offset(iovad, phys);
        size_t len = iova_align(iovad, size + iova_off);
@@ -495,7 +546,7 @@ int iommu_dma_map_sg(struct device *dev, struct scatterlist *sg,
                int nents, int prot)
 {
        struct iommu_domain *domain = iommu_get_domain_for_dev(dev);
-       struct iova_domain *iovad = domain->iova_cookie;
+       struct iova_domain *iovad = cookie_iovad(domain);
        struct iova *iova;
        struct scatterlist *s, *prev = NULL;
        dma_addr_t dma_addr;
@@ -587,3 +638,81 @@ int iommu_dma_mapping_error(struct device *dev, dma_addr_t dma_addr)
 {
        return dma_addr == DMA_ERROR_CODE;
 }
+
+static struct iommu_dma_msi_page *iommu_dma_get_msi_page(struct device *dev,
+               phys_addr_t msi_addr, struct iommu_domain *domain)
+{
+       struct iommu_dma_cookie *cookie = domain->iova_cookie;
+       struct iommu_dma_msi_page *msi_page;
+       struct iova_domain *iovad = &cookie->iovad;
+       struct iova *iova;
+       int prot = IOMMU_WRITE | IOMMU_NOEXEC | IOMMU_MMIO;
+
+       msi_addr &= ~(phys_addr_t)iova_mask(iovad);
+       list_for_each_entry(msi_page, &cookie->msi_page_list, list)
+               if (msi_page->phys == msi_addr)
+                       return msi_page;
+
+       msi_page = kzalloc(sizeof(*msi_page), GFP_ATOMIC);
+       if (!msi_page)
+               return NULL;
+
+       iova = __alloc_iova(domain, iovad->granule, dma_get_mask(dev));
+       if (!iova)
+               goto out_free_page;
+
+       msi_page->phys = msi_addr;
+       msi_page->iova = iova_dma_addr(iovad, iova);
+       if (iommu_map(domain, msi_page->iova, msi_addr, iovad->granule, prot))
+               goto out_free_iova;
+
+       INIT_LIST_HEAD(&msi_page->list);
+       list_add(&msi_page->list, &cookie->msi_page_list);
+       return msi_page;
+
+out_free_iova:
+       __free_iova(iovad, iova);
+out_free_page:
+       kfree(msi_page);
+       return NULL;
+}
+
+void iommu_dma_map_msi_msg(int irq, struct msi_msg *msg)
+{
+       struct device *dev = msi_desc_to_dev(irq_get_msi_desc(irq));
+       struct iommu_domain *domain = iommu_get_domain_for_dev(dev);
+       struct iommu_dma_cookie *cookie;
+       struct iommu_dma_msi_page *msi_page;
+       phys_addr_t msi_addr = (u64)msg->address_hi << 32 | msg->address_lo;
+       unsigned long flags;
+
+       if (!domain || !domain->iova_cookie)
+               return;
+
+       cookie = domain->iova_cookie;
+
+       /*
+        * We disable IRQs to rule out a possible inversion against
+        * irq_desc_lock if, say, someone tries to retarget the affinity
+        * of an MSI from within an IPI handler.
+        */
+       spin_lock_irqsave(&cookie->msi_lock, flags);
+       msi_page = iommu_dma_get_msi_page(dev, msi_addr, domain);
+       spin_unlock_irqrestore(&cookie->msi_lock, flags);
+
+       if (WARN_ON(!msi_page)) {
+               /*
+                * We're called from a void callback, so the best we can do is
+                * 'fail' by filling the message with obviously bogus values.
+                * Since we got this far due to an IOMMU being present, it's
+                * not like the existing address would have worked anyway...
+                */
+               msg->address_hi = ~0U;
+               msg->address_lo = ~0U;
+               msg->data = ~0U;
+       } else {
+               msg->address_hi = upper_32_bits(msi_page->iova);
+               msg->address_lo &= iova_mask(&cookie->iovad);
+               msg->address_lo += lower_32_bits(msi_page->iova);
+       }
+}
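
The address fix-up at the end keeps the MSI's offset within its doorbell page but swaps in the page-aligned IOVA, which is why only iova_mask() bits of the original address_lo survive. A worked example with hypothetical addresses:

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        uint32_t granule   = 0x1000;
        uint32_t iova_mask = granule - 1;

        /* Hypothetical doorbell and the IOVA its page was mapped at */
        uint64_t msi_phys  = 0x8090100ULL;
        uint64_t page_iova = 0xff000000ULL;

        uint32_t address_lo = (uint32_t)msi_phys;
        uint32_t address_hi = (uint32_t)(msi_phys >> 32);

        /* Same steps as the else-branch above: keep the in-page offset,
         * substitute the IOVA page address. */
        address_hi  = (uint32_t)(page_iova >> 32);
        address_lo &= iova_mask;
        address_lo += (uint32_t)page_iova;

        printf("doorbell rewritten to IOVA 0x%llx\n",
               ((unsigned long long)address_hi << 32) | address_lo);
        return 0;
    }
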
index 33dcc29ec200c19d09c2fbe824ea05356cc62875..30808e91b7757677d2723818420293ea87ddc082 100644 (file)
@@ -1345,8 +1345,8 @@ static int __init exynos_iommu_of_setup(struct device_node *np)
                exynos_iommu_init();
 
        pdev = of_platform_device_create(np, NULL, platform_bus_type.dev_root);
-       if (IS_ERR(pdev))
-               return PTR_ERR(pdev);
+       if (!pdev)
+               return -ENODEV;
 
        /*
         * use the first registered sysmmu device for performing
index ebb5bf3ddbd9424586b4d22068bd7a41656881b4..a4407eabf0e64fbacba5573bc3eb91204c97a19c 100644 (file)
@@ -2452,20 +2452,15 @@ static int get_last_alias(struct pci_dev *pdev, u16 alias, void *opaque)
        return 0;
 }
 
-/* domain is initialized */
-static struct dmar_domain *get_domain_for_dev(struct device *dev, int gaw)
+static struct dmar_domain *find_or_alloc_domain(struct device *dev, int gaw)
 {
        struct device_domain_info *info = NULL;
-       struct dmar_domain *domain, *tmp;
+       struct dmar_domain *domain = NULL;
        struct intel_iommu *iommu;
        u16 req_id, dma_alias;
        unsigned long flags;
        u8 bus, devfn;
 
-       domain = find_domain(dev);
-       if (domain)
-               return domain;
-
        iommu = device_to_iommu(dev, &bus, &devfn);
        if (!iommu)
                return NULL;
@@ -2487,9 +2482,9 @@ static struct dmar_domain *get_domain_for_dev(struct device *dev, int gaw)
                }
                spin_unlock_irqrestore(&device_domain_lock, flags);
 
-               /* DMA alias already has a domain, uses it */
+               /* DMA alias already has a domain, use it */
                if (info)
-                       goto found_domain;
+                       goto out;
        }
 
        /* Allocate and initialize new domain for the device */
@@ -2501,28 +2496,67 @@ static struct dmar_domain *get_domain_for_dev(struct device *dev, int gaw)
                return NULL;
        }
 
-       /* register PCI DMA alias device */
-       if (dev_is_pci(dev) && req_id != dma_alias) {
-               tmp = dmar_insert_one_dev_info(iommu, PCI_BUS_NUM(dma_alias),
-                                              dma_alias & 0xff, NULL, domain);
+out:
 
-               if (!tmp || tmp != domain) {
-                       domain_exit(domain);
-                       domain = tmp;
-               }
+       return domain;
+}
 
-               if (!domain)
-                       return NULL;
+static struct dmar_domain *set_domain_for_dev(struct device *dev,
+                                             struct dmar_domain *domain)
+{
+       struct intel_iommu *iommu;
+       struct dmar_domain *tmp;
+       u16 req_id, dma_alias;
+       u8 bus, devfn;
+
+       iommu = device_to_iommu(dev, &bus, &devfn);
+       if (!iommu)
+               return NULL;
+
+       req_id = ((u16)bus << 8) | devfn;
+
+       if (dev_is_pci(dev)) {
+               struct pci_dev *pdev = to_pci_dev(dev);
+
+               pci_for_each_dma_alias(pdev, get_last_alias, &dma_alias);
+
+               /* register PCI DMA alias device */
+               if (req_id != dma_alias) {
+                       tmp = dmar_insert_one_dev_info(iommu, PCI_BUS_NUM(dma_alias),
+                                       dma_alias & 0xff, NULL, domain);
+
+                       if (!tmp || tmp != domain)
+                               return tmp;
+               }
        }
 
-found_domain:
        tmp = dmar_insert_one_dev_info(iommu, bus, devfn, dev, domain);
+       if (!tmp || tmp != domain)
+               return tmp;
+
+       return domain;
+}
 
-       if (!tmp || tmp != domain) {
+static struct dmar_domain *get_domain_for_dev(struct device *dev, int gaw)
+{
+       struct dmar_domain *domain, *tmp;
+
+       domain = find_domain(dev);
+       if (domain)
+               goto out;
+
+       domain = find_or_alloc_domain(dev, gaw);
+       if (!domain)
+               goto out;
+
+       tmp = set_domain_for_dev(dev, domain);
+       if (!tmp || domain != tmp) {
                domain_exit(domain);
                domain = tmp;
        }
 
+out:
+
        return domain;
 }
 
@@ -3394,17 +3428,18 @@ static unsigned long intel_alloc_iova(struct device *dev,
 
 static struct dmar_domain *__get_valid_domain_for_dev(struct device *dev)
 {
+       struct dmar_domain *domain, *tmp;
        struct dmar_rmrr_unit *rmrr;
-       struct dmar_domain *domain;
        struct device *i_dev;
        int i, ret;
 
-       domain = get_domain_for_dev(dev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
-       if (!domain) {
-               pr_err("Allocating domain for %s failed\n",
-                      dev_name(dev));
-               return NULL;
-       }
+       domain = find_domain(dev);
+       if (domain)
+               goto out;
+
+       domain = find_or_alloc_domain(dev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
+       if (!domain)
+               goto out;
 
        /* We have a new domain - setup possible RMRRs for the device */
        rcu_read_lock();
@@ -3423,6 +3458,18 @@ static struct dmar_domain *__get_valid_domain_for_dev(struct device *dev)
        }
        rcu_read_unlock();
 
+       tmp = set_domain_for_dev(dev, domain);
+       if (!tmp || domain != tmp) {
+               domain_exit(domain);
+               domain = tmp;
+       }
+
+out:
+
+       if (!domain)
+               pr_err("Allocating domain for %s failed\n", dev_name(dev));
+
+
        return domain;
 }
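
Splitting the old get_domain_for_dev() into find_or_alloc_domain() and set_domain_for_dev() lets __get_valid_domain_for_dev() (above) program RMRRs into a freshly allocated domain before any device is attached to it. Reduced to a rough sketch, the new calling convention both callers follow is:

	domain = find_domain(dev);
	if (!domain) {
		domain = find_or_alloc_domain(dev, gaw);
		/* ... set up the not-yet-attached domain, e.g. RMRRs ... */
		tmp = set_domain_for_dev(dev, domain);
		if (!tmp || tmp != domain) {
			/* lost a race to a concurrent caller, or failed */
			domain_exit(domain);
			domain = tmp;
		}
	}
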
 
index def8ca1c982d5869a358ca69467a6f13763c46fb..f50e51c1a9c88630b03ca9095a2d6e732f671aee 100644 (file)
@@ -633,6 +633,10 @@ static struct io_pgtable *arm_v7s_alloc_pgtable(struct io_pgtable_cfg *cfg,
 {
        struct arm_v7s_io_pgtable *data;
 
+#ifdef PHYS_OFFSET
+       if (upper_32_bits(PHYS_OFFSET))
+               return NULL;
+#endif
        if (cfg->ias > ARM_V7S_ADDR_BITS || cfg->oas > ARM_V7S_ADDR_BITS)
                return NULL;
 
index b06d93594436984fa954982a0cb763206df268c0..9a2f1960873b65e01b29ff5f1d84148ce4bae094 100644 (file)
@@ -31,6 +31,7 @@
 #include <linux/err.h>
 #include <linux/pci.h>
 #include <linux/bitops.h>
+#include <linux/property.h>
 #include <trace/events/iommu.h>
 
 static struct kset *iommu_group_kset;
@@ -1613,3 +1614,60 @@ out:
 
        return ret;
 }
+
+int iommu_fwspec_init(struct device *dev, struct fwnode_handle *iommu_fwnode,
+                     const struct iommu_ops *ops)
+{
+       struct iommu_fwspec *fwspec = dev->iommu_fwspec;
+
+       if (fwspec)
+               return ops == fwspec->ops ? 0 : -EINVAL;
+
+       fwspec = kzalloc(sizeof(*fwspec), GFP_KERNEL);
+       if (!fwspec)
+               return -ENOMEM;
+
+       of_node_get(to_of_node(iommu_fwnode));
+       fwspec->iommu_fwnode = iommu_fwnode;
+       fwspec->ops = ops;
+       dev->iommu_fwspec = fwspec;
+       return 0;
+}
+EXPORT_SYMBOL_GPL(iommu_fwspec_init);
+
+void iommu_fwspec_free(struct device *dev)
+{
+       struct iommu_fwspec *fwspec = dev->iommu_fwspec;
+
+       if (fwspec) {
+               fwnode_handle_put(fwspec->iommu_fwnode);
+               kfree(fwspec);
+               dev->iommu_fwspec = NULL;
+       }
+}
+EXPORT_SYMBOL_GPL(iommu_fwspec_free);
+
+int iommu_fwspec_add_ids(struct device *dev, u32 *ids, int num_ids)
+{
+       struct iommu_fwspec *fwspec = dev->iommu_fwspec;
+       size_t size;
+       int i;
+
+       if (!fwspec)
+               return -EINVAL;
+
+       size = offsetof(struct iommu_fwspec, ids[fwspec->num_ids + num_ids]);
+       if (size > sizeof(*fwspec)) {
+               fwspec = krealloc(dev->iommu_fwspec, size, GFP_KERNEL);
+               if (!fwspec)
+                       return -ENOMEM;
+       }
+
+       for (i = 0; i < num_ids; i++)
+               fwspec->ids[fwspec->num_ids + i] = ids[i];
+
+       fwspec->num_ids += num_ids;
+       dev->iommu_fwspec = fwspec;
+       return 0;
+}
+EXPORT_SYMBOL_GPL(iommu_fwspec_add_ids);
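
A driver's ->of_xlate() callback is the intended consumer of these helpers: initialise the fwspec against the driver's own ops, then record each ID the master may emit. A minimal sketch, where example_of_xlate and example_iommu_ops are hypothetical names; iommu_fwspec_init() simply returns 0 if the core has already set the fwspec up with the same ops:

	static int example_of_xlate(struct device *dev,
				    struct of_phandle_args *args)
	{
		u32 id = args->args[0];
		int ret;

		ret = iommu_fwspec_init(dev, &args->np->fwnode,
					&example_iommu_ops);
		if (ret)
			return ret;

		/* record one stream/requester ID for this master */
		return iommu_fwspec_add_ids(dev, &id, 1);
	}
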
index 2fdbac67a77f4b70250fd2675b35edf5295c472e..ace331da6459473685016aa8ac53601fe9c8ca84 100644 (file)
@@ -636,7 +636,7 @@ static int ipmmu_add_device(struct device *dev)
        spin_unlock(&ipmmu_devices_lock);
 
        if (ret < 0)
-               return -ENODEV;
+               goto error;
 
        for (i = 0; i < num_utlbs; ++i) {
                if (utlbs[i] >= mmu->num_utlbs) {
index 57f23eaaa2f9a4625874839a598b65c1bfb95090..5b82862f571f2ef5a324a03d99e9a453a4f6175e 100644 (file)
@@ -22,6 +22,7 @@
 #include <linux/limits.h>
 #include <linux/of.h>
 #include <linux/of_iommu.h>
+#include <linux/of_pci.h>
 #include <linux/slab.h>
 
 static const struct of_device_id __iommu_of_table_sentinel
@@ -134,6 +135,47 @@ const struct iommu_ops *of_iommu_get_ops(struct device_node *np)
        return ops;
 }
 
+static int __get_pci_rid(struct pci_dev *pdev, u16 alias, void *data)
+{
+       struct of_phandle_args *iommu_spec = data;
+
+       iommu_spec->args[0] = alias;
+       return iommu_spec->np == pdev->bus->dev.of_node;
+}
+
+static const struct iommu_ops
+*of_pci_iommu_configure(struct pci_dev *pdev, struct device_node *bridge_np)
+{
+       const struct iommu_ops *ops;
+       struct of_phandle_args iommu_spec;
+
+       /*
+        * Start by tracing the RID alias down the PCI topology as
+        * far as the host bridge whose OF node we have...
+        * (we're not even attempting to handle multi-alias devices yet)
+        */
+       iommu_spec.args_count = 1;
+       iommu_spec.np = bridge_np;
+       pci_for_each_dma_alias(pdev, __get_pci_rid, &iommu_spec);
+       /*
+        * ...then find out what that becomes once it escapes the PCI
+        * bus into the system beyond, and which IOMMU it ends up at.
+        */
+       iommu_spec.np = NULL;
+       if (of_pci_map_rid(bridge_np, iommu_spec.args[0], "iommu-map",
+                          "iommu-map-mask", &iommu_spec.np, iommu_spec.args))
+               return NULL;
+
+       ops = of_iommu_get_ops(iommu_spec.np);
+       if (!ops || !ops->of_xlate ||
+           iommu_fwspec_init(&pdev->dev, &iommu_spec.np->fwnode, ops) ||
+           ops->of_xlate(&pdev->dev, &iommu_spec))
+               ops = NULL;
+
+       of_node_put(iommu_spec.np);
+       return ops;
+}
+
 const struct iommu_ops *of_iommu_configure(struct device *dev,
                                           struct device_node *master_np)
 {
@@ -142,12 +184,8 @@ const struct iommu_ops *of_iommu_configure(struct device *dev,
        const struct iommu_ops *ops = NULL;
        int idx = 0;
 
-       /*
-        * We can't do much for PCI devices without knowing how
-        * device IDs are wired up from the PCI bus to the IOMMU.
-        */
        if (dev_is_pci(dev))
-               return NULL;
+               return of_pci_iommu_configure(to_pci_dev(dev), master_np);
 
        /*
         * We don't currently walk up the tree looking for a parent IOMMU.
@@ -160,7 +198,9 @@ const struct iommu_ops *of_iommu_configure(struct device *dev,
                np = iommu_spec.np;
                ops = of_iommu_get_ops(np);
 
-               if (!ops || !ops->of_xlate || ops->of_xlate(dev, &iommu_spec))
+               if (!ops || !ops->of_xlate ||
+                   iommu_fwspec_init(dev, &np->fwnode, ops) ||
+                   ops->of_xlate(dev, &iommu_spec))
                        goto err_put_node;
 
                of_node_put(np);
index 35eb7ac5d21f892e68cfdaf84c73a4b2d077cfbe..863e073c6f7f4e26cce71ae30e3625928e83b634 100644 (file)
@@ -16,6 +16,7 @@
 #define pr_fmt(fmt) "GICv2m: " fmt
 
 #include <linux/acpi.h>
+#include <linux/dma-iommu.h>
 #include <linux/irq.h>
 #include <linux/irqdomain.h>
 #include <linux/kernel.h>
@@ -108,6 +109,8 @@ static void gicv2m_compose_msi_msg(struct irq_data *data, struct msi_msg *msg)
 
        if (v2m->flags & GICV2M_NEEDS_SPI_OFFSET)
                msg->data -= v2m->spi_offset;
+
+       iommu_dma_map_msi_msg(data->irq, msg);
 }
 
 static struct irq_chip gicv2m_irq_chip = {
index 35c851c14e497e52f3f7836246e8ae755ec7f66b..003495d91f9cfd34ea77eec0b52a4f070e58bfd5 100644 (file)
@@ -19,6 +19,7 @@
 #include <linux/bitmap.h>
 #include <linux/cpu.h>
 #include <linux/delay.h>
+#include <linux/dma-iommu.h>
 #include <linux/interrupt.h>
 #include <linux/irqdomain.h>
 #include <linux/acpi_iort.h>
@@ -659,6 +660,8 @@ static void its_irq_compose_msi_msg(struct irq_data *d, struct msi_msg *msg)
        msg->address_lo         = addr & ((1UL << 32) - 1);
        msg->address_hi         = addr >> 32;
        msg->data               = its_get_event_id(d);
+
+       iommu_dma_map_msi_msg(d->irq, msg);
 }
 
 static struct irq_chip its_irq_chip = {
index a2e68f740edacbe900af35b5875cb65e6e40771a..393fea85eb4ef89babde111e622166b441885eb5 100644 (file)
@@ -26,6 +26,7 @@
 #include <linux/module.h>
 #include <linux/of.h>
 #include <linux/of_irq.h>
+#include <linux/of_pci.h>
 #include <linux/string.h>
 #include <linux/slab.h>
 
@@ -592,87 +593,16 @@ static u32 __of_msi_map_rid(struct device *dev, struct device_node **np,
                            u32 rid_in)
 {
        struct device *parent_dev;
-       struct device_node *msi_controller_node;
-       struct device_node *msi_np = *np;
-       u32 map_mask, masked_rid, rid_base, msi_base, rid_len, phandle;
-       int msi_map_len;
-       bool matched;
        u32 rid_out = rid_in;
-       const __be32 *msi_map = NULL;
 
        /*
         * Walk up the device parent links looking for one with a
         * "msi-map" property.
         */
-       for (parent_dev = dev; parent_dev; parent_dev = parent_dev->parent) {
-               if (!parent_dev->of_node)
-                       continue;
-
-               msi_map = of_get_property(parent_dev->of_node,
-                                         "msi-map", &msi_map_len);
-               if (!msi_map)
-                       continue;
-
-               if (msi_map_len % (4 * sizeof(__be32))) {
-                       dev_err(parent_dev, "Error: Bad msi-map length: %d\n",
-                               msi_map_len);
-                       return rid_out;
-               }
-               /* We have a good parent_dev and msi_map, let's use them. */
-               break;
-       }
-       if (!msi_map)
-               return rid_out;
-
-       /* The default is to select all bits. */
-       map_mask = 0xffffffff;
-
-       /*
-        * Can be overridden by "msi-map-mask" property.  If
-        * of_property_read_u32() fails, the default is used.
-        */
-       of_property_read_u32(parent_dev->of_node, "msi-map-mask", &map_mask);
-
-       masked_rid = map_mask & rid_in;
-       matched = false;
-       while (!matched && msi_map_len >= 4 * sizeof(__be32)) {
-               rid_base = be32_to_cpup(msi_map + 0);
-               phandle = be32_to_cpup(msi_map + 1);
-               msi_base = be32_to_cpup(msi_map + 2);
-               rid_len = be32_to_cpup(msi_map + 3);
-
-               if (rid_base & ~map_mask) {
-                       dev_err(parent_dev,
-                               "Invalid msi-map translation - msi-map-mask (0x%x) ignores rid-base (0x%x)\n",
-                               map_mask, rid_base);
-                       return rid_out;
-               }
-
-               msi_controller_node = of_find_node_by_phandle(phandle);
-
-               matched = (masked_rid >= rid_base &&
-                          masked_rid < rid_base + rid_len);
-               if (msi_np)
-                       matched &= msi_np == msi_controller_node;
-
-               if (matched && !msi_np) {
-                       *np = msi_np = msi_controller_node;
+       for (parent_dev = dev; parent_dev; parent_dev = parent_dev->parent)
+               if (!of_pci_map_rid(parent_dev->of_node, rid_in, "msi-map",
+                                   "msi-map-mask", np, &rid_out))
                        break;
-               }
-
-               of_node_put(msi_controller_node);
-               msi_map_len -= 4 * sizeof(__be32);
-               msi_map += 4;
-       }
-       if (!matched)
-               return rid_out;
-
-       rid_out = masked_rid - rid_base + msi_base;
-       dev_dbg(dev,
-               "msi-map at: %s, using mask %08x, rid-base: %08x, msi-base: %08x, length: %08x, rid: %08x -> %08x\n",
-               dev_name(parent_dev), map_mask, rid_base, msi_base,
-               rid_len, rid_in, rid_out);
-
        return rid_out;
 }
 
index 589b30c68e1427efc280f13d0bbcaf660e816ab0..b58be12ab27770bbbe40c25eb220a96204c71d3d 100644 (file)
@@ -308,3 +308,105 @@ struct msi_controller *of_pci_find_msi_chip_by_node(struct device_node *of_node)
 EXPORT_SYMBOL_GPL(of_pci_find_msi_chip_by_node);
 
 #endif /* CONFIG_PCI_MSI */
+
+/**
+ * of_pci_map_rid - Translate a requester ID through a downstream mapping.
+ * @np: root complex device node.
+ * @rid: PCI requester ID to map.
+ * @map_name: property name of the map to use.
+ * @map_mask_name: optional property name of the mask to use.
+ * @target: optional pointer to a target device node.
+ * @id_out: optional pointer to receive the translated ID.
+ *
+ * Given a PCI requester ID, look up the appropriate implementation-defined
+ * platform ID and/or the target device which receives transactions on that
+ * ID, as per the "iommu-map" and "msi-map" bindings. Either of @target or
+ * @id_out may be NULL if only the other is required. If @target points to
+ * a non-NULL device node pointer, only entries targeting that node will be
+ * matched; if it points to a NULL value, it will receive the device node of
+ * the first matching target phandle, with a reference held.
+ *
+ * Return: 0 on success or a standard error code on failure.
+ */
+int of_pci_map_rid(struct device_node *np, u32 rid,
+                  const char *map_name, const char *map_mask_name,
+                  struct device_node **target, u32 *id_out)
+{
+       u32 map_mask, masked_rid;
+       int map_len;
+       const __be32 *map = NULL;
+
+       if (!np || !map_name || (!target && !id_out))
+               return -EINVAL;
+
+       map = of_get_property(np, map_name, &map_len);
+       if (!map) {
+               if (target)
+                       return -ENODEV;
+               /* Otherwise, no map implies no translation */
+               *id_out = rid;
+               return 0;
+       }
+
+       if (!map_len || map_len % (4 * sizeof(*map))) {
+               pr_err("%s: Error: Bad %s length: %d\n", np->full_name,
+                       map_name, map_len);
+               return -EINVAL;
+       }
+
+       /* The default is to select all bits. */
+       map_mask = 0xffffffff;
+
+       /*
+        * Can be overridden by "{iommu,msi}-map-mask" property.
+        * If of_property_read_u32() fails, the default is used.
+        */
+       if (map_mask_name)
+               of_property_read_u32(np, map_mask_name, &map_mask);
+
+       masked_rid = map_mask & rid;
+       for ( ; map_len > 0; map_len -= 4 * sizeof(*map), map += 4) {
+               struct device_node *phandle_node;
+               u32 rid_base = be32_to_cpup(map + 0);
+               u32 phandle = be32_to_cpup(map + 1);
+               u32 out_base = be32_to_cpup(map + 2);
+               u32 rid_len = be32_to_cpup(map + 3);
+
+               if (rid_base & ~map_mask) {
+                       pr_err("%s: Invalid %s translation - %s-mask (0x%x) ignores rid-base (0x%x)\n",
+                               np->full_name, map_name, map_name,
+                               map_mask, rid_base);
+                       return -EFAULT;
+               }
+
+               if (masked_rid < rid_base || masked_rid >= rid_base + rid_len)
+                       continue;
+
+               phandle_node = of_find_node_by_phandle(phandle);
+               if (!phandle_node)
+                       return -ENODEV;
+
+               if (target) {
+                       if (*target)
+                               of_node_put(phandle_node);
+                       else
+                               *target = phandle_node;
+
+                       if (*target != phandle_node)
+                               continue;
+               }
+
+               if (id_out)
+                       *id_out = masked_rid - rid_base + out_base;
+
+               pr_debug("%s: %s, using mask %08x, rid-base: %08x, out-base: %08x, length: %08x, rid: %08x -> %08x\n",
+                       np->full_name, map_name, map_mask, rid_base, out_base,
+                       rid_len, rid, masked_rid - rid_base + out_base);
+               return 0;
+       }
+
+       pr_err("%s: Invalid %s translation - no match for rid 0x%x on %s\n",
+               np->full_name, map_name, rid,
+               target && *target ? (*target)->full_name : "any target");
+       return -EFAULT;
+}
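
Each map entry is a (rid-base, phandle, out-base, length) quadruple, and a matching RID translates as (rid & mask) - rid-base + out-base. A worked example with hypothetical numbers, assuming bridge_np is a host bridge node carrying iommu-map = <0x0 &smmu 0x10000 0x10000>:

	u32 out;
	struct device_node *smmu_np = NULL;

	/* device 02:00.0 -> RID 0x0200; default mask 0xffffffff applies */
	if (!of_pci_map_rid(bridge_np, 0x0200, "iommu-map", "iommu-map-mask",
			    &smmu_np, &out)) {
		/* out == 0x0200 - 0x0 + 0x10000 == 0x10200 */
		of_node_put(smmu_np);	/* drop the reference taken above */
	}
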
index 78f66786da91cbb2528d3b8a7827442369b6877b..6764d7447422614d3387d7332af339d10d914b3f 100644 (file)
@@ -26,7 +26,7 @@
 #define LARB0_PORT_OFFSET              0
 #define LARB1_PORT_OFFSET              11
 #define LARB2_PORT_OFFSET              21
-#define LARB3_PORT_OFFSET              43
+#define LARB3_PORT_OFFSET              44
 
 #define MT2701_M4U_ID_LARB0(port)      ((port) + LARB0_PORT_OFFSET)
 #define MT2701_M4U_ID_LARB1(port)      ((port) + LARB1_PORT_OFFSET)
index 38f02814d53a992f6a3de7849ba70fbfc195fe9f..bc41e87a969bfb7f71029538d94be0a1e14ad5c5 100644 (file)
@@ -41,6 +41,7 @@ struct device_node;
 struct fwnode_handle;
 struct iommu_ops;
 struct iommu_group;
+struct iommu_fwspec;
 
 struct bus_attribute {
        struct attribute        attr;
@@ -765,6 +766,7 @@ struct device_dma_parameters {
  *             gone away. This should be set by the allocator of the
  *             device (i.e. the bus driver that discovered the device).
  * @iommu_group: IOMMU group the device belongs to.
+ * @iommu_fwspec: IOMMU-specific properties supplied by firmware.
  *
  * @offline_disabled: If set, the device is permanently online.
  * @offline:   Set after successful invocation of bus type's .offline().
@@ -849,6 +851,7 @@ struct device {
 
        void    (*release)(struct device *dev);
        struct iommu_group      *iommu_group;
+       struct iommu_fwspec     *iommu_fwspec;
 
        bool                    offline_disabled:1;
        bool                    offline:1;
index 81c5c8d167ade060f8f568a7dc8abaf81fb90b69..32c589062bd9e871c6d11af22ac2f41b953211cc 100644 (file)
@@ -21,6 +21,7 @@
 
 #ifdef CONFIG_IOMMU_DMA
 #include <linux/iommu.h>
+#include <linux/msi.h>
 
 int iommu_dma_init(void);
 
@@ -29,7 +30,8 @@ int iommu_get_dma_cookie(struct iommu_domain *domain);
 void iommu_put_dma_cookie(struct iommu_domain *domain);
 
 /* Setup call for arch DMA mapping code */
-int iommu_dma_init_domain(struct iommu_domain *domain, dma_addr_t base, u64 size);
+int iommu_dma_init_domain(struct iommu_domain *domain, dma_addr_t base,
+               u64 size, struct device *dev);
 
 /* General helpers for DMA-API <-> IOMMU-API interaction */
 int dma_direction_to_prot(enum dma_data_direction dir, bool coherent);
@@ -62,9 +64,13 @@ void iommu_dma_unmap_sg(struct device *dev, struct scatterlist *sg, int nents,
 int iommu_dma_supported(struct device *dev, u64 mask);
 int iommu_dma_mapping_error(struct device *dev, dma_addr_t dma_addr);
 
+/* The DMA API isn't _quite_ the whole story, though... */
+void iommu_dma_map_msi_msg(int irq, struct msi_msg *msg);
+
 #else
 
 struct iommu_domain;
+struct msi_msg;
 
 static inline int iommu_dma_init(void)
 {
@@ -80,6 +86,10 @@ static inline void iommu_put_dma_cookie(struct iommu_domain *domain)
 {
 }
 
+static inline void iommu_dma_map_msi_msg(int irq, struct msi_msg *msg)
+{
+}
+
 #endif /* CONFIG_IOMMU_DMA */
 #endif /* __KERNEL__ */
 #endif /* __DMA_IOMMU_H */
index a35fb8b42e1a8d77b5dde6349846e1c9ebb15949..436dc21318af776bd6c5a7ce8eae35aac995d349 100644 (file)
@@ -331,10 +331,32 @@ extern struct iommu_group *pci_device_group(struct device *dev);
 /* Generic device grouping function */
 extern struct iommu_group *generic_device_group(struct device *dev);
 
+/**
+ * struct iommu_fwspec - per-device IOMMU instance data
+ * @ops: ops for this device's IOMMU
+ * @iommu_fwnode: firmware handle for this device's IOMMU
+ * @iommu_priv: IOMMU driver private data for this device
+ * @num_ids: number of associated device IDs
+ * @ids: IDs which this device may present to the IOMMU
+ */
+struct iommu_fwspec {
+       const struct iommu_ops  *ops;
+       struct fwnode_handle    *iommu_fwnode;
+       void                    *iommu_priv;
+       unsigned int            num_ids;
+       u32                     ids[1];
+};
+
+int iommu_fwspec_init(struct device *dev, struct fwnode_handle *iommu_fwnode,
+                     const struct iommu_ops *ops);
+void iommu_fwspec_free(struct device *dev);
+int iommu_fwspec_add_ids(struct device *dev, u32 *ids, int num_ids);
+
 #else /* CONFIG_IOMMU_API */
 
 struct iommu_ops {};
 struct iommu_group {};
+struct iommu_fwspec {};
 
 static inline bool iommu_present(struct bus_type *bus)
 {
@@ -541,6 +563,23 @@ static inline void iommu_device_unlink(struct device *dev, struct device *link)
 {
 }
 
+static inline int iommu_fwspec_init(struct device *dev,
+                                   struct fwnode_handle *iommu_fwnode,
+                                   const struct iommu_ops *ops)
+{
+       return -ENODEV;
+}
+
+static inline void iommu_fwspec_free(struct device *dev)
+{
+}
+
+static inline int iommu_fwspec_add_ids(struct device *dev, u32 *ids,
+                                      int num_ids)
+{
+       return -ENODEV;
+}
+
 #endif /* CONFIG_IOMMU_API */
 
 #endif /* __LINUX_IOMMU_H */
index b969e944396223defb902c7e7f2ac1238b410585..7fd5cfce91403f0571335fc4b172d889e6ad371e 100644 (file)
@@ -17,6 +17,9 @@ int of_irq_parse_and_map_pci(const struct pci_dev *dev, u8 slot, u8 pin);
 int of_pci_parse_bus_range(struct device_node *node, struct resource *res);
 int of_get_pci_domain_nr(struct device_node *node);
 void of_pci_check_probe_only(void);
+int of_pci_map_rid(struct device_node *np, u32 rid,
+                  const char *map_name, const char *map_mask_name,
+                  struct device_node **target, u32 *id_out);
 #else
 static inline int of_irq_parse_pci(const struct pci_dev *pdev, struct of_phandle_args *out_irq)
 {
@@ -52,6 +55,13 @@ of_get_pci_domain_nr(struct device_node *node)
        return -1;
 }
 
+static inline int of_pci_map_rid(struct device_node *np, u32 rid,
+                       const char *map_name, const char *map_mask_name,
+                       struct device_node **target, u32 *id_out)
+{
+       return -EINVAL;
+}
+
 static inline void of_pci_check_probe_only(void) { }
 #endif