Merge remote-tracking branch 'iommu/next'
author Stephen Rothwell <sfr@canb.auug.org.au>
Thu, 5 Nov 2015 02:06:06 +0000 (13:06 +1100)
committer Stephen Rothwell <sfr@canb.auug.org.au>
Thu, 5 Nov 2015 02:06:06 +0000 (13:06 +1100)
27 files changed:
Documentation/devicetree/bindings/iommu/arm,smmu-v3.txt
Documentation/devicetree/bindings/iommu/ti,omap-iommu.txt
MAINTAINERS
arch/arm64/Kconfig
arch/arm64/include/asm/dma-mapping.h
arch/arm64/mm/dma-mapping.c
arch/s390/Kconfig
arch/s390/include/asm/pci.h
arch/s390/include/asm/pci_dma.h
arch/s390/pci/pci_dma.c
drivers/iommu/Kconfig
drivers/iommu/Makefile
drivers/iommu/amd_iommu.c
drivers/iommu/amd_iommu_init.c
drivers/iommu/amd_iommu_types.h
drivers/iommu/arm-smmu-v3.c
drivers/iommu/arm-smmu.c
drivers/iommu/dma-iommu.c [new file with mode: 0644]
drivers/iommu/fsl_pamu_domain.c
drivers/iommu/intel-iommu.c
drivers/iommu/intel_irq_remapping.c
drivers/iommu/iommu.c
drivers/iommu/omap-iommu.c
drivers/iommu/omap-iommu.h
drivers/iommu/s390-iommu.c [new file with mode: 0644]
include/linux/dma-iommu.h [new file with mode: 0644]
include/linux/iommu.h

index 3443e0f..947863a 100644 (file)
@@ -36,5 +36,24 @@ the PCIe specification.
                       NOTE: this only applies to the SMMU itself, not
                       masters connected upstream of the SMMU.
 
+- msi-parent        : See the generic MSI binding described in
+                        devicetree/bindings/interrupt-controller/msi.txt
+                      for a description of the msi-parent property.
+
 - hisilicon,broken-prefetch-cmd
                     : Avoid sending CMD_PREFETCH_* commands to the SMMU.
+
+** Example
+
+        smmu@2b400000 {
+                compatible = "arm,smmu-v3";
+                reg = <0x0 0x2b400000 0x0 0x20000>;
+                interrupts = <GIC_SPI 74 IRQ_TYPE_EDGE_RISING>,
+                             <GIC_SPI 75 IRQ_TYPE_EDGE_RISING>,
+                             <GIC_SPI 77 IRQ_TYPE_EDGE_RISING>,
+                             <GIC_SPI 79 IRQ_TYPE_EDGE_RISING>;
+                interrupt-names = "eventq", "priq", "cmdq-sync", "gerror";
+                dma-coherent;
+                #iommu-cells = <0>;
+                msi-parent = <&its 0xff0000>;
+        };
index 8696999..4bd10dd 100644 (file)
@@ -4,6 +4,7 @@ Required properties:
 - compatible : Should be one of,
                "ti,omap2-iommu" for OMAP2/OMAP3 IOMMU instances
                "ti,omap4-iommu" for OMAP4/OMAP5 IOMMU instances
+               "ti,dra7-dsp-iommu" for DRA7xx DSP IOMMU instances
                "ti,dra7-iommu" for DRA7xx IOMMU instances
 - ti,hwmods  : Name of the hwmod associated with the IOMMU instance
 - reg        : Address space for the configuration registers
@@ -19,6 +20,13 @@ Optional properties:
                     Should be either 8 or 32 (default: 32)
 - ti,iommu-bus-err-back : Indicates the IOMMU instance supports throwing
                          back a bus error response on MMU faults.
+- ti,syscon-mmuconfig : Should be a pair of the phandle to the DSP_SYSTEM
+                        syscon node that contains the additional control
+                        register for enabling the MMU, and the MMU instance
+                        number (0-indexed) within the sub-system. This property
+                        is required for DSP IOMMU instances on DRA7xx SoCs. The
+                        instance number should be 0 for DSP MDMA MMUs and 1 for
+                        DSP EDMA MMUs.
 
 Example:
        /* OMAP3 ISP MMU */
@@ -30,3 +38,22 @@ Example:
                ti,hwmods = "mmu_isp";
                ti,#tlb-entries = <8>;
        };
+
+       /* DRA74x DSP2 MMUs */
+       mmu0_dsp2: mmu@41501000 {
+               compatible = "ti,dra7-dsp-iommu";
+               reg = <0x41501000 0x100>;
+               interrupts = <GIC_SPI 146 IRQ_TYPE_LEVEL_HIGH>;
+               ti,hwmods = "mmu0_dsp2";
+               #iommu-cells = <0>;
+               ti,syscon-mmuconfig = <&dsp2_system 0x0>;
+       };
+
+       mmu1_dsp2: mmu@41502000 {
+               compatible = "ti,dra7-dsp-iommu";
+               reg = <0x41502000 0x100>;
+               interrupts = <GIC_SPI 147 IRQ_TYPE_LEVEL_HIGH>;
+               ti,hwmods = "mmu1_dsp2";
+               #iommu-cells = <0>;
+               ti,syscon-mmuconfig = <&dsp2_system 0x1>;
+       };
index e0dd3ce..71c6158 100644 (file)
@@ -9073,6 +9073,13 @@ F:       drivers/s390/net/*iucv*
 F:     include/net/iucv/
 F:     net/iucv/
 
+S390 IOMMU (PCI)
+M:     Gerald Schaefer <gerald.schaefer@de.ibm.com>
+L:     linux-s390@vger.kernel.org
+W:     http://www.ibm.com/developerworks/linux/linux390/
+S:     Supported
+F:     drivers/iommu/s390-iommu.c
+
 S3C24XX SD/MMC Driver
 M:     Ben Dooks <ben-linux@fluff.org>
 L:     linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
index 7b10647..851fe11 100644 (file)
@@ -76,6 +76,7 @@ config ARM64
        select HAVE_PERF_USER_STACK_DUMP
        select HAVE_RCU_TABLE_FREE
        select HAVE_SYSCALL_TRACEPOINTS
+       select IOMMU_DMA if IOMMU_SUPPORT
        select IRQ_DOMAIN
        select IRQ_FORCED_THREADING
        select MODULES_USE_ELF_RELA
index cfdb34b..54d0ead 100644 (file)
@@ -54,16 +54,15 @@ static inline struct dma_map_ops *get_dma_ops(struct device *dev)
                return __generic_dma_ops(dev);
 }
 
-static inline void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size,
-                                     struct iommu_ops *iommu, bool coherent)
-{
-       if (!acpi_disabled && !dev->archdata.dma_ops)
-               dev->archdata.dma_ops = dma_ops;
-
-       dev->archdata.dma_coherent = coherent;
-}
+void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size,
+                       struct iommu_ops *iommu, bool coherent);
 #define arch_setup_dma_ops     arch_setup_dma_ops
 
+#ifdef CONFIG_IOMMU_DMA
+void arch_teardown_dma_ops(struct device *dev);
+#define arch_teardown_dma_ops  arch_teardown_dma_ops
+#endif
+
 /* do not use this function in a driver */
 static inline bool is_device_dma_coherent(struct device *dev)
 {
index 99224dc..6320361 100644 (file)
@@ -533,3 +533,460 @@ static int __init dma_debug_do_init(void)
        return 0;
 }
 fs_initcall(dma_debug_do_init);
+
+
+#ifdef CONFIG_IOMMU_DMA
+#include <linux/dma-iommu.h>
+#include <linux/platform_device.h>
+#include <linux/amba/bus.h>
+
+/* Thankfully, all cache ops are by VA so we can ignore phys here */
+static void flush_page(struct device *dev, const void *virt, phys_addr_t phys)
+{
+       __dma_flush_range(virt, virt + PAGE_SIZE);
+}
+
+static void *__iommu_alloc_attrs(struct device *dev, size_t size,
+                                dma_addr_t *handle, gfp_t gfp,
+                                struct dma_attrs *attrs)
+{
+       bool coherent = is_device_dma_coherent(dev);
+       int ioprot = dma_direction_to_prot(DMA_BIDIRECTIONAL, coherent);
+       void *addr;
+
+       if (WARN(!dev, "cannot create IOMMU mapping for unknown device\n"))
+               return NULL;
+       /*
+        * Some drivers rely on this, and we probably don't want the
+        * possibility of stale kernel data being read by devices anyway.
+        */
+       gfp |= __GFP_ZERO;
+
+       if (gfp & __GFP_WAIT) {
+               struct page **pages;
+               pgprot_t prot = __get_dma_pgprot(attrs, PAGE_KERNEL, coherent);
+
+               pages = iommu_dma_alloc(dev, size, gfp, ioprot, handle,
+                                       flush_page);
+               if (!pages)
+                       return NULL;
+
+               addr = dma_common_pages_remap(pages, size, VM_USERMAP, prot,
+                                             __builtin_return_address(0));
+               if (!addr)
+                       iommu_dma_free(dev, pages, size, handle);
+       } else {
+               struct page *page;
+               /*
+                * In atomic context we can't remap anything, so we'll only
+                * get the virtually contiguous buffer we need by way of a
+                * physically contiguous allocation.
+                */
+               if (coherent) {
+                       page = alloc_pages(gfp, get_order(size));
+                       addr = page ? page_address(page) : NULL;
+               } else {
+                       addr = __alloc_from_pool(size, &page, gfp);
+               }
+               if (!addr)
+                       return NULL;
+
+               *handle = iommu_dma_map_page(dev, page, 0, size, ioprot);
+               if (iommu_dma_mapping_error(dev, *handle)) {
+                       if (coherent)
+                               __free_pages(page, get_order(size));
+                       else
+                               __free_from_pool(addr, size);
+                       addr = NULL;
+               }
+       }
+       return addr;
+}
+
+static void __iommu_free_attrs(struct device *dev, size_t size, void *cpu_addr,
+                              dma_addr_t handle, struct dma_attrs *attrs)
+{
+       /*
+        * @cpu_addr will be one of 3 things depending on how it was allocated:
+        * - A remapped array of pages from iommu_dma_alloc(), for all
+        *   non-atomic allocations.
+        * - A non-cacheable alias from the atomic pool, for atomic
+        *   allocations by non-coherent devices.
+        * - A normal lowmem address, for atomic allocations by
+        *   coherent devices.
+        * Hence how dodgy the below logic looks...
+        */
+       if (__in_atomic_pool(cpu_addr, size)) {
+               iommu_dma_unmap_page(dev, handle, size, 0, NULL);
+               __free_from_pool(cpu_addr, size);
+       } else if (is_vmalloc_addr(cpu_addr)){
+               struct vm_struct *area = find_vm_area(cpu_addr);
+
+               if (WARN_ON(!area || !area->pages))
+                       return;
+               iommu_dma_free(dev, area->pages, size, &handle);
+               dma_common_free_remap(cpu_addr, size, VM_USERMAP);
+       } else {
+               iommu_dma_unmap_page(dev, handle, size, 0, NULL);
+               __free_pages(virt_to_page(cpu_addr), get_order(size));
+       }
+}
+
+static int __iommu_mmap_attrs(struct device *dev, struct vm_area_struct *vma,
+                             void *cpu_addr, dma_addr_t dma_addr, size_t size,
+                             struct dma_attrs *attrs)
+{
+       struct vm_struct *area;
+       int ret;
+
+       vma->vm_page_prot = __get_dma_pgprot(attrs, vma->vm_page_prot,
+                                            is_device_dma_coherent(dev));
+
+       if (dma_mmap_from_coherent(dev, vma, cpu_addr, size, &ret))
+               return ret;
+
+       area = find_vm_area(cpu_addr);
+       if (WARN_ON(!area || !area->pages))
+               return -ENXIO;
+
+       return iommu_dma_mmap(area->pages, size, vma);
+}
+
+static int __iommu_get_sgtable(struct device *dev, struct sg_table *sgt,
+                              void *cpu_addr, dma_addr_t dma_addr,
+                              size_t size, struct dma_attrs *attrs)
+{
+       unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT;
+       struct vm_struct *area = find_vm_area(cpu_addr);
+
+       if (WARN_ON(!area || !area->pages))
+               return -ENXIO;
+
+       return sg_alloc_table_from_pages(sgt, area->pages, count, 0, size,
+                                        GFP_KERNEL);
+}
+
+static void __iommu_sync_single_for_cpu(struct device *dev,
+                                       dma_addr_t dev_addr, size_t size,
+                                       enum dma_data_direction dir)
+{
+       phys_addr_t phys;
+
+       if (is_device_dma_coherent(dev))
+               return;
+
+       phys = iommu_iova_to_phys(iommu_get_domain_for_dev(dev), dev_addr);
+       __dma_unmap_area(phys_to_virt(phys), size, dir);
+}
+
+static void __iommu_sync_single_for_device(struct device *dev,
+                                          dma_addr_t dev_addr, size_t size,
+                                          enum dma_data_direction dir)
+{
+       phys_addr_t phys;
+
+       if (is_device_dma_coherent(dev))
+               return;
+
+       phys = iommu_iova_to_phys(iommu_get_domain_for_dev(dev), dev_addr);
+       __dma_map_area(phys_to_virt(phys), size, dir);
+}
+
+static dma_addr_t __iommu_map_page(struct device *dev, struct page *page,
+                                  unsigned long offset, size_t size,
+                                  enum dma_data_direction dir,
+                                  struct dma_attrs *attrs)
+{
+       bool coherent = is_device_dma_coherent(dev);
+       int prot = dma_direction_to_prot(dir, coherent);
+       dma_addr_t dev_addr = iommu_dma_map_page(dev, page, offset, size, prot);
+
+       if (!iommu_dma_mapping_error(dev, dev_addr) &&
+           !dma_get_attr(DMA_ATTR_SKIP_CPU_SYNC, attrs))
+               __iommu_sync_single_for_device(dev, dev_addr, size, dir);
+
+       return dev_addr;
+}
+
+static void __iommu_unmap_page(struct device *dev, dma_addr_t dev_addr,
+                              size_t size, enum dma_data_direction dir,
+                              struct dma_attrs *attrs)
+{
+       if (!dma_get_attr(DMA_ATTR_SKIP_CPU_SYNC, attrs))
+               __iommu_sync_single_for_cpu(dev, dev_addr, size, dir);
+
+       iommu_dma_unmap_page(dev, dev_addr, size, dir, attrs);
+}
+
+static void __iommu_sync_sg_for_cpu(struct device *dev,
+                                   struct scatterlist *sgl, int nelems,
+                                   enum dma_data_direction dir)
+{
+       struct scatterlist *sg;
+       int i;
+
+       if (is_device_dma_coherent(dev))
+               return;
+
+       for_each_sg(sgl, sg, nelems, i)
+               __dma_unmap_area(sg_virt(sg), sg->length, dir);
+}
+
+static void __iommu_sync_sg_for_device(struct device *dev,
+                                      struct scatterlist *sgl, int nelems,
+                                      enum dma_data_direction dir)
+{
+       struct scatterlist *sg;
+       int i;
+
+       if (is_device_dma_coherent(dev))
+               return;
+
+       for_each_sg(sgl, sg, nelems, i)
+               __dma_map_area(sg_virt(sg), sg->length, dir);
+}
+
+static int __iommu_map_sg_attrs(struct device *dev, struct scatterlist *sgl,
+                               int nelems, enum dma_data_direction dir,
+                               struct dma_attrs *attrs)
+{
+       bool coherent = is_device_dma_coherent(dev);
+
+       if (!dma_get_attr(DMA_ATTR_SKIP_CPU_SYNC, attrs))
+               __iommu_sync_sg_for_device(dev, sgl, nelems, dir);
+
+       return iommu_dma_map_sg(dev, sgl, nelems,
+                       dma_direction_to_prot(dir, coherent));
+}
+
+static void __iommu_unmap_sg_attrs(struct device *dev,
+                                  struct scatterlist *sgl, int nelems,
+                                  enum dma_data_direction dir,
+                                  struct dma_attrs *attrs)
+{
+       if (!dma_get_attr(DMA_ATTR_SKIP_CPU_SYNC, attrs))
+               __iommu_sync_sg_for_cpu(dev, sgl, nelems, dir);
+
+       iommu_dma_unmap_sg(dev, sgl, nelems, dir, attrs);
+}
+
+static struct dma_map_ops iommu_dma_ops = {
+       .alloc = __iommu_alloc_attrs,
+       .free = __iommu_free_attrs,
+       .mmap = __iommu_mmap_attrs,
+       .get_sgtable = __iommu_get_sgtable,
+       .map_page = __iommu_map_page,
+       .unmap_page = __iommu_unmap_page,
+       .map_sg = __iommu_map_sg_attrs,
+       .unmap_sg = __iommu_unmap_sg_attrs,
+       .sync_single_for_cpu = __iommu_sync_single_for_cpu,
+       .sync_single_for_device = __iommu_sync_single_for_device,
+       .sync_sg_for_cpu = __iommu_sync_sg_for_cpu,
+       .sync_sg_for_device = __iommu_sync_sg_for_device,
+       .dma_supported = iommu_dma_supported,
+       .mapping_error = iommu_dma_mapping_error,
+};
+
+/*
+ * TODO: Right now __iommu_setup_dma_ops() gets called too early to do
+ * everything it needs to - the device is only partially created and the
+ * IOMMU driver hasn't seen it yet, so it can't have a group. Thus we
+ * need this delayed attachment dance. Once IOMMU probe ordering is sorted
+ * to move the arch_setup_dma_ops() call later, all the notifier bits below
+ * become unnecessary, and will go away.
+ */
+struct iommu_dma_notifier_data {
+       struct list_head list;
+       struct device *dev;
+       const struct iommu_ops *ops;
+       u64 dma_base;
+       u64 size;
+};
+static LIST_HEAD(iommu_dma_masters);
+static DEFINE_MUTEX(iommu_dma_notifier_lock);
+
+/*
+ * Temporarily "borrow" a domain feature flag to tell if we had to resort
+ * to creating our own domain here, in case we need to clean it up again.
+ */
+#define __IOMMU_DOMAIN_FAKE_DEFAULT            (1U << 31)
+
+static bool do_iommu_attach(struct device *dev, const struct iommu_ops *ops,
+                          u64 dma_base, u64 size)
+{
+       struct iommu_domain *domain = iommu_get_domain_for_dev(dev);
+
+       /*
+        * Best case: The device is either part of a group which was
+        * already attached to a domain in a previous call, or it's
+        * been put in a default DMA domain by the IOMMU core.
+        */
+       if (!domain) {
+               /*
+                * Urgh. The IOMMU core isn't going to do default domains
+                * for non-PCI devices anyway, until it has some means of
+                * abstracting the entirely implementation-specific
+                * sideband data/SoC topology/unicorn dust that may or
+                * may not differentiate upstream masters.
+                * So until then, HORRIBLE HACKS!
+                */
+               domain = ops->domain_alloc(IOMMU_DOMAIN_DMA);
+               if (!domain)
+                       goto out_no_domain;
+
+               domain->ops = ops;
+               domain->type = IOMMU_DOMAIN_DMA | __IOMMU_DOMAIN_FAKE_DEFAULT;
+
+               if (iommu_attach_device(domain, dev))
+                       goto out_put_domain;
+       }
+
+       if (iommu_dma_init_domain(domain, dma_base, size))
+               goto out_detach;
+
+       dev->archdata.dma_ops = &iommu_dma_ops;
+       return true;
+
+out_detach:
+       iommu_detach_device(domain, dev);
+out_put_domain:
+       if (domain->type & __IOMMU_DOMAIN_FAKE_DEFAULT)
+               iommu_domain_free(domain);
+out_no_domain:
+       pr_warn("Failed to set up IOMMU for device %s; retaining platform DMA ops\n",
+               dev_name(dev));
+       return false;
+}
+
+static void queue_iommu_attach(struct device *dev, const struct iommu_ops *ops,
+                             u64 dma_base, u64 size)
+{
+       struct iommu_dma_notifier_data *iommudata;
+
+       iommudata = kzalloc(sizeof(*iommudata), GFP_KERNEL);
+       if (!iommudata)
+               return;
+
+       iommudata->dev = dev;
+       iommudata->ops = ops;
+       iommudata->dma_base = dma_base;
+       iommudata->size = size;
+
+       mutex_lock(&iommu_dma_notifier_lock);
+       list_add(&iommudata->list, &iommu_dma_masters);
+       mutex_unlock(&iommu_dma_notifier_lock);
+}
+
+static int __iommu_attach_notifier(struct notifier_block *nb,
+                                  unsigned long action, void *data)
+{
+       struct iommu_dma_notifier_data *master, *tmp;
+
+       if (action != BUS_NOTIFY_ADD_DEVICE)
+               return 0;
+
+       mutex_lock(&iommu_dma_notifier_lock);
+       list_for_each_entry_safe(master, tmp, &iommu_dma_masters, list) {
+               if (do_iommu_attach(master->dev, master->ops,
+                               master->dma_base, master->size)) {
+                       list_del(&master->list);
+                       kfree(master);
+               }
+       }
+       mutex_unlock(&iommu_dma_notifier_lock);
+       return 0;
+}
+
+static int register_iommu_dma_ops_notifier(struct bus_type *bus)
+{
+       struct notifier_block *nb = kzalloc(sizeof(*nb), GFP_KERNEL);
+       int ret;
+
+       if (!nb)
+               return -ENOMEM;
+       /*
+        * The device must be attached to a domain before the driver probe
+        * routine gets a chance to start allocating DMA buffers. However,
+        * the IOMMU driver also needs a chance to configure the iommu_group
+        * via its add_device callback first, so we need to make the attach
+        * happen between those two points. Since the IOMMU core uses a bus
+        * notifier with default priority for add_device, do the same but
+        * with a lower priority to ensure the appropriate ordering.
+        */
+       nb->notifier_call = __iommu_attach_notifier;
+       nb->priority = -100;
+
+       ret = bus_register_notifier(bus, nb);
+       if (ret) {
+               pr_warn("Failed to register DMA domain notifier; IOMMU DMA ops unavailable on bus '%s'\n",
+                       bus->name);
+               kfree(nb);
+       }
+       return ret;
+}
+
+static int __init __iommu_dma_init(void)
+{
+       int ret;
+
+       ret = iommu_dma_init();
+       if (!ret)
+               ret = register_iommu_dma_ops_notifier(&platform_bus_type);
+       if (!ret)
+               ret = register_iommu_dma_ops_notifier(&amba_bustype);
+       return ret;
+}
+arch_initcall(__iommu_dma_init);
+
+static void __iommu_setup_dma_ops(struct device *dev, u64 dma_base, u64 size,
+                                 const struct iommu_ops *ops)
+{
+       struct iommu_group *group;
+
+       if (!ops)
+               return;
+       /*
+        * TODO: As a concession to the future, we're ready to handle being
+        * called both early and late (i.e. after bus_add_device). Once all
+        * the platform bus code is reworked to call us late and the notifier
+        * junk above goes away, move the body of do_iommu_attach here.
+        */
+       group = iommu_group_get(dev);
+       if (group) {
+               do_iommu_attach(dev, ops, dma_base, size);
+               iommu_group_put(group);
+       } else {
+               queue_iommu_attach(dev, ops, dma_base, size);
+       }
+}
+
+void arch_teardown_dma_ops(struct device *dev)
+{
+       struct iommu_domain *domain = iommu_get_domain_for_dev(dev);
+
+       if (domain) {
+               iommu_detach_device(domain, dev);
+               if (domain->type & __IOMMU_DOMAIN_FAKE_DEFAULT)
+                       iommu_domain_free(domain);
+       }
+
+       dev->archdata.dma_ops = NULL;
+}
+
+#else
+
+static void __iommu_setup_dma_ops(struct device *dev, u64 dma_base, u64 size,
+                                 struct iommu_ops *iommu)
+{ }
+
+#endif  /* CONFIG_IOMMU_DMA */
+
+void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size,
+                       struct iommu_ops *iommu, bool coherent)
+{
+       if (!acpi_disabled && !dev->archdata.dma_ops)
+               dev->archdata.dma_ops = dma_ops;
+
+       dev->archdata.dma_coherent = coherent;
+       __iommu_setup_dma_ops(dev, dma_base, size, iommu);
+}
index 9b9a2db..3a55f49 100644 (file)
@@ -584,6 +584,7 @@ menuconfig PCI
        bool "PCI support"
        select HAVE_DMA_ATTRS
        select PCI_MSI
+       select IOMMU_SUPPORT
        help
          Enable PCI support.
 
index 34d9603..c873e68 100644 (file)
@@ -62,6 +62,8 @@ struct zpci_bar_struct {
        u8              size;           /* order 2 exponent */
 };
 
+struct s390_domain;
+
 /* Private data per function */
 struct zpci_dev {
        struct pci_dev  *pdev;
@@ -118,6 +120,8 @@ struct zpci_dev {
 
        struct dentry   *debugfs_dev;
        struct dentry   *debugfs_perf;
+
+       struct s390_domain *s390_domain; /* s390 IOMMU domain data */
 };
 
 static inline bool zdev_enabled(struct zpci_dev *zdev)
index 30b4c17..7a7abf1 100644 (file)
@@ -192,5 +192,8 @@ static inline unsigned long *get_st_pto(unsigned long entry)
 /* Prototypes */
 int zpci_dma_init_device(struct zpci_dev *);
 void zpci_dma_exit_device(struct zpci_dev *);
-
+void dma_free_seg_table(unsigned long);
+unsigned long *dma_alloc_cpu_table(void);
+void dma_cleanup_tables(unsigned long *);
+void dma_update_cpu_trans(unsigned long *, void *, dma_addr_t, int);
 #endif
index 37505b8..37d10f7 100644 (file)
@@ -24,7 +24,7 @@ static int zpci_refresh_global(struct zpci_dev *zdev)
                                  zdev->iommu_pages * PAGE_SIZE);
 }
 
-static unsigned long *dma_alloc_cpu_table(void)
+unsigned long *dma_alloc_cpu_table(void)
 {
        unsigned long *table, *entry;
 
@@ -114,12 +114,12 @@ static unsigned long *dma_walk_cpu_trans(unsigned long *rto, dma_addr_t dma_addr
        return &pto[px];
 }
 
-static void dma_update_cpu_trans(struct zpci_dev *zdev, void *page_addr,
-                                dma_addr_t dma_addr, int flags)
+void dma_update_cpu_trans(unsigned long *dma_table, void *page_addr,
+                         dma_addr_t dma_addr, int flags)
 {
        unsigned long *entry;
 
-       entry = dma_walk_cpu_trans(zdev->dma_table, dma_addr);
+       entry = dma_walk_cpu_trans(dma_table, dma_addr);
        if (!entry) {
                WARN_ON_ONCE(1);
                return;
@@ -156,7 +156,8 @@ static int dma_update_trans(struct zpci_dev *zdev, unsigned long pa,
                goto no_refresh;
 
        for (i = 0; i < nr_pages; i++) {
-               dma_update_cpu_trans(zdev, page_addr, dma_addr, flags);
+               dma_update_cpu_trans(zdev->dma_table, page_addr, dma_addr,
+                                    flags);
                page_addr += PAGE_SIZE;
                dma_addr += PAGE_SIZE;
        }
@@ -181,7 +182,7 @@ no_refresh:
        return rc;
 }
 
-static void dma_free_seg_table(unsigned long entry)
+void dma_free_seg_table(unsigned long entry)
 {
        unsigned long *sto = get_rt_sto(entry);
        int sx;
@@ -193,21 +194,18 @@ static void dma_free_seg_table(unsigned long entry)
        dma_free_cpu_table(sto);
 }
 
-static void dma_cleanup_tables(struct zpci_dev *zdev)
+void dma_cleanup_tables(unsigned long *table)
 {
-       unsigned long *table;
        int rtx;
 
-       if (!zdev || !zdev->dma_table)
+       if (!table)
                return;
 
-       table = zdev->dma_table;
        for (rtx = 0; rtx < ZPCI_TABLE_ENTRIES; rtx++)
                if (reg_entry_isvalid(table[rtx]))
                        dma_free_seg_table(table[rtx]);
 
        dma_free_cpu_table(table);
-       zdev->dma_table = NULL;
 }
 
 static unsigned long __dma_alloc_iommu(struct zpci_dev *zdev,
@@ -416,6 +414,13 @@ int zpci_dma_init_device(struct zpci_dev *zdev)
 {
        int rc;
 
+       /*
+        * At this point, if the device is part of an IOMMU domain, this would
+        * be a strong hint towards a bug in the IOMMU API (common) code and/or
+        * simultaneous access via IOMMU and DMA API. So let's issue a warning.
+        */
+       WARN_ON(zdev->s390_domain);
+
        spin_lock_init(&zdev->iommu_bitmap_lock);
        spin_lock_init(&zdev->dma_table_lock);
 
@@ -450,8 +455,16 @@ out_clean:
 
 void zpci_dma_exit_device(struct zpci_dev *zdev)
 {
+       /*
+        * At this point, if the device is part of an IOMMU domain, this would
+        * be a strong hint towards a bug in the IOMMU API (common) code and/or
+        * simultaneous access via IOMMU and DMA API. So let's issue a warning.
+        */
+       WARN_ON(zdev->s390_domain);
+
        zpci_unregister_ioat(zdev, 0);
-       dma_cleanup_tables(zdev);
+       dma_cleanup_tables(zdev->dma_table);
+       zdev->dma_table = NULL;
        vfree(zdev->iommu_bitmap);
        zdev->iommu_bitmap = NULL;
        zdev->next_bit = 0;
index cbe6a89..4d3385b 100644 (file)
@@ -48,6 +48,13 @@ config OF_IOMMU
        def_bool y
        depends on OF && IOMMU_API
 
+# IOMMU-agnostic DMA-mapping layer
+config IOMMU_DMA
+       bool
+       depends on NEED_SG_DMA_LENGTH
+       select IOMMU_API
+       select IOMMU_IOVA
+
 config FSL_PAMU
        bool "Freescale IOMMU support"
        depends on PPC32
@@ -361,6 +368,7 @@ config ARM_SMMU_V3
        depends on ARM64 && PCI
        select IOMMU_API
        select IOMMU_IO_PGTABLE_LPAE
+       select GENERIC_MSI_IRQ_DOMAIN
        help
          Support for implementations of the ARM System MMU architecture
          version 3 providing translation support to a PCIe root complex.
@@ -368,4 +376,11 @@ config ARM_SMMU_V3
          Say Y here if your system includes an IOMMU device implementing
          the ARM SMMUv3 architecture.
 
+config S390_IOMMU
+       def_bool y if S390 && PCI
+       depends on S390 && PCI
+       select IOMMU_API
+       help
+         Support for the IOMMU API for s390 PCI devices.
+
 endif # IOMMU_SUPPORT
index c6dcc51..2393d86 100644 (file)
@@ -1,6 +1,7 @@
 obj-$(CONFIG_IOMMU_API) += iommu.o
 obj-$(CONFIG_IOMMU_API) += iommu-traces.o
 obj-$(CONFIG_IOMMU_API) += iommu-sysfs.o
+obj-$(CONFIG_IOMMU_DMA) += dma-iommu.o
 obj-$(CONFIG_IOMMU_IO_PGTABLE) += io-pgtable.o
 obj-$(CONFIG_IOMMU_IO_PGTABLE_LPAE) += io-pgtable-arm.o
 obj-$(CONFIG_IOMMU_IOVA) += iova.o
@@ -23,3 +24,4 @@ obj-$(CONFIG_EXYNOS_IOMMU) += exynos-iommu.o
 obj-$(CONFIG_SHMOBILE_IOMMU) += shmobile-iommu.o
 obj-$(CONFIG_SHMOBILE_IPMMU) += shmobile-ipmmu.o
 obj-$(CONFIG_FSL_PAMU) += fsl_pamu.o fsl_pamu_domain.o
+obj-$(CONFIG_S390_IOMMU) += s390-iommu.o
index 532e2a2..0d533bb 100644 (file)
@@ -89,8 +89,6 @@ static struct dma_map_ops amd_iommu_dma_ops;
 struct iommu_dev_data {
        struct list_head list;            /* For domain->dev_list */
        struct list_head dev_data_list;   /* For global dev_data_list */
-       struct list_head alias_list;      /* Link alias-groups together */
-       struct iommu_dev_data *alias_data;/* The alias dev_data */
        struct protection_domain *domain; /* Domain the device is bound to */
        u16 devid;                        /* PCI Device ID */
        bool iommu_v2;                    /* Device can make use of IOMMUv2 */
@@ -136,8 +134,6 @@ static struct iommu_dev_data *alloc_dev_data(u16 devid)
        if (!dev_data)
                return NULL;
 
-       INIT_LIST_HEAD(&dev_data->alias_list);
-
        dev_data->devid = devid;
 
        spin_lock_irqsave(&dev_data_list_lock, flags);
@@ -147,17 +143,6 @@ static struct iommu_dev_data *alloc_dev_data(u16 devid)
        return dev_data;
 }
 
-static void free_dev_data(struct iommu_dev_data *dev_data)
-{
-       unsigned long flags;
-
-       spin_lock_irqsave(&dev_data_list_lock, flags);
-       list_del(&dev_data->dev_data_list);
-       spin_unlock_irqrestore(&dev_data_list_lock, flags);
-
-       kfree(dev_data);
-}
-
 static struct iommu_dev_data *search_dev_data(u16 devid)
 {
        struct iommu_dev_data *dev_data;
@@ -311,73 +296,10 @@ out:
        iommu_group_put(group);
 }
 
-static int __last_alias(struct pci_dev *pdev, u16 alias, void *data)
-{
-       *(u16 *)data = alias;
-       return 0;
-}
-
-static u16 get_alias(struct device *dev)
-{
-       struct pci_dev *pdev = to_pci_dev(dev);
-       u16 devid, ivrs_alias, pci_alias;
-
-       devid = get_device_id(dev);
-       ivrs_alias = amd_iommu_alias_table[devid];
-       pci_for_each_dma_alias(pdev, __last_alias, &pci_alias);
-
-       if (ivrs_alias == pci_alias)
-               return ivrs_alias;
-
-       /*
-        * DMA alias showdown
-        *
-        * The IVRS is fairly reliable in telling us about aliases, but it
-        * can't know about every screwy device.  If we don't have an IVRS
-        * reported alias, use the PCI reported alias.  In that case we may
-        * still need to initialize the rlookup and dev_table entries if the
-        * alias is to a non-existent device.
-        */
-       if (ivrs_alias == devid) {
-               if (!amd_iommu_rlookup_table[pci_alias]) {
-                       amd_iommu_rlookup_table[pci_alias] =
-                               amd_iommu_rlookup_table[devid];
-                       memcpy(amd_iommu_dev_table[pci_alias].data,
-                              amd_iommu_dev_table[devid].data,
-                              sizeof(amd_iommu_dev_table[pci_alias].data));
-               }
-
-               return pci_alias;
-       }
-
-       pr_info("AMD-Vi: Using IVRS reported alias %02x:%02x.%d "
-               "for device %s[%04x:%04x], kernel reported alias "
-               "%02x:%02x.%d\n", PCI_BUS_NUM(ivrs_alias), PCI_SLOT(ivrs_alias),
-               PCI_FUNC(ivrs_alias), dev_name(dev), pdev->vendor, pdev->device,
-               PCI_BUS_NUM(pci_alias), PCI_SLOT(pci_alias),
-               PCI_FUNC(pci_alias));
-
-       /*
-        * If we don't have a PCI DMA alias and the IVRS alias is on the same
-        * bus, then the IVRS table may know about a quirk that we don't.
-        */
-       if (pci_alias == devid &&
-           PCI_BUS_NUM(ivrs_alias) == pdev->bus->number) {
-               pdev->dev_flags |= PCI_DEV_FLAGS_DMA_ALIAS_DEVFN;
-               pdev->dma_alias_devfn = ivrs_alias & 0xff;
-               pr_info("AMD-Vi: Added PCI DMA alias %02x.%d for %s\n",
-                       PCI_SLOT(ivrs_alias), PCI_FUNC(ivrs_alias),
-                       dev_name(dev));
-       }
-
-       return ivrs_alias;
-}
-
 static int iommu_init_device(struct device *dev)
 {
        struct pci_dev *pdev = to_pci_dev(dev);
        struct iommu_dev_data *dev_data;
-       u16 alias;
 
        if (dev->archdata.iommu)
                return 0;
@@ -386,24 +308,6 @@ static int iommu_init_device(struct device *dev)
        if (!dev_data)
                return -ENOMEM;
 
-       alias = get_alias(dev);
-
-       if (alias != dev_data->devid) {
-               struct iommu_dev_data *alias_data;
-
-               alias_data = find_dev_data(alias);
-               if (alias_data == NULL) {
-                       pr_err("AMD-Vi: Warning: Unhandled device %s\n",
-                                       dev_name(dev));
-                       free_dev_data(dev_data);
-                       return -ENOTSUPP;
-               }
-               dev_data->alias_data = alias_data;
-
-               /* Add device to the alias_list */
-               list_add(&dev_data->alias_list, &alias_data->alias_list);
-       }
-
        if (pci_iommuv2_capable(pdev)) {
                struct amd_iommu *iommu;
 
@@ -445,9 +349,6 @@ static void iommu_uninit_device(struct device *dev)
 
        iommu_group_remove_device(dev);
 
-       /* Unlink from alias, it may change if another device is re-plugged */
-       dev_data->alias_data = NULL;
-
        /* Remove dma-ops */
        dev->archdata.dma_ops = NULL;
 
@@ -633,7 +534,7 @@ static void iommu_poll_events(struct amd_iommu *iommu)
 
        while (head != tail) {
                iommu_print_event(iommu, iommu->evt_buf + head);
-               head = (head + EVENT_ENTRY_SIZE) % iommu->evt_buf_size;
+               head = (head + EVENT_ENTRY_SIZE) % EVT_BUFFER_SIZE;
        }
 
        writel(head, iommu->mmio_base + MMIO_EVT_HEAD_OFFSET);
@@ -783,7 +684,7 @@ static void copy_cmd_to_buffer(struct amd_iommu *iommu,
        u8 *target;
 
        target = iommu->cmd_buf + tail;
-       tail   = (tail + sizeof(*cmd)) % iommu->cmd_buf_size;
+       tail   = (tail + sizeof(*cmd)) % CMD_BUFFER_SIZE;
 
        /* Copy command to buffer */
        memcpy(target, cmd, sizeof(*cmd));
@@ -950,15 +851,13 @@ static int iommu_queue_command_sync(struct amd_iommu *iommu,
        u32 left, tail, head, next_tail;
        unsigned long flags;
 
-       WARN_ON(iommu->cmd_buf_size & CMD_BUFFER_UNINITIALIZED);
-
 again:
        spin_lock_irqsave(&iommu->lock, flags);
 
        head      = readl(iommu->mmio_base + MMIO_CMD_HEAD_OFFSET);
        tail      = readl(iommu->mmio_base + MMIO_CMD_TAIL_OFFSET);
-       next_tail = (tail + sizeof(*cmd)) % iommu->cmd_buf_size;
-       left      = (head - next_tail) % iommu->cmd_buf_size;
+       next_tail = (tail + sizeof(*cmd)) % CMD_BUFFER_SIZE;
+       left      = (head - next_tail) % CMD_BUFFER_SIZE;
 
        if (left <= 2) {
                struct iommu_cmd sync_cmd;
@@ -1114,11 +1013,15 @@ static int device_flush_iotlb(struct iommu_dev_data *dev_data,
 static int device_flush_dte(struct iommu_dev_data *dev_data)
 {
        struct amd_iommu *iommu;
+       u16 alias;
        int ret;
 
        iommu = amd_iommu_rlookup_table[dev_data->devid];
+       alias = amd_iommu_alias_table[dev_data->devid];
 
        ret = iommu_flush_dte(iommu, dev_data->devid);
+       if (!ret && alias != dev_data->devid)
+               ret = iommu_flush_dte(iommu, alias);
        if (ret)
                return ret;
 
@@ -1984,27 +1887,33 @@ static void do_attach(struct iommu_dev_data *dev_data,
                       struct protection_domain *domain)
 {
        struct amd_iommu *iommu;
+       u16 alias;
        bool ats;
 
        iommu = amd_iommu_rlookup_table[dev_data->devid];
+       alias = amd_iommu_alias_table[dev_data->devid];
        ats   = dev_data->ats.enabled;
 
        /* Update data structures */
        dev_data->domain = domain;
        list_add(&dev_data->list, &domain->dev_list);
-       set_dte_entry(dev_data->devid, domain, ats);
 
        /* Do reference counting */
        domain->dev_iommu[iommu->index] += 1;
        domain->dev_cnt                 += 1;
 
-       /* Flush the DTE entry */
+       /* Update device table: the device's DTE and, if distinct, its alias DTE */
+       set_dte_entry(dev_data->devid, domain, ats);
+       if (alias != dev_data->devid)
+               set_dte_entry(alias, domain, ats);
+
        device_flush_dte(dev_data);
 }
 
 static void do_detach(struct iommu_dev_data *dev_data)
 {
        struct amd_iommu *iommu;
+       u16 alias;
 
        /*
         * First check if the device is still attached. It might already
@@ -2016,6 +1925,7 @@ static void do_detach(struct iommu_dev_data *dev_data)
                return;
 
        iommu = amd_iommu_rlookup_table[dev_data->devid];
+       alias = amd_iommu_alias_table[dev_data->devid];
 
        /* decrease reference counters */
        dev_data->domain->dev_iommu[iommu->index] -= 1;
@@ -2025,6 +1935,8 @@ static void do_detach(struct iommu_dev_data *dev_data)
        dev_data->domain = NULL;
        list_del(&dev_data->list);
        clear_dte_entry(dev_data->devid);
+       if (alias != dev_data->devid)
+               clear_dte_entry(alias);
 
        /* Flush the DTE entry */
        device_flush_dte(dev_data);
@@ -2037,29 +1949,23 @@ static void do_detach(struct iommu_dev_data *dev_data)
 static int __attach_device(struct iommu_dev_data *dev_data,
                           struct protection_domain *domain)
 {
-       struct iommu_dev_data *head, *entry;
        int ret;
 
+       /*
+        * Must be called with IRQs disabled. Warn here to detect early
+        * when it's not.
+        */
+       WARN_ON(!irqs_disabled());
+
        /* lock domain */
        spin_lock(&domain->lock);
 
-       head = dev_data;
-
-       if (head->alias_data != NULL)
-               head = head->alias_data;
-
-       /* Now we have the root of the alias group, if any */
-
        ret = -EBUSY;
-       if (head->domain != NULL)
+       if (dev_data->domain != NULL)
                goto out_unlock;
 
        /* Attach alias group root */
-       do_attach(head, domain);
-
-       /* Attach other devices in the alias group */
-       list_for_each_entry(entry, &head->alias_list, alias_list)
-               do_attach(entry, domain);
+       do_attach(dev_data, domain);
 
        ret = 0;
 
@@ -2209,26 +2115,24 @@ static int attach_device(struct device *dev,
  */
 static void __detach_device(struct iommu_dev_data *dev_data)
 {
-       struct iommu_dev_data *head, *entry;
        struct protection_domain *domain;
-       unsigned long flags;
 
-       BUG_ON(!dev_data->domain);
-
-       domain = dev_data->domain;
+       /*
+        * Must be called with IRQs disabled. Warn here to detect early
+        * when it's not.
+        */
+       WARN_ON(!irqs_disabled());
 
-       spin_lock_irqsave(&domain->lock, flags);
+       if (WARN_ON(!dev_data->domain))
+               return;
 
-       head = dev_data;
-       if (head->alias_data != NULL)
-               head = head->alias_data;
+       domain = dev_data->domain;
 
-       list_for_each_entry(entry, &head->alias_list, alias_list)
-               do_detach(entry);
+       spin_lock(&domain->lock);
 
-       do_detach(head);
+       do_detach(dev_data);
 
-       spin_unlock_irqrestore(&domain->lock, flags);
+       spin_unlock(&domain->lock);
 }
 
 /*
@@ -3198,6 +3102,7 @@ static const struct iommu_ops amd_iommu_ops = {
        .iova_to_phys = amd_iommu_iova_to_phys,
        .add_device = amd_iommu_add_device,
        .remove_device = amd_iommu_remove_device,
+       .device_group = pci_device_group,
        .get_dm_regions = amd_iommu_get_dm_regions,
        .put_dm_regions = amd_iommu_put_dm_regions,
        .pgsize_bitmap  = AMD_IOMMU_PGSIZES,
index 1b066e7..a7cc399 100644 (file)
@@ -407,20 +407,6 @@ static inline int ivhd_entry_length(u8 *ivhd)
        return 0x04 << (*ivhd >> 6);
 }
 
-/*
- * This function reads the last device id the IOMMU has to handle from the PCI
- * capability header for this IOMMU
- */
-static int __init find_last_devid_on_pci(int bus, int dev, int fn, int cap_ptr)
-{
-       u32 cap;
-
-       cap = read_pci_config(bus, dev, fn, cap_ptr+MMIO_RANGE_OFFSET);
-       update_last_devid(PCI_DEVID(MMIO_GET_BUS(cap), MMIO_GET_LD(cap)));
-
-       return 0;
-}
-
 /*
  * After reading the highest device id from the IOMMU PCI capability header
  * this function looks if there is a higher device id defined in the ACPI table
@@ -433,14 +419,13 @@ static int __init find_last_devid_from_ivhd(struct ivhd_header *h)
        p += sizeof(*h);
        end += h->length;
 
-       find_last_devid_on_pci(PCI_BUS_NUM(h->devid),
-                       PCI_SLOT(h->devid),
-                       PCI_FUNC(h->devid),
-                       h->cap_ptr);
-
        while (p < end) {
                dev = (struct ivhd_entry *)p;
                switch (dev->type) {
+               case IVHD_DEV_ALL:
+                       /* Use maximum BDF value for DEV_ALL */
+                       update_last_devid(0xffff);
+                       break;
                case IVHD_DEV_SELECT:
                case IVHD_DEV_RANGE_END:
                case IVHD_DEV_ALIAS:
@@ -513,17 +498,12 @@ static int __init find_last_devid_acpi(struct acpi_table_header *table)
  * write commands to that buffer later and the IOMMU will execute them
  * asynchronously
  */
-static u8 * __init alloc_command_buffer(struct amd_iommu *iommu)
+static int __init alloc_command_buffer(struct amd_iommu *iommu)
 {
-       u8 *cmd_buf = (u8 *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
-                       get_order(CMD_BUFFER_SIZE));
-
-       if (cmd_buf == NULL)
-               return NULL;
-
-       iommu->cmd_buf_size = CMD_BUFFER_SIZE | CMD_BUFFER_UNINITIALIZED;
+       iommu->cmd_buf = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
+                                                 get_order(CMD_BUFFER_SIZE));
 
-       return cmd_buf;
+       return iommu->cmd_buf ? 0 : -ENOMEM;
 }
 
 /*
@@ -557,27 +537,20 @@ static void iommu_enable_command_buffer(struct amd_iommu *iommu)
                    &entry, sizeof(entry));
 
        amd_iommu_reset_cmd_buffer(iommu);
-       iommu->cmd_buf_size &= ~(CMD_BUFFER_UNINITIALIZED);
 }
 
 static void __init free_command_buffer(struct amd_iommu *iommu)
 {
-       free_pages((unsigned long)iommu->cmd_buf,
-                  get_order(iommu->cmd_buf_size & ~(CMD_BUFFER_UNINITIALIZED)));
+       free_pages((unsigned long)iommu->cmd_buf, get_order(CMD_BUFFER_SIZE));
 }
 
 /* allocates the memory where the IOMMU will log its events to */
-static u8 * __init alloc_event_buffer(struct amd_iommu *iommu)
+static int __init alloc_event_buffer(struct amd_iommu *iommu)
 {
-       iommu->evt_buf = (u8 *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
-                                               get_order(EVT_BUFFER_SIZE));
+       iommu->evt_buf = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
+                                                 get_order(EVT_BUFFER_SIZE));
 
-       if (iommu->evt_buf == NULL)
-               return NULL;
-
-       iommu->evt_buf_size = EVT_BUFFER_SIZE;
-
-       return iommu->evt_buf;
+       return iommu->evt_buf ? 0 : -ENOMEM;
 }
 
 static void iommu_enable_event_buffer(struct amd_iommu *iommu)
@@ -604,15 +577,12 @@ static void __init free_event_buffer(struct amd_iommu *iommu)
 }
 
 /* allocates the memory where the IOMMU will log its events to */
-static u8 * __init alloc_ppr_log(struct amd_iommu *iommu)
+static int __init alloc_ppr_log(struct amd_iommu *iommu)
 {
-       iommu->ppr_log = (u8 *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
-                                               get_order(PPR_LOG_SIZE));
-
-       if (iommu->ppr_log == NULL)
-               return NULL;
+       iommu->ppr_log = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
+                                                 get_order(PPR_LOG_SIZE));
 
-       return iommu->ppr_log;
+       return iommu->ppr_log ? 0 : -ENOMEM;
 }
 
 static void iommu_enable_ppr_log(struct amd_iommu *iommu)
@@ -835,20 +805,10 @@ static int __init init_iommu_from_acpi(struct amd_iommu *iommu,
                switch (e->type) {
                case IVHD_DEV_ALL:
 
-                       DUMP_printk("  DEV_ALL\t\t\t first devid: %02x:%02x.%x"
-                                   " last device %02x:%02x.%x flags: %02x\n",
-                                   PCI_BUS_NUM(iommu->first_device),
-                                   PCI_SLOT(iommu->first_device),
-                                   PCI_FUNC(iommu->first_device),
-                                   PCI_BUS_NUM(iommu->last_device),
-                                   PCI_SLOT(iommu->last_device),
-                                   PCI_FUNC(iommu->last_device),
-                                   e->flags);
+                       DUMP_printk("  DEV_ALL\t\t\tflags: %02x\n", e->flags);
 
-                       for (dev_i = iommu->first_device;
-                                       dev_i <= iommu->last_device; ++dev_i)
-                               set_dev_entry_from_acpi(iommu, dev_i,
-                                                       e->flags, 0);
+                       for (dev_i = 0; dev_i <= amd_iommu_last_bdf; ++dev_i)
+                               set_dev_entry_from_acpi(iommu, dev_i, e->flags, 0);
                        break;
                case IVHD_DEV_SELECT:
 
@@ -1004,17 +964,6 @@ static int __init init_iommu_from_acpi(struct amd_iommu *iommu,
        return 0;
 }
 
-/* Initializes the device->iommu mapping for the driver */
-static int __init init_iommu_devices(struct amd_iommu *iommu)
-{
-       u32 i;
-
-       for (i = iommu->first_device; i <= iommu->last_device; ++i)
-               set_iommu_for_device(iommu, i);
-
-       return 0;
-}
-
 static void __init free_iommu_one(struct amd_iommu *iommu)
 {
        free_command_buffer(iommu);
@@ -1111,12 +1060,10 @@ static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h)
        if (!iommu->mmio_base)
                return -ENOMEM;
 
-       iommu->cmd_buf = alloc_command_buffer(iommu);
-       if (!iommu->cmd_buf)
+       if (alloc_command_buffer(iommu))
                return -ENOMEM;
 
-       iommu->evt_buf = alloc_event_buffer(iommu);
-       if (!iommu->evt_buf)
+       if (alloc_event_buffer(iommu))
                return -ENOMEM;
 
        iommu->int_enabled = false;
@@ -1135,8 +1082,6 @@ static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h)
         */
        amd_iommu_rlookup_table[iommu->devid] = NULL;
 
-       init_iommu_devices(iommu);
-
        return 0;
 }
 
@@ -1266,11 +1211,6 @@ static int iommu_init_pci(struct amd_iommu *iommu)
        pci_read_config_dword(iommu->dev, cap_ptr + MMIO_MISC_OFFSET,
                              &misc);
 
-       iommu->first_device = PCI_DEVID(MMIO_GET_BUS(range),
-                                        MMIO_GET_FD(range));
-       iommu->last_device = PCI_DEVID(MMIO_GET_BUS(range),
-                                       MMIO_GET_LD(range));
-
        if (!(iommu->cap & (1 << IOMMU_CAP_IOTLB)))
                amd_iommu_iotlb_sup = false;
 
@@ -1308,11 +1248,8 @@ static int iommu_init_pci(struct amd_iommu *iommu)
                amd_iommu_v2_present = true;
        }
 
-       if (iommu_feature(iommu, FEATURE_PPR)) {
-               iommu->ppr_log = alloc_ppr_log(iommu);
-               if (!iommu->ppr_log)
-                       return -ENOMEM;
-       }
+       if (iommu_feature(iommu, FEATURE_PPR) && alloc_ppr_log(iommu))
+               return -ENOMEM;
 
        if (iommu->cap & (1UL << IOMMU_CAP_NPCACHE))
                amd_iommu_np_cache = true;
@@ -1758,11 +1695,8 @@ static void __init free_on_init_error(void)
        free_pages((unsigned long)irq_lookup_table,
                   get_order(rlookup_table_size));
 
-       if (amd_iommu_irq_cache) {
-               kmem_cache_destroy(amd_iommu_irq_cache);
-               amd_iommu_irq_cache = NULL;
-
-       }
+       kmem_cache_destroy(amd_iommu_irq_cache);
+       amd_iommu_irq_cache = NULL;
 
        free_pages((unsigned long)amd_iommu_rlookup_table,
                   get_order(rlookup_table_size));
@@ -2201,7 +2135,7 @@ int __init amd_iommu_detect(void)
        iommu_detected = 1;
        x86_init.iommu.iommu_init = amd_iommu_init;
 
-       return 0;
+       return 1;
 }
 
 /****************************************************************************
index c9b6472..08166ae 100644 (file)
 #define IOMMU_PTE_IR (1ULL << 61)
 #define IOMMU_PTE_IW (1ULL << 62)
 
+#define DTE_FLAG_IOTLB (1ULL << 32)
+#define DTE_FLAG_GV    (1ULL << 55)
 #define DTE_FLAG_MASK  (0x3ffULL << 32)
-#define DTE_FLAG_IOTLB (0x01UL << 32)
-#define DTE_FLAG_GV    (0x01ULL << 55)
 #define DTE_GLX_SHIFT  (56)
 #define DTE_GLX_MASK   (3)
 
@@ -517,11 +517,6 @@ struct amd_iommu {
        /* pci domain of this IOMMU */
        u16 pci_seg;
 
-       /* first device this IOMMU handles. read from PCI */
-       u16 first_device;
-       /* last device this IOMMU handles. read from PCI */
-       u16 last_device;
-
        /* start of exclusion range of that IOMMU */
        u64 exclusion_start;
        /* length of exclusion range of that IOMMU */
@@ -529,11 +524,7 @@ struct amd_iommu {
 
        /* command buffer virtual address */
        u8 *cmd_buf;
-       /* size of command buffer */
-       u32 cmd_buf_size;
 
-       /* size of event buffer */
-       u32 evt_buf_size;
        /* event buffer virtual address */
        u8 *evt_buf;
 
index 286e890..4e5118a 100644 (file)
 #include <linux/iommu.h>
 #include <linux/iopoll.h>
 #include <linux/module.h>
+#include <linux/msi.h>
 #include <linux/of.h>
 #include <linux/of_address.h>
+#include <linux/of_platform.h>
 #include <linux/pci.h>
 #include <linux/platform_device.h>
 
@@ -403,6 +405,31 @@ enum pri_resp {
        PRI_RESP_SUCC,
 };
 
+enum arm_smmu_msi_index {
+       EVTQ_MSI_INDEX,
+       GERROR_MSI_INDEX,
+       PRIQ_MSI_INDEX,
+       ARM_SMMU_MAX_MSIS,
+};
+
+static phys_addr_t arm_smmu_msi_cfg[ARM_SMMU_MAX_MSIS][3] = {
+       [EVTQ_MSI_INDEX] = {
+               ARM_SMMU_EVTQ_IRQ_CFG0,
+               ARM_SMMU_EVTQ_IRQ_CFG1,
+               ARM_SMMU_EVTQ_IRQ_CFG2,
+       },
+       [GERROR_MSI_INDEX] = {
+               ARM_SMMU_GERROR_IRQ_CFG0,
+               ARM_SMMU_GERROR_IRQ_CFG1,
+               ARM_SMMU_GERROR_IRQ_CFG2,
+       },
+       [PRIQ_MSI_INDEX] = {
+               ARM_SMMU_PRIQ_IRQ_CFG0,
+               ARM_SMMU_PRIQ_IRQ_CFG1,
+               ARM_SMMU_PRIQ_IRQ_CFG2,
+       },
+};
+
 struct arm_smmu_cmdq_ent {
        /* Common fields */
        u8                              opcode;
@@ -570,7 +597,6 @@ struct arm_smmu_device {
        unsigned int                    sid_bits;
 
        struct arm_smmu_strtab_cfg      strtab_cfg;
-       struct list_head                list;
 };
 
 /* SMMU private data for an IOMMU group */
@@ -605,10 +631,6 @@ struct arm_smmu_domain {
        struct iommu_domain             domain;
 };
 
-/* Our list of SMMU instances */
-static DEFINE_SPINLOCK(arm_smmu_devices_lock);
-static LIST_HEAD(arm_smmu_devices);
-
 struct arm_smmu_option_prop {
        u32 opt;
        const char *prop;
@@ -1427,7 +1449,7 @@ static int arm_smmu_domain_finalise_s1(struct arm_smmu_domain *smmu_domain,
                                       struct io_pgtable_cfg *pgtbl_cfg)
 {
        int ret;
-       u16 asid;
+       int asid;
        struct arm_smmu_device *smmu = smmu_domain->smmu;
        struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg;
 
@@ -1439,10 +1461,11 @@ static int arm_smmu_domain_finalise_s1(struct arm_smmu_domain *smmu_domain,
                                         &cfg->cdptr_dma, GFP_KERNEL);
        if (!cfg->cdptr) {
                dev_warn(smmu->dev, "failed to allocate context descriptor\n");
+               ret = -ENOMEM;
                goto out_free_asid;
        }
 
-       cfg->cd.asid    = asid;
+       cfg->cd.asid    = (u16)asid;
        cfg->cd.ttbr    = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[0];
        cfg->cd.tcr     = pgtbl_cfg->arm_lpae_s1_cfg.tcr;
        cfg->cd.mair    = pgtbl_cfg->arm_lpae_s1_cfg.mair[0];
@@ -1456,7 +1479,7 @@ out_free_asid:
 static int arm_smmu_domain_finalise_s2(struct arm_smmu_domain *smmu_domain,
                                       struct io_pgtable_cfg *pgtbl_cfg)
 {
-       u16 vmid;
+       int vmid;
        struct arm_smmu_device *smmu = smmu_domain->smmu;
        struct arm_smmu_s2_cfg *cfg = &smmu_domain->s2_cfg;
 
@@ -1464,7 +1487,7 @@ static int arm_smmu_domain_finalise_s2(struct arm_smmu_domain *smmu_domain,
        if (IS_ERR_VALUE(vmid))
                return vmid;
 
-       cfg->vmid       = vmid;
+       cfg->vmid       = (u16)vmid;
        cfg->vttbr      = pgtbl_cfg->arm_lpae_s2_cfg.vttbr;
        cfg->vtcr       = pgtbl_cfg->arm_lpae_s2_cfg.vtcr;
        return 0;
@@ -1726,7 +1749,8 @@ static void __arm_smmu_release_pci_iommudata(void *data)
 static struct arm_smmu_device *arm_smmu_get_for_pci_dev(struct pci_dev *pdev)
 {
        struct device_node *of_node;
-       struct arm_smmu_device *curr, *smmu = NULL;
+       struct platform_device *smmu_pdev;
+       struct arm_smmu_device *smmu = NULL;
        struct pci_bus *bus = pdev->bus;
 
        /* Walk up to the root bus */
@@ -1739,14 +1763,10 @@ static struct arm_smmu_device *arm_smmu_get_for_pci_dev(struct pci_dev *pdev)
                return NULL;
 
        /* See if we can find an SMMU corresponding to the phandle */
-       spin_lock(&arm_smmu_devices_lock);
-       list_for_each_entry(curr, &arm_smmu_devices, list) {
-               if (curr->dev->of_node == of_node) {
-                       smmu = curr;
-                       break;
-               }
-       }
-       spin_unlock(&arm_smmu_devices_lock);
+       smmu_pdev = of_find_device_by_node(of_node);
+       if (smmu_pdev)
+               smmu = platform_get_drvdata(smmu_pdev);
+
        of_node_put(of_node);
        return smmu;
 }
@@ -1902,6 +1922,7 @@ static struct iommu_ops arm_smmu_ops = {
        .iova_to_phys           = arm_smmu_iova_to_phys,
        .add_device             = arm_smmu_add_device,
        .remove_device          = arm_smmu_remove_device,
+       .device_group           = pci_device_group,
        .domain_get_attr        = arm_smmu_domain_get_attr,
        .domain_set_attr        = arm_smmu_domain_set_attr,
        .pgsize_bitmap          = -1UL, /* Restricted during device attach */
@@ -2186,6 +2207,72 @@ static int arm_smmu_write_reg_sync(struct arm_smmu_device *smmu, u32 val,
                                          1, ARM_SMMU_POLL_TIMEOUT_US);
 }
 
+static void arm_smmu_free_msis(void *data)
+{
+       struct device *dev = data;
+       platform_msi_domain_free_irqs(dev);
+}
+
+static void arm_smmu_write_msi_msg(struct msi_desc *desc, struct msi_msg *msg)
+{
+       phys_addr_t doorbell;
+       struct device *dev = msi_desc_to_dev(desc);
+       struct arm_smmu_device *smmu = dev_get_drvdata(dev);
+       phys_addr_t *cfg = arm_smmu_msi_cfg[desc->platform.msi_index];
+
+       doorbell = (((u64)msg->address_hi) << 32) | msg->address_lo;
+       doorbell &= MSI_CFG0_ADDR_MASK << MSI_CFG0_ADDR_SHIFT;
+
+       writeq_relaxed(doorbell, smmu->base + cfg[0]);
+       writel_relaxed(msg->data, smmu->base + cfg[1]);
+       writel_relaxed(MSI_CFG2_MEMATTR_DEVICE_nGnRE, smmu->base + cfg[2]);
+}
+
+static void arm_smmu_setup_msis(struct arm_smmu_device *smmu)
+{
+       struct msi_desc *desc;
+       int ret, nvec = ARM_SMMU_MAX_MSIS;
+       struct device *dev = smmu->dev;
+
+       /* Clear the MSI address regs */
+       writeq_relaxed(0, smmu->base + ARM_SMMU_GERROR_IRQ_CFG0);
+       writeq_relaxed(0, smmu->base + ARM_SMMU_EVTQ_IRQ_CFG0);
+
+       if (smmu->features & ARM_SMMU_FEAT_PRI)
+               writeq_relaxed(0, smmu->base + ARM_SMMU_PRIQ_IRQ_CFG0);
+       else
+               nvec--;
+
+       if (!(smmu->features & ARM_SMMU_FEAT_MSI))
+               return;
+
+       /* Allocate MSIs for evtq, gerror and priq. Ignore cmdq */
+       ret = platform_msi_domain_alloc_irqs(dev, nvec, arm_smmu_write_msi_msg);
+       if (ret) {
+               dev_warn(dev, "failed to allocate MSIs\n");
+               return;
+       }
+
+       for_each_msi_entry(desc, dev) {
+               switch (desc->platform.msi_index) {
+               case EVTQ_MSI_INDEX:
+                       smmu->evtq.q.irq = desc->irq;
+                       break;
+               case GERROR_MSI_INDEX:
+                       smmu->gerr_irq = desc->irq;
+                       break;
+               case PRIQ_MSI_INDEX:
+                       smmu->priq.q.irq = desc->irq;
+                       break;
+               default:        /* Unknown */
+                       continue;
+               }
+       }
+
+       /* Add callback to free MSIs on teardown */
+       devm_add_action(dev, arm_smmu_free_msis, dev);
+}
+
 static int arm_smmu_setup_irqs(struct arm_smmu_device *smmu)
 {
        int ret, irq;
@@ -2199,11 +2286,9 @@ static int arm_smmu_setup_irqs(struct arm_smmu_device *smmu)
                return ret;
        }
 
-       /* Clear the MSI address regs */
-       writeq_relaxed(0, smmu->base + ARM_SMMU_GERROR_IRQ_CFG0);
-       writeq_relaxed(0, smmu->base + ARM_SMMU_EVTQ_IRQ_CFG0);
+       arm_smmu_setup_msis(smmu);
 
-       /* Request wired interrupt lines */
+       /* Request interrupt lines */
        irq = smmu->evtq.q.irq;
        if (irq) {
                ret = devm_request_threaded_irq(smmu->dev, irq,
@@ -2232,8 +2317,6 @@ static int arm_smmu_setup_irqs(struct arm_smmu_device *smmu)
        }
 
        if (smmu->features & ARM_SMMU_FEAT_PRI) {
-               writeq_relaxed(0, smmu->base + ARM_SMMU_PRIQ_IRQ_CFG0);
-
                irq = smmu->priq.q.irq;
                if (irq) {
                        ret = devm_request_threaded_irq(smmu->dev, irq,
@@ -2612,16 +2695,14 @@ static int arm_smmu_device_dt_probe(struct platform_device *pdev)
        if (ret)
                return ret;
 
+       /* Record our private device structure */
+       platform_set_drvdata(pdev, smmu);
+
        /* Reset the device */
        ret = arm_smmu_device_reset(smmu);
        if (ret)
                goto out_free_structures;
 
-       /* Record our private device structure */
-       INIT_LIST_HEAD(&smmu->list);
-       spin_lock(&arm_smmu_devices_lock);
-       list_add(&smmu->list, &arm_smmu_devices);
-       spin_unlock(&arm_smmu_devices_lock);
        return 0;
 
 out_free_structures:
@@ -2631,21 +2712,7 @@ out_free_structures:
 
 static int arm_smmu_device_remove(struct platform_device *pdev)
 {
-       struct arm_smmu_device *curr, *smmu = NULL;
-       struct device *dev = &pdev->dev;
-
-       spin_lock(&arm_smmu_devices_lock);
-       list_for_each_entry(curr, &arm_smmu_devices, list) {
-               if (curr->dev == dev) {
-                       smmu = curr;
-                       list_del(&smmu->list);
-                       break;
-               }
-       }
-       spin_unlock(&arm_smmu_devices_lock);
-
-       if (!smmu)
-               return -ENODEV;
+       struct arm_smmu_device *smmu = platform_get_drvdata(pdev);
 
        arm_smmu_device_disable(smmu);
        arm_smmu_free_structures(smmu);
index 48a39df..47dc7a7 100644 (file)
                ((smmu->options & ARM_SMMU_OPT_SECURE_CFG_ACCESS)       \
                        ? 0x400 : 0))
 
+#ifdef CONFIG_64BIT
+#define smmu_writeq    writeq_relaxed
+#else
+#define smmu_writeq(reg64, addr)                               \
+       do {                                                    \
+               u64 __val = (reg64);                            \
+               void __iomem *__addr = (addr);                  \
+               writel_relaxed(__val >> 32, __addr + 4);        \
+               writel_relaxed(__val, __addr);                  \
+       } while (0)
+#endif
+
 /* Configuration registers */
 #define ARM_SMMU_GR0_sCR0              0x0
 #define sCR0_CLIENTPD                  (1 << 0)
 #define ARM_SMMU_CB_SCTLR              0x0
 #define ARM_SMMU_CB_RESUME             0x8
 #define ARM_SMMU_CB_TTBCR2             0x10
-#define ARM_SMMU_CB_TTBR0_LO           0x20
-#define ARM_SMMU_CB_TTBR0_HI           0x24
-#define ARM_SMMU_CB_TTBR1_LO           0x28
-#define ARM_SMMU_CB_TTBR1_HI           0x2c
+#define ARM_SMMU_CB_TTBR0              0x20
+#define ARM_SMMU_CB_TTBR1              0x28
 #define ARM_SMMU_CB_TTBCR              0x30
 #define ARM_SMMU_CB_S1_MAIR0           0x38
 #define ARM_SMMU_CB_S1_MAIR1           0x3c
 #define TTBCR2_SEP_SHIFT               15
 #define TTBCR2_SEP_UPSTREAM            (0x7 << TTBCR2_SEP_SHIFT)
 
-#define TTBRn_HI_ASID_SHIFT            16
+#define TTBRn_ASID_SHIFT               48
 
 #define FSR_MULTI                      (1 << 31)
 #define FSR_SS                         (1 << 30)
@@ -695,12 +705,12 @@ static void arm_smmu_init_context_bank(struct arm_smmu_domain *smmu_domain,
                                       struct io_pgtable_cfg *pgtbl_cfg)
 {
        u32 reg;
+       u64 reg64;
        bool stage1;
        struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
        struct arm_smmu_device *smmu = smmu_domain->smmu;
-       void __iomem *cb_base, *gr0_base, *gr1_base;
+       void __iomem *cb_base, *gr1_base;
 
-       gr0_base = ARM_SMMU_GR0(smmu);
        gr1_base = ARM_SMMU_GR1(smmu);
        stage1 = cfg->cbar != CBAR_TYPE_S2_TRANS;
        cb_base = ARM_SMMU_CB_BASE(smmu) + ARM_SMMU_CB(smmu, cfg->cbndx);
@@ -738,22 +748,17 @@ static void arm_smmu_init_context_bank(struct arm_smmu_domain *smmu_domain,
 
        /* TTBRs */
        if (stage1) {
-               reg = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[0];
-               writel_relaxed(reg, cb_base + ARM_SMMU_CB_TTBR0_LO);
-               reg = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[0] >> 32;
-               reg |= ARM_SMMU_CB_ASID(cfg) << TTBRn_HI_ASID_SHIFT;
-               writel_relaxed(reg, cb_base + ARM_SMMU_CB_TTBR0_HI);
-
-               reg = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[1];
-               writel_relaxed(reg, cb_base + ARM_SMMU_CB_TTBR1_LO);
-               reg = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[1] >> 32;
-               reg |= ARM_SMMU_CB_ASID(cfg) << TTBRn_HI_ASID_SHIFT;
-               writel_relaxed(reg, cb_base + ARM_SMMU_CB_TTBR1_HI);
+               reg64 = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[0];
+
+               reg64 |= ((u64)ARM_SMMU_CB_ASID(cfg)) << TTBRn_ASID_SHIFT;
+               smmu_writeq(reg64, cb_base + ARM_SMMU_CB_TTBR0);
+
+               reg64 = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[1];
+               reg64 |= ((u64)ARM_SMMU_CB_ASID(cfg)) << TTBRn_ASID_SHIFT;
+               smmu_writeq(reg64, cb_base + ARM_SMMU_CB_TTBR1);
        } else {
-               reg = pgtbl_cfg->arm_lpae_s2_cfg.vttbr;
-               writel_relaxed(reg, cb_base + ARM_SMMU_CB_TTBR0_LO);
-               reg = pgtbl_cfg->arm_lpae_s2_cfg.vttbr >> 32;
-               writel_relaxed(reg, cb_base + ARM_SMMU_CB_TTBR0_HI);
+               reg64 = pgtbl_cfg->arm_lpae_s2_cfg.vttbr;
+               smmu_writeq(reg64, cb_base + ARM_SMMU_CB_TTBR0);
        }
 
        /* TTBCR */
@@ -1212,17 +1217,15 @@ static phys_addr_t arm_smmu_iova_to_phys_hard(struct iommu_domain *domain,
 
        /* ATS1 registers can only be written atomically */
        va = iova & ~0xfffUL;
-#ifdef CONFIG_64BIT
        if (smmu->version == ARM_SMMU_V2)
-               writeq_relaxed(va, cb_base + ARM_SMMU_CB_ATS1PR);
+               smmu_writeq(va, cb_base + ARM_SMMU_CB_ATS1PR);
        else
-#endif
                writel_relaxed(va, cb_base + ARM_SMMU_CB_ATS1PR);
 
        if (readl_poll_timeout_atomic(cb_base + ARM_SMMU_CB_ATSR, tmp,
                                      !(tmp & ATSR_ACTIVE), 5, 50)) {
                dev_err(dev,
-                       "iova to phys timed out on 0x%pad. Falling back to software table walk.\n",
+                       "iova to phys timed out on %pad. Falling back to software table walk.\n",
                        &iova);
                return ops->iova_to_phys(ops, iova);
        }
@@ -1292,33 +1295,25 @@ static void __arm_smmu_release_pci_iommudata(void *data)
        kfree(data);
 }
 
-static int arm_smmu_add_pci_device(struct pci_dev *pdev)
+static int arm_smmu_init_pci_device(struct pci_dev *pdev,
+                                   struct iommu_group *group)
 {
-       int i, ret;
-       u16 sid;
-       struct iommu_group *group;
        struct arm_smmu_master_cfg *cfg;
-
-       group = iommu_group_get_for_dev(&pdev->dev);
-       if (IS_ERR(group))
-               return PTR_ERR(group);
+       u16 sid;
+       int i;
 
        cfg = iommu_group_get_iommudata(group);
        if (!cfg) {
                cfg = kzalloc(sizeof(*cfg), GFP_KERNEL);
-               if (!cfg) {
-                       ret = -ENOMEM;
-                       goto out_put_group;
-               }
+               if (!cfg)
+                       return -ENOMEM;
 
                iommu_group_set_iommudata(group, cfg,
                                          __arm_smmu_release_pci_iommudata);
        }
 
-       if (cfg->num_streamids >= MAX_MASTER_STREAMIDS) {
-               ret = -ENOSPC;
-               goto out_put_group;
-       }
+       if (cfg->num_streamids >= MAX_MASTER_STREAMIDS)
+               return -ENOSPC;
 
        /*
         * Assume Stream ID == Requester ID for now.
@@ -1334,16 +1329,13 @@ static int arm_smmu_add_pci_device(struct pci_dev *pdev)
                cfg->streamids[cfg->num_streamids++] = sid;
 
        return 0;
-out_put_group:
-       iommu_group_put(group);
-       return ret;
 }
 
-static int arm_smmu_add_platform_device(struct device *dev)
+static int arm_smmu_init_platform_device(struct device *dev,
+                                        struct iommu_group *group)
 {
-       struct iommu_group *group;
-       struct arm_smmu_master *master;
        struct arm_smmu_device *smmu = find_smmu_for_device(dev);
+       struct arm_smmu_master *master;
 
        if (!smmu)
                return -ENODEV;
@@ -1352,21 +1344,20 @@ static int arm_smmu_add_platform_device(struct device *dev)
        if (!master)
                return -ENODEV;
 
-       /* No automatic group creation for platform devices */
-       group = iommu_group_alloc();
-       if (IS_ERR(group))
-               return PTR_ERR(group);
-
        iommu_group_set_iommudata(group, &master->cfg, NULL);
-       return iommu_group_add_device(group, dev);
+
+       return 0;
 }
 
 static int arm_smmu_add_device(struct device *dev)
 {
-       if (dev_is_pci(dev))
-               return arm_smmu_add_pci_device(to_pci_dev(dev));
+       struct iommu_group *group;
+
+       group = iommu_group_get_for_dev(dev);
+       if (IS_ERR(group))
+               return PTR_ERR(group);
 
-       return arm_smmu_add_platform_device(dev);
+       return 0;
 }
 
 static void arm_smmu_remove_device(struct device *dev)
@@ -1374,6 +1365,32 @@ static void arm_smmu_remove_device(struct device *dev)
        iommu_group_remove_device(dev);
 }
 
+/*
+ * Pick the group for @dev (pci_device_group() for PCI devices,
+ * generic_device_group() otherwise) and run the matching per-bus init on it.
+ * If that init fails, the group reference is dropped and the error is
+ * returned as an ERR_PTR().
+ */
+static struct iommu_group *arm_smmu_device_group(struct device *dev)
+{
+       struct iommu_group *group;
+       int err;
+
+       if (dev_is_pci(dev)) {
+               group = pci_device_group(dev);
+               if (IS_ERR(group))
+                       return group;
+
+               err = arm_smmu_init_pci_device(to_pci_dev(dev), group);
+       } else {
+               group = generic_device_group(dev);
+               if (IS_ERR(group))
+                       return group;
+
+               err = arm_smmu_init_platform_device(dev, group);
+       }
+
+       if (!err)
+               return group;
+
+       iommu_group_put(group);
+       return ERR_PTR(err);
+}
+
 static int arm_smmu_domain_get_attr(struct iommu_domain *domain,
                                    enum iommu_attr attr, void *data)
 {
@@ -1430,6 +1447,7 @@ static struct iommu_ops arm_smmu_ops = {
        .iova_to_phys           = arm_smmu_iova_to_phys,
        .add_device             = arm_smmu_add_device,
        .remove_device          = arm_smmu_remove_device,
+       .device_group           = arm_smmu_device_group,
        .domain_get_attr        = arm_smmu_domain_get_attr,
        .domain_set_attr        = arm_smmu_domain_set_attr,
        .pgsize_bitmap          = -1UL, /* Restricted during device attach */
diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
new file mode 100644 (file)
index 0000000..3a20db4
--- /dev/null
@@ -0,0 +1,524 @@
+/*
+ * A fairly generic DMA-API to IOMMU-API glue layer.
+ *
+ * Copyright (C) 2014-2015 ARM Ltd.
+ *
+ * based in part on arch/arm/mm/dma-mapping.c:
+ * Copyright (C) 2000-2004 Russell King
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/device.h>
+#include <linux/dma-iommu.h>
+#include <linux/huge_mm.h>
+#include <linux/iommu.h>
+#include <linux/iova.h>
+#include <linux/mm.h>
+
+/*
+ * Initialisation entry point for this glue layer: it only calls
+ * iova_cache_get() and hands that call's return value back.
+ */
+int iommu_dma_init(void)
+{
+       int ret = iova_cache_get();
+
+       return ret;
+}
+
+/**
+ * iommu_get_dma_cookie - Acquire DMA-API resources for a domain
+ * @domain: IOMMU domain to prepare for DMA-API usage
+ *
+ * Allocates a zeroed struct iova_domain and stores it in
+ * @domain->iova_cookie. IOMMU drivers should normally call this from
+ * their domain_alloc callback when domain->type == IOMMU_DOMAIN_DMA.
+ *
+ * Return: 0 on success, -EEXIST if @domain already has a cookie, or
+ *        -ENOMEM if the allocation fails.
+ */
+int iommu_get_dma_cookie(struct iommu_domain *domain)
+{
+       if (domain->iova_cookie)
+               return -EEXIST;
+
+       domain->iova_cookie = kzalloc(sizeof(struct iova_domain), GFP_KERNEL);
+       if (!domain->iova_cookie)
+               return -ENOMEM;
+
+       return 0;
+}
+EXPORT_SYMBOL(iommu_get_dma_cookie);
+
+/**
+ * iommu_put_dma_cookie - Release a domain's DMA mapping resources
+ * @domain: IOMMU domain previously prepared by iommu_get_dma_cookie()
+ *
+ * IOMMU drivers should normally call this from their domain_free callback.
+ * Does nothing if @domain has no cookie.
+ */
+void iommu_put_dma_cookie(struct iommu_domain *domain)
+{
+       struct iova_domain *iovad = domain->iova_cookie;
+
+       if (!iovad)
+               return;
+
+       /*
+        * iommu_get_dma_cookie() only zero-allocates the cookie; the IOVA
+        * domain inside it is set up later by iommu_dma_init_domain(), which
+        * treats a non-zero start_pfn as "already initialised" (the base PFN
+        * it passes to init_iova_domain() is never below 1). Don't tear down
+        * an IOVA domain that was never initialised.
+        */
+       if (iovad->start_pfn)
+               put_iova_domain(iovad);
+       kfree(iovad);
+       domain->iova_cookie = NULL;
+}
+EXPORT_SYMBOL(iommu_put_dma_cookie);
+
+/**
+ * iommu_dma_init_domain - Initialise a DMA mapping domain
+ * @domain: IOMMU domain previously prepared by iommu_get_dma_cookie()
+ * @base: IOVA at which the mappable address space starts
+ * @size: Size of IOVA space
+ *
+ * @base and @size should be exact multiples of IOMMU page granularity to
+ * avoid rounding surprises. If necessary, we reserve the page at address 0
+ * to ensure it is an invalid IOVA. It is safe to reinitialise a domain, but
+ * any change which could make prior IOVAs invalid will fail.
+ *
+ * Return: 0 on success, -ENODEV if @domain has no DMA cookie, or -EFAULT if
+ *        the range lies entirely outside a forced aperture or is
+ *        incompatible with the domain's existing IOVA setup.
+ */
+int iommu_dma_init_domain(struct iommu_domain *domain, dma_addr_t base, u64 size)
+{
+       struct iova_domain *iovad = domain->iova_cookie;
+       unsigned long order, base_pfn, end_pfn;
+
+       if (!iovad)
+               return -ENODEV;
+
+       /*
+        * Use the smallest supported page size for IOVA granularity. The
+        * base PFN is clamped to at least 1, which is what keeps the page
+        * at address 0 out of the range handed to the IOVA domain.
+        */
+       order = __ffs(domain->ops->pgsize_bitmap);
+       base_pfn = max_t(unsigned long, 1, base >> order);
+       end_pfn = (base + size - 1) >> order;
+
+       /*
+        * Check the domain allows at least some access to the device, i.e.
+        * the requested range overlaps the aperture when one is enforced...
+        */
+       if (domain->geometry.force_aperture) {
+               if (base > domain->geometry.aperture_end ||
+                   base + size <= domain->geometry.aperture_start) {
+                       pr_warn("specified DMA range outside IOMMU capability\n");
+                       return -EFAULT;
+               }
+               /*
+                * ...then finally give it a kicking to make sure it fits:
+                * both ends are clamped to the aperture limits.
+                */
+               base_pfn = max_t(unsigned long, base_pfn,
+                               domain->geometry.aperture_start >> order);
+               end_pfn = min_t(unsigned long, end_pfn,
+                               domain->geometry.aperture_end >> order);
+       }
+
+       /*
+        * All we can safely do with an existing domain is enlarge it. A
+        * non-zero start_pfn is taken to mean the IOVA domain has already
+        * been initialised; in that case the granule and base must match
+        * and the new end must not be below the current limit.
+        */
+       if (iovad->start_pfn) {
+               if (1UL << order != iovad->granule ||
+                   base_pfn != iovad->start_pfn ||
+                   end_pfn < iovad->dma_32bit_pfn) {
+                       pr_warn("Incompatible range for DMA domain\n");
+                       return -EFAULT;
+               }
+               iovad->dma_32bit_pfn = end_pfn;
+       } else {
+               init_iova_domain(iovad, 1UL << order, base_pfn, end_pfn);
+       }
+       return 0;
+}
+EXPORT_SYMBOL(iommu_dma_init_domain);
+
+/**
+ * dma_direction_to_prot - Translate DMA API directions to IOMMU API page flags
+ * @dir: Direction of DMA transfer
+ * @coherent: Is the DMA master cache-coherent?
+ *
+ * IOMMU_CACHE is added for coherent masters. A direction other than
+ * bidirectional, to-device or from-device yields no flags at all.
+ *
+ * Return: corresponding IOMMU API page protection flags
+ */
+int dma_direction_to_prot(enum dma_data_direction dir, bool coherent)
+{
+       int access;
+
+       if (dir == DMA_BIDIRECTIONAL)
+               access = IOMMU_READ | IOMMU_WRITE;
+       else if (dir == DMA_TO_DEVICE)
+               access = IOMMU_READ;
+       else if (dir == DMA_FROM_DEVICE)
+               access = IOMMU_WRITE;
+       else
+               return 0;
+
+       return coherent ? access | IOMMU_CACHE : access;
+}
+
+/*
+ * Allocate IOVA space covering @size bytes, limited by @dma_limit. Both
+ * values are converted to units of the domain's granule before being
+ * passed to alloc_iova().
+ */
+static struct iova *__alloc_iova(struct iova_domain *iovad, size_t size,
+               dma_addr_t dma_limit)
+{
+       unsigned long granule_shift = iova_shift(iovad);
+       unsigned long nr_granules;
+
+       nr_granules = iova_align(iovad, size) >> granule_shift;
+
+       /*
+        * Size-aligned allocation is requested to be on the safe side; this
+        * could perhaps become a per-device or per-domain attribute...
+        */
+       return alloc_iova(iovad, nr_granules, dma_limit >> granule_shift, true);
+}
+
+/*
+ * The IOVA allocator knows what we mapped, so just unmap whatever that was:
+ * look up the allocation containing @dma_addr, unmap iova_size() granules
+ * from @domain starting at the granule holding @dma_addr, then release the
+ * IOVA allocation. Warns and returns early if no allocation is found.
+ */
+static void __iommu_dma_unmap(struct iommu_domain *domain, dma_addr_t dma_addr)
+{
+       struct iova_domain *iovad = domain->iova_cookie;
+       unsigned long shift = iova_shift(iovad);
+       unsigned long pfn = dma_addr >> shift;
+       struct iova *iova = find_iova(iovad, pfn);
+       size_t size;
+
+       if (WARN_ON(!iova))
+               return;
+
+       size = iova_size(iova) << shift;
+       size -= iommu_unmap(domain, pfn << shift, size);
+       /*
+        * ...and if we can't, then something is horribly, horribly wrong:
+        * any remainder means iommu_unmap() covered less than we asked for.
+        */
+       WARN_ON(size > 0);
+       __free_iova(iovad, iova);
+}
+
+/* Free @count pages from @pages, last entry first, then the array itself */
+static void __iommu_dma_free_pages(struct page **pages, int count)
+{
+       while (count) {
+               struct page *page = pages[--count];
+
+               __free_page(page);
+       }
+       kvfree(pages);
+}
+
+/*
+ * Allocate @count individual pages and return them in a newly allocated,
+ * zeroed array (kzalloc'd if it fits within PAGE_SIZE, vzalloc'd otherwise).
+ * Higher-order allocations are tried first and split into single pages.
+ * Returns NULL on failure, after freeing whatever had been allocated so far.
+ */
+static struct page **__iommu_dma_alloc_pages(unsigned int count, gfp_t gfp)
+{
+       struct page **pages;
+       unsigned int i = 0, array_size = count * sizeof(*pages);
+
+       if (array_size <= PAGE_SIZE)
+               pages = kzalloc(array_size, GFP_KERNEL);
+       else
+               pages = vzalloc(array_size);
+       if (!pages)
+               return NULL;
+
+       /* IOMMU can map any pages, so highmem can also be used here */
+       gfp |= __GFP_NOWARN | __GFP_HIGHMEM;
+
+       while (count) {
+               struct page *page = NULL;
+               int j, order = __fls(count);
+
+               /*
+                * Higher-order allocations are a convenience rather
+                * than a necessity, hence using __GFP_NORETRY until
+                * falling back to single-page allocations.
+                */
+               for (order = min(order, MAX_ORDER); order > 0; order--) {
+                       page = alloc_pages(gfp | __GFP_NORETRY, order);
+                       if (!page)
+                               continue;
+                       if (PageCompound(page)) {
+                               if (!split_huge_page(page))
+                                       break;
+                               __free_pages(page, order);
+                               /*
+                                * The pages are gone; forget the pointer so
+                                * that, if this was the last order tried, we
+                                * take the single-page fallback below rather
+                                * than recording freed pages in the array.
+                                */
+                               page = NULL;
+                       } else {
+                               split_page(page, order);
+                               break;
+                       }
+               }
+               /* Here order is 0 unless a higher-order attempt succeeded */
+               if (!page)
+                       page = alloc_page(gfp);
+               if (!page) {
+                       __iommu_dma_free_pages(pages, i);
+                       return NULL;
+               }
+               j = 1 << order;
+               count -= j;
+               while (j--)
+                       pages[i++] = page++;
+       }
+       return pages;
+}
+
+/**
+ * iommu_dma_free - Free a buffer allocated by iommu_dma_alloc()
+ * @dev: Device which owns this buffer
+ * @pages: Array of buffer pages as returned by iommu_dma_alloc()
+ * @size: Size of buffer in bytes
+ * @handle: DMA address of buffer
+ *
+ * Unmaps the buffer from the device's IOMMU domain, then frees both the
+ * pages backing it and the array describing them. On return *@handle is
+ * set to DMA_ERROR_CODE.
+ */
+void iommu_dma_free(struct device *dev, struct page **pages, size_t size,
+               dma_addr_t *handle)
+{
+       struct iommu_domain *domain = iommu_get_domain_for_dev(dev);
+       int count = PAGE_ALIGN(size) >> PAGE_SHIFT;
+
+       __iommu_dma_unmap(domain, *handle);
+       __iommu_dma_free_pages(pages, count);
+       *handle = DMA_ERROR_CODE;
+}
+
+/**
+ * iommu_dma_alloc - Allocate and map a buffer contiguous in IOVA space
+ * @dev: Device to allocate memory for. Must be a real device
+ *      attached to an iommu_dma_domain
+ * @size: Size of buffer in bytes
+ * @gfp: Allocation flags (used for the buffer pages; the page array and
+ *      the temporary scatter table are allocated with GFP_KERNEL)
+ * @prot: IOMMU mapping flags
+ * @handle: Out argument for allocated DMA handle. Set to DMA_ERROR_CODE
+ *         on entry and only overwritten on success.
+ * @flush_page: Arch callback which must ensure PAGE_SIZE bytes from the
+ *             given VA/PA are visible to the given non-coherent device.
+ *             Only invoked when @prot does not include IOMMU_CACHE.
+ *
+ * If @size is less than PAGE_SIZE, then a full CPU page will be allocated,
+ * but an IOMMU which supports smaller pages might not map the whole thing.
+ *
+ * On failure everything acquired so far (scatter table, IOVA range, pages)
+ * is released again before returning.
+ *
+ * Return: Array of struct page pointers describing the buffer,
+ *        or NULL on failure.
+ */
+struct page **iommu_dma_alloc(struct device *dev, size_t size,
+               gfp_t gfp, int prot, dma_addr_t *handle,
+               void (*flush_page)(struct device *, const void *, phys_addr_t))
+{
+       struct iommu_domain *domain = iommu_get_domain_for_dev(dev);
+       struct iova_domain *iovad = domain->iova_cookie;
+       struct iova *iova;
+       struct page **pages;
+       struct sg_table sgt;
+       dma_addr_t dma_addr;
+       unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT;
+
+       *handle = DMA_ERROR_CODE;
+
+       pages = __iommu_dma_alloc_pages(count, gfp);
+       if (!pages)
+               return NULL;
+
+       iova = __alloc_iova(iovad, size, dev->coherent_dma_mask);
+       if (!iova)
+               goto out_free_pages;
+
+       size = iova_align(iovad, size);
+       if (sg_alloc_table_from_pages(&sgt, pages, count, 0, size, GFP_KERNEL))
+               goto out_free_iova;
+
+       if (!(prot & IOMMU_CACHE)) {
+               struct sg_mapping_iter miter;
+               /*
+                * The CPU-centric flushing implied by SG_MITER_TO_SG isn't
+                * sufficient here, so skip it by using the "wrong" direction.
+                * The caller-supplied flush_page() is run on each chunk the
+                * iterator yields instead.
+                */
+               sg_miter_start(&miter, sgt.sgl, sgt.orig_nents, SG_MITER_FROM_SG);
+               while (sg_miter_next(&miter))
+                       flush_page(dev, miter.addr, page_to_phys(miter.page));
+               sg_miter_stop(&miter);
+       }
+
+       dma_addr = iova_dma_addr(iovad, iova);
+       if (iommu_map_sg(domain, dma_addr, sgt.sgl, sgt.orig_nents, prot)
+                       < size)
+               goto out_free_sg;
+
+       *handle = dma_addr;
+       sg_free_table(&sgt);
+       return pages;
+
+out_free_sg:
+       sg_free_table(&sgt);
+out_free_iova:
+       __free_iova(iovad, iova);
+out_free_pages:
+       __iommu_dma_free_pages(pages, count);
+       return NULL;
+}
+
+/**
+ * iommu_dma_mmap - Map a buffer into provided user VMA
+ * @pages: Array representing buffer from iommu_dma_alloc()
+ * @size: Size of buffer in bytes
+ * @vma: VMA describing requested userspace mapping
+ *
+ * Maps the pages of the buffer in @pages into @vma, starting at page index
+ * @vma->vm_pgoff and stopping at the end of the buffer or of the VMA,
+ * whichever comes first. The caller is responsible for verifying the
+ * correct size and protection of @vma beforehand.
+ *
+ * Return: 0 on success, the vm_insert_page() error if an insertion fails,
+ *        or -ENXIO if no page was inserted at all.
+ */
+int iommu_dma_mmap(struct page **pages, size_t size, struct vm_area_struct *vma)
+{
+       unsigned int nr_pages = PAGE_ALIGN(size) >> PAGE_SHIFT;
+       unsigned int idx = vma->vm_pgoff;
+       unsigned long uaddr;
+       int ret = -ENXIO;
+
+       for (uaddr = vma->vm_start;
+            idx < nr_pages && uaddr < vma->vm_end;
+            uaddr += PAGE_SIZE, idx++) {
+               ret = vm_insert_page(vma, uaddr, pages[idx]);
+               if (ret)
+                       return ret;
+       }
+
+       return ret;
+}
+
+/*
+ * Map @size bytes at @offset into @page for @dev. The mapping is widened to
+ * whole IOVA granules, and the returned address points at the original
+ * (possibly unaligned) start within that mapping. Returns DMA_ERROR_CODE if
+ * either the IOVA allocation or the IOMMU mapping fails.
+ */
+dma_addr_t iommu_dma_map_page(struct device *dev, struct page *page,
+               unsigned long offset, size_t size, int prot)
+{
+       struct iommu_domain *domain = iommu_get_domain_for_dev(dev);
+       struct iova_domain *iovad = domain->iova_cookie;
+       phys_addr_t phys = page_to_phys(page) + offset;
+       size_t iova_off = iova_offset(iovad, phys);
+       size_t len = iova_align(iovad, size + iova_off);
+       struct iova *iova;
+       dma_addr_t dma_addr;
+
+       iova = __alloc_iova(iovad, len, dma_get_mask(dev));
+       if (!iova)
+               return DMA_ERROR_CODE;
+
+       dma_addr = iova_dma_addr(iovad, iova);
+       if (!iommu_map(domain, dma_addr, phys - iova_off, len, prot))
+               return dma_addr + iova_off;
+
+       /* Mapping failed: hand the IOVA range back */
+       __free_iova(iovad, iova);
+       return DMA_ERROR_CODE;
+}
+
+/*
+ * Undo a single-page style mapping of @handle on @dev's IOMMU domain.
+ * @size, @dir and @attrs are not used here.
+ */
+void iommu_dma_unmap_page(struct device *dev, dma_addr_t handle, size_t size,
+               enum dma_data_direction dir, struct dma_attrs *attrs)
+{
+       struct iommu_domain *domain = iommu_get_domain_for_dev(dev);
+
+       __iommu_dma_unmap(domain, handle);
+}
+
+/*
+ * Prepare a successfully-mapped scatterlist to give back to the caller.
+ * Handling IOVA concatenation can come later, if needed
+ *
+ * On entry each entry's DMA address/length fields are expected to hold its
+ * original offset and length, while offset/length hold the granule-aligned
+ * values that were mapped (as iommu_dma_map_sg() leaves them). The original
+ * values are put back, and each entry's DMA address is set to its place in
+ * the IOVA range starting at @dma_addr. Returns the number of entries
+ * walked.
+ */
+static int __finalise_sg(struct device *dev, struct scatterlist *sg, int nents,
+               dma_addr_t dma_addr)
+{
+       struct scatterlist *s;
+       int i;
+
+       for_each_sg(sg, s, nents, i) {
+               /*
+                * Un-swizzling the fields here, hence the naming mismatch:
+                * the DMA fields carry the caller's offset/length, and
+                * s->length carries the mapped (padded) length.
+                */
+               unsigned int s_offset = sg_dma_address(s);
+               unsigned int s_length = sg_dma_len(s);
+               unsigned int s_dma_len = s->length;
+
+               s->offset = s_offset;
+               s->length = s_length;
+               sg_dma_address(s) = dma_addr + s_offset;
+               dma_addr += s_dma_len;
+       }
+       return i;
+}
+
+/*
+ * If mapping failed, then just restore the original list,
+ * but making sure the DMA fields are invalidated.
+ *
+ * An entry's offset/length are only restored from its DMA fields when those
+ * fields hold something other than the invalid markers (DMA_ERROR_CODE and
+ * a zero length); afterwards the DMA fields are reset to those markers.
+ */
+static void __invalidate_sg(struct scatterlist *sg, int nents)
+{
+       struct scatterlist *s;
+       int i;
+
+       for_each_sg(sg, s, nents, i) {
+               if (sg_dma_address(s) != DMA_ERROR_CODE)
+                       s->offset = sg_dma_address(s);
+               if (sg_dma_len(s))
+                       s->length = sg_dma_len(s);
+               sg_dma_address(s) = DMA_ERROR_CODE;
+               sg_dma_len(s) = 0;
+       }
+}
+
+/*
+ * The DMA API client is passing in a scatterlist which could describe
+ * any old buffer layout, but the IOMMU API requires everything to be
+ * aligned to IOMMU pages. Hence the need for this complicated bit of
+ * impedance-matching, to be able to hand off a suitably-aligned list,
+ * but still preserve the original offsets and sizes for the caller.
+ *
+ * Returns the value of __finalise_sg() on success. On failure the list is
+ * restored via __invalidate_sg() and 0 is returned.
+ */
+int iommu_dma_map_sg(struct device *dev, struct scatterlist *sg,
+               int nents, int prot)
+{
+       struct iommu_domain *domain = iommu_get_domain_for_dev(dev);
+       struct iova_domain *iovad = domain->iova_cookie;
+       struct iova *iova;
+       struct scatterlist *s, *prev = NULL;
+       dma_addr_t dma_addr;
+       size_t iova_len = 0;
+       int i;
+
+       /*
+        * Work out how much IOVA space we need, and align the segments to
+        * IOVA granules for the IOMMU driver to handle. With some clever
+        * trickery we can modify the list in-place, but reversibly, by
+        * hiding the original data in the as-yet-unused DMA fields.
+        */
+       for_each_sg(sg, s, nents, i) {
+               size_t s_offset = iova_offset(iovad, s->offset);
+               size_t s_length = s->length;
+
+               sg_dma_address(s) = s->offset;
+               sg_dma_len(s) = s_length;
+               s->offset -= s_offset;
+               s_length = iova_align(iovad, s_length + s_offset);
+               s->length = s_length;
+
+               /*
+                * The simple way to avoid the rare case of a segment
+                * crossing the boundary mask is to pad the previous one
+                * to end at a naturally-aligned IOVA for this one's size,
+                * at the cost of potentially over-allocating a little.
+                * The padding is the distance from the running total up to
+                * the next multiple of this segment's power-of-two size.
+                */
+               if (prev) {
+                       size_t pad_len = roundup_pow_of_two(s_length);
+
+                       pad_len = (pad_len - iova_len) & (pad_len - 1);
+                       prev->length += pad_len;
+                       iova_len += pad_len;
+               }
+
+               iova_len += s_length;
+               prev = s;
+       }
+
+       iova = __alloc_iova(iovad, iova_len, dma_get_mask(dev));
+       if (!iova)
+               goto out_restore_sg;
+
+       /*
+        * We'll leave any physical concatenation to the IOMMU driver's
+        * implementation - it knows better than we do. Anything short of
+        * the full padded length being mapped is treated as failure.
+        */
+       dma_addr = iova_dma_addr(iovad, iova);
+       if (iommu_map_sg(domain, dma_addr, sg, nents, prot) < iova_len)
+               goto out_free_iova;
+
+       return __finalise_sg(dev, sg, nents, dma_addr);
+
+out_free_iova:
+       __free_iova(iovad, iova);
+out_restore_sg:
+       __invalidate_sg(sg, nents);
+       return 0;
+}
+
+/*
+ * Unmap a scatterlist mapped by iommu_dma_map_sg(). @nents, @dir and @attrs
+ * are not used here.
+ */
+void iommu_dma_unmap_sg(struct device *dev, struct scatterlist *sg, int nents,
+               enum dma_data_direction dir, struct dma_attrs *attrs)
+{
+       struct iommu_domain *domain = iommu_get_domain_for_dev(dev);
+
+       /*
+        * All segments live in one contiguous IOVA allocation, so unmapping
+        * from the first entry's DMA address takes care of the whole list.
+        */
+       __iommu_dma_unmap(domain, sg_dma_address(sg));
+}
+
+/*
+ * Every @mask is reported as supported for now: 'special' IOMMUs which
+ * don't have the same addressing capability as the CPU will have to wait
+ * until we have some way to query that before they'll be able to use this
+ * framework.
+ */
+int iommu_dma_supported(struct device *dev, u64 mask)
+{
+       return 1;
+}
+
+/* Report (as 1/0) whether @dma_addr is the DMA_ERROR_CODE failure marker */
+int iommu_dma_mapping_error(struct device *dev, dma_addr_t dma_addr)
+{
+       if (dma_addr == DMA_ERROR_CODE)
+               return 1;
+
+       return 0;
+}
index 1d45293..da0e1e3 100644 (file)
@@ -923,7 +923,7 @@ static struct iommu_group *get_pci_device_group(struct pci_dev *pdev)
        pci_endpt_partioning = check_pci_ctl_endpt_part(pci_ctl);
        /* We can partition PCIe devices so assign device group to the device */
        if (pci_endpt_partioning) {
-               group = iommu_group_get_for_dev(&pdev->dev);
+               group = pci_device_group(&pdev->dev);
 
                /*
                 * PCIe controller is not a paritionable entity
@@ -956,44 +956,34 @@ static struct iommu_group *get_pci_device_group(struct pci_dev *pdev)
        return group;
 }
 
-static int fsl_pamu_add_device(struct device *dev)
+static struct iommu_group *fsl_pamu_device_group(struct device *dev)
 {
        struct iommu_group *group = ERR_PTR(-ENODEV);
-       struct pci_dev *pdev;
-       const u32 *prop;
-       int ret = 0, len;
+       int len;
 
        /*
         * For platform devices we allocate a separate group for
         * each of the devices.
         */
-       if (dev_is_pci(dev)) {
-               pdev = to_pci_dev(dev);
-               /* Don't create device groups for virtual PCI bridges */
-               if (pdev->subordinate)
-                       return 0;
+       if (dev_is_pci(dev))
+               group = get_pci_device_group(to_pci_dev(dev));
+       else if (of_get_property(dev->of_node, "fsl,liodn", &len))
+               group = get_device_iommu_group(dev);
 
-               group = get_pci_device_group(pdev);
+       return group;
+}
 
-       } else {
-               prop = of_get_property(dev->of_node, "fsl,liodn", &len);
-               if (prop)
-                       group = get_device_iommu_group(dev);
-       }
+static int fsl_pamu_add_device(struct device *dev)
+{
+       struct iommu_group *group;
 
+       group = iommu_group_get_for_dev(dev);
        if (IS_ERR(group))
                return PTR_ERR(group);
 
-       /*
-        * Check if device has already been added to an iommu group.
-        * Group could have already been created for a PCI device in
-        * the iommu_group_get_for_dev path.
-        */
-       if (!dev->iommu_group)
-               ret = iommu_group_add_device(group, dev);
-
        iommu_group_put(group);
-       return ret;
+
+       return 0;
 }
 
 static void fsl_pamu_remove_device(struct device *dev)
@@ -1072,6 +1062,7 @@ static const struct iommu_ops fsl_pamu_ops = {
        .domain_get_attr = fsl_pamu_get_domain_attr,
        .add_device     = fsl_pamu_add_device,
        .remove_device  = fsl_pamu_remove_device,
+       .device_group   = fsl_pamu_device_group,
 };
 
 int __init pamu_domain_init(void)
index d65cf42..16b243e 100644 (file)
@@ -34,6 +34,7 @@
 #include <linux/mempool.h>
 #include <linux/memory.h>
 #include <linux/timer.h>
+#include <linux/io.h>
 #include <linux/iova.h>
 #include <linux/iommu.h>
 #include <linux/intel-iommu.h>
@@ -2434,17 +2435,11 @@ static int iommu_domain_identity_map(struct dmar_domain *domain,
                                  DMA_PTE_READ|DMA_PTE_WRITE);
 }
 
-static int iommu_prepare_identity_map(struct device *dev,
-                                     unsigned long long start,
-                                     unsigned long long end)
+static int domain_prepare_identity_map(struct device *dev,
+                                      struct dmar_domain *domain,
+                                      unsigned long long start,
+                                      unsigned long long end)
 {
-       struct dmar_domain *domain;
-       int ret;
-
-       domain = get_domain_for_dev(dev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
-       if (!domain)
-               return -ENOMEM;
-
        /* For _hardware_ passthrough, don't bother. But for software
           passthrough, we do it anyway -- it may indicate a memory
           range which is reserved in E820, so which didn't get set
@@ -2464,8 +2459,7 @@ static int iommu_prepare_identity_map(struct device *dev,
                        dmi_get_system_info(DMI_BIOS_VENDOR),
                        dmi_get_system_info(DMI_BIOS_VERSION),
                     dmi_get_system_info(DMI_PRODUCT_VERSION));
-               ret = -EIO;
-               goto error;
+               return -EIO;
        }
 
        if (end >> agaw_to_width(domain->agaw)) {
@@ -2475,18 +2469,27 @@ static int iommu_prepare_identity_map(struct device *dev,
                     dmi_get_system_info(DMI_BIOS_VENDOR),
                     dmi_get_system_info(DMI_BIOS_VERSION),
                     dmi_get_system_info(DMI_PRODUCT_VERSION));
-               ret = -EIO;
-               goto error;
+               return -EIO;
        }
 
-       ret = iommu_domain_identity_map(domain, start, end);
-       if (ret)
-               goto error;
+       return iommu_domain_identity_map(domain, start, end);
+}
 
-       return 0;
+static int iommu_prepare_identity_map(struct device *dev,
+                                     unsigned long long start,
+                                     unsigned long long end)
+{
+       struct dmar_domain *domain;
+       int ret;
+
+       domain = get_domain_for_dev(dev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
+       if (!domain)
+               return -ENOMEM;
+
+       ret = domain_prepare_identity_map(dev, domain, start, end);
+       if (ret)
+               domain_exit(domain);
 
- error:
-       domain_exit(domain);
        return ret;
 }
 
@@ -2812,18 +2815,18 @@ static void intel_iommu_init_qi(struct intel_iommu *iommu)
 }
 
 static int copy_context_table(struct intel_iommu *iommu,
-                             struct root_entry __iomem *old_re,
+                             struct root_entry *old_re,
                              struct context_entry **tbl,
                              int bus, bool ext)
 {
        int tbl_idx, pos = 0, idx, devfn, ret = 0, did;
-       struct context_entry __iomem *old_ce = NULL;
        struct context_entry *new_ce = NULL, ce;
+       struct context_entry *old_ce = NULL;
        struct root_entry re;
        phys_addr_t old_ce_phys;
 
        tbl_idx = ext ? bus * 2 : bus;
-       memcpy_fromio(&re, old_re, sizeof(re));
+       memcpy(&re, old_re, sizeof(re));
 
        for (devfn = 0; devfn < 256; devfn++) {
                /* First calculate the correct index */
@@ -2858,7 +2861,8 @@ static int copy_context_table(struct intel_iommu *iommu,
                        }
 
                        ret = -ENOMEM;
-                       old_ce = ioremap_cache(old_ce_phys, PAGE_SIZE);
+                       old_ce = memremap(old_ce_phys, PAGE_SIZE,
+                                       MEMREMAP_WB);
                        if (!old_ce)
                                goto out;
 
@@ -2870,7 +2874,7 @@ static int copy_context_table(struct intel_iommu *iommu,
                }
 
                /* Now copy the context entry */
-               memcpy_fromio(&ce, old_ce + idx, sizeof(ce));
+               memcpy(&ce, old_ce + idx, sizeof(ce));
 
                if (!__context_present(&ce))
                        continue;
@@ -2906,7 +2910,7 @@ static int copy_context_table(struct intel_iommu *iommu,
        __iommu_flush_cache(iommu, new_ce, VTD_PAGE_SIZE);
 
 out_unmap:
-       iounmap(old_ce);
+       memunmap(old_ce);
 
 out:
        return ret;
@@ -2914,8 +2918,8 @@ out:
 
 static int copy_translation_tables(struct intel_iommu *iommu)
 {
-       struct root_entry __iomem *old_rt;
        struct context_entry **ctxt_tbls;
+       struct root_entry *old_rt;
        phys_addr_t old_rt_phys;
        int ctxt_table_entries;
        unsigned long flags;
@@ -2940,7 +2944,7 @@ static int copy_translation_tables(struct intel_iommu *iommu)
        if (!old_rt_phys)
                return -EINVAL;
 
-       old_rt = ioremap_cache(old_rt_phys, PAGE_SIZE);
+       old_rt = memremap(old_rt_phys, PAGE_SIZE, MEMREMAP_WB);
        if (!old_rt)
                return -ENOMEM;
 
@@ -2989,7 +2993,7 @@ static int copy_translation_tables(struct intel_iommu *iommu)
        ret = 0;
 
 out_unmap:
-       iounmap(old_rt);
+       memunmap(old_rt);
 
        return ret;
 }
@@ -3246,7 +3250,10 @@ static struct iova *intel_alloc_iova(struct device *dev,
 
 static struct dmar_domain *__get_valid_domain_for_dev(struct device *dev)
 {
+       struct dmar_rmrr_unit *rmrr;
        struct dmar_domain *domain;
+       struct device *i_dev;
+       int i, ret;
 
        domain = get_domain_for_dev(dev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
        if (!domain) {
@@ -3255,6 +3262,23 @@ static struct dmar_domain *__get_valid_domain_for_dev(struct device *dev)
                return NULL;
        }
 
+       /* We have a new domain - setup possible RMRRs for the device */
+       rcu_read_lock();
+       for_each_rmrr_units(rmrr) {
+               for_each_active_dev_scope(rmrr->devices, rmrr->devices_cnt,
+                                         i, i_dev) {
+                       if (i_dev != dev)
+                               continue;
+
+                       ret = domain_prepare_identity_map(dev, domain,
+                                                         rmrr->base_address,
+                                                         rmrr->end_address);
+                       if (ret)
+                               dev_err(dev, "Mapping reserved region failed\n");
+               }
+       }
+       rcu_read_unlock();
+
        return domain;
 }
 
@@ -4877,6 +4901,7 @@ static const struct iommu_ops intel_iommu_ops = {
        .iova_to_phys   = intel_iommu_iova_to_phys,
        .add_device     = intel_iommu_add_device,
        .remove_device  = intel_iommu_remove_device,
+       .device_group   = pci_device_group,
        .pgsize_bitmap  = INTEL_IOMMU_PGSIZES,
 };
 
index 9ec4e0d..1fae188 100644 (file)
@@ -169,8 +169,26 @@ static int modify_irte(struct irq_2_iommu *irq_iommu,
        index = irq_iommu->irte_index + irq_iommu->sub_handle;
        irte = &iommu->ir_table->base[index];
 
-       set_64bit(&irte->low, irte_modified->low);
-       set_64bit(&irte->high, irte_modified->high);
+#if defined(CONFIG_HAVE_CMPXCHG_DOUBLE)
+       if ((irte->pst == 1) || (irte_modified->pst == 1)) {
+               bool ret;
+
+               ret = cmpxchg_double(&irte->low, &irte->high,
+                                    irte->low, irte->high,
+                                    irte_modified->low, irte_modified->high);
+               /*
+                * We use cmpxchg16 to atomically update the 128-bit IRTE,
+                * and it cannot be updated by the hardware or other processors
+                * behind us, so the return value of cmpxchg16 should be the
+                * same as the old value.
+                */
+               WARN_ON(!ret);
+       } else
+#endif
+       {
+               set_64bit(&irte->low, irte_modified->low);
+               set_64bit(&irte->high, irte_modified->high);
+       }
        __iommu_flush_cache(iommu, irte, sizeof(*irte));
 
        rc = qi_flush_iec(iommu, index, 0);
@@ -384,7 +402,7 @@ static int set_msi_sid(struct irte *irte, struct pci_dev *dev)
 
 static int iommu_load_old_irte(struct intel_iommu *iommu)
 {
-       struct irte __iomem *old_ir_table;
+       struct irte *old_ir_table;
        phys_addr_t irt_phys;
        unsigned int i;
        size_t size;
@@ -408,12 +426,12 @@ static int iommu_load_old_irte(struct intel_iommu *iommu)
        size     = INTR_REMAP_TABLE_ENTRIES*sizeof(struct irte);
 
        /* Map the old IR table */
-       old_ir_table = ioremap_cache(irt_phys, size);
+       old_ir_table = memremap(irt_phys, size, MEMREMAP_WB);
        if (!old_ir_table)
                return -ENOMEM;
 
        /* Copy data over */
-       memcpy_fromio(iommu->ir_table->base, old_ir_table, size);
+       memcpy(iommu->ir_table->base, old_ir_table, size);
 
        __iommu_flush_cache(iommu, iommu->ir_table->base, size);
 
@@ -426,7 +444,7 @@ static int iommu_load_old_irte(struct intel_iommu *iommu)
                        bitmap_set(iommu->ir_table->bitmap, i, 1);
        }
 
-       iounmap(old_ir_table);
+       memunmap(old_ir_table);
 
        return 0;
 }
@@ -672,7 +690,7 @@ static int __init intel_prepare_irq_remapping(void)
        if (!dmar_ir_support())
                return -ENODEV;
 
-       if (parse_ioapics_under_ir() != 1) {
+       if (parse_ioapics_under_ir()) {
                pr_info("Not enabling interrupt remapping\n");
                goto error;
        }
@@ -727,7 +745,16 @@ static inline void set_irq_posting_cap(void)
        struct intel_iommu *iommu;
 
        if (!disable_irq_post) {
-               intel_irq_remap_ops.capability |= 1 << IRQ_POSTING_CAP;
+               /*
+                * If IRTE is in posted format, the 'pda' field goes across the
+                * 64-bit boundary, we need use cmpxchg16b to atomically update
+                * it. We only expose posted-interrupt when X86_FEATURE_CX16
+                * is supported. Actually, hardware platforms supporting PI
+                * should have X86_FEATURE_CX16 support, this has been confirmed
+                * with Intel hardware guys.
+                */
+               if ( cpu_has_cx16 )
+                       intel_irq_remap_ops.capability |= 1 << IRQ_POSTING_CAP;
 
                for_each_iommu(iommu, drhd)
                        if (!cap_pi_support(iommu->cap)) {
@@ -907,16 +934,21 @@ static int __init parse_ioapics_under_ir(void)
        bool ir_supported = false;
        int ioapic_idx;
 
-       for_each_iommu(iommu, drhd)
-               if (ecap_ir_support(iommu->ecap)) {
-                       if (ir_parse_ioapic_hpet_scope(drhd->hdr, iommu))
-                               return -1;
+       for_each_iommu(iommu, drhd) {
+               int ret;
 
-                       ir_supported = true;
-               }
+               if (!ecap_ir_support(iommu->ecap))
+                       continue;
+
+               ret = ir_parse_ioapic_hpet_scope(drhd->hdr, iommu);
+               if (ret)
+                       return ret;
+
+               ir_supported = true;
+       }
 
        if (!ir_supported)
-               return 0;
+               return -ENODEV;
 
        for (ioapic_idx = 0; ioapic_idx < nr_ioapics; ioapic_idx++) {
                int ioapic_id = mpc_ioapic_id(ioapic_idx);
@@ -928,7 +960,7 @@ static int __init parse_ioapics_under_ir(void)
                }
        }
 
-       return 1;
+       return 0;
 }
 
 static int __init ir_dev_scope_init(void)
index 049df49..abae363 100644 (file)
@@ -727,17 +727,36 @@ static int get_pci_alias_or_group(struct pci_dev *pdev, u16 alias, void *opaque)
        return data->group != NULL;
 }
 
+/*
+ * Generic device_group call-back function. It just allocates one
+ * iommu-group per device.
+ *
+ * Returns the newly allocated group, or the ERR_PTR() value produced by
+ * iommu_group_alloc() on failure. The error pointer is passed through
+ * unchanged: iommu_group_get_for_dev() only tests the result with IS_ERR()
+ * before dereferencing it, so a NULL return would be taken for a valid
+ * group.
+ */
+struct iommu_group *generic_device_group(struct device *dev)
+{
+       return iommu_group_alloc();
+}
+
 /*
  * Use standard PCI bus topology, isolation features, and DMA alias quirks
  * to find or create an IOMMU group for a device.
  */
-static struct iommu_group *iommu_group_get_for_pci_dev(struct pci_dev *pdev)
+struct iommu_group *pci_device_group(struct device *dev)
 {
+       struct pci_dev *pdev = to_pci_dev(dev);
        struct group_for_pci_data data;
        struct pci_bus *bus;
        struct iommu_group *group = NULL;
        u64 devfns[4] = { 0 };
 
+       if (WARN_ON(!dev_is_pci(dev)))
+               return ERR_PTR(-EINVAL);
+
        /*
         * Find the upstream DMA alias for the device.  A device must not
         * be aliased due to topology in order to have its own IOMMU group.
@@ -791,14 +810,6 @@ static struct iommu_group *iommu_group_get_for_pci_dev(struct pci_dev *pdev)
        if (IS_ERR(group))
                return NULL;
 
-       /*
-        * Try to allocate a default domain - needs support from the
-        * IOMMU driver.
-        */
-       group->default_domain = __iommu_domain_alloc(pdev->dev.bus,
-                                                    IOMMU_DOMAIN_DMA);
-       group->domain = group->default_domain;
-
        return group;
 }
 
@@ -814,6 +825,7 @@ static struct iommu_group *iommu_group_get_for_pci_dev(struct pci_dev *pdev)
  */
 struct iommu_group *iommu_group_get_for_dev(struct device *dev)
 {
+       const struct iommu_ops *ops = dev->bus->iommu_ops;
        struct iommu_group *group;
        int ret;
 
@@ -821,14 +833,24 @@ struct iommu_group *iommu_group_get_for_dev(struct device *dev)
        if (group)
                return group;
 
-       if (!dev_is_pci(dev))
-               return ERR_PTR(-EINVAL);
+       group = ERR_PTR(-EINVAL);
 
-       group = iommu_group_get_for_pci_dev(to_pci_dev(dev));
+       if (ops && ops->device_group)
+               group = ops->device_group(dev);
 
        if (IS_ERR(group))
                return group;
 
+       /*
+        * Try to allocate a default domain - needs support from the
+        * IOMMU driver.
+        */
+       if (!group->default_domain) {
+               group->default_domain = __iommu_domain_alloc(dev->bus,
+                                                            IOMMU_DOMAIN_DMA);
+               group->domain = group->default_domain;
+       }
+
        ret = iommu_group_add_device(group, dev);
        if (ret) {
                iommu_group_put(group);
index 36d0033..3dc5b65 100644 (file)
@@ -26,6 +26,8 @@
 #include <linux/of_iommu.h>
 #include <linux/of_irq.h>
 #include <linux/of_platform.h>
+#include <linux/regmap.h>
+#include <linux/mfd/syscon.h>
 
 #include <asm/cacheflush.h>
 
@@ -112,6 +114,18 @@ void omap_iommu_restore_ctx(struct device *dev)
 }
 EXPORT_SYMBOL_GPL(omap_iommu_restore_ctx);
 
+/*
+ * Set or clear the enable bit of this MMU instance in the
+ * DSP_SYS_MMU_CONFIG register. The bit position is derived from obj->id.
+ * Nothing is done when the instance has no syscon regmap (obj->syscfg is
+ * NULL).
+ */
+static void dra7_cfg_dspsys_mmu(struct omap_iommu *obj, bool enable)
+{
+       u32 en_bit;
+
+       if (obj->syscfg) {
+               en_bit = (1 << (obj->id * DSP_SYS_MMU_CONFIG_EN_SHIFT));
+               regmap_update_bits(obj->syscfg, DSP_SYS_MMU_CONFIG, en_bit,
+                                  enable ? en_bit : 0);
+       }
+}
+
 static void __iommu_set_twl(struct omap_iommu *obj, bool on)
 {
        u32 l = iommu_read_reg(obj, MMU_CNTL);
@@ -147,6 +161,8 @@ static int omap2_iommu_enable(struct omap_iommu *obj)
 
        iommu_write_reg(obj, pa, MMU_TTB);
 
+       dra7_cfg_dspsys_mmu(obj, true);
+
        if (obj->has_bus_err_back)
                iommu_write_reg(obj, MMU_GP_REG_BUS_ERR_BACK_EN, MMU_GP_REG);
 
@@ -161,6 +177,7 @@ static void omap2_iommu_disable(struct omap_iommu *obj)
 
        l &= ~MMU_CNTL_MASK;
        iommu_write_reg(obj, l, MMU_CNTL);
+       dra7_cfg_dspsys_mmu(obj, false);
 
        dev_dbg(obj->dev, "%s is shutting down\n", obj->name);
 }
@@ -864,6 +881,42 @@ static void omap_iommu_detach(struct omap_iommu *obj)
        dev_dbg(obj->dev, "%s: %s\n", __func__, obj->name);
 }
 
+/*
+ * Fetch the syscon regmap and the MMU instance id for a
+ * "ti,dra7-dsp-iommu" node. Nodes with any other compatible string are
+ * left untouched and report success.
+ *
+ * Returns 0 on success, -EINVAL when the "ti,syscon-mmuconfig" property is
+ * missing, carries no instance id cell, or names an id other than 0 or 1,
+ * and otherwise the error from syscon_regmap_lookup_by_phandle().
+ */
+static int omap_iommu_dra7_get_dsp_system_cfg(struct platform_device *pdev,
+                                             struct omap_iommu *obj)
+{
+       struct device *dev = &pdev->dev;
+       struct device_node *node = dev->of_node;
+
+       if (!of_device_is_compatible(node, "ti,dra7-dsp-iommu"))
+               return 0;
+
+       if (!of_property_read_bool(node, "ti,syscon-mmuconfig")) {
+               dev_err(dev, "ti,syscon-mmuconfig property is missing\n");
+               return -EINVAL;
+       }
+
+       obj->syscfg = syscon_regmap_lookup_by_phandle(node,
+                                                     "ti,syscon-mmuconfig");
+       /* can fail with -EPROBE_DEFER */
+       if (IS_ERR(obj->syscfg))
+               return PTR_ERR(obj->syscfg);
+
+       /* the second cell of the property is the instance id */
+       if (of_property_read_u32_index(node, "ti,syscon-mmuconfig", 1,
+                                      &obj->id)) {
+               dev_err(dev, "couldn't get the IOMMU instance id within subsystem\n");
+               return -EINVAL;
+       }
+
+       /* obj->id is unsigned, so this rejects everything but 0 and 1 */
+       if (obj->id > 1) {
+               dev_err(dev, "invalid IOMMU instance id\n");
+               return -EINVAL;
+       }
+
+       return 0;
+}
+
 /*
  *     OMAP Device MMU(IOMMU) detection
  */
@@ -907,6 +960,10 @@ static int omap_iommu_probe(struct platform_device *pdev)
        if (IS_ERR(obj->regbase))
                return PTR_ERR(obj->regbase);
 
+       err = omap_iommu_dra7_get_dsp_system_cfg(pdev, obj);
+       if (err)
+               return err;
+
        irq = platform_get_irq(pdev, 0);
        if (irq < 0)
                return -ENODEV;
@@ -943,6 +1000,7 @@ static const struct of_device_id omap_iommu_of_match[] = {
        { .compatible = "ti,omap2-iommu" },
        { .compatible = "ti,omap4-iommu" },
        { .compatible = "ti,dra7-iommu" },
+       { .compatible = "ti,dra7-dsp-iommu" },
        {},
 };
 
index a656df2..59628e5 100644 (file)
@@ -30,6 +30,7 @@ struct iotlb_entry {
 struct omap_iommu {
        const char      *name;
        void __iomem    *regbase;
+       struct regmap   *syscfg;
        struct device   *dev;
        struct iommu_domain *domain;
        struct dentry   *debug_dir;
@@ -48,6 +49,7 @@ struct omap_iommu {
        void *ctx; /* iommu context: registres saved area */
 
        int has_bus_err_back;
+       u32 id;
 };
 
 struct cr_regs {
@@ -158,6 +160,13 @@ static inline struct omap_iommu *dev_to_omap_iommu(struct device *dev)
         ((pgsz) == MMU_CAM_PGSZ_64K) ? 0xffff0000 :    \
         ((pgsz) == MMU_CAM_PGSZ_4K)  ? 0xfffff000 : 0)
 
+/*
+ * DSP_SYSTEM registers and bit definitions (applicable only for DRA7xx DSP)
+ */
+#define DSP_SYS_REVISION               0x00
+#define DSP_SYS_MMU_CONFIG             0x18
+#define DSP_SYS_MMU_CONFIG_EN_SHIFT    4
+
 /*
  * utilities for super page(16MB, 1MB, 64KB and 4KB)
  */
diff --git a/drivers/iommu/s390-iommu.c b/drivers/iommu/s390-iommu.c
new file mode 100644 (file)
index 0000000..cbe198c
--- /dev/null
@@ -0,0 +1,337 @@
+/*
+ * IOMMU API for s390 PCI devices
+ *
+ * Copyright IBM Corp. 2015
+ * Author(s): Gerald Schaefer <gerald.schaefer@de.ibm.com>
+ */
+
+#include <linux/pci.h>
+#include <linux/iommu.h>
+#include <linux/iommu-helper.h>
+#include <linux/pci.h>
+#include <linux/sizes.h>
+#include <asm/pci_dma.h>
+
+/*
+ * Physically contiguous memory regions can be mapped with 4 KiB alignment,
+ * so we allow every page size that is a power-of-two multiple of 4 KiB (no
+ * special large page support so far).
+ */
+#define S390_IOMMU_PGSIZES     (~0xFFFUL)
+
+struct s390_domain {           /* driver-private state wrapped around an iommu_domain */
+       struct iommu_domain     domain;         /* embedded generic domain, see to_s390_domain() */
+       struct list_head        devices;        /* list of s390_domain_device entries */
+       unsigned long           *dma_table;     /* table from dma_alloc_cpu_table(), handed to attached devices */
+       spinlock_t              dma_table_lock; /* taken while dma_table is updated or walked */
+       spinlock_t              list_lock;      /* taken while the devices list is changed or walked */
+};
+
+struct s390_domain_device {    /* one entry per device attached to an s390_domain */
+       struct list_head        list;   /* link in s390_domain.devices */
+       struct zpci_dev         *zdev;  /* the attached device */
+};
+
+/* Map a generic iommu_domain back to the s390_domain that embeds it. */
+static struct s390_domain *to_s390_domain(struct iommu_domain *dom)
+{
+       struct s390_domain *s390_domain;
+
+       s390_domain = container_of(dom, struct s390_domain, domain);
+
+       return s390_domain;
+}
+
+/*
+ * Capability query: cache coherency and interrupt remapping are reported
+ * as supported, every other capability as unsupported.
+ */
+static bool s390_iommu_capable(enum iommu_cap cap)
+{
+       return cap == IOMMU_CAP_CACHE_COHERENCY ||
+              cap == IOMMU_CAP_INTR_REMAP;
+}
+
+/*
+ * Allocate an s390 domain together with its DMA translation table.
+ *
+ * Only IOMMU_DOMAIN_UNMANAGED is accepted. Returns the embedded
+ * iommu_domain, or NULL for any other domain type or when an allocation
+ * fails.
+ */
+struct iommu_domain *s390_domain_alloc(unsigned domain_type)
+{
+       struct s390_domain *dom;
+
+       if (domain_type != IOMMU_DOMAIN_UNMANAGED)
+               return NULL;
+
+       dom = kzalloc(sizeof(*dom), GFP_KERNEL);
+       if (!dom)
+               return NULL;
+
+       dom->dma_table = dma_alloc_cpu_table();
+       if (!dom->dma_table)
+               goto out_free;
+
+       INIT_LIST_HEAD(&dom->devices);
+       spin_lock_init(&dom->list_lock);
+       spin_lock_init(&dom->dma_table_lock);
+
+       return &dom->domain;
+
+out_free:
+       kfree(dom);
+
+       return NULL;
+}
+
+/* Release the domain's DMA translation table and then the domain itself. */
+void s390_domain_free(struct iommu_domain *domain)
+{
+       struct s390_domain *dom;
+
+       dom = to_s390_domain(domain);
+       dma_cleanup_tables(dom->dma_table);
+       kfree(dom);
+}
+
+/*
+ * Attach @dev to @domain: the device's current DMA table is torn down via
+ * zpci_dma_exit_device(), the domain's table is handed to
+ * zpci_register_ioat(), and the device is added to the domain's list.
+ *
+ * The first device attached fixes the domain aperture to its own DMA
+ * range; every later device must have exactly the same range.
+ *
+ * Returns 0 on success, -ENODEV when the PCI device has no zpci_dev,
+ * -ENOMEM when the list entry cannot be allocated, -EINVAL on a DMA range
+ * mismatch, or the error from zpci_register_ioat(). In the last two cases
+ * zpci_dma_init_device() is called for the device before returning.
+ */
+static int s390_iommu_attach_device(struct iommu_domain *domain,
+                                   struct device *dev)
+{
+       struct s390_domain *s390_domain = to_s390_domain(domain);
+       struct zpci_dev *zdev = to_pci_dev(dev)->sysdata;
+       struct s390_domain_device *entry;
+       unsigned long irq_flags;
+       bool first;
+       int rc;
+
+       if (!zdev)
+               return -ENODEV;
+
+       entry = kzalloc(sizeof(*entry), GFP_KERNEL);
+       if (!entry)
+               return -ENOMEM;
+
+       if (zdev->dma_table)
+               zpci_dma_exit_device(zdev);
+
+       zdev->dma_table = s390_domain->dma_table;
+       rc = zpci_register_ioat(zdev, 0, zdev->start_dma + PAGE_OFFSET,
+                               zdev->start_dma + zdev->iommu_size - 1,
+                               (u64) zdev->dma_table);
+       if (rc)
+               goto out_restore;
+
+       spin_lock_irqsave(&s390_domain->list_lock, irq_flags);
+       first = list_empty(&s390_domain->devices);
+       if (!first && (domain->geometry.aperture_start != zdev->start_dma ||
+                      domain->geometry.aperture_end != zdev->end_dma)) {
+               /* Allow only devices with identical DMA range limits */
+               rc = -EINVAL;
+               spin_unlock_irqrestore(&s390_domain->list_lock, irq_flags);
+               goto out_restore;
+       }
+       if (first) {
+               /* First device defines the DMA range limits */
+               domain->geometry.aperture_start = zdev->start_dma;
+               domain->geometry.aperture_end = zdev->end_dma;
+               domain->geometry.force_aperture = true;
+       }
+       entry->zdev = zdev;
+       zdev->s390_domain = s390_domain;
+       list_add(&entry->list, &s390_domain->devices);
+       spin_unlock_irqrestore(&s390_domain->list_lock, irq_flags);
+
+       return 0;
+
+out_restore:
+       zpci_dma_init_device(zdev);
+       kfree(entry);
+
+       return rc;
+}
+
+/*
+ * Detach @dev from @domain. If the device is found on the domain's list,
+ * its entry is removed and freed, the translation table is unregistered
+ * with zpci_unregister_ioat(), and zpci_dma_init_device() is called for
+ * the device. A device that is not on the list is left untouched.
+ */
+static void s390_iommu_detach_device(struct iommu_domain *domain,
+                                    struct device *dev)
+{
+       struct s390_domain *s390_domain = to_s390_domain(domain);
+       struct zpci_dev *zdev = to_pci_dev(dev)->sysdata;
+       struct s390_domain_device *entry, *next;
+       unsigned long irq_flags;
+       bool removed = false;
+
+       if (!zdev)
+               return;
+
+       spin_lock_irqsave(&s390_domain->list_lock, irq_flags);
+       list_for_each_entry_safe(entry, next, &s390_domain->devices, list) {
+               if (entry->zdev != zdev)
+                       continue;
+
+               list_del(&entry->list);
+               kfree(entry);
+               removed = true;
+               break;
+       }
+       spin_unlock_irqrestore(&s390_domain->list_lock, irq_flags);
+
+       if (!removed)
+               return;
+
+       zdev->s390_domain = NULL;
+       zpci_unregister_ioat(zdev, 0);
+       zpci_dma_init_device(zdev);
+}
+
+/*
+ * Put @dev into an IOMMU group, allocating a fresh group when the device
+ * does not have one yet. The local group reference is dropped again
+ * before returning.
+ *
+ * Returns the result of iommu_group_add_device(), or the error from
+ * iommu_group_alloc().
+ */
+static int s390_iommu_add_device(struct device *dev)
+{
+       struct iommu_group *grp = iommu_group_get(dev);
+       int err;
+
+       if (!grp) {
+               grp = iommu_group_alloc();
+               if (IS_ERR(grp))
+                       return PTR_ERR(grp);
+       }
+
+       err = iommu_group_add_device(grp, dev);
+       iommu_group_put(grp);
+
+       return err;
+}
+
+/* Remove @dev from its IOMMU group, detaching it from its domain first. */
+static void s390_iommu_remove_device(struct device *dev)
+{
+       struct zpci_dev *zdev = to_pci_dev(dev)->sysdata;
+       struct iommu_domain *domain = NULL;
+
+       /*
+        * Workaround: the IOMMU API common code can "forget" to call the
+        * detach_dev callback. After a device has been bound to vfio-pci
+        * and the VFIO_SET_IOMMU ioctl has completed (which triggers
+        * attach_dev), removing it through
+        * "echo 1 > /sys/bus/pci/devices/.../remove" does not trigger
+        * detach_dev; only remove_device is called, via the
+        * BUS_NOTIFY_REMOVED_DEVICE notifier.
+        *
+        * So do the detach here if the device is still attached.
+        */
+       if (zdev && zdev->s390_domain)
+               domain = iommu_get_domain_for_dev(dev);
+       if (domain)
+               s390_iommu_detach_device(domain, dev);
+
+       iommu_group_remove_device(dev);
+}
+
+/*
+ * Update the translation entries for the DMA range starting at @dma_addr
+ * and spanning @size bytes (rounded up to whole pages) in the domain's
+ * DMA table, using @flags and consecutive pages starting at the page that
+ * contains @pa. Afterwards zpci_refresh_trans() is called for every
+ * device attached to the domain.
+ *
+ * geometry.aperture_end is the last valid address of the aperture (it is
+ * compared inclusively in s390_iommu_iova_to_phys()), so the range check
+ * is made against the last byte of the request rather than one past it.
+ * The comparison is written without adding @dma_addr and @size so that it
+ * cannot wrap around.
+ *
+ * Returns 0 on success or when the request covers no pages, -EINVAL when
+ * the range does not lie inside the aperture, or the first error reported
+ * by zpci_refresh_trans().
+ */
+static int s390_iommu_update_trans(struct s390_domain *s390_domain,
+                                  unsigned long pa, dma_addr_t dma_addr,
+                                  size_t size, int flags)
+{
+       struct iommu_domain_geometry *geo = &s390_domain->domain.geometry;
+       struct s390_domain_device *domain_device;
+       u8 *page_addr = (u8 *) (pa & PAGE_MASK);
+       dma_addr_t start_dma_addr = dma_addr;
+       unsigned long irq_flags, nr_pages, i;
+       int rc = 0;
+
+       if (dma_addr < geo->aperture_start || dma_addr > geo->aperture_end ||
+           (size && size - 1 > geo->aperture_end - dma_addr))
+               return -EINVAL;
+
+       nr_pages = PAGE_ALIGN(size) >> PAGE_SHIFT;
+       if (!nr_pages)
+               return 0;
+
+       spin_lock_irqsave(&s390_domain->dma_table_lock, irq_flags);
+       for (i = 0; i < nr_pages; i++) {
+               dma_update_cpu_trans(s390_domain->dma_table, page_addr,
+                                    dma_addr, flags);
+               page_addr += PAGE_SIZE;
+               dma_addr += PAGE_SIZE;
+       }
+
+       /* list_lock nests inside dma_table_lock; interrupts are already off */
+       spin_lock(&s390_domain->list_lock);
+       list_for_each_entry(domain_device, &s390_domain->devices, list) {
+               rc = zpci_refresh_trans((u64) domain_device->zdev->fh << 32,
+                                       start_dma_addr, nr_pages * PAGE_SIZE);
+               if (rc)
+                       break;
+       }
+       spin_unlock(&s390_domain->list_lock);
+       spin_unlock_irqrestore(&s390_domain->dma_table_lock, irq_flags);
+
+       return rc;
+}
+
+/*
+ * Map @size bytes at @iova to @paddr. Mappings without IOMMU_READ are
+ * rejected with -EINVAL; mappings without IOMMU_WRITE are entered with
+ * ZPCI_TABLE_PROTECTED set. Otherwise returns the result of
+ * s390_iommu_update_trans().
+ */
+static int s390_iommu_map(struct iommu_domain *domain, unsigned long iova,
+                         phys_addr_t paddr, size_t size, int prot)
+{
+       int pte_flags = ZPCI_PTE_VALID;
+
+       if (!(prot & IOMMU_READ))
+               return -EINVAL;
+
+       if (!(prot & IOMMU_WRITE))
+               pte_flags |= ZPCI_TABLE_PROTECTED;
+
+       return s390_iommu_update_trans(to_s390_domain(domain),
+                                      (unsigned long) paddr, iova, size,
+                                      pte_flags);
+}
+
+/*
+ * Walk the domain's DMA table for @iova and return the address stored in
+ * the matching page table entry, masked with ZPCI_PTE_ADDR_MASK. Returns 0
+ * when @iova lies outside the aperture or any level of the walk has no
+ * valid entry.
+ */
+static phys_addr_t s390_iommu_iova_to_phys(struct iommu_domain *domain,
+                                          dma_addr_t iova)
+{
+       struct s390_domain *s390_domain = to_s390_domain(domain);
+       unsigned long *rto, *sto, *pto, irq_flags;
+       unsigned int rtx, sx, px;
+       phys_addr_t phys = 0;
+
+       if (iova < domain->geometry.aperture_start ||
+           iova > domain->geometry.aperture_end)
+               return 0;
+
+       rtx = calc_rtx(iova);
+       sx = calc_sx(iova);
+       px = calc_px(iova);
+       rto = s390_domain->dma_table;
+
+       spin_lock_irqsave(&s390_domain->dma_table_lock, irq_flags);
+       if (!rto || !reg_entry_isvalid(rto[rtx]))
+               goto out_unlock;
+
+       sto = get_rt_sto(rto[rtx]);
+       if (!sto || !reg_entry_isvalid(sto[sx]))
+               goto out_unlock;
+
+       pto = get_st_pto(sto[sx]);
+       if (pto && pt_entry_isvalid(pto[px]))
+               phys = pto[px] & ZPCI_PTE_ADDR_MASK;
+
+out_unlock:
+       spin_unlock_irqrestore(&s390_domain->dma_table_lock, irq_flags);
+
+       return phys;
+}
+
+/*
+ * Invalidate the translation for @size bytes starting at @iova. Returns
+ * @size on success, or 0 when @iova has no translation or the table
+ * update fails.
+ */
+static size_t s390_iommu_unmap(struct iommu_domain *domain,
+                              unsigned long iova, size_t size)
+{
+       phys_addr_t paddr = s390_iommu_iova_to_phys(domain, iova);
+
+       if (!paddr)
+               return 0;
+
+       if (s390_iommu_update_trans(to_s390_domain(domain),
+                                   (unsigned long) paddr, iova, size,
+                                   ZPCI_PTE_INVALID))
+               return 0;
+
+       return size;
+}
+
+/*
+ * IOMMU API callbacks for s390 PCI devices, installed on the PCI bus by
+ * s390_iommu_init(). The table is never modified, so it is const.
+ */
+static const struct iommu_ops s390_iommu_ops = {
+       .capable = s390_iommu_capable,
+       .domain_alloc = s390_domain_alloc,
+       .domain_free = s390_domain_free,
+       .attach_dev = s390_iommu_attach_device,
+       .detach_dev = s390_iommu_detach_device,
+       .map = s390_iommu_map,
+       .unmap = s390_iommu_unmap,
+       .iova_to_phys = s390_iommu_iova_to_phys,
+       .add_device = s390_iommu_add_device,
+       .remove_device = s390_iommu_remove_device,
+       .pgsize_bitmap = S390_IOMMU_PGSIZES,
+};
+
+/* Install the s390 IOMMU callbacks on the PCI bus at subsys init time. */
+static int __init s390_iommu_init(void)
+{
+       int rc;
+
+       rc = bus_set_iommu(&pci_bus_type, &s390_iommu_ops);
+
+       return rc;
+}
+subsys_initcall(s390_iommu_init);
diff --git a/include/linux/dma-iommu.h b/include/linux/dma-iommu.h
new file mode 100644 (file)
index 0000000..fc48103
--- /dev/null
@@ -0,0 +1,85 @@
+/*
+ * Copyright (C) 2014-2015 ARM Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+#ifndef __DMA_IOMMU_H
+#define __DMA_IOMMU_H
+
+#ifdef __KERNEL__
+#include <asm/errno.h>
+
+#ifdef CONFIG_IOMMU_DMA
+#include <linux/iommu.h>
+
+int iommu_dma_init(void);
+
+/* Domain management interface for IOMMU drivers */
+int iommu_get_dma_cookie(struct iommu_domain *domain);
+void iommu_put_dma_cookie(struct iommu_domain *domain);
+
+/* Setup call for arch DMA mapping code */
+int iommu_dma_init_domain(struct iommu_domain *domain, dma_addr_t base, u64 size);
+
+/* General helpers for DMA-API <-> IOMMU-API interaction */
+int dma_direction_to_prot(enum dma_data_direction dir, bool coherent);
+
+/*
+ * These implement the bulk of the relevant DMA mapping callbacks, but require
+ * the arch code to take care of attributes and cache maintenance
+ */
+struct page **iommu_dma_alloc(struct device *dev, size_t size,
+               gfp_t gfp, int prot, dma_addr_t *handle,
+               void (*flush_page)(struct device *, const void *, phys_addr_t));
+void iommu_dma_free(struct device *dev, struct page **pages, size_t size,
+               dma_addr_t *handle);
+
+int iommu_dma_mmap(struct page **pages, size_t size, struct vm_area_struct *vma);
+
+dma_addr_t iommu_dma_map_page(struct device *dev, struct page *page,
+               unsigned long offset, size_t size, int prot);
+int iommu_dma_map_sg(struct device *dev, struct scatterlist *sg,
+               int nents, int prot);
+
+/*
+ * Arch code with no special attribute handling may use these
+ * directly as DMA mapping callbacks for simplicity
+ */
+void iommu_dma_unmap_page(struct device *dev, dma_addr_t handle, size_t size,
+               enum dma_data_direction dir, struct dma_attrs *attrs);
+void iommu_dma_unmap_sg(struct device *dev, struct scatterlist *sg, int nents,
+               enum dma_data_direction dir, struct dma_attrs *attrs);
+int iommu_dma_supported(struct device *dev, u64 mask);
+int iommu_dma_mapping_error(struct device *dev, dma_addr_t dma_addr);
+
+#else
+
+struct iommu_domain;
+
+static inline int iommu_dma_init(void) /* stub used when CONFIG_IOMMU_DMA is not set */
+{
+       return 0;       /* nothing to initialise; report success */
+}
+
+static inline int iommu_get_dma_cookie(struct iommu_domain *domain) /* stub used when CONFIG_IOMMU_DMA is not set */
+{
+       return -ENODEV; /* always fails in this configuration */
+}
+
+static inline void iommu_put_dma_cookie(struct iommu_domain *domain) /* stub used when CONFIG_IOMMU_DMA is not set */
+{      /* intentionally empty: nothing to release */
+}
+
+#endif /* CONFIG_IOMMU_DMA */
+#endif /* __KERNEL__ */
+#endif /* __DMA_IOMMU_H */
index f9c1b6d..f28dff3 100644 (file)
@@ -81,6 +81,7 @@ struct iommu_domain {
        iommu_fault_handler_t handler;
        void *handler_token;
        struct iommu_domain_geometry geometry;
+       void *iova_cookie;
 };
 
 enum iommu_cap {
@@ -167,7 +168,7 @@ struct iommu_ops {
        phys_addr_t (*iova_to_phys)(struct iommu_domain *domain, dma_addr_t iova);
        int (*add_device)(struct device *dev);
        void (*remove_device)(struct device *dev);
-       int (*device_group)(struct device *dev, unsigned int *groupid);
+       struct iommu_group *(*device_group)(struct device *dev);
        int (*domain_get_attr)(struct iommu_domain *domain,
                               enum iommu_attr attr, void *data);
        int (*domain_set_attr)(struct iommu_domain *domain,
@@ -316,6 +317,11 @@ static inline size_t iommu_map_sg(struct iommu_domain *domain,
        return domain->ops->map_sg(domain, iova, sg, nents, prot);
 }
 
+/* PCI device grouping function */
+extern struct iommu_group *pci_device_group(struct device *dev);
+/* Generic device grouping function */
+extern struct iommu_group *generic_device_group(struct device *dev);
+
 #else /* CONFIG_IOMMU_API */
 
 struct iommu_ops {};