1 /*
2  * Copyright © 2006-2014 Intel Corporation.
3  *
4  * This program is free software; you can redistribute it and/or modify it
5  * under the terms and conditions of the GNU General Public License,
6  * version 2, as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope it will be useful, but WITHOUT
9  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
11  * more details.
12  *
13  * Authors: David Woodhouse <dwmw2@infradead.org>,
14  *          Ashok Raj <ashok.raj@intel.com>,
15  *          Shaohua Li <shaohua.li@intel.com>,
16  *          Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>,
17  *          Fenghua Yu <fenghua.yu@intel.com>
18  */
19
20 #include <linux/init.h>
21 #include <linux/bitmap.h>
22 #include <linux/debugfs.h>
23 #include <linux/export.h>
24 #include <linux/slab.h>
25 #include <linux/irq.h>
26 #include <linux/interrupt.h>
27 #include <linux/spinlock.h>
28 #include <linux/pci.h>
29 #include <linux/dmar.h>
30 #include <linux/dma-mapping.h>
31 #include <linux/mempool.h>
32 #include <linux/memory.h>
33 #include <linux/timer.h>
34 #include <linux/iova.h>
35 #include <linux/iommu.h>
36 #include <linux/intel-iommu.h>
37 #include <linux/syscore_ops.h>
38 #include <linux/tboot.h>
39 #include <linux/dmi.h>
40 #include <linux/pci-ats.h>
41 #include <linux/memblock.h>
42 #include <asm/irq_remapping.h>
43 #include <asm/cacheflush.h>
44 #include <asm/iommu.h>
45
46 #include "irq_remapping.h"
47 #include "pci.h"
48
49 #define ROOT_SIZE               VTD_PAGE_SIZE
50 #define CONTEXT_SIZE            VTD_PAGE_SIZE
51
52 #define IS_GFX_DEVICE(pdev) ((pdev->class >> 16) == PCI_BASE_CLASS_DISPLAY)
53 #define IS_ISA_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA)
54 #define IS_AZALIA(pdev) ((pdev)->vendor == 0x8086 && (pdev)->device == 0x3a3e)
55
56 #define IOAPIC_RANGE_START      (0xfee00000)
57 #define IOAPIC_RANGE_END        (0xfeefffff)
58 #define IOVA_START_ADDR         (0x1000)
59
60 #define DEFAULT_DOMAIN_ADDRESS_WIDTH 48
61
62 #define MAX_AGAW_WIDTH 64
63 #define MAX_AGAW_PFN_WIDTH      (MAX_AGAW_WIDTH - VTD_PAGE_SHIFT)
64
65 #define __DOMAIN_MAX_PFN(gaw)  ((((uint64_t)1) << (gaw-VTD_PAGE_SHIFT)) - 1)
66 #define __DOMAIN_MAX_ADDR(gaw) ((((uint64_t)1) << gaw) - 1)
67
68 /* We limit DOMAIN_MAX_PFN to fit in an unsigned long, and DOMAIN_MAX_ADDR
69    to match. That way, we can use 'unsigned long' for PFNs with impunity. */
70 #define DOMAIN_MAX_PFN(gaw)     ((unsigned long) min_t(uint64_t, \
71                                 __DOMAIN_MAX_PFN(gaw), (unsigned long)-1))
72 #define DOMAIN_MAX_ADDR(gaw)    (((uint64_t)__DOMAIN_MAX_PFN(gaw)) << VTD_PAGE_SHIFT)
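/*
 * Worked example (illustrative, not part of the driver): with the default
 * 48-bit guest address width,
 *
 *	__DOMAIN_MAX_PFN(48) == (1ULL << 36) - 1 == 0xFFFFFFFFF
 *	DOMAIN_MAX_ADDR(48)  == 0xFFFFFFFFF000
 *
 * On 64-bit builds the min_t() clamp is a no-op; on 32-bit it caps the
 * value so a PFN always fits in an unsigned long.
 */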
73
74 #define IOVA_PFN(addr)          ((addr) >> PAGE_SHIFT)
75 #define DMA_32BIT_PFN           IOVA_PFN(DMA_BIT_MASK(32))
76 #define DMA_64BIT_PFN           IOVA_PFN(DMA_BIT_MASK(64))
77
78 /* page table handling */
79 #define LEVEL_STRIDE            (9)
80 #define LEVEL_MASK              (((u64)1 << LEVEL_STRIDE) - 1)
81
82 /*
83  * This bitmap is used to advertise the page sizes our hardware supports
84  * to the IOMMU core, which will then use this information to split
85  * physically contiguous memory regions it is mapping into page sizes
86  * that we support.
87  *
88  * Traditionally the IOMMU core just handed us the mappings directly,
89  * after making sure the size is an order of a 4KiB page and that the
90  * mapping has natural alignment.
91  *
92  * To retain this behavior, we currently advertise that we support
93  * all page sizes that are an order of 4KiB.
94  *
95  * If at some point we'd like to utilize the IOMMU core's new behavior,
96  * we could change this to advertise the real page sizes we support.
97  */
98 #define INTEL_IOMMU_PGSIZES     (~0xFFFUL)
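/*
 * Illustrative note: ~0xFFFUL sets every bit from 12 upwards, so the bitmap
 * advertises 4KiB, 8KiB, 16KiB, ... i.e. every power-of-two size that is a
 * multiple of 4KiB, matching the behaviour described above.
 */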
99
100 static inline int agaw_to_level(int agaw)
101 {
102         return agaw + 2;
103 }
104
105 static inline int agaw_to_width(int agaw)
106 {
107         return min_t(int, 30 + agaw * LEVEL_STRIDE, MAX_AGAW_WIDTH);
108 }
109
110 static inline int width_to_agaw(int width)
111 {
112         return DIV_ROUND_UP(width - 30, LEVEL_STRIDE);
113 }
114
115 static inline unsigned int level_to_offset_bits(int level)
116 {
117         return (level - 1) * LEVEL_STRIDE;
118 }
119
120 static inline int pfn_level_offset(unsigned long pfn, int level)
121 {
122         return (pfn >> level_to_offset_bits(level)) & LEVEL_MASK;
123 }
124
125 static inline unsigned long level_mask(int level)
126 {
127         return -1UL << level_to_offset_bits(level);
128 }
129
130 static inline unsigned long level_size(int level)
131 {
132         return 1UL << level_to_offset_bits(level);
133 }
134
135 static inline unsigned long align_to_level(unsigned long pfn, int level)
136 {
137         return (pfn + level_size(level) - 1) & level_mask(level);
138 }
139
140 static inline unsigned long lvl_to_nr_pages(unsigned int lvl)
141 {
142         return  1 << min_t(int, (lvl - 1) * LEVEL_STRIDE, MAX_AGAW_PFN_WIDTH);
143 }
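/*
 * Worked example (illustrative sketch, assuming the default 48-bit domain
 * address width):
 *
 *	width_to_agaw(48)       == 2
 *	agaw_to_level(2)        == 4	(four-level page table)
 *	agaw_to_width(2)        == 48
 *	level_to_offset_bits(4) == 27, so pfn_level_offset(pfn, 4)
 *	extracts bits 35:27 of the page frame number.
 */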
144
145 /* VT-d pages must always be _smaller_ than MM pages. Otherwise things
146    are never going to work. */
147 static inline unsigned long dma_to_mm_pfn(unsigned long dma_pfn)
148 {
149         return dma_pfn >> (PAGE_SHIFT - VTD_PAGE_SHIFT);
150 }
151
152 static inline unsigned long mm_to_dma_pfn(unsigned long mm_pfn)
153 {
154         return mm_pfn << (PAGE_SHIFT - VTD_PAGE_SHIFT);
155 }
156 static inline unsigned long page_to_dma_pfn(struct page *pg)
157 {
158         return mm_to_dma_pfn(page_to_pfn(pg));
159 }
160 static inline unsigned long virt_to_dma_pfn(void *p)
161 {
162         return page_to_dma_pfn(virt_to_page(p));
163 }
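/*
 * Illustrative note: on x86, PAGE_SHIFT and VTD_PAGE_SHIFT are both 12, so
 * the shift above is zero and the mm<->dma PFN conversions are an identity
 * mapping; the helpers only matter if MM pages are larger than VT-d pages.
 */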
164
165 /* global iommu list, set NULL for ignored DMAR units */
166 static struct intel_iommu **g_iommus;
167
168 static void __init check_tylersburg_isoch(void);
169 static int rwbf_quirk;
170
171 /*
172  * set to 1 to panic the kernel if VT-d cannot be successfully enabled
173  * (used when kernel is launched w/ TXT)
174  */
175 static int force_on = 0;
176
177 /*
178  * 0: Present
179  * 1-11: Reserved
180  * 12-63: Context Ptr (12 - (haw-1))
181  * 64-127: Reserved
182  */
183 struct root_entry {
184         u64     val;
185         u64     rsvd1;
186 };
187 #define ROOT_ENTRY_NR (VTD_PAGE_SIZE/sizeof(struct root_entry))
188 static inline bool root_present(struct root_entry *root)
189 {
190         return (root->val & 1);
191 }
192 static inline void set_root_present(struct root_entry *root)
193 {
194         root->val |= 1;
195 }
196 static inline void set_root_value(struct root_entry *root, unsigned long value)
197 {
198         root->val |= value & VTD_PAGE_MASK;
199 }
200
201 static inline struct context_entry *
202 get_context_addr_from_root(struct root_entry *root)
203 {
204         return (struct context_entry *)
205                 (root_present(root) ?
206                  phys_to_virt(root->val & VTD_PAGE_MASK) :
207                  NULL);
208 }
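/*
 * Minimal usage sketch (illustrative only; 're' and 'ctx' are hypothetical
 * locals): the root table holds ROOT_ENTRY_NR == 4096 / 16 == 256 entries,
 * one per PCI bus, and each present entry points to a 4KiB context table
 * indexed by devfn:
 *
 *	struct root_entry *re = &iommu->root_entry[bus];
 *	struct context_entry *ctx = get_context_addr_from_root(re);
 *	if (ctx)
 *		ctx = &ctx[devfn];	/* entry for this device */
 */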
209
210 /*
211  * low 64 bits:
212  * 0: present
213  * 1: fault processing disable
214  * 2-3: translation type
215  * 12-63: address space root
216  * high 64 bits:
217  * 0-2: address width
218  * 3-6: aval
219  * 8-23: domain id
220  */
221 struct context_entry {
222         u64 lo;
223         u64 hi;
224 };
225
226 static inline bool context_present(struct context_entry *context)
227 {
228         return (context->lo & 1);
229 }
230 static inline void context_set_present(struct context_entry *context)
231 {
232         context->lo |= 1;
233 }
234
235 static inline void context_set_fault_enable(struct context_entry *context)
236 {
237         context->lo &= (((u64)-1) << 2) | 1;
238 }
239
240 static inline void context_set_translation_type(struct context_entry *context,
241                                                 unsigned long value)
242 {
243         context->lo &= (((u64)-1) << 4) | 3;
244         context->lo |= (value & 3) << 2;
245 }
246
247 static inline void context_set_address_root(struct context_entry *context,
248                                             unsigned long value)
249 {
250         context->lo |= value & VTD_PAGE_MASK;
251 }
252
253 static inline void context_set_address_width(struct context_entry *context,
254                                              unsigned long value)
255 {
256         context->hi |= value & 7;
257 }
258
259 static inline void context_set_domain_id(struct context_entry *context,
260                                          unsigned long value)
261 {
262         context->hi |= (value & ((1 << 16) - 1)) << 8;
263 }
264
265 static inline void context_clear_entry(struct context_entry *context)
266 {
267         context->lo = 0;
268         context->hi = 0;
269 }
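/*
 * Minimal sketch (illustrative, not the driver's actual mapping path) of how
 * these helpers compose a context entry; 'ce' is a hypothetical pointer to
 * the device's context entry:
 *
 *	context_clear_entry(ce);
 *	context_set_domain_id(ce, domain->id);
 *	context_set_address_width(ce, domain->agaw);
 *	context_set_address_root(ce, virt_to_phys(domain->pgd));
 *	context_set_translation_type(ce, 0);	/* multi-level page table */
 *	context_set_present(ce);
 *	__iommu_flush_cache(iommu, ce, sizeof(*ce));
 */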
270
271 /*
272  * 0: readable
273  * 1: writable
274  * 2-6: reserved
275  * 7: super page
276  * 8-10: available
277  * 11: snoop behavior
278  * 12-63: Host physical address
279  */
280 struct dma_pte {
281         u64 val;
282 };
283
284 static inline void dma_clear_pte(struct dma_pte *pte)
285 {
286         pte->val = 0;
287 }
288
289 static inline u64 dma_pte_addr(struct dma_pte *pte)
290 {
291 #ifdef CONFIG_64BIT
292         return pte->val & VTD_PAGE_MASK;
293 #else
294         /* Must have a full atomic 64-bit read */
295         return  __cmpxchg64(&pte->val, 0ULL, 0ULL) & VTD_PAGE_MASK;
296 #endif
297 }
298
299 static inline bool dma_pte_present(struct dma_pte *pte)
300 {
301         return (pte->val & 3) != 0;
302 }
303
304 static inline bool dma_pte_superpage(struct dma_pte *pte)
305 {
306         return (pte->val & (1 << 7));
307 }
308
309 static inline int first_pte_in_page(struct dma_pte *pte)
310 {
311         return !((unsigned long)pte & ~VTD_PAGE_MASK);
312 }
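/*
 * Illustrative sketch: a leaf PTE is the host-physical page address plus
 * permission bits, e.g.
 *
 *	pte->val = (hpa & VTD_PAGE_MASK) | DMA_PTE_READ | DMA_PTE_WRITE;
 *
 * where a hypothetical 'hpa' stands for the target page's physical address.
 * dma_pte_addr() then recovers the address and dma_pte_present() tests the
 * read/write bits.
 */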
313
314 /*
315  * This domain is a static identity mapping domain.
316  *      1. This domain creates a static 1:1 mapping to all usable memory.
317  *      2. It maps to each iommu if successful.
318  *      3. Each iommu maps to this domain if successful.
319  */
320 static struct dmar_domain *si_domain;
321 static int hw_pass_through = 1;
322
323 /* devices under the same p2p bridge are owned in one domain */
324 #define DOMAIN_FLAG_P2P_MULTIPLE_DEVICES (1 << 0)
325
326 /* domain represents a virtual machine; more than one device
327  * across iommus may be owned by one domain, e.g. a kvm guest.
328  */
329 #define DOMAIN_FLAG_VIRTUAL_MACHINE     (1 << 1)
330
331 /* si_domain contains multiple devices */
332 #define DOMAIN_FLAG_STATIC_IDENTITY     (1 << 2)
333
334 /* define the limit of IOMMUs supported in each domain */
335 #ifdef  CONFIG_X86
336 # define        IOMMU_UNITS_SUPPORTED   MAX_IO_APICS
337 #else
338 # define        IOMMU_UNITS_SUPPORTED   64
339 #endif
340
341 struct dmar_domain {
342         int     id;                     /* domain id */
343         int     nid;                    /* node id */
344         DECLARE_BITMAP(iommu_bmp, IOMMU_UNITS_SUPPORTED);
345                                         /* bitmap of iommus this domain uses */
346
347         struct list_head devices;       /* all devices' list */
348         struct iova_domain iovad;       /* iova's that belong to this domain */
349
350         struct dma_pte  *pgd;           /* virtual address */
351         int             gaw;            /* max guest address width */
352
353         /* adjusted guest address width, 0 is level 2 30-bit */
354         int             agaw;
355
356         int             flags;          /* flags to find out type of domain */
357
358         int             iommu_coherency;/* indicate coherency of iommu access */
359         int             iommu_snooping; /* indicate snooping control feature*/
360         int             iommu_count;    /* reference count of iommu */
361         int             iommu_superpage;/* Level of superpages supported:
362                                            0 == 4KiB (no superpages), 1 == 2MiB,
363                                            2 == 1GiB, 3 == 512GiB, 4 == 256TiB */
364         spinlock_t      iommu_lock;     /* protect iommu set in domain */
365         u64             max_addr;       /* maximum mapped address */
366 };
367
368 /* PCI domain-device relationship */
369 struct device_domain_info {
370         struct list_head link;  /* link to domain siblings */
371         struct list_head global; /* link to global list */
372         u8 bus;                 /* PCI bus number */
373         u8 devfn;               /* PCI devfn number */
374         struct device *dev; /* it's NULL for PCIe-to-PCI bridge */
375         struct intel_iommu *iommu; /* IOMMU used by this device */
376         struct dmar_domain *domain; /* pointer to domain */
377 };
378
379 struct dmar_rmrr_unit {
380         struct list_head list;          /* list of rmrr units   */
381         struct acpi_dmar_header *hdr;   /* ACPI header          */
382         u64     base_address;           /* reserved base address*/
383         u64     end_address;            /* reserved end address */
384         struct dmar_dev_scope *devices; /* target devices */
385         int     devices_cnt;            /* target device count */
386 };
387
388 struct dmar_atsr_unit {
389         struct list_head list;          /* list of ATSR units */
390         struct acpi_dmar_header *hdr;   /* ACPI header */
391         struct dmar_dev_scope *devices; /* target devices */
392         int devices_cnt;                /* target device count */
393         u8 include_all:1;               /* include all ports */
394 };
395
396 static LIST_HEAD(dmar_atsr_units);
397 static LIST_HEAD(dmar_rmrr_units);
398
399 #define for_each_rmrr_units(rmrr) \
400         list_for_each_entry(rmrr, &dmar_rmrr_units, list)
401
402 static void flush_unmaps_timeout(unsigned long data);
403
404 static DEFINE_TIMER(unmap_timer,  flush_unmaps_timeout, 0, 0);
405
406 #define HIGH_WATER_MARK 250
407 struct deferred_flush_tables {
408         int next;
409         struct iova *iova[HIGH_WATER_MARK];
410         struct dmar_domain *domain[HIGH_WATER_MARK];
411         struct page *freelist[HIGH_WATER_MARK];
412 };
413
414 static struct deferred_flush_tables *deferred_flush;
415
416 /* number of IOMMUs; used to size g_iommus and the per-domain iommu bitmaps */
417 static int g_num_of_iommus;
418
419 static DEFINE_SPINLOCK(async_umap_flush_lock);
420 static LIST_HEAD(unmaps_to_do);
421
422 static int timer_on;
423 static long list_size;
424
425 static void domain_exit(struct dmar_domain *domain);
426 static void domain_remove_dev_info(struct dmar_domain *domain);
427 static void domain_remove_one_dev_info(struct dmar_domain *domain,
428                                        struct device *dev);
429 static void iommu_detach_dependent_devices(struct intel_iommu *iommu,
430                                            struct device *dev);
431
432 #ifdef CONFIG_INTEL_IOMMU_DEFAULT_ON
433 int dmar_disabled = 0;
434 #else
435 int dmar_disabled = 1;
436 #endif /*CONFIG_INTEL_IOMMU_DEFAULT_ON*/
437
438 int intel_iommu_enabled = 0;
439 EXPORT_SYMBOL_GPL(intel_iommu_enabled);
440
441 static int dmar_map_gfx = 1;
442 static int dmar_forcedac;
443 static int intel_iommu_strict;
444 static int intel_iommu_superpage = 1;
445
446 int intel_iommu_gfx_mapped;
447 EXPORT_SYMBOL_GPL(intel_iommu_gfx_mapped);
448
449 #define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1))
450 static DEFINE_SPINLOCK(device_domain_lock);
451 static LIST_HEAD(device_domain_list);
452
453 static struct iommu_ops intel_iommu_ops;
454
455 static int __init intel_iommu_setup(char *str)
456 {
457         if (!str)
458                 return -EINVAL;
459         while (*str) {
460                 if (!strncmp(str, "on", 2)) {
461                         dmar_disabled = 0;
462                         printk(KERN_INFO "Intel-IOMMU: enabled\n");
463                 } else if (!strncmp(str, "off", 3)) {
464                         dmar_disabled = 1;
465                         printk(KERN_INFO "Intel-IOMMU: disabled\n");
466                 } else if (!strncmp(str, "igfx_off", 8)) {
467                         dmar_map_gfx = 0;
468                         printk(KERN_INFO
469                                 "Intel-IOMMU: disable GFX device mapping\n");
470                 } else if (!strncmp(str, "forcedac", 8)) {
471                         printk(KERN_INFO
472                                 "Intel-IOMMU: Forcing DAC for PCI devices\n");
473                         dmar_forcedac = 1;
474                 } else if (!strncmp(str, "strict", 6)) {
475                         printk(KERN_INFO
476                                 "Intel-IOMMU: disable batched IOTLB flush\n");
477                         intel_iommu_strict = 1;
478                 } else if (!strncmp(str, "sp_off", 6)) {
479                         printk(KERN_INFO
480                                 "Intel-IOMMU: disable supported super page\n");
481                         intel_iommu_superpage = 0;
482                 }
483
484                 str += strcspn(str, ",");
485                 while (*str == ',')
486                         str++;
487         }
488         return 0;
489 }
490 __setup("intel_iommu=", intel_iommu_setup);
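/*
 * Example kernel command line accepted by the parser above (illustrative):
 *
 *	intel_iommu=on,strict,sp_off
 *
 * which enables the IOMMU, disables batched IOTLB flushing and disables
 * super-page support.
 */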
491
492 static struct kmem_cache *iommu_domain_cache;
493 static struct kmem_cache *iommu_devinfo_cache;
494 static struct kmem_cache *iommu_iova_cache;
495
496 static inline void *alloc_pgtable_page(int node)
497 {
498         struct page *page;
499         void *vaddr = NULL;
500
501         page = alloc_pages_node(node, GFP_ATOMIC | __GFP_ZERO, 0);
502         if (page)
503                 vaddr = page_address(page);
504         return vaddr;
505 }
506
507 static inline void free_pgtable_page(void *vaddr)
508 {
509         free_page((unsigned long)vaddr);
510 }
511
512 static inline void *alloc_domain_mem(void)
513 {
514         return kmem_cache_alloc(iommu_domain_cache, GFP_ATOMIC);
515 }
516
517 static void free_domain_mem(void *vaddr)
518 {
519         kmem_cache_free(iommu_domain_cache, vaddr);
520 }
521
522 static inline void *alloc_devinfo_mem(void)
523 {
524         return kmem_cache_alloc(iommu_devinfo_cache, GFP_ATOMIC);
525 }
526
527 static inline void free_devinfo_mem(void *vaddr)
528 {
529         kmem_cache_free(iommu_devinfo_cache, vaddr);
530 }
531
532 struct iova *alloc_iova_mem(void)
533 {
534         return kmem_cache_alloc(iommu_iova_cache, GFP_ATOMIC);
535 }
536
537 void free_iova_mem(struct iova *iova)
538 {
539         kmem_cache_free(iommu_iova_cache, iova);
540 }
541
542
543 static int __iommu_calculate_agaw(struct intel_iommu *iommu, int max_gaw)
544 {
545         unsigned long sagaw;
546         int agaw = -1;
547
548         sagaw = cap_sagaw(iommu->cap);
549         for (agaw = width_to_agaw(max_gaw);
550              agaw >= 0; agaw--) {
551                 if (test_bit(agaw, &sagaw))
552                         break;
553         }
554
555         return agaw;
556 }
557
558 /*
559  * Calculate max SAGAW for each iommu.
560  */
561 int iommu_calculate_max_sagaw(struct intel_iommu *iommu)
562 {
563         return __iommu_calculate_agaw(iommu, MAX_AGAW_WIDTH);
564 }
565
566 /*
567  * calculate agaw for each iommu.
568  * "SAGAW" may differ across iommus: use a default agaw, and fall back
569  * to a smaller supported agaw for iommus that don't support the default.
570  */
571 int iommu_calculate_agaw(struct intel_iommu *iommu)
572 {
573         return __iommu_calculate_agaw(iommu, DEFAULT_DOMAIN_ADDRESS_WIDTH);
574 }
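/*
 * Worked example (illustrative): with DEFAULT_DOMAIN_ADDRESS_WIDTH == 48 the
 * search starts at agaw == width_to_agaw(48) == 2 (48-bit, 4-level). If bit 2
 * of the hardware's SAGAW field is clear, it falls back to agaw 1 (39-bit,
 * 3-level) and then agaw 0 (30-bit, 2-level), returning -1 if none is
 * supported.
 */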
575
576 /* This function only returns a single iommu in a domain */
577 static struct intel_iommu *domain_get_iommu(struct dmar_domain *domain)
578 {
579         int iommu_id;
580
581         /* si_domain and vm domain should not get here. */
582         BUG_ON(domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE);
583         BUG_ON(domain->flags & DOMAIN_FLAG_STATIC_IDENTITY);
584
585         iommu_id = find_first_bit(domain->iommu_bmp, g_num_of_iommus);
586         if (iommu_id < 0 || iommu_id >= g_num_of_iommus)
587                 return NULL;
588
589         return g_iommus[iommu_id];
590 }
591
592 static void domain_update_iommu_coherency(struct dmar_domain *domain)
593 {
594         struct dmar_drhd_unit *drhd;
595         struct intel_iommu *iommu;
596         int i, found = 0;
597
598         domain->iommu_coherency = 1;
599
600         for_each_set_bit(i, domain->iommu_bmp, g_num_of_iommus) {
601                 found = 1;
602                 if (!ecap_coherent(g_iommus[i]->ecap)) {
603                         domain->iommu_coherency = 0;
604                         break;
605                 }
606         }
607         if (found)
608                 return;
609
610         /* No hardware attached; use lowest common denominator */
611         rcu_read_lock();
612         for_each_active_iommu(iommu, drhd) {
613                 if (!ecap_coherent(iommu->ecap)) {
614                         domain->iommu_coherency = 0;
615                         break;
616                 }
617         }
618         rcu_read_unlock();
619 }
620
621 static void domain_update_iommu_snooping(struct dmar_domain *domain)
622 {
623         int i;
624
625         domain->iommu_snooping = 1;
626
627         for_each_set_bit(i, domain->iommu_bmp, g_num_of_iommus) {
628                 if (!ecap_sc_support(g_iommus[i]->ecap)) {
629                         domain->iommu_snooping = 0;
630                         break;
631                 }
632         }
633 }
634
635 static void domain_update_iommu_superpage(struct dmar_domain *domain)
636 {
637         struct dmar_drhd_unit *drhd;
638         struct intel_iommu *iommu = NULL;
639         int mask = 0xf;
640
641         if (!intel_iommu_superpage) {
642                 domain->iommu_superpage = 0;
643                 return;
644         }
645
646         /* set iommu_superpage to the smallest common denominator */
647         rcu_read_lock();
648         for_each_active_iommu(iommu, drhd) {
649                 mask &= cap_super_page_val(iommu->cap);
650                 if (!mask) {
651                         break;
652                 }
653         }
654         rcu_read_unlock();
655
656         domain->iommu_superpage = fls(mask);
657 }
658
659 /* Some capabilities may be different across iommus */
660 static void domain_update_iommu_cap(struct dmar_domain *domain)
661 {
662         domain_update_iommu_coherency(domain);
663         domain_update_iommu_snooping(domain);
664         domain_update_iommu_superpage(domain);
665 }
666
667 static struct intel_iommu *device_to_iommu(struct device *dev, u8 *bus, u8 *devfn)
668 {
669         struct dmar_drhd_unit *drhd = NULL;
670         struct intel_iommu *iommu;
671         struct device *tmp;
672         struct pci_dev *ptmp, *pdev = NULL;
673         u16 segment;
674         int i;
675
676         if (dev_is_pci(dev)) {
677                 pdev = to_pci_dev(dev);
678                 segment = pci_domain_nr(pdev->bus);
679         } else if (ACPI_COMPANION(dev))
680                 dev = &ACPI_COMPANION(dev)->dev;
681
682         rcu_read_lock();
683         for_each_active_iommu(iommu, drhd) {
684                 if (pdev && segment != drhd->segment)
685                         continue;
686
687                 for_each_active_dev_scope(drhd->devices,
688                                           drhd->devices_cnt, i, tmp) {
689                         if (tmp == dev) {
690                                 *bus = drhd->devices[i].bus;
691                                 *devfn = drhd->devices[i].devfn;
692                                 goto out;
693                         }
694
695                         if (!pdev || !dev_is_pci(tmp))
696                                 continue;
697
698                         ptmp = to_pci_dev(tmp);
699                         if (ptmp->subordinate &&
700                             ptmp->subordinate->number <= pdev->bus->number &&
701                             ptmp->subordinate->busn_res.end >= pdev->bus->number)
702                                 goto got_pdev;
703                 }
704
705                 if (pdev && drhd->include_all) {
706                 got_pdev:
707                         *bus = pdev->bus->number;
708                         *devfn = pdev->devfn;
709                         goto out;
710                 }
711         }
712         iommu = NULL;
713  out:
714         rcu_read_unlock();
715
716         return iommu;
717 }
718
719 static void domain_flush_cache(struct dmar_domain *domain,
720                                void *addr, int size)
721 {
722         if (!domain->iommu_coherency)
723                 clflush_cache_range(addr, size);
724 }
725
726 /* Gets context entry for a given bus and devfn */
727 static struct context_entry *device_to_context_entry(struct intel_iommu *iommu,
728                 u8 bus, u8 devfn)
729 {
730         struct root_entry *root;
731         struct context_entry *context;
732         unsigned long phy_addr;
733         unsigned long flags;
734
735         spin_lock_irqsave(&iommu->lock, flags);
736         root = &iommu->root_entry[bus];
737         context = get_context_addr_from_root(root);
738         if (!context) {
739                 context = (struct context_entry *)
740                                 alloc_pgtable_page(iommu->node);
741                 if (!context) {
742                         spin_unlock_irqrestore(&iommu->lock, flags);
743                         return NULL;
744                 }
745                 __iommu_flush_cache(iommu, (void *)context, CONTEXT_SIZE);
746                 phy_addr = virt_to_phys((void *)context);
747                 set_root_value(root, phy_addr);
748                 set_root_present(root);
749                 __iommu_flush_cache(iommu, root, sizeof(*root));
750         }
751         spin_unlock_irqrestore(&iommu->lock, flags);
752         return &context[devfn];
753 }
754
755 static int device_context_mapped(struct intel_iommu *iommu, u8 bus, u8 devfn)
756 {
757         struct root_entry *root;
758         struct context_entry *context;
759         int ret;
760         unsigned long flags;
761
762         spin_lock_irqsave(&iommu->lock, flags);
763         root = &iommu->root_entry[bus];
764         context = get_context_addr_from_root(root);
765         if (!context) {
766                 ret = 0;
767                 goto out;
768         }
769         ret = context_present(&context[devfn]);
770 out:
771         spin_unlock_irqrestore(&iommu->lock, flags);
772         return ret;
773 }
774
775 static void clear_context_table(struct intel_iommu *iommu, u8 bus, u8 devfn)
776 {
777         struct root_entry *root;
778         struct context_entry *context;
779         unsigned long flags;
780
781         spin_lock_irqsave(&iommu->lock, flags);
782         root = &iommu->root_entry[bus];
783         context = get_context_addr_from_root(root);
784         if (context) {
785                 context_clear_entry(&context[devfn]);
786                 __iommu_flush_cache(iommu, &context[devfn], \
787                         sizeof(*context));
788         }
789         spin_unlock_irqrestore(&iommu->lock, flags);
790 }
791
792 static void free_context_table(struct intel_iommu *iommu)
793 {
794         struct root_entry *root;
795         int i;
796         unsigned long flags;
797         struct context_entry *context;
798
799         spin_lock_irqsave(&iommu->lock, flags);
800         if (!iommu->root_entry) {
801                 goto out;
802         }
803         for (i = 0; i < ROOT_ENTRY_NR; i++) {
804                 root = &iommu->root_entry[i];
805                 context = get_context_addr_from_root(root);
806                 if (context)
807                         free_pgtable_page(context);
808         }
809         free_pgtable_page(iommu->root_entry);
810         iommu->root_entry = NULL;
811 out:
812         spin_unlock_irqrestore(&iommu->lock, flags);
813 }
814
815 static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain,
816                                       unsigned long pfn, int *target_level)
817 {
818         int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
819         struct dma_pte *parent, *pte = NULL;
820         int level = agaw_to_level(domain->agaw);
821         int offset;
822
823         BUG_ON(!domain->pgd);
824
825         if (addr_width < BITS_PER_LONG && pfn >> addr_width)
826                 /* Address beyond IOMMU's addressing capabilities. */
827                 return NULL;
828
829         parent = domain->pgd;
830
831         while (1) {
832                 void *tmp_page;
833
834                 offset = pfn_level_offset(pfn, level);
835                 pte = &parent[offset];
836                 if (!*target_level && (dma_pte_superpage(pte) || !dma_pte_present(pte)))
837                         break;
838                 if (level == *target_level)
839                         break;
840
841                 if (!dma_pte_present(pte)) {
842                         uint64_t pteval;
843
844                         tmp_page = alloc_pgtable_page(domain->nid);
845
846                         if (!tmp_page)
847                                 return NULL;
848
849                         domain_flush_cache(domain, tmp_page, VTD_PAGE_SIZE);
850                         pteval = ((uint64_t)virt_to_dma_pfn(tmp_page) << VTD_PAGE_SHIFT) | DMA_PTE_READ | DMA_PTE_WRITE;
851                         if (cmpxchg64(&pte->val, 0ULL, pteval)) {
852                                 /* Someone else set it while we were thinking; use theirs. */
853                                 free_pgtable_page(tmp_page);
854                         } else {
855                                 dma_pte_addr(pte);
856                                 domain_flush_cache(domain, pte, sizeof(*pte));
857                         }
858                 }
859                 if (level == 1)
860                         break;
861
862                 parent = phys_to_virt(dma_pte_addr(pte));
863                 level--;
864         }
865
866         if (!*target_level)
867                 *target_level = level;
868
869         return pte;
870 }
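/*
 * Worked example (illustrative): for a 4-level table (agaw 2), the walk
 * above indexes the page tables with
 *
 *	level 4: pfn bits 35:27
 *	level 3: pfn bits 26:18
 *	level 2: pfn bits 17:9
 *	level 1: pfn bits  8:0
 *
 * allocating any missing intermediate table with alloc_pgtable_page().
 */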
871
872
873 /* return the address's pte at a specific level */
874 static struct dma_pte *dma_pfn_level_pte(struct dmar_domain *domain,
875                                          unsigned long pfn,
876                                          int level, int *large_page)
877 {
878         struct dma_pte *parent, *pte = NULL;
879         int total = agaw_to_level(domain->agaw);
880         int offset;
881
882         parent = domain->pgd;
883         while (level <= total) {
884                 offset = pfn_level_offset(pfn, total);
885                 pte = &parent[offset];
886                 if (level == total)
887                         return pte;
888
889                 if (!dma_pte_present(pte)) {
890                         *large_page = total;
891                         break;
892                 }
893
894                 if (pte->val & DMA_PTE_LARGE_PAGE) {
895                         *large_page = total;
896                         return pte;
897                 }
898
899                 parent = phys_to_virt(dma_pte_addr(pte));
900                 total--;
901         }
902         return NULL;
903 }
904
905 /* clear last-level ptes; a TLB flush should follow */
906 static void dma_pte_clear_range(struct dmar_domain *domain,
907                                 unsigned long start_pfn,
908                                 unsigned long last_pfn)
909 {
910         int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
911         unsigned int large_page = 1;
912         struct dma_pte *first_pte, *pte;
913
914         BUG_ON(addr_width < BITS_PER_LONG && start_pfn >> addr_width);
915         BUG_ON(addr_width < BITS_PER_LONG && last_pfn >> addr_width);
916         BUG_ON(start_pfn > last_pfn);
917
918         /* we don't need lock here; nobody else touches the iova range */
919         do {
920                 large_page = 1;
921                 first_pte = pte = dma_pfn_level_pte(domain, start_pfn, 1, &large_page);
922                 if (!pte) {
923                         start_pfn = align_to_level(start_pfn + 1, large_page + 1);
924                         continue;
925                 }
926                 do {
927                         dma_clear_pte(pte);
928                         start_pfn += lvl_to_nr_pages(large_page);
929                         pte++;
930                 } while (start_pfn <= last_pfn && !first_pte_in_page(pte));
931
932                 domain_flush_cache(domain, first_pte,
933                                    (void *)pte - (void *)first_pte);
934
935         } while (start_pfn && start_pfn <= last_pfn);
936 }
937
938 static void dma_pte_free_level(struct dmar_domain *domain, int level,
939                                struct dma_pte *pte, unsigned long pfn,
940                                unsigned long start_pfn, unsigned long last_pfn)
941 {
942         pfn = max(start_pfn, pfn);
943         pte = &pte[pfn_level_offset(pfn, level)];
944
945         do {
946                 unsigned long level_pfn;
947                 struct dma_pte *level_pte;
948
949                 if (!dma_pte_present(pte) || dma_pte_superpage(pte))
950                         goto next;
951
952                 level_pfn = pfn & level_mask(level - 1);
953                 level_pte = phys_to_virt(dma_pte_addr(pte));
954
955                 if (level > 2)
956                         dma_pte_free_level(domain, level - 1, level_pte,
957                                            level_pfn, start_pfn, last_pfn);
958
959                 /* If range covers entire pagetable, free it */
960                 if (!(start_pfn > level_pfn ||
961                       last_pfn < level_pfn + level_size(level) - 1)) {
962                         dma_clear_pte(pte);
963                         domain_flush_cache(domain, pte, sizeof(*pte));
964                         free_pgtable_page(level_pte);
965                 }
966 next:
967                 pfn += level_size(level);
968         } while (!first_pte_in_page(++pte) && pfn <= last_pfn);
969 }
970
971 /* free page table pages. last level pte should already be cleared */
972 static void dma_pte_free_pagetable(struct dmar_domain *domain,
973                                    unsigned long start_pfn,
974                                    unsigned long last_pfn)
975 {
976         int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
977
978         BUG_ON(addr_width < BITS_PER_LONG && start_pfn >> addr_width);
979         BUG_ON(addr_width < BITS_PER_LONG && last_pfn >> addr_width);
980         BUG_ON(start_pfn > last_pfn);
981
982         /* We don't need lock here; nobody else touches the iova range */
983         dma_pte_free_level(domain, agaw_to_level(domain->agaw),
984                            domain->pgd, 0, start_pfn, last_pfn);
985
986         /* free pgd */
987         if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) {
988                 free_pgtable_page(domain->pgd);
989                 domain->pgd = NULL;
990         }
991 }
992
993 /* When a page at a given level is being unlinked from its parent, we don't
994    need to *modify* it at all. All we need to do is make a list of all the
995    pages which can be freed just as soon as we've flushed the IOTLB and we
996    know the hardware page-walk will no longer touch them.
997    The 'pte' argument is the *parent* PTE, pointing to the page that is to
998    be freed. */
999 static struct page *dma_pte_list_pagetables(struct dmar_domain *domain,
1000                                             int level, struct dma_pte *pte,
1001                                             struct page *freelist)
1002 {
1003         struct page *pg;
1004
1005         pg = pfn_to_page(dma_pte_addr(pte) >> PAGE_SHIFT);
1006         pg->freelist = freelist;
1007         freelist = pg;
1008
1009         if (level == 1)
1010                 return freelist;
1011
1012         pte = page_address(pg);
1013         do {
1014                 if (dma_pte_present(pte) && !dma_pte_superpage(pte))
1015                         freelist = dma_pte_list_pagetables(domain, level - 1,
1016                                                            pte, freelist);
1017                 pte++;
1018         } while (!first_pte_in_page(pte));
1019
1020         return freelist;
1021 }
1022
1023 static struct page *dma_pte_clear_level(struct dmar_domain *domain, int level,
1024                                         struct dma_pte *pte, unsigned long pfn,
1025                                         unsigned long start_pfn,
1026                                         unsigned long last_pfn,
1027                                         struct page *freelist)
1028 {
1029         struct dma_pte *first_pte = NULL, *last_pte = NULL;
1030
1031         pfn = max(start_pfn, pfn);
1032         pte = &pte[pfn_level_offset(pfn, level)];
1033
1034         do {
1035                 unsigned long level_pfn;
1036
1037                 if (!dma_pte_present(pte))
1038                         goto next;
1039
1040                 level_pfn = pfn & level_mask(level);
1041
1042                 /* If range covers entire pagetable, free it */
1043                 if (start_pfn <= level_pfn &&
1044                     last_pfn >= level_pfn + level_size(level) - 1) {
1045                         /* These subordinate page tables are going away entirely. Don't
1046                            bother to clear them; we're just going to *free* them. */
1047                         if (level > 1 && !dma_pte_superpage(pte))
1048                                 freelist = dma_pte_list_pagetables(domain, level - 1, pte, freelist);
1049
1050                         dma_clear_pte(pte);
1051                         if (!first_pte)
1052                                 first_pte = pte;
1053                         last_pte = pte;
1054                 } else if (level > 1) {
1055                         /* Recurse down into a level that isn't *entirely* obsolete */
1056                         freelist = dma_pte_clear_level(domain, level - 1,
1057                                                        phys_to_virt(dma_pte_addr(pte)),
1058                                                        level_pfn, start_pfn, last_pfn,
1059                                                        freelist);
1060                 }
1061 next:
1062                 pfn += level_size(level);
1063         } while (!first_pte_in_page(++pte) && pfn <= last_pfn);
1064
1065         if (first_pte)
1066                 domain_flush_cache(domain, first_pte,
1067                                    (void *)++last_pte - (void *)first_pte);
1068
1069         return freelist;
1070 }
1071
1072 /* We can't just free the pages because the IOMMU may still be walking
1073    the page tables, and may have cached the intermediate levels. The
1074    pages can only be freed after the IOTLB flush has been done. */
1075 struct page *domain_unmap(struct dmar_domain *domain,
1076                           unsigned long start_pfn,
1077                           unsigned long last_pfn)
1078 {
1079         int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
1080         struct page *freelist = NULL;
1081
1082         BUG_ON(addr_width < BITS_PER_LONG && start_pfn >> addr_width);
1083         BUG_ON(addr_width < BITS_PER_LONG && last_pfn >> addr_width);
1084         BUG_ON(start_pfn > last_pfn);
1085
1086         /* we don't need lock here; nobody else touches the iova range */
1087         freelist = dma_pte_clear_level(domain, agaw_to_level(domain->agaw),
1088                                        domain->pgd, 0, start_pfn, last_pfn, NULL);
1089
1090         /* free pgd */
1091         if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) {
1092                 struct page *pgd_page = virt_to_page(domain->pgd);
1093                 pgd_page->freelist = freelist;
1094                 freelist = pgd_page;
1095
1096                 domain->pgd = NULL;
1097         }
1098
1099         return freelist;
1100 }
1101
1102 void dma_free_pagelist(struct page *freelist)
1103 {
1104         struct page *pg;
1105
1106         while ((pg = freelist)) {
1107                 freelist = pg->freelist;
1108                 free_pgtable_page(page_address(pg));
1109         }
1110 }
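/*
 * Typical call sequence (illustrative sketch; 'domain', 'start_pfn' and
 * 'last_pfn' are hypothetical locals): unmap, flush, then free, so the
 * hardware can never walk a freed page table:
 *
 *	struct page *freelist = domain_unmap(domain, start_pfn, last_pfn);
 *	iommu->flush.flush_iotlb(iommu, domain->id, 0, 0, DMA_TLB_DSI_FLUSH);
 *	dma_free_pagelist(freelist);
 */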
1111
1112 /* iommu handling */
1113 static int iommu_alloc_root_entry(struct intel_iommu *iommu)
1114 {
1115         struct root_entry *root;
1116         unsigned long flags;
1117
1118         root = (struct root_entry *)alloc_pgtable_page(iommu->node);
1119         if (!root)
1120                 return -ENOMEM;
1121
1122         __iommu_flush_cache(iommu, root, ROOT_SIZE);
1123
1124         spin_lock_irqsave(&iommu->lock, flags);
1125         iommu->root_entry = root;
1126         spin_unlock_irqrestore(&iommu->lock, flags);
1127
1128         return 0;
1129 }
1130
1131 static void iommu_set_root_entry(struct intel_iommu *iommu)
1132 {
1133         void *addr;
1134         u32 sts;
1135         unsigned long flag;
1136
1137         addr = iommu->root_entry;
1138
1139         raw_spin_lock_irqsave(&iommu->register_lock, flag);
1140         dmar_writeq(iommu->reg + DMAR_RTADDR_REG, virt_to_phys(addr));
1141
1142         writel(iommu->gcmd | DMA_GCMD_SRTP, iommu->reg + DMAR_GCMD_REG);
1143
1144         /* Make sure hardware completes it */
1145         IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
1146                       readl, (sts & DMA_GSTS_RTPS), sts);
1147
1148         raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1149 }
1150
1151 static void iommu_flush_write_buffer(struct intel_iommu *iommu)
1152 {
1153         u32 val;
1154         unsigned long flag;
1155
1156         if (!rwbf_quirk && !cap_rwbf(iommu->cap))
1157                 return;
1158
1159         raw_spin_lock_irqsave(&iommu->register_lock, flag);
1160         writel(iommu->gcmd | DMA_GCMD_WBF, iommu->reg + DMAR_GCMD_REG);
1161
1162         /* Make sure hardware completes it */
1163         IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
1164                       readl, (!(val & DMA_GSTS_WBFS)), val);
1165
1166         raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1167 }
1168
1169 /* return value determines whether we need a write buffer flush */
1170 static void __iommu_flush_context(struct intel_iommu *iommu,
1171                                   u16 did, u16 source_id, u8 function_mask,
1172                                   u64 type)
1173 {
1174         u64 val = 0;
1175         unsigned long flag;
1176
1177         switch (type) {
1178         case DMA_CCMD_GLOBAL_INVL:
1179                 val = DMA_CCMD_GLOBAL_INVL;
1180                 break;
1181         case DMA_CCMD_DOMAIN_INVL:
1182                 val = DMA_CCMD_DOMAIN_INVL|DMA_CCMD_DID(did);
1183                 break;
1184         case DMA_CCMD_DEVICE_INVL:
1185                 val = DMA_CCMD_DEVICE_INVL|DMA_CCMD_DID(did)
1186                         | DMA_CCMD_SID(source_id) | DMA_CCMD_FM(function_mask);
1187                 break;
1188         default:
1189                 BUG();
1190         }
1191         val |= DMA_CCMD_ICC;
1192
1193         raw_spin_lock_irqsave(&iommu->register_lock, flag);
1194         dmar_writeq(iommu->reg + DMAR_CCMD_REG, val);
1195
1196         /* Make sure hardware completes it */
1197         IOMMU_WAIT_OP(iommu, DMAR_CCMD_REG,
1198                 dmar_readq, (!(val & DMA_CCMD_ICC)), val);
1199
1200         raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1201 }
1202
1203 /* return value determines whether we need a write buffer flush */
1204 static void __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did,
1205                                 u64 addr, unsigned int size_order, u64 type)
1206 {
1207         int tlb_offset = ecap_iotlb_offset(iommu->ecap);
1208         u64 val = 0, val_iva = 0;
1209         unsigned long flag;
1210
1211         switch (type) {
1212         case DMA_TLB_GLOBAL_FLUSH:
1213                 /* global flush doesn't need to set IVA_REG */
1214                 val = DMA_TLB_GLOBAL_FLUSH|DMA_TLB_IVT;
1215                 break;
1216         case DMA_TLB_DSI_FLUSH:
1217                 val = DMA_TLB_DSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
1218                 break;
1219         case DMA_TLB_PSI_FLUSH:
1220                 val = DMA_TLB_PSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
1221                 /* IH bit is passed in as part of address */
1222                 val_iva = size_order | addr;
1223                 break;
1224         default:
1225                 BUG();
1226         }
1227         /* Note: set drain read/write */
1228 #if 0
1229         /*
1230          * This is probably only needed to be extra safe; it looks like
1231          * we can ignore it without any impact.
1232          */
1233         if (cap_read_drain(iommu->cap))
1234                 val |= DMA_TLB_READ_DRAIN;
1235 #endif
1236         if (cap_write_drain(iommu->cap))
1237                 val |= DMA_TLB_WRITE_DRAIN;
1238
1239         raw_spin_lock_irqsave(&iommu->register_lock, flag);
1240         /* Note: Only uses first TLB reg currently */
1241         if (val_iva)
1242                 dmar_writeq(iommu->reg + tlb_offset, val_iva);
1243         dmar_writeq(iommu->reg + tlb_offset + 8, val);
1244
1245         /* Make sure hardware completes it */
1246         IOMMU_WAIT_OP(iommu, tlb_offset + 8,
1247                 dmar_readq, (!(val & DMA_TLB_IVT)), val);
1248
1249         raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1250
1251         /* check IOTLB invalidation granularity */
1252         if (DMA_TLB_IAIG(val) == 0)
1253                 printk(KERN_ERR "IOMMU: flush IOTLB failed\n");
1254         if (DMA_TLB_IAIG(val) != DMA_TLB_IIRG(type))
1255                 pr_debug("IOMMU: tlb flush request %Lx, actual %Lx\n",
1256                         (unsigned long long)DMA_TLB_IIRG(type),
1257                         (unsigned long long)DMA_TLB_IAIG(val));
1258 }
1259
1260 static struct device_domain_info *
1261 iommu_support_dev_iotlb (struct dmar_domain *domain, struct intel_iommu *iommu,
1262                          u8 bus, u8 devfn)
1263 {
1264         int found = 0;
1265         unsigned long flags;
1266         struct device_domain_info *info;
1267         struct pci_dev *pdev;
1268
1269         if (!ecap_dev_iotlb_support(iommu->ecap))
1270                 return NULL;
1271
1272         if (!iommu->qi)
1273                 return NULL;
1274
1275         spin_lock_irqsave(&device_domain_lock, flags);
1276         list_for_each_entry(info, &domain->devices, link)
1277                 if (info->bus == bus && info->devfn == devfn) {
1278                         found = 1;
1279                         break;
1280                 }
1281         spin_unlock_irqrestore(&device_domain_lock, flags);
1282
1283         if (!found || !info->dev || !dev_is_pci(info->dev))
1284                 return NULL;
1285
1286         pdev = to_pci_dev(info->dev);
1287
1288         if (!pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_ATS))
1289                 return NULL;
1290
1291         if (!dmar_find_matched_atsr_unit(pdev))
1292                 return NULL;
1293
1294         return info;
1295 }
1296
1297 static void iommu_enable_dev_iotlb(struct device_domain_info *info)
1298 {
1299         if (!info || !dev_is_pci(info->dev))
1300                 return;
1301
1302         pci_enable_ats(to_pci_dev(info->dev), VTD_PAGE_SHIFT);
1303 }
1304
1305 static void iommu_disable_dev_iotlb(struct device_domain_info *info)
1306 {
1307         if (!info->dev || !dev_is_pci(info->dev) ||
1308             !pci_ats_enabled(to_pci_dev(info->dev)))
1309                 return;
1310
1311         pci_disable_ats(to_pci_dev(info->dev));
1312 }
1313
1314 static void iommu_flush_dev_iotlb(struct dmar_domain *domain,
1315                                   u64 addr, unsigned mask)
1316 {
1317         u16 sid, qdep;
1318         unsigned long flags;
1319         struct device_domain_info *info;
1320
1321         spin_lock_irqsave(&device_domain_lock, flags);
1322         list_for_each_entry(info, &domain->devices, link) {
1323                 struct pci_dev *pdev;
1324                 if (!info->dev || !dev_is_pci(info->dev))
1325                         continue;
1326
1327                 pdev = to_pci_dev(info->dev);
1328                 if (!pci_ats_enabled(pdev))
1329                         continue;
1330
1331                 sid = info->bus << 8 | info->devfn;
1332                 qdep = pci_ats_queue_depth(pdev);
1333                 qi_flush_dev_iotlb(info->iommu, sid, qdep, addr, mask);
1334         }
1335         spin_unlock_irqrestore(&device_domain_lock, flags);
1336 }
1337
1338 static void iommu_flush_iotlb_psi(struct intel_iommu *iommu, u16 did,
1339                                   unsigned long pfn, unsigned int pages, int ih, int map)
1340 {
1341         unsigned int mask = ilog2(__roundup_pow_of_two(pages));
1342         uint64_t addr = (uint64_t)pfn << VTD_PAGE_SHIFT;
1343
1344         BUG_ON(pages == 0);
1345
1346         if (ih)
1347                 ih = 1 << 6;
1348         /*
1349          * Fallback to domain selective flush if no PSI support or the size is
1350          * too big.
1351          * PSI requires page size to be 2 ^ x, and the base address is naturally
1352          * aligned to the size
1353          */
1354         if (!cap_pgsel_inv(iommu->cap) || mask > cap_max_amask_val(iommu->cap))
1355                 iommu->flush.flush_iotlb(iommu, did, 0, 0,
1356                                                 DMA_TLB_DSI_FLUSH);
1357         else
1358                 iommu->flush.flush_iotlb(iommu, did, addr | ih, mask,
1359                                                 DMA_TLB_PSI_FLUSH);
1360
1361         /*
1362          * In caching mode, changes of pages from non-present to present require
1363          * flush. However, device IOTLB doesn't need to be flushed in this case.
1364          */
1365         if (!cap_caching_mode(iommu->cap) || !map)
1366                 iommu_flush_dev_iotlb(iommu->domains[did], addr, mask);
1367 }
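/*
 * Worked example (illustrative): flushing pages == 9 gives
 * mask == ilog2(__roundup_pow_of_two(9)) == ilog2(16) == 4, so the hardware
 * is asked to invalidate a naturally aligned 16-page (64KiB) region; if that
 * exceeds cap_max_amask_val(), the code above falls back to a
 * domain-selective flush.
 */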
1368
1369 static void iommu_disable_protect_mem_regions(struct intel_iommu *iommu)
1370 {
1371         u32 pmen;
1372         unsigned long flags;
1373
1374         raw_spin_lock_irqsave(&iommu->register_lock, flags);
1375         pmen = readl(iommu->reg + DMAR_PMEN_REG);
1376         pmen &= ~DMA_PMEN_EPM;
1377         writel(pmen, iommu->reg + DMAR_PMEN_REG);
1378
1379         /* wait for the protected region status bit to clear */
1380         IOMMU_WAIT_OP(iommu, DMAR_PMEN_REG,
1381                 readl, !(pmen & DMA_PMEN_PRS), pmen);
1382
1383         raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
1384 }
1385
1386 static int iommu_enable_translation(struct intel_iommu *iommu)
1387 {
1388         u32 sts;
1389         unsigned long flags;
1390
1391         raw_spin_lock_irqsave(&iommu->register_lock, flags);
1392         iommu->gcmd |= DMA_GCMD_TE;
1393         writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
1394
1395         /* Make sure hardware completes it */
1396         IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
1397                       readl, (sts & DMA_GSTS_TES), sts);
1398
1399         raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
1400         return 0;
1401 }
1402
1403 static int iommu_disable_translation(struct intel_iommu *iommu)
1404 {
1405         u32 sts;
1406         unsigned long flag;
1407
1408         raw_spin_lock_irqsave(&iommu->register_lock, flag);
1409         iommu->gcmd &= ~DMA_GCMD_TE;
1410         writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
1411
1412         /* Make sure hardware completes it */
1413         IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
1414                       readl, (!(sts & DMA_GSTS_TES)), sts);
1415
1416         raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1417         return 0;
1418 }
1419
1420
1421 static int iommu_init_domains(struct intel_iommu *iommu)
1422 {
1423         unsigned long ndomains;
1424         unsigned long nlongs;
1425
1426         ndomains = cap_ndoms(iommu->cap);
1427         pr_debug("IOMMU%d: Number of Domains supported <%ld>\n",
1428                  iommu->seq_id, ndomains);
1429         nlongs = BITS_TO_LONGS(ndomains);
1430
1431         spin_lock_init(&iommu->lock);
1432
1433         /* TBD: there might be 64K domains,
1434          * consider other allocation schemes for future chips
1435          */
1436         iommu->domain_ids = kcalloc(nlongs, sizeof(unsigned long), GFP_KERNEL);
1437         if (!iommu->domain_ids) {
1438                 pr_err("IOMMU%d: allocating domain id array failed\n",
1439                        iommu->seq_id);
1440                 return -ENOMEM;
1441         }
1442         iommu->domains = kcalloc(ndomains, sizeof(struct dmar_domain *),
1443                         GFP_KERNEL);
1444         if (!iommu->domains) {
1445                 pr_err("IOMMU%d: allocating domain array failed\n",
1446                        iommu->seq_id);
1447                 kfree(iommu->domain_ids);
1448                 iommu->domain_ids = NULL;
1449                 return -ENOMEM;
1450         }
1451
1452         /*
1453          * if Caching mode is set, then invalid translations are tagged
1454          * with domain id 0. Hence we need to pre-allocate it.
1455          */
1456         if (cap_caching_mode(iommu->cap))
1457                 set_bit(0, iommu->domain_ids);
1458         return 0;
1459 }
1460
1461 static void free_dmar_iommu(struct intel_iommu *iommu)
1462 {
1463         struct dmar_domain *domain;
1464         int i, count;
1465         unsigned long flags;
1466
1467         if ((iommu->domains) && (iommu->domain_ids)) {
1468                 for_each_set_bit(i, iommu->domain_ids, cap_ndoms(iommu->cap)) {
1469                         /*
1470                          * Domain id 0 is reserved for invalid translation
1471                          * if hardware supports caching mode.
1472                          */
1473                         if (cap_caching_mode(iommu->cap) && i == 0)
1474                                 continue;
1475
1476                         domain = iommu->domains[i];
1477                         clear_bit(i, iommu->domain_ids);
1478
1479                         spin_lock_irqsave(&domain->iommu_lock, flags);
1480                         count = --domain->iommu_count;
1481                         spin_unlock_irqrestore(&domain->iommu_lock, flags);
1482                         if (count == 0)
1483                                 domain_exit(domain);
1484                 }
1485         }
1486
1487         if (iommu->gcmd & DMA_GCMD_TE)
1488                 iommu_disable_translation(iommu);
1489
1490         kfree(iommu->domains);
1491         kfree(iommu->domain_ids);
1492         iommu->domains = NULL;
1493         iommu->domain_ids = NULL;
1494
1495         g_iommus[iommu->seq_id] = NULL;
1496
1497         /* free context mapping */
1498         free_context_table(iommu);
1499 }
1500
1501 static struct dmar_domain *alloc_domain(bool vm)
1502 {
1503         /* domain id for virtual machine, it won't be set in context */
1504         static atomic_t vm_domid = ATOMIC_INIT(0);
1505         struct dmar_domain *domain;
1506
1507         domain = alloc_domain_mem();
1508         if (!domain)
1509                 return NULL;
1510
1511         domain->nid = -1;
1512         domain->iommu_count = 0;
1513         memset(domain->iommu_bmp, 0, sizeof(domain->iommu_bmp));
1514         domain->flags = 0;
1515         spin_lock_init(&domain->iommu_lock);
1516         INIT_LIST_HEAD(&domain->devices);
1517         if (vm) {
1518                 domain->id = atomic_inc_return(&vm_domid);
1519                 domain->flags = DOMAIN_FLAG_VIRTUAL_MACHINE;
1520         }
1521
1522         return domain;
1523 }
1524
1525 static int iommu_attach_domain(struct dmar_domain *domain,
1526                                struct intel_iommu *iommu)
1527 {
1528         int num;
1529         unsigned long ndomains;
1530         unsigned long flags;
1531
1532         ndomains = cap_ndoms(iommu->cap);
1533
1534         spin_lock_irqsave(&iommu->lock, flags);
1535
1536         num = find_first_zero_bit(iommu->domain_ids, ndomains);
1537         if (num >= ndomains) {
1538                 spin_unlock_irqrestore(&iommu->lock, flags);
1539                 printk(KERN_ERR "IOMMU: no free domain ids\n");
1540                 return -ENOMEM;
1541         }
1542
1543         domain->id = num;
1544         domain->iommu_count++;
1545         set_bit(num, iommu->domain_ids);
1546         set_bit(iommu->seq_id, domain->iommu_bmp);
1547         iommu->domains[num] = domain;
1548         spin_unlock_irqrestore(&iommu->lock, flags);
1549
1550         return 0;
1551 }
1552
1553 static void iommu_detach_domain(struct dmar_domain *domain,
1554                                 struct intel_iommu *iommu)
1555 {
1556         unsigned long flags;
1557         int num, ndomains;
1558
1559         spin_lock_irqsave(&iommu->lock, flags);
1560         ndomains = cap_ndoms(iommu->cap);
1561         for_each_set_bit(num, iommu->domain_ids, ndomains) {
1562                 if (iommu->domains[num] == domain) {
1563                         clear_bit(num, iommu->domain_ids);
1564                         iommu->domains[num] = NULL;
1565                         break;
1566                 }
1567         }
1568         spin_unlock_irqrestore(&iommu->lock, flags);
1569 }
1570
1571 static struct iova_domain reserved_iova_list;
1572 static struct lock_class_key reserved_rbtree_key;
1573
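     /*
      * Build the global list of IOVA ranges that must never be handed out
      * for DMA: the IOAPIC MMIO window and every PCI MMIO resource (so a
      * DMA address can never be mistaken for peer-to-peer MMIO).  Each new
      * domain copies this list via domain_reserve_special_ranges().
      */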
1574 static int dmar_init_reserved_ranges(void)
1575 {
1576         struct pci_dev *pdev = NULL;
1577         struct iova *iova;
1578         int i;
1579
1580         init_iova_domain(&reserved_iova_list, DMA_32BIT_PFN);
1581
1582         lockdep_set_class(&reserved_iova_list.iova_rbtree_lock,
1583                 &reserved_rbtree_key);
1584
1585         /* IOAPIC ranges shouldn't be accessed by DMA */
1586         iova = reserve_iova(&reserved_iova_list, IOVA_PFN(IOAPIC_RANGE_START),
1587                 IOVA_PFN(IOAPIC_RANGE_END));
1588         if (!iova) {
1589                 printk(KERN_ERR "Reserve IOAPIC range failed\n");
1590                 return -ENODEV;
1591         }
1592
1593         /* Reserve all PCI MMIO to avoid peer-to-peer access */
1594         for_each_pci_dev(pdev) {
1595                 struct resource *r;
1596
1597                 for (i = 0; i < PCI_NUM_RESOURCES; i++) {
1598                         r = &pdev->resource[i];
1599                         if (!r->flags || !(r->flags & IORESOURCE_MEM))
1600                                 continue;
1601                         iova = reserve_iova(&reserved_iova_list,
1602                                             IOVA_PFN(r->start),
1603                                             IOVA_PFN(r->end));
1604                         if (!iova) {
1605                                 printk(KERN_ERR "Reserve iova failed\n");
1606                                 return -ENODEV;
1607                         }
1608                 }
1609         }
1610         return 0;
1611 }
1612
1613 static void domain_reserve_special_ranges(struct dmar_domain *domain)
1614 {
1615         copy_reserved_iova(&reserved_iova_list, &domain->iovad);
1616 }
1617
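     /*
      * Round a guest address width up to the next width that a whole number
      * of page-table levels can cover: 12 offset bits plus a multiple of the
      * 9-bit stride, capped at 64.  For example 39, 48 and 57 map to
      * themselves, while a 50-bit request becomes 57 ((50 - 12) % 9 == 2,
      * so 50 + 9 - 2 = 57).
      */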
1618 static inline int guestwidth_to_adjustwidth(int gaw)
1619 {
1620         int agaw;
1621         int r = (gaw - 12) % 9;
1622
1623         if (r == 0)
1624                 agaw = gaw;
1625         else
1626                 agaw = gaw + 9 - r;
1627         if (agaw > 64)
1628                 agaw = 64;
1629         return agaw;
1630 }
1631
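     /*
      * Initialize a newly attached domain: set up its IOVA allocator, clamp
      * the requested guest width to what the hardware's MGAW allows, pick a
      * supported AGAW, cache the coherency/snooping/superpage capabilities
      * and allocate the top-level page directory.
      */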
1632 static int domain_init(struct dmar_domain *domain, int guest_width)
1633 {
1634         struct intel_iommu *iommu;
1635         int adjust_width, agaw;
1636         unsigned long sagaw;
1637
1638         init_iova_domain(&domain->iovad, DMA_32BIT_PFN);
1639         domain_reserve_special_ranges(domain);
1640
1641         /* calculate AGAW */
1642         iommu = domain_get_iommu(domain);
1643         if (guest_width > cap_mgaw(iommu->cap))
1644                 guest_width = cap_mgaw(iommu->cap);
1645         domain->gaw = guest_width;
1646         adjust_width = guestwidth_to_adjustwidth(guest_width);
1647         agaw = width_to_agaw(adjust_width);
1648         sagaw = cap_sagaw(iommu->cap);
1649         if (!test_bit(agaw, &sagaw)) {
1650                 /* hardware doesn't support it, choose a bigger one */
1651                 pr_debug("IOMMU: hardware doesn't support agaw %d\n", agaw);
1652                 agaw = find_next_bit(&sagaw, 5, agaw);
1653                 if (agaw >= 5)
1654                         return -ENODEV;
1655         }
1656         domain->agaw = agaw;
1657
1658         if (ecap_coherent(iommu->ecap))
1659                 domain->iommu_coherency = 1;
1660         else
1661                 domain->iommu_coherency = 0;
1662
1663         if (ecap_sc_support(iommu->ecap))
1664                 domain->iommu_snooping = 1;
1665         else
1666                 domain->iommu_snooping = 0;
1667
1668         if (intel_iommu_superpage)
1669                 domain->iommu_superpage = fls(cap_super_page_val(iommu->cap));
1670         else
1671                 domain->iommu_superpage = 0;
1672
1673         domain->nid = iommu->node;
1674
1675         /* always allocate the top pgd */
1676         domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid);
1677         if (!domain->pgd)
1678                 return -ENOMEM;
1679         __iommu_flush_cache(iommu, domain->pgd, PAGE_SIZE);
1680         return 0;
1681 }
1682
1683 static void domain_exit(struct dmar_domain *domain)
1684 {
1685         struct dmar_drhd_unit *drhd;
1686         struct intel_iommu *iommu;
1687         struct page *freelist = NULL;
1688
1689         /* Domain 0 is reserved, so don't process it */
1690         if (!domain)
1691                 return;
1692
1693         /* Flush any lazy unmaps that may reference this domain */
1694         if (!intel_iommu_strict)
1695                 flush_unmaps_timeout(0);
1696
1697         /* remove associated devices */
1698         domain_remove_dev_info(domain);
1699
1700         /* destroy iovas */
1701         put_iova_domain(&domain->iovad);
1702
1703         freelist = domain_unmap(domain, 0, DOMAIN_MAX_PFN(domain->gaw));
1704
1705         /* clear attached or cached domains */
1706         rcu_read_lock();
1707         for_each_active_iommu(iommu, drhd)
1708                 if (domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE ||
1709                     test_bit(iommu->seq_id, domain->iommu_bmp))
1710                         iommu_detach_domain(domain, iommu);
1711         rcu_read_unlock();
1712
1713         dma_free_pagelist(freelist);
1714
1715         free_domain_mem(domain);
1716 }
1717
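     /*
      * Program one context entry (bus/devfn) on @iommu to point at @domain's
      * page tables.  VM and static-identity domains may need a per-IOMMU
      * domain id here, since their ids were not allocated from this IOMMU's
      * id space.  In pass-through mode the page-table root (ASR) is ignored
      * by hardware and the address width is set to the largest supported
      * AGAW.
      */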
1718 static int domain_context_mapping_one(struct dmar_domain *domain,
1719                                       struct intel_iommu *iommu,
1720                                       u8 bus, u8 devfn, int translation)
1721 {
1722         struct context_entry *context;
1723         unsigned long flags;
1724         struct dma_pte *pgd;
1725         unsigned long num;
1726         unsigned long ndomains;
1727         int id;
1728         int agaw;
1729         struct device_domain_info *info = NULL;
1730
1731         pr_debug("Set context mapping for %02x:%02x.%d\n",
1732                 bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
1733
1734         BUG_ON(!domain->pgd);
1735         BUG_ON(translation != CONTEXT_TT_PASS_THROUGH &&
1736                translation != CONTEXT_TT_MULTI_LEVEL);
1737
1738         context = device_to_context_entry(iommu, bus, devfn);
1739         if (!context)
1740                 return -ENOMEM;
1741         spin_lock_irqsave(&iommu->lock, flags);
1742         if (context_present(context)) {
1743                 spin_unlock_irqrestore(&iommu->lock, flags);
1744                 return 0;
1745         }
1746
1747         id = domain->id;
1748         pgd = domain->pgd;
1749
1750         if (domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE ||
1751             domain->flags & DOMAIN_FLAG_STATIC_IDENTITY) {
1752                 int found = 0;
1753
1754                 /* find an available domain id for this device in iommu */
1755                 ndomains = cap_ndoms(iommu->cap);
1756                 for_each_set_bit(num, iommu->domain_ids, ndomains) {
1757                         if (iommu->domains[num] == domain) {
1758                                 id = num;
1759                                 found = 1;
1760                                 break;
1761                         }
1762                 }
1763
1764                 if (found == 0) {
1765                         num = find_first_zero_bit(iommu->domain_ids, ndomains);
1766                         if (num >= ndomains) {
1767                                 spin_unlock_irqrestore(&iommu->lock, flags);
1768                                 printk(KERN_ERR "IOMMU: no free domain ids\n");
1769                                 return -EFAULT;
1770                         }
1771
1772                         set_bit(num, iommu->domain_ids);
1773                         iommu->domains[num] = domain;
1774                         id = num;
1775                 }
1776
1777                 /* Skip top levels of page tables for
1778                  * iommu which has less agaw than default.
1779                  * Unnecessary for PT mode.
1780                  */
1781                 if (translation != CONTEXT_TT_PASS_THROUGH) {
1782                         for (agaw = domain->agaw; agaw != iommu->agaw; agaw--) {
1783                                 pgd = phys_to_virt(dma_pte_addr(pgd));
1784                                 if (!dma_pte_present(pgd)) {
1785                                         spin_unlock_irqrestore(&iommu->lock, flags);
1786                                         return -ENOMEM;
1787                                 }
1788                         }
1789                 }
1790         }
1791
1792         context_set_domain_id(context, id);
1793
1794         if (translation != CONTEXT_TT_PASS_THROUGH) {
1795                 info = iommu_support_dev_iotlb(domain, iommu, bus, devfn);
1796                 translation = info ? CONTEXT_TT_DEV_IOTLB :
1797                                      CONTEXT_TT_MULTI_LEVEL;
1798         }
1799         /*
1800          * In pass through mode, AW must be programmed to indicate the largest
1801          * AGAW value supported by hardware. And ASR is ignored by hardware.
1802          */
1803         if (unlikely(translation == CONTEXT_TT_PASS_THROUGH))
1804                 context_set_address_width(context, iommu->msagaw);
1805         else {
1806                 context_set_address_root(context, virt_to_phys(pgd));
1807                 context_set_address_width(context, iommu->agaw);
1808         }
1809
1810         context_set_translation_type(context, translation);
1811         context_set_fault_enable(context);
1812         context_set_present(context);
1813         domain_flush_cache(domain, context, sizeof(*context));
1814
1815         /*
1816          * It's a non-present to present mapping. If hardware doesn't cache
1817          * non-present entries we only need to flush the write-buffer. If it
1818          * _does_ cache non-present entries, then it does so in the special
1819          * domain #0, which we have to flush:
1820          */
1821         if (cap_caching_mode(iommu->cap)) {
1822                 iommu->flush.flush_context(iommu, 0,
1823                                            (((u16)bus) << 8) | devfn,
1824                                            DMA_CCMD_MASK_NOBIT,
1825                                            DMA_CCMD_DEVICE_INVL);
1826                 iommu->flush.flush_iotlb(iommu, domain->id, 0, 0, DMA_TLB_DSI_FLUSH);
1827         } else {
1828                 iommu_flush_write_buffer(iommu);
1829         }
1830         iommu_enable_dev_iotlb(info);
1831         spin_unlock_irqrestore(&iommu->lock, flags);
1832
1833         spin_lock_irqsave(&domain->iommu_lock, flags);
1834         if (!test_and_set_bit(iommu->seq_id, domain->iommu_bmp)) {
1835                 domain->iommu_count++;
1836                 if (domain->iommu_count == 1)
1837                         domain->nid = iommu->node;
1838                 domain_update_iommu_cap(domain);
1839         }
1840         spin_unlock_irqrestore(&domain->iommu_lock, flags);
1841         return 0;
1842 }
1843
1844 static int
1845 domain_context_mapping(struct dmar_domain *domain, struct device *dev,
1846                        int translation)
1847 {
1848         int ret;
1849         struct pci_dev *pdev, *tmp, *parent;
1850         struct intel_iommu *iommu;
1851         u8 bus, devfn;
1852
1853         iommu = device_to_iommu(dev, &bus, &devfn);
1854         if (!iommu)
1855                 return -ENODEV;
1856
1857         ret = domain_context_mapping_one(domain, iommu, bus, devfn,
1858                                          translation);
1859         if (ret || !dev_is_pci(dev))
1860                 return ret;
1861
1862         /* dependent device mapping */
1863         pdev = to_pci_dev(dev);
1864         tmp = pci_find_upstream_pcie_bridge(pdev);
1865         if (!tmp)
1866                 return 0;
1867         /* Secondary interface's bus number and devfn 0 */
1868         parent = pdev->bus->self;
1869         while (parent != tmp) {
1870                 ret = domain_context_mapping_one(domain, iommu,
1871                                                  parent->bus->number,
1872                                                  parent->devfn, translation);
1873                 if (ret)
1874                         return ret;
1875                 parent = parent->bus->self;
1876         }
1877         if (pci_is_pcie(tmp)) /* this is a PCIe-to-PCI bridge */
1878                 return domain_context_mapping_one(domain, iommu,
1879                                         tmp->subordinate->number, 0,
1880                                         translation);
1881         else /* this is a legacy PCI bridge */
1882                 return domain_context_mapping_one(domain, iommu,
1883                                                   tmp->bus->number,
1884                                                   tmp->devfn,
1885                                                   translation);
1886 }
1887
1888 static int domain_context_mapped(struct device *dev)
1889 {
1890         int ret;
1891         struct pci_dev *pdev, *tmp, *parent;
1892         struct intel_iommu *iommu;
1893         u8 bus, devfn;
1894
1895         iommu = device_to_iommu(dev, &bus, &devfn);
1896         if (!iommu)
1897                 return -ENODEV;
1898
1899         ret = device_context_mapped(iommu, bus, devfn);
1900         if (!ret || !dev_is_pci(dev))
1901                 return ret;
1902
1903         /* dependent device mapping */
1904         pdev = to_pci_dev(dev);
1905         tmp = pci_find_upstream_pcie_bridge(pdev);
1906         if (!tmp)
1907                 return ret;
1908         /* Secondary interface's bus number and devfn 0 */
1909         parent = pdev->bus->self;
1910         while (parent != tmp) {
1911                 ret = device_context_mapped(iommu, parent->bus->number,
1912                                             parent->devfn);
1913                 if (!ret)
1914                         return ret;
1915                 parent = parent->bus->self;
1916         }
1917         if (pci_is_pcie(tmp))
1918                 return device_context_mapped(iommu, tmp->subordinate->number,
1919                                              0);
1920         else
1921                 return device_context_mapped(iommu, tmp->bus->number,
1922                                              tmp->devfn);
1923 }
1924
1925 /* Returns a number of VTD pages, but aligned to MM page size */
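     /* E.g. with 4KiB pages, an offset of 0x200 and a size of 0x1000 span
        two VT-d pages. */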
1926 static inline unsigned long aligned_nrpages(unsigned long host_addr,
1927                                             size_t size)
1928 {
1929         host_addr &= ~PAGE_MASK;
1930         return PAGE_ALIGN(host_addr + size) >> VTD_PAGE_SHIFT;
1931 }
1932
1933 /* Return largest possible superpage level for a given mapping */
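     /*
      * With 4KiB base pages and the 9-bit stride, level 1 is a normal page,
      * level 2 a 2MiB superpage and level 3 a 1GiB superpage; moving up a
      * level requires both PFNs to share that alignment and at least that
      * many pages left to map.
      */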
1934 static inline int hardware_largepage_caps(struct dmar_domain *domain,
1935                                           unsigned long iov_pfn,
1936                                           unsigned long phy_pfn,
1937                                           unsigned long pages)
1938 {
1939         int support, level = 1;
1940         unsigned long pfnmerge;
1941
1942         support = domain->iommu_superpage;
1943
1944         /* To use a large page, the virtual *and* physical addresses
1945            must be aligned to 2MiB/1GiB/etc. Lower bits set in either
1946            of them will mean we have to use smaller pages. So just
1947            merge them and check both at once. */
1948         pfnmerge = iov_pfn | phy_pfn;
1949
1950         while (support && !(pfnmerge & ~VTD_STRIDE_MASK)) {
1951                 pages >>= VTD_STRIDE_SHIFT;
1952                 if (!pages)
1953                         break;
1954                 pfnmerge >>= VTD_STRIDE_SHIFT;
1955                 level++;
1956                 support--;
1957         }
1958         return level;
1959 }
1960
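     /*
      * Core mapping loop: walk nr_pages of IOVA space and install PTEs,
      * taking the physical addresses either from phys_pfn (contiguous
      * mapping) or from the scatterlist.  sg_res counts how many VT-d pages
      * of the current sg entry are still unmapped; PTEs are written with a
      * local cmpxchg and the CPU cache is flushed one PTE page at a time.
      */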
1961 static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
1962                             struct scatterlist *sg, unsigned long phys_pfn,
1963                             unsigned long nr_pages, int prot)
1964 {
1965         struct dma_pte *first_pte = NULL, *pte = NULL;
1966         phys_addr_t uninitialized_var(pteval);
1967         int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
1968         unsigned long sg_res;
1969         unsigned int largepage_lvl = 0;
1970         unsigned long lvl_pages = 0;
1971
1972         BUG_ON(addr_width < BITS_PER_LONG && (iov_pfn + nr_pages - 1) >> addr_width);
1973
1974         if ((prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0)
1975                 return -EINVAL;
1976
1977         prot &= DMA_PTE_READ | DMA_PTE_WRITE | DMA_PTE_SNP;
1978
1979         if (sg)
1980                 sg_res = 0;
1981         else {
1982                 sg_res = nr_pages + 1;
1983                 pteval = ((phys_addr_t)phys_pfn << VTD_PAGE_SHIFT) | prot;
1984         }
1985
1986         while (nr_pages > 0) {
1987                 uint64_t tmp;
1988
1989                 if (!sg_res) {
1990                         sg_res = aligned_nrpages(sg->offset, sg->length);
1991                         sg->dma_address = ((dma_addr_t)iov_pfn << VTD_PAGE_SHIFT) + sg->offset;
1992                         sg->dma_length = sg->length;
1993                         pteval = page_to_phys(sg_page(sg)) | prot;
1994                         phys_pfn = pteval >> VTD_PAGE_SHIFT;
1995                 }
1996
1997                 if (!pte) {
1998                         largepage_lvl = hardware_largepage_caps(domain, iov_pfn, phys_pfn, sg_res);
1999
2000                         first_pte = pte = pfn_to_dma_pte(domain, iov_pfn, &largepage_lvl);
2001                         if (!pte)
2002                                 return -ENOMEM;
2003                         /* It is a large page */
2004                         if (largepage_lvl > 1) {
2005                                 pteval |= DMA_PTE_LARGE_PAGE;
2006                                 /* Ensure that old small page tables are removed to make room
2007                                    for superpage, if they exist. */
2008                                 dma_pte_clear_range(domain, iov_pfn,
2009                                                     iov_pfn + lvl_to_nr_pages(largepage_lvl) - 1);
2010                                 dma_pte_free_pagetable(domain, iov_pfn,
2011                                                        iov_pfn + lvl_to_nr_pages(largepage_lvl) - 1);
2012                         } else {
2013                                 pteval &= ~(uint64_t)DMA_PTE_LARGE_PAGE;
2014                         }
2015
2016                 }
2017                 /* We don't need a lock here; nobody else
2018                  * touches the iova range
2019                  */
2020                 tmp = cmpxchg64_local(&pte->val, 0ULL, pteval);
2021                 if (tmp) {
2022                         static int dumps = 5;
2023                         printk(KERN_CRIT "ERROR: DMA PTE for vPFN 0x%lx already set (to %llx not %llx)\n",
2024                                iov_pfn, tmp, (unsigned long long)pteval);
2025                         if (dumps) {
2026                                 dumps--;
2027                                 debug_dma_dump_mappings(NULL);
2028                         }
2029                         WARN_ON(1);
2030                 }
2031
2032                 lvl_pages = lvl_to_nr_pages(largepage_lvl);
2033
2034                 BUG_ON(nr_pages < lvl_pages);
2035                 BUG_ON(sg_res < lvl_pages);
2036
2037                 nr_pages -= lvl_pages;
2038                 iov_pfn += lvl_pages;
2039                 phys_pfn += lvl_pages;
2040                 pteval += lvl_pages * VTD_PAGE_SIZE;
2041                 sg_res -= lvl_pages;
2042
2043                 /* If the next PTE would be the first in a new page, then we
2044                    need to flush the cache on the entries we've just written.
2045                    And then we'll need to recalculate 'pte', so clear it and
2046                    let it get set again in the if (!pte) block above.
2047
2048                    If we're done (!nr_pages) we need to flush the cache too.
2049
2050                    Also if we've been setting superpages, we may need to
2051                    recalculate 'pte' and switch back to smaller pages for the
2052                    end of the mapping, if the trailing size is not enough to
2053                    use another superpage (i.e. sg_res < lvl_pages). */
2054                 pte++;
2055                 if (!nr_pages || first_pte_in_page(pte) ||
2056                     (largepage_lvl > 1 && sg_res < lvl_pages)) {
2057                         domain_flush_cache(domain, first_pte,
2058                                            (void *)pte - (void *)first_pte);
2059                         pte = NULL;
2060                 }
2061
2062                 if (!sg_res && nr_pages)
2063                         sg = sg_next(sg);
2064         }
2065         return 0;
2066 }
2067
2068 static inline int domain_sg_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
2069                                     struct scatterlist *sg, unsigned long nr_pages,
2070                                     int prot)
2071 {
2072         return __domain_mapping(domain, iov_pfn, sg, 0, nr_pages, prot);
2073 }
2074
2075 static inline int domain_pfn_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
2076                                      unsigned long phys_pfn, unsigned long nr_pages,
2077                                      int prot)
2078 {
2079         return __domain_mapping(domain, iov_pfn, NULL, phys_pfn, nr_pages, prot);
2080 }
2081
2082 static void iommu_detach_dev(struct intel_iommu *iommu, u8 bus, u8 devfn)
2083 {
2084         if (!iommu)
2085                 return;
2086
2087         clear_context_table(iommu, bus, devfn);
2088         iommu->flush.flush_context(iommu, 0, 0, 0,
2089                                            DMA_CCMD_GLOBAL_INVL);
2090         iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
2091 }
2092
2093 static inline void unlink_domain_info(struct device_domain_info *info)
2094 {
2095         assert_spin_locked(&device_domain_lock);
2096         list_del(&info->link);
2097         list_del(&info->global);
2098         if (info->dev)
2099                 info->dev->archdata.iommu = NULL;
2100 }
2101
2102 static void domain_remove_dev_info(struct dmar_domain *domain)
2103 {
2104         struct device_domain_info *info;
2105         unsigned long flags, flags2;
2106
2107         spin_lock_irqsave(&device_domain_lock, flags);
2108         while (!list_empty(&domain->devices)) {
2109                 info = list_entry(domain->devices.next,
2110                         struct device_domain_info, link);
2111                 unlink_domain_info(info);
2112                 spin_unlock_irqrestore(&device_domain_lock, flags);
2113
2114                 iommu_disable_dev_iotlb(info);
2115                 iommu_detach_dev(info->iommu, info->bus, info->devfn);
2116
2117                 if (domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE) {
2118                         iommu_detach_dependent_devices(info->iommu, info->dev);
2119                         /* clear this iommu in iommu_bmp, update iommu count
2120                          * and capabilities
2121                          */
2122                         spin_lock_irqsave(&domain->iommu_lock, flags2);
2123                         if (test_and_clear_bit(info->iommu->seq_id,
2124                                                domain->iommu_bmp)) {
2125                                 domain->iommu_count--;
2126                                 domain_update_iommu_cap(domain);
2127                         }
2128                         spin_unlock_irqrestore(&domain->iommu_lock, flags2);
2129                 }
2130
2131                 free_devinfo_mem(info);
2132                 spin_lock_irqsave(&device_domain_lock, flags);
2133         }
2134         spin_unlock_irqrestore(&device_domain_lock, flags);
2135 }
2136
2137 /*
2138  * find_domain
2139  * Note: we use struct device->archdata.iommu to store the info
2140  */
2141 static struct dmar_domain *find_domain(struct device *dev)
2142 {
2143         struct device_domain_info *info;
2144
2145         /* No lock here, assumes no domain exit in normal case */
2146         info = dev->archdata.iommu;
2147         if (info)
2148                 return info->domain;
2149         return NULL;
2150 }
2151
2152 static inline struct device_domain_info *
2153 dmar_search_domain_by_dev_info(int segment, int bus, int devfn)
2154 {
2155         struct device_domain_info *info;
2156
2157         list_for_each_entry(info, &device_domain_list, global)
2158                 if (info->iommu->segment == segment && info->bus == bus &&
2159                     info->devfn == devfn)
2160                         return info;
2161
2162         return NULL;
2163 }
2164
2165 static struct dmar_domain *dmar_insert_dev_info(struct intel_iommu *iommu,
2166                                                 int bus, int devfn,
2167                                                 struct device *dev,
2168                                                 struct dmar_domain *domain)
2169 {
2170         struct dmar_domain *found = NULL;
2171         struct device_domain_info *info;
2172         unsigned long flags;
2173
2174         info = alloc_devinfo_mem();
2175         if (!info)
2176                 return NULL;
2177
2178         info->bus = bus;
2179         info->devfn = devfn;
2180         info->dev = dev;
2181         info->domain = domain;
2182         info->iommu = iommu;
2183         if (!dev)
2184                 domain->flags |= DOMAIN_FLAG_P2P_MULTIPLE_DEVICES;
2185
2186         spin_lock_irqsave(&device_domain_lock, flags);
2187         if (dev)
2188                 found = find_domain(dev);
2189         else {
2190                 struct device_domain_info *info2;
2191                 info2 = dmar_search_domain_by_dev_info(iommu->segment, bus, devfn);
2192                 if (info2)
2193                         found = info2->domain;
2194         }
2195         if (found) {
2196                 spin_unlock_irqrestore(&device_domain_lock, flags);
2197                 free_devinfo_mem(info);
2198                 /* Caller must free the original domain */
2199                 return found;
2200         }
2201
2202         list_add(&info->link, &domain->devices);
2203         list_add(&info->global, &device_domain_list);
2204         if (dev)
2205                 dev->archdata.iommu = info;
2206         spin_unlock_irqrestore(&device_domain_lock, flags);
2207
2208         return domain;
2209 }
2210
2211 /* domain is initialized */
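     /*
      * Find or create the domain for a device: reuse an existing one if the
      * device (or the PCIe-to-PCI bridge above it) already has a domain,
      * otherwise allocate a new domain, attach it to the device's IOMMU,
      * initialize it and register the device_domain_info entries.
      */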
2212 static struct dmar_domain *get_domain_for_dev(struct device *dev, int gaw)
2213 {
2214         struct dmar_domain *domain, *free = NULL;
2215         struct intel_iommu *iommu = NULL;
2216         struct device_domain_info *info;
2217         struct pci_dev *dev_tmp = NULL;
2218         unsigned long flags;
2219         u8 bus, devfn, bridge_bus, bridge_devfn;
2220
2221         domain = find_domain(dev);
2222         if (domain)
2223                 return domain;
2224
2225         if (dev_is_pci(dev)) {
2226                 struct pci_dev *pdev = to_pci_dev(dev);
2227                 u16 segment;
2228
2229                 segment = pci_domain_nr(pdev->bus);
2230                 dev_tmp = pci_find_upstream_pcie_bridge(pdev);
2231                 if (dev_tmp) {
2232                         if (pci_is_pcie(dev_tmp)) {
2233                                 bridge_bus = dev_tmp->subordinate->number;
2234                                 bridge_devfn = 0;
2235                         } else {
2236                                 bridge_bus = dev_tmp->bus->number;
2237                                 bridge_devfn = dev_tmp->devfn;
2238                         }
2239                         spin_lock_irqsave(&device_domain_lock, flags);
2240                         info = dmar_search_domain_by_dev_info(segment,
2241                                                               bridge_bus,
2242                                                               bridge_devfn);
2243                         if (info) {
2244                                 iommu = info->iommu;
2245                                 domain = info->domain;
2246                         }
2247                         spin_unlock_irqrestore(&device_domain_lock, flags);
2248                         /* pcie-pci bridge already has a domain, use it */
2249                         if (info)
2250                                 goto found_domain;
2251                 }
2252         }
2253
2254         iommu = device_to_iommu(dev, &bus, &devfn);
2255         if (!iommu)
2256                 goto error;
2257
2258         /* Allocate and initialize new domain for the device */
2259         domain = alloc_domain(false);
2260         if (!domain)
2261                 goto error;
2262         if (iommu_attach_domain(domain, iommu)) {
2263                 free_domain_mem(domain);
2264                 domain = NULL;
2265                 goto error;
2266         }
2267         free = domain;
2268         if (domain_init(domain, gaw))
2269                 goto error;
2270
2271         /* register pcie-to-pci device */
2272         if (dev_tmp) {
2273                 domain = dmar_insert_dev_info(iommu, bridge_bus, bridge_devfn,
2274                                               NULL, domain);
2275                 if (!domain)
2276                         goto error;
2277         }
2278
2279 found_domain:
2280         domain = dmar_insert_dev_info(iommu, bus, devfn, dev, domain);
2281 error:
2282         if (free != domain)
2283                 domain_exit(free);
2284
2285         return domain;
2286 }
2287
2288 static int iommu_identity_mapping;
2289 #define IDENTMAP_ALL            1
2290 #define IDENTMAP_GFX            2
2291 #define IDENTMAP_AZALIA         4
2292
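     /*
      * Install a 1:1 mapping for [start, end] in the given domain: reserve
      * the matching IOVA range so the allocator never hands it out, clear
      * any PTEs that already cover it (RMRRs may overlap real memory) and
      * then map it with iova equal to the physical address.
      */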
2293 static int iommu_domain_identity_map(struct dmar_domain *domain,
2294                                      unsigned long long start,
2295                                      unsigned long long end)
2296 {
2297         unsigned long first_vpfn = start >> VTD_PAGE_SHIFT;
2298         unsigned long last_vpfn = end >> VTD_PAGE_SHIFT;
2299
2300         if (!reserve_iova(&domain->iovad, dma_to_mm_pfn(first_vpfn),
2301                           dma_to_mm_pfn(last_vpfn))) {
2302                 printk(KERN_ERR "IOMMU: reserve iova failed\n");
2303                 return -ENOMEM;
2304         }
2305
2306         pr_debug("Mapping reserved region %llx-%llx for domain %d\n",
2307                  start, end, domain->id);
2308         /*
2309          * RMRR range might have overlap with physical memory range,
2310          * clear it first
2311          */
2312         dma_pte_clear_range(domain, first_vpfn, last_vpfn);
2313
2314         return domain_pfn_mapping(domain, first_vpfn, first_vpfn,
2315                                   last_vpfn - first_vpfn + 1,
2316                                   DMA_PTE_READ|DMA_PTE_WRITE);
2317 }
2318
2319 static int iommu_prepare_identity_map(struct device *dev,
2320                                       unsigned long long start,
2321                                       unsigned long long end)
2322 {
2323         struct dmar_domain *domain;
2324         int ret;
2325
2326         domain = get_domain_for_dev(dev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
2327         if (!domain)
2328                 return -ENOMEM;
2329
2330         /* For _hardware_ passthrough, don't bother. But for software
2331            passthrough, we do it anyway -- it may indicate a memory
2332            range which is reserved in E820, and so didn't get set
2333            up in si_domain to start with */
2334         if (domain == si_domain && hw_pass_through) {
2335                 printk(KERN_INFO "Ignoring identity map for HW passthrough device %s [0x%Lx - 0x%Lx]\n",
2336                        dev_name(dev), start, end);
2337                 return 0;
2338         }
2339
2340         printk(KERN_INFO
2341                "IOMMU: Setting identity map for device %s [0x%Lx - 0x%Lx]\n",
2342                dev_name(dev), start, end);
2343
2344         if (end < start) {
2345                 WARN(1, "Your BIOS is broken; RMRR ends before it starts!\n"
2346                         "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
2347                         dmi_get_system_info(DMI_BIOS_VENDOR),
2348                         dmi_get_system_info(DMI_BIOS_VERSION),
2349                         dmi_get_system_info(DMI_PRODUCT_VERSION));
2350                 ret = -EIO;
2351                 goto error;
2352         }
2353
2354         if (end >> agaw_to_width(domain->agaw)) {
2355                 WARN(1, "Your BIOS is broken; RMRR exceeds permitted address width (%d bits)\n"
2356                      "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
2357                      agaw_to_width(domain->agaw),
2358                      dmi_get_system_info(DMI_BIOS_VENDOR),
2359                      dmi_get_system_info(DMI_BIOS_VERSION),
2360                      dmi_get_system_info(DMI_PRODUCT_VERSION));
2361                 ret = -EIO;
2362                 goto error;
2363         }
2364
2365         ret = iommu_domain_identity_map(domain, start, end);
2366         if (ret)
2367                 goto error;
2368
2369         /* context entry init */
2370         ret = domain_context_mapping(domain, dev, CONTEXT_TT_MULTI_LEVEL);
2371         if (ret)
2372                 goto error;
2373
2374         return 0;
2375
2376  error:
2377         domain_exit(domain);
2378         return ret;
2379 }
2380
2381 static inline int iommu_prepare_rmrr_dev(struct dmar_rmrr_unit *rmrr,
2382                                          struct device *dev)
2383 {
2384         if (dev->archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
2385                 return 0;
2386         return iommu_prepare_identity_map(dev, rmrr->base_address,
2387                                           rmrr->end_address);
2388 }
2389
2390 #ifdef CONFIG_INTEL_IOMMU_FLOPPY_WA
2391 static inline void iommu_prepare_isa(void)
2392 {
2393         struct pci_dev *pdev;
2394         int ret;
2395
2396         pdev = pci_get_class(PCI_CLASS_BRIDGE_ISA << 8, NULL);
2397         if (!pdev)
2398                 return;
2399
2400         printk(KERN_INFO "IOMMU: Prepare 0-16MiB unity mapping for LPC\n");
2401         ret = iommu_prepare_identity_map(&pdev->dev, 0, 16*1024*1024 - 1);
2402
2403         if (ret)
2404                 printk(KERN_ERR "IOMMU: Failed to create 0-16MiB identity map; "
2405                        "floppy might not work\n");
2406
2407 }
2408 #else
2409 static inline void iommu_prepare_isa(void)
2410 {
2411         return;
2412 }
2413 #endif /* CONFIG_INTEL_IOMMU_FLOPPY_WA */
2414
2415 static int md_domain_init(struct dmar_domain *domain, int guest_width);
2416
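     /*
      * Build the static identity (si) domain: attach it to every active
      * IOMMU and, unless hardware pass-through is in use, map every online
      * node's memory 1:1 so identity-mapped devices can reach all of it.
      */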
2417 static int __init si_domain_init(int hw)
2418 {
2419         struct dmar_drhd_unit *drhd;
2420         struct intel_iommu *iommu;
2421         int nid, ret = 0;
2422
2423         si_domain = alloc_domain(false);
2424         if (!si_domain)
2425                 return -EFAULT;
2426
2427         si_domain->flags = DOMAIN_FLAG_STATIC_IDENTITY;
2428
2429         for_each_active_iommu(iommu, drhd) {
2430                 ret = iommu_attach_domain(si_domain, iommu);
2431                 if (ret) {
2432                         domain_exit(si_domain);
2433                         return -EFAULT;
2434                 }
2435         }
2436
2437         if (md_domain_init(si_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
2438                 domain_exit(si_domain);
2439                 return -EFAULT;
2440         }
2441
2442         pr_debug("IOMMU: identity mapping domain is domain %d\n",
2443                  si_domain->id);
2444
2445         if (hw)
2446                 return 0;
2447
2448         for_each_online_node(nid) {
2449                 unsigned long start_pfn, end_pfn;
2450                 int i;
2451
2452                 for_each_mem_pfn_range(i, nid, &start_pfn, &end_pfn, NULL) {
2453                         ret = iommu_domain_identity_map(si_domain,
2454                                         PFN_PHYS(start_pfn), PFN_PHYS(end_pfn));
2455                         if (ret)
2456                                 return ret;
2457                 }
2458         }
2459
2460         return 0;
2461 }
2462
2463 static int identity_mapping(struct device *dev)
2464 {
2465         struct device_domain_info *info;
2466
2467         if (likely(!iommu_identity_mapping))
2468                 return 0;
2469
2470         info = dev->archdata.iommu;
2471         if (info && info != DUMMY_DEVICE_DOMAIN_INFO)
2472                 return (info->domain == si_domain);
2473
2474         return 0;
2475 }
2476
2477 static int domain_add_dev_info(struct dmar_domain *domain,
2478                                struct device *dev, int translation)
2479 {
2480         struct dmar_domain *ndomain;
2481         struct intel_iommu *iommu;
2482         u8 bus, devfn;
2483         int ret;
2484
2485         iommu = device_to_iommu(dev, &bus, &devfn);
2486         if (!iommu)
2487                 return -ENODEV;
2488
2489         ndomain = dmar_insert_dev_info(iommu, bus, devfn, dev, domain);
2490         if (ndomain != domain)
2491                 return -EBUSY;
2492
2493         ret = domain_context_mapping(domain, dev, translation);
2494         if (ret) {
2495                 domain_remove_one_dev_info(domain, dev);
2496                 return ret;
2497         }
2498
2499         return 0;
2500 }
2501
2502 static bool device_has_rmrr(struct device *dev)
2503 {
2504         struct dmar_rmrr_unit *rmrr;
2505         struct device *tmp;
2506         int i;
2507
2508         rcu_read_lock();
2509         for_each_rmrr_units(rmrr) {
2510                 /*
2511                  * Return TRUE if this RMRR contains the device that
2512                  * is passed in.
2513                  */
2514                 for_each_active_dev_scope(rmrr->devices,
2515                                           rmrr->devices_cnt, i, tmp)
2516                         if (tmp == dev) {
2517                                 rcu_read_unlock();
2518                                 return true;
2519                         }
2520         }
2521         rcu_read_unlock();
2522         return false;
2523 }
2524
2525 static int iommu_should_identity_map(struct device *dev, int startup)
2526 {
2527
2528         if (dev_is_pci(dev)) {
2529                 struct pci_dev *pdev = to_pci_dev(dev);
2530
2531                 /*
2532                  * We want to prevent any device associated with an RMRR from
2533                  * getting placed into the SI Domain. This is done because
2534                  * problems exist when devices are moved in and out of domains
2535                  * and their respective RMRR info is lost. We exempt USB devices
2536                  * from this process due to their usage of RMRRs that are known
2537                  * to not be needed after BIOS hand-off to OS.
2538                  */
2539                 if (device_has_rmrr(dev) &&
2540                     (pdev->class >> 8) != PCI_CLASS_SERIAL_USB)
2541                         return 0;
2542
2543                 if ((iommu_identity_mapping & IDENTMAP_AZALIA) && IS_AZALIA(pdev))
2544                         return 1;
2545
2546                 if ((iommu_identity_mapping & IDENTMAP_GFX) && IS_GFX_DEVICE(pdev))
2547                         return 1;
2548
2549                 if (!(iommu_identity_mapping & IDENTMAP_ALL))
2550                         return 0;
2551
2552                 /*
2553                  * We want to start off with all devices in the 1:1 domain, and
2554                  * take them out later if we find they can't access all of memory.
2555                  *
2556                  * However, we can't do this for PCI devices behind bridges,
2557                  * because all PCI devices behind the same bridge will end up
2558                  * with the same source-id on their transactions.
2559                  *
2560                  * Practically speaking, we can't change things around for these
2561                  * devices at run-time, because we can't be sure there'll be no
2562                  * DMA transactions in flight for any of their siblings.
2563                  *
2564                  * So PCI devices (unless they're on the root bus) as well as
2565                  * their parent PCI-PCI or PCIe-PCI bridges must be left _out_ of
2566                  * the 1:1 domain, just in _case_ one of their siblings turns out
2567                  * not to be able to map all of memory.
2568                  */
2569                 if (!pci_is_pcie(pdev)) {
2570                         if (!pci_is_root_bus(pdev->bus))
2571                                 return 0;
2572                         if (pdev->class >> 8 == PCI_CLASS_BRIDGE_PCI)
2573                                 return 0;
2574                 } else if (pci_pcie_type(pdev) == PCI_EXP_TYPE_PCI_BRIDGE)
2575                         return 0;
2576         } else {
2577                 if (device_has_rmrr(dev))
2578                         return 0;
2579         }
2580
2581         /*
2582          * At boot time, we don't yet know if devices will be 64-bit capable.
2583          * Assume that they will — if they turn out not to be, then we can
2584          * take them out of the 1:1 domain later.
2585          */
2586         if (!startup) {
2587                 /*
2588                  * If the device's dma_mask is less than the system's memory
2589                  * size then this is not a candidate for identity mapping.
2590                  */
2591                 u64 dma_mask = *dev->dma_mask;
2592
2593                 if (dev->coherent_dma_mask &&
2594                     dev->coherent_dma_mask < dma_mask)
2595                         dma_mask = dev->coherent_dma_mask;
2596
2597                 return dma_mask >= dma_get_required_mask(dev);
2598         }
2599
2600         return 1;
2601 }
2602
2603 static int __init dev_prepare_static_identity_mapping(struct device *dev, int hw)
2604 {
2605         int ret;
2606
2607         if (!iommu_should_identity_map(dev, 1))
2608                 return 0;
2609
2610         ret = domain_add_dev_info(si_domain, dev,
2611                                   hw ? CONTEXT_TT_PASS_THROUGH :
2612                                        CONTEXT_TT_MULTI_LEVEL);
2613         if (!ret)
2614                 pr_info("IOMMU: %s identity mapping for device %s\n",
2615                         hw ? "hardware" : "software", dev_name(dev));
2616         else if (ret == -ENODEV)
2617                 /* device not associated with an iommu */
2618                 ret = 0;
2619
2620         return ret;
2621 }
2622
2623
2624 static int __init iommu_prepare_static_identity_mapping(int hw)
2625 {
2626         struct pci_dev *pdev = NULL;
2627         struct dmar_drhd_unit *drhd;
2628         struct intel_iommu *iommu;
2629         struct device *dev;
2630         int i;
2631         int ret = 0;
2632
2633         ret = si_domain_init(hw);
2634         if (ret)
2635                 return -EFAULT;
2636
2637         for_each_pci_dev(pdev) {
2638                 ret = dev_prepare_static_identity_mapping(&pdev->dev, hw);
2639                 if (ret)
2640                         return ret;
2641         }
2642
2643         for_each_active_iommu(iommu, drhd)
2644                 for_each_active_dev_scope(drhd->devices, drhd->devices_cnt, i, dev) {
2645                         struct acpi_device_physical_node *pn;
2646                         struct acpi_device *adev;
2647
2648                         if (dev->bus != &acpi_bus_type)
2649                                 continue;
2650
2651                         adev = to_acpi_device(dev);
2652                         mutex_lock(&adev->physical_node_lock);
2653                         list_for_each_entry(pn, &adev->physical_node_list, node) {
2654                                 ret = dev_prepare_static_identity_mapping(pn->dev, hw);
2655                                 if (ret)
2656                                         break;
2657                         }
2658                         mutex_unlock(&adev->physical_node_lock);
2659                         if (ret)
2660                                 return ret;
2661                 }
2662
2663         return 0;
2664 }
2665
2666 static int __init init_dmars(void)
2667 {
2668         struct dmar_drhd_unit *drhd;
2669         struct dmar_rmrr_unit *rmrr;
2670         struct device *dev;
2671         struct intel_iommu *iommu;
2672         int i, ret;
2673
2674         /*
2675          * for each drhd
2676          *    allocate root
2677          *    initialize and program root entry to not present
2678          * endfor
2679          */
2680         for_each_drhd_unit(drhd) {
2681                 /*
2682                  * lock not needed as this is only incremented in the
2683                  * single-threaded kernel __init code path; all other
2684                  * accesses are read only
2685                  */
2686                 if (g_num_of_iommus < IOMMU_UNITS_SUPPORTED) {
2687                         g_num_of_iommus++;
2688                         continue;
2689                 }
2690                 printk_once(KERN_ERR "intel-iommu: exceeded %d IOMMUs\n",
2691                           IOMMU_UNITS_SUPPORTED);
2692         }
2693
2694         g_iommus = kcalloc(g_num_of_iommus, sizeof(struct intel_iommu *),
2695                         GFP_KERNEL);
2696         if (!g_iommus) {
2697                 printk(KERN_ERR "Allocating global iommu array failed\n");
2698                 ret = -ENOMEM;
2699                 goto error;
2700         }
2701
2702         deferred_flush = kzalloc(g_num_of_iommus *
2703                 sizeof(struct deferred_flush_tables), GFP_KERNEL);
2704         if (!deferred_flush) {
2705                 ret = -ENOMEM;
2706                 goto free_g_iommus;
2707         }
2708
2709         for_each_active_iommu(iommu, drhd) {
2710                 g_iommus[iommu->seq_id] = iommu;
2711
2712                 ret = iommu_init_domains(iommu);
2713                 if (ret)
2714                         goto free_iommu;
2715
2716                 /*
2717                  * TBD:
2718                  * we could share the same root & context tables
2719                  * among all IOMMUs. Need to split it later.
2720                  */
2721                 ret = iommu_alloc_root_entry(iommu);
2722                 if (ret) {
2723                         printk(KERN_ERR "IOMMU: allocate root entry failed\n");
2724                         goto free_iommu;
2725                 }
2726                 if (!ecap_pass_through(iommu->ecap))
2727                         hw_pass_through = 0;
2728         }
2729
2730         /*
2731          * Start from a sane iommu hardware state.
2732          */
2733         for_each_active_iommu(iommu, drhd) {
2734                 /*
2735                  * If the queued invalidation is already initialized by us
2736                  * (for example, while enabling interrupt-remapping) then
2737                  * we already have things rolling from a sane state.
2738                  */
2739                 if (iommu->qi)
2740                         continue;
2741
2742                 /*
2743                  * Clear any previous faults.
2744                  */
2745                 dmar_fault(-1, iommu);
2746                 /*
2747                  * Disable queued invalidation if supported and already enabled
2748                  * before OS handover.
2749                  */
2750                 dmar_disable_qi(iommu);
2751         }
2752
2753         for_each_active_iommu(iommu, drhd) {
2754                 if (dmar_enable_qi(iommu)) {
2755                         /*
2756                          * Queued Invalidate not enabled, use Register Based
2757                          * Invalidate
2758                          */
2759                         iommu->flush.flush_context = __iommu_flush_context;
2760                         iommu->flush.flush_iotlb = __iommu_flush_iotlb;
2761                         printk(KERN_INFO "IOMMU %d 0x%Lx: using Register based "
2762                                "invalidation\n",
2763                                 iommu->seq_id,
2764                                (unsigned long long)drhd->reg_base_addr);
2765                 } else {
2766                         iommu->flush.flush_context = qi_flush_context;
2767                         iommu->flush.flush_iotlb = qi_flush_iotlb;
2768                         printk(KERN_INFO "IOMMU %d 0x%Lx: using Queued "
2769                                "invalidation\n",
2770                                 iommu->seq_id,
2771                                (unsigned long long)drhd->reg_base_addr);
2772                 }
2773         }
2774
2775         if (iommu_pass_through)
2776                 iommu_identity_mapping |= IDENTMAP_ALL;
2777
2778 #ifdef CONFIG_INTEL_IOMMU_BROKEN_GFX_WA
2779         iommu_identity_mapping |= IDENTMAP_GFX;
2780 #endif
2781
2782         check_tylersburg_isoch();
2783
2784         /*
2785          * If pass through is not set or not enabled, set up context entries
2786          * for identity mappings for rmrr, gfx and isa, and possibly fall back
2787          * to static identity mapping if iommu_identity_mapping is set.
2788          */
2789         if (iommu_identity_mapping) {
2790                 ret = iommu_prepare_static_identity_mapping(hw_pass_through);
2791                 if (ret) {
2792                         printk(KERN_CRIT "Failed to setup IOMMU pass-through\n");
2793                         goto free_iommu;
2794                 }
2795         }
2796         /*
2797          * For each rmrr
2798          *   for each dev attached to rmrr
2799          *   do
2800          *     locate drhd for dev, alloc domain for dev
2801          *     allocate free domain
2802          *     allocate page table entries for rmrr
2803          *     if context not allocated for bus
2804          *           allocate and init context
2805          *           set present in root table for this bus
2806          *     init context with domain, translation etc
2807          *    endfor
2808          * endfor
2809          */
2810         printk(KERN_INFO "IOMMU: Setting RMRR:\n");
2811         for_each_rmrr_units(rmrr) {
2812                 /* some BIOSes list non-existent devices in the DMAR table. */
2813                 for_each_active_dev_scope(rmrr->devices, rmrr->devices_cnt,
2814                                           i, dev) {
2815                         ret = iommu_prepare_rmrr_dev(rmrr, dev);
2816                         if (ret)
2817                                 printk(KERN_ERR
2818                                        "IOMMU: mapping reserved region failed\n");
2819                 }
2820         }
2821
2822         iommu_prepare_isa();
2823
2824         /*
2825          * for each drhd
2826          *   enable fault log
2827          *   global invalidate context cache
2828          *   global invalidate iotlb
2829          *   enable translation
2830          */
2831         for_each_iommu(iommu, drhd) {
2832                 if (drhd->ignored) {
2833                         /*
2834                          * we always have to disable PMRs or DMA may fail on
2835                          * this device
2836                          */
2837                         if (force_on)
2838                                 iommu_disable_protect_mem_regions(iommu);
2839                         continue;
2840                 }
2841
2842                 iommu_flush_write_buffer(iommu);
2843
2844                 ret = dmar_set_interrupt(iommu);
2845                 if (ret)
2846                         goto free_iommu;
2847
2848                 iommu_set_root_entry(iommu);
2849
2850                 iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL);
2851                 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
2852
2853                 ret = iommu_enable_translation(iommu);
2854                 if (ret)
2855                         goto free_iommu;
2856
2857                 iommu_disable_protect_mem_regions(iommu);
2858         }
2859
2860         return 0;
2861
2862 free_iommu:
2863         for_each_active_iommu(iommu, drhd)
2864                 free_dmar_iommu(iommu);
2865         kfree(deferred_flush);
2866 free_g_iommus:
2867         kfree(g_iommus);
2868 error:
2869         return ret;
2870 }
2871
2872 /* This takes a number of _MM_ pages, not VTD pages */
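     /*
      * The mask is clamped to the domain's address width.  Unless forcedac
      * is set, a 64-bit capable device first tries to get an IOVA below
      * 4GiB and only falls back to the higher range if that fails.
      */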
2873 static struct iova *intel_alloc_iova(struct device *dev,
2874                                      struct dmar_domain *domain,
2875                                      unsigned long nrpages, uint64_t dma_mask)
2876 {
2877         struct iova *iova = NULL;
2878
2879         /* Restrict dma_mask to the width that the iommu can handle */
2880         dma_mask = min_t(uint64_t, DOMAIN_MAX_ADDR(domain->gaw), dma_mask);
2881
2882         if (!dmar_forcedac && dma_mask > DMA_BIT_MASK(32)) {
2883                 /*
2884                  * First try to allocate an io virtual address in
2885                  * DMA_BIT_MASK(32) and if that fails then try allocating
2886                  * from the higher range
2887                  */
2888                 iova = alloc_iova(&domain->iovad, nrpages,
2889                                   IOVA_PFN(DMA_BIT_MASK(32)), 1);
2890                 if (iova)
2891                         return iova;
2892         }
2893         iova = alloc_iova(&domain->iovad, nrpages, IOVA_PFN(dma_mask), 1);
2894         if (unlikely(!iova)) {
2895                 printk(KERN_ERR "Allocating %ld-page iova for %s failed\n",
2896                        nrpages, dev_name(dev));
2897                 return NULL;
2898         }
2899
2900         return iova;
2901 }
2902
2903 static struct dmar_domain *__get_valid_domain_for_dev(struct device *dev)
2904 {
2905         struct dmar_domain *domain;
2906         int ret;
2907
2908         domain = get_domain_for_dev(dev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
2909         if (!domain) {
2910                 printk(KERN_ERR "Allocating domain for %s failed\n",
2911                        dev_name(dev));
2912                 return NULL;
2913         }
2914
2915         /* make sure context mapping is ok */
2916         if (unlikely(!domain_context_mapped(dev))) {
2917                 ret = domain_context_mapping(domain, dev, CONTEXT_TT_MULTI_LEVEL);
2918                 if (ret) {
2919                         printk(KERN_ERR "Domain context map for %s failed\n",
2920                                dev_name(dev));
2921                         return NULL;
2922                 }
2923         }
2924
2925         return domain;
2926 }
2927
2928 static inline struct dmar_domain *get_valid_domain_for_dev(struct device *dev)
2929 {
2930         struct device_domain_info *info;
2931
2932         /* No lock here, assumes no domain exit in normal case */
2933         info = dev->archdata.iommu;
2934         if (likely(info))
2935                 return info->domain;
2936
2937         return __get_valid_domain_for_dev(dev);
2938 }
2939
2940 static int iommu_dummy(struct device *dev)
2941 {
2942         return dev->archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO;
2943 }
2944
2945 /* Check if the dev needs to go through the non-identity map and unmap process. */
2946 static int iommu_no_mapping(struct device *dev)
2947 {
2948         int found;
2949
2950         if (iommu_dummy(dev))
2951                 return 1;
2952
2953         if (!iommu_identity_mapping)
2954                 return 0;
2955
2956         found = identity_mapping(dev);
2957         if (found) {
2958                 if (iommu_should_identity_map(dev, 0))
2959                         return 1;
2960                 else {
2961                         /*
2962                          * Remove the 32-bit DMA device from si_domain and
2963                          * fall back to non-identity mapping.
2964                          */
2965                         domain_remove_one_dev_info(si_domain, dev);
2966                         printk(KERN_INFO "32bit %s uses non-identity mapping\n",
2967                                dev_name(dev));
2968                         return 0;
2969                 }
2970         } else {
2971                 /*
2972                  * If a 64-bit DMA device has been detached from a VM, put it
2973                  * back into si_domain for identity mapping.
2974                  */
2975                 if (iommu_should_identity_map(dev, 0)) {
2976                         int ret;
2977                         ret = domain_add_dev_info(si_domain, dev,
2978                                                   hw_pass_through ?
2979                                                   CONTEXT_TT_PASS_THROUGH :
2980                                                   CONTEXT_TT_MULTI_LEVEL);
2981                         if (!ret) {
2982                                 printk(KERN_INFO "64bit %s uses identity mapping\n",
2983                                        dev_name(dev));
2984                                 return 1;
2985                         }
2986                 }
2987         }
2988
2989         return 0;
2990 }
2991
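     /*
      * Map a single physically contiguous buffer for DMA.  Returns the bus
      * address to hand to the device, the physical address unchanged when the
      * device bypasses translation, or 0 on failure (see intel_mapping_error).
      */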
2992 static dma_addr_t __intel_map_single(struct device *dev, phys_addr_t paddr,
2993                                      size_t size, int dir, u64 dma_mask)
2994 {
2995         struct dmar_domain *domain;
2996         phys_addr_t start_paddr;
2997         struct iova *iova;
2998         int prot = 0;
2999         int ret;
3000         struct intel_iommu *iommu;
3001         unsigned long paddr_pfn = paddr >> PAGE_SHIFT;
3002
3003         BUG_ON(dir == DMA_NONE);
3004
3005         if (iommu_no_mapping(dev))
3006                 return paddr;
3007
3008         domain = get_valid_domain_for_dev(dev);
3009         if (!domain)
3010                 return 0;
3011
3012         iommu = domain_get_iommu(domain);
3013         size = aligned_nrpages(paddr, size);
3014
3015         iova = intel_alloc_iova(dev, domain, dma_to_mm_pfn(size), dma_mask);
3016         if (!iova)
3017                 goto error;
3018
3019         /*
3020          * Check if DMAR supports zero-length reads on write-only
3021          * mappings.
3022          */
3023         if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
3024                         !cap_zlr(iommu->cap))
3025                 prot |= DMA_PTE_READ;
3026         if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
3027                 prot |= DMA_PTE_WRITE;
3028         /*
3029          * The range [paddr, paddr + size) might cover only part of a page,
3030          * but we must map whole pages.  Note: if two parts of one page are
3031          * mapped separately, two guest addresses may map to the same host
3032          * paddr, but this is not a big problem.
3033          */
3034         ret = domain_pfn_mapping(domain, mm_to_dma_pfn(iova->pfn_lo),
3035                                  mm_to_dma_pfn(paddr_pfn), size, prot);
3036         if (ret)
3037                 goto error;
3038
3039         /* it's a non-present to present mapping. Only flush if caching mode */
3040         if (cap_caching_mode(iommu->cap))
3041                 iommu_flush_iotlb_psi(iommu, domain->id, mm_to_dma_pfn(iova->pfn_lo), size, 0, 1);
3042         else
3043                 iommu_flush_write_buffer(iommu);
3044
3045         start_paddr = (phys_addr_t)iova->pfn_lo << PAGE_SHIFT;
3046         start_paddr += paddr & ~PAGE_MASK;
3047         return start_paddr;
3048
3049 error:
3050         if (iova)
3051                 __free_iova(&domain->iovad, iova);
3052         printk(KERN_ERR "Device %s request: %zx@%llx dir %d --- failed\n",
3053                 dev_name(dev), size, (unsigned long long)paddr, dir);
3054         return 0;
3055 }
3056
3057 static dma_addr_t intel_map_page(struct device *dev, struct page *page,
3058                                  unsigned long offset, size_t size,
3059                                  enum dma_data_direction dir,
3060                                  struct dma_attrs *attrs)
3061 {
3062         return __intel_map_single(dev, page_to_phys(page) + offset, size,
3063                                   dir, *dev->dma_mask);
3064 }
3065
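     /*
      * Drain the per-iommu deferred-flush queues: invalidate the stale IOTLB
      * entries, then free the queued iovas and page-table freelists.
      * Callers hold async_umap_flush_lock.
      */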
3066 static void flush_unmaps(void)
3067 {
3068         int i, j;
3069
3070         timer_on = 0;
3071
3072         /* just flush them all */
3073         for (i = 0; i < g_num_of_iommus; i++) {
3074                 struct intel_iommu *iommu = g_iommus[i];
3075                 if (!iommu)
3076                         continue;
3077
3078                 if (!deferred_flush[i].next)
3079                         continue;
3080
3081                 /* In caching mode, global flushes make emulation expensive */
3082                 if (!cap_caching_mode(iommu->cap))
3083                         iommu->flush.flush_iotlb(iommu, 0, 0, 0,
3084                                          DMA_TLB_GLOBAL_FLUSH);
3085                 for (j = 0; j < deferred_flush[i].next; j++) {
3086                         unsigned long mask;
3087                         struct iova *iova = deferred_flush[i].iova[j];
3088                         struct dmar_domain *domain = deferred_flush[i].domain[j];
3089
3090                         /* On real hardware multiple invalidations are expensive */
3091                         if (cap_caching_mode(iommu->cap))
3092                                 iommu_flush_iotlb_psi(iommu, domain->id,
3093                                         iova->pfn_lo, iova->pfn_hi - iova->pfn_lo + 1,
3094                                         !deferred_flush[i].freelist[j], 0);
3095                         else {
3096                                 mask = ilog2(mm_to_dma_pfn(iova->pfn_hi - iova->pfn_lo + 1));
3097                                 iommu_flush_dev_iotlb(deferred_flush[i].domain[j],
3098                                                 (uint64_t)iova->pfn_lo << PAGE_SHIFT, mask);
3099                         }
3100                         __free_iova(&deferred_flush[i].domain[j]->iovad, iova);
3101                         if (deferred_flush[i].freelist[j])
3102                                 dma_free_pagelist(deferred_flush[i].freelist[j]);
3103                 }
3104                 deferred_flush[i].next = 0;
3105         }
3106
3107         list_size = 0;
3108 }
3109
3110 static void flush_unmaps_timeout(unsigned long data)
3111 {
3112         unsigned long flags;
3113
3114         spin_lock_irqsave(&async_umap_flush_lock, flags);
3115         flush_unmaps();
3116         spin_unlock_irqrestore(&async_umap_flush_lock, flags);
3117 }
3118
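     /*
      * Queue an iova (and the freelist of page-table pages backing it) for
      * lazy invalidation.  The queue is drained when it reaches
      * HIGH_WATER_MARK or when the 10ms unmap_timer fires.
      */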
3119 static void add_unmap(struct dmar_domain *dom, struct iova *iova, struct page *freelist)
3120 {
3121         unsigned long flags;
3122         int next, iommu_id;
3123         struct intel_iommu *iommu;
3124
3125         spin_lock_irqsave(&async_umap_flush_lock, flags);
3126         if (list_size == HIGH_WATER_MARK)
3127                 flush_unmaps();
3128
3129         iommu = domain_get_iommu(dom);
3130         iommu_id = iommu->seq_id;
3131
3132         next = deferred_flush[iommu_id].next;
3133         deferred_flush[iommu_id].domain[next] = dom;
3134         deferred_flush[iommu_id].iova[next] = iova;
3135         deferred_flush[iommu_id].freelist[next] = freelist;
3136         deferred_flush[iommu_id].next++;
3137
3138         if (!timer_on) {
3139                 mod_timer(&unmap_timer, jiffies + msecs_to_jiffies(10));
3140                 timer_on = 1;
3141         }
3142         list_size++;
3143         spin_unlock_irqrestore(&async_umap_flush_lock, flags);
3144 }
3145
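     /*
      * Tear down the mapping for a single DMA handle.  In intel_iommu_strict
      * mode the IOTLB is flushed and the iova freed immediately; otherwise
      * the release is deferred via add_unmap().
      */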
3146 static void intel_unmap_page(struct device *dev, dma_addr_t dev_addr,
3147                              size_t size, enum dma_data_direction dir,
3148                              struct dma_attrs *attrs)
3149 {
3150         struct dmar_domain *domain;
3151         unsigned long start_pfn, last_pfn;
3152         struct iova *iova;
3153         struct intel_iommu *iommu;
3154         struct page *freelist;
3155
3156         if (iommu_no_mapping(dev))
3157                 return;
3158
3159         domain = find_domain(dev);
3160         BUG_ON(!domain);
3161
3162         iommu = domain_get_iommu(domain);
3163
3164         iova = find_iova(&domain->iovad, IOVA_PFN(dev_addr));
3165         if (WARN_ONCE(!iova, "Driver unmaps unmatched page at PFN %llx\n",
3166                       (unsigned long long)dev_addr))
3167                 return;
3168
3169         start_pfn = mm_to_dma_pfn(iova->pfn_lo);
3170         last_pfn = mm_to_dma_pfn(iova->pfn_hi + 1) - 1;
3171
3172         pr_debug("Device %s unmapping: pfn %lx-%lx\n",
3173                  dev_name(dev), start_pfn, last_pfn);
3174
3175         freelist = domain_unmap(domain, start_pfn, last_pfn);
3176
3177         if (intel_iommu_strict) {
3178                 iommu_flush_iotlb_psi(iommu, domain->id, start_pfn,
3179                                       last_pfn - start_pfn + 1, !freelist, 0);
3180                 /* free iova */
3181                 __free_iova(&domain->iovad, iova);
3182                 dma_free_pagelist(freelist);
3183         } else {
3184                 add_unmap(domain, iova, freelist);
3185                 /*
3186                  * Queue up the release of the unmap to save the 1/6th of the
3187                  * CPU time otherwise used by the iotlb flush operation.
3188                  */
3189         }
3190 }
3191
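     /*
      * Allocate and map a coherent buffer.  The GFP_DMA/GFP_DMA32 zone
      * restrictions are only applied when the device bypasses the IOMMU and
      * its coherent mask cannot reach all of memory; with translation
      * enabled any pages will do.
      */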
3192 static void *intel_alloc_coherent(struct device *dev, size_t size,
3193                                   dma_addr_t *dma_handle, gfp_t flags,
3194                                   struct dma_attrs *attrs)
3195 {
3196         void *vaddr;
3197         int order;
3198
3199         size = PAGE_ALIGN(size);
3200         order = get_order(size);
3201
3202         if (!iommu_no_mapping(dev))
3203                 flags &= ~(GFP_DMA | GFP_DMA32);
3204         else if (dev->coherent_dma_mask < dma_get_required_mask(dev)) {
3205                 if (dev->coherent_dma_mask < DMA_BIT_MASK(32))
3206                         flags |= GFP_DMA;
3207                 else
3208                         flags |= GFP_DMA32;
3209         }
3210
3211         vaddr = (void *)__get_free_pages(flags, order);
3212         if (!vaddr)
3213                 return NULL;
3214         memset(vaddr, 0, size);
3215
3216         *dma_handle = __intel_map_single(dev, virt_to_bus(vaddr), size,
3217                                          DMA_BIDIRECTIONAL,
3218                                          dev->coherent_dma_mask);
3219         if (*dma_handle)
3220                 return vaddr;
3221         free_pages((unsigned long)vaddr, order);
3222         return NULL;
3223 }
3224
3225 static void intel_free_coherent(struct device *dev, size_t size, void *vaddr,
3226                                 dma_addr_t dma_handle, struct dma_attrs *attrs)
3227 {
3228         int order;
3229
3230         size = PAGE_ALIGN(size);
3231         order = get_order(size);
3232
3233         intel_unmap_page(dev, dma_handle, size, DMA_BIDIRECTIONAL, NULL);
3234         free_pages((unsigned long)vaddr, order);
3235 }
3236
3237 static void intel_unmap_sg(struct device *dev, struct scatterlist *sglist,
3238                            int nelems, enum dma_data_direction dir,
3239                            struct dma_attrs *attrs)
3240 {
3241         struct dmar_domain *domain;
3242         unsigned long start_pfn, last_pfn;
3243         struct iova *iova;
3244         struct intel_iommu *iommu;
3245         struct page *freelist;
3246
3247         if (iommu_no_mapping(dev))
3248                 return;
3249
3250         domain = find_domain(dev);
3251         BUG_ON(!domain);
3252
3253         iommu = domain_get_iommu(domain);
3254
3255         iova = find_iova(&domain->iovad, IOVA_PFN(sglist[0].dma_address));
3256         if (WARN_ONCE(!iova, "Driver unmaps unmatched sglist at PFN %llx\n",
3257                       (unsigned long long)sglist[0].dma_address))
3258                 return;
3259
3260         start_pfn = mm_to_dma_pfn(iova->pfn_lo);
3261         last_pfn = mm_to_dma_pfn(iova->pfn_hi + 1) - 1;
3262
3263         freelist = domain_unmap(domain, start_pfn, last_pfn);
3264
3265         if (intel_iommu_strict) {
3266                 iommu_flush_iotlb_psi(iommu, domain->id, start_pfn,
3267                                       last_pfn - start_pfn + 1, !freelist, 0);
3268                 /* free iova */
3269                 __free_iova(&domain->iovad, iova);
3270                 dma_free_pagelist(freelist);
3271         } else {
3272                 add_unmap(domain, iova, freelist);
3273                 /*
3274                  * Queue up the release of the unmap to save the 1/6th of the
3275                  * CPU time otherwise used by the iotlb flush operation.
3276                  */
3277         }
3278 }
3279
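     /*
      * Pass-through path for scatterlists: the DMA address of each segment
      * is simply its physical address.
      */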
3280 static int intel_nontranslate_map_sg(struct device *hddev,
3281         struct scatterlist *sglist, int nelems, int dir)
3282 {
3283         int i;
3284         struct scatterlist *sg;
3285
3286         for_each_sg(sglist, sg, nelems, i) {
3287                 BUG_ON(!sg_page(sg));
3288                 sg->dma_address = page_to_phys(sg_page(sg)) + sg->offset;
3289                 sg->dma_length = sg->length;
3290         }
3291         return nelems;
3292 }
3293
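     /*
      * Map a scatterlist.  A single iova range large enough for every
      * segment is allocated and the segments are mapped back to back within
      * it, so their DMA addresses are contiguous in iova space.
      */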
3294 static int intel_map_sg(struct device *dev, struct scatterlist *sglist, int nelems,
3295                         enum dma_data_direction dir, struct dma_attrs *attrs)
3296 {
3297         int i;
3298         struct dmar_domain *domain;
3299         size_t size = 0;
3300         int prot = 0;
3301         struct iova *iova = NULL;
3302         int ret;
3303         struct scatterlist *sg;
3304         unsigned long start_vpfn;
3305         struct intel_iommu *iommu;
3306
3307         BUG_ON(dir == DMA_NONE);
3308         if (iommu_no_mapping(dev))
3309                 return intel_nontranslate_map_sg(dev, sglist, nelems, dir);
3310
3311         domain = get_valid_domain_for_dev(dev);
3312         if (!domain)
3313                 return 0;
3314
3315         iommu = domain_get_iommu(domain);
3316
3317         for_each_sg(sglist, sg, nelems, i)
3318                 size += aligned_nrpages(sg->offset, sg->length);
3319
3320         iova = intel_alloc_iova(dev, domain, dma_to_mm_pfn(size),
3321                                 *dev->dma_mask);
3322         if (!iova) {
3323                 sglist->dma_length = 0;
3324                 return 0;
3325         }
3326
3327         /*
3328          * Check if DMAR supports zero-length reads on write only
3329          * mappings..
3330          */
3331          * Check if DMAR supports zero-length reads on write-only
3332          * mappings.
3333                 prot |= DMA_PTE_READ;
3334         if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
3335                 prot |= DMA_PTE_WRITE;
3336
3337         start_vpfn = mm_to_dma_pfn(iova->pfn_lo);
3338
3339         ret = domain_sg_mapping(domain, start_vpfn, sglist, size, prot);
3340         if (unlikely(ret)) {
3341                 /*  clear the page */
3342                 dma_pte_clear_range(domain, start_vpfn,
3343                                     start_vpfn + size - 1);
3344                 /* free page tables */
3345                 dma_pte_free_pagetable(domain, start_vpfn,
3346                                        start_vpfn + size - 1);
3347                 /* free iova */
3348                 __free_iova(&domain->iovad, iova);
3349                 return 0;
3350         }
3351
3352         /* it's a non-present to present mapping. Only flush if caching mode */
3353         if (cap_caching_mode(iommu->cap))
3354                 iommu_flush_iotlb_psi(iommu, domain->id, start_vpfn, size, 0, 1);
3355         else
3356                 iommu_flush_write_buffer(iommu);
3357
3358         return nelems;
3359 }
3360
3361 static int intel_mapping_error(struct device *dev, dma_addr_t dma_addr)
3362 {
3363         return !dma_addr;
3364 }
3365
3366 struct dma_map_ops intel_dma_ops = {
3367         .alloc = intel_alloc_coherent,
3368         .free = intel_free_coherent,
3369         .map_sg = intel_map_sg,
3370         .unmap_sg = intel_unmap_sg,
3371         .map_page = intel_map_page,
3372         .unmap_page = intel_unmap_page,
3373         .mapping_error = intel_mapping_error,
3374 };
3375
3376 static inline int iommu_domain_cache_init(void)
3377 {
3378         int ret = 0;
3379
3380         iommu_domain_cache = kmem_cache_create("iommu_domain",
3381                                          sizeof(struct dmar_domain),
3382                                          0,
3383                                          SLAB_HWCACHE_ALIGN,
3384                                          NULL);
3386         if (!iommu_domain_cache) {
3387                 printk(KERN_ERR "Couldn't create iommu_domain cache\n");
3388                 ret = -ENOMEM;
3389         }
3390
3391         return ret;
3392 }
3393
3394 static inline int iommu_devinfo_cache_init(void)
3395 {
3396         int ret = 0;
3397
3398         iommu_devinfo_cache = kmem_cache_create("iommu_devinfo",
3399                                          sizeof(struct device_domain_info),
3400                                          0,
3401                                          SLAB_HWCACHE_ALIGN,
3402                                          NULL);
3403         if (!iommu_devinfo_cache) {
3404                 printk(KERN_ERR "Couldn't create devinfo cache\n");
3405                 ret = -ENOMEM;
3406         }
3407
3408         return ret;
3409 }
3410
3411 static inline int iommu_iova_cache_init(void)
3412 {
3413         int ret = 0;
3414
3415         iommu_iova_cache = kmem_cache_create("iommu_iova",
3416                                          sizeof(struct iova),
3417                                          0,
3418                                          SLAB_HWCACHE_ALIGN,
3419                                          NULL);
3420         if (!iommu_iova_cache) {
3421                 printk(KERN_ERR "Couldn't create iova cache\n");
3422                 ret = -ENOMEM;
3423         }
3424
3425         return ret;
3426 }
3427
3428 static int __init iommu_init_mempool(void)
3429 {
3430         int ret;
3431         ret = iommu_iova_cache_init();
3432         if (ret)
3433                 return ret;
3434
3435         ret = iommu_domain_cache_init();
3436         if (ret)
3437                 goto domain_error;
3438
3439         ret = iommu_devinfo_cache_init();
3440         if (!ret)
3441                 return ret;
3442
3443         kmem_cache_destroy(iommu_domain_cache);
3444 domain_error:
3445         kmem_cache_destroy(iommu_iova_cache);
3446
3447         return -ENOMEM;
3448 }
3449
3450 static void __init iommu_exit_mempool(void)
3451 {
3452         kmem_cache_destroy(iommu_devinfo_cache);
3453         kmem_cache_destroy(iommu_domain_cache);
3454         kmem_cache_destroy(iommu_iova_cache);
3455
3456 }
3457
3458 static void quirk_ioat_snb_local_iommu(struct pci_dev *pdev)
3459 {
3460         struct dmar_drhd_unit *drhd;
3461         u32 vtbar;
3462         int rc;
3463
3464         /* We know that this device on this chipset has its own IOMMU.
3465          * If we find it under a different IOMMU, then the BIOS is lying
3466          * to us. Hope that the IOMMU for this device is actually
3467          * disabled, and it needs no translation...
3468          */
3469         rc = pci_bus_read_config_dword(pdev->bus, PCI_DEVFN(0, 0), 0xb0, &vtbar);
3470         if (rc) {
3471                 /* "can't" happen */
3472                 dev_info(&pdev->dev, "failed to run vt-d quirk\n");
3473                 return;
3474         }
3475         vtbar &= 0xffff0000;
3476
3477         /* we know that this iommu should be at offset 0xa000 from vtbar */
3478         drhd = dmar_find_matched_drhd_unit(pdev);
3479         if (WARN_TAINT_ONCE(!drhd || drhd->reg_base_addr - vtbar != 0xa000,
3480                             TAINT_FIRMWARE_WORKAROUND,
3481                             "BIOS assigned incorrect VT-d unit for Intel(R) QuickData Technology device\n"))
3482                 pdev->dev.archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
3483 }
3484 DECLARE_PCI_FIXUP_ENABLE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB, quirk_ioat_snb_local_iommu);
3485
3486 static void __init init_no_remapping_devices(void)
3487 {
3488         struct dmar_drhd_unit *drhd;
3489         struct device *dev;
3490         int i;
3491
3492         for_each_drhd_unit(drhd) {
3493                 if (!drhd->include_all) {
3494                         for_each_active_dev_scope(drhd->devices,
3495                                                   drhd->devices_cnt, i, dev)
3496                                 break;
3497                         /* ignore DMAR unit if no devices exist */
3498                         if (i == drhd->devices_cnt)
3499                                 drhd->ignored = 1;
3500                 }
3501         }
3502
3503         for_each_active_drhd_unit(drhd) {
3504                 if (drhd->include_all)
3505                         continue;
3506
3507                 for_each_active_dev_scope(drhd->devices,
3508                                           drhd->devices_cnt, i, dev)
3509                         if (!dev_is_pci(dev) || !IS_GFX_DEVICE(to_pci_dev(dev)))
3510                                 break;
3511                 if (i < drhd->devices_cnt)
3512                         continue;
3513
3514                 /* This IOMMU has *only* gfx devices. Either bypass it or
3515                    set the gfx_mapped flag, as appropriate */
3516                 if (dmar_map_gfx) {
3517                         intel_iommu_gfx_mapped = 1;
3518                 } else {
3519                         drhd->ignored = 1;
3520                         for_each_active_dev_scope(drhd->devices,
3521                                                   drhd->devices_cnt, i, dev)
3522                                 dev->archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
3523                 }
3524         }
3525 }
3526
3527 #ifdef CONFIG_SUSPEND
3528 static int init_iommu_hw(void)
3529 {
3530         struct dmar_drhd_unit *drhd;
3531         struct intel_iommu *iommu = NULL;
3532
3533         for_each_active_iommu(iommu, drhd)
3534                 if (iommu->qi)
3535                         dmar_reenable_qi(iommu);
3536
3537         for_each_iommu(iommu, drhd) {
3538                 if (drhd->ignored) {
3539                         /*
3540                          * we always have to disable PMRs or DMA may fail on
3541                          * this device
3542                          */
3543                         if (force_on)
3544                                 iommu_disable_protect_mem_regions(iommu);
3545                         continue;
3546                 }
3547
3548                 iommu_flush_write_buffer(iommu);
3549
3550                 iommu_set_root_entry(iommu);
3551
3552                 iommu->flush.flush_context(iommu, 0, 0, 0,
3553                                            DMA_CCMD_GLOBAL_INVL);
3554                 iommu->flush.flush_iotlb(iommu, 0, 0, 0,
3555                                          DMA_TLB_GLOBAL_FLUSH);
3556                 if (iommu_enable_translation(iommu))
3557                         return 1;
3558                 iommu_disable_protect_mem_regions(iommu);
3559         }
3560
3561         return 0;
3562 }
3563
3564 static void iommu_flush_all(void)
3565 {
3566         struct dmar_drhd_unit *drhd;
3567         struct intel_iommu *iommu;
3568
3569         for_each_active_iommu(iommu, drhd) {
3570                 iommu->flush.flush_context(iommu, 0, 0, 0,
3571                                            DMA_CCMD_GLOBAL_INVL);
3572                 iommu->flush.flush_iotlb(iommu, 0, 0, 0,
3573                                          DMA_TLB_GLOBAL_FLUSH);
3574         }
3575 }
3576
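     /*
      * Save the fault-event registers of every active iommu and disable
      * translation before the system suspends; iommu_resume() restores them.
      */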
3577 static int iommu_suspend(void)
3578 {
3579         struct dmar_drhd_unit *drhd;
3580         struct intel_iommu *iommu = NULL;
3581         unsigned long flag;
3582
3583         for_each_active_iommu(iommu, drhd) {
3584                 iommu->iommu_state = kzalloc(sizeof(u32) * MAX_SR_DMAR_REGS,
3585                                                  GFP_ATOMIC);
3586                 if (!iommu->iommu_state)
3587                         goto nomem;
3588         }
3589
3590         iommu_flush_all();
3591
3592         for_each_active_iommu(iommu, drhd) {
3593                 iommu_disable_translation(iommu);
3594
3595                 raw_spin_lock_irqsave(&iommu->register_lock, flag);
3596
3597                 iommu->iommu_state[SR_DMAR_FECTL_REG] =
3598                         readl(iommu->reg + DMAR_FECTL_REG);
3599                 iommu->iommu_state[SR_DMAR_FEDATA_REG] =
3600                         readl(iommu->reg + DMAR_FEDATA_REG);
3601                 iommu->iommu_state[SR_DMAR_FEADDR_REG] =
3602                         readl(iommu->reg + DMAR_FEADDR_REG);
3603                 iommu->iommu_state[SR_DMAR_FEUADDR_REG] =
3604                         readl(iommu->reg + DMAR_FEUADDR_REG);
3605
3606                 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
3607         }
3608         return 0;
3609
3610 nomem:
3611         for_each_active_iommu(iommu, drhd)
3612                 kfree(iommu->iommu_state);
3613
3614         return -ENOMEM;
3615 }
3616
3617 static void iommu_resume(void)
3618 {
3619         struct dmar_drhd_unit *drhd;
3620         struct intel_iommu *iommu = NULL;
3621         unsigned long flag;
3622
3623         if (init_iommu_hw()) {
3624                 if (force_on)
3625                         panic("tboot: IOMMU setup failed, DMAR can not resume!\n");
3626                 else
3627                         WARN(1, "IOMMU setup failed, DMAR can not resume!\n");
3628                 return;
3629         }
3630
3631         for_each_active_iommu(iommu, drhd) {
3632
3633                 raw_spin_lock_irqsave(&iommu->register_lock, flag);
3634
3635                 writel(iommu->iommu_state[SR_DMAR_FECTL_REG],
3636                         iommu->reg + DMAR_FECTL_REG);
3637                 writel(iommu->iommu_state[SR_DMAR_FEDATA_REG],
3638                         iommu->reg + DMAR_FEDATA_REG);
3639                 writel(iommu->iommu_state[SR_DMAR_FEADDR_REG],
3640                         iommu->reg + DMAR_FEADDR_REG);
3641                 writel(iommu->iommu_state[SR_DMAR_FEUADDR_REG],
3642                         iommu->reg + DMAR_FEUADDR_REG);
3643
3644                 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
3645         }
3646
3647         for_each_active_iommu(iommu, drhd)
3648                 kfree(iommu->iommu_state);
3649 }
3650
3651 static struct syscore_ops iommu_syscore_ops = {
3652         .resume         = iommu_resume,
3653         .suspend        = iommu_suspend,
3654 };
3655
3656 static void __init init_iommu_pm_ops(void)
3657 {
3658         register_syscore_ops(&iommu_syscore_ops);
3659 }
3660
3661 #else
3662 static inline void init_iommu_pm_ops(void) {}
3663 #endif  /* CONFIG_SUSPEND */
3664
3665
3666 int __init dmar_parse_one_rmrr(struct acpi_dmar_header *header)
3667 {
3668         struct acpi_dmar_reserved_memory *rmrr;
3669         struct dmar_rmrr_unit *rmrru;
3670
3671         rmrru = kzalloc(sizeof(*rmrru), GFP_KERNEL);
3672         if (!rmrru)
3673                 return -ENOMEM;
3674
3675         rmrru->hdr = header;
3676         rmrr = (struct acpi_dmar_reserved_memory *)header;
3677         rmrru->base_address = rmrr->base_address;
3678         rmrru->end_address = rmrr->end_address;
3679         rmrru->devices = dmar_alloc_dev_scope((void *)(rmrr + 1),
3680                                 ((void *)rmrr) + rmrr->header.length,
3681                                 &rmrru->devices_cnt);
3682         if (rmrru->devices_cnt && rmrru->devices == NULL) {
3683                 kfree(rmrru);
3684                 return -ENOMEM;
3685         }
3686
3687         list_add(&rmrru->list, &dmar_rmrr_units);
3688
3689         return 0;
3690 }
3691
3692 int __init dmar_parse_one_atsr(struct acpi_dmar_header *hdr)
3693 {
3694         struct acpi_dmar_atsr *atsr;
3695         struct dmar_atsr_unit *atsru;
3696
3697         atsr = container_of(hdr, struct acpi_dmar_atsr, header);
3698         atsru = kzalloc(sizeof(*atsru), GFP_KERNEL);
3699         if (!atsru)
3700                 return -ENOMEM;
3701
3702         atsru->hdr = hdr;
3703         atsru->include_all = atsr->flags & 0x1;
3704         if (!atsru->include_all) {
3705                 atsru->devices = dmar_alloc_dev_scope((void *)(atsr + 1),
3706                                 (void *)atsr + atsr->header.length,
3707                                 &atsru->devices_cnt);
3708                 if (atsru->devices_cnt && atsru->devices == NULL) {
3709                         kfree(atsru);
3710                         return -ENOMEM;
3711                 }
3712         }
3713
3714         list_add_rcu(&atsru->list, &dmar_atsr_units);
3715
3716         return 0;
3717 }
3718
3719 static void intel_iommu_free_atsr(struct dmar_atsr_unit *atsru)
3720 {
3721         dmar_free_dev_scope(&atsru->devices, &atsru->devices_cnt);
3722         kfree(atsru);
3723 }
3724
3725 static void intel_iommu_free_dmars(void)
3726 {
3727         struct dmar_rmrr_unit *rmrru, *rmrr_n;
3728         struct dmar_atsr_unit *atsru, *atsr_n;
3729
3730         list_for_each_entry_safe(rmrru, rmrr_n, &dmar_rmrr_units, list) {
3731                 list_del(&rmrru->list);
3732                 dmar_free_dev_scope(&rmrru->devices, &rmrru->devices_cnt);
3733                 kfree(rmrru);
3734         }
3735
3736         list_for_each_entry_safe(atsru, atsr_n, &dmar_atsr_units, list) {
3737                 list_del(&atsru->list);
3738                 intel_iommu_free_atsr(atsru);
3739         }
3740 }
3741
3742 int dmar_find_matched_atsr_unit(struct pci_dev *dev)
3743 {
3744         int i, ret = 1;
3745         struct pci_bus *bus;
3746         struct pci_dev *bridge = NULL;
3747         struct device *tmp;
3748         struct acpi_dmar_atsr *atsr;
3749         struct dmar_atsr_unit *atsru;
3750
3751         dev = pci_physfn(dev);
3752         for (bus = dev->bus; bus; bus = bus->parent) {
3753                 bridge = bus->self;
3754                 if (!bridge || !pci_is_pcie(bridge) ||
3755                     pci_pcie_type(bridge) == PCI_EXP_TYPE_PCI_BRIDGE)
3756                         return 0;
3757                 if (pci_pcie_type(bridge) == PCI_EXP_TYPE_ROOT_PORT)
3758                         break;
3759         }
3760         if (!bridge)
3761                 return 0;
3762
3763         rcu_read_lock();
3764         list_for_each_entry_rcu(atsru, &dmar_atsr_units, list) {
3765                 atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header);
3766                 if (atsr->segment != pci_domain_nr(dev->bus))
3767                         continue;
3768
3769                 for_each_dev_scope(atsru->devices, atsru->devices_cnt, i, tmp)
3770                         if (tmp == &bridge->dev)
3771                                 goto out;
3772
3773                 if (atsru->include_all)
3774                         goto out;
3775         }
3776         ret = 0;
3777 out:
3778         rcu_read_unlock();
3779
3780         return ret;
3781 }
3782
3783 int dmar_iommu_notify_scope_dev(struct dmar_pci_notify_info *info)
3784 {
3785         int ret = 0;
3786         struct dmar_rmrr_unit *rmrru;
3787         struct dmar_atsr_unit *atsru;
3788         struct acpi_dmar_atsr *atsr;
3789         struct acpi_dmar_reserved_memory *rmrr;
3790
3791         if (!intel_iommu_enabled && system_state != SYSTEM_BOOTING)
3792                 return 0;
3793
3794         list_for_each_entry(rmrru, &dmar_rmrr_units, list) {
3795                 rmrr = container_of(rmrru->hdr,
3796                                     struct acpi_dmar_reserved_memory, header);
3797                 if (info->event == BUS_NOTIFY_ADD_DEVICE) {
3798                         ret = dmar_insert_dev_scope(info, (void *)(rmrr + 1),
3799                                 ((void *)rmrr) + rmrr->header.length,
3800                                 rmrr->segment, rmrru->devices,
3801                                 rmrru->devices_cnt);
3802                         if (ret > 0)
3803                                 break;
3804                         else if (ret < 0)
3805                                 return ret;
3806                 } else if (info->event == BUS_NOTIFY_DEL_DEVICE) {
3807                         if (dmar_remove_dev_scope(info, rmrr->segment,
3808                                 rmrru->devices, rmrru->devices_cnt))
3809                                 break;
3810                 }
3811         }
3812
3813         list_for_each_entry(atsru, &dmar_atsr_units, list) {
3814                 if (atsru->include_all)
3815                         continue;
3816
3817                 atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header);
3818                 if (info->event == BUS_NOTIFY_ADD_DEVICE) {
3819                         ret = dmar_insert_dev_scope(info, (void *)(atsr + 1),
3820                                         (void *)atsr + atsr->header.length,
3821                                         atsr->segment, atsru->devices,
3822                                         atsru->devices_cnt);
3823                         if (ret > 0)
3824                                 break;
3825                         else if (ret < 0)
3826                                 return ret;
3827                 } else if (info->event == BUS_NOTIFY_DEL_DEVICE) {
3828                         if (dmar_remove_dev_scope(info, atsr->segment,
3829                                         atsru->devices, atsru->devices_cnt))
3830                                 break;
3831                 }
3832         }
3833
3834         return 0;
3835 }
3836
3837 /*
3838  * Here we only respond to a device being unbound from its driver.
3839  *
3840  * A newly added device is not attached to its DMAR domain here yet; that
3841  * happens when the device is first mapped to an iova.
3842  */
3843 static int device_notifier(struct notifier_block *nb,
3844                                   unsigned long action, void *data)
3845 {
3846         struct device *dev = data;
3847         struct dmar_domain *domain;
3848
3849         if (iommu_dummy(dev))
3850                 return 0;
3851
3852         if (action != BUS_NOTIFY_UNBOUND_DRIVER &&
3853             action != BUS_NOTIFY_DEL_DEVICE)
3854                 return 0;
3855
3856         domain = find_domain(dev);
3857         if (!domain)
3858                 return 0;
3859
3860         down_read(&dmar_global_lock);
3861         domain_remove_one_dev_info(domain, dev);
3862         if (!(domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE) &&
3863             !(domain->flags & DOMAIN_FLAG_STATIC_IDENTITY) &&
3864             list_empty(&domain->devices))
3865                 domain_exit(domain);
3866         up_read(&dmar_global_lock);
3867
3868         return 0;
3869 }
3870
3871 static struct notifier_block device_nb = {
3872         .notifier_call = device_notifier,
3873 };
3874
3875 static int intel_iommu_memory_notifier(struct notifier_block *nb,
3876                                        unsigned long val, void *v)
3877 {
3878         struct memory_notify *mhp = v;
3879         unsigned long long start, end;
3880         unsigned long start_vpfn, last_vpfn;
3881
3882         switch (val) {
3883         case MEM_GOING_ONLINE:
3884                 start = mhp->start_pfn << PAGE_SHIFT;
3885                 end = ((mhp->start_pfn + mhp->nr_pages) << PAGE_SHIFT) - 1;
3886                 if (iommu_domain_identity_map(si_domain, start, end)) {
3887                         pr_warn("dmar: failed to build identity map for [%llx-%llx]\n",
3888                                 start, end);
3889                         return NOTIFY_BAD;
3890                 }
3891                 break;
3892
3893         case MEM_OFFLINE:
3894         case MEM_CANCEL_ONLINE:
3895                 start_vpfn = mm_to_dma_pfn(mhp->start_pfn);
3896                 last_vpfn = mm_to_dma_pfn(mhp->start_pfn + mhp->nr_pages - 1);
3897                 while (start_vpfn <= last_vpfn) {
3898                         struct iova *iova;
3899                         struct dmar_drhd_unit *drhd;
3900                         struct intel_iommu *iommu;
3901                         struct page *freelist;
3902
3903                         iova = find_iova(&si_domain->iovad, start_vpfn);
3904                         if (iova == NULL) {
3905                                 pr_debug("dmar: failed to get IOVA for PFN %lx\n",
3906                                          start_vpfn);
3907                                 break;
3908                         }
3909
3910                         iova = split_and_remove_iova(&si_domain->iovad, iova,
3911                                                      start_vpfn, last_vpfn);
3912                         if (iova == NULL) {
3913                                 pr_warn("dmar: failed to split IOVA PFN [%lx-%lx]\n",
3914                                         start_vpfn, last_vpfn);
3915                                 return NOTIFY_BAD;
3916                         }
3917
3918                         freelist = domain_unmap(si_domain, iova->pfn_lo,
3919                                                iova->pfn_hi);
3920
3921                         rcu_read_lock();
3922                         for_each_active_iommu(iommu, drhd)
3923                                 iommu_flush_iotlb_psi(iommu, si_domain->id,
3924                                         iova->pfn_lo,
3925                                         iova->pfn_hi - iova->pfn_lo + 1,
3926                                         !freelist, 0);
3927                         rcu_read_unlock();
3928                         dma_free_pagelist(freelist);
3929
3930                         start_vpfn = iova->pfn_hi + 1;
3931                         free_iova_mem(iova);
3932                 }
3933                 break;
3934         }
3935
3936         return NOTIFY_OK;
3937 }
3938
3939 static struct notifier_block intel_iommu_memory_nb = {
3940         .notifier_call = intel_iommu_memory_notifier,
3941         .priority = 0
3942 };
3943
3944 int __init intel_iommu_init(void)
3945 {
3946         int ret = -ENODEV;
3947         struct dmar_drhd_unit *drhd;
3948         struct intel_iommu *iommu;
3949
3950         /* VT-d is required for a TXT/tboot launch, so enforce that */
3951         force_on = tboot_force_iommu();
3952
3953         if (iommu_init_mempool()) {
3954                 if (force_on)
3955                         panic("tboot: Failed to initialize iommu memory\n");
3956                 return -ENOMEM;
3957         }
3958
3959         down_write(&dmar_global_lock);
3960         if (dmar_table_init()) {
3961                 if (force_on)
3962                         panic("tboot: Failed to initialize DMAR table\n");
3963                 goto out_free_dmar;
3964         }
3965
3966         /*
3967          * Disable translation if already enabled prior to OS handover.
3968          */
3969         for_each_active_iommu(iommu, drhd)
3970                 if (iommu->gcmd & DMA_GCMD_TE)
3971                         iommu_disable_translation(iommu);
3972
3973         if (dmar_dev_scope_init() < 0) {
3974                 if (force_on)
3975                         panic("tboot: Failed to initialize DMAR device scope\n");
3976                 goto out_free_dmar;
3977         }
3978
3979         if (no_iommu || dmar_disabled)
3980                 goto out_free_dmar;
3981
3982         if (list_empty(&dmar_rmrr_units))
3983                 printk(KERN_INFO "DMAR: No RMRR found\n");
3984
3985         if (list_empty(&dmar_atsr_units))
3986                 printk(KERN_INFO "DMAR: No ATSR found\n");
3987
3988         if (dmar_init_reserved_ranges()) {
3989                 if (force_on)
3990                         panic("tboot: Failed to reserve iommu ranges\n");
3991                 goto out_free_reserved_range;
3992         }
3993
3994         init_no_remapping_devices();
3995
3996         ret = init_dmars();
3997         if (ret) {
3998                 if (force_on)
3999                         panic("tboot: Failed to initialize DMARs\n");
4000                 printk(KERN_ERR "IOMMU: dmar init failed\n");
4001                 goto out_free_reserved_range;
4002         }
4003         up_write(&dmar_global_lock);
4004         printk(KERN_INFO
4005         "PCI-DMA: Intel(R) Virtualization Technology for Directed I/O\n");
4006
4007         init_timer(&unmap_timer);
4008 #ifdef CONFIG_SWIOTLB
4009         swiotlb = 0;
4010 #endif
4011         dma_ops = &intel_dma_ops;
4012
4013         init_iommu_pm_ops();
4014
4015         bus_set_iommu(&pci_bus_type, &intel_iommu_ops);
4016         bus_register_notifier(&pci_bus_type, &device_nb);
4017         if (si_domain && !hw_pass_through)
4018                 register_memory_notifier(&intel_iommu_memory_nb);
4019
4020         intel_iommu_enabled = 1;
4021
4022         return 0;
4023
4024 out_free_reserved_range:
4025         put_iova_domain(&reserved_iova_list);
4026 out_free_dmar:
4027         intel_iommu_free_dmars();
4028         up_write(&dmar_global_lock);
4029         iommu_exit_mempool();
4030         return ret;
4031 }
4032
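     /*
      * A device behind a PCIe-to-PCI bridge may issue DMA tagged with the
      * bridge's source-id, so the context entries programmed for the bridges
      * on the upstream path are torn down along with the device's own.
      */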
4033 static void iommu_detach_dependent_devices(struct intel_iommu *iommu,
4034                                            struct device *dev)
4035 {
4036         struct pci_dev *tmp, *parent, *pdev;
4037
4038         if (!iommu || !dev || !dev_is_pci(dev))
4039                 return;
4040
4041         pdev = to_pci_dev(dev);
4042
4043         /* dependent device detach */
4044         tmp = pci_find_upstream_pcie_bridge(pdev);
4045         /* Secondary interface's bus number and devfn 0 */
4046         if (tmp) {
4047                 parent = pdev->bus->self;
4048                 while (parent != tmp) {
4049                         iommu_detach_dev(iommu, parent->bus->number,
4050                                          parent->devfn);
4051                         parent = parent->bus->self;
4052                 }
4053                 if (pci_is_pcie(tmp)) /* this is a PCIe-to-PCI bridge */
4054                         iommu_detach_dev(iommu,
4055                                 tmp->subordinate->number, 0);
4056                 else /* this is a legacy PCI bridge */
4057                         iommu_detach_dev(iommu, tmp->bus->number,
4058                                          tmp->devfn);
4059         }
4060 }
4061
4062 static void domain_remove_one_dev_info(struct dmar_domain *domain,
4063                                        struct device *dev)
4064 {
4065         struct device_domain_info *info, *tmp;
4066         struct intel_iommu *iommu;
4067         unsigned long flags;
4068         int found = 0;
4069         u8 bus, devfn;
4070
4071         iommu = device_to_iommu(dev, &bus, &devfn);
4072         if (!iommu)
4073                 return;
4074
4075         spin_lock_irqsave(&device_domain_lock, flags);
4076         list_for_each_entry_safe(info, tmp, &domain->devices, link) {
4077                 if (info->iommu == iommu && info->bus == bus &&
4078                     info->devfn == devfn) {
4079                         unlink_domain_info(info);
4080                         spin_unlock_irqrestore(&device_domain_lock, flags);
4081
4082                         iommu_disable_dev_iotlb(info);
4083                         iommu_detach_dev(iommu, info->bus, info->devfn);
4084                         iommu_detach_dependent_devices(iommu, dev);
4085                         free_devinfo_mem(info);
4086
4087                         spin_lock_irqsave(&device_domain_lock, flags);
4088
4089                         if (found)
4090                                 break;
4091                         else
4092                                 continue;
4093                 }
4094
4095                 /* If there are no other devices under the same iommu
4096                  * owned by this domain, clear this iommu in iommu_bmp and
4097                  * update the iommu count and coherency.
4098                  */
4099                 if (info->iommu == iommu)
4100                         found = 1;
4101         }
4102
4103         spin_unlock_irqrestore(&device_domain_lock, flags);
4104
4105         if (found == 0) {
4106                 unsigned long tmp_flags;
4107                 spin_lock_irqsave(&domain->iommu_lock, tmp_flags);
4108                 clear_bit(iommu->seq_id, domain->iommu_bmp);
4109                 domain->iommu_count--;
4110                 domain_update_iommu_cap(domain);
4111                 spin_unlock_irqrestore(&domain->iommu_lock, tmp_flags);
4112
4113                 if (!(domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE) &&
4114                     !(domain->flags & DOMAIN_FLAG_STATIC_IDENTITY)) {
4115                         spin_lock_irqsave(&iommu->lock, tmp_flags);
4116                         clear_bit(domain->id, iommu->domain_ids);
4117                         iommu->domains[domain->id] = NULL;
4118                         spin_unlock_irqrestore(&iommu->lock, tmp_flags);
4119                 }
4120         }
4121 }
4122
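     /*
      * Initialise a domain created through the IOMMU API: set up its iova
      * allocator, derive the adjusted guest address width and allocate the
      * top-level page directory.
      */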
4123 static int md_domain_init(struct dmar_domain *domain, int guest_width)
4124 {
4125         int adjust_width;
4126
4127         init_iova_domain(&domain->iovad, DMA_32BIT_PFN);
4128         domain_reserve_special_ranges(domain);
4129
4130         /* calculate AGAW */
4131         domain->gaw = guest_width;
4132         adjust_width = guestwidth_to_adjustwidth(guest_width);
4133         domain->agaw = width_to_agaw(adjust_width);
4134
4135         domain->iommu_coherency = 0;
4136         domain->iommu_snooping = 0;
4137         domain->iommu_superpage = 0;
4138         domain->max_addr = 0;
4139         domain->nid = -1;
4140
4141         /* always allocate the top pgd */
4142         domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid);
4143         if (!domain->pgd)
4144                 return -ENOMEM;
4145         domain_flush_cache(domain, domain->pgd, PAGE_SIZE);
4146         return 0;
4147 }
4148
4149 static int intel_iommu_domain_init(struct iommu_domain *domain)
4150 {
4151         struct dmar_domain *dmar_domain;
4152
4153         dmar_domain = alloc_domain(true);
4154         if (!dmar_domain) {
4155                 printk(KERN_ERR
4156                         "intel_iommu_domain_init: dmar_domain == NULL\n");
4157                 return -ENOMEM;
4158         }
4159         if (md_domain_init(dmar_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
4160                 printk(KERN_ERR
4161                         "intel_iommu_domain_init() failed\n");
4162                 domain_exit(dmar_domain);
4163                 return -ENOMEM;
4164         }
4165         domain_update_iommu_cap(dmar_domain);
4166         domain->priv = dmar_domain;
4167
4168         domain->geometry.aperture_start = 0;
4169         domain->geometry.aperture_end   = __DOMAIN_MAX_ADDR(dmar_domain->gaw);
4170         domain->geometry.force_aperture = true;
4171
4172         return 0;
4173 }
4174
4175 static void intel_iommu_domain_destroy(struct iommu_domain *domain)
4176 {
4177         struct dmar_domain *dmar_domain = domain->priv;
4178
4179         domain->priv = NULL;
4180         domain_exit(dmar_domain);
4181 }
4182
4183 static int intel_iommu_attach_device(struct iommu_domain *domain,
4184                                      struct device *dev)
4185 {
4186         struct dmar_domain *dmar_domain = domain->priv;
4187         struct intel_iommu *iommu;
4188         int addr_width;
4189         u8 bus, devfn;
4190
4191         /* normally dev is not mapped */
4192         if (unlikely(domain_context_mapped(dev))) {
4193                 struct dmar_domain *old_domain;
4194
4195                 old_domain = find_domain(dev);
4196                 if (old_domain) {
4197                         if (dmar_domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE ||
4198                             dmar_domain->flags & DOMAIN_FLAG_STATIC_IDENTITY)
4199                                 domain_remove_one_dev_info(old_domain, dev);
4200                         else
4201                                 domain_remove_dev_info(old_domain);
4202                 }
4203         }
4204
4205         iommu = device_to_iommu(dev, &bus, &devfn);
4206         if (!iommu)
4207                 return -ENODEV;
4208
4209         /* check if this iommu agaw is sufficient for max mapped address */
4210         addr_width = agaw_to_width(iommu->agaw);
4211         if (addr_width > cap_mgaw(iommu->cap))
4212                 addr_width = cap_mgaw(iommu->cap);
4213
4214         if (dmar_domain->max_addr > (1LL << addr_width)) {
4215                 printk(KERN_ERR "%s: iommu width (%d) is not "
4216                        "sufficient for the mapped address (%llx)\n",
4217                        __func__, addr_width, dmar_domain->max_addr);
4218                 return -EFAULT;
4219         }
4220         dmar_domain->gaw = addr_width;
4221
4222         /*
4223          * Knock out extra levels of page tables if necessary
4224          */
4225         while (iommu->agaw < dmar_domain->agaw) {
4226                 struct dma_pte *pte;
4227
4228                 pte = dmar_domain->pgd;
4229                 if (dma_pte_present(pte)) {
4230                         dmar_domain->pgd = (struct dma_pte *)
4231                                 phys_to_virt(dma_pte_addr(pte));
4232                         free_pgtable_page(pte);
4233                 }
4234                 dmar_domain->agaw--;
4235         }
4236
4237         return domain_add_dev_info(dmar_domain, dev, CONTEXT_TT_MULTI_LEVEL);
4238 }
4239
4240 static void intel_iommu_detach_device(struct iommu_domain *domain,
4241                                       struct device *dev)
4242 {
4243         struct dmar_domain *dmar_domain = domain->priv;
4244
4245         domain_remove_one_dev_info(dmar_domain, dev);
4246 }
4247
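     /*
      * IOMMU API map callback: translate IOMMU_READ/WRITE/CACHE into DMA PTE
      * bits, track the domain's max_addr (refusing mappings beyond the
      * domain's address width) and install the page-table entries.
      */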
4248 static int intel_iommu_map(struct iommu_domain *domain,
4249                            unsigned long iova, phys_addr_t hpa,
4250                            size_t size, int iommu_prot)
4251 {
4252         struct dmar_domain *dmar_domain = domain->priv;
4253         u64 max_addr;
4254         int prot = 0;
4255         int ret;
4256
4257         if (iommu_prot & IOMMU_READ)
4258                 prot |= DMA_PTE_READ;
4259         if (iommu_prot & IOMMU_WRITE)
4260                 prot |= DMA_PTE_WRITE;
4261         if ((iommu_prot & IOMMU_CACHE) && dmar_domain->iommu_snooping)
4262                 prot |= DMA_PTE_SNP;
4263
4264         max_addr = iova + size;
4265         if (dmar_domain->max_addr < max_addr) {
4266                 u64 end;
4267
4268                 /* check if minimum agaw is sufficient for mapped address */
4269                 end = __DOMAIN_MAX_ADDR(dmar_domain->gaw) + 1;
4270                 if (end < max_addr) {
4271                         printk(KERN_ERR "%s: iommu width (%d) is not "
4272                                "sufficient for the mapped address (%llx)\n",
4273                                __func__, dmar_domain->gaw, max_addr);
4274                         return -EFAULT;
4275                 }
4276                 dmar_domain->max_addr = max_addr;
4277         }
4278         /* Round up size to next multiple of PAGE_SIZE, if it and
4279            the low bits of hpa would take us onto the next page */
4280         size = aligned_nrpages(hpa, size);
4281         ret = domain_pfn_mapping(dmar_domain, iova >> VTD_PAGE_SHIFT,
4282                                  hpa >> VTD_PAGE_SHIFT, size, prot);
4283         return ret;
4284 }
4285
4286 static size_t intel_iommu_unmap(struct iommu_domain *domain,
4287                                 unsigned long iova, size_t size)
4288 {
4289         struct dmar_domain *dmar_domain = domain->priv;
4290         struct page *freelist = NULL;
4291         struct intel_iommu *iommu;
4292         unsigned long start_pfn, last_pfn;
4293         unsigned int npages;
4294         int iommu_id, num, ndomains, level = 0;
4295
4296         /* Cope with horrid API which requires us to unmap more than the
4297            size argument if it happens to be a large-page mapping. */
4298         if (!pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &level))
4299                 BUG();
4300
4301         if (size < VTD_PAGE_SIZE << level_to_offset_bits(level))
4302                 size = VTD_PAGE_SIZE << level_to_offset_bits(level);
4303
4304         start_pfn = iova >> VTD_PAGE_SHIFT;
4305         last_pfn = (iova + size - 1) >> VTD_PAGE_SHIFT;
4306
4307         freelist = domain_unmap(dmar_domain, start_pfn, last_pfn);
4308
4309         npages = last_pfn - start_pfn + 1;
4310
4311         for_each_set_bit(iommu_id, dmar_domain->iommu_bmp, g_num_of_iommus) {
4312                iommu = g_iommus[iommu_id];
4313
4314                /*
4315                 * find bit position of dmar_domain
4316                 */
4317                ndomains = cap_ndoms(iommu->cap);
4318                for_each_set_bit(num, iommu->domain_ids, ndomains) {
4319                        if (iommu->domains[num] == dmar_domain)
4320                                iommu_flush_iotlb_psi(iommu, num, start_pfn,
4321                                                      npages, !freelist, 0);
4322                }
4323
4324         }
4325
4326         dma_free_pagelist(freelist);
4327
4328         if (dmar_domain->max_addr == iova + size)
4329                 dmar_domain->max_addr = iova;
4330
4331         return size;
4332 }
4333
4334 static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain,
4335                                             dma_addr_t iova)
4336 {
4337         struct dmar_domain *dmar_domain = domain->priv;
4338         struct dma_pte *pte;
4339         int level = 0;
4340         u64 phys = 0;
4341
4342         pte = pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &level);
4343         if (pte)
4344                 phys = dma_pte_addr(pte);
4345
4346         return phys;
4347 }
4348
4349 static int intel_iommu_domain_has_cap(struct iommu_domain *domain,
4350                                       unsigned long cap)
4351 {
4352         struct dmar_domain *dmar_domain = domain->priv;
4353
4354         if (cap == IOMMU_CAP_CACHE_COHERENCY)
4355                 return dmar_domain->iommu_snooping;
4356         if (cap == IOMMU_CAP_INTR_REMAP)
4357                 return irq_remapping_enabled;
4358
4359         return 0;
4360 }
4361
4362 #define REQ_ACS_FLAGS   (PCI_ACS_SV | PCI_ACS_RR | PCI_ACS_CR | PCI_ACS_UF)
4363
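     /*
      * Build the iommu group for a newly added device: walk upstream from
      * the device (following DMA source quirks and non-isolated
      * multifunction peers) until ACS guarantees isolation, then join or
      * create the iommu group of the device reached there.
      */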
4364 static int intel_iommu_add_device(struct device *dev)
4365 {
4366         struct pci_dev *pdev = to_pci_dev(dev);
4367         struct pci_dev *bridge, *dma_pdev = NULL;
4368         struct iommu_group *group;
4369         int ret;
4370         u8 bus, devfn;
4371
4372         if (!device_to_iommu(dev, &bus, &devfn))
4373                 return -ENODEV;
4374
4375         bridge = pci_find_upstream_pcie_bridge(pdev);
4376         if (bridge) {
4377                 if (pci_is_pcie(bridge))
4378                         dma_pdev = pci_get_domain_bus_and_slot(
4379                                                 pci_domain_nr(pdev->bus),
4380                                                 bridge->subordinate->number, 0);
4381                 if (!dma_pdev)
4382                         dma_pdev = pci_dev_get(bridge);
4383         } else
4384                 dma_pdev = pci_dev_get(pdev);
4385
4386         /* Account for quirked devices */
4387         swap_pci_ref(&dma_pdev, pci_get_dma_source(dma_pdev));
4388
4389         /*
4390          * If it's a multifunction device that does not support our
4391          * required ACS flags, add it to the same group as the lowest-numbered
4392          * function that also does not support the required ACS flags.
4393          */
4394         if (dma_pdev->multifunction &&
4395             !pci_acs_enabled(dma_pdev, REQ_ACS_FLAGS)) {
4396                 u8 i, slot = PCI_SLOT(dma_pdev->devfn);
4397
4398                 for (i = 0; i < 8; i++) {
4399                         struct pci_dev *tmp;
4400
4401                         tmp = pci_get_slot(dma_pdev->bus, PCI_DEVFN(slot, i));
4402                         if (!tmp)
4403                                 continue;
4404
4405                         if (!pci_acs_enabled(tmp, REQ_ACS_FLAGS)) {
4406                                 swap_pci_ref(&dma_pdev, tmp);
4407                                 break;
4408                         }
4409                         pci_dev_put(tmp);
4410                 }
4411         }
4412
4413         /*
4414          * Devices on the root bus go through the iommu.  If that's not us,
4415          * find the next upstream device and test ACS up to the root bus.
4416          * Finding the next device may require skipping virtual buses.
4417          */
4418         while (!pci_is_root_bus(dma_pdev->bus)) {
4419                 struct pci_bus *bus = dma_pdev->bus;
4420
4421                 while (!bus->self) {
4422                         if (!pci_is_root_bus(bus))
4423                                 bus = bus->parent;
4424                         else
4425                                 goto root_bus;
4426                 }
4427
4428                 if (pci_acs_path_enabled(bus->self, NULL, REQ_ACS_FLAGS))
4429                         break;
4430
4431                 swap_pci_ref(&dma_pdev, pci_dev_get(bus->self));
4432         }
4433
4434 root_bus:
4435         group = iommu_group_get(&dma_pdev->dev);
4436         pci_dev_put(dma_pdev);
4437         if (!group) {
4438                 group = iommu_group_alloc();
4439                 if (IS_ERR(group))
4440                         return PTR_ERR(group);
4441         }
4442
4443         ret = iommu_group_add_device(group, dev);
4444
4445         iommu_group_put(group);
4446         return ret;
4447 }
4448
4449 static void intel_iommu_remove_device(struct device *dev)
4450 {
4451         iommu_group_remove_device(dev);
4452 }
4453
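/*
 * Callback table registered with the generic IOMMU core for the PCI bus
 * (via bus_set_iommu() in intel_iommu_init()).  Consumers such as VFIO or
 * KVM device assignment use the generic iommu_* API rather than calling
 * these directly; a minimal usage sketch, with error handling omitted and
 * pdev, iova and phys standing in for caller-provided values:
 *
 *	struct iommu_domain *dom = iommu_domain_alloc(&pci_bus_type);
 *
 *	iommu_attach_device(dom, &pdev->dev);
 *	iommu_map(dom, iova, phys, SZ_4K, IOMMU_READ | IOMMU_WRITE);
 *	...
 *	iommu_unmap(dom, iova, SZ_4K);
 *	iommu_detach_device(dom, &pdev->dev);
 *	iommu_domain_free(dom);
 */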
4454 static struct iommu_ops intel_iommu_ops = {
4455         .domain_init    = intel_iommu_domain_init,
4456         .domain_destroy = intel_iommu_domain_destroy,
4457         .attach_dev     = intel_iommu_attach_device,
4458         .detach_dev     = intel_iommu_detach_device,
4459         .map            = intel_iommu_map,
4460         .unmap          = intel_iommu_unmap,
4461         .iova_to_phys   = intel_iommu_iova_to_phys,
4462         .domain_has_cap = intel_iommu_domain_has_cap,
4463         .add_device     = intel_iommu_add_device,
4464         .remove_device  = intel_iommu_remove_device,
4465         .pgsize_bitmap  = INTEL_IOMMU_PGSIZES,
4466 };
4467
4468 static void quirk_iommu_g4x_gfx(struct pci_dev *dev)
4469 {
4470         /* G4x/GM45 integrated gfx dmar support is totally busted. */
4471         printk(KERN_INFO "DMAR: Disabling IOMMU for graphics on this chipset\n");
4472         dmar_map_gfx = 0;
4473 }
4474
4475 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_g4x_gfx);
4476 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e00, quirk_iommu_g4x_gfx);
4477 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e10, quirk_iommu_g4x_gfx);
4478 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e20, quirk_iommu_g4x_gfx);
4479 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e30, quirk_iommu_g4x_gfx);
4480 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e40, quirk_iommu_g4x_gfx);
4481 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e90, quirk_iommu_g4x_gfx);
4482
4483 static void quirk_iommu_rwbf(struct pci_dev *dev)
4484 {
4485         /*
4486          * Mobile 4 Series Chipset neglects to set RWBF capability,
4487          * but needs it. Same seems to hold for the desktop versions.
4488          */
4489         printk(KERN_INFO "DMAR: Forcing write-buffer flush capability\n");
4490         rwbf_quirk = 1;
4491 }
4492
4493 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_rwbf);
4494 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e00, quirk_iommu_rwbf);
4495 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e10, quirk_iommu_rwbf);
4496 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e20, quirk_iommu_rwbf);
4497 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e30, quirk_iommu_rwbf);
4498 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e40, quirk_iommu_rwbf);
4499 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e90, quirk_iommu_rwbf);
4500
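/*
 * GGC is the graphics control register (offset 0x52) in the config space of
 * the devices quirked below.  Bits 11:8 describe how much memory the BIOS
 * stole for the graphics GTT and whether a separate VT-d capable (shadow)
 * GTT was also allocated -- the *_VT values.
 */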
4501 #define GGC 0x52
4502 #define GGC_MEMORY_SIZE_MASK    (0xf << 8)
4503 #define GGC_MEMORY_SIZE_NONE    (0x0 << 8)
4504 #define GGC_MEMORY_SIZE_1M      (0x1 << 8)
4505 #define GGC_MEMORY_SIZE_2M      (0x3 << 8)
4506 #define GGC_MEMORY_VT_ENABLED   (0x8 << 8)
4507 #define GGC_MEMORY_SIZE_2M_VT   (0x9 << 8)
4508 #define GGC_MEMORY_SIZE_3M_VT   (0xa << 8)
4509 #define GGC_MEMORY_SIZE_4M_VT   (0xb << 8)
4510
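/*
 * If the BIOS allocated no VT-d aware (shadow) GTT, disable IOMMU
 * translation for graphics entirely (dmar_map_gfx = 0).  Otherwise keep
 * translation but force strict, unbatched IOTLB flushing, since the
 * graphics device has to be idle before its mappings may be flushed.
 */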
4511 static void quirk_calpella_no_shadow_gtt(struct pci_dev *dev)
4512 {
4513         unsigned short ggc;
4514
4515         if (pci_read_config_word(dev, GGC, &ggc))
4516                 return;
4517
4518         if (!(ggc & GGC_MEMORY_VT_ENABLED)) {
4519                 printk(KERN_INFO "DMAR: BIOS has allocated no shadow GTT; disabling IOMMU for graphics\n");
4520                 dmar_map_gfx = 0;
4521         } else if (dmar_map_gfx) {
4522                 /* we have to ensure the gfx device is idle before we flush */
4523                 printk(KERN_INFO "DMAR: Disabling batched IOTLB flush on Ironlake\n");
4524                 intel_iommu_strict = 1;
4525         }
4526 }
4527 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0040, quirk_calpella_no_shadow_gtt);
4528 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0044, quirk_calpella_no_shadow_gtt);
4529 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0062, quirk_calpella_no_shadow_gtt);
4530 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x006a, quirk_calpella_no_shadow_gtt);
4531
4532 /* On Tylersburg chipsets, some BIOSes have been known to enable the
4533    ISOCH DMAR unit for the Azalia sound device, but not give it any
4534    TLB entries, which causes it to deadlock. Check for that.  We do
4535    this in a function called from init_dmars(), instead of in a PCI
4536    quirk, because we don't want to print the obnoxious "BIOS broken"
4537    message if VT-d is actually disabled.
4538 */
4539 static void __init check_tylersburg_isoch(void)
4540 {
4541         struct pci_dev *pdev;
4542         uint32_t vtisochctrl;
4543
4544         /* If there's no Azalia in the system anyway, forget it. */
4545         pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x3a3e, NULL);
4546         if (!pdev)
4547                 return;
4548         pci_dev_put(pdev);
4549
4550         /* System Management Registers. Might be hidden, in which case
4551            we can't do the sanity check. But that's OK, because the
4552            known-broken BIOSes _don't_ actually hide it, so far. */
4553         pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x342e, NULL);
4554         if (!pdev)
4555                 return;
4556
4557         if (pci_read_config_dword(pdev, 0x188, &vtisochctrl)) {
4558                 pci_dev_put(pdev);
4559                 return;
4560         }
4561
4562         pci_dev_put(pdev);
4563
4564         /* If Azalia DMA is routed to the non-isoch DMAR unit, fine. */
4565         if (vtisochctrl & 1)
4566                 return;
4567
4568         /* Drop all bits other than the number of TLB entries */
4569         vtisochctrl &= 0x1c;
4570
4571         /* If we have the recommended number of TLB entries (16), fine. */
4572         if (vtisochctrl == 0x10)
4573                 return;
4574
4575         /* Zero TLB entries? You get to ride the short bus to school. */
4576         if (!vtisochctrl) {
4577                 WARN(1, "Your BIOS is broken; DMA routed to ISOCH DMAR unit but no TLB space.\n"
4578                      "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
4579                      dmi_get_system_info(DMI_BIOS_VENDOR),
4580                      dmi_get_system_info(DMI_BIOS_VERSION),
4581                      dmi_get_system_info(DMI_PRODUCT_VERSION));
4582                 iommu_identity_mapping |= IDENTMAP_AZALIA;
4583                 return;
4584         }
4585
4586         printk(KERN_WARNING "DMAR: Recommended TLB entries for ISOCH unit is 16; your BIOS set %d\n",
4587                vtisochctrl);
4588 }