]> git.kernelconcepts.de Git - karo-tx-linux.git/blob - arch/sparc/kernel/pci_sun4v.c
Merge remote-tracking branch 'samsung/for-next'
[karo-tx-linux.git] / arch / sparc / kernel / pci_sun4v.c
1 /* pci_sun4v.c: SUN4V specific PCI controller support.
2  *
3  * Copyright (C) 2006, 2007, 2008 David S. Miller (davem@davemloft.net)
4  */
5
6 #include <linux/kernel.h>
7 #include <linux/types.h>
8 #include <linux/pci.h>
9 #include <linux/init.h>
10 #include <linux/slab.h>
11 #include <linux/interrupt.h>
12 #include <linux/percpu.h>
13 #include <linux/irq.h>
14 #include <linux/msi.h>
15 #include <linux/export.h>
16 #include <linux/log2.h>
17 #include <linux/of_device.h>
18 #include <linux/iommu-common.h>
19
20 #include <asm/iommu.h>
21 #include <asm/irq.h>
22 #include <asm/hypervisor.h>
23 #include <asm/prom.h>
24
25 #include "pci_impl.h"
26 #include "iommu_common.h"
27
28 #include "pci_sun4v.h"
29
/* Name of this driver; used to build the log prefix below. */
#define DRIVER_NAME     "pci_sun4v"
/* String prefix for this driver's printk() messages. */
#define PFX             DRIVER_NAME ": "

/* Version numbers, both initialised to 1.
 * NOTE(review): presumably the PCI hypervisor API version requested at
 * probe time -- confirm against the users of these variables.
 */
static unsigned long vpci_major = 1;
static unsigned long vpci_minor = 1;

/* Number of u64 slots that fit into a single page. */
#define PGLIST_NENTS    (PAGE_SIZE / sizeof(u64))
37
38 struct iommu_batch {
39         struct device   *dev;           /* Device mapping is for.       */
40         unsigned long   prot;           /* IOMMU page protections       */
41         unsigned long   entry;          /* Index into IOTSB.            */
42         u64             *pglist;        /* List of physical pages       */
43         unsigned long   npages;         /* Number of pages in list.     */
44 };
45
/* One mapping batch per cpu; accessed through this_cpu_ptr() by the
 * iommu_batch_*() helpers.
 */
static DEFINE_PER_CPU(struct iommu_batch, iommu_batch);
/* NOTE(review): presumably set once the per-cpu pglist buffers have been
 * allocated -- the code that sets it is not visible here, confirm.
 */
static int iommu_batch_initialized;
48
49 /* Interrupts must be disabled.  */
50 static inline void iommu_batch_start(struct device *dev, unsigned long prot, unsigned long entry)
51 {
52         struct iommu_batch *p = this_cpu_ptr(&iommu_batch);
53
54         p->dev          = dev;
55         p->prot         = prot;
56         p->entry        = entry;
57         p->npages       = 0;
58 }
59
60 /* Interrupts must be disabled.  */
61 static long iommu_batch_flush(struct iommu_batch *p)
62 {
63         struct pci_pbm_info *pbm = p->dev->archdata.host_controller;
64         unsigned long devhandle = pbm->devhandle;
65         unsigned long prot = p->prot;
66         unsigned long entry = p->entry;
67         u64 *pglist = p->pglist;
68         unsigned long npages = p->npages;
69
70         while (npages != 0) {
71                 long num;
72
73                 num = pci_sun4v_iommu_map(devhandle, HV_PCI_TSBID(0, entry),
74                                           npages, prot, __pa(pglist));
75                 if (unlikely(num < 0)) {
76                         if (printk_ratelimit())
77                                 printk("iommu_batch_flush: IOMMU map of "
78                                        "[%08lx:%08llx:%lx:%lx:%lx] failed with "
79                                        "status %ld\n",
80                                        devhandle, HV_PCI_TSBID(0, entry),
81                                        npages, prot, __pa(pglist), num);
82                         return -1;
83                 }
84
85                 entry += num;
86                 npages -= num;
87                 pglist += num;
88         }
89
90         p->entry = entry;
91         p->npages = 0;
92
93         return 0;
94 }
95
96 static inline void iommu_batch_new_entry(unsigned long entry)
97 {
98         struct iommu_batch *p = this_cpu_ptr(&iommu_batch);
99
100         if (p->entry + p->npages == entry)
101                 return;
102         if (p->entry != ~0UL)
103                 iommu_batch_flush(p);
104         p->entry = entry;
105 }
106
107 /* Interrupts must be disabled.  */
108 static inline long iommu_batch_add(u64 phys_page)
109 {
110         struct iommu_batch *p = this_cpu_ptr(&iommu_batch);
111
112         BUG_ON(p->npages >= PGLIST_NENTS);
113
114         p->pglist[p->npages++] = phys_page;
115         if (p->npages == PGLIST_NENTS)
116                 return iommu_batch_flush(p);
117
118         return 0;
119 }
120
121 /* Interrupts must be disabled.  */
122 static inline long iommu_batch_end(void)
123 {
124         struct iommu_batch *p = this_cpu_ptr(&iommu_batch);
125
126         BUG_ON(p->npages >= PGLIST_NENTS);
127
128         return iommu_batch_flush(p);
129 }
130
131 static void *dma_4v_alloc_coherent(struct device *dev, size_t size,
132                                    dma_addr_t *dma_addrp, gfp_t gfp,
133                                    struct dma_attrs *attrs)
134 {
135         unsigned long flags, order, first_page, npages, n;
136         struct iommu *iommu;
137         struct page *page;
138         void *ret;
139         long entry;
140         int nid;
141
142         size = IO_PAGE_ALIGN(size);
143         order = get_order(size);
144         if (unlikely(order >= MAX_ORDER))
145                 return NULL;
146
147         npages = size >> IO_PAGE_SHIFT;
148
149         nid = dev->archdata.numa_node;
150         page = alloc_pages_node(nid, gfp, order);
151         if (unlikely(!page))
152                 return NULL;
153
154         first_page = (unsigned long) page_address(page);
155         memset((char *)first_page, 0, PAGE_SIZE << order);
156
157         iommu = dev->archdata.iommu;
158
159         entry = iommu_tbl_range_alloc(dev, &iommu->tbl, npages, NULL,
160                                       (unsigned long)(-1), 0);
161
162         if (unlikely(entry == IOMMU_ERROR_CODE))
163                 goto range_alloc_fail;
164
165         *dma_addrp = (iommu->tbl.table_map_base + (entry << IO_PAGE_SHIFT));
166         ret = (void *) first_page;
167         first_page = __pa(first_page);
168
169         local_irq_save(flags);
170
171         iommu_batch_start(dev,
172                           (HV_PCI_MAP_ATTR_READ |
173                            HV_PCI_MAP_ATTR_WRITE),
174                           entry);
175
176         for (n = 0; n < npages; n++) {
177                 long err = iommu_batch_add(first_page + (n * PAGE_SIZE));
178                 if (unlikely(err < 0L))
179                         goto iommu_map_fail;
180         }
181
182         if (unlikely(iommu_batch_end() < 0L))
183                 goto iommu_map_fail;
184
185         local_irq_restore(flags);
186
187         return ret;
188
189 iommu_map_fail:
190         iommu_tbl_range_free(&iommu->tbl, *dma_addrp, npages, IOMMU_ERROR_CODE);
191
192 range_alloc_fail:
193         free_pages(first_page, order);
194         return NULL;
195 }
196
197 static void dma_4v_iommu_demap(void *demap_arg, unsigned long entry,
198                                unsigned long npages)
199 {
200         u32 devhandle = *(u32 *)demap_arg;
201         unsigned long num, flags;
202
203         local_irq_save(flags);
204         do {
205                 num = pci_sun4v_iommu_demap(devhandle,
206                                             HV_PCI_TSBID(0, entry),
207                                             npages);
208
209                 entry += num;
210                 npages -= num;
211         } while (npages != 0);
212         local_irq_restore(flags);
213 }
214
215 static void dma_4v_free_coherent(struct device *dev, size_t size, void *cpu,
216                                  dma_addr_t dvma, struct dma_attrs *attrs)
217 {
218         struct pci_pbm_info *pbm;
219         struct iommu *iommu;
220         unsigned long order, npages, entry;
221         u32 devhandle;
222
223         npages = IO_PAGE_ALIGN(size) >> IO_PAGE_SHIFT;
224         iommu = dev->archdata.iommu;
225         pbm = dev->archdata.host_controller;
226         devhandle = pbm->devhandle;
227         entry = ((dvma - iommu->tbl.table_map_base) >> IO_PAGE_SHIFT);
228         dma_4v_iommu_demap(&devhandle, entry, npages);
229         iommu_tbl_range_free(&iommu->tbl, dvma, npages, IOMMU_ERROR_CODE);
230         order = get_order(size);
231         if (order < 10)
232                 free_pages((unsigned long)cpu, order);
233 }
234
235 static dma_addr_t dma_4v_map_page(struct device *dev, struct page *page,
236                                   unsigned long offset, size_t sz,
237                                   enum dma_data_direction direction,
238                                   struct dma_attrs *attrs)
239 {
240         struct iommu *iommu;
241         unsigned long flags, npages, oaddr;
242         unsigned long i, base_paddr;
243         u32 bus_addr, ret;
244         unsigned long prot;
245         long entry;
246
247         iommu = dev->archdata.iommu;
248
249         if (unlikely(direction == DMA_NONE))
250                 goto bad;
251
252         oaddr = (unsigned long)(page_address(page) + offset);
253         npages = IO_PAGE_ALIGN(oaddr + sz) - (oaddr & IO_PAGE_MASK);
254         npages >>= IO_PAGE_SHIFT;
255
256         entry = iommu_tbl_range_alloc(dev, &iommu->tbl, npages, NULL,
257                                       (unsigned long)(-1), 0);
258
259         if (unlikely(entry == IOMMU_ERROR_CODE))
260                 goto bad;
261
262         bus_addr = (iommu->tbl.table_map_base + (entry << IO_PAGE_SHIFT));
263         ret = bus_addr | (oaddr & ~IO_PAGE_MASK);
264         base_paddr = __pa(oaddr & IO_PAGE_MASK);
265         prot = HV_PCI_MAP_ATTR_READ;
266         if (direction != DMA_TO_DEVICE)
267                 prot |= HV_PCI_MAP_ATTR_WRITE;
268
269         local_irq_save(flags);
270
271         iommu_batch_start(dev, prot, entry);
272
273         for (i = 0; i < npages; i++, base_paddr += IO_PAGE_SIZE) {
274                 long err = iommu_batch_add(base_paddr);
275                 if (unlikely(err < 0L))
276                         goto iommu_map_fail;
277         }
278         if (unlikely(iommu_batch_end() < 0L))
279                 goto iommu_map_fail;
280
281         local_irq_restore(flags);
282
283         return ret;
284
285 bad:
286         if (printk_ratelimit())
287                 WARN_ON(1);
288         return DMA_ERROR_CODE;
289
290 iommu_map_fail:
291         iommu_tbl_range_free(&iommu->tbl, bus_addr, npages, IOMMU_ERROR_CODE);
292         return DMA_ERROR_CODE;
293 }
294
295 static void dma_4v_unmap_page(struct device *dev, dma_addr_t bus_addr,
296                               size_t sz, enum dma_data_direction direction,
297                               struct dma_attrs *attrs)
298 {
299         struct pci_pbm_info *pbm;
300         struct iommu *iommu;
301         unsigned long npages;
302         long entry;
303         u32 devhandle;
304
305         if (unlikely(direction == DMA_NONE)) {
306                 if (printk_ratelimit())
307                         WARN_ON(1);
308                 return;
309         }
310
311         iommu = dev->archdata.iommu;
312         pbm = dev->archdata.host_controller;
313         devhandle = pbm->devhandle;
314
315         npages = IO_PAGE_ALIGN(bus_addr + sz) - (bus_addr & IO_PAGE_MASK);
316         npages >>= IO_PAGE_SHIFT;
317         bus_addr &= IO_PAGE_MASK;
318         entry = (bus_addr - iommu->tbl.table_map_base) >> IO_PAGE_SHIFT;
319         dma_4v_iommu_demap(&devhandle, entry, npages);
320         iommu_tbl_range_free(&iommu->tbl, bus_addr, npages, IOMMU_ERROR_CODE);
321 }
322
323 static int dma_4v_map_sg(struct device *dev, struct scatterlist *sglist,
324                          int nelems, enum dma_data_direction direction,
325                          struct dma_attrs *attrs)
326 {
327         struct scatterlist *s, *outs, *segstart;
328         unsigned long flags, handle, prot;
329         dma_addr_t dma_next = 0, dma_addr;
330         unsigned int max_seg_size;
331         unsigned long seg_boundary_size;
332         int outcount, incount, i;
333         struct iommu *iommu;
334         unsigned long base_shift;
335         long err;
336
337         BUG_ON(direction == DMA_NONE);
338
339         iommu = dev->archdata.iommu;
340         if (nelems == 0 || !iommu)
341                 return 0;
342         
343         prot = HV_PCI_MAP_ATTR_READ;
344         if (direction != DMA_TO_DEVICE)
345                 prot |= HV_PCI_MAP_ATTR_WRITE;
346
347         outs = s = segstart = &sglist[0];
348         outcount = 1;
349         incount = nelems;
350         handle = 0;
351
352         /* Init first segment length for backout at failure */
353         outs->dma_length = 0;
354
355         local_irq_save(flags);
356
357         iommu_batch_start(dev, prot, ~0UL);
358
359         max_seg_size = dma_get_max_seg_size(dev);
360         seg_boundary_size = ALIGN(dma_get_seg_boundary(dev) + 1,
361                                   IO_PAGE_SIZE) >> IO_PAGE_SHIFT;
362         base_shift = iommu->tbl.table_map_base >> IO_PAGE_SHIFT;
363         for_each_sg(sglist, s, nelems, i) {
364                 unsigned long paddr, npages, entry, out_entry = 0, slen;
365
366                 slen = s->length;
367                 /* Sanity check */
368                 if (slen == 0) {
369                         dma_next = 0;
370                         continue;
371                 }
372                 /* Allocate iommu entries for that segment */
373                 paddr = (unsigned long) SG_ENT_PHYS_ADDRESS(s);
374                 npages = iommu_num_pages(paddr, slen, IO_PAGE_SIZE);
375                 entry = iommu_tbl_range_alloc(dev, &iommu->tbl, npages,
376                                               &handle, (unsigned long)(-1), 0);
377
378                 /* Handle failure */
379                 if (unlikely(entry == IOMMU_ERROR_CODE)) {
380                         if (printk_ratelimit())
381                                 printk(KERN_INFO "iommu_alloc failed, iommu %p paddr %lx"
382                                        " npages %lx\n", iommu, paddr, npages);
383                         goto iommu_map_failed;
384                 }
385
386                 iommu_batch_new_entry(entry);
387
388                 /* Convert entry to a dma_addr_t */
389                 dma_addr = iommu->tbl.table_map_base + (entry << IO_PAGE_SHIFT);
390                 dma_addr |= (s->offset & ~IO_PAGE_MASK);
391
392                 /* Insert into HW table */
393                 paddr &= IO_PAGE_MASK;
394                 while (npages--) {
395                         err = iommu_batch_add(paddr);
396                         if (unlikely(err < 0L))
397                                 goto iommu_map_failed;
398                         paddr += IO_PAGE_SIZE;
399                 }
400
401                 /* If we are in an open segment, try merging */
402                 if (segstart != s) {
403                         /* We cannot merge if:
404                          * - allocated dma_addr isn't contiguous to previous allocation
405                          */
406                         if ((dma_addr != dma_next) ||
407                             (outs->dma_length + s->length > max_seg_size) ||
408                             (is_span_boundary(out_entry, base_shift,
409                                               seg_boundary_size, outs, s))) {
410                                 /* Can't merge: create a new segment */
411                                 segstart = s;
412                                 outcount++;
413                                 outs = sg_next(outs);
414                         } else {
415                                 outs->dma_length += s->length;
416                         }
417                 }
418
419                 if (segstart == s) {
420                         /* This is a new segment, fill entries */
421                         outs->dma_address = dma_addr;
422                         outs->dma_length = slen;
423                         out_entry = entry;
424                 }
425
426                 /* Calculate next page pointer for contiguous check */
427                 dma_next = dma_addr + slen;
428         }
429
430         err = iommu_batch_end();
431
432         if (unlikely(err < 0L))
433                 goto iommu_map_failed;
434
435         local_irq_restore(flags);
436
437         if (outcount < incount) {
438                 outs = sg_next(outs);
439                 outs->dma_address = DMA_ERROR_CODE;
440                 outs->dma_length = 0;
441         }
442
443         return outcount;
444
445 iommu_map_failed:
446         for_each_sg(sglist, s, nelems, i) {
447                 if (s->dma_length != 0) {
448                         unsigned long vaddr, npages;
449
450                         vaddr = s->dma_address & IO_PAGE_MASK;
451                         npages = iommu_num_pages(s->dma_address, s->dma_length,
452                                                  IO_PAGE_SIZE);
453                         iommu_tbl_range_free(&iommu->tbl, vaddr, npages,
454                                              IOMMU_ERROR_CODE);
455                         /* XXX demap? XXX */
456                         s->dma_address = DMA_ERROR_CODE;
457                         s->dma_length = 0;
458                 }
459                 if (s == outs)
460                         break;
461         }
462         local_irq_restore(flags);
463
464         return 0;
465 }
466
467 static void dma_4v_unmap_sg(struct device *dev, struct scatterlist *sglist,
468                             int nelems, enum dma_data_direction direction,
469                             struct dma_attrs *attrs)
470 {
471         struct pci_pbm_info *pbm;
472         struct scatterlist *sg;
473         struct iommu *iommu;
474         unsigned long flags, entry;
475         u32 devhandle;
476
477         BUG_ON(direction == DMA_NONE);
478
479         iommu = dev->archdata.iommu;
480         pbm = dev->archdata.host_controller;
481         devhandle = pbm->devhandle;
482         
483         local_irq_save(flags);
484
485         sg = sglist;
486         while (nelems--) {
487                 dma_addr_t dma_handle = sg->dma_address;
488                 unsigned int len = sg->dma_length;
489                 unsigned long npages;
490                 struct iommu_map_table *tbl = &iommu->tbl;
491                 unsigned long shift = IO_PAGE_SHIFT;
492
493                 if (!len)
494                         break;
495                 npages = iommu_num_pages(dma_handle, len, IO_PAGE_SIZE);
496                 entry = ((dma_handle - tbl->table_map_base) >> shift);
497                 dma_4v_iommu_demap(&devhandle, entry, npages);
498                 iommu_tbl_range_free(&iommu->tbl, dma_handle, npages,
499                                      IOMMU_ERROR_CODE);
500                 sg = sg_next(sg);
501         }
502
503         local_irq_restore(flags);
504 }
505
506 static struct dma_map_ops sun4v_dma_ops = {
507         .alloc                          = dma_4v_alloc_coherent,
508         .free                           = dma_4v_free_coherent,
509         .map_page                       = dma_4v_map_page,
510         .unmap_page                     = dma_4v_unmap_page,
511         .map_sg                         = dma_4v_map_sg,
512         .unmap_sg                       = dma_4v_unmap_sg,
513 };
514
515 static void pci_sun4v_scan_bus(struct pci_pbm_info *pbm, struct device *parent)
516 {
517         struct property *prop;
518         struct device_node *dp;
519
520         dp = pbm->op->dev.of_node;
521         prop = of_find_property(dp, "66mhz-capable", NULL);
522         pbm->is_66mhz_capable = (prop != NULL);
523         pbm->pci_bus = pci_scan_one_pbm(pbm, parent);
524
525         /* XXX register error interrupt handlers XXX */
526 }
527
528 static unsigned long probe_existing_entries(struct pci_pbm_info *pbm,
529                                             struct iommu_map_table *iommu)
530 {
531         struct iommu_pool *pool;
532         unsigned long i, pool_nr, cnt = 0;
533         u32 devhandle;
534
535         devhandle = pbm->devhandle;
536         for (pool_nr = 0; pool_nr < iommu->nr_pools; pool_nr++) {
537                 pool = &(iommu->pools[pool_nr]);
538                 for (i = pool->start; i <= pool->end; i++) {
539                         unsigned long ret, io_attrs, ra;
540
541                         ret = pci_sun4v_iommu_getmap(devhandle,
542                                                      HV_PCI_TSBID(0, i),
543                                                      &io_attrs, &ra);
544                         if (ret == HV_EOK) {
545                                 if (page_in_phys_avail(ra)) {
546                                         pci_sun4v_iommu_demap(devhandle,
547                                                               HV_PCI_TSBID(0,
548                                                               i), 1);
549                                 } else {
550                                         cnt++;
551                                         __set_bit(i, iommu->map);
552                                 }
553                         }
554                 }
555         }
556         return cnt;
557 }
558
559 static int pci_sun4v_iommu_init(struct pci_pbm_info *pbm)
560 {
561         static const u32 vdma_default[] = { 0x80000000, 0x80000000 };
562         struct iommu *iommu = pbm->iommu;
563         unsigned long num_tsb_entries, sz;
564         u32 dma_mask, dma_offset;
565         const u32 *vdma;
566
567         vdma = of_get_property(pbm->op->dev.of_node, "virtual-dma", NULL);
568         if (!vdma)
569                 vdma = vdma_default;
570
571         if ((vdma[0] | vdma[1]) & ~IO_PAGE_MASK) {
572                 printk(KERN_ERR PFX "Strange virtual-dma[%08x:%08x].\n",
573                        vdma[0], vdma[1]);
574                 return -EINVAL;
575         }
576
577         dma_mask = (roundup_pow_of_two(vdma[1]) - 1UL);
578         num_tsb_entries = vdma[1] / IO_PAGE_SIZE;
579
580         dma_offset = vdma[0];
581
582         /* Setup initial software IOMMU state. */
583         spin_lock_init(&iommu->lock);
584         iommu->ctx_lowest_free = 1;
585         iommu->tbl.table_map_base = dma_offset;
586         iommu->dma_addr_mask = dma_mask;
587
588         /* Allocate and initialize the free area map.  */
589         sz = (num_tsb_entries + 7) / 8;
590         sz = (sz + 7UL) & ~7UL;
591         iommu->tbl.map = kzalloc(sz, GFP_KERNEL);
592         if (!iommu->tbl.map) {
593                 printk(KERN_ERR PFX "Error, kmalloc(arena.map) failed.\n");
594                 return -ENOMEM;
595         }
596         iommu_tbl_pool_init(&iommu->tbl, num_tsb_entries, IO_PAGE_SHIFT,
597                             NULL, false /* no large_pool */,
598                             0 /* default npools */,
599                             false /* want span boundary checking */);
600         sz = probe_existing_entries(pbm, &iommu->tbl);
601         if (sz)
602                 printk("%s: Imported %lu TSB entries from OBP\n",
603                        pbm->name, sz);
604
605         return 0;
606 }
607
608 #ifdef CONFIG_PCI_MSI
/* Layout of one entry in an MSI event queue: eight u64 words.
 * The masks and shifts defined next to a field describe how that
 * field is subdivided.
 */
struct pci_sun4v_msiq_entry {
        /* Version in the upper 32 bits, entry type in the low 8 bits. */
        u64             version_type;
#define MSIQ_VERSION_MASK               0xffffffff00000000UL
#define MSIQ_VERSION_SHIFT              32
#define MSIQ_TYPE_MASK                  0x00000000000000ffUL
#define MSIQ_TYPE_SHIFT                 0
#define MSIQ_TYPE_NONE                  0x00
#define MSIQ_TYPE_MSG                   0x01
#define MSIQ_TYPE_MSI32                 0x02
#define MSIQ_TYPE_MSI64                 0x03
#define MSIQ_TYPE_INTX                  0x08
#define MSIQ_TYPE_NONE2                 0xff

        u64             intx_sysino;
        u64             reserved1;
        u64             stick;
        u64             req_id;  /* bus/device/func */
#define MSIQ_REQID_BUS_MASK             0xff00UL
#define MSIQ_REQID_BUS_SHIFT            8
#define MSIQ_REQID_DEVICE_MASK          0x00f8UL
#define MSIQ_REQID_DEVICE_SHIFT         3
#define MSIQ_REQID_FUNC_MASK            0x0007UL
#define MSIQ_REQID_FUNC_SHIFT           0

        u64             msi_address;

        /* The format of this value is message type dependent.
         * For MSI bits 15:0 are the data from the MSI packet.
         * For MSI-X bits 31:0 are the data from the MSI packet.
         * For MSG, the message code and message routing code where:
         *      bits 39:32 is the bus/device/fn of the msg target-id
         *      bits 18:16 is the message routing code
         *      bits 7:0 is the message code
         * For INTx the low order 2-bits are:
         *      00 - INTA
         *      01 - INTB
         *      10 - INTC
         *      11 - INTD
         */
        u64             msi_data;

        u64             reserved2;
};
652
653 static int pci_sun4v_get_head(struct pci_pbm_info *pbm, unsigned long msiqid,
654                               unsigned long *head)
655 {
656         unsigned long err, limit;
657
658         err = pci_sun4v_msiq_gethead(pbm->devhandle, msiqid, head);
659         if (unlikely(err))
660                 return -ENXIO;
661
662         limit = pbm->msiq_ent_count * sizeof(struct pci_sun4v_msiq_entry);
663         if (unlikely(*head >= limit))
664                 return -EFBIG;
665
666         return 0;
667 }
668
669 static int pci_sun4v_dequeue_msi(struct pci_pbm_info *pbm,
670                                  unsigned long msiqid, unsigned long *head,
671                                  unsigned long *msi)
672 {
673         struct pci_sun4v_msiq_entry *ep;
674         unsigned long err, type;
675
676         /* Note: void pointer arithmetic, 'head' is a byte offset  */
677         ep = (pbm->msi_queues + ((msiqid - pbm->msiq_first) *
678                                  (pbm->msiq_ent_count *
679                                   sizeof(struct pci_sun4v_msiq_entry))) +
680               *head);
681
682         if ((ep->version_type & MSIQ_TYPE_MASK) == 0)
683                 return 0;
684
685         type = (ep->version_type & MSIQ_TYPE_MASK) >> MSIQ_TYPE_SHIFT;
686         if (unlikely(type != MSIQ_TYPE_MSI32 &&
687                      type != MSIQ_TYPE_MSI64))
688                 return -EINVAL;
689
690         *msi = ep->msi_data;
691
692         err = pci_sun4v_msi_setstate(pbm->devhandle,
693                                      ep->msi_data /* msi_num */,
694                                      HV_MSISTATE_IDLE);
695         if (unlikely(err))
696                 return -ENXIO;
697
698         /* Clear the entry.  */
699         ep->version_type &= ~MSIQ_TYPE_MASK;
700
701         (*head) += sizeof(struct pci_sun4v_msiq_entry);
702         if (*head >=
703             (pbm->msiq_ent_count * sizeof(struct pci_sun4v_msiq_entry)))
704                 *head = 0;
705
706         return 1;
707 }
708
709 static int pci_sun4v_set_head(struct pci_pbm_info *pbm, unsigned long msiqid,
710                               unsigned long head)
711 {
712         unsigned long err;
713
714         err = pci_sun4v_msiq_sethead(pbm->devhandle, msiqid, head);
715         if (unlikely(err))
716                 return -EINVAL;
717
718         return 0;
719 }
720
721 static int pci_sun4v_msi_setup(struct pci_pbm_info *pbm, unsigned long msiqid,
722                                unsigned long msi, int is_msi64)
723 {
724         if (pci_sun4v_msi_setmsiq(pbm->devhandle, msi, msiqid,
725                                   (is_msi64 ?
726                                    HV_MSITYPE_MSI64 : HV_MSITYPE_MSI32)))
727                 return -ENXIO;
728         if (pci_sun4v_msi_setstate(pbm->devhandle, msi, HV_MSISTATE_IDLE))
729                 return -ENXIO;
730         if (pci_sun4v_msi_setvalid(pbm->devhandle, msi, HV_MSIVALID_VALID))
731                 return -ENXIO;
732         return 0;
733 }
734
735 static int pci_sun4v_msi_teardown(struct pci_pbm_info *pbm, unsigned long msi)
736 {
737         unsigned long err, msiqid;
738
739         err = pci_sun4v_msi_getmsiq(pbm->devhandle, msi, &msiqid);
740         if (err)
741                 return -ENXIO;
742
743         pci_sun4v_msi_setvalid(pbm->devhandle, msi, HV_MSIVALID_INVALID);
744
745         return 0;
746 }
747
748 static int pci_sun4v_msiq_alloc(struct pci_pbm_info *pbm)
749 {
750         unsigned long q_size, alloc_size, pages, order;
751         int i;
752
753         q_size = pbm->msiq_ent_count * sizeof(struct pci_sun4v_msiq_entry);
754         alloc_size = (pbm->msiq_num * q_size);
755         order = get_order(alloc_size);
756         pages = __get_free_pages(GFP_KERNEL | __GFP_COMP, order);
757         if (pages == 0UL) {
758                 printk(KERN_ERR "MSI: Cannot allocate MSI queues (o=%lu).\n",
759                        order);
760                 return -ENOMEM;
761         }
762         memset((char *)pages, 0, PAGE_SIZE << order);
763         pbm->msi_queues = (void *) pages;
764
765         for (i = 0; i < pbm->msiq_num; i++) {
766                 unsigned long err, base = __pa(pages + (i * q_size));
767                 unsigned long ret1, ret2;
768
769                 err = pci_sun4v_msiq_conf(pbm->devhandle,
770                                           pbm->msiq_first + i,
771                                           base, pbm->msiq_ent_count);
772                 if (err) {
773                         printk(KERN_ERR "MSI: msiq register fails (err=%lu)\n",
774                                err);
775                         goto h_error;
776                 }
777
778                 err = pci_sun4v_msiq_info(pbm->devhandle,
779                                           pbm->msiq_first + i,
780                                           &ret1, &ret2);
781                 if (err) {
782                         printk(KERN_ERR "MSI: Cannot read msiq (err=%lu)\n",
783                                err);
784                         goto h_error;
785                 }
786                 if (ret1 != base || ret2 != pbm->msiq_ent_count) {
787                         printk(KERN_ERR "MSI: Bogus qconf "
788                                "expected[%lx:%x] got[%lx:%lx]\n",
789                                base, pbm->msiq_ent_count,
790                                ret1, ret2);
791                         goto h_error;
792                 }
793         }
794
795         return 0;
796
797 h_error:
798         free_pages(pages, order);
799         return -EINVAL;
800 }
801
802 static void pci_sun4v_msiq_free(struct pci_pbm_info *pbm)
803 {
804         unsigned long q_size, alloc_size, pages, order;
805         int i;
806
807         for (i = 0; i < pbm->msiq_num; i++) {
808                 unsigned long msiqid = pbm->msiq_first + i;
809
810                 (void) pci_sun4v_msiq_conf(pbm->devhandle, msiqid, 0UL, 0);
811         }
812
813         q_size = pbm->msiq_ent_count * sizeof(struct pci_sun4v_msiq_entry);
814         alloc_size = (pbm->msiq_num * q_size);
815         order = get_order(alloc_size);
816
817         pages = (unsigned long) pbm->msi_queues;
818
819         free_pages(pages, order);
820
821         pbm->msi_queues = NULL;
822 }
823
824 static int pci_sun4v_msiq_build_irq(struct pci_pbm_info *pbm,
825                                     unsigned long msiqid,
826                                     unsigned long devino)
827 {
828         unsigned int irq = sun4v_build_irq(pbm->devhandle, devino);
829
830         if (!irq)
831                 return -ENOMEM;
832
833         if (pci_sun4v_msiq_setvalid(pbm->devhandle, msiqid, HV_MSIQ_VALID))
834                 return -EINVAL;
835         if (pci_sun4v_msiq_setstate(pbm->devhandle, msiqid, HV_MSIQSTATE_IDLE))
836                 return -EINVAL;
837
838         return irq;
839 }
840
841 static const struct sparc64_msiq_ops pci_sun4v_msiq_ops = {
842         .get_head       =       pci_sun4v_get_head,
843         .dequeue_msi    =       pci_sun4v_dequeue_msi,
844         .set_head       =       pci_sun4v_set_head,
845         .msi_setup      =       pci_sun4v_msi_setup,
846         .msi_teardown   =       pci_sun4v_msi_teardown,
847         .msiq_alloc     =       pci_sun4v_msiq_alloc,
848         .msiq_free      =       pci_sun4v_msiq_free,
849         .msiq_build_irq =       pci_sun4v_msiq_build_irq,
850 };
851
852 static void pci_sun4v_msi_init(struct pci_pbm_info *pbm)
853 {
854         sparc64_pbm_msi_init(pbm, &pci_sun4v_msiq_ops);
855 }
856 #else /* CONFIG_PCI_MSI */
/* Variant used when CONFIG_PCI_MSI is not set: there is nothing to set up. */
static void pci_sun4v_msi_init(struct pci_pbm_info *pbm)
{
}
860 #endif /* !(CONFIG_PCI_MSI) */
861
862 static int pci_sun4v_pbm_init(struct pci_pbm_info *pbm,
863                               struct platform_device *op, u32 devhandle)
864 {
865         struct device_node *dp = op->dev.of_node;
866         int err;
867
868         pbm->numa_node = of_node_to_nid(dp);
869
870         pbm->pci_ops = &sun4v_pci_ops;
871         pbm->config_space_reg_bits = 12;
872
873         pbm->index = pci_num_pbms++;
874
875         pbm->op = op;
876
877         pbm->devhandle = devhandle;
878
879         pbm->name = dp->full_name;
880
881         printk("%s: SUN4V PCI Bus Module\n", pbm->name);
882         printk("%s: On NUMA node %d\n", pbm->name, pbm->numa_node);
883
884         pci_determine_mem_io_space(pbm);
885
886         pci_get_pbm_props(pbm);
887
888         err = pci_sun4v_iommu_init(pbm);
889         if (err)
890                 return err;
891
892         pci_sun4v_msi_init(pbm);
893
894         pci_sun4v_scan_bus(pbm, &op->dev);
895
896         pbm->next = pci_pbm_root;
897         pci_pbm_root = pbm;
898
899         return 0;
900 }
901
902 static int pci_sun4v_probe(struct platform_device *op)
903 {
904         const struct linux_prom64_registers *regs;
905         static int hvapi_negotiated = 0;
906         struct pci_pbm_info *pbm;
907         struct device_node *dp;
908         struct iommu *iommu;
909         u32 devhandle;
910         int i, err;
911
912         dp = op->dev.of_node;
913
914         if (!hvapi_negotiated++) {
915                 err = sun4v_hvapi_register(HV_GRP_PCI,
916                                            vpci_major,
917                                            &vpci_minor);
918
919                 if (err) {
920                         printk(KERN_ERR PFX "Could not register hvapi, "
921                                "err=%d\n", err);
922                         return err;
923                 }
924                 printk(KERN_INFO PFX "Registered hvapi major[%lu] minor[%lu]\n",
925                        vpci_major, vpci_minor);
926
927                 dma_ops = &sun4v_dma_ops;
928         }
929
930         regs = of_get_property(dp, "reg", NULL);
931         err = -ENODEV;
932         if (!regs) {
933                 printk(KERN_ERR PFX "Could not find config registers\n");
934                 goto out_err;
935         }
936         devhandle = (regs->phys_addr >> 32UL) & 0x0fffffff;
937
938         err = -ENOMEM;
939         if (!iommu_batch_initialized) {
940                 for_each_possible_cpu(i) {
941                         unsigned long page = get_zeroed_page(GFP_KERNEL);
942
943                         if (!page)
944                                 goto out_err;
945
946                         per_cpu(iommu_batch, i).pglist = (u64 *) page;
947                 }
948                 iommu_batch_initialized = 1;
949         }
950
951         pbm = kzalloc(sizeof(*pbm), GFP_KERNEL);
952         if (!pbm) {
953                 printk(KERN_ERR PFX "Could not allocate pci_pbm_info\n");
954                 goto out_err;
955         }
956
957         iommu = kzalloc(sizeof(struct iommu), GFP_KERNEL);
958         if (!iommu) {
959                 printk(KERN_ERR PFX "Could not allocate pbm iommu\n");
960                 goto out_free_controller;
961         }
962
963         pbm->iommu = iommu;
964
965         err = pci_sun4v_pbm_init(pbm, op, devhandle);
966         if (err)
967                 goto out_free_iommu;
968
969         dev_set_drvdata(&op->dev, pbm);
970
971         return 0;
972
973 out_free_iommu:
974         kfree(pbm->iommu);
975
976 out_free_controller:
977         kfree(pbm);
978
979 out_err:
980         return err;
981 }
982
983 static const struct of_device_id pci_sun4v_match[] = {
984         {
985                 .name = "pci",
986                 .compatible = "SUNW,sun4v-pci",
987         },
988         {},
989 };
990
991 static struct platform_driver pci_sun4v_driver = {
992         .driver = {
993                 .name = DRIVER_NAME,
994                 .of_match_table = pci_sun4v_match,
995         },
996         .probe          = pci_sun4v_probe,
997 };
998
999 static int __init pci_sun4v_init(void)
1000 {
1001         return platform_driver_register(&pci_sun4v_driver);
1002 }
1003
1004 subsys_initcall(pci_sun4v_init);