]> git.kernelconcepts.de Git - karo-tx-linux.git/blob - arch/ia64/kernel/iosapic.c
Merge branch 'cpuidle' into release
[karo-tx-linux.git] / arch / ia64 / kernel / iosapic.c
1 /*
2  * I/O SAPIC support.
3  *
4  * Copyright (C) 1999 Intel Corp.
5  * Copyright (C) 1999 Asit Mallick <asit.k.mallick@intel.com>
6  * Copyright (C) 2000-2002 J.I. Lee <jung-ik.lee@intel.com>
7  * Copyright (C) 1999-2000, 2002-2003 Hewlett-Packard Co.
8  *      David Mosberger-Tang <davidm@hpl.hp.com>
9  * Copyright (C) 1999 VA Linux Systems
10  * Copyright (C) 1999,2000 Walt Drummond <drummond@valinux.com>
11  *
12  * 00/04/19     D. Mosberger    Rewritten to mirror more closely the x86 I/O
13  *                              APIC code.  In particular, we now have separate
14  *                              handlers for edge and level triggered
15  *                              interrupts.
16  * 00/10/27     Asit Mallick, Goutham Rao <goutham.rao@intel.com> IRQ vector
17  *                              allocation PCI to vector mapping, shared PCI
18  *                              interrupts.
19  * 00/10/27     D. Mosberger    Document things a bit more to make them more
20  *                              understandable.  Clean up much of the old
21  *                              IOSAPIC cruft.
22  * 01/07/27     J.I. Lee        PCI irq routing, Platform/Legacy interrupts
23  *                              and fixes for ACPI S5(SoftOff) support.
24  * 02/01/23     J.I. Lee        iosapic pgm fixes for PCI irq routing from _PRT
25  * 02/01/07     E. Focht        <efocht@ess.nec.de> Redirectable interrupt
26  *                              vectors in iosapic_set_affinity(),
27  *                              initializations for /proc/irq/#/smp_affinity
28  * 02/04/02     P. Diefenbaugh  Cleaned up ACPI PCI IRQ routing.
29  * 02/04/18     J.I. Lee        bug fix in iosapic_init_pci_irq
30  * 02/04/30     J.I. Lee        bug fix in find_iosapic to fix ACPI PCI IRQ to
31  *                              IOSAPIC mapping error
32  * 02/07/29     T. Kochi        Allocate interrupt vectors dynamically
33  * 02/08/04     T. Kochi        Cleaned up terminology (irq, global system
34  *                              interrupt, vector, etc.)
35  * 02/09/20     D. Mosberger    Simplified by taking advantage of ACPI's
36  *                              pci_irq code.
37  * 03/02/19     B. Helgaas      Make pcat_compat system-wide, not per-IOSAPIC.
38  *                              Remove iosapic_address & gsi_base from
39  *                              external interfaces.  Rationalize
40  *                              __init/__devinit attributes.
41  * 04/12/04 Ashok Raj   <ashok.raj@intel.com> Intel Corporation 2004
42  *                              Updated to work with irq migration necessary
43  *                              for CPU Hotplug
44  */
45 /*
46  * Here is what the interrupt logic between a PCI device and the kernel looks
47  * like:
48  *
49  * (1) A PCI device raises one of the four interrupt pins (INTA, INTB, INTC,
50  *     INTD).  The device is uniquely identified by its bus-, and slot-number
51  *     (the function number does not matter here because all functions share
52  *     the same interrupt lines).
53  *
54  * (2) The motherboard routes the interrupt line to a pin on a IOSAPIC
55  *     controller.  Multiple interrupt lines may have to share the same
56  *     IOSAPIC pin (if they're level triggered and use the same polarity).
57  *     Each interrupt line has a unique Global System Interrupt (GSI) number
58  *     which can be calculated as the sum of the controller's base GSI number
59  *     and the IOSAPIC pin number to which the line connects.
60  *
61  * (3) The IOSAPIC uses internal routing table entries (RTEs) to map each
62  *     IOSAPIC pin to an IA-64 interrupt vector.  This interrupt vector is
63  *     then sent to the CPU.
64  *
65  * (4) The kernel recognizes an interrupt as an IRQ.  The IRQ interface is
66  *     used as architecture-independent interrupt handling mechanism in Linux.
67  *     As an IRQ is a number, we have to have
68  *     IA-64 interrupt vector number <-> IRQ number mapping.  On smaller
69  *     systems, we use one-to-one mapping between IA-64 vector and IRQ.  A
70  *     platform can implement platform_irq_to_vector(irq) and
71  *     platform_local_vector_to_irq(vector) APIs to differentiate the mapping.
72  *     Please see also arch/ia64/include/asm/hw_irq.h for those APIs.
73  *
74  * To sum up, there are three levels of mappings involved:
75  *
76  *      PCI pin -> global system interrupt (GSI) -> IA-64 vector <-> IRQ
77  *
78  * Note: The term "IRQ" is loosely used everywhere in Linux kernel to
79  * describe interrupts.  Now we use "IRQ" only for Linux IRQ's.  ISA IRQ
80  * (isa_irq) is the only exception in this source code.
81  */
82
83 #include <linux/acpi.h>
84 #include <linux/init.h>
85 #include <linux/irq.h>
86 #include <linux/kernel.h>
87 #include <linux/list.h>
88 #include <linux/pci.h>
89 #include <linux/slab.h>
90 #include <linux/smp.h>
91 #include <linux/string.h>
92 #include <linux/bootmem.h>
93
94 #include <asm/delay.h>
95 #include <asm/hw_irq.h>
96 #include <asm/io.h>
97 #include <asm/iosapic.h>
98 #include <asm/machvec.h>
99 #include <asm/processor.h>
100 #include <asm/ptrace.h>
101
/*
 * Debug tracing switch: change the #undef below into a #define to make
 * DBG() expand to printk(); as shipped, DBG() expands to nothing.
 */
#undef DEBUG_INTERRUPT_ROUTING

#ifdef DEBUG_INTERRUPT_ROUTING
#define DBG(fmt...)	printk(fmt)
#else
#define DBG(fmt...)
#endif

/*
 * Held (with interrupts disabled) around the GSI <-> IRQ lookups and the
 * register/unregister paths in this file.
 */
static DEFINE_SPINLOCK(iosapic_lock);

/*
 * These tables map IA-64 vectors to the IOSAPIC pin that generates this
 * vector.
 */

/* refcnt value of an RTE descriptor that currently has no users */
#define NO_REF_RTE	0
118
/*
 * Per-controller state, one slot per IOSAPIC.  A slot covers the GSI range
 * [gsi_base, gsi_base + num_rte), which is what find_iosapic() matches on.
 */
static struct iosapic {
	char __iomem	*addr;		/* base address of IOSAPIC */
	unsigned int	gsi_base;	/* GSI base */
	unsigned short	num_rte;	/* # of RTEs on this IOSAPIC */
	int		rtes_inuse;	/* # of RTEs in use on this IOSAPIC */
#ifdef CONFIG_NUMA
	unsigned short	node;		/* numa node association via pxm */
#endif
	spinlock_t	lock;		/* lock for indirect reg access */
} iosapic_lists[NR_IOSAPICS];
129
130 struct iosapic_rte_info {
131         struct list_head rte_list;      /* RTEs sharing the same vector */
132         char            rte_index;      /* IOSAPIC RTE index */
133         int             refcnt;         /* reference counter */
134         struct iosapic  *iosapic;
135 } ____cacheline_aligned;
136
/*
 * Per-IRQ routing state, indexed by Linux IRQ number.  All RTEs on the
 * rtes list are programmed with the same low32/dest values (see
 * mask_irq(), unmask_irq() and iosapic_set_affinity()).
 */
static struct iosapic_intr_info {
	struct list_head rtes;		/* RTEs using this vector (empty =>
					 * not an IOSAPIC interrupt) */
	int		count;		/* # of registered RTEs */
	u32		low32;		/* current value of low word of
					 * Redirection table entry */
	unsigned int	dest;		/* destination CPU physical ID */
	unsigned char	dmode	: 3;	/* delivery mode (see iosapic.h) */
	unsigned char	polarity: 1;	/* interrupt polarity
					 * (see iosapic.h) */
	unsigned char	trigger	: 1;	/* trigger mode (see iosapic.h) */
} iosapic_intr_info[NR_IRQS];
149
/* System-wide flag (see the 03/02/19 changelog entry), not per-IOSAPIC. */
static unsigned char pcat_compat;	/* 8259 compatibility flag */
151
152 static inline void
153 iosapic_write(struct iosapic *iosapic, unsigned int reg, u32 val)
154 {
155         unsigned long flags;
156
157         spin_lock_irqsave(&iosapic->lock, flags);
158         __iosapic_write(iosapic->addr, reg, val);
159         spin_unlock_irqrestore(&iosapic->lock, flags);
160 }
161
162 /*
163  * Find an IOSAPIC associated with a GSI
164  */
165 static inline int
166 find_iosapic (unsigned int gsi)
167 {
168         int i;
169
170         for (i = 0; i < NR_IOSAPICS; i++) {
171                 if ((unsigned) (gsi - iosapic_lists[i].gsi_base) <
172                     iosapic_lists[i].num_rte)
173                         return i;
174         }
175
176         return -1;
177 }
178
179 static inline int __gsi_to_irq(unsigned int gsi)
180 {
181         int irq;
182         struct iosapic_intr_info *info;
183         struct iosapic_rte_info *rte;
184
185         for (irq = 0; irq < NR_IRQS; irq++) {
186                 info = &iosapic_intr_info[irq];
187                 list_for_each_entry(rte, &info->rtes, rte_list)
188                         if (rte->iosapic->gsi_base + rte->rte_index == gsi)
189                                 return irq;
190         }
191         return -1;
192 }
193
194 int
195 gsi_to_irq (unsigned int gsi)
196 {
197         unsigned long flags;
198         int irq;
199
200         spin_lock_irqsave(&iosapic_lock, flags);
201         irq = __gsi_to_irq(gsi);
202         spin_unlock_irqrestore(&iosapic_lock, flags);
203         return irq;
204 }
205
206 static struct iosapic_rte_info *find_rte(unsigned int irq, unsigned int gsi)
207 {
208         struct iosapic_rte_info *rte;
209
210         list_for_each_entry(rte, &iosapic_intr_info[irq].rtes, rte_list)
211                 if (rte->iosapic->gsi_base + rte->rte_index == gsi)
212                         return rte;
213         return NULL;
214 }
215
216 static void
217 set_rte (unsigned int gsi, unsigned int irq, unsigned int dest, int mask)
218 {
219         unsigned long pol, trigger, dmode;
220         u32 low32, high32;
221         int rte_index;
222         char redir;
223         struct iosapic_rte_info *rte;
224         ia64_vector vector = irq_to_vector(irq);
225
226         DBG(KERN_DEBUG"IOSAPIC: routing vector %d to 0x%x\n", vector, dest);
227
228         rte = find_rte(irq, gsi);
229         if (!rte)
230                 return;         /* not an IOSAPIC interrupt */
231
232         rte_index = rte->rte_index;
233         pol     = iosapic_intr_info[irq].polarity;
234         trigger = iosapic_intr_info[irq].trigger;
235         dmode   = iosapic_intr_info[irq].dmode;
236
237         redir = (dmode == IOSAPIC_LOWEST_PRIORITY) ? 1 : 0;
238
239 #ifdef CONFIG_SMP
240         set_irq_affinity_info(irq, (int)(dest & 0xffff), redir);
241 #endif
242
243         low32 = ((pol << IOSAPIC_POLARITY_SHIFT) |
244                  (trigger << IOSAPIC_TRIGGER_SHIFT) |
245                  (dmode << IOSAPIC_DELIVERY_SHIFT) |
246                  ((mask ? 1 : 0) << IOSAPIC_MASK_SHIFT) |
247                  vector);
248
249         /* dest contains both id and eid */
250         high32 = (dest << IOSAPIC_DEST_SHIFT);
251
252         iosapic_write(rte->iosapic, IOSAPIC_RTE_HIGH(rte_index), high32);
253         iosapic_write(rte->iosapic, IOSAPIC_RTE_LOW(rte_index), low32);
254         iosapic_intr_info[irq].low32 = low32;
255         iosapic_intr_info[irq].dest = dest;
256 }
257
/* irq_chip callback placeholder for operations that need no action. */
static void
nop (struct irq_data *data)
{
	/* intentionally empty */
}
263
264
#ifdef CONFIG_KEXEC
/*
 * For every RTE of every IRQ: rewrite the low word as "masked + vector"
 * and then issue an EOI for that vector on the owning IOSAPIC.
 * Only built when CONFIG_KEXEC is enabled.
 */
void
kexec_disable_iosapic(void)
{
	struct iosapic_rte_info *entry;
	int irq;

	for (irq = 0; irq < NR_IRQS; irq++) {
		struct list_head *head = &iosapic_intr_info[irq].rtes;
		ia64_vector vec = irq_to_vector(irq);

		list_for_each_entry(entry, head, rte_list) {
			iosapic_write(entry->iosapic,
				      IOSAPIC_RTE_LOW(entry->rte_index),
				      IOSAPIC_MASK|vec);
			iosapic_eoi(entry->iosapic->addr, vec);
		}
	}
}
#endif
287
288 static void
289 mask_irq (struct irq_data *data)
290 {
291         unsigned int irq = data->irq;
292         u32 low32;
293         int rte_index;
294         struct iosapic_rte_info *rte;
295
296         if (!iosapic_intr_info[irq].count)
297                 return;                 /* not an IOSAPIC interrupt! */
298
299         /* set only the mask bit */
300         low32 = iosapic_intr_info[irq].low32 |= IOSAPIC_MASK;
301         list_for_each_entry(rte, &iosapic_intr_info[irq].rtes, rte_list) {
302                 rte_index = rte->rte_index;
303                 iosapic_write(rte->iosapic, IOSAPIC_RTE_LOW(rte_index), low32);
304         }
305 }
306
307 static void
308 unmask_irq (struct irq_data *data)
309 {
310         unsigned int irq = data->irq;
311         u32 low32;
312         int rte_index;
313         struct iosapic_rte_info *rte;
314
315         if (!iosapic_intr_info[irq].count)
316                 return;                 /* not an IOSAPIC interrupt! */
317
318         low32 = iosapic_intr_info[irq].low32 &= ~IOSAPIC_MASK;
319         list_for_each_entry(rte, &iosapic_intr_info[irq].rtes, rte_list) {
320                 rte_index = rte->rte_index;
321                 iosapic_write(rte->iosapic, IOSAPIC_RTE_LOW(rte_index), low32);
322         }
323 }
324
325
/*
 * irq_chip .irq_set_affinity callback: retarget every RTE that shares
 * data->irq to the first online CPU found in @mask.
 *
 * If the IA64_IRQ_REDIRECTED bit is set in the irq number, the RTEs are
 * switched to lowest-priority delivery; otherwise to fixed delivery.
 * @force is not used.
 *
 * Returns 0 on success (and unconditionally when CONFIG_SMP is off, where
 * the whole body is compiled out), or -1 if @mask contains no online CPU,
 * irq_prepare_move() fails, or the irq has no registered RTEs.
 */
static int
iosapic_set_affinity(struct irq_data *data, const struct cpumask *mask,
		     bool force)
{
#ifdef CONFIG_SMP
	unsigned int irq = data->irq;
	u32 high32, low32;
	int cpu, dest, rte_index;
	int redir = (irq & IA64_IRQ_REDIRECTED) ? 1 : 0;
	struct iosapic_rte_info *rte;
	struct iosapic *iosapic;

	/* strip the redirect flag to get the plain irq number */
	irq &= (~IA64_IRQ_REDIRECTED);

	cpu = cpumask_first_and(cpu_online_mask, mask);
	if (cpu >= nr_cpu_ids)
		return -1;

	if (irq_prepare_move(irq, cpu))
		return -1;

	dest = cpu_physical_id(cpu);

	if (!iosapic_intr_info[irq].count)
		return -1;			/* not an IOSAPIC interrupt */

	set_irq_affinity_info(irq, dest, redir);

	/* dest contains both id and eid */
	high32 = dest << IOSAPIC_DEST_SHIFT;

	/* start from the cached low word with the delivery-mode field cleared */
	low32 = iosapic_intr_info[irq].low32 & ~(7 << IOSAPIC_DELIVERY_SHIFT);
	if (redir)
		/* change delivery mode to lowest priority */
		low32 |= (IOSAPIC_LOWEST_PRIORITY << IOSAPIC_DELIVERY_SHIFT);
	else
		/* change delivery mode to fixed */
		low32 |= (IOSAPIC_FIXED << IOSAPIC_DELIVERY_SHIFT);
	/*
	 * NOTE(review): presumably IOSAPIC_VECTOR_MASK keeps everything but
	 * the vector field, so that the current vector can be OR-ed in
	 * below -- confirm against asm/iosapic.h.
	 */
	low32 &= IOSAPIC_VECTOR_MASK;
	low32 |= irq_to_vector(irq);

	/* update the cache first, then push the new routing to every RTE */
	iosapic_intr_info[irq].low32 = low32;
	iosapic_intr_info[irq].dest = dest;
	list_for_each_entry(rte, &iosapic_intr_info[irq].rtes, rte_list) {
		iosapic = rte->iosapic;
		rte_index = rte->rte_index;
		iosapic_write(iosapic, IOSAPIC_RTE_HIGH(rte_index), high32);
		iosapic_write(iosapic, IOSAPIC_RTE_LOW(rte_index), low32);
	}

#endif
	return 0;
}
379
380 /*
381  * Handlers for level-triggered interrupts.
382  */
383
/*
 * irq_chip .irq_startup callback for level-triggered interrupts: unmask
 * the line.  Always returns 0.
 */
static unsigned int
iosapic_startup_level_irq (struct irq_data *data)
{
	const unsigned int pending = 0;

	unmask_irq(data);
	return pending;
}
390
/*
 * irq_chip .irq_unmask callback for level-triggered interrupts: unmask the
 * line and send an EOI for the irq's vector to every IOSAPIC that has an
 * RTE on this irq.
 *
 * If an affinity change is pending, the line is instead kept masked across
 * the EOI, irq_move_masked_irq() is invoked, and only then is the line
 * unmasked.
 */
static void
iosapic_unmask_level_irq (struct irq_data *data)
{
	unsigned int irq = data->irq;
	ia64_vector vec = irq_to_vector(irq);
	struct iosapic_rte_info *rte;
	int do_unmask_irq = 0;

	irq_complete_move(irq);
	if (unlikely(irqd_is_setaffinity_pending(data))) {
		/* defer the unmask until after the move below */
		do_unmask_irq = 1;
		mask_irq(data);
	} else
		unmask_irq(data);

	list_for_each_entry(rte, &iosapic_intr_info[irq].rtes, rte_list)
		iosapic_eoi(rte->iosapic->addr, vec);

	if (unlikely(do_unmask_irq)) {
		irq_move_masked_irq(data);
		unmask_irq(data);
	}
}
414
/*
 * Level-triggered callbacks that need no dedicated function: shutdown and
 * disable mask the line, enable unmasks it, and ack does nothing.
 */
#define iosapic_shutdown_level_irq	mask_irq
#define iosapic_enable_level_irq	unmask_irq
#define iosapic_disable_level_irq	mask_irq
#define iosapic_ack_level_irq		nop

/* irq_chip installed by register_intr() for level-triggered interrupts */
static struct irq_chip irq_type_iosapic_level = {
	.name =			"IO-SAPIC-level",
	.irq_startup =		iosapic_startup_level_irq,
	.irq_shutdown =		iosapic_shutdown_level_irq,
	.irq_enable =		iosapic_enable_level_irq,
	.irq_disable =		iosapic_disable_level_irq,
	.irq_ack =		iosapic_ack_level_irq,
	.irq_mask =		mask_irq,
	.irq_unmask =		iosapic_unmask_level_irq,
	.irq_set_affinity =	iosapic_set_affinity
};
431
432 /*
433  * Handlers for edge-triggered interrupts.
434  */
435
/*
 * irq_chip .irq_startup callback for edge-triggered interrupts: unmask the
 * line.  Always returns 0.
 */
static unsigned int
iosapic_startup_edge_irq (struct irq_data *data)
{
	/*
	 * The IOSAPIC simply drops interrupts that were raised while the
	 * corresponding pin was masked, so there is no way to tell whether
	 * one is already pending.  Let's hope not...
	 */
	const unsigned int pending = 0;

	unmask_irq(data);
	return pending;
}
447
448 static void
449 iosapic_ack_edge_irq (struct irq_data *data)
450 {
451         irq_complete_move(data->irq);
452         irq_move_irq(data);
453 }
454
/*
 * Edge-triggered callbacks that need no dedicated function: enable unmasks
 * the line, disable does nothing.
 */
#define iosapic_enable_edge_irq		unmask_irq
#define iosapic_disable_edge_irq	nop

/*
 * irq_chip installed by register_intr() for edge-triggered interrupts.
 * Note that .irq_shutdown reuses the disable callback, i.e. nop.
 */
static struct irq_chip irq_type_iosapic_edge = {
	.name =			"IO-SAPIC-edge",
	.irq_startup =		iosapic_startup_edge_irq,
	.irq_shutdown =		iosapic_disable_edge_irq,
	.irq_enable =		iosapic_enable_edge_irq,
	.irq_disable =		iosapic_disable_edge_irq,
	.irq_ack =		iosapic_ack_edge_irq,
	.irq_mask =		mask_irq,
	.irq_unmask =		unmask_irq,
	.irq_set_affinity =	iosapic_set_affinity
};
469
470 static unsigned int
471 iosapic_version (char __iomem *addr)
472 {
473         /*
474          * IOSAPIC Version Register return 32 bit structure like:
475          * {
476          *      unsigned int version   : 8;
477          *      unsigned int reserved1 : 8;
478          *      unsigned int max_redir : 8;
479          *      unsigned int reserved2 : 8;
480          * }
481          */
482         return __iosapic_read(addr, IOSAPIC_VERSION);
483 }
484
485 static int iosapic_find_sharable_irq(unsigned long trigger, unsigned long pol)
486 {
487         int i, irq = -ENOSPC, min_count = -1;
488         struct iosapic_intr_info *info;
489
490         /*
491          * shared vectors for edge-triggered interrupts are not
492          * supported yet
493          */
494         if (trigger == IOSAPIC_EDGE)
495                 return -EINVAL;
496
497         for (i = 0; i < NR_IRQS; i++) {
498                 info = &iosapic_intr_info[i];
499                 if (info->trigger == trigger && info->polarity == pol &&
500                     (info->dmode == IOSAPIC_FIXED ||
501                      info->dmode == IOSAPIC_LOWEST_PRIORITY) &&
502                     can_request_irq(i, IRQF_SHARED)) {
503                         if (min_count == -1 || info->count < min_count) {
504                                 irq = i;
505                                 min_count = info->count;
506                         }
507                 }
508         }
509         return irq;
510 }
511
512 /*
513  * if the given vector is already owned by other,
514  *  assign a new vector for the other and make the vector available
515  */
516 static void __init
517 iosapic_reassign_vector (int irq)
518 {
519         int new_irq;
520
521         if (iosapic_intr_info[irq].count) {
522                 new_irq = create_irq();
523                 if (new_irq < 0)
524                         panic("%s: out of interrupt vectors!\n", __func__);
525                 printk(KERN_INFO "Reassigning vector %d to %d\n",
526                        irq_to_vector(irq), irq_to_vector(new_irq));
527                 memcpy(&iosapic_intr_info[new_irq], &iosapic_intr_info[irq],
528                        sizeof(struct iosapic_intr_info));
529                 INIT_LIST_HEAD(&iosapic_intr_info[new_irq].rtes);
530                 list_move(iosapic_intr_info[irq].rtes.next,
531                           &iosapic_intr_info[new_irq].rtes);
532                 memset(&iosapic_intr_info[irq], 0,
533                        sizeof(struct iosapic_intr_info));
534                 iosapic_intr_info[irq].low32 = IOSAPIC_MASK;
535                 INIT_LIST_HEAD(&iosapic_intr_info[irq].rtes);
536         }
537 }
538
539 static inline int irq_is_shared (int irq)
540 {
541         return (iosapic_intr_info[irq].count > 1);
542 }
543
544 struct irq_chip*
545 ia64_native_iosapic_get_irq_chip(unsigned long trigger)
546 {
547         if (trigger == IOSAPIC_EDGE)
548                 return &irq_type_iosapic_edge;
549         else
550                 return &irq_type_iosapic_level;
551 }
552
/*
 * Record that @gsi is delivered through @irq and install the matching
 * irq_chip and flow handler.  The IOSAPIC hardware is not touched here.
 *
 * - No RTE descriptor yet for this irq/gsi pair: allocate one (GFP_ATOMIC),
 *   link it into the irq's list and bump the usage counters.
 * - Descriptor exists with refcnt == NO_REF_RTE: take it back into use,
 *   unless the irq still has users with a different trigger or polarity.
 * - Descriptor exists and is referenced: counters are left alone.
 *
 * In every successful case the irq's polarity, delivery mode and trigger
 * are overwritten with the values passed in.
 *
 * The caller visible in this file, iosapic_register_intr(), invokes this
 * with iosapic_lock and the irq descriptor lock held.
 *
 * Returns 0 on success, -ENODEV if no IOSAPIC covers @gsi, -ENOMEM if the
 * allocation fails, or -EINVAL on a trigger/polarity conflict.
 */
static int
register_intr (unsigned int gsi, int irq, unsigned char delivery,
	       unsigned long polarity, unsigned long trigger)
{
	struct irq_chip *chip, *irq_type;
	int index;
	struct iosapic_rte_info *rte;

	index = find_iosapic(gsi);
	if (index < 0) {
		printk(KERN_WARNING "%s: No IOSAPIC for GSI %u\n",
		       __func__, gsi);
		return -ENODEV;
	}

	rte = find_rte(irq, gsi);
	if (!rte) {
		rte = kzalloc(sizeof (*rte), GFP_ATOMIC);
		if (!rte) {
			printk(KERN_WARNING "%s: cannot allocate memory\n",
			       __func__);
			return -ENOMEM;
		}

		rte->iosapic	= &iosapic_lists[index];
		rte->rte_index	= gsi - rte->iosapic->gsi_base;
		rte->refcnt++;
		list_add_tail(&rte->rte_list, &iosapic_intr_info[irq].rtes);
		iosapic_intr_info[irq].count++;
		iosapic_lists[index].rtes_inuse++;
	}
	else if (rte->refcnt == NO_REF_RTE) {
		struct iosapic_intr_info *info = &iosapic_intr_info[irq];
		/* other RTEs still use this irq: their settings must match */
		if (info->count > 0 &&
		    (info->trigger != trigger || info->polarity != polarity)){
			printk (KERN_WARNING
				"%s: cannot override the interrupt\n",
				__func__);
			return -EINVAL;
		}
		rte->refcnt++;
		iosapic_intr_info[irq].count++;
		iosapic_lists[index].rtes_inuse++;
	}

	iosapic_intr_info[irq].polarity = polarity;
	iosapic_intr_info[irq].dmode    = delivery;
	iosapic_intr_info[irq].trigger  = trigger;

	irq_type = iosapic_get_irq_chip(trigger);

	/* switch chips if needed; warn only when replacing a real chip */
	chip = irq_get_chip(irq);
	if (irq_type != NULL && chip != irq_type) {
		if (chip != &no_irq_chip)
			printk(KERN_WARNING
			       "%s: changing vector %d from %s to %s\n",
			       __func__, irq_to_vector(irq),
			       chip->name, irq_type->name);
		chip = irq_type;
	}
	irq_set_chip_handler_name_locked(irq_get_irq_data(irq), chip,
		trigger == IOSAPIC_EDGE ? handle_edge_irq : handle_level_irq,
		NULL);
	return 0;
}
618
/*
 * Choose the CPU that an interrupt routed through @irq should be sent to,
 * and return its physical id.
 *
 * With CONFIG_SMP the first rule that applies wins:
 *  1. @irq already has registered RTEs: reuse the stored destination.
 *  2. SMP_IRQ_REDIRECTION is set, or the current CPU is not yet marked
 *     online: use the current CPU.
 *  3. (CONFIG_ACPI) @irq maps to IA64_CPEP_VECTOR and cpe_vector > 0: use
 *     get_cpei_target_cpu().
 *  4. (CONFIG_NUMA) the IOSAPIC serving @gsi has a node: pick one of the
 *     online CPUs on that node that are in the irq's domain, selected by
 *     irq number modulo the number of such CPUs.
 *  5. Otherwise round-robin over the online CPUs in the irq's domain; the
 *     position is kept in a function-local static.
 *
 * Without CONFIG_SMP the current CPU is always returned.
 */
static unsigned int
get_target_cpu (unsigned int gsi, int irq)
{
#ifdef CONFIG_SMP
	static int cpu = -1;		/* round-robin cursor, kept across calls */
	extern int cpe_vector;
	cpumask_t domain = irq_to_domain(irq);

	/*
	 * In case of vector shared by multiple RTEs, all RTEs that
	 * share the vector need to use the same destination CPU.
	 */
	if (iosapic_intr_info[irq].count)
		return iosapic_intr_info[irq].dest;

	/*
	 * If the platform supports redirection via XTP, let it
	 * distribute interrupts.
	 */
	if (smp_int_redirect & SMP_IRQ_REDIRECTION)
		return cpu_physical_id(smp_processor_id());

	/*
	 * Some interrupts (ACPI SCI, for instance) are registered
	 * before the BSP is marked as online.
	 */
	if (!cpu_online(smp_processor_id()))
		return cpu_physical_id(smp_processor_id());

#ifdef CONFIG_ACPI
	if (cpe_vector > 0 && irq_to_vector(irq) == IA64_CPEP_VECTOR)
		return get_cpei_target_cpu();
#endif

#ifdef CONFIG_NUMA
	{
		int num_cpus, cpu_index, iosapic_index, numa_cpu, i = 0;
		const struct cpumask *cpu_mask;

		iosapic_index = find_iosapic(gsi);
		if (iosapic_index < 0 ||
		    iosapic_lists[iosapic_index].node == MAX_NUMNODES)
			goto skip_numa_setup;

		cpu_mask = cpumask_of_node(iosapic_lists[iosapic_index].node);
		/* count the online CPUs that are on the node and in the domain */
		num_cpus = 0;
		for_each_cpu_and(numa_cpu, cpu_mask, &domain) {
			if (cpu_online(numa_cpu))
				num_cpus++;
		}

		if (!num_cpus)
			goto skip_numa_setup;

		/* Use irq assignment to distribute across cpus in node */
		cpu_index = irq % num_cpus;

		/* stop at the (cpu_index + 1)-th online CPU of that set */
		for_each_cpu_and(numa_cpu, cpu_mask, &domain)
			if (cpu_online(numa_cpu) && i++ >= cpu_index)
				break;

		if (numa_cpu < nr_cpu_ids)
			return cpu_physical_id(numa_cpu);
	}
skip_numa_setup:
#endif
	/*
	 * Otherwise, round-robin interrupt vectors across all the
	 * processors.  (It'd be nice if we could be smarter in the
	 * case of NUMA.)
	 */
	do {
		if (++cpu >= nr_cpu_ids)
			cpu = 0;
	} while (!cpu_online(cpu) || !cpumask_test_cpu(cpu, &domain));

	return cpu_physical_id(cpu);
#else  /* CONFIG_SMP */
	return cpu_physical_id(smp_processor_id());
#endif
}
700
701 static inline unsigned char choose_dmode(void)
702 {
703 #ifdef CONFIG_SMP
704         if (smp_int_redirect & SMP_IRQ_REDIRECTION)
705                 return IOSAPIC_LOWEST_PRIORITY;
706 #endif
707         return IOSAPIC_FIXED;
708 }
709
/*
 * ACPI can describe IOSAPIC interrupts via static tables and namespace
 * methods.  This provides an interface to register those interrupts and
 * program the IOSAPIC RTE.
 *
 * @gsi:      global system interrupt number to register
 * @polarity: interrupt polarity (compared against IOSAPIC_POL_HIGH)
 * @trigger:  trigger mode (compared against IOSAPIC_EDGE)
 *
 * Returns the Linux irq that @gsi is routed to, or a negative errno: the
 * result of iosapic_find_sharable_irq() when no irq could be created or
 * shared, or the error returned by register_intr().
 *
 * The whole operation runs under iosapic_lock with interrupts disabled;
 * the irq descriptor lock is nested inside it while the RTE is set up.
 */
int
iosapic_register_intr (unsigned int gsi,
		       unsigned long polarity, unsigned long trigger)
{
	int irq, mask = 1, err;
	unsigned int dest;
	unsigned long flags;
	struct iosapic_rte_info *rte;
	u32 low32;
	unsigned char dmode;
	struct irq_desc *desc;

	/*
	 * If this GSI has already been registered (i.e., it's a
	 * shared interrupt, or we lost a race to register it),
	 * don't touch the RTE.
	 */
	spin_lock_irqsave(&iosapic_lock, flags);
	irq = __gsi_to_irq(gsi);
	/* NOTE(review): a lookup result of 0 is treated like "not found" here */
	if (irq > 0) {
		rte = find_rte(irq, gsi);
		if(iosapic_intr_info[irq].count == 0) {
			/* irq has no active users: set up its vector and descriptor again */
			assign_irq_vector(irq);
			irq_init_desc(irq);
		} else if (rte->refcnt != NO_REF_RTE) {
			/* already live: just take another reference */
			rte->refcnt++;
			goto unlock_iosapic_lock;
		}
	} else
		irq = create_irq();

	/* If vector is running out, we try to find a sharable vector */
	if (irq < 0) {
		irq = iosapic_find_sharable_irq(trigger, polarity);
		if (irq < 0)
			goto unlock_iosapic_lock;
	}

	desc = irq_to_desc(irq);
	raw_spin_lock(&desc->lock);
	dest = get_target_cpu(gsi, irq);
	dmode = choose_dmode();
	err = register_intr(gsi, irq, dmode, polarity, trigger);
	if (err < 0) {
		raw_spin_unlock(&desc->lock);
		irq = err;
		goto unlock_iosapic_lock;
	}

	/*
	 * If the vector is shared and already unmasked for other
	 * interrupt sources, don't mask it.
	 */
	low32 = iosapic_intr_info[irq].low32;
	if (irq_is_shared(irq) && !(low32 & IOSAPIC_MASK))
		mask = 0;
	set_rte(gsi, irq, dest, mask);

	printk(KERN_INFO "GSI %u (%s, %s) -> CPU %d (0x%04x) vector %d\n",
	       gsi, (trigger == IOSAPIC_EDGE ? "edge" : "level"),
	       (polarity == IOSAPIC_POL_HIGH ? "high" : "low"),
	       cpu_logical_id(dest), dest, irq_to_vector(irq));

	raw_spin_unlock(&desc->lock);
 unlock_iosapic_lock:
	spin_unlock_irqrestore(&iosapic_lock, flags);
	return irq;
}
783
/*
 * Drop one reference to the registration of @gsi.
 *
 * While other references remain, only the RTE's refcnt is decremented.
 * On the last reference the RTE is masked in hardware and the usage
 * counters of the irq and of the owning IOSAPIC are decremented; the RTE
 * descriptor itself stays on the irq's list with refcnt == NO_REF_RTE.
 * When the irq has no RTEs left in use, its cached routing information is
 * cleared and the irq is handed to destroy_and_reserve_irq().
 *
 * A call for a GSI that is not registered is reported as "unbalanced"
 * together with a WARN_ON().
 */
void
iosapic_unregister_intr (unsigned int gsi)
{
	unsigned long flags;
	int irq, index;
	u32 low32;
	unsigned long trigger, polarity;
	unsigned int dest;
	struct iosapic_rte_info *rte;

	/*
	 * If the irq associated with the gsi is not found,
	 * iosapic_unregister_intr() is unbalanced. We need to check
	 * this again after getting locks.
	 */
	irq = gsi_to_irq(gsi);
	if (irq < 0) {
		printk(KERN_ERR "iosapic_unregister_intr(%u) unbalanced\n",
		       gsi);
		WARN_ON(1);
		return;
	}

	spin_lock_irqsave(&iosapic_lock, flags);
	/* re-check under the lock: the lookup above has already dropped it */
	if ((rte = find_rte(irq, gsi)) == NULL) {
		printk(KERN_ERR "iosapic_unregister_intr(%u) unbalanced\n",
		       gsi);
		WARN_ON(1);
		goto out;
	}

	/* other users remain: nothing more to do */
	if (--rte->refcnt > 0)
		goto out;

	rte->refcnt = NO_REF_RTE;

	/* Mask the interrupt */
	low32 = iosapic_intr_info[irq].low32 | IOSAPIC_MASK;
	iosapic_write(rte->iosapic, IOSAPIC_RTE_LOW(rte->rte_index), low32);

	iosapic_intr_info[irq].count--;
	index = find_iosapic(gsi);
	iosapic_lists[index].rtes_inuse--;
	WARN_ON(iosapic_lists[index].rtes_inuse < 0);

	trigger  = iosapic_intr_info[irq].trigger;
	polarity = iosapic_intr_info[irq].polarity;
	dest     = iosapic_intr_info[irq].dest;
	printk(KERN_INFO
	       "GSI %u (%s, %s) -> CPU %d (0x%04x) vector %d unregistered\n",
	       gsi, (trigger == IOSAPIC_EDGE ? "edge" : "level"),
	       (polarity == IOSAPIC_POL_HIGH ? "high" : "low"),
	       cpu_logical_id(dest), dest, irq_to_vector(irq));

	if (iosapic_intr_info[irq].count == 0) {
#ifdef CONFIG_SMP
		/* Clear affinity */
		cpumask_setall(irq_get_affinity_mask(irq));
#endif
		/* Clear the interrupt information */
		iosapic_intr_info[irq].dest = 0;
		iosapic_intr_info[irq].dmode = 0;
		iosapic_intr_info[irq].polarity = 0;
		iosapic_intr_info[irq].trigger = 0;
		iosapic_intr_info[irq].low32 |= IOSAPIC_MASK;

		/* Destroy and reserve IRQ */
		destroy_and_reserve_irq(irq);
	}
 out:
	spin_unlock_irqrestore(&iosapic_lock, flags);
}
856
857 /*
858  * ACPI calls this when it finds an entry for a platform interrupt.
859  */
860 int __init
861 iosapic_register_platform_intr (u32 int_type, unsigned int gsi,
862                                 int iosapic_vector, u16 eid, u16 id,
863                                 unsigned long polarity, unsigned long trigger)
864 {
865         static const char * const name[] = {"unknown", "PMI", "INIT", "CPEI"};
866         unsigned char delivery;
867         int irq, vector, mask = 0;
868         unsigned int dest = ((id << 8) | eid) & 0xffff;
869
870         switch (int_type) {
871               case ACPI_INTERRUPT_PMI:
872                 irq = vector = iosapic_vector;
873                 bind_irq_vector(irq, vector, CPU_MASK_ALL);
874                 /*
875                  * since PMI vector is alloc'd by FW(ACPI) not by kernel,
876                  * we need to make sure the vector is available
877                  */
878                 iosapic_reassign_vector(irq);
879                 delivery = IOSAPIC_PMI;
880                 break;
881               case ACPI_INTERRUPT_INIT:
882                 irq = create_irq();
883                 if (irq < 0)
884                         panic("%s: out of interrupt vectors!\n", __func__);
885                 vector = irq_to_vector(irq);
886                 delivery = IOSAPIC_INIT;
887                 break;
888               case ACPI_INTERRUPT_CPEI:
889                 irq = vector = IA64_CPE_VECTOR;
890                 BUG_ON(bind_irq_vector(irq, vector, CPU_MASK_ALL));
891                 delivery = IOSAPIC_FIXED;
892                 mask = 1;
893                 break;
894               default:
895                 printk(KERN_ERR "%s: invalid int type 0x%x\n", __func__,
896                        int_type);
897                 return -1;
898         }
899
900         register_intr(gsi, irq, delivery, polarity, trigger);
901
902         printk(KERN_INFO
903                "PLATFORM int %s (0x%x): GSI %u (%s, %s) -> CPU %d (0x%04x)"
904                " vector %d\n",
905                int_type < ARRAY_SIZE(name) ? name[int_type] : "unknown",
906                int_type, gsi, (trigger == IOSAPIC_EDGE ? "edge" : "level"),
907                (polarity == IOSAPIC_POL_HIGH ? "high" : "low"),
908                cpu_logical_id(dest), dest, vector);
909
910         set_rte(gsi, irq, dest, mask);
911         return vector;
912 }
913
914 /*
915  * ACPI calls this when it finds an entry for a legacy ISA IRQ override.
916  */
917 void iosapic_override_isa_irq(unsigned int isa_irq, unsigned int gsi,
918                               unsigned long polarity, unsigned long trigger)
919 {
920         int vector, irq;
921         unsigned int dest = cpu_physical_id(smp_processor_id());
922         unsigned char dmode;
923
924         irq = vector = isa_irq_to_vector(isa_irq);
925         BUG_ON(bind_irq_vector(irq, vector, CPU_MASK_ALL));
926         dmode = choose_dmode();
927         register_intr(gsi, irq, dmode, polarity, trigger);
928
929         DBG("ISA: IRQ %u -> GSI %u (%s,%s) -> CPU %d (0x%04x) vector %d\n",
930             isa_irq, gsi, trigger == IOSAPIC_EDGE ? "edge" : "level",
931             polarity == IOSAPIC_POL_HIGH ? "high" : "low",
932             cpu_logical_id(dest), dest, vector);
933
934         set_rte(gsi, irq, dest, 1);
935 }
936
937 void __init
938 ia64_native_iosapic_pcat_compat_init(void)
939 {
940         if (pcat_compat) {
941                 /*
942                  * Disable the compatibility mode interrupts (8259 style),
943                  * needs IN/OUT support enabled.
944                  */
945                 printk(KERN_INFO
946                        "%s: Disabling PC-AT compatible 8259 interrupts\n",
947                        __func__);
948                 outb(0xff, 0xA1);
949                 outb(0xff, 0x21);
950         }
951 }
952
953 void __init
954 iosapic_system_init (int system_pcat_compat)
955 {
956         int irq;
957
958         for (irq = 0; irq < NR_IRQS; ++irq) {
959                 iosapic_intr_info[irq].low32 = IOSAPIC_MASK;
960                 /* mark as unused */
961                 INIT_LIST_HEAD(&iosapic_intr_info[irq].rtes);
962
963                 iosapic_intr_info[irq].count = 0;
964         }
965
966         pcat_compat = system_pcat_compat;
967         if (pcat_compat)
968                 iosapic_pcat_compat_init();
969 }
970
971 static inline int
972 iosapic_alloc (void)
973 {
974         int index;
975
976         for (index = 0; index < NR_IOSAPICS; index++)
977                 if (!iosapic_lists[index].addr)
978                         return index;
979
980         printk(KERN_WARNING "%s: failed to allocate iosapic\n", __func__);
981         return -1;
982 }
983
984 static inline void
985 iosapic_free (int index)
986 {
987         memset(&iosapic_lists[index], 0, sizeof(iosapic_lists[0]));
988 }
989
990 static inline int
991 iosapic_check_gsi_range (unsigned int gsi_base, unsigned int ver)
992 {
993         int index;
994         unsigned int gsi_end, base, end;
995
996         /* check gsi range */
997         gsi_end = gsi_base + ((ver >> 16) & 0xff);
998         for (index = 0; index < NR_IOSAPICS; index++) {
999                 if (!iosapic_lists[index].addr)
1000                         continue;
1001
1002                 base = iosapic_lists[index].gsi_base;
1003                 end  = base + iosapic_lists[index].num_rte - 1;
1004
1005                 if (gsi_end < base || end < gsi_base)
1006                         continue; /* OK */
1007
1008                 return -EBUSY;
1009         }
1010         return 0;
1011 }
1012
1013 static int
1014 iosapic_delete_rte(unsigned int irq, unsigned int gsi)
1015 {
1016         struct iosapic_rte_info *rte, *temp;
1017
1018         list_for_each_entry_safe(rte, temp, &iosapic_intr_info[irq].rtes,
1019                                                                 rte_list) {
1020                 if (rte->iosapic->gsi_base + rte->rte_index == gsi) {
1021                         if (rte->refcnt)
1022                                 return -EBUSY;
1023
1024                         list_del(&rte->rte_list);
1025                         kfree(rte);
1026                         return 0;
1027                 }
1028         }
1029
1030         return -EINVAL;
1031 }
1032
1033 int iosapic_init(unsigned long phys_addr, unsigned int gsi_base)
1034 {
1035         int num_rte, err, index;
1036         unsigned int isa_irq, ver;
1037         char __iomem *addr;
1038         unsigned long flags;
1039
1040         spin_lock_irqsave(&iosapic_lock, flags);
1041         index = find_iosapic(gsi_base);
1042         if (index >= 0) {
1043                 spin_unlock_irqrestore(&iosapic_lock, flags);
1044                 return -EBUSY;
1045         }
1046
1047         addr = ioremap(phys_addr, 0);
1048         if (addr == NULL) {
1049                 spin_unlock_irqrestore(&iosapic_lock, flags);
1050                 return -ENOMEM;
1051         }
1052         ver = iosapic_version(addr);
1053         if ((err = iosapic_check_gsi_range(gsi_base, ver))) {
1054                 iounmap(addr);
1055                 spin_unlock_irqrestore(&iosapic_lock, flags);
1056                 return err;
1057         }
1058
1059         /*
1060          * The MAX_REDIR register holds the highest input pin number
1061          * (starting from 0).  We add 1 so that we can use it for
1062          * number of pins (= RTEs)
1063          */
1064         num_rte = ((ver >> 16) & 0xff) + 1;
1065
1066         index = iosapic_alloc();
1067         iosapic_lists[index].addr = addr;
1068         iosapic_lists[index].gsi_base = gsi_base;
1069         iosapic_lists[index].num_rte = num_rte;
1070 #ifdef CONFIG_NUMA
1071         iosapic_lists[index].node = MAX_NUMNODES;
1072 #endif
1073         spin_lock_init(&iosapic_lists[index].lock);
1074         spin_unlock_irqrestore(&iosapic_lock, flags);
1075
1076         if ((gsi_base == 0) && pcat_compat) {
1077                 /*
1078                  * Map the legacy ISA devices into the IOSAPIC data.  Some of
1079                  * these may get reprogrammed later on with data from the ACPI
1080                  * Interrupt Source Override table.
1081                  */
1082                 for (isa_irq = 0; isa_irq < 16; ++isa_irq)
1083                         iosapic_override_isa_irq(isa_irq, isa_irq,
1084                                                  IOSAPIC_POL_HIGH,
1085                                                  IOSAPIC_EDGE);
1086         }
1087         return 0;
1088 }
1089
1090 int iosapic_remove(unsigned int gsi_base)
1091 {
1092         int i, irq, index, err = 0;
1093         unsigned long flags;
1094
1095         spin_lock_irqsave(&iosapic_lock, flags);
1096         index = find_iosapic(gsi_base);
1097         if (index < 0) {
1098                 printk(KERN_WARNING "%s: No IOSAPIC for GSI base %u\n",
1099                        __func__, gsi_base);
1100                 goto out;
1101         }
1102
1103         if (iosapic_lists[index].rtes_inuse) {
1104                 err = -EBUSY;
1105                 printk(KERN_WARNING "%s: IOSAPIC for GSI base %u is busy\n",
1106                        __func__, gsi_base);
1107                 goto out;
1108         }
1109
1110         for (i = gsi_base; i < gsi_base + iosapic_lists[index].num_rte; i++) {
1111                 irq = __gsi_to_irq(i);
1112                 if (irq < 0)
1113                         continue;
1114
1115                 err = iosapic_delete_rte(irq, i);
1116                 if (err)
1117                         goto out;
1118         }
1119
1120         iounmap(iosapic_lists[index].addr);
1121         iosapic_free(index);
1122  out:
1123         spin_unlock_irqrestore(&iosapic_lock, flags);
1124         return err;
1125 }
1126
#ifdef CONFIG_NUMA
/*
 * Record @node as the NUMA node of the IOSAPIC found for @gsi_base.
 * Logs a warning and changes nothing when no such IOSAPIC exists.
 */
void map_iosapic_to_node(unsigned int gsi_base, int node)
{
	int index = find_iosapic(gsi_base);

	if (index >= 0) {
		iosapic_lists[index].node = node;
		return;
	}

	printk(KERN_WARNING "%s: No IOSAPIC for GSI %u\n",
	       __func__, gsi_base);
}
#endif