]> git.kernelconcepts.de Git - karo-tx-linux.git/blob - arch/x86/kernel/cpu/perf_event_intel_uncore.c
Merge remote-tracking branch 'input-current/for-linus'
[karo-tx-linux.git] / arch / x86 / kernel / cpu / perf_event_intel_uncore.c
1 #include "perf_event_intel_uncore.h"
2
3 static struct intel_uncore_type *empty_uncore[] = { NULL, };
4 struct intel_uncore_type **uncore_msr_uncores = empty_uncore;
5 struct intel_uncore_type **uncore_pci_uncores = empty_uncore;
6
7 static bool pcidrv_registered;
8 struct pci_driver *uncore_pci_driver;
9 /* pci bus to socket mapping */
10 DEFINE_RAW_SPINLOCK(pci2phy_map_lock);
11 struct list_head pci2phy_map_head = LIST_HEAD_INIT(pci2phy_map_head);
12 struct pci_dev *uncore_extra_pci_dev[UNCORE_SOCKET_MAX][UNCORE_EXTRA_PCI_DEV_MAX];
13
14 static DEFINE_RAW_SPINLOCK(uncore_box_lock);
15 /* mask of cpus that collect uncore events */
16 static cpumask_t uncore_cpu_mask;
17
18 /* constraint for the fixed counter */
19 static struct event_constraint uncore_constraint_fixed =
20         EVENT_CONSTRAINT(~0ULL, 1 << UNCORE_PMC_IDX_FIXED, ~0ULL);
21 struct event_constraint uncore_constraint_empty =
22         EVENT_CONSTRAINT(0, 0, 0);
23
24 int uncore_pcibus_to_physid(struct pci_bus *bus)
25 {
26         struct pci2phy_map *map;
27         int phys_id = -1;
28
29         raw_spin_lock(&pci2phy_map_lock);
30         list_for_each_entry(map, &pci2phy_map_head, list) {
31                 if (map->segment == pci_domain_nr(bus)) {
32                         phys_id = map->pbus_to_physid[bus->number];
33                         break;
34                 }
35         }
36         raw_spin_unlock(&pci2phy_map_lock);
37
38         return phys_id;
39 }
40
41 struct pci2phy_map *__find_pci2phy_map(int segment)
42 {
43         struct pci2phy_map *map, *alloc = NULL;
44         int i;
45
46         lockdep_assert_held(&pci2phy_map_lock);
47
48 lookup:
49         list_for_each_entry(map, &pci2phy_map_head, list) {
50                 if (map->segment == segment)
51                         goto end;
52         }
53
54         if (!alloc) {
55                 raw_spin_unlock(&pci2phy_map_lock);
56                 alloc = kmalloc(sizeof(struct pci2phy_map), GFP_KERNEL);
57                 raw_spin_lock(&pci2phy_map_lock);
58
59                 if (!alloc)
60                         return NULL;
61
62                 goto lookup;
63         }
64
65         map = alloc;
66         alloc = NULL;
67         map->segment = segment;
68         for (i = 0; i < 256; i++)
69                 map->pbus_to_physid[i] = -1;
70         list_add_tail(&map->list, &pci2phy_map_head);
71
72 end:
73         kfree(alloc);
74         return map;
75 }
76
77 ssize_t uncore_event_show(struct kobject *kobj,
78                           struct kobj_attribute *attr, char *buf)
79 {
80         struct uncore_event_desc *event =
81                 container_of(attr, struct uncore_event_desc, attr);
82         return sprintf(buf, "%s", event->config);
83 }
84
85 struct intel_uncore_pmu *uncore_event_to_pmu(struct perf_event *event)
86 {
87         return container_of(event->pmu, struct intel_uncore_pmu, pmu);
88 }
89
90 struct intel_uncore_box *uncore_pmu_to_box(struct intel_uncore_pmu *pmu, int cpu)
91 {
92         struct intel_uncore_box *box;
93
94         box = *per_cpu_ptr(pmu->box, cpu);
95         if (box)
96                 return box;
97
98         raw_spin_lock(&uncore_box_lock);
99         /* Recheck in lock to handle races. */
100         if (*per_cpu_ptr(pmu->box, cpu))
101                 goto out;
102         list_for_each_entry(box, &pmu->box_list, list) {
103                 if (box->phys_id == topology_physical_package_id(cpu)) {
104                         atomic_inc(&box->refcnt);
105                         *per_cpu_ptr(pmu->box, cpu) = box;
106                         break;
107                 }
108         }
109 out:
110         raw_spin_unlock(&uncore_box_lock);
111
112         return *per_cpu_ptr(pmu->box, cpu);
113 }
114
/*
 * Return the box serving @event on the current cpu.
 *
 * perf core schedules events per cpu, while uncore events are collected by
 * one of the cpus inside a physical package, so the lookup is keyed on the
 * cpu we are running on.
 */
struct intel_uncore_box *uncore_event_to_box(struct perf_event *event)
{
        struct intel_uncore_pmu *pmu = uncore_event_to_pmu(event);

        return uncore_pmu_to_box(pmu, smp_processor_id());
}
123
124 u64 uncore_msr_read_counter(struct intel_uncore_box *box, struct perf_event *event)
125 {
126         u64 count;
127
128         rdmsrl(event->hw.event_base, count);
129
130         return count;
131 }
132
133 /*
134  * generic get constraint function for shared match/mask registers.
135  */
136 struct event_constraint *
137 uncore_get_constraint(struct intel_uncore_box *box, struct perf_event *event)
138 {
139         struct intel_uncore_extra_reg *er;
140         struct hw_perf_event_extra *reg1 = &event->hw.extra_reg;
141         struct hw_perf_event_extra *reg2 = &event->hw.branch_reg;
142         unsigned long flags;
143         bool ok = false;
144
145         /*
146          * reg->alloc can be set due to existing state, so for fake box we
147          * need to ignore this, otherwise we might fail to allocate proper
148          * fake state for this extra reg constraint.
149          */
150         if (reg1->idx == EXTRA_REG_NONE ||
151             (!uncore_box_is_fake(box) && reg1->alloc))
152                 return NULL;
153
154         er = &box->shared_regs[reg1->idx];
155         raw_spin_lock_irqsave(&er->lock, flags);
156         if (!atomic_read(&er->ref) ||
157             (er->config1 == reg1->config && er->config2 == reg2->config)) {
158                 atomic_inc(&er->ref);
159                 er->config1 = reg1->config;
160                 er->config2 = reg2->config;
161                 ok = true;
162         }
163         raw_spin_unlock_irqrestore(&er->lock, flags);
164
165         if (ok) {
166                 if (!uncore_box_is_fake(box))
167                         reg1->alloc = 1;
168                 return NULL;
169         }
170
171         return &uncore_constraint_empty;
172 }
173
174 void uncore_put_constraint(struct intel_uncore_box *box, struct perf_event *event)
175 {
176         struct intel_uncore_extra_reg *er;
177         struct hw_perf_event_extra *reg1 = &event->hw.extra_reg;
178
179         /*
180          * Only put constraint if extra reg was actually allocated. Also
181          * takes care of event which do not use an extra shared reg.
182          *
183          * Also, if this is a fake box we shouldn't touch any event state
184          * (reg->alloc) and we don't care about leaving inconsistent box
185          * state either since it will be thrown out.
186          */
187         if (uncore_box_is_fake(box) || !reg1->alloc)
188                 return;
189
190         er = &box->shared_regs[reg1->idx];
191         atomic_dec(&er->ref);
192         reg1->alloc = 0;
193 }
194
195 u64 uncore_shared_reg_config(struct intel_uncore_box *box, int idx)
196 {
197         struct intel_uncore_extra_reg *er;
198         unsigned long flags;
199         u64 config;
200
201         er = &box->shared_regs[idx];
202
203         raw_spin_lock_irqsave(&er->lock, flags);
204         config = er->config;
205         raw_spin_unlock_irqrestore(&er->lock, flags);
206
207         return config;
208 }
209
210 static void uncore_assign_hw_event(struct intel_uncore_box *box, struct perf_event *event, int idx)
211 {
212         struct hw_perf_event *hwc = &event->hw;
213
214         hwc->idx = idx;
215         hwc->last_tag = ++box->tags[idx];
216
217         if (hwc->idx == UNCORE_PMC_IDX_FIXED) {
218                 hwc->event_base = uncore_fixed_ctr(box);
219                 hwc->config_base = uncore_fixed_ctl(box);
220                 return;
221         }
222
223         hwc->config_base = uncore_event_ctl(box, hwc->idx);
224         hwc->event_base  = uncore_perf_ctr(box, hwc->idx);
225 }
226
227 void uncore_perf_event_update(struct intel_uncore_box *box, struct perf_event *event)
228 {
229         u64 prev_count, new_count, delta;
230         int shift;
231
232         if (event->hw.idx >= UNCORE_PMC_IDX_FIXED)
233                 shift = 64 - uncore_fixed_ctr_bits(box);
234         else
235                 shift = 64 - uncore_perf_ctr_bits(box);
236
237         /* the hrtimer might modify the previous event value */
238 again:
239         prev_count = local64_read(&event->hw.prev_count);
240         new_count = uncore_read_counter(box, event);
241         if (local64_xchg(&event->hw.prev_count, new_count) != prev_count)
242                 goto again;
243
244         delta = (new_count << shift) - (prev_count << shift);
245         delta >>= shift;
246
247         local64_add(delta, &event->count);
248 }
249
250 /*
251  * The overflow interrupt is unavailable for SandyBridge-EP, is broken
252  * for SandyBridge. So we use hrtimer to periodically poll the counter
253  * to avoid overflow.
254  */
255 static enum hrtimer_restart uncore_pmu_hrtimer(struct hrtimer *hrtimer)
256 {
257         struct intel_uncore_box *box;
258         struct perf_event *event;
259         unsigned long flags;
260         int bit;
261
262         box = container_of(hrtimer, struct intel_uncore_box, hrtimer);
263         if (!box->n_active || box->cpu != smp_processor_id())
264                 return HRTIMER_NORESTART;
265         /*
266          * disable local interrupt to prevent uncore_pmu_event_start/stop
267          * to interrupt the update process
268          */
269         local_irq_save(flags);
270
271         /*
272          * handle boxes with an active event list as opposed to active
273          * counters
274          */
275         list_for_each_entry(event, &box->active_list, active_entry) {
276                 uncore_perf_event_update(box, event);
277         }
278
279         for_each_set_bit(bit, box->active_mask, UNCORE_PMC_IDX_MAX)
280                 uncore_perf_event_update(box, box->events[bit]);
281
282         local_irq_restore(flags);
283
284         hrtimer_forward_now(hrtimer, ns_to_ktime(box->hrtimer_duration));
285         return HRTIMER_RESTART;
286 }
287
288 void uncore_pmu_start_hrtimer(struct intel_uncore_box *box)
289 {
290         hrtimer_start(&box->hrtimer, ns_to_ktime(box->hrtimer_duration),
291                       HRTIMER_MODE_REL_PINNED);
292 }
293
294 void uncore_pmu_cancel_hrtimer(struct intel_uncore_box *box)
295 {
296         hrtimer_cancel(&box->hrtimer);
297 }
298
299 static void uncore_pmu_init_hrtimer(struct intel_uncore_box *box)
300 {
301         hrtimer_init(&box->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
302         box->hrtimer.function = uncore_pmu_hrtimer;
303 }
304
305 static struct intel_uncore_box *uncore_alloc_box(struct intel_uncore_type *type, int node)
306 {
307         struct intel_uncore_box *box;
308         int i, size;
309
310         size = sizeof(*box) + type->num_shared_regs * sizeof(struct intel_uncore_extra_reg);
311
312         box = kzalloc_node(size, GFP_KERNEL, node);
313         if (!box)
314                 return NULL;
315
316         for (i = 0; i < type->num_shared_regs; i++)
317                 raw_spin_lock_init(&box->shared_regs[i].lock);
318
319         uncore_pmu_init_hrtimer(box);
320         atomic_set(&box->refcnt, 1);
321         box->cpu = -1;
322         box->phys_id = -1;
323
324         /* set default hrtimer timeout */
325         box->hrtimer_duration = UNCORE_PMU_HRTIMER_INTERVAL;
326
327         INIT_LIST_HEAD(&box->active_list);
328
329         return box;
330 }
331
332 /*
333  * Using uncore_pmu_event_init pmu event_init callback
334  * as a detection point for uncore events.
335  */
336 static int uncore_pmu_event_init(struct perf_event *event);
337
338 static bool is_uncore_event(struct perf_event *event)
339 {
340         return event->pmu->event_init == uncore_pmu_event_init;
341 }
342
343 static int
344 uncore_collect_events(struct intel_uncore_box *box, struct perf_event *leader, bool dogrp)
345 {
346         struct perf_event *event;
347         int n, max_count;
348
349         max_count = box->pmu->type->num_counters;
350         if (box->pmu->type->fixed_ctl)
351                 max_count++;
352
353         if (box->n_events >= max_count)
354                 return -EINVAL;
355
356         n = box->n_events;
357
358         if (is_uncore_event(leader)) {
359                 box->event_list[n] = leader;
360                 n++;
361         }
362
363         if (!dogrp)
364                 return n;
365
366         list_for_each_entry(event, &leader->sibling_list, group_entry) {
367                 if (!is_uncore_event(event) ||
368                     event->state <= PERF_EVENT_STATE_OFF)
369                         continue;
370
371                 if (n >= max_count)
372                         return -EINVAL;
373
374                 box->event_list[n] = event;
375                 n++;
376         }
377         return n;
378 }
379
380 static struct event_constraint *
381 uncore_get_event_constraint(struct intel_uncore_box *box, struct perf_event *event)
382 {
383         struct intel_uncore_type *type = box->pmu->type;
384         struct event_constraint *c;
385
386         if (type->ops->get_constraint) {
387                 c = type->ops->get_constraint(box, event);
388                 if (c)
389                         return c;
390         }
391
392         if (event->attr.config == UNCORE_FIXED_EVENT)
393                 return &uncore_constraint_fixed;
394
395         if (type->constraints) {
396                 for_each_event_constraint(c, type->constraints) {
397                         if ((event->hw.config & c->cmask) == c->code)
398                                 return c;
399                 }
400         }
401
402         return &type->unconstrainted;
403 }
404
405 static void uncore_put_event_constraint(struct intel_uncore_box *box, struct perf_event *event)
406 {
407         if (box->pmu->type->ops->put_constraint)
408                 box->pmu->type->ops->put_constraint(box, event);
409 }
410
411 static int uncore_assign_events(struct intel_uncore_box *box, int assign[], int n)
412 {
413         unsigned long used_mask[BITS_TO_LONGS(UNCORE_PMC_IDX_MAX)];
414         struct event_constraint *c;
415         int i, wmin, wmax, ret = 0;
416         struct hw_perf_event *hwc;
417
418         bitmap_zero(used_mask, UNCORE_PMC_IDX_MAX);
419
420         for (i = 0, wmin = UNCORE_PMC_IDX_MAX, wmax = 0; i < n; i++) {
421                 c = uncore_get_event_constraint(box, box->event_list[i]);
422                 box->event_constraint[i] = c;
423                 wmin = min(wmin, c->weight);
424                 wmax = max(wmax, c->weight);
425         }
426
427         /* fastpath, try to reuse previous register */
428         for (i = 0; i < n; i++) {
429                 hwc = &box->event_list[i]->hw;
430                 c = box->event_constraint[i];
431
432                 /* never assigned */
433                 if (hwc->idx == -1)
434                         break;
435
436                 /* constraint still honored */
437                 if (!test_bit(hwc->idx, c->idxmsk))
438                         break;
439
440                 /* not already used */
441                 if (test_bit(hwc->idx, used_mask))
442                         break;
443
444                 __set_bit(hwc->idx, used_mask);
445                 if (assign)
446                         assign[i] = hwc->idx;
447         }
448         /* slow path */
449         if (i != n)
450                 ret = perf_assign_events(box->event_constraint, n,
451                                          wmin, wmax, n, assign);
452
453         if (!assign || ret) {
454                 for (i = 0; i < n; i++)
455                         uncore_put_event_constraint(box, box->event_list[i]);
456         }
457         return ret ? -EINVAL : 0;
458 }
459
460 static void uncore_pmu_event_start(struct perf_event *event, int flags)
461 {
462         struct intel_uncore_box *box = uncore_event_to_box(event);
463         int idx = event->hw.idx;
464
465         if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED)))
466                 return;
467
468         if (WARN_ON_ONCE(idx == -1 || idx >= UNCORE_PMC_IDX_MAX))
469                 return;
470
471         event->hw.state = 0;
472         box->events[idx] = event;
473         box->n_active++;
474         __set_bit(idx, box->active_mask);
475
476         local64_set(&event->hw.prev_count, uncore_read_counter(box, event));
477         uncore_enable_event(box, event);
478
479         if (box->n_active == 1) {
480                 uncore_enable_box(box);
481                 uncore_pmu_start_hrtimer(box);
482         }
483 }
484
485 static void uncore_pmu_event_stop(struct perf_event *event, int flags)
486 {
487         struct intel_uncore_box *box = uncore_event_to_box(event);
488         struct hw_perf_event *hwc = &event->hw;
489
490         if (__test_and_clear_bit(hwc->idx, box->active_mask)) {
491                 uncore_disable_event(box, event);
492                 box->n_active--;
493                 box->events[hwc->idx] = NULL;
494                 WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED);
495                 hwc->state |= PERF_HES_STOPPED;
496
497                 if (box->n_active == 0) {
498                         uncore_disable_box(box);
499                         uncore_pmu_cancel_hrtimer(box);
500                 }
501         }
502
503         if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) {
504                 /*
505                  * Drain the remaining delta count out of a event
506                  * that we are disabling:
507                  */
508                 uncore_perf_event_update(box, event);
509                 hwc->state |= PERF_HES_UPTODATE;
510         }
511 }
512
513 static int uncore_pmu_event_add(struct perf_event *event, int flags)
514 {
515         struct intel_uncore_box *box = uncore_event_to_box(event);
516         struct hw_perf_event *hwc = &event->hw;
517         int assign[UNCORE_PMC_IDX_MAX];
518         int i, n, ret;
519
520         if (!box)
521                 return -ENODEV;
522
523         ret = n = uncore_collect_events(box, event, false);
524         if (ret < 0)
525                 return ret;
526
527         hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
528         if (!(flags & PERF_EF_START))
529                 hwc->state |= PERF_HES_ARCH;
530
531         ret = uncore_assign_events(box, assign, n);
532         if (ret)
533                 return ret;
534
535         /* save events moving to new counters */
536         for (i = 0; i < box->n_events; i++) {
537                 event = box->event_list[i];
538                 hwc = &event->hw;
539
540                 if (hwc->idx == assign[i] &&
541                         hwc->last_tag == box->tags[assign[i]])
542                         continue;
543                 /*
544                  * Ensure we don't accidentally enable a stopped
545                  * counter simply because we rescheduled.
546                  */
547                 if (hwc->state & PERF_HES_STOPPED)
548                         hwc->state |= PERF_HES_ARCH;
549
550                 uncore_pmu_event_stop(event, PERF_EF_UPDATE);
551         }
552
553         /* reprogram moved events into new counters */
554         for (i = 0; i < n; i++) {
555                 event = box->event_list[i];
556                 hwc = &event->hw;
557
558                 if (hwc->idx != assign[i] ||
559                         hwc->last_tag != box->tags[assign[i]])
560                         uncore_assign_hw_event(box, event, assign[i]);
561                 else if (i < box->n_events)
562                         continue;
563
564                 if (hwc->state & PERF_HES_ARCH)
565                         continue;
566
567                 uncore_pmu_event_start(event, 0);
568         }
569         box->n_events = n;
570
571         return 0;
572 }
573
574 static void uncore_pmu_event_del(struct perf_event *event, int flags)
575 {
576         struct intel_uncore_box *box = uncore_event_to_box(event);
577         int i;
578
579         uncore_pmu_event_stop(event, PERF_EF_UPDATE);
580
581         for (i = 0; i < box->n_events; i++) {
582                 if (event == box->event_list[i]) {
583                         uncore_put_event_constraint(box, event);
584
585                         while (++i < box->n_events)
586                                 box->event_list[i - 1] = box->event_list[i];
587
588                         --box->n_events;
589                         break;
590                 }
591         }
592
593         event->hw.idx = -1;
594         event->hw.last_tag = ~0ULL;
595 }
596
/* pmu ->read callback: bring event->count up to date from the hardware. */
void uncore_pmu_event_read(struct perf_event *event)
{
        uncore_perf_event_update(uncore_event_to_box(event), event);
}
602
603 /*
604  * validation ensures the group can be loaded onto the
605  * PMU if it was the only group available.
606  */
607 static int uncore_validate_group(struct intel_uncore_pmu *pmu,
608                                 struct perf_event *event)
609 {
610         struct perf_event *leader = event->group_leader;
611         struct intel_uncore_box *fake_box;
612         int ret = -EINVAL, n;
613
614         fake_box = uncore_alloc_box(pmu->type, NUMA_NO_NODE);
615         if (!fake_box)
616                 return -ENOMEM;
617
618         fake_box->pmu = pmu;
619         /*
620          * the event is not yet connected with its
621          * siblings therefore we must first collect
622          * existing siblings, then add the new event
623          * before we can simulate the scheduling
624          */
625         n = uncore_collect_events(fake_box, leader, true);
626         if (n < 0)
627                 goto out;
628
629         fake_box->n_events = n;
630         n = uncore_collect_events(fake_box, event, false);
631         if (n < 0)
632                 goto out;
633
634         fake_box->n_events = n;
635
636         ret = uncore_assign_events(fake_box, NULL, n);
637 out:
638         kfree(fake_box);
639         return ret;
640 }
641
642 static int uncore_pmu_event_init(struct perf_event *event)
643 {
644         struct intel_uncore_pmu *pmu;
645         struct intel_uncore_box *box;
646         struct hw_perf_event *hwc = &event->hw;
647         int ret;
648
649         if (event->attr.type != event->pmu->type)
650                 return -ENOENT;
651
652         pmu = uncore_event_to_pmu(event);
653         /* no device found for this pmu */
654         if (pmu->func_id < 0)
655                 return -ENOENT;
656
657         /*
658          * Uncore PMU does measure at all privilege level all the time.
659          * So it doesn't make sense to specify any exclude bits.
660          */
661         if (event->attr.exclude_user || event->attr.exclude_kernel ||
662                         event->attr.exclude_hv || event->attr.exclude_idle)
663                 return -EINVAL;
664
665         /* Sampling not supported yet */
666         if (hwc->sample_period)
667                 return -EINVAL;
668
669         /*
670          * Place all uncore events for a particular physical package
671          * onto a single cpu
672          */
673         if (event->cpu < 0)
674                 return -EINVAL;
675         box = uncore_pmu_to_box(pmu, event->cpu);
676         if (!box || box->cpu < 0)
677                 return -EINVAL;
678         event->cpu = box->cpu;
679
680         event->hw.idx = -1;
681         event->hw.last_tag = ~0ULL;
682         event->hw.extra_reg.idx = EXTRA_REG_NONE;
683         event->hw.branch_reg.idx = EXTRA_REG_NONE;
684
685         if (event->attr.config == UNCORE_FIXED_EVENT) {
686                 /* no fixed counter */
687                 if (!pmu->type->fixed_ctl)
688                         return -EINVAL;
689                 /*
690                  * if there is only one fixed counter, only the first pmu
691                  * can access the fixed counter
692                  */
693                 if (pmu->type->single_fixed && pmu->pmu_idx > 0)
694                         return -EINVAL;
695
696                 /* fixed counters have event field hardcoded to zero */
697                 hwc->config = 0ULL;
698         } else {
699                 hwc->config = event->attr.config & pmu->type->event_mask;
700                 if (pmu->type->ops->hw_config) {
701                         ret = pmu->type->ops->hw_config(box, event);
702                         if (ret)
703                                 return ret;
704                 }
705         }
706
707         if (event->group_leader != event)
708                 ret = uncore_validate_group(pmu, event);
709         else
710                 ret = 0;
711
712         return ret;
713 }
714
715 static ssize_t uncore_get_attr_cpumask(struct device *dev,
716                                 struct device_attribute *attr, char *buf)
717 {
718         return cpumap_print_to_pagebuf(true, buf, &uncore_cpu_mask);
719 }
720
721 static DEVICE_ATTR(cpumask, S_IRUGO, uncore_get_attr_cpumask, NULL);
722
723 static struct attribute *uncore_pmu_attrs[] = {
724         &dev_attr_cpumask.attr,
725         NULL,
726 };
727
728 static struct attribute_group uncore_pmu_attr_group = {
729         .attrs = uncore_pmu_attrs,
730 };
731
732 static int uncore_pmu_register(struct intel_uncore_pmu *pmu)
733 {
734         int ret;
735
736         if (!pmu->type->pmu) {
737                 pmu->pmu = (struct pmu) {
738                         .attr_groups    = pmu->type->attr_groups,
739                         .task_ctx_nr    = perf_invalid_context,
740                         .event_init     = uncore_pmu_event_init,
741                         .add            = uncore_pmu_event_add,
742                         .del            = uncore_pmu_event_del,
743                         .start          = uncore_pmu_event_start,
744                         .stop           = uncore_pmu_event_stop,
745                         .read           = uncore_pmu_event_read,
746                 };
747         } else {
748                 pmu->pmu = *pmu->type->pmu;
749                 pmu->pmu.attr_groups = pmu->type->attr_groups;
750         }
751
752         if (pmu->type->num_boxes == 1) {
753                 if (strlen(pmu->type->name) > 0)
754                         sprintf(pmu->name, "uncore_%s", pmu->type->name);
755                 else
756                         sprintf(pmu->name, "uncore");
757         } else {
758                 sprintf(pmu->name, "uncore_%s_%d", pmu->type->name,
759                         pmu->pmu_idx);
760         }
761
762         ret = perf_pmu_register(&pmu->pmu, pmu->name, -1);
763         return ret;
764 }
765
766 static void __init uncore_type_exit(struct intel_uncore_type *type)
767 {
768         int i;
769
770         for (i = 0; i < type->num_boxes; i++)
771                 free_percpu(type->pmus[i].box);
772         kfree(type->pmus);
773         type->pmus = NULL;
774         kfree(type->events_group);
775         type->events_group = NULL;
776 }
777
778 static void __init uncore_types_exit(struct intel_uncore_type **types)
779 {
780         int i;
781         for (i = 0; types[i]; i++)
782                 uncore_type_exit(types[i]);
783 }
784
785 static int __init uncore_type_init(struct intel_uncore_type *type)
786 {
787         struct intel_uncore_pmu *pmus;
788         struct attribute_group *attr_group;
789         struct attribute **attrs;
790         int i, j;
791
792         pmus = kzalloc(sizeof(*pmus) * type->num_boxes, GFP_KERNEL);
793         if (!pmus)
794                 return -ENOMEM;
795
796         type->pmus = pmus;
797
798         type->unconstrainted = (struct event_constraint)
799                 __EVENT_CONSTRAINT(0, (1ULL << type->num_counters) - 1,
800                                 0, type->num_counters, 0, 0);
801
802         for (i = 0; i < type->num_boxes; i++) {
803                 pmus[i].func_id = -1;
804                 pmus[i].pmu_idx = i;
805                 pmus[i].type = type;
806                 INIT_LIST_HEAD(&pmus[i].box_list);
807                 pmus[i].box = alloc_percpu(struct intel_uncore_box *);
808                 if (!pmus[i].box)
809                         goto fail;
810         }
811
812         if (type->event_descs) {
813                 i = 0;
814                 while (type->event_descs[i].attr.attr.name)
815                         i++;
816
817                 attr_group = kzalloc(sizeof(struct attribute *) * (i + 1) +
818                                         sizeof(*attr_group), GFP_KERNEL);
819                 if (!attr_group)
820                         goto fail;
821
822                 attrs = (struct attribute **)(attr_group + 1);
823                 attr_group->name = "events";
824                 attr_group->attrs = attrs;
825
826                 for (j = 0; j < i; j++)
827                         attrs[j] = &type->event_descs[j].attr.attr;
828
829                 type->events_group = attr_group;
830         }
831
832         type->pmu_group = &uncore_pmu_attr_group;
833         return 0;
834 fail:
835         uncore_type_exit(type);
836         return -ENOMEM;
837 }
838
839 static int __init uncore_types_init(struct intel_uncore_type **types)
840 {
841         int i, ret;
842
843         for (i = 0; types[i]; i++) {
844                 ret = uncore_type_init(types[i]);
845                 if (ret)
846                         goto fail;
847         }
848         return 0;
849 fail:
850         while (--i >= 0)
851                 uncore_type_exit(types[i]);
852         return ret;
853 }
854
855 /*
856  * add a pci uncore device
857  */
858 static int uncore_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
859 {
860         struct intel_uncore_pmu *pmu;
861         struct intel_uncore_box *box;
862         struct intel_uncore_type *type;
863         int phys_id;
864         bool first_box = false;
865
866         phys_id = uncore_pcibus_to_physid(pdev->bus);
867         if (phys_id < 0)
868                 return -ENODEV;
869
870         if (UNCORE_PCI_DEV_TYPE(id->driver_data) == UNCORE_EXTRA_PCI_DEV) {
871                 int idx = UNCORE_PCI_DEV_IDX(id->driver_data);
872                 uncore_extra_pci_dev[phys_id][idx] = pdev;
873                 pci_set_drvdata(pdev, NULL);
874                 return 0;
875         }
876
877         type = uncore_pci_uncores[UNCORE_PCI_DEV_TYPE(id->driver_data)];
878         box = uncore_alloc_box(type, NUMA_NO_NODE);
879         if (!box)
880                 return -ENOMEM;
881
882         /*
883          * for performance monitoring unit with multiple boxes,
884          * each box has a different function id.
885          */
886         pmu = &type->pmus[UNCORE_PCI_DEV_IDX(id->driver_data)];
887         if (pmu->func_id < 0)
888                 pmu->func_id = pdev->devfn;
889         else
890                 WARN_ON_ONCE(pmu->func_id != pdev->devfn);
891
892         box->phys_id = phys_id;
893         box->pci_dev = pdev;
894         box->pmu = pmu;
895         uncore_box_init(box);
896         pci_set_drvdata(pdev, box);
897
898         raw_spin_lock(&uncore_box_lock);
899         if (list_empty(&pmu->box_list))
900                 first_box = true;
901         list_add_tail(&box->list, &pmu->box_list);
902         raw_spin_unlock(&uncore_box_lock);
903
904         if (first_box)
905                 uncore_pmu_register(pmu);
906         return 0;
907 }
908
909 static void uncore_pci_remove(struct pci_dev *pdev)
910 {
911         struct intel_uncore_box *box = pci_get_drvdata(pdev);
912         struct intel_uncore_pmu *pmu;
913         int i, cpu, phys_id;
914         bool last_box = false;
915
916         phys_id = uncore_pcibus_to_physid(pdev->bus);
917         box = pci_get_drvdata(pdev);
918         if (!box) {
919                 for (i = 0; i < UNCORE_EXTRA_PCI_DEV_MAX; i++) {
920                         if (uncore_extra_pci_dev[phys_id][i] == pdev) {
921                                 uncore_extra_pci_dev[phys_id][i] = NULL;
922                                 break;
923                         }
924                 }
925                 WARN_ON_ONCE(i >= UNCORE_EXTRA_PCI_DEV_MAX);
926                 return;
927         }
928
929         pmu = box->pmu;
930         if (WARN_ON_ONCE(phys_id != box->phys_id))
931                 return;
932
933         pci_set_drvdata(pdev, NULL);
934
935         raw_spin_lock(&uncore_box_lock);
936         list_del(&box->list);
937         if (list_empty(&pmu->box_list))
938                 last_box = true;
939         raw_spin_unlock(&uncore_box_lock);
940
941         for_each_possible_cpu(cpu) {
942                 if (*per_cpu_ptr(pmu->box, cpu) == box) {
943                         *per_cpu_ptr(pmu->box, cpu) = NULL;
944                         atomic_dec(&box->refcnt);
945                 }
946         }
947
948         WARN_ON_ONCE(atomic_read(&box->refcnt) != 1);
949         kfree(box);
950
951         if (last_box)
952                 perf_pmu_unregister(&pmu->pmu);
953 }
954
955 static int __init uncore_pci_init(void)
956 {
957         int ret;
958
959         switch (boot_cpu_data.x86_model) {
960         case 45: /* Sandy Bridge-EP */
961                 ret = snbep_uncore_pci_init();
962                 break;
963         case 62: /* Ivy Bridge-EP */
964                 ret = ivbep_uncore_pci_init();
965                 break;
966         case 63: /* Haswell-EP */
967                 ret = hswep_uncore_pci_init();
968                 break;
969         case 86: /* BDX-DE */
970                 ret = bdx_uncore_pci_init();
971                 break;
972         case 42: /* Sandy Bridge */
973                 ret = snb_uncore_pci_init();
974                 break;
975         case 58: /* Ivy Bridge */
976                 ret = ivb_uncore_pci_init();
977                 break;
978         case 60: /* Haswell */
979         case 69: /* Haswell Celeron */
980                 ret = hsw_uncore_pci_init();
981                 break;
982         case 61: /* Broadwell */
983                 ret = bdw_uncore_pci_init();
984                 break;
985         default:
986                 return 0;
987         }
988
989         if (ret)
990                 return ret;
991
992         ret = uncore_types_init(uncore_pci_uncores);
993         if (ret)
994                 return ret;
995
996         uncore_pci_driver->probe = uncore_pci_probe;
997         uncore_pci_driver->remove = uncore_pci_remove;
998
999         ret = pci_register_driver(uncore_pci_driver);
1000         if (ret == 0)
1001                 pcidrv_registered = true;
1002         else
1003                 uncore_types_exit(uncore_pci_uncores);
1004
1005         return ret;
1006 }
1007
1008 static void __init uncore_pci_exit(void)
1009 {
1010         if (pcidrv_registered) {
1011                 pcidrv_registered = false;
1012                 pci_unregister_driver(uncore_pci_driver);
1013                 uncore_types_exit(uncore_pci_uncores);
1014         }
1015 }
1016
1017 /* CPU hot plug/unplug are serialized by cpu_add_remove_lock mutex */
1018 static LIST_HEAD(boxes_to_free);
1019
1020 static void uncore_kfree_boxes(void)
1021 {
1022         struct intel_uncore_box *box;
1023
1024         while (!list_empty(&boxes_to_free)) {
1025                 box = list_entry(boxes_to_free.next,
1026                                  struct intel_uncore_box, list);
1027                 list_del(&box->list);
1028                 kfree(box);
1029         }
1030 }
1031
1032 static void uncore_cpu_dying(int cpu)
1033 {
1034         struct intel_uncore_type *type;
1035         struct intel_uncore_pmu *pmu;
1036         struct intel_uncore_box *box;
1037         int i, j;
1038
1039         for (i = 0; uncore_msr_uncores[i]; i++) {
1040                 type = uncore_msr_uncores[i];
1041                 for (j = 0; j < type->num_boxes; j++) {
1042                         pmu = &type->pmus[j];
1043                         box = *per_cpu_ptr(pmu->box, cpu);
1044                         *per_cpu_ptr(pmu->box, cpu) = NULL;
1045                         if (box && atomic_dec_and_test(&box->refcnt))
1046                                 list_add(&box->list, &boxes_to_free);
1047                 }
1048         }
1049 }
1050
1051 static int uncore_cpu_starting(int cpu)
1052 {
1053         struct intel_uncore_type *type;
1054         struct intel_uncore_pmu *pmu;
1055         struct intel_uncore_box *box, *exist;
1056         int i, j, k, phys_id;
1057
1058         phys_id = topology_physical_package_id(cpu);
1059
1060         for (i = 0; uncore_msr_uncores[i]; i++) {
1061                 type = uncore_msr_uncores[i];
1062                 for (j = 0; j < type->num_boxes; j++) {
1063                         pmu = &type->pmus[j];
1064                         box = *per_cpu_ptr(pmu->box, cpu);
1065                         /* called by uncore_cpu_init? */
1066                         if (box && box->phys_id >= 0) {
1067                                 uncore_box_init(box);
1068                                 continue;
1069                         }
1070
1071                         for_each_online_cpu(k) {
1072                                 exist = *per_cpu_ptr(pmu->box, k);
1073                                 if (exist && exist->phys_id == phys_id) {
1074                                         atomic_inc(&exist->refcnt);
1075                                         *per_cpu_ptr(pmu->box, cpu) = exist;
1076                                         if (box) {
1077                                                 list_add(&box->list,
1078                                                          &boxes_to_free);
1079                                                 box = NULL;
1080                                         }
1081                                         break;
1082                                 }
1083                         }
1084
1085                         if (box) {
1086                                 box->phys_id = phys_id;
1087                                 uncore_box_init(box);
1088                         }
1089                 }
1090         }
1091         return 0;
1092 }
1093
1094 static int uncore_cpu_prepare(int cpu, int phys_id)
1095 {
1096         struct intel_uncore_type *type;
1097         struct intel_uncore_pmu *pmu;
1098         struct intel_uncore_box *box;
1099         int i, j;
1100
1101         for (i = 0; uncore_msr_uncores[i]; i++) {
1102                 type = uncore_msr_uncores[i];
1103                 for (j = 0; j < type->num_boxes; j++) {
1104                         pmu = &type->pmus[j];
1105                         if (pmu->func_id < 0)
1106                                 pmu->func_id = j;
1107
1108                         box = uncore_alloc_box(type, cpu_to_node(cpu));
1109                         if (!box)
1110                                 return -ENOMEM;
1111
1112                         box->pmu = pmu;
1113                         box->phys_id = phys_id;
1114                         *per_cpu_ptr(pmu->box, cpu) = box;
1115                 }
1116         }
1117         return 0;
1118 }
1119
1120 static void
1121 uncore_change_context(struct intel_uncore_type **uncores, int old_cpu, int new_cpu)
1122 {
1123         struct intel_uncore_type *type;
1124         struct intel_uncore_pmu *pmu;
1125         struct intel_uncore_box *box;
1126         int i, j;
1127
1128         for (i = 0; uncores[i]; i++) {
1129                 type = uncores[i];
1130                 for (j = 0; j < type->num_boxes; j++) {
1131                         pmu = &type->pmus[j];
1132                         if (old_cpu < 0)
1133                                 box = uncore_pmu_to_box(pmu, new_cpu);
1134                         else
1135                                 box = uncore_pmu_to_box(pmu, old_cpu);
1136                         if (!box)
1137                                 continue;
1138
1139                         if (old_cpu < 0) {
1140                                 WARN_ON_ONCE(box->cpu != -1);
1141                                 box->cpu = new_cpu;
1142                                 continue;
1143                         }
1144
1145                         WARN_ON_ONCE(box->cpu != old_cpu);
1146                         if (new_cpu >= 0) {
1147                                 uncore_pmu_cancel_hrtimer(box);
1148                                 perf_pmu_migrate_context(&pmu->pmu,
1149                                                 old_cpu, new_cpu);
1150                                 box->cpu = new_cpu;
1151                         } else {
1152                                 box->cpu = -1;
1153                         }
1154                 }
1155         }
1156 }
1157
1158 static void uncore_event_exit_cpu(int cpu)
1159 {
1160         int i, phys_id, target;
1161
1162         /* if exiting cpu is used for collecting uncore events */
1163         if (!cpumask_test_and_clear_cpu(cpu, &uncore_cpu_mask))
1164                 return;
1165
1166         /* find a new cpu to collect uncore events */
1167         phys_id = topology_physical_package_id(cpu);
1168         target = -1;
1169         for_each_online_cpu(i) {
1170                 if (i == cpu)
1171                         continue;
1172                 if (phys_id == topology_physical_package_id(i)) {
1173                         target = i;
1174                         break;
1175                 }
1176         }
1177
1178         /* migrate uncore events to the new cpu */
1179         if (target >= 0)
1180                 cpumask_set_cpu(target, &uncore_cpu_mask);
1181
1182         uncore_change_context(uncore_msr_uncores, cpu, target);
1183         uncore_change_context(uncore_pci_uncores, cpu, target);
1184 }
1185
1186 static void uncore_event_init_cpu(int cpu)
1187 {
1188         int i, phys_id;
1189
1190         phys_id = topology_physical_package_id(cpu);
1191         for_each_cpu(i, &uncore_cpu_mask) {
1192                 if (phys_id == topology_physical_package_id(i))
1193                         return;
1194         }
1195
1196         cpumask_set_cpu(cpu, &uncore_cpu_mask);
1197
1198         uncore_change_context(uncore_msr_uncores, -1, cpu);
1199         uncore_change_context(uncore_pci_uncores, -1, cpu);
1200 }
1201
1202 static int uncore_cpu_notifier(struct notifier_block *self,
1203                                unsigned long action, void *hcpu)
1204 {
1205         unsigned int cpu = (long)hcpu;
1206
1207         /* allocate/free data structure for uncore box */
1208         switch (action & ~CPU_TASKS_FROZEN) {
1209         case CPU_UP_PREPARE:
1210                 uncore_cpu_prepare(cpu, -1);
1211                 break;
1212         case CPU_STARTING:
1213                 uncore_cpu_starting(cpu);
1214                 break;
1215         case CPU_UP_CANCELED:
1216         case CPU_DYING:
1217                 uncore_cpu_dying(cpu);
1218                 break;
1219         case CPU_ONLINE:
1220         case CPU_DEAD:
1221                 uncore_kfree_boxes();
1222                 break;
1223         default:
1224                 break;
1225         }
1226
1227         /* select the cpu that collects uncore events */
1228         switch (action & ~CPU_TASKS_FROZEN) {
1229         case CPU_DOWN_FAILED:
1230         case CPU_STARTING:
1231                 uncore_event_init_cpu(cpu);
1232                 break;
1233         case CPU_DOWN_PREPARE:
1234                 uncore_event_exit_cpu(cpu);
1235                 break;
1236         default:
1237                 break;
1238         }
1239
1240         return NOTIFY_OK;
1241 }
1242
1243 static struct notifier_block uncore_cpu_nb = {
1244         .notifier_call  = uncore_cpu_notifier,
1245         /*
1246          * to migrate uncore events, our notifier should be executed
1247          * before perf core's notifier.
1248          */
1249         .priority       = CPU_PRI_PERF + 1,
1250 };
1251
1252 static void __init uncore_cpu_setup(void *dummy)
1253 {
1254         uncore_cpu_starting(smp_processor_id());
1255 }
1256
1257 static int __init uncore_cpu_init(void)
1258 {
1259         int ret;
1260
1261         switch (boot_cpu_data.x86_model) {
1262         case 26: /* Nehalem */
1263         case 30:
1264         case 37: /* Westmere */
1265         case 44:
1266                 nhm_uncore_cpu_init();
1267                 break;
1268         case 42: /* Sandy Bridge */
1269         case 58: /* Ivy Bridge */
1270         case 60: /* Haswell */
1271         case 69: /* Haswell */
1272         case 70: /* Haswell */
1273         case 61: /* Broadwell */
1274         case 71: /* Broadwell */
1275                 snb_uncore_cpu_init();
1276                 break;
1277         case 45: /* Sandy Bridge-EP */
1278                 snbep_uncore_cpu_init();
1279                 break;
1280         case 46: /* Nehalem-EX */
1281         case 47: /* Westmere-EX aka. Xeon E7 */
1282                 nhmex_uncore_cpu_init();
1283                 break;
1284         case 62: /* Ivy Bridge-EP */
1285                 ivbep_uncore_cpu_init();
1286                 break;
1287         case 63: /* Haswell-EP */
1288                 hswep_uncore_cpu_init();
1289                 break;
1290         case 86: /* BDX-DE */
1291                 bdx_uncore_cpu_init();
1292                 break;
1293         default:
1294                 return 0;
1295         }
1296
1297         ret = uncore_types_init(uncore_msr_uncores);
1298         if (ret)
1299                 return ret;
1300
1301         return 0;
1302 }
1303
1304 static int __init uncore_pmus_register(void)
1305 {
1306         struct intel_uncore_pmu *pmu;
1307         struct intel_uncore_type *type;
1308         int i, j;
1309
1310         for (i = 0; uncore_msr_uncores[i]; i++) {
1311                 type = uncore_msr_uncores[i];
1312                 for (j = 0; j < type->num_boxes; j++) {
1313                         pmu = &type->pmus[j];
1314                         uncore_pmu_register(pmu);
1315                 }
1316         }
1317
1318         return 0;
1319 }
1320
1321 static void __init uncore_cpumask_init(void)
1322 {
1323         int cpu;
1324
1325         /*
1326          * ony invoke once from msr or pci init code
1327          */
1328         if (!cpumask_empty(&uncore_cpu_mask))
1329                 return;
1330
1331         cpu_notifier_register_begin();
1332
1333         for_each_online_cpu(cpu) {
1334                 int i, phys_id = topology_physical_package_id(cpu);
1335
1336                 for_each_cpu(i, &uncore_cpu_mask) {
1337                         if (phys_id == topology_physical_package_id(i)) {
1338                                 phys_id = -1;
1339                                 break;
1340                         }
1341                 }
1342                 if (phys_id < 0)
1343                         continue;
1344
1345                 uncore_cpu_prepare(cpu, phys_id);
1346                 uncore_event_init_cpu(cpu);
1347         }
1348         on_each_cpu(uncore_cpu_setup, NULL, 1);
1349
1350         __register_cpu_notifier(&uncore_cpu_nb);
1351
1352         cpu_notifier_register_done();
1353 }
1354
1355
1356 static int __init intel_uncore_init(void)
1357 {
1358         int ret;
1359
1360         if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
1361                 return -ENODEV;
1362
1363         if (cpu_has_hypervisor)
1364                 return -ENODEV;
1365
1366         ret = uncore_pci_init();
1367         if (ret)
1368                 goto fail;
1369         ret = uncore_cpu_init();
1370         if (ret) {
1371                 uncore_pci_exit();
1372                 goto fail;
1373         }
1374         uncore_cpumask_init();
1375
1376         uncore_pmus_register();
1377         return 0;
1378 fail:
1379         return ret;
1380 }
1381 device_initcall(intel_uncore_init);