1 #include "uncore.h"
2
3 static struct intel_uncore_type *empty_uncore[] = { NULL, };
4 struct intel_uncore_type **uncore_msr_uncores = empty_uncore;
5 struct intel_uncore_type **uncore_pci_uncores = empty_uncore;
6
7 static bool pcidrv_registered;
8 struct pci_driver *uncore_pci_driver;
9 /* pci bus to socket mapping */
10 DEFINE_RAW_SPINLOCK(pci2phy_map_lock);
11 struct list_head pci2phy_map_head = LIST_HEAD_INIT(pci2phy_map_head);
12 struct pci_extra_dev *uncore_extra_pci_dev;
13 static int max_packages;
14
15 /* mask of cpus that collect uncore events */
16 static cpumask_t uncore_cpu_mask;
17
18 /* constraint for the fixed counter */
19 static struct event_constraint uncore_constraint_fixed =
20         EVENT_CONSTRAINT(~0ULL, 1 << UNCORE_PMC_IDX_FIXED, ~0ULL);
21 struct event_constraint uncore_constraint_empty =
22         EVENT_CONSTRAINT(0, 0, 0);
23
24 static int uncore_pcibus_to_physid(struct pci_bus *bus)
25 {
26         struct pci2phy_map *map;
27         int phys_id = -1;
28
29         raw_spin_lock(&pci2phy_map_lock);
30         list_for_each_entry(map, &pci2phy_map_head, list) {
31                 if (map->segment == pci_domain_nr(bus)) {
32                         phys_id = map->pbus_to_physid[bus->number];
33                         break;
34                 }
35         }
36         raw_spin_unlock(&pci2phy_map_lock);
37
38         return phys_id;
39 }
40
41 static void uncore_free_pcibus_map(void)
42 {
43         struct pci2phy_map *map, *tmp;
44
45         list_for_each_entry_safe(map, tmp, &pci2phy_map_head, list) {
46                 list_del(&map->list);
47                 kfree(map);
48         }
49 }
50
51 struct pci2phy_map *__find_pci2phy_map(int segment)
52 {
53         struct pci2phy_map *map, *alloc = NULL;
54         int i;
55
56         lockdep_assert_held(&pci2phy_map_lock);
57
58 lookup:
59         list_for_each_entry(map, &pci2phy_map_head, list) {
60                 if (map->segment == segment)
61                         goto end;
62         }
63
64         if (!alloc) {
65                 raw_spin_unlock(&pci2phy_map_lock);
66                 alloc = kmalloc(sizeof(struct pci2phy_map), GFP_KERNEL);
67                 raw_spin_lock(&pci2phy_map_lock);
68
69                 if (!alloc)
70                         return NULL;
71
72                 goto lookup;
73         }
74
75         map = alloc;
76         alloc = NULL;
77         map->segment = segment;
78         for (i = 0; i < 256; i++)
79                 map->pbus_to_physid[i] = -1;
80         list_add_tail(&map->list, &pci2phy_map_head);
81
82 end:
83         kfree(alloc);
84         return map;
85 }
86
87 ssize_t uncore_event_show(struct kobject *kobj,
88                           struct kobj_attribute *attr, char *buf)
89 {
90         struct uncore_event_desc *event =
91                 container_of(attr, struct uncore_event_desc, attr);
92         return sprintf(buf, "%s", event->config);
93 }
94
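/* Uncore boxes are per package; map the cpu to its logical package to find the box. */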
95 struct intel_uncore_box *uncore_pmu_to_box(struct intel_uncore_pmu *pmu, int cpu)
96 {
97         return pmu->boxes[topology_logical_package_id(cpu)];
98 }
99
100 u64 uncore_msr_read_counter(struct intel_uncore_box *box, struct perf_event *event)
101 {
102         u64 count;
103
104         rdmsrl(event->hw.event_base, count);
105
106         return count;
107 }
108
109 /*
110  * generic get constraint function for shared match/mask registers.
111  */
112 struct event_constraint *
113 uncore_get_constraint(struct intel_uncore_box *box, struct perf_event *event)
114 {
115         struct intel_uncore_extra_reg *er;
116         struct hw_perf_event_extra *reg1 = &event->hw.extra_reg;
117         struct hw_perf_event_extra *reg2 = &event->hw.branch_reg;
118         unsigned long flags;
119         bool ok = false;
120
121         /*
122          * reg->alloc can be set due to existing state, so for a fake box we
123          * need to ignore it, otherwise we might fail to allocate the proper
124          * fake state for this extra reg constraint.
125          */
126         if (reg1->idx == EXTRA_REG_NONE ||
127             (!uncore_box_is_fake(box) && reg1->alloc))
128                 return NULL;
129
130         er = &box->shared_regs[reg1->idx];
131         raw_spin_lock_irqsave(&er->lock, flags);
132         if (!atomic_read(&er->ref) ||
133             (er->config1 == reg1->config && er->config2 == reg2->config)) {
134                 atomic_inc(&er->ref);
135                 er->config1 = reg1->config;
136                 er->config2 = reg2->config;
137                 ok = true;
138         }
139         raw_spin_unlock_irqrestore(&er->lock, flags);
140
141         if (ok) {
142                 if (!uncore_box_is_fake(box))
143                         reg1->alloc = 1;
144                 return NULL;
145         }
146
147         return &uncore_constraint_empty;
148 }
149
150 void uncore_put_constraint(struct intel_uncore_box *box, struct perf_event *event)
151 {
152         struct intel_uncore_extra_reg *er;
153         struct hw_perf_event_extra *reg1 = &event->hw.extra_reg;
154
155         /*
156          * Only put constraint if extra reg was actually allocated. Also
157          * takes care of events which do not use an extra shared reg.
158          *
159          * Also, if this is a fake box we shouldn't touch any event state
160          * (reg->alloc) and we don't care about leaving inconsistent box
161          * state either since it will be thrown out.
162          */
163         if (uncore_box_is_fake(box) || !reg1->alloc)
164                 return;
165
166         er = &box->shared_regs[reg1->idx];
167         atomic_dec(&er->ref);
168         reg1->alloc = 0;
169 }
170
171 u64 uncore_shared_reg_config(struct intel_uncore_box *box, int idx)
172 {
173         struct intel_uncore_extra_reg *er;
174         unsigned long flags;
175         u64 config;
176
177         er = &box->shared_regs[idx];
178
179         raw_spin_lock_irqsave(&er->lock, flags);
180         config = er->config;
181         raw_spin_unlock_irqrestore(&er->lock, flags);
182
183         return config;
184 }
185
186 static void uncore_assign_hw_event(struct intel_uncore_box *box,
187                                    struct perf_event *event, int idx)
188 {
189         struct hw_perf_event *hwc = &event->hw;
190
191         hwc->idx = idx;
192         hwc->last_tag = ++box->tags[idx];
193
194         if (hwc->idx == UNCORE_PMC_IDX_FIXED) {
195                 hwc->event_base = uncore_fixed_ctr(box);
196                 hwc->config_base = uncore_fixed_ctl(box);
197                 return;
198         }
199
200         hwc->config_base = uncore_event_ctl(box, hwc->idx);
201         hwc->event_base  = uncore_perf_ctr(box, hwc->idx);
202 }
203
204 void uncore_perf_event_update(struct intel_uncore_box *box, struct perf_event *event)
205 {
206         u64 prev_count, new_count, delta;
207         int shift;
208
209         if (event->hw.idx >= UNCORE_PMC_IDX_FIXED)
210                 shift = 64 - uncore_fixed_ctr_bits(box);
211         else
212                 shift = 64 - uncore_perf_ctr_bits(box);
213
214         /* the hrtimer might modify the previous event value */
215 again:
216         prev_count = local64_read(&event->hw.prev_count);
217         new_count = uncore_read_counter(box, event);
218         if (local64_xchg(&event->hw.prev_count, new_count) != prev_count)
219                 goto again;
220
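        /*
         * Shift both values up to the top of the 64-bit register so the
         * subtraction wraps at the hardware counter width, then shift the
         * delta back down.
         */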
221         delta = (new_count << shift) - (prev_count << shift);
222         delta >>= shift;
223
224         local64_add(delta, &event->count);
225 }
226
227 /*
228  * The overflow interrupt is unavailable for SandyBridge-EP and broken
229  * for SandyBridge, so we use an hrtimer to periodically poll the counters
230  * and avoid losing counts to overflow.
231  */
232 static enum hrtimer_restart uncore_pmu_hrtimer(struct hrtimer *hrtimer)
233 {
234         struct intel_uncore_box *box;
235         struct perf_event *event;
236         unsigned long flags;
237         int bit;
238
239         box = container_of(hrtimer, struct intel_uncore_box, hrtimer);
240         if (!box->n_active || box->cpu != smp_processor_id())
241                 return HRTIMER_NORESTART;
242         /*
243          * disable local interrupts to prevent uncore_pmu_event_start/stop
244          * from interrupting the update process
245          */
246         local_irq_save(flags);
247
248         /*
249          * handle boxes with an active event list as opposed to active
250          * counters
251          */
252         list_for_each_entry(event, &box->active_list, active_entry) {
253                 uncore_perf_event_update(box, event);
254         }
255
256         for_each_set_bit(bit, box->active_mask, UNCORE_PMC_IDX_MAX)
257                 uncore_perf_event_update(box, box->events[bit]);
258
259         local_irq_restore(flags);
260
261         hrtimer_forward_now(hrtimer, ns_to_ktime(box->hrtimer_duration));
262         return HRTIMER_RESTART;
263 }
264
265 void uncore_pmu_start_hrtimer(struct intel_uncore_box *box)
266 {
267         hrtimer_start(&box->hrtimer, ns_to_ktime(box->hrtimer_duration),
268                       HRTIMER_MODE_REL_PINNED);
269 }
270
271 void uncore_pmu_cancel_hrtimer(struct intel_uncore_box *box)
272 {
273         hrtimer_cancel(&box->hrtimer);
274 }
275
276 static void uncore_pmu_init_hrtimer(struct intel_uncore_box *box)
277 {
278         hrtimer_init(&box->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
279         box->hrtimer.function = uncore_pmu_hrtimer;
280 }
281
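/* Allocate a box together with its trailing array of shared extra registers. */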
282 static struct intel_uncore_box *uncore_alloc_box(struct intel_uncore_type *type,
283                                                  int node)
284 {
285         int i, size, numshared = type->num_shared_regs;
286         struct intel_uncore_box *box;
287
288         size = sizeof(*box) + numshared * sizeof(struct intel_uncore_extra_reg);
289
290         box = kzalloc_node(size, GFP_KERNEL, node);
291         if (!box)
292                 return NULL;
293
294         for (i = 0; i < numshared; i++)
295                 raw_spin_lock_init(&box->shared_regs[i].lock);
296
297         uncore_pmu_init_hrtimer(box);
298         box->cpu = -1;
299         box->pci_phys_id = -1;
300         box->pkgid = -1;
301
302         /* set default hrtimer timeout */
303         box->hrtimer_duration = UNCORE_PMU_HRTIMER_INTERVAL;
304
305         INIT_LIST_HEAD(&box->active_list);
306
307         return box;
308 }
309
310 /*
311  * The pmu event_init callback, uncore_pmu_event_init, is used
312  * as a detection point for uncore events.
313  */
314 static int uncore_pmu_event_init(struct perf_event *event);
315
316 static bool is_uncore_event(struct perf_event *event)
317 {
318         return event->pmu->event_init == uncore_pmu_event_init;
319 }
320
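/*
 * Add the leader (and, when dogrp is set, its uncore siblings) to the box's
 * event list. Returns the new number of events, or -EINVAL if the box is full.
 */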
321 static int
322 uncore_collect_events(struct intel_uncore_box *box, struct perf_event *leader,
323                       bool dogrp)
324 {
325         struct perf_event *event;
326         int n, max_count;
327
328         max_count = box->pmu->type->num_counters;
329         if (box->pmu->type->fixed_ctl)
330                 max_count++;
331
332         if (box->n_events >= max_count)
333                 return -EINVAL;
334
335         n = box->n_events;
336
337         if (is_uncore_event(leader)) {
338                 box->event_list[n] = leader;
339                 n++;
340         }
341
342         if (!dogrp)
343                 return n;
344
345         list_for_each_entry(event, &leader->sibling_list, group_entry) {
346                 if (!is_uncore_event(event) ||
347                     event->state <= PERF_EVENT_STATE_OFF)
348                         continue;
349
350                 if (n >= max_count)
351                         return -EINVAL;
352
353                 box->event_list[n] = event;
354                 n++;
355         }
356         return n;
357 }
358
359 static struct event_constraint *
360 uncore_get_event_constraint(struct intel_uncore_box *box, struct perf_event *event)
361 {
362         struct intel_uncore_type *type = box->pmu->type;
363         struct event_constraint *c;
364
365         if (type->ops->get_constraint) {
366                 c = type->ops->get_constraint(box, event);
367                 if (c)
368                         return c;
369         }
370
371         if (event->attr.config == UNCORE_FIXED_EVENT)
372                 return &uncore_constraint_fixed;
373
374         if (type->constraints) {
375                 for_each_event_constraint(c, type->constraints) {
376                         if ((event->hw.config & c->cmask) == c->code)
377                                 return c;
378                 }
379         }
380
381         return &type->unconstrainted;
382 }
383
384 static void uncore_put_event_constraint(struct intel_uncore_box *box,
385                                         struct perf_event *event)
386 {
387         if (box->pmu->type->ops->put_constraint)
388                 box->pmu->type->ops->put_constraint(box, event);
389 }
390
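/*
 * Assign hardware counters to the collected events. The fast path reuses the
 * previous assignment while the constraints still allow it; otherwise fall
 * back to the generic perf constraint solver.
 */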
391 static int uncore_assign_events(struct intel_uncore_box *box, int assign[], int n)
392 {
393         unsigned long used_mask[BITS_TO_LONGS(UNCORE_PMC_IDX_MAX)];
394         struct event_constraint *c;
395         int i, wmin, wmax, ret = 0;
396         struct hw_perf_event *hwc;
397
398         bitmap_zero(used_mask, UNCORE_PMC_IDX_MAX);
399
400         for (i = 0, wmin = UNCORE_PMC_IDX_MAX, wmax = 0; i < n; i++) {
401                 c = uncore_get_event_constraint(box, box->event_list[i]);
402                 box->event_constraint[i] = c;
403                 wmin = min(wmin, c->weight);
404                 wmax = max(wmax, c->weight);
405         }
406
407         /* fast path: try to reuse the previously assigned counters */
408         for (i = 0; i < n; i++) {
409                 hwc = &box->event_list[i]->hw;
410                 c = box->event_constraint[i];
411
412                 /* never assigned */
413                 if (hwc->idx == -1)
414                         break;
415
416                 /* constraint still honored */
417                 if (!test_bit(hwc->idx, c->idxmsk))
418                         break;
419
420                 /* not already used */
421                 if (test_bit(hwc->idx, used_mask))
422                         break;
423
424                 __set_bit(hwc->idx, used_mask);
425                 if (assign)
426                         assign[i] = hwc->idx;
427         }
428         /* slow path */
429         if (i != n)
430                 ret = perf_assign_events(box->event_constraint, n,
431                                          wmin, wmax, n, assign);
432
433         if (!assign || ret) {
434                 for (i = 0; i < n; i++)
435                         uncore_put_event_constraint(box, box->event_list[i]);
436         }
437         return ret ? -EINVAL : 0;
438 }
439
440 static void uncore_pmu_event_start(struct perf_event *event, int flags)
441 {
442         struct intel_uncore_box *box = uncore_event_to_box(event);
443         int idx = event->hw.idx;
444
445         if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED)))
446                 return;
447
448         if (WARN_ON_ONCE(idx == -1 || idx >= UNCORE_PMC_IDX_MAX))
449                 return;
450
451         event->hw.state = 0;
452         box->events[idx] = event;
453         box->n_active++;
454         __set_bit(idx, box->active_mask);
455
456         local64_set(&event->hw.prev_count, uncore_read_counter(box, event));
457         uncore_enable_event(box, event);
458
459         if (box->n_active == 1) {
460                 uncore_enable_box(box);
461                 uncore_pmu_start_hrtimer(box);
462         }
463 }
464
465 static void uncore_pmu_event_stop(struct perf_event *event, int flags)
466 {
467         struct intel_uncore_box *box = uncore_event_to_box(event);
468         struct hw_perf_event *hwc = &event->hw;
469
470         if (__test_and_clear_bit(hwc->idx, box->active_mask)) {
471                 uncore_disable_event(box, event);
472                 box->n_active--;
473                 box->events[hwc->idx] = NULL;
474                 WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED);
475                 hwc->state |= PERF_HES_STOPPED;
476
477                 if (box->n_active == 0) {
478                         uncore_disable_box(box);
479                         uncore_pmu_cancel_hrtimer(box);
480                 }
481         }
482
483         if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) {
484                 /*
485                  * Drain the remaining delta count out of an event
486                  * that we are disabling:
487                  */
488                 uncore_perf_event_update(box, event);
489                 hwc->state |= PERF_HES_UPTODATE;
490         }
491 }
492
493 static int uncore_pmu_event_add(struct perf_event *event, int flags)
494 {
495         struct intel_uncore_box *box = uncore_event_to_box(event);
496         struct hw_perf_event *hwc = &event->hw;
497         int assign[UNCORE_PMC_IDX_MAX];
498         int i, n, ret;
499
500         if (!box)
501                 return -ENODEV;
502
503         ret = n = uncore_collect_events(box, event, false);
504         if (ret < 0)
505                 return ret;
506
507         hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
508         if (!(flags & PERF_EF_START))
509                 hwc->state |= PERF_HES_ARCH;
510
511         ret = uncore_assign_events(box, assign, n);
512         if (ret)
513                 return ret;
514
515         /* save events moving to new counters */
516         for (i = 0; i < box->n_events; i++) {
517                 event = box->event_list[i];
518                 hwc = &event->hw;
519
520                 if (hwc->idx == assign[i] &&
521                         hwc->last_tag == box->tags[assign[i]])
522                         continue;
523                 /*
524                  * Ensure we don't accidentally enable a stopped
525                  * counter simply because we rescheduled.
526                  */
527                 if (hwc->state & PERF_HES_STOPPED)
528                         hwc->state |= PERF_HES_ARCH;
529
530                 uncore_pmu_event_stop(event, PERF_EF_UPDATE);
531         }
532
533         /* reprogram moved events into new counters */
534         for (i = 0; i < n; i++) {
535                 event = box->event_list[i];
536                 hwc = &event->hw;
537
538                 if (hwc->idx != assign[i] ||
539                         hwc->last_tag != box->tags[assign[i]])
540                         uncore_assign_hw_event(box, event, assign[i]);
541                 else if (i < box->n_events)
542                         continue;
543
544                 if (hwc->state & PERF_HES_ARCH)
545                         continue;
546
547                 uncore_pmu_event_start(event, 0);
548         }
549         box->n_events = n;
550
551         return 0;
552 }
553
554 static void uncore_pmu_event_del(struct perf_event *event, int flags)
555 {
556         struct intel_uncore_box *box = uncore_event_to_box(event);
557         int i;
558
559         uncore_pmu_event_stop(event, PERF_EF_UPDATE);
560
561         for (i = 0; i < box->n_events; i++) {
562                 if (event == box->event_list[i]) {
563                         uncore_put_event_constraint(box, event);
564
565                         for (++i; i < box->n_events; i++)
566                                 box->event_list[i - 1] = box->event_list[i];
567
568                         --box->n_events;
569                         break;
570                 }
571         }
572
573         event->hw.idx = -1;
574         event->hw.last_tag = ~0ULL;
575 }
576
577 void uncore_pmu_event_read(struct perf_event *event)
578 {
579         struct intel_uncore_box *box = uncore_event_to_box(event);
580         uncore_perf_event_update(box, event);
581 }
582
583 /*
584  * validation ensures the group can be loaded onto the
585  * PMU if it was the only group available.
586  */
587 static int uncore_validate_group(struct intel_uncore_pmu *pmu,
588                                 struct perf_event *event)
589 {
590         struct perf_event *leader = event->group_leader;
591         struct intel_uncore_box *fake_box;
592         int ret = -EINVAL, n;
593
594         fake_box = uncore_alloc_box(pmu->type, NUMA_NO_NODE);
595         if (!fake_box)
596                 return -ENOMEM;
597
598         fake_box->pmu = pmu;
599         /*
600          * The event is not yet connected with its
601          * siblings, therefore we must first collect the
602          * existing siblings, then add the new event
603          * before we can simulate the scheduling.
604          */
605         n = uncore_collect_events(fake_box, leader, true);
606         if (n < 0)
607                 goto out;
608
609         fake_box->n_events = n;
610         n = uncore_collect_events(fake_box, event, false);
611         if (n < 0)
612                 goto out;
613
614         fake_box->n_events = n;
615
616         ret = uncore_assign_events(fake_box, NULL, n);
617 out:
618         kfree(fake_box);
619         return ret;
620 }
621
622 static int uncore_pmu_event_init(struct perf_event *event)
623 {
624         struct intel_uncore_pmu *pmu;
625         struct intel_uncore_box *box;
626         struct hw_perf_event *hwc = &event->hw;
627         int ret;
628
629         if (event->attr.type != event->pmu->type)
630                 return -ENOENT;
631
632         pmu = uncore_event_to_pmu(event);
633         /* no device found for this pmu */
634         if (pmu->func_id < 0)
635                 return -ENOENT;
636
637         /*
638          * The uncore PMU always measures at all privilege levels,
639          * so it doesn't make sense to specify any exclude bits.
640          */
641         if (event->attr.exclude_user || event->attr.exclude_kernel ||
642                         event->attr.exclude_hv || event->attr.exclude_idle)
643                 return -EINVAL;
644
645         /* Sampling not supported yet */
646         if (hwc->sample_period)
647                 return -EINVAL;
648
649         /*
650          * Place all uncore events for a particular physical package
651          * onto a single cpu
652          */
653         if (event->cpu < 0)
654                 return -EINVAL;
655         box = uncore_pmu_to_box(pmu, event->cpu);
656         if (!box || box->cpu < 0)
657                 return -EINVAL;
658         event->cpu = box->cpu;
659         event->pmu_private = box;
660
661         event->hw.idx = -1;
662         event->hw.last_tag = ~0ULL;
663         event->hw.extra_reg.idx = EXTRA_REG_NONE;
664         event->hw.branch_reg.idx = EXTRA_REG_NONE;
665
666         if (event->attr.config == UNCORE_FIXED_EVENT) {
667                 /* no fixed counter */
668                 if (!pmu->type->fixed_ctl)
669                         return -EINVAL;
670                 /*
671                  * if there is only one fixed counter, only the first pmu
672                  * can access the fixed counter
673                  */
674                 if (pmu->type->single_fixed && pmu->pmu_idx > 0)
675                         return -EINVAL;
676
677                 /* fixed counters have event field hardcoded to zero */
678                 hwc->config = 0ULL;
679         } else {
680                 hwc->config = event->attr.config & pmu->type->event_mask;
681                 if (pmu->type->ops->hw_config) {
682                         ret = pmu->type->ops->hw_config(box, event);
683                         if (ret)
684                                 return ret;
685                 }
686         }
687
688         if (event->group_leader != event)
689                 ret = uncore_validate_group(pmu, event);
690         else
691                 ret = 0;
692
693         return ret;
694 }
695
696 static ssize_t uncore_get_attr_cpumask(struct device *dev,
697                                 struct device_attribute *attr, char *buf)
698 {
699         return cpumap_print_to_pagebuf(true, buf, &uncore_cpu_mask);
700 }
701
702 static DEVICE_ATTR(cpumask, S_IRUGO, uncore_get_attr_cpumask, NULL);
703
704 static struct attribute *uncore_pmu_attrs[] = {
705         &dev_attr_cpumask.attr,
706         NULL,
707 };
708
709 static struct attribute_group uncore_pmu_attr_group = {
710         .attrs = uncore_pmu_attrs,
711 };
712
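/*
 * Register the perf PMU for one uncore pmu instance. Types with multiple
 * boxes get a per-box index suffix in the PMU name.
 */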
713 static int uncore_pmu_register(struct intel_uncore_pmu *pmu)
714 {
715         int ret;
716
717         if (!pmu->type->pmu) {
718                 pmu->pmu = (struct pmu) {
719                         .attr_groups    = pmu->type->attr_groups,
720                         .task_ctx_nr    = perf_invalid_context,
721                         .event_init     = uncore_pmu_event_init,
722                         .add            = uncore_pmu_event_add,
723                         .del            = uncore_pmu_event_del,
724                         .start          = uncore_pmu_event_start,
725                         .stop           = uncore_pmu_event_stop,
726                         .read           = uncore_pmu_event_read,
727                 };
728         } else {
729                 pmu->pmu = *pmu->type->pmu;
730                 pmu->pmu.attr_groups = pmu->type->attr_groups;
731         }
732
733         if (pmu->type->num_boxes == 1) {
734                 if (strlen(pmu->type->name) > 0)
735                         sprintf(pmu->name, "uncore_%s", pmu->type->name);
736                 else
737                         sprintf(pmu->name, "uncore");
738         } else {
739                 sprintf(pmu->name, "uncore_%s_%d", pmu->type->name,
740                         pmu->pmu_idx);
741         }
742
743         ret = perf_pmu_register(&pmu->pmu, pmu->name, -1);
744         if (!ret)
745                 pmu->registered = true;
746         return ret;
747 }
748
749 static void uncore_pmu_unregister(struct intel_uncore_pmu *pmu)
750 {
751         if (!pmu->registered)
752                 return;
753         perf_pmu_unregister(&pmu->pmu);
754         pmu->registered = false;
755 }
756
757 static void __init __uncore_exit_boxes(struct intel_uncore_type *type, int cpu)
758 {
759         struct intel_uncore_pmu *pmu = type->pmus;
760         struct intel_uncore_box *box;
761         int i, pkg;
762
763         if (pmu) {
764                 pkg = topology_physical_package_id(cpu);
765                 for (i = 0; i < type->num_boxes; i++, pmu++) {
766                         box = pmu->boxes[pkg];
767                         if (box)
768                                 uncore_box_exit(box);
769                 }
770         }
771 }
772
773 static void __init uncore_exit_boxes(void *dummy)
774 {
775         struct intel_uncore_type **types;
776
777         for (types = uncore_msr_uncores; *types; types++)
778                 __uncore_exit_boxes(*types, smp_processor_id());
779 }
780
781 static void uncore_free_boxes(struct intel_uncore_pmu *pmu)
782 {
783         int pkg;
784
785         for (pkg = 0; pkg < max_packages; pkg++)
786                 kfree(pmu->boxes[pkg]);
787         kfree(pmu->boxes);
788 }
789
790 static void __init uncore_type_exit(struct intel_uncore_type *type)
791 {
792         struct intel_uncore_pmu *pmu = type->pmus;
793         int i;
794
795         if (pmu) {
796                 for (i = 0; i < type->num_boxes; i++, pmu++) {
797                         uncore_pmu_unregister(pmu);
798                         uncore_free_boxes(pmu);
799                 }
800                 kfree(type->pmus);
801                 type->pmus = NULL;
802         }
803         kfree(type->events_group);
804         type->events_group = NULL;
805 }
806
807 static void __init uncore_types_exit(struct intel_uncore_type **types)
808 {
809         for (; *types; types++)
810                 uncore_type_exit(*types);
811 }
812
813 static int __init uncore_type_init(struct intel_uncore_type *type, bool setid)
814 {
815         struct intel_uncore_pmu *pmus;
816         struct attribute_group *attr_group;
817         struct attribute **attrs;
818         size_t size;
819         int i, j;
820
821         pmus = kzalloc(sizeof(*pmus) * type->num_boxes, GFP_KERNEL);
822         if (!pmus)
823                 return -ENOMEM;
824
825         size = max_packages * sizeof(struct intel_uncore_box *);
826
827         for (i = 0; i < type->num_boxes; i++) {
828                 pmus[i].func_id = setid ? i : -1;
829                 pmus[i].pmu_idx = i;
830                 pmus[i].type    = type;
831                 pmus[i].boxes   = kzalloc(size, GFP_KERNEL);
832                 if (!pmus[i].boxes)
833                         return -ENOMEM;
834         }
835
836         type->pmus = pmus;
837         type->unconstrainted = (struct event_constraint)
838                 __EVENT_CONSTRAINT(0, (1ULL << type->num_counters) - 1,
839                                 0, type->num_counters, 0, 0);
840
841         if (type->event_descs) {
842                 for (i = 0; type->event_descs[i].attr.attr.name; i++);
843
844                 attr_group = kzalloc(sizeof(struct attribute *) * (i + 1) +
845                                         sizeof(*attr_group), GFP_KERNEL);
846                 if (!attr_group)
847                         return -ENOMEM;
848
849                 attrs = (struct attribute **)(attr_group + 1);
850                 attr_group->name = "events";
851                 attr_group->attrs = attrs;
852
853                 for (j = 0; j < i; j++)
854                         attrs[j] = &type->event_descs[j].attr.attr;
855
856                 type->events_group = attr_group;
857         }
858
859         type->pmu_group = &uncore_pmu_attr_group;
860         return 0;
861 }
862
863 static int __init
864 uncore_types_init(struct intel_uncore_type **types, bool setid)
865 {
866         int ret;
867
868         for (; *types; types++) {
869                 ret = uncore_type_init(*types, setid);
870                 if (ret)
871                         return ret;
872         }
873         return 0;
874 }
875
876 /*
877  * add a pci uncore device
878  */
879 static int uncore_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
880 {
881         struct intel_uncore_type *type;
882         struct intel_uncore_pmu *pmu;
883         struct intel_uncore_box *box;
884         int phys_id, pkg, ret;
885
886         phys_id = uncore_pcibus_to_physid(pdev->bus);
887         if (phys_id < 0)
888                 return -ENODEV;
889
890         pkg = topology_phys_to_logical_pkg(phys_id);
891         if (WARN_ON_ONCE(pkg < 0))
892                 return -EINVAL;
893
894         if (UNCORE_PCI_DEV_TYPE(id->driver_data) == UNCORE_EXTRA_PCI_DEV) {
895                 int idx = UNCORE_PCI_DEV_IDX(id->driver_data);
896
897                 uncore_extra_pci_dev[pkg].dev[idx] = pdev;
898                 pci_set_drvdata(pdev, NULL);
899                 return 0;
900         }
901
902         type = uncore_pci_uncores[UNCORE_PCI_DEV_TYPE(id->driver_data)];
903         /*
904          * For performance monitoring units with multiple boxes,
905          * each box has a different function id.
906          */
907         pmu = &type->pmus[UNCORE_PCI_DEV_IDX(id->driver_data)];
908         /*
909          * Knights Landing uses a common PCI device ID for multiple instances of
910          * an uncore PMU device type, and knl_uncore_pci_ids has only one entry
911          * per device type, so the PCI device idx is 0 for all devices. Advance
912          * the pmu pointer to an unused array element instead.
913          */
914         if (boot_cpu_data.x86_model == 87) {
915                 while (pmu->func_id >= 0)
916                         pmu++;
917         }
918
919         if (WARN_ON_ONCE(pmu->boxes[pkg] != NULL))
920                 return -EINVAL;
921
922         box = uncore_alloc_box(type, NUMA_NO_NODE);
923         if (!box)
924                 return -ENOMEM;
925
926         if (pmu->func_id < 0)
927                 pmu->func_id = pdev->devfn;
928         else
929                 WARN_ON_ONCE(pmu->func_id != pdev->devfn);
930
931         atomic_inc(&box->refcnt);
932         box->pci_phys_id = phys_id;
933         box->pkgid = pkg;
934         box->pci_dev = pdev;
935         box->pmu = pmu;
936         uncore_box_init(box);
937         pci_set_drvdata(pdev, box);
938
939         pmu->boxes[pkg] = box;
940         if (atomic_inc_return(&pmu->activeboxes) > 1)
941                 return 0;
942
943         /* First active box registers the pmu */
944         ret = uncore_pmu_register(pmu);
945         if (ret) {
946                 pci_set_drvdata(pdev, NULL);
947                 pmu->boxes[pkg] = NULL;
948                 uncore_box_exit(box);
949                 kfree(box);
950         }
951         return ret;
952 }
953
954 static void uncore_pci_remove(struct pci_dev *pdev)
955 {
956         struct intel_uncore_box *box = pci_get_drvdata(pdev);
957         struct intel_uncore_pmu *pmu;
958         int i, phys_id, pkg;
959
960         phys_id = uncore_pcibus_to_physid(pdev->bus);
961         pkg = topology_phys_to_logical_pkg(phys_id);
962
963         box = pci_get_drvdata(pdev);
964         if (!box) {
965                 for (i = 0; i < UNCORE_EXTRA_PCI_DEV_MAX; i++) {
966                         if (uncore_extra_pci_dev[pkg].dev[i] == pdev) {
967                                 uncore_extra_pci_dev[pkg].dev[i] = NULL;
968                                 break;
969                         }
970                 }
971                 WARN_ON_ONCE(i >= UNCORE_EXTRA_PCI_DEV_MAX);
972                 return;
973         }
974
975         pmu = box->pmu;
976         if (WARN_ON_ONCE(phys_id != box->pci_phys_id))
977                 return;
978
979         pci_set_drvdata(pdev, NULL);
980         pmu->boxes[pkg] = NULL;
981         if (atomic_dec_return(&pmu->activeboxes) == 0)
982                 uncore_pmu_unregister(pmu);
983         uncore_box_exit(box);
984         kfree(box);
985 }
986
987 static int __init uncore_pci_init(void)
988 {
989         size_t size;
990         int ret;
991
992         switch (boot_cpu_data.x86_model) {
993         case 45: /* Sandy Bridge-EP */
994                 ret = snbep_uncore_pci_init();
995                 break;
996         case 62: /* Ivy Bridge-EP */
997                 ret = ivbep_uncore_pci_init();
998                 break;
999         case 63: /* Haswell-EP */
1000                 ret = hswep_uncore_pci_init();
1001                 break;
1002         case 79: /* BDX-EP */
1003         case 86: /* BDX-DE */
1004                 ret = bdx_uncore_pci_init();
1005                 break;
1006         case 42: /* Sandy Bridge */
1007                 ret = snb_uncore_pci_init();
1008                 break;
1009         case 58: /* Ivy Bridge */
1010                 ret = ivb_uncore_pci_init();
1011                 break;
1012         case 60: /* Haswell */
1013         case 69: /* Haswell Celeron */
1014                 ret = hsw_uncore_pci_init();
1015                 break;
1016         case 61: /* Broadwell */
1017                 ret = bdw_uncore_pci_init();
1018                 break;
1019         case 87: /* Knights Landing */
1020                 ret = knl_uncore_pci_init();
1021                 break;
1022         case 94: /* SkyLake */
1023                 ret = skl_uncore_pci_init();
1024                 break;
1025         default:
1026                 return -ENODEV;
1027         }
1028
1029         if (ret)
1030                 return ret;
1031
1032         size = max_packages * sizeof(struct pci_extra_dev);
1033         uncore_extra_pci_dev = kzalloc(size, GFP_KERNEL);
1034         if (!uncore_extra_pci_dev) {
1035                 ret = -ENOMEM;
1036                 goto err;
1037         }
1038
1039         ret = uncore_types_init(uncore_pci_uncores, false);
1040         if (ret)
1041                 goto errtype;
1042
1043         uncore_pci_driver->probe = uncore_pci_probe;
1044         uncore_pci_driver->remove = uncore_pci_remove;
1045
1046         ret = pci_register_driver(uncore_pci_driver);
1047         if (ret)
1048                 goto errtype;
1049
1050         pcidrv_registered = true;
1051         return 0;
1052
1053 errtype:
1054         uncore_types_exit(uncore_pci_uncores);
1055         kfree(uncore_extra_pci_dev);
1056         uncore_extra_pci_dev = NULL;
1057         uncore_free_pcibus_map();
1058 err:
1059         uncore_pci_uncores = empty_uncore;
1060         return ret;
1061 }
1062
1063 static void __init uncore_pci_exit(void)
1064 {
1065         if (pcidrv_registered) {
1066                 pcidrv_registered = false;
1067                 pci_unregister_driver(uncore_pci_driver);
1068                 uncore_types_exit(uncore_pci_uncores);
1069                 kfree(uncore_extra_pci_dev);
1070                 uncore_free_pcibus_map();
1071         }
1072 }
1073
1074 static void uncore_cpu_dying(int cpu)
1075 {
1076         struct intel_uncore_type *type, **types = uncore_msr_uncores;
1077         struct intel_uncore_pmu *pmu;
1078         struct intel_uncore_box *box;
1079         int i, pkg;
1080
1081         pkg = topology_logical_package_id(cpu);
1082         for (; *types; types++) {
1083                 type = *types;
1084                 pmu = type->pmus;
1085                 for (i = 0; i < type->num_boxes; i++, pmu++) {
1086                         box = pmu->boxes[pkg];
1087                         if (box && atomic_dec_return(&box->refcnt) == 0)
1088                                 uncore_box_exit(box);
1089                 }
1090         }
1091 }
1092
1093 static void uncore_cpu_starting(int cpu, bool init)
1094 {
1095         struct intel_uncore_type *type, **types = uncore_msr_uncores;
1096         struct intel_uncore_pmu *pmu;
1097         struct intel_uncore_box *box;
1098         int i, pkg, ncpus = 1;
1099
1100         if (init) {
1101                 /*
1102                  * On init we get the number of online cpus in the package
1103                  * and set the refcount for all of them.
1104                  */
1105                 ncpus = cpumask_weight(topology_core_cpumask(cpu));
1106         }
1107
1108         pkg = topology_logical_package_id(cpu);
1109         for (; *types; types++) {
1110                 type = *types;
1111                 pmu = type->pmus;
1112                 for (i = 0; i < type->num_boxes; i++, pmu++) {
1113                         box = pmu->boxes[pkg];
1114                         if (!box)
1115                                 continue;
1116                         /* The first cpu on a package activates the box */
1117                         if (atomic_add_return(ncpus, &box->refcnt) == ncpus)
1118                                 uncore_box_init(box);
1119                 }
1120         }
1121 }
1122
1123 static int uncore_cpu_prepare(int cpu)
1124 {
1125         struct intel_uncore_type *type, **types = uncore_msr_uncores;
1126         struct intel_uncore_pmu *pmu;
1127         struct intel_uncore_box *box;
1128         int i, pkg;
1129
1130         pkg = topology_logical_package_id(cpu);
1131         for (; *types; types++) {
1132                 type = *types;
1133                 pmu = type->pmus;
1134                 for (i = 0; i < type->num_boxes; i++, pmu++) {
1135                         if (pmu->boxes[pkg])
1136                                 continue;
1137                         /* First cpu of a package allocates the box */
1138                         box = uncore_alloc_box(type, cpu_to_node(cpu));
1139                         if (!box)
1140                                 return -ENOMEM;
1141                         box->pmu = pmu;
1142                         box->pkgid = pkg;
1143                         pmu->boxes[pkg] = box;
1144                 }
1145         }
1146         return 0;
1147 }
1148
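/*
 * Move the boxes of one uncore type in a package from old_cpu to new_cpu,
 * migrating any active perf context along with them.
 */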
1149 static void uncore_change_type_ctx(struct intel_uncore_type *type, int old_cpu,
1150                                    int new_cpu)
1151 {
1152         struct intel_uncore_pmu *pmu = type->pmus;
1153         struct intel_uncore_box *box;
1154         int i, pkg;
1155
1156         pkg = topology_logical_package_id(old_cpu < 0 ? new_cpu : old_cpu);
1157         for (i = 0; i < type->num_boxes; i++, pmu++) {
1158                 box = pmu->boxes[pkg];
1159                 if (!box)
1160                         continue;
1161
1162                 if (old_cpu < 0) {
1163                         WARN_ON_ONCE(box->cpu != -1);
1164                         box->cpu = new_cpu;
1165                         continue;
1166                 }
1167
1168                 WARN_ON_ONCE(box->cpu != old_cpu);
1169                 box->cpu = -1;
1170                 if (new_cpu < 0)
1171                         continue;
1172
1173                 uncore_pmu_cancel_hrtimer(box);
1174                 perf_pmu_migrate_context(&pmu->pmu, old_cpu, new_cpu);
1175                 box->cpu = new_cpu;
1176         }
1177 }
1178
1179 static void uncore_change_context(struct intel_uncore_type **uncores,
1180                                   int old_cpu, int new_cpu)
1181 {
1182         for (; *uncores; uncores++)
1183                 uncore_change_type_ctx(*uncores, old_cpu, new_cpu);
1184 }
1185
1186 static void uncore_event_exit_cpu(int cpu)
1187 {
1188         int target;
1189
1190         /* Check if the exiting cpu is used for collecting uncore events */
1191         if (!cpumask_test_and_clear_cpu(cpu, &uncore_cpu_mask))
1192                 return;
1193
1194         /* Find a new cpu to collect uncore events */
1195         target = cpumask_any_but(topology_core_cpumask(cpu), cpu);
1196
1197         /* Migrate uncore events to the new target */
1198         if (target < nr_cpu_ids)
1199                 cpumask_set_cpu(target, &uncore_cpu_mask);
1200         else
1201                 target = -1;
1202
1203         uncore_change_context(uncore_msr_uncores, cpu, target);
1204         uncore_change_context(uncore_pci_uncores, cpu, target);
1205 }
1206
1207 static void uncore_event_init_cpu(int cpu)
1208 {
1209         int target;
1210
1211         /*
1212          * Check if there is an online cpu in the package
1213          * which collects uncore events already.
1214          */
1215         target = cpumask_any_and(&uncore_cpu_mask, topology_core_cpumask(cpu));
1216         if (target < nr_cpu_ids)
1217                 return;
1218
1219         cpumask_set_cpu(cpu, &uncore_cpu_mask);
1220
1221         uncore_change_context(uncore_msr_uncores, -1, cpu);
1222         uncore_change_context(uncore_pci_uncores, -1, cpu);
1223 }
1224
1225 static int uncore_cpu_notifier(struct notifier_block *self,
1226                                unsigned long action, void *hcpu)
1227 {
1228         unsigned int cpu = (long)hcpu;
1229
1230         switch (action & ~CPU_TASKS_FROZEN) {
1231         case CPU_UP_PREPARE:
1232                 return notifier_from_errno(uncore_cpu_prepare(cpu));
1233
1234         case CPU_STARTING:
1235                 uncore_cpu_starting(cpu, false);
1236         case CPU_DOWN_FAILED:
1237                 uncore_event_init_cpu(cpu);
1238                 break;
1239
1240         case CPU_UP_CANCELED:
1241         case CPU_DYING:
1242                 uncore_cpu_dying(cpu);
1243                 break;
1244
1245         case CPU_DOWN_PREPARE:
1246                 uncore_event_exit_cpu(cpu);
1247                 break;
1248         }
1249         return NOTIFY_OK;
1250 }
1251
1252 static struct notifier_block uncore_cpu_nb = {
1253         .notifier_call  = uncore_cpu_notifier,
1254         /*
1255          * to migrate uncore events, our notifier should be executed
1256          * before perf core's notifier.
1257          */
1258         .priority       = CPU_PRI_PERF + 1,
1259 };
1260
1261 static int __init type_pmu_register(struct intel_uncore_type *type)
1262 {
1263         int i, ret;
1264
1265         for (i = 0; i < type->num_boxes; i++) {
1266                 ret = uncore_pmu_register(&type->pmus[i]);
1267                 if (ret)
1268                         return ret;
1269         }
1270         return 0;
1271 }
1272
1273 static int __init uncore_msr_pmus_register(void)
1274 {
1275         struct intel_uncore_type **types = uncore_msr_uncores;
1276         int ret;
1277
1278         for (; *types; types++) {
1279                 ret = type_pmu_register(*types);
1280                 if (ret)
1281                         return ret;
1282         }
1283         return 0;
1284 }
1285
1286 static int __init uncore_cpu_init(void)
1287 {
1288         int ret;
1289
1290         switch (boot_cpu_data.x86_model) {
1291         case 26: /* Nehalem */
1292         case 30:
1293         case 37: /* Westmere */
1294         case 44:
1295                 nhm_uncore_cpu_init();
1296                 break;
1297         case 42: /* Sandy Bridge */
1298         case 58: /* Ivy Bridge */
1299         case 60: /* Haswell */
1300         case 69: /* Haswell */
1301         case 70: /* Haswell */
1302         case 61: /* Broadwell */
1303         case 71: /* Broadwell */
1304                 snb_uncore_cpu_init();
1305                 break;
1306         case 45: /* Sandy Bridge-EP */
1307                 snbep_uncore_cpu_init();
1308                 break;
1309         case 46: /* Nehalem-EX */
1310         case 47: /* Westmere-EX aka. Xeon E7 */
1311                 nhmex_uncore_cpu_init();
1312                 break;
1313         case 62: /* Ivy Bridge-EP */
1314                 ivbep_uncore_cpu_init();
1315                 break;
1316         case 63: /* Haswell-EP */
1317                 hswep_uncore_cpu_init();
1318                 break;
1319         case 79: /* BDX-EP */
1320         case 86: /* BDX-DE */
1321                 bdx_uncore_cpu_init();
1322                 break;
1323         case 87: /* Knights Landing */
1324                 knl_uncore_cpu_init();
1325                 break;
1326         default:
1327                 return -ENODEV;
1328         }
1329
1330         ret = uncore_types_init(uncore_msr_uncores, true);
1331         if (ret)
1332                 goto err;
1333
1334         ret = uncore_msr_pmus_register();
1335         if (ret)
1336                 goto err;
1337         return 0;
1338 err:
1339         uncore_types_exit(uncore_msr_uncores);
1340         uncore_msr_uncores = empty_uncore;
1341         return ret;
1342 }
1343
1344 static void __init uncore_cpu_setup(void *dummy)
1345 {
1346         uncore_cpu_starting(smp_processor_id(), true);
1347 }
1348
1349 /* Static out of laziness; avoids allocating a few bytes for the normal case */
1350 static __initdata DECLARE_BITMAP(packages, MAX_LOCAL_APIC);
1351
1352 static int __init uncore_cpumask_init(bool msr)
1353 {
1354         unsigned int cpu;
1355
1356         for_each_online_cpu(cpu) {
1357                 unsigned int pkg = topology_logical_package_id(cpu);
1358                 int ret;
1359
1360                 if (test_and_set_bit(pkg, packages))
1361                         continue;
1362                 /*
1363                  * The first online cpu of each package allocates and takes
1364                  * the refcounts for all other online cpus in that package.
1365                  * If MSRs are not enabled, no allocation is required.
1366                  */
1367                 if (msr) {
1368                         ret = uncore_cpu_prepare(cpu);
1369                         if (ret)
1370                                 return ret;
1371                 }
1372                 uncore_event_init_cpu(cpu);
1373                 smp_call_function_single(cpu, uncore_cpu_setup, NULL, 1);
1374         }
1375         __register_cpu_notifier(&uncore_cpu_nb);
1376         return 0;
1377 }
1378
1379 static int __init intel_uncore_init(void)
1380 {
1381         int pret, cret, ret;
1382
1383         if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
1384                 return -ENODEV;
1385
1386         if (cpu_has_hypervisor)
1387                 return -ENODEV;
1388
1389         max_packages = topology_max_packages();
1390
1391         pret = uncore_pci_init();
1392         cret = uncore_cpu_init();
1393
1394         if (cret && pret)
1395                 return -ENODEV;
1396
1397         cpu_notifier_register_begin();
1398         ret = uncore_cpumask_init(!cret);
1399         if (ret)
1400                 goto err;
1401         cpu_notifier_register_done();
1402         return 0;
1403
1404 err:
1405         /* Undo box->init_box() */
1406         on_each_cpu_mask(&uncore_cpu_mask, uncore_exit_boxes, NULL, 1);
1407         uncore_types_exit(uncore_msr_uncores);
1408         uncore_pci_exit();
1409         cpu_notifier_register_done();
1410         return ret;
1411 }
1412 device_initcall(intel_uncore_init);