]> git.kernelconcepts.de Git - karo-tx-linux.git/blob - arch/powerpc/perf/hv-24x7.c
Merge remote-tracking branch 'powerpc/next'
[karo-tx-linux.git] / arch / powerpc / perf / hv-24x7.c
1 /*
2  * Hypervisor supplied "24x7" performance counter support
3  *
4  * Author: Cody P Schafer <cody@linux.vnet.ibm.com>
5  * Copyright 2014 IBM Corporation.
6  *
7  * This program is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU General Public License
9  * as published by the Free Software Foundation; either version
10  * 2 of the License, or (at your option) any later version.
11  */
12
13 #define pr_fmt(fmt) "hv-24x7: " fmt
14
15 #include <linux/perf_event.h>
16 #include <linux/rbtree.h>
17 #include <linux/module.h>
18 #include <linux/slab.h>
19 #include <linux/vmalloc.h>
20
21 #include <asm/firmware.h>
22 #include <asm/hvcall.h>
23 #include <asm/io.h>
24 #include <linux/byteorder/generic.h>
25
26 #include "hv-24x7.h"
27 #include "hv-24x7-catalog.h"
28 #include "hv-common.h"
29
30 static const char *event_domain_suffix(unsigned domain)
31 {
32         switch (domain) {
33 #define DOMAIN(n, v, x, c)              \
34         case HV_PERF_DOMAIN_##n:        \
35                 return "__" #n;
36 #include "hv-24x7-domains.h"
37 #undef DOMAIN
38         default:
39                 WARN(1, "unknown domain %d\n", domain);
40                 return "__UNKNOWN_DOMAIN_SUFFIX";
41         }
42 }
43
44 static bool domain_is_valid(unsigned domain)
45 {
46         switch (domain) {
47 #define DOMAIN(n, v, x, c)              \
48         case HV_PERF_DOMAIN_##n:        \
49                 /* fall through */
50 #include "hv-24x7-domains.h"
51 #undef DOMAIN
52                 return true;
53         default:
54                 return false;
55         }
56 }
57
58 static bool is_physical_domain(unsigned domain)
59 {
60         switch (domain) {
61 #define DOMAIN(n, v, x, c)              \
62         case HV_PERF_DOMAIN_##n:        \
63                 return c;
64 #include "hv-24x7-domains.h"
65 #undef DOMAIN
66         default:
67                 return false;
68         }
69 }
70
/*
 * A domain is acceptable on a catalog entry exactly when
 * is_physical_domain() reports it as physical.
 */
static bool catalog_entry_domain_is_valid(unsigned domain)
{
	bool physical = is_physical_domain(domain);

	return physical;
}
75
76 /*
77  * TODO: Merging events:
78  * - Think of the hcall as an interface to a 4d array of counters:
79  *   - x = domains
80  *   - y = indexes in the domain (core, chip, vcpu, node, etc)
81  *   - z = offset into the counter space
82  *   - w = lpars (guest vms, "logical partitions")
83  * - A single request is: x,y,y_last,z,z_last,w,w_last
84  *   - this means we can retrieve a rectangle of counters in y,z for a single x.
85  *
86  * - Things to consider (ignoring w):
87  *   - input  cost_per_request = 16
88  *   - output cost_per_result(ys,zs)  = 8 + 8 * ys + ys * zs
89  *   - limited number of requests per hcall (must fit into 4K bytes)
90  *     - 4k = 16 [buffer header] + 16 [request size] * request_count
91  *     - 255 requests per hcall
92  *   - sometimes it will be more efficient to read extra data and discard
93  */
94
95 /*
96  * Example usage:
97  *  perf stat -e 'hv_24x7/domain=2,offset=8,vcpu=0,lpar=0xffffffff/'
98  */
99
/*
 * Bit ranges of the perf event config words used by this PMU.
 * NOTE(review): the macro arguments appear to be (name, config word,
 * first bit, last bit) -- confirm against hv-common.h.
 */

/* u3 0-6, one of HV_24X7_PERF_DOMAIN */
EVENT_DEFINE_RANGE_FORMAT(domain, config, 0, 3);
/* u16; core and vcpu are declared over the same range of config (16-31) */
EVENT_DEFINE_RANGE_FORMAT(core, config, 16, 31);
EVENT_DEFINE_RANGE_FORMAT(vcpu, config, 16, 31);
/* u32, see "data_offset" */
EVENT_DEFINE_RANGE_FORMAT(offset, config, 32, 63);
/* u16 */
EVENT_DEFINE_RANGE_FORMAT(lpar, config1, 0, 15);

/* Ranges of config, config1 and config2 not covered by the fields above */
EVENT_DEFINE_RANGE(reserved1, config,   4, 15);
EVENT_DEFINE_RANGE(reserved2, config1, 16, 63);
EVENT_DEFINE_RANGE(reserved3, config2,  0, 63);
113
/* NULL-terminated list of the attributes placed in the "format" group */
static struct attribute *format_attrs[] = {
	&format_attr_domain.attr,
	&format_attr_offset.attr,
	&format_attr_core.attr,
	&format_attr_vcpu.attr,
	&format_attr_lpar.attr,
	NULL,
};

/* Attribute group named "format", backed by the static list above */
static struct attribute_group format_group = {
	.name = "format",
	.attrs = format_attrs,
};

/* Attribute group named "events"; its attribute list is filled in later */
static struct attribute_group event_group = {
	.name = "events",
	/* .attrs is set in init */
};

/* Attribute group named "event_descs"; filled in later */
static struct attribute_group event_desc_group = {
	.name = "event_descs",
	/* .attrs is set in init */
};

/* Attribute group named "event_long_descs"; filled in later */
static struct attribute_group event_long_desc_group = {
	.name = "event_long_descs",
	/* .attrs is set in init */
};
142
/* Slab cache used to allocate the pages handed to the catalog hcall */
static struct kmem_cache *hv_page_cache;

/* Per-cpu transaction flags and transaction error status */
DEFINE_PER_CPU(int, hv_24x7_txn_flags);
DEFINE_PER_CPU(int, hv_24x7_txn_err);

/*
 * Per-cpu table of perf events.
 * NOTE(review): 255 presumably matches the per-hcall request limit -- confirm.
 */
struct hv_24x7_hw {
	struct perf_event *events[255];
};

DEFINE_PER_CPU(struct hv_24x7_hw, hv_24x7_hw);

/*
 * request_buffer and result_buffer are not required to be 4k aligned,
 * but are not allowed to cross any 4k boundary. Aligning them to 4k is
 * the simplest way to ensure that.
 */
#define H24x7_DATA_BUFFER_SIZE  4096
DEFINE_PER_CPU(char, hv_24x7_reqb[H24x7_DATA_BUFFER_SIZE]) __aligned(4096);
DEFINE_PER_CPU(char, hv_24x7_resb[H24x7_DATA_BUFFER_SIZE]) __aligned(4096);
162
163 static char *event_name(struct hv_24x7_event_data *ev, int *len)
164 {
165         *len = be16_to_cpu(ev->event_name_len) - 2;
166         return (char *)ev->remainder;
167 }
168
169 static char *event_desc(struct hv_24x7_event_data *ev, int *len)
170 {
171         unsigned nl = be16_to_cpu(ev->event_name_len);
172         __be16 *desc_len = (__be16 *)(ev->remainder + nl - 2);
173
174         *len = be16_to_cpu(*desc_len) - 2;
175         return (char *)ev->remainder + nl;
176 }
177
178 static char *event_long_desc(struct hv_24x7_event_data *ev, int *len)
179 {
180         unsigned nl = be16_to_cpu(ev->event_name_len);
181         __be16 *desc_len_ = (__be16 *)(ev->remainder + nl - 2);
182         unsigned desc_len = be16_to_cpu(*desc_len_);
183         __be16 *long_desc_len = (__be16 *)(ev->remainder + nl + desc_len - 2);
184
185         *len = be16_to_cpu(*long_desc_len) - 2;
186         return (char *)ev->remainder + nl + desc_len;
187 }
188
189 static bool event_fixed_portion_is_within(struct hv_24x7_event_data *ev,
190                                           void *end)
191 {
192         void *start = ev;
193
194         return (start + offsetof(struct hv_24x7_event_data, remainder)) < end;
195 }
196
197 /*
198  * Things we don't check:
199  *  - padding for desc, name, and long/detailed desc is required to be '\0'
200  *    bytes.
201  *
202  *  Return NULL if we pass end,
203  *  Otherwise return the address of the byte just following the event.
204  */
205 static void *event_end(struct hv_24x7_event_data *ev, void *end)
206 {
207         void *start = ev;
208         __be16 *dl_, *ldl_;
209         unsigned dl, ldl;
210         unsigned nl = be16_to_cpu(ev->event_name_len);
211
212         if (nl < 2) {
213                 pr_debug("%s: name length too short: %d", __func__, nl);
214                 return NULL;
215         }
216
217         if (start + nl > end) {
218                 pr_debug("%s: start=%p + nl=%u > end=%p",
219                                 __func__, start, nl, end);
220                 return NULL;
221         }
222
223         dl_ = (__be16 *)(ev->remainder + nl - 2);
224         if (!IS_ALIGNED((uintptr_t)dl_, 2))
225                 pr_warn("desc len not aligned %p", dl_);
226         dl = be16_to_cpu(*dl_);
227         if (dl < 2) {
228                 pr_debug("%s: desc len too short: %d", __func__, dl);
229                 return NULL;
230         }
231
232         if (start + nl + dl > end) {
233                 pr_debug("%s: (start=%p + nl=%u + dl=%u)=%p > end=%p",
234                                 __func__, start, nl, dl, start + nl + dl, end);
235                 return NULL;
236         }
237
238         ldl_ = (__be16 *)(ev->remainder + nl + dl - 2);
239         if (!IS_ALIGNED((uintptr_t)ldl_, 2))
240                 pr_warn("long desc len not aligned %p", ldl_);
241         ldl = be16_to_cpu(*ldl_);
242         if (ldl < 2) {
243                 pr_debug("%s: long desc len too short (ldl=%u)",
244                                 __func__, ldl);
245                 return NULL;
246         }
247
248         if (start + nl + dl + ldl > end) {
249                 pr_debug("%s: start=%p + nl=%u + dl=%u + ldl=%u > end=%p",
250                                 __func__, start, nl, dl, ldl, end);
251                 return NULL;
252         }
253
254         return start + nl + dl + ldl;
255 }
256
257 static unsigned long h_get_24x7_catalog_page_(unsigned long phys_4096,
258                                               unsigned long version,
259                                               unsigned long index)
260 {
261         pr_devel("h_get_24x7_catalog_page(0x%lx, %lu, %lu)",
262                         phys_4096, version, index);
263
264         WARN_ON(!IS_ALIGNED(phys_4096, 4096));
265
266         return plpar_hcall_norets(H_GET_24X7_CATALOG_PAGE,
267                         phys_4096, version, index);
268 }
269
270 static unsigned long h_get_24x7_catalog_page(char page[],
271                                              u64 version, u32 index)
272 {
273         return h_get_24x7_catalog_page_(virt_to_phys(page),
274                                         version, index);
275 }
276
/* Domains for which an attribute is created from each PHYS_CORE catalog event */
static unsigned core_domains[] = {
	HV_PERF_DOMAIN_PHYS_CORE,
	HV_PERF_DOMAIN_VCPU_HOME_CORE,
	HV_PERF_DOMAIN_VCPU_HOME_CHIP,
	HV_PERF_DOMAIN_VCPU_HOME_NODE,
	HV_PERF_DOMAIN_VCPU_REMOTE_NODE,
};
/* chip event data always yields a single event, core yields multiple */
#define MAX_EVENTS_PER_EVENT_DATA ARRAY_SIZE(core_domains)
286
287 static char *event_fmt(struct hv_24x7_event_data *event, unsigned domain)
288 {
289         const char *sindex;
290         const char *lpar;
291
292         if (is_physical_domain(domain)) {
293                 lpar = "0x0";
294                 sindex = "core";
295         } else {
296                 lpar = "?";
297                 sindex = "vcpu";
298         }
299
300         return kasprintf(GFP_KERNEL,
301                         "domain=0x%x,offset=0x%x,%s=?,lpar=%s",
302                         domain,
303                         be16_to_cpu(event->event_counter_offs) +
304                                 be16_to_cpu(event->event_group_record_offs),
305                         sindex,
306                         lpar);
307 }
308
309 /* Avoid trusting fw to NUL terminate strings */
310 static char *memdup_to_str(char *maybe_str, int max_len, gfp_t gfp)
311 {
312         return kasprintf(gfp, "%.*s", max_len, maybe_str);
313 }
314
315 static ssize_t device_show_string(struct device *dev,
316                 struct device_attribute *attr, char *buf)
317 {
318         struct dev_ext_attribute *d;
319
320         d = container_of(attr, struct dev_ext_attribute, attr);
321
322         return sprintf(buf, "%s\n", (char *)d->var);
323 }
324
325 static struct attribute *device_str_attr_create_(char *name, char *str)
326 {
327         struct dev_ext_attribute *attr = kzalloc(sizeof(*attr), GFP_KERNEL);
328
329         if (!attr)
330                 return NULL;
331
332         sysfs_attr_init(&attr->attr.attr);
333
334         attr->var = str;
335         attr->attr.attr.name = name;
336         attr->attr.attr.mode = 0444;
337         attr->attr.show = device_show_string;
338
339         return &attr->attr.attr;
340 }
341
342 static struct attribute *device_str_attr_create(char *name, int name_max,
343                                                 int name_nonce,
344                                                 char *str, size_t str_max)
345 {
346         char *n;
347         char *s = memdup_to_str(str, str_max, GFP_KERNEL);
348         struct attribute *a;
349
350         if (!s)
351                 return NULL;
352
353         if (!name_nonce)
354                 n = kasprintf(GFP_KERNEL, "%.*s", name_max, name);
355         else
356                 n = kasprintf(GFP_KERNEL, "%.*s__%d", name_max, name,
357                                         name_nonce);
358         if (!n)
359                 goto out_s;
360
361         a = device_str_attr_create_(n, s);
362         if (!a)
363                 goto out_n;
364
365         return a;
366 out_n:
367         kfree(n);
368 out_s:
369         kfree(s);
370         return NULL;
371 }
372
373 static void device_str_attr_destroy(struct attribute *attr)
374 {
375         struct dev_ext_attribute *d;
376
377         d = container_of(attr, struct dev_ext_attribute, attr.attr);
378         kfree(d->var);
379         kfree(d->attr.attr.name);
380         kfree(d);
381 }
382
383 static struct attribute *event_to_attr(unsigned ix,
384                                        struct hv_24x7_event_data *event,
385                                        unsigned domain,
386                                        int nonce)
387 {
388         int event_name_len;
389         char *ev_name, *a_ev_name, *val;
390         const char *ev_suffix;
391         struct attribute *attr;
392
393         if (!domain_is_valid(domain)) {
394                 pr_warn("catalog event %u has invalid domain %u\n",
395                                 ix, domain);
396                 return NULL;
397         }
398
399         val = event_fmt(event, domain);
400         if (!val)
401                 return NULL;
402
403         ev_suffix = event_domain_suffix(domain);
404         ev_name = event_name(event, &event_name_len);
405         if (!nonce)
406                 a_ev_name = kasprintf(GFP_KERNEL, "%.*s%s",
407                                 (int)event_name_len, ev_name, ev_suffix);
408         else
409                 a_ev_name = kasprintf(GFP_KERNEL, "%.*s%s__%d",
410                                 (int)event_name_len, ev_name, ev_suffix, nonce);
411
412         if (!a_ev_name)
413                 goto out_val;
414
415         attr = device_str_attr_create_(a_ev_name, val);
416         if (!attr)
417                 goto out_name;
418
419         return attr;
420 out_name:
421         kfree(a_ev_name);
422 out_val:
423         kfree(val);
424         return NULL;
425 }
426
/*
 * Create the attribute carrying an event's description, named after the
 * event (plus the nonce suffix, if any).
 *
 * Returns NULL when the description length is zero or creation fails.
 */
static struct attribute *event_to_desc_attr(struct hv_24x7_event_data *event,
					    int nonce)
{
	int name_len, desc_len;
	char *name = event_name(event, &name_len);
	char *desc = event_desc(event, &desc_len);

	/* No description means no sysfs file */
	if (desc_len == 0)
		return NULL;

	return device_str_attr_create(name, name_len, nonce, desc, desc_len);
}
440
/*
 * Create the attribute carrying an event's long description, named after
 * the event (plus the nonce suffix, if any).
 *
 * Returns NULL when the long description length is zero or creation fails.
 */
static struct attribute *
event_to_long_desc_attr(struct hv_24x7_event_data *event, int nonce)
{
	int name_len, long_desc_len;
	char *name = event_name(event, &name_len);
	char *long_desc = event_long_desc(event, &long_desc_len);

	/* No long description means no sysfs file */
	if (long_desc_len == 0)
		return NULL;

	return device_str_attr_create(name, name_len, nonce, long_desc,
				      long_desc_len);
}
454
455 static ssize_t event_data_to_attrs(unsigned ix, struct attribute **attrs,
456                                    struct hv_24x7_event_data *event, int nonce)
457 {
458         unsigned i;
459
460         switch (event->domain) {
461         case HV_PERF_DOMAIN_PHYS_CHIP:
462                 *attrs = event_to_attr(ix, event, event->domain, nonce);
463                 return 1;
464         case HV_PERF_DOMAIN_PHYS_CORE:
465                 for (i = 0; i < ARRAY_SIZE(core_domains); i++) {
466                         attrs[i] = event_to_attr(ix, event, core_domains[i],
467                                                 nonce);
468                         if (!attrs[i]) {
469                                 pr_warn("catalog event %u: individual attr %u "
470                                         "creation failure\n", ix, i);
471                                 for (; i; i--)
472                                         device_str_attr_destroy(attrs[i - 1]);
473                                 return -1;
474                         }
475                 }
476                 return i;
477         default:
478                 pr_warn("catalog event %u: domain %u is not allowed in the "
479                                 "catalog\n", ix, event->domain);
480                 return -1;
481         }
482 }
483
484 static size_t event_to_attr_ct(struct hv_24x7_event_data *event)
485 {
486         switch (event->domain) {
487         case HV_PERF_DOMAIN_PHYS_CHIP:
488                 return 1;
489         case HV_PERF_DOMAIN_PHYS_CORE:
490                 return ARRAY_SIZE(core_domains);
491         default:
492                 return 0;
493         }
494 }
495
/*
 * Translate address @v to a physical address: look up its page with
 * vmalloc_to_page() and add the offset of @v within that page.
 * BUGs if no page is found.
 */
static unsigned long vmalloc_to_phys(void *v)
{
	struct page *pg = vmalloc_to_page(v);

	BUG_ON(pg == NULL);

	return page_to_phys(pg) + offset_in_page(v);
}
503
/*
 * Red-black tree node recording one (event name, domain) pair, used to
 * detect duplicate names while the event attributes are created.
 */
struct event_uniq {
	struct rb_node node;	/* linkage in the uniqueness tree */
	const char *name;	/* event name; not owned by this node */
	int nl;			/* length of @name in bytes */
	unsigned ct;		/* number of duplicates seen so far */
	unsigned domain;	/* domain of the event */
};
512
/*
 * Order two byte buffers, by length first and by content second.
 *
 * Returns 1 when the first buffer is shorter, -1 when it is longer, and
 * otherwise the memcmp() result of the two equally long buffers.
 *
 * The lengths must be compared in both directions before memcmp() runs,
 * so that memcmp() never reads past the end of the shorter buffer.
 */
static int memord(const void *d1, size_t s1, const void *d2, size_t s2)
{
	if (s1 < s2)
		return 1;
	if (s1 > s2)
		return -1;

	return memcmp(d1, d2, s1);
}
522
/*
 * Ordering used by the event uniqueness tree: compare the names with
 * memord() and break ties on the domain number.
 *
 * Returns a negative value, zero or a positive value.
 */
static int ev_uniq_ord(const void *v1, size_t s1, unsigned d1, const void *v2,
		       size_t s2, unsigned d2)
{
	int by_name = memord(v1, s1, v2, s2);

	if (by_name != 0)
		return by_name;

	if (d1 == d2)
		return 0;

	return (d1 > d2) ? 1 : -1;
}
536
537 static int event_uniq_add(struct rb_root *root, const char *name, int nl,
538                           unsigned domain)
539 {
540         struct rb_node **new = &(root->rb_node), *parent = NULL;
541         struct event_uniq *data;
542
543         /* Figure out where to put new node */
544         while (*new) {
545                 struct event_uniq *it;
546                 int result;
547
548                 it = container_of(*new, struct event_uniq, node);
549                 result = ev_uniq_ord(name, nl, domain, it->name, it->nl,
550                                         it->domain);
551
552                 parent = *new;
553                 if (result < 0)
554                         new = &((*new)->rb_left);
555                 else if (result > 0)
556                         new = &((*new)->rb_right);
557                 else {
558                         it->ct++;
559                         pr_info("found a duplicate event %.*s, ct=%u\n", nl,
560                                                 name, it->ct);
561                         return it->ct;
562                 }
563         }
564
565         data = kmalloc(sizeof(*data), GFP_KERNEL);
566         if (!data)
567                 return -ENOMEM;
568
569         *data = (struct event_uniq) {
570                 .name = name,
571                 .nl = nl,
572                 .ct = 0,
573                 .domain = domain,
574         };
575
576         /* Add new node and rebalance tree. */
577         rb_link_node(&data->node, parent, new);
578         rb_insert_color(&data->node, root);
579
580         /* data->ct */
581         return 0;
582 }
583
584 static void event_uniq_destroy(struct rb_root *root)
585 {
586         /*
587          * the strings we point to are in the giant block of memory filled by
588          * the catalog, and are freed separately.
589          */
590         struct event_uniq *pos, *n;
591
592         rbtree_postorder_for_each_entry_safe(pos, n, root, node)
593                 kfree(pos);
594 }
595
596
597 /*
598  * ensure the event structure's sizes are self consistent and don't cause us to
599  * read outside of the event
600  *
601  * On success, return the event length in bytes.
602  * Otherwise, return -1 (and print as appropriate).
603  */
604 static ssize_t catalog_event_len_validate(struct hv_24x7_event_data *event,
605                                           size_t event_idx,
606                                           size_t event_data_bytes,
607                                           size_t event_entry_count,
608                                           size_t offset, void *end)
609 {
610         ssize_t ev_len;
611         void *ev_end, *calc_ev_end;
612
613         if (offset >= event_data_bytes)
614                 return -1;
615
616         if (event_idx >= event_entry_count) {
617                 pr_devel("catalog event data has %zu bytes of padding after last event\n",
618                                 event_data_bytes - offset);
619                 return -1;
620         }
621
622         if (!event_fixed_portion_is_within(event, end)) {
623                 pr_warn("event %zu fixed portion is not within range\n",
624                                 event_idx);
625                 return -1;
626         }
627
628         ev_len = be16_to_cpu(event->length);
629
630         if (ev_len % 16)
631                 pr_info("event %zu has length %zu not divisible by 16: event=%pK\n",
632                                 event_idx, ev_len, event);
633
634         ev_end = (__u8 *)event + ev_len;
635         if (ev_end > end) {
636                 pr_warn("event %zu has .length=%zu, ends after buffer end: ev_end=%pK > end=%pK, offset=%zu\n",
637                                 event_idx, ev_len, ev_end, end,
638                                 offset);
639                 return -1;
640         }
641
642         calc_ev_end = event_end(event, end);
643         if (!calc_ev_end) {
644                 pr_warn("event %zu has a calculated length which exceeds buffer length %zu: event=%pK end=%pK, offset=%zu\n",
645                         event_idx, event_data_bytes, event, end,
646                         offset);
647                 return -1;
648         }
649
650         if (calc_ev_end > ev_end) {
651                 pr_warn("event %zu exceeds it's own length: event=%pK, end=%pK, offset=%zu, calc_ev_end=%pK\n",
652                         event_idx, event, ev_end, offset, calc_ev_end);
653                 return -1;
654         }
655
656         return ev_len;
657 }
658
659 #define MAX_4K (SIZE_MAX / 4096)
660
661 static int create_events_from_catalog(struct attribute ***events_,
662                                       struct attribute ***event_descs_,
663                                       struct attribute ***event_long_descs_)
664 {
665         unsigned long hret;
666         size_t catalog_len, catalog_page_len, event_entry_count,
667                event_data_len, event_data_offs,
668                event_data_bytes, junk_events, event_idx, event_attr_ct, i,
669                attr_max, event_idx_last, desc_ct, long_desc_ct;
670         ssize_t ct, ev_len;
671         uint32_t catalog_version_num;
672         struct attribute **events, **event_descs, **event_long_descs;
673         struct hv_24x7_catalog_page_0 *page_0 =
674                 kmem_cache_alloc(hv_page_cache, GFP_KERNEL);
675         void *page = page_0;
676         void *event_data, *end;
677         struct hv_24x7_event_data *event;
678         struct rb_root ev_uniq = RB_ROOT;
679         int ret = 0;
680
681         if (!page) {
682                 ret = -ENOMEM;
683                 goto e_out;
684         }
685
686         hret = h_get_24x7_catalog_page(page, 0, 0);
687         if (hret) {
688                 ret = -EIO;
689                 goto e_free;
690         }
691
692         catalog_version_num = be64_to_cpu(page_0->version);
693         catalog_page_len = be32_to_cpu(page_0->length);
694
695         if (MAX_4K < catalog_page_len) {
696                 pr_err("invalid page count: %zu\n", catalog_page_len);
697                 ret = -EIO;
698                 goto e_free;
699         }
700
701         catalog_len = catalog_page_len * 4096;
702
703         event_entry_count = be16_to_cpu(page_0->event_entry_count);
704         event_data_offs   = be16_to_cpu(page_0->event_data_offs);
705         event_data_len    = be16_to_cpu(page_0->event_data_len);
706
707         pr_devel("cv %zu cl %zu eec %zu edo %zu edl %zu\n",
708                         (size_t)catalog_version_num, catalog_len,
709                         event_entry_count, event_data_offs, event_data_len);
710
711         if ((MAX_4K < event_data_len)
712                         || (MAX_4K < event_data_offs)
713                         || (MAX_4K - event_data_offs < event_data_len)) {
714                 pr_err("invalid event data offs %zu and/or len %zu\n",
715                                 event_data_offs, event_data_len);
716                 ret = -EIO;
717                 goto e_free;
718         }
719
720         if ((event_data_offs + event_data_len) > catalog_page_len) {
721                 pr_err("event data %zu-%zu does not fit inside catalog 0-%zu\n",
722                                 event_data_offs,
723                                 event_data_offs + event_data_len,
724                                 catalog_page_len);
725                 ret = -EIO;
726                 goto e_free;
727         }
728
729         if (SIZE_MAX / MAX_EVENTS_PER_EVENT_DATA - 1 < event_entry_count) {
730                 pr_err("event_entry_count %zu is invalid\n",
731                                 event_entry_count);
732                 ret = -EIO;
733                 goto e_free;
734         }
735
736         event_data_bytes = event_data_len * 4096;
737
738         /*
739          * event data can span several pages, events can cross between these
740          * pages. Use vmalloc to make this easier.
741          */
742         event_data = vmalloc(event_data_bytes);
743         if (!event_data) {
744                 pr_err("could not allocate event data\n");
745                 ret = -ENOMEM;
746                 goto e_free;
747         }
748
749         end = event_data + event_data_bytes;
750
751         /*
752          * using vmalloc_to_phys() like this only works if PAGE_SIZE is
753          * divisible by 4096
754          */
755         BUILD_BUG_ON(PAGE_SIZE % 4096);
756
757         for (i = 0; i < event_data_len; i++) {
758                 hret = h_get_24x7_catalog_page_(
759                                 vmalloc_to_phys(event_data + i * 4096),
760                                 catalog_version_num,
761                                 i + event_data_offs);
762                 if (hret) {
763                         pr_err("failed to get event data in page %zu\n",
764                                         i + event_data_offs);
765                         ret = -EIO;
766                         goto e_event_data;
767                 }
768         }
769
770         /*
771          * scan the catalog to determine the number of attributes we need, and
772          * verify it at the same time.
773          */
774         for (junk_events = 0, event = event_data, event_idx = 0, attr_max = 0;
775              ;
776              event_idx++, event = (void *)event + ev_len) {
777                 size_t offset = (void *)event - (void *)event_data;
778                 char *name;
779                 int nl;
780
781                 ev_len = catalog_event_len_validate(event, event_idx,
782                                                     event_data_bytes,
783                                                     event_entry_count,
784                                                     offset, end);
785                 if (ev_len < 0)
786                         break;
787
788                 name = event_name(event, &nl);
789
790                 if (event->event_group_record_len == 0) {
791                         pr_devel("invalid event %zu (%.*s): group_record_len == 0, skipping\n",
792                                         event_idx, nl, name);
793                         junk_events++;
794                         continue;
795                 }
796
797                 if (!catalog_entry_domain_is_valid(event->domain)) {
798                         pr_info("event %zu (%.*s) has invalid domain %d\n",
799                                         event_idx, nl, name, event->domain);
800                         junk_events++;
801                         continue;
802                 }
803
804                 attr_max += event_to_attr_ct(event);
805         }
806
807         event_idx_last = event_idx;
808         if (event_idx_last != event_entry_count)
809                 pr_warn("event buffer ended before listed # of events were parsed (got %zu, wanted %zu, junk %zu)\n",
810                                 event_idx_last, event_entry_count, junk_events);
811
812         events = kmalloc_array(attr_max + 1, sizeof(*events), GFP_KERNEL);
813         if (!events) {
814                 ret = -ENOMEM;
815                 goto e_event_data;
816         }
817
818         event_descs = kmalloc_array(event_idx + 1, sizeof(*event_descs),
819                                 GFP_KERNEL);
820         if (!event_descs) {
821                 ret = -ENOMEM;
822                 goto e_event_attrs;
823         }
824
825         event_long_descs = kmalloc_array(event_idx + 1,
826                         sizeof(*event_long_descs), GFP_KERNEL);
827         if (!event_long_descs) {
828                 ret = -ENOMEM;
829                 goto e_event_descs;
830         }
831
832         /* Iterate over the catalog filling in the attribute vector */
833         for (junk_events = 0, event_attr_ct = 0, desc_ct = 0, long_desc_ct = 0,
834                                 event = event_data, event_idx = 0;
835                         event_idx < event_idx_last;
836                         event_idx++, ev_len = be16_to_cpu(event->length),
837                                 event = (void *)event + ev_len) {
838                 char *name;
839                 int nl;
840                 int nonce;
841                 /*
842                  * these are the only "bad" events that are intermixed and that
843                  * we can ignore without issue. make sure to skip them here
844                  */
845                 if (event->event_group_record_len == 0)
846                         continue;
847                 if (!catalog_entry_domain_is_valid(event->domain))
848                         continue;
849
850                 name  = event_name(event, &nl);
851                 nonce = event_uniq_add(&ev_uniq, name, nl, event->domain);
852                 ct    = event_data_to_attrs(event_idx, events + event_attr_ct,
853                                             event, nonce);
854                 if (ct <= 0) {
855                         pr_warn("event %zu (%.*s) creation failure, skipping\n",
856                                 event_idx, nl, name);
857                         junk_events++;
858                 } else {
859                         event_attr_ct += ct;
860                         event_descs[desc_ct] = event_to_desc_attr(event, nonce);
861                         if (event_descs[desc_ct])
862                                 desc_ct++;
863                         event_long_descs[long_desc_ct] =
864                                         event_to_long_desc_attr(event, nonce);
865                         if (event_long_descs[long_desc_ct])
866                                 long_desc_ct++;
867                 }
868         }
869
870         pr_info("read %zu catalog entries, created %zu event attrs (%zu failures), %zu descs\n",
871                         event_idx, event_attr_ct, junk_events, desc_ct);
872
873         events[event_attr_ct] = NULL;
874         event_descs[desc_ct] = NULL;
875         event_long_descs[long_desc_ct] = NULL;
876
877         event_uniq_destroy(&ev_uniq);
878         vfree(event_data);
879         kmem_cache_free(hv_page_cache, page);
880
881         *events_ = events;
882         *event_descs_ = event_descs;
883         *event_long_descs_ = event_long_descs;
884         return 0;
885
886 e_event_descs:
887         kfree(event_descs);
888 e_event_attrs:
889         kfree(events);
890 e_event_data:
891         vfree(event_data);
892 e_free:
893         kmem_cache_free(hv_page_cache, page);
894 e_out:
895         *events_ = NULL;
896         *event_descs_ = NULL;
897         *event_long_descs_ = NULL;
898         return ret;
899 }
900
901 static ssize_t catalog_read(struct file *filp, struct kobject *kobj,
902                             struct bin_attribute *bin_attr, char *buf,
903                             loff_t offset, size_t count)
904 {
905         unsigned long hret;
906         ssize_t ret = 0;
907         size_t catalog_len = 0, catalog_page_len = 0;
908         loff_t page_offset = 0;
909         loff_t offset_in_page;
910         size_t copy_len;
911         uint64_t catalog_version_num = 0;
912         void *page = kmem_cache_alloc(hv_page_cache, GFP_USER);
913         struct hv_24x7_catalog_page_0 *page_0 = page;
914
915         if (!page)
916                 return -ENOMEM;
917
918         hret = h_get_24x7_catalog_page(page, 0, 0);
919         if (hret) {
920                 ret = -EIO;
921                 goto e_free;
922         }
923
924         catalog_version_num = be64_to_cpu(page_0->version);
925         catalog_page_len = be32_to_cpu(page_0->length);
926         catalog_len = catalog_page_len * 4096;
927
928         page_offset = offset / 4096;
929         offset_in_page = offset % 4096;
930
931         if (page_offset >= catalog_page_len)
932                 goto e_free;
933
934         if (page_offset != 0) {
935                 hret = h_get_24x7_catalog_page(page, catalog_version_num,
936                                                page_offset);
937                 if (hret) {
938                         ret = -EIO;
939                         goto e_free;
940                 }
941         }
942
943         copy_len = 4096 - offset_in_page;
944         if (copy_len > count)
945                 copy_len = count;
946
947         memcpy(buf, page+offset_in_page, copy_len);
948         ret = copy_len;
949
950 e_free:
951         if (hret)
952                 pr_err("h_get_24x7_catalog_page(ver=%lld, page=%lld) failed:"
953                        " rc=%ld\n",
954                        catalog_version_num, page_offset, hret);
955         kmem_cache_free(hv_page_cache, page);
956
957         pr_devel("catalog_read: offset=%lld(%lld) count=%zu "
958                         "catalog_len=%zu(%zu) => %zd\n", offset, page_offset,
959                         count, catalog_len, catalog_page_len, ret);
960
961         return ret;
962 }
963
/*
 * PAGE_0_ATTR(_name, _fmt, _expr) - define a read-only device attribute
 * whose value is derived from page 0 of the 24x7 catalog.
 *
 * Generates _name##_show() and the matching dev_attr_##_name. The show
 * routine fetches catalog page 0 into a temporary page and formats _expr
 * with _fmt into @buf. _expr is evaluated inside the generated function,
 * so it may refer to the local 'page_0' pointer.
 *
 * The show routine returns the number of bytes written, -ENOMEM if the
 * temporary page cannot be allocated, or -EIO if the hcall fails.
 * Output is bounded to PAGE_SIZE, the size of a sysfs show() buffer.
 */
#define PAGE_0_ATTR(_name, _fmt, _expr)				\
static ssize_t _name##_show(struct device *dev,			\
			    struct device_attribute *dev_attr,	\
			    char *buf)				\
{								\
	unsigned long hret;					\
	ssize_t ret = 0;					\
	void *page = kmem_cache_alloc(hv_page_cache, GFP_USER);	\
	struct hv_24x7_catalog_page_0 *page_0 = page;		\
	if (!page)						\
		return -ENOMEM;					\
	hret = h_get_24x7_catalog_page(page, 0, 0);		\
	if (hret) {						\
		ret = -EIO;					\
		goto e_free;					\
	}							\
	ret = scnprintf(buf, PAGE_SIZE, _fmt, _expr);		\
e_free:								\
	kmem_cache_free(hv_page_cache, page);			\
	return ret;						\
}								\
static DEVICE_ATTR_RO(_name)
986
987 PAGE_0_ATTR(catalog_version, "%lld\n",
988                 (unsigned long long)be64_to_cpu(page_0->version));
989 PAGE_0_ATTR(catalog_len, "%lld\n",
990                 (unsigned long long)be32_to_cpu(page_0->length) * 4096);
991 static BIN_ATTR_RO(catalog, 0/* real length varies */);
992
993 static struct bin_attribute *if_bin_attrs[] = {
994         &bin_attr_catalog,
995         NULL,
996 };
997
998 static struct attribute *if_attrs[] = {
999         &dev_attr_catalog_len.attr,
1000         &dev_attr_catalog_version.attr,
1001         NULL,
1002 };
1003
1004 static struct attribute_group if_group = {
1005         .name = "interface",
1006         .bin_attrs = if_bin_attrs,
1007         .attrs = if_attrs,
1008 };
1009
1010 static const struct attribute_group *attr_groups[] = {
1011         &format_group,
1012         &event_group,
1013         &event_desc_group,
1014         &event_long_desc_group,
1015         &if_group,
1016         NULL,
1017 };
1018
1019 static void log_24x7_hcall(struct hv_24x7_request_buffer *request_buffer,
1020                            struct hv_24x7_data_result_buffer *result_buffer,
1021                            unsigned long ret)
1022 {
1023         struct hv_24x7_request *req;
1024
1025         req = &request_buffer->requests[0];
1026         pr_notice_ratelimited("hcall failed: [%d %#x %#x %d] => "
1027                         "ret 0x%lx (%ld) detail=0x%x failing ix=%x\n",
1028                         req->performance_domain, req->data_offset,
1029                         req->starting_ix, req->starting_lpar_ix, ret, ret,
1030                         result_buffer->detailed_rc,
1031                         result_buffer->failing_request_ix);
1032 }
1033
1034 /*
1035  * Start the process for a new H_GET_24x7_DATA hcall.
1036  */
1037 static void init_24x7_request(struct hv_24x7_request_buffer *request_buffer,
1038                               struct hv_24x7_data_result_buffer *result_buffer)
1039 {
1040
1041         memset(request_buffer, 0, 4096);
1042         memset(result_buffer, 0, 4096);
1043
1044         request_buffer->interface_version = HV_24X7_IF_VERSION_CURRENT;
1045         /* memset above set request_buffer->num_requests to 0 */
1046 }
1047
1048 /*
1049  * Commit (i.e perform) the H_GET_24x7_DATA hcall using the data collected
1050  * by 'init_24x7_request()' and 'add_event_to_24x7_request()'.
1051  */
1052 static int make_24x7_request(struct hv_24x7_request_buffer *request_buffer,
1053                              struct hv_24x7_data_result_buffer *result_buffer)
1054 {
1055         unsigned long ret;
1056
1057         /*
1058          * NOTE: Due to variable number of array elements in request and
1059          *       result buffer(s), sizeof() is not reliable. Use the actual
1060          *       allocated buffer size, H24x7_DATA_BUFFER_SIZE.
1061          */
1062         ret = plpar_hcall_norets(H_GET_24X7_DATA,
1063                         virt_to_phys(request_buffer), H24x7_DATA_BUFFER_SIZE,
1064                         virt_to_phys(result_buffer),  H24x7_DATA_BUFFER_SIZE);
1065
1066         if (ret)
1067                 log_24x7_hcall(request_buffer, result_buffer, ret);
1068
1069         return ret;
1070 }
1071
1072 /*
1073  * Add the given @event to the next slot in the 24x7 request_buffer.
1074  *
1075  * Note that H_GET_24X7_DATA hcall allows reading several counters'
1076  * values in a single HCALL. We expect the caller to add events to the
1077  * request buffer one by one, make the HCALL and process the results.
1078  */
1079 static int add_event_to_24x7_request(struct perf_event *event,
1080                                 struct hv_24x7_request_buffer *request_buffer)
1081 {
1082         u16 idx;
1083         int i;
1084         struct hv_24x7_request *req;
1085
1086         if (request_buffer->num_requests > 254) {
1087                 pr_devel("Too many requests for 24x7 HCALL %d\n",
1088                                 request_buffer->num_requests);
1089                 return -EINVAL;
1090         }
1091
1092         if (is_physical_domain(event_get_domain(event)))
1093                 idx = event_get_core(event);
1094         else
1095                 idx = event_get_vcpu(event);
1096
1097         i = request_buffer->num_requests++;
1098         req = &request_buffer->requests[i];
1099
1100         req->performance_domain = event_get_domain(event);
1101         req->data_size = cpu_to_be16(8);
1102         req->data_offset = cpu_to_be32(event_get_offset(event));
1103         req->starting_lpar_ix = cpu_to_be16(event_get_lpar(event)),
1104         req->max_num_lpars = cpu_to_be16(1);
1105         req->starting_ix = cpu_to_be16(idx);
1106         req->max_ix = cpu_to_be16(1);
1107
1108         return 0;
1109 }
1110
1111 static unsigned long single_24x7_request(struct perf_event *event, u64 *count)
1112 {
1113         unsigned long ret;
1114         struct hv_24x7_request_buffer *request_buffer;
1115         struct hv_24x7_data_result_buffer *result_buffer;
1116
1117         BUILD_BUG_ON(sizeof(*request_buffer) > 4096);
1118         BUILD_BUG_ON(sizeof(*result_buffer) > 4096);
1119
1120         request_buffer = (void *)get_cpu_var(hv_24x7_reqb);
1121         result_buffer = (void *)get_cpu_var(hv_24x7_resb);
1122
1123         init_24x7_request(request_buffer, result_buffer);
1124
1125         ret = add_event_to_24x7_request(event, request_buffer);
1126         if (ret)
1127                 goto out;
1128
1129         ret = make_24x7_request(request_buffer, result_buffer);
1130         if (ret) {
1131                 log_24x7_hcall(request_buffer, result_buffer, ret);
1132                 goto out;
1133         }
1134
1135         /* process result from hcall */
1136         *count = be64_to_cpu(result_buffer->results[0].elements[0].element_data[0]);
1137
1138 out:
1139         put_cpu_var(hv_24x7_reqb);
1140         put_cpu_var(hv_24x7_resb);
1141         return ret;
1142 }
1143
1144
1145 static int h_24x7_event_init(struct perf_event *event)
1146 {
1147         struct hv_perf_caps caps;
1148         unsigned domain;
1149         unsigned long hret;
1150         u64 ct;
1151
1152         /* Not our event */
1153         if (event->attr.type != event->pmu->type)
1154                 return -ENOENT;
1155
1156         /* Unused areas must be 0 */
1157         if (event_get_reserved1(event) ||
1158             event_get_reserved2(event) ||
1159             event_get_reserved3(event)) {
1160                 pr_devel("reserved set when forbidden 0x%llx(0x%llx) 0x%llx(0x%llx) 0x%llx(0x%llx)\n",
1161                                 event->attr.config,
1162                                 event_get_reserved1(event),
1163                                 event->attr.config1,
1164                                 event_get_reserved2(event),
1165                                 event->attr.config2,
1166                                 event_get_reserved3(event));
1167                 return -EINVAL;
1168         }
1169
1170         /* unsupported modes and filters */
1171         if (event->attr.exclude_user   ||
1172             event->attr.exclude_kernel ||
1173             event->attr.exclude_hv     ||
1174             event->attr.exclude_idle   ||
1175             event->attr.exclude_host   ||
1176             event->attr.exclude_guest)
1177                 return -EINVAL;
1178
1179         /* no branch sampling */
1180         if (has_branch_stack(event))
1181                 return -EOPNOTSUPP;
1182
1183         /* offset must be 8 byte aligned */
1184         if (event_get_offset(event) % 8) {
1185                 pr_devel("bad alignment\n");
1186                 return -EINVAL;
1187         }
1188
1189         /* Domains above 6 are invalid */
1190         domain = event_get_domain(event);
1191         if (domain > 6) {
1192                 pr_devel("invalid domain %d\n", domain);
1193                 return -EINVAL;
1194         }
1195
1196         hret = hv_perf_caps_get(&caps);
1197         if (hret) {
1198                 pr_devel("could not get capabilities: rc=%ld\n", hret);
1199                 return -EIO;
1200         }
1201
1202         /* Physical domains & other lpars require extra capabilities */
1203         if (!caps.collect_privileged && (is_physical_domain(domain) ||
1204                 (event_get_lpar(event) != event_get_lpar_max()))) {
1205                 pr_devel("hv permissions disallow: is_physical_domain:%d, lpar=0x%llx\n",
1206                                 is_physical_domain(domain),
1207                                 event_get_lpar(event));
1208                 return -EACCES;
1209         }
1210
1211         /* see if the event complains */
1212         if (single_24x7_request(event, &ct)) {
1213                 pr_devel("test hcall failed\n");
1214                 return -EIO;
1215         }
1216
1217         return 0;
1218 }
1219
1220 static u64 h_24x7_get_value(struct perf_event *event)
1221 {
1222         unsigned long ret;
1223         u64 ct;
1224         ret = single_24x7_request(event, &ct);
1225         if (ret)
1226                 /* We checked this in event init, shouldn't fail here... */
1227                 return 0;
1228
1229         return ct;
1230 }
1231
1232 static void update_event_count(struct perf_event *event, u64 now)
1233 {
1234         s64 prev;
1235
1236         prev = local64_xchg(&event->hw.prev_count, now);
1237         local64_add(now - prev, &event->count);
1238 }
1239
1240 static void h_24x7_event_read(struct perf_event *event)
1241 {
1242         u64 now;
1243         struct hv_24x7_request_buffer *request_buffer;
1244         struct hv_24x7_hw *h24x7hw;
1245         int txn_flags;
1246
1247         txn_flags = __this_cpu_read(hv_24x7_txn_flags);
1248
1249         /*
1250          * If in a READ transaction, add this counter to the list of
1251          * counters to read during the next HCALL (i.e commit_txn()).
1252          * If not in a READ transaction, go ahead and make the HCALL
1253          * to read this counter by itself.
1254          */
1255
1256         if (txn_flags & PERF_PMU_TXN_READ) {
1257                 int i;
1258                 int ret;
1259
1260                 if (__this_cpu_read(hv_24x7_txn_err))
1261                         return;
1262
1263                 request_buffer = (void *)get_cpu_var(hv_24x7_reqb);
1264
1265                 ret = add_event_to_24x7_request(event, request_buffer);
1266                 if (ret) {
1267                         __this_cpu_write(hv_24x7_txn_err, ret);
1268                 } else {
1269                         /*
1270                          * Assoicate the event with the HCALL request index,
1271                          * so ->commit_txn() can quickly find/update count.
1272                          */
1273                         i = request_buffer->num_requests - 1;
1274
1275                         h24x7hw = &get_cpu_var(hv_24x7_hw);
1276                         h24x7hw->events[i] = event;
1277                         put_cpu_var(h24x7hw);
1278                 }
1279
1280                 put_cpu_var(hv_24x7_reqb);
1281         } else {
1282                 now = h_24x7_get_value(event);
1283                 update_event_count(event, now);
1284         }
1285 }
1286
1287 static void h_24x7_event_start(struct perf_event *event, int flags)
1288 {
1289         if (flags & PERF_EF_RELOAD)
1290                 local64_set(&event->hw.prev_count, h_24x7_get_value(event));
1291 }
1292
/*
 * pmu::stop callback (also installed as ->del). @flags is ignored; the
 * event is simply read through h_24x7_event_read().
 */
static void h_24x7_event_stop(struct perf_event *event, int flags)
{
	h_24x7_event_read(event);
}
1297
1298 static int h_24x7_event_add(struct perf_event *event, int flags)
1299 {
1300         if (flags & PERF_EF_START)
1301                 h_24x7_event_start(event, flags);
1302
1303         return 0;
1304 }
1305
1306 /*
1307  * 24x7 counters only support READ transactions. They are
1308  * always counting and dont need/support ADD transactions.
1309  * Cache the flags, but otherwise ignore transactions that
1310  * are not PERF_PMU_TXN_READ.
1311  */
1312 static void h_24x7_event_start_txn(struct pmu *pmu, unsigned int flags)
1313 {
1314         struct hv_24x7_request_buffer *request_buffer;
1315         struct hv_24x7_data_result_buffer *result_buffer;
1316
1317         /* We should not be called if we are already in a txn */
1318         WARN_ON_ONCE(__this_cpu_read(hv_24x7_txn_flags));
1319
1320         __this_cpu_write(hv_24x7_txn_flags, flags);
1321         if (flags & ~PERF_PMU_TXN_READ)
1322                 return;
1323
1324         request_buffer = (void *)get_cpu_var(hv_24x7_reqb);
1325         result_buffer = (void *)get_cpu_var(hv_24x7_resb);
1326
1327         init_24x7_request(request_buffer, result_buffer);
1328
1329         put_cpu_var(hv_24x7_resb);
1330         put_cpu_var(hv_24x7_reqb);
1331 }
1332
1333 /*
1334  * Clean up transaction state.
1335  *
1336  * NOTE: Ignore state of request and result buffers for now.
1337  *       We will initialize them during the next read/txn.
1338  */
1339 static void reset_txn(void)
1340 {
1341         __this_cpu_write(hv_24x7_txn_flags, 0);
1342         __this_cpu_write(hv_24x7_txn_err, 0);
1343 }
1344
1345 /*
1346  * 24x7 counters only support READ transactions. They are always counting
1347  * and dont need/support ADD transactions. Clear ->txn_flags but otherwise
1348  * ignore transactions that are not of type PERF_PMU_TXN_READ.
1349  *
1350  * For READ transactions, submit all pending 24x7 requests (i.e requests
1351  * that were queued by h_24x7_event_read()), to the hypervisor and update
1352  * the event counts.
1353  */
1354 static int h_24x7_event_commit_txn(struct pmu *pmu)
1355 {
1356         struct hv_24x7_request_buffer *request_buffer;
1357         struct hv_24x7_data_result_buffer *result_buffer;
1358         struct hv_24x7_result *resb;
1359         struct perf_event *event;
1360         u64 count;
1361         int i, ret, txn_flags;
1362         struct hv_24x7_hw *h24x7hw;
1363
1364         txn_flags = __this_cpu_read(hv_24x7_txn_flags);
1365         WARN_ON_ONCE(!txn_flags);
1366
1367         ret = 0;
1368         if (txn_flags & ~PERF_PMU_TXN_READ)
1369                 goto out;
1370
1371         ret = __this_cpu_read(hv_24x7_txn_err);
1372         if (ret)
1373                 goto out;
1374
1375         request_buffer = (void *)get_cpu_var(hv_24x7_reqb);
1376         result_buffer = (void *)get_cpu_var(hv_24x7_resb);
1377
1378         ret = make_24x7_request(request_buffer, result_buffer);
1379         if (ret) {
1380                 log_24x7_hcall(request_buffer, result_buffer, ret);
1381                 goto put_reqb;
1382         }
1383
1384         h24x7hw = &get_cpu_var(hv_24x7_hw);
1385
1386         /* Update event counts from hcall */
1387         for (i = 0; i < request_buffer->num_requests; i++) {
1388                 resb = &result_buffer->results[i];
1389                 count = be64_to_cpu(resb->elements[0].element_data[0]);
1390                 event = h24x7hw->events[i];
1391                 h24x7hw->events[i] = NULL;
1392                 update_event_count(event, count);
1393         }
1394
1395         put_cpu_var(hv_24x7_hw);
1396
1397 put_reqb:
1398         put_cpu_var(hv_24x7_resb);
1399         put_cpu_var(hv_24x7_reqb);
1400 out:
1401         reset_txn();
1402         return ret;
1403 }
1404
1405 /*
1406  * 24x7 counters only support READ transactions. They are always counting
1407  * and dont need/support ADD transactions. However, regardless of type
1408  * of transaction, all we need to do is cleanup, so we don't have to check
1409  * the type of transaction.
1410  */
1411 static void h_24x7_event_cancel_txn(struct pmu *pmu)
1412 {
1413         WARN_ON_ONCE(!__this_cpu_read(hv_24x7_txn_flags));
1414         reset_txn();
1415 }
1416
1417 static struct pmu h_24x7_pmu = {
1418         .task_ctx_nr = perf_invalid_context,
1419
1420         .name = "hv_24x7",
1421         .attr_groups = attr_groups,
1422         .event_init  = h_24x7_event_init,
1423         .add         = h_24x7_event_add,
1424         .del         = h_24x7_event_stop,
1425         .start       = h_24x7_event_start,
1426         .stop        = h_24x7_event_stop,
1427         .read        = h_24x7_event_read,
1428         .start_txn   = h_24x7_event_start_txn,
1429         .commit_txn  = h_24x7_event_commit_txn,
1430         .cancel_txn  = h_24x7_event_cancel_txn,
1431 };
1432
1433 static int hv_24x7_init(void)
1434 {
1435         int r;
1436         unsigned long hret;
1437         struct hv_perf_caps caps;
1438
1439         if (!firmware_has_feature(FW_FEATURE_LPAR)) {
1440                 pr_debug("not a virtualized system, not enabling\n");
1441                 return -ENODEV;
1442         }
1443
1444         hret = hv_perf_caps_get(&caps);
1445         if (hret) {
1446                 pr_debug("could not obtain capabilities, not enabling, rc=%ld\n",
1447                                 hret);
1448                 return -ENODEV;
1449         }
1450
1451         hv_page_cache = kmem_cache_create("hv-page-4096", 4096, 4096, 0, NULL);
1452         if (!hv_page_cache)
1453                 return -ENOMEM;
1454
1455         /* sampling not supported */
1456         h_24x7_pmu.capabilities |= PERF_PMU_CAP_NO_INTERRUPT;
1457
1458         r = create_events_from_catalog(&event_group.attrs,
1459                                    &event_desc_group.attrs,
1460                                    &event_long_desc_group.attrs);
1461
1462         if (r)
1463                 return r;
1464
1465         r = perf_pmu_register(&h_24x7_pmu, h_24x7_pmu.name, -1);
1466         if (r)
1467                 return r;
1468
1469         return 0;
1470 }
1471
1472 device_initcall(hv_24x7_init);