perf/x86/intel/uncore: Fix error handling
[cascardo/linux.git] arch/x86/events/intel/uncore.c
1 #include "uncore.h"
2
3 static struct intel_uncore_type *empty_uncore[] = { NULL, };
4 struct intel_uncore_type **uncore_msr_uncores = empty_uncore;
5 struct intel_uncore_type **uncore_pci_uncores = empty_uncore;
6
7 static bool pcidrv_registered;
8 struct pci_driver *uncore_pci_driver;
9 /* pci bus to socket mapping */
10 DEFINE_RAW_SPINLOCK(pci2phy_map_lock);
11 struct list_head pci2phy_map_head = LIST_HEAD_INIT(pci2phy_map_head);
12 struct pci_dev *uncore_extra_pci_dev[UNCORE_SOCKET_MAX][UNCORE_EXTRA_PCI_DEV_MAX];
13
14 static DEFINE_RAW_SPINLOCK(uncore_box_lock);
15 /* mask of cpus that collect uncore events */
16 static cpumask_t uncore_cpu_mask;
17
18 /* constraint for the fixed counter */
19 static struct event_constraint uncore_constraint_fixed =
20         EVENT_CONSTRAINT(~0ULL, 1 << UNCORE_PMC_IDX_FIXED, ~0ULL);
21 struct event_constraint uncore_constraint_empty =
22         EVENT_CONSTRAINT(0, 0, 0);
23
24 int uncore_pcibus_to_physid(struct pci_bus *bus)
25 {
26         struct pci2phy_map *map;
27         int phys_id = -1;
28
29         raw_spin_lock(&pci2phy_map_lock);
30         list_for_each_entry(map, &pci2phy_map_head, list) {
31                 if (map->segment == pci_domain_nr(bus)) {
32                         phys_id = map->pbus_to_physid[bus->number];
33                         break;
34                 }
35         }
36         raw_spin_unlock(&pci2phy_map_lock);
37
38         return phys_id;
39 }
40
41 static void uncore_free_pcibus_map(void)
42 {
43         struct pci2phy_map *map, *tmp;
44
45         list_for_each_entry_safe(map, tmp, &pci2phy_map_head, list) {
46                 list_del(&map->list);
47                 kfree(map);
48         }
49 }
50
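/*
 * Look up (and lazily create) the segment -> bus -> physid map for a PCI
 * segment.  The caller must hold pci2phy_map_lock.  Because the allocation
 * uses GFP_KERNEL and may sleep, the lock is dropped around kmalloc() and
 * the lookup is retried afterwards in case another CPU inserted the same
 * segment in the meantime; any spare allocation is freed at 'end'.
 */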
51 struct pci2phy_map *__find_pci2phy_map(int segment)
52 {
53         struct pci2phy_map *map, *alloc = NULL;
54         int i;
55
56         lockdep_assert_held(&pci2phy_map_lock);
57
58 lookup:
59         list_for_each_entry(map, &pci2phy_map_head, list) {
60                 if (map->segment == segment)
61                         goto end;
62         }
63
64         if (!alloc) {
65                 raw_spin_unlock(&pci2phy_map_lock);
66                 alloc = kmalloc(sizeof(struct pci2phy_map), GFP_KERNEL);
67                 raw_spin_lock(&pci2phy_map_lock);
68
69                 if (!alloc)
70                         return NULL;
71
72                 goto lookup;
73         }
74
75         map = alloc;
76         alloc = NULL;
77         map->segment = segment;
78         for (i = 0; i < 256; i++)
79                 map->pbus_to_physid[i] = -1;
80         list_add_tail(&map->list, &pci2phy_map_head);
81
82 end:
83         kfree(alloc);
84         return map;
85 }
86
87 ssize_t uncore_event_show(struct kobject *kobj,
88                           struct kobj_attribute *attr, char *buf)
89 {
90         struct uncore_event_desc *event =
91                 container_of(attr, struct uncore_event_desc, attr);
92         return sprintf(buf, "%s", event->config);
93 }
94
95 struct intel_uncore_pmu *uncore_event_to_pmu(struct perf_event *event)
96 {
97         return container_of(event->pmu, struct intel_uncore_pmu, pmu);
98 }
99
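/*
 * Return the uncore box that collects events of this pmu type for the
 * physical package that 'cpu' belongs to.  The per-cpu pointer acts as a
 * cache; on a miss the box list is searched under uncore_box_lock and the
 * matching box's refcount is bumped before it is cached for this cpu.
 */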
100 struct intel_uncore_box *uncore_pmu_to_box(struct intel_uncore_pmu *pmu, int cpu)
101 {
102         struct intel_uncore_box *box;
103
104         box = *per_cpu_ptr(pmu->box, cpu);
105         if (box)
106                 return box;
107
108         raw_spin_lock(&uncore_box_lock);
109         /* Recheck under the lock to handle races. */
110         if (*per_cpu_ptr(pmu->box, cpu))
111                 goto out;
112         list_for_each_entry(box, &pmu->box_list, list) {
113                 if (box->phys_id == topology_physical_package_id(cpu)) {
114                         atomic_inc(&box->refcnt);
115                         *per_cpu_ptr(pmu->box, cpu) = box;
116                         break;
117                 }
118         }
119 out:
120         raw_spin_unlock(&uncore_box_lock);
121
122         return *per_cpu_ptr(pmu->box, cpu);
123 }
124
125 struct intel_uncore_box *uncore_event_to_box(struct perf_event *event)
126 {
127         /*
128          * The perf core schedules events on a per-cpu basis, but uncore
129          * events are collected by one of the cpus inside a physical package.
130          */
131         return uncore_pmu_to_box(uncore_event_to_pmu(event), smp_processor_id());
132 }
133
134 u64 uncore_msr_read_counter(struct intel_uncore_box *box, struct perf_event *event)
135 {
136         u64 count;
137
138         rdmsrl(event->hw.event_base, count);
139
140         return count;
141 }
142
143 /*
144  * generic get constraint function for shared match/mask registers.
145  */
146 struct event_constraint *
147 uncore_get_constraint(struct intel_uncore_box *box, struct perf_event *event)
148 {
149         struct intel_uncore_extra_reg *er;
150         struct hw_perf_event_extra *reg1 = &event->hw.extra_reg;
151         struct hw_perf_event_extra *reg2 = &event->hw.branch_reg;
152         unsigned long flags;
153         bool ok = false;
154
155         /*
156          * reg->alloc can be set due to existing state, so for fake box we
157          * need to ignore this, otherwise we might fail to allocate proper
158          * fake state for this extra reg constraint.
159          */
160         if (reg1->idx == EXTRA_REG_NONE ||
161             (!uncore_box_is_fake(box) && reg1->alloc))
162                 return NULL;
163
164         er = &box->shared_regs[reg1->idx];
165         raw_spin_lock_irqsave(&er->lock, flags);
166         if (!atomic_read(&er->ref) ||
167             (er->config1 == reg1->config && er->config2 == reg2->config)) {
168                 atomic_inc(&er->ref);
169                 er->config1 = reg1->config;
170                 er->config2 = reg2->config;
171                 ok = true;
172         }
173         raw_spin_unlock_irqrestore(&er->lock, flags);
174
175         if (ok) {
176                 if (!uncore_box_is_fake(box))
177                         reg1->alloc = 1;
178                 return NULL;
179         }
180
181         return &uncore_constraint_empty;
182 }
183
184 void uncore_put_constraint(struct intel_uncore_box *box, struct perf_event *event)
185 {
186         struct intel_uncore_extra_reg *er;
187         struct hw_perf_event_extra *reg1 = &event->hw.extra_reg;
188
189         /*
190          * Only put the constraint if the extra reg was actually allocated.
191          * This also takes care of events which do not use an extra shared reg.
192          *
193          * Also, if this is a fake box we shouldn't touch any event state
194          * (reg->alloc) and we don't care about leaving inconsistent box
195          * state either since it will be thrown out.
196          */
197         if (uncore_box_is_fake(box) || !reg1->alloc)
198                 return;
199
200         er = &box->shared_regs[reg1->idx];
201         atomic_dec(&er->ref);
202         reg1->alloc = 0;
203 }
204
205 u64 uncore_shared_reg_config(struct intel_uncore_box *box, int idx)
206 {
207         struct intel_uncore_extra_reg *er;
208         unsigned long flags;
209         u64 config;
210
211         er = &box->shared_regs[idx];
212
213         raw_spin_lock_irqsave(&er->lock, flags);
214         config = er->config;
215         raw_spin_unlock_irqrestore(&er->lock, flags);
216
217         return config;
218 }
219
220 static void uncore_assign_hw_event(struct intel_uncore_box *box, struct perf_event *event, int idx)
221 {
222         struct hw_perf_event *hwc = &event->hw;
223
224         hwc->idx = idx;
225         hwc->last_tag = ++box->tags[idx];
226
227         if (hwc->idx == UNCORE_PMC_IDX_FIXED) {
228                 hwc->event_base = uncore_fixed_ctr(box);
229                 hwc->config_base = uncore_fixed_ctl(box);
230                 return;
231         }
232
233         hwc->config_base = uncore_event_ctl(box, hwc->idx);
234         hwc->event_base  = uncore_perf_ctr(box, hwc->idx);
235 }
236
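/*
 * Fold the new raw counter value into event->count.  Uncore counters are
 * narrower than 64 bit, so both values are shifted up to bit 63 and back
 * again; the subtraction then wraps correctly even when the hardware
 * counter has rolled over.  For example (hypothetical 48-bit counter,
 * shift = 16): prev = 0xffffffffffff, new = 0x5 yields a delta of 6
 * instead of a huge bogus value.
 */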
237 void uncore_perf_event_update(struct intel_uncore_box *box, struct perf_event *event)
238 {
239         u64 prev_count, new_count, delta;
240         int shift;
241
242         if (event->hw.idx >= UNCORE_PMC_IDX_FIXED)
243                 shift = 64 - uncore_fixed_ctr_bits(box);
244         else
245                 shift = 64 - uncore_perf_ctr_bits(box);
246
247         /* the hrtimer might modify the previous event value */
248 again:
249         prev_count = local64_read(&event->hw.prev_count);
250         new_count = uncore_read_counter(box, event);
251         if (local64_xchg(&event->hw.prev_count, new_count) != prev_count)
252                 goto again;
253
254         delta = (new_count << shift) - (prev_count << shift);
255         delta >>= shift;
256
257         local64_add(delta, &event->count);
258 }
259
260 /*
261  * The overflow interrupt is unavailable for SandyBridge-EP and broken
262  * for SandyBridge, so we use an hrtimer to periodically poll the
263  * counters before they overflow.
264  */
265 static enum hrtimer_restart uncore_pmu_hrtimer(struct hrtimer *hrtimer)
266 {
267         struct intel_uncore_box *box;
268         struct perf_event *event;
269         unsigned long flags;
270         int bit;
271
272         box = container_of(hrtimer, struct intel_uncore_box, hrtimer);
273         if (!box->n_active || box->cpu != smp_processor_id())
274                 return HRTIMER_NORESTART;
275         /*
276          * disable local interrupts to prevent uncore_pmu_event_start/stop
277          * from interrupting the update process
278          */
279         local_irq_save(flags);
280
281         /*
282          * handle boxes with an active event list as opposed to active
283          * counters
284          */
285         list_for_each_entry(event, &box->active_list, active_entry) {
286                 uncore_perf_event_update(box, event);
287         }
288
289         for_each_set_bit(bit, box->active_mask, UNCORE_PMC_IDX_MAX)
290                 uncore_perf_event_update(box, box->events[bit]);
291
292         local_irq_restore(flags);
293
294         hrtimer_forward_now(hrtimer, ns_to_ktime(box->hrtimer_duration));
295         return HRTIMER_RESTART;
296 }
297
298 void uncore_pmu_start_hrtimer(struct intel_uncore_box *box)
299 {
300         hrtimer_start(&box->hrtimer, ns_to_ktime(box->hrtimer_duration),
301                       HRTIMER_MODE_REL_PINNED);
302 }
303
304 void uncore_pmu_cancel_hrtimer(struct intel_uncore_box *box)
305 {
306         hrtimer_cancel(&box->hrtimer);
307 }
308
309 static void uncore_pmu_init_hrtimer(struct intel_uncore_box *box)
310 {
311         hrtimer_init(&box->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
312         box->hrtimer.function = uncore_pmu_hrtimer;
313 }
314
315 static struct intel_uncore_box *uncore_alloc_box(struct intel_uncore_type *type, int node)
316 {
317         struct intel_uncore_box *box;
318         int i, size;
319
320         size = sizeof(*box) + type->num_shared_regs * sizeof(struct intel_uncore_extra_reg);
321
322         box = kzalloc_node(size, GFP_KERNEL, node);
323         if (!box)
324                 return NULL;
325
326         for (i = 0; i < type->num_shared_regs; i++)
327                 raw_spin_lock_init(&box->shared_regs[i].lock);
328
329         uncore_pmu_init_hrtimer(box);
330         atomic_set(&box->refcnt, 1);
331         box->cpu = -1;
332         box->phys_id = -1;
333
334         /* set default hrtimer timeout */
335         box->hrtimer_duration = UNCORE_PMU_HRTIMER_INTERVAL;
336
337         INIT_LIST_HEAD(&box->active_list);
338
339         return box;
340 }
341
342 /*
343  * Use the uncore_pmu_event_init() pmu event_init callback
344  * as a detection point for uncore events.
345  */
346 static int uncore_pmu_event_init(struct perf_event *event);
347
348 static bool is_uncore_event(struct perf_event *event)
349 {
350         return event->pmu->event_init == uncore_pmu_event_init;
351 }
352
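/*
 * Collect the leader (and, if 'dogrp' is set, its active uncore siblings)
 * into box->event_list starting at box->n_events.  Returns the new number
 * of collected events, or -EINVAL if the box would exceed its counters
 * (num_counters plus one fixed counter if present).
 */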
353 static int
354 uncore_collect_events(struct intel_uncore_box *box, struct perf_event *leader, bool dogrp)
355 {
356         struct perf_event *event;
357         int n, max_count;
358
359         max_count = box->pmu->type->num_counters;
360         if (box->pmu->type->fixed_ctl)
361                 max_count++;
362
363         if (box->n_events >= max_count)
364                 return -EINVAL;
365
366         n = box->n_events;
367
368         if (is_uncore_event(leader)) {
369                 box->event_list[n] = leader;
370                 n++;
371         }
372
373         if (!dogrp)
374                 return n;
375
376         list_for_each_entry(event, &leader->sibling_list, group_entry) {
377                 if (!is_uncore_event(event) ||
378                     event->state <= PERF_EVENT_STATE_OFF)
379                         continue;
380
381                 if (n >= max_count)
382                         return -EINVAL;
383
384                 box->event_list[n] = event;
385                 n++;
386         }
387         return n;
388 }
389
390 static struct event_constraint *
391 uncore_get_event_constraint(struct intel_uncore_box *box, struct perf_event *event)
392 {
393         struct intel_uncore_type *type = box->pmu->type;
394         struct event_constraint *c;
395
396         if (type->ops->get_constraint) {
397                 c = type->ops->get_constraint(box, event);
398                 if (c)
399                         return c;
400         }
401
402         if (event->attr.config == UNCORE_FIXED_EVENT)
403                 return &uncore_constraint_fixed;
404
405         if (type->constraints) {
406                 for_each_event_constraint(c, type->constraints) {
407                         if ((event->hw.config & c->cmask) == c->code)
408                                 return c;
409                 }
410         }
411
412         return &type->unconstrainted;
413 }
414
415 static void uncore_put_event_constraint(struct intel_uncore_box *box, struct perf_event *event)
416 {
417         if (box->pmu->type->ops->put_constraint)
418                 box->pmu->type->ops->put_constraint(box, event);
419 }
420
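/*
 * Assign the collected events to counters.  The fast path keeps an event
 * on the counter it already occupies as long as its constraint still
 * allows that index and no other event has claimed it; otherwise fall
 * back to perf_assign_events(), which solves the assignment ordered by
 * constraint weight.  On failure (or when called without an 'assign'
 * array, i.e. for group validation) the constraints taken above are
 * released again.
 */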
421 static int uncore_assign_events(struct intel_uncore_box *box, int assign[], int n)
422 {
423         unsigned long used_mask[BITS_TO_LONGS(UNCORE_PMC_IDX_MAX)];
424         struct event_constraint *c;
425         int i, wmin, wmax, ret = 0;
426         struct hw_perf_event *hwc;
427
428         bitmap_zero(used_mask, UNCORE_PMC_IDX_MAX);
429
430         for (i = 0, wmin = UNCORE_PMC_IDX_MAX, wmax = 0; i < n; i++) {
431                 c = uncore_get_event_constraint(box, box->event_list[i]);
432                 box->event_constraint[i] = c;
433                 wmin = min(wmin, c->weight);
434                 wmax = max(wmax, c->weight);
435         }
436
437         /* fastpath, try to reuse previous register */
438         for (i = 0; i < n; i++) {
439                 hwc = &box->event_list[i]->hw;
440                 c = box->event_constraint[i];
441
442                 /* never assigned */
443                 if (hwc->idx == -1)
444                         break;
445
446                 /* constraint still honored */
447                 if (!test_bit(hwc->idx, c->idxmsk))
448                         break;
449
450                 /* not already used */
451                 if (test_bit(hwc->idx, used_mask))
452                         break;
453
454                 __set_bit(hwc->idx, used_mask);
455                 if (assign)
456                         assign[i] = hwc->idx;
457         }
458         /* slow path */
459         if (i != n)
460                 ret = perf_assign_events(box->event_constraint, n,
461                                          wmin, wmax, n, assign);
462
463         if (!assign || ret) {
464                 for (i = 0; i < n; i++)
465                         uncore_put_event_constraint(box, box->event_list[i]);
466         }
467         return ret ? -EINVAL : 0;
468 }
469
470 static void uncore_pmu_event_start(struct perf_event *event, int flags)
471 {
472         struct intel_uncore_box *box = uncore_event_to_box(event);
473         int idx = event->hw.idx;
474
475         if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED)))
476                 return;
477
478         if (WARN_ON_ONCE(idx == -1 || idx >= UNCORE_PMC_IDX_MAX))
479                 return;
480
481         event->hw.state = 0;
482         box->events[idx] = event;
483         box->n_active++;
484         __set_bit(idx, box->active_mask);
485
486         local64_set(&event->hw.prev_count, uncore_read_counter(box, event));
487         uncore_enable_event(box, event);
488
489         if (box->n_active == 1) {
490                 uncore_enable_box(box);
491                 uncore_pmu_start_hrtimer(box);
492         }
493 }
494
495 static void uncore_pmu_event_stop(struct perf_event *event, int flags)
496 {
497         struct intel_uncore_box *box = uncore_event_to_box(event);
498         struct hw_perf_event *hwc = &event->hw;
499
500         if (__test_and_clear_bit(hwc->idx, box->active_mask)) {
501                 uncore_disable_event(box, event);
502                 box->n_active--;
503                 box->events[hwc->idx] = NULL;
504                 WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED);
505                 hwc->state |= PERF_HES_STOPPED;
506
507                 if (box->n_active == 0) {
508                         uncore_disable_box(box);
509                         uncore_pmu_cancel_hrtimer(box);
510                 }
511         }
512
513         if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) {
514                 /*
515                  * Drain the remaining delta count out of an event
516                  * that we are disabling:
517                  */
518                 uncore_perf_event_update(box, event);
519                 hwc->state |= PERF_HES_UPTODATE;
520         }
521 }
522
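/*
 * Add an event to the box: collect it into the event list, compute a new
 * counter assignment, stop any already-programmed events whose counter
 * changed, reprogram them on their new counters and (re)start everything
 * that is not marked PERF_HES_ARCH.  Events whose (idx, last_tag) pair is
 * unchanged are left running untouched.
 */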
523 static int uncore_pmu_event_add(struct perf_event *event, int flags)
524 {
525         struct intel_uncore_box *box = uncore_event_to_box(event);
526         struct hw_perf_event *hwc = &event->hw;
527         int assign[UNCORE_PMC_IDX_MAX];
528         int i, n, ret;
529
530         if (!box)
531                 return -ENODEV;
532
533         ret = n = uncore_collect_events(box, event, false);
534         if (ret < 0)
535                 return ret;
536
537         hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
538         if (!(flags & PERF_EF_START))
539                 hwc->state |= PERF_HES_ARCH;
540
541         ret = uncore_assign_events(box, assign, n);
542         if (ret)
543                 return ret;
544
545         /* save events moving to new counters */
546         for (i = 0; i < box->n_events; i++) {
547                 event = box->event_list[i];
548                 hwc = &event->hw;
549
550                 if (hwc->idx == assign[i] &&
551                         hwc->last_tag == box->tags[assign[i]])
552                         continue;
553                 /*
554                  * Ensure we don't accidentally enable a stopped
555                  * counter simply because we rescheduled.
556                  */
557                 if (hwc->state & PERF_HES_STOPPED)
558                         hwc->state |= PERF_HES_ARCH;
559
560                 uncore_pmu_event_stop(event, PERF_EF_UPDATE);
561         }
562
563         /* reprogram moved events into new counters */
564         for (i = 0; i < n; i++) {
565                 event = box->event_list[i];
566                 hwc = &event->hw;
567
568                 if (hwc->idx != assign[i] ||
569                         hwc->last_tag != box->tags[assign[i]])
570                         uncore_assign_hw_event(box, event, assign[i]);
571                 else if (i < box->n_events)
572                         continue;
573
574                 if (hwc->state & PERF_HES_ARCH)
575                         continue;
576
577                 uncore_pmu_event_start(event, 0);
578         }
579         box->n_events = n;
580
581         return 0;
582 }
583
584 static void uncore_pmu_event_del(struct perf_event *event, int flags)
585 {
586         struct intel_uncore_box *box = uncore_event_to_box(event);
587         int i;
588
589         uncore_pmu_event_stop(event, PERF_EF_UPDATE);
590
591         for (i = 0; i < box->n_events; i++) {
592                 if (event == box->event_list[i]) {
593                         uncore_put_event_constraint(box, event);
594
595                         while (++i < box->n_events)
596                                 box->event_list[i - 1] = box->event_list[i];
597
598                         --box->n_events;
599                         break;
600                 }
601         }
602
603         event->hw.idx = -1;
604         event->hw.last_tag = ~0ULL;
605 }
606
607 void uncore_pmu_event_read(struct perf_event *event)
608 {
609         struct intel_uncore_box *box = uncore_event_to_box(event);
610         uncore_perf_event_update(box, event);
611 }
612
613 /*
614  * Validation ensures that the group could be loaded onto the
615  * PMU if it were the only group available.
616  */
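/*
 * The check runs on a throw-away "fake" box: it is allocated like a real
 * box but never wired up to hardware, so its phys_id stays -1 (which is,
 * presumably, what uncore_box_is_fake() keys off) and its hrtimer is
 * never started.  It can therefore simply be kfree()d at the end.
 */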
617 static int uncore_validate_group(struct intel_uncore_pmu *pmu,
618                                 struct perf_event *event)
619 {
620         struct perf_event *leader = event->group_leader;
621         struct intel_uncore_box *fake_box;
622         int ret = -EINVAL, n;
623
624         fake_box = uncore_alloc_box(pmu->type, NUMA_NO_NODE);
625         if (!fake_box)
626                 return -ENOMEM;
627
628         fake_box->pmu = pmu;
629         /*
630          * The event is not yet connected with its
631          * siblings, therefore we must first collect the
632          * existing siblings and then add the new event
633          * before we can simulate the scheduling.
634          */
635         n = uncore_collect_events(fake_box, leader, true);
636         if (n < 0)
637                 goto out;
638
639         fake_box->n_events = n;
640         n = uncore_collect_events(fake_box, event, false);
641         if (n < 0)
642                 goto out;
643
644         fake_box->n_events = n;
645
646         ret = uncore_assign_events(fake_box, NULL, n);
647 out:
648         kfree(fake_box);
649         return ret;
650 }
651
652 static int uncore_pmu_event_init(struct perf_event *event)
653 {
654         struct intel_uncore_pmu *pmu;
655         struct intel_uncore_box *box;
656         struct hw_perf_event *hwc = &event->hw;
657         int ret;
658
659         if (event->attr.type != event->pmu->type)
660                 return -ENOENT;
661
662         pmu = uncore_event_to_pmu(event);
663         /* no device found for this pmu */
664         if (pmu->func_id < 0)
665                 return -ENOENT;
666
667         /*
668          * The uncore PMU always measures at all privilege levels,
669          * so it doesn't make sense to specify any exclude bits.
670          */
671         if (event->attr.exclude_user || event->attr.exclude_kernel ||
672                         event->attr.exclude_hv || event->attr.exclude_idle)
673                 return -EINVAL;
674
675         /* Sampling not supported yet */
676         if (hwc->sample_period)
677                 return -EINVAL;
678
679         /*
680          * Place all uncore events for a particular physical package
681          * onto a single cpu
682          */
683         if (event->cpu < 0)
684                 return -EINVAL;
685         box = uncore_pmu_to_box(pmu, event->cpu);
686         if (!box || box->cpu < 0)
687                 return -EINVAL;
688         event->cpu = box->cpu;
689
690         event->hw.idx = -1;
691         event->hw.last_tag = ~0ULL;
692         event->hw.extra_reg.idx = EXTRA_REG_NONE;
693         event->hw.branch_reg.idx = EXTRA_REG_NONE;
694
695         if (event->attr.config == UNCORE_FIXED_EVENT) {
696                 /* no fixed counter */
697                 if (!pmu->type->fixed_ctl)
698                         return -EINVAL;
699                 /*
700                  * if there is only one fixed counter, only the first pmu
701                  * can access the fixed counter
702                  */
703                 if (pmu->type->single_fixed && pmu->pmu_idx > 0)
704                         return -EINVAL;
705
706                 /* fixed counters have event field hardcoded to zero */
707                 hwc->config = 0ULL;
708         } else {
709                 hwc->config = event->attr.config & pmu->type->event_mask;
710                 if (pmu->type->ops->hw_config) {
711                         ret = pmu->type->ops->hw_config(box, event);
712                         if (ret)
713                                 return ret;
714                 }
715         }
716
717         if (event->group_leader != event)
718                 ret = uncore_validate_group(pmu, event);
719         else
720                 ret = 0;
721
722         return ret;
723 }
724
725 static ssize_t uncore_get_attr_cpumask(struct device *dev,
726                                 struct device_attribute *attr, char *buf)
727 {
728         return cpumap_print_to_pagebuf(true, buf, &uncore_cpu_mask);
729 }
730
731 static DEVICE_ATTR(cpumask, S_IRUGO, uncore_get_attr_cpumask, NULL);
732
733 static struct attribute *uncore_pmu_attrs[] = {
734         &dev_attr_cpumask.attr,
735         NULL,
736 };
737
738 static struct attribute_group uncore_pmu_attr_group = {
739         .attrs = uncore_pmu_attrs,
740 };
741
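/*
 * Register one perf PMU per box index.  Types that do not provide their
 * own struct pmu get the generic callbacks below; the resulting PMUs show
 * up as perf event sources named "uncore" (single unnamed box),
 * "uncore_<type>" (single box) or "uncore_<type>_<idx>" (multiple boxes).
 */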
742 static int uncore_pmu_register(struct intel_uncore_pmu *pmu)
743 {
744         int ret;
745
746         if (!pmu->type->pmu) {
747                 pmu->pmu = (struct pmu) {
748                         .attr_groups    = pmu->type->attr_groups,
749                         .task_ctx_nr    = perf_invalid_context,
750                         .event_init     = uncore_pmu_event_init,
751                         .add            = uncore_pmu_event_add,
752                         .del            = uncore_pmu_event_del,
753                         .start          = uncore_pmu_event_start,
754                         .stop           = uncore_pmu_event_stop,
755                         .read           = uncore_pmu_event_read,
756                 };
757         } else {
758                 pmu->pmu = *pmu->type->pmu;
759                 pmu->pmu.attr_groups = pmu->type->attr_groups;
760         }
761
762         if (pmu->type->num_boxes == 1) {
763                 if (strlen(pmu->type->name) > 0)
764                         sprintf(pmu->name, "uncore_%s", pmu->type->name);
765                 else
766                         sprintf(pmu->name, "uncore");
767         } else {
768                 sprintf(pmu->name, "uncore_%s_%d", pmu->type->name,
769                         pmu->pmu_idx);
770         }
771
772         ret = perf_pmu_register(&pmu->pmu, pmu->name, -1);
773         if (!ret)
774                 pmu->registered = true;
775         return ret;
776 }
777
778 static void uncore_pmu_unregister(struct intel_uncore_pmu *pmu)
779 {
780         if (!pmu->registered)
781                 return;
782         perf_pmu_unregister(&pmu->pmu);
783         pmu->registered = false;
784 }
785
786 static void __init uncore_type_exit(struct intel_uncore_type *type)
787 {
788         int i;
789
790         if (type->pmus) {
791                 for (i = 0; i < type->num_boxes; i++) {
792                         uncore_pmu_unregister(&type->pmus[i]);
793                         free_percpu(type->pmus[i].box);
794                 }
795                 kfree(type->pmus);
796                 type->pmus = NULL;
797         }
798         kfree(type->events_group);
799         type->events_group = NULL;
800 }
801
802 static void __init uncore_types_exit(struct intel_uncore_type **types)
803 {
804         int i;
805
806         for (i = 0; types[i]; i++)
807                 uncore_type_exit(types[i]);
808 }
809
810 static int __init uncore_type_init(struct intel_uncore_type *type)
811 {
812         struct intel_uncore_pmu *pmus;
813         struct attribute_group *attr_group;
814         struct attribute **attrs;
815         int i, j;
816
817         pmus = kzalloc(sizeof(*pmus) * type->num_boxes, GFP_KERNEL);
818         if (!pmus)
819                 return -ENOMEM;
820
821         type->pmus = pmus;
822
823         type->unconstrainted = (struct event_constraint)
824                 __EVENT_CONSTRAINT(0, (1ULL << type->num_counters) - 1,
825                                 0, type->num_counters, 0, 0);
826
827         for (i = 0; i < type->num_boxes; i++) {
828                 pmus[i].func_id = -1;
829                 pmus[i].pmu_idx = i;
830                 pmus[i].type = type;
831                 INIT_LIST_HEAD(&pmus[i].box_list);
832                 pmus[i].box = alloc_percpu(struct intel_uncore_box *);
833                 if (!pmus[i].box)
834                         return -ENOMEM;
835         }
836
837         if (type->event_descs) {
838                 i = 0;
839                 while (type->event_descs[i].attr.attr.name)
840                         i++;
841
842                 attr_group = kzalloc(sizeof(struct attribute *) * (i + 1) +
843                                         sizeof(*attr_group), GFP_KERNEL);
844                 if (!attr_group)
845                         return -ENOMEM;
846
847                 attrs = (struct attribute **)(attr_group + 1);
848                 attr_group->name = "events";
849                 attr_group->attrs = attrs;
850
851                 for (j = 0; j < i; j++)
852                         attrs[j] = &type->event_descs[j].attr.attr;
853
854                 type->events_group = attr_group;
855         }
856
857         type->pmu_group = &uncore_pmu_attr_group;
858         return 0;
859 }
860
861 static int __init uncore_types_init(struct intel_uncore_type **types)
862 {
863         int i, ret;
864
865         for (i = 0; types[i]; i++) {
866                 ret = uncore_type_init(types[i]);
867                 if (ret)
868                         return ret;
869         }
870         return 0;
871 }
872
873 /*
874  * add a pci uncore device
875  */
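/*
 * Devices tagged UNCORE_EXTRA_PCI_DEV are only remembered in
 * uncore_extra_pci_dev[] for later use.  For real PMU devices a box is
 * allocated and added to the pmu's box list; the PMU itself is registered
 * only when its first box shows up, and if that registration fails the
 * freshly added box is unhooked and freed again.
 */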
876 static int uncore_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
877 {
878         struct intel_uncore_pmu *pmu;
879         struct intel_uncore_box *box;
880         struct intel_uncore_type *type;
881         bool first_box = false;
882         int phys_id, ret;
883
884         phys_id = uncore_pcibus_to_physid(pdev->bus);
885         if (phys_id < 0)
886                 return -ENODEV;
887
888         if (UNCORE_PCI_DEV_TYPE(id->driver_data) == UNCORE_EXTRA_PCI_DEV) {
889                 int idx = UNCORE_PCI_DEV_IDX(id->driver_data);
890                 uncore_extra_pci_dev[phys_id][idx] = pdev;
891                 pci_set_drvdata(pdev, NULL);
892                 return 0;
893         }
894
895         type = uncore_pci_uncores[UNCORE_PCI_DEV_TYPE(id->driver_data)];
896         box = uncore_alloc_box(type, NUMA_NO_NODE);
897         if (!box)
898                 return -ENOMEM;
899
900         /*
901          * for a performance monitoring unit with multiple boxes,
902          * each box has a different function id.
903          */
904         pmu = &type->pmus[UNCORE_PCI_DEV_IDX(id->driver_data)];
905         /* Knights Landing uses a common PCI device ID for multiple instances of
906          * an uncore PMU device type. There is only one entry per device type in
907          * the knl_uncore_pci_ids table in spite of multiple devices present for
908          * some device types. Hence PCI device idx would be 0 for all devices.
909          * So increment pmu pointer to point to an unused array element.
910          */
911         if (boot_cpu_data.x86_model == 87)
912                 while (pmu->func_id >= 0)
913                         pmu++;
914         if (pmu->func_id < 0)
915                 pmu->func_id = pdev->devfn;
916         else
917                 WARN_ON_ONCE(pmu->func_id != pdev->devfn);
918
919         box->phys_id = phys_id;
920         box->pci_dev = pdev;
921         box->pmu = pmu;
922         uncore_box_init(box);
923         pci_set_drvdata(pdev, box);
924
925         raw_spin_lock(&uncore_box_lock);
926         if (list_empty(&pmu->box_list))
927                 first_box = true;
928         list_add_tail(&box->list, &pmu->box_list);
929         raw_spin_unlock(&uncore_box_lock);
930
931         if (!first_box)
932                 return 0;
933
934         ret = uncore_pmu_register(pmu);
935         if (ret) {
936                 pci_set_drvdata(pdev, NULL);
937                 raw_spin_lock(&uncore_box_lock);
938                 list_del(&box->list);
939                 raw_spin_unlock(&uncore_box_lock);
940                 kfree(box);
941         }
942         return ret;
943 }
944
945 static void uncore_pci_remove(struct pci_dev *pdev)
946 {
947         struct intel_uncore_box *box = pci_get_drvdata(pdev);
948         struct intel_uncore_pmu *pmu;
949         int i, cpu, phys_id;
950         bool last_box = false;
951
952         phys_id = uncore_pcibus_to_physid(pdev->bus);
953         box = pci_get_drvdata(pdev);
954         if (!box) {
955                 for (i = 0; i < UNCORE_EXTRA_PCI_DEV_MAX; i++) {
956                         if (uncore_extra_pci_dev[phys_id][i] == pdev) {
957                                 uncore_extra_pci_dev[phys_id][i] = NULL;
958                                 break;
959                         }
960                 }
961                 WARN_ON_ONCE(i >= UNCORE_EXTRA_PCI_DEV_MAX);
962                 return;
963         }
964
965         pmu = box->pmu;
966         if (WARN_ON_ONCE(phys_id != box->phys_id))
967                 return;
968
969         pci_set_drvdata(pdev, NULL);
970
971         raw_spin_lock(&uncore_box_lock);
972         list_del(&box->list);
973         if (list_empty(&pmu->box_list))
974                 last_box = true;
975         raw_spin_unlock(&uncore_box_lock);
976
977         for_each_possible_cpu(cpu) {
978                 if (*per_cpu_ptr(pmu->box, cpu) == box) {
979                         *per_cpu_ptr(pmu->box, cpu) = NULL;
980                         atomic_dec(&box->refcnt);
981                 }
982         }
983
984         WARN_ON_ONCE(atomic_read(&box->refcnt) != 1);
985         kfree(box);
986
987         if (last_box)
988                 uncore_pmu_unregister(pmu);
989 }
990
991 static int __init uncore_pci_init(void)
992 {
993         int ret;
994
995         switch (boot_cpu_data.x86_model) {
996         case 45: /* Sandy Bridge-EP */
997                 ret = snbep_uncore_pci_init();
998                 break;
999         case 62: /* Ivy Bridge-EP */
1000                 ret = ivbep_uncore_pci_init();
1001                 break;
1002         case 63: /* Haswell-EP */
1003                 ret = hswep_uncore_pci_init();
1004                 break;
1005         case 79: /* BDX-EP */
1006         case 86: /* BDX-DE */
1007                 ret = bdx_uncore_pci_init();
1008                 break;
1009         case 42: /* Sandy Bridge */
1010                 ret = snb_uncore_pci_init();
1011                 break;
1012         case 58: /* Ivy Bridge */
1013                 ret = ivb_uncore_pci_init();
1014                 break;
1015         case 60: /* Haswell */
1016         case 69: /* Haswell Celeron */
1017                 ret = hsw_uncore_pci_init();
1018                 break;
1019         case 61: /* Broadwell */
1020                 ret = bdw_uncore_pci_init();
1021                 break;
1022         case 87: /* Knights Landing */
1023                 ret = knl_uncore_pci_init();
1024                 break;
1025         case 94: /* SkyLake */
1026                 ret = skl_uncore_pci_init();
1027                 break;
1028         default:
1029                 return 0;
1030         }
1031
1032         if (ret)
1033                 return ret;
1034
1035         ret = uncore_types_init(uncore_pci_uncores);
1036         if (ret)
1037                 goto err;
1038
1039         uncore_pci_driver->probe = uncore_pci_probe;
1040         uncore_pci_driver->remove = uncore_pci_remove;
1041
1042         ret = pci_register_driver(uncore_pci_driver);
1043         if (ret)
1044                 goto err;
1045
1046         pcidrv_registered = true;
1047         return 0;
1048
1049 err:
1050         uncore_types_exit(uncore_pci_uncores);
1051         uncore_pci_uncores = empty_uncore;
1052         uncore_free_pcibus_map();
1053         return ret;
1054 }
1055
1056 static void __init uncore_pci_exit(void)
1057 {
1058         if (pcidrv_registered) {
1059                 pcidrv_registered = false;
1060                 pci_unregister_driver(uncore_pci_driver);
1061                 uncore_types_exit(uncore_pci_uncores);
1062                 uncore_free_pcibus_map();
1063         }
1064 }
1065
1066 /* CPU hot plug/unplug are serialized by cpu_add_remove_lock mutex */
1067 static LIST_HEAD(boxes_to_free);
1068
1069 static void uncore_kfree_boxes(void)
1070 {
1071         struct intel_uncore_box *box;
1072
1073         while (!list_empty(&boxes_to_free)) {
1074                 box = list_entry(boxes_to_free.next,
1075                                  struct intel_uncore_box, list);
1076                 list_del(&box->list);
1077                 kfree(box);
1078         }
1079 }
1080
1081 static void uncore_cpu_dying(int cpu)
1082 {
1083         struct intel_uncore_type *type;
1084         struct intel_uncore_pmu *pmu;
1085         struct intel_uncore_box *box;
1086         int i, j;
1087
1088         for (i = 0; uncore_msr_uncores[i]; i++) {
1089                 type = uncore_msr_uncores[i];
1090                 for (j = 0; j < type->num_boxes; j++) {
1091                         pmu = &type->pmus[j];
1092                         box = *per_cpu_ptr(pmu->box, cpu);
1093                         *per_cpu_ptr(pmu->box, cpu) = NULL;
1094                         if (box && atomic_dec_and_test(&box->refcnt))
1095                                 list_add(&box->list, &boxes_to_free);
1096                 }
1097         }
1098 }
1099
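/*
 * Runs on the cpu itself (from the CPU_STARTING notifier or from
 * uncore_cpu_setup() at init time).  If another online cpu in the same
 * package already owns a box for this pmu, reuse it and bump its
 * refcount; the box that uncore_cpu_prepare() allocated for this cpu is
 * then parked on boxes_to_free and released later by uncore_kfree_boxes()
 * from the CPU_ONLINE/CPU_DEAD notifier callbacks.  Otherwise adopt the
 * prepared box and initialize it.
 */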
1100 static int uncore_cpu_starting(int cpu)
1101 {
1102         struct intel_uncore_type *type;
1103         struct intel_uncore_pmu *pmu;
1104         struct intel_uncore_box *box, *exist;
1105         int i, j, k, phys_id;
1106
1107         phys_id = topology_physical_package_id(cpu);
1108
1109         for (i = 0; uncore_msr_uncores[i]; i++) {
1110                 type = uncore_msr_uncores[i];
1111                 for (j = 0; j < type->num_boxes; j++) {
1112                         pmu = &type->pmus[j];
1113                         box = *per_cpu_ptr(pmu->box, cpu);
1114                         /* called by uncore_cpu_init? */
1115                         if (box && box->phys_id >= 0) {
1116                                 uncore_box_init(box);
1117                                 continue;
1118                         }
1119
1120                         for_each_online_cpu(k) {
1121                                 exist = *per_cpu_ptr(pmu->box, k);
1122                                 if (exist && exist->phys_id == phys_id) {
1123                                         atomic_inc(&exist->refcnt);
1124                                         *per_cpu_ptr(pmu->box, cpu) = exist;
1125                                         if (box) {
1126                                                 list_add(&box->list,
1127                                                          &boxes_to_free);
1128                                                 box = NULL;
1129                                         }
1130                                         break;
1131                                 }
1132                         }
1133
1134                         if (box) {
1135                                 box->phys_id = phys_id;
1136                                 uncore_box_init(box);
1137                         }
1138                 }
1139         }
1140         return 0;
1141 }
1142
1143 static int uncore_cpu_prepare(int cpu, int phys_id)
1144 {
1145         struct intel_uncore_type *type;
1146         struct intel_uncore_pmu *pmu;
1147         struct intel_uncore_box *box;
1148         int i, j;
1149
1150         for (i = 0; uncore_msr_uncores[i]; i++) {
1151                 type = uncore_msr_uncores[i];
1152                 for (j = 0; j < type->num_boxes; j++) {
1153                         pmu = &type->pmus[j];
1154                         if (pmu->func_id < 0)
1155                                 pmu->func_id = j;
1156
1157                         box = uncore_alloc_box(type, cpu_to_node(cpu));
1158                         if (!box)
1159                                 return -ENOMEM;
1160
1161                         box->pmu = pmu;
1162                         box->phys_id = phys_id;
1163                         *per_cpu_ptr(pmu->box, cpu) = box;
1164                 }
1165         }
1166         return 0;
1167 }
1168
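/*
 * Move uncore event collection for every pmu from old_cpu to new_cpu.
 * old_cpu < 0 means this is the initial assignment for a package (the box
 * must not have an owner yet); new_cpu < 0 means the last cpu of the
 * package is going away, so the box is simply orphaned.  Perf contexts
 * are migrated so that already-created events keep counting on the new
 * cpu.
 */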
1169 static void
1170 uncore_change_context(struct intel_uncore_type **uncores, int old_cpu, int new_cpu)
1171 {
1172         struct intel_uncore_type *type;
1173         struct intel_uncore_pmu *pmu;
1174         struct intel_uncore_box *box;
1175         int i, j;
1176
1177         for (i = 0; uncores[i]; i++) {
1178                 type = uncores[i];
1179                 for (j = 0; j < type->num_boxes; j++) {
1180                         pmu = &type->pmus[j];
1181                         if (old_cpu < 0)
1182                                 box = uncore_pmu_to_box(pmu, new_cpu);
1183                         else
1184                                 box = uncore_pmu_to_box(pmu, old_cpu);
1185                         if (!box)
1186                                 continue;
1187
1188                         if (old_cpu < 0) {
1189                                 WARN_ON_ONCE(box->cpu != -1);
1190                                 box->cpu = new_cpu;
1191                                 continue;
1192                         }
1193
1194                         WARN_ON_ONCE(box->cpu != old_cpu);
1195                         if (new_cpu >= 0) {
1196                                 uncore_pmu_cancel_hrtimer(box);
1197                                 perf_pmu_migrate_context(&pmu->pmu,
1198                                                 old_cpu, new_cpu);
1199                                 box->cpu = new_cpu;
1200                         } else {
1201                                 box->cpu = -1;
1202                         }
1203                 }
1204         }
1205 }
1206
1207 static void uncore_event_exit_cpu(int cpu)
1208 {
1209         int i, phys_id, target;
1210
1211         /* if exiting cpu is used for collecting uncore events */
1212         if (!cpumask_test_and_clear_cpu(cpu, &uncore_cpu_mask))
1213                 return;
1214
1215         /* find a new cpu to collect uncore events */
1216         phys_id = topology_physical_package_id(cpu);
1217         target = -1;
1218         for_each_online_cpu(i) {
1219                 if (i == cpu)
1220                         continue;
1221                 if (phys_id == topology_physical_package_id(i)) {
1222                         target = i;
1223                         break;
1224                 }
1225         }
1226
1227         /* migrate uncore events to the new cpu */
1228         if (target >= 0)
1229                 cpumask_set_cpu(target, &uncore_cpu_mask);
1230
1231         uncore_change_context(uncore_msr_uncores, cpu, target);
1232         uncore_change_context(uncore_pci_uncores, cpu, target);
1233 }
1234
1235 static void uncore_event_init_cpu(int cpu)
1236 {
1237         int i, phys_id;
1238
1239         phys_id = topology_physical_package_id(cpu);
1240         for_each_cpu(i, &uncore_cpu_mask) {
1241                 if (phys_id == topology_physical_package_id(i))
1242                         return;
1243         }
1244
1245         cpumask_set_cpu(cpu, &uncore_cpu_mask);
1246
1247         uncore_change_context(uncore_msr_uncores, -1, cpu);
1248         uncore_change_context(uncore_pci_uncores, -1, cpu);
1249 }
1250
1251 static int uncore_cpu_notifier(struct notifier_block *self,
1252                                unsigned long action, void *hcpu)
1253 {
1254         unsigned int cpu = (long)hcpu;
1255
1256         /* allocate/free data structure for uncore box */
1257         switch (action & ~CPU_TASKS_FROZEN) {
1258         case CPU_UP_PREPARE:
1259                 return notifier_from_errno(uncore_cpu_prepare(cpu, -1));
1260         case CPU_STARTING:
1261                 uncore_cpu_starting(cpu);
1262                 break;
1263         case CPU_UP_CANCELED:
1264         case CPU_DYING:
1265                 uncore_cpu_dying(cpu);
1266                 break;
1267         case CPU_ONLINE:
1268         case CPU_DEAD:
1269                 uncore_kfree_boxes();
1270                 break;
1271         default:
1272                 break;
1273         }
1274
1275         /* select the cpu that collects uncore events */
1276         switch (action & ~CPU_TASKS_FROZEN) {
1277         case CPU_DOWN_FAILED:
1278         case CPU_STARTING:
1279                 uncore_event_init_cpu(cpu);
1280                 break;
1281         case CPU_DOWN_PREPARE:
1282                 uncore_event_exit_cpu(cpu);
1283                 break;
1284         default:
1285                 break;
1286         }
1287
1288         return NOTIFY_OK;
1289 }
1290
1291 static struct notifier_block uncore_cpu_nb = {
1292         .notifier_call  = uncore_cpu_notifier,
1293         /*
1294          * to migrate uncore events, our notifier should be executed
1295          * before perf core's notifier.
1296          */
1297         .priority       = CPU_PRI_PERF + 1,
1298 };
1299
1300 static int __init type_pmu_register(struct intel_uncore_type *type)
1301 {
1302         int i, ret;
1303
1304         for (i = 0; i < type->num_boxes; i++) {
1305                 ret = uncore_pmu_register(&type->pmus[i]);
1306                 if (ret)
1307                         return ret;
1308         }
1309         return 0;
1310 }
1311
1312 static int __init uncore_msr_pmus_register(void)
1313 {
1314         struct intel_uncore_type **types = uncore_msr_uncores;
1315         int ret;
1316
1317         while (*types) {
1318                 ret = type_pmu_register(*types++);
1319                 if (ret)
1320                         return ret;
1321         }
1322         return 0;
1323 }
1324
1325 static int __init uncore_cpu_init(void)
1326 {
1327         int ret;
1328
1329         switch (boot_cpu_data.x86_model) {
1330         case 26: /* Nehalem */
1331         case 30:
1332         case 37: /* Westmere */
1333         case 44:
1334                 nhm_uncore_cpu_init();
1335                 break;
1336         case 42: /* Sandy Bridge */
1337         case 58: /* Ivy Bridge */
1338         case 60: /* Haswell */
1339         case 69: /* Haswell */
1340         case 70: /* Haswell */
1341         case 61: /* Broadwell */
1342         case 71: /* Broadwell */
1343                 snb_uncore_cpu_init();
1344                 break;
1345         case 45: /* Sandy Bridge-EP */
1346                 snbep_uncore_cpu_init();
1347                 break;
1348         case 46: /* Nehalem-EX */
1349         case 47: /* Westmere-EX aka. Xeon E7 */
1350                 nhmex_uncore_cpu_init();
1351                 break;
1352         case 62: /* Ivy Bridge-EP */
1353                 ivbep_uncore_cpu_init();
1354                 break;
1355         case 63: /* Haswell-EP */
1356                 hswep_uncore_cpu_init();
1357                 break;
1358         case 79: /* BDX-EP */
1359         case 86: /* BDX-DE */
1360                 bdx_uncore_cpu_init();
1361                 break;
1362         case 87: /* Knights Landing */
1363                 knl_uncore_cpu_init();
1364                 break;
1365         default:
1366                 return 0;
1367         }
1368
1369         ret = uncore_types_init(uncore_msr_uncores);
1370         if (ret)
1371                 goto err;
1372
1373         ret = uncore_msr_pmus_register();
1374         if (ret)
1375                 goto err;
1376         return 0;
1377 err:
1378         uncore_types_exit(uncore_msr_uncores);
1379         uncore_msr_uncores = empty_uncore;
1380         return ret;
1381 }
1382
1383 static void __init uncore_cpu_setup(void *dummy)
1384 {
1385         uncore_cpu_starting(smp_processor_id());
1386 }
1387
1388 static int __init uncore_cpumask_init(void)
1389 {
1390         int cpu, ret = 0;
1391
1392         cpu_notifier_register_begin();
1393
1394         for_each_online_cpu(cpu) {
1395                 int i, phys_id = topology_physical_package_id(cpu);
1396
1397                 for_each_cpu(i, &uncore_cpu_mask) {
1398                         if (phys_id == topology_physical_package_id(i)) {
1399                                 phys_id = -1;
1400                                 break;
1401                         }
1402                 }
1403                 if (phys_id < 0)
1404                         continue;
1405
1406                 ret = uncore_cpu_prepare(cpu, phys_id);
1407                 if (ret)
1408                         goto out;
1409                 uncore_event_init_cpu(cpu);
1410         }
1411         on_each_cpu(uncore_cpu_setup, NULL, 1);
1412
1413         __register_cpu_notifier(&uncore_cpu_nb);
1414
1415 out:
1416         cpu_notifier_register_done();
1417         return ret;
1418 }
1419
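/*
 * Init order matters for the error handling: the PCI uncores come up
 * first, then the MSR uncores, then the cpumask/hotplug setup.  Each
 * later step unwinds the earlier ones on failure (errcpu tears down the
 * MSR types, errpci unregisters the PCI driver and frees the pci2phy
 * map), and uncore_pci_init()/uncore_cpu_init() clean up after themselves
 * before returning an error.
 */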
1420 static int __init intel_uncore_init(void)
1421 {
1422         int ret;
1423
1424         if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
1425                 return -ENODEV;
1426
1427         if (cpu_has_hypervisor)
1428                 return -ENODEV;
1429
1430         ret = uncore_pci_init();
1431         if (ret)
1432                 return ret;
1433         ret = uncore_cpu_init();
1434         if (ret)
1435                 goto errpci;
1436         ret = uncore_cpumask_init();
1437         if (ret)
1438                 goto errcpu;
1439
1440         return 0;
1441
1442 errcpu:
1443         uncore_types_exit(uncore_msr_uncores);
1444 errpci:
1445         uncore_pci_exit();
1446         return ret;
1447 }
1448 device_initcall(intel_uncore_init);