[cascardo/linux.git] arch/x86/kernel/cpu/perf_counter.c (commit 7aab177fb566378ab40a815dd8c073b7ce367b1f)
1 /*
2  * Performance counter x86 architecture code
3  *
4  *  Copyright(C) 2008 Thomas Gleixner <tglx@linutronix.de>
5  *  Copyright(C) 2008 Red Hat, Inc., Ingo Molnar
6  *  Copyright(C) 2009 Jaswinder Singh Rajput
7  *
8  *  For licensing details see kernel-base/COPYING
9  */
10
11 #include <linux/perf_counter.h>
12 #include <linux/capability.h>
13 #include <linux/notifier.h>
14 #include <linux/hardirq.h>
15 #include <linux/kprobes.h>
16 #include <linux/module.h>
17 #include <linux/kdebug.h>
18 #include <linux/sched.h>
19
20 #include <asm/apic.h>
21
22 static bool perf_counters_initialized __read_mostly;
23
24 /*
25  * Number of (generic) HW counters:
26  */
27 static int nr_counters_generic __read_mostly;
28 static u64 perf_counter_mask __read_mostly;
29 static u64 counter_value_mask __read_mostly;
30 static int counter_value_bits __read_mostly;
31
32 static int nr_counters_fixed __read_mostly;
33
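/*
 * Per-CPU PMU state: the counter occupying each PMC slot, the slot
 * allocation bitmap, interrupt accounting for throttling, the control
 * state saved by the interrupt handler, and (on AMD) which counters
 * are currently active.
 */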
34 struct cpu_hw_counters {
35         struct perf_counter     *counters[X86_PMC_IDX_MAX];
36         unsigned long           used[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
37         unsigned long           interrupts;
38         u64                     throttle_ctrl;
39         unsigned long           active_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
40         int                     enabled;
41 };
42
43 /*
44  * struct pmc_x86_ops - performance counter x86 ops
45  */
46 struct pmc_x86_ops {
47         u64             (*save_disable_all)(void);
48         void            (*restore_all)(u64);
49         u64             (*get_status)(u64);
50         void            (*ack_status)(u64);
51         void            (*enable)(int, u64);
52         void            (*disable)(int, u64);
53         unsigned        eventsel;
54         unsigned        perfctr;
55         u64             (*event_map)(int);
56         u64             (*raw_event)(u64);
57         int             max_events;
58 };
59
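/* Selected at boot by init_hw_perf_counters(): &pmc_intel_ops or &pmc_amd_ops. */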
60 static struct pmc_x86_ops *pmc_ops __read_mostly;
61
62 static DEFINE_PER_CPU(struct cpu_hw_counters, cpu_hw_counters) = {
63         .enabled = 1,
64 };
65
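/*
 * Architectural PerfMon version reported by CPUID leaf 0xa;
 * pmc_intel_init() requires version 2 or later.
 */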
66 static __read_mostly int intel_perfmon_version;
67
68 /*
69  * Intel PerfMon v3. Used on Core2 and later.
70  */
71 static const u64 intel_perfmon_event_map[] =
72 {
73   [PERF_COUNT_CPU_CYCLES]               = 0x003c,
74   [PERF_COUNT_INSTRUCTIONS]             = 0x00c0,
75   [PERF_COUNT_CACHE_REFERENCES]         = 0x4f2e,
76   [PERF_COUNT_CACHE_MISSES]             = 0x412e,
77   [PERF_COUNT_BRANCH_INSTRUCTIONS]      = 0x00c4,
78   [PERF_COUNT_BRANCH_MISSES]            = 0x00c5,
79   [PERF_COUNT_BUS_CYCLES]               = 0x013c,
80 };
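/*
 * The raw encodings above follow the EVNTSEL layout masked out in
 * pmc_intel_raw_event(): event select in bits 0-7, unit mask in bits 8-15,
 * e.g. 0x412e above is unit mask 0x41, event select 0x2e.
 */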
81
82 static u64 pmc_intel_event_map(int event)
83 {
84         return intel_perfmon_event_map[event];
85 }
86
87 static u64 pmc_intel_raw_event(u64 event)
88 {
89 #define CORE_EVNTSEL_EVENT_MASK         0x000000FFULL
90 #define CORE_EVNTSEL_UNIT_MASK          0x0000FF00ULL
91 #define CORE_EVNTSEL_COUNTER_MASK       0xFF000000ULL
92
93 #define CORE_EVNTSEL_MASK               \
94         (CORE_EVNTSEL_EVENT_MASK |      \
95          CORE_EVNTSEL_UNIT_MASK  |      \
96          CORE_EVNTSEL_COUNTER_MASK)
97
98         return event & CORE_EVNTSEL_MASK;
99 }
100
101 /*
102  * AMD Performance Monitor K7 and later.
103  */
104 static const u64 amd_perfmon_event_map[] =
105 {
106   [PERF_COUNT_CPU_CYCLES]               = 0x0076,
107   [PERF_COUNT_INSTRUCTIONS]             = 0x00c0,
108   [PERF_COUNT_CACHE_REFERENCES]         = 0x0080,
109   [PERF_COUNT_CACHE_MISSES]             = 0x0081,
110   [PERF_COUNT_BRANCH_INSTRUCTIONS]      = 0x00c4,
111   [PERF_COUNT_BRANCH_MISSES]            = 0x00c5,
112 };
113
114 static u64 pmc_amd_event_map(int event)
115 {
116         return amd_perfmon_event_map[event];
117 }
118
119 static u64 pmc_amd_raw_event(u64 event)
120 {
121 #define K7_EVNTSEL_EVENT_MASK   0x7000000FFULL
122 #define K7_EVNTSEL_UNIT_MASK    0x00000FF00ULL
123 #define K7_EVNTSEL_COUNTER_MASK 0x0FF000000ULL
124
125 #define K7_EVNTSEL_MASK                 \
126         (K7_EVNTSEL_EVENT_MASK |        \
127          K7_EVNTSEL_UNIT_MASK  |        \
128          K7_EVNTSEL_COUNTER_MASK)
129
130         return event & K7_EVNTSEL_MASK;
131 }
132
133 /*
134  * Propagate counter elapsed time into the generic counter.
135  * Can only be executed on the CPU where the counter is active.
136  * The elapsed delta is accumulated into the generic counter.
137  */
138 static void
139 x86_perf_counter_update(struct perf_counter *counter,
140                         struct hw_perf_counter *hwc, int idx)
141 {
142         u64 prev_raw_count, new_raw_count, delta;
143
144         /*
145          * Careful: an NMI might modify the previous counter value.
146          *
147          * Our tactic to handle this is to first atomically read and
148          * exchange a new raw count - then add that new-prev delta
149          * count to the generic counter atomically:
150          */
151 again:
152         prev_raw_count = atomic64_read(&hwc->prev_count);
153         rdmsrl(hwc->counter_base + idx, new_raw_count);
154
155         if (atomic64_cmpxchg(&hwc->prev_count, prev_raw_count,
156                                         new_raw_count) != prev_raw_count)
157                 goto again;
158
159         /*
160          * Now we have the new raw value and have updated the prev
161          * timestamp already. We can now calculate the elapsed delta
162          * (counter-)time and add that to the generic counter.
163          *
164          * Careful: not all hw sign-extends above the physical width
165          * of the count, so compensate by clipping the delta to 32 bits:
166          */
167         delta = (u64)(u32)((s32)new_raw_count - (s32)prev_raw_count);
168
169         atomic64_add(delta, &counter->count);
170         atomic64_sub(delta, &hwc->period_left);
171 }
172
173 /*
174  * Setup the hardware configuration for a given hw_event_type
175  */
176 static int __hw_perf_counter_init(struct perf_counter *counter)
177 {
178         struct perf_counter_hw_event *hw_event = &counter->hw_event;
179         struct hw_perf_counter *hwc = &counter->hw;
180
181         if (unlikely(!perf_counters_initialized))
182                 return -EINVAL;
183
184         /*
185          * Generate PMC IRQs:
186          * (keep 'enabled' bit clear for now)
187          */
188         hwc->config = ARCH_PERFMON_EVENTSEL_INT;
189
190         /*
191          * Count user and OS events unless requested not to.
192          */
193         if (!hw_event->exclude_user)
194                 hwc->config |= ARCH_PERFMON_EVENTSEL_USR;
195         if (!hw_event->exclude_kernel)
196                 hwc->config |= ARCH_PERFMON_EVENTSEL_OS;
197
198         /*
199          * If privileged enough, allow NMI events:
200          */
201         hwc->nmi = 0;
202         if (capable(CAP_SYS_ADMIN) && hw_event->nmi)
203                 hwc->nmi = 1;
204
205         hwc->irq_period         = hw_event->irq_period;
206         /*
207          * Intel PMCs cannot be accessed sanely above 32 bit width,
208          * so we install an artificial 1<<31 period regardless of
209          * the generic counter period:
210          */
211         if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
212                 if ((s64)hwc->irq_period <= 0 || hwc->irq_period > 0x7FFFFFFF)
213                         hwc->irq_period = 0x7FFFFFFF;
214
215         atomic64_set(&hwc->period_left, hwc->irq_period);
216
217         /*
218          * Raw event types provide the config directly in the event structure
219          */
220         if (perf_event_raw(hw_event)) {
221                 hwc->config |= pmc_ops->raw_event(perf_event_config(hw_event));
222         } else {
223                 if (perf_event_id(hw_event) >= pmc_ops->max_events)
224                         return -EINVAL;
225                 /*
226                  * The generic map:
227                  */
228                 hwc->config |= pmc_ops->event_map(perf_event_id(hw_event));
229         }
230
231         return 0;
232 }
233
234 static u64 pmc_intel_save_disable_all(void)
235 {
236         u64 ctrl;
237
238         rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, ctrl);
239         wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0);
240
241         return ctrl;
242 }
243
244 static u64 pmc_amd_save_disable_all(void)
245 {
246         struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
247         int enabled, idx;
248
249         enabled = cpuc->enabled;
250         cpuc->enabled = 0;
251         /*
252          * ensure we write the disable before we start disabling the
253          * counters proper, so that pcm_amd_enable() does the right thing.
254          */
255         barrier();
256
257         for (idx = 0; idx < nr_counters_generic; idx++) {
258                 u64 val;
259
260                 rdmsrl(MSR_K7_EVNTSEL0 + idx, val);
261                 if (val & ARCH_PERFMON_EVENTSEL0_ENABLE) {
262                         val &= ~ARCH_PERFMON_EVENTSEL0_ENABLE;
263                         wrmsrl(MSR_K7_EVNTSEL0 + idx, val);
264                 }
265         }
266
267         return enabled;
268 }
269
270 u64 hw_perf_save_disable(void)
271 {
272         if (unlikely(!perf_counters_initialized))
273                 return 0;
274
275         return pmc_ops->save_disable_all();
276 }
277 /*
278  * Exported because of ACPI idle
279  */
280 EXPORT_SYMBOL_GPL(hw_perf_save_disable);
281
282 static void pmc_intel_restore_all(u64 ctrl)
283 {
284         wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, ctrl);
285 }
286
287 static void pmc_amd_restore_all(u64 ctrl)
288 {
289         struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
290         int idx;
291
292         cpuc->enabled = ctrl;
293         barrier();
294         if (!ctrl)
295                 return;
296
297         for (idx = 0; idx < nr_counters_generic; idx++) {
298                 if (test_bit(idx, cpuc->active_mask)) {
299                         u64 val;
300
301                         rdmsrl(MSR_K7_EVNTSEL0 + idx, val);
302                         val |= ARCH_PERFMON_EVENTSEL0_ENABLE;
303                         wrmsrl(MSR_K7_EVNTSEL0 + idx, val);
304                 }
305         }
306 }
307
308 void hw_perf_restore(u64 ctrl)
309 {
310         if (unlikely(!perf_counters_initialized))
311                 return;
312
313         pmc_ops->restore_all(ctrl);
314 }
315 /*
316  * Exported because of ACPI idle
317  */
318 EXPORT_SYMBOL_GPL(hw_perf_restore);
319
320 static u64 pmc_intel_get_status(u64 mask)
321 {
322         u64 status;
323
324         rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status);
325
326         return status;
327 }
328
329 static u64 pmc_amd_get_status(u64 mask)
330 {
331         u64 status = 0;
332         int idx;
333
334         for (idx = 0; idx < nr_counters_generic; idx++) {
335                 s64 val;
336
337                 if (!(mask & (1 << idx)))
338                         continue;
339
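                /*
                 * Counters are programmed with a negative start value (see
                 * __hw_perf_counter_set_period()), so the top bit of the
                 * 48-bit count stays set until the counter overflows. Shift
                 * it into the sign bit: a non-negative value means the
                 * counter overflowed.
                 */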
340                 rdmsrl(MSR_K7_PERFCTR0 + idx, val);
341                 val <<= (64 - counter_value_bits);
342                 if (val >= 0)
343                         status |= (1 << idx);
344         }
345
346         return status;
347 }
348
349 static u64 hw_perf_get_status(u64 mask)
350 {
351         if (unlikely(!perf_counters_initialized))
352                 return 0;
353
354         return pmc_ops->get_status(mask);
355 }
356
357 static void pmc_intel_ack_status(u64 ack)
358 {
359         wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, ack);
360 }
361
362 static void pmc_amd_ack_status(u64 ack)
363 {
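        /*
         * Nothing to acknowledge: AMD has no global overflow status/ack
         * MSR; overflow state is reconstructed from the counters in
         * pmc_amd_get_status().
         */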
364 }
365
366 static void hw_perf_ack_status(u64 ack)
367 {
368         if (unlikely(!perf_counters_initialized))
369                 return;
370
371         pmc_ops->ack_status(ack);
372 }
373
374 static void pmc_intel_enable(int idx, u64 config)
375 {
376         wrmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + idx,
377                         config | ARCH_PERFMON_EVENTSEL0_ENABLE);
378 }
379
380 static void pmc_amd_enable(int idx, u64 config)
381 {
382         struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
383
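        /*
         * Track this counter in active_mask so pmc_amd_restore_all() can
         * re-enable it, and only set the enable bit if counting is
         * globally enabled on this CPU.
         */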
384         set_bit(idx, cpuc->active_mask);
385         if (cpuc->enabled)
386                 config |= ARCH_PERFMON_EVENTSEL0_ENABLE;
387
388         wrmsrl(MSR_K7_EVNTSEL0 + idx, config);
389 }
390
391 static void hw_perf_enable(int idx, u64 config)
392 {
393         if (unlikely(!perf_counters_initialized))
394                 return;
395
396         pmc_ops->enable(idx, config);
397 }
398
399 static void pmc_intel_disable(int idx, u64 config)
400 {
401         wrmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + idx, config);
402 }
403
404 static void pmc_amd_disable(int idx, u64 config)
405 {
406         struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
407
408         clear_bit(idx, cpuc->active_mask);
409         wrmsrl(MSR_K7_EVNTSEL0 + idx, config);
410
411 }
412
413 static void hw_perf_disable(int idx, u64 config)
414 {
415         if (unlikely(!perf_counters_initialized))
416                 return;
417
418         pmc_ops->disable(idx, config);
419 }
420
421 static inline void
422 __pmc_fixed_disable(struct perf_counter *counter,
423                     struct hw_perf_counter *hwc, unsigned int __idx)
424 {
425         int idx = __idx - X86_PMC_IDX_FIXED;
426         u64 ctrl_val, mask;
427         int err;
428
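        /*
         * Each fixed counter has a 4-bit control field in
         * MSR_ARCH_PERFMON_FIXED_CTR_CTRL; clearing ours stops the counter.
         */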
429         mask = 0xfULL << (idx * 4);
430
431         rdmsrl(hwc->config_base, ctrl_val);
432         ctrl_val &= ~mask;
433         err = checking_wrmsrl(hwc->config_base, ctrl_val);
434 }
435
436 static inline void
437 __pmc_generic_disable(struct perf_counter *counter,
438                            struct hw_perf_counter *hwc, unsigned int idx)
439 {
440         if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL))
441                 __pmc_fixed_disable(counter, hwc, idx);
442         else
443                 hw_perf_disable(idx, hwc->config);
444 }
445
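/*
 * Last period programmed into each PMC slot; used only by
 * perf_counter_print_debug().
 */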
446 static DEFINE_PER_CPU(u64, prev_left[X86_PMC_IDX_MAX]);
447
448 /*
449  * Set the next IRQ period, based on the hwc->period_left value.
450  * To be called with the counter disabled in hw:
451  */
452 static void
453 __hw_perf_counter_set_period(struct perf_counter *counter,
454                              struct hw_perf_counter *hwc, int idx)
455 {
456         s64 left = atomic64_read(&hwc->period_left);
457         s64 period = hwc->irq_period;
458         int err;
459
460         /*
461          * If we are way outside a reasonable range then just skip forward:
462          */
463         if (unlikely(left <= -period)) {
464                 left = period;
465                 atomic64_set(&hwc->period_left, left);
466         }
467
468         if (unlikely(left <= 0)) {
469                 left += period;
470                 atomic64_set(&hwc->period_left, left);
471         }
472
473         per_cpu(prev_left[idx], smp_processor_id()) = left;
474
475         /*
476          * The hw counter starts counting from this counter offset;
477          * record it so that future deltas can be extracted:
478          */
479         atomic64_set(&hwc->prev_count, (u64)-left);
480
481         err = checking_wrmsrl(hwc->counter_base + idx,
482                              (u64)(-left) & counter_value_mask);
483 }
484
485 static inline void
486 __pmc_fixed_enable(struct perf_counter *counter,
487                    struct hw_perf_counter *hwc, unsigned int __idx)
488 {
489         int idx = __idx - X86_PMC_IDX_FIXED;
490         u64 ctrl_val, bits, mask;
491         int err;
492
493         /*
494          * Enable IRQ generation (0x8),
495          * and enable ring-3 counting (0x2) and ring-0 counting (0x1)
496          * if requested:
497          */
498         bits = 0x8ULL;
499         if (hwc->config & ARCH_PERFMON_EVENTSEL_USR)
500                 bits |= 0x2;
501         if (hwc->config & ARCH_PERFMON_EVENTSEL_OS)
502                 bits |= 0x1;
503         bits <<= (idx * 4);
504         mask = 0xfULL << (idx * 4);
505
506         rdmsrl(hwc->config_base, ctrl_val);
507         ctrl_val &= ~mask;
508         ctrl_val |= bits;
509         err = checking_wrmsrl(hwc->config_base, ctrl_val);
510 }
511
512 static void
513 __pmc_generic_enable(struct perf_counter *counter,
514                           struct hw_perf_counter *hwc, int idx)
515 {
516         if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL))
517                 __pmc_fixed_enable(counter, hwc, idx);
518         else
519                 hw_perf_enable(idx, hwc->config);
520 }
521
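/*
 * Map a generic hardware event onto one of the Intel fixed-function
 * counters, or return -1 if it has to run on a generic PMC (AMD has no
 * fixed counters and NMI counters are kept generic).
 */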
522 static int
523 fixed_mode_idx(struct perf_counter *counter, struct hw_perf_counter *hwc)
524 {
525         unsigned int event;
526
527         if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
528                 return -1;
529
530         if (unlikely(hwc->nmi))
531                 return -1;
532
533         event = hwc->config & ARCH_PERFMON_EVENT_MASK;
534
535         if (unlikely(event == pmc_ops->event_map(PERF_COUNT_INSTRUCTIONS)))
536                 return X86_PMC_IDX_FIXED_INSTRUCTIONS;
537         if (unlikely(event == pmc_ops->event_map(PERF_COUNT_CPU_CYCLES)))
538                 return X86_PMC_IDX_FIXED_CPU_CYCLES;
539         if (unlikely(event == pmc_ops->event_map(PERF_COUNT_BUS_CYCLES)))
540                 return X86_PMC_IDX_FIXED_BUS_CYCLES;
541
542         return -1;
543 }
544
545 /*
546  * Find a PMC slot for the freshly enabled / scheduled in counter:
547  */
548 static int pmc_generic_enable(struct perf_counter *counter)
549 {
550         struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
551         struct hw_perf_counter *hwc = &counter->hw;
552         int idx;
553
554         idx = fixed_mode_idx(counter, hwc);
555         if (idx >= 0) {
556                 /*
557                  * Try to get the fixed counter, if that is already taken
558                  * then try to get a generic counter:
559                  */
560                 if (test_and_set_bit(idx, cpuc->used))
561                         goto try_generic;
562
563                 hwc->config_base = MSR_ARCH_PERFMON_FIXED_CTR_CTRL;
564                 /*
565                  * We set it so that counter_base + idx in wrmsr/rdmsr maps to
566                  * MSR_ARCH_PERFMON_FIXED_CTR0 ... CTR2:
567                  */
568                 hwc->counter_base =
569                         MSR_ARCH_PERFMON_FIXED_CTR0 - X86_PMC_IDX_FIXED;
570                 hwc->idx = idx;
571         } else {
572                 idx = hwc->idx;
573                 /* Try to get the previous generic counter again */
574                 if (test_and_set_bit(idx, cpuc->used)) {
575 try_generic:
576                         idx = find_first_zero_bit(cpuc->used, nr_counters_generic);
577                         if (idx == nr_counters_generic)
578                                 return -EAGAIN;
579
580                         set_bit(idx, cpuc->used);
581                         hwc->idx = idx;
582                 }
583                 hwc->config_base  = pmc_ops->eventsel;
584                 hwc->counter_base = pmc_ops->perfctr;
585         }
586
587         perf_counters_lapic_init(hwc->nmi);
588
589         __pmc_generic_disable(counter, hwc, idx);
590
591         cpuc->counters[idx] = counter;
592         /*
593          * Make it visible before enabling the hw:
594          */
595         smp_wmb();
596
597         __hw_perf_counter_set_period(counter, hwc, idx);
598         __pmc_generic_enable(counter, hwc, idx);
599
600         return 0;
601 }
602
603 void perf_counter_print_debug(void)
604 {
605         u64 ctrl, status, overflow, pmc_ctrl, pmc_count, prev_left, fixed;
606         struct cpu_hw_counters *cpuc;
607         int cpu, idx;
608
609         if (!nr_counters_generic)
610                 return;
611
612         local_irq_disable();
613
614         cpu = smp_processor_id();
615         cpuc = &per_cpu(cpu_hw_counters, cpu);
616
617         if (intel_perfmon_version >= 2) {
618                 rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, ctrl);
619                 rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status);
620                 rdmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, overflow);
621                 rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR_CTRL, fixed);
622
623                 pr_info("\n");
624                 pr_info("CPU#%d: ctrl:       %016llx\n", cpu, ctrl);
625                 pr_info("CPU#%d: status:     %016llx\n", cpu, status);
626                 pr_info("CPU#%d: overflow:   %016llx\n", cpu, overflow);
627                 pr_info("CPU#%d: fixed:      %016llx\n", cpu, fixed);
628         }
629         pr_info("CPU#%d: used:       %016llx\n", cpu, *(u64 *)cpuc->used);
630
631         for (idx = 0; idx < nr_counters_generic; idx++) {
632                 rdmsrl(pmc_ops->eventsel + idx, pmc_ctrl);
633                 rdmsrl(pmc_ops->perfctr  + idx, pmc_count);
634
635                 prev_left = per_cpu(prev_left[idx], cpu);
636
637                 pr_info("CPU#%d:   gen-PMC%d ctrl:  %016llx\n",
638                         cpu, idx, pmc_ctrl);
639                 pr_info("CPU#%d:   gen-PMC%d count: %016llx\n",
640                         cpu, idx, pmc_count);
641                 pr_info("CPU#%d:   gen-PMC%d left:  %016llx\n",
642                         cpu, idx, prev_left);
643         }
644         for (idx = 0; idx < nr_counters_fixed; idx++) {
645                 rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, pmc_count);
646
647                 pr_info("CPU#%d: fixed-PMC%d count: %016llx\n",
648                         cpu, idx, pmc_count);
649         }
650         local_irq_enable();
651 }
652
653 static void pmc_generic_disable(struct perf_counter *counter)
654 {
655         struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
656         struct hw_perf_counter *hwc = &counter->hw;
657         unsigned int idx = hwc->idx;
658
659         __pmc_generic_disable(counter, hwc, idx);
660
661         clear_bit(idx, cpuc->used);
662         cpuc->counters[idx] = NULL;
663         /*
664          * Make sure the cleared pointer becomes visible before we
665          * (potentially) free the counter:
666          */
667         smp_wmb();
668
669         /*
670          * Drain the remaining delta count out of a counter
671          * that we are disabling:
672          */
673         x86_perf_counter_update(counter, hwc, idx);
674 }
675
676 /*
677  * Save and restart an expired counter. Called by NMI contexts,
678  * so it has to be careful about preempting normal counter ops:
679  */
680 static void perf_save_and_restart(struct perf_counter *counter)
681 {
682         struct hw_perf_counter *hwc = &counter->hw;
683         int idx = hwc->idx;
684
685         x86_perf_counter_update(counter, hwc, idx);
686         __hw_perf_counter_set_period(counter, hwc, idx);
687
688         if (counter->state == PERF_COUNTER_STATE_ACTIVE)
689                 __pmc_generic_enable(counter, hwc, idx);
690 }
691
692 /*
693  * Maximum interrupt frequency of 100KHz per CPU
694  */
695 #define PERFMON_MAX_INTERRUPTS (100000/HZ)
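/*
 * cpuc->interrupts is incremented for every handled PMU interrupt and
 * reset in perf_counter_unthrottle(); once the budget above is exceeded,
 * counters stay disabled until the next unthrottle.
 */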
696
697 /*
698  * This handler is triggered by the local APIC, so the APIC IRQ handling
699  * rules apply:
700  */
701 static int __smp_perf_counter_interrupt(struct pt_regs *regs, int nmi)
702 {
703         int bit, cpu = smp_processor_id();
704         u64 ack, status;
705         struct cpu_hw_counters *cpuc = &per_cpu(cpu_hw_counters, cpu);
706         int ret = 0;
707
708         cpuc->throttle_ctrl = hw_perf_save_disable();
709
710         status = hw_perf_get_status(cpuc->throttle_ctrl);
711         if (!status)
712                 goto out;
713
714         ret = 1;
715 again:
716         inc_irq_stat(apic_perf_irqs);
717         ack = status;
718         for_each_bit(bit, (unsigned long *)&status, X86_PMC_IDX_MAX) {
719                 struct perf_counter *counter = cpuc->counters[bit];
720
721                 clear_bit(bit, (unsigned long *) &status);
722                 if (!counter)
723                         continue;
724
725                 perf_save_and_restart(counter);
726                 perf_counter_output(counter, nmi, regs);
727         }
728
729         hw_perf_ack_status(ack);
730
731         /*
732          * Repeat if there is more work to be done:
733          */
734         status = hw_perf_get_status(cpuc->throttle_ctrl);
735         if (status)
736                 goto again;
737 out:
738         /*
739          * Restore - do not reenable when global enable is off or throttled:
740          */
741         if (++cpuc->interrupts < PERFMON_MAX_INTERRUPTS)
742                 hw_perf_restore(cpuc->throttle_ctrl);
743
744         return ret;
745 }
746
747 void perf_counter_unthrottle(void)
748 {
749         struct cpu_hw_counters *cpuc;
750
751         if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))
752                 return;
753
754         if (unlikely(!perf_counters_initialized))
755                 return;
756
757         cpuc = &__get_cpu_var(cpu_hw_counters);
758         if (cpuc->interrupts >= PERFMON_MAX_INTERRUPTS) {
759                 if (printk_ratelimit())
760                         printk(KERN_WARNING "PERFMON: max interrupts exceeded!\n");
761                 hw_perf_restore(cpuc->throttle_ctrl);
762         }
763         cpuc->interrupts = 0;
764 }
765
766 void smp_perf_counter_interrupt(struct pt_regs *regs)
767 {
768         irq_enter();
769         apic_write(APIC_LVTPC, LOCAL_PERF_VECTOR);
770         ack_APIC_irq();
771         __smp_perf_counter_interrupt(regs, 0);
772         irq_exit();
773 }
774
775 void perf_counters_lapic_init(int nmi)
776 {
777         u32 apic_val;
778
779         if (!perf_counters_initialized)
780                 return;
781         /*
782          * Enable the performance counter vector in the APIC LVT:
783          */
784         apic_val = apic_read(APIC_LVTERR);
785
786         apic_write(APIC_LVTERR, apic_val | APIC_LVT_MASKED);
787         if (nmi)
788                 apic_write(APIC_LVTPC, APIC_DM_NMI);
789         else
790                 apic_write(APIC_LVTPC, LOCAL_PERF_VECTOR);
791         apic_write(APIC_LVTERR, apic_val);
792 }
793
794 static int __kprobes
795 perf_counter_nmi_handler(struct notifier_block *self,
796                          unsigned long cmd, void *__args)
797 {
798         struct die_args *args = __args;
799         struct pt_regs *regs;
800         int ret;
801
802         switch (cmd) {
803         case DIE_NMI:
804         case DIE_NMI_IPI:
805                 break;
806
807         default:
808                 return NOTIFY_DONE;
809         }
810
811         regs = args->regs;
812
813         apic_write(APIC_LVTPC, APIC_DM_NMI);
814         ret = __smp_perf_counter_interrupt(regs, 1);
815
816         return ret ? NOTIFY_STOP : NOTIFY_OK;
817 }
818
819 static __read_mostly struct notifier_block perf_counter_nmi_notifier = {
820         .notifier_call          = perf_counter_nmi_handler,
821         .next                   = NULL,
822         .priority               = 1
823 };
824
825 static struct pmc_x86_ops pmc_intel_ops = {
826         .save_disable_all       = pmc_intel_save_disable_all,
827         .restore_all            = pmc_intel_restore_all,
828         .get_status             = pmc_intel_get_status,
829         .ack_status             = pmc_intel_ack_status,
830         .enable                 = pmc_intel_enable,
831         .disable                = pmc_intel_disable,
832         .eventsel               = MSR_ARCH_PERFMON_EVENTSEL0,
833         .perfctr                = MSR_ARCH_PERFMON_PERFCTR0,
834         .event_map              = pmc_intel_event_map,
835         .raw_event              = pmc_intel_raw_event,
836         .max_events             = ARRAY_SIZE(intel_perfmon_event_map),
837 };
838
839 static struct pmc_x86_ops pmc_amd_ops = {
840         .save_disable_all       = pmc_amd_save_disable_all,
841         .restore_all            = pmc_amd_restore_all,
842         .get_status             = pmc_amd_get_status,
843         .ack_status             = pmc_amd_ack_status,
844         .enable                 = pmc_amd_enable,
845         .disable                = pmc_amd_disable,
846         .eventsel               = MSR_K7_EVNTSEL0,
847         .perfctr                = MSR_K7_PERFCTR0,
848         .event_map              = pmc_amd_event_map,
849         .raw_event              = pmc_amd_raw_event,
850         .max_events             = ARRAY_SIZE(amd_perfmon_event_map),
851 };
852
853 static struct pmc_x86_ops *pmc_intel_init(void)
854 {
855         union cpuid10_edx edx;
856         union cpuid10_eax eax;
857         unsigned int unused;
858         unsigned int ebx;
859
860         /*
861          * Check whether the Architectural PerfMon supports
862          * the Branch Misses Retired event.
863          */
864         cpuid(10, &eax.full, &ebx, &unused, &edx.full);
865         if (eax.split.mask_length <= ARCH_PERFMON_BRANCH_MISSES_RETIRED)
866                 return NULL;
867
868         intel_perfmon_version = eax.split.version_id;
869         if (intel_perfmon_version < 2)
870                 return NULL;
871
872         pr_info("Intel Performance Monitoring support detected.\n");
873         pr_info("... version:         %d\n", intel_perfmon_version);
874         pr_info("... bit width:       %d\n", eax.split.bit_width);
875         pr_info("... mask length:     %d\n", eax.split.mask_length);
876
877         nr_counters_generic = eax.split.num_counters;
878         nr_counters_fixed = edx.split.num_counters_fixed;
879         counter_value_mask = (1ULL << eax.split.bit_width) - 1;
880
881         return &pmc_intel_ops;
882 }
883
884 static struct pmc_x86_ops *pmc_amd_init(void)
885 {
886         nr_counters_generic = 4;
887         nr_counters_fixed = 0;
888         counter_value_mask = 0x0000FFFFFFFFFFFFULL;
889         counter_value_bits = 48;
890
891         pr_info("AMD Performance Monitoring support detected.\n");
892
893         return &pmc_amd_ops;
894 }
895
896 void __init init_hw_perf_counters(void)
897 {
898         if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))
899                 return;
900
901         switch (boot_cpu_data.x86_vendor) {
902         case X86_VENDOR_INTEL:
903                 pmc_ops = pmc_intel_init();
904                 break;
905         case X86_VENDOR_AMD:
906                 pmc_ops = pmc_amd_init();
907                 break;
908         }
909         if (!pmc_ops)
910                 return;
911
912         pr_info("... num counters:    %d\n", nr_counters_generic);
913         if (nr_counters_generic > X86_PMC_MAX_GENERIC) {
914                 nr_counters_generic = X86_PMC_MAX_GENERIC;
915                 WARN(1, KERN_ERR "hw perf counters %d > max(%d), clipping!",
916                         nr_counters_generic, X86_PMC_MAX_GENERIC);
917         }
918         perf_counter_mask = (1 << nr_counters_generic) - 1;
919         perf_max_counters = nr_counters_generic;
920
921         pr_info("... value mask:      %016Lx\n", counter_value_mask);
922
923         if (nr_counters_fixed > X86_PMC_MAX_FIXED) {
924                 nr_counters_fixed = X86_PMC_MAX_FIXED;
925                 WARN(1, KERN_ERR "hw perf counters fixed %d > max(%d), clipping!",
926                         nr_counters_fixed, X86_PMC_MAX_FIXED);
927         }
928         pr_info("... fixed counters:  %d\n", nr_counters_fixed);
929
930         perf_counter_mask |= ((1LL << nr_counters_fixed)-1) << X86_PMC_IDX_FIXED;
931
932         pr_info("... counter mask:    %016Lx\n", perf_counter_mask);
933         perf_counters_initialized = true;
934
935         perf_counters_lapic_init(0);
936         register_die_notifier(&perf_counter_nmi_notifier);
937 }
938
939 static void pmc_generic_read(struct perf_counter *counter)
940 {
941         x86_perf_counter_update(counter, &counter->hw, counter->hw.idx);
942 }
943
944 static const struct hw_perf_counter_ops x86_perf_counter_ops = {
945         .enable         = pmc_generic_enable,
946         .disable        = pmc_generic_disable,
947         .read           = pmc_generic_read,
948 };
949
950 const struct hw_perf_counter_ops *
951 hw_perf_counter_init(struct perf_counter *counter)
952 {
953         int err;
954
955         err = __hw_perf_counter_init(counter);
956         if (err)
957                 return NULL;
958
959         return &x86_perf_counter_ops;
960 }