cpufreq: intel_pstate: Replace timers with utilization update callbacks
drivers/cpufreq/intel_pstate.c
1 /*
2  * intel_pstate.c: Native P state management for Intel processors
3  *
4  * (C) Copyright 2012 Intel Corporation
5  * Author: Dirk Brandewie <dirk.j.brandewie@intel.com>
6  *
7  * This program is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU General Public License
9  * as published by the Free Software Foundation; version 2
10  * of the License.
11  */
12
13 #include <linux/kernel.h>
14 #include <linux/kernel_stat.h>
15 #include <linux/module.h>
16 #include <linux/ktime.h>
17 #include <linux/hrtimer.h>
18 #include <linux/tick.h>
19 #include <linux/slab.h>
20 #include <linux/sched.h>
21 #include <linux/list.h>
22 #include <linux/cpu.h>
23 #include <linux/cpufreq.h>
24 #include <linux/sysfs.h>
25 #include <linux/types.h>
26 #include <linux/fs.h>
27 #include <linux/debugfs.h>
28 #include <linux/acpi.h>
29 #include <linux/vmalloc.h>
30 #include <trace/events/power.h>
31
32 #include <asm/div64.h>
33 #include <asm/msr.h>
34 #include <asm/cpu_device_id.h>
35 #include <asm/cpufeature.h>
36
37 #define ATOM_RATIOS             0x66a
38 #define ATOM_VIDS               0x66b
39 #define ATOM_TURBO_RATIOS       0x66c
40 #define ATOM_TURBO_VIDS         0x66d
41
42 #define FRAC_BITS 8
43 #define int_tofp(X) ((int64_t)(X) << FRAC_BITS)
44 #define fp_toint(X) ((X) >> FRAC_BITS)
45
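/*
 * Fixed-point helpers: values carry FRAC_BITS (8) fractional bits, so
 * int_tofp(1) == 256.  mul_fp() and div_fp() widen to 64 bits internally
 * to avoid overflow, and ceiling_fp() rounds a fixed-point value up to
 * the nearest integer.
 */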
46 static inline int32_t mul_fp(int32_t x, int32_t y)
47 {
48         return ((int64_t)x * (int64_t)y) >> FRAC_BITS;
49 }
50
51 static inline int32_t div_fp(s64 x, s64 y)
52 {
53         return div64_s64((int64_t)x << FRAC_BITS, y);
54 }
55
56 static inline int ceiling_fp(int32_t x)
57 {
58         int mask, ret;
59
60         ret = fp_toint(x);
61         mask = (1 << FRAC_BITS) - 1;
62         if (x & mask)
63                 ret += 1;
64         return ret;
65 }
66
67 struct sample {
68         int32_t core_pct_busy;
69         int32_t busy_scaled;
70         u64 aperf;
71         u64 mperf;
72         u64 tsc;
73         int freq;
74         u64 time;
75 };
76
77 struct pstate_data {
78         int     current_pstate;
79         int     min_pstate;
80         int     max_pstate;
81         int     max_pstate_physical;
82         int     scaling;
83         int     turbo_pstate;
84 };
85
86 struct vid_data {
87         int min;
88         int max;
89         int turbo;
90         int32_t ratio;
91 };
92
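/*
 * State of the PID controller shared by both target-P-state algorithms.
 * The gains, integral and last error are kept in FRAC_BITS fixed point;
 * setpoint and deadband are plain percentages.
 */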
93 struct _pid {
94         int setpoint;
95         int32_t integral;
96         int32_t p_gain;
97         int32_t i_gain;
98         int32_t d_gain;
99         int deadband;
100         int32_t last_err;
101 };
102
103 struct cpudata {
104         int cpu;
105
106         struct update_util_data update_util;
107
108         struct pstate_data pstate;
109         struct vid_data vid;
110         struct _pid pid;
111
112         u64     last_sample_time;
113         u64     prev_aperf;
114         u64     prev_mperf;
115         u64     prev_tsc;
116         u64     prev_cummulative_iowait;
117         struct sample sample;
118 };
119
120 static struct cpudata **all_cpu_data;
121 struct pstate_adjust_policy {
122         int sample_rate_ms;
123         s64 sample_rate_ns;
124         int deadband;
125         int setpoint;
126         int p_gain_pct;
127         int d_gain_pct;
128         int i_gain_pct;
129 };
130
131 struct pstate_funcs {
132         int (*get_max)(void);
133         int (*get_max_physical)(void);
134         int (*get_min)(void);
135         int (*get_turbo)(void);
136         int (*get_scaling)(void);
137         void (*set)(struct cpudata*, int pstate);
138         void (*get_vid)(struct cpudata *);
139         int32_t (*get_target_pstate)(struct cpudata *);
140 };
141
142 struct cpu_defaults {
143         struct pstate_adjust_policy pid_policy;
144         struct pstate_funcs funcs;
145 };
146
147 static inline int32_t get_target_pstate_use_performance(struct cpudata *cpu);
148 static inline int32_t get_target_pstate_use_cpu_load(struct cpudata *cpu);
149
150 static struct pstate_adjust_policy pid_params;
151 static struct pstate_funcs pstate_funcs;
152 static int hwp_active;
153
154 struct perf_limits {
155         int no_turbo;
156         int turbo_disabled;
157         int max_perf_pct;
158         int min_perf_pct;
159         int32_t max_perf;
160         int32_t min_perf;
161         int max_policy_pct;
162         int max_sysfs_pct;
163         int min_policy_pct;
164         int min_sysfs_pct;
165 };
166
167 static struct perf_limits performance_limits = {
168         .no_turbo = 0,
169         .turbo_disabled = 0,
170         .max_perf_pct = 100,
171         .max_perf = int_tofp(1),
172         .min_perf_pct = 100,
173         .min_perf = int_tofp(1),
174         .max_policy_pct = 100,
175         .max_sysfs_pct = 100,
176         .min_policy_pct = 0,
177         .min_sysfs_pct = 0,
178 };
179
180 static struct perf_limits powersave_limits = {
181         .no_turbo = 0,
182         .turbo_disabled = 0,
183         .max_perf_pct = 100,
184         .max_perf = int_tofp(1),
185         .min_perf_pct = 0,
186         .min_perf = 0,
187         .max_policy_pct = 100,
188         .max_sysfs_pct = 100,
189         .min_policy_pct = 0,
190         .min_sysfs_pct = 0,
191 };
192
193 #ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE
194 static struct perf_limits *limits = &performance_limits;
195 #else
196 static struct perf_limits *limits = &powersave_limits;
197 #endif
198
199 static inline void pid_reset(struct _pid *pid, int setpoint, int busy,
200                              int deadband, int integral) {
201         pid->setpoint = setpoint;
202         pid->deadband  = deadband;
203         pid->integral  = int_tofp(integral);
204         pid->last_err  = int_tofp(setpoint) - int_tofp(busy);
205 }
206
207 static inline void pid_p_gain_set(struct _pid *pid, int percent)
208 {
209         pid->p_gain = div_fp(int_tofp(percent), int_tofp(100));
210 }
211
212 static inline void pid_i_gain_set(struct _pid *pid, int percent)
213 {
214         pid->i_gain = div_fp(int_tofp(percent), int_tofp(100));
215 }
216
217 static inline void pid_d_gain_set(struct _pid *pid, int percent)
218 {
219         pid->d_gain = div_fp(int_tofp(percent), int_tofp(100));
220 }
221
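/*
 * One discrete PID step: return the rounded control output for the given
 * "busy" reading, or 0 if the error is within the deadband.  The integral
 * term is clamped to +/-30 so that a long run of one-sided errors cannot
 * dominate the output.
 */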
222 static signed int pid_calc(struct _pid *pid, int32_t busy)
223 {
224         signed int result;
225         int32_t pterm, dterm, fp_error;
226         int32_t integral_limit;
227
228         fp_error = int_tofp(pid->setpoint) - busy;
229
230         if (abs(fp_error) <= int_tofp(pid->deadband))
231                 return 0;
232
233         pterm = mul_fp(pid->p_gain, fp_error);
234
235         pid->integral += fp_error;
236
237         /*
238          * We limit the integral here so that it will never
239          * get higher than 30.  This prevents it from becoming
240          * too large an input over long periods of time and allows
241          * it to get factored out sooner.
242          *
243          * The value of 30 was chosen through experimentation.
244          */
245         integral_limit = int_tofp(30);
246         if (pid->integral > integral_limit)
247                 pid->integral = integral_limit;
248         if (pid->integral < -integral_limit)
249                 pid->integral = -integral_limit;
250
251         dterm = mul_fp(pid->d_gain, fp_error - pid->last_err);
252         pid->last_err = fp_error;
253
254         result = pterm + mul_fp(pid->integral, pid->i_gain) + dterm;
255         result = result + (1 << (FRAC_BITS-1));
256         return (signed int)fp_toint(result);
257 }
258
259 static inline void intel_pstate_busy_pid_reset(struct cpudata *cpu)
260 {
261         pid_p_gain_set(&cpu->pid, pid_params.p_gain_pct);
262         pid_d_gain_set(&cpu->pid, pid_params.d_gain_pct);
263         pid_i_gain_set(&cpu->pid, pid_params.i_gain_pct);
264
265         pid_reset(&cpu->pid, pid_params.setpoint, 100, pid_params.deadband, 0);
266 }
267
268 static inline void intel_pstate_reset_all_pid(void)
269 {
270         unsigned int cpu;
271
272         for_each_online_cpu(cpu) {
273                 if (all_cpu_data[cpu])
274                         intel_pstate_busy_pid_reset(all_cpu_data[cpu]);
275         }
276 }
277
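/*
 * Turbo is treated as unavailable if the BIOS has disabled it in
 * MSR_IA32_MISC_ENABLE or if the max non-turbo P-state equals the turbo
 * P-state (i.e. the part has no turbo range at all).
 */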
278 static inline void update_turbo_state(void)
279 {
280         u64 misc_en;
281         struct cpudata *cpu;
282
283         cpu = all_cpu_data[0];
284         rdmsrl(MSR_IA32_MISC_ENABLE, misc_en);
285         limits->turbo_disabled =
286                 (misc_en & MSR_IA32_MISC_ENABLE_TURBO_DISABLE ||
287                  cpu->pstate.max_pstate == cpu->pstate.turbo_pstate);
288 }
289
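/*
 * Map the min/max percentage limits onto the HWP capability range and
 * program MSR_HWP_REQUEST on every online CPU.  When turbo is disabled
 * through sysfs, the ceiling is additionally capped at the guaranteed
 * performance level.
 */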
290 static void intel_pstate_hwp_set(void)
291 {
292         int min, hw_min, max, hw_max, cpu, range, adj_range;
293         u64 value, cap;
294
295         rdmsrl(MSR_HWP_CAPABILITIES, cap);
296         hw_min = HWP_LOWEST_PERF(cap);
297         hw_max = HWP_HIGHEST_PERF(cap);
298         range = hw_max - hw_min;
299
300         get_online_cpus();
301
302         for_each_online_cpu(cpu) {
303                 rdmsrl_on_cpu(cpu, MSR_HWP_REQUEST, &value);
304                 adj_range = limits->min_perf_pct * range / 100;
305                 min = hw_min + adj_range;
306                 value &= ~HWP_MIN_PERF(~0L);
307                 value |= HWP_MIN_PERF(min);
308
309                 adj_range = limits->max_perf_pct * range / 100;
310                 max = hw_min + adj_range;
311                 if (limits->no_turbo) {
312                         hw_max = HWP_GUARANTEED_PERF(cap);
313                         if (hw_max < max)
314                                 max = hw_max;
315                 }
316
317                 value &= ~HWP_MAX_PERF(~0L);
318                 value |= HWP_MAX_PERF(max);
319                 wrmsrl_on_cpu(cpu, MSR_HWP_REQUEST, value);
320         }
321
322         put_online_cpus();
323 }
324
325 /************************** debugfs begin ************************/
326 static int pid_param_set(void *data, u64 val)
327 {
328         *(u32 *)data = val;
329         intel_pstate_reset_all_pid();
330         return 0;
331 }
332
333 static int pid_param_get(void *data, u64 *val)
334 {
335         *val = *(u32 *)data;
336         return 0;
337 }
338 DEFINE_SIMPLE_ATTRIBUTE(fops_pid_param, pid_param_get, pid_param_set, "%llu\n");
339
340 struct pid_param {
341         char *name;
342         void *value;
343 };
344
345 static struct pid_param pid_files[] = {
346         {"sample_rate_ms", &pid_params.sample_rate_ms},
347         {"d_gain_pct", &pid_params.d_gain_pct},
348         {"i_gain_pct", &pid_params.i_gain_pct},
349         {"deadband", &pid_params.deadband},
350         {"setpoint", &pid_params.setpoint},
351         {"p_gain_pct", &pid_params.p_gain_pct},
352         {NULL, NULL}
353 };
354
355 static void __init intel_pstate_debug_expose_params(void)
356 {
357         struct dentry *debugfs_parent;
358         int i = 0;
359
360         if (hwp_active)
361                 return;
362         debugfs_parent = debugfs_create_dir("pstate_snb", NULL);
363         if (IS_ERR_OR_NULL(debugfs_parent))
364                 return;
365         while (pid_files[i].name) {
366                 debugfs_create_file(pid_files[i].name, 0660,
367                                     debugfs_parent, pid_files[i].value,
368                                     &fops_pid_param);
369                 i++;
370         }
371 }
372
373 /************************** debugfs end ************************/
374
375 /************************** sysfs begin ************************/
376 #define show_one(file_name, object)                                     \
377         static ssize_t show_##file_name                                 \
378         (struct kobject *kobj, struct attribute *attr, char *buf)       \
379         {                                                               \
380                 return sprintf(buf, "%u\n", limits->object);            \
381         }
382
383 static ssize_t show_turbo_pct(struct kobject *kobj,
384                                 struct attribute *attr, char *buf)
385 {
386         struct cpudata *cpu;
387         int total, no_turbo, turbo_pct;
388         uint32_t turbo_fp;
389
390         cpu = all_cpu_data[0];
391
392         total = cpu->pstate.turbo_pstate - cpu->pstate.min_pstate + 1;
393         no_turbo = cpu->pstate.max_pstate - cpu->pstate.min_pstate + 1;
394         turbo_fp = div_fp(int_tofp(no_turbo), int_tofp(total));
395         turbo_pct = 100 - fp_toint(mul_fp(turbo_fp, int_tofp(100)));
396         return sprintf(buf, "%u\n", turbo_pct);
397 }
398
399 static ssize_t show_num_pstates(struct kobject *kobj,
400                                 struct attribute *attr, char *buf)
401 {
402         struct cpudata *cpu;
403         int total;
404
405         cpu = all_cpu_data[0];
406         total = cpu->pstate.turbo_pstate - cpu->pstate.min_pstate + 1;
407         return sprintf(buf, "%u\n", total);
408 }
409
410 static ssize_t show_no_turbo(struct kobject *kobj,
411                              struct attribute *attr, char *buf)
412 {
413         ssize_t ret;
414
415         update_turbo_state();
416         if (limits->turbo_disabled)
417                 ret = sprintf(buf, "%u\n", limits->turbo_disabled);
418         else
419                 ret = sprintf(buf, "%u\n", limits->no_turbo);
420
421         return ret;
422 }
423
424 static ssize_t store_no_turbo(struct kobject *a, struct attribute *b,
425                               const char *buf, size_t count)
426 {
427         unsigned int input;
428         int ret;
429
430         ret = sscanf(buf, "%u", &input);
431         if (ret != 1)
432                 return -EINVAL;
433
434         update_turbo_state();
435         if (limits->turbo_disabled) {
436                 pr_warn("intel_pstate: Turbo disabled by BIOS or unavailable on processor\n");
437                 return -EPERM;
438         }
439
440         limits->no_turbo = clamp_t(int, input, 0, 1);
441
442         if (hwp_active)
443                 intel_pstate_hwp_set();
444
445         return count;
446 }
447
448 static ssize_t store_max_perf_pct(struct kobject *a, struct attribute *b,
449                                   const char *buf, size_t count)
450 {
451         unsigned int input;
452         int ret;
453
454         ret = sscanf(buf, "%u", &input);
455         if (ret != 1)
456                 return -EINVAL;
457
458         limits->max_sysfs_pct = clamp_t(int, input, 0, 100);
459         limits->max_perf_pct = min(limits->max_policy_pct,
460                                    limits->max_sysfs_pct);
461         limits->max_perf_pct = max(limits->min_policy_pct,
462                                    limits->max_perf_pct);
463         limits->max_perf_pct = max(limits->min_perf_pct,
464                                    limits->max_perf_pct);
465         limits->max_perf = div_fp(int_tofp(limits->max_perf_pct),
466                                   int_tofp(100));
467
468         if (hwp_active)
469                 intel_pstate_hwp_set();
470         return count;
471 }
472
473 static ssize_t store_min_perf_pct(struct kobject *a, struct attribute *b,
474                                   const char *buf, size_t count)
475 {
476         unsigned int input;
477         int ret;
478
479         ret = sscanf(buf, "%u", &input);
480         if (ret != 1)
481                 return -EINVAL;
482
483         limits->min_sysfs_pct = clamp_t(int, input, 0, 100);
484         limits->min_perf_pct = max(limits->min_policy_pct,
485                                    limits->min_sysfs_pct);
486         limits->min_perf_pct = min(limits->max_policy_pct,
487                                    limits->min_perf_pct);
488         limits->min_perf_pct = min(limits->max_perf_pct,
489                                    limits->min_perf_pct);
490         limits->min_perf = div_fp(int_tofp(limits->min_perf_pct),
491                                   int_tofp(100));
492
493         if (hwp_active)
494                 intel_pstate_hwp_set();
495         return count;
496 }
497
498 show_one(max_perf_pct, max_perf_pct);
499 show_one(min_perf_pct, min_perf_pct);
500
501 define_one_global_rw(no_turbo);
502 define_one_global_rw(max_perf_pct);
503 define_one_global_rw(min_perf_pct);
504 define_one_global_ro(turbo_pct);
505 define_one_global_ro(num_pstates);
506
507 static struct attribute *intel_pstate_attributes[] = {
508         &no_turbo.attr,
509         &max_perf_pct.attr,
510         &min_perf_pct.attr,
511         &turbo_pct.attr,
512         &num_pstates.attr,
513         NULL
514 };
515
516 static struct attribute_group intel_pstate_attr_group = {
517         .attrs = intel_pstate_attributes,
518 };
519
520 static void __init intel_pstate_sysfs_expose_params(void)
521 {
522         struct kobject *intel_pstate_kobject;
523         int rc;
524
525         intel_pstate_kobject = kobject_create_and_add("intel_pstate",
526                                                 &cpu_subsys.dev_root->kobj);
527         BUG_ON(!intel_pstate_kobject);
528         rc = sysfs_create_group(intel_pstate_kobject, &intel_pstate_attr_group);
529         BUG_ON(rc);
530 }
531 /************************** sysfs end ************************/
532
533 static void intel_pstate_hwp_enable(struct cpudata *cpudata)
534 {
535         wrmsrl_on_cpu(cpudata->cpu, MSR_PM_ENABLE, 0x1);
536 }
537
538 static int atom_get_min_pstate(void)
539 {
540         u64 value;
541
542         rdmsrl(ATOM_RATIOS, value);
543         return (value >> 8) & 0x7F;
544 }
545
546 static int atom_get_max_pstate(void)
547 {
548         u64 value;
549
550         rdmsrl(ATOM_RATIOS, value);
551         return (value >> 16) & 0x7F;
552 }
553
554 static int atom_get_turbo_pstate(void)
555 {
556         u64 value;
557
558         rdmsrl(ATOM_TURBO_RATIOS, value);
559         return value & 0x7F;
560 }
561
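/*
 * Atom parts also need a voltage ID in MSR_IA32_PERF_CTL: interpolate the
 * VID linearly between vid.min and vid.max for the requested P-state, and
 * use the dedicated turbo VID whenever running above the max non-turbo
 * P-state.
 */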
562 static void atom_set_pstate(struct cpudata *cpudata, int pstate)
563 {
564         u64 val;
565         int32_t vid_fp;
566         u32 vid;
567
568         val = (u64)pstate << 8;
569         if (limits->no_turbo && !limits->turbo_disabled)
570                 val |= (u64)1 << 32;
571
572         vid_fp = cpudata->vid.min + mul_fp(
573                 int_tofp(pstate - cpudata->pstate.min_pstate),
574                 cpudata->vid.ratio);
575
576         vid_fp = clamp_t(int32_t, vid_fp, cpudata->vid.min, cpudata->vid.max);
577         vid = ceiling_fp(vid_fp);
578
579         if (pstate > cpudata->pstate.max_pstate)
580                 vid = cpudata->vid.turbo;
581
582         val |= vid;
583
584         wrmsrl_on_cpu(cpudata->cpu, MSR_IA32_PERF_CTL, val);
585 }
586
587 static int silvermont_get_scaling(void)
588 {
589         u64 value;
590         int i;
591         /* Defined in Table 35-6 from SDM (Sept 2015) */
592         static int silvermont_freq_table[] = {
593                 83300, 100000, 133300, 116700, 80000};
594
595         rdmsrl(MSR_FSB_FREQ, value);
596         i = value & 0x7;
597         WARN_ON(i > 4);
598
599         return silvermont_freq_table[i];
600 }
601
602 static int airmont_get_scaling(void)
603 {
604         u64 value;
605         int i;
606         /* Defined in Table 35-10 from SDM (Sept 2015) */
607         static int airmont_freq_table[] = {
608                 83300, 100000, 133300, 116700, 80000,
609                 93300, 90000, 88900, 87500};
610
611         rdmsrl(MSR_FSB_FREQ, value);
612         i = value & 0xF;
613         WARN_ON(i > 8);
614
615         return airmont_freq_table[i];
616 }
617
618 static void atom_get_vid(struct cpudata *cpudata)
619 {
620         u64 value;
621
622         rdmsrl(ATOM_VIDS, value);
623         cpudata->vid.min = int_tofp((value >> 8) & 0x7f);
624         cpudata->vid.max = int_tofp((value >> 16) & 0x7f);
625         cpudata->vid.ratio = div_fp(
626                 cpudata->vid.max - cpudata->vid.min,
627                 int_tofp(cpudata->pstate.max_pstate -
628                         cpudata->pstate.min_pstate));
629
630         rdmsrl(ATOM_TURBO_VIDS, value);
631         cpudata->vid.turbo = value & 0x7f;
632 }
633
634 static int core_get_min_pstate(void)
635 {
636         u64 value;
637
638         rdmsrl(MSR_PLATFORM_INFO, value);
639         return (value >> 40) & 0xFF;
640 }
641
642 static int core_get_max_pstate_physical(void)
643 {
644         u64 value;
645
646         rdmsrl(MSR_PLATFORM_INFO, value);
647         return (value >> 8) & 0xFF;
648 }
649
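/*
 * The max non-turbo P-state normally comes from MSR_PLATFORM_INFO, but
 * when configurable TDP levels are supported and the Turbo Activation
 * Ratio agrees with the active TDP level, report the TAR value instead so
 * that a reduced TDP limit is honoured.
 */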
650 static int core_get_max_pstate(void)
651 {
652         u64 tar;
653         u64 plat_info;
654         int max_pstate;
655         int err;
656
657         rdmsrl(MSR_PLATFORM_INFO, plat_info);
658         max_pstate = (plat_info >> 8) & 0xFF;
659
660         err = rdmsrl_safe(MSR_TURBO_ACTIVATION_RATIO, &tar);
661         if (!err) {
662                 /* Do some sanity checking for safety */
663                 if (plat_info & 0x600000000) {
664                         u64 tdp_ctrl;
665                         u64 tdp_ratio;
666                         int tdp_msr;
667
668                         err = rdmsrl_safe(MSR_CONFIG_TDP_CONTROL, &tdp_ctrl);
669                         if (err)
670                                 goto skip_tar;
671
672                         tdp_msr = MSR_CONFIG_TDP_NOMINAL + tdp_ctrl;
673                         err = rdmsrl_safe(tdp_msr, &tdp_ratio);
674                         if (err)
675                                 goto skip_tar;
676
677                         if (tdp_ratio - 1 == tar) {
678                                 max_pstate = tar;
679                                 pr_debug("max_pstate=TAC %x\n", max_pstate);
680                         } else {
681                                 goto skip_tar;
682                         }
683                 }
684         }
685
686 skip_tar:
687         return max_pstate;
688 }
689
690 static int core_get_turbo_pstate(void)
691 {
692         u64 value;
693         int nont, ret;
694
695         rdmsrl(MSR_NHM_TURBO_RATIO_LIMIT, value);
696         nont = core_get_max_pstate();
697         ret = (value) & 255;
698         if (ret <= nont)
699                 ret = nont;
700         return ret;
701 }
702
703 static inline int core_get_scaling(void)
704 {
705         return 100000;
706 }
707
708 static void core_set_pstate(struct cpudata *cpudata, int pstate)
709 {
710         u64 val;
711
712         val = (u64)pstate << 8;
713         if (limits->no_turbo && !limits->turbo_disabled)
714                 val |= (u64)1 << 32;
715
716         wrmsrl(MSR_IA32_PERF_CTL, val);
717 }
718
719 static int knl_get_turbo_pstate(void)
720 {
721         u64 value;
722         int nont, ret;
723
724         rdmsrl(MSR_NHM_TURBO_RATIO_LIMIT, value);
725         nont = core_get_max_pstate();
726         ret = (((value) >> 8) & 0xFF);
727         if (ret <= nont)
728                 ret = nont;
729         return ret;
730 }
731
732 static struct cpu_defaults core_params = {
733         .pid_policy = {
734                 .sample_rate_ms = 10,
735                 .deadband = 0,
736                 .setpoint = 97,
737                 .p_gain_pct = 20,
738                 .d_gain_pct = 0,
739                 .i_gain_pct = 0,
740         },
741         .funcs = {
742                 .get_max = core_get_max_pstate,
743                 .get_max_physical = core_get_max_pstate_physical,
744                 .get_min = core_get_min_pstate,
745                 .get_turbo = core_get_turbo_pstate,
746                 .get_scaling = core_get_scaling,
747                 .set = core_set_pstate,
748                 .get_target_pstate = get_target_pstate_use_performance,
749         },
750 };
751
752 static struct cpu_defaults silvermont_params = {
753         .pid_policy = {
754                 .sample_rate_ms = 10,
755                 .deadband = 0,
756                 .setpoint = 60,
757                 .p_gain_pct = 14,
758                 .d_gain_pct = 0,
759                 .i_gain_pct = 4,
760         },
761         .funcs = {
762                 .get_max = atom_get_max_pstate,
763                 .get_max_physical = atom_get_max_pstate,
764                 .get_min = atom_get_min_pstate,
765                 .get_turbo = atom_get_turbo_pstate,
766                 .set = atom_set_pstate,
767                 .get_scaling = silvermont_get_scaling,
768                 .get_vid = atom_get_vid,
769                 .get_target_pstate = get_target_pstate_use_cpu_load,
770         },
771 };
772
773 static struct cpu_defaults airmont_params = {
774         .pid_policy = {
775                 .sample_rate_ms = 10,
776                 .deadband = 0,
777                 .setpoint = 60,
778                 .p_gain_pct = 14,
779                 .d_gain_pct = 0,
780                 .i_gain_pct = 4,
781         },
782         .funcs = {
783                 .get_max = atom_get_max_pstate,
784                 .get_max_physical = atom_get_max_pstate,
785                 .get_min = atom_get_min_pstate,
786                 .get_turbo = atom_get_turbo_pstate,
787                 .set = atom_set_pstate,
788                 .get_scaling = airmont_get_scaling,
789                 .get_vid = atom_get_vid,
790                 .get_target_pstate = get_target_pstate_use_cpu_load,
791         },
792 };
793
794 static struct cpu_defaults knl_params = {
795         .pid_policy = {
796                 .sample_rate_ms = 10,
797                 .deadband = 0,
798                 .setpoint = 97,
799                 .p_gain_pct = 20,
800                 .d_gain_pct = 0,
801                 .i_gain_pct = 0,
802         },
803         .funcs = {
804                 .get_max = core_get_max_pstate,
805                 .get_max_physical = core_get_max_pstate_physical,
806                 .get_min = core_get_min_pstate,
807                 .get_turbo = knl_get_turbo_pstate,
808                 .get_scaling = core_get_scaling,
809                 .set = core_set_pstate,
810                 .get_target_pstate = get_target_pstate_use_performance,
811         },
812 };
813
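/*
 * Translate the percentage limits into a [min, max] P-state range,
 * starting from the turbo P-state (or the max non-turbo P-state when
 * turbo is off or unavailable) and clamping the result to what the CPU
 * actually supports.
 */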
814 static void intel_pstate_get_min_max(struct cpudata *cpu, int *min, int *max)
815 {
816         int max_perf = cpu->pstate.turbo_pstate;
817         int max_perf_adj;
818         int min_perf;
819
820         if (limits->no_turbo || limits->turbo_disabled)
821                 max_perf = cpu->pstate.max_pstate;
822
823         /*
824          * Performance can be limited by the user through sysfs, by the
825          * cpufreq policy, or by CPU-specific default values determined
826          * through experimentation.
827          */
828         max_perf_adj = fp_toint(mul_fp(int_tofp(max_perf), limits->max_perf));
829         *max = clamp_t(int, max_perf_adj,
830                         cpu->pstate.min_pstate, cpu->pstate.turbo_pstate);
831
832         min_perf = fp_toint(mul_fp(int_tofp(max_perf), limits->min_perf));
833         *min = clamp_t(int, min_perf, cpu->pstate.min_pstate, max_perf);
834 }
835
836 static void intel_pstate_set_pstate(struct cpudata *cpu, int pstate, bool force)
837 {
838         int max_perf, min_perf;
839
840         if (force) {
841                 update_turbo_state();
842
843                 intel_pstate_get_min_max(cpu, &min_perf, &max_perf);
844
845                 pstate = clamp_t(int, pstate, min_perf, max_perf);
846
847                 if (pstate == cpu->pstate.current_pstate)
848                         return;
849         }
850         trace_cpu_frequency(pstate * cpu->pstate.scaling, cpu->cpu);
851
852         cpu->pstate.current_pstate = pstate;
853
854         pstate_funcs.set(cpu, pstate);
855 }
856
857 static void intel_pstate_get_cpu_pstates(struct cpudata *cpu)
858 {
859         cpu->pstate.min_pstate = pstate_funcs.get_min();
860         cpu->pstate.max_pstate = pstate_funcs.get_max();
861         cpu->pstate.max_pstate_physical = pstate_funcs.get_max_physical();
862         cpu->pstate.turbo_pstate = pstate_funcs.get_turbo();
863         cpu->pstate.scaling = pstate_funcs.get_scaling();
864
865         if (pstate_funcs.get_vid)
866                 pstate_funcs.get_vid(cpu);
867         intel_pstate_set_pstate(cpu, cpu->pstate.min_pstate, false);
868 }
869
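/*
 * Derive the "core busy" percentage from the APERF/MPERF deltas of the
 * last sample: APERF counts at the delivered frequency while MPERF counts
 * at the fixed max non-turbo frequency, so their ratio times 100 is the
 * busy percentage relative to that reference.  An estimated average
 * frequency is also stored for reporting through the ->get() callback.
 */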
870 static inline void intel_pstate_calc_busy(struct cpudata *cpu)
871 {
872         struct sample *sample = &cpu->sample;
873         int64_t core_pct;
874
875         core_pct = int_tofp(sample->aperf) * int_tofp(100);
876         core_pct = div64_u64(core_pct, int_tofp(sample->mperf));
877
878         sample->freq = fp_toint(
879                 mul_fp(int_tofp(
880                         cpu->pstate.max_pstate_physical *
881                         cpu->pstate.scaling / 100),
882                         core_pct));
883
884         sample->core_pct_busy = (int32_t)core_pct;
885 }
886
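/*
 * Take a new APERF/MPERF/TSC snapshot with interrupts disabled and store
 * the deltas against the previous snapshot in cpu->sample.  Snapshots in
 * which MPERF or the TSC has not advanced are discarded to avoid dividing
 * by zero later.
 */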
887 static inline void intel_pstate_sample(struct cpudata *cpu, u64 time)
888 {
889         u64 aperf, mperf;
890         unsigned long flags;
891         u64 tsc;
892
893         local_irq_save(flags);
894         rdmsrl(MSR_IA32_APERF, aperf);
895         rdmsrl(MSR_IA32_MPERF, mperf);
896         tsc = rdtsc();
897         if ((cpu->prev_mperf == mperf) || (cpu->prev_tsc == tsc)) {
898                 local_irq_restore(flags);
899                 return;
900         }
901         local_irq_restore(flags);
902
903         cpu->last_sample_time = cpu->sample.time;
904         cpu->sample.time = time;
905         cpu->sample.aperf = aperf;
906         cpu->sample.mperf = mperf;
907         cpu->sample.tsc = tsc;
908         cpu->sample.aperf -= cpu->prev_aperf;
909         cpu->sample.mperf -= cpu->prev_mperf;
910         cpu->sample.tsc -= cpu->prev_tsc;
911
912         intel_pstate_calc_busy(cpu);
913
914         cpu->prev_aperf = aperf;
915         cpu->prev_mperf = mperf;
916         cpu->prev_tsc = tsc;
917 }
918
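/*
 * "cpu_load" algorithm (Atom): busyness is the MPERF delta (plus an
 * MPERF-equivalent credit for time spent in iowait) relative to the TSC
 * delta over the sample interval; the PID output is subtracted from the
 * current P-state to obtain the target.
 */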
919 static inline int32_t get_target_pstate_use_cpu_load(struct cpudata *cpu)
920 {
921         struct sample *sample = &cpu->sample;
922         u64 cummulative_iowait, delta_iowait_us;
923         u64 delta_iowait_mperf;
924         u64 mperf, now;
925         int32_t cpu_load;
926
927         cummulative_iowait = get_cpu_iowait_time_us(cpu->cpu, &now);
928
929         /*
930          * Convert the iowait time into an equivalent number of MPERF cycles
931          * spent at max_freq.  Iowait counts as busy time only for the
932          * cpu_load algorithm; the performance algorithm always pushes
933          * toward the maximum P-state, so it already boosts I/O-bound work.
934          */
935         delta_iowait_us = cummulative_iowait - cpu->prev_cummulative_iowait;
936         delta_iowait_mperf = div64_u64(delta_iowait_us * cpu->pstate.scaling *
937                 cpu->pstate.max_pstate, MSEC_PER_SEC);
938
939         mperf = cpu->sample.mperf + delta_iowait_mperf;
940         cpu->prev_cummulative_iowait = cummulative_iowait;
941
942
943         /*
944          * The load can be estimated as the ratio of the mperf counter,
945          * which runs at a constant frequency but only during active (C0)
946          * periods, to the time stamp counter, which runs at that same
947          * frequency during both active and idle periods.
948          */
949         cpu_load = div64_u64(int_tofp(100) * mperf, sample->tsc);
950         cpu->sample.busy_scaled = cpu_load;
951
952         return cpu->pstate.current_pstate - pid_calc(&cpu->pid, cpu_load);
953 }
954
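/*
 * "performance" algorithm (Core/KNL): normalize the measured core busyness
 * to the P-state that was last requested, feed the result to the PID
 * controller and subtract its output from the current P-state.
 */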
955 static inline int32_t get_target_pstate_use_performance(struct cpudata *cpu)
956 {
957         int32_t core_busy, max_pstate, current_pstate, sample_ratio;
958         u64 duration_ns;
959
960         /*
961          * core_busy is the ratio of actual performance to max
962          * max_pstate is the max non turbo pstate available
963          * current_pstate was the pstate that was requested during
964          *      the last sample period.
965          *
966          * We normalize core_busy, which was our actual percent
967          * performance to what we requested during the last sample
968          * period. The result will be a percentage of busy at a
969          * specified pstate.
970          */
971         core_busy = cpu->sample.core_pct_busy;
972         max_pstate = int_tofp(cpu->pstate.max_pstate_physical);
973         current_pstate = int_tofp(cpu->pstate.current_pstate);
974         core_busy = mul_fp(core_busy, div_fp(max_pstate, current_pstate));
975
976         /*
977          * Since our utilization update callback will not run unless we are
978          * in C0, check if the actual elapsed time is significantly greater (3x)
979          * than our sample interval.  If it is, then we were idle for a long
980          * enough period of time to adjust our busyness.
981          */
982         duration_ns = cpu->sample.time - cpu->last_sample_time;
983         if ((s64)duration_ns > pid_params.sample_rate_ns * 3
984             && cpu->last_sample_time > 0) {
985                 sample_ratio = div_fp(int_tofp(pid_params.sample_rate_ns),
986                                       int_tofp(duration_ns));
987                 core_busy = mul_fp(core_busy, sample_ratio);
988         }
989
990         cpu->sample.busy_scaled = core_busy;
991         return cpu->pstate.current_pstate - pid_calc(&cpu->pid, core_busy);
992 }
993
994 static inline void intel_pstate_adjust_busy_pstate(struct cpudata *cpu)
995 {
996         int from, target_pstate;
997         struct sample *sample;
998
999         from = cpu->pstate.current_pstate;
1000
1001         target_pstate = pstate_funcs.get_target_pstate(cpu);
1002
1003         intel_pstate_set_pstate(cpu, target_pstate, true);
1004
1005         sample = &cpu->sample;
1006         trace_pstate_sample(fp_toint(sample->core_pct_busy),
1007                 fp_toint(sample->busy_scaled),
1008                 from,
1009                 cpu->pstate.current_pstate,
1010                 sample->mperf,
1011                 sample->aperf,
1012                 sample->tsc,
1013                 sample->freq);
1014 }
1015
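/*
 * Utilization update callback invoked from the scheduler.  This is the
 * mechanism that replaces the driver's old per-CPU timers (see the patch
 * subject): once at least sample_rate_ns has elapsed since the previous
 * sample, take a new sample and, unless HWP is managing P-states, run the
 * P-state adjustment.
 */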
1016 static void intel_pstate_update_util(struct update_util_data *data, u64 time,
1017                                      unsigned long util, unsigned long max)
1018 {
1019         struct cpudata *cpu = container_of(data, struct cpudata, update_util);
1020         u64 delta_ns = time - cpu->sample.time;
1021
1022         if ((s64)delta_ns >= pid_params.sample_rate_ns) {
1023                 intel_pstate_sample(cpu, time);
1024                 if (!hwp_active)
1025                         intel_pstate_adjust_busy_pstate(cpu);
1026         }
1027 }
1028
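/* Supported family 6 CPU models, each pointing at its per-model defaults. */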
1029 #define ICPU(model, policy) \
1030         { X86_VENDOR_INTEL, 6, model, X86_FEATURE_APERFMPERF,\
1031                         (unsigned long)&policy }
1032
1033 static const struct x86_cpu_id intel_pstate_cpu_ids[] = {
1034         ICPU(0x2a, core_params),
1035         ICPU(0x2d, core_params),
1036         ICPU(0x37, silvermont_params),
1037         ICPU(0x3a, core_params),
1038         ICPU(0x3c, core_params),
1039         ICPU(0x3d, core_params),
1040         ICPU(0x3e, core_params),
1041         ICPU(0x3f, core_params),
1042         ICPU(0x45, core_params),
1043         ICPU(0x46, core_params),
1044         ICPU(0x47, core_params),
1045         ICPU(0x4c, airmont_params),
1046         ICPU(0x4e, core_params),
1047         ICPU(0x4f, core_params),
1048         ICPU(0x5e, core_params),
1049         ICPU(0x56, core_params),
1050         ICPU(0x57, knl_params),
1051         {}
1052 };
1053 MODULE_DEVICE_TABLE(x86cpu, intel_pstate_cpu_ids);
1054
1055 static const struct x86_cpu_id intel_pstate_cpu_oob_ids[] = {
1056         ICPU(0x56, core_params),
1057         {}
1058 };
1059
1060 static int intel_pstate_init_cpu(unsigned int cpunum)
1061 {
1062         struct cpudata *cpu;
1063
1064         if (!all_cpu_data[cpunum])
1065                 all_cpu_data[cpunum] = kzalloc(sizeof(struct cpudata),
1066                                                GFP_KERNEL);
1067         if (!all_cpu_data[cpunum])
1068                 return -ENOMEM;
1069
1070         cpu = all_cpu_data[cpunum];
1071
1072         cpu->cpu = cpunum;
1073
1074         if (hwp_active) {
1075                 intel_pstate_hwp_enable(cpu);
1076                 pid_params.sample_rate_ms = 50;
1077                 pid_params.sample_rate_ns = 50 * NSEC_PER_MSEC;
1078         }
1079
1080         intel_pstate_get_cpu_pstates(cpu);
1081
1082         intel_pstate_busy_pid_reset(cpu);
1083         intel_pstate_sample(cpu, 0);
1084
1085         cpu->update_util.func = intel_pstate_update_util;
1086         cpufreq_set_update_util_data(cpunum, &cpu->update_util);
1087
1088         pr_debug("intel_pstate: controlling: cpu %d\n", cpunum);
1089
1090         return 0;
1091 }
1092
1093 static unsigned int intel_pstate_get(unsigned int cpu_num)
1094 {
1095         struct sample *sample;
1096         struct cpudata *cpu;
1097
1098         cpu = all_cpu_data[cpu_num];
1099         if (!cpu)
1100                 return 0;
1101         sample = &cpu->sample;
1102         return sample->freq;
1103 }
1104
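/*
 * ->setpolicy callback: a "performance" policy with an unrestricted max
 * frequency switches to the performance limits as-is; anything else uses
 * the powersave limits, folding the policy min/max and the sysfs
 * percentages into effective limits relative to cpuinfo.max_freq.
 */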
1105 static int intel_pstate_set_policy(struct cpufreq_policy *policy)
1106 {
1107         if (!policy->cpuinfo.max_freq)
1108                 return -ENODEV;
1109
1110         if (policy->policy == CPUFREQ_POLICY_PERFORMANCE &&
1111             policy->max >= policy->cpuinfo.max_freq) {
1112                 pr_debug("intel_pstate: set performance\n");
1113                 limits = &performance_limits;
1114                 if (hwp_active)
1115                         intel_pstate_hwp_set();
1116                 return 0;
1117         }
1118
1119         pr_debug("intel_pstate: set powersave\n");
1120         limits = &powersave_limits;
1121         limits->min_policy_pct = (policy->min * 100) / policy->cpuinfo.max_freq;
1122         limits->min_policy_pct = clamp_t(int, limits->min_policy_pct, 0, 100);
1123         limits->max_policy_pct = DIV_ROUND_UP(policy->max * 100,
1124                                               policy->cpuinfo.max_freq);
1125         limits->max_policy_pct = clamp_t(int, limits->max_policy_pct, 0, 100);
1126
1127         /* Normalize user input to [min_policy_pct, max_policy_pct] */
1128         limits->min_perf_pct = max(limits->min_policy_pct,
1129                                    limits->min_sysfs_pct);
1130         limits->min_perf_pct = min(limits->max_policy_pct,
1131                                    limits->min_perf_pct);
1132         limits->max_perf_pct = min(limits->max_policy_pct,
1133                                    limits->max_sysfs_pct);
1134         limits->max_perf_pct = max(limits->min_policy_pct,
1135                                    limits->max_perf_pct);
1136         limits->max_perf = round_up(limits->max_perf, FRAC_BITS);
1137
1138         /* Make sure min_perf_pct <= max_perf_pct */
1139         limits->min_perf_pct = min(limits->max_perf_pct, limits->min_perf_pct);
1140
1141         limits->min_perf = div_fp(int_tofp(limits->min_perf_pct),
1142                                   int_tofp(100));
1143         limits->max_perf = div_fp(int_tofp(limits->max_perf_pct),
1144                                   int_tofp(100));
1145
1146         if (hwp_active)
1147                 intel_pstate_hwp_set();
1148
1149         return 0;
1150 }
1151
1152 static int intel_pstate_verify_policy(struct cpufreq_policy *policy)
1153 {
1154         cpufreq_verify_within_cpu_limits(policy);
1155
1156         if (policy->policy != CPUFREQ_POLICY_POWERSAVE &&
1157             policy->policy != CPUFREQ_POLICY_PERFORMANCE)
1158                 return -EINVAL;
1159
1160         return 0;
1161 }
1162
1163 static void intel_pstate_stop_cpu(struct cpufreq_policy *policy)
1164 {
1165         int cpu_num = policy->cpu;
1166         struct cpudata *cpu = all_cpu_data[cpu_num];
1167
1168         pr_debug("intel_pstate: CPU %d exiting\n", cpu_num);
1169
1170         cpufreq_set_update_util_data(cpu_num, NULL);
1171         synchronize_rcu();
1172
1173         if (hwp_active)
1174                 return;
1175
1176         intel_pstate_set_pstate(cpu, cpu->pstate.min_pstate, false);
1177 }
1178
1179 static int intel_pstate_cpu_init(struct cpufreq_policy *policy)
1180 {
1181         struct cpudata *cpu;
1182         int rc;
1183
1184         rc = intel_pstate_init_cpu(policy->cpu);
1185         if (rc)
1186                 return rc;
1187
1188         cpu = all_cpu_data[policy->cpu];
1189
1190         if (limits->min_perf_pct == 100 && limits->max_perf_pct == 100)
1191                 policy->policy = CPUFREQ_POLICY_PERFORMANCE;
1192         else
1193                 policy->policy = CPUFREQ_POLICY_POWERSAVE;
1194
1195         policy->min = cpu->pstate.min_pstate * cpu->pstate.scaling;
1196         policy->max = cpu->pstate.turbo_pstate * cpu->pstate.scaling;
1197
1198         /* cpuinfo and default policy values */
1199         policy->cpuinfo.min_freq = cpu->pstate.min_pstate * cpu->pstate.scaling;
1200         policy->cpuinfo.max_freq =
1201                 cpu->pstate.turbo_pstate * cpu->pstate.scaling;
1202         policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL;
1203         cpumask_set_cpu(policy->cpu, policy->cpus);
1204
1205         return 0;
1206 }
1207
1208 static struct cpufreq_driver intel_pstate_driver = {
1209         .flags          = CPUFREQ_CONST_LOOPS,
1210         .verify         = intel_pstate_verify_policy,
1211         .setpolicy      = intel_pstate_set_policy,
1212         .get            = intel_pstate_get,
1213         .init           = intel_pstate_cpu_init,
1214         .stop_cpu       = intel_pstate_stop_cpu,
1215         .name           = "intel_pstate",
1216 };
1217
1218 static int __initdata no_load;
1219 static int __initdata no_hwp;
1220 static int __initdata hwp_only;
1221 static unsigned int force_load;
1222
1223 static int intel_pstate_msrs_not_valid(void)
1224 {
1225         if (!pstate_funcs.get_max() ||
1226             !pstate_funcs.get_min() ||
1227             !pstate_funcs.get_turbo())
1228                 return -ENODEV;
1229
1230         return 0;
1231 }
1232
1233 static void copy_pid_params(struct pstate_adjust_policy *policy)
1234 {
1235         pid_params.sample_rate_ms = policy->sample_rate_ms;
1236         pid_params.sample_rate_ns = pid_params.sample_rate_ms * NSEC_PER_MSEC;
1237         pid_params.p_gain_pct = policy->p_gain_pct;
1238         pid_params.i_gain_pct = policy->i_gain_pct;
1239         pid_params.d_gain_pct = policy->d_gain_pct;
1240         pid_params.deadband = policy->deadband;
1241         pid_params.setpoint = policy->setpoint;
1242 }
1243
1244 static void copy_cpu_funcs(struct pstate_funcs *funcs)
1245 {
1246         pstate_funcs.get_max   = funcs->get_max;
1247         pstate_funcs.get_max_physical = funcs->get_max_physical;
1248         pstate_funcs.get_min   = funcs->get_min;
1249         pstate_funcs.get_turbo = funcs->get_turbo;
1250         pstate_funcs.get_scaling = funcs->get_scaling;
1251         pstate_funcs.set       = funcs->set;
1252         pstate_funcs.get_vid   = funcs->get_vid;
1253         pstate_funcs.get_target_pstate = funcs->get_target_pstate;
1254
1255 }
1256
1257 #if IS_ENABLED(CONFIG_ACPI)
1258 #include <acpi/processor.h>
1259
1260 static bool intel_pstate_no_acpi_pss(void)
1261 {
1262         int i;
1263
1264         for_each_possible_cpu(i) {
1265                 acpi_status status;
1266                 union acpi_object *pss;
1267                 struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL };
1268                 struct acpi_processor *pr = per_cpu(processors, i);
1269
1270                 if (!pr)
1271                         continue;
1272
1273                 status = acpi_evaluate_object(pr->handle, "_PSS", NULL, &buffer);
1274                 if (ACPI_FAILURE(status))
1275                         continue;
1276
1277                 pss = buffer.pointer;
1278                 if (pss && pss->type == ACPI_TYPE_PACKAGE) {
1279                         kfree(pss);
1280                         return false;
1281                 }
1282
1283                 kfree(pss);
1284         }
1285
1286         return true;
1287 }
1288
1289 static bool intel_pstate_has_acpi_ppc(void)
1290 {
1291         int i;
1292
1293         for_each_possible_cpu(i) {
1294                 struct acpi_processor *pr = per_cpu(processors, i);
1295
1296                 if (!pr)
1297                         continue;
1298                 if (acpi_has_method(pr->handle, "_PPC"))
1299                         return true;
1300         }
1301         return false;
1302 }
1303
1304 enum {
1305         PSS,
1306         PPC,
1307 };
1308
1309 struct hw_vendor_info {
1310         u16  valid;
1311         char oem_id[ACPI_OEM_ID_SIZE];
1312         char oem_table_id[ACPI_OEM_TABLE_ID_SIZE];
1313         int  oem_pwr_table;
1314 };
1315
1316 /* Hardware vendors whose platforms provide their own power management modes */
1317 static struct hw_vendor_info vendor_info[] = {
1318         {1, "HP    ", "ProLiant", PSS},
1319         {1, "ORACLE", "X4-2    ", PPC},
1320         {1, "ORACLE", "X4-2L   ", PPC},
1321         {1, "ORACLE", "X4-2B   ", PPC},
1322         {1, "ORACLE", "X3-2    ", PPC},
1323         {1, "ORACLE", "X3-2L   ", PPC},
1324         {1, "ORACLE", "X3-2B   ", PPC},
1325         {1, "ORACLE", "X4470M2 ", PPC},
1326         {1, "ORACLE", "X4270M3 ", PPC},
1327         {1, "ORACLE", "X4270M2 ", PPC},
1328         {1, "ORACLE", "X4170M2 ", PPC},
1329         {1, "ORACLE", "X4170 M3", PPC},
1330         {1, "ORACLE", "X4275 M3", PPC},
1331         {1, "ORACLE", "X6-2    ", PPC},
1332         {1, "ORACLE", "Sudbury ", PPC},
1333         {0, "", ""},
1334 };
1335
1336 static bool intel_pstate_platform_pwr_mgmt_exists(void)
1337 {
1338         struct acpi_table_header hdr;
1339         struct hw_vendor_info *v_info;
1340         const struct x86_cpu_id *id;
1341         u64 misc_pwr;
1342
1343         id = x86_match_cpu(intel_pstate_cpu_oob_ids);
1344         if (id) {
1345                 rdmsrl(MSR_MISC_PWR_MGMT, misc_pwr);
1346                 if (misc_pwr & (1 << 8))
1347                         return true;
1348         }
1349
1350         if (acpi_disabled ||
1351             ACPI_FAILURE(acpi_get_table_header(ACPI_SIG_FADT, 0, &hdr)))
1352                 return false;
1353
1354         for (v_info = vendor_info; v_info->valid; v_info++) {
1355                 if (!strncmp(hdr.oem_id, v_info->oem_id, ACPI_OEM_ID_SIZE) &&
1356                         !strncmp(hdr.oem_table_id, v_info->oem_table_id,
1357                                                 ACPI_OEM_TABLE_ID_SIZE))
1358                         switch (v_info->oem_pwr_table) {
1359                         case PSS:
1360                                 return intel_pstate_no_acpi_pss();
1361                         case PPC:
1362                                 return intel_pstate_has_acpi_ppc() &&
1363                                         (!force_load);
1364                         }
1365         }
1366
1367         return false;
1368 }
1369 #else /* CONFIG_ACPI not enabled */
1370 static inline bool intel_pstate_platform_pwr_mgmt_exists(void) { return false; }
1371 static inline bool intel_pstate_has_acpi_ppc(void) { return false; }
1372 #endif /* CONFIG_ACPI */
1373
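/*
 * Driver init: bail out if the CPU model is not listed, if platform
 * firmware (ACPI _PSS/_PPC or out-of-band management) is already in
 * charge, or if the P-state MSRs read back zero.  Otherwise detect HWP
 * (unless disabled on the command line), register the cpufreq driver and
 * expose the debugfs and sysfs parameters.
 */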
1374 static int __init intel_pstate_init(void)
1375 {
1376         int cpu, rc = 0;
1377         const struct x86_cpu_id *id;
1378         struct cpu_defaults *cpu_def;
1379
1380         if (no_load)
1381                 return -ENODEV;
1382
1383         id = x86_match_cpu(intel_pstate_cpu_ids);
1384         if (!id)
1385                 return -ENODEV;
1386
1387         /*
1388          * The Intel pstate driver will be ignored if the platform
1389          * firmware has its own power management modes.
1390          */
1391         if (intel_pstate_platform_pwr_mgmt_exists())
1392                 return -ENODEV;
1393
1394         cpu_def = (struct cpu_defaults *)id->driver_data;
1395
1396         copy_pid_params(&cpu_def->pid_policy);
1397         copy_cpu_funcs(&cpu_def->funcs);
1398
1399         if (intel_pstate_msrs_not_valid())
1400                 return -ENODEV;
1401
1402         pr_info("Intel P-state driver initializing.\n");
1403
1404         all_cpu_data = vzalloc(sizeof(void *) * num_possible_cpus());
1405         if (!all_cpu_data)
1406                 return -ENOMEM;
1407
1408         if (static_cpu_has_safe(X86_FEATURE_HWP) && !no_hwp) {
1409                 pr_info("intel_pstate: HWP enabled\n");
1410                 hwp_active++;
1411         }
1412
1413         if (!hwp_active && hwp_only)
1414                 goto out;
1415
1416         rc = cpufreq_register_driver(&intel_pstate_driver);
1417         if (rc)
1418                 goto out;
1419
1420         intel_pstate_debug_expose_params();
1421         intel_pstate_sysfs_expose_params();
1422
1423         return rc;
1424 out:
1425         get_online_cpus();
1426         for_each_online_cpu(cpu) {
1427                 if (all_cpu_data[cpu]) {
1428                         cpufreq_set_update_util_data(cpu, NULL);
1429                         synchronize_rcu();
1430                         kfree(all_cpu_data[cpu]);
1431                 }
1432         }
1433
1434         put_online_cpus();
1435         vfree(all_cpu_data);
1436         return -ENODEV;
1437 }
1438 device_initcall(intel_pstate_init);
1439
1440 static int __init intel_pstate_setup(char *str)
1441 {
1442         if (!str)
1443                 return -EINVAL;
1444
1445         if (!strcmp(str, "disable"))
1446                 no_load = 1;
1447         if (!strcmp(str, "no_hwp")) {
1448                 pr_info("intel_pstate: HWP disabled\n");
1449                 no_hwp = 1;
1450         }
1451         if (!strcmp(str, "force"))
1452                 force_load = 1;
1453         if (!strcmp(str, "hwp_only"))
1454                 hwp_only = 1;
1455         return 0;
1456 }
1457 early_param("intel_pstate", intel_pstate_setup);
1458
1459 MODULE_AUTHOR("Dirk Brandewie <dirk.j.brandewie@intel.com>");
1460 MODULE_DESCRIPTION("'intel_pstate' - P state driver for Intel Core processors");
1461 MODULE_LICENSE("GPL");