/*
 * CPUFreq governor based on scheduler-provided CPU utilization data.
 *
 * Copyright (C) 2016, Intel Corporation
 * Author: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/cpufreq.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <trace/events/power.h>

#include "sched.h"

struct sugov_tunables {
        struct gov_attr_set attr_set;
        unsigned int rate_limit_us;
};

struct sugov_policy {
        struct cpufreq_policy *policy;

        struct sugov_tunables *tunables;
        struct list_head tunables_hook;

        raw_spinlock_t update_lock;  /* For shared policies */
        u64 last_freq_update_time;
        s64 freq_update_delay_ns;
        unsigned int next_freq;

        /* The next fields are only needed if fast switch cannot be used. */
        struct irq_work irq_work;
        struct work_struct work;
        struct mutex work_lock;
        bool work_in_progress;

        bool need_freq_update;
};

struct sugov_cpu {
        struct update_util_data update_util;
        struct sugov_policy *sg_policy;

        /* The fields below are only needed when sharing a policy. */
        unsigned long util;
        unsigned long max;
        u64 last_update;
};

static DEFINE_PER_CPU(struct sugov_cpu, sugov_cpu);

/************************ Governor internals ***********************/

static bool sugov_should_update_freq(struct sugov_policy *sg_policy, u64 time)
{
        s64 delta_ns;

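        /*
         * Don't re-evaluate while a slow-path frequency change queued by a
         * previous update is still in flight.
         */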
        if (sg_policy->work_in_progress)
                return false;

        if (unlikely(sg_policy->need_freq_update)) {
                sg_policy->need_freq_update = false;
                /*
                 * This happens when limits change, so forget the previous
                 * next_freq value and force an update.
                 */
                sg_policy->next_freq = UINT_MAX;
                return true;
        }

        delta_ns = time - sg_policy->last_freq_update_time;
        return delta_ns >= sg_policy->freq_update_delay_ns;
}

static void sugov_update_commit(struct sugov_policy *sg_policy, u64 time,
                                unsigned int next_freq)
{
        struct cpufreq_policy *policy = sg_policy->policy;

        sg_policy->last_freq_update_time = time;

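        /*
         * If the driver supports fast switching, the frequency can be set
         * right here in scheduler context; otherwise defer to process
         * context via irq_work plus a regular work item.
         */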
        if (policy->fast_switch_enabled) {
                if (sg_policy->next_freq == next_freq) {
                        trace_cpu_frequency(policy->cur, smp_processor_id());
                        return;
                }
                sg_policy->next_freq = next_freq;
                next_freq = cpufreq_driver_fast_switch(policy, next_freq);
                if (next_freq == CPUFREQ_ENTRY_INVALID)
                        return;

                policy->cur = next_freq;
                trace_cpu_frequency(next_freq, smp_processor_id());
        } else if (sg_policy->next_freq != next_freq) {
                sg_policy->next_freq = next_freq;
                sg_policy->work_in_progress = true;
                irq_work_queue(&sg_policy->irq_work);
        }
}

/**
 * get_next_freq - Compute a new frequency for a given cpufreq policy.
 * @policy: cpufreq policy object to compute the new frequency for.
 * @util: Current CPU utilization.
 * @max: CPU capacity.
 *
 * If the utilization is frequency-invariant, choose the new frequency to be
 * proportional to it, that is
 *
 * next_freq = C * max_freq * util / max
 *
 * Otherwise, approximate the would-be frequency-invariant utilization by
 * util_raw * (curr_freq / max_freq) which leads to
 *
 * next_freq = C * curr_freq * util_raw / max
 *
 * Take C = 1.25 for the frequency tipping point at (util / max) = 0.8.
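 *
 * For example (illustrative numbers only): on a frequency-invariant system
 * with max_freq = 2000 MHz, util = 512 and max = 1024, this yields
 * 1.25 * 2000 * 512 / 1024 = 1250 MHz.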
 */
static unsigned int get_next_freq(struct cpufreq_policy *policy,
                                  unsigned long util, unsigned long max)
{
        unsigned int freq = arch_scale_freq_invariant() ?
                                policy->cpuinfo.max_freq : policy->cur;

        return (freq + (freq >> 2)) * util / max;
}

static void sugov_update_single(struct update_util_data *hook, u64 time,
                                unsigned long util, unsigned long max)
{
        struct sugov_cpu *sg_cpu = container_of(hook, struct sugov_cpu, update_util);
        struct sugov_policy *sg_policy = sg_cpu->sg_policy;
        struct cpufreq_policy *policy = sg_policy->policy;
        unsigned int next_f;

        if (!sugov_should_update_freq(sg_policy, time))
                return;

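        /* util == ULONG_MAX (e.g. from RT/DL) means "unknown", so go to max. */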
        next_f = util == ULONG_MAX ? policy->cpuinfo.max_freq :
                        get_next_freq(policy, util, max);
        sugov_update_commit(sg_policy, time, next_f);
}

static unsigned int sugov_next_freq_shared(struct sugov_policy *sg_policy,
                                           unsigned long util, unsigned long max)
{
        struct cpufreq_policy *policy = sg_policy->policy;
        unsigned int max_f = policy->cpuinfo.max_freq;
        u64 last_freq_update_time = sg_policy->last_freq_update_time;
        unsigned int j;

        if (util == ULONG_MAX)
                return max_f;

        for_each_cpu(j, policy->cpus) {
                struct sugov_cpu *j_sg_cpu;
                unsigned long j_util, j_max;
                s64 delta_ns;

                if (j == smp_processor_id())
                        continue;

                j_sg_cpu = &per_cpu(sugov_cpu, j);
                /*
                 * If the CPU utilization was last updated before the previous
                 * frequency update and the time elapsed between the last update
                 * of the CPU utilization and the last frequency update is long
                 * enough, don't take the CPU into account as it probably is
                 * idle now.
                 */
                delta_ns = last_freq_update_time - j_sg_cpu->last_update;
                if (delta_ns > TICK_NSEC)
                        continue;

                j_util = j_sg_cpu->util;
                if (j_util == ULONG_MAX)
                        return max_f;

                j_max = j_sg_cpu->max;
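                /*
                 * Track the CPU with the highest util / max ratio; the
                 * cross-multiplied comparison avoids a division here.
                 */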
                if (j_util * max > j_max * util) {
                        util = j_util;
                        max = j_max;
                }
        }

        return get_next_freq(policy, util, max);
}

static void sugov_update_shared(struct update_util_data *hook, u64 time,
                                unsigned long util, unsigned long max)
{
        struct sugov_cpu *sg_cpu = container_of(hook, struct sugov_cpu, update_util);
        struct sugov_policy *sg_policy = sg_cpu->sg_policy;
        unsigned int next_f;

        raw_spin_lock(&sg_policy->update_lock);

        sg_cpu->util = util;
        sg_cpu->max = max;
        sg_cpu->last_update = time;

        if (sugov_should_update_freq(sg_policy, time)) {
                next_f = sugov_next_freq_shared(sg_policy, util, max);
                sugov_update_commit(sg_policy, time, next_f);
        }

        raw_spin_unlock(&sg_policy->update_lock);
}

static void sugov_work(struct work_struct *work)
{
        struct sugov_policy *sg_policy = container_of(work, struct sugov_policy, work);

        mutex_lock(&sg_policy->work_lock);
        __cpufreq_driver_target(sg_policy->policy, sg_policy->next_freq,
                                CPUFREQ_RELATION_L);
        mutex_unlock(&sg_policy->work_lock);

        sg_policy->work_in_progress = false;
}

static void sugov_irq_work(struct irq_work *irq_work)
{
        struct sugov_policy *sg_policy;

        sg_policy = container_of(irq_work, struct sugov_policy, irq_work);
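        /*
         * The irq_work runs on the CPU that queued it, which belongs to this
         * policy, so keep the work item on the same CPU.
         */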
        schedule_work_on(smp_processor_id(), &sg_policy->work);
}

/************************** sysfs interface ************************/

static struct sugov_tunables *global_tunables;
static DEFINE_MUTEX(global_tunables_lock);

static inline struct sugov_tunables *to_sugov_tunables(struct gov_attr_set *attr_set)
{
        return container_of(attr_set, struct sugov_tunables, attr_set);
}

static ssize_t rate_limit_us_show(struct gov_attr_set *attr_set, char *buf)
{
        struct sugov_tunables *tunables = to_sugov_tunables(attr_set);

        return sprintf(buf, "%u\n", tunables->rate_limit_us);
}

static ssize_t rate_limit_us_store(struct gov_attr_set *attr_set, const char *buf,
                                   size_t count)
{
        struct sugov_tunables *tunables = to_sugov_tunables(attr_set);
        struct sugov_policy *sg_policy;
        unsigned int rate_limit_us;

        if (kstrtouint(buf, 10, &rate_limit_us))
                return -EINVAL;

        tunables->rate_limit_us = rate_limit_us;

        list_for_each_entry(sg_policy, &attr_set->policy_list, tunables_hook)
                sg_policy->freq_update_delay_ns = rate_limit_us * NSEC_PER_USEC;

        return count;
}

static struct governor_attr rate_limit_us = __ATTR_RW(rate_limit_us);

static struct attribute *sugov_attributes[] = {
        &rate_limit_us.attr,
        NULL
};

static struct kobj_type sugov_tunables_ktype = {
        .default_attrs = sugov_attributes,
        .sysfs_ops = &governor_sysfs_ops,
};

/********************** cpufreq governor interface *********************/

static struct cpufreq_governor schedutil_gov;

static struct sugov_policy *sugov_policy_alloc(struct cpufreq_policy *policy)
{
        struct sugov_policy *sg_policy;

        sg_policy = kzalloc(sizeof(*sg_policy), GFP_KERNEL);
        if (!sg_policy)
                return NULL;

        sg_policy->policy = policy;
        init_irq_work(&sg_policy->irq_work, sugov_irq_work);
        INIT_WORK(&sg_policy->work, sugov_work);
        mutex_init(&sg_policy->work_lock);
        raw_spin_lock_init(&sg_policy->update_lock);
        return sg_policy;
}

static void sugov_policy_free(struct sugov_policy *sg_policy)
{
        mutex_destroy(&sg_policy->work_lock);
        kfree(sg_policy);
}

static struct sugov_tunables *sugov_tunables_alloc(struct sugov_policy *sg_policy)
{
        struct sugov_tunables *tunables;

        tunables = kzalloc(sizeof(*tunables), GFP_KERNEL);
        if (tunables) {
                gov_attr_set_init(&tunables->attr_set, &sg_policy->tunables_hook);
                if (!have_governor_per_policy())
                        global_tunables = tunables;
        }
        return tunables;
}

static void sugov_tunables_free(struct sugov_tunables *tunables)
{
        if (!have_governor_per_policy())
                global_tunables = NULL;

        kfree(tunables);
}

static int sugov_init(struct cpufreq_policy *policy)
{
        struct sugov_policy *sg_policy;
        struct sugov_tunables *tunables;
        unsigned int lat;
        int ret = 0;

        /* State should be equivalent to EXIT */
        if (policy->governor_data)
                return -EBUSY;

        sg_policy = sugov_policy_alloc(policy);
        if (!sg_policy)
                return -ENOMEM;

        mutex_lock(&global_tunables_lock);

        if (global_tunables) {
                if (WARN_ON(have_governor_per_policy())) {
                        ret = -EINVAL;
                        goto free_sg_policy;
                }
                policy->governor_data = sg_policy;
                sg_policy->tunables = global_tunables;

                gov_attr_set_get(&global_tunables->attr_set, &sg_policy->tunables_hook);
                goto out;
        }

        tunables = sugov_tunables_alloc(sg_policy);
        if (!tunables) {
                ret = -ENOMEM;
                goto free_sg_policy;
        }

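        /*
         * Default rate limit: LATENCY_MULTIPLIER scaled by the driver's
         * transition latency (converted from ns to us), when it is known.
         */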
        tunables->rate_limit_us = LATENCY_MULTIPLIER;
        lat = policy->cpuinfo.transition_latency / NSEC_PER_USEC;
        if (lat)
                tunables->rate_limit_us *= lat;

        policy->governor_data = sg_policy;
        sg_policy->tunables = tunables;

        ret = kobject_init_and_add(&tunables->attr_set.kobj, &sugov_tunables_ktype,
                                   get_governor_parent_kobj(policy), "%s",
                                   schedutil_gov.name);
        if (ret)
                goto fail;

 out:
        mutex_unlock(&global_tunables_lock);

        cpufreq_enable_fast_switch(policy);
        return 0;

 fail:
        policy->governor_data = NULL;
        sugov_tunables_free(tunables);

 free_sg_policy:
        mutex_unlock(&global_tunables_lock);

        sugov_policy_free(sg_policy);
        pr_err("initialization failed (error %d)\n", ret);
        return ret;
}

static int sugov_exit(struct cpufreq_policy *policy)
{
        struct sugov_policy *sg_policy = policy->governor_data;
        struct sugov_tunables *tunables = sg_policy->tunables;
        unsigned int count;

        cpufreq_disable_fast_switch(policy);

        mutex_lock(&global_tunables_lock);

        count = gov_attr_set_put(&tunables->attr_set, &sg_policy->tunables_hook);
        policy->governor_data = NULL;
        if (!count)
                sugov_tunables_free(tunables);

        mutex_unlock(&global_tunables_lock);

        sugov_policy_free(sg_policy);
        return 0;
}

static int sugov_start(struct cpufreq_policy *policy)
{
        struct sugov_policy *sg_policy = policy->governor_data;
        unsigned int cpu;

        sg_policy->freq_update_delay_ns = sg_policy->tunables->rate_limit_us * NSEC_PER_USEC;
        sg_policy->last_freq_update_time = 0;
        sg_policy->next_freq = UINT_MAX;
        sg_policy->work_in_progress = false;
        sg_policy->need_freq_update = false;

        for_each_cpu(cpu, policy->cpus) {
                struct sugov_cpu *sg_cpu = &per_cpu(sugov_cpu, cpu);

                sg_cpu->sg_policy = sg_policy;
                if (policy_is_shared(policy)) {
                        sg_cpu->util = ULONG_MAX;
                        sg_cpu->max = 0;
                        sg_cpu->last_update = 0;
                        cpufreq_add_update_util_hook(cpu, &sg_cpu->update_util,
                                                     sugov_update_shared);
                } else {
                        cpufreq_add_update_util_hook(cpu, &sg_cpu->update_util,
                                                     sugov_update_single);
                }
        }
        return 0;
}

static int sugov_stop(struct cpufreq_policy *policy)
{
        struct sugov_policy *sg_policy = policy->governor_data;
        unsigned int cpu;

        for_each_cpu(cpu, policy->cpus)
                cpufreq_remove_update_util_hook(cpu);

        synchronize_sched();

        irq_work_sync(&sg_policy->irq_work);
        cancel_work_sync(&sg_policy->work);
        return 0;
}

static int sugov_limits(struct cpufreq_policy *policy)
{
        struct sugov_policy *sg_policy = policy->governor_data;

        if (!policy->fast_switch_enabled) {
                mutex_lock(&sg_policy->work_lock);

                if (policy->max < policy->cur)
                        __cpufreq_driver_target(policy, policy->max,
                                                CPUFREQ_RELATION_H);
                else if (policy->min > policy->cur)
                        __cpufreq_driver_target(policy, policy->min,
                                                CPUFREQ_RELATION_L);

                mutex_unlock(&sg_policy->work_lock);
        }

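        /* Make the next utilization update re-evaluate the limits. */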
        sg_policy->need_freq_update = true;
        return 0;
}

static int sugov_governor(struct cpufreq_policy *policy, unsigned int event)
{
        if (event == CPUFREQ_GOV_POLICY_INIT) {
                return sugov_init(policy);
        } else if (policy->governor_data) {
                switch (event) {
                case CPUFREQ_GOV_POLICY_EXIT:
                        return sugov_exit(policy);
                case CPUFREQ_GOV_START:
                        return sugov_start(policy);
                case CPUFREQ_GOV_STOP:
                        return sugov_stop(policy);
                case CPUFREQ_GOV_LIMITS:
                        return sugov_limits(policy);
                }
        }
        return -EINVAL;
}

static struct cpufreq_governor schedutil_gov = {
        .name = "schedutil",
        .governor = sugov_governor,
        .owner = THIS_MODULE,
};

static int __init sugov_module_init(void)
{
        return cpufreq_register_governor(&schedutil_gov);
}

static void __exit sugov_module_exit(void)
{
        cpufreq_unregister_governor(&schedutil_gov);
}

MODULE_AUTHOR("Rafael J. Wysocki <rafael.j.wysocki@intel.com>");
MODULE_DESCRIPTION("Utilization-based CPU frequency selection");
MODULE_LICENSE("GPL");

#ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_SCHEDUTIL
struct cpufreq_governor *cpufreq_default_governor(void)
{
        return &schedutil_gov;
}

fs_initcall(sugov_module_init);
#else
module_init(sugov_module_init);
#endif
module_exit(sugov_module_exit);