]> git.kernelconcepts.de Git - karo-tx-linux.git/blob - arch/powerpc/platforms/powernv/idle.c
Merge branches 'for-4.13/multitouch', 'for-4.13/retrode', 'for-4.13/transport-open...
[karo-tx-linux.git] / arch / powerpc / platforms / powernv / idle.c
1 /*
2  * PowerNV cpuidle code
3  *
4  * Copyright 2015 IBM Corp.
5  *
6  * This program is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU General Public License
8  * as published by the Free Software Foundation; either version
9  * 2 of the License, or (at your option) any later version.
10  */
11
12 #include <linux/types.h>
13 #include <linux/mm.h>
14 #include <linux/slab.h>
15 #include <linux/of.h>
16 #include <linux/device.h>
17 #include <linux/cpu.h>
18
19 #include <asm/firmware.h>
20 #include <asm/machdep.h>
21 #include <asm/opal.h>
22 #include <asm/cputhreads.h>
23 #include <asm/cpuidle.h>
24 #include <asm/code-patching.h>
25 #include <asm/smp.h>
26
27 #include "powernv.h"
28 #include "subcore.h"
29
30 /* Power ISA 3.0 allows for stop states 0x0 - 0xF */
31 #define MAX_STOP_STATE  0xF
32
33 static u32 supported_cpuidle_states;
34
35 static int pnv_save_sprs_for_deep_states(void)
36 {
37         int cpu;
38         int rc;
39
40         /*
41          * hid0, hid1, hid4, hid5, hmeer and lpcr values are symmetric across
42          * all cpus at boot. Get these reg values of current cpu and use the
43          * same across all cpus.
44          */
45         uint64_t lpcr_val = mfspr(SPRN_LPCR) & ~(u64)LPCR_PECE1;
46         uint64_t hid0_val = mfspr(SPRN_HID0);
47         uint64_t hid1_val = mfspr(SPRN_HID1);
48         uint64_t hid4_val = mfspr(SPRN_HID4);
49         uint64_t hid5_val = mfspr(SPRN_HID5);
50         uint64_t hmeer_val = mfspr(SPRN_HMEER);
51
52         for_each_possible_cpu(cpu) {
53                 uint64_t pir = get_hard_smp_processor_id(cpu);
54                 uint64_t hsprg0_val = (uint64_t)&paca[cpu];
55
56                 rc = opal_slw_set_reg(pir, SPRN_HSPRG0, hsprg0_val);
57                 if (rc != 0)
58                         return rc;
59
60                 rc = opal_slw_set_reg(pir, SPRN_LPCR, lpcr_val);
61                 if (rc != 0)
62                         return rc;
63
64                 /* HIDs are per core registers */
65                 if (cpu_thread_in_core(cpu) == 0) {
66
67                         rc = opal_slw_set_reg(pir, SPRN_HMEER, hmeer_val);
68                         if (rc != 0)
69                                 return rc;
70
71                         rc = opal_slw_set_reg(pir, SPRN_HID0, hid0_val);
72                         if (rc != 0)
73                                 return rc;
74
75                         rc = opal_slw_set_reg(pir, SPRN_HID1, hid1_val);
76                         if (rc != 0)
77                                 return rc;
78
79                         rc = opal_slw_set_reg(pir, SPRN_HID4, hid4_val);
80                         if (rc != 0)
81                                 return rc;
82
83                         rc = opal_slw_set_reg(pir, SPRN_HID5, hid5_val);
84                         if (rc != 0)
85                                 return rc;
86                 }
87         }
88
89         return 0;
90 }
91
92 static void pnv_alloc_idle_core_states(void)
93 {
94         int i, j;
95         int nr_cores = cpu_nr_cores();
96         u32 *core_idle_state;
97
98         /*
99          * core_idle_state - First 8 bits track the idle state of each thread
100          * of the core. The 8th bit is the lock bit. Initially all thread bits
101          * are set. They are cleared when the thread enters deep idle state
102          * like sleep and winkle. Initially the lock bit is cleared.
103          * The lock bit has 2 purposes
104          * a. While the first thread is restoring core state, it prevents
105          * other threads in the core from switching to process context.
106          * b. While the last thread in the core is saving the core state, it
107          * prevents a different thread from waking up.
108          */
109         for (i = 0; i < nr_cores; i++) {
110                 int first_cpu = i * threads_per_core;
111                 int node = cpu_to_node(first_cpu);
112                 size_t paca_ptr_array_size;
113
114                 core_idle_state = kmalloc_node(sizeof(u32), GFP_KERNEL, node);
115                 *core_idle_state = PNV_CORE_IDLE_THREAD_BITS;
116                 paca_ptr_array_size = (threads_per_core *
117                                        sizeof(struct paca_struct *));
118
119                 for (j = 0; j < threads_per_core; j++) {
120                         int cpu = first_cpu + j;
121
122                         paca[cpu].core_idle_state_ptr = core_idle_state;
123                         paca[cpu].thread_idle_state = PNV_THREAD_RUNNING;
124                         paca[cpu].thread_mask = 1 << j;
125                         if (!cpu_has_feature(CPU_FTR_POWER9_DD1))
126                                 continue;
127                         paca[cpu].thread_sibling_pacas =
128                                 kmalloc_node(paca_ptr_array_size,
129                                              GFP_KERNEL, node);
130                 }
131         }
132
133         update_subcore_sibling_mask();
134
135         if (supported_cpuidle_states & OPAL_PM_LOSE_FULL_CONTEXT)
136                 pnv_save_sprs_for_deep_states();
137 }
138
139 u32 pnv_get_supported_cpuidle_states(void)
140 {
141         return supported_cpuidle_states;
142 }
143 EXPORT_SYMBOL_GPL(pnv_get_supported_cpuidle_states);
144
145 static void pnv_fastsleep_workaround_apply(void *info)
146
147 {
148         int rc;
149         int *err = info;
150
151         rc = opal_config_cpu_idle_state(OPAL_CONFIG_IDLE_FASTSLEEP,
152                                         OPAL_CONFIG_IDLE_APPLY);
153         if (rc)
154                 *err = 1;
155 }
156
157 /*
158  * Used to store fastsleep workaround state
159  * 0 - Workaround applied/undone at fastsleep entry/exit path (Default)
160  * 1 - Workaround applied once, never undone.
161  */
162 static u8 fastsleep_workaround_applyonce;
163
164 static ssize_t show_fastsleep_workaround_applyonce(struct device *dev,
165                 struct device_attribute *attr, char *buf)
166 {
167         return sprintf(buf, "%u\n", fastsleep_workaround_applyonce);
168 }
169
170 static ssize_t store_fastsleep_workaround_applyonce(struct device *dev,
171                 struct device_attribute *attr, const char *buf,
172                 size_t count)
173 {
174         cpumask_t primary_thread_mask;
175         int err;
176         u8 val;
177
178         if (kstrtou8(buf, 0, &val) || val != 1)
179                 return -EINVAL;
180
181         if (fastsleep_workaround_applyonce == 1)
182                 return count;
183
184         /*
185          * fastsleep_workaround_applyonce = 1 implies
186          * fastsleep workaround needs to be left in 'applied' state on all
187          * the cores. Do this by-
188          * 1. Patching out the call to 'undo' workaround in fastsleep exit path
189          * 2. Sending ipi to all the cores which have at least one online thread
190          * 3. Patching out the call to 'apply' workaround in fastsleep entry
191          * path
192          * There is no need to send ipi to cores which have all threads
193          * offlined, as last thread of the core entering fastsleep or deeper
194          * state would have applied workaround.
195          */
196         err = patch_instruction(
197                 (unsigned int *)pnv_fastsleep_workaround_at_exit,
198                 PPC_INST_NOP);
199         if (err) {
200                 pr_err("fastsleep_workaround_applyonce change failed while patching pnv_fastsleep_workaround_at_exit");
201                 goto fail;
202         }
203
204         get_online_cpus();
205         primary_thread_mask = cpu_online_cores_map();
206         on_each_cpu_mask(&primary_thread_mask,
207                                 pnv_fastsleep_workaround_apply,
208                                 &err, 1);
209         put_online_cpus();
210         if (err) {
211                 pr_err("fastsleep_workaround_applyonce change failed while running pnv_fastsleep_workaround_apply");
212                 goto fail;
213         }
214
215         err = patch_instruction(
216                 (unsigned int *)pnv_fastsleep_workaround_at_entry,
217                 PPC_INST_NOP);
218         if (err) {
219                 pr_err("fastsleep_workaround_applyonce change failed while patching pnv_fastsleep_workaround_at_entry");
220                 goto fail;
221         }
222
223         fastsleep_workaround_applyonce = 1;
224
225         return count;
226 fail:
227         return -EIO;
228 }
229
230 static DEVICE_ATTR(fastsleep_workaround_applyonce, 0600,
231                         show_fastsleep_workaround_applyonce,
232                         store_fastsleep_workaround_applyonce);
233
234 /*
235  * The default stop state that will be used by ppc_md.power_save
236  * function on platforms that support stop instruction.
237  */
238 static u64 pnv_default_stop_val;
239 static u64 pnv_default_stop_mask;
240 static bool default_stop_found;
241
242 /*
243  * Used for ppc_md.power_save which needs a function with no parameters
244  */
245 static void power9_idle(void)
246 {
247         power9_idle_stop(pnv_default_stop_val, pnv_default_stop_mask);
248 }
249
250 /*
251  * First deep stop state. Used to figure out when to save/restore
252  * hypervisor context.
253  */
254 u64 pnv_first_deep_stop_state = MAX_STOP_STATE;
255
256 /*
257  * psscr value and mask of the deepest stop idle state.
258  * Used when a cpu is offlined.
259  */
260 static u64 pnv_deepest_stop_psscr_val;
261 static u64 pnv_deepest_stop_psscr_mask;
262 static bool deepest_stop_found;
263
264 /*
265  * pnv_cpu_offline: A function that puts the CPU into the deepest
266  * available platform idle state on a CPU-Offline.
267  */
268 unsigned long pnv_cpu_offline(unsigned int cpu)
269 {
270         unsigned long srr1;
271
272         u32 idle_states = pnv_get_supported_cpuidle_states();
273
274         if (cpu_has_feature(CPU_FTR_ARCH_300) && deepest_stop_found) {
275                 srr1 = power9_idle_stop(pnv_deepest_stop_psscr_val,
276                                         pnv_deepest_stop_psscr_mask);
277         } else if (idle_states & OPAL_PM_WINKLE_ENABLED) {
278                 srr1 = power7_winkle();
279         } else if ((idle_states & OPAL_PM_SLEEP_ENABLED) ||
280                    (idle_states & OPAL_PM_SLEEP_ENABLED_ER1)) {
281                 srr1 = power7_sleep();
282         } else if (idle_states & OPAL_PM_NAP_ENABLED) {
283                 srr1 = power7_nap(1);
284         } else {
285                 /* This is the fallback method. We emulate snooze */
286                 while (!generic_check_cpu_restart(cpu)) {
287                         HMT_low();
288                         HMT_very_low();
289                 }
290                 srr1 = 0;
291                 HMT_medium();
292         }
293
294         return srr1;
295 }
296
297 /*
298  * Power ISA 3.0 idle initialization.
299  *
300  * POWER ISA 3.0 defines a new SPR Processor stop Status and Control
301  * Register (PSSCR) to control idle behavior.
302  *
303  * PSSCR layout:
304  * ----------------------------------------------------------
305  * | PLS | /// | SD | ESL | EC | PSLL | /// | TR | MTL | RL |
306  * ----------------------------------------------------------
307  * 0      4     41   42    43   44     48    54   56    60
308  *
309  * PSSCR key fields:
310  *      Bits 0:3  - Power-Saving Level Status (PLS). This field indicates the
311  *      lowest power-saving state the thread entered since stop instruction was
312  *      last executed.
313  *
314  *      Bit 41 - Status Disable(SD)
315  *      0 - Shows PLS entries
316  *      1 - PLS entries are all 0
317  *
318  *      Bit 42 - Enable State Loss
319  *      0 - No state is lost irrespective of other fields
320  *      1 - Allows state loss
321  *
322  *      Bit 43 - Exit Criterion
323  *      0 - Exit from power-save mode on any interrupt
324  *      1 - Exit from power-save mode controlled by LPCR's PECE bits
325  *
326  *      Bits 44:47 - Power-Saving Level Limit
327  *      This limits the power-saving level that can be entered into.
328  *
329  *      Bits 60:63 - Requested Level
330  *      Used to specify which power-saving level must be entered on executing
331  *      stop instruction
332  */
333
334 int validate_psscr_val_mask(u64 *psscr_val, u64 *psscr_mask, u32 flags)
335 {
336         int err = 0;
337
338         /*
339          * psscr_mask == 0xf indicates an older firmware.
340          * Set remaining fields of psscr to the default values.
341          * See NOTE above definition of PSSCR_HV_DEFAULT_VAL
342          */
343         if (*psscr_mask == 0xf) {
344                 *psscr_val = *psscr_val | PSSCR_HV_DEFAULT_VAL;
345                 *psscr_mask = PSSCR_HV_DEFAULT_MASK;
346                 return err;
347         }
348
349         /*
350          * New firmware is expected to set the psscr_val bits correctly.
351          * Validate that the following invariants are correctly maintained by
352          * the new firmware.
353          * - ESL bit value matches the EC bit value.
354          * - ESL bit is set for all the deep stop states.
355          */
356         if (GET_PSSCR_ESL(*psscr_val) != GET_PSSCR_EC(*psscr_val)) {
357                 err = ERR_EC_ESL_MISMATCH;
358         } else if ((flags & OPAL_PM_LOSE_FULL_CONTEXT) &&
359                 GET_PSSCR_ESL(*psscr_val) == 0) {
360                 err = ERR_DEEP_STATE_ESL_MISMATCH;
361         }
362
363         return err;
364 }
365
366 /*
367  * pnv_arch300_idle_init: Initializes the default idle state, first
368  *                        deep idle state and deepest idle state on
369  *                        ISA 3.0 CPUs.
370  *
371  * @np: /ibm,opal/power-mgt device node
372  * @flags: cpu-idle-state-flags array
373  * @dt_idle_states: Number of idle state entries
374  * Returns 0 on success
375  */
376 static int __init pnv_power9_idle_init(struct device_node *np, u32 *flags,
377                                         int dt_idle_states)
378 {
379         u64 *psscr_val = NULL;
380         u64 *psscr_mask = NULL;
381         u32 *residency_ns = NULL;
382         u64 max_residency_ns = 0;
383         int rc = 0, i;
384
385         psscr_val = kcalloc(dt_idle_states, sizeof(*psscr_val), GFP_KERNEL);
386         psscr_mask = kcalloc(dt_idle_states, sizeof(*psscr_mask), GFP_KERNEL);
387         residency_ns = kcalloc(dt_idle_states, sizeof(*residency_ns),
388                                GFP_KERNEL);
389
390         if (!psscr_val || !psscr_mask || !residency_ns) {
391                 rc = -1;
392                 goto out;
393         }
394
395         if (of_property_read_u64_array(np,
396                 "ibm,cpu-idle-state-psscr",
397                 psscr_val, dt_idle_states)) {
398                 pr_warn("cpuidle-powernv: missing ibm,cpu-idle-state-psscr in DT\n");
399                 rc = -1;
400                 goto out;
401         }
402
403         if (of_property_read_u64_array(np,
404                                        "ibm,cpu-idle-state-psscr-mask",
405                                        psscr_mask, dt_idle_states)) {
406                 pr_warn("cpuidle-powernv: missing ibm,cpu-idle-state-psscr-mask in DT\n");
407                 rc = -1;
408                 goto out;
409         }
410
411         if (of_property_read_u32_array(np,
412                                        "ibm,cpu-idle-state-residency-ns",
413                                         residency_ns, dt_idle_states)) {
414                 pr_warn("cpuidle-powernv: missing ibm,cpu-idle-state-residency-ns in DT\n");
415                 rc = -1;
416                 goto out;
417         }
418
419         /*
420          * Set pnv_first_deep_stop_state, pnv_deepest_stop_psscr_{val,mask},
421          * and the pnv_default_stop_{val,mask}.
422          *
423          * pnv_first_deep_stop_state should be set to the first stop
424          * level to cause hypervisor state loss.
425          *
426          * pnv_deepest_stop_{val,mask} should be set to values corresponding to
427          * the deepest stop state.
428          *
429          * pnv_default_stop_{val,mask} should be set to values corresponding to
430          * the shallowest (OPAL_PM_STOP_INST_FAST) loss-less stop state.
431          */
432         pnv_first_deep_stop_state = MAX_STOP_STATE;
433         for (i = 0; i < dt_idle_states; i++) {
434                 int err;
435                 u64 psscr_rl = psscr_val[i] & PSSCR_RL_MASK;
436
437                 if ((flags[i] & OPAL_PM_LOSE_FULL_CONTEXT) &&
438                      (pnv_first_deep_stop_state > psscr_rl))
439                         pnv_first_deep_stop_state = psscr_rl;
440
441                 err = validate_psscr_val_mask(&psscr_val[i], &psscr_mask[i],
442                                               flags[i]);
443                 if (err) {
444                         report_invalid_psscr_val(psscr_val[i], err);
445                         continue;
446                 }
447
448                 if (max_residency_ns < residency_ns[i]) {
449                         max_residency_ns = residency_ns[i];
450                         pnv_deepest_stop_psscr_val = psscr_val[i];
451                         pnv_deepest_stop_psscr_mask = psscr_mask[i];
452                         deepest_stop_found = true;
453                 }
454
455                 if (!default_stop_found &&
456                     (flags[i] & OPAL_PM_STOP_INST_FAST)) {
457                         pnv_default_stop_val = psscr_val[i];
458                         pnv_default_stop_mask = psscr_mask[i];
459                         default_stop_found = true;
460                 }
461         }
462
463         if (unlikely(!default_stop_found)) {
464                 pr_warn("cpuidle-powernv: No suitable default stop state found. Disabling platform idle.\n");
465         } else {
466                 ppc_md.power_save = power9_idle;
467                 pr_info("cpuidle-powernv: Default stop: psscr = 0x%016llx,mask=0x%016llx\n",
468                         pnv_default_stop_val, pnv_default_stop_mask);
469         }
470
471         if (unlikely(!deepest_stop_found)) {
472                 pr_warn("cpuidle-powernv: No suitable stop state for CPU-Hotplug. Offlined CPUs will busy wait");
473         } else {
474                 pr_info("cpuidle-powernv: Deepest stop: psscr = 0x%016llx,mask=0x%016llx\n",
475                         pnv_deepest_stop_psscr_val,
476                         pnv_deepest_stop_psscr_mask);
477         }
478
479         pr_info("cpuidle-powernv: Requested Level (RL) value of first deep stop = 0x%llx\n",
480                 pnv_first_deep_stop_state);
481 out:
482         kfree(psscr_val);
483         kfree(psscr_mask);
484         kfree(residency_ns);
485         return rc;
486 }
487
488 /*
489  * Probe device tree for supported idle states
490  */
491 static void __init pnv_probe_idle_states(void)
492 {
493         struct device_node *np;
494         int dt_idle_states;
495         u32 *flags = NULL;
496         int i;
497
498         np = of_find_node_by_path("/ibm,opal/power-mgt");
499         if (!np) {
500                 pr_warn("opal: PowerMgmt Node not found\n");
501                 goto out;
502         }
503         dt_idle_states = of_property_count_u32_elems(np,
504                         "ibm,cpu-idle-state-flags");
505         if (dt_idle_states < 0) {
506                 pr_warn("cpuidle-powernv: no idle states found in the DT\n");
507                 goto out;
508         }
509
510         flags = kcalloc(dt_idle_states, sizeof(*flags),  GFP_KERNEL);
511
512         if (of_property_read_u32_array(np,
513                         "ibm,cpu-idle-state-flags", flags, dt_idle_states)) {
514                 pr_warn("cpuidle-powernv: missing ibm,cpu-idle-state-flags in DT\n");
515                 goto out;
516         }
517
518         if (cpu_has_feature(CPU_FTR_ARCH_300)) {
519                 if (pnv_power9_idle_init(np, flags, dt_idle_states))
520                         goto out;
521         }
522
523         for (i = 0; i < dt_idle_states; i++)
524                 supported_cpuidle_states |= flags[i];
525
526 out:
527         kfree(flags);
528 }
529 static int __init pnv_init_idle_states(void)
530 {
531
532         supported_cpuidle_states = 0;
533
534         if (cpuidle_disable != IDLE_NO_OVERRIDE)
535                 goto out;
536
537         pnv_probe_idle_states();
538
539         if (!(supported_cpuidle_states & OPAL_PM_SLEEP_ENABLED_ER1)) {
540                 patch_instruction(
541                         (unsigned int *)pnv_fastsleep_workaround_at_entry,
542                         PPC_INST_NOP);
543                 patch_instruction(
544                         (unsigned int *)pnv_fastsleep_workaround_at_exit,
545                         PPC_INST_NOP);
546         } else {
547                 /*
548                  * OPAL_PM_SLEEP_ENABLED_ER1 is set. It indicates that
549                  * workaround is needed to use fastsleep. Provide sysfs
550                  * control to choose how this workaround has to be applied.
551                  */
552                 device_create_file(cpu_subsys.dev_root,
553                                 &dev_attr_fastsleep_workaround_applyonce);
554         }
555
556         pnv_alloc_idle_core_states();
557
558         /*
559          * For each CPU, record its PACA address in each of it's
560          * sibling thread's PACA at the slot corresponding to this
561          * CPU's index in the core.
562          */
563         if (cpu_has_feature(CPU_FTR_POWER9_DD1)) {
564                 int cpu;
565
566                 pr_info("powernv: idle: Saving PACA pointers of all CPUs in their thread sibling PACA\n");
567                 for_each_possible_cpu(cpu) {
568                         int base_cpu = cpu_first_thread_sibling(cpu);
569                         int idx = cpu_thread_in_core(cpu);
570                         int i;
571
572                         for (i = 0; i < threads_per_core; i++) {
573                                 int j = base_cpu + i;
574
575                                 paca[j].thread_sibling_pacas[idx] = &paca[cpu];
576                         }
577                 }
578         }
579
580         if (supported_cpuidle_states & OPAL_PM_NAP_ENABLED)
581                 ppc_md.power_save = power7_idle;
582
583 out:
584         return 0;
585 }
586 machine_subsys_initcall(powernv, pnv_init_idle_states);