]> git.kernelconcepts.de Git - karo-tx-linux.git/commitdiff
x86 idle: Repair large-server 50-watt idle-power regression
authorLen Brown <len.brown@intel.com>
Wed, 18 Dec 2013 21:44:57 +0000 (16:44 -0500)
committerH. Peter Anvin <hpa@linux.intel.com>
Thu, 19 Dec 2013 19:47:39 +0000 (11:47 -0800)
Linux 3.10 changed the timing of how thread_info->flags is touched:

x86: Use generic idle loop
(7d1a941731fabf27e5fb6edbebb79fe856edb4e5)

This caused Intel NHM-EX and WSM-EX servers to experience a large number
of immediate MONITOR/MWAIT break wakeups, which caused cpuidle to demote
from deep C-states to shallow C-states, which caused these platforms
to experience a significant increase in idle power.

Note that this issue was already present before the commit above,
however, it wasn't seen often enough to be noticed in power measurements.

Here we extend an errata workaround from the Core2 EX "Dunnington"
to extend to NHM-EX and WSM-EX, to prevent these immediate
returns from MWAIT, reducing idle power on these platforms.

While only acpi_idle ran on Dunnington, intel_idle
may also run on these two newer systems.
As of today, there are no other models that are known
to need this tweak.

Link: http://lkml.kernel.org/r/CAJvTdK=%2BaNN66mYpCGgbHGCHhYQAKx-vB0kJSWjVpsNb_hOAtQ@mail.gmail.com
Signed-off-by: Len Brown <len.brown@intel.com>
Link: http://lkml.kernel.org/r/baff264285f6e585df757d58b17788feabc68918.1387403066.git.len.brown@intel.com
Cc: <stable@vger.kernel.org> # 3.12.x, 3.11.x, 3.10.x
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
arch/x86/kernel/cpu/intel.c
drivers/idle/intel_idle.c

index dc1ec0dff939ecff5e55bcccaeb5bea322ad982a..ea04b342c026f8c8871f70a0c383d80651e12524 100644 (file)
@@ -387,7 +387,8 @@ static void init_intel(struct cpuinfo_x86 *c)
                        set_cpu_cap(c, X86_FEATURE_PEBS);
        }
 
-       if (c->x86 == 6 && c->x86_model == 29 && cpu_has_clflush)
+       if (c->x86 == 6 && cpu_has_clflush &&
+           (c->x86_model == 29 || c->x86_model == 46 || c->x86_model == 47))
                set_cpu_cap(c, X86_FEATURE_CLFLUSH_MONITOR);
 
 #ifdef CONFIG_X86_64
index 92d1206482a62ca57128690e49735dea2f00aaa3..f80b700f821ca4ece2a17ef1db08c099738f74fe 100644 (file)
@@ -377,6 +377,9 @@ static int intel_idle(struct cpuidle_device *dev,
 
        if (!current_set_polling_and_test()) {
 
+               if (this_cpu_has(X86_FEATURE_CLFLUSH_MONITOR))
+                       clflush((void *)&current_thread_info()->flags);
+
                __monitor((void *)&current_thread_info()->flags, 0, 0);
                smp_mb();
                if (!need_resched())