git.kernelconcepts.de Git - karo-tx-linux.git/commitdiff
Merge branch 'linus' into timers/core
author Thomas Gleixner <tglx@linutronix.de>
Wed, 24 Apr 2013 18:33:46 +0000 (20:33 +0200)
committer Thomas Gleixner <tglx@linutronix.de>
Wed, 24 Apr 2013 18:33:54 +0000 (20:33 +0200)
Reason: Get upstream fixes before adding conflicting code.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
36 files changed:
arch/arm/kernel/process.c
arch/arm/mach-bcm/Kconfig
arch/arm/mach-bcm/board_bcm.c
arch/x86/Kconfig
arch/x86/include/asm/cpufeature.h
arch/x86/kernel/cpu/intel.c
arch/x86/kernel/process.c
arch/x86/kernel/rtc.c
arch/x86/kernel/tsc.c
arch/x86/platform/efi/efi.c
arch/x86/platform/mrst/vrtc.c
drivers/clocksource/Makefile
drivers/clocksource/bcm_kona_timer.c [new file with mode: 0644]
fs/proc/base.c
include/linux/clockchips.h
include/linux/clocksource.h
include/linux/hrtimer.h
include/linux/jiffies.h
include/linux/posix-timers.h
include/linux/sched.h
include/linux/time.h
include/linux/timekeeper_internal.h
include/linux/timex.h
include/uapi/linux/time.h
init/main.c
kernel/hrtimer.c
kernel/posix-timers.c
kernel/time.c
kernel/time/ntp.c
kernel/time/ntp_internal.h [new file with mode: 0644]
kernel/time/tick-broadcast.c
kernel/time/tick-common.c
kernel/time/tick-internal.h
kernel/time/tick-sched.c
kernel/time/timekeeping.c
kernel/time/timer_list.c

diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c
index 047d3e40e47062d4946d7f7c11e8330132340654..db4ffd09ee23b460754cb69d608002594f5b5e17 100644 (file)
@@ -199,7 +199,16 @@ void cpu_idle(void)
 #ifdef CONFIG_PL310_ERRATA_769419
                        wmb();
 #endif
-                       if (hlt_counter) {
+                       /*
+                        * In poll mode we reenable interrupts and spin.
+                        *
+                        * Also if we detected in the wakeup from idle
+                        * path that the tick broadcast device expired
+                        * for us, we don't want to go deep idle as we
+                        * know that the IPI is going to arrive right
+                        * away
+                        */
+                       if (hlt_counter || tick_check_broadcast_expired()) {
                                local_irq_enable();
                                cpu_relax();
                        } else if (!need_resched()) {
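[Note: the helper used above is declared in the include/linux/clockchips.h hunk further down. As a minimal sketch of what it tests on the calling CPU — the mask name is an assumption based on kernel/time/tick-broadcast.c of this era, not part of this merge:

int tick_check_broadcast_expired(void)
{
	/* Non-zero when the broadcast device already expired for this
	 * CPU, i.e. the wakeup IPI is imminent and deep idle is
	 * pointless. */
	return cpumask_test_cpu(smp_processor_id(),
				tick_broadcast_force_mask);
}
]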
diff --git a/arch/arm/mach-bcm/Kconfig b/arch/arm/mach-bcm/Kconfig
index bf02471d7e7c773ff1e97efe6ee343218940f4f1..f11289519c399c09c8e9725c78f5097a00644319 100644 (file)
@@ -6,6 +6,7 @@ config ARCH_BCM
        select ARM_ERRATA_764369 if SMP
        select ARM_GIC
        select CPU_V7
+       select CLKSRC_OF
        select GENERIC_CLOCKEVENTS
        select GENERIC_TIME
        select GPIO_BCM
diff --git a/arch/arm/mach-bcm/board_bcm.c b/arch/arm/mach-bcm/board_bcm.c
index f0f9abafad293f10390df92fed62f236ba97e6a3..259593540477bf5aad847efcac18ecbb1f054543 100644 (file)
 #include <linux/device.h>
 #include <linux/platform_device.h>
 #include <linux/irqchip.h>
+#include <linux/clocksource.h>
 
 #include <asm/mach/arch.h>
 #include <asm/mach/time.h>
 
-static void timer_init(void)
-{
-}
-
 
 static void __init board_init(void)
 {
@@ -35,7 +32,7 @@ static const char * const bcm11351_dt_compat[] = { "bcm,bcm11351", NULL, };
 
 DT_MACHINE_START(BCM11351_DT, "Broadcom Application Processor")
        .init_irq = irqchip_init,
-       .init_time = timer_init,
+       .init_time = clocksource_of_init,
        .init_machine = board_init,
        .dt_compat = bcm11351_dt_compat,
 MACHINE_END
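[Note: board_bcm.c can hand .init_time straight to clocksource_of_init() because of the CLKSRC_OF infrastructure selected in the Kconfig hunk above. A simplified sketch of that function, from drivers/clocksource/clksrc-of.c of this era (treat details as approximate):

void __init clocksource_of_init(void)
{
	struct device_node *np;
	const struct of_device_id *match;
	void (*init_func)(void);

	/* Walk every DT node whose compatible string matches an entry
	 * emitted by CLOCKSOURCE_OF_DECLARE() and run its init function. */
	for_each_matching_node_and_match(np, __clksrc_of_table, &match) {
		init_func = match->data;
		init_func();
	}
}
]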
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 15b5cef4aa3857a386cb77cffd2ba74243b532c7..9f74f523dfc66c84c097351f59d0d0bbe2fdad8f 100644 (file)
@@ -120,6 +120,7 @@ config X86
        select OLD_SIGSUSPEND3 if X86_32 || IA32_EMULATION
        select OLD_SIGACTION if X86_32
        select COMPAT_OLD_SIGACTION if IA32_EMULATION
+       select RTC_LIB
 
 config INSTRUCTION_DECODER
        def_bool y
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index 93fe929d1cee20ec1dc183d5c390e5d9325b26ce..a8466f203e6230a3dcd381803e8b2252cd652a39 100644 (file)
 #define X86_FEATURE_AMD_DCM     (3*32+27) /* multi-node processor */
 #define X86_FEATURE_APERFMPERF (3*32+28) /* APERFMPERF */
 #define X86_FEATURE_EAGER_FPU  (3*32+29) /* "eagerfpu" Non lazy FPU restore */
+#define X86_FEATURE_NONSTOP_TSC_S3 (3*32+30) /* TSC doesn't stop in S3 state */
 
 /* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */
 #define X86_FEATURE_XMM3       (4*32+ 0) /* "pni" SSE-3 */
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index 1905ce98bee01d667a0b6604d3c18f4d7c34e0fb..e7ae0d89e7e06605cdacfb02b4cbae7c698e689e 100644 (file)
@@ -96,6 +96,18 @@ static void __cpuinit early_init_intel(struct cpuinfo_x86 *c)
                        sched_clock_stable = 1;
        }
 
+       /* Penwell and Cloverview have the TSC which doesn't sleep on S3 */
+       if (c->x86 == 6) {
+               switch (c->x86_model) {
+               case 0x27:      /* Penwell */
+               case 0x35:      /* Cloverview */
+                       set_cpu_cap(c, X86_FEATURE_NONSTOP_TSC_S3);
+                       break;
+               default:
+                       break;
+               }
+       }
+
        /*
         * There is a known erratum on Pentium III and Core Solo
         * and Core Duo CPUs.
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 14ae10031ff04ba1bbfca4ea6064b6260badb4b3..aa524da03bba805c45912bc780ab0a6ba04ce57e 100644 (file)
@@ -336,6 +336,18 @@ void cpu_idle(void)
                        local_touch_nmi();
                        local_irq_disable();
 
+                       /*
+                        * We detected in the wakeup path that the
+                        * tick broadcast device expired for us, but
+                        * we raced with the other CPU and came back
+                        * here before it was able to fire the IPI.
+                        * No point in going idle.
+                        */
+                       if (tick_check_broadcast_expired()) {
+                               local_irq_enable();
+                               continue;
+                       }
+
                        enter_idle();
 
                        /* Don't trace irqs off for idle */
diff --git a/arch/x86/kernel/rtc.c b/arch/x86/kernel/rtc.c
index 2e8f3d3b56410a0eb10bbc0fe1d18877a46ff2fc..198eb201ed3b937862a5acc1bbaccfd414287f33 100644 (file)
@@ -13,6 +13,7 @@
 #include <asm/x86_init.h>
 #include <asm/time.h>
 #include <asm/mrst.h>
+#include <asm/rtc.h>
 
 #ifdef CONFIG_X86_32
 /*
@@ -36,70 +37,24 @@ EXPORT_SYMBOL(rtc_lock);
  * nowtime is written into the registers of the CMOS clock, it will
  * jump to the next second precisely 500 ms later. Check the Motorola
  * MC146818A or Dallas DS12887 data sheet for details.
- *
- * BUG: This routine does not handle hour overflow properly; it just
- *      sets the minutes. Usually you'll only notice that after reboot!
  */
 int mach_set_rtc_mmss(unsigned long nowtime)
 {
-       int real_seconds, real_minutes, cmos_minutes;
-       unsigned char save_control, save_freq_select;
-       unsigned long flags;
+       struct rtc_time tm;
        int retval = 0;
 
-       spin_lock_irqsave(&rtc_lock, flags);
-
-        /* tell the clock it's being set */
-       save_control = CMOS_READ(RTC_CONTROL);
-       CMOS_WRITE((save_control|RTC_SET), RTC_CONTROL);
-
-       /* stop and reset prescaler */
-       save_freq_select = CMOS_READ(RTC_FREQ_SELECT);
-       CMOS_WRITE((save_freq_select|RTC_DIV_RESET2), RTC_FREQ_SELECT);
-
-       cmos_minutes = CMOS_READ(RTC_MINUTES);
-       if (!(save_control & RTC_DM_BINARY) || RTC_ALWAYS_BCD)
-               cmos_minutes = bcd2bin(cmos_minutes);
-
-       /*
-        * since we're only adjusting minutes and seconds,
-        * don't interfere with hour overflow. This avoids
-        * messing with unknown time zones but requires your
-        * RTC not to be off by more than 15 minutes
-        */
-       real_seconds = nowtime % 60;
-       real_minutes = nowtime / 60;
-       /* correct for half hour time zone */
-       if (((abs(real_minutes - cmos_minutes) + 15)/30) & 1)
-               real_minutes += 30;
-       real_minutes %= 60;
-
-       if (abs(real_minutes - cmos_minutes) < 30) {
-               if (!(save_control & RTC_DM_BINARY) || RTC_ALWAYS_BCD) {
-                       real_seconds = bin2bcd(real_seconds);
-                       real_minutes = bin2bcd(real_minutes);
-               }
-               CMOS_WRITE(real_seconds, RTC_SECONDS);
-               CMOS_WRITE(real_minutes, RTC_MINUTES);
+       rtc_time_to_tm(nowtime, &tm);
+       if (!rtc_valid_tm(&tm)) {
+               retval = set_rtc_time(&tm);
+               if (retval)
+                       printk(KERN_ERR "%s: RTC write failed with error %d\n",
+                              __FUNCTION__, retval);
        } else {
-               printk_once(KERN_NOTICE
-                      "set_rtc_mmss: can't update from %d to %d\n",
-                      cmos_minutes, real_minutes);
-               retval = -1;
+               printk(KERN_ERR
+                      "%s: Invalid RTC value: write of %lx to RTC failed\n",
+                       __FUNCTION__, nowtime);
+               retval = -EINVAL;
        }
-
-       /* The following flags have to be released exactly in this order,
-        * otherwise the DS12887 (popular MC146818A clone with integrated
-        * battery and quartz) will not reset the oscillator and will not
-        * update precisely 500 ms later. You won't find this mentioned in
-        * the Dallas Semiconductor data sheets, but who believes data
-        * sheets anyway ...                           -- Markus Kuhn
-        */
-       CMOS_WRITE(save_control, RTC_CONTROL);
-       CMOS_WRITE(save_freq_select, RTC_FREQ_SELECT);
-
-       spin_unlock_irqrestore(&rtc_lock, flags);
-
        return retval;
 }
 
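[Note: all three set_rtc_mmss-style rewrites in this merge (CMOS, EFI, vRTC) share the same pattern, enabled by the RTC_LIB select in the x86 Kconfig hunk: convert the epoch seconds to a broken-down time, validate it, then write the full date instead of only minutes and seconds. A condensed sketch of that shared pattern, with a hypothetical helper name:

#include <linux/rtc.h>

static int example_set_rtc(unsigned long nowtime)
{
	struct rtc_time tm;

	rtc_time_to_tm(nowtime, &tm);	/* years since 1900, 0-based month */
	if (rtc_valid_tm(&tm))		/* non-zero means bogus values */
		return -EINVAL;
	return set_rtc_time(&tm);	/* write the complete date */
}
]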
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index 4b9ea101fe3b2923744b5b0b5b61c15ff1c3f296..098b3cfda72ee152ab3ce5505ad5843b3778b699 100644 (file)
@@ -768,7 +768,8 @@ static cycle_t read_tsc(struct clocksource *cs)
 
 static void resume_tsc(struct clocksource *cs)
 {
-       clocksource_tsc.cycle_last = 0;
+       if (!boot_cpu_has(X86_FEATURE_NONSTOP_TSC_S3))
+               clocksource_tsc.cycle_last = 0;
 }
 
 static struct clocksource clocksource_tsc = {
@@ -939,6 +940,9 @@ static int __init init_tsc_clocksource(void)
                clocksource_tsc.flags &= ~CLOCK_SOURCE_IS_CONTINUOUS;
        }
 
+       if (boot_cpu_has(X86_FEATURE_NONSTOP_TSC_S3))
+               clocksource_tsc.flags |= CLOCK_SOURCE_SUSPEND_NONSTOP;
+
        /*
         * Trust the results of the earlier calibration on systems
         * exporting a reliable TSC.
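[Note: the new CLOCK_SOURCE_SUSPEND_NONSTOP flag is consumed by the timekeeping core on resume. A rough sketch of the idea — an assumption condensed from the companion timekeeping change, not from this hunk:

static void timekeeping_resume_sketch(struct timekeeper *tk)
{
	struct clocksource *clock = tk->clock;
	cycle_t cycle_now, cycle_delta;

	cycle_now = clock->read(clock);
	if ((clock->flags & CLOCK_SOURCE_SUSPEND_NONSTOP) &&
	    cycle_now > clock->cycle_last) {
		/* The counter kept running across S3: the slept time is
		 * simply the cycle delta, converted to nanoseconds. */
		cycle_delta = (cycle_now - clock->cycle_last) & clock->mask;
		/* ...feed clocksource_cyc2ns(cycle_delta, clock->mult,
		 * clock->shift) into the timekeeper as sleep time... */
	}
}
]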
diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c
index e4a86a677ce163ec5f911fb14db65590d813aee1..b55d174e503446fe6cc47ea34df463f5ce171c5f 100644 (file)
@@ -49,6 +49,7 @@
 #include <asm/cacheflush.h>
 #include <asm/tlbflush.h>
 #include <asm/x86_init.h>
+#include <asm/rtc.h>
 
 #define EFI_DEBUG      1
 
@@ -352,10 +353,10 @@ static efi_status_t __init phys_efi_get_time(efi_time_t *tm,
 
 int efi_set_rtc_mmss(unsigned long nowtime)
 {
-       int real_seconds, real_minutes;
        efi_status_t    status;
        efi_time_t      eft;
        efi_time_cap_t  cap;
+       struct rtc_time tm;
 
        status = efi.get_time(&eft, &cap);
        if (status != EFI_SUCCESS) {
@@ -363,13 +364,20 @@ int efi_set_rtc_mmss(unsigned long nowtime)
                return -1;
        }
 
-       real_seconds = nowtime % 60;
-       real_minutes = nowtime / 60;
-       if (((abs(real_minutes - eft.minute) + 15)/30) & 1)
-               real_minutes += 30;
-       real_minutes %= 60;
-       eft.minute = real_minutes;
-       eft.second = real_seconds;
+       rtc_time_to_tm(nowtime, &tm);
+       if (!rtc_valid_tm(&tm)) {
+               eft.year = tm.tm_year + 1900;
+               eft.month = tm.tm_mon + 1;
+               eft.day = tm.tm_mday;
+               eft.minute = tm.tm_min;
+               eft.second = tm.tm_sec;
+               eft.nanosecond = 0;
+       } else {
+               printk(KERN_ERR
+                      "%s: Invalid EFI RTC value: write of %lx to EFI RTC failed\n",
+                      __FUNCTION__, nowtime);
+               return -1;
+       }
 
        status = efi.set_time(&eft);
        if (status != EFI_SUCCESS) {
diff --git a/arch/x86/platform/mrst/vrtc.c b/arch/x86/platform/mrst/vrtc.c
index 225bd0f0f675395643c47859e61e0e5b6354ecb1..d62b0a3b5c145abf837e52e028dcf39bf923e096 100644 (file)
@@ -85,27 +85,35 @@ unsigned long vrtc_get_time(void)
        return mktime(year, mon, mday, hour, min, sec);
 }
 
-/* Only care about the minutes and seconds */
 int vrtc_set_mmss(unsigned long nowtime)
 {
-       int real_sec, real_min;
        unsigned long flags;
-       int vrtc_min;
-
-       spin_lock_irqsave(&rtc_lock, flags);
-       vrtc_min = vrtc_cmos_read(RTC_MINUTES);
-
-       real_sec = nowtime % 60;
-       real_min = nowtime / 60;
-       if (((abs(real_min - vrtc_min) + 15)/30) & 1)
-               real_min += 30;
-       real_min %= 60;
-
-       vrtc_cmos_write(real_sec, RTC_SECONDS);
-       vrtc_cmos_write(real_min, RTC_MINUTES);
-       spin_unlock_irqrestore(&rtc_lock, flags);
-
-       return 0;
+       struct rtc_time tm;
+       int year;
+       int retval = 0;
+
+       rtc_time_to_tm(nowtime, &tm);
+       if (!rtc_valid_tm(&tm) && tm.tm_year >= 72) {
+               /*
+                * tm.year is the number of years since 1900, and the
+                * vrtc need the years since 1972.
+                */
+               year = tm.tm_year - 72;
+               spin_lock_irqsave(&rtc_lock, flags);
+               vrtc_cmos_write(year, RTC_YEAR);
+               vrtc_cmos_write(tm.tm_mon, RTC_MONTH);
+               vrtc_cmos_write(tm.tm_mday, RTC_DAY_OF_MONTH);
+               vrtc_cmos_write(tm.tm_hour, RTC_HOURS);
+               vrtc_cmos_write(tm.tm_min, RTC_MINUTES);
+               vrtc_cmos_write(tm.tm_sec, RTC_SECONDS);
+               spin_unlock_irqrestore(&rtc_lock, flags);
+       } else {
+               printk(KERN_ERR
+                      "%s: Invalid vRTC value: write of %lx to vRTC failed\n",
+                       __FUNCTION__, nowtime);
+               retval = -EINVAL;
+       }
+       return retval;
 }
 
 void __init mrst_rtc_init(void)
diff --git a/drivers/clocksource/Makefile b/drivers/clocksource/Makefile
index 4d8283aec5b51286ba3942ce4e3223a9281ab3cf..96e25319659b09ccf80a21aa80539173b50d4b0d 100644 (file)
@@ -19,6 +19,7 @@ obj-$(CONFIG_ARCH_BCM2835)    += bcm2835_timer.o
 obj-$(CONFIG_SUNXI_TIMER)      += sunxi_timer.o
 obj-$(CONFIG_ARCH_TEGRA)       += tegra20_timer.o
 obj-$(CONFIG_VT8500_TIMER)     += vt8500_timer.o
+obj-$(CONFIG_ARCH_BCM)         += bcm_kona_timer.o
 
 obj-$(CONFIG_ARM_ARCH_TIMER)           += arm_arch_timer.o
 obj-$(CONFIG_CLKSRC_METAG_GENERIC)     += metag_generic.o
diff --git a/drivers/clocksource/bcm_kona_timer.c b/drivers/clocksource/bcm_kona_timer.c
new file mode 100644 (file)
index 0000000..350f493
--- /dev/null
@@ -0,0 +1,211 @@
+/*
+ * Copyright (C) 2012 Broadcom Corporation
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation version 2.
+ *
+ * This program is distributed "as is" WITHOUT ANY WARRANTY of any
+ * kind, whether express or implied; without even the implied warranty
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/init.h>
+#include <linux/irq.h>
+#include <linux/interrupt.h>
+#include <linux/jiffies.h>
+#include <linux/clockchips.h>
+#include <linux/types.h>
+
+#include <linux/io.h>
+#include <asm/mach/time.h>
+
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/of_irq.h>
+
+
+#define KONA_GPTIMER_STCS_OFFSET                       0x00000000
+#define KONA_GPTIMER_STCLO_OFFSET                      0x00000004
+#define KONA_GPTIMER_STCHI_OFFSET                      0x00000008
+#define KONA_GPTIMER_STCM0_OFFSET                      0x0000000C
+
+#define KONA_GPTIMER_STCS_TIMER_MATCH_SHIFT            0
+#define KONA_GPTIMER_STCS_COMPARE_ENABLE_SHIFT         4
+
+struct kona_bcm_timers {
+       int tmr_irq;
+       void __iomem *tmr_regs;
+};
+
+static struct kona_bcm_timers timers;
+
+static u32 arch_timer_rate;
+
+/*
+ * We use the peripheral timers for system tick, the cpu global timer for
+ * profile tick
+ */
+static void kona_timer_disable_and_clear(void __iomem *base)
+{
+       uint32_t reg;
+
+       /*
+        * clear and disable interrupts
+        * We are using compare/match register 0 for our system interrupts
+        */
+       reg = readl(base + KONA_GPTIMER_STCS_OFFSET);
+
+       /* Clear compare (0) interrupt */
+       reg |= 1 << KONA_GPTIMER_STCS_TIMER_MATCH_SHIFT;
+       /* disable compare */
+       reg &= ~(1 << KONA_GPTIMER_STCS_COMPARE_ENABLE_SHIFT);
+
+       writel(reg, base + KONA_GPTIMER_STCS_OFFSET);
+
+}
+
+static void
+kona_timer_get_counter(void *timer_base, uint32_t *msw, uint32_t *lsw)
+{
+       void __iomem *base = IOMEM(timer_base);
+       int loop_limit = 4;
+
+       /*
+        * Read 64-bit free running counter
+        * 1. Read hi-word
+        * 2. Read low-word
+        * 3. Read hi-word again
+        * 4.1
+        *      if new hi-word is not equal to previously read hi-word, then
+        *      start from #1
+        * 4.2
+        *      if new hi-word is equal to previously read hi-word then stop.
+        */
+
+       while (--loop_limit) {
+               *msw = readl(base + KONA_GPTIMER_STCHI_OFFSET);
+               *lsw = readl(base + KONA_GPTIMER_STCLO_OFFSET);
+               if (*msw == readl(base + KONA_GPTIMER_STCHI_OFFSET))
+                       break;
+       }
+       if (!loop_limit) {
+               pr_err("bcm_kona_timer: getting counter failed.\n");
+               pr_err(" Timer will be impacted\n");
+       }
+
+       return;
+}
+
+static const struct of_device_id bcm_timer_ids[] __initconst = {
+       {.compatible = "bcm,kona-timer"},
+       {},
+};
+
+static void __init kona_timers_init(void)
+{
+       struct device_node *node;
+       u32 freq;
+
+       node = of_find_matching_node(NULL, bcm_timer_ids);
+
+       if (!node)
+               panic("No timer");
+
+       if (!of_property_read_u32(node, "clock-frequency", &freq))
+               arch_timer_rate = freq;
+       else
+               panic("clock-frequency not set in the .dts file");
+
+       /* Setup IRQ numbers */
+       timers.tmr_irq = irq_of_parse_and_map(node, 0);
+
+       /* Setup IO addresses */
+       timers.tmr_regs = of_iomap(node, 0);
+
+       kona_timer_disable_and_clear(timers.tmr_regs);
+}
+
+static int kona_timer_set_next_event(unsigned long clc,
+                                 struct clock_event_device *unused)
+{
+       /*
+        * timer (0) is disabled by the timer interrupt already
+        * so, here we reload the next event value and re-enable
+        * the timer.
+        *
+        * This way, we are potentially losing the time between
+        * timer-interrupt->set_next_event. CPU local timers, when
+        * they come in should get rid of skew.
+        */
+
+       uint32_t lsw, msw;
+       uint32_t reg;
+
+       kona_timer_get_counter(timers.tmr_regs, &msw, &lsw);
+
+       /* Load the "next" event tick value */
+       writel(lsw + clc, timers.tmr_regs + KONA_GPTIMER_STCM0_OFFSET);
+
+       /* Enable compare */
+       reg = readl(timers.tmr_regs + KONA_GPTIMER_STCS_OFFSET);
+       reg |= (1 << KONA_GPTIMER_STCS_COMPARE_ENABLE_SHIFT);
+       writel(reg, timers.tmr_regs + KONA_GPTIMER_STCS_OFFSET);
+
+       return 0;
+}
+
+static void kona_timer_set_mode(enum clock_event_mode mode,
+                            struct clock_event_device *unused)
+{
+       switch (mode) {
+       case CLOCK_EVT_MODE_ONESHOT:
+               /* by default mode is one shot don't do any thing */
+               break;
+       case CLOCK_EVT_MODE_UNUSED:
+       case CLOCK_EVT_MODE_SHUTDOWN:
+       default:
+               kona_timer_disable_and_clear(timers.tmr_regs);
+       }
+}
+
+static struct clock_event_device kona_clockevent_timer = {
+       .name = "timer 1",
+       .features = CLOCK_EVT_FEAT_ONESHOT,
+       .set_next_event = kona_timer_set_next_event,
+       .set_mode = kona_timer_set_mode
+};
+
+static void __init kona_timer_clockevents_init(void)
+{
+       kona_clockevent_timer.cpumask = cpumask_of(0);
+       clockevents_config_and_register(&kona_clockevent_timer,
+               arch_timer_rate, 6, 0xffffffff);
+}
+
+static irqreturn_t kona_timer_interrupt(int irq, void *dev_id)
+{
+       struct clock_event_device *evt = &kona_clockevent_timer;
+
+       kona_timer_disable_and_clear(timers.tmr_regs);
+       evt->event_handler(evt);
+       return IRQ_HANDLED;
+}
+
+static struct irqaction kona_timer_irq = {
+       .name = "Kona Timer Tick",
+       .flags = IRQF_TIMER,
+       .handler = kona_timer_interrupt,
+};
+
+static void __init kona_timer_init(void)
+{
+       kona_timers_init();
+       kona_timer_clockevents_init();
+       setup_irq(timers.tmr_irq, &kona_timer_irq);
+       kona_timer_set_next_event((arch_timer_rate / HZ), NULL);
+}
+
+CLOCKSOURCE_OF_DECLARE(bcm_kona, "bcm,kona-timer",
+       kona_timer_init);
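[Note: the CLOCKSOURCE_OF_DECLARE() line above is what clocksource_of_init() — see the board_bcm.c hunk earlier — picks up. Roughly, the macro places a match-table entry into a dedicated linker section; sketched from include/linux/clocksource.h of this era, details hedged:

#define CLOCKSOURCE_OF_DECLARE(name, compat, fn)			\
	static const struct of_device_id __clksrc_of_table_##name	\
		__used __section(__clksrc_of_table)			\
		= { .compatible = compat, .data = fn };

kona_timers_init() then expects the matched device-tree node to carry a register window (of_iomap index 0), an interrupt (index 0) and a mandatory clock-frequency property; the driver panics without the latter.]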
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 69078c7cef1fa1443472f21bf4c98bb435fb7824..a19308604145a11ceb9feee8bac5b58ad726ec15 100644 (file)
@@ -86,6 +86,7 @@
 #include <linux/fs_struct.h>
 #include <linux/slab.h>
 #include <linux/flex_array.h>
+#include <linux/posix-timers.h>
 #ifdef CONFIG_HARDWALL
 #include <asm/hardwall.h>
 #endif
@@ -2013,6 +2014,102 @@ static const struct file_operations proc_map_files_operations = {
        .llseek         = default_llseek,
 };
 
+struct timers_private {
+       struct pid *pid;
+       struct task_struct *task;
+       struct sighand_struct *sighand;
+       struct pid_namespace *ns;
+       unsigned long flags;
+};
+
+static void *timers_start(struct seq_file *m, loff_t *pos)
+{
+       struct timers_private *tp = m->private;
+
+       tp->task = get_pid_task(tp->pid, PIDTYPE_PID);
+       if (!tp->task)
+               return ERR_PTR(-ESRCH);
+
+       tp->sighand = lock_task_sighand(tp->task, &tp->flags);
+       if (!tp->sighand)
+               return ERR_PTR(-ESRCH);
+
+       return seq_list_start(&tp->task->signal->posix_timers, *pos);
+}
+
+static void *timers_next(struct seq_file *m, void *v, loff_t *pos)
+{
+       struct timers_private *tp = m->private;
+       return seq_list_next(v, &tp->task->signal->posix_timers, pos);
+}
+
+static void timers_stop(struct seq_file *m, void *v)
+{
+       struct timers_private *tp = m->private;
+
+       if (tp->sighand) {
+               unlock_task_sighand(tp->task, &tp->flags);
+               tp->sighand = NULL;
+       }
+
+       if (tp->task) {
+               put_task_struct(tp->task);
+               tp->task = NULL;
+       }
+}
+
+static int show_timer(struct seq_file *m, void *v)
+{
+       struct k_itimer *timer;
+       struct timers_private *tp = m->private;
+       int notify;
+       static char *nstr[] = {
+               [SIGEV_SIGNAL] = "signal",
+               [SIGEV_NONE] = "none",
+               [SIGEV_THREAD] = "thread",
+       };
+
+       timer = list_entry((struct list_head *)v, struct k_itimer, list);
+       notify = timer->it_sigev_notify;
+
+       seq_printf(m, "ID: %d\n", timer->it_id);
+       seq_printf(m, "signal: %d/%p\n", timer->sigq->info.si_signo,
+                       timer->sigq->info.si_value.sival_ptr);
+       seq_printf(m, "notify: %s/%s.%d\n",
+               nstr[notify & ~SIGEV_THREAD_ID],
+               (notify & SIGEV_THREAD_ID) ? "tid" : "pid",
+               pid_nr_ns(timer->it_pid, tp->ns));
+
+       return 0;
+}
+
+static const struct seq_operations proc_timers_seq_ops = {
+       .start  = timers_start,
+       .next   = timers_next,
+       .stop   = timers_stop,
+       .show   = show_timer,
+};
+
+static int proc_timers_open(struct inode *inode, struct file *file)
+{
+       struct timers_private *tp;
+
+       tp = __seq_open_private(file, &proc_timers_seq_ops,
+                       sizeof(struct timers_private));
+       if (!tp)
+               return -ENOMEM;
+
+       tp->pid = proc_pid(inode);
+       tp->ns = inode->i_sb->s_fs_info;
+       return 0;
+}
+
+static const struct file_operations proc_timers_operations = {
+       .open           = proc_timers_open,
+       .read           = seq_read,
+       .llseek         = seq_lseek,
+       .release        = seq_release_private,
+};
 #endif /* CONFIG_CHECKPOINT_RESTORE */
 
 static struct dentry *proc_pident_instantiate(struct inode *dir,
@@ -2583,6 +2680,9 @@ static const struct pid_entry tgid_base_stuff[] = {
        REG("gid_map",    S_IRUGO|S_IWUSR, proc_gid_map_operations),
        REG("projid_map", S_IRUGO|S_IWUSR, proc_projid_map_operations),
 #endif
+#ifdef CONFIG_CHECKPOINT_RESTORE
+       REG("timers",     S_IRUGO, proc_timers_operations),
+#endif
 };
 
 static int proc_tgid_base_readdir(struct file * filp,
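[Note: given show_timer()'s three format strings above, an entry in the new /proc/<pid>/timers file (only built with CONFIG_CHECKPOINT_RESTORE) reads roughly as follows — the values here are hypothetical:

ID: 1
signal: 34/0000000000000000
notify: signal/pid.2732
]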
diff --git a/include/linux/clockchips.h b/include/linux/clockchips.h
index 66346521cb6561641045297cb74c8d15f7b71f74..464e229e7d84de90eba7b3b40c22023a0c643ec0 100644 (file)
@@ -55,6 +55,11 @@ enum clock_event_nofitiers {
 #define CLOCK_EVT_FEAT_C3STOP          0x000008
 #define CLOCK_EVT_FEAT_DUMMY           0x000010
 
+/*
+ * Core shall set the interrupt affinity dynamically in broadcast mode
+ */
+#define CLOCK_EVT_FEAT_DYNIRQ          0x000020
+
 /**
  * struct clock_event_device - clock event device descriptor
  * @event_handler:     Assigned by the framework to be called by the low
@@ -170,6 +175,12 @@ extern void tick_broadcast(const struct cpumask *mask);
 extern int tick_receive_broadcast(void);
 #endif
 
+#if defined(CONFIG_GENERIC_CLOCKEVENTS_BROADCAST) && defined(CONFIG_TICK_ONESHOT)
+extern int tick_check_broadcast_expired(void);
+#else
+static inline int tick_check_broadcast_expired(void) { return 0; }
+#endif
+
 #ifdef CONFIG_GENERIC_CLOCKEVENTS
 extern void clockevents_notify(unsigned long reason, void *arg);
 #else
@@ -182,6 +193,7 @@ static inline void clockevents_suspend(void) {}
 static inline void clockevents_resume(void) {}
 
 #define clockevents_notify(reason, arg) do { } while (0)
+static inline int tick_check_broadcast_expired(void) { return 0; }
 
 #endif
 
diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h
index 27cfda427dd9106b15af57c3ad4825445a4b8feb..aa7032c7238fd49988062f509524b2891b2e54a0 100644 (file)
@@ -206,6 +206,7 @@ struct clocksource {
 #define CLOCK_SOURCE_WATCHDOG                  0x10
 #define CLOCK_SOURCE_VALID_FOR_HRES            0x20
 #define CLOCK_SOURCE_UNSTABLE                  0x40
+#define CLOCK_SOURCE_SUSPEND_NONSTOP           0x80
 
 /* simplify initialization of mask field */
 #define CLOCKSOURCE_MASK(bits) (cycle_t)((bits) < 64 ? ((1ULL<<(bits))-1) : -1)
diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h
index cc07d2777bbe6b11a632840c5f0a867436bbeac5..d19a5c2d2270ebb9bbe01a167a97875e255da357 100644 (file)
@@ -157,6 +157,7 @@ enum  hrtimer_base_type {
        HRTIMER_BASE_MONOTONIC,
        HRTIMER_BASE_REALTIME,
        HRTIMER_BASE_BOOTTIME,
+       HRTIMER_BASE_TAI,
        HRTIMER_MAX_CLOCK_BASES,
 };
 
@@ -327,7 +328,9 @@ extern ktime_t ktime_get(void);
 extern ktime_t ktime_get_real(void);
 extern ktime_t ktime_get_boottime(void);
 extern ktime_t ktime_get_monotonic_offset(void);
-extern ktime_t ktime_get_update_offsets(ktime_t *offs_real, ktime_t *offs_boot);
+extern ktime_t ktime_get_clocktai(void);
+extern ktime_t ktime_get_update_offsets(ktime_t *offs_real, ktime_t *offs_boot,
+                                        ktime_t *offs_tai);
 
 DECLARE_PER_CPU(struct tick_device, tick_cpu_device);
 
diff --git a/include/linux/jiffies.h b/include/linux/jiffies.h
index 82ed068b1ebee619906a1713ce9e8b51c134e78c..8fb8edf12417a4d8b53bbcd8c3aacb5ea6b5054e 100644 (file)
@@ -75,7 +75,6 @@ extern int register_refined_jiffies(long clock_tick_rate);
  */
 extern u64 __jiffy_data jiffies_64;
 extern unsigned long volatile __jiffy_data jiffies;
-extern seqlock_t jiffies_lock;
 
 #if (BITS_PER_LONG < 64)
 u64 get_jiffies_64(void);
diff --git a/include/linux/posix-timers.h b/include/linux/posix-timers.h
index 042058fdb0af3c4adcf7a0e3c5ee317fb73e2a8b..60bac697a91b3471c4fdce394ce315a64463ef0f 100644 (file)
@@ -55,6 +55,7 @@ struct cpu_timer_list {
 /* POSIX.1b interval timer structure. */
 struct k_itimer {
        struct list_head list;          /* free/ allocate list */
+       struct hlist_node t_hash;
        spinlock_t it_lock;
        clockid_t it_clock;             /* which timer type */
        timer_t it_id;                  /* timer id */
diff --git a/include/linux/sched.h b/include/linux/sched.h
index e692a022527bdaaace8b388268b0c280946fc5c2..78694315c1b4301c89a69d6e1bd74d3f908207e5 100644 (file)
@@ -527,7 +527,8 @@ struct signal_struct {
        unsigned int            has_child_subreaper:1;
 
        /* POSIX.1b Interval Timers */
-       struct list_head posix_timers;
+       int                     posix_timer_id;
+       struct list_head        posix_timers;
 
        /* ITIMER_REAL timer for the process */
        struct hrtimer real_timer;
diff --git a/include/linux/time.h b/include/linux/time.h
index d4835dfdf25e505f87c59956f09af0a203aba95a..22d81b3c955b533bf73ec0b9353c6c7d4287c010 100644 (file)
@@ -181,6 +181,9 @@ extern struct timespec timespec_trunc(struct timespec t, unsigned gran);
 extern int timekeeping_valid_for_hres(void);
 extern u64 timekeeping_max_deferment(void);
 extern int timekeeping_inject_offset(struct timespec *ts);
+extern s32 timekeeping_get_tai_offset(void);
+extern void timekeeping_set_tai_offset(s32 tai_offset);
+extern void timekeeping_clocktai(struct timespec *ts);
 
 struct tms;
 extern void do_sys_times(struct tms *);
diff --git a/include/linux/timekeeper_internal.h b/include/linux/timekeeper_internal.h
index e1d558e237ecfdd8059106c4e98f488ff406ff12..c1825eb436ed6432372460e676d1949d4d3acc93 100644 (file)
@@ -20,6 +20,8 @@ struct timekeeper {
        u32                     shift;
        /* Number of clock cycles in one NTP interval. */
        cycle_t                 cycle_interval;
+       /* Last cycle value (also stored in clock->cycle_last) */
+       cycle_t                 cycle_last;
        /* Number of clock shifted nano seconds in one NTP interval. */
        u64                     xtime_interval;
        /* shifted nano seconds left over when rounding cycle_interval */
@@ -62,8 +64,11 @@ struct timekeeper {
        ktime_t                 offs_boot;
        /* The raw monotonic time for the CLOCK_MONOTONIC_RAW posix clock. */
        struct timespec         raw_time;
-       /* Seqlock for all timekeeper values */
-       seqlock_t               lock;
+       /* The current UTC to TAI offset in seconds */
+       s32                     tai_offset;
+       /* Offset clock monotonic -> clock tai */
+       ktime_t                 offs_tai;
+
 };
 
 static inline struct timespec tk_xtime(struct timekeeper *tk)
diff --git a/include/linux/timex.h b/include/linux/timex.h
index 5ec87c60b97c5029a5f58b4282a0428767be3746..b3726e61368e5c0e56bc161a41fdfce0200271b1 100644 (file)
 extern unsigned long tick_usec;                /* USER_HZ period (usec) */
 extern unsigned long tick_nsec;                /* SHIFTED_HZ period (nsec) */
 
-extern void ntp_init(void);
-extern void ntp_clear(void);
-
 /* Required to safely shift negative values */
 #define shift_right(x, s) ({   \
        __typeof__(x) __x = (x);        \
@@ -140,10 +137,6 @@ extern void ntp_clear(void);
 #define NTP_INTERVAL_FREQ  (HZ)
 #define NTP_INTERVAL_LENGTH (NSEC_PER_SEC/NTP_INTERVAL_FREQ)
 
-/* Returns how long ticks are at present, in ns / 2^NTP_SCALE_SHIFT. */
-extern u64 ntp_tick_length(void);
-
-extern int second_overflow(unsigned long secs);
 extern int do_adjtimex(struct timex *);
 extern void hardpps(const struct timespec *, const struct timespec *);
 
diff --git a/include/uapi/linux/time.h b/include/uapi/linux/time.h
index 0d3c0edc3edaeaef405d7f208e424cc5f3fda2be..e75e1b6ff27f72162a83a7ad59866acf2a8c9c20 100644 (file)
@@ -54,11 +54,9 @@ struct itimerval {
 #define CLOCK_BOOTTIME                 7
 #define CLOCK_REALTIME_ALARM           8
 #define CLOCK_BOOTTIME_ALARM           9
+#define CLOCK_SGI_CYCLE                        10      /* Hardware specific */
+#define CLOCK_TAI                      11
 
-/*
- * The IDs of various hardware clocks:
- */
-#define CLOCK_SGI_CYCLE                        10
 #define MAX_CLOCKS                     16
 #define CLOCKS_MASK                    (CLOCK_REALTIME | CLOCK_MONOTONIC)
 #define CLOCKS_MONO                    CLOCK_MONOTONIC
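[Note: a minimal userspace sketch of the new clock id. CLOCK_TAI is 11 per the hunk above; the fallback define covers libc headers that predate it:

#include <stdio.h>
#include <time.h>

#ifndef CLOCK_TAI
#define CLOCK_TAI 11		/* value from include/uapi/linux/time.h */
#endif

int main(void)
{
	struct timespec ts;

	if (clock_gettime(CLOCK_TAI, &ts))	/* may need -lrt on old glibc */
		perror("clock_gettime(CLOCK_TAI)");
	else
		printf("TAI: %lld.%09ld\n",
		       (long long)ts.tv_sec, ts.tv_nsec);
	return 0;
}
]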
diff --git a/init/main.c b/init/main.c
index 63534a141b4eb6c0f36c5ac0d99d2733028ff96e..b3e061428545cf29a386bab98c18a0c4c4fa5a1f 100644 (file)
@@ -494,7 +494,6 @@ asmlinkage void __init start_kernel(void)
  * Interrupts are still disabled. Do necessary setups, then
  * enable them
  */
-       tick_init();
        boot_cpu_init();
        page_address_init();
        printk(KERN_NOTICE "%s", linux_banner);
@@ -551,6 +550,7 @@ asmlinkage void __init start_kernel(void)
        /* init some links before init_ISA_irqs() */
        early_irq_init();
        init_IRQ();
+       tick_init();
        init_timers();
        hrtimers_init();
        softirq_init();
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index 14be27feda491da1c3dc9990a5ae80ce649570aa..609d8ff38b745c7d02b76b69be09b82176050bf6 100644 (file)
@@ -84,6 +84,12 @@ DEFINE_PER_CPU(struct hrtimer_cpu_base, hrtimer_bases) =
                        .get_time = &ktime_get_boottime,
                        .resolution = KTIME_LOW_RES,
                },
+               {
+                       .index = HRTIMER_BASE_TAI,
+                       .clockid = CLOCK_TAI,
+                       .get_time = &ktime_get_clocktai,
+                       .resolution = KTIME_LOW_RES,
+               },
        }
 };
 
@@ -91,6 +97,7 @@ static const int hrtimer_clock_to_base_table[MAX_CLOCKS] = {
        [CLOCK_REALTIME]        = HRTIMER_BASE_REALTIME,
        [CLOCK_MONOTONIC]       = HRTIMER_BASE_MONOTONIC,
        [CLOCK_BOOTTIME]        = HRTIMER_BASE_BOOTTIME,
+       [CLOCK_TAI]             = HRTIMER_BASE_TAI,
 };
 
 static inline int hrtimer_clockid_to_base(clockid_t clock_id)
@@ -107,8 +114,10 @@ static void hrtimer_get_softirq_time(struct hrtimer_cpu_base *base)
 {
        ktime_t xtim, mono, boot;
        struct timespec xts, tom, slp;
+       s32 tai_offset;
 
        get_xtime_and_monotonic_and_sleep_offset(&xts, &tom, &slp);
+       tai_offset = timekeeping_get_tai_offset();
 
        xtim = timespec_to_ktime(xts);
        mono = ktime_add(xtim, timespec_to_ktime(tom));
@@ -116,6 +125,8 @@ static void hrtimer_get_softirq_time(struct hrtimer_cpu_base *base)
        base->clock_base[HRTIMER_BASE_REALTIME].softirq_time = xtim;
        base->clock_base[HRTIMER_BASE_MONOTONIC].softirq_time = mono;
        base->clock_base[HRTIMER_BASE_BOOTTIME].softirq_time = boot;
+       base->clock_base[HRTIMER_BASE_TAI].softirq_time =
+                               ktime_add(xtim, ktime_set(tai_offset, 0));
 }
 
 /*
@@ -276,6 +287,10 @@ ktime_t ktime_add_ns(const ktime_t kt, u64 nsec)
        } else {
                unsigned long rem = do_div(nsec, NSEC_PER_SEC);
 
+               /* Make sure nsec fits into long */
+               if (unlikely(nsec > KTIME_SEC_MAX))
+                       return (ktime_t){ .tv64 = KTIME_MAX };
+
                tmp = ktime_set((long)nsec, rem);
        }
 
@@ -652,8 +667,9 @@ static inline ktime_t hrtimer_update_base(struct hrtimer_cpu_base *base)
 {
        ktime_t *offs_real = &base->clock_base[HRTIMER_BASE_REALTIME].offset;
        ktime_t *offs_boot = &base->clock_base[HRTIMER_BASE_BOOTTIME].offset;
+       ktime_t *offs_tai = &base->clock_base[HRTIMER_BASE_TAI].offset;
 
-       return ktime_get_update_offsets(offs_real, offs_boot);
+       return ktime_get_update_offsets(offs_real, offs_boot, offs_tai);
 }
 
 /*
@@ -1011,7 +1027,8 @@ int __hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim,
  * @timer:     the timer to be added
  * @tim:       expiry time
  * @delta_ns:  "slack" range for the timer
- * @mode:      expiry mode: absolute (HRTIMER_ABS) or relative (HRTIMER_REL)
+ * @mode:      expiry mode: absolute (HRTIMER_MODE_ABS) or
+ *             relative (HRTIMER_MODE_REL)
  *
  * Returns:
  *  0 on success
@@ -1028,7 +1045,8 @@ EXPORT_SYMBOL_GPL(hrtimer_start_range_ns);
  * hrtimer_start - (re)start an hrtimer on the current CPU
  * @timer:     the timer to be added
  * @tim:       expiry time
- * @mode:      expiry mode: absolute (HRTIMER_ABS) or relative (HRTIMER_REL)
+ * @mode:      expiry mode: absolute (HRTIMER_MODE_ABS) or
+ *             relative (HRTIMER_MODE_REL)
  *
  * Returns:
  *  0 on success
@@ -1310,6 +1328,8 @@ retry:
 
                                expires = ktime_sub(hrtimer_get_expires(timer),
                                                    base->offset);
+                               if (expires.tv64 < 0)
+                                       expires.tv64 = KTIME_MAX;
                                if (expires.tv64 < expires_next.tv64)
                                        expires_next = expires;
                                break;
diff --git a/kernel/posix-timers.c b/kernel/posix-timers.c
index 6edbb2c55c22fd56a61eb6665e32087592a81fc3..424c2d4265c90cca5a484f2d5ae0ab587550e454 100644 (file)
 #include <linux/list.h>
 #include <linux/init.h>
 #include <linux/compiler.h>
-#include <linux/idr.h>
+#include <linux/hash.h>
 #include <linux/posix-clock.h>
 #include <linux/posix-timers.h>
 #include <linux/syscalls.h>
 #include <linux/wait.h>
 #include <linux/workqueue.h>
 #include <linux/export.h>
+#include <linux/hashtable.h>
 
 /*
- * Management arrays for POSIX timers.  Timers are kept in slab memory
- * Timer ids are allocated by an external routine that keeps track of the
- * id and the timer.  The external interface is:
- *
- * void *idr_find(struct idr *idp, int id);           to find timer_id <id>
- * int idr_get_new(struct idr *idp, void *ptr);       to get a new id and
- *                                                    related it to <ptr>
- * void idr_remove(struct idr *idp, int id);          to release <id>
- * void idr_init(struct idr *idp);                    to initialize <idp>
- *                                                    which we supply.
- * The idr_get_new *may* call slab for more memory so it must not be
- * called under a spin lock.  Likewise idr_remore may release memory
- * (but it may be ok to do this under a lock...).
- * idr_find is just a memory look up and is quite fast.  A -1 return
- * indicates that the requested id does not exist.
+ * Management arrays for POSIX timers. Timers are now kept in static hash table
+ * with 512 entries.
+ * Timer ids are allocated by local routine, which selects proper hash head by
+ * key, constructed from current->signal address and per signal struct counter.
+ * This keeps timer ids unique per process, but now they can intersect between
+ * processes.
  */
 
 /*
  * Lets keep our timers in a slab cache :-)
  */
 static struct kmem_cache *posix_timers_cache;
-static struct idr posix_timers_id;
-static DEFINE_SPINLOCK(idr_lock);
+
+static DEFINE_HASHTABLE(posix_timers_hashtable, 9);
+static DEFINE_SPINLOCK(hash_lock);
 
 /*
  * we assume that the new SIGEV_THREAD_ID shares no bits with the other
@@ -152,6 +145,56 @@ static struct k_itimer *__lock_timer(timer_t timer_id, unsigned long *flags);
        __timr;                                                            \
 })
 
+static int hash(struct signal_struct *sig, unsigned int nr)
+{
+       return hash_32(hash32_ptr(sig) ^ nr, HASH_BITS(posix_timers_hashtable));
+}
+
+static struct k_itimer *__posix_timers_find(struct hlist_head *head,
+                                           struct signal_struct *sig,
+                                           timer_t id)
+{
+       struct k_itimer *timer;
+
+       hlist_for_each_entry_rcu(timer, head, t_hash) {
+               if ((timer->it_signal == sig) && (timer->it_id == id))
+                       return timer;
+       }
+       return NULL;
+}
+
+static struct k_itimer *posix_timer_by_id(timer_t id)
+{
+       struct signal_struct *sig = current->signal;
+       struct hlist_head *head = &posix_timers_hashtable[hash(sig, id)];
+
+       return __posix_timers_find(head, sig, id);
+}
+
+static int posix_timer_add(struct k_itimer *timer)
+{
+       struct signal_struct *sig = current->signal;
+       int first_free_id = sig->posix_timer_id;
+       struct hlist_head *head;
+       int ret = -ENOENT;
+
+       do {
+               spin_lock(&hash_lock);
+               head = &posix_timers_hashtable[hash(sig, sig->posix_timer_id)];
+               if (!__posix_timers_find(head, sig, sig->posix_timer_id)) {
+                       hlist_add_head_rcu(&timer->t_hash, head);
+                       ret = sig->posix_timer_id;
+               }
+               if (++sig->posix_timer_id < 0)
+                       sig->posix_timer_id = 0;
+               if ((sig->posix_timer_id == first_free_id) && (ret == -ENOENT))
+                       /* Loop over all possible ids completed */
+                       ret = -EAGAIN;
+               spin_unlock(&hash_lock);
+       } while (ret == -ENOENT);
+       return ret;
+}
+
 static inline void unlock_timer(struct k_itimer *timr, unsigned long flags)
 {
        spin_unlock_irqrestore(&timr->it_lock, flags);
@@ -221,6 +264,11 @@ static int posix_get_boottime(const clockid_t which_clock, struct timespec *tp)
        return 0;
 }
 
+static int posix_get_tai(clockid_t which_clock, struct timespec *tp)
+{
+       timekeeping_clocktai(tp);
+       return 0;
+}
 
 /*
  * Initialize everything, well, just everything in Posix clocks/timers ;)
@@ -261,6 +309,16 @@ static __init int init_posix_timers(void)
                .clock_getres   = posix_get_coarse_res,
                .clock_get      = posix_get_monotonic_coarse,
        };
+       struct k_clock clock_tai = {
+               .clock_getres   = hrtimer_get_res,
+               .clock_get      = posix_get_tai,
+               .nsleep         = common_nsleep,
+               .nsleep_restart = hrtimer_nanosleep_restart,
+               .timer_create   = common_timer_create,
+               .timer_set      = common_timer_set,
+               .timer_get      = common_timer_get,
+               .timer_del      = common_timer_del,
+       };
        struct k_clock clock_boottime = {
                .clock_getres   = hrtimer_get_res,
                .clock_get      = posix_get_boottime,
@@ -278,11 +336,11 @@ static __init int init_posix_timers(void)
        posix_timers_register_clock(CLOCK_REALTIME_COARSE, &clock_realtime_coarse);
        posix_timers_register_clock(CLOCK_MONOTONIC_COARSE, &clock_monotonic_coarse);
        posix_timers_register_clock(CLOCK_BOOTTIME, &clock_boottime);
+       posix_timers_register_clock(CLOCK_TAI, &clock_tai);
 
        posix_timers_cache = kmem_cache_create("posix_timers_cache",
                                        sizeof (struct k_itimer), 0, SLAB_PANIC,
                                        NULL);
-       idr_init(&posix_timers_id);
        return 0;
 }
 
@@ -504,9 +562,9 @@ static void release_posix_timer(struct k_itimer *tmr, int it_id_set)
 {
        if (it_id_set) {
                unsigned long flags;
-               spin_lock_irqsave(&idr_lock, flags);
-               idr_remove(&posix_timers_id, tmr->it_id);
-               spin_unlock_irqrestore(&idr_lock, flags);
+               spin_lock_irqsave(&hash_lock, flags);
+               hlist_del_rcu(&tmr->t_hash);
+               spin_unlock_irqrestore(&hash_lock, flags);
        }
        put_pid(tmr->it_pid);
        sigqueue_free(tmr->sigq);
@@ -552,22 +610,11 @@ SYSCALL_DEFINE3(timer_create, const clockid_t, which_clock,
                return -EAGAIN;
 
        spin_lock_init(&new_timer->it_lock);
-
-       idr_preload(GFP_KERNEL);
-       spin_lock_irq(&idr_lock);
-       error = idr_alloc(&posix_timers_id, new_timer, 0, 0, GFP_NOWAIT);
-       spin_unlock_irq(&idr_lock);
-       idr_preload_end();
-       if (error < 0) {
-               /*
-                * Weird looking, but we return EAGAIN if the IDR is
-                * full (proper POSIX return value for this)
-                */
-               if (error == -ENOSPC)
-                       error = -EAGAIN;
+       new_timer_id = posix_timer_add(new_timer);
+       if (new_timer_id < 0) {
+               error = new_timer_id;
                goto out;
        }
-       new_timer_id = error;
 
        it_id_set = IT_ID_SET;
        new_timer->it_id = (timer_t) new_timer_id;
@@ -645,7 +692,7 @@ static struct k_itimer *__lock_timer(timer_t timer_id, unsigned long *flags)
                return NULL;
 
        rcu_read_lock();
-       timr = idr_find(&posix_timers_id, (int)timer_id);
+       timr = posix_timer_by_id(timer_id);
        if (timr) {
                spin_lock_irqsave(&timr->it_lock, *flags);
                if (timr->it_signal == current->signal) {
diff --git a/kernel/time.c b/kernel/time.c
index f8342a41efa60de3badbcbd2f48fa1950d0d7f99..d3617dbd3dca6b3844e0814e889026f01af0ddd4 100644 (file)
@@ -138,13 +138,14 @@ int persistent_clock_is_local;
  */
 static inline void warp_clock(void)
 {
-       struct timespec adjust;
+       if (sys_tz.tz_minuteswest != 0) {
+               struct timespec adjust;
 
-       adjust = current_kernel_time();
-       if (sys_tz.tz_minuteswest != 0)
                persistent_clock_is_local = 1;
-       adjust.tv_sec += sys_tz.tz_minuteswest * 60;
-       do_settimeofday(&adjust);
+               adjust.tv_sec = sys_tz.tz_minuteswest * 60;
+               adjust.tv_nsec = 0;
+               timekeeping_inject_offset(&adjust);
+       }
 }
 
 /*
diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c
index 072bb066bb7d1b26b4f5f613c3c2be464bc7c323..12ff13a838c652967d83212baf4742d71862720a 100644 (file)
 #include <linux/rtc.h>
 
 #include "tick-internal.h"
+#include "ntp_internal.h"
 
 /*
  * NTP timekeeping variables:
+ *
+ * Note: All of the NTP state is protected by the timekeeping locks.
  */
 
-DEFINE_RAW_SPINLOCK(ntp_lock);
-
 
 /* USER_HZ period (usecs): */
 unsigned long                  tick_usec = TICK_USEC;
@@ -53,9 +54,6 @@ static int                    time_state = TIME_OK;
 /* clock status bits:                                                  */
 static int                     time_status = STA_UNSYNC;
 
-/* TAI offset (secs):                                                  */
-static long                    time_tai;
-
 /* time adjustment (nsecs):                                            */
 static s64                     time_offset;
 
@@ -134,8 +132,6 @@ static inline void pps_reset_freq_interval(void)
 
 /**
  * pps_clear - Clears the PPS state variables
- *
- * Must be called while holding a write on the ntp_lock
  */
 static inline void pps_clear(void)
 {
@@ -150,8 +146,6 @@ static inline void pps_clear(void)
 /* Decrease pps_valid to indicate that another second has passed since
  * the last PPS signal. When it reaches 0, indicate that PPS signal is
  * missing.
- *
- * Must be called while holding a write on the ntp_lock
  */
 static inline void pps_dec_valid(void)
 {
@@ -346,10 +340,6 @@ static void ntp_update_offset(long offset)
  */
 void ntp_clear(void)
 {
-       unsigned long flags;
-
-       raw_spin_lock_irqsave(&ntp_lock, flags);
-
        time_adjust     = 0;            /* stop active adjtime() */
        time_status     |= STA_UNSYNC;
        time_maxerror   = NTP_PHASE_LIMIT;
@@ -362,20 +352,12 @@ void ntp_clear(void)
 
        /* Clear PPS state variables */
        pps_clear();
-       raw_spin_unlock_irqrestore(&ntp_lock, flags);
-
 }
 
 
 u64 ntp_tick_length(void)
 {
-       unsigned long flags;
-       s64 ret;
-
-       raw_spin_lock_irqsave(&ntp_lock, flags);
-       ret = tick_length;
-       raw_spin_unlock_irqrestore(&ntp_lock, flags);
-       return ret;
+       return tick_length;
 }
 
 
@@ -393,9 +375,6 @@ int second_overflow(unsigned long secs)
 {
        s64 delta;
        int leap = 0;
-       unsigned long flags;
-
-       raw_spin_lock_irqsave(&ntp_lock, flags);
 
        /*
         * Leap second processing. If in leap-insert state at the end of the
@@ -415,7 +394,6 @@ int second_overflow(unsigned long secs)
                else if (secs % 86400 == 0) {
                        leap = -1;
                        time_state = TIME_OOP;
-                       time_tai++;
                        printk(KERN_NOTICE
                                "Clock: inserting leap second 23:59:60 UTC\n");
                }
@@ -425,7 +403,6 @@ int second_overflow(unsigned long secs)
                        time_state = TIME_OK;
                else if ((secs + 1) % 86400 == 0) {
                        leap = 1;
-                       time_tai--;
                        time_state = TIME_WAIT;
                        printk(KERN_NOTICE
                                "Clock: deleting leap second 23:59:59 UTC\n");
@@ -479,8 +456,6 @@ int second_overflow(unsigned long secs)
        time_adjust = 0;
 
 out:
-       raw_spin_unlock_irqrestore(&ntp_lock, flags);
-
        return leap;
 }
 
@@ -575,11 +550,10 @@ static inline void process_adj_status(struct timex *txc, struct timespec *ts)
        time_status |= txc->status & ~STA_RONLY;
 }
 
-/*
- * Called with ntp_lock held, so we can access and modify
- * all the global NTP state:
- */
-static inline void process_adjtimex_modes(struct timex *txc, struct timespec *ts)
+
+static inline void process_adjtimex_modes(struct timex *txc,
+                                               struct timespec *ts,
+                                               s32 *time_tai)
 {
        if (txc->modes & ADJ_STATUS)
                process_adj_status(txc, ts);
@@ -613,7 +587,7 @@ static inline void process_adjtimex_modes(struct timex *txc, struct timespec *ts
        }
 
        if (txc->modes & ADJ_TAI && txc->constant > 0)
-               time_tai = txc->constant;
+               *time_tai = txc->constant;
 
        if (txc->modes & ADJ_OFFSET)
                ntp_update_offset(txc->offset);
@@ -625,16 +599,13 @@ static inline void process_adjtimex_modes(struct timex *txc, struct timespec *ts
                ntp_update_frequency();
 }
 
-/*
- * adjtimex mainly allows reading (and writing, if superuser) of
- * kernel time-keeping variables. used by xntpd.
+
+
+/**
+ * ntp_validate_timex - Ensures the timex is ok for use in do_adjtimex
  */
-int do_adjtimex(struct timex *txc)
+int ntp_validate_timex(struct timex *txc)
 {
-       struct timespec ts;
-       int result;
-
-       /* Validate the data before disabling interrupts */
        if (txc->modes & ADJ_ADJTIME) {
                /* singleshot must not be used with any other mode bits */
                if (!(txc->modes & ADJ_OFFSET_SINGLESHOT))
@@ -646,7 +617,6 @@ int do_adjtimex(struct timex *txc)
                /* In order to modify anything, you gotta be super-user! */
                 if (txc->modes && !capable(CAP_SYS_TIME))
                        return -EPERM;
-
                /*
                 * if the quartz is off by more than 10% then
                 * something is VERY wrong!
@@ -657,22 +627,20 @@ int do_adjtimex(struct timex *txc)
                        return -EINVAL;
        }
 
-       if (txc->modes & ADJ_SETOFFSET) {
-               struct timespec delta;
-               delta.tv_sec  = txc->time.tv_sec;
-               delta.tv_nsec = txc->time.tv_usec;
-               if (!capable(CAP_SYS_TIME))
-                       return -EPERM;
-               if (!(txc->modes & ADJ_NANO))
-                       delta.tv_nsec *= 1000;
-               result = timekeeping_inject_offset(&delta);
-               if (result)
-                       return result;
-       }
+       if ((txc->modes & ADJ_SETOFFSET) && (!capable(CAP_SYS_TIME)))
+               return -EPERM;
 
-       getnstimeofday(&ts);
+       return 0;
+}
 
-       raw_spin_lock_irq(&ntp_lock);
+
+/*
+ * adjtimex mainly allows reading (and writing, if superuser) of
+ * kernel time-keeping variables. used by xntpd.
+ */
+int __do_adjtimex(struct timex *txc, struct timespec *ts, s32 *time_tai)
+{
+       int result;
 
        if (txc->modes & ADJ_ADJTIME) {
                long save_adjust = time_adjust;
@@ -687,7 +655,7 @@ int do_adjtimex(struct timex *txc)
 
                /* If there are input parameters, then process them: */
                if (txc->modes)
-                       process_adjtimex_modes(txc, &ts);
+                       process_adjtimex_modes(txc, ts, time_tai);
 
                txc->offset = shift_right(time_offset * NTP_INTERVAL_FREQ,
                                  NTP_SCALE_SHIFT);
@@ -709,15 +677,13 @@ int do_adjtimex(struct timex *txc)
        txc->precision     = 1;
        txc->tolerance     = MAXFREQ_SCALED / PPM_SCALE;
        txc->tick          = tick_usec;
-       txc->tai           = time_tai;
+       txc->tai           = *time_tai;
 
        /* fill PPS status fields */
        pps_fill_timex(txc);
 
-       raw_spin_unlock_irq(&ntp_lock);
-
-       txc->time.tv_sec = ts.tv_sec;
-       txc->time.tv_usec = ts.tv_nsec;
+       txc->time.tv_sec = ts->tv_sec;
+       txc->time.tv_usec = ts->tv_nsec;
        if (!(time_status & STA_NANO))
                txc->time.tv_usec /= NSEC_PER_USEC;
 
@@ -894,7 +860,7 @@ static void hardpps_update_phase(long error)
 }
 
 /*
- * hardpps() - discipline CPU clock oscillator to external PPS signal
+ * __hardpps() - discipline CPU clock oscillator to external PPS signal
  *
  * This routine is called at each PPS signal arrival in order to
  * discipline the CPU clock oscillator to the PPS signal. It takes two
@@ -905,15 +871,13 @@ static void hardpps_update_phase(long error)
  * This code is based on David Mills's reference nanokernel
  * implementation. It was mostly rewritten but keeps the same idea.
  */
-void hardpps(const struct timespec *phase_ts, const struct timespec *raw_ts)
+void __hardpps(const struct timespec *phase_ts, const struct timespec *raw_ts)
 {
        struct pps_normtime pts_norm, freq_norm;
        unsigned long flags;
 
        pts_norm = pps_normalize_ts(*phase_ts);
 
-       raw_spin_lock_irqsave(&ntp_lock, flags);
-
        /* clear the error bits, they will be set again if needed */
        time_status &= ~(STA_PPSJITTER | STA_PPSWANDER | STA_PPSERROR);
 
@@ -925,7 +889,6 @@ void hardpps(const struct timespec *phase_ts, const struct timespec *raw_ts)
         * just start the frequency interval */
        if (unlikely(pps_fbase.tv_sec == 0)) {
                pps_fbase = *raw_ts;
-               raw_spin_unlock_irqrestore(&ntp_lock, flags);
                return;
        }
 
@@ -940,7 +903,6 @@ void hardpps(const struct timespec *phase_ts, const struct timespec *raw_ts)
                time_status |= STA_PPSJITTER;
                /* restart the frequency calibration interval */
                pps_fbase = *raw_ts;
-               raw_spin_unlock_irqrestore(&ntp_lock, flags);
                pr_err("hardpps: PPSJITTER: bad pulse\n");
                return;
        }
@@ -957,10 +919,7 @@ void hardpps(const struct timespec *phase_ts, const struct timespec *raw_ts)
 
        hardpps_update_phase(pts_norm.nsec);
 
-       raw_spin_unlock_irqrestore(&ntp_lock, flags);
 }
-EXPORT_SYMBOL(hardpps);
-
 #endif /* CONFIG_NTP_PPS */
 
 static int __init ntp_tick_adj_setup(char *str)
diff --git a/kernel/time/ntp_internal.h b/kernel/time/ntp_internal.h
new file mode 100644 (file)
index 0000000..1950cb4
--- /dev/null
@@ -0,0 +1,12 @@
+#ifndef _LINUX_NTP_INTERNAL_H
+#define _LINUX_NTP_INTERNAL_H
+
+extern void ntp_init(void);
+extern void ntp_clear(void);
+/* Returns how long ticks are at present, in ns / 2^NTP_SCALE_SHIFT. */
+extern u64 ntp_tick_length(void);
+extern int second_overflow(unsigned long secs);
+extern int ntp_validate_timex(struct timex *);
+extern int __do_adjtimex(struct timex *, struct timespec *, s32 *);
+extern void __hardpps(const struct timespec *, const struct timespec *);
+#endif /* _LINUX_NTP_INTERNAL_H */
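[Note: with ntp_lock gone, the NTP state is now serialized by the timekeeping locks. A condensed sketch of how the timekeeping side is expected to drive the internals declared above — an assumption simplified from the companion timekeeping change:

int do_adjtimex(struct timex *txc)
{
	struct timespec ts;
	s32 tai = timekeeping_get_tai_offset();
	int ret;

	ret = ntp_validate_timex(txc);	/* permission/range checks first */
	if (ret)
		return ret;

	getnstimeofday(&ts);
	/* ...take the timekeeping write lock... */
	ret = __do_adjtimex(txc, &ts, &tai);
	timekeeping_set_tai_offset(tai);	/* propagate any ADJ_TAI change */
	/* ...drop the lock... */
	return ret;
}
]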
diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c
index 7f32fe0e52cd46489c8d90e4b85f9d74204aab16..6e23fde83dbeb21faeca1d5500003cd7e1a373e0 100644 (file)
@@ -28,9 +28,8 @@
  */
 
 static struct tick_device tick_broadcast_device;
-/* FIXME: Use cpumask_var_t. */
-static DECLARE_BITMAP(tick_broadcast_mask, NR_CPUS);
-static DECLARE_BITMAP(tmpmask, NR_CPUS);
+static cpumask_var_t tick_broadcast_mask;
+static cpumask_var_t tmpmask;
 static DEFINE_RAW_SPINLOCK(tick_broadcast_lock);
 static int tick_broadcast_force;
 
@@ -50,7 +49,7 @@ struct tick_device *tick_get_broadcast_device(void)
 
 struct cpumask *tick_get_broadcast_mask(void)
 {
-       return to_cpumask(tick_broadcast_mask);
+       return tick_broadcast_mask;
 }
 
 /*
@@ -75,8 +74,18 @@ int tick_check_broadcast_device(struct clock_event_device *dev)
 
        clockevents_exchange_device(tick_broadcast_device.evtdev, dev);
        tick_broadcast_device.evtdev = dev;
-       if (!cpumask_empty(tick_get_broadcast_mask()))
+       if (!cpumask_empty(tick_broadcast_mask))
                tick_broadcast_start_periodic(dev);
+       /*
+        * Inform all cpus about this. We might be in a situation
+        * where we did not switch to oneshot mode because the per cpu
+        * devices are affected by CLOCK_EVT_FEAT_C3STOP and the lack
+        * of a oneshot capable broadcast device. Without that
+        * notification the systems stays stuck in periodic mode
+        * forever.
+        */
+       if (dev->features & CLOCK_EVT_FEAT_ONESHOT)
+               tick_clock_notify();
        return 1;
 }
 
@@ -124,7 +133,7 @@ int tick_device_uses_broadcast(struct clock_event_device *dev, int cpu)
        if (!tick_device_is_functional(dev)) {
                dev->event_handler = tick_handle_periodic;
                tick_device_setup_broadcast_func(dev);
-               cpumask_set_cpu(cpu, tick_get_broadcast_mask());
+               cpumask_set_cpu(cpu, tick_broadcast_mask);
                tick_broadcast_start_periodic(tick_broadcast_device.evtdev);
                ret = 1;
        } else {
@@ -135,7 +144,7 @@ int tick_device_uses_broadcast(struct clock_event_device *dev, int cpu)
                 */
                if (!(dev->features & CLOCK_EVT_FEAT_C3STOP)) {
                        int cpu = smp_processor_id();
-                       cpumask_clear_cpu(cpu, tick_get_broadcast_mask());
+                       cpumask_clear_cpu(cpu, tick_broadcast_mask);
                        tick_broadcast_clear_oneshot(cpu);
                } else {
                        tick_device_setup_broadcast_func(dev);
@@ -199,9 +208,8 @@ static void tick_do_periodic_broadcast(void)
 {
        raw_spin_lock(&tick_broadcast_lock);
 
-       cpumask_and(to_cpumask(tmpmask),
-                   cpu_online_mask, tick_get_broadcast_mask());
-       tick_do_broadcast(to_cpumask(tmpmask));
+       cpumask_and(tmpmask, cpu_online_mask, tick_broadcast_mask);
+       tick_do_broadcast(tmpmask);
 
        raw_spin_unlock(&tick_broadcast_lock);
 }
@@ -264,13 +272,12 @@ static void tick_do_broadcast_on_off(unsigned long *reason)
        if (!tick_device_is_functional(dev))
                goto out;
 
-       bc_stopped = cpumask_empty(tick_get_broadcast_mask());
+       bc_stopped = cpumask_empty(tick_broadcast_mask);
 
        switch (*reason) {
        case CLOCK_EVT_NOTIFY_BROADCAST_ON:
        case CLOCK_EVT_NOTIFY_BROADCAST_FORCE:
-               if (!cpumask_test_cpu(cpu, tick_get_broadcast_mask())) {
-                       cpumask_set_cpu(cpu, tick_get_broadcast_mask());
+               if (!cpumask_test_and_set_cpu(cpu, tick_broadcast_mask)) {
                        if (tick_broadcast_device.mode ==
                            TICKDEV_MODE_PERIODIC)
                                clockevents_shutdown(dev);
@@ -280,8 +287,7 @@ static void tick_do_broadcast_on_off(unsigned long *reason)
                break;
        case CLOCK_EVT_NOTIFY_BROADCAST_OFF:
                if (!tick_broadcast_force &&
-                   cpumask_test_cpu(cpu, tick_get_broadcast_mask())) {
-                       cpumask_clear_cpu(cpu, tick_get_broadcast_mask());
+                   cpumask_test_and_clear_cpu(cpu, tick_broadcast_mask)) {
                        if (tick_broadcast_device.mode ==
                            TICKDEV_MODE_PERIODIC)
                                tick_setup_periodic(dev, 0);
@@ -289,7 +295,7 @@ static void tick_do_broadcast_on_off(unsigned long *reason)
                break;
        }
 
-       if (cpumask_empty(tick_get_broadcast_mask())) {
+       if (cpumask_empty(tick_broadcast_mask)) {
                if (!bc_stopped)
                        clockevents_shutdown(bc);
        } else if (bc_stopped) {
@@ -338,10 +344,10 @@ void tick_shutdown_broadcast(unsigned int *cpup)
        raw_spin_lock_irqsave(&tick_broadcast_lock, flags);
 
        bc = tick_broadcast_device.evtdev;
-       cpumask_clear_cpu(cpu, tick_get_broadcast_mask());
+       cpumask_clear_cpu(cpu, tick_broadcast_mask);
 
        if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC) {
-               if (bc && cpumask_empty(tick_get_broadcast_mask()))
+               if (bc && cpumask_empty(tick_broadcast_mask))
                        clockevents_shutdown(bc);
        }
 
@@ -377,13 +383,13 @@ int tick_resume_broadcast(void)
 
                switch (tick_broadcast_device.mode) {
                case TICKDEV_MODE_PERIODIC:
-                       if (!cpumask_empty(tick_get_broadcast_mask()))
+                       if (!cpumask_empty(tick_broadcast_mask))
                                tick_broadcast_start_periodic(bc);
                        broadcast = cpumask_test_cpu(smp_processor_id(),
-                                                    tick_get_broadcast_mask());
+                                                    tick_broadcast_mask);
                        break;
                case TICKDEV_MODE_ONESHOT:
-                       if (!cpumask_empty(tick_get_broadcast_mask()))
+                       if (!cpumask_empty(tick_broadcast_mask))
                                broadcast = tick_resume_broadcast_oneshot(bc);
                        break;
                }
@@ -396,25 +402,58 @@ int tick_resume_broadcast(void)
 
 #ifdef CONFIG_TICK_ONESHOT
 
-/* FIXME: use cpumask_var_t. */
-static DECLARE_BITMAP(tick_broadcast_oneshot_mask, NR_CPUS);
+static cpumask_var_t tick_broadcast_oneshot_mask;
+static cpumask_var_t tick_broadcast_pending_mask;
+static cpumask_var_t tick_broadcast_force_mask;
 
 /*
  * Exposed for debugging: see timer_list.c
  */
 struct cpumask *tick_get_broadcast_oneshot_mask(void)
 {
-       return to_cpumask(tick_broadcast_oneshot_mask);
+       return tick_broadcast_oneshot_mask;
 }
 
-static int tick_broadcast_set_event(ktime_t expires, int force)
+/*
+ * Called before going idle with interrupts disabled. Checks whether a
+ * broadcast event from another core is about to happen. We detected
+ * that in tick_broadcast_oneshot_control(). The callsite can use this
+ * to avoid a deep idle transition as we are about to get the
+ * broadcast IPI right away.
+ */
+int tick_check_broadcast_expired(void)
 {
-       struct clock_event_device *bc = tick_broadcast_device.evtdev;
+       return cpumask_test_cpu(smp_processor_id(), tick_broadcast_force_mask);
+}
+
+/*
+ * Set broadcast interrupt affinity
+ */
+static void tick_broadcast_set_affinity(struct clock_event_device *bc,
+                                       const struct cpumask *cpumask)
+{
+       if (!(bc->features & CLOCK_EVT_FEAT_DYNIRQ))
+               return;
+
+       if (cpumask_equal(bc->cpumask, cpumask))
+               return;
+
+       bc->cpumask = cpumask;
+       irq_set_affinity(bc->irq, bc->cpumask);
+}
+
+static int tick_broadcast_set_event(struct clock_event_device *bc, int cpu,
+                                   ktime_t expires, int force)
+{
+       int ret;
 
        if (bc->mode != CLOCK_EVT_MODE_ONESHOT)
                clockevents_set_mode(bc, CLOCK_EVT_MODE_ONESHOT);
 
-       return clockevents_program_event(bc, expires, force);
+       ret = clockevents_program_event(bc, expires, force);
+       if (!ret)
+               tick_broadcast_set_affinity(bc, cpumask_of(cpu));
+       return ret;
 }
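
The affinity update above only takes effect when the broadcast device advertises CLOCK_EVT_FEAT_DYNIRQ, i.e. its interrupt can be rerouted to the CPU whose event fires next. A minimal, hypothetical driver-side sketch of such a device follows; the foo_* names and the IRQ number are illustrative, not part of this patch:

    #include <linux/clockchips.h>

    #define FOO_TIMER_IRQ 42                        /* hypothetical IRQ line */

    static int foo_set_next_event(unsigned long delta,
                                  struct clock_event_device *evt)
    {
            /* program the hardware comparator here (hypothetical) */
            return 0;
    }

    static void foo_set_mode(enum clock_event_mode mode,
                             struct clock_event_device *evt)
    {
            /* switch the hardware between oneshot and shutdown (hypothetical) */
    }

    static struct clock_event_device foo_broadcast_dev = {
            .name           = "foo-bc",
            .features       = CLOCK_EVT_FEAT_ONESHOT | CLOCK_EVT_FEAT_DYNIRQ,
            .irq            = FOO_TIMER_IRQ, /* consulted by irq_set_affinity() */
            .rating         = 300,
            .set_next_event = foo_set_next_event,
            .set_mode       = foo_set_mode,
    };

Without CLOCK_EVT_FEAT_DYNIRQ, tick_broadcast_set_affinity() returns early and the broadcast interrupt keeps whatever affinity it was given at setup.
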
 
 int tick_resume_broadcast_oneshot(struct clock_event_device *bc)
@@ -429,7 +468,7 @@ int tick_resume_broadcast_oneshot(struct clock_event_device *bc)
  */
 void tick_check_oneshot_broadcast(int cpu)
 {
-       if (cpumask_test_cpu(cpu, to_cpumask(tick_broadcast_oneshot_mask))) {
+       if (cpumask_test_cpu(cpu, tick_broadcast_oneshot_mask)) {
                struct tick_device *td = &per_cpu(tick_cpu_device, cpu);
 
                clockevents_set_mode(td->evtdev, CLOCK_EVT_MODE_ONESHOT);
@@ -443,27 +482,39 @@ static void tick_handle_oneshot_broadcast(struct clock_event_device *dev)
 {
        struct tick_device *td;
        ktime_t now, next_event;
-       int cpu;
+       int cpu, next_cpu = 0;
 
        raw_spin_lock(&tick_broadcast_lock);
 again:
        dev->next_event.tv64 = KTIME_MAX;
        next_event.tv64 = KTIME_MAX;
-       cpumask_clear(to_cpumask(tmpmask));
+       cpumask_clear(tmpmask);
        now = ktime_get();
        /* Find all expired events */
-       for_each_cpu(cpu, tick_get_broadcast_oneshot_mask()) {
+       for_each_cpu(cpu, tick_broadcast_oneshot_mask) {
                td = &per_cpu(tick_cpu_device, cpu);
-               if (td->evtdev->next_event.tv64 <= now.tv64)
-                       cpumask_set_cpu(cpu, to_cpumask(tmpmask));
-               else if (td->evtdev->next_event.tv64 < next_event.tv64)
+               if (td->evtdev->next_event.tv64 <= now.tv64) {
+                       cpumask_set_cpu(cpu, tmpmask);
+                       /*
+                        * Mark the remote cpu in the pending mask, so
+                        * it can avoid reprogramming the cpu local
+                        * timer in tick_broadcast_oneshot_control().
+                        */
+                       cpumask_set_cpu(cpu, tick_broadcast_pending_mask);
+               } else if (td->evtdev->next_event.tv64 < next_event.tv64) {
                        next_event.tv64 = td->evtdev->next_event.tv64;
+                       next_cpu = cpu;
+               }
        }
 
+       /* Take care of enforced broadcast requests */
+       cpumask_or(tmpmask, tmpmask, tick_broadcast_force_mask);
+       cpumask_clear(tick_broadcast_force_mask);
+
        /*
         * Wakeup the cpus which have an expired event.
         */
-       tick_do_broadcast(to_cpumask(tmpmask));
+       tick_do_broadcast(tmpmask);
 
        /*
         * Two reasons for reprogram:
@@ -480,7 +531,7 @@ again:
                 * Rearm the broadcast device. If event expired,
                 * repeat the above
                 */
-               if (tick_broadcast_set_event(next_event, 0))
+               if (tick_broadcast_set_event(dev, next_cpu, next_event, 0))
                        goto again;
        }
        raw_spin_unlock(&tick_broadcast_lock);
@@ -495,6 +546,7 @@ void tick_broadcast_oneshot_control(unsigned long reason)
        struct clock_event_device *bc, *dev;
        struct tick_device *td;
        unsigned long flags;
+       ktime_t now;
        int cpu;
 
        /*
@@ -519,21 +571,84 @@ void tick_broadcast_oneshot_control(unsigned long reason)
 
        raw_spin_lock_irqsave(&tick_broadcast_lock, flags);
        if (reason == CLOCK_EVT_NOTIFY_BROADCAST_ENTER) {
-               if (!cpumask_test_cpu(cpu, tick_get_broadcast_oneshot_mask())) {
-                       cpumask_set_cpu(cpu, tick_get_broadcast_oneshot_mask());
+               WARN_ON_ONCE(cpumask_test_cpu(cpu, tick_broadcast_pending_mask));
+               if (!cpumask_test_and_set_cpu(cpu, tick_broadcast_oneshot_mask)) {
                        clockevents_set_mode(dev, CLOCK_EVT_MODE_SHUTDOWN);
-                       if (dev->next_event.tv64 < bc->next_event.tv64)
-                               tick_broadcast_set_event(dev->next_event, 1);
+                       /*
+                        * We only reprogram the broadcast timer if we
+                        * did not mark ourself in the force mask and
+                        * did not mark ourselves in the force mask and
+                        * broadcast event. If the current CPU is in
+                        * the force mask, then we are going to be
+                        * woken by the IPI right away.
+                        */
+                       if (!cpumask_test_cpu(cpu, tick_broadcast_force_mask) &&
+                           dev->next_event.tv64 < bc->next_event.tv64)
+                               tick_broadcast_set_event(bc, cpu, dev->next_event, 1);
                }
        } else {
-               if (cpumask_test_cpu(cpu, tick_get_broadcast_oneshot_mask())) {
-                       cpumask_clear_cpu(cpu,
-                                         tick_get_broadcast_oneshot_mask());
+               if (cpumask_test_and_clear_cpu(cpu, tick_broadcast_oneshot_mask)) {
                        clockevents_set_mode(dev, CLOCK_EVT_MODE_ONESHOT);
-                       if (dev->next_event.tv64 != KTIME_MAX)
-                               tick_program_event(dev->next_event, 1);
+                       if (dev->next_event.tv64 == KTIME_MAX)
+                               goto out;
+                       /*
+                        * The cpu which was handling the broadcast
+                        * timer marked this cpu in the broadcast
+                        * pending mask and fired the broadcast
+                        * IPI. So we are going to handle the expired
+                        * event anyway via the broadcast IPI
+                        * handler. No need to reprogram the timer
+                        * with an already expired event.
+                        */
+                       if (cpumask_test_and_clear_cpu(cpu,
+                                      tick_broadcast_pending_mask))
+                               goto out;
+
+                       /*
+                        * If the pending bit is not set, then we are
+                        * either the CPU handling the broadcast
+                        * interrupt or we got woken by something else.
+                        *
+                        * We are no longer in the broadcast mask, so
+                        * if the cpu local expiry time is already
+                        * reached, we would reprogram the cpu local
+                        * timer with an already expired event.
+                        *
+                        * This can lead to a ping-pong when we return
+                        * to idle and therefore rearm the broadcast
+                        * timer before the cpu local timer was able
+                        * to fire. This happens because the forced
+                        * reprogramming makes sure that the event
+                        * will happen in the future and depending on
+                        * the min_delta setting this might be far
+                        * enough out that the ping-pong starts.
+                        *
+                        * If the cpu local next_event has expired
+                        * then we know that the broadcast timer
+                        * next_event has expired as well and
+                        * broadcast is about to be handled. So we
+                        * avoid reprogramming and enforce that the
+                        * broadcast handler, which did not run yet,
+                        * will invoke the cpu local handler.
+                        *
+                        * We cannot call the handler directly from
+                        * here, because we might be in a NOHZ phase
+                        * and we did not go through the irq_enter()
+                        * nohz fixups.
+                        */
+                       now = ktime_get();
+                       if (dev->next_event.tv64 <= now.tv64) {
+                               cpumask_set_cpu(cpu, tick_broadcast_force_mask);
+                               goto out;
+                       }
+                       /*
+                        * We got woken by something else. Reprogram
+                        * the cpu local timer device.
+                        */
+                       tick_program_event(dev->next_event, 1);
                }
        }
+out:
        raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
 }
 
@@ -544,7 +659,7 @@ void tick_broadcast_oneshot_control(unsigned long reason)
  */
 static void tick_broadcast_clear_oneshot(int cpu)
 {
-       cpumask_clear_cpu(cpu, tick_get_broadcast_oneshot_mask());
+       cpumask_clear_cpu(cpu, tick_broadcast_oneshot_mask);
 }
 
 static void tick_broadcast_init_next_event(struct cpumask *mask,
@@ -582,17 +697,16 @@ void tick_broadcast_setup_oneshot(struct clock_event_device *bc)
                 * oneshot_mask bits for those and program the
                 * broadcast device to fire.
                 */
-               cpumask_copy(to_cpumask(tmpmask), tick_get_broadcast_mask());
-               cpumask_clear_cpu(cpu, to_cpumask(tmpmask));
-               cpumask_or(tick_get_broadcast_oneshot_mask(),
-                          tick_get_broadcast_oneshot_mask(),
-                          to_cpumask(tmpmask));
+               cpumask_copy(tmpmask, tick_broadcast_mask);
+               cpumask_clear_cpu(cpu, tmpmask);
+               cpumask_or(tick_broadcast_oneshot_mask,
+                          tick_broadcast_oneshot_mask, tmpmask);
 
-               if (was_periodic && !cpumask_empty(to_cpumask(tmpmask))) {
+               if (was_periodic && !cpumask_empty(tmpmask)) {
                        clockevents_set_mode(bc, CLOCK_EVT_MODE_ONESHOT);
-                       tick_broadcast_init_next_event(to_cpumask(tmpmask),
+                       tick_broadcast_init_next_event(tmpmask,
                                                       tick_next_period);
-                       tick_broadcast_set_event(tick_next_period, 1);
+                       tick_broadcast_set_event(bc, cpu, tick_next_period, 1);
                } else
                        bc->next_event.tv64 = KTIME_MAX;
        } else {
@@ -640,7 +754,7 @@ void tick_shutdown_broadcast_oneshot(unsigned int *cpup)
         * Clear the broadcast mask flag for the dead cpu, but do not
         * stop the broadcast device!
         */
-       cpumask_clear_cpu(cpu, tick_get_broadcast_oneshot_mask());
+       cpumask_clear_cpu(cpu, tick_broadcast_oneshot_mask);
 
        raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
 }
@@ -664,3 +778,14 @@ bool tick_broadcast_oneshot_available(void)
 }
 
 #endif
+
+void __init tick_broadcast_init(void)
+{
+       alloc_cpumask_var(&tick_broadcast_mask, GFP_NOWAIT);
+       alloc_cpumask_var(&tmpmask, GFP_NOWAIT);
+#ifdef CONFIG_TICK_ONESHOT
+       alloc_cpumask_var(&tick_broadcast_oneshot_mask, GFP_NOWAIT);
+       alloc_cpumask_var(&tick_broadcast_pending_mask, GFP_NOWAIT);
+       alloc_cpumask_var(&tick_broadcast_force_mask, GFP_NOWAIT);
+#endif
+}
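
For reference, the cpumask_var_t API used above: with CONFIG_CPUMASK_OFFSTACK the mask storage is allocated at runtime (hence the GFP_NOWAIT allocations in early init), while without it the variable degenerates to a static bitmap and the allocation is a no-op that always succeeds. A minimal usage sketch, not part of this patch:

    #include <linux/cpumask.h>

    static cpumask_var_t my_mask;                   /* hypothetical */

    static int __init my_mask_init(void)
    {
            /* GFP_NOWAIT: early boot context, must not sleep */
            if (!zalloc_cpumask_var(&my_mask, GFP_NOWAIT))
                    return -ENOMEM;
            cpumask_set_cpu(0, my_mask);            /* mark CPU0, illustrative */
            return 0;
    }
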
index b1600a6973f4492d18df06483e2245443fc91201..74413e396acc7793d424e8d341c5c37dba3c7dbc 100644 (file)
@@ -416,4 +416,5 @@ static struct notifier_block tick_notifier = {
 void __init tick_init(void)
 {
        clockevents_register_notifier(&tick_notifier);
+       tick_broadcast_init();
 }
index cf3e59ed6dc0ce7cef9b990f74d0ace7a8a152a4..f0299eae46027accb94a23195c10d1bc35c19273 100644 (file)
@@ -4,6 +4,8 @@
 #include <linux/hrtimer.h>
 #include <linux/tick.h>
 
+extern seqlock_t jiffies_lock;
+
 #ifdef CONFIG_GENERIC_CLOCKEVENTS_BUILD
 
 #define TICK_DO_TIMER_NONE     -1
@@ -94,7 +96,7 @@ extern void tick_broadcast_on_off(unsigned long reason, int *oncpu);
 extern void tick_shutdown_broadcast(unsigned int *cpup);
 extern void tick_suspend_broadcast(void);
 extern int tick_resume_broadcast(void);
-
+extern void tick_broadcast_init(void);
 extern void
 tick_set_periodic_handler(struct clock_event_device *dev, int broadcast);
 
@@ -119,6 +121,7 @@ static inline void tick_broadcast_on_off(unsigned long reason, int *oncpu) { }
 static inline void tick_shutdown_broadcast(unsigned int *cpup) { }
 static inline void tick_suspend_broadcast(void) { }
 static inline int tick_resume_broadcast(void) { return 0; }
+static inline void tick_broadcast_init(void) { }
 
 /*
  * Set the periodic handler in non broadcast mode
index a19a39952c1b24d3dd0ab0e9bd354fa9c5a7cb46..225f8bf19095a6f4e4abb5b2bc2458fb5b6baf1b 100644 (file)
@@ -482,8 +482,8 @@ static bool can_stop_idle_tick(int cpu, struct tick_sched *ts)
 
                if (ratelimit < 10 &&
                    (local_softirq_pending() & SOFTIRQ_STOP_IDLE_MASK)) {
-                       printk(KERN_ERR "NOHZ: local_softirq_pending %02x\n",
-                              (unsigned int) local_softirq_pending());
+                       pr_warn("NOHZ: local_softirq_pending %02x\n",
+                               (unsigned int) local_softirq_pending());
                        ratelimit++;
                }
                return false;
index 9a0bc98fbe1db40208b9025bbfcf51f34057f1dc..98cd470bbe4901569dad2a8ffb4a85f7945c5fca 100644 (file)
 #include <linux/stop_machine.h>
 #include <linux/pvclock_gtod.h>
 
+#include "tick-internal.h"
+#include "ntp_internal.h"
 
 static struct timekeeper timekeeper;
+static DEFINE_RAW_SPINLOCK(timekeeper_lock);
+static seqcount_t timekeeper_seq;
+static struct timekeeper shadow_timekeeper;
 
 /* flag for if timekeeping is suspended */
 int __read_mostly timekeeping_suspended;
@@ -67,6 +72,7 @@ static void tk_set_wall_to_mono(struct timekeeper *tk, struct timespec wtm)
        tk->wall_to_monotonic = wtm;
        set_normalized_timespec(&tmp, -wtm.tv_sec, -wtm.tv_nsec);
        tk->offs_real = timespec_to_ktime(tmp);
+       tk->offs_tai = ktime_sub(tk->offs_real, ktime_set(tk->tai_offset, 0));
 }
 
 static void tk_set_sleep_time(struct timekeeper *tk, struct timespec t)
@@ -96,7 +102,7 @@ static void tk_setup_internals(struct timekeeper *tk, struct clocksource *clock)
 
        old_clock = tk->clock;
        tk->clock = clock;
-       clock->cycle_last = clock->read(clock);
+       tk->cycle_last = clock->cycle_last = clock->read(clock);
 
        /* Do the ns -> cycle conversion first, using original mult */
        tmp = NTP_INTERVAL_LENGTH;
@@ -201,8 +207,6 @@ static void update_pvclock_gtod(struct timekeeper *tk)
 
 /**
  * pvclock_gtod_register_notifier - register a pvclock timedata update listener
- *
- * Must hold write on timekeeper.lock
  */
 int pvclock_gtod_register_notifier(struct notifier_block *nb)
 {
@@ -210,11 +214,10 @@ int pvclock_gtod_register_notifier(struct notifier_block *nb)
        unsigned long flags;
        int ret;
 
-       write_seqlock_irqsave(&tk->lock, flags);
+       raw_spin_lock_irqsave(&timekeeper_lock, flags);
        ret = raw_notifier_chain_register(&pvclock_gtod_chain, nb);
-       /* update timekeeping data */
        update_pvclock_gtod(tk);
-       write_sequnlock_irqrestore(&tk->lock, flags);
+       raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
 
        return ret;
 }
@@ -223,25 +226,22 @@ EXPORT_SYMBOL_GPL(pvclock_gtod_register_notifier);
 /**
  * pvclock_gtod_unregister_notifier - unregister a pvclock
  * timedata update listener
- *
- * Must hold write on timekeeper.lock
  */
 int pvclock_gtod_unregister_notifier(struct notifier_block *nb)
 {
-       struct timekeeper *tk = &timekeeper;
        unsigned long flags;
        int ret;
 
-       write_seqlock_irqsave(&tk->lock, flags);
+       raw_spin_lock_irqsave(&timekeeper_lock, flags);
        ret = raw_notifier_chain_unregister(&pvclock_gtod_chain, nb);
-       write_sequnlock_irqrestore(&tk->lock, flags);
+       raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
 
        return ret;
 }
 EXPORT_SYMBOL_GPL(pvclock_gtod_unregister_notifier);
 
-/* must hold write on timekeeper.lock */
-static void timekeeping_update(struct timekeeper *tk, bool clearntp)
+/* must hold timekeeper_lock */
+static void timekeeping_update(struct timekeeper *tk, bool clearntp, bool mirror)
 {
        if (clearntp) {
                tk->ntp_error = 0;
@@ -249,6 +249,9 @@ static void timekeeping_update(struct timekeeper *tk, bool clearntp)
        }
        update_vsyscall(tk);
        update_pvclock_gtod(tk);
+
+       if (mirror)
+               memcpy(&shadow_timekeeper, &timekeeper, sizeof(timekeeper));
 }
 
 /**
@@ -267,7 +270,7 @@ static void timekeeping_forward_now(struct timekeeper *tk)
        clock = tk->clock;
        cycle_now = clock->read(clock);
        cycle_delta = (cycle_now - clock->cycle_last) & clock->mask;
-       clock->cycle_last = cycle_now;
+       tk->cycle_last = clock->cycle_last = cycle_now;
 
        tk->xtime_nsec += cycle_delta * tk->mult;
 
@@ -294,12 +297,12 @@ int __getnstimeofday(struct timespec *ts)
        s64 nsecs = 0;
 
        do {
-               seq = read_seqbegin(&tk->lock);
+               seq = read_seqcount_begin(&timekeeper_seq);
 
                ts->tv_sec = tk->xtime_sec;
                nsecs = timekeeping_get_ns(tk);
 
-       } while (read_seqretry(&tk->lock, seq));
+       } while (read_seqcount_retry(&timekeeper_seq, seq));
 
        ts->tv_nsec = 0;
        timespec_add_ns(ts, nsecs);
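
The pattern used throughout this conversion: the old per-timekeeper seqlock is split into a bare seqcount_t that orders readers against updates, plus a raw spinlock that serializes writers. A generic sketch of the resulting reader/writer discipline (shared_a/shared_b are placeholder payload, not from this patch):

    #include <linux/seqlock.h>
    #include <linux/spinlock.h>
    #include <linux/types.h>

    static DEFINE_RAW_SPINLOCK(data_lock);          /* serializes writers */
    static seqcount_t data_seq;                     /* read consistency */
    static u64 shared_a, shared_b;                  /* placeholder payload */

    static void writer(u64 a, u64 b)
    {
            unsigned long flags;

            raw_spin_lock_irqsave(&data_lock, flags);
            write_seqcount_begin(&data_seq);
            shared_a = a;
            shared_b = b;
            write_seqcount_end(&data_seq);
            raw_spin_unlock_irqrestore(&data_lock, flags);
    }

    static u64 reader(void)
    {
            unsigned int seq;
            u64 a, b;

            do {
                    seq = read_seqcount_begin(&data_seq);
                    a = shared_a;
                    b = shared_b;
            } while (read_seqcount_retry(&data_seq, seq));

            return a + b;
    }

Readers never block writers; a reader that races with an update simply retries, which is why every read side in this file becomes a read_seqcount_begin()/read_seqcount_retry() loop.
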
@@ -335,11 +338,11 @@ ktime_t ktime_get(void)
        WARN_ON(timekeeping_suspended);
 
        do {
-               seq = read_seqbegin(&tk->lock);
+               seq = read_seqcount_begin(&timekeeper_seq);
                secs = tk->xtime_sec + tk->wall_to_monotonic.tv_sec;
                nsecs = timekeeping_get_ns(tk) + tk->wall_to_monotonic.tv_nsec;
 
-       } while (read_seqretry(&tk->lock, seq));
+       } while (read_seqcount_retry(&timekeeper_seq, seq));
        /*
         * Use ktime_set/ktime_add_ns to create a proper ktime on
         * 32-bit architectures without CONFIG_KTIME_SCALAR.
@@ -366,12 +369,12 @@ void ktime_get_ts(struct timespec *ts)
        WARN_ON(timekeeping_suspended);
 
        do {
-               seq = read_seqbegin(&tk->lock);
+               seq = read_seqcount_begin(&timekeeper_seq);
                ts->tv_sec = tk->xtime_sec;
                nsec = timekeeping_get_ns(tk);
                tomono = tk->wall_to_monotonic;
 
-       } while (read_seqretry(&tk->lock, seq));
+       } while (read_seqcount_retry(&timekeeper_seq, seq));
 
        ts->tv_sec += tomono.tv_sec;
        ts->tv_nsec = 0;
@@ -379,6 +382,50 @@ void ktime_get_ts(struct timespec *ts)
 }
 EXPORT_SYMBOL_GPL(ktime_get_ts);
 
+
+/**
+ * timekeeping_clocktai - Returns the TAI time of day in a timespec
+ * @ts:                pointer to the timespec to be set
+ *
+ * Returns the time of day in a timespec.
+ */
+void timekeeping_clocktai(struct timespec *ts)
+{
+       struct timekeeper *tk = &timekeeper;
+       unsigned long seq;
+       u64 nsecs;
+
+       WARN_ON(timekeeping_suspended);
+
+       do {
+               seq = read_seqcount_begin(&timekeeper_seq);
+
+               ts->tv_sec = tk->xtime_sec + tk->tai_offset;
+               nsecs = timekeeping_get_ns(tk);
+
+       } while (read_seqcount_retry(&timekeeper_seq, seq));
+
+       ts->tv_nsec = 0;
+       timespec_add_ns(ts, nsecs);
+
+}
+EXPORT_SYMBOL(timekeeping_clocktai);
+
+
+/**
+ * ktime_get_clocktai - Returns the TAI time of day in a ktime
+ *
+ * Returns the time of day in a ktime.
+ */
+ktime_t ktime_get_clocktai(void)
+{
+       struct timespec ts;
+
+       timekeeping_clocktai(&ts);
+       return timespec_to_ktime(ts);
+}
+EXPORT_SYMBOL(ktime_get_clocktai);
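
These accessors back the new CLOCK_TAI clockid added in this series. A hedged user-space sketch; the #define is only a fallback for libc headers that do not yet know CLOCK_TAI, and the value 11 matches the kernel header in this series:

    #include <stdio.h>
    #include <time.h>

    #ifndef CLOCK_TAI
    #define CLOCK_TAI 11    /* fallback, see include/uapi/linux/time.h */
    #endif

    int main(void)
    {
            struct timespec tai, real;

            if (clock_gettime(CLOCK_TAI, &tai) ||
                clock_gettime(CLOCK_REALTIME, &real)) {
                    perror("clock_gettime");
                    return 1;
            }
            /* 0 until a TAI offset has been set via adjtimex() */
            printf("TAI-UTC: %ld s\n", (long)(tai.tv_sec - real.tv_sec));
            return 0;
    }
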
+
 #ifdef CONFIG_NTP_PPS
 
 /**
@@ -399,7 +446,7 @@ void getnstime_raw_and_real(struct timespec *ts_raw, struct timespec *ts_real)
        WARN_ON_ONCE(timekeeping_suspended);
 
        do {
-               seq = read_seqbegin(&tk->lock);
+               seq = read_seqcount_begin(&timekeeper_seq);
 
                *ts_raw = tk->raw_time;
                ts_real->tv_sec = tk->xtime_sec;
@@ -408,7 +455,7 @@ void getnstime_raw_and_real(struct timespec *ts_raw, struct timespec *ts_real)
                nsecs_raw = timekeeping_get_ns_raw(tk);
                nsecs_real = timekeeping_get_ns(tk);
 
-       } while (read_seqretry(&tk->lock, seq));
+       } while (read_seqcount_retry(&timekeeper_seq, seq));
 
        timespec_add_ns(ts_raw, nsecs_raw);
        timespec_add_ns(ts_real, nsecs_real);
@@ -448,7 +495,8 @@ int do_settimeofday(const struct timespec *tv)
        if (!timespec_valid_strict(tv))
                return -EINVAL;
 
-       write_seqlock_irqsave(&tk->lock, flags);
+       raw_spin_lock_irqsave(&timekeeper_lock, flags);
+       write_seqcount_begin(&timekeeper_seq);
 
        timekeeping_forward_now(tk);
 
@@ -460,9 +508,10 @@ int do_settimeofday(const struct timespec *tv)
 
        tk_set_xtime(tk, tv);
 
-       timekeeping_update(tk, true);
+       timekeeping_update(tk, true, true);
 
-       write_sequnlock_irqrestore(&tk->lock, flags);
+       write_seqcount_end(&timekeeper_seq);
+       raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
 
        /* signal hrtimers about time change */
        clock_was_set();
@@ -487,7 +536,8 @@ int timekeeping_inject_offset(struct timespec *ts)
        if ((unsigned long)ts->tv_nsec >= NSEC_PER_SEC)
                return -EINVAL;
 
-       write_seqlock_irqsave(&tk->lock, flags);
+       raw_spin_lock_irqsave(&timekeeper_lock, flags);
+       write_seqcount_begin(&timekeeper_seq);
 
        timekeeping_forward_now(tk);
 
@@ -502,9 +552,10 @@ int timekeeping_inject_offset(struct timespec *ts)
        tk_set_wall_to_mono(tk, timespec_sub(tk->wall_to_monotonic, *ts));
 
 error: /* even if we error out, we forwarded the time, so call update */
-       timekeeping_update(tk, true);
+       timekeeping_update(tk, true, true);
 
-       write_sequnlock_irqrestore(&tk->lock, flags);
+       write_seqcount_end(&timekeeper_seq);
+       raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
 
        /* signal hrtimers about time change */
        clock_was_set();
@@ -513,6 +564,52 @@ error: /* even if we error out, we forwarded the time, so call update */
 }
 EXPORT_SYMBOL(timekeeping_inject_offset);
 
+
+/**
+ * timekeeping_get_tai_offset - Returns current TAI offset from UTC
+ *
+ */
+s32 timekeeping_get_tai_offset(void)
+{
+       struct timekeeper *tk = &timekeeper;
+       unsigned int seq;
+       s32 ret;
+
+       do {
+               seq = read_seqcount_begin(&timekeeper_seq);
+               ret = tk->tai_offset;
+       } while (read_seqcount_retry(&timekeeper_seq, seq));
+
+       return ret;
+}
+
+/**
+ * __timekeeping_set_tai_offset - Lock free worker function
+ *
+ */
+static void __timekeeping_set_tai_offset(struct timekeeper *tk, s32 tai_offset)
+{
+       tk->tai_offset = tai_offset;
+       tk->offs_tai = ktime_sub(tk->offs_real, ktime_set(tai_offset, 0));
+}
+
+/**
+ * timekeeping_set_tai_offset - Sets the current TAI offset from UTC
+ *
+ */
+void timekeeping_set_tai_offset(s32 tai_offset)
+{
+       struct timekeeper *tk = &timekeeper;
+       unsigned long flags;
+
+       raw_spin_lock_irqsave(&timekeeper_lock, flags);
+       write_seqcount_begin(&timekeeper_seq);
+       __timekeeping_set_tai_offset(tk, tai_offset);
+       write_seqcount_end(&timekeeper_seq);
+       raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
+       clock_was_set();
+}
+
 /**
  * change_clocksource - Swaps clocksources if a new one is available
  *
@@ -526,7 +623,8 @@ static int change_clocksource(void *data)
 
        new = (struct clocksource *) data;
 
-       write_seqlock_irqsave(&tk->lock, flags);
+       raw_spin_lock_irqsave(&timekeeper_lock, flags);
+       write_seqcount_begin(&timekeeper_seq);
 
        timekeeping_forward_now(tk);
        if (!new->enable || new->enable(new) == 0) {
@@ -535,9 +633,10 @@ static int change_clocksource(void *data)
                if (old->disable)
                        old->disable(old);
        }
-       timekeeping_update(tk, true);
+       timekeeping_update(tk, true, true);
 
-       write_sequnlock_irqrestore(&tk->lock, flags);
+       write_seqcount_end(&timekeeper_seq);
+       raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
 
        return 0;
 }
@@ -587,11 +686,11 @@ void getrawmonotonic(struct timespec *ts)
        s64 nsecs;
 
        do {
-               seq = read_seqbegin(&tk->lock);
+               seq = read_seqcount_begin(&timekeeper_seq);
                nsecs = timekeeping_get_ns_raw(tk);
                *ts = tk->raw_time;
 
-       } while (read_seqretry(&tk->lock, seq));
+       } while (read_seqcount_retry(&timekeeper_seq, seq));
 
        timespec_add_ns(ts, nsecs);
 }
@@ -607,11 +706,11 @@ int timekeeping_valid_for_hres(void)
        int ret;
 
        do {
-               seq = read_seqbegin(&tk->lock);
+               seq = read_seqcount_begin(&timekeeper_seq);
 
                ret = tk->clock->flags & CLOCK_SOURCE_VALID_FOR_HRES;
 
-       } while (read_seqretry(&tk->lock, seq));
+       } while (read_seqcount_retry(&timekeeper_seq, seq));
 
        return ret;
 }
@@ -626,11 +725,11 @@ u64 timekeeping_max_deferment(void)
        u64 ret;
 
        do {
-               seq = read_seqbegin(&tk->lock);
+               seq = read_seqcount_begin(&timekeeper_seq);
 
                ret = tk->clock->max_idle_ns;
 
-       } while (read_seqretry(&tk->lock, seq));
+       } while (read_seqcount_retry(&timekeeper_seq, seq));
 
        return ret;
 }
@@ -693,11 +792,10 @@ void __init timekeeping_init(void)
                boot.tv_nsec = 0;
        }
 
-       seqlock_init(&tk->lock);
-
+       raw_spin_lock_irqsave(&timekeeper_lock, flags);
+       write_seqcount_begin(&timekeeper_seq);
        ntp_init();
 
-       write_seqlock_irqsave(&tk->lock, flags);
        clock = clocksource_default_clock();
        if (clock->enable)
                clock->enable(clock);
@@ -716,7 +814,10 @@ void __init timekeeping_init(void)
        tmp.tv_nsec = 0;
        tk_set_sleep_time(tk, tmp);
 
-       write_sequnlock_irqrestore(&tk->lock, flags);
+       memcpy(&shadow_timekeeper, &timekeeper, sizeof(timekeeper));
+
+       write_seqcount_end(&timekeeper_seq);
+       raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
 }
 
 /* time in seconds when suspend began */
@@ -764,15 +865,17 @@ void timekeeping_inject_sleeptime(struct timespec *delta)
        if (has_persistent_clock())
                return;
 
-       write_seqlock_irqsave(&tk->lock, flags);
+       raw_spin_lock_irqsave(&timekeeper_lock, flags);
+       write_seqcount_begin(&timekeeper_seq);
 
        timekeeping_forward_now(tk);
 
        __timekeeping_inject_sleeptime(tk, delta);
 
-       timekeeping_update(tk, true);
+       timekeeping_update(tk, true, true);
 
-       write_sequnlock_irqrestore(&tk->lock, flags);
+       write_seqcount_end(&timekeeper_seq);
+       raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
 
        /* signal hrtimers about time change */
        clock_was_set();
@@ -788,26 +891,72 @@ void timekeeping_inject_sleeptime(struct timespec *delta)
 static void timekeeping_resume(void)
 {
        struct timekeeper *tk = &timekeeper;
+       struct clocksource *clock = tk->clock;
        unsigned long flags;
-       struct timespec ts;
+       struct timespec ts_new, ts_delta;
+       cycle_t cycle_now, cycle_delta;
+       bool suspendtime_found = false;
 
-       read_persistent_clock(&ts);
+       read_persistent_clock(&ts_new);
 
        clockevents_resume();
        clocksource_resume();
 
-       write_seqlock_irqsave(&tk->lock, flags);
+       raw_spin_lock_irqsave(&timekeeper_lock, flags);
+       write_seqcount_begin(&timekeeper_seq);
+
+       /*
+        * After the system resumes, we need to calculate the suspended
+        * time and compensate the OS time for it. There are 3 sources
+        * that could be used: nonstop clocksource during suspend,
+        * persistent clock and rtc device.
+        *
+        * A given platform may have one, two or all of them, and the
+        * order of preference is:
+        *      suspend-nonstop clocksource -> persistent clock -> rtc
+        * A less preferred source is only tried when no better one is
+        * usable. The rtc part is handled separately in the rtc core.
+        */
+       cycle_now = clock->read(clock);
+       if ((clock->flags & CLOCK_SOURCE_SUSPEND_NONSTOP) &&
+               cycle_now > clock->cycle_last) {
+               u64 num, max = ULLONG_MAX;
+               u32 mult = clock->mult;
+               u32 shift = clock->shift;
+               s64 nsec = 0;
+
+               cycle_delta = (cycle_now - clock->cycle_last) & clock->mask;
 
-       if (timespec_compare(&ts, &timekeeping_suspend_time) > 0) {
-               ts = timespec_sub(ts, timekeeping_suspend_time);
-               __timekeeping_inject_sleeptime(tk, &ts);
+               /*
+                * "cycle_delta * mult" may overflow 64 bits if the
+                * suspended time is too long. In that case we need to
+                * do the 64-bit math carefully.
+                */
+               do_div(max, mult);
+               if (cycle_delta > max) {
+                       num = div64_u64(cycle_delta, max);
+                       nsec = (((u64) max * mult) >> shift) * num;
+                       cycle_delta -= num * max;
+               }
+               nsec += ((u64) cycle_delta * mult) >> shift;
+
+               ts_delta = ns_to_timespec(nsec);
+               suspendtime_found = true;
+       } else if (timespec_compare(&ts_new, &timekeeping_suspend_time) > 0) {
+               ts_delta = timespec_sub(ts_new, timekeeping_suspend_time);
+               suspendtime_found = true;
        }
-       /* re-base the last cycle value */
-       tk->clock->cycle_last = tk->clock->read(tk->clock);
+
+       if (suspendtime_found)
+               __timekeeping_inject_sleeptime(tk, &ts_delta);
+
+       /* Re-base the last cycle value */
+       tk->cycle_last = clock->cycle_last = cycle_now;
        tk->ntp_error = 0;
        timekeeping_suspended = 0;
-       timekeeping_update(tk, false);
-       write_sequnlock_irqrestore(&tk->lock, flags);
+       timekeeping_update(tk, false, true);
+       write_seqcount_end(&timekeeper_seq);
+       raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
 
        touch_softlockup_watchdog();
 
@@ -826,7 +975,8 @@ static int timekeeping_suspend(void)
 
        read_persistent_clock(&timekeeping_suspend_time);
 
-       write_seqlock_irqsave(&tk->lock, flags);
+       raw_spin_lock_irqsave(&timekeeper_lock, flags);
+       write_seqcount_begin(&timekeeper_seq);
        timekeeping_forward_now(tk);
        timekeeping_suspended = 1;
 
@@ -849,7 +999,8 @@ static int timekeeping_suspend(void)
                timekeeping_suspend_time =
                        timespec_add(timekeeping_suspend_time, delta_delta);
        }
-       write_sequnlock_irqrestore(&tk->lock, flags);
+       write_seqcount_end(&timekeeper_seq);
+       raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
 
        clockevents_notify(CLOCK_EVT_NOTIFY_SUSPEND, NULL);
        clocksource_suspend();
@@ -1099,6 +1250,8 @@ static inline void accumulate_nsecs_to_secs(struct timekeeper *tk)
                        tk_set_wall_to_mono(tk,
                                timespec_sub(tk->wall_to_monotonic, ts));
 
+                       __timekeeping_set_tai_offset(tk, tk->tai_offset - leap);
+
                        clock_was_set_delayed();
                }
        }
@@ -1116,15 +1269,16 @@ static inline void accumulate_nsecs_to_secs(struct timekeeper *tk)
 static cycle_t logarithmic_accumulation(struct timekeeper *tk, cycle_t offset,
                                                u32 shift)
 {
+       cycle_t interval = tk->cycle_interval << shift;
        u64 raw_nsecs;
 
        /* If the offset is smaller then a shifted interval, do nothing */
-       if (offset < tk->cycle_interval<<shift)
+       if (offset < interval)
                return offset;
 
        /* Accumulate one shifted interval */
-       offset -= tk->cycle_interval << shift;
-       tk->clock->cycle_last += tk->cycle_interval << shift;
+       offset -= interval;
+       tk->cycle_last += interval;
 
        tk->xtime_nsec += tk->xtime_interval << shift;
        accumulate_nsecs_to_secs(tk);
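
For intuition on the shifted-interval accumulation above: update_wall_time() consumes the pending cycle backlog in chunks of cycle_interval << shift, starting with the largest power of two that fits (and capped by maxshift so the NTP error accounting cannot overflow), so even a huge backlog is retired in O(log n) passes instead of n. A toy user-space model of just the chunking, not the kernel code itself:

    #include <stdint.h>
    #include <stdio.h>

    static unsigned accumulate_all(uint64_t offset, uint64_t interval)
    {
            unsigned steps = 0;
            int shift = 0;

            while ((interval << (shift + 1)) <= offset)
                    shift++;                /* largest chunk that fits */

            while (offset >= interval) {
                    while ((interval << shift) > offset)
                            shift--;        /* shrink chunk as backlog drains */
                    offset -= interval << shift;
                    steps++;
            }
            return steps;
    }

    int main(void)
    {
            /* a backlog of 10^6 intervals drains in 7 steps, not 10^6 */
            printf("%u steps\n", accumulate_all(1000000, 1));
            return 0;
    }
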
@@ -1181,27 +1335,28 @@ static inline void old_vsyscall_fixup(struct timekeeper *tk)
 static void update_wall_time(void)
 {
        struct clocksource *clock;
-       struct timekeeper *tk = &timekeeper;
+       struct timekeeper *real_tk = &timekeeper;
+       struct timekeeper *tk = &shadow_timekeeper;
        cycle_t offset;
        int shift = 0, maxshift;
        unsigned long flags;
 
-       write_seqlock_irqsave(&tk->lock, flags);
+       raw_spin_lock_irqsave(&timekeeper_lock, flags);
 
        /* Make sure we're fully resumed: */
        if (unlikely(timekeeping_suspended))
                goto out;
 
-       clock = tk->clock;
+       clock = real_tk->clock;
 
 #ifdef CONFIG_ARCH_USES_GETTIMEOFFSET
-       offset = tk->cycle_interval;
+       offset = real_tk->cycle_interval;
 #else
        offset = (clock->read(clock) - clock->cycle_last) & clock->mask;
 #endif
 
        /* Check if there's really nothing to do */
-       if (offset < tk->cycle_interval)
+       if (offset < real_tk->cycle_interval)
                goto out;
 
        /*
@@ -1238,11 +1393,24 @@ static void update_wall_time(void)
         */
        accumulate_nsecs_to_secs(tk);
 
-       timekeeping_update(tk, false);
-
+       write_seqcount_begin(&timekeeper_seq);
+       /* Update clock->cycle_last with the new value */
+       clock->cycle_last = tk->cycle_last;
+       /*
+        * Update the real timekeeper.
+        *
+        * We could avoid this memcpy by switching pointers, but that
+        * requires changes to all other timekeeper usage sites as
+        * well, i.e. moving the timekeeper pointer getter into the
+        * spinlocked/seqcount protected sections. And we would trade
+        * this memcpy under timekeeper_seq against the one done before
+        * we start updating.
+        */
+       memcpy(real_tk, tk, sizeof(*tk));
+       timekeeping_update(real_tk, false, false);
+       write_seqcount_end(&timekeeper_seq);
 out:
-       write_sequnlock_irqrestore(&tk->lock, flags);
-
+       raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
 }
 
 /**
@@ -1289,13 +1457,13 @@ void get_monotonic_boottime(struct timespec *ts)
        WARN_ON(timekeeping_suspended);
 
        do {
-               seq = read_seqbegin(&tk->lock);
+               seq = read_seqcount_begin(&timekeeper_seq);
                ts->tv_sec = tk->xtime_sec;
                nsec = timekeeping_get_ns(tk);
                tomono = tk->wall_to_monotonic;
                sleep = tk->total_sleep_time;
 
-       } while (read_seqretry(&tk->lock, seq));
+       } while (read_seqcount_retry(&timekeeper_seq, seq));
 
        ts->tv_sec += tomono.tv_sec + sleep.tv_sec;
        ts->tv_nsec = 0;
@@ -1354,10 +1522,10 @@ struct timespec current_kernel_time(void)
        unsigned long seq;
 
        do {
-               seq = read_seqbegin(&tk->lock);
+               seq = read_seqcount_begin(&timekeeper_seq);
 
                now = tk_xtime(tk);
-       } while (read_seqretry(&tk->lock, seq));
+       } while (read_seqcount_retry(&timekeeper_seq, seq));
 
        return now;
 }
@@ -1370,11 +1538,11 @@ struct timespec get_monotonic_coarse(void)
        unsigned long seq;
 
        do {
-               seq = read_seqbegin(&tk->lock);
+               seq = read_seqcount_begin(&timekeeper_seq);
 
                now = tk_xtime(tk);
                mono = tk->wall_to_monotonic;
-       } while (read_seqretry(&tk->lock, seq));
+       } while (read_seqcount_retry(&timekeeper_seq, seq));
 
        set_normalized_timespec(&now, now.tv_sec + mono.tv_sec,
                                now.tv_nsec + mono.tv_nsec);
@@ -1405,11 +1573,11 @@ void get_xtime_and_monotonic_and_sleep_offset(struct timespec *xtim,
        unsigned long seq;
 
        do {
-               seq = read_seqbegin(&tk->lock);
+               seq = read_seqcount_begin(&timekeeper_seq);
                *xtim = tk_xtime(tk);
                *wtom = tk->wall_to_monotonic;
                *sleep = tk->total_sleep_time;
-       } while (read_seqretry(&tk->lock, seq));
+       } while (read_seqcount_retry(&timekeeper_seq, seq));
 }
 
 #ifdef CONFIG_HIGH_RES_TIMERS
@@ -1421,7 +1589,8 @@ void get_xtime_and_monotonic_and_sleep_offset(struct timespec *xtim,
  * Returns current monotonic time and updates the offsets
 * Called from hrtimer_interrupt() or retrigger_next_event()
  */
-ktime_t ktime_get_update_offsets(ktime_t *offs_real, ktime_t *offs_boot)
+ktime_t ktime_get_update_offsets(ktime_t *offs_real, ktime_t *offs_boot,
+                                                       ktime_t *offs_tai)
 {
        struct timekeeper *tk = &timekeeper;
        ktime_t now;
@@ -1429,14 +1598,15 @@ ktime_t ktime_get_update_offsets(ktime_t *offs_real, ktime_t *offs_boot)
        u64 secs, nsecs;
 
        do {
-               seq = read_seqbegin(&tk->lock);
+               seq = read_seqcount_begin(&timekeeper_seq);
 
                secs = tk->xtime_sec;
                nsecs = timekeeping_get_ns(tk);
 
                *offs_real = tk->offs_real;
                *offs_boot = tk->offs_boot;
-       } while (read_seqretry(&tk->lock, seq));
+               *offs_tai = tk->offs_tai;
+       } while (read_seqcount_retry(&timekeeper_seq, seq));
 
        now = ktime_add_ns(ktime_set(secs, 0), nsecs);
        now = ktime_sub(now, *offs_real);
@@ -1454,14 +1624,78 @@ ktime_t ktime_get_monotonic_offset(void)
        struct timespec wtom;
 
        do {
-               seq = read_seqbegin(&tk->lock);
+               seq = read_seqcount_begin(&timekeeper_seq);
                wtom = tk->wall_to_monotonic;
-       } while (read_seqretry(&tk->lock, seq));
+       } while (read_seqcount_retry(&timekeeper_seq, seq));
 
        return timespec_to_ktime(wtom);
 }
 EXPORT_SYMBOL_GPL(ktime_get_monotonic_offset);
 
+/**
+ * do_adjtimex() - Accessor function to NTP __do_adjtimex function
+ */
+int do_adjtimex(struct timex *txc)
+{
+       struct timekeeper *tk = &timekeeper;
+       unsigned long flags;
+       struct timespec ts;
+       s32 orig_tai, tai;
+       int ret;
+
+       /* Validate the data before disabling interrupts */
+       ret = ntp_validate_timex(txc);
+       if (ret)
+               return ret;
+
+       if (txc->modes & ADJ_SETOFFSET) {
+               struct timespec delta;
+               delta.tv_sec  = txc->time.tv_sec;
+               delta.tv_nsec = txc->time.tv_usec;
+               if (!(txc->modes & ADJ_NANO))
+                       delta.tv_nsec *= 1000;
+               ret = timekeeping_inject_offset(&delta);
+               if (ret)
+                       return ret;
+       }
+
+       getnstimeofday(&ts);
+
+       raw_spin_lock_irqsave(&timekeeper_lock, flags);
+       write_seqcount_begin(&timekeeper_seq);
+
+       orig_tai = tai = tk->tai_offset;
+       ret = __do_adjtimex(txc, &ts, &tai);
+
+       if (tai != orig_tai) {
+               __timekeeping_set_tai_offset(tk, tai);
+               clock_was_set_delayed();
+       }
+       write_seqcount_end(&timekeeper_seq);
+       raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
+
+       return ret;
+}
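
do_adjtimex() above is what lets user space manage the TAI offset: __do_adjtimex() reports the (possibly updated) offset through the tai pointer, and ADJ_TAI passes a new one in via txc->constant. A hedged user-space sketch (ADJ_TAI is a long-standing timex mode; setting it needs CAP_SYS_TIME, and 35 s was the TAI-UTC offset current in 2013, used here purely as an illustration):

    #include <stdio.h>
    #include <sys/timex.h>

    int main(void)
    {
            struct timex txc = {
                    .modes          = ADJ_TAI,
                    .constant       = 35,   /* illustrative TAI-UTC offset */
            };

            if (adjtimex(&txc) < 0) {
                    perror("adjtimex"); /* requires CAP_SYS_TIME */
                    return 1;
            }
            printf("kernel TAI offset: %d s\n", txc.tai);
            return 0;
    }
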
+
+#ifdef CONFIG_NTP_PPS
+/**
+ * hardpps() - Accessor function to NTP __hardpps function
+ */
+void hardpps(const struct timespec *phase_ts, const struct timespec *raw_ts)
+{
+       unsigned long flags;
+
+       raw_spin_lock_irqsave(&timekeeper_lock, flags);
+       write_seqcount_begin(&timekeeper_seq);
+
+       __hardpps(phase_ts, raw_ts);
+
+       write_seqcount_end(&timekeeper_seq);
+       raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
+}
+EXPORT_SYMBOL(hardpps);
+#endif
+
 /**
  * xtime_update() - advances the timekeeping infrastructure
  * @ticks:     number of ticks, that have elapsed since the last call.
index af5a7e9f164b53b3bedbc32ac0b516c7091760db..3bdf28323012639c9cc60b43f3add4ff257cbe91 100644 (file)
 
 #include <asm/uaccess.h>
 
+
+struct timer_list_iter {
+       int cpu;
+       bool second_pass;
+       u64 now;
+};
+
 typedef void (*print_fn_t)(struct seq_file *m, unsigned int *classes);
 
 DECLARE_PER_CPU(struct hrtimer_cpu_base, hrtimer_bases);
@@ -133,7 +140,6 @@ static void print_cpu(struct seq_file *m, int cpu, u64 now)
        struct hrtimer_cpu_base *cpu_base = &per_cpu(hrtimer_bases, cpu);
        int i;
 
-       SEQ_printf(m, "\n");
        SEQ_printf(m, "cpu: %d\n", cpu);
        for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) {
                SEQ_printf(m, " clock %d:\n", i);
@@ -187,6 +193,7 @@ static void print_cpu(struct seq_file *m, int cpu, u64 now)
 
 #undef P
 #undef P_ns
+       SEQ_printf(m, "\n");
 }
 
 #ifdef CONFIG_GENERIC_CLOCKEVENTS
@@ -195,7 +202,6 @@ print_tickdevice(struct seq_file *m, struct tick_device *td, int cpu)
 {
        struct clock_event_device *dev = td->evtdev;
 
-       SEQ_printf(m, "\n");
        SEQ_printf(m, "Tick Device: mode:     %d\n", td->mode);
        if (cpu < 0)
                SEQ_printf(m, "Broadcast device\n");
@@ -230,12 +236,11 @@ print_tickdevice(struct seq_file *m, struct tick_device *td, int cpu)
        print_name_offset(m, dev->event_handler);
        SEQ_printf(m, "\n");
        SEQ_printf(m, " retries:        %lu\n", dev->retries);
+       SEQ_printf(m, "\n");
 }
 
-static void timer_list_show_tickdevices(struct seq_file *m)
+static void timer_list_show_tickdevices_header(struct seq_file *m)
 {
-       int cpu;
-
 #ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
        print_tickdevice(m, tick_get_broadcast_device(), -1);
        SEQ_printf(m, "tick_broadcast_mask: %08lx\n",
@@ -246,47 +251,104 @@ static void timer_list_show_tickdevices(struct seq_file *m)
 #endif
        SEQ_printf(m, "\n");
 #endif
-       for_each_online_cpu(cpu)
-               print_tickdevice(m, tick_get_device(cpu), cpu);
-       SEQ_printf(m, "\n");
 }
-#else
-static void timer_list_show_tickdevices(struct seq_file *m) { }
 #endif
 
+static inline void timer_list_header(struct seq_file *m, u64 now)
+{
+       SEQ_printf(m, "Timer List Version: v0.7\n");
+       SEQ_printf(m, "HRTIMER_MAX_CLOCK_BASES: %d\n", HRTIMER_MAX_CLOCK_BASES);
+       SEQ_printf(m, "now at %Ld nsecs\n", (unsigned long long)now);
+       SEQ_printf(m, "\n");
+}
+
 static int timer_list_show(struct seq_file *m, void *v)
+{
+       struct timer_list_iter *iter = v;
+       u64 now = ktime_to_ns(ktime_get());
+
+       if (iter->cpu == -1 && !iter->second_pass)
+               timer_list_header(m, now);
+       else if (!iter->second_pass)
+               print_cpu(m, iter->cpu, iter->now);
+#ifdef CONFIG_GENERIC_CLOCKEVENTS
+       else if (iter->cpu == -1 && iter->second_pass)
+               timer_list_show_tickdevices_header(m);
+       else
+               print_tickdevice(m, tick_get_device(iter->cpu), iter->cpu);
+#endif
+       return 0;
+}
+
+void sysrq_timer_list_show(void)
 {
        u64 now = ktime_to_ns(ktime_get());
        int cpu;
 
-       SEQ_printf(m, "Timer List Version: v0.7\n");
-       SEQ_printf(m, "HRTIMER_MAX_CLOCK_BASES: %d\n", HRTIMER_MAX_CLOCK_BASES);
-       SEQ_printf(m, "now at %Ld nsecs\n", (unsigned long long)now);
+       timer_list_header(NULL, now);
 
        for_each_online_cpu(cpu)
-               print_cpu(m, cpu, now);
+               print_cpu(NULL, cpu, now);
 
-       SEQ_printf(m, "\n");
-       timer_list_show_tickdevices(m);
+#ifdef CONFIG_GENERIC_CLOCKEVENTS
+       timer_list_show_tickdevices_header(NULL);
+       for_each_online_cpu(cpu)
+               print_tickdevice(NULL, tick_get_device(cpu), cpu);
+#endif
+       return;
+}
 
-       return 0;
+static void *timer_list_start(struct seq_file *file, loff_t *offset)
+{
+       struct timer_list_iter *iter = file->private;
+
+       if (!*offset) {
+               iter->cpu = -1;
+               iter->now = ktime_to_ns(ktime_get());
+       } else if (iter->cpu >= nr_cpu_ids) {
+#ifdef CONFIG_GENERIC_CLOCKEVENTS
+               if (!iter->second_pass) {
+                       iter->cpu = -1;
+                       iter->second_pass = true;
+               } else
+                       return NULL;
+#else
+               return NULL;
+#endif
+       }
+       return iter;
 }
 
-void sysrq_timer_list_show(void)
+static void *timer_list_next(struct seq_file *file, void *v, loff_t *offset)
+{
+       struct timer_list_iter *iter = file->private;
+       iter->cpu = cpumask_next(iter->cpu, cpu_online_mask);
+       ++*offset;
+       return timer_list_start(file, offset);
+}
+
+static void timer_list_stop(struct seq_file *seq, void *v)
 {
-       timer_list_show(NULL, NULL);
 }
 
+static const struct seq_operations timer_list_sops = {
+       .start = timer_list_start,
+       .next = timer_list_next,
+       .stop = timer_list_stop,
+       .show = timer_list_show,
+};
+
 static int timer_list_open(struct inode *inode, struct file *filp)
 {
-       return single_open(filp, timer_list_show, NULL);
+       return seq_open_private(filp, &timer_list_sops,
+                       sizeof(struct timer_list_iter));
 }
 
 static const struct file_operations timer_list_fops = {
        .open           = timer_list_open,
        .read           = seq_read,
        .llseek         = seq_lseek,
-       .release        = single_release,
+       .release        = seq_release_private,
 };
 
 static int __init init_timer_list_procfs(void)
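
The /proc/timer_list conversion above replaces single_open() with the full seq_file iterator protocol, so output is produced one record per show() call instead of in one giant buffer: start() positions the cursor (and is re-entered after every next()), and seq_open_private() attaches the per-open iterator to the seq_file. A generic sketch of the same shape over four dummy records (my_* names hypothetical):

    #include <linux/fs.h>
    #include <linux/seq_file.h>

    struct my_iter {                        /* hypothetical per-open state */
            int pos;
    };

    static void *my_start(struct seq_file *m, loff_t *offset)
    {
            struct my_iter *iter = m->private;

            iter->pos = *offset;
            return iter->pos < 4 ? iter : NULL;     /* 4 records, illustrative */
    }

    static void *my_next(struct seq_file *m, void *v, loff_t *offset)
    {
            ++*offset;
            return my_start(m, offset);
    }

    static void my_stop(struct seq_file *m, void *v) { }

    static int my_show(struct seq_file *m, void *v)
    {
            struct my_iter *iter = v;

            seq_printf(m, "record %d\n", iter->pos);
            return 0;
    }

    static const struct seq_operations my_sops = {
            .start = my_start,
            .next  = my_next,
            .stop  = my_stop,
            .show  = my_show,
    };

    static int my_open(struct inode *inode, struct file *filp)
    {
            return seq_open_private(filp, &my_sops, sizeof(struct my_iter));
    }

    static const struct file_operations my_fops = {
            .open    = my_open,
            .read    = seq_read,
            .llseek  = seq_lseek,
            .release = seq_release_private, /* frees the private iterator */
    };

As in timer_list_fops above, .release must be seq_release_private so the iterator allocated by seq_open_private() is freed when the file is closed.
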