/*
 * Implement CPU time clocks for the POSIX clock interface.
 */

#include <linux/sched.h>
#include <linux/posix-timers.h>
#include <linux/errno.h>
#include <linux/math64.h>
#include <asm/uaccess.h>
#include <linux/kernel_stat.h>
#include <trace/events/timer.h>
#include <linux/random.h>

/*
 * Called after updating RLIMIT_CPU to run cpu timer and update
 * tsk->signal->cputime_expires expiration cache if necessary. Needs
 * siglock protection since other code may update the expiration cache
 * as well.
 */
void update_rlimit_cpu(struct task_struct *task, unsigned long rlim_new)
{
	cputime_t cputime = secs_to_cputime(rlim_new);

	spin_lock_irq(&task->sighand->siglock);
	set_process_cpu_timer(task, CPUCLOCK_PROF, &cputime, NULL);
	spin_unlock_irq(&task->sighand->siglock);
}

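/*
 * Illustrative only (not part of this file): update_rlimit_cpu() is the
 * hook that makes a userspace setrlimit(RLIMIT_CPU, ...) take effect
 * immediately, e.g. with hypothetical limits in seconds:
 *
 *	struct rlimit rl = { .rlim_cur = 10, .rlim_max = 20 };
 *	setrlimit(RLIMIT_CPU, &rl);	(kernel lands here with rlim_new == 10)
 */
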
static int check_clock(const clockid_t which_clock)
{
	int error = 0;
	struct task_struct *p;
	const pid_t pid = CPUCLOCK_PID(which_clock);

	if (CPUCLOCK_WHICH(which_clock) >= CPUCLOCK_MAX)
		return -EINVAL;

	if (pid == 0)
		return 0;

	rcu_read_lock();
	p = find_task_by_vpid(pid);
	if (!p || !(CPUCLOCK_PERTHREAD(which_clock) ?
		   same_thread_group(p, current) : has_group_leader_pid(p))) {
		error = -EINVAL;
	}
	rcu_read_unlock();

	return error;
}

static inline union cpu_time_count
timespec_to_sample(const clockid_t which_clock, const struct timespec *tp)
{
	union cpu_time_count ret;
	ret.sched = 0;		/* high half always zero when .cpu used */
	if (CPUCLOCK_WHICH(which_clock) == CPUCLOCK_SCHED) {
		ret.sched = (unsigned long long)tp->tv_sec * NSEC_PER_SEC + tp->tv_nsec;
	} else {
		ret.cpu = timespec_to_cputime(tp);
	}
	return ret;
}

static void sample_to_timespec(const clockid_t which_clock,
			       union cpu_time_count cpu,
			       struct timespec *tp)
{
	if (CPUCLOCK_WHICH(which_clock) == CPUCLOCK_SCHED)
		*tp = ns_to_timespec(cpu.sched);
	else
		cputime_to_timespec(cpu.cpu, tp);
}

static inline int cpu_time_before(const clockid_t which_clock,
				  union cpu_time_count now,
				  union cpu_time_count then)
{
	if (CPUCLOCK_WHICH(which_clock) == CPUCLOCK_SCHED) {
		return now.sched < then.sched;
	} else {
		return now.cpu < then.cpu;
	}
}

static inline void cpu_time_add(const clockid_t which_clock,
				union cpu_time_count *acc,
				union cpu_time_count val)
{
	if (CPUCLOCK_WHICH(which_clock) == CPUCLOCK_SCHED) {
		acc->sched += val.sched;
	} else {
		acc->cpu += val.cpu;
	}
}

static inline union cpu_time_count cpu_time_sub(const clockid_t which_clock,
						union cpu_time_count a,
						union cpu_time_count b)
{
	if (CPUCLOCK_WHICH(which_clock) == CPUCLOCK_SCHED) {
		a.sched -= b.sched;
	} else {
		a.cpu -= b.cpu;
	}
	return a;
}

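/*
 * Note: union cpu_time_count (declared elsewhere) overlays .cpu, a
 * cputime_t, on .sched, a 64-bit nanosecond count.  Since .sched spans
 * the whole union, storing 0 to it clears both views, which is why code
 * below can test expires.sched == 0 as "not armed" for every clock type.
 */
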
/*
 * Update expiry time from increment, and increase overrun count,
 * given the current clock sample.
 */
static void bump_cpu_timer(struct k_itimer *timer,
			   union cpu_time_count now)
{
	int i;

	if (timer->it.cpu.incr.sched == 0)
		return;

	if (CPUCLOCK_WHICH(timer->it_clock) == CPUCLOCK_SCHED) {
		unsigned long long delta, incr;

		if (now.sched < timer->it.cpu.expires.sched)
			return;
		incr = timer->it.cpu.incr.sched;
		delta = now.sched + incr - timer->it.cpu.expires.sched;
		/* Don't use (incr*2 < delta), incr*2 might overflow. */
		for (i = 0; incr < delta - incr; i++)
			incr = incr << 1;
		for (; i >= 0; incr >>= 1, i--) {
			if (delta < incr)
				continue;
			timer->it.cpu.expires.sched += incr;
			timer->it_overrun += 1 << i;
			delta -= incr;
		}
	} else {
		cputime_t delta, incr;

		if (now.cpu < timer->it.cpu.expires.cpu)
			return;
		incr = timer->it.cpu.incr.cpu;
		delta = now.cpu + incr - timer->it.cpu.expires.cpu;
		/* Don't use (incr*2 < delta), incr*2 might overflow. */
		for (i = 0; incr < delta - incr; i++)
			incr += incr;
		for (; i >= 0; incr = incr >> 1, i--) {
			if (delta < incr)
				continue;
			timer->it.cpu.expires.cpu += incr;
			timer->it_overrun += 1 << i;
			delta -= incr;
		}
	}
}

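/*
 * Worked example for the doubling/halving loops above (values
 * hypothetical): with incr = 3 and delta = 20, the first loop doubles
 * incr to 12 (i = 2); the second loop subtracts 12 (overrun += 4) and
 * then 6 (overrun += 2), leaving delta = 2 < 3.  The expiry advances by
 * 18 with 6 overruns counted, in O(log(delta/incr)) steps rather than
 * one loop iteration per missed period.
 */
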
static inline cputime_t prof_ticks(struct task_struct *p)
{
	return p->utime + p->stime;
}
static inline cputime_t virt_ticks(struct task_struct *p)
{
	return p->utime;
}

static int
posix_cpu_clock_getres(const clockid_t which_clock, struct timespec *tp)
{
	int error = check_clock(which_clock);
	if (!error) {
		tp->tv_sec = 0;
		tp->tv_nsec = ((NSEC_PER_SEC + HZ - 1) / HZ);
		if (CPUCLOCK_WHICH(which_clock) == CPUCLOCK_SCHED) {
			/*
			 * If sched_clock is using a cycle counter, its
			 * true resolution is not exported anywhere, but
			 * it is much finer than 1s/HZ.
			 */
			tp->tv_nsec = 1;
		}
	}
	return error;
}

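/*
 * Userspace sees this resolution via clock_getres(), e.g. (sketch, error
 * handling omitted):
 *
 *	struct timespec res;
 *	clock_getres(CLOCK_PROCESS_CPUTIME_ID, &res);
 *	(res.tv_nsec is 1 here, since that clockid maps to the SCHED clock;
 *	 PROF/VIRT clock ids report 1s/HZ instead)
 */
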
static int
posix_cpu_clock_set(const clockid_t which_clock, const struct timespec *tp)
{
	/*
	 * You can never reset a CPU clock, but we check for other errors
	 * in the call before failing with EPERM.
	 */
	int error = check_clock(which_clock);
	if (error == 0) {
		error = -EPERM;
	}
	return error;
}

/*
 * Sample a per-thread clock for the given task.
 */
static int cpu_clock_sample(const clockid_t which_clock, struct task_struct *p,
			    union cpu_time_count *cpu)
{
	switch (CPUCLOCK_WHICH(which_clock)) {
	default:
		return -EINVAL;
	case CPUCLOCK_PROF:
		cpu->cpu = prof_ticks(p);
		break;
	case CPUCLOCK_VIRT:
		cpu->cpu = virt_ticks(p);
		break;
	case CPUCLOCK_SCHED:
		cpu->sched = task_sched_runtime(p);
		break;
	}
	return 0;
}

void thread_group_cputime(struct task_struct *tsk, struct task_cputime *times)
{
	struct signal_struct *sig = tsk->signal;
	struct task_struct *t;

	times->utime = sig->utime;
	times->stime = sig->stime;
	times->sum_exec_runtime = sig->sum_sched_runtime;

	rcu_read_lock();
	/* make sure we can trust tsk->thread_group list */
	if (!likely(pid_alive(tsk)))
		goto out;

	t = tsk;
	do {
		times->utime += t->utime;
		times->stime += t->stime;
		times->sum_exec_runtime += task_sched_runtime(t);
	} while_each_thread(tsk, t);
out:
	rcu_read_unlock();
}

static void update_gt_cputime(struct task_cputime *a, struct task_cputime *b)
{
	if (b->utime > a->utime)
		a->utime = b->utime;

	if (b->stime > a->stime)
		a->stime = b->stime;

	if (b->sum_exec_runtime > a->sum_exec_runtime)
		a->sum_exec_runtime = b->sum_exec_runtime;
}

void thread_group_cputimer(struct task_struct *tsk, struct task_cputime *times)
{
	struct thread_group_cputimer *cputimer = &tsk->signal->cputimer;
	struct task_cputime sum;
	unsigned long flags;

	if (!cputimer->running) {
		/*
		 * The POSIX timer interface allows for absolute time expiry
		 * values through the TIMER_ABSTIME flag, therefore we have
		 * to synchronize the timer to the clock every time we start
		 * it.
		 */
		thread_group_cputime(tsk, &sum);
		raw_spin_lock_irqsave(&cputimer->lock, flags);
		cputimer->running = 1;
		update_gt_cputime(&cputimer->cputime, &sum);
	} else
		raw_spin_lock_irqsave(&cputimer->lock, flags);
	*times = cputimer->cputime;
	raw_spin_unlock_irqrestore(&cputimer->lock, flags);
}

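/*
 * Design note: the thread_group_cputime() sum above is computed outside
 * cputimer->lock, so two concurrent starters may race; update_gt_cputime()
 * resolves that by only moving each cached field forward, keeping the
 * group clock monotonic.
 */
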
/*
 * Sample a process (thread group) clock for the given group_leader task.
 * Must be called with tasklist_lock held for reading.
 */
static int cpu_clock_sample_group(const clockid_t which_clock,
				  struct task_struct *p,
				  union cpu_time_count *cpu)
{
	struct task_cputime cputime;

	switch (CPUCLOCK_WHICH(which_clock)) {
	default:
		return -EINVAL;
	case CPUCLOCK_PROF:
		thread_group_cputime(p, &cputime);
		cpu->cpu = cputime.utime + cputime.stime;
		break;
	case CPUCLOCK_VIRT:
		thread_group_cputime(p, &cputime);
		cpu->cpu = cputime.utime;
		break;
	case CPUCLOCK_SCHED:
		thread_group_cputime(p, &cputime);
		cpu->sched = cputime.sum_exec_runtime;
		break;
	}
	return 0;
}

static int posix_cpu_clock_get(const clockid_t which_clock, struct timespec *tp)
{
	const pid_t pid = CPUCLOCK_PID(which_clock);
	int error = -EINVAL;
	union cpu_time_count rtn;

	if (pid == 0) {
		/*
		 * Special case constant value for our own clocks.
		 * We don't have to do any lookup to find ourselves.
		 */
		if (CPUCLOCK_PERTHREAD(which_clock)) {
			/*
			 * Sampling just ourselves we can do with no locking.
			 */
			error = cpu_clock_sample(which_clock,
						 current, &rtn);
		} else {
			read_lock(&tasklist_lock);
			error = cpu_clock_sample_group(which_clock,
						       current, &rtn);
			read_unlock(&tasklist_lock);
		}
	} else {
		/*
		 * Find the given PID, and validate that the caller
		 * should be able to see it.
		 */
		struct task_struct *p;
		rcu_read_lock();
		p = find_task_by_vpid(pid);
		if (p) {
			if (CPUCLOCK_PERTHREAD(which_clock)) {
				if (same_thread_group(p, current)) {
					error = cpu_clock_sample(which_clock,
								 p, &rtn);
				}
			} else {
				read_lock(&tasklist_lock);
				if (thread_group_leader(p) && p->sighand) {
					error =
					    cpu_clock_sample_group(which_clock,
								   p, &rtn);
				}
				read_unlock(&tasklist_lock);
			}
		}
		rcu_read_unlock();
	}

	if (error)
		return error;
	sample_to_timespec(which_clock, rtn, tp);
	return 0;
}

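/*
 * Illustrative userspace path into this function (sketch, error handling
 * omitted): clock_getcpuclockid() encodes a pid into a clockid_t, which
 * clock_gettime() then routes here.
 *
 *	clockid_t cid;
 *	struct timespec ts;
 *	clock_getcpuclockid(pid, &cid);
 *	clock_gettime(cid, &ts);	(CPU time consumed by pid's threads)
 */
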
/*
 * Validate the clockid_t for a new CPU-clock timer, and initialize the timer.
 * This is called from sys_timer_create() and do_cpu_nanosleep() with the
 * new timer already all-zeros initialized.
 */
static int posix_cpu_timer_create(struct k_itimer *new_timer)
{
	int ret = 0;
	const pid_t pid = CPUCLOCK_PID(new_timer->it_clock);
	struct task_struct *p;

	if (CPUCLOCK_WHICH(new_timer->it_clock) >= CPUCLOCK_MAX)
		return -EINVAL;

	INIT_LIST_HEAD(&new_timer->it.cpu.entry);

	rcu_read_lock();
	if (CPUCLOCK_PERTHREAD(new_timer->it_clock)) {
		if (pid == 0) {
			p = current;
		} else {
			p = find_task_by_vpid(pid);
			if (p && !same_thread_group(p, current))
				p = NULL;
		}
	} else {
		if (pid == 0) {
			p = current->group_leader;
		} else {
			p = find_task_by_vpid(pid);
			if (p && !has_group_leader_pid(p))
				p = NULL;
		}
	}
	new_timer->it.cpu.task = p;
	if (p) {
		get_task_struct(p);
	} else {
		ret = -EINVAL;
	}
	rcu_read_unlock();

	return ret;
}

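/*
 * Minimal sketch of the userspace call that reaches this function
 * (hypothetical notification choice, error handling omitted):
 *
 *	timer_t tid;
 *	struct sigevent sev = { .sigev_notify = SIGEV_SIGNAL,
 *				.sigev_signo  = SIGALRM };
 *	timer_create(CLOCK_THREAD_CPUTIME_ID, &sev, &tid);
 */
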
/*
 * Clean up a CPU-clock timer that is about to be destroyed.
 * This is called from timer deletion with the timer already locked.
 * If we return TIMER_RETRY, it's necessary to release the timer's lock
 * and try again.  (This happens when the timer is in the middle of firing.)
 */
static int posix_cpu_timer_del(struct k_itimer *timer)
{
	struct task_struct *p = timer->it.cpu.task;
	int ret = 0;

	if (likely(p != NULL)) {
		read_lock(&tasklist_lock);
		if (unlikely(p->sighand == NULL)) {
			/*
			 * We raced with the reaping of the task.
			 * The deletion should have cleared us off the list.
			 */
			BUG_ON(!list_empty(&timer->it.cpu.entry));
		} else {
			spin_lock(&p->sighand->siglock);
			if (timer->it.cpu.firing)
				ret = TIMER_RETRY;
			else
				list_del(&timer->it.cpu.entry);
			spin_unlock(&p->sighand->siglock);
		}
		read_unlock(&tasklist_lock);

		if (!ret)
			put_task_struct(p);
	}

	return ret;
}

/*
 * Clean out CPU timers still ticking when a thread exited.  The task
 * pointer is cleared, and the expiry time is replaced with the residual
 * time for later timer_gettime calls to return.
 * This must be called with the siglock held.
 */
static void cleanup_timers(struct list_head *head,
			   cputime_t utime, cputime_t stime,
			   unsigned long long sum_exec_runtime)
{
	struct cpu_timer_list *timer, *next;
	cputime_t ptime = utime + stime;

	list_for_each_entry_safe(timer, next, head, entry) {
		list_del_init(&timer->entry);
		if (timer->expires.cpu < ptime) {
			timer->expires.cpu = 0;
		} else {
			timer->expires.cpu -= ptime;
		}
	}

	++head;
	list_for_each_entry_safe(timer, next, head, entry) {
		list_del_init(&timer->entry);
		if (timer->expires.cpu < utime) {
			timer->expires.cpu = 0;
		} else {
			timer->expires.cpu -= utime;
		}
	}

	++head;
	list_for_each_entry_safe(timer, next, head, entry) {
		list_del_init(&timer->entry);
		if (timer->expires.sched < sum_exec_runtime) {
			timer->expires.sched = 0;
		} else {
			timer->expires.sched -= sum_exec_runtime;
		}
	}
}

/*
 * These are both called with the siglock held, when the current thread
 * is being reaped.  When the final (leader) thread in the group is reaped,
 * posix_cpu_timers_exit_group will be called after posix_cpu_timers_exit.
 */
void posix_cpu_timers_exit(struct task_struct *tsk)
{
	add_device_randomness((const void *) &tsk->se.sum_exec_runtime,
						sizeof(unsigned long long));
	cleanup_timers(tsk->cpu_timers,
		       tsk->utime, tsk->stime, tsk->se.sum_exec_runtime);
}
void posix_cpu_timers_exit_group(struct task_struct *tsk)
{
	struct signal_struct *const sig = tsk->signal;

	cleanup_timers(tsk->signal->cpu_timers,
		       tsk->utime + sig->utime, tsk->stime + sig->stime,
		       tsk->se.sum_exec_runtime + sig->sum_sched_runtime);
}

static void clear_dead_task(struct k_itimer *timer, union cpu_time_count now)
{
	/*
	 * That's all for this thread or process.
	 * We leave our residual in expires to be reported.
	 */
	put_task_struct(timer->it.cpu.task);
	timer->it.cpu.task = NULL;
	timer->it.cpu.expires = cpu_time_sub(timer->it_clock,
					     timer->it.cpu.expires,
					     now);
}

static inline int expires_gt(cputime_t expires, cputime_t new_exp)
{
	return expires == 0 || expires > new_exp;
}

/*
 * Insert the timer on the appropriate list before any timers that
 * expire later.  This must be called with the tasklist_lock held
 * for reading, interrupts disabled and p->sighand->siglock taken.
 */
static void arm_timer(struct k_itimer *timer)
{
	struct task_struct *p = timer->it.cpu.task;
	struct list_head *head, *listpos;
	struct task_cputime *cputime_expires;
	struct cpu_timer_list *const nt = &timer->it.cpu;
	struct cpu_timer_list *next;

	if (CPUCLOCK_PERTHREAD(timer->it_clock)) {
		head = p->cpu_timers;
		cputime_expires = &p->cputime_expires;
	} else {
		head = p->signal->cpu_timers;
		cputime_expires = &p->signal->cputime_expires;
	}
	head += CPUCLOCK_WHICH(timer->it_clock);

	listpos = head;
	list_for_each_entry(next, head, entry) {
		if (cpu_time_before(timer->it_clock, nt->expires, next->expires))
			break;
		listpos = &next->entry;
	}
	list_add(&nt->entry, listpos);

	if (listpos == head) {
		union cpu_time_count *exp = &nt->expires;

		/*
		 * We are the new earliest-expiring POSIX 1.b timer, hence
		 * need to update the expiration cache. Take into account that
		 * for process timers we share the expiration cache with itimers
		 * and RLIMIT_CPU, and for thread timers with RLIMIT_RTTIME.
		 */

		switch (CPUCLOCK_WHICH(timer->it_clock)) {
		case CPUCLOCK_PROF:
			if (expires_gt(cputime_expires->prof_exp, exp->cpu))
				cputime_expires->prof_exp = exp->cpu;
			break;
		case CPUCLOCK_VIRT:
			if (expires_gt(cputime_expires->virt_exp, exp->cpu))
				cputime_expires->virt_exp = exp->cpu;
			break;
		case CPUCLOCK_SCHED:
			if (cputime_expires->sched_exp == 0 ||
			    cputime_expires->sched_exp > exp->sched)
				cputime_expires->sched_exp = exp->sched;
			break;
		}
	}
}

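/*
 * Note: the cputime_expires cache updated here is what
 * fastpath_timer_check() consults on every tick, so the hot path can
 * skip walking the timer lists unless the earliest expiry may have
 * passed.
 */
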
/*
 * The timer is locked, fire it and arrange for its reload.
 */
static void cpu_timer_fire(struct k_itimer *timer)
{
	if ((timer->it_sigev_notify & ~SIGEV_THREAD_ID) == SIGEV_NONE) {
		/*
		 * The user doesn't want any signal.
		 */
		timer->it.cpu.expires.sched = 0;
	} else if (unlikely(timer->sigq == NULL)) {
		/*
		 * This is a special case for clock_nanosleep,
		 * not a normal timer from sys_timer_create.
		 */
		wake_up_process(timer->it_process);
		timer->it.cpu.expires.sched = 0;
	} else if (timer->it.cpu.incr.sched == 0) {
		/*
		 * One-shot timer.  Clear it as soon as it's fired.
		 */
		posix_timer_event(timer, 0);
		timer->it.cpu.expires.sched = 0;
	} else if (posix_timer_event(timer, ++timer->it_requeue_pending)) {
		/*
		 * The signal did not get queued because the signal
		 * was ignored, so we won't get any callback to
		 * reload the timer.  But we need to keep it
		 * ticking in case the signal is deliverable next time.
		 */
		posix_cpu_timer_schedule(timer);
	}
}

/*
 * Sample a process (thread group) timer for the given group_leader task.
 * Must be called with tasklist_lock held for reading.
 */
static int cpu_timer_sample_group(const clockid_t which_clock,
				  struct task_struct *p,
				  union cpu_time_count *cpu)
{
	struct task_cputime cputime;

	thread_group_cputimer(p, &cputime);
	switch (CPUCLOCK_WHICH(which_clock)) {
	default:
		return -EINVAL;
	case CPUCLOCK_PROF:
		cpu->cpu = cputime.utime + cputime.stime;
		break;
	case CPUCLOCK_VIRT:
		cpu->cpu = cputime.utime;
		break;
	case CPUCLOCK_SCHED:
		cpu->sched = cputime.sum_exec_runtime + task_delta_exec(p);
		break;
	}
	return 0;
}

/*
 * Guts of sys_timer_settime for CPU timers.
 * This is called with the timer locked and interrupts disabled.
 * If we return TIMER_RETRY, it's necessary to release the timer's lock
 * and try again.  (This happens when the timer is in the middle of firing.)
 */
static int posix_cpu_timer_set(struct k_itimer *timer, int flags,
			       struct itimerspec *new, struct itimerspec *old)
{
	struct task_struct *p = timer->it.cpu.task;
	union cpu_time_count old_expires, new_expires, old_incr, val;
	int ret;

	if (unlikely(p == NULL)) {
		/*
		 * Timer refers to a dead task's clock.
		 */
		return -ESRCH;
	}

	new_expires = timespec_to_sample(timer->it_clock, &new->it_value);

	read_lock(&tasklist_lock);
	/*
	 * We need the tasklist_lock to protect against reaping that
	 * clears p->sighand.  If p has just been reaped, we can no
	 * longer get any information about it at all.
	 */
	if (unlikely(p->sighand == NULL)) {
		read_unlock(&tasklist_lock);
		put_task_struct(p);
		timer->it.cpu.task = NULL;
		return -ESRCH;
	}

	/*
	 * Disarm any old timer after extracting its expiry time.
	 */
	BUG_ON(!irqs_disabled());

	ret = 0;
	old_incr = timer->it.cpu.incr;
	spin_lock(&p->sighand->siglock);
	old_expires = timer->it.cpu.expires;
	if (unlikely(timer->it.cpu.firing)) {
		timer->it.cpu.firing = -1;
		ret = TIMER_RETRY;
	} else
		list_del_init(&timer->it.cpu.entry);

	/*
	 * We need to sample the current value to convert the new
	 * value from relative to absolute, and to convert the
	 * old value from absolute to relative.  To set a process
	 * timer, we need a sample to balance the thread expiry
	 * times (in arm_timer).  With an absolute time, we must
	 * check if it's already passed.  In short, we need a sample.
	 */
	if (CPUCLOCK_PERTHREAD(timer->it_clock)) {
		cpu_clock_sample(timer->it_clock, p, &val);
	} else {
		cpu_timer_sample_group(timer->it_clock, p, &val);
	}

	if (old) {
		if (old_expires.sched == 0) {
			old->it_value.tv_sec = 0;
			old->it_value.tv_nsec = 0;
		} else {
			/*
			 * Update the timer in case it has
			 * overrun already.  If it has,
			 * we'll report it as having overrun
			 * and with the next reloaded timer
			 * already ticking, though we are
			 * swallowing that pending
			 * notification here to install the
			 * new setting.
			 */
			bump_cpu_timer(timer, val);
			if (cpu_time_before(timer->it_clock, val,
					    timer->it.cpu.expires)) {
				old_expires = cpu_time_sub(
					timer->it_clock,
					timer->it.cpu.expires, val);
				sample_to_timespec(timer->it_clock,
						   old_expires,
						   &old->it_value);
			} else {
				old->it_value.tv_nsec = 1;
				old->it_value.tv_sec = 0;
			}
		}
	}

	if (unlikely(ret)) {
		/*
		 * We are colliding with the timer actually firing.
		 * Punt after filling in the timer's old value, and
		 * disable this firing since we are already reporting
		 * it as an overrun (thanks to bump_cpu_timer above).
		 */
		spin_unlock(&p->sighand->siglock);
		read_unlock(&tasklist_lock);
		goto out;
	}

	if (new_expires.sched != 0 && !(flags & TIMER_ABSTIME)) {
		cpu_time_add(timer->it_clock, &new_expires, val);
	}

	/*
	 * Install the new expiry time (or zero).
	 * For a timer with no notification action, we don't actually
	 * arm the timer (we'll just fake it for timer_gettime).
	 */
	timer->it.cpu.expires = new_expires;
	if (new_expires.sched != 0 &&
	    cpu_time_before(timer->it_clock, val, new_expires)) {
		arm_timer(timer);
	}

	spin_unlock(&p->sighand->siglock);
	read_unlock(&tasklist_lock);

	/*
	 * Install the new reload setting, and
	 * set up the signal and overrun bookkeeping.
	 */
	timer->it.cpu.incr = timespec_to_sample(timer->it_clock,
						&new->it_interval);

	/*
	 * This acts as a modification timestamp for the timer,
	 * so any automatic reload attempt will punt on seeing
	 * that we have reset the timer manually.
	 */
	timer->it_requeue_pending = (timer->it_requeue_pending + 2) &
		~REQUEUE_PENDING;
	timer->it_overrun_last = 0;
	timer->it_overrun = -1;

	if (new_expires.sched != 0 &&
	    !cpu_time_before(timer->it_clock, val, new_expires)) {
		/*
		 * The designated time already passed, so we notify
		 * immediately, even if the thread never runs to
		 * accumulate more time on this clock.
		 */
		cpu_timer_fire(timer);
	}

	ret = 0;
 out:
	if (old) {
		sample_to_timespec(timer->it_clock,
				   old_incr, &old->it_interval);
	}
	return ret;
}

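/*
 * Illustrative userspace entry point for this function (sketch, values
 * hypothetical): arm a 100ms periodic CPU-time timer on timer tid.
 *
 *	struct itimerspec its = {
 *		.it_value    = { .tv_sec = 0, .tv_nsec = 100000000 },
 *		.it_interval = { .tv_sec = 0, .tv_nsec = 100000000 },
 *	};
 *	timer_settime(tid, 0, &its, NULL);	(flags == 0: relative expiry)
 */
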
static void posix_cpu_timer_get(struct k_itimer *timer, struct itimerspec *itp)
{
	union cpu_time_count now;
	struct task_struct *p = timer->it.cpu.task;
	int clear_dead;

	/*
	 * Easy part: convert the reload time.
	 */
	sample_to_timespec(timer->it_clock,
			   timer->it.cpu.incr, &itp->it_interval);

	if (timer->it.cpu.expires.sched == 0) {	/* Timer not armed at all.  */
		itp->it_value.tv_sec = itp->it_value.tv_nsec = 0;
		return;
	}

	if (unlikely(p == NULL)) {
		/*
		 * This task already died and the timer will never fire.
		 * In this case, expires is actually the dead value.
		 */
	dead:
		sample_to_timespec(timer->it_clock, timer->it.cpu.expires,
				   &itp->it_value);
		return;
	}

	/*
	 * Sample the clock to take the difference with the expiry time.
	 */
	if (CPUCLOCK_PERTHREAD(timer->it_clock)) {
		cpu_clock_sample(timer->it_clock, p, &now);
		clear_dead = p->exit_state;
	} else {
		read_lock(&tasklist_lock);
		if (unlikely(p->sighand == NULL)) {
			/*
			 * The process has been reaped.
			 * We can't even collect a sample any more.
			 * Call the timer disarmed, nothing else to do.
			 */
			put_task_struct(p);
			timer->it.cpu.task = NULL;
			timer->it.cpu.expires.sched = 0;
			read_unlock(&tasklist_lock);
			goto dead;
		} else {
			cpu_timer_sample_group(timer->it_clock, p, &now);
			clear_dead = (unlikely(p->exit_state) &&
				      thread_group_empty(p));
		}
		read_unlock(&tasklist_lock);
	}

	if (unlikely(clear_dead)) {
		/*
		 * We've noticed that the thread is dead, but
		 * not yet reaped.  Take this opportunity to
		 * drop our task ref.
		 */
		clear_dead_task(timer, now);
		goto dead;
	}

	if (cpu_time_before(timer->it_clock, now, timer->it.cpu.expires)) {
		sample_to_timespec(timer->it_clock,
				   cpu_time_sub(timer->it_clock,
						timer->it.cpu.expires, now),
				   &itp->it_value);
	} else {
		/*
		 * The timer should have expired already, but the firing
		 * hasn't taken place yet.  Say it's just about to expire.
		 */
		itp->it_value.tv_nsec = 1;
		itp->it_value.tv_sec = 0;
	}
}

/*
 * Check for any per-thread CPU timers that have fired and move them off
 * the tsk->cpu_timers[N] list onto the firing list.  Here we update the
 * tsk->cputime_expires values to reflect the remaining thread CPU timers.
 */
static void check_thread_timers(struct task_struct *tsk,
				struct list_head *firing)
{
	int maxfire;
	struct list_head *timers = tsk->cpu_timers;
	struct signal_struct *const sig = tsk->signal;
	unsigned long soft;

	maxfire = 20;
	tsk->cputime_expires.prof_exp = 0;
	while (!list_empty(timers)) {
		struct cpu_timer_list *t = list_first_entry(timers,
						      struct cpu_timer_list,
						      entry);
		if (!--maxfire || prof_ticks(tsk) < t->expires.cpu) {
			tsk->cputime_expires.prof_exp = t->expires.cpu;
			break;
		}
		t->firing = 1;
		list_move_tail(&t->entry, firing);
	}

	++timers;
	maxfire = 20;
	tsk->cputime_expires.virt_exp = 0;
	while (!list_empty(timers)) {
		struct cpu_timer_list *t = list_first_entry(timers,
						      struct cpu_timer_list,
						      entry);
		if (!--maxfire || virt_ticks(tsk) < t->expires.cpu) {
			tsk->cputime_expires.virt_exp = t->expires.cpu;
			break;
		}
		t->firing = 1;
		list_move_tail(&t->entry, firing);
	}

	++timers;
	maxfire = 20;
	tsk->cputime_expires.sched_exp = 0;
	while (!list_empty(timers)) {
		struct cpu_timer_list *t = list_first_entry(timers,
						      struct cpu_timer_list,
						      entry);
		if (!--maxfire || tsk->se.sum_exec_runtime < t->expires.sched) {
			tsk->cputime_expires.sched_exp = t->expires.sched;
			break;
		}
		t->firing = 1;
		list_move_tail(&t->entry, firing);
	}

	/*
	 * Check for the special case thread timers.
	 */
	soft = ACCESS_ONCE(sig->rlim[RLIMIT_RTTIME].rlim_cur);
	if (soft != RLIM_INFINITY) {
		unsigned long hard =
			ACCESS_ONCE(sig->rlim[RLIMIT_RTTIME].rlim_max);

		if (hard != RLIM_INFINITY &&
		    tsk->rt.timeout > DIV_ROUND_UP(hard, USEC_PER_SEC/HZ)) {
			/*
			 * At the hard limit, we just die.
			 * No need to calculate anything else now.
			 */
			__group_send_sig_info(SIGKILL, SEND_SIG_PRIV, tsk);
			return;
		}
		if (tsk->rt.timeout > DIV_ROUND_UP(soft, USEC_PER_SEC/HZ)) {
			/*
			 * At the soft limit, send a SIGXCPU every second.
			 */
			if (soft < hard) {
				soft += USEC_PER_SEC;
				sig->rlim[RLIMIT_RTTIME].rlim_cur = soft;
			}
			printk(KERN_INFO
				"RT Watchdog Timeout: %s[%d]\n",
				tsk->comm, task_pid_nr(tsk));
			__group_send_sig_info(SIGXCPU, SEND_SIG_PRIV, tsk);
		}
	}
}

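/*
 * Illustrative only: the RLIMIT_RTTIME watchdog above is what a
 * SCHED_FIFO/SCHED_RR task opts into with, e.g. (hypothetical limits,
 * in microseconds):
 *
 *	struct rlimit rl = { .rlim_cur = 500000, .rlim_max = 1000000 };
 *	setrlimit(RLIMIT_RTTIME, &rl);	(SIGXCPU at 0.5s, SIGKILL at 1s)
 */
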
static void stop_process_timers(struct signal_struct *sig)
{
	struct thread_group_cputimer *cputimer = &sig->cputimer;
	unsigned long flags;

	raw_spin_lock_irqsave(&cputimer->lock, flags);
	cputimer->running = 0;
	raw_spin_unlock_irqrestore(&cputimer->lock, flags);
}

static u32 onecputick;

static void check_cpu_itimer(struct task_struct *tsk, struct cpu_itimer *it,
			     cputime_t *expires, cputime_t cur_time, int signo)
{
	if (!it->expires)
		return;

	if (cur_time >= it->expires) {
		if (it->incr) {
			it->expires += it->incr;
			it->error += it->incr_error;
			if (it->error >= onecputick) {
				it->expires -= cputime_one_jiffy;
				it->error -= onecputick;
			}
		} else {
			it->expires = 0;
		}

		trace_itimer_expire(signo == SIGPROF ?
				    ITIMER_PROF : ITIMER_VIRTUAL,
				    tsk->signal->leader_pid, cur_time);
		__group_send_sig_info(signo, SEND_SIG_PRIV, tsk);
	}

	if (it->expires && (!*expires || it->expires < *expires)) {
		*expires = it->expires;
	}
}

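/*
 * Worked example of the error accounting above (values hypothetical,
 * HZ = 1000 assumed): a 2.4ms itimer interval is rounded up to 3 jiffies,
 * so it->incr_error carries the 0.6ms of rounding per period.  After two
 * periods the accumulated error (1.2ms) exceeds onecputick (1ms) and one
 * jiffy is handed back, keeping the long-run rate close to what the user
 * asked for.
 */
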
/**
 * task_cputime_zero - Check a task_cputime struct for all zero fields.
 *
 * @cputime:	The struct to compare.
 *
 * Checks @cputime to see if all fields are zero.  Returns true if all fields
 * are zero, false if any field is nonzero.
 */
static inline int task_cputime_zero(const struct task_cputime *cputime)
{
	if (!cputime->utime && !cputime->stime && !cputime->sum_exec_runtime)
		return 1;
	return 0;
}

/*
 * Check for any process-wide CPU timers that have fired and move them
 * off the tsk->signal->cpu_timers list onto the firing list.  Per-thread
 * timers have already been taken off.
 */
static void check_process_timers(struct task_struct *tsk,
				 struct list_head *firing)
{
	int maxfire;
	struct signal_struct *const sig = tsk->signal;
	cputime_t utime, ptime, virt_expires, prof_expires;
	unsigned long long sum_sched_runtime, sched_expires;
	struct list_head *timers = sig->cpu_timers;
	struct task_cputime cputime;
	unsigned long soft;

	/*
	 * Collect the current process totals.
	 */
	thread_group_cputimer(tsk, &cputime);
	utime = cputime.utime;
	ptime = utime + cputime.stime;
	sum_sched_runtime = cputime.sum_exec_runtime;

	maxfire = 20;
	prof_expires = 0;
	while (!list_empty(timers)) {
		struct cpu_timer_list *tl = list_first_entry(timers,
						      struct cpu_timer_list,
						      entry);
		if (!--maxfire || ptime < tl->expires.cpu) {
			prof_expires = tl->expires.cpu;
			break;
		}
		tl->firing = 1;
		list_move_tail(&tl->entry, firing);
	}

	++timers;
	maxfire = 20;
	virt_expires = 0;
	while (!list_empty(timers)) {
		struct cpu_timer_list *tl = list_first_entry(timers,
						      struct cpu_timer_list,
						      entry);
		if (!--maxfire || utime < tl->expires.cpu) {
			virt_expires = tl->expires.cpu;
			break;
		}
		tl->firing = 1;
		list_move_tail(&tl->entry, firing);
	}

	++timers;
	maxfire = 20;
	sched_expires = 0;
	while (!list_empty(timers)) {
		struct cpu_timer_list *tl = list_first_entry(timers,
						      struct cpu_timer_list,
						      entry);
		if (!--maxfire || sum_sched_runtime < tl->expires.sched) {
			sched_expires = tl->expires.sched;
			break;
		}
		tl->firing = 1;
		list_move_tail(&tl->entry, firing);
	}

	/*
	 * Check for the special case process timers.
	 */
	check_cpu_itimer(tsk, &sig->it[CPUCLOCK_PROF], &prof_expires, ptime,
			 SIGPROF);
	check_cpu_itimer(tsk, &sig->it[CPUCLOCK_VIRT], &virt_expires, utime,
			 SIGVTALRM);
	soft = ACCESS_ONCE(sig->rlim[RLIMIT_CPU].rlim_cur);
	if (soft != RLIM_INFINITY) {
		unsigned long psecs = cputime_to_secs(ptime);
		unsigned long hard =
			ACCESS_ONCE(sig->rlim[RLIMIT_CPU].rlim_max);
		cputime_t x;
		if (psecs >= hard) {
			/*
			 * At the hard limit, we just die.
			 * No need to calculate anything else now.
			 */
			__group_send_sig_info(SIGKILL, SEND_SIG_PRIV, tsk);
			return;
		}
		if (psecs >= soft) {
			/*
			 * At the soft limit, send a SIGXCPU every second.
			 */
			__group_send_sig_info(SIGXCPU, SEND_SIG_PRIV, tsk);
			if (soft < hard) {
				soft++;
				sig->rlim[RLIMIT_CPU].rlim_cur = soft;
			}
		}
		x = secs_to_cputime(soft);
		if (!prof_expires || x < prof_expires) {
			prof_expires = x;
		}
	}

	sig->cputime_expires.prof_exp = prof_expires;
	sig->cputime_expires.virt_exp = virt_expires;
	sig->cputime_expires.sched_exp = sched_expires;
	if (task_cputime_zero(&sig->cputime_expires))
		stop_process_timers(sig);
}

/*
 * This is called from the signal code (via do_schedule_next_timer)
 * when the last timer signal was delivered and we have to reload the timer.
 */
void posix_cpu_timer_schedule(struct k_itimer *timer)
{
	struct task_struct *p = timer->it.cpu.task;
	union cpu_time_count now;

	if (unlikely(p == NULL))
		/*
		 * The task was cleaned up already, no future firings.
		 */
		goto out;

	/*
	 * Fetch the current sample and update the timer's expiry time.
	 */
	if (CPUCLOCK_PERTHREAD(timer->it_clock)) {
		cpu_clock_sample(timer->it_clock, p, &now);
		bump_cpu_timer(timer, now);
		if (unlikely(p->exit_state)) {
			clear_dead_task(timer, now);
			goto out;
		}
		read_lock(&tasklist_lock); /* arm_timer needs it.  */
		spin_lock(&p->sighand->siglock);
	} else {
		read_lock(&tasklist_lock);
		if (unlikely(p->sighand == NULL)) {
			/*
			 * The process has been reaped.
			 * We can't even collect a sample any more.
			 */
			put_task_struct(p);
			timer->it.cpu.task = p = NULL;
			timer->it.cpu.expires.sched = 0;
			goto out_unlock;
		} else if (unlikely(p->exit_state) && thread_group_empty(p)) {
			/*
			 * We've noticed that the thread is dead, but
			 * not yet reaped.  Take this opportunity to
			 * drop our task ref.
			 */
			clear_dead_task(timer, now);
			goto out_unlock;
		}
		spin_lock(&p->sighand->siglock);
		cpu_timer_sample_group(timer->it_clock, p, &now);
		bump_cpu_timer(timer, now);
		/* Leave the tasklist_lock locked for the call below.  */
	}

	/*
	 * Now re-arm for the new expiry time.
	 */
	BUG_ON(!irqs_disabled());
	arm_timer(timer);
	spin_unlock(&p->sighand->siglock);

out_unlock:
	read_unlock(&tasklist_lock);

out:
	timer->it_overrun_last = timer->it_overrun;
	timer->it_overrun = -1;
	++timer->it_requeue_pending;
}

/**
 * task_cputime_expired - Compare two task_cputime entities.
 *
 * @sample:	The task_cputime structure to be checked for expiration.
 * @expires:	Expiration times, against which @sample will be checked.
 *
 * Checks @sample against @expires to see if any field of @sample has expired.
 * Returns true if any field of @sample is greater than or equal to the
 * corresponding field of @expires when that field is set.  Otherwise
 * returns false.
 */
static inline int task_cputime_expired(const struct task_cputime *sample,
				       const struct task_cputime *expires)
{
	if (expires->utime && sample->utime >= expires->utime)
		return 1;
	if (expires->stime && sample->utime + sample->stime >= expires->stime)
		return 1;
	if (expires->sum_exec_runtime != 0 &&
	    sample->sum_exec_runtime >= expires->sum_exec_runtime)
		return 1;
	return 0;
}

/**
 * fastpath_timer_check - POSIX CPU timers fast path.
 *
 * @tsk:	The task (thread) being checked.
 *
 * Check the task and thread group timers.  If both are zero (there are no
 * timers set) return false.  Otherwise snapshot the task and thread group
 * timers and compare them with the corresponding expiration times.  Return
 * true if a timer has expired, else return false.
 */
static inline int fastpath_timer_check(struct task_struct *tsk)
{
	struct signal_struct *sig;

	if (!task_cputime_zero(&tsk->cputime_expires)) {
		struct task_cputime task_sample = {
			.utime = tsk->utime,
			.stime = tsk->stime,
			.sum_exec_runtime = tsk->se.sum_exec_runtime
		};

		if (task_cputime_expired(&task_sample, &tsk->cputime_expires))
			return 1;
	}

	sig = tsk->signal;
	if (sig->cputimer.running) {
		struct task_cputime group_sample;

		raw_spin_lock(&sig->cputimer.lock);
		group_sample = sig->cputimer.cputime;
		raw_spin_unlock(&sig->cputimer.lock);

		if (task_cputime_expired(&group_sample, &sig->cputime_expires))
			return 1;
	}

	return 0;
}

/*
 * This is called from the timer interrupt handler.  The irq handler has
 * already updated our counts.  We need to check if any timers fire now.
 * Interrupts are disabled.
 */
void run_posix_cpu_timers(struct task_struct *tsk)
{
	LIST_HEAD(firing);
	struct k_itimer *timer, *next;
	unsigned long flags;

	BUG_ON(!irqs_disabled());

	/*
	 * The fast path checks that there are no expired thread or thread
	 * group timers.  If that's so, just return.
	 */
	if (!fastpath_timer_check(tsk))
		return;

	if (!lock_task_sighand(tsk, &flags))
		return;
	/*
	 * Here we take off tsk->signal->cpu_timers[N] and
	 * tsk->cpu_timers[N] all the timers that are firing, and
	 * put them on the firing list.
	 */
	check_thread_timers(tsk, &firing);
	/*
	 * If there are any active process wide timers (POSIX 1.b, itimers,
	 * RLIMIT_CPU) cputimer must be running.
	 */
	if (tsk->signal->cputimer.running)
		check_process_timers(tsk, &firing);

	/*
	 * We must release these locks before taking any timer's lock.
	 * There is a potential race with timer deletion here, as the
	 * siglock now protects our private firing list.  We have set
	 * the firing flag in each timer, so that a deletion attempt
	 * that gets the timer lock before we do will give it up and
	 * spin until we've taken care of that timer below.
	 */
	unlock_task_sighand(tsk, &flags);

	/*
	 * Now that all the timers on our list have the firing flag,
	 * no one will touch their list entries but us.  We'll take
	 * each timer's lock before clearing its firing flag, so no
	 * timer call will interfere.
	 */
	list_for_each_entry_safe(timer, next, &firing, it.cpu.entry) {
		int cpu_firing;

		spin_lock(&timer->it_lock);
		list_del_init(&timer->it.cpu.entry);
		cpu_firing = timer->it.cpu.firing;
		timer->it.cpu.firing = 0;
		/*
		 * The firing flag is -1 if we collided with a reset
		 * of the timer, which already reported this
		 * almost-firing as an overrun.  So don't generate an event.
		 */
		if (likely(cpu_firing >= 0))
			cpu_timer_fire(timer);
		spin_unlock(&timer->it_lock);
	}
}

/*
 * Set one of the process-wide special case CPU timers or RLIMIT_CPU.
 * The tsk->sighand->siglock must be held by the caller.
 */
void set_process_cpu_timer(struct task_struct *tsk, unsigned int clock_idx,
			   cputime_t *newval, cputime_t *oldval)
{
	union cpu_time_count now;

	BUG_ON(clock_idx == CPUCLOCK_SCHED);
	cpu_timer_sample_group(clock_idx, tsk, &now);

	if (oldval) {
		/*
		 * We are setting itimer.  The *oldval is absolute and we
		 * update it to be relative; the *newval argument is relative
		 * and we update it to be absolute.
		 */
		if (*oldval) {
			if (*oldval <= now.cpu) {
				/* Just about to fire. */
				*oldval = cputime_one_jiffy;
			} else {
				*oldval -= now.cpu;
			}
		}

		if (!*newval)
			return;
		*newval += now.cpu;
	}

	/*
	 * Update the expiration cache if we are the earliest timer, or if
	 * the RLIMIT_CPU limit is earlier than the current prof_exp expiry.
	 */
	switch (clock_idx) {
	case CPUCLOCK_PROF:
		if (expires_gt(tsk->signal->cputime_expires.prof_exp, *newval))
			tsk->signal->cputime_expires.prof_exp = *newval;
		break;
	case CPUCLOCK_VIRT:
		if (expires_gt(tsk->signal->cputime_expires.virt_exp, *newval))
			tsk->signal->cputime_expires.virt_exp = *newval;
		break;
	}
}

static int do_cpu_nanosleep(const clockid_t which_clock, int flags,
			    struct timespec *rqtp, struct itimerspec *it)
{
	struct k_itimer timer;
	int error;

	/*
	 * Set up a temporary timer and then wait for it to go off.
	 */
	memset(&timer, 0, sizeof timer);
	spin_lock_init(&timer.it_lock);
	timer.it_clock = which_clock;
	timer.it_overrun = -1;
	error = posix_cpu_timer_create(&timer);
	timer.it_process = current;
	if (!error) {
		static struct itimerspec zero_it;

		memset(it, 0, sizeof *it);
		it->it_value = *rqtp;

		spin_lock_irq(&timer.it_lock);
		error = posix_cpu_timer_set(&timer, flags, it, NULL);
		if (error) {
			spin_unlock_irq(&timer.it_lock);
			return error;
		}

		while (!signal_pending(current)) {
			if (timer.it.cpu.expires.sched == 0) {
				/*
				 * Our timer fired and was reset.
				 */
				spin_unlock_irq(&timer.it_lock);
				return 0;
			}

			/*
			 * Block until cpu_timer_fire (or a signal) wakes us.
			 */
			__set_current_state(TASK_INTERRUPTIBLE);
			spin_unlock_irq(&timer.it_lock);
			schedule();
			spin_lock_irq(&timer.it_lock);
		}

		/*
		 * We were interrupted by a signal.
		 */
		sample_to_timespec(which_clock, timer.it.cpu.expires, rqtp);
		posix_cpu_timer_set(&timer, 0, &zero_it, it);
		spin_unlock_irq(&timer.it_lock);

		if ((it->it_value.tv_sec | it->it_value.tv_nsec) == 0) {
			/*
			 * It actually did fire already.
			 */
			return 0;
		}

		error = -ERESTART_RESTARTBLOCK;
	}

	return error;
}

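/*
 * Sketch of the userspace call served by do_cpu_nanosleep() (illustrative,
 * error handling omitted): wait until the process has burned one more
 * second of CPU time.  Only meaningful if other threads keep running,
 * since the sleeping thread itself accumulates no CPU time.
 *
 *	struct timespec ts = { .tv_sec = 1, .tv_nsec = 0 };
 *	clock_nanosleep(CLOCK_PROCESS_CPUTIME_ID, 0, &ts, NULL);
 */
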
static long posix_cpu_nsleep_restart(struct restart_block *restart_block);

static int posix_cpu_nsleep(const clockid_t which_clock, int flags,
			    struct timespec *rqtp, struct timespec __user *rmtp)
{
	struct restart_block *restart_block =
		&current_thread_info()->restart_block;
	struct itimerspec it;
	int error;

	/*
	 * Diagnose required errors first.
	 */
	if (CPUCLOCK_PERTHREAD(which_clock) &&
	    (CPUCLOCK_PID(which_clock) == 0 ||
	     CPUCLOCK_PID(which_clock) == current->pid))
		return -EINVAL;

	error = do_cpu_nanosleep(which_clock, flags, rqtp, &it);

	if (error == -ERESTART_RESTARTBLOCK) {
		if (flags & TIMER_ABSTIME)
			return -ERESTARTNOHAND;
		/*
		 * Report back to the user the time still remaining.
		 */
		if (rmtp && copy_to_user(rmtp, &it.it_value, sizeof *rmtp))
			return -EFAULT;

		restart_block->fn = posix_cpu_nsleep_restart;
		restart_block->nanosleep.clockid = which_clock;
		restart_block->nanosleep.rmtp = rmtp;
		restart_block->nanosleep.expires = timespec_to_ns(rqtp);
	}
	return error;
}

static long posix_cpu_nsleep_restart(struct restart_block *restart_block)
{
	clockid_t which_clock = restart_block->nanosleep.clockid;
	struct timespec t;
	struct itimerspec it;
	int error;

	t = ns_to_timespec(restart_block->nanosleep.expires);

	error = do_cpu_nanosleep(which_clock, TIMER_ABSTIME, &t, &it);

	if (error == -ERESTART_RESTARTBLOCK) {
		struct timespec __user *rmtp = restart_block->nanosleep.rmtp;
		/*
		 * Report back to the user the time still remaining.
		 */
		if (rmtp && copy_to_user(rmtp, &it.it_value, sizeof *rmtp))
			return -EFAULT;

		restart_block->nanosleep.expires = timespec_to_ns(&t);
	}
	return error;
}

#define PROCESS_CLOCK	MAKE_PROCESS_CPUCLOCK(0, CPUCLOCK_SCHED)
#define THREAD_CLOCK	MAKE_THREAD_CPUCLOCK(0, CPUCLOCK_SCHED)

static int process_cpu_clock_getres(const clockid_t which_clock,
				    struct timespec *tp)
{
	return posix_cpu_clock_getres(PROCESS_CLOCK, tp);
}
static int process_cpu_clock_get(const clockid_t which_clock,
				 struct timespec *tp)
{
	return posix_cpu_clock_get(PROCESS_CLOCK, tp);
}
static int process_cpu_timer_create(struct k_itimer *timer)
{
	timer->it_clock = PROCESS_CLOCK;
	return posix_cpu_timer_create(timer);
}
static int process_cpu_nsleep(const clockid_t which_clock, int flags,
			      struct timespec *rqtp,
			      struct timespec __user *rmtp)
{
	return posix_cpu_nsleep(PROCESS_CLOCK, flags, rqtp, rmtp);
}
static long process_cpu_nsleep_restart(struct restart_block *restart_block)
{
	return -EINVAL;
}
static int thread_cpu_clock_getres(const clockid_t which_clock,
				   struct timespec *tp)
{
	return posix_cpu_clock_getres(THREAD_CLOCK, tp);
}
static int thread_cpu_clock_get(const clockid_t which_clock,
				struct timespec *tp)
{
	return posix_cpu_clock_get(THREAD_CLOCK, tp);
}
static int thread_cpu_timer_create(struct k_itimer *timer)
{
	timer->it_clock = THREAD_CLOCK;
	return posix_cpu_timer_create(timer);
}

struct k_clock clock_posix_cpu = {
	.clock_getres	= posix_cpu_clock_getres,
	.clock_set	= posix_cpu_clock_set,
	.clock_get	= posix_cpu_clock_get,
	.timer_create	= posix_cpu_timer_create,
	.nsleep		= posix_cpu_nsleep,
	.nsleep_restart	= posix_cpu_nsleep_restart,
	.timer_set	= posix_cpu_timer_set,
	.timer_del	= posix_cpu_timer_del,
	.timer_get	= posix_cpu_timer_get,
};

static __init int init_posix_cpu_timers(void)
{
	struct k_clock process = {
		.clock_getres	= process_cpu_clock_getres,
		.clock_get	= process_cpu_clock_get,
		.timer_create	= process_cpu_timer_create,
		.nsleep		= process_cpu_nsleep,
		.nsleep_restart	= process_cpu_nsleep_restart,
	};
	struct k_clock thread = {
		.clock_getres	= thread_cpu_clock_getres,
		.clock_get	= thread_cpu_clock_get,
		.timer_create	= thread_cpu_timer_create,
	};
	struct timespec ts;

	posix_timers_register_clock(CLOCK_PROCESS_CPUTIME_ID, &process);
	posix_timers_register_clock(CLOCK_THREAD_CPUTIME_ID, &thread);

	cputime_to_timespec(cputime_one_jiffy, &ts);
	onecputick = ts.tv_nsec;
	WARN_ON(ts.tv_sec != 0);

	return 0;
}
__initcall(init_posix_cpu_timers);