Merge branch 'akpm-current/current'

[karo-tx-linux.git] / include / linux / sched.h
diff --git a/include/linux/sched.h b/include/linux/sched.h

index 53f97eb8dbc7660195f0ebc27893bf68c65cdff6..68a0e84463a0eb86b273fe14b49bd9b01feab21f 100644 (file)
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -16,6 +16,7 @@ struct sched_param {
  #include <linux/types.h>
  #include <linux/timex.h>
  #include <linux/jiffies.h>
+#include <linux/plist.h>
  #include <linux/rbtree.h>
  #include <linux/thread_info.h>
  #include <linux/cpumask.h>
@@ -56,6 +57,70 @@ struct sched_param {
  
  #include <asm/processor.h>
  
+#define SCHED_ATTR_SIZE_VER0   48      /* sizeof first published struct */
+
+/*
+ * Extended scheduling parameters data structure.
+ *
+ * This is needed because the original struct sched_param cannot be
+ * altered without introducing ABI issues with legacy applications
+ * (e.g., in sched_getparam()).
+ *
+ * However, the possibility of specifying more than just a priority for
+ * the tasks may be useful for a wide variety of application fields, e.g.,
+ * multimedia, streaming, automation and control, and many others.
+ *
+ * This variant (sched_attr) is meant to describe a so-called
+ * sporadic time-constrained task. In such a model, a task is specified by:
+ *  - the activation period or minimum instance inter-arrival time;
+ *  - the maximum (or average, depending on the actual scheduling
+ *    discipline) computation time of all instances, a.k.a. runtime;
+ *  - the deadline (relative to the actual activation time) of each
+ *    instance.
+ * Very briefly, a periodic (sporadic) task asks for the execution of
+ * some specific computation --which is typically called an instance--
+ * (at most) every period. Moreover, each instance typically lasts no more
+ * than the runtime and must be completed by time instant t equal to
+ * the instance activation time + the deadline.
+ *
+ * This is reflected by the actual fields of the sched_attr structure:
+ *
+ *  @size              size of the structure, for fwd/bwd compat.
+ *
+ *  @sched_policy      task's scheduling policy
+ *  @sched_flags       for customizing the scheduler behaviour
+ *  @sched_nice        task's nice value      (SCHED_NORMAL/BATCH)
+ *  @sched_priority    task's static priority (SCHED_FIFO/RR)
+ *  @sched_deadline    representative of the task's deadline
+ *  @sched_runtime     representative of the task's runtime
+ *  @sched_period      representative of the task's period
+ *
+ * Given this task model, there is a multiplicity of scheduling algorithms
+ * and policies that can be used to ensure all the tasks will meet their
+ * timing constraints.
+ *
+ * As of now, the SCHED_DEADLINE policy (sched_dl scheduling class) is the
+ * only user of this new interface. More information about the algorithm
+ * is available in the scheduling class file or in Documentation/.
+ */
+struct sched_attr {
+       u32 size;
+
+       u32 sched_policy;
+       u64 sched_flags;
+
+       /* SCHED_NORMAL, SCHED_BATCH */
+       s32 sched_nice;
+
+       /* SCHED_FIFO, SCHED_RR */
+       u32 sched_priority;
+
+       /* SCHED_DEADLINE */
+       u64 sched_runtime;
+       u64 sched_deadline;
+       u64 sched_period;
+};
+
  struct exec_domain;
  struct futex_pi_state;
  struct robust_list_head;
@@ -164,11 +229,10 @@ extern char ___assert_task_state[1 - 2*!!(
  /* get_task_state() */
  #define TASK_REPORT            (TASK_RUNNING | TASK_INTERRUPTIBLE | \
                                  TASK_UNINTERRUPTIBLE | __TASK_STOPPED | \
-                                __TASK_TRACED)
+                                __TASK_TRACED | EXIT_ZOMBIE | EXIT_DEAD)
  
  #define task_is_traced(task)   ((task->state & __TASK_TRACED) != 0)
  #define task_is_stopped(task)  ((task->state & __TASK_STOPPED) != 0)
-#define task_is_dead(task)     ((task)->exit_state != 0)
  #define task_is_stopped_or_traced(task)        \
                         ((task->state & (__TASK_STOPPED | __TASK_TRACED)) != 0)
  #define task_contributes_to_load(task) \
@@ -327,22 +391,33 @@ arch_get_unmapped_area_topdown(struct file *filp, unsigned long addr,
  static inline void arch_pick_mmap_layout(struct mm_struct *mm) {}
  #endif
  
-
-extern void set_dumpable(struct mm_struct *mm, int value);
-extern int get_dumpable(struct mm_struct *mm);
-
  #define SUID_DUMP_DISABLE      0       /* No setuid dumping */
  #define SUID_DUMP_USER         1       /* Dump as user of process */
  #define SUID_DUMP_ROOT         2       /* Dump as root */
  
  /* mm flags */
-/* dumpable bits */
-#define MMF_DUMPABLE      0  /* core dump is permitted */
-#define MMF_DUMP_SECURELY 1  /* core file is readable only by root */
  
+/* for SUID_DUMP_* above */
  #define MMF_DUMPABLE_BITS 2
  #define MMF_DUMPABLE_MASK ((1 << MMF_DUMPABLE_BITS) - 1)
  
+extern void set_dumpable(struct mm_struct *mm, int value);
+/*
+ * Returns the dumpable value (see SUID_DUMP_*) kept in the bits of
+ * @mm_flags selected by MMF_DUMPABLE_MASK. Callers that use this to
+ * check for privilege transitions must compare the result against
+ * SUID_DUMP_USER rather than treating it as a boolean value.
+ */
+static inline int __get_dumpable(unsigned long mm_flags)
+{
+       return mm_flags & MMF_DUMPABLE_MASK;
+}
+
+static inline int get_dumpable(struct mm_struct *mm)
+{
+       return __get_dumpable(mm->flags);
+}
+
  /* coredump filter bits */
  #define MMF_DUMP_ANON_PRIVATE  2
  #define MMF_DUMP_ANON_SHARED   3
@@ -485,6 +560,7 @@ struct signal_struct {
         atomic_t                sigcnt;
         atomic_t                live;
         int                     nr_threads;
+       struct list_head        thread_head;
  
         wait_queue_head_t       wait_chldexit;  /* for wait4() */
  
@@ -1029,6 +1105,51 @@ struct sched_rt_entity {
  #endif
  };
  
+struct sched_dl_entity {
+       struct rb_node  rb_node;
+
+       /*
+        * Original scheduling parameters. Copied here from sched_attr
+        * during sched_setscheduler2(), they will remain the same until
+        * the next sched_setscheduler2().
+        */
+       u64 dl_runtime;         /* maximum runtime for each instance    */
+       u64 dl_deadline;        /* relative deadline of each instance   */
+       u64 dl_period;          /* separation of two instances (period) */
+       u64 dl_bw;              /* dl_runtime / dl_deadline             */
+
+       /*
+        * Actual scheduling parameters. Initialized with the values above,
+        * they are continuously updated during task execution. Note that
+        * the remaining runtime could be < 0 in case we are in overrun.
+        */
+       s64 runtime;            /* remaining runtime for this instance  */
+       u64 deadline;           /* absolute deadline for this instance  */
+       unsigned int flags;     /* specifying the scheduler behaviour   */
+
+       /*
+        * Some bool flags:
+        *
+        * @dl_throttled tells if we exhausted the runtime. If so, the
+        * task has to wait for a replenishment to be performed at the
+        * next firing of dl_timer.
+        *
+        * @dl_new tells if a new instance arrived. If so, we must
+        * start executing it with full runtime and reset its absolute
+        * deadline.
+        *
+        * @dl_boosted tells if we are boosted due to DI. If so, we are
+        * outside bandwidth enforcement mechanism (but only until we
+        * exit the critical section).
+        */
+       int dl_throttled, dl_new, dl_boosted;
+
+       /*
+        * Bandwidth enforcement timer. Each -deadline task has its
+        * own bandwidth to be enforced, thus we need one timer per task.
+        */
+       struct hrtimer dl_timer;
+};
  
  struct rcu_node;
  
@@ -1065,6 +1186,7 @@ struct task_struct {
  #ifdef CONFIG_CGROUP_SCHED
         struct task_group *sched_task_group;
  #endif
+       struct sched_dl_entity dl;
  
  #ifdef CONFIG_PREEMPT_NOTIFIERS
         /* list of struct preempt_notifier: */
@@ -1098,6 +1220,7 @@ struct task_struct {
         struct list_head tasks;
  #ifdef CONFIG_SMP
         struct plist_node pushable_tasks;
+       struct rb_node pushable_dl_tasks;
  #endif
  
         struct mm_struct *mm, *active_mm;
@@ -1116,7 +1239,6 @@ struct task_struct {
         /* Used for emulating ABI behavior of previous Linux versions */
         unsigned int personality;
  
-       unsigned did_exec:1;
         unsigned in_execve:1;   /* Tell the LSMs that the process is doing an
                                  * execve */
         unsigned in_iowait:1;
@@ -1160,6 +1282,7 @@ struct task_struct {
         /* PID/PID hash table linkage. */
         struct pid_link pids[PIDTYPE_MAX];
         struct list_head thread_group;
+       struct list_head thread_node;
  
         struct completion *vfork_done;          /* for vfork() */
         int __user *set_child_tid;              /* CLONE_CHILD_SETTID */
@@ -1249,9 +1372,12 @@ struct task_struct {
  
  #ifdef CONFIG_RT_MUTEXES
         /* PI waiters blocked on a rt_mutex held by this task */
-       struct plist_head pi_waiters;
+       struct rb_root pi_waiters;
+       struct rb_node *pi_waiters_leftmost;
         /* Deadlock detection and priority inheritance handling */
         struct rt_mutex_waiter *pi_blocked_on;
+       /* Top pi_waiters task */
+       struct task_struct *pi_top_task;
  #endif
  
  #ifdef CONFIG_DEBUG_MUTEXES
@@ -1880,7 +2006,9 @@ static inline void sched_clock_idle_wakeup_event(u64 delta_ns)
   * but then during bootup it turns out that sched_clock()
   * is reliable after all:
   */
-extern int sched_clock_stable;
+extern int sched_clock_stable(void);
+extern void set_sched_clock_stable(void);
+extern void clear_sched_clock_stable(void);
  
  extern void sched_clock_tick(void);
  extern void sched_clock_idle_sleep_event(void);
@@ -1959,6 +2087,8 @@ extern int sched_setscheduler(struct task_struct *, int,
                               const struct sched_param *);
  extern int sched_setscheduler_nocheck(struct task_struct *, int,
                                       const struct sched_param *);
+extern int sched_setattr(struct task_struct *,
+                        const struct sched_attr *);
  extern struct task_struct *idle_task(int cpu);
  /**
   * is_idle_task - is the specified task an idle task?
@@ -2038,7 +2168,7 @@ extern void wake_up_new_task(struct task_struct *tsk);
  #else
   static inline void kick_process(struct task_struct *tsk) { }
  #endif
-extern void sched_fork(unsigned long clone_flags, struct task_struct *p);
+extern int sched_fork(unsigned long clone_flags, struct task_struct *p);
  extern void sched_dead(struct task_struct *p);
  
  extern void proc_caches_init(void);
@@ -2164,8 +2294,6 @@ extern struct mm_struct *get_task_mm(struct task_struct *task);
  extern struct mm_struct *mm_access(struct task_struct *task, unsigned int mode);
  /* Remove the current tasks stale references to the old mm_struct */
  extern void mm_release(struct task_struct *, struct mm_struct *);
-/* Allocate a new mm structure and copy contents from tsk->mm */
-extern struct mm_struct *dup_mm(struct task_struct *tsk);
  
  extern int copy_thread(unsigned long, unsigned long, unsigned long,
                         struct task_struct *);
@@ -2223,6 +2351,16 @@ extern bool current_is_single_threaded(void);
  #define while_each_thread(g, t) \
         while ((t = next_thread(t)) != g)
  
+#define __for_each_thread(signal, t)   \
+       list_for_each_entry_rcu(t, &(signal)->thread_head, thread_node)
+
+#define for_each_thread(p, t)          \
+       __for_each_thread((p)->signal, t)
+
+/* Careful: this is a double loop, 'break' won't work as expected. */
+#define for_each_process_thread(p, t)  \
+       for_each_process(p) for_each_thread(p, t)
+
  static inline int get_nr_threads(struct task_struct *tsk)
  {
         return tsk->signal->nr_threads;
@@ -2627,6 +2765,21 @@ static inline bool __must_check current_clr_polling_and_test(void)
  }
  #endif
  
+static inline void current_clr_polling(void)
+{
+       __current_clr_polling();
+
+       /*
+        * Ensure we check TIF_NEED_RESCHED after we clear the polling bit.
+        * Once the bit is cleared, we'll get IPIs with every new
+        * TIF_NEED_RESCHED and the IPI handler, scheduler_ipi(), will also
+        * fold.
+        */
+       smp_mb(); /* paired with resched_task() */
+
+       preempt_fold_need_resched();
+}
+
  static __always_inline bool need_resched(void)
  {
         return unlikely(tif_need_resched());