*/
adj = (long)p->signal->oom_score_adj;
if (adj == OOM_SCORE_ADJ_MIN ||
- test_bit(MMF_OOM_REAPED, &p->mm->flags) ||
+ test_bit(MMF_OOM_SKIP, &p->mm->flags) ||
in_vfork(p)) {
task_unlock(p);
return 0;
/*
* This task already has access to memory reserves and is being killed.
* Don't allow any other task to have access to the reserves unless
- * the task has MMF_OOM_REAPED because chances that it would release
+ * the task has MMF_OOM_SKIP because chances that it would release
* any memory is quite low.
*/
- if (!is_sysrq_oom(oc) && atomic_read(&task->signal->oom_victims)) {
- struct task_struct *p = find_lock_task_mm(task);
- bool reaped = false;
-
- if (p) {
- reaped = test_bit(MMF_OOM_REAPED, &p->mm->flags);
- task_unlock(p);
- }
- if (reaped)
+ if (!is_sysrq_oom(oc) && tsk_is_oom_victim(task)) {
+ if (test_bit(MMF_OOM_SKIP, &task->signal->oom_mm->flags))
goto next;
goto abort;
}
static void dump_header(struct oom_control *oc, struct task_struct *p)
{
- pr_warn("%s invoked oom-killer: gfp_mask=%#x(%pGg), order=%d, oom_score_adj=%hd\n",
- current->comm, oc->gfp_mask, &oc->gfp_mask, oc->order,
+ nodemask_t *nm = (oc->nodemask) ? oc->nodemask : &cpuset_current_mems_allowed;
+
+ pr_warn("%s invoked oom-killer: gfp_mask=%#x(%pGg), nodemask=%*pbl, order=%d, oom_score_adj=%hd\n",
+ current->comm, oc->gfp_mask, &oc->gfp_mask,
+ nodemask_pr_args(nm), oc->order,
current->signal->oom_score_adj);
+ if (!IS_ENABLED(CONFIG_COMPACTION) && oc->order)
+ pr_warn("COMPACTION is disabled!!!\n");
cpuset_print_current_mems_allowed();
dump_stack();
goto unlock_oom;
}
+ /*
+ * Tell all users of get_user/copy_from_user etc... that the content
+ * is no longer stable. No barriers really needed because unmapping
+ * should imply barriers already and the reader would hit a page fault
+ * if it stumbled over a reaped memory.
+ */
+ set_bit(MMF_UNSTABLE, &mm->flags);
+
tlb_gather_mmu(&tlb, mm, 0, -1);
for (vma = mm->mmap ; vma; vma = vma->vm_next) {
if (is_vm_hugetlb_page(vma))
K(get_mm_counter(mm, MM_SHMEMPAGES)));
up_read(&mm->mmap_sem);
- /*
- * This task can be safely ignored because we cannot do much more
- * to release its memory.
- */
- set_bit(MMF_OOM_REAPED, &mm->flags);
/*
* Drop our reference but make sure the mmput slow path is called from a
* different context because we shouldn't risk we get stuck there and
static void oom_reap_task(struct task_struct *tsk)
{
int attempts = 0;
- struct mm_struct *mm = NULL;
- struct task_struct *p = find_lock_task_mm(tsk);
-
- /*
- * Make sure we find the associated mm_struct even when the particular
- * thread has already terminated and cleared its mm.
- * We might have race with exit path so consider our work done if there
- * is no mm.
- */
- if (!p)
- goto done;
- mm = p->mm;
- atomic_inc(&mm->mm_count);
- task_unlock(p);
+ struct mm_struct *mm = tsk->signal->oom_mm;
/* Retry the down_read_trylock(mmap_sem) a few times */
while (attempts++ < MAX_OOM_REAP_RETRIES && !__oom_reap_task_mm(tsk, mm))
if (attempts <= MAX_OOM_REAP_RETRIES)
goto done;
+
pr_info("oom_reaper: unable to reap pid:%d (%s)\n",
task_pid_nr(tsk), tsk->comm);
-
- /*
- * If we've already tried to reap this task in the past and
- * failed it probably doesn't make much sense to try yet again
- * so hide the mm from the oom killer so that it can move on
- * to another task with a different mm struct.
- */
- if (test_and_set_bit(MMF_OOM_NOT_REAPABLE, &mm->flags)) {
- pr_info("oom_reaper: giving up pid:%d (%s)\n",
- task_pid_nr(tsk), tsk->comm);
- set_bit(MMF_OOM_REAPED, &mm->flags);
- }
debug_show_all_locks();
done:
+ tsk->oom_reaper_list = NULL;
+
/*
- * Clear TIF_MEMDIE because the task shouldn't be sitting on a
- * reasonably reclaimable memory anymore or it is not a good candidate
- * for the oom victim right now because it cannot release its memory
- * itself nor by the oom reaper.
+ * Hide this mm from OOM killer because it has been either reaped or
+ * somebody can't call up_write(mmap_sem).
*/
- tsk->oom_reaper_list = NULL;
- exit_oom_victim(tsk);
+ set_bit(MMF_OOM_SKIP, &mm->flags);
/* Drop a reference taken by wake_oom_reaper */
put_task_struct(tsk);
- /* Drop a reference taken above. */
- if (mm)
- mmdrop(mm);
}
static int oom_reaper(void *unused)
{
- set_freezable();
-
while (true) {
struct task_struct *tsk = NULL;
*
* Has to be called with oom_lock held and never after
* oom has been disabled already.
+ *
+ * tsk->mm has to be non NULL and caller has to guarantee it is stable (either
+ * under task_lock or operate on the current).
*/
static void mark_oom_victim(struct task_struct *tsk)
{
+ struct mm_struct *mm = tsk->mm;
+
WARN_ON(oom_killer_disabled);
/* OOM killer might race with memcg OOM */
if (test_and_set_tsk_thread_flag(tsk, TIF_MEMDIE))
return;
- atomic_inc(&tsk->signal->oom_victims);
+
+ /* oom_mm is bound to the signal struct life time. */
+ if (!cmpxchg(&tsk->signal->oom_mm, NULL, mm))
+ atomic_inc(&tsk->signal->oom_mm->mm_count);
+
/*
* Make sure that the task is woken up from uninterruptible sleep
* if it is frozen because OOM killer wouldn't be able to free
/**
* exit_oom_victim - note the exit of an OOM victim
*/
-void exit_oom_victim(struct task_struct *tsk)
+void exit_oom_victim(void)
{
- if (!test_and_clear_tsk_thread_flag(tsk, TIF_MEMDIE))
- return;
- atomic_dec(&tsk->signal->oom_victims);
+ clear_thread_flag(TIF_MEMDIE);
if (!atomic_dec_return(&oom_victims))
wake_up_all(&oom_victims_wait);
}
+/**
+ * oom_killer_enable - enable OOM killer
+ */
+void oom_killer_enable(void)
+{
+ oom_killer_disabled = false;
+}
+
/**
* oom_killer_disable - disable OOM killer
+ * @timeout: maximum timeout to wait for oom victims in jiffies
*
* Forces all page allocations to fail rather than trigger OOM killer.
- * Will block and wait until all OOM victims are killed.
+ * Will block and wait until all OOM victims are killed or the given
+ * timeout expires.
*
* The function cannot be called when there are runnable user tasks because
* the userspace would see unexpected allocation failures as a result. Any
* Returns true if successful and false if the OOM killer cannot be
* disabled.
*/
-bool oom_killer_disable(void)
+bool oom_killer_disable(signed long timeout)
{
+ signed long ret;
+
/*
* Make sure to not race with an ongoing OOM killer. Check that the
* current is not killed (possibly due to sharing the victim's memory).
oom_killer_disabled = true;
mutex_unlock(&oom_lock);
- wait_event(oom_victims_wait, !atomic_read(&oom_victims));
+ ret = wait_event_interruptible_timeout(oom_victims_wait,
+ !atomic_read(&oom_victims), timeout);
+ if (ret <= 0) {
+ oom_killer_enable();
+ return false;
+ }
return true;
}
-/**
- * oom_killer_enable - enable OOM killer
- */
-void oom_killer_enable(void)
-{
- oom_killer_disabled = false;
-}
-
static inline bool __task_will_free_mem(struct task_struct *task)
{
struct signal_struct *sig = task->signal;
* This task has already been drained by the oom reaper so there are
* only small chances it will free some more
*/
- if (test_bit(MMF_OOM_REAPED, &mm->flags))
+ if (test_bit(MMF_OOM_SKIP, &mm->flags))
return false;
if (atomic_read(&mm->mm_users) <= 1)
continue;
if (same_thread_group(p, victim))
continue;
- if (unlikely(p->flags & PF_KTHREAD) || is_global_init(p)) {
- /*
- * We cannot use oom_reaper for the mm shared by this
- * process because it wouldn't get killed and so the
- * memory might be still used. Hide the mm from the oom
- * killer to guarantee OOM forward progress.
- */
+ if (is_global_init(p)) {
can_oom_reap = false;
- set_bit(MMF_OOM_REAPED, &mm->flags);
+ set_bit(MMF_OOM_SKIP, &mm->flags);
pr_info("oom killer %d (%s) has mm pinned by %d (%s)\n",
task_pid_nr(victim), victim->comm,
task_pid_nr(p), p->comm);
continue;
}
+ /*
+ * No use_mm() user needs to read from the userspace so we are
+ * ok to reap it.
+ */
+ if (unlikely(p->flags & PF_KTHREAD))
+ continue;
do_send_sig_info(SIGKILL, SEND_SIG_FORCED, p, true);
}
rcu_read_unlock();
if (!mutex_trylock(&oom_lock))
return;
-
- if (!out_of_memory(&oc)) {
- /*
- * There shouldn't be any user tasks runnable while the
- * OOM killer is disabled, so the current task has to
- * be a racing OOM victim for which oom_killer_disable()
- * is waiting for.
- */
- WARN_ON(test_thread_flag(TIF_MEMDIE));
- }
-
+ out_of_memory(&oc);
mutex_unlock(&oom_lock);
}