X-Git-Url: http://git.cascardo.eti.br/?a=blobdiff_plain;f=mm%2Foom_kill.c;h=ec9f11d4f094467e98b8190b7195379174d6be0a;hb=10e15a639caac3be3c142f8837a17520da84db9f;hp=87fad956c96bbb2c9bfd977c7c75860cedcb2eea;hpb=7ebffa45551fe7db86a2b32bf586f124ef484e6e;p=cascardo%2Flinux.git diff --git a/mm/oom_kill.c b/mm/oom_kill.c index 87fad956c96b..ec9f11d4f094 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -186,7 +186,7 @@ unsigned long oom_badness(struct task_struct *p, struct mem_cgroup *memcg, */ adj = (long)p->signal->oom_score_adj; if (adj == OOM_SCORE_ADJ_MIN || - test_bit(MMF_OOM_REAPED, &p->mm->flags) || + test_bit(MMF_OOM_SKIP, &p->mm->flags) || in_vfork(p)) { task_unlock(p); return 0; @@ -296,18 +296,11 @@ static int oom_evaluate_task(struct task_struct *task, void *arg) /* * This task already has access to memory reserves and is being killed. * Don't allow any other task to have access to the reserves unless - * the task has MMF_OOM_REAPED because chances that it would release + * the task has MMF_OOM_SKIP because chances that it would release * any memory is quite low. */ - if (!is_sysrq_oom(oc) && atomic_read(&task->signal->oom_victims)) { - struct task_struct *p = find_lock_task_mm(task); - bool reaped = false; - - if (p) { - reaped = test_bit(MMF_OOM_REAPED, &p->mm->flags); - task_unlock(p); - } - if (reaped) + if (!is_sysrq_oom(oc) && tsk_is_oom_victim(task)) { + if (test_bit(MMF_OOM_SKIP, &task->signal->oom_mm->flags)) goto next; goto abort; } @@ -410,9 +403,14 @@ static void dump_tasks(struct mem_cgroup *memcg, const nodemask_t *nodemask) static void dump_header(struct oom_control *oc, struct task_struct *p) { - pr_warn("%s invoked oom-killer: gfp_mask=%#x(%pGg), order=%d, oom_score_adj=%hd\n", - current->comm, oc->gfp_mask, &oc->gfp_mask, oc->order, + nodemask_t *nm = (oc->nodemask) ? oc->nodemask : &cpuset_current_mems_allowed; + + pr_warn("%s invoked oom-killer: gfp_mask=%#x(%pGg), nodemask=%*pbl, order=%d, oom_score_adj=%hd\n", + current->comm, oc->gfp_mask, &oc->gfp_mask, + nodemask_pr_args(nm), oc->order, current->signal->oom_score_adj); + if (!IS_ENABLED(CONFIG_COMPACTION) && oc->order) + pr_warn("COMPACTION is disabled!!!\n"); cpuset_print_current_mems_allowed(); dump_stack(); @@ -502,6 +500,14 @@ static bool __oom_reap_task_mm(struct task_struct *tsk, struct mm_struct *mm) goto unlock_oom; } + /* + * Tell all users of get_user/copy_from_user etc... that the content + * is no longer stable. No barriers really needed because unmapping + * should imply barriers already and the reader would hit a page fault + * if it stumbled over a reaped memory. + */ + set_bit(MMF_UNSTABLE, &mm->flags); + tlb_gather_mmu(&tlb, mm, 0, -1); for (vma = mm->mmap ; vma; vma = vma->vm_next) { if (is_vm_hugetlb_page(vma)) @@ -536,11 +542,6 @@ static bool __oom_reap_task_mm(struct task_struct *tsk, struct mm_struct *mm) K(get_mm_counter(mm, MM_SHMEMPAGES))); up_read(&mm->mmap_sem); - /* - * This task can be safely ignored because we cannot do much more - * to release its memory. - */ - set_bit(MMF_OOM_REAPED, &mm->flags); /* * Drop our reference but make sure the mmput slow path is called from a * different context because we shouldn't risk we get stuck there and @@ -556,20 +557,7 @@ unlock_oom: static void oom_reap_task(struct task_struct *tsk) { int attempts = 0; - struct mm_struct *mm = NULL; - struct task_struct *p = find_lock_task_mm(tsk); - - /* - * Make sure we find the associated mm_struct even when the particular - * thread has already terminated and cleared its mm. - * We might have race with exit path so consider our work done if there - * is no mm. - */ - if (!p) - goto done; - mm = p->mm; - atomic_inc(&mm->mm_count); - task_unlock(p); + struct mm_struct *mm = tsk->signal->oom_mm; /* Retry the down_read_trylock(mmap_sem) a few times */ while (attempts++ < MAX_OOM_REAP_RETRIES && !__oom_reap_task_mm(tsk, mm)) @@ -578,43 +566,26 @@ static void oom_reap_task(struct task_struct *tsk) if (attempts <= MAX_OOM_REAP_RETRIES) goto done; + pr_info("oom_reaper: unable to reap pid:%d (%s)\n", task_pid_nr(tsk), tsk->comm); - - /* - * If we've already tried to reap this task in the past and - * failed it probably doesn't make much sense to try yet again - * so hide the mm from the oom killer so that it can move on - * to another task with a different mm struct. - */ - if (test_and_set_bit(MMF_OOM_NOT_REAPABLE, &mm->flags)) { - pr_info("oom_reaper: giving up pid:%d (%s)\n", - task_pid_nr(tsk), tsk->comm); - set_bit(MMF_OOM_REAPED, &mm->flags); - } debug_show_all_locks(); done: + tsk->oom_reaper_list = NULL; + /* - * Clear TIF_MEMDIE because the task shouldn't be sitting on a - * reasonably reclaimable memory anymore or it is not a good candidate - * for the oom victim right now because it cannot release its memory - * itself nor by the oom reaper. + * Hide this mm from OOM killer because it has been either reaped or + * somebody can't call up_write(mmap_sem). */ - tsk->oom_reaper_list = NULL; - exit_oom_victim(tsk); + set_bit(MMF_OOM_SKIP, &mm->flags); /* Drop a reference taken by wake_oom_reaper */ put_task_struct(tsk); - /* Drop a reference taken above. */ - if (mm) - mmdrop(mm); } static int oom_reaper(void *unused) { - set_freezable(); - while (true) { struct task_struct *tsk = NULL; @@ -674,14 +645,23 @@ static inline void wake_oom_reaper(struct task_struct *tsk) * * Has to be called with oom_lock held and never after * oom has been disabled already. + * + * tsk->mm has to be non NULL and caller has to guarantee it is stable (either + * under task_lock or operate on the current). */ static void mark_oom_victim(struct task_struct *tsk) { + struct mm_struct *mm = tsk->mm; + WARN_ON(oom_killer_disabled); /* OOM killer might race with memcg OOM */ if (test_and_set_tsk_thread_flag(tsk, TIF_MEMDIE)) return; - atomic_inc(&tsk->signal->oom_victims); + + /* oom_mm is bound to the signal struct life time. */ + if (!cmpxchg(&tsk->signal->oom_mm, NULL, mm)) + atomic_inc(&tsk->signal->oom_mm->mm_count); + /* * Make sure that the task is woken up from uninterruptible sleep * if it is frozen because OOM killer wouldn't be able to free @@ -695,21 +675,29 @@ static void mark_oom_victim(struct task_struct *tsk) /** * exit_oom_victim - note the exit of an OOM victim */ -void exit_oom_victim(struct task_struct *tsk) +void exit_oom_victim(void) { - if (!test_and_clear_tsk_thread_flag(tsk, TIF_MEMDIE)) - return; - atomic_dec(&tsk->signal->oom_victims); + clear_thread_flag(TIF_MEMDIE); if (!atomic_dec_return(&oom_victims)) wake_up_all(&oom_victims_wait); } +/** + * oom_killer_enable - enable OOM killer + */ +void oom_killer_enable(void) +{ + oom_killer_disabled = false; +} + /** * oom_killer_disable - disable OOM killer + * @timeout: maximum timeout to wait for oom victims in jiffies * * Forces all page allocations to fail rather than trigger OOM killer. - * Will block and wait until all OOM victims are killed. + * Will block and wait until all OOM victims are killed or the given + * timeout expires. * * The function cannot be called when there are runnable user tasks because * the userspace would see unexpected allocation failures as a result. Any @@ -718,8 +706,10 @@ void exit_oom_victim(struct task_struct *tsk) * Returns true if successful and false if the OOM killer cannot be * disabled. */ -bool oom_killer_disable(void) +bool oom_killer_disable(signed long timeout) { + signed long ret; + /* * Make sure to not race with an ongoing OOM killer. Check that the * current is not killed (possibly due to sharing the victim's memory). @@ -729,19 +719,16 @@ bool oom_killer_disable(void) oom_killer_disabled = true; mutex_unlock(&oom_lock); - wait_event(oom_victims_wait, !atomic_read(&oom_victims)); + ret = wait_event_interruptible_timeout(oom_victims_wait, + !atomic_read(&oom_victims), timeout); + if (ret <= 0) { + oom_killer_enable(); + return false; + } return true; } -/** - * oom_killer_enable - enable OOM killer - */ -void oom_killer_enable(void) -{ - oom_killer_disabled = false; -} - static inline bool __task_will_free_mem(struct task_struct *task) { struct signal_struct *sig = task->signal; @@ -791,7 +778,7 @@ static bool task_will_free_mem(struct task_struct *task) * This task has already been drained by the oom reaper so there are * only small chances it will free some more */ - if (test_bit(MMF_OOM_REAPED, &mm->flags)) + if (test_bit(MMF_OOM_SKIP, &mm->flags)) return false; if (atomic_read(&mm->mm_users) <= 1) @@ -920,20 +907,20 @@ static void oom_kill_process(struct oom_control *oc, const char *message) continue; if (same_thread_group(p, victim)) continue; - if (unlikely(p->flags & PF_KTHREAD) || is_global_init(p)) { - /* - * We cannot use oom_reaper for the mm shared by this - * process because it wouldn't get killed and so the - * memory might be still used. Hide the mm from the oom - * killer to guarantee OOM forward progress. - */ + if (is_global_init(p)) { can_oom_reap = false; - set_bit(MMF_OOM_REAPED, &mm->flags); + set_bit(MMF_OOM_SKIP, &mm->flags); pr_info("oom killer %d (%s) has mm pinned by %d (%s)\n", task_pid_nr(victim), victim->comm, task_pid_nr(p), p->comm); continue; } + /* + * No use_mm() user needs to read from the userspace so we are + * ok to reap it. + */ + if (unlikely(p->flags & PF_KTHREAD)) + continue; do_send_sig_info(SIGKILL, SEND_SIG_FORCED, p, true); } rcu_read_unlock(); @@ -1085,16 +1072,6 @@ void pagefault_out_of_memory(void) if (!mutex_trylock(&oom_lock)) return; - - if (!out_of_memory(&oc)) { - /* - * There shouldn't be any user tasks runnable while the - * OOM killer is disabled, so the current task has to - * be a racing OOM victim for which oom_killer_disable() - * is waiting for. - */ - WARN_ON(test_thread_flag(TIF_MEMDIE)); - } - + out_of_memory(&oc); mutex_unlock(&oom_lock); }