tg3: prevent ifup/ifdown during PCI error recovery
[cascardo/linux.git] / kernel / fork.c
index 6a13c46..0cf9cdb 100644
@@ -315,6 +315,15 @@ static struct task_struct *dup_task_struct(struct task_struct *orig)
                goto free_ti;
 
        tsk->stack = ti;
+#ifdef CONFIG_SECCOMP
+       /*
+        * We must handle setting up seccomp filters once we're under
+        * the sighand lock in case orig has changed between now and
+        * then. Until then, filter must be NULL to avoid messing up
+        * the usage counts on the error path calling free_task.
+        */
+       tsk->seccomp.filter = NULL;
+#endif
 
        setup_thread_stack(tsk, orig);
        clear_user_return_notifier(tsk);
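
The NULL assignment above matters because the child task_struct starts as a raw copy of orig: on the error paths out of copy_process(), free_task() calls put_seccomp_filter(), and a stale pointer aliasing the parent's filter would be released without ever having been referenced. A minimal sketch of the refcount pair involved, approximately as it looks in kernel/seccomp.c at this point in the series:

	void get_seccomp_filter(struct task_struct *tsk)
	{
		struct seccomp_filter *orig = tsk->seccomp.filter;

		if (!orig)
			return;
		/* Reference count is bounded by the number of total processes. */
		atomic_inc(&orig->usage);
	}

	void put_seccomp_filter(struct task_struct *tsk)
	{
		struct seccomp_filter *orig = tsk->seccomp.filter;

		/* Clean up single-reference branches iteratively. */
		while (orig && atomic_dec_and_test(&orig->usage)) {
			struct seccomp_filter *freeme = orig;

			orig = orig->prev;
			kfree(freeme);
		}
	}
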
@@ -365,12 +374,11 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
         */
        down_write_nested(&mm->mmap_sem, SINGLE_DEPTH_NESTING);
 
-       mm->locked_vm = 0;
-       mm->mmap = NULL;
-       mm->vmacache_seqnum = 0;
-       mm->map_count = 0;
-       cpumask_clear(mm_cpumask(mm));
-       mm->mm_rb = RB_ROOT;
+       mm->total_vm = oldmm->total_vm;
+       mm->shared_vm = oldmm->shared_vm;
+       mm->exec_vm = oldmm->exec_vm;
+       mm->stack_vm = oldmm->stack_vm;
+
        rb_link = &mm->mm_rb.rb_node;
        rb_parent = NULL;
        pprev = &mm->mmap;
@@ -421,7 +429,7 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
                                atomic_dec(&inode->i_writecount);
                        mutex_lock(&mapping->i_mmap_mutex);
                        if (tmp->vm_flags & VM_SHARED)
-                               mapping->i_mmap_writable++;
+                               atomic_inc(&mapping->i_mmap_writable);
                        flush_dcache_mmap_lock(mapping);
                        /* insert tmp into the share list, just after mpnt */
                        if (unlikely(tmp->vm_flags & VM_NONLINEAR))
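
The switch to atomic_inc() here is part of converting i_mmap_writable from a plain int guarded by i_mmap_mutex into an atomic_t, so other paths can update and test it without taking the mutex. A sketch of the reader side after the conversion, assuming the mapping_writably_mapped() helper keeps its historical role:

	/* True if any process has this mapping mapped shared and writable. */
	static inline int mapping_writably_mapped(struct address_space *mapping)
	{
		return atomic_read(&mapping->i_mmap_writable) > 0;
	}
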
@@ -527,19 +535,37 @@ static void mm_init_aio(struct mm_struct *mm)
 #endif
 }
 
+static void mm_init_owner(struct mm_struct *mm, struct task_struct *p)
+{
+#ifdef CONFIG_MEMCG
+       mm->owner = p;
+#endif
+}
+
 static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p)
 {
+       mm->mmap = NULL;
+       mm->mm_rb = RB_ROOT;
+       mm->vmacache_seqnum = 0;
        atomic_set(&mm->mm_users, 1);
        atomic_set(&mm->mm_count, 1);
        init_rwsem(&mm->mmap_sem);
        INIT_LIST_HEAD(&mm->mmlist);
        mm->core_state = NULL;
        atomic_long_set(&mm->nr_ptes, 0);
+       mm->map_count = 0;
+       mm->locked_vm = 0;
+       mm->pinned_vm = 0;
        memset(&mm->rss_stat, 0, sizeof(mm->rss_stat));
        spin_lock_init(&mm->page_table_lock);
+       mm_init_cpumask(mm);
        mm_init_aio(mm);
        mm_init_owner(mm, p);
+       mmu_notifier_mm_init(mm);
        clear_tlb_flush_pending(mm);
+#if defined(CONFIG_TRANSPARENT_HUGEPAGE) && !USE_SPLIT_PMD_PTLOCKS
+       mm->pmd_huge_pte = NULL;
+#endif
 
        if (current->mm) {
                mm->flags = current->mm->flags & MMF_INIT_MASK;
@@ -549,11 +575,17 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p)
                mm->def_flags = 0;
        }
 
-       if (likely(!mm_alloc_pgd(mm))) {
-               mmu_notifier_mm_init(mm);
-               return mm;
-       }
+       if (mm_alloc_pgd(mm))
+               goto fail_nopgd;
+
+       if (init_new_context(p, mm))
+               goto fail_nocontext;
 
+       return mm;
+
+fail_nocontext:
+       mm_free_pgd(mm);
+fail_nopgd:
        free_mm(mm);
        return NULL;
 }
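
The explicit two-label unwind is needed because neither mmput() nor mmdrop() is safe on a half-initialized mm: the final-free path tears down the architecture context unconditionally. A sketch of that path, approximately as it looks in kernel/fork.c, which is also why the old dup_mm() fail_nocontext comment warned against mmput():

	void __mmdrop(struct mm_struct *mm)
	{
		BUG_ON(mm == &init_mm);
		mm_free_pgd(mm);
		destroy_context(mm);	/* assumes init_new_context() succeeded */
		mmu_notifier_mm_destroy(mm);
		free_mm(mm);
	}
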
@@ -587,7 +619,6 @@ struct mm_struct *mm_alloc(void)
                return NULL;
 
        memset(mm, 0, sizeof(*mm));
-       mm_init_cpumask(mm);
        return mm_init(mm, current);
 }
 
@@ -819,17 +850,10 @@ static struct mm_struct *dup_mm(struct task_struct *tsk)
                goto fail_nomem;
 
        memcpy(mm, oldmm, sizeof(*mm));
-       mm_init_cpumask(mm);
 
-#if defined(CONFIG_TRANSPARENT_HUGEPAGE) && !USE_SPLIT_PMD_PTLOCKS
-       mm->pmd_huge_pte = NULL;
-#endif
        if (!mm_init(mm, tsk))
                goto fail_nomem;
 
-       if (init_new_context(tsk, mm))
-               goto fail_nocontext;
-
        dup_mm_exe_file(oldmm, mm);
 
        err = dup_mmap(mm, oldmm);
@@ -851,15 +875,6 @@ free_pt:
 
 fail_nomem:
        return NULL;
-
-fail_nocontext:
-       /*
-        * If init_new_context() failed, we cannot use mmput() to free the mm
-        * because it calls destroy_context()
-        */
-       mm_free_pgd(mm);
-       free_mm(mm);
-       return NULL;
 }
 
 static int copy_mm(unsigned long clone_flags, struct task_struct *tsk)
@@ -1081,6 +1096,39 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
        return 0;
 }
 
+static void copy_seccomp(struct task_struct *p)
+{
+#ifdef CONFIG_SECCOMP
+       /*
+        * Must be called with sighand->siglock held, which is common to
+        * all threads in the group. Holding cred_guard_mutex is not
+        * needed because this new task is not yet running and cannot
+        * be racing exec.
+        */
+       assert_spin_locked(&current->sighand->siglock);
+
+       /* Ref-count the new filter user, and assign it. */
+       get_seccomp_filter(current);
+       p->seccomp = current->seccomp;
+
+       /*
+        * Explicitly enable no_new_privs here in case it got set
+        * between the task_struct being duplicated and holding the
+        * sighand lock. The seccomp state and nnp must be in sync.
+        */
+       if (task_no_new_privs(current))
+               task_set_no_new_privs(p);
+
+       /*
+        * If the parent gained a seccomp mode after copying thread
+        * flags but before we took the sighand lock, we have
+        * to manually enable the seccomp thread flag here.
+        */
+       if (p->seccomp.mode != SECCOMP_MODE_DISABLED)
+               set_tsk_thread_flag(p, TIF_SECCOMP);
+#endif
+}
+
 SYSCALL_DEFINE1(set_tid_address, int __user *, tidptr)
 {
        current->clear_child_tid = tidptr;
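
copy_seccomp() is race-free because the writer side takes the same lock: updates to the seccomp mode and filter (including TSYNC propagation) are applied under sighand->siglock, so the child sees either the old or the new state in full, never a torn combination. A sketch of the parent-side assignment this pairs with, approximately as in kernel/seccomp.c from the same series:

	static inline void seccomp_assign_mode(struct task_struct *task,
					       unsigned long seccomp_mode)
	{
		assert_spin_locked(&task->sighand->siglock);

		task->seccomp.mode = seccomp_mode;
		/*
		 * Make sure TIF_SECCOMP cannot be observed before the
		 * mode (and filter) are set.
		 */
		smp_mb__before_atomic();
		set_tsk_thread_flag(task, TIF_SECCOMP);
	}
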
@@ -1095,17 +1143,9 @@ static void rt_mutex_init_task(struct task_struct *p)
        p->pi_waiters = RB_ROOT;
        p->pi_waiters_leftmost = NULL;
        p->pi_blocked_on = NULL;
-       p->pi_top_task = NULL;
 #endif
 }
 
-#ifdef CONFIG_MEMCG
-void mm_init_owner(struct mm_struct *mm, struct task_struct *p)
-{
-       mm->owner = p;
-}
-#endif /* CONFIG_MEMCG */
-
 /*
  * Initialize POSIX timer handling for a single task.
  */
@@ -1196,7 +1236,6 @@ static struct task_struct *copy_process(unsigned long clone_flags,
                goto fork_out;
 
        ftrace_graph_init_task(p);
-       get_seccomp_filter(p);
 
        rt_mutex_init_task(p);
 
@@ -1262,9 +1301,8 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 
        posix_cpu_timers_init(p);
 
-       do_posix_clock_monotonic_gettime(&p->start_time);
-       p->real_start_time = p->start_time;
-       monotonic_to_bootbased(&p->real_start_time);
+       p->start_time = ktime_get_ns();
+       p->real_start_time = ktime_get_boot_ns();
        p->io_context = NULL;
        p->audit_context = NULL;
        if (clone_flags & CLONE_THREAD)
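
The replacement accessors return nanoseconds directly: ktime_get_ns() is CLOCK_MONOTONIC, which stops while the system is suspended, and ktime_get_boot_ns() additionally counts suspended time, matching what the old monotonic_to_bootbased() conversion did. A sketch of the helpers, as introduced in include/linux/timekeeping.h:

	static inline u64 ktime_get_ns(void)
	{
		return ktime_to_ns(ktime_get());
	}

	static inline u64 ktime_get_boot_ns(void)
	{
		return ktime_to_ns(ktime_get_boottime());
	}
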
@@ -1307,10 +1345,6 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 #ifdef CONFIG_DEBUG_MUTEXES
        p->blocked_on = NULL; /* not blocked yet */
 #endif
-#ifdef CONFIG_MEMCG
-       p->memcg_batch.do_batch = 0;
-       p->memcg_batch.memcg = NULL;
-#endif
 #ifdef CONFIG_BCACHE
        p->sequential_io        = 0;
        p->sequential_io_avg    = 0;
@@ -1328,6 +1362,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
        if (retval)
                goto bad_fork_cleanup_policy;
        /* copy all the process information */
+       shm_init_task(p);
        retval = copy_semundo(clone_flags, p);
        if (retval)
                goto bad_fork_cleanup_audit;
@@ -1436,6 +1471,12 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 
        spin_lock(&current->sighand->siglock);
 
+       /*
+        * Copy seccomp details explicitly here, in case they were changed
+        * before the sighand lock was taken.
+        */
+       copy_seccomp(p);
+
        /*
         * Process group and session signals need to be delivered to just the
         * parent before the fork or both the parent and the child after the
@@ -1873,6 +1914,11 @@ SYSCALL_DEFINE1(unshare, unsigned long, unshare_flags)
                         */
                        exit_sem(current);
                }
+               if (unshare_flags & CLONE_NEWIPC) {
+                       /* Orphan segments in old ns (see sem above). */
+                       exit_shm(current);
+                       shm_init_task(current);
+               }
 
                if (new_nsproxy)
                        switch_task_namespaces(current, new_nsproxy);
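
shm_init_task() itself is tiny: it resets the per-task list of attached SysV shared memory segments, so after exit_shm() has orphaned the segments left behind in the old IPC namespace, the task starts with a clean list in the new one. A sketch, approximately as in ipc/shm.c:

	void shm_init_task(struct task_struct *task)
	{
		INIT_LIST_HEAD(&task->sysvshm.shm_clist);
	}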