tg3: prevent ifup/ifdown during PCI error recovery
[cascardo/linux.git] / kernel / fork.c
index 6a13c46..0cf9cdb 100644
@@ -315,6 +315,15 @@ static struct task_struct *dup_task_struct(struct task_struct *orig)
                goto free_ti;
 
        tsk->stack = ti;
+#ifdef CONFIG_SECCOMP
+       /*
+        * We must handle setting up seccomp filters once we're under
+        * the sighand lock in case orig has changed between now and
+        * then. Until then, filter must be NULL to avoid messing up
+        * the usage counts on the error path calling free_task.
+        */
+       tsk->seccomp.filter = NULL;
+#endif
 
        setup_thread_stack(tsk, orig);
        clear_user_return_notifier(tsk);
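
The NULL assignment above matters because the child task_struct starts as a raw copy of orig: on the error paths out of copy_process(), free_task() calls put_seccomp_filter(), and a stale pointer aliasing the parent's filter would be released without ever having been referenced. A minimal sketch of the refcount pair involved, approximately as it looks in kernel/seccomp.c at this point in the series:

	void get_seccomp_filter(struct task_struct *tsk)
	{
		struct seccomp_filter *orig = tsk->seccomp.filter;

		if (!orig)
			return;
		/* Reference count is bounded by the number of total processes. */
		atomic_inc(&orig->usage);
	}

	void put_seccomp_filter(struct task_struct *tsk)
	{
		struct seccomp_filter *orig = tsk->seccomp.filter;

		/* Clean up single-reference branches iteratively. */
		while (orig && atomic_dec_and_test(&orig->usage)) {
			struct seccomp_filter *freeme = orig;

			orig = orig->prev;
			kfree(freeme);
		}
	}
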
@@ -365,12 +374,11 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
         */
        down_write_nested(&mm->mmap_sem, SINGLE_DEPTH_NESTING);
 
-       mm->locked_vm = 0;
-       mm->mmap = NULL;
-       mm->vmacache_seqnum = 0;
-       mm->map_count = 0;
-       cpumask_clear(mm_cpumask(mm));
-       mm->mm_rb = RB_ROOT;
+       mm->total_vm = oldmm->total_vm;
+       mm->shared_vm = oldmm->shared_vm;
+       mm->exec_vm = oldmm->exec_vm;
+       mm->stack_vm = oldmm->stack_vm;
+
        rb_link = &mm->mm_rb.rb_node;
        rb_parent = NULL;
        pprev = &mm->mmap;
@@ -421,7 +429,7 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
                                atomic_dec(&inode->i_writecount);
                        mutex_lock(&mapping->i_mmap_mutex);
                        if (tmp->vm_flags & VM_SHARED)
-                               mapping->i_mmap_writable++;
+                               atomic_inc(&mapping->i_mmap_writable);
                        flush_dcache_mmap_lock(mapping);
                        /* insert tmp into the share list, just after mpnt */
                        if (unlikely(tmp->vm_flags & VM_NONLINEAR))
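
The switch to atomic_inc() here is part of converting i_mmap_writable from a plain int guarded by i_mmap_mutex into an atomic_t, so other paths can update and test it without taking the mutex. A sketch of the reader side after the conversion, assuming the mapping_writably_mapped() helper keeps its historical role:

	/* True if any process has this mapping mapped shared and writable. */
	static inline int mapping_writably_mapped(struct address_space *mapping)
	{
		return atomic_read(&mapping->i_mmap_writable) > 0;
	}
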
@@ -527,19 +535,37 @@ static void mm_init_aio(struct mm_struct *mm)
 #endif
 }
 
+static void mm_init_owner(struct mm_struct *mm, struct task_struct *p)
+{
+#ifdef CONFIG_MEMCG
+       mm->owner = p;
+#endif
+}
+
 static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p)
 {
+       mm->mmap = NULL;
+       mm->mm_rb = RB_ROOT;
+       mm->vmacache_seqnum = 0;
        atomic_set(&mm->mm_users, 1);
        atomic_set(&mm->mm_count, 1);
        init_rwsem(&mm->mmap_sem);
        INIT_LIST_HEAD(&mm->mmlist);
        mm->core_state = NULL;
        atomic_long_set(&mm->nr_ptes, 0);
+       mm->map_count = 0;
+       mm->locked_vm = 0;
+       mm->pinned_vm = 0;
        memset(&mm->rss_stat, 0, sizeof(mm->rss_stat));
        spin_lock_init(&mm->page_table_lock);
+       mm_init_cpumask(mm);
        mm_init_aio(mm);
        mm_init_owner(mm, p);
+       mmu_notifier_mm_init(mm);
        clear_tlb_flush_pending(mm);
+#if defined(CONFIG_TRANSPARENT_HUGEPAGE) && !USE_SPLIT_PMD_PTLOCKS
+       mm->pmd_huge_pte = NULL;
+#endif
 
        if (current->mm) {
                mm->flags = current->mm->flags & MMF_INIT_MASK;
@@ -549,11 +575,17 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p)
                mm->def_flags = 0;
        }
 
-       if (likely(!mm_alloc_pgd(mm))) {
-               mmu_notifier_mm_init(mm);
-               return mm;
-       }
+       if (mm_alloc_pgd(mm))
+               goto fail_nopgd;
+
+       if (init_new_context(p, mm))
+               goto fail_nocontext;
 
+       return mm;
+
+fail_nocontext:
+       mm_free_pgd(mm);
+fail_nopgd:
        free_mm(mm);
        return NULL;
 }
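
The explicit two-label unwind is needed because neither mmput() nor mmdrop() is safe on a half-initialized mm: the final-free path tears down the architecture context unconditionally. A sketch of that path, approximately as it looks in kernel/fork.c, which is also why the old dup_mm() fail_nocontext comment warned against mmput():

	void __mmdrop(struct mm_struct *mm)
	{
		BUG_ON(mm == &init_mm);
		mm_free_pgd(mm);
		destroy_context(mm);	/* assumes init_new_context() succeeded */
		mmu_notifier_mm_destroy(mm);
		free_mm(mm);
	}
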
@@ -587,7 +619,6 @@ struct mm_struct *mm_alloc(void)
                return NULL;
 
        memset(mm, 0, sizeof(*mm));
-       mm_init_cpumask(mm);
        return mm_init(mm, current);
 }
 
@@ -819,17 +850,10 @@ static struct mm_struct *dup_mm(struct task_struct *tsk)
                goto fail_nomem;
 
        memcpy(mm, oldmm, sizeof(*mm));
-       mm_init_cpumask(mm);
 
-#if defined(CONFIG_TRANSPARENT_HUGEPAGE) && !USE_SPLIT_PMD_PTLOCKS
-       mm->pmd_huge_pte = NULL;
-#endif
        if (!mm_init(mm, tsk))
                goto fail_nomem;
 
-       if (init_new_context(tsk, mm))
-               goto fail_nocontext;
-
        dup_mm_exe_file(oldmm, mm);
 
        err = dup_mmap(mm, oldmm);
@@ -851,15 +875,6 @@ free_pt:
 
 fail_nomem:
        return NULL;
-
-fail_nocontext:
-       /*
-        * If init_new_context() failed, we cannot use mmput() to free the mm
-        * because it calls destroy_context()
-        */
-       mm_free_pgd(mm);
-       free_mm(mm);
-       return NULL;
 }
 
 static int copy_mm(unsigned long clone_flags, struct task_struct *tsk)
@@ -1081,6 +1096,39 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
        return 0;
 }
 
+static void copy_seccomp(struct task_struct *p)
+{
+#ifdef CONFIG_SECCOMP
+       /*
+        * Must be called with sighand->siglock held, which is common to
+        * all threads in the group. Holding cred_guard_mutex is not
+        * needed because this new task is not yet running and cannot
+        * be racing exec.
+        */
+       assert_spin_locked(&current->sighand->siglock);
+
+       /* Ref-count the new filter user, and assign it. */
+       get_seccomp_filter(current);
+       p->seccomp = current->seccomp;
+
+       /*
+        * Explicitly enable no_new_privs here in case it got set
+        * between the task_struct being duplicated and holding the
+        * sighand lock. The seccomp state and nnp must be in sync.
+        */
+       if (task_no_new_privs(current))
+               task_set_no_new_privs(p);
+
+       /*
+        * If the parent gained a seccomp mode after copying thread
+        * flags but before we took the sighand lock, we have
+        * to manually enable the seccomp thread flag here.
+        */
+       if (p->seccomp.mode != SECCOMP_MODE_DISABLED)
+               set_tsk_thread_flag(p, TIF_SECCOMP);
+#endif
+}
+
 SYSCALL_DEFINE1(set_tid_address, int __user *, tidptr)
 {
        current->clear_child_tid = tidptr;
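
copy_seccomp() is race-free because the writer side takes the same lock: updates to the seccomp mode and filter (including TSYNC propagation) are applied under sighand->siglock, so the child sees either the old or the new state in full, never a torn combination. A sketch of the parent-side assignment this pairs with, approximately as in kernel/seccomp.c from the same series:

	static inline void seccomp_assign_mode(struct task_struct *task,
					       unsigned long seccomp_mode)
	{
		assert_spin_locked(&task->sighand->siglock);

		task->seccomp.mode = seccomp_mode;
		/*
		 * Make sure TIF_SECCOMP cannot be observed before the
		 * mode (and filter) are set.
		 */
		smp_mb__before_atomic();
		set_tsk_thread_flag(task, TIF_SECCOMP);
	}
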
@@ -1095,17 +1143,9 @@ static void rt_mutex_init_task(struct task_struct *p)
        p->pi_waiters = RB_ROOT;
        p->pi_waiters_leftmost = NULL;
        p->pi_blocked_on = NULL;
-       p->pi_top_task = NULL;
 #endif
 }
 
-#ifdef CONFIG_MEMCG
-void mm_init_owner(struct mm_struct *mm, struct task_struct *p)
-{
-       mm->owner = p;
-}
-#endif /* CONFIG_MEMCG */
-
 /*
  * Initialize POSIX timer handling for a single task.
  */
@@ -1196,7 +1236,6 @@ static struct task_struct *copy_process(unsigned long clone_flags,
                goto fork_out;
 
        ftrace_graph_init_task(p);
-       get_seccomp_filter(p);
 
        rt_mutex_init_task(p);
 
@@ -1262,9 +1301,8 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 
        posix_cpu_timers_init(p);
 
-       do_posix_clock_monotonic_gettime(&p->start_time);
-       p->real_start_time = p->start_time;
-       monotonic_to_bootbased(&p->real_start_time);
+       p->start_time = ktime_get_ns();
+       p->real_start_time = ktime_get_boot_ns();
        p->io_context = NULL;
        p->audit_context = NULL;
        if (clone_flags & CLONE_THREAD)
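
The replacement accessors return nanoseconds directly: ktime_get_ns() is CLOCK_MONOTONIC, which stops while the system is suspended, and ktime_get_boot_ns() additionally counts suspended time, matching what the old monotonic_to_bootbased() conversion did. A sketch of the helpers, as introduced in include/linux/timekeeping.h:

	static inline u64 ktime_get_ns(void)
	{
		return ktime_to_ns(ktime_get());
	}

	static inline u64 ktime_get_boot_ns(void)
	{
		return ktime_to_ns(ktime_get_boottime());
	}
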
@@ -1307,10 +1345,6 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 #ifdef CONFIG_DEBUG_MUTEXES
        p->blocked_on = NULL; /* not blocked yet */
 #endif
-#ifdef CONFIG_MEMCG
-       p->memcg_batch.do_batch = 0;
-       p->memcg_batch.memcg = NULL;
-#endif
 #ifdef CONFIG_BCACHE
        p->sequential_io        = 0;
        p->sequential_io_avg    = 0;
@@ -1328,6 +1362,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
        if (retval)
                goto bad_fork_cleanup_policy;
        /* copy all the process information */
+       shm_init_task(p);
        retval = copy_semundo(clone_flags, p);
        if (retval)
                goto bad_fork_cleanup_audit;
@@ -1436,6 +1471,12 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 
        spin_lock(&current->sighand->siglock);
 
+       /*
+        * Copy seccomp details explicitly here, in case they were changed
+        * before the sighand lock was taken.
+        */
+       copy_seccomp(p);
+
        /*
         * Process group and session signals need to be delivered to just the
         * parent before the fork or both the parent and the child after the
@@ -1873,6 +1914,11 @@ SYSCALL_DEFINE1(unshare, unsigned long, unshare_flags)
                         */
                        exit_sem(current);
                }
+               if (unshare_flags & CLONE_NEWIPC) {
+                       /* Orphan segments in old ns (see sem above). */
+                       exit_shm(current);
+                       shm_init_task(current);
+               }
 
                if (new_nsproxy)
                        switch_task_namespaces(current, new_nsproxy);
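
shm_init_task() itself is tiny: it resets the per-task list of attached SysV shared memory segments, so after exit_shm() has orphaned the segments left behind in the old IPC namespace, the task starts with a clean list in the new one. A sketch, approximately as in ipc/shm.c:

	void shm_init_task(struct task_struct *task)
	{
		INIT_LIST_HEAD(&task->sysvshm.shm_clist);
	}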