Merge tag 'mmc-merge-for-3.7-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git...
[cascardo/linux.git] / mm / mempolicy.c
index 4ada3be..0b78fb9 100644 (file)
@@ -607,6 +607,42 @@ check_range(struct mm_struct *mm, unsigned long start, unsigned long end,
        return first;
 }
 
+/*
+ * Apply policy to a single VMA
+ * This must be called with the mmap_sem held for writing.
+ */
+static int vma_replace_policy(struct vm_area_struct *vma,
+                                               struct mempolicy *pol)
+{
+       int err;
+       struct mempolicy *old;
+       struct mempolicy *new;
+
+       pr_debug("vma %lx-%lx/%lx vm_ops %p vm_file %p set_policy %p\n",
+                vma->vm_start, vma->vm_end, vma->vm_pgoff,
+                vma->vm_ops, vma->vm_file,
+                vma->vm_ops ? vma->vm_ops->set_policy : NULL);
+
+       new = mpol_dup(pol);
+       if (IS_ERR(new))
+               return PTR_ERR(new);
+
+       if (vma->vm_ops && vma->vm_ops->set_policy) {
+               err = vma->vm_ops->set_policy(vma, new);
+               if (err)
+                       goto err_out;
+       }
+
+       old = vma->vm_policy;
+       vma->vm_policy = new; /* protected by mmap_sem */
+       mpol_put(old);
+
+       return 0;
+ err_out:
+       mpol_put(new);
+       return err;
+}
+
 /* Step 2: apply policy to a range and do splits. */
 static int mbind_range(struct mm_struct *mm, unsigned long start,
                       unsigned long end, struct mempolicy *new_pol)
@@ -655,23 +691,9 @@ static int mbind_range(struct mm_struct *mm, unsigned long start,
                        if (err)
                                goto out;
                }
-
-               /*
-                * Apply policy to a single VMA. The reference counting of
-                * policy for vma_policy linkages has already been handled by
-                * vma_merge and split_vma as necessary. If this is a shared
-                * policy then ->set_policy will increment the reference count
-                * for an sp node.
-                */
-               pr_debug("vma %lx-%lx/%lx vm_ops %p vm_file %p set_policy %p\n",
-                       vma->vm_start, vma->vm_end, vma->vm_pgoff,
-                       vma->vm_ops, vma->vm_file,
-                       vma->vm_ops ? vma->vm_ops->set_policy : NULL);
-               if (vma->vm_ops && vma->vm_ops->set_policy) {
-                       err = vma->vm_ops->set_policy(vma, new_pol);
-                       if (err)
-                               goto out;
-               }
+               err = vma_replace_policy(vma, new_pol);
+               if (err)
+                       goto out;
        }
 
  out:
@@ -924,15 +946,18 @@ static int migrate_to_node(struct mm_struct *mm, int source, int dest,
        nodemask_t nmask;
        LIST_HEAD(pagelist);
        int err = 0;
-       struct vm_area_struct *vma;
 
        nodes_clear(nmask);
        node_set(source, nmask);
 
-       vma = check_range(mm, mm->mmap->vm_start, mm->task_size, &nmask,
+       /*
+        * This does not "check" the range but isolates all pages that
+        * need migration.  Between passing in the full user address
+        * space range and MPOL_MF_DISCONTIG_OK, this call can not fail.
+        */
+       VM_BUG_ON(!(flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)));
+       check_range(mm, mm->mmap->vm_start, mm->task_size, &nmask,
                        flags | MPOL_MF_DISCONTIG_OK, &pagelist);
-       if (IS_ERR(vma))
-               return PTR_ERR(vma);
 
        if (!list_empty(&pagelist)) {
                err = migrate_pages(&pagelist, new_node_page, dest,
@@ -1530,8 +1555,18 @@ struct mempolicy *get_vma_policy(struct task_struct *task,
                                                                        addr);
                        if (vpol)
                                pol = vpol;
-               } else if (vma->vm_policy)
+               } else if (vma->vm_policy) {
                        pol = vma->vm_policy;
+
+                       /*
+                        * shmem_alloc_page() passes MPOL_F_SHARED policy with
+                        * a pseudo vma whose vma->vm_ops=NULL. Take a reference
+                        * count on these policies which will be dropped by
+                        * mpol_cond_put() later
+                        */
+                       if (mpol_needs_cond_ref(pol))
+                               mpol_get(pol);
+               }
        }
        if (!pol)
                pol = &default_policy;
@@ -2061,7 +2096,7 @@ bool __mpol_equal(struct mempolicy *a, struct mempolicy *b)
  */
 
 /* lookup first element intersecting start-end */
-/* Caller holds sp->lock */
+/* Caller holds sp->mutex */
 static struct sp_node *
 sp_lookup(struct shared_policy *sp, unsigned long start, unsigned long end)
 {
@@ -2125,36 +2160,50 @@ mpol_shared_policy_lookup(struct shared_policy *sp, unsigned long idx)
 
        if (!sp->root.rb_node)
                return NULL;
-       spin_lock(&sp->lock);
+       mutex_lock(&sp->mutex);
        sn = sp_lookup(sp, idx, idx+1);
        if (sn) {
                mpol_get(sn->policy);
                pol = sn->policy;
        }
-       spin_unlock(&sp->lock);
+       mutex_unlock(&sp->mutex);
        return pol;
 }
 
+static void sp_free(struct sp_node *n)
+{
+       mpol_put(n->policy);
+       kmem_cache_free(sn_cache, n);
+}
+
 static void sp_delete(struct shared_policy *sp, struct sp_node *n)
 {
        pr_debug("deleting %lx-l%lx\n", n->start, n->end);
        rb_erase(&n->nd, &sp->root);
-       mpol_put(n->policy);
-       kmem_cache_free(sn_cache, n);
+       sp_free(n);
 }
 
 static struct sp_node *sp_alloc(unsigned long start, unsigned long end,
                                struct mempolicy *pol)
 {
-       struct sp_node *n = kmem_cache_alloc(sn_cache, GFP_KERNEL);
+       struct sp_node *n;
+       struct mempolicy *newpol;
 
+       n = kmem_cache_alloc(sn_cache, GFP_KERNEL);
        if (!n)
                return NULL;
+
+       newpol = mpol_dup(pol);
+       if (IS_ERR(newpol)) {
+               kmem_cache_free(sn_cache, n);
+               return NULL;
+       }
+       newpol->flags |= MPOL_F_SHARED;
+
        n->start = start;
        n->end = end;
-       mpol_get(pol);
-       pol->flags |= MPOL_F_SHARED;    /* for unref */
-       n->policy = pol;
+       n->policy = newpol;
+
        return n;
 }
 
@@ -2162,10 +2211,10 @@ static struct sp_node *sp_alloc(unsigned long start, unsigned long end,
 static int shared_policy_replace(struct shared_policy *sp, unsigned long start,
                                 unsigned long end, struct sp_node *new)
 {
-       struct sp_node *n, *new2 = NULL;
+       struct sp_node *n;
+       int ret = 0;
 
-restart:
-       spin_lock(&sp->lock);
+       mutex_lock(&sp->mutex);
        n = sp_lookup(sp, start, end);
        /* Take care of old policies in the same range. */
        while (n && n->start < end) {
@@ -2178,16 +2227,14 @@ restart:
                } else {
                        /* Old policy spanning whole new range. */
                        if (n->end > end) {
+                               struct sp_node *new2;
+                               new2 = sp_alloc(end, n->end, n->policy);
                                if (!new2) {
-                                       spin_unlock(&sp->lock);
-                                       new2 = sp_alloc(end, n->end, n->policy);
-                                       if (!new2)
-                                               return -ENOMEM;
-                                       goto restart;
+                                       ret = -ENOMEM;
+                                       goto out;
                                }
                                n->end = start;
                                sp_insert(sp, new2);
-                               new2 = NULL;
                                break;
                        } else
                                n->end = start;
@@ -2198,12 +2245,9 @@ restart:
        }
        if (new)
                sp_insert(sp, new);
-       spin_unlock(&sp->lock);
-       if (new2) {
-               mpol_put(new2->policy);
-               kmem_cache_free(sn_cache, new2);
-       }
-       return 0;
+out:
+       mutex_unlock(&sp->mutex);
+       return ret;
 }
 
 /**
@@ -2221,7 +2265,7 @@ void mpol_shared_policy_init(struct shared_policy *sp, struct mempolicy *mpol)
        int ret;
 
        sp->root = RB_ROOT;             /* empty tree == default mempolicy */
-       spin_lock_init(&sp->lock);
+       mutex_init(&sp->mutex);
 
        if (mpol) {
                struct vm_area_struct pvma;
@@ -2275,7 +2319,7 @@ int mpol_set_shared_policy(struct shared_policy *info,
        }
        err = shared_policy_replace(info, vma->vm_pgoff, vma->vm_pgoff+sz, new);
        if (err && new)
-               kmem_cache_free(sn_cache, new);
+               sp_free(new);
        return err;
 }
 
@@ -2287,16 +2331,14 @@ void mpol_free_shared_policy(struct shared_policy *p)
 
        if (!p->root.rb_node)
                return;
-       spin_lock(&p->lock);
+       mutex_lock(&p->mutex);
        next = rb_first(&p->root);
        while (next) {
                n = rb_entry(next, struct sp_node, nd);
                next = rb_next(&n->nd);
-               rb_erase(&n->nd, &p->root);
-               mpol_put(n->policy);
-               kmem_cache_free(sn_cache, n);
+               sp_delete(p, n);
        }
-       spin_unlock(&p->lock);
+       mutex_unlock(&p->mutex);
 }
 
 /* assumes fs == KERNEL_DS */