Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
[cascardo/linux.git] / fs / btrfs / delayed-ref.c
index 8f8ed7d..ac3e81d 100644 (file)
@@ -22,6 +22,7 @@
 #include "ctree.h"
 #include "delayed-ref.h"
 #include "transaction.h"
+#include "qgroup.h"
 
 struct kmem_cache *btrfs_delayed_ref_head_cachep;
 struct kmem_cache *btrfs_delayed_tree_ref_cachep;
@@ -84,87 +85,6 @@ static int comp_data_refs(struct btrfs_delayed_data_ref *ref2,
        return 0;
 }
 
-/*
- * entries in the rb tree are ordered by the byte number of the extent,
- * type of the delayed backrefs and content of delayed backrefs.
- */
-static int comp_entry(struct btrfs_delayed_ref_node *ref2,
-                     struct btrfs_delayed_ref_node *ref1,
-                     bool compare_seq)
-{
-       if (ref1->bytenr < ref2->bytenr)
-               return -1;
-       if (ref1->bytenr > ref2->bytenr)
-               return 1;
-       if (ref1->is_head && ref2->is_head)
-               return 0;
-       if (ref2->is_head)
-               return -1;
-       if (ref1->is_head)
-               return 1;
-       if (ref1->type < ref2->type)
-               return -1;
-       if (ref1->type > ref2->type)
-               return 1;
-       if (ref1->no_quota > ref2->no_quota)
-               return 1;
-       if (ref1->no_quota < ref2->no_quota)
-               return -1;
-       /* merging of sequenced refs is not allowed */
-       if (compare_seq) {
-               if (ref1->seq < ref2->seq)
-                       return -1;
-               if (ref1->seq > ref2->seq)
-                       return 1;
-       }
-       if (ref1->type == BTRFS_TREE_BLOCK_REF_KEY ||
-           ref1->type == BTRFS_SHARED_BLOCK_REF_KEY) {
-               return comp_tree_refs(btrfs_delayed_node_to_tree_ref(ref2),
-                                     btrfs_delayed_node_to_tree_ref(ref1),
-                                     ref1->type);
-       } else if (ref1->type == BTRFS_EXTENT_DATA_REF_KEY ||
-                  ref1->type == BTRFS_SHARED_DATA_REF_KEY) {
-               return comp_data_refs(btrfs_delayed_node_to_data_ref(ref2),
-                                     btrfs_delayed_node_to_data_ref(ref1));
-       }
-       BUG();
-       return 0;
-}
-
-/*
- * insert a new ref into the rbtree.  This returns any existing refs
- * for the same (bytenr,parent) tuple, or NULL if the new node was properly
- * inserted.
- */
-static struct btrfs_delayed_ref_node *tree_insert(struct rb_root *root,
-                                                 struct rb_node *node)
-{
-       struct rb_node **p = &root->rb_node;
-       struct rb_node *parent_node = NULL;
-       struct btrfs_delayed_ref_node *entry;
-       struct btrfs_delayed_ref_node *ins;
-       int cmp;
-
-       ins = rb_entry(node, struct btrfs_delayed_ref_node, rb_node);
-       while (*p) {
-               parent_node = *p;
-               entry = rb_entry(parent_node, struct btrfs_delayed_ref_node,
-                                rb_node);
-
-               cmp = comp_entry(entry, ins, 1);
-               if (cmp < 0)
-                       p = &(*p)->rb_left;
-               else if (cmp > 0)
-                       p = &(*p)->rb_right;
-               else
-                       return entry;
-       }
-
-       rb_link_node(node, parent_node, p);
-       rb_insert_color(node, root);
-       return NULL;
-}
-
 /* insert a new ref to head ref rbtree */
 static struct btrfs_delayed_ref_head *htree_insert(struct rb_root *root,
                                                   struct rb_node *node)
@@ -268,7 +188,7 @@ static inline void drop_delayed_ref(struct btrfs_trans_handle *trans,
                rb_erase(&head->href_node, &delayed_refs->href_root);
        } else {
                assert_spin_locked(&head->lock);
-               rb_erase(&ref->rb_node, &head->ref_root);
+               list_del(&ref->list);
        }
        ref->in_tree = 0;
        btrfs_put_delayed_ref(ref);
@@ -277,99 +197,6 @@ static inline void drop_delayed_ref(struct btrfs_trans_handle *trans,
                trans->delayed_ref_updates--;
 }
 
-static int merge_ref(struct btrfs_trans_handle *trans,
-                    struct btrfs_delayed_ref_root *delayed_refs,
-                    struct btrfs_delayed_ref_head *head,
-                    struct btrfs_delayed_ref_node *ref, u64 seq)
-{
-       struct rb_node *node;
-       int mod = 0;
-       int done = 0;
-
-       node = rb_next(&ref->rb_node);
-       while (!done && node) {
-               struct btrfs_delayed_ref_node *next;
-
-               next = rb_entry(node, struct btrfs_delayed_ref_node, rb_node);
-               node = rb_next(node);
-               if (seq && next->seq >= seq)
-                       break;
-               if (comp_entry(ref, next, 0))
-                       continue;
-
-               if (ref->action == next->action) {
-                       mod = next->ref_mod;
-               } else {
-                       if (ref->ref_mod < next->ref_mod) {
-                               struct btrfs_delayed_ref_node *tmp;
-
-                               tmp = ref;
-                               ref = next;
-                               next = tmp;
-                               done = 1;
-                       }
-                       mod = -next->ref_mod;
-               }
-
-               drop_delayed_ref(trans, delayed_refs, head, next);
-               ref->ref_mod += mod;
-               if (ref->ref_mod == 0) {
-                       drop_delayed_ref(trans, delayed_refs, head, ref);
-                       done = 1;
-               } else {
-                       /*
-                        * You can't have multiples of the same ref on a tree
-                        * block.
-                        */
-                       WARN_ON(ref->type == BTRFS_TREE_BLOCK_REF_KEY ||
-                               ref->type == BTRFS_SHARED_BLOCK_REF_KEY);
-               }
-       }
-       return done;
-}
-
-void btrfs_merge_delayed_refs(struct btrfs_trans_handle *trans,
-                             struct btrfs_fs_info *fs_info,
-                             struct btrfs_delayed_ref_root *delayed_refs,
-                             struct btrfs_delayed_ref_head *head)
-{
-       struct rb_node *node;
-       u64 seq = 0;
-
-       assert_spin_locked(&head->lock);
-       /*
-        * We don't have too much refs to merge in the case of delayed data
-        * refs.
-        */
-       if (head->is_data)
-               return;
-
-       spin_lock(&fs_info->tree_mod_seq_lock);
-       if (!list_empty(&fs_info->tree_mod_seq_list)) {
-               struct seq_list *elem;
-
-               elem = list_first_entry(&fs_info->tree_mod_seq_list,
-                                       struct seq_list, list);
-               seq = elem->seq;
-       }
-       spin_unlock(&fs_info->tree_mod_seq_lock);
-
-       node = rb_first(&head->ref_root);
-       while (node) {
-               struct btrfs_delayed_ref_node *ref;
-
-               ref = rb_entry(node, struct btrfs_delayed_ref_node,
-                              rb_node);
-               /* We can't merge refs that are outside of our seq count */
-               if (seq && ref->seq >= seq)
-                       break;
-               if (merge_ref(trans, delayed_refs, head, ref, seq))
-                       node = rb_first(&head->ref_root);
-               else
-                       node = rb_next(&ref->rb_node);
-       }
-}
-
 int btrfs_check_delayed_seq(struct btrfs_fs_info *fs_info,
                            struct btrfs_delayed_ref_root *delayed_refs,
                            u64 seq)
@@ -443,45 +270,71 @@ again:
 }
 
 /*
- * helper function to update an extent delayed ref in the
- * rbtree.  existing and update must both have the same
- * bytenr and parent
+ * Helper to insert the ref node at the tail, or merge it into the tail node.
  *
- * This may free existing if the update cancels out whatever
- * operation it was doing.
+ * Return 0 if ref was inserted at the tail of the list.
+ * Return >0 if ref was merged into the tail; the caller then frees ref.
  */
-static noinline void
-update_existing_ref(struct btrfs_trans_handle *trans,
-                   struct btrfs_delayed_ref_root *delayed_refs,
-                   struct btrfs_delayed_ref_head *head,
-                   struct btrfs_delayed_ref_node *existing,
-                   struct btrfs_delayed_ref_node *update)
+static int
+add_delayed_ref_tail_merge(struct btrfs_trans_handle *trans,
+                          struct btrfs_delayed_ref_root *root,
+                          struct btrfs_delayed_ref_head *href,
+                          struct btrfs_delayed_ref_node *ref)
 {
-       if (update->action != existing->action) {
-               /*
-                * this is effectively undoing either an add or a
-                * drop.  We decrement the ref_mod, and if it goes
-                * down to zero we just delete the entry without
-                * every changing the extent allocation tree.
-                */
-               existing->ref_mod--;
-               if (existing->ref_mod == 0)
-                       drop_delayed_ref(trans, delayed_refs, head, existing);
-               else
-                       WARN_ON(existing->type == BTRFS_TREE_BLOCK_REF_KEY ||
-                               existing->type == BTRFS_SHARED_BLOCK_REF_KEY);
+       struct btrfs_delayed_ref_node *exist;
+       int mod;
+       int ret = 0;
+
+       spin_lock(&href->lock);
+       /* Check whether we can merge the tail node with ref */
+       if (list_empty(&href->ref_list))
+               goto add_tail;
+       exist = list_entry(href->ref_list.prev, struct btrfs_delayed_ref_node,
+                          list);
+       /* No need to compare bytenr or is_head */
+       if (exist->type != ref->type || exist->no_quota != ref->no_quota ||
+           exist->seq != ref->seq)
+               goto add_tail;
+
+       if ((exist->type == BTRFS_TREE_BLOCK_REF_KEY ||
+            exist->type == BTRFS_SHARED_BLOCK_REF_KEY) &&
+           comp_tree_refs(btrfs_delayed_node_to_tree_ref(exist),
+                          btrfs_delayed_node_to_tree_ref(ref),
+                          ref->type))
+               goto add_tail;
+       if ((exist->type == BTRFS_EXTENT_DATA_REF_KEY ||
+            exist->type == BTRFS_SHARED_DATA_REF_KEY) &&
+           comp_data_refs(btrfs_delayed_node_to_data_ref(exist),
+                          btrfs_delayed_node_to_data_ref(ref)))
+               goto add_tail;
+
+       /* Now we are sure we can merge */
+       ret = 1;
+       if (exist->action == ref->action) {
+               mod = ref->ref_mod;
        } else {
-               WARN_ON(existing->type == BTRFS_TREE_BLOCK_REF_KEY ||
-                       existing->type == BTRFS_SHARED_BLOCK_REF_KEY);
-               /*
-                * the action on the existing ref matches
-                * the action on the ref we're trying to add.
-                * Bump the ref_mod by one so the backref that
-                * is eventually added/removed has the correct
-                * reference count
-                */
-               existing->ref_mod += update->ref_mod;
+               /* Need to change action */
+               if (exist->ref_mod < ref->ref_mod) {
+                       exist->action = ref->action;
+                       mod = -exist->ref_mod;
+                       exist->ref_mod = ref->ref_mod;
+               } else
+                       mod = -ref->ref_mod;
        }
+       exist->ref_mod += mod;
+
+       /* remove existing tail if its ref_mod is zero */
+       if (exist->ref_mod == 0)
+               drop_delayed_ref(trans, root, href, exist);
+       spin_unlock(&href->lock);
+       return ret;
+
+add_tail:
+       list_add_tail(&ref->list, &href->ref_list);
+       atomic_inc(&root->num_entries);
+       trans->delayed_ref_updates++;
+       spin_unlock(&href->lock);
+       return ret;
 }
 
 /*
@@ -568,12 +421,14 @@ update_existing_head_ref(struct btrfs_delayed_ref_root *delayed_refs,
 static noinline struct btrfs_delayed_ref_head *
 add_delayed_ref_head(struct btrfs_fs_info *fs_info,
                     struct btrfs_trans_handle *trans,
-                    struct btrfs_delayed_ref_node *ref, u64 bytenr,
-                    u64 num_bytes, int action, int is_data)
+                    struct btrfs_delayed_ref_node *ref,
+                    struct btrfs_qgroup_extent_record *qrecord,
+                    u64 bytenr, u64 num_bytes, int action, int is_data)
 {
        struct btrfs_delayed_ref_head *existing;
        struct btrfs_delayed_ref_head *head_ref = NULL;
        struct btrfs_delayed_ref_root *delayed_refs;
+       struct btrfs_qgroup_extent_record *qexisting;
        int count_mod = 1;
        int must_insert_reserved = 0;
 
@@ -618,10 +473,22 @@ add_delayed_ref_head(struct btrfs_fs_info *fs_info,
        head_ref = btrfs_delayed_node_to_head(ref);
        head_ref->must_insert_reserved = must_insert_reserved;
        head_ref->is_data = is_data;
-       head_ref->ref_root = RB_ROOT;
+       INIT_LIST_HEAD(&head_ref->ref_list);
        head_ref->processing = 0;
        head_ref->total_ref_mod = count_mod;
 
+       /* Record qgroup extent info if provided */
+       if (qrecord) {
+               qrecord->bytenr = bytenr;
+               qrecord->num_bytes = num_bytes;
+               qrecord->old_roots = NULL;
+
+               qexisting = btrfs_qgroup_insert_dirty_extent(delayed_refs,
+                                                            qrecord);
+               if (qexisting)
+                       kfree(qrecord);
+       }
+
        spin_lock_init(&head_ref->lock);
        mutex_init(&head_ref->mutex);
 
@@ -659,10 +526,10 @@ add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
                     u64 num_bytes, u64 parent, u64 ref_root, int level,
                     int action, int no_quota)
 {
-       struct btrfs_delayed_ref_node *existing;
        struct btrfs_delayed_tree_ref *full_ref;
        struct btrfs_delayed_ref_root *delayed_refs;
        u64 seq = 0;
+       int ret;
 
        if (action == BTRFS_ADD_DELAYED_EXTENT)
                action = BTRFS_ADD_DELAYED_REF;
@@ -693,21 +560,14 @@ add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
 
        trace_add_delayed_tree_ref(ref, full_ref, action);
 
-       spin_lock(&head_ref->lock);
-       existing = tree_insert(&head_ref->ref_root, &ref->rb_node);
-       if (existing) {
-               update_existing_ref(trans, delayed_refs, head_ref, existing,
-                                   ref);
-               /*
-                * we've updated the existing ref, free the newly
-                * allocated ref
-                */
+       ret = add_delayed_ref_tail_merge(trans, delayed_refs, head_ref, ref);
+
+       /*
+        * XXX: memory should be freed at the level where it was allocated.
+        * But this bad practice is everywhere... Follow it for now; needs cleanup.
+        */
+       if (ret > 0)
                kmem_cache_free(btrfs_delayed_tree_ref_cachep, full_ref);
-       } else {
-               atomic_inc(&delayed_refs->num_entries);
-               trans->delayed_ref_updates++;
-       }
-       spin_unlock(&head_ref->lock);
 }
 
 /*
@@ -721,10 +581,10 @@ add_delayed_data_ref(struct btrfs_fs_info *fs_info,
                     u64 num_bytes, u64 parent, u64 ref_root, u64 owner,
                     u64 offset, int action, int no_quota)
 {
-       struct btrfs_delayed_ref_node *existing;
        struct btrfs_delayed_data_ref *full_ref;
        struct btrfs_delayed_ref_root *delayed_refs;
        u64 seq = 0;
+       int ret;
 
        if (action == BTRFS_ADD_DELAYED_EXTENT)
                action = BTRFS_ADD_DELAYED_REF;
@@ -758,21 +618,10 @@ add_delayed_data_ref(struct btrfs_fs_info *fs_info,
 
        trace_add_delayed_data_ref(ref, full_ref, action);
 
-       spin_lock(&head_ref->lock);
-       existing = tree_insert(&head_ref->ref_root, &ref->rb_node);
-       if (existing) {
-               update_existing_ref(trans, delayed_refs, head_ref, existing,
-                                   ref);
-               /*
-                * we've updated the existing ref, free the newly
-                * allocated ref
-                */
+       ret = add_delayed_ref_tail_merge(trans, delayed_refs, head_ref, ref);
+
+       if (ret > 0)
                kmem_cache_free(btrfs_delayed_data_ref_cachep, full_ref);
-       } else {
-               atomic_inc(&delayed_refs->num_entries);
-               trans->delayed_ref_updates++;
-       }
-       spin_unlock(&head_ref->lock);
 }
 
 /*
@@ -790,6 +639,7 @@ int btrfs_add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
        struct btrfs_delayed_tree_ref *ref;
        struct btrfs_delayed_ref_head *head_ref;
        struct btrfs_delayed_ref_root *delayed_refs;
+       struct btrfs_qgroup_extent_record *record = NULL;
 
        if (!is_fstree(ref_root) || !fs_info->quota_enabled)
                no_quota = 0;
@@ -800,9 +650,13 @@ int btrfs_add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
                return -ENOMEM;
 
        head_ref = kmem_cache_alloc(btrfs_delayed_ref_head_cachep, GFP_NOFS);
-       if (!head_ref) {
-               kmem_cache_free(btrfs_delayed_tree_ref_cachep, ref);
-               return -ENOMEM;
+       if (!head_ref)
+               goto free_ref;
+
+       if (fs_info->quota_enabled && is_fstree(ref_root)) {
+               record = kmalloc(sizeof(*record), GFP_NOFS);
+               if (!record)
+                       goto free_head_ref;
        }
 
        head_ref->extent_op = extent_op;
@@ -814,7 +668,7 @@ int btrfs_add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
         * insert both the head node and the new ref without dropping
         * the spin lock
         */
-       head_ref = add_delayed_ref_head(fs_info, trans, &head_ref->node,
+       head_ref = add_delayed_ref_head(fs_info, trans, &head_ref->node, record,
                                        bytenr, num_bytes, action, 0);
 
        add_delayed_tree_ref(fs_info, trans, head_ref, &ref->node, bytenr,
@@ -823,6 +677,13 @@ int btrfs_add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
        spin_unlock(&delayed_refs->lock);
 
        return 0;
+
+free_head_ref:
+       kmem_cache_free(btrfs_delayed_ref_head_cachep, head_ref);
+free_ref:
+       kmem_cache_free(btrfs_delayed_tree_ref_cachep, ref);
+
+       return -ENOMEM;
 }
 
 /*
@@ -839,6 +700,7 @@ int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info,
        struct btrfs_delayed_data_ref *ref;
        struct btrfs_delayed_ref_head *head_ref;
        struct btrfs_delayed_ref_root *delayed_refs;
+       struct btrfs_qgroup_extent_record *record = NULL;
 
        if (!is_fstree(ref_root) || !fs_info->quota_enabled)
                no_quota = 0;
@@ -854,6 +716,16 @@ int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info,
                return -ENOMEM;
        }
 
+       if (fs_info->quota_enabled && is_fstree(ref_root)) {
+               record = kmalloc(sizeof(*record), GFP_NOFS);
+               if (!record) {
+                       kmem_cache_free(btrfs_delayed_data_ref_cachep, ref);
+                       kmem_cache_free(btrfs_delayed_ref_head_cachep,
+                                       head_ref);
+                       return -ENOMEM;
+               }
+       }
+
        head_ref->extent_op = extent_op;
 
        delayed_refs = &trans->transaction->delayed_refs;
@@ -863,7 +735,7 @@ int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info,
         * insert both the head node and the new ref without dropping
         * the spin lock
         */
-       head_ref = add_delayed_ref_head(fs_info, trans, &head_ref->node,
+       head_ref = add_delayed_ref_head(fs_info, trans, &head_ref->node, record,
                                        bytenr, num_bytes, action, 1);
 
        add_delayed_data_ref(fs_info, trans, head_ref, &ref->node, bytenr,
@@ -891,9 +763,9 @@ int btrfs_add_delayed_extent_op(struct btrfs_fs_info *fs_info,
        delayed_refs = &trans->transaction->delayed_refs;
        spin_lock(&delayed_refs->lock);
 
-       add_delayed_ref_head(fs_info, trans, &head_ref->node, bytenr,
-                                  num_bytes, BTRFS_UPDATE_DELAYED_HEAD,
-                                  extent_op->is_data);
+       add_delayed_ref_head(fs_info, trans, &head_ref->node, NULL, bytenr,
+                            num_bytes, BTRFS_UPDATE_DELAYED_HEAD,
+                            extent_op->is_data);
 
        spin_unlock(&delayed_refs->lock);
        return 0;