Btrfs: just wait or commit our own log sub-transaction
[cascardo/linux.git] / fs / btrfs / tree-log.c
index 9f7fc51..57d4ca7 100644 (file)
@@ -136,10 +136,11 @@ static noinline int replay_dir_deletes(struct btrfs_trans_handle *trans,
  * syncing the tree wait for us to finish
  */
 static int start_log_trans(struct btrfs_trans_handle *trans,
-                          struct btrfs_root *root)
+                          struct btrfs_root *root,
+                          struct btrfs_log_ctx *ctx)
 {
+       int index;
        int ret;
-       int err = 0;
 
        mutex_lock(&root->log_mutex);
        if (root->log_root) {
@@ -152,27 +153,40 @@ static int start_log_trans(struct btrfs_trans_handle *trans,
 
                atomic_inc(&root->log_batch);
                atomic_inc(&root->log_writers);
+               if (ctx) {
+                       index = root->log_transid % 2;
+                       list_add_tail(&ctx->list, &root->log_ctxs[index]);
+                       ctx->log_transid = root->log_transid;
+               }
                mutex_unlock(&root->log_mutex);
                return 0;
        }
-       root->log_multiple_pids = false;
-       root->log_start_pid = current->pid;
+
+       ret = 0;
        mutex_lock(&root->fs_info->tree_log_mutex);
-       if (!root->fs_info->log_root_tree) {
+       if (!root->fs_info->log_root_tree)
                ret = btrfs_init_log_root_tree(trans, root->fs_info);
-               if (ret)
-                       err = ret;
-       }
-       if (err == 0 && !root->log_root) {
+       mutex_unlock(&root->fs_info->tree_log_mutex);
+       if (ret)
+               goto out;
+
+       if (!root->log_root) {
                ret = btrfs_add_log_tree(trans, root);
                if (ret)
-                       err = ret;
+                       goto out;
        }
-       mutex_unlock(&root->fs_info->tree_log_mutex);
+       root->log_multiple_pids = false;
+       root->log_start_pid = current->pid;
        atomic_inc(&root->log_batch);
        atomic_inc(&root->log_writers);
+       if (ctx) {
+               index = root->log_transid % 2;
+               list_add_tail(&ctx->list, &root->log_ctxs[index]);
+               ctx->log_transid = root->log_transid;
+       }
+out:
        mutex_unlock(&root->log_mutex);
-       return err;
+       return ret;
 }
 
 /*
@@ -570,7 +584,7 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans,
                if (btrfs_file_extent_disk_bytenr(eb, item) == 0)
                        nbytes = 0;
        } else if (found_type == BTRFS_FILE_EXTENT_INLINE) {
-               size = btrfs_file_extent_inline_len(eb, item);
+               size = btrfs_file_extent_inline_len(eb, slot, item);
                nbytes = btrfs_file_extent_ram_bytes(eb, item);
                extent_end = ALIGN(start + size, root->sectorsize);
        } else {
@@ -1238,7 +1252,8 @@ static int insert_orphan_item(struct btrfs_trans_handle *trans,
                              struct btrfs_root *root, u64 offset)
 {
        int ret;
-       ret = btrfs_find_orphan_item(root, offset);
+       ret = btrfs_find_item(root, NULL, BTRFS_ORPHAN_OBJECTID,
+                       offset, BTRFS_ORPHAN_ITEM_KEY, NULL);
        if (ret > 0)
                ret = btrfs_insert_orphan_item(trans, root, offset);
        return ret;
@@ -2358,8 +2373,8 @@ static int update_log_root(struct btrfs_trans_handle *trans,
        return ret;
 }
 
-static int wait_log_commit(struct btrfs_trans_handle *trans,
-                          struct btrfs_root *root, unsigned long transid)
+static void wait_log_commit(struct btrfs_trans_handle *trans,
+                           struct btrfs_root *root, int transid)
 {
        DEFINE_WAIT(wait);
        int index = transid % 2;
@@ -2374,36 +2389,63 @@ static int wait_log_commit(struct btrfs_trans_handle *trans,
                                &wait, TASK_UNINTERRUPTIBLE);
                mutex_unlock(&root->log_mutex);
 
-               if (root->fs_info->last_trans_log_full_commit !=
-                   trans->transid && root->log_transid < transid + 2 &&
+               if (root->log_transid_committed < transid &&
                    atomic_read(&root->log_commit[index]))
                        schedule();
 
                finish_wait(&root->log_commit_wait[index], &wait);
                mutex_lock(&root->log_mutex);
-       } while (root->fs_info->last_trans_log_full_commit !=
-                trans->transid && root->log_transid < transid + 2 &&
+       } while (root->log_transid_committed < transid &&
                 atomic_read(&root->log_commit[index]));
-       return 0;
 }
 
 static void wait_for_writer(struct btrfs_trans_handle *trans,
                             struct btrfs_root *root)
 {
         DEFINE_WAIT(wait);
-       while (root->fs_info->last_trans_log_full_commit !=
-              trans->transid && atomic_read(&root->log_writers)) {
+       /*
+        * Sleep (TASK_UNINTERRUPTIBLE, on root->log_writer_wait) until
+        * root->log_writers reads zero.  Each pass drops root->log_mutex
+        * before it may sleep and takes it again afterwards, so the loop
+        * expects to be entered with that mutex held.  The counter is
+        * re-read after prepare_to_wait() and schedule() is only called if
+        * it is still non-zero.  With the last_trans_log_full_commit test
+        * removed, @trans is no longer read by this function.
+        */
+       while (atomic_read(&root->log_writers)) {
                 prepare_to_wait(&root->log_writer_wait,
                                 &wait, TASK_UNINTERRUPTIBLE);
                 mutex_unlock(&root->log_mutex);
-               if (root->fs_info->last_trans_log_full_commit !=
-                   trans->transid && atomic_read(&root->log_writers))
+               if (atomic_read(&root->log_writers))
                         schedule();
                 mutex_lock(&root->log_mutex);
                 finish_wait(&root->log_writer_wait, &wait);
         }
 }
 
+/*
+ * Unlink @ctx from the log context list it is queued on, holding
+ * root->log_mutex for the duration.  A NULL @ctx is accepted and ignored.
+ */
+static inline void btrfs_remove_log_ctx(struct btrfs_root *root,
+                                       struct btrfs_log_ctx *ctx)
+{
+       if (ctx) {
+               mutex_lock(&root->log_mutex);
+               list_del_init(&ctx->list);
+               mutex_unlock(&root->log_mutex);
+       }
+}
+
+/*
+ * Empty root->log_ctxs[index]: every queued log context is unlinked and,
+ * when @error is non-zero, has @error stored in its log_ret beforehand.
+ *
+ * Entries are unlinked one by one with list_del_init() instead of only
+ * re-initialising the list head.  The contexts belong to other tasks
+ * (btrfs_sync_log() queues one that lives on its own stack) and those
+ * tasks may still call list_empty()/list_del_init() on their entry; an
+ * entry left linked to a reset head would make them follow stale pointers.
+ *
+ * Must be called with the root's log_mutex held, or when no other task
+ * can access the list.
+ */
+static inline void btrfs_remove_all_log_ctxs(struct btrfs_root *root,
+                                            int index, int error)
+{
+       struct btrfs_log_ctx *ctx;
+       struct btrfs_log_ctx *next;
+
+       /* _safe variant: the current entry is removed while iterating. */
+       list_for_each_entry_safe(ctx, next, &root->log_ctxs[index], list) {
+               if (error)
+                       ctx->log_ret = error;
+               list_del_init(&ctx->list);
+       }
+}
+
 /*
  * btrfs_sync_log does sends a given tree log down to the disk and
  * updates the super blocks to record it.  When this call is done,
@@ -2417,7 +2459,7 @@ static void wait_for_writer(struct btrfs_trans_handle *trans,
  * that has happened.
  */
 int btrfs_sync_log(struct btrfs_trans_handle *trans,
-                  struct btrfs_root *root)
+                  struct btrfs_root *root, struct btrfs_log_ctx *ctx)
 {
        int index1;
        int index2;
@@ -2425,22 +2467,30 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
        int ret;
        struct btrfs_root *log = root->log_root;
        struct btrfs_root *log_root_tree = root->fs_info->log_root_tree;
-       unsigned long log_transid = 0;
+       int log_transid = 0;
+       struct btrfs_log_ctx root_log_ctx;
        struct blk_plug plug;
 
        mutex_lock(&root->log_mutex);
-       log_transid = root->log_transid;
-       index1 = root->log_transid % 2;
+       log_transid = ctx->log_transid;
+       if (root->log_transid_committed >= log_transid) {
+               mutex_unlock(&root->log_mutex);
+               return ctx->log_ret;
+       }
+
+       index1 = log_transid % 2;
        if (atomic_read(&root->log_commit[index1])) {
-               wait_log_commit(trans, root, root->log_transid);
+               wait_log_commit(trans, root, log_transid);
                mutex_unlock(&root->log_mutex);
-               return 0;
+               return ctx->log_ret;
        }
+       ASSERT(log_transid == root->log_transid);
        atomic_set(&root->log_commit[index1], 1);
 
        /* wait for previous tree log sync to complete */
        if (atomic_read(&root->log_commit[(index1 + 1) % 2]))
-               wait_log_commit(trans, root, root->log_transid - 1);
+               wait_log_commit(trans, root, log_transid - 1);
+
        while (1) {
                int batch = atomic_read(&root->log_batch);
                /* when we're on an ssd, just kick the log commit out */
@@ -2455,7 +2505,8 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
        }
 
        /* bail out if we need to do a full commit */
-       if (root->fs_info->last_trans_log_full_commit == trans->transid) {
+       if (ACCESS_ONCE(root->fs_info->last_trans_log_full_commit) ==
+           trans->transid) {
                ret = -EAGAIN;
                btrfs_free_logged_extents(log, log_transid);
                mutex_unlock(&root->log_mutex);
@@ -2485,7 +2536,6 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
        root->log_transid++;
        log->log_transid = root->log_transid;
        root->log_start_pid = 0;
-       smp_mb();
        /*
         * IO has been started, blocks of the log tree have WRITTEN flag set
         * in their headers. new modifications of the log will be written to
@@ -2493,9 +2543,16 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
         */
        mutex_unlock(&root->log_mutex);
 
+       btrfs_init_log_ctx(&root_log_ctx);
+
        mutex_lock(&log_root_tree->log_mutex);
        atomic_inc(&log_root_tree->log_batch);
        atomic_inc(&log_root_tree->log_writers);
+
+       index2 = log_root_tree->log_transid % 2;
+       list_add_tail(&root_log_ctx.list, &log_root_tree->log_ctxs[index2]);
+       root_log_ctx.log_transid = log_root_tree->log_transid;
+
        mutex_unlock(&log_root_tree->log_mutex);
 
        ret = update_log_root(trans, log);
@@ -2508,13 +2565,17 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
        }
 
        if (ret) {
+               if (!list_empty(&root_log_ctx.list))
+                       list_del_init(&root_log_ctx.list);
+
                blk_finish_plug(&plug);
                if (ret != -ENOSPC) {
                        btrfs_abort_transaction(trans, root, ret);
                        mutex_unlock(&log_root_tree->log_mutex);
                        goto out;
                }
-               root->fs_info->last_trans_log_full_commit = trans->transid;
+               ACCESS_ONCE(root->fs_info->last_trans_log_full_commit) =
+                                                               trans->transid;
                btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark);
                btrfs_free_logged_extents(log, log_transid);
                mutex_unlock(&log_root_tree->log_mutex);
@@ -2522,22 +2583,29 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
                goto out;
        }
 
-       index2 = log_root_tree->log_transid % 2;
+       if (log_root_tree->log_transid_committed >= root_log_ctx.log_transid) {
+               mutex_unlock(&log_root_tree->log_mutex);
+               ret = root_log_ctx.log_ret;
+               goto out;
+       }
+
+       index2 = root_log_ctx.log_transid % 2;
        if (atomic_read(&log_root_tree->log_commit[index2])) {
                blk_finish_plug(&plug);
                btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark);
                wait_log_commit(trans, log_root_tree,
-                               log_root_tree->log_transid);
+                               root_log_ctx.log_transid);
                btrfs_free_logged_extents(log, log_transid);
                mutex_unlock(&log_root_tree->log_mutex);
-               ret = 0;
+               ret = root_log_ctx.log_ret;
                goto out;
        }
+       ASSERT(root_log_ctx.log_transid == log_root_tree->log_transid);
        atomic_set(&log_root_tree->log_commit[index2], 1);
 
        if (atomic_read(&log_root_tree->log_commit[(index2 + 1) % 2])) {
                wait_log_commit(trans, log_root_tree,
-                               log_root_tree->log_transid - 1);
+                               root_log_ctx.log_transid - 1);
        }
 
        wait_for_writer(trans, log_root_tree);
@@ -2546,7 +2614,8 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
         * now that we've moved on to the tree of log tree roots,
         * check the full commit flag again
         */
-       if (root->fs_info->last_trans_log_full_commit == trans->transid) {
+       if (ACCESS_ONCE(root->fs_info->last_trans_log_full_commit) ==
+           trans->transid) {
                blk_finish_plug(&plug);
                btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark);
                btrfs_free_logged_extents(log, log_transid);
@@ -2577,8 +2646,6 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
                                btrfs_header_level(log_root_tree->node));
 
        log_root_tree->log_transid++;
-       smp_mb();
-
        mutex_unlock(&log_root_tree->log_mutex);
 
        /*
@@ -2600,13 +2667,28 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
        mutex_unlock(&root->log_mutex);
 
 out_wake_log_root:
+       /*
+        * We needn't get log_mutex here because we are sure all
+        * the other tasks are blocked.
+        */
+       btrfs_remove_all_log_ctxs(log_root_tree, index2, ret);
+
+       mutex_lock(&log_root_tree->log_mutex);
+       log_root_tree->log_transid_committed++;
        atomic_set(&log_root_tree->log_commit[index2], 0);
-       smp_mb();
+       mutex_unlock(&log_root_tree->log_mutex);
+
        if (waitqueue_active(&log_root_tree->log_commit_wait[index2]))
                wake_up(&log_root_tree->log_commit_wait[index2]);
 out:
+       /* See above. */
+       btrfs_remove_all_log_ctxs(root, index1, ret);
+
+       mutex_lock(&root->log_mutex);
+       root->log_transid_committed++;
        atomic_set(&root->log_commit[index1], 0);
-       smp_mb();
+       mutex_unlock(&root->log_mutex);
+
        if (waitqueue_active(&root->log_commit_wait[index1]))
                wake_up(&root->log_commit_wait[index1]);
        return ret;
@@ -3194,7 +3276,7 @@ static int log_inode_item(struct btrfs_trans_handle *trans,
 static noinline int copy_items(struct btrfs_trans_handle *trans,
                               struct inode *inode,
                               struct btrfs_path *dst_path,
-                              struct extent_buffer *src,
+                              struct btrfs_path *src_path, u64 *last_extent,
                               int start_slot, int nr, int inode_only)
 {
        unsigned long src_offset;
@@ -3202,6 +3284,8 @@ static noinline int copy_items(struct btrfs_trans_handle *trans,
        struct btrfs_root *log = BTRFS_I(inode)->root->log_root;
        struct btrfs_file_extent_item *extent;
        struct btrfs_inode_item *inode_item;
+       struct extent_buffer *src = src_path->nodes[0];
+       struct btrfs_key first_key, last_key, key;
        int ret;
        struct btrfs_key *ins_keys;
        u32 *ins_sizes;
@@ -3209,6 +3293,9 @@ static noinline int copy_items(struct btrfs_trans_handle *trans,
        int i;
        struct list_head ordered_sums;
        int skip_csum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM;
+       bool has_extents = false;
+       bool need_find_last_extent = (*last_extent == 0);
+       bool done = false;
 
        INIT_LIST_HEAD(&ordered_sums);
 
@@ -3217,6 +3304,8 @@ static noinline int copy_items(struct btrfs_trans_handle *trans,
        if (!ins_data)
                return -ENOMEM;
 
+       first_key.objectid = (u64)-1;
+
        ins_sizes = (u32 *)ins_data;
        ins_keys = (struct btrfs_key *)(ins_data + nr * sizeof(u32));
 
@@ -3237,6 +3326,9 @@ static noinline int copy_items(struct btrfs_trans_handle *trans,
 
                src_offset = btrfs_item_ptr_offset(src, start_slot + i);
 
+               if ((i == (nr - 1)))
+                       last_key = ins_keys[i];
+
                if (ins_keys[i].type == BTRFS_INODE_ITEM_KEY) {
                        inode_item = btrfs_item_ptr(dst_path->nodes[0],
                                                    dst_path->slots[0],
@@ -3248,6 +3340,21 @@ static noinline int copy_items(struct btrfs_trans_handle *trans,
                                           src_offset, ins_sizes[i]);
                }
 
+               /*
+                * We set need_find_last_extent here in case we know we were
+                * processing other items and then walk into the first extent in
+                * the inode.  If we don't hit an extent then nothing changes,
+                * we'll do the last search the next time around.
+                */
+               if (ins_keys[i].type == BTRFS_EXTENT_DATA_KEY) {
+                       has_extents = true;
+                       if (need_find_last_extent &&
+                           first_key.objectid == (u64)-1)
+                               first_key = ins_keys[i];
+               } else {
+                       need_find_last_extent = false;
+               }
+
                /* take a reference on file data extents so that truncates
                 * or deletes of this inode don't have to relog the inode
                 * again
@@ -3312,6 +3419,128 @@ static noinline int copy_items(struct btrfs_trans_handle *trans,
                list_del(&sums->list);
                kfree(sums);
        }
+
+       if (!has_extents)
+               return ret;
+
+       /*
+        * Because we use btrfs_search_forward we could skip leaves that were
+        * not modified and then assume *last_extent is valid when it really
+        * isn't.  So back up to the previous leaf and read the end of the last
+        * extent before we go and fill in holes.
+        */
+       if (need_find_last_extent) {
+               u64 len;
+
+               ret = btrfs_prev_leaf(BTRFS_I(inode)->root, src_path);
+               if (ret < 0)
+                       return ret;
+               if (ret)
+                       goto fill_holes;
+               if (src_path->slots[0])
+                       src_path->slots[0]--;
+               src = src_path->nodes[0];
+               btrfs_item_key_to_cpu(src, &key, src_path->slots[0]);
+               if (key.objectid != btrfs_ino(inode) ||
+                   key.type != BTRFS_EXTENT_DATA_KEY)
+                       goto fill_holes;
+               extent = btrfs_item_ptr(src, src_path->slots[0],
+                                       struct btrfs_file_extent_item);
+               if (btrfs_file_extent_type(src, extent) ==
+                   BTRFS_FILE_EXTENT_INLINE) {
+                       len = btrfs_file_extent_inline_len(src,
+                                                          src_path->slots[0],
+                                                          extent);
+                       *last_extent = ALIGN(key.offset + len,
+                                            log->sectorsize);
+               } else {
+                       len = btrfs_file_extent_num_bytes(src, extent);
+                       *last_extent = key.offset + len;
+               }
+       }
+fill_holes:
+       /* So we did prev_leaf, now we need to move to the next leaf, but a few
+        * things could have happened
+        *
+        * 1) A merge could have happened, so we could currently be on a leaf
+        * that holds what we were copying in the first place.
+        * 2) A split could have happened, and now not all of the items we want
+        * are on the same leaf.
+        *
+        * So we need to adjust how we search for holes, we need to drop the
+        * path and re-search for the first extent key we found, and then walk
+        * forward until we hit the last one we copied.
+        */
+       if (need_find_last_extent) {
+               /* btrfs_prev_leaf could return 1 without releasing the path */
+               btrfs_release_path(src_path);
+               ret = btrfs_search_slot(NULL, BTRFS_I(inode)->root, &first_key,
+                                       src_path, 0, 0);
+               if (ret < 0)
+                       return ret;
+               ASSERT(ret == 0);
+               src = src_path->nodes[0];
+               i = src_path->slots[0];
+       } else {
+               i = start_slot;
+       }
+
+       /*
+        * Ok so here we need to go through and fill in any holes we may have
+        * to make sure that holes are punched for those areas in case they had
+        * extents previously.
+        */
+       while (!done) {
+               u64 offset, len;
+               u64 extent_end;
+
+               if (i >= btrfs_header_nritems(src_path->nodes[0])) {
+                       ret = btrfs_next_leaf(BTRFS_I(inode)->root, src_path);
+                       if (ret < 0)
+                               return ret;
+                       ASSERT(ret == 0);
+                       src = src_path->nodes[0];
+                       i = 0;
+               }
+
+               btrfs_item_key_to_cpu(src, &key, i);
+               if (!btrfs_comp_cpu_keys(&key, &last_key))
+                       done = true;
+               if (key.objectid != btrfs_ino(inode) ||
+                   key.type != BTRFS_EXTENT_DATA_KEY) {
+                       i++;
+                       continue;
+               }
+               extent = btrfs_item_ptr(src, i, struct btrfs_file_extent_item);
+               if (btrfs_file_extent_type(src, extent) ==
+                   BTRFS_FILE_EXTENT_INLINE) {
+                       len = btrfs_file_extent_inline_len(src, i, extent);
+                       extent_end = ALIGN(key.offset + len, log->sectorsize);
+               } else {
+                       len = btrfs_file_extent_num_bytes(src, extent);
+                       extent_end = key.offset + len;
+               }
+               i++;
+
+               if (*last_extent == key.offset) {
+                       *last_extent = extent_end;
+                       continue;
+               }
+               offset = *last_extent;
+               len = key.offset - *last_extent;
+               ret = btrfs_insert_file_extent(trans, log, btrfs_ino(inode),
+                                              offset, 0, 0, len, 0, len, 0,
+                                              0, 0);
+               if (ret)
+                       break;
+               *last_extent = offset + len;
+       }
+       /*
+        * Need to let the callers know we dropped the path so they should
+        * re-search.
+        */
+       if (!ret && need_find_last_extent)
+               ret = 1;
        return ret;
 }
 
@@ -3331,7 +3560,8 @@ static int extent_cmp(void *priv, struct list_head *a, struct list_head *b)
 
 static int log_one_extent(struct btrfs_trans_handle *trans,
                          struct inode *inode, struct btrfs_root *root,
-                         struct extent_map *em, struct btrfs_path *path)
+                         struct extent_map *em, struct btrfs_path *path,
+                         struct list_head *logged_list)
 {
        struct btrfs_root *log = root->log_root;
        struct btrfs_file_extent_item *fi;
@@ -3347,23 +3577,28 @@ static int log_one_extent(struct btrfs_trans_handle *trans,
        u64 extent_offset = em->start - em->orig_start;
        u64 block_len;
        int ret;
-       int index = log->log_transid % 2;
        bool skip_csum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM;
-
-       ret = __btrfs_drop_extents(trans, log, inode, path, em->start,
-                                  em->start + em->len, NULL, 0);
-       if (ret)
-               return ret;
+       int extent_inserted = 0;
 
        INIT_LIST_HEAD(&ordered_sums);
        btrfs_init_map_token(&token);
-       key.objectid = btrfs_ino(inode);
-       key.type = BTRFS_EXTENT_DATA_KEY;
-       key.offset = em->start;
 
-       ret = btrfs_insert_empty_item(trans, log, path, &key, sizeof(*fi));
+       ret = __btrfs_drop_extents(trans, log, inode, path, em->start,
+                                  em->start + em->len, NULL, 0, 1,
+                                  sizeof(*fi), &extent_inserted);
        if (ret)
                return ret;
+
+       if (!extent_inserted) {
+               key.objectid = btrfs_ino(inode);
+               key.type = BTRFS_EXTENT_DATA_KEY;
+               key.offset = em->start;
+
+               ret = btrfs_insert_empty_item(trans, log, path, &key,
+                                             sizeof(*fi));
+               if (ret)
+                       return ret;
+       }
        leaf = path->nodes[0];
        fi = btrfs_item_ptr(leaf, path->slots[0],
                            struct btrfs_file_extent_item);
@@ -3425,17 +3660,12 @@ static int log_one_extent(struct btrfs_trans_handle *trans,
         * First check and see if our csums are on our outstanding ordered
         * extents.
         */
-again:
-       spin_lock_irq(&log->log_extents_lock[index]);
-       list_for_each_entry(ordered, &log->logged_list[index], log_list) {
+       list_for_each_entry(ordered, logged_list, log_list) {
                struct btrfs_ordered_sum *sum;
 
                if (!mod_len)
                        break;
 
-               if (ordered->inode != inode)
-                       continue;
-
                if (ordered->file_offset + ordered->len <= mod_start ||
                    mod_start + mod_len <= ordered->file_offset)
                        continue;
@@ -3478,27 +3708,20 @@ again:
                if (test_and_set_bit(BTRFS_ORDERED_LOGGED_CSUM,
                                     &ordered->flags))
                        continue;
-               atomic_inc(&ordered->refs);
-               spin_unlock_irq(&log->log_extents_lock[index]);
-               /*
-                * we've dropped the lock, we must either break or
-                * start over after this.
-                */
 
-               wait_event(ordered->wait, ordered->csum_bytes_left == 0);
+               if (ordered->csum_bytes_left) {
+                       btrfs_start_ordered_extent(inode, ordered, 0);
+                       wait_event(ordered->wait,
+                                  ordered->csum_bytes_left == 0);
+               }
 
                list_for_each_entry(sum, &ordered->list, list) {
                        ret = btrfs_csum_file_blocks(trans, log, sum);
-                       if (ret) {
-                               btrfs_put_ordered_extent(ordered);
+                       if (ret)
                                goto unlocked;
-                       }
                }
-               btrfs_put_ordered_extent(ordered);
-               goto again;
 
        }
-       spin_unlock_irq(&log->log_extents_lock[index]);
 unlocked:
 
        if (!mod_len || ret)
@@ -3536,7 +3759,8 @@ unlocked:
 static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans,
                                     struct btrfs_root *root,
                                     struct inode *inode,
-                                    struct btrfs_path *path)
+                                    struct btrfs_path *path,
+                                    struct list_head *logged_list)
 {
        struct extent_map *em, *n;
        struct list_head extents;
@@ -3594,7 +3818,7 @@ process:
 
                write_unlock(&tree->lock);
 
-               ret = log_one_extent(trans, inode, root, em, path);
+               ret = log_one_extent(trans, inode, root, em, path, logged_list);
                write_lock(&tree->lock);
                clear_em_logging(tree, em);
                free_extent_map(em);
@@ -3630,6 +3854,8 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
        struct btrfs_key max_key;
        struct btrfs_root *log = root->log_root;
        struct extent_buffer *src = NULL;
+       LIST_HEAD(logged_list);
+       u64 last_extent = 0;
        int err = 0;
        int ret;
        int nritems;
@@ -3677,7 +3903,7 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
 
        mutex_lock(&BTRFS_I(inode)->log_mutex);
 
-       btrfs_get_logged_extents(log, inode);
+       btrfs_get_logged_extents(inode, &logged_list);
 
        /*
         * a brute force approach to making sure we get the most uptodate
@@ -3745,11 +3971,15 @@ again:
                        goto next_slot;
                }
 
-               ret = copy_items(trans, inode, dst_path, src, ins_start_slot,
-                                ins_nr, inode_only);
-               if (ret) {
+               ret = copy_items(trans, inode, dst_path, path, &last_extent,
+                                ins_start_slot, ins_nr, inode_only);
+               if (ret < 0) {
                        err = ret;
                        goto out_unlock;
+               } if (ret) {
+                       ins_nr = 0;
+                       btrfs_release_path(path);
+                       continue;
                }
                ins_nr = 1;
                ins_start_slot = path->slots[0];
@@ -3763,13 +3993,14 @@ next_slot:
                        goto again;
                }
                if (ins_nr) {
-                       ret = copy_items(trans, inode, dst_path, src,
-                                        ins_start_slot,
+                       ret = copy_items(trans, inode, dst_path, path,
+                                        &last_extent, ins_start_slot,
                                         ins_nr, inode_only);
-                       if (ret) {
+                       if (ret < 0) {
                                err = ret;
                                goto out_unlock;
                        }
+                       ret = 0;
                        ins_nr = 0;
                }
                btrfs_release_path(path);
@@ -3784,12 +4015,13 @@ next_slot:
                }
        }
        if (ins_nr) {
-               ret = copy_items(trans, inode, dst_path, src, ins_start_slot,
-                                ins_nr, inode_only);
-               if (ret) {
+               ret = copy_items(trans, inode, dst_path, path, &last_extent,
+                                ins_start_slot, ins_nr, inode_only);
+               if (ret < 0) {
                        err = ret;
                        goto out_unlock;
                }
+               ret = 0;
                ins_nr = 0;
        }
 
@@ -3797,7 +4029,8 @@ log_extents:
        btrfs_release_path(path);
        btrfs_release_path(dst_path);
        if (fast_search) {
-               ret = btrfs_log_changed_extents(trans, root, inode, dst_path);
+               ret = btrfs_log_changed_extents(trans, root, inode, dst_path,
+                                               &logged_list);
                if (ret) {
                        err = ret;
                        goto out_unlock;
@@ -3822,8 +4055,10 @@ log_extents:
        BTRFS_I(inode)->logged_trans = trans->transid;
        BTRFS_I(inode)->last_log_commit = BTRFS_I(inode)->last_sub_trans;
 out_unlock:
-       if (err)
-               btrfs_free_logged_extents(log, log->log_transid);
+       if (unlikely(err))
+               btrfs_put_logged_extents(&logged_list);
+       else
+               btrfs_submit_logged_extents(&logged_list, log);
        mutex_unlock(&BTRFS_I(inode)->log_mutex);
 
        btrfs_free_path(path);
@@ -3914,7 +4149,8 @@ out:
  */
 static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans,
                                  struct btrfs_root *root, struct inode *inode,
-                                 struct dentry *parent, int exists_only)
+                                 struct dentry *parent, int exists_only,
+                                 struct btrfs_log_ctx *ctx)
 {
        int inode_only = exists_only ? LOG_INODE_EXISTS : LOG_INODE_ALL;
        struct super_block *sb;
@@ -3951,9 +4187,9 @@ static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans,
                goto end_no_trans;
        }
 
-       ret = start_log_trans(trans, root);
+       ret = start_log_trans(trans, root, ctx);
        if (ret)
-               goto end_trans;
+               goto end_no_trans;
 
        ret = btrfs_log_inode(trans, root, inode, inode_only);
        if (ret)
@@ -4001,6 +4237,9 @@ end_trans:
                root->fs_info->last_trans_log_full_commit = trans->transid;
                ret = 1;
        }
+
+       if (ret)
+               btrfs_remove_log_ctx(root, ctx);
        btrfs_end_log_trans(root);
 end_no_trans:
        return ret;
@@ -4013,12 +4252,14 @@ end_no_trans:
  * data on disk.
  */
 int btrfs_log_dentry_safe(struct btrfs_trans_handle *trans,
-                         struct btrfs_root *root, struct dentry *dentry)
+                         struct btrfs_root *root, struct dentry *dentry,
+                         struct btrfs_log_ctx *ctx)
 {
        struct dentry *parent = dget_parent(dentry);
        int ret;
 
-       ret = btrfs_log_inode_parent(trans, root, dentry->d_inode, parent, 0);
+       ret = btrfs_log_inode_parent(trans, root, dentry->d_inode, parent,
+                                    0, ctx);
        dput(parent);
 
        return ret;
@@ -4255,6 +4496,6 @@ int btrfs_log_new_name(struct btrfs_trans_handle *trans,
                    root->fs_info->last_trans_committed))
                return 0;
 
-       return btrfs_log_inode_parent(trans, root, inode, parent, 1);
+       return btrfs_log_inode_parent(trans, root, inode, parent, 1, NULL);
 }