Btrfs: fix BUG_ON in btrfs_submit_compressed_write
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index ff61a41..7a4d9c8 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -402,7 +402,10 @@ int btrfs_submit_compressed_write(struct inode *inode, u64 start,
                        }
 
                        ret = btrfs_map_bio(root, WRITE, bio, 0, 1);
-                       BUG_ON(ret); /* -ENOMEM */
+                       if (ret) {
+                               bio->bi_error = ret;
+                               bio_endio(bio);
+                       }
 
                        bio_put(bio);
 
@@ -432,7 +435,10 @@ int btrfs_submit_compressed_write(struct inode *inode, u64 start,
        }
 
        ret = btrfs_map_bio(root, WRITE, bio, 0, 1);
-       BUG_ON(ret); /* -ENOMEM */
+       if (ret) {
+               bio->bi_error = ret;
+               bio_endio(bio);
+       }
 
        bio_put(bio);
        return 0;
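
Both hunks above apply the same pattern: when btrfs_map_bio() fails (typically with -ENOMEM), the error is recorded in bio->bi_error and bio_endio() runs the normal completion path, so writeback observes a failed write instead of the machine hitting a BUG_ON. Below is a minimal userspace sketch of that pattern; fake_bio, failing_map and the other names are illustrative stand-ins, not kernel API:

#include <errno.h>
#include <stdio.h>

/* Stand-in types and names, not kernel API. */
struct fake_bio {
	int bi_error;                          /* 0 on success, -errno on failure */
	void (*bi_end_io)(struct fake_bio *);  /* completion callback */
};

static void write_done(struct fake_bio *bio)
{
	if (bio->bi_error)
		fprintf(stderr, "compressed write failed: %d\n", bio->bi_error);
}

static int failing_map(struct fake_bio *bio)
{
	(void)bio;
	return -ENOMEM;	/* pretend the block layer ran out of memory */
}

/* The pattern from the hunks above: on submission failure, hand the
 * errno to the completion path instead of crashing. */
static void submit_one(struct fake_bio *bio, int (*map_fn)(struct fake_bio *))
{
	int ret = map_fn(bio);

	if (ret) {
		bio->bi_error = ret;	/* record the failure on the bio */
		bio->bi_end_io(bio);	/* analogue of bio_endio(bio) */
	}
}

int main(void)
{
	struct fake_bio bio = { 0, write_done };

	submit_one(&bio, failing_map);
	return 0;
}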
@@ -743,8 +749,11 @@ out:
 static struct {
        struct list_head idle_ws;
        spinlock_t ws_lock;
-       int num_ws;
-       atomic_t alloc_ws;
+       /* Number of free workspaces */
+       int free_ws;
+       /* Total number of allocated workspaces */
+       atomic_t total_ws;
+       /* Waiters for a free workspace */
        wait_queue_head_t ws_wait;
 } btrfs_comp_ws[BTRFS_COMPRESS_TYPES];
 
@@ -758,16 +767,34 @@ void __init btrfs_init_compress(void)
        int i;
 
        for (i = 0; i < BTRFS_COMPRESS_TYPES; i++) {
+               struct list_head *workspace;
+
                INIT_LIST_HEAD(&btrfs_comp_ws[i].idle_ws);
                spin_lock_init(&btrfs_comp_ws[i].ws_lock);
-               atomic_set(&btrfs_comp_ws[i].alloc_ws, 0);
+               atomic_set(&btrfs_comp_ws[i].total_ws, 0);
                init_waitqueue_head(&btrfs_comp_ws[i].ws_wait);
+
+               /*
+                * Preallocate one workspace for each compression type so
+                * we can guarantee forward progress in the worst case
+                */
+               workspace = btrfs_compress_op[i]->alloc_workspace();
+               if (IS_ERR(workspace)) {
+                       printk(KERN_WARNING
+       "BTRFS: cannot preallocate compression workspace, will try later\n");
+               } else {
+                       atomic_set(&btrfs_comp_ws[i].total_ws, 1);
+                       btrfs_comp_ws[i].free_ws = 1;
+                       list_add(workspace, &btrfs_comp_ws[i].idle_ws);
+               }
        }
 }
 
 /*
- * this finds an available workspace or allocates a new one
- * ERR_PTR is returned if things go bad.
+ * This finds an available workspace or allocates a new one.
+ * If it's not possible to allocate a new one, it waits until one is
+ * freed. Preallocation guarantees forward progress and we do not
+ * return errors.
  */
 static struct list_head *find_workspace(int type)
 {
@@ -777,36 +804,58 @@ static struct list_head *find_workspace(int type)
 
        struct list_head *idle_ws       = &btrfs_comp_ws[idx].idle_ws;
        spinlock_t *ws_lock             = &btrfs_comp_ws[idx].ws_lock;
-       atomic_t *alloc_ws              = &btrfs_comp_ws[idx].alloc_ws;
+       atomic_t *total_ws              = &btrfs_comp_ws[idx].total_ws;
        wait_queue_head_t *ws_wait      = &btrfs_comp_ws[idx].ws_wait;
-       int *num_ws                     = &btrfs_comp_ws[idx].num_ws;
+       int *free_ws                    = &btrfs_comp_ws[idx].free_ws;
 again:
        spin_lock(ws_lock);
        if (!list_empty(idle_ws)) {
                workspace = idle_ws->next;
                list_del(workspace);
-               (*num_ws)--;
+               (*free_ws)--;
                spin_unlock(ws_lock);
                return workspace;
 
        }
-       if (atomic_read(alloc_ws) > cpus) {
+       if (atomic_read(total_ws) > cpus) {
                DEFINE_WAIT(wait);
 
                spin_unlock(ws_lock);
                prepare_to_wait(ws_wait, &wait, TASK_UNINTERRUPTIBLE);
-               if (atomic_read(alloc_ws) > cpus && !*num_ws)
+               if (atomic_read(total_ws) > cpus && !*free_ws)
                        schedule();
                finish_wait(ws_wait, &wait);
                goto again;
        }
-       atomic_inc(alloc_ws);
+       atomic_inc(total_ws);
        spin_unlock(ws_lock);
 
        workspace = btrfs_compress_op[idx]->alloc_workspace();
        if (IS_ERR(workspace)) {
-               atomic_dec(alloc_ws);
+               atomic_dec(total_ws);
                wake_up(ws_wait);
+
+               /*
+                * Do not return the error but go back to waiting. There's a
+                * workspace preallocated for each type and the compression
+                * time is bounded so we get to a workspace eventually. This
+                * makes our caller's life easier.
+                *
+                * To prevent silent and low-probability deadlocks (when the
+                * initial preallocation fails), check if there are any
+                * workspaces at all.
+                */
+               if (atomic_read(total_ws) == 0) {
+                       static DEFINE_RATELIMIT_STATE(_rs,
+                                       /* once per minute */ 60 * HZ,
+                                       /* no burst */ 1);
+
+                       if (__ratelimit(&_rs)) {
+                               printk(KERN_WARNING
+                           "BTRFS: no compression workspaces, low memory, retrying\n");
+                       }
+               }
+               goto again;
        }
        return workspace;
 }
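
Taken together, the preallocation in btrfs_init_compress(), the retry loop above, and free_workspace() in the hunk that follows implement a pool that can always make forward progress. Here is a compact userspace analogue, assuming one seed workspace was allocated at init time; every name is illustrative, and the per-CPU cap on concurrent allocations and the rate-limited warning are left out:

#include <pthread.h>
#include <stdlib.h>

/* Userspace analogue of the workspace pool; not kernel API. */
struct ws_pool {
	pthread_mutex_t lock;
	pthread_cond_t  nonempty;
	void *idle[64];     /* stack of idle workspaces */
	int   free_ws;      /* entries on idle[], mirrors free_ws */
};

static void ws_pool_init(struct ws_pool *p)
{
	pthread_mutex_init(&p->lock, NULL);
	pthread_cond_init(&p->nonempty, NULL);
	p->free_ws = 0;

	/* preallocate one workspace, as btrfs_init_compress() now does */
	void *seed = malloc(1 << 17);
	if (seed)
		p->idle[p->free_ws++] = seed;
}

static void *ws_take(struct ws_pool *p)
{
	for (;;) {
		pthread_mutex_lock(&p->lock);
		if (p->free_ws > 0) {
			void *ws = p->idle[--p->free_ws];
			pthread_mutex_unlock(&p->lock);
			return ws;
		}
		pthread_mutex_unlock(&p->lock);

		/* nothing idle: try to allocate a fresh workspace ... */
		void *ws = malloc(1 << 17);
		if (ws)
			return ws;

		/* ... and on failure sleep until one is returned, the way
		 * find_workspace() now loops instead of returning ERR_PTR */
		pthread_mutex_lock(&p->lock);
		while (p->free_ws == 0)
			pthread_cond_wait(&p->nonempty, &p->lock);
		pthread_mutex_unlock(&p->lock);
	}
}

static void ws_put(struct ws_pool *p, void *ws, int cap)
{
	pthread_mutex_lock(&p->lock);
	if (p->free_ws < cap) {      /* cache it, like free_workspace() */
		p->idle[p->free_ws++] = ws;
		pthread_mutex_unlock(&p->lock);
		pthread_cond_signal(&p->nonempty);
		return;
	}
	pthread_mutex_unlock(&p->lock);
	free(ws);                    /* over the cap: really release it */
}

If the seed allocation fails and every later allocation also fails, a taker can wait indefinitely; that is exactly the low-probability case the patch detects via total_ws == 0 and reports with the rate-limited warning.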
@@ -820,21 +869,21 @@ static void free_workspace(int type, struct list_head *workspace)
        int idx = type - 1;
        struct list_head *idle_ws       = &btrfs_comp_ws[idx].idle_ws;
        spinlock_t *ws_lock             = &btrfs_comp_ws[idx].ws_lock;
-       atomic_t *alloc_ws              = &btrfs_comp_ws[idx].alloc_ws;
+       atomic_t *total_ws              = &btrfs_comp_ws[idx].total_ws;
        wait_queue_head_t *ws_wait      = &btrfs_comp_ws[idx].ws_wait;
-       int *num_ws                     = &btrfs_comp_ws[idx].num_ws;
+       int *free_ws                    = &btrfs_comp_ws[idx].free_ws;
 
        spin_lock(ws_lock);
-       if (*num_ws < num_online_cpus()) {
+       if (*free_ws < num_online_cpus()) {
                list_add(workspace, idle_ws);
-               (*num_ws)++;
+               (*free_ws)++;
                spin_unlock(ws_lock);
                goto wake;
        }
        spin_unlock(ws_lock);
 
        btrfs_compress_op[idx]->free_workspace(workspace);
-       atomic_dec(alloc_ws);
+       atomic_dec(total_ws);
 wake:
        /*
         * Make sure counter is updated before we wake up waiters.
@@ -857,7 +906,7 @@ static void free_workspaces(void)
                        workspace = btrfs_comp_ws[i].idle_ws.next;
                        list_del(workspace);
                        btrfs_compress_op[i]->free_workspace(workspace);
-                       atomic_dec(&btrfs_comp_ws[i].alloc_ws);
+                       atomic_dec(&btrfs_comp_ws[i].total_ws);
                }
        }
 }
@@ -894,8 +943,6 @@ int btrfs_compress_pages(int type, struct address_space *mapping,
        int ret;
 
        workspace = find_workspace(type);
-       if (IS_ERR(workspace))
-               return PTR_ERR(workspace);
 
        ret = btrfs_compress_op[type-1]->compress_pages(workspace, mapping,
                                                      start, len, pages,
@@ -930,8 +977,6 @@ static int btrfs_decompress_biovec(int type, struct page **pages_in,
        int ret;
 
        workspace = find_workspace(type);
-       if (IS_ERR(workspace))
-               return PTR_ERR(workspace);
 
        ret = btrfs_compress_op[type-1]->decompress_biovec(workspace, pages_in,
                                                         disk_start,
@@ -952,8 +997,6 @@ int btrfs_decompress(int type, unsigned char *data_in, struct page *dest_page,
        int ret;
 
        workspace = find_workspace(type);
-       if (IS_ERR(workspace))
-               return PTR_ERR(workspace);
 
        ret = btrfs_compress_op[type-1]->decompress(workspace, data_in,
                                                  dest_page, start_byte,
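
Since find_workspace() can no longer fail, the last three hunks remove now-dead ERR_PTR handling from btrfs_compress_pages(), btrfs_decompress_biovec() and btrfs_decompress(). A sketch of the resulting caller shape; the *_stub names are hypothetical stand-ins, not the kernel's prototypes:

/* Sketch of the simplified caller contract; stub names are hypothetical. */
struct list_head;

struct list_head *find_workspace_stub(int type);   /* may sleep, never ERR_PTR */
void free_workspace_stub(int type, struct list_head *ws);
int do_compress_stub(struct list_head *ws);

static int compress_request(int type)
{
	/* no IS_ERR() check needed any more */
	struct list_head *workspace = find_workspace_stub(type);
	int ret = do_compress_stub(workspace);  /* the only error source left */

	free_workspace_stub(type, workspace);
	return ret;
}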