1 /*
2  * Copyright (C) 2008 Red Hat.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 02111-1307, USA.
17  */
18
19 #include <linux/pagemap.h>
20 #include <linux/sched.h>
21 #include <linux/slab.h>
22 #include <linux/math64.h>
23 #include <linux/ratelimit.h>
24 #include "ctree.h"
25 #include "free-space-cache.h"
26 #include "transaction.h"
27 #include "disk-io.h"
28 #include "extent_io.h"
29 #include "inode-map.h"
30
31 #define BITS_PER_BITMAP         (PAGE_CACHE_SIZE * 8)
32 #define MAX_CACHE_BYTES_PER_GIG (32 * 1024)
33
34 static int link_free_space(struct btrfs_free_space_ctl *ctl,
35                            struct btrfs_free_space *info);
36
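/*
 * Find the free space header item keyed at (FREE_SPACE_OBJECTID, 0, offset),
 * read the inode key stored in it and iget the cache inode.  __GFP_FS is
 * cleared on the inode's mapping so page allocations against the cache
 * cannot recurse back into the filesystem under memory pressure.
 */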
37 static struct inode *__lookup_free_space_inode(struct btrfs_root *root,
38                                                struct btrfs_path *path,
39                                                u64 offset)
40 {
41         struct btrfs_key key;
42         struct btrfs_key location;
43         struct btrfs_disk_key disk_key;
44         struct btrfs_free_space_header *header;
45         struct extent_buffer *leaf;
46         struct inode *inode = NULL;
47         int ret;
48
49         key.objectid = BTRFS_FREE_SPACE_OBJECTID;
50         key.offset = offset;
51         key.type = 0;
52
53         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
54         if (ret < 0)
55                 return ERR_PTR(ret);
56         if (ret > 0) {
57                 btrfs_release_path(path);
58                 return ERR_PTR(-ENOENT);
59         }
60
61         leaf = path->nodes[0];
62         header = btrfs_item_ptr(leaf, path->slots[0],
63                                 struct btrfs_free_space_header);
64         btrfs_free_space_key(leaf, header, &disk_key);
65         btrfs_disk_key_to_cpu(&location, &disk_key);
66         btrfs_release_path(path);
67
68         inode = btrfs_iget(root->fs_info->sb, &location, root, NULL);
69         if (!inode)
70                 return ERR_PTR(-ENOENT);
71         if (IS_ERR(inode))
72                 return inode;
73         if (is_bad_inode(inode)) {
74                 iput(inode);
75                 return ERR_PTR(-ENOENT);
76         }
77
78         inode->i_mapping->flags &= ~__GFP_FS;
79
80         return inode;
81 }
82
83 struct inode *lookup_free_space_inode(struct btrfs_root *root,
84                                       struct btrfs_block_group_cache
85                                       *block_group, struct btrfs_path *path)
86 {
87         struct inode *inode = NULL;
88         u32 flags = BTRFS_INODE_NODATASUM | BTRFS_INODE_NODATACOW;
89
90         spin_lock(&block_group->lock);
91         if (block_group->inode)
92                 inode = igrab(block_group->inode);
93         spin_unlock(&block_group->lock);
94         if (inode)
95                 return inode;
96
97         inode = __lookup_free_space_inode(root, path,
98                                           block_group->key.objectid);
99         if (IS_ERR(inode))
100                 return inode;
101
102         spin_lock(&block_group->lock);
103         if (!((BTRFS_I(inode)->flags & flags) == flags)) {
104                 printk(KERN_INFO "Old style space inode found, converting.\n");
105                 BTRFS_I(inode)->flags |= BTRFS_INODE_NODATASUM |
106                         BTRFS_INODE_NODATACOW;
107                 block_group->disk_cache_state = BTRFS_DC_CLEAR;
108         }
109
110         if (!block_group->iref) {
111                 block_group->inode = igrab(inode);
112                 block_group->iref = 1;
113         }
114         spin_unlock(&block_group->lock);
115
116         return inode;
117 }
118
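/*
 * Create the backing inode for a space cache: insert the inode item itself
 * (NODATASUM/NODATACOW for block group caches, since their checksums are
 * stored inline in the cache pages) followed by the free space header item
 * keyed at (FREE_SPACE_OBJECTID, 0, offset).
 */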
119 int __create_free_space_inode(struct btrfs_root *root,
120                               struct btrfs_trans_handle *trans,
121                               struct btrfs_path *path, u64 ino, u64 offset)
122 {
123         struct btrfs_key key;
124         struct btrfs_disk_key disk_key;
125         struct btrfs_free_space_header *header;
126         struct btrfs_inode_item *inode_item;
127         struct extent_buffer *leaf;
128         u64 flags = BTRFS_INODE_NOCOMPRESS | BTRFS_INODE_PREALLOC;
129         int ret;
130
131         ret = btrfs_insert_empty_inode(trans, root, path, ino);
132         if (ret)
133                 return ret;
134
135         /* We inline crcs for the free disk space cache */
136         if (ino != BTRFS_FREE_INO_OBJECTID)
137                 flags |= BTRFS_INODE_NODATASUM | BTRFS_INODE_NODATACOW;
138
139         leaf = path->nodes[0];
140         inode_item = btrfs_item_ptr(leaf, path->slots[0],
141                                     struct btrfs_inode_item);
142         btrfs_item_key(leaf, &disk_key, path->slots[0]);
143         memset_extent_buffer(leaf, 0, (unsigned long)inode_item,
144                              sizeof(*inode_item));
145         btrfs_set_inode_generation(leaf, inode_item, trans->transid);
146         btrfs_set_inode_size(leaf, inode_item, 0);
147         btrfs_set_inode_nbytes(leaf, inode_item, 0);
148         btrfs_set_inode_uid(leaf, inode_item, 0);
149         btrfs_set_inode_gid(leaf, inode_item, 0);
150         btrfs_set_inode_mode(leaf, inode_item, S_IFREG | 0600);
151         btrfs_set_inode_flags(leaf, inode_item, flags);
152         btrfs_set_inode_nlink(leaf, inode_item, 1);
153         btrfs_set_inode_transid(leaf, inode_item, trans->transid);
154         btrfs_set_inode_block_group(leaf, inode_item, offset);
155         btrfs_mark_buffer_dirty(leaf);
156         btrfs_release_path(path);
157
158         key.objectid = BTRFS_FREE_SPACE_OBJECTID;
159         key.offset = offset;
160         key.type = 0;
161
162         ret = btrfs_insert_empty_item(trans, root, path, &key,
163                                       sizeof(struct btrfs_free_space_header));
164         if (ret < 0) {
165                 btrfs_release_path(path);
166                 return ret;
167         }
168         leaf = path->nodes[0];
169         header = btrfs_item_ptr(leaf, path->slots[0],
170                                 struct btrfs_free_space_header);
171         memset_extent_buffer(leaf, 0, (unsigned long)header, sizeof(*header));
172         btrfs_set_free_space_key(leaf, header, &disk_key);
173         btrfs_mark_buffer_dirty(leaf);
174         btrfs_release_path(path);
175
176         return 0;
177 }
178
179 int create_free_space_inode(struct btrfs_root *root,
180                             struct btrfs_trans_handle *trans,
181                             struct btrfs_block_group_cache *block_group,
182                             struct btrfs_path *path)
183 {
184         int ret;
185         u64 ino;
186
187         ret = btrfs_find_free_objectid(root, &ino);
188         if (ret < 0)
189                 return ret;
190
191         return __create_free_space_inode(root, trans, path, ino,
192                                          block_group->key.objectid);
193 }
194
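/*
 * Throw away the current contents of a space cache inode.  The truncation is
 * charged to the global block reserve, so bail out with -ENOSPC up front if
 * it does not hold enough space for the truncate plus the inode update.
 */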
195 int btrfs_truncate_free_space_cache(struct btrfs_root *root,
196                                     struct btrfs_trans_handle *trans,
197                                     struct btrfs_path *path,
198                                     struct inode *inode)
199 {
200         struct btrfs_block_rsv *rsv;
201         u64 needed_bytes;
202         loff_t oldsize;
203         int ret = 0;
204
205         rsv = trans->block_rsv;
206         trans->block_rsv = &root->fs_info->global_block_rsv;
207
208         /* 1 for slack space, 1 for updating the inode */
209         needed_bytes = btrfs_calc_trunc_metadata_size(root, 1) +
210                 btrfs_calc_trans_metadata_size(root, 1);
211
212         spin_lock(&trans->block_rsv->lock);
213         if (trans->block_rsv->reserved < needed_bytes) {
214                 spin_unlock(&trans->block_rsv->lock);
215                 trans->block_rsv = rsv;
216                 return -ENOSPC;
217         }
218         spin_unlock(&trans->block_rsv->lock);
219
220         oldsize = i_size_read(inode);
221         btrfs_i_size_write(inode, 0);
222         truncate_pagecache(inode, oldsize, 0);
223
224         /*
225          * We don't need an orphan item because truncating the free space cache
226          * will never be split across transactions.
227          */
228         ret = btrfs_truncate_inode_items(trans, root, inode,
229                                          0, BTRFS_EXTENT_DATA_KEY);
230
231         if (ret) {
232                 trans->block_rsv = rsv;
233                 btrfs_abort_transaction(trans, root, ret);
234                 return ret;
235         }
236
237         ret = btrfs_update_inode(trans, root, inode);
238         if (ret)
239                 btrfs_abort_transaction(trans, root, ret);
240         trans->block_rsv = rsv;
241
242         return ret;
243 }
244
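/* Kick off readahead for the whole cache inode before we start parsing it. */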
245 static int readahead_cache(struct inode *inode)
246 {
247         struct file_ra_state *ra;
248         unsigned long last_index;
249
250         ra = kzalloc(sizeof(*ra), GFP_NOFS);
251         if (!ra)
252                 return -ENOMEM;
253
254         file_ra_state_init(ra, inode->i_mapping);
255         last_index = (i_size_read(inode) - 1) >> PAGE_CACHE_SHIFT;
256
257         page_cache_sync_readahead(inode->i_mapping, ra, NULL, 0, last_index);
258
259         kfree(ra);
260
261         return 0;
262 }
263
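/*
 * Small helper context used while reading or writing the cache pages.  cur
 * and orig point into the currently kmapped page, index/num_pages track our
 * position, and check_crcs is set for block group caches, whose per-page
 * crc32c sums live in an array at the front of the first page.
 */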
264 struct io_ctl {
265         void *cur, *orig;
266         struct page *page;
267         struct page **pages;
268         struct btrfs_root *root;
269         unsigned long size;
270         int index;
271         int num_pages;
272         unsigned check_crcs:1;
273 };
274
275 static int io_ctl_init(struct io_ctl *io_ctl, struct inode *inode,
276                        struct btrfs_root *root)
277 {
278         memset(io_ctl, 0, sizeof(struct io_ctl));
279         io_ctl->num_pages = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >>
280                 PAGE_CACHE_SHIFT;
281         io_ctl->pages = kzalloc(sizeof(struct page *) * io_ctl->num_pages,
282                                 GFP_NOFS);
283         if (!io_ctl->pages)
284                 return -ENOMEM;
285         io_ctl->root = root;
286         if (btrfs_ino(inode) != BTRFS_FREE_INO_OBJECTID)
287                 io_ctl->check_crcs = 1;
288         return 0;
289 }
290
291 static void io_ctl_free(struct io_ctl *io_ctl)
292 {
293         kfree(io_ctl->pages);
294 }
295
296 static void io_ctl_unmap_page(struct io_ctl *io_ctl)
297 {
298         if (io_ctl->cur) {
299                 kunmap(io_ctl->page);
300                 io_ctl->cur = NULL;
301                 io_ctl->orig = NULL;
302         }
303 }
304
305 static void io_ctl_map_page(struct io_ctl *io_ctl, int clear)
306 {
307         WARN_ON(io_ctl->cur);
308         BUG_ON(io_ctl->index >= io_ctl->num_pages);
309         io_ctl->page = io_ctl->pages[io_ctl->index++];
310         io_ctl->cur = kmap(io_ctl->page);
311         io_ctl->orig = io_ctl->cur;
312         io_ctl->size = PAGE_CACHE_SIZE;
313         if (clear)
314                 memset(io_ctl->cur, 0, PAGE_CACHE_SIZE);
315 }
316
317 static void io_ctl_drop_pages(struct io_ctl *io_ctl)
318 {
319         int i;
320
321         io_ctl_unmap_page(io_ctl);
322
323         for (i = 0; i < io_ctl->num_pages; i++) {
324                 if (io_ctl->pages[i]) {
325                         ClearPageChecked(io_ctl->pages[i]);
326                         unlock_page(io_ctl->pages[i]);
327                         page_cache_release(io_ctl->pages[i]);
328                 }
329         }
330 }
331
332 static int io_ctl_prepare_pages(struct io_ctl *io_ctl, struct inode *inode,
333                                 int uptodate)
334 {
335         struct page *page;
336         gfp_t mask = btrfs_alloc_write_mask(inode->i_mapping);
337         int i;
338
339         for (i = 0; i < io_ctl->num_pages; i++) {
340                 page = find_or_create_page(inode->i_mapping, i, mask);
341                 if (!page) {
342                         io_ctl_drop_pages(io_ctl);
343                         return -ENOMEM;
344                 }
345                 io_ctl->pages[i] = page;
346                 if (uptodate && !PageUptodate(page)) {
347                         btrfs_readpage(NULL, page);
348                         lock_page(page);
349                         if (!PageUptodate(page)) {
350                                 printk(KERN_ERR "btrfs: error reading free "
351                                        "space cache\n");
352                                 io_ctl_drop_pages(io_ctl);
353                                 return -EIO;
354                         }
355                 }
356         }
357
358         for (i = 0; i < io_ctl->num_pages; i++) {
359                 clear_page_dirty_for_io(io_ctl->pages[i]);
360                 set_page_extent_mapped(io_ctl->pages[i]);
361         }
362
363         return 0;
364 }
365
366 static void io_ctl_set_generation(struct io_ctl *io_ctl, u64 generation)
367 {
368         u64 *val;
369
370         io_ctl_map_page(io_ctl, 1);
371
372         /*
373          * Skip the csum areas.  If we don't check crcs then we just have a
374          * 64bit chunk at the front of the first page.
375          */
376         if (io_ctl->check_crcs) {
377                 io_ctl->cur += (sizeof(u32) * io_ctl->num_pages);
378                 io_ctl->size -= sizeof(u64) + (sizeof(u32) * io_ctl->num_pages);
379         } else {
380                 io_ctl->cur += sizeof(u64);
381                 io_ctl->size -= sizeof(u64) * 2;
382         }
383
384         val = io_ctl->cur;
385         *val = cpu_to_le64(generation);
386         io_ctl->cur += sizeof(u64);
387 }
388
389 static int io_ctl_check_generation(struct io_ctl *io_ctl, u64 generation)
390 {
391         u64 *gen;
392
393         /*
394          * Skip the crc area.  If we don't check crcs then we just have a 64bit
395          * chunk at the front of the first page.
396          */
397         if (io_ctl->check_crcs) {
398                 io_ctl->cur += sizeof(u32) * io_ctl->num_pages;
399                 io_ctl->size -= sizeof(u64) +
400                         (sizeof(u32) * io_ctl->num_pages);
401         } else {
402                 io_ctl->cur += sizeof(u64);
403                 io_ctl->size -= sizeof(u64) * 2;
404         }
405
406         gen = io_ctl->cur;
407         if (le64_to_cpu(*gen) != generation) {
408                 printk_ratelimited(KERN_ERR "btrfs: space cache generation "
409                                    "(%Lu) does not match inode (%Lu)\n", *gen,
410                                    generation);
411                 io_ctl_unmap_page(io_ctl);
412                 return -EIO;
413         }
414         io_ctl->cur += sizeof(u64);
415         return 0;
416 }
417
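/*
 * Checksum the page we just filled (skipping the crc array itself on page 0)
 * and store the result in slot 'index' of the crc array on the first page.
 */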
418 static void io_ctl_set_crc(struct io_ctl *io_ctl, int index)
419 {
420         u32 *tmp;
421         u32 crc = ~(u32)0;
422         unsigned offset = 0;
423
424         if (!io_ctl->check_crcs) {
425                 io_ctl_unmap_page(io_ctl);
426                 return;
427         }
428
429         if (index == 0)
430                 offset = sizeof(u32) * io_ctl->num_pages;
431
432         crc = btrfs_csum_data(io_ctl->root, io_ctl->orig + offset, crc,
433                               PAGE_CACHE_SIZE - offset);
434         btrfs_csum_final(crc, (char *)&crc);
435         io_ctl_unmap_page(io_ctl);
436         tmp = kmap(io_ctl->pages[0]);
437         tmp += index;
438         *tmp = crc;
439         kunmap(io_ctl->pages[0]);
440 }
441
442 static int io_ctl_check_crc(struct io_ctl *io_ctl, int index)
443 {
444         u32 *tmp, val;
445         u32 crc = ~(u32)0;
446         unsigned offset = 0;
447
448         if (!io_ctl->check_crcs) {
449                 io_ctl_map_page(io_ctl, 0);
450                 return 0;
451         }
452
453         if (index == 0)
454                 offset = sizeof(u32) * io_ctl->num_pages;
455
456         tmp = kmap(io_ctl->pages[0]);
457         tmp += index;
458         val = *tmp;
459         kunmap(io_ctl->pages[0]);
460
461         io_ctl_map_page(io_ctl, 0);
462         crc = btrfs_csum_data(io_ctl->root, io_ctl->orig + offset, crc,
463                               PAGE_CACHE_SIZE - offset);
464         btrfs_csum_final(crc, (char *)&crc);
465         if (val != crc) {
466                 printk_ratelimited(KERN_ERR "btrfs: csum mismatch on free "
467                                    "space cache\n");
468                 io_ctl_unmap_page(io_ctl);
469                 return -EIO;
470         }
471
472         return 0;
473 }
474
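/*
 * Append one on-disk entry (offset, bytes, type) at the current position,
 * crc'ing the page and mapping the next one once it cannot hold another
 * whole entry.  Returns -ENOSPC once all of the pages have been consumed.
 */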
475 static int io_ctl_add_entry(struct io_ctl *io_ctl, u64 offset, u64 bytes,
476                             void *bitmap)
477 {
478         struct btrfs_free_space_entry *entry;
479
480         if (!io_ctl->cur)
481                 return -ENOSPC;
482
483         entry = io_ctl->cur;
484         entry->offset = cpu_to_le64(offset);
485         entry->bytes = cpu_to_le64(bytes);
486         entry->type = (bitmap) ? BTRFS_FREE_SPACE_BITMAP :
487                 BTRFS_FREE_SPACE_EXTENT;
488         io_ctl->cur += sizeof(struct btrfs_free_space_entry);
489         io_ctl->size -= sizeof(struct btrfs_free_space_entry);
490
491         if (io_ctl->size >= sizeof(struct btrfs_free_space_entry))
492                 return 0;
493
494         io_ctl_set_crc(io_ctl, io_ctl->index - 1);
495
496         /* No more pages to map */
497         if (io_ctl->index >= io_ctl->num_pages)
498                 return 0;
499
500         /* map the next page */
501         io_ctl_map_page(io_ctl, 1);
502         return 0;
503 }
504
505 static int io_ctl_add_bitmap(struct io_ctl *io_ctl, void *bitmap)
506 {
507         if (!io_ctl->cur)
508                 return -ENOSPC;
509
510         /*
511          * If we aren't at the start of the current page, unmap this one and
512          * map the next one if there is any left.
513          */
514         if (io_ctl->cur != io_ctl->orig) {
515                 io_ctl_set_crc(io_ctl, io_ctl->index - 1);
516                 if (io_ctl->index >= io_ctl->num_pages)
517                         return -ENOSPC;
518                 io_ctl_map_page(io_ctl, 0);
519         }
520
521         memcpy(io_ctl->cur, bitmap, PAGE_CACHE_SIZE);
522         io_ctl_set_crc(io_ctl, io_ctl->index - 1);
523         if (io_ctl->index < io_ctl->num_pages)
524                 io_ctl_map_page(io_ctl, 0);
525         return 0;
526 }
527
528 static void io_ctl_zero_remaining_pages(struct io_ctl *io_ctl)
529 {
530         /*
531          * If we're not on the boundary we know we've modified the page and we
532          * need to crc the page.
533          */
534         if (io_ctl->cur != io_ctl->orig)
535                 io_ctl_set_crc(io_ctl, io_ctl->index - 1);
536         else
537                 io_ctl_unmap_page(io_ctl);
538
539         while (io_ctl->index < io_ctl->num_pages) {
540                 io_ctl_map_page(io_ctl, 1);
541                 io_ctl_set_crc(io_ctl, io_ctl->index - 1);
542         }
543 }
544
545 static int io_ctl_read_entry(struct io_ctl *io_ctl,
546                             struct btrfs_free_space *entry, u8 *type)
547 {
548         struct btrfs_free_space_entry *e;
549         int ret;
550
551         if (!io_ctl->cur) {
552                 ret = io_ctl_check_crc(io_ctl, io_ctl->index);
553                 if (ret)
554                         return ret;
555         }
556
557         e = io_ctl->cur;
558         entry->offset = le64_to_cpu(e->offset);
559         entry->bytes = le64_to_cpu(e->bytes);
560         *type = e->type;
561         io_ctl->cur += sizeof(struct btrfs_free_space_entry);
562         io_ctl->size -= sizeof(struct btrfs_free_space_entry);
563
564         if (io_ctl->size >= sizeof(struct btrfs_free_space_entry))
565                 return 0;
566
567         io_ctl_unmap_page(io_ctl);
568
569         return 0;
570 }
571
572 static int io_ctl_read_bitmap(struct io_ctl *io_ctl,
573                               struct btrfs_free_space *entry)
574 {
575         int ret;
576
577         ret = io_ctl_check_crc(io_ctl, io_ctl->index);
578         if (ret)
579                 return ret;
580
581         memcpy(entry->bitmap, io_ctl->cur, PAGE_CACHE_SIZE);
582         io_ctl_unmap_page(io_ctl);
583
584         return 0;
585 }
586
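/*
 * Read an on-disk space cache into 'ctl': validate the generation stored in
 * the free space header against the inode, then walk the pages, checking the
 * crc of each one and linking every extent and bitmap entry into the rbtree.
 * Bitmap payloads are stored after all of the entries and are read last.
 * Returns 1 if the cache was loaded; anything else means the caller has to
 * fall back to scanning the extent tree.
 */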
587 int __load_free_space_cache(struct btrfs_root *root, struct inode *inode,
588                             struct btrfs_free_space_ctl *ctl,
589                             struct btrfs_path *path, u64 offset)
590 {
591         struct btrfs_free_space_header *header;
592         struct extent_buffer *leaf;
593         struct io_ctl io_ctl;
594         struct btrfs_key key;
595         struct btrfs_free_space *e, *n;
596         struct list_head bitmaps;
597         u64 num_entries;
598         u64 num_bitmaps;
599         u64 generation;
600         u8 type;
601         int ret = 0;
602
603         INIT_LIST_HEAD(&bitmaps);
604
605         /* Nothing in the space cache, goodbye */
606         if (!i_size_read(inode))
607                 return 0;
608
609         key.objectid = BTRFS_FREE_SPACE_OBJECTID;
610         key.offset = offset;
611         key.type = 0;
612
613         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
614         if (ret < 0)
615                 return 0;
616         else if (ret > 0) {
617                 btrfs_release_path(path);
618                 return 0;
619         }
620
621         ret = -1;
622
623         leaf = path->nodes[0];
624         header = btrfs_item_ptr(leaf, path->slots[0],
625                                 struct btrfs_free_space_header);
626         num_entries = btrfs_free_space_entries(leaf, header);
627         num_bitmaps = btrfs_free_space_bitmaps(leaf, header);
628         generation = btrfs_free_space_generation(leaf, header);
629         btrfs_release_path(path);
630
631         if (BTRFS_I(inode)->generation != generation) {
632                 printk(KERN_ERR "btrfs: free space inode generation (%llu) did"
633                        " not match free space cache generation (%llu)\n",
634                        (unsigned long long)BTRFS_I(inode)->generation,
635                        (unsigned long long)generation);
636                 return 0;
637         }
638
639         if (!num_entries)
640                 return 0;
641
642         ret = io_ctl_init(&io_ctl, inode, root);
643         if (ret)
644                 return ret;
645
646         ret = readahead_cache(inode);
647         if (ret)
648                 goto out;
649
650         ret = io_ctl_prepare_pages(&io_ctl, inode, 1);
651         if (ret)
652                 goto out;
653
654         ret = io_ctl_check_crc(&io_ctl, 0);
655         if (ret)
656                 goto free_cache;
657
658         ret = io_ctl_check_generation(&io_ctl, generation);
659         if (ret)
660                 goto free_cache;
661
662         while (num_entries) {
663                 e = kmem_cache_zalloc(btrfs_free_space_cachep,
664                                       GFP_NOFS);
665                 if (!e)
666                         goto free_cache;
667
668                 ret = io_ctl_read_entry(&io_ctl, e, &type);
669                 if (ret) {
670                         kmem_cache_free(btrfs_free_space_cachep, e);
671                         goto free_cache;
672                 }
673
674                 if (!e->bytes) {
675                         kmem_cache_free(btrfs_free_space_cachep, e);
676                         goto free_cache;
677                 }
678
679                 if (type == BTRFS_FREE_SPACE_EXTENT) {
680                         spin_lock(&ctl->tree_lock);
681                         ret = link_free_space(ctl, e);
682                         spin_unlock(&ctl->tree_lock);
683                         if (ret) {
684                                 printk(KERN_ERR "Duplicate entries in "
685                                        "free space cache, dumping\n");
686                                 kmem_cache_free(btrfs_free_space_cachep, e);
687                                 goto free_cache;
688                         }
689                 } else {
690                         BUG_ON(!num_bitmaps);
691                         num_bitmaps--;
692                         e->bitmap = kzalloc(PAGE_CACHE_SIZE, GFP_NOFS);
693                         if (!e->bitmap) {
694                                 kmem_cache_free(
695                                         btrfs_free_space_cachep, e);
696                                 goto free_cache;
697                         }
698                         spin_lock(&ctl->tree_lock);
699                         ret = link_free_space(ctl, e);
700                         ctl->total_bitmaps++;
701                         ctl->op->recalc_thresholds(ctl);
702                         spin_unlock(&ctl->tree_lock);
703                         if (ret) {
704                                 printk(KERN_ERR "Duplicate entries in "
705                                        "free space cache, dumping\n");
706                                 kmem_cache_free(btrfs_free_space_cachep, e);
707                                 goto free_cache;
708                         }
709                         list_add_tail(&e->list, &bitmaps);
710                 }
711
712                 num_entries--;
713         }
714
715         io_ctl_unmap_page(&io_ctl);
716
717         /*
718          * The bitmap payloads are stored after all of the entries, so read
719          * them now and attach each one to the bitmap entry queued up above.
720          */
721         list_for_each_entry_safe(e, n, &bitmaps, list) {
722                 list_del_init(&e->list);
723                 ret = io_ctl_read_bitmap(&io_ctl, e);
724                 if (ret)
725                         goto free_cache;
726         }
727
728         io_ctl_drop_pages(&io_ctl);
729         ret = 1;
730 out:
731         io_ctl_free(&io_ctl);
732         return ret;
733 free_cache:
734         io_ctl_drop_pages(&io_ctl);
735         __btrfs_remove_free_space_cache(ctl);
736         goto out;
737 }
738
739 int load_free_space_cache(struct btrfs_fs_info *fs_info,
740                           struct btrfs_block_group_cache *block_group)
741 {
742         struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
743         struct btrfs_root *root = fs_info->tree_root;
744         struct inode *inode;
745         struct btrfs_path *path;
746         int ret = 0;
747         bool matched;
748         u64 used = btrfs_block_group_used(&block_group->item);
749
750         /*
751          * If we're unmounting then just return, since this does a search on the
752          * normal root and not the commit root and we could deadlock.
753          */
754         if (btrfs_fs_closing(fs_info))
755                 return 0;
756
757         /*
758          * If this block group has been marked to be cleared for one reason or
759          * another then we can't trust the on disk cache, so just return.
760          */
761         spin_lock(&block_group->lock);
762         if (block_group->disk_cache_state != BTRFS_DC_WRITTEN) {
763                 spin_unlock(&block_group->lock);
764                 return 0;
765         }
766         spin_unlock(&block_group->lock);
767
768         path = btrfs_alloc_path();
769         if (!path)
770                 return 0;
771
772         inode = lookup_free_space_inode(root, block_group, path);
773         if (IS_ERR(inode)) {
774                 btrfs_free_path(path);
775                 return 0;
776         }
777
778         /* We may have converted the inode and made the cache invalid. */
779         spin_lock(&block_group->lock);
780         if (block_group->disk_cache_state != BTRFS_DC_WRITTEN) {
781                 spin_unlock(&block_group->lock);
782                 btrfs_free_path(path);
783                 goto out;
784         }
785         spin_unlock(&block_group->lock);
786
787         ret = __load_free_space_cache(fs_info->tree_root, inode, ctl,
788                                       path, block_group->key.objectid);
789         btrfs_free_path(path);
790         if (ret <= 0)
791                 goto out;
792
793         spin_lock(&ctl->tree_lock);
794         matched = (ctl->free_space == (block_group->key.offset - used -
795                                        block_group->bytes_super));
796         spin_unlock(&ctl->tree_lock);
797
798         if (!matched) {
799                 __btrfs_remove_free_space_cache(ctl);
800                 printk(KERN_ERR "block group %llu has wrong amount of free "
801                        "space\n", block_group->key.objectid);
802                 ret = -1;
803         }
804 out:
805         if (ret < 0) {
806                 /* This cache is bogus, make sure it gets cleared */
807                 spin_lock(&block_group->lock);
808                 block_group->disk_cache_state = BTRFS_DC_CLEAR;
809                 spin_unlock(&block_group->lock);
810                 ret = 0;
811
812                 printk(KERN_ERR "btrfs: failed to load free space cache "
813                        "for block group %llu\n", block_group->key.objectid);
814         }
815
816         iput(inode);
817         return ret;
818 }
819
820 /**
821  * __btrfs_write_out_cache - write out cached info to an inode
822  * @root - the root the inode belongs to
823  * @ctl - the free space cache we are going to write out
824  * @block_group - the block_group for this cache if it belongs to a block_group
825  * @trans - the trans handle
826  * @path - the path to use
827  * @offset - the offset for the key we'll insert
828  *
829  * This function writes out a free space cache struct to disk for quick recovery
830  * on mount.  This will return 0 if it was successful in writing the cache out,
831  * and -1 if it was not.
832  */
833 int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
834                             struct btrfs_free_space_ctl *ctl,
835                             struct btrfs_block_group_cache *block_group,
836                             struct btrfs_trans_handle *trans,
837                             struct btrfs_path *path, u64 offset)
838 {
839         struct btrfs_free_space_header *header;
840         struct extent_buffer *leaf;
841         struct rb_node *node;
842         struct list_head *pos, *n;
843         struct extent_state *cached_state = NULL;
844         struct btrfs_free_cluster *cluster = NULL;
845         struct extent_io_tree *unpin = NULL;
846         struct io_ctl io_ctl;
847         struct list_head bitmap_list;
848         struct btrfs_key key;
849         u64 start, extent_start, extent_end, len;
850         int entries = 0;
851         int bitmaps = 0;
852         int ret;
853         int err = -1;
854
855         INIT_LIST_HEAD(&bitmap_list);
856
857         if (!i_size_read(inode))
858                 return -1;
859
860         ret = io_ctl_init(&io_ctl, inode, root);
861         if (ret)
862                 return -1;
863
864         /* Get the cluster for this block_group if it exists */
865         if (block_group && !list_empty(&block_group->cluster_list))
866                 cluster = list_entry(block_group->cluster_list.next,
867                                      struct btrfs_free_cluster,
868                                      block_group_list);
869
870         /* Lock all pages first so we can lock the extent safely. */
871         io_ctl_prepare_pages(&io_ctl, inode, 0);
872
873         lock_extent_bits(&BTRFS_I(inode)->io_tree, 0, i_size_read(inode) - 1,
874                          0, &cached_state);
875
876         node = rb_first(&ctl->free_space_offset);
877         if (!node && cluster) {
878                 node = rb_first(&cluster->root);
879                 cluster = NULL;
880         }
881
882         /* Make sure we can fit our crcs into the first page */
883         if (io_ctl.check_crcs &&
884             (io_ctl.num_pages * sizeof(u32)) >= PAGE_CACHE_SIZE) {
885                 WARN_ON(1);
886                 goto out_nospc;
887         }
888
889         io_ctl_set_generation(&io_ctl, trans->transid);
890
891         /* Write out the extent entries */
892         while (node) {
893                 struct btrfs_free_space *e;
894
895                 e = rb_entry(node, struct btrfs_free_space, offset_index);
896                 entries++;
897
898                 ret = io_ctl_add_entry(&io_ctl, e->offset, e->bytes,
899                                        e->bitmap);
900                 if (ret)
901                         goto out_nospc;
902
903                 if (e->bitmap) {
904                         list_add_tail(&e->list, &bitmap_list);
905                         bitmaps++;
906                 }
907                 node = rb_next(node);
908                 if (!node && cluster) {
909                         node = rb_first(&cluster->root);
910                         cluster = NULL;
911                 }
912         }
913
914         /*
915          * We want to add any pinned extents to our free space cache
916          * so we don't leak the space
917          */
918
919         /*
920          * We shouldn't have switched the pinned extents yet so this is the
921          * right one
922          */
923         unpin = root->fs_info->pinned_extents;
924
925         if (block_group)
926                 start = block_group->key.objectid;
927
928         while (block_group && (start < block_group->key.objectid +
929                                block_group->key.offset)) {
930                 ret = find_first_extent_bit(unpin, start,
931                                             &extent_start, &extent_end,
932                                             EXTENT_DIRTY);
933                 if (ret) {
934                         ret = 0;
935                         break;
936                 }
937
938                 /* This pinned extent is out of our range */
939                 if (extent_start >= block_group->key.objectid +
940                     block_group->key.offset)
941                         break;
942
943                 extent_start = max(extent_start, start);
944                 extent_end = min(block_group->key.objectid +
945                                  block_group->key.offset, extent_end + 1);
946                 len = extent_end - extent_start;
947
948                 entries++;
949                 ret = io_ctl_add_entry(&io_ctl, extent_start, len, NULL);
950                 if (ret)
951                         goto out_nospc;
952
953                 start = extent_end;
954         }
955
956         /* Write out the bitmaps */
957         list_for_each_safe(pos, n, &bitmap_list) {
958                 struct btrfs_free_space *entry =
959                         list_entry(pos, struct btrfs_free_space, list);
960
961                 ret = io_ctl_add_bitmap(&io_ctl, entry->bitmap);
962                 if (ret)
963                         goto out_nospc;
964                 list_del_init(&entry->list);
965         }
966
967         /* Zero out the rest of the pages just to make sure */
968         io_ctl_zero_remaining_pages(&io_ctl);
969
970         ret = btrfs_dirty_pages(root, inode, io_ctl.pages, io_ctl.num_pages,
971                                 0, i_size_read(inode), &cached_state);
972         io_ctl_drop_pages(&io_ctl);
973         unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0,
974                              i_size_read(inode) - 1, &cached_state, GFP_NOFS);
975
976         if (ret)
977                 goto out;
978
979
980         ret = filemap_write_and_wait(inode->i_mapping);
981         if (ret)
982                 goto out;
983
984         key.objectid = BTRFS_FREE_SPACE_OBJECTID;
985         key.offset = offset;
986         key.type = 0;
987
988         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
989         if (ret < 0) {
990                 clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, inode->i_size - 1,
991                                  EXTENT_DIRTY | EXTENT_DELALLOC, 0, 0, NULL,
992                                  GFP_NOFS);
993                 goto out;
994         }
995         leaf = path->nodes[0];
996         if (ret > 0) {
997                 struct btrfs_key found_key;
998                 BUG_ON(!path->slots[0]);
999                 path->slots[0]--;
1000                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
1001                 if (found_key.objectid != BTRFS_FREE_SPACE_OBJECTID ||
1002                     found_key.offset != offset) {
1003                         clear_extent_bit(&BTRFS_I(inode)->io_tree, 0,
1004                                          inode->i_size - 1,
1005                                          EXTENT_DIRTY | EXTENT_DELALLOC, 0, 0,
1006                                          NULL, GFP_NOFS);
1007                         btrfs_release_path(path);
1008                         goto out;
1009                 }
1010         }
1011
1012         BTRFS_I(inode)->generation = trans->transid;
1013         header = btrfs_item_ptr(leaf, path->slots[0],
1014                                 struct btrfs_free_space_header);
1015         btrfs_set_free_space_entries(leaf, header, entries);
1016         btrfs_set_free_space_bitmaps(leaf, header, bitmaps);
1017         btrfs_set_free_space_generation(leaf, header, trans->transid);
1018         btrfs_mark_buffer_dirty(leaf);
1019         btrfs_release_path(path);
1020
1021         err = 0;
1022 out:
1023         io_ctl_free(&io_ctl);
1024         if (err) {
1025                 invalidate_inode_pages2(inode->i_mapping);
1026                 BTRFS_I(inode)->generation = 0;
1027         }
1028         btrfs_update_inode(trans, root, inode);
1029         return err;
1030
1031 out_nospc:
1032         list_for_each_safe(pos, n, &bitmap_list) {
1033                 struct btrfs_free_space *entry =
1034                         list_entry(pos, struct btrfs_free_space, list);
1035                 list_del_init(&entry->list);
1036         }
1037         io_ctl_drop_pages(&io_ctl);
1038         unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0,
1039                              i_size_read(inode) - 1, &cached_state, GFP_NOFS);
1040         goto out;
1041 }
1042
1043 int btrfs_write_out_cache(struct btrfs_root *root,
1044                           struct btrfs_trans_handle *trans,
1045                           struct btrfs_block_group_cache *block_group,
1046                           struct btrfs_path *path)
1047 {
1048         struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
1049         struct inode *inode;
1050         int ret = 0;
1051
1052         root = root->fs_info->tree_root;
1053
1054         spin_lock(&block_group->lock);
1055         if (block_group->disk_cache_state < BTRFS_DC_SETUP) {
1056                 spin_unlock(&block_group->lock);
1057                 return 0;
1058         }
1059         spin_unlock(&block_group->lock);
1060
1061         inode = lookup_free_space_inode(root, block_group, path);
1062         if (IS_ERR(inode))
1063                 return 0;
1064
1065         ret = __btrfs_write_out_cache(root, inode, ctl, block_group, trans,
1066                                       path, block_group->key.objectid);
1067         if (ret) {
1068                 spin_lock(&block_group->lock);
1069                 block_group->disk_cache_state = BTRFS_DC_ERROR;
1070                 spin_unlock(&block_group->lock);
1071                 ret = 0;
1072 #ifdef DEBUG
1073                 printk(KERN_ERR "btrfs: failed to write free space cache "
1074                        "for block group %llu\n", block_group->key.objectid);
1075 #endif
1076         }
1077
1078         iput(inode);
1079         return ret;
1080 }
1081
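/*
 * Bitmap helpers: each bitmap entry covers BITS_PER_BITMAP * ctl->unit bytes
 * (one page worth of bits), and these convert between byte offsets within
 * the block group and bit indices within a single bitmap.
 */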
1082 static inline unsigned long offset_to_bit(u64 bitmap_start, u32 unit,
1083                                           u64 offset)
1084 {
1085         BUG_ON(offset < bitmap_start);
1086         offset -= bitmap_start;
1087         return (unsigned long)(div_u64(offset, unit));
1088 }
1089
1090 static inline unsigned long bytes_to_bits(u64 bytes, u32 unit)
1091 {
1092         return (unsigned long)(div_u64(bytes, unit));
1093 }
1094
1095 static inline u64 offset_to_bitmap(struct btrfs_free_space_ctl *ctl,
1096                                    u64 offset)
1097 {
1098         u64 bitmap_start;
1099         u64 bytes_per_bitmap;
1100
1101         bytes_per_bitmap = BITS_PER_BITMAP * ctl->unit;
1102         bitmap_start = offset - ctl->start;
1103         bitmap_start = div64_u64(bitmap_start, bytes_per_bitmap);
1104         bitmap_start *= bytes_per_bitmap;
1105         bitmap_start += ctl->start;
1106
1107         return bitmap_start;
1108 }
1109
1110 static int tree_insert_offset(struct rb_root *root, u64 offset,
1111                               struct rb_node *node, int bitmap)
1112 {
1113         struct rb_node **p = &root->rb_node;
1114         struct rb_node *parent = NULL;
1115         struct btrfs_free_space *info;
1116
1117         while (*p) {
1118                 parent = *p;
1119                 info = rb_entry(parent, struct btrfs_free_space, offset_index);
1120
1121                 if (offset < info->offset) {
1122                         p = &(*p)->rb_left;
1123                 } else if (offset > info->offset) {
1124                         p = &(*p)->rb_right;
1125                 } else {
1126                         /*
1127                          * we could have a bitmap entry and an extent entry
1128                          * share the same offset.  If this is the case, we want
1129                          * the extent entry to always be found first if we do a
1130                          * linear search through the tree, since we want to have
1131                          * the quickest allocation time, and allocating from an
1132                          * extent is faster than allocating from a bitmap.  So
1133                          * if we're inserting a bitmap and we find an entry at
1134                          * this offset, we want to go right, or after this entry
1135                          * logically.  If we are inserting an extent and we've
1136                          * found a bitmap, we want to go left, or before
1137                          * logically.
1138                          */
1139                         if (bitmap) {
1140                                 if (info->bitmap) {
1141                                         WARN_ON_ONCE(1);
1142                                         return -EEXIST;
1143                                 }
1144                                 p = &(*p)->rb_right;
1145                         } else {
1146                                 if (!info->bitmap) {
1147                                         WARN_ON_ONCE(1);
1148                                         return -EEXIST;
1149                                 }
1150                                 p = &(*p)->rb_left;
1151                         }
1152                 }
1153         }
1154
1155         rb_link_node(node, parent, p);
1156         rb_insert_color(node, root);
1157
1158         return 0;
1159 }
1160
1161 /*
1162  * searches the tree for the given offset.
1163  *
1164  * fuzzy - If this is set, then we are trying to make an allocation, and we just
1165  * want a section that is at least 'bytes' in size and comes at or after the
1166  * given offset.
1167  */
1168 static struct btrfs_free_space *
1169 tree_search_offset(struct btrfs_free_space_ctl *ctl,
1170                    u64 offset, int bitmap_only, int fuzzy)
1171 {
1172         struct rb_node *n = ctl->free_space_offset.rb_node;
1173         struct btrfs_free_space *entry, *prev = NULL;
1174
1175         /* find entry that is closest to the 'offset' */
1176         while (1) {
1177                 if (!n) {
1178                         entry = NULL;
1179                         break;
1180                 }
1181
1182                 entry = rb_entry(n, struct btrfs_free_space, offset_index);
1183                 prev = entry;
1184
1185                 if (offset < entry->offset)
1186                         n = n->rb_left;
1187                 else if (offset > entry->offset)
1188                         n = n->rb_right;
1189                 else
1190                         break;
1191         }
1192
1193         if (bitmap_only) {
1194                 if (!entry)
1195                         return NULL;
1196                 if (entry->bitmap)
1197                         return entry;
1198
1199                 /*
1200                  * a bitmap entry and an extent entry may share the same offset;
1201                  * in that case, the bitmap entry comes after the extent entry.
1202                  */
1203                 n = rb_next(n);
1204                 if (!n)
1205                         return NULL;
1206                 entry = rb_entry(n, struct btrfs_free_space, offset_index);
1207                 if (entry->offset != offset)
1208                         return NULL;
1209
1210                 WARN_ON(!entry->bitmap);
1211                 return entry;
1212         } else if (entry) {
1213                 if (entry->bitmap) {
1214                         /*
1215                          * if previous extent entry covers the offset,
1216                          * we should return it instead of the bitmap entry
1217                          */
1218                         n = &entry->offset_index;
1219                         while (1) {
1220                                 n = rb_prev(n);
1221                                 if (!n)
1222                                         break;
1223                                 prev = rb_entry(n, struct btrfs_free_space,
1224                                                 offset_index);
1225                                 if (!prev->bitmap) {
1226                                         if (prev->offset + prev->bytes > offset)
1227                                                 entry = prev;
1228                                         break;
1229                                 }
1230                         }
1231                 }
1232                 return entry;
1233         }
1234
1235         if (!prev)
1236                 return NULL;
1237
1238         /* find last entry before the 'offset' */
1239         entry = prev;
1240         if (entry->offset > offset) {
1241                 n = rb_prev(&entry->offset_index);
1242                 if (n) {
1243                         entry = rb_entry(n, struct btrfs_free_space,
1244                                         offset_index);
1245                         BUG_ON(entry->offset > offset);
1246                 } else {
1247                         if (fuzzy)
1248                                 return entry;
1249                         else
1250                                 return NULL;
1251                 }
1252         }
1253
1254         if (entry->bitmap) {
1255                 n = &entry->offset_index;
1256                 while (1) {
1257                         n = rb_prev(n);
1258                         if (!n)
1259                                 break;
1260                         prev = rb_entry(n, struct btrfs_free_space,
1261                                         offset_index);
1262                         if (!prev->bitmap) {
1263                                 if (prev->offset + prev->bytes > offset)
1264                                         return prev;
1265                                 break;
1266                         }
1267                 }
1268                 if (entry->offset + BITS_PER_BITMAP * ctl->unit > offset)
1269                         return entry;
1270         } else if (entry->offset + entry->bytes > offset)
1271                 return entry;
1272
1273         if (!fuzzy)
1274                 return NULL;
1275
1276         while (1) {
1277                 if (entry->bitmap) {
1278                         if (entry->offset + BITS_PER_BITMAP *
1279                             ctl->unit > offset)
1280                                 break;
1281                 } else {
1282                         if (entry->offset + entry->bytes > offset)
1283                                 break;
1284                 }
1285
1286                 n = rb_next(&entry->offset_index);
1287                 if (!n)
1288                         return NULL;
1289                 entry = rb_entry(n, struct btrfs_free_space, offset_index);
1290         }
1291         return entry;
1292 }
1293
1294 static inline void
1295 __unlink_free_space(struct btrfs_free_space_ctl *ctl,
1296                     struct btrfs_free_space *info)
1297 {
1298         rb_erase(&info->offset_index, &ctl->free_space_offset);
1299         ctl->free_extents--;
1300 }
1301
1302 static void unlink_free_space(struct btrfs_free_space_ctl *ctl,
1303                               struct btrfs_free_space *info)
1304 {
1305         __unlink_free_space(ctl, info);
1306         ctl->free_space -= info->bytes;
1307 }
1308
1309 static int link_free_space(struct btrfs_free_space_ctl *ctl,
1310                            struct btrfs_free_space *info)
1311 {
1312         int ret = 0;
1313
1314         BUG_ON(!info->bitmap && !info->bytes);
1315         ret = tree_insert_offset(&ctl->free_space_offset, info->offset,
1316                                  &info->offset_index, (info->bitmap != NULL));
1317         if (ret)
1318                 return ret;
1319
1320         ctl->free_space += info->bytes;
1321         ctl->free_extents++;
1322         return ret;
1323 }
1324
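/*
 * Recompute how many plain extent entries we tolerate before new free space
 * gets accounted in bitmaps instead.  The target is to spend at most
 * MAX_CACHE_BYTES_PER_GIG of memory per gigabyte of block group, with no
 * more than half of that budget going to extent entries.
 */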
1325 static void recalculate_thresholds(struct btrfs_free_space_ctl *ctl)
1326 {
1327         struct btrfs_block_group_cache *block_group = ctl->private;
1328         u64 max_bytes;
1329         u64 bitmap_bytes;
1330         u64 extent_bytes;
1331         u64 size = block_group->key.offset;
1332         u64 bytes_per_bg = BITS_PER_BITMAP * block_group->sectorsize;
1333         int max_bitmaps = div64_u64(size + bytes_per_bg - 1, bytes_per_bg);
1334
1335         BUG_ON(ctl->total_bitmaps > max_bitmaps);
1336
1337         /*
1338          * The goal is to keep the total amount of memory used per 1gb of space
1339          * at or below 32k, so we need to adjust how much memory we allow to be
1340          * used by extent based free space tracking
1341          */
1342         if (size < 1024 * 1024 * 1024)
1343                 max_bytes = MAX_CACHE_BYTES_PER_GIG;
1344         else
1345                 max_bytes = MAX_CACHE_BYTES_PER_GIG *
1346                         div64_u64(size, 1024 * 1024 * 1024);
1347
1348         /*
1349          * we want to account for 1 more bitmap than what we have so we can make
1350          * sure we don't go over our overall goal of MAX_CACHE_BYTES_PER_GIG as
1351          * we add more bitmaps.
1352          */
1353         bitmap_bytes = (ctl->total_bitmaps + 1) * PAGE_CACHE_SIZE;
1354
1355         if (bitmap_bytes >= max_bytes) {
1356                 ctl->extents_thresh = 0;
1357                 return;
1358         }
1359
1360         /*
1361          * we want the extent entry threshold to always be at most 1/2 the max
1362          * bytes we can have, or whatever is less than that.
1363          */
1364         extent_bytes = max_bytes - bitmap_bytes;
1365         extent_bytes = min_t(u64, extent_bytes, div64_u64(max_bytes, 2));
1366
1367         ctl->extents_thresh =
1368                 div64_u64(extent_bytes, (sizeof(struct btrfs_free_space)));
1369 }
1370
1371 static inline void __bitmap_clear_bits(struct btrfs_free_space_ctl *ctl,
1372                                        struct btrfs_free_space *info,
1373                                        u64 offset, u64 bytes)
1374 {
1375         unsigned long start, count;
1376
1377         start = offset_to_bit(info->offset, ctl->unit, offset);
1378         count = bytes_to_bits(bytes, ctl->unit);
1379         BUG_ON(start + count > BITS_PER_BITMAP);
1380
1381         bitmap_clear(info->bitmap, start, count);
1382
1383         info->bytes -= bytes;
1384 }
1385
1386 static void bitmap_clear_bits(struct btrfs_free_space_ctl *ctl,
1387                               struct btrfs_free_space *info, u64 offset,
1388                               u64 bytes)
1389 {
1390         __bitmap_clear_bits(ctl, info, offset, bytes);
1391         ctl->free_space -= bytes;
1392 }
1393
1394 static void bitmap_set_bits(struct btrfs_free_space_ctl *ctl,
1395                             struct btrfs_free_space *info, u64 offset,
1396                             u64 bytes)
1397 {
1398         unsigned long start, count;
1399
1400         start = offset_to_bit(info->offset, ctl->unit, offset);
1401         count = bytes_to_bits(bytes, ctl->unit);
1402         BUG_ON(start + count > BITS_PER_BITMAP);
1403
1404         bitmap_set(info->bitmap, start, count);
1405
1406         info->bytes += bytes;
1407         ctl->free_space += bytes;
1408 }
1409
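/*
 * Scan a bitmap entry for a run of set bits at least *bytes long, starting
 * at *offset.  On success *offset and *bytes are updated to describe the run
 * that was found and 0 is returned; -1 means no run was big enough.
 */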
1410 static int search_bitmap(struct btrfs_free_space_ctl *ctl,
1411                          struct btrfs_free_space *bitmap_info, u64 *offset,
1412                          u64 *bytes)
1413 {
1414         unsigned long found_bits = 0;
1415         unsigned long bits, i;
1416         unsigned long next_zero;
1417
1418         i = offset_to_bit(bitmap_info->offset, ctl->unit,
1419                           max_t(u64, *offset, bitmap_info->offset));
1420         bits = bytes_to_bits(*bytes, ctl->unit);
1421
1422         for (i = find_next_bit(bitmap_info->bitmap, BITS_PER_BITMAP, i);
1423              i < BITS_PER_BITMAP;
1424              i = find_next_bit(bitmap_info->bitmap, BITS_PER_BITMAP, i + 1)) {
1425                 next_zero = find_next_zero_bit(bitmap_info->bitmap,
1426                                                BITS_PER_BITMAP, i);
1427                 if ((next_zero - i) >= bits) {
1428                         found_bits = next_zero - i;
1429                         break;
1430                 }
1431                 i = next_zero;
1432         }
1433
1434         if (found_bits) {
1435                 *offset = (u64)(i * ctl->unit) + bitmap_info->offset;
1436                 *bytes = (u64)(found_bits) * ctl->unit;
1437                 return 0;
1438         }
1439
1440         return -1;
1441 }
1442
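/*
 * Walk the offset-sorted tree for the first entry (extent or bitmap) at or
 * after *offset that can satisfy *bytes, updating *offset and *bytes to the
 * space that was actually found.
 */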
1443 static struct btrfs_free_space *
1444 find_free_space(struct btrfs_free_space_ctl *ctl, u64 *offset, u64 *bytes)
1445 {
1446         struct btrfs_free_space *entry;
1447         struct rb_node *node;
1448         int ret;
1449
1450         if (!ctl->free_space_offset.rb_node)
1451                 return NULL;
1452
1453         entry = tree_search_offset(ctl, offset_to_bitmap(ctl, *offset), 0, 1);
1454         if (!entry)
1455                 return NULL;
1456
1457         for (node = &entry->offset_index; node; node = rb_next(node)) {
1458                 entry = rb_entry(node, struct btrfs_free_space, offset_index);
1459                 if (entry->bytes < *bytes)
1460                         continue;
1461
1462                 if (entry->bitmap) {
1463                         ret = search_bitmap(ctl, entry, offset, bytes);
1464                         if (!ret)
1465                                 return entry;
1466                         continue;
1467                 }
1468
1469                 *offset = entry->offset;
1470                 *bytes = entry->bytes;
1471                 return entry;
1472         }
1473
1474         return NULL;
1475 }
1476
1477 static void add_new_bitmap(struct btrfs_free_space_ctl *ctl,
1478                            struct btrfs_free_space *info, u64 offset)
1479 {
1480         info->offset = offset_to_bitmap(ctl, offset);
1481         info->bytes = 0;
1482         INIT_LIST_HEAD(&info->list);
1483         link_free_space(ctl, info);
1484         ctl->total_bitmaps++;
1485
1486         ctl->op->recalc_thresholds(ctl);
1487 }
1488
1489 static void free_bitmap(struct btrfs_free_space_ctl *ctl,
1490                         struct btrfs_free_space *bitmap_info)
1491 {
1492         unlink_free_space(ctl, bitmap_info);
1493         kfree(bitmap_info->bitmap);
1494         kmem_cache_free(btrfs_free_space_cachep, bitmap_info);
1495         ctl->total_bitmaps--;
1496         ctl->op->recalc_thresholds(ctl);
1497 }
1498
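     /*
      * Clear *bytes worth of space starting at *offset out of this bitmap,
      * walking forward into following bitmaps when the range spans more than
      * one.  Returns 0 when everything was removed, -EAGAIN when the caller
      * must restart the search (the remaining space lives in an extent entry
      * or in a bitmap that doesn't contain it where expected), and -EINVAL
      * when bytes remain but there is no entry left to search.
      */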
1499 static noinline int remove_from_bitmap(struct btrfs_free_space_ctl *ctl,
1500                               struct btrfs_free_space *bitmap_info,
1501                               u64 *offset, u64 *bytes)
1502 {
1503         u64 end;
1504         u64 search_start, search_bytes;
1505         int ret;
1506
1507 again:
1508         end = bitmap_info->offset + (u64)(BITS_PER_BITMAP * ctl->unit) - 1;
1509
1510         /*
1511          * XXX - this can go away after a few releases.
1512          *
1513          * since the only user of btrfs_remove_free_space is the tree logging
1514          * stuff, and the only way to test that is under crash conditions, we
1515          * want to have this debug stuff here just in case something is not
1516          * working.  Search the bitmap for the space we are trying to use to
1517          * make sure it's actually there.  If it's not there then we need to stop
1518          * because something has gone wrong.
1519          */
1520         search_start = *offset;
1521         search_bytes = *bytes;
1522         search_bytes = min(search_bytes, end - search_start + 1);
1523         ret = search_bitmap(ctl, bitmap_info, &search_start, &search_bytes);
1524         BUG_ON(ret < 0 || search_start != *offset);
1525
1526         if (*offset > bitmap_info->offset && *offset + *bytes > end) {
1527                 bitmap_clear_bits(ctl, bitmap_info, *offset, end - *offset + 1);
1528                 *bytes -= end - *offset + 1;
1529                 *offset = end + 1;
1530         } else if (*offset >= bitmap_info->offset && *offset + *bytes <= end) {
1531                 bitmap_clear_bits(ctl, bitmap_info, *offset, *bytes);
1532                 *bytes = 0;
1533         }
1534
1535         if (*bytes) {
1536                 struct rb_node *next = rb_next(&bitmap_info->offset_index);
1537                 if (!bitmap_info->bytes)
1538                         free_bitmap(ctl, bitmap_info);
1539
1540                 /*
1541                  * no entry after this bitmap, but we still have bytes to
1542                  * remove, so something has gone wrong.
1543                  */
1544                 if (!next)
1545                         return -EINVAL;
1546
1547                 bitmap_info = rb_entry(next, struct btrfs_free_space,
1548                                        offset_index);
1549
1550                 /*
1551                  * if the next entry isn't a bitmap we need to return to let the
1552                  * extent stuff do its work.
1553                  */
1554                 if (!bitmap_info->bitmap)
1555                         return -EAGAIN;
1556
1557                 /*
1558                  * Ok the next item is a bitmap, but it may not actually hold
1559                  * the information for the rest of this free space stuff, so
1560                  * look for it, and if we don't find it return so we can try
1561                  * everything over again.
1562                  */
1563                 search_start = *offset;
1564                 search_bytes = *bytes;
1565                 ret = search_bitmap(ctl, bitmap_info, &search_start,
1566                                     &search_bytes);
1567                 if (ret < 0 || search_start != *offset)
1568                         return -EAGAIN;
1569
1570                 goto again;
1571         } else if (!bitmap_info->bytes)
1572                 free_bitmap(ctl, bitmap_info);
1573
1574         return 0;
1575 }
1576
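     /*
      * Set as much of [offset, offset + bytes) in this bitmap as fits before
      * the end of the bitmap and return the number of bytes actually
      * recorded, so the caller can continue with the remainder elsewhere.
      */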
1577 static u64 add_bytes_to_bitmap(struct btrfs_free_space_ctl *ctl,
1578                                struct btrfs_free_space *info, u64 offset,
1579                                u64 bytes)
1580 {
1581         u64 bytes_to_set = 0;
1582         u64 end;
1583
1584         end = info->offset + (u64)(BITS_PER_BITMAP * ctl->unit);
1585
1586         bytes_to_set = min(end - offset, bytes);
1587
1588         bitmap_set_bits(ctl, info, offset, bytes_to_set);
1589
1590         return bytes_to_set;
1591
1592 }
1593
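     /*
      * Decide whether a chunk of free space should be tracked in a bitmap
      * rather than as an extent entry.  Large extents stay as extents while
      * we are below the extents threshold, small extents are pushed into
      * bitmaps earlier, and block groups too small to be covered by a single
      * bitmap never use one.
      */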
1594 static bool use_bitmap(struct btrfs_free_space_ctl *ctl,
1595                       struct btrfs_free_space *info)
1596 {
1597         struct btrfs_block_group_cache *block_group = ctl->private;
1598
1599         /*
1600          * If we are below the extents threshold then we can add this as an
1601          * extent, and don't have to deal with the bitmap
1602          */
1603         if (ctl->free_extents < ctl->extents_thresh) {
1604                 /*
1605                  * If this block group has some small extents we don't want to
1606                  * use up all of our free slots in the cache with them, we want
1607          * to reserve them for larger extents, however if we have plenty
1608          * of cache left then go ahead and add them, no sense in adding
1609                  * the overhead of a bitmap if we don't have to.
1610                  */
1611                 if (info->bytes <= block_group->sectorsize * 4) {
1612                         if (ctl->free_extents * 2 <= ctl->extents_thresh)
1613                                 return false;
1614                 } else {
1615                         return false;
1616                 }
1617         }
1618
1619         /*
1620          * some block groups are so tiny they can't be enveloped by a bitmap, so
1621          * don't even bother to create a bitmap for this
1622          */
1623         if (BITS_PER_BITMAP * block_group->sectorsize >
1624             block_group->key.offset)
1625                 return false;
1626
1627         return true;
1628 }
1629
1630 static struct btrfs_free_space_op free_space_op = {
1631         .recalc_thresholds      = recalculate_thresholds,
1632         .use_bitmap             = use_bitmap,
1633 };
1634
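     /*
      * Try to record the free space described by @info in a bitmap.  Returns
      * 0 if bitmaps shouldn't be used for this entry, 1 once all of the space
      * has been added to one or more bitmaps (allocating new ones as needed,
      * and trying the cluster's bitmap first), or a negative errno on
      * allocation failure.
      */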
1635 static int insert_into_bitmap(struct btrfs_free_space_ctl *ctl,
1636                               struct btrfs_free_space *info)
1637 {
1638         struct btrfs_free_space *bitmap_info;
1639         struct btrfs_block_group_cache *block_group = NULL;
1640         int added = 0;
1641         u64 bytes, offset, bytes_added;
1642         int ret;
1643
1644         bytes = info->bytes;
1645         offset = info->offset;
1646
1647         if (!ctl->op->use_bitmap(ctl, info))
1648                 return 0;
1649
1650         if (ctl->op == &free_space_op)
1651                 block_group = ctl->private;
1652 again:
1653         /*
1654          * Since we link bitmaps right into the cluster we need to see if we
1655          * have a cluster here, and if so and it has our bitmap we need to add
1656          * the free space to that bitmap.
1657          */
1658         if (block_group && !list_empty(&block_group->cluster_list)) {
1659                 struct btrfs_free_cluster *cluster;
1660                 struct rb_node *node;
1661                 struct btrfs_free_space *entry;
1662
1663                 cluster = list_entry(block_group->cluster_list.next,
1664                                      struct btrfs_free_cluster,
1665                                      block_group_list);
1666                 spin_lock(&cluster->lock);
1667                 node = rb_first(&cluster->root);
1668                 if (!node) {
1669                         spin_unlock(&cluster->lock);
1670                         goto no_cluster_bitmap;
1671                 }
1672
1673                 entry = rb_entry(node, struct btrfs_free_space, offset_index);
1674                 if (!entry->bitmap) {
1675                         spin_unlock(&cluster->lock);
1676                         goto no_cluster_bitmap;
1677                 }
1678
1679                 if (entry->offset == offset_to_bitmap(ctl, offset)) {
1680                         bytes_added = add_bytes_to_bitmap(ctl, entry,
1681                                                           offset, bytes);
1682                         bytes -= bytes_added;
1683                         offset += bytes_added;
1684                 }
1685                 spin_unlock(&cluster->lock);
1686                 if (!bytes) {
1687                         ret = 1;
1688                         goto out;
1689                 }
1690         }
1691
1692 no_cluster_bitmap:
1693         bitmap_info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset),
1694                                          1, 0);
1695         if (!bitmap_info) {
1696                 BUG_ON(added);
1697                 goto new_bitmap;
1698         }
1699
1700         bytes_added = add_bytes_to_bitmap(ctl, bitmap_info, offset, bytes);
1701         bytes -= bytes_added;
1702         offset += bytes_added;
1703         added = 0;
1704
1705         if (!bytes) {
1706                 ret = 1;
1707                 goto out;
1708         } else
1709                 goto again;
1710
1711 new_bitmap:
1712         if (info && info->bitmap) {
1713                 add_new_bitmap(ctl, info, offset);
1714                 added = 1;
1715                 info = NULL;
1716                 goto again;
1717         } else {
1718                 spin_unlock(&ctl->tree_lock);
1719
1720                 /* no pre-allocated info, allocate a new one */
1721                 if (!info) {
1722                         info = kmem_cache_zalloc(btrfs_free_space_cachep,
1723                                                  GFP_NOFS);
1724                         if (!info) {
1725                                 spin_lock(&ctl->tree_lock);
1726                                 ret = -ENOMEM;
1727                                 goto out;
1728                         }
1729                 }
1730
1731                 /* allocate the bitmap */
1732                 info->bitmap = kzalloc(PAGE_CACHE_SIZE, GFP_NOFS);
1733                 spin_lock(&ctl->tree_lock);
1734                 if (!info->bitmap) {
1735                         ret = -ENOMEM;
1736                         goto out;
1737                 }
1738                 goto again;
1739         }
1740
1741 out:
1742         if (info) {
1743                 if (info->bitmap)
1744                         kfree(info->bitmap);
1745                 kmem_cache_free(btrfs_free_space_cachep, info);
1746         }
1747
1748         return ret;
1749 }
1750
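     /*
      * See if the new entry can be merged with the extent entries directly
      * to its left and right.  Any adjacent extent is unlinked and folded
      * into @info; bitmap neighbours are never merged.  Returns true if at
      * least one merge happened.
      */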
1751 static bool try_merge_free_space(struct btrfs_free_space_ctl *ctl,
1752                           struct btrfs_free_space *info, bool update_stat)
1753 {
1754         struct btrfs_free_space *left_info;
1755         struct btrfs_free_space *right_info;
1756         bool merged = false;
1757         u64 offset = info->offset;
1758         u64 bytes = info->bytes;
1759
1760         /*
1761          * first we want to see if there is free space adjacent to the range we
1762          * are adding, if there is remove that struct and add a new one to
1763          * cover the entire range
1764          */
1765         right_info = tree_search_offset(ctl, offset + bytes, 0, 0);
1766         if (right_info && rb_prev(&right_info->offset_index))
1767                 left_info = rb_entry(rb_prev(&right_info->offset_index),
1768                                      struct btrfs_free_space, offset_index);
1769         else
1770                 left_info = tree_search_offset(ctl, offset - 1, 0, 0);
1771
1772         if (right_info && !right_info->bitmap) {
1773                 if (update_stat)
1774                         unlink_free_space(ctl, right_info);
1775                 else
1776                         __unlink_free_space(ctl, right_info);
1777                 info->bytes += right_info->bytes;
1778                 kmem_cache_free(btrfs_free_space_cachep, right_info);
1779                 merged = true;
1780         }
1781
1782         if (left_info && !left_info->bitmap &&
1783             left_info->offset + left_info->bytes == offset) {
1784                 if (update_stat)
1785                         unlink_free_space(ctl, left_info);
1786                 else
1787                         __unlink_free_space(ctl, left_info);
1788                 info->offset = left_info->offset;
1789                 info->bytes += left_info->bytes;
1790                 kmem_cache_free(btrfs_free_space_cachep, left_info);
1791                 merged = true;
1792         }
1793
1794         return merged;
1795 }
1796
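     /*
      * Add [offset, offset + bytes) to the free space ctl.  The range is
      * first merged with adjacent extent entries when possible, otherwise it
      * may be folded into a bitmap; only if neither applies is a new extent
      * entry linked into the tree.
      */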
1797 int __btrfs_add_free_space(struct btrfs_free_space_ctl *ctl,
1798                            u64 offset, u64 bytes)
1799 {
1800         struct btrfs_free_space *info;
1801         int ret = 0;
1802
1803         info = kmem_cache_zalloc(btrfs_free_space_cachep, GFP_NOFS);
1804         if (!info)
1805                 return -ENOMEM;
1806
1807         info->offset = offset;
1808         info->bytes = bytes;
1809
1810         spin_lock(&ctl->tree_lock);
1811
1812         if (try_merge_free_space(ctl, info, true))
1813                 goto link;
1814
1815         /*
1816          * There was no extent directly to the left or right of this new
1817          * extent, so we know we're going to have to allocate a new one;
1818          * before we do that, see if we need to drop this into a bitmap.
1819          */
1820         ret = insert_into_bitmap(ctl, info);
1821         if (ret < 0) {
1822                 goto out;
1823         } else if (ret) {
1824                 ret = 0;
1825                 goto out;
1826         }
1827 link:
1828         ret = link_free_space(ctl, info);
1829         if (ret)
1830                 kmem_cache_free(btrfs_free_space_cachep, info);
1831 out:
1832         spin_unlock(&ctl->tree_lock);
1833
1834         if (ret) {
1835                 printk(KERN_CRIT "btrfs: unable to add free space: %d\n", ret);
1836                 BUG_ON(ret == -EEXIST);
1837         }
1838
1839         return ret;
1840 }
1841
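     /*
      * Remove [offset, offset + bytes) from a block group's free space
      * cache.  The range may be covered by an extent entry, a bitmap, or a
      * mix of both; an extent that only partially overlaps is split around
      * the removed hole.
      */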
1842 int btrfs_remove_free_space(struct btrfs_block_group_cache *block_group,
1843                             u64 offset, u64 bytes)
1844 {
1845         struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
1846         struct btrfs_free_space *info;
1847         struct btrfs_free_space *next_info = NULL;
1848         int ret = 0;
1849
1850         spin_lock(&ctl->tree_lock);
1851
1852 again:
1853         info = tree_search_offset(ctl, offset, 0, 0);
1854         if (!info) {
1855                 /*
1856                  * we didn't find an extent that matched the space we wanted
1857                  * to remove, so look for a bitmap instead
1858                  */
1859                 info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset),
1860                                           1, 0);
1861                 if (!info) {
1862                         /* the tree logging code might be calling us before we
1863                          * have fully loaded the free space rbtree for this
1864                          * block group.  So it is possible the entry won't
1865                          * be in the rbtree yet at all.  The caching code
1866                          * will make sure not to put it in the rbtree if
1867                          * the logging code has pinned it.
1868                          */
1869                         goto out_lock;
1870                 }
1871         }
1872
1873         if (info->bytes < bytes && rb_next(&info->offset_index)) {
1874                 u64 end;
1875                 next_info = rb_entry(rb_next(&info->offset_index),
1876                                              struct btrfs_free_space,
1877                                              offset_index);
1878
1879                 if (next_info->bitmap)
1880                         end = next_info->offset +
1881                               BITS_PER_BITMAP * ctl->unit - 1;
1882                 else
1883                         end = next_info->offset + next_info->bytes;
1884
1885                 if (next_info->bytes < bytes ||
1886                     next_info->offset > offset || offset > end) {
1887                         printk(KERN_CRIT "Found free space at %llu, size %llu,"
1888                               " trying to use %llu\n",
1889                               (unsigned long long)info->offset,
1890                               (unsigned long long)info->bytes,
1891                               (unsigned long long)bytes);
1892                         WARN_ON(1);
1893                         ret = -EINVAL;
1894                         goto out_lock;
1895                 }
1896
1897                 info = next_info;
1898         }
1899
1900         if (info->bytes == bytes) {
1901                 unlink_free_space(ctl, info);
1902                 if (info->bitmap) {
1903                         kfree(info->bitmap);
1904                         ctl->total_bitmaps--;
1905                 }
1906                 kmem_cache_free(btrfs_free_space_cachep, info);
1907                 ret = 0;
1908                 goto out_lock;
1909         }
1910
1911         if (!info->bitmap && info->offset == offset) {
1912                 unlink_free_space(ctl, info);
1913                 info->offset += bytes;
1914                 info->bytes -= bytes;
1915                 ret = link_free_space(ctl, info);
1916                 WARN_ON(ret);
1917                 goto out_lock;
1918         }
1919
1920         if (!info->bitmap && info->offset <= offset &&
1921             info->offset + info->bytes >= offset + bytes) {
1922                 u64 old_start = info->offset;
1923                 /*
1924                  * we're freeing space in the middle of the info,
1925                  * this can happen during tree log replay
1926                  *
1927                  * first unlink the old info and then
1928                  * insert it again after the hole we're creating
1929                  */
1930                 unlink_free_space(ctl, info);
1931                 if (offset + bytes < info->offset + info->bytes) {
1932                         u64 old_end = info->offset + info->bytes;
1933
1934                         info->offset = offset + bytes;
1935                         info->bytes = old_end - info->offset;
1936                         ret = link_free_space(ctl, info);
1937                         WARN_ON(ret);
1938                         if (ret)
1939                                 goto out_lock;
1940                 } else {
1941                         /* the hole we're creating ends at the end
1942                          * of the info struct, just free the info
1943                          */
1944                         kmem_cache_free(btrfs_free_space_cachep, info);
1945                 }
1946                 spin_unlock(&ctl->tree_lock);
1947
1948                 /* step two, insert a new info struct to cover
1949                  * anything before the hole
1950                  */
1951                 ret = btrfs_add_free_space(block_group, old_start,
1952                                            offset - old_start);
1953                 WARN_ON(ret); /* -ENOMEM */
1954                 goto out;
1955         }
1956
1957         ret = remove_from_bitmap(ctl, info, &offset, &bytes);
1958         if (ret == -EAGAIN)
1959                 goto again;
1960         BUG_ON(ret); /* logic error */
1961 out_lock:
1962         spin_unlock(&ctl->tree_lock);
1963 out:
1964         return ret;
1965 }
1966
1967 void btrfs_dump_free_space(struct btrfs_block_group_cache *block_group,
1968                            u64 bytes)
1969 {
1970         struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
1971         struct btrfs_free_space *info;
1972         struct rb_node *n;
1973         int count = 0;
1974
1975         for (n = rb_first(&ctl->free_space_offset); n; n = rb_next(n)) {
1976                 info = rb_entry(n, struct btrfs_free_space, offset_index);
1977                 if (info->bytes >= bytes)
1978                         count++;
1979                 printk(KERN_CRIT "entry offset %llu, bytes %llu, bitmap %s\n",
1980                        (unsigned long long)info->offset,
1981                        (unsigned long long)info->bytes,
1982                        (info->bitmap) ? "yes" : "no");
1983         }
1984         printk(KERN_INFO "block group has cluster?: %s\n",
1985                list_empty(&block_group->cluster_list) ? "no" : "yes");
1986         printk(KERN_INFO "%d blocks of free space at or bigger than "
1987                "%llu bytes\n", count, (unsigned long long)bytes);
1988 }
1989
1990 void btrfs_init_free_space_ctl(struct btrfs_block_group_cache *block_group)
1991 {
1992         struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
1993
1994         spin_lock_init(&ctl->tree_lock);
1995         ctl->unit = block_group->sectorsize;
1996         ctl->start = block_group->key.objectid;
1997         ctl->private = block_group;
1998         ctl->op = &free_space_op;
1999
2000         /*
2001          * we only want to have 32k of ram per block group for keeping
2002          * track of free space, and if we pass 1/2 of that we want to
2003          * start converting things over to using bitmaps
2004          */
2005         ctl->extents_thresh = ((1024 * 32) / 2) /
2006                                 sizeof(struct btrfs_free_space);
2007 }
2008
2009 /*
2010  * for a given cluster, put all of its extents back into the free
2011  * space cache.  If the block group passed doesn't match the block group
2012  * pointed to by the cluster, someone else raced in and freed the
2013  * cluster already.  In that case, we just return without changing anything
2014  */
2015 static int
2016 __btrfs_return_cluster_to_free_space(
2017                              struct btrfs_block_group_cache *block_group,
2018                              struct btrfs_free_cluster *cluster)
2019 {
2020         struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
2021         struct btrfs_free_space *entry;
2022         struct rb_node *node;
2023
2024         spin_lock(&cluster->lock);
2025         if (cluster->block_group != block_group)
2026                 goto out;
2027
2028         cluster->block_group = NULL;
2029         cluster->window_start = 0;
2030         list_del_init(&cluster->block_group_list);
2031
2032         node = rb_first(&cluster->root);
2033         while (node) {
2034                 bool bitmap;
2035
2036                 entry = rb_entry(node, struct btrfs_free_space, offset_index);
2037                 node = rb_next(&entry->offset_index);
2038                 rb_erase(&entry->offset_index, &cluster->root);
2039
2040                 bitmap = (entry->bitmap != NULL);
2041                 if (!bitmap)
2042                         try_merge_free_space(ctl, entry, false);
2043                 tree_insert_offset(&ctl->free_space_offset,
2044                                    entry->offset, &entry->offset_index, bitmap);
2045         }
2046         cluster->root = RB_ROOT;
2047
2048 out:
2049         spin_unlock(&cluster->lock);
2050         btrfs_put_block_group(block_group);
2051         return 0;
2052 }
2053
2054 void __btrfs_remove_free_space_cache_locked(struct btrfs_free_space_ctl *ctl)
2055 {
2056         struct btrfs_free_space *info;
2057         struct rb_node *node;
2058
2059         while ((node = rb_last(&ctl->free_space_offset)) != NULL) {
2060                 info = rb_entry(node, struct btrfs_free_space, offset_index);
2061                 if (!info->bitmap) {
2062                         unlink_free_space(ctl, info);
2063                         kmem_cache_free(btrfs_free_space_cachep, info);
2064                 } else {
2065                         free_bitmap(ctl, info);
2066                 }
2067                 if (need_resched()) {
2068                         spin_unlock(&ctl->tree_lock);
2069                         cond_resched();
2070                         spin_lock(&ctl->tree_lock);
2071                 }
2072         }
2073 }
2074
2075 void __btrfs_remove_free_space_cache(struct btrfs_free_space_ctl *ctl)
2076 {
2077         spin_lock(&ctl->tree_lock);
2078         __btrfs_remove_free_space_cache_locked(ctl);
2079         spin_unlock(&ctl->tree_lock);
2080 }
2081
2082 void btrfs_remove_free_space_cache(struct btrfs_block_group_cache *block_group)
2083 {
2084         struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
2085         struct btrfs_free_cluster *cluster;
2086         struct list_head *head;
2087
2088         spin_lock(&ctl->tree_lock);
2089         while ((head = block_group->cluster_list.next) !=
2090                &block_group->cluster_list) {
2091                 cluster = list_entry(head, struct btrfs_free_cluster,
2092                                      block_group_list);
2093
2094                 WARN_ON(cluster->block_group != block_group);
2095                 __btrfs_return_cluster_to_free_space(block_group, cluster);
2096                 if (need_resched()) {
2097                         spin_unlock(&ctl->tree_lock);
2098                         cond_resched();
2099                         spin_lock(&ctl->tree_lock);
2100                 }
2101         }
2102         __btrfs_remove_free_space_cache_locked(ctl);
2103         spin_unlock(&ctl->tree_lock);
2104
2105 }
2106
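     /*
      * Hand out @bytes of free space (searching for bytes + empty_size) from
      * the block group's cache, preferring space at or after @offset.
      * Returns the start of the allocation, or 0 if nothing large enough was
      * found.
      */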
2107 u64 btrfs_find_space_for_alloc(struct btrfs_block_group_cache *block_group,
2108                                u64 offset, u64 bytes, u64 empty_size)
2109 {
2110         struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
2111         struct btrfs_free_space *entry = NULL;
2112         u64 bytes_search = bytes + empty_size;
2113         u64 ret = 0;
2114
2115         spin_lock(&ctl->tree_lock);
2116         entry = find_free_space(ctl, &offset, &bytes_search);
2117         if (!entry)
2118                 goto out;
2119
2120         ret = offset;
2121         if (entry->bitmap) {
2122                 bitmap_clear_bits(ctl, entry, offset, bytes);
2123                 if (!entry->bytes)
2124                         free_bitmap(ctl, entry);
2125         } else {
2126                 unlink_free_space(ctl, entry);
2127                 entry->offset += bytes;
2128                 entry->bytes -= bytes;
2129                 if (!entry->bytes)
2130                         kmem_cache_free(btrfs_free_space_cachep, entry);
2131                 else
2132                         link_free_space(ctl, entry);
2133         }
2134
2135 out:
2136         spin_unlock(&ctl->tree_lock);
2137
2138         return ret;
2139 }
2140
2141 /*
2142  * given a cluster, put all of its extents back into the free space
2143  * cache.  If a block group is passed, this function will only free
2144  * a cluster that belongs to the passed block group.
2145  *
2146  * Otherwise, it'll get a reference on the block group pointed to by the
2147  * cluster and remove the cluster from it.
2148  */
2149 int btrfs_return_cluster_to_free_space(
2150                                struct btrfs_block_group_cache *block_group,
2151                                struct btrfs_free_cluster *cluster)
2152 {
2153         struct btrfs_free_space_ctl *ctl;
2154         int ret;
2155
2156         /* first, get a safe pointer to the block group */
2157         spin_lock(&cluster->lock);
2158         if (!block_group) {
2159                 block_group = cluster->block_group;
2160                 if (!block_group) {
2161                         spin_unlock(&cluster->lock);
2162                         return 0;
2163                 }
2164         } else if (cluster->block_group != block_group) {
2165                 /* someone else has already freed it, don't redo their work */
2166                 spin_unlock(&cluster->lock);
2167                 return 0;
2168         }
2169         atomic_inc(&block_group->count);
2170         spin_unlock(&cluster->lock);
2171
2172         ctl = block_group->free_space_ctl;
2173
2174         /* now return any extents the cluster had on it */
2175         spin_lock(&ctl->tree_lock);
2176         ret = __btrfs_return_cluster_to_free_space(block_group, cluster);
2177         spin_unlock(&ctl->tree_lock);
2178
2179         /* finally drop our ref */
2180         btrfs_put_block_group(block_group);
2181         return ret;
2182 }
2183
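     /*
      * Allocate @bytes out of a bitmap entry that belongs to a cluster.  The
      * search starts at @min_start; on success the matching bits are cleared
      * and the start of the allocation is returned, 0 otherwise.
      */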
2184 static u64 btrfs_alloc_from_bitmap(struct btrfs_block_group_cache *block_group,
2185                                    struct btrfs_free_cluster *cluster,
2186                                    struct btrfs_free_space *entry,
2187                                    u64 bytes, u64 min_start)
2188 {
2189         struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
2190         int err;
2191         u64 search_start = cluster->window_start;
2192         u64 search_bytes = bytes;
2193         u64 ret = 0;
2194
2195         search_start = min_start;
2196         search_bytes = bytes;
2197
2198         err = search_bitmap(ctl, entry, &search_start, &search_bytes);
2199         if (err)
2200                 return 0;
2201
2202         ret = search_start;
2203         __bitmap_clear_bits(ctl, entry, ret, bytes);
2204
2205         return ret;
2206 }
2207
2208 /*
2209  * given a cluster, try to allocate 'bytes' from it, returns 0
2210  * if it couldn't find anything suitably large, or a logical disk offset
2211  * if things worked out
2212  */
2213 u64 btrfs_alloc_from_cluster(struct btrfs_block_group_cache *block_group,
2214                              struct btrfs_free_cluster *cluster, u64 bytes,
2215                              u64 min_start)
2216 {
2217         struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
2218         struct btrfs_free_space *entry = NULL;
2219         struct rb_node *node;
2220         u64 ret = 0;
2221
2222         spin_lock(&cluster->lock);
2223         if (bytes > cluster->max_size)
2224                 goto out;
2225
2226         if (cluster->block_group != block_group)
2227                 goto out;
2228
2229         node = rb_first(&cluster->root);
2230         if (!node)
2231                 goto out;
2232
2233         entry = rb_entry(node, struct btrfs_free_space, offset_index);
2234         while (1) {
2235                 if (entry->bytes < bytes ||
2236                     (!entry->bitmap && entry->offset < min_start)) {
2237                         node = rb_next(&entry->offset_index);
2238                         if (!node)
2239                                 break;
2240                         entry = rb_entry(node, struct btrfs_free_space,
2241                                          offset_index);
2242                         continue;
2243                 }
2244
2245                 if (entry->bitmap) {
2246                         ret = btrfs_alloc_from_bitmap(block_group,
2247                                                       cluster, entry, bytes,
2248                                                       cluster->window_start);
2249                         if (ret == 0) {
2250                                 node = rb_next(&entry->offset_index);
2251                                 if (!node)
2252                                         break;
2253                                 entry = rb_entry(node, struct btrfs_free_space,
2254                                                  offset_index);
2255                                 continue;
2256                         }
2257                         cluster->window_start += bytes;
2258                 } else {
2259                         ret = entry->offset;
2260
2261                         entry->offset += bytes;
2262                         entry->bytes -= bytes;
2263                 }
2264
2265                 if (entry->bytes == 0)
2266                         rb_erase(&entry->offset_index, &cluster->root);
2267                 break;
2268         }
2269 out:
2270         spin_unlock(&cluster->lock);
2271
2272         if (!ret)
2273                 return 0;
2274
2275         spin_lock(&ctl->tree_lock);
2276
2277         ctl->free_space -= bytes;
2278         if (entry->bytes == 0) {
2279                 ctl->free_extents--;
2280                 if (entry->bitmap) {
2281                         kfree(entry->bitmap);
2282                         ctl->total_bitmaps--;
2283                         ctl->op->recalc_thresholds(ctl);
2284                 }
2285                 kmem_cache_free(btrfs_free_space_cachep, entry);
2286         }
2287
2288         spin_unlock(&ctl->tree_lock);
2289
2290         return ret;
2291 }
2292
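     /*
      * Try to build the cluster out of a single bitmap entry: gather runs of
      * set bits of at least min_bytes until we have bytes in total and at
      * least one run of cont1_bytes, then move the bitmap from the free
      * space tree into the cluster's rbtree.  Returns 0 on success or
      * -ENOSPC.
      */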
2293 static int btrfs_bitmap_cluster(struct btrfs_block_group_cache *block_group,
2294                                 struct btrfs_free_space *entry,
2295                                 struct btrfs_free_cluster *cluster,
2296                                 u64 offset, u64 bytes,
2297                                 u64 cont1_bytes, u64 min_bytes)
2298 {
2299         struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
2300         unsigned long next_zero;
2301         unsigned long i;
2302         unsigned long want_bits;
2303         unsigned long min_bits;
2304         unsigned long found_bits;
2305         unsigned long start = 0;
2306         unsigned long total_found = 0;
2307         int ret;
2308
2309         i = offset_to_bit(entry->offset, block_group->sectorsize,
2310                           max_t(u64, offset, entry->offset));
2311         want_bits = bytes_to_bits(bytes, block_group->sectorsize);
2312         min_bits = bytes_to_bits(min_bytes, block_group->sectorsize);
2313
2314 again:
2315         found_bits = 0;
2316         for (i = find_next_bit(entry->bitmap, BITS_PER_BITMAP, i);
2317              i < BITS_PER_BITMAP;
2318              i = find_next_bit(entry->bitmap, BITS_PER_BITMAP, i + 1)) {
2319                 next_zero = find_next_zero_bit(entry->bitmap,
2320                                                BITS_PER_BITMAP, i);
2321                 if (next_zero - i >= min_bits) {
2322                         found_bits = next_zero - i;
2323                         break;
2324                 }
2325                 i = next_zero;
2326         }
2327
2328         if (!found_bits)
2329                 return -ENOSPC;
2330
2331         if (!total_found) {
2332                 start = i;
2333                 cluster->max_size = 0;
2334         }
2335
2336         total_found += found_bits;
2337
2338         if (cluster->max_size < found_bits * block_group->sectorsize)
2339                 cluster->max_size = found_bits * block_group->sectorsize;
2340
2341         if (total_found < want_bits || cluster->max_size < cont1_bytes) {
2342                 i = next_zero + 1;
2343                 goto again;
2344         }
2345
2346         cluster->window_start = start * block_group->sectorsize +
2347                 entry->offset;
2348         rb_erase(&entry->offset_index, &ctl->free_space_offset);
2349         ret = tree_insert_offset(&cluster->root, entry->offset,
2350                                  &entry->offset_index, 1);
2351         BUG_ON(ret); /* -EEXIST; Logic error */
2352
2353         trace_btrfs_setup_cluster(block_group, cluster,
2354                                   total_found * block_group->sectorsize, 1);
2355         return 0;
2356 }
2357
2358 /*
2359  * This searches the block group for just extents to fill the cluster with.
2360  * Try to find a cluster with at least bytes total bytes, at least one
2361  * extent of cont1_bytes, and other clusters of at least min_bytes.
2362  */
2363 static noinline int
2364 setup_cluster_no_bitmap(struct btrfs_block_group_cache *block_group,
2365                         struct btrfs_free_cluster *cluster,
2366                         struct list_head *bitmaps, u64 offset, u64 bytes,
2367                         u64 cont1_bytes, u64 min_bytes)
2368 {
2369         struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
2370         struct btrfs_free_space *first = NULL;
2371         struct btrfs_free_space *entry = NULL;
2372         struct btrfs_free_space *last;
2373         struct rb_node *node;
2374         u64 window_start;
2375         u64 window_free;
2376         u64 max_extent;
2377         u64 total_size = 0;
2378
2379         entry = tree_search_offset(ctl, offset, 0, 1);
2380         if (!entry)
2381                 return -ENOSPC;
2382
2383         /*
2384          * We don't want bitmaps, so just move along until we find a normal
2385          * extent entry.
2386          */
2387         while (entry->bitmap || entry->bytes < min_bytes) {
2388                 if (entry->bitmap && list_empty(&entry->list))
2389                         list_add_tail(&entry->list, bitmaps);
2390                 node = rb_next(&entry->offset_index);
2391                 if (!node)
2392                         return -ENOSPC;
2393                 entry = rb_entry(node, struct btrfs_free_space, offset_index);
2394         }
2395
2396         window_start = entry->offset;
2397         window_free = entry->bytes;
2398         max_extent = entry->bytes;
2399         first = entry;
2400         last = entry;
2401
2402         for (node = rb_next(&entry->offset_index); node;
2403              node = rb_next(&entry->offset_index)) {
2404                 entry = rb_entry(node, struct btrfs_free_space, offset_index);
2405
2406                 if (entry->bitmap) {
2407                         if (list_empty(&entry->list))
2408                                 list_add_tail(&entry->list, bitmaps);
2409                         continue;
2410                 }
2411
2412                 if (entry->bytes < min_bytes)
2413                         continue;
2414
2415                 last = entry;
2416                 window_free += entry->bytes;
2417                 if (entry->bytes > max_extent)
2418                         max_extent = entry->bytes;
2419         }
2420
2421         if (window_free < bytes || max_extent < cont1_bytes)
2422                 return -ENOSPC;
2423
2424         cluster->window_start = first->offset;
2425
2426         node = &first->offset_index;
2427
2428         /*
2429          * now we've found our entries, pull them out of the free space
2430          * cache and put them into the cluster rbtree
2431          */
2432         do {
2433                 int ret;
2434
2435                 entry = rb_entry(node, struct btrfs_free_space, offset_index);
2436                 node = rb_next(&entry->offset_index);
2437                 if (entry->bitmap || entry->bytes < min_bytes)
2438                         continue;
2439
2440                 rb_erase(&entry->offset_index, &ctl->free_space_offset);
2441                 ret = tree_insert_offset(&cluster->root, entry->offset,
2442                                          &entry->offset_index, 0);
2443                 total_size += entry->bytes;
2444                 BUG_ON(ret); /* -EEXIST; Logic error */
2445         } while (node && entry != last);
2446
2447         cluster->max_size = max_extent;
2448         trace_btrfs_setup_cluster(block_group, cluster, total_size, 0);
2449         return 0;
2450 }
2451
2452 /*
2453  * This specifically looks for bitmaps that may work in the cluster, we assume
2454  * that we have already failed to find extents that will work.
2455  */
2456 static noinline int
2457 setup_cluster_bitmap(struct btrfs_block_group_cache *block_group,
2458                      struct btrfs_free_cluster *cluster,
2459                      struct list_head *bitmaps, u64 offset, u64 bytes,
2460                      u64 cont1_bytes, u64 min_bytes)
2461 {
2462         struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
2463         struct btrfs_free_space *entry;
2464         int ret = -ENOSPC;
2465         u64 bitmap_offset = offset_to_bitmap(ctl, offset);
2466
2467         if (ctl->total_bitmaps == 0)
2468                 return -ENOSPC;
2469
2470         /*
2471          * The bitmap that covers offset won't be in the list unless offset
2472          * is just its start offset.
2473          */
2474         entry = list_first_entry(bitmaps, struct btrfs_free_space, list);
2475         if (entry->offset != bitmap_offset) {
2476                 entry = tree_search_offset(ctl, bitmap_offset, 1, 0);
2477                 if (entry && list_empty(&entry->list))
2478                         list_add(&entry->list, bitmaps);
2479         }
2480
2481         list_for_each_entry(entry, bitmaps, list) {
2482                 if (entry->bytes < bytes)
2483                         continue;
2484                 ret = btrfs_bitmap_cluster(block_group, entry, cluster, offset,
2485                                            bytes, cont1_bytes, min_bytes);
2486                 if (!ret)
2487                         return 0;
2488         }
2489
2490         /*
2491          * The bitmaps list has all the bitmaps that record free space
2492          * starting after offset, so no more search is required.
2493          */
2494         return -ENOSPC;
2495 }
2496
2497 /*
2498  * here we try to find a cluster of blocks in a block group.  The goal
2499  * is to find at least bytes+empty_size.
2500  * We might not find them all in one contiguous area.
2501  *
2502  * returns zero and sets up cluster if things worked out, otherwise
2503  * it returns -enospc
2504  */
2505 int btrfs_find_space_cluster(struct btrfs_trans_handle *trans,
2506                              struct btrfs_root *root,
2507                              struct btrfs_block_group_cache *block_group,
2508                              struct btrfs_free_cluster *cluster,
2509                              u64 offset, u64 bytes, u64 empty_size)
2510 {
2511         struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
2512         struct btrfs_free_space *entry, *tmp;
2513         LIST_HEAD(bitmaps);
2514         u64 min_bytes;
2515         u64 cont1_bytes;
2516         int ret;
2517
2518         /*
2519          * Choose the minimum extent size we'll require for this
2520          * cluster.  For SSD_SPREAD, don't allow any fragmentation.
2521          * For metadata, allow allocations with smaller extents.  For
2522          * data, keep it dense.
2523          */
2524         if (btrfs_test_opt(root, SSD_SPREAD)) {
2525                 cont1_bytes = min_bytes = bytes + empty_size;
2526         } else if (block_group->flags & BTRFS_BLOCK_GROUP_METADATA) {
2527                 cont1_bytes = bytes;
2528                 min_bytes = block_group->sectorsize;
2529         } else {
2530                 cont1_bytes = max(bytes, (bytes + empty_size) >> 2);
2531                 min_bytes = block_group->sectorsize;
2532         }
2533
2534         spin_lock(&ctl->tree_lock);
2535
2536         /*
2537          * If we know we don't have enough space to make a cluster don't even
2538          * bother doing all the work to try and find one.
2539          */
2540         if (ctl->free_space < bytes) {
2541                 spin_unlock(&ctl->tree_lock);
2542                 return -ENOSPC;
2543         }
2544
2545         spin_lock(&cluster->lock);
2546
2547         /* someone already found a cluster, hooray */
2548         if (cluster->block_group) {
2549                 ret = 0;
2550                 goto out;
2551         }
2552
2553         trace_btrfs_find_cluster(block_group, offset, bytes, empty_size,
2554                                  min_bytes);
2555
2556         INIT_LIST_HEAD(&bitmaps);
2557         ret = setup_cluster_no_bitmap(block_group, cluster, &bitmaps, offset,
2558                                       bytes + empty_size,
2559                                       cont1_bytes, min_bytes);
2560         if (ret)
2561                 ret = setup_cluster_bitmap(block_group, cluster, &bitmaps,
2562                                            offset, bytes + empty_size,
2563                                            cont1_bytes, min_bytes);
2564
2565         /* Clear our temporary list */
2566         list_for_each_entry_safe(entry, tmp, &bitmaps, list)
2567                 list_del_init(&entry->list);
2568
2569         if (!ret) {
2570                 atomic_inc(&block_group->count);
2571                 list_add_tail(&cluster->block_group_list,
2572                               &block_group->cluster_list);
2573                 cluster->block_group = block_group;
2574         } else {
2575                 trace_btrfs_failed_cluster_setup(block_group);
2576         }
2577 out:
2578         spin_unlock(&cluster->lock);
2579         spin_unlock(&ctl->tree_lock);
2580
2581         return ret;
2582 }
2583
2584 /*
2585  * simple code to zero out a cluster
2586  */
2587 void btrfs_init_free_cluster(struct btrfs_free_cluster *cluster)
2588 {
2589         spin_lock_init(&cluster->lock);
2590         spin_lock_init(&cluster->refill_lock);
2591         cluster->root = RB_ROOT;
2592         cluster->max_size = 0;
2593         INIT_LIST_HEAD(&cluster->block_group_list);
2594         cluster->block_group = NULL;
2595 }
2596
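     /*
      * Discard [start, start + bytes) and then give the reserved range back
      * to the free space cache.  While the discard runs, the reserved bytes
      * are accounted as reserved in the block group and space_info (unless
      * the group has gone read-only).
      */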
2597 static int do_trimming(struct btrfs_block_group_cache *block_group,
2598                        u64 *total_trimmed, u64 start, u64 bytes,
2599                        u64 reserved_start, u64 reserved_bytes)
2600 {
2601         struct btrfs_space_info *space_info = block_group->space_info;
2602         struct btrfs_fs_info *fs_info = block_group->fs_info;
2603         int ret;
2604         int update = 0;
2605         u64 trimmed = 0;
2606
2607         spin_lock(&space_info->lock);
2608         spin_lock(&block_group->lock);
2609         if (!block_group->ro) {
2610                 block_group->reserved += reserved_bytes;
2611                 space_info->bytes_reserved += reserved_bytes;
2612                 update = 1;
2613         }
2614         spin_unlock(&block_group->lock);
2615         spin_unlock(&space_info->lock);
2616
2617         ret = btrfs_error_discard_extent(fs_info->extent_root,
2618                                          start, bytes, &trimmed);
2619         if (!ret)
2620                 *total_trimmed += trimmed;
2621
2622         btrfs_add_free_space(block_group, reserved_start, reserved_bytes);
2623
2624         if (update) {
2625                 spin_lock(&space_info->lock);
2626                 spin_lock(&block_group->lock);
2627                 if (block_group->ro)
2628                         space_info->bytes_readonly += reserved_bytes;
2629                 block_group->reserved -= reserved_bytes;
2630                 space_info->bytes_reserved -= reserved_bytes;
2631                 spin_unlock(&space_info->lock);
2632                 spin_unlock(&block_group->lock);
2633         }
2634
2635         return ret;
2636 }
2637
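     /*
      * Trim the plain extent entries that fall inside [start, end), skipping
      * bitmaps and anything smaller than minlen.  Each qualifying extent is
      * pulled out of the free space tree, discarded via do_trimming() and
      * handed back afterwards.
      */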
2638 static int trim_no_bitmap(struct btrfs_block_group_cache *block_group,
2639                           u64 *total_trimmed, u64 start, u64 end, u64 minlen)
2640 {
2641         struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
2642         struct btrfs_free_space *entry;
2643         struct rb_node *node;
2644         int ret = 0;
2645         u64 extent_start;
2646         u64 extent_bytes;
2647         u64 bytes;
2648
2649         while (start < end) {
2650                 spin_lock(&ctl->tree_lock);
2651
2652                 if (ctl->free_space < minlen) {
2653                         spin_unlock(&ctl->tree_lock);
2654                         break;
2655                 }
2656
2657                 entry = tree_search_offset(ctl, start, 0, 1);
2658                 if (!entry) {
2659                         spin_unlock(&ctl->tree_lock);
2660                         break;
2661                 }
2662
2663                 /* skip bitmaps */
2664                 while (entry->bitmap) {
2665                         node = rb_next(&entry->offset_index);
2666                         if (!node) {
2667                                 spin_unlock(&ctl->tree_lock);
2668                                 goto out;
2669                         }
2670                         entry = rb_entry(node, struct btrfs_free_space,
2671                                          offset_index);
2672                 }
2673
2674                 if (entry->offset >= end) {
2675                         spin_unlock(&ctl->tree_lock);
2676                         break;
2677                 }
2678
2679                 extent_start = entry->offset;
2680                 extent_bytes = entry->bytes;
2681                 start = max(start, extent_start);
2682                 bytes = min(extent_start + extent_bytes, end) - start;
2683                 if (bytes < minlen) {
2684                         spin_unlock(&ctl->tree_lock);
2685                         goto next;
2686                 }
2687
2688                 unlink_free_space(ctl, entry);
2689                 kmem_cache_free(btrfs_free_space_cachep, entry);
2690
2691                 spin_unlock(&ctl->tree_lock);
2692
2693                 ret = do_trimming(block_group, total_trimmed, start, bytes,
2694                                   extent_start, extent_bytes);
2695                 if (ret)
2696                         break;
2697 next:
2698                 start += bytes;
2699
2700                 if (fatal_signal_pending(current)) {
2701                         ret = -ERESTARTSYS;
2702                         break;
2703                 }
2704
2705                 cond_resched();
2706         }
2707 out:
2708         return ret;
2709 }
2710
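     /*
      * Trim free space recorded in bitmap entries inside [start, end), one
      * bitmap at a time.  Runs smaller than minlen are skipped; trimmed bits
      * are cleared before the discard and handed back by do_trimming().
      */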
2711 static int trim_bitmaps(struct btrfs_block_group_cache *block_group,
2712                         u64 *total_trimmed, u64 start, u64 end, u64 minlen)
2713 {
2714         struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
2715         struct btrfs_free_space *entry;
2716         int ret = 0;
2717         int ret2;
2718         u64 bytes;
2719         u64 offset = offset_to_bitmap(ctl, start);
2720
2721         while (offset < end) {
2722                 bool next_bitmap = false;
2723
2724                 spin_lock(&ctl->tree_lock);
2725
2726                 if (ctl->free_space < minlen) {
2727                         spin_unlock(&ctl->tree_lock);
2728                         break;
2729                 }
2730
2731                 entry = tree_search_offset(ctl, offset, 1, 0);
2732                 if (!entry) {
2733                         spin_unlock(&ctl->tree_lock);
2734                         next_bitmap = true;
2735                         goto next;
2736                 }
2737
2738                 bytes = minlen;
2739                 ret2 = search_bitmap(ctl, entry, &start, &bytes);
2740                 if (ret2 || start >= end) {
2741                         spin_unlock(&ctl->tree_lock);
2742                         next_bitmap = true;
2743                         goto next;
2744                 }
2745
2746                 bytes = min(bytes, end - start);
2747                 if (bytes < minlen) {
2748                         spin_unlock(&ctl->tree_lock);
2749                         goto next;
2750                 }
2751
2752                 bitmap_clear_bits(ctl, entry, start, bytes);
2753                 if (entry->bytes == 0)
2754                         free_bitmap(ctl, entry);
2755
2756                 spin_unlock(&ctl->tree_lock);
2757
2758                 ret = do_trimming(block_group, total_trimmed, start, bytes,
2759                                   start, bytes);
2760                 if (ret)
2761                         break;
2762 next:
2763                 if (next_bitmap) {
2764                         offset += BITS_PER_BITMAP * ctl->unit;
2765                 } else {
2766                         start += bytes;
2767                         if (start >= offset + BITS_PER_BITMAP * ctl->unit)
2768                                 offset += BITS_PER_BITMAP * ctl->unit;
2769                 }
2770
2771                 if (fatal_signal_pending(current)) {
2772                         ret = -ERESTARTSYS;
2773                         break;
2774                 }
2775
2776                 cond_resched();
2777         }
2778
2779         return ret;
2780 }
2781
2782 int btrfs_trim_block_group(struct btrfs_block_group_cache *block_group,
2783                            u64 *trimmed, u64 start, u64 end, u64 minlen)
2784 {
2785         int ret;
2786
2787         *trimmed = 0;
2788
2789         ret = trim_no_bitmap(block_group, trimmed, start, end, minlen);
2790         if (ret)
2791                 return ret;
2792
2793         ret = trim_bitmaps(block_group, trimmed, start, end, minlen);
2794
2795         return ret;
2796 }
2797
2798 /*
2799  * Find the left-most item in the cache tree, and then return the
2800  * smallest inode number in the item.
2801  *
2802  * Note: the returned inode number may not be the smallest one in
2803  * the tree, if the left-most item is a bitmap.
2804  */
2805 u64 btrfs_find_ino_for_alloc(struct btrfs_root *fs_root)
2806 {
2807         struct btrfs_free_space_ctl *ctl = fs_root->free_ino_ctl;
2808         struct btrfs_free_space *entry = NULL;
2809         u64 ino = 0;
2810
2811         spin_lock(&ctl->tree_lock);
2812
2813         if (RB_EMPTY_ROOT(&ctl->free_space_offset))
2814                 goto out;
2815
2816         entry = rb_entry(rb_first(&ctl->free_space_offset),
2817                          struct btrfs_free_space, offset_index);
2818
2819         if (!entry->bitmap) {
2820                 ino = entry->offset;
2821
2822                 unlink_free_space(ctl, entry);
2823                 entry->offset++;
2824                 entry->bytes--;
2825                 if (!entry->bytes)
2826                         kmem_cache_free(btrfs_free_space_cachep, entry);
2827                 else
2828                         link_free_space(ctl, entry);
2829         } else {
2830                 u64 offset = 0;
2831                 u64 count = 1;
2832                 int ret;
2833
2834                 ret = search_bitmap(ctl, entry, &offset, &count);
2835                 /* Logic error; Should be empty if it can't find anything */
2836                 BUG_ON(ret);
2837
2838                 ino = offset;
2839                 bitmap_clear_bits(ctl, entry, offset, 1);
2840                 if (entry->bytes == 0)
2841                         free_bitmap(ctl, entry);
2842         }
2843 out:
2844         spin_unlock(&ctl->tree_lock);
2845
2846         return ino;
2847 }
2848
2849 struct inode *lookup_free_ino_inode(struct btrfs_root *root,
2850                                     struct btrfs_path *path)
2851 {
2852         struct inode *inode = NULL;
2853
2854         spin_lock(&root->cache_lock);
2855         if (root->cache_inode)
2856                 inode = igrab(root->cache_inode);
2857         spin_unlock(&root->cache_lock);
2858         if (inode)
2859                 return inode;
2860
2861         inode = __lookup_free_space_inode(root, path, 0);
2862         if (IS_ERR(inode))
2863                 return inode;
2864
2865         spin_lock(&root->cache_lock);
2866         if (!btrfs_fs_closing(root->fs_info))
2867                 root->cache_inode = igrab(inode);
2868         spin_unlock(&root->cache_lock);
2869
2870         return inode;
2871 }
2872
2873 int create_free_ino_inode(struct btrfs_root *root,
2874                           struct btrfs_trans_handle *trans,
2875                           struct btrfs_path *path)
2876 {
2877         return __create_free_space_inode(root, trans, path,
2878                                          BTRFS_FREE_INO_OBJECTID, 0);
2879 }
2880
2881 int load_free_ino_cache(struct btrfs_fs_info *fs_info, struct btrfs_root *root)
2882 {
2883         struct btrfs_free_space_ctl *ctl = root->free_ino_ctl;
2884         struct btrfs_path *path;
2885         struct inode *inode;
2886         int ret = 0;
2887         u64 root_gen = btrfs_root_generation(&root->root_item);
2888
2889         if (!btrfs_test_opt(root, INODE_MAP_CACHE))
2890                 return 0;
2891
2892         /*
2893          * If we're unmounting then just return, since this does a search on the
2894          * normal root and not the commit root and we could deadlock.
2895          */
2896         if (btrfs_fs_closing(fs_info))
2897                 return 0;
2898
2899         path = btrfs_alloc_path();
2900         if (!path)
2901                 return 0;
2902
2903         inode = lookup_free_ino_inode(root, path);
2904         if (IS_ERR(inode))
2905                 goto out;
2906
2907         if (root_gen != BTRFS_I(inode)->generation)
2908                 goto out_put;
2909
2910         ret = __load_free_space_cache(root, inode, ctl, path, 0);
2911
2912         if (ret < 0)
2913                 printk(KERN_ERR "btrfs: failed to load free ino cache for "
2914                        "root %llu\n", root->root_key.objectid);
2915 out_put:
2916         iput(inode);
2917 out:
2918         btrfs_free_path(path);
2919         return ret;
2920 }
2921
2922 int btrfs_write_out_ino_cache(struct btrfs_root *root,
2923                               struct btrfs_trans_handle *trans,
2924                               struct btrfs_path *path)
2925 {
2926         struct btrfs_free_space_ctl *ctl = root->free_ino_ctl;
2927         struct inode *inode;
2928         int ret;
2929
2930         if (!btrfs_test_opt(root, INODE_MAP_CACHE))
2931                 return 0;
2932
2933         inode = lookup_free_ino_inode(root, path);
2934         if (IS_ERR(inode))
2935                 return 0;
2936
2937         ret = __btrfs_write_out_cache(root, inode, ctl, NULL, trans, path, 0);
2938         if (ret) {
2939                 btrfs_delalloc_release_metadata(inode, inode->i_size);
2940 #ifdef DEBUG
2941                 printk(KERN_ERR "btrfs: failed to write free ino cache "
2942                        "for root %llu\n", root->root_key.objectid);
2943 #endif
2944         }
2945
2946         iput(inode);
2947         return ret;
2948 }