btrfs: Update btrfs qgroup status item when rescan is done.
[cascardo/linux.git] / fs / btrfs / qgroup.c
1 /*
2  * Copyright (C) 2011 STRATO.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 021110-1307, USA.
17  */
18
19 #include <linux/sched.h>
20 #include <linux/pagemap.h>
21 #include <linux/writeback.h>
22 #include <linux/blkdev.h>
23 #include <linux/rbtree.h>
24 #include <linux/slab.h>
25 #include <linux/workqueue.h>
26 #include <linux/btrfs.h>
27
28 #include "ctree.h"
29 #include "transaction.h"
30 #include "disk-io.h"
31 #include "locking.h"
32 #include "ulist.h"
33 #include "backref.h"
34 #include "extent_io.h"
35 #include "qgroup.h"
36
37 /* TODO XXX FIXME
38  *  - subvol delete -> delete when ref goes to 0? delete limits also?
39  *  - reorganize keys
40  *  - compressed
41  *  - sync
42  *  - copy also limits on subvol creation
43  *  - limit
44  *  - caches fuer ulists
45  *  - performance benchmarks
46  *  - check all ioctl parameters
47  */
48
49 /*
50  * one struct for each qgroup, organized in fs_info->qgroup_tree.
51  */
52 struct btrfs_qgroup {
53         u64 qgroupid;
54
55         /*
56          * state
57          */
58         u64 rfer;       /* referenced */
59         u64 rfer_cmpr;  /* referenced compressed */
60         u64 excl;       /* exclusive */
61         u64 excl_cmpr;  /* exclusive compressed */
62
63         /*
64          * limits
65          */
66         u64 lim_flags;  /* which limits are set */
67         u64 max_rfer;
68         u64 max_excl;
69         u64 rsv_rfer;
70         u64 rsv_excl;
71
72         /*
73          * reservation tracking
74          */
75         u64 reserved;
76
77         /*
78          * lists
79          */
80         struct list_head groups;  /* groups this group is member of */
81         struct list_head members; /* groups that are members of this group */
82         struct list_head dirty;   /* dirty groups */
83         struct rb_node node;      /* tree of qgroups */
84
85         /*
86          * temp variables for accounting operations
87          */
88         u64 old_refcnt;
89         u64 new_refcnt;
90 };
91
92 /*
93  * glue structure to represent the relations between qgroups.
94  */
95 struct btrfs_qgroup_list {
96         struct list_head next_group;
97         struct list_head next_member;
98         struct btrfs_qgroup *group;
99         struct btrfs_qgroup *member;
100 };
101
102 #define ptr_to_u64(x) ((u64)(uintptr_t)x)
103 #define u64_to_ptr(x) ((struct btrfs_qgroup *)(uintptr_t)x)
104
105 static int
106 qgroup_rescan_init(struct btrfs_fs_info *fs_info, u64 progress_objectid,
107                    int init_flags);
108 static void qgroup_rescan_zero_tracking(struct btrfs_fs_info *fs_info);
109
110 /* must be called with qgroup_ioctl_lock held */
111 static struct btrfs_qgroup *find_qgroup_rb(struct btrfs_fs_info *fs_info,
112                                            u64 qgroupid)
113 {
114         struct rb_node *n = fs_info->qgroup_tree.rb_node;
115         struct btrfs_qgroup *qgroup;
116
117         while (n) {
118                 qgroup = rb_entry(n, struct btrfs_qgroup, node);
119                 if (qgroup->qgroupid < qgroupid)
120                         n = n->rb_left;
121                 else if (qgroup->qgroupid > qgroupid)
122                         n = n->rb_right;
123                 else
124                         return qgroup;
125         }
126         return NULL;
127 }
128
129 /* must be called with qgroup_lock held */
130 static struct btrfs_qgroup *add_qgroup_rb(struct btrfs_fs_info *fs_info,
131                                           u64 qgroupid)
132 {
133         struct rb_node **p = &fs_info->qgroup_tree.rb_node;
134         struct rb_node *parent = NULL;
135         struct btrfs_qgroup *qgroup;
136
137         while (*p) {
138                 parent = *p;
139                 qgroup = rb_entry(parent, struct btrfs_qgroup, node);
140
141                 if (qgroup->qgroupid < qgroupid)
142                         p = &(*p)->rb_left;
143                 else if (qgroup->qgroupid > qgroupid)
144                         p = &(*p)->rb_right;
145                 else
146                         return qgroup;
147         }
148
149         qgroup = kzalloc(sizeof(*qgroup), GFP_ATOMIC);
150         if (!qgroup)
151                 return ERR_PTR(-ENOMEM);
152
153         qgroup->qgroupid = qgroupid;
154         INIT_LIST_HEAD(&qgroup->groups);
155         INIT_LIST_HEAD(&qgroup->members);
156         INIT_LIST_HEAD(&qgroup->dirty);
157
158         rb_link_node(&qgroup->node, parent, p);
159         rb_insert_color(&qgroup->node, &fs_info->qgroup_tree);
160
161         return qgroup;
162 }
163
164 static void __del_qgroup_rb(struct btrfs_qgroup *qgroup)
165 {
166         struct btrfs_qgroup_list *list;
167
168         list_del(&qgroup->dirty);
169         while (!list_empty(&qgroup->groups)) {
170                 list = list_first_entry(&qgroup->groups,
171                                         struct btrfs_qgroup_list, next_group);
172                 list_del(&list->next_group);
173                 list_del(&list->next_member);
174                 kfree(list);
175         }
176
177         while (!list_empty(&qgroup->members)) {
178                 list = list_first_entry(&qgroup->members,
179                                         struct btrfs_qgroup_list, next_member);
180                 list_del(&list->next_group);
181                 list_del(&list->next_member);
182                 kfree(list);
183         }
184         kfree(qgroup);
185 }
186
187 /* must be called with qgroup_lock held */
188 static int del_qgroup_rb(struct btrfs_fs_info *fs_info, u64 qgroupid)
189 {
190         struct btrfs_qgroup *qgroup = find_qgroup_rb(fs_info, qgroupid);
191
192         if (!qgroup)
193                 return -ENOENT;
194
195         rb_erase(&qgroup->node, &fs_info->qgroup_tree);
196         __del_qgroup_rb(qgroup);
197         return 0;
198 }
199
200 /* must be called with qgroup_lock held */
201 static int add_relation_rb(struct btrfs_fs_info *fs_info,
202                            u64 memberid, u64 parentid)
203 {
204         struct btrfs_qgroup *member;
205         struct btrfs_qgroup *parent;
206         struct btrfs_qgroup_list *list;
207
208         member = find_qgroup_rb(fs_info, memberid);
209         parent = find_qgroup_rb(fs_info, parentid);
210         if (!member || !parent)
211                 return -ENOENT;
212
213         list = kzalloc(sizeof(*list), GFP_ATOMIC);
214         if (!list)
215                 return -ENOMEM;
216
217         list->group = parent;
218         list->member = member;
219         list_add_tail(&list->next_group, &member->groups);
220         list_add_tail(&list->next_member, &parent->members);
221
222         return 0;
223 }
224
225 /* must be called with qgroup_lock held */
226 static int del_relation_rb(struct btrfs_fs_info *fs_info,
227                            u64 memberid, u64 parentid)
228 {
229         struct btrfs_qgroup *member;
230         struct btrfs_qgroup *parent;
231         struct btrfs_qgroup_list *list;
232
233         member = find_qgroup_rb(fs_info, memberid);
234         parent = find_qgroup_rb(fs_info, parentid);
235         if (!member || !parent)
236                 return -ENOENT;
237
238         list_for_each_entry(list, &member->groups, next_group) {
239                 if (list->group == parent) {
240                         list_del(&list->next_group);
241                         list_del(&list->next_member);
242                         kfree(list);
243                         return 0;
244                 }
245         }
246         return -ENOENT;
247 }
248
249 #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
250 int btrfs_verify_qgroup_counts(struct btrfs_fs_info *fs_info, u64 qgroupid,
251                                u64 rfer, u64 excl)
252 {
253         struct btrfs_qgroup *qgroup;
254
255         qgroup = find_qgroup_rb(fs_info, qgroupid);
256         if (!qgroup)
257                 return -EINVAL;
258         if (qgroup->rfer != rfer || qgroup->excl != excl)
259                 return -EINVAL;
260         return 0;
261 }
262 #endif
263
264 /*
265  * The full config is read in one go, only called from open_ctree()
266  * It doesn't use any locking, as at this point we're still single-threaded
267  */
268 int btrfs_read_qgroup_config(struct btrfs_fs_info *fs_info)
269 {
270         struct btrfs_key key;
271         struct btrfs_key found_key;
272         struct btrfs_root *quota_root = fs_info->quota_root;
273         struct btrfs_path *path = NULL;
274         struct extent_buffer *l;
275         int slot;
276         int ret = 0;
277         u64 flags = 0;
278         u64 rescan_progress = 0;
279
280         if (!fs_info->quota_enabled)
281                 return 0;
282
283         fs_info->qgroup_ulist = ulist_alloc(GFP_NOFS);
284         if (!fs_info->qgroup_ulist) {
285                 ret = -ENOMEM;
286                 goto out;
287         }
288
289         path = btrfs_alloc_path();
290         if (!path) {
291                 ret = -ENOMEM;
292                 goto out;
293         }
294
295         /* default this to quota off, in case no status key is found */
296         fs_info->qgroup_flags = 0;
297
298         /*
299          * pass 1: read status, all qgroup infos and limits
300          */
301         key.objectid = 0;
302         key.type = 0;
303         key.offset = 0;
304         ret = btrfs_search_slot_for_read(quota_root, &key, path, 1, 1);
305         if (ret)
306                 goto out;
307
308         while (1) {
309                 struct btrfs_qgroup *qgroup;
310
311                 slot = path->slots[0];
312                 l = path->nodes[0];
313                 btrfs_item_key_to_cpu(l, &found_key, slot);
314
315                 if (found_key.type == BTRFS_QGROUP_STATUS_KEY) {
316                         struct btrfs_qgroup_status_item *ptr;
317
318                         ptr = btrfs_item_ptr(l, slot,
319                                              struct btrfs_qgroup_status_item);
320
321                         if (btrfs_qgroup_status_version(l, ptr) !=
322                             BTRFS_QGROUP_STATUS_VERSION) {
323                                 btrfs_err(fs_info,
324                                  "old qgroup version, quota disabled");
325                                 goto out;
326                         }
327                         if (btrfs_qgroup_status_generation(l, ptr) !=
328                             fs_info->generation) {
329                                 flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
330                                 btrfs_err(fs_info,
331                                         "qgroup generation mismatch, "
332                                         "marked as inconsistent");
333                         }
334                         fs_info->qgroup_flags = btrfs_qgroup_status_flags(l,
335                                                                           ptr);
336                         rescan_progress = btrfs_qgroup_status_rescan(l, ptr);
337                         goto next1;
338                 }
339
340                 if (found_key.type != BTRFS_QGROUP_INFO_KEY &&
341                     found_key.type != BTRFS_QGROUP_LIMIT_KEY)
342                         goto next1;
343
344                 qgroup = find_qgroup_rb(fs_info, found_key.offset);
345                 if ((qgroup && found_key.type == BTRFS_QGROUP_INFO_KEY) ||
346                     (!qgroup && found_key.type == BTRFS_QGROUP_LIMIT_KEY)) {
347                         btrfs_err(fs_info, "inconsitent qgroup config");
348                         flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
349                 }
350                 if (!qgroup) {
351                         qgroup = add_qgroup_rb(fs_info, found_key.offset);
352                         if (IS_ERR(qgroup)) {
353                                 ret = PTR_ERR(qgroup);
354                                 goto out;
355                         }
356                 }
357                 switch (found_key.type) {
358                 case BTRFS_QGROUP_INFO_KEY: {
359                         struct btrfs_qgroup_info_item *ptr;
360
361                         ptr = btrfs_item_ptr(l, slot,
362                                              struct btrfs_qgroup_info_item);
363                         qgroup->rfer = btrfs_qgroup_info_rfer(l, ptr);
364                         qgroup->rfer_cmpr = btrfs_qgroup_info_rfer_cmpr(l, ptr);
365                         qgroup->excl = btrfs_qgroup_info_excl(l, ptr);
366                         qgroup->excl_cmpr = btrfs_qgroup_info_excl_cmpr(l, ptr);
367                         /* generation currently unused */
368                         break;
369                 }
370                 case BTRFS_QGROUP_LIMIT_KEY: {
371                         struct btrfs_qgroup_limit_item *ptr;
372
373                         ptr = btrfs_item_ptr(l, slot,
374                                              struct btrfs_qgroup_limit_item);
375                         qgroup->lim_flags = btrfs_qgroup_limit_flags(l, ptr);
376                         qgroup->max_rfer = btrfs_qgroup_limit_max_rfer(l, ptr);
377                         qgroup->max_excl = btrfs_qgroup_limit_max_excl(l, ptr);
378                         qgroup->rsv_rfer = btrfs_qgroup_limit_rsv_rfer(l, ptr);
379                         qgroup->rsv_excl = btrfs_qgroup_limit_rsv_excl(l, ptr);
380                         break;
381                 }
382                 }
383 next1:
384                 ret = btrfs_next_item(quota_root, path);
385                 if (ret < 0)
386                         goto out;
387                 if (ret)
388                         break;
389         }
390         btrfs_release_path(path);
391
392         /*
393          * pass 2: read all qgroup relations
394          */
395         key.objectid = 0;
396         key.type = BTRFS_QGROUP_RELATION_KEY;
397         key.offset = 0;
398         ret = btrfs_search_slot_for_read(quota_root, &key, path, 1, 0);
399         if (ret)
400                 goto out;
401         while (1) {
402                 slot = path->slots[0];
403                 l = path->nodes[0];
404                 btrfs_item_key_to_cpu(l, &found_key, slot);
405
406                 if (found_key.type != BTRFS_QGROUP_RELATION_KEY)
407                         goto next2;
408
409                 if (found_key.objectid > found_key.offset) {
410                         /* parent <- member, not needed to build config */
411                         /* FIXME should we omit the key completely? */
412                         goto next2;
413                 }
414
415                 ret = add_relation_rb(fs_info, found_key.objectid,
416                                       found_key.offset);
417                 if (ret == -ENOENT) {
418                         btrfs_warn(fs_info,
419                                 "orphan qgroup relation 0x%llx->0x%llx",
420                                 found_key.objectid, found_key.offset);
421                         ret = 0;        /* ignore the error */
422                 }
423                 if (ret)
424                         goto out;
425 next2:
426                 ret = btrfs_next_item(quota_root, path);
427                 if (ret < 0)
428                         goto out;
429                 if (ret)
430                         break;
431         }
432 out:
433         fs_info->qgroup_flags |= flags;
434         if (!(fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_ON)) {
435                 fs_info->quota_enabled = 0;
436                 fs_info->pending_quota_state = 0;
437         } else if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN &&
438                    ret >= 0) {
439                 ret = qgroup_rescan_init(fs_info, rescan_progress, 0);
440         }
441         btrfs_free_path(path);
442
443         if (ret < 0) {
444                 ulist_free(fs_info->qgroup_ulist);
445                 fs_info->qgroup_ulist = NULL;
446                 fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN;
447         }
448
449         return ret < 0 ? ret : 0;
450 }
451
452 /*
453  * This is called from close_ctree() or open_ctree() or btrfs_quota_disable(),
454  * first two are in single-threaded paths.And for the third one, we have set
455  * quota_root to be null with qgroup_lock held before, so it is safe to clean
456  * up the in-memory structures without qgroup_lock held.
457  */
458 void btrfs_free_qgroup_config(struct btrfs_fs_info *fs_info)
459 {
460         struct rb_node *n;
461         struct btrfs_qgroup *qgroup;
462
463         while ((n = rb_first(&fs_info->qgroup_tree))) {
464                 qgroup = rb_entry(n, struct btrfs_qgroup, node);
465                 rb_erase(n, &fs_info->qgroup_tree);
466                 __del_qgroup_rb(qgroup);
467         }
468         /*
469          * we call btrfs_free_qgroup_config() when umounting
470          * filesystem and disabling quota, so we set qgroup_ulit
471          * to be null here to avoid double free.
472          */
473         ulist_free(fs_info->qgroup_ulist);
474         fs_info->qgroup_ulist = NULL;
475 }
476
477 static int add_qgroup_relation_item(struct btrfs_trans_handle *trans,
478                                     struct btrfs_root *quota_root,
479                                     u64 src, u64 dst)
480 {
481         int ret;
482         struct btrfs_path *path;
483         struct btrfs_key key;
484
485         path = btrfs_alloc_path();
486         if (!path)
487                 return -ENOMEM;
488
489         key.objectid = src;
490         key.type = BTRFS_QGROUP_RELATION_KEY;
491         key.offset = dst;
492
493         ret = btrfs_insert_empty_item(trans, quota_root, path, &key, 0);
494
495         btrfs_mark_buffer_dirty(path->nodes[0]);
496
497         btrfs_free_path(path);
498         return ret;
499 }
500
501 static int del_qgroup_relation_item(struct btrfs_trans_handle *trans,
502                                     struct btrfs_root *quota_root,
503                                     u64 src, u64 dst)
504 {
505         int ret;
506         struct btrfs_path *path;
507         struct btrfs_key key;
508
509         path = btrfs_alloc_path();
510         if (!path)
511                 return -ENOMEM;
512
513         key.objectid = src;
514         key.type = BTRFS_QGROUP_RELATION_KEY;
515         key.offset = dst;
516
517         ret = btrfs_search_slot(trans, quota_root, &key, path, -1, 1);
518         if (ret < 0)
519                 goto out;
520
521         if (ret > 0) {
522                 ret = -ENOENT;
523                 goto out;
524         }
525
526         ret = btrfs_del_item(trans, quota_root, path);
527 out:
528         btrfs_free_path(path);
529         return ret;
530 }
531
532 static int add_qgroup_item(struct btrfs_trans_handle *trans,
533                            struct btrfs_root *quota_root, u64 qgroupid)
534 {
535         int ret;
536         struct btrfs_path *path;
537         struct btrfs_qgroup_info_item *qgroup_info;
538         struct btrfs_qgroup_limit_item *qgroup_limit;
539         struct extent_buffer *leaf;
540         struct btrfs_key key;
541
542         if (btrfs_test_is_dummy_root(quota_root))
543                 return 0;
544
545         path = btrfs_alloc_path();
546         if (!path)
547                 return -ENOMEM;
548
549         key.objectid = 0;
550         key.type = BTRFS_QGROUP_INFO_KEY;
551         key.offset = qgroupid;
552
553         /*
554          * Avoid a transaction abort by catching -EEXIST here. In that
555          * case, we proceed by re-initializing the existing structure
556          * on disk.
557          */
558
559         ret = btrfs_insert_empty_item(trans, quota_root, path, &key,
560                                       sizeof(*qgroup_info));
561         if (ret && ret != -EEXIST)
562                 goto out;
563
564         leaf = path->nodes[0];
565         qgroup_info = btrfs_item_ptr(leaf, path->slots[0],
566                                  struct btrfs_qgroup_info_item);
567         btrfs_set_qgroup_info_generation(leaf, qgroup_info, trans->transid);
568         btrfs_set_qgroup_info_rfer(leaf, qgroup_info, 0);
569         btrfs_set_qgroup_info_rfer_cmpr(leaf, qgroup_info, 0);
570         btrfs_set_qgroup_info_excl(leaf, qgroup_info, 0);
571         btrfs_set_qgroup_info_excl_cmpr(leaf, qgroup_info, 0);
572
573         btrfs_mark_buffer_dirty(leaf);
574
575         btrfs_release_path(path);
576
577         key.type = BTRFS_QGROUP_LIMIT_KEY;
578         ret = btrfs_insert_empty_item(trans, quota_root, path, &key,
579                                       sizeof(*qgroup_limit));
580         if (ret && ret != -EEXIST)
581                 goto out;
582
583         leaf = path->nodes[0];
584         qgroup_limit = btrfs_item_ptr(leaf, path->slots[0],
585                                   struct btrfs_qgroup_limit_item);
586         btrfs_set_qgroup_limit_flags(leaf, qgroup_limit, 0);
587         btrfs_set_qgroup_limit_max_rfer(leaf, qgroup_limit, 0);
588         btrfs_set_qgroup_limit_max_excl(leaf, qgroup_limit, 0);
589         btrfs_set_qgroup_limit_rsv_rfer(leaf, qgroup_limit, 0);
590         btrfs_set_qgroup_limit_rsv_excl(leaf, qgroup_limit, 0);
591
592         btrfs_mark_buffer_dirty(leaf);
593
594         ret = 0;
595 out:
596         btrfs_free_path(path);
597         return ret;
598 }
599
600 static int del_qgroup_item(struct btrfs_trans_handle *trans,
601                            struct btrfs_root *quota_root, u64 qgroupid)
602 {
603         int ret;
604         struct btrfs_path *path;
605         struct btrfs_key key;
606
607         path = btrfs_alloc_path();
608         if (!path)
609                 return -ENOMEM;
610
611         key.objectid = 0;
612         key.type = BTRFS_QGROUP_INFO_KEY;
613         key.offset = qgroupid;
614         ret = btrfs_search_slot(trans, quota_root, &key, path, -1, 1);
615         if (ret < 0)
616                 goto out;
617
618         if (ret > 0) {
619                 ret = -ENOENT;
620                 goto out;
621         }
622
623         ret = btrfs_del_item(trans, quota_root, path);
624         if (ret)
625                 goto out;
626
627         btrfs_release_path(path);
628
629         key.type = BTRFS_QGROUP_LIMIT_KEY;
630         ret = btrfs_search_slot(trans, quota_root, &key, path, -1, 1);
631         if (ret < 0)
632                 goto out;
633
634         if (ret > 0) {
635                 ret = -ENOENT;
636                 goto out;
637         }
638
639         ret = btrfs_del_item(trans, quota_root, path);
640
641 out:
642         btrfs_free_path(path);
643         return ret;
644 }
645
646 static int update_qgroup_limit_item(struct btrfs_trans_handle *trans,
647                                     struct btrfs_root *root,
648                                     struct btrfs_qgroup *qgroup)
649 {
650         struct btrfs_path *path;
651         struct btrfs_key key;
652         struct extent_buffer *l;
653         struct btrfs_qgroup_limit_item *qgroup_limit;
654         int ret;
655         int slot;
656
657         key.objectid = 0;
658         key.type = BTRFS_QGROUP_LIMIT_KEY;
659         key.offset = qgroup->qgroupid;
660
661         path = btrfs_alloc_path();
662         if (!path)
663                 return -ENOMEM;
664
665         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
666         if (ret > 0)
667                 ret = -ENOENT;
668
669         if (ret)
670                 goto out;
671
672         l = path->nodes[0];
673         slot = path->slots[0];
674         qgroup_limit = btrfs_item_ptr(l, slot, struct btrfs_qgroup_limit_item);
675         btrfs_set_qgroup_limit_flags(l, qgroup_limit, qgroup->lim_flags);
676         btrfs_set_qgroup_limit_max_rfer(l, qgroup_limit, qgroup->max_rfer);
677         btrfs_set_qgroup_limit_max_excl(l, qgroup_limit, qgroup->max_excl);
678         btrfs_set_qgroup_limit_rsv_rfer(l, qgroup_limit, qgroup->rsv_rfer);
679         btrfs_set_qgroup_limit_rsv_excl(l, qgroup_limit, qgroup->rsv_excl);
680
681         btrfs_mark_buffer_dirty(l);
682
683 out:
684         btrfs_free_path(path);
685         return ret;
686 }
687
688 static int update_qgroup_info_item(struct btrfs_trans_handle *trans,
689                                    struct btrfs_root *root,
690                                    struct btrfs_qgroup *qgroup)
691 {
692         struct btrfs_path *path;
693         struct btrfs_key key;
694         struct extent_buffer *l;
695         struct btrfs_qgroup_info_item *qgroup_info;
696         int ret;
697         int slot;
698
699         if (btrfs_test_is_dummy_root(root))
700                 return 0;
701
702         key.objectid = 0;
703         key.type = BTRFS_QGROUP_INFO_KEY;
704         key.offset = qgroup->qgroupid;
705
706         path = btrfs_alloc_path();
707         if (!path)
708                 return -ENOMEM;
709
710         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
711         if (ret > 0)
712                 ret = -ENOENT;
713
714         if (ret)
715                 goto out;
716
717         l = path->nodes[0];
718         slot = path->slots[0];
719         qgroup_info = btrfs_item_ptr(l, slot, struct btrfs_qgroup_info_item);
720         btrfs_set_qgroup_info_generation(l, qgroup_info, trans->transid);
721         btrfs_set_qgroup_info_rfer(l, qgroup_info, qgroup->rfer);
722         btrfs_set_qgroup_info_rfer_cmpr(l, qgroup_info, qgroup->rfer_cmpr);
723         btrfs_set_qgroup_info_excl(l, qgroup_info, qgroup->excl);
724         btrfs_set_qgroup_info_excl_cmpr(l, qgroup_info, qgroup->excl_cmpr);
725
726         btrfs_mark_buffer_dirty(l);
727
728 out:
729         btrfs_free_path(path);
730         return ret;
731 }
732
733 static int update_qgroup_status_item(struct btrfs_trans_handle *trans,
734                                      struct btrfs_fs_info *fs_info,
735                                     struct btrfs_root *root)
736 {
737         struct btrfs_path *path;
738         struct btrfs_key key;
739         struct extent_buffer *l;
740         struct btrfs_qgroup_status_item *ptr;
741         int ret;
742         int slot;
743
744         key.objectid = 0;
745         key.type = BTRFS_QGROUP_STATUS_KEY;
746         key.offset = 0;
747
748         path = btrfs_alloc_path();
749         if (!path)
750                 return -ENOMEM;
751
752         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
753         if (ret > 0)
754                 ret = -ENOENT;
755
756         if (ret)
757                 goto out;
758
759         l = path->nodes[0];
760         slot = path->slots[0];
761         ptr = btrfs_item_ptr(l, slot, struct btrfs_qgroup_status_item);
762         btrfs_set_qgroup_status_flags(l, ptr, fs_info->qgroup_flags);
763         btrfs_set_qgroup_status_generation(l, ptr, trans->transid);
764         btrfs_set_qgroup_status_rescan(l, ptr,
765                                 fs_info->qgroup_rescan_progress.objectid);
766
767         btrfs_mark_buffer_dirty(l);
768
769 out:
770         btrfs_free_path(path);
771         return ret;
772 }
773
774 /*
775  * called with qgroup_lock held
776  */
777 static int btrfs_clean_quota_tree(struct btrfs_trans_handle *trans,
778                                   struct btrfs_root *root)
779 {
780         struct btrfs_path *path;
781         struct btrfs_key key;
782         struct extent_buffer *leaf = NULL;
783         int ret;
784         int nr = 0;
785
786         path = btrfs_alloc_path();
787         if (!path)
788                 return -ENOMEM;
789
790         path->leave_spinning = 1;
791
792         key.objectid = 0;
793         key.offset = 0;
794         key.type = 0;
795
796         while (1) {
797                 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
798                 if (ret < 0)
799                         goto out;
800                 leaf = path->nodes[0];
801                 nr = btrfs_header_nritems(leaf);
802                 if (!nr)
803                         break;
804                 /*
805                  * delete the leaf one by one
806                  * since the whole tree is going
807                  * to be deleted.
808                  */
809                 path->slots[0] = 0;
810                 ret = btrfs_del_items(trans, root, path, 0, nr);
811                 if (ret)
812                         goto out;
813
814                 btrfs_release_path(path);
815         }
816         ret = 0;
817 out:
818         root->fs_info->pending_quota_state = 0;
819         btrfs_free_path(path);
820         return ret;
821 }
822
823 int btrfs_quota_enable(struct btrfs_trans_handle *trans,
824                        struct btrfs_fs_info *fs_info)
825 {
826         struct btrfs_root *quota_root;
827         struct btrfs_root *tree_root = fs_info->tree_root;
828         struct btrfs_path *path = NULL;
829         struct btrfs_qgroup_status_item *ptr;
830         struct extent_buffer *leaf;
831         struct btrfs_key key;
832         struct btrfs_key found_key;
833         struct btrfs_qgroup *qgroup = NULL;
834         int ret = 0;
835         int slot;
836
837         mutex_lock(&fs_info->qgroup_ioctl_lock);
838         if (fs_info->quota_root) {
839                 fs_info->pending_quota_state = 1;
840                 goto out;
841         }
842
843         fs_info->qgroup_ulist = ulist_alloc(GFP_NOFS);
844         if (!fs_info->qgroup_ulist) {
845                 ret = -ENOMEM;
846                 goto out;
847         }
848
849         /*
850          * initially create the quota tree
851          */
852         quota_root = btrfs_create_tree(trans, fs_info,
853                                        BTRFS_QUOTA_TREE_OBJECTID);
854         if (IS_ERR(quota_root)) {
855                 ret =  PTR_ERR(quota_root);
856                 goto out;
857         }
858
859         path = btrfs_alloc_path();
860         if (!path) {
861                 ret = -ENOMEM;
862                 goto out_free_root;
863         }
864
865         key.objectid = 0;
866         key.type = BTRFS_QGROUP_STATUS_KEY;
867         key.offset = 0;
868
869         ret = btrfs_insert_empty_item(trans, quota_root, path, &key,
870                                       sizeof(*ptr));
871         if (ret)
872                 goto out_free_path;
873
874         leaf = path->nodes[0];
875         ptr = btrfs_item_ptr(leaf, path->slots[0],
876                                  struct btrfs_qgroup_status_item);
877         btrfs_set_qgroup_status_generation(leaf, ptr, trans->transid);
878         btrfs_set_qgroup_status_version(leaf, ptr, BTRFS_QGROUP_STATUS_VERSION);
879         fs_info->qgroup_flags = BTRFS_QGROUP_STATUS_FLAG_ON |
880                                 BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
881         btrfs_set_qgroup_status_flags(leaf, ptr, fs_info->qgroup_flags);
882         btrfs_set_qgroup_status_rescan(leaf, ptr, 0);
883
884         btrfs_mark_buffer_dirty(leaf);
885
886         key.objectid = 0;
887         key.type = BTRFS_ROOT_REF_KEY;
888         key.offset = 0;
889
890         btrfs_release_path(path);
891         ret = btrfs_search_slot_for_read(tree_root, &key, path, 1, 0);
892         if (ret > 0)
893                 goto out_add_root;
894         if (ret < 0)
895                 goto out_free_path;
896
897
898         while (1) {
899                 slot = path->slots[0];
900                 leaf = path->nodes[0];
901                 btrfs_item_key_to_cpu(leaf, &found_key, slot);
902
903                 if (found_key.type == BTRFS_ROOT_REF_KEY) {
904                         ret = add_qgroup_item(trans, quota_root,
905                                               found_key.offset);
906                         if (ret)
907                                 goto out_free_path;
908
909                         qgroup = add_qgroup_rb(fs_info, found_key.offset);
910                         if (IS_ERR(qgroup)) {
911                                 ret = PTR_ERR(qgroup);
912                                 goto out_free_path;
913                         }
914                 }
915                 ret = btrfs_next_item(tree_root, path);
916                 if (ret < 0)
917                         goto out_free_path;
918                 if (ret)
919                         break;
920         }
921
922 out_add_root:
923         btrfs_release_path(path);
924         ret = add_qgroup_item(trans, quota_root, BTRFS_FS_TREE_OBJECTID);
925         if (ret)
926                 goto out_free_path;
927
928         qgroup = add_qgroup_rb(fs_info, BTRFS_FS_TREE_OBJECTID);
929         if (IS_ERR(qgroup)) {
930                 ret = PTR_ERR(qgroup);
931                 goto out_free_path;
932         }
933         spin_lock(&fs_info->qgroup_lock);
934         fs_info->quota_root = quota_root;
935         fs_info->pending_quota_state = 1;
936         spin_unlock(&fs_info->qgroup_lock);
937 out_free_path:
938         btrfs_free_path(path);
939 out_free_root:
940         if (ret) {
941                 free_extent_buffer(quota_root->node);
942                 free_extent_buffer(quota_root->commit_root);
943                 kfree(quota_root);
944         }
945 out:
946         if (ret) {
947                 ulist_free(fs_info->qgroup_ulist);
948                 fs_info->qgroup_ulist = NULL;
949         }
950         mutex_unlock(&fs_info->qgroup_ioctl_lock);
951         return ret;
952 }
953
954 int btrfs_quota_disable(struct btrfs_trans_handle *trans,
955                         struct btrfs_fs_info *fs_info)
956 {
957         struct btrfs_root *tree_root = fs_info->tree_root;
958         struct btrfs_root *quota_root;
959         int ret = 0;
960
961         mutex_lock(&fs_info->qgroup_ioctl_lock);
962         if (!fs_info->quota_root)
963                 goto out;
964         spin_lock(&fs_info->qgroup_lock);
965         fs_info->quota_enabled = 0;
966         fs_info->pending_quota_state = 0;
967         quota_root = fs_info->quota_root;
968         fs_info->quota_root = NULL;
969         spin_unlock(&fs_info->qgroup_lock);
970
971         btrfs_free_qgroup_config(fs_info);
972
973         ret = btrfs_clean_quota_tree(trans, quota_root);
974         if (ret)
975                 goto out;
976
977         ret = btrfs_del_root(trans, tree_root, &quota_root->root_key);
978         if (ret)
979                 goto out;
980
981         list_del(&quota_root->dirty_list);
982
983         btrfs_tree_lock(quota_root->node);
984         clean_tree_block(trans, tree_root->fs_info, quota_root->node);
985         btrfs_tree_unlock(quota_root->node);
986         btrfs_free_tree_block(trans, quota_root, quota_root->node, 0, 1);
987
988         free_extent_buffer(quota_root->node);
989         free_extent_buffer(quota_root->commit_root);
990         kfree(quota_root);
991 out:
992         mutex_unlock(&fs_info->qgroup_ioctl_lock);
993         return ret;
994 }
995
996 static void qgroup_dirty(struct btrfs_fs_info *fs_info,
997                          struct btrfs_qgroup *qgroup)
998 {
999         if (list_empty(&qgroup->dirty))
1000                 list_add(&qgroup->dirty, &fs_info->dirty_qgroups);
1001 }
1002
1003 int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans,
1004                               struct btrfs_fs_info *fs_info, u64 src, u64 dst)
1005 {
1006         struct btrfs_root *quota_root;
1007         struct btrfs_qgroup *parent;
1008         struct btrfs_qgroup *member;
1009         struct btrfs_qgroup_list *list;
1010         int ret = 0;
1011
1012         /* Check the level of src and dst first */
1013         if (btrfs_qgroup_level(src) >= btrfs_qgroup_level(dst))
1014                 return -EINVAL;
1015
1016         mutex_lock(&fs_info->qgroup_ioctl_lock);
1017         quota_root = fs_info->quota_root;
1018         if (!quota_root) {
1019                 ret = -EINVAL;
1020                 goto out;
1021         }
1022         member = find_qgroup_rb(fs_info, src);
1023         parent = find_qgroup_rb(fs_info, dst);
1024         if (!member || !parent) {
1025                 ret = -EINVAL;
1026                 goto out;
1027         }
1028
1029         /* check if such qgroup relation exist firstly */
1030         list_for_each_entry(list, &member->groups, next_group) {
1031                 if (list->group == parent) {
1032                         ret = -EEXIST;
1033                         goto out;
1034                 }
1035         }
1036
1037         ret = add_qgroup_relation_item(trans, quota_root, src, dst);
1038         if (ret)
1039                 goto out;
1040
1041         ret = add_qgroup_relation_item(trans, quota_root, dst, src);
1042         if (ret) {
1043                 del_qgroup_relation_item(trans, quota_root, src, dst);
1044                 goto out;
1045         }
1046
1047         spin_lock(&fs_info->qgroup_lock);
1048         ret = add_relation_rb(quota_root->fs_info, src, dst);
1049         spin_unlock(&fs_info->qgroup_lock);
1050 out:
1051         mutex_unlock(&fs_info->qgroup_ioctl_lock);
1052         return ret;
1053 }
1054
1055 int __del_qgroup_relation(struct btrfs_trans_handle *trans,
1056                               struct btrfs_fs_info *fs_info, u64 src, u64 dst)
1057 {
1058         struct btrfs_root *quota_root;
1059         struct btrfs_qgroup *parent;
1060         struct btrfs_qgroup *member;
1061         struct btrfs_qgroup_list *list;
1062         int ret = 0;
1063         int err;
1064
1065         quota_root = fs_info->quota_root;
1066         if (!quota_root) {
1067                 ret = -EINVAL;
1068                 goto out;
1069         }
1070
1071         member = find_qgroup_rb(fs_info, src);
1072         parent = find_qgroup_rb(fs_info, dst);
1073         if (!member || !parent) {
1074                 ret = -EINVAL;
1075                 goto out;
1076         }
1077
1078         /* check if such qgroup relation exist firstly */
1079         list_for_each_entry(list, &member->groups, next_group) {
1080                 if (list->group == parent)
1081                         goto exist;
1082         }
1083         ret = -ENOENT;
1084         goto out;
1085 exist:
1086         ret = del_qgroup_relation_item(trans, quota_root, src, dst);
1087         err = del_qgroup_relation_item(trans, quota_root, dst, src);
1088         if (err && !ret)
1089                 ret = err;
1090
1091         spin_lock(&fs_info->qgroup_lock);
1092         del_relation_rb(fs_info, src, dst);
1093         spin_unlock(&fs_info->qgroup_lock);
1094 out:
1095         return ret;
1096 }
1097
1098 int btrfs_del_qgroup_relation(struct btrfs_trans_handle *trans,
1099                               struct btrfs_fs_info *fs_info, u64 src, u64 dst)
1100 {
1101         int ret = 0;
1102
1103         mutex_lock(&fs_info->qgroup_ioctl_lock);
1104         ret = __del_qgroup_relation(trans, fs_info, src, dst);
1105         mutex_unlock(&fs_info->qgroup_ioctl_lock);
1106
1107         return ret;
1108 }
1109
1110 int btrfs_create_qgroup(struct btrfs_trans_handle *trans,
1111                         struct btrfs_fs_info *fs_info, u64 qgroupid)
1112 {
1113         struct btrfs_root *quota_root;
1114         struct btrfs_qgroup *qgroup;
1115         int ret = 0;
1116
1117         mutex_lock(&fs_info->qgroup_ioctl_lock);
1118         quota_root = fs_info->quota_root;
1119         if (!quota_root) {
1120                 ret = -EINVAL;
1121                 goto out;
1122         }
1123         qgroup = find_qgroup_rb(fs_info, qgroupid);
1124         if (qgroup) {
1125                 ret = -EEXIST;
1126                 goto out;
1127         }
1128
1129         ret = add_qgroup_item(trans, quota_root, qgroupid);
1130         if (ret)
1131                 goto out;
1132
1133         spin_lock(&fs_info->qgroup_lock);
1134         qgroup = add_qgroup_rb(fs_info, qgroupid);
1135         spin_unlock(&fs_info->qgroup_lock);
1136
1137         if (IS_ERR(qgroup))
1138                 ret = PTR_ERR(qgroup);
1139 out:
1140         mutex_unlock(&fs_info->qgroup_ioctl_lock);
1141         return ret;
1142 }
1143
1144 int btrfs_remove_qgroup(struct btrfs_trans_handle *trans,
1145                         struct btrfs_fs_info *fs_info, u64 qgroupid)
1146 {
1147         struct btrfs_root *quota_root;
1148         struct btrfs_qgroup *qgroup;
1149         struct btrfs_qgroup_list *list;
1150         int ret = 0;
1151
1152         mutex_lock(&fs_info->qgroup_ioctl_lock);
1153         quota_root = fs_info->quota_root;
1154         if (!quota_root) {
1155                 ret = -EINVAL;
1156                 goto out;
1157         }
1158
1159         qgroup = find_qgroup_rb(fs_info, qgroupid);
1160         if (!qgroup) {
1161                 ret = -ENOENT;
1162                 goto out;
1163         } else {
1164                 /* check if there are no children of this qgroup */
1165                 if (!list_empty(&qgroup->members)) {
1166                         ret = -EBUSY;
1167                         goto out;
1168                 }
1169         }
1170         ret = del_qgroup_item(trans, quota_root, qgroupid);
1171
1172         while (!list_empty(&qgroup->groups)) {
1173                 list = list_first_entry(&qgroup->groups,
1174                                         struct btrfs_qgroup_list, next_group);
1175                 ret = __del_qgroup_relation(trans, fs_info,
1176                                            qgroupid,
1177                                            list->group->qgroupid);
1178                 if (ret)
1179                         goto out;
1180         }
1181
1182         spin_lock(&fs_info->qgroup_lock);
1183         del_qgroup_rb(quota_root->fs_info, qgroupid);
1184         spin_unlock(&fs_info->qgroup_lock);
1185 out:
1186         mutex_unlock(&fs_info->qgroup_ioctl_lock);
1187         return ret;
1188 }
1189
1190 int btrfs_limit_qgroup(struct btrfs_trans_handle *trans,
1191                        struct btrfs_fs_info *fs_info, u64 qgroupid,
1192                        struct btrfs_qgroup_limit *limit)
1193 {
1194         struct btrfs_root *quota_root;
1195         struct btrfs_qgroup *qgroup;
1196         int ret = 0;
1197
1198         mutex_lock(&fs_info->qgroup_ioctl_lock);
1199         quota_root = fs_info->quota_root;
1200         if (!quota_root) {
1201                 ret = -EINVAL;
1202                 goto out;
1203         }
1204
1205         qgroup = find_qgroup_rb(fs_info, qgroupid);
1206         if (!qgroup) {
1207                 ret = -ENOENT;
1208                 goto out;
1209         }
1210
1211         spin_lock(&fs_info->qgroup_lock);
1212         if (limit->flags & BTRFS_QGROUP_LIMIT_MAX_RFER)
1213                 qgroup->max_rfer = limit->max_rfer;
1214         if (limit->flags & BTRFS_QGROUP_LIMIT_MAX_EXCL)
1215                 qgroup->max_excl = limit->max_excl;
1216         if (limit->flags & BTRFS_QGROUP_LIMIT_RSV_RFER)
1217                 qgroup->rsv_rfer = limit->rsv_rfer;
1218         if (limit->flags & BTRFS_QGROUP_LIMIT_RSV_EXCL)
1219                 qgroup->rsv_excl = limit->rsv_excl;
1220         qgroup->lim_flags |= limit->flags;
1221
1222         spin_unlock(&fs_info->qgroup_lock);
1223
1224         ret = update_qgroup_limit_item(trans, quota_root, qgroup);
1225         if (ret) {
1226                 fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
1227                 btrfs_info(fs_info, "unable to update quota limit for %llu",
1228                        qgroupid);
1229         }
1230
1231 out:
1232         mutex_unlock(&fs_info->qgroup_ioctl_lock);
1233         return ret;
1234 }
1235
1236 static int comp_oper_exist(struct btrfs_qgroup_operation *oper1,
1237                            struct btrfs_qgroup_operation *oper2)
1238 {
1239         /*
1240          * Ignore seq and type here, we're looking for any operation
1241          * at all related to this extent on that root.
1242          */
1243         if (oper1->bytenr < oper2->bytenr)
1244                 return -1;
1245         if (oper1->bytenr > oper2->bytenr)
1246                 return 1;
1247         if (oper1->ref_root < oper2->ref_root)
1248                 return -1;
1249         if (oper1->ref_root > oper2->ref_root)
1250                 return 1;
1251         return 0;
1252 }
1253
1254 static int qgroup_oper_exists(struct btrfs_fs_info *fs_info,
1255                               struct btrfs_qgroup_operation *oper)
1256 {
1257         struct rb_node *n;
1258         struct btrfs_qgroup_operation *cur;
1259         int cmp;
1260
1261         spin_lock(&fs_info->qgroup_op_lock);
1262         n = fs_info->qgroup_op_tree.rb_node;
1263         while (n) {
1264                 cur = rb_entry(n, struct btrfs_qgroup_operation, n);
1265                 cmp = comp_oper_exist(cur, oper);
1266                 if (cmp < 0) {
1267                         n = n->rb_right;
1268                 } else if (cmp) {
1269                         n = n->rb_left;
1270                 } else {
1271                         spin_unlock(&fs_info->qgroup_op_lock);
1272                         return -EEXIST;
1273                 }
1274         }
1275         spin_unlock(&fs_info->qgroup_op_lock);
1276         return 0;
1277 }
1278
1279 static int comp_oper(struct btrfs_qgroup_operation *oper1,
1280                      struct btrfs_qgroup_operation *oper2)
1281 {
1282         if (oper1->bytenr < oper2->bytenr)
1283                 return -1;
1284         if (oper1->bytenr > oper2->bytenr)
1285                 return 1;
1286         if (oper1->ref_root < oper2->ref_root)
1287                 return -1;
1288         if (oper1->ref_root > oper2->ref_root)
1289                 return 1;
1290         if (oper1->seq < oper2->seq)
1291                 return -1;
1292         if (oper1->seq > oper2->seq)
1293                 return 1;
1294         if (oper1->type < oper2->type)
1295                 return -1;
1296         if (oper1->type > oper2->type)
1297                 return 1;
1298         return 0;
1299 }
1300
1301 static int insert_qgroup_oper(struct btrfs_fs_info *fs_info,
1302                               struct btrfs_qgroup_operation *oper)
1303 {
1304         struct rb_node **p;
1305         struct rb_node *parent = NULL;
1306         struct btrfs_qgroup_operation *cur;
1307         int cmp;
1308
1309         spin_lock(&fs_info->qgroup_op_lock);
1310         p = &fs_info->qgroup_op_tree.rb_node;
1311         while (*p) {
1312                 parent = *p;
1313                 cur = rb_entry(parent, struct btrfs_qgroup_operation, n);
1314                 cmp = comp_oper(cur, oper);
1315                 if (cmp < 0) {
1316                         p = &(*p)->rb_right;
1317                 } else if (cmp) {
1318                         p = &(*p)->rb_left;
1319                 } else {
1320                         spin_unlock(&fs_info->qgroup_op_lock);
1321                         return -EEXIST;
1322                 }
1323         }
1324         rb_link_node(&oper->n, parent, p);
1325         rb_insert_color(&oper->n, &fs_info->qgroup_op_tree);
1326         spin_unlock(&fs_info->qgroup_op_lock);
1327         return 0;
1328 }
1329
1330 /*
1331  * Record a quota operation for processing later on.
1332  * @trans: the transaction we are adding the delayed op to.
1333  * @fs_info: the fs_info for this fs.
1334  * @ref_root: the root of the reference we are acting on,
1335  * @bytenr: the bytenr we are acting on.
1336  * @num_bytes: the number of bytes in the reference.
1337  * @type: the type of operation this is.
1338  * @mod_seq: do we need to get a sequence number for looking up roots.
1339  *
1340  * We just add it to our trans qgroup_ref_list and carry on and process these
1341  * operations in order at some later point.  If the reference root isn't a fs
1342  * root then we don't bother with doing anything.
1343  *
1344  * MUST BE HOLDING THE REF LOCK.
1345  */
1346 int btrfs_qgroup_record_ref(struct btrfs_trans_handle *trans,
1347                             struct btrfs_fs_info *fs_info, u64 ref_root,
1348                             u64 bytenr, u64 num_bytes,
1349                             enum btrfs_qgroup_operation_type type, int mod_seq)
1350 {
1351         struct btrfs_qgroup_operation *oper;
1352         int ret;
1353
1354         if (!is_fstree(ref_root) || !fs_info->quota_enabled)
1355                 return 0;
1356
1357         oper = kmalloc(sizeof(*oper), GFP_NOFS);
1358         if (!oper)
1359                 return -ENOMEM;
1360
1361         oper->ref_root = ref_root;
1362         oper->bytenr = bytenr;
1363         oper->num_bytes = num_bytes;
1364         oper->type = type;
1365         oper->seq = atomic_inc_return(&fs_info->qgroup_op_seq);
1366         INIT_LIST_HEAD(&oper->elem.list);
1367         oper->elem.seq = 0;
1368
1369         trace_btrfs_qgroup_record_ref(oper);
1370
1371         if (type == BTRFS_QGROUP_OPER_SUB_SUBTREE) {
1372                 /*
1373                  * If any operation for this bytenr/ref_root combo
1374                  * exists, then we know it's not exclusively owned and
1375                  * shouldn't be queued up.
1376                  *
1377                  * This also catches the case where we have a cloned
1378                  * extent that gets queued up multiple times during
1379                  * drop snapshot.
1380                  */
1381                 if (qgroup_oper_exists(fs_info, oper)) {
1382                         kfree(oper);
1383                         return 0;
1384                 }
1385         }
1386
1387         ret = insert_qgroup_oper(fs_info, oper);
1388         if (ret) {
1389                 /* Shouldn't happen so have an assert for developers */
1390                 ASSERT(0);
1391                 kfree(oper);
1392                 return ret;
1393         }
1394         list_add_tail(&oper->list, &trans->qgroup_ref_list);
1395
1396         if (mod_seq)
1397                 btrfs_get_tree_mod_seq(fs_info, &oper->elem);
1398
1399         return 0;
1400 }
1401
1402 /*
1403  * The easy accounting, if we are adding/removing the only ref for an extent
1404  * then this qgroup and all of the parent qgroups get their refrence and
1405  * exclusive counts adjusted.
1406  */
1407 static int qgroup_excl_accounting(struct btrfs_fs_info *fs_info,
1408                                   struct btrfs_qgroup_operation *oper)
1409 {
1410         struct btrfs_qgroup *qgroup;
1411         struct ulist *tmp;
1412         struct btrfs_qgroup_list *glist;
1413         struct ulist_node *unode;
1414         struct ulist_iterator uiter;
1415         int sign = 0;
1416         int ret = 0;
1417
1418         tmp = ulist_alloc(GFP_NOFS);
1419         if (!tmp)
1420                 return -ENOMEM;
1421
1422         spin_lock(&fs_info->qgroup_lock);
1423         if (!fs_info->quota_root)
1424                 goto out;
1425         qgroup = find_qgroup_rb(fs_info, oper->ref_root);
1426         if (!qgroup)
1427                 goto out;
1428         switch (oper->type) {
1429         case BTRFS_QGROUP_OPER_ADD_EXCL:
1430                 sign = 1;
1431                 break;
1432         case BTRFS_QGROUP_OPER_SUB_EXCL:
1433                 sign = -1;
1434                 break;
1435         default:
1436                 ASSERT(0);
1437         }
1438         qgroup->rfer += sign * oper->num_bytes;
1439         qgroup->rfer_cmpr += sign * oper->num_bytes;
1440
1441         WARN_ON(sign < 0 && qgroup->excl < oper->num_bytes);
1442         qgroup->excl += sign * oper->num_bytes;
1443         qgroup->excl_cmpr += sign * oper->num_bytes;
1444         if (sign > 0)
1445                 qgroup->reserved -= oper->num_bytes;
1446
1447         qgroup_dirty(fs_info, qgroup);
1448
1449         /* Get all of the parent groups that contain this qgroup */
1450         list_for_each_entry(glist, &qgroup->groups, next_group) {
1451                 ret = ulist_add(tmp, glist->group->qgroupid,
1452                                 ptr_to_u64(glist->group), GFP_ATOMIC);
1453                 if (ret < 0)
1454                         goto out;
1455         }
1456
1457         /* Iterate all of the parents and adjust their reference counts */
1458         ULIST_ITER_INIT(&uiter);
1459         while ((unode = ulist_next(tmp, &uiter))) {
1460                 qgroup = u64_to_ptr(unode->aux);
1461                 qgroup->rfer += sign * oper->num_bytes;
1462                 qgroup->rfer_cmpr += sign * oper->num_bytes;
1463                 WARN_ON(sign < 0 && qgroup->excl < oper->num_bytes);
1464                 qgroup->excl += sign * oper->num_bytes;
1465                 if (sign > 0)
1466                         qgroup->reserved -= oper->num_bytes;
1467                 qgroup->excl_cmpr += sign * oper->num_bytes;
1468                 qgroup_dirty(fs_info, qgroup);
1469
1470                 /* Add any parents of the parents */
1471                 list_for_each_entry(glist, &qgroup->groups, next_group) {
1472                         ret = ulist_add(tmp, glist->group->qgroupid,
1473                                         ptr_to_u64(glist->group), GFP_ATOMIC);
1474                         if (ret < 0)
1475                                 goto out;
1476                 }
1477         }
1478         ret = 0;
1479 out:
1480         spin_unlock(&fs_info->qgroup_lock);
1481         ulist_free(tmp);
1482         return ret;
1483 }
1484
1485 /*
1486  * Walk all of the roots that pointed to our bytenr and adjust their refcnts as
1487  * properly.
1488  */
1489 static int qgroup_calc_old_refcnt(struct btrfs_fs_info *fs_info,
1490                                   u64 root_to_skip, struct ulist *tmp,
1491                                   struct ulist *roots, struct ulist *qgroups,
1492                                   u64 seq, int *old_roots, int rescan)
1493 {
1494         struct ulist_node *unode;
1495         struct ulist_iterator uiter;
1496         struct ulist_node *tmp_unode;
1497         struct ulist_iterator tmp_uiter;
1498         struct btrfs_qgroup *qg;
1499         int ret;
1500
1501         ULIST_ITER_INIT(&uiter);
1502         while ((unode = ulist_next(roots, &uiter))) {
1503                 /* We don't count our current root here */
1504                 if (unode->val == root_to_skip)
1505                         continue;
1506                 qg = find_qgroup_rb(fs_info, unode->val);
1507                 if (!qg)
1508                         continue;
1509                 /*
1510                  * We could have a pending removal of this same ref so we may
1511                  * not have actually found our ref root when doing
1512                  * btrfs_find_all_roots, so we need to keep track of how many
1513                  * old roots we find in case we removed ours and added a
1514                  * different one at the same time.  I don't think this could
1515                  * happen in practice but that sort of thinking leads to pain
1516                  * and suffering and to the dark side.
1517                  */
1518                 (*old_roots)++;
1519
1520                 ulist_reinit(tmp);
1521                 ret = ulist_add(qgroups, qg->qgroupid, ptr_to_u64(qg),
1522                                 GFP_ATOMIC);
1523                 if (ret < 0)
1524                         return ret;
1525                 ret = ulist_add(tmp, qg->qgroupid, ptr_to_u64(qg), GFP_ATOMIC);
1526                 if (ret < 0)
1527                         return ret;
1528                 ULIST_ITER_INIT(&tmp_uiter);
1529                 while ((tmp_unode = ulist_next(tmp, &tmp_uiter))) {
1530                         struct btrfs_qgroup_list *glist;
1531
1532                         qg = u64_to_ptr(tmp_unode->aux);
1533                         /*
1534                          * We use this sequence number to keep from having to
1535                          * run the whole list and 0 out the refcnt every time.
1536                          * We basically use sequnce as the known 0 count and
1537                          * then add 1 everytime we see a qgroup.  This is how we
1538                          * get how many of the roots actually point up to the
1539                          * upper level qgroups in order to determine exclusive
1540                          * counts.
1541                          *
1542                          * For rescan we want to set old_refcnt to seq so our
1543                          * exclusive calculations end up correct.
1544                          */
1545                         if (rescan)
1546                                 qg->old_refcnt = seq;
1547                         else if (qg->old_refcnt < seq)
1548                                 qg->old_refcnt = seq + 1;
1549                         else
1550                                 qg->old_refcnt++;
1551
1552                         if (qg->new_refcnt < seq)
1553                                 qg->new_refcnt = seq + 1;
1554                         else
1555                                 qg->new_refcnt++;
1556                         list_for_each_entry(glist, &qg->groups, next_group) {
1557                                 ret = ulist_add(qgroups, glist->group->qgroupid,
1558                                                 ptr_to_u64(glist->group),
1559                                                 GFP_ATOMIC);
1560                                 if (ret < 0)
1561                                         return ret;
1562                                 ret = ulist_add(tmp, glist->group->qgroupid,
1563                                                 ptr_to_u64(glist->group),
1564                                                 GFP_ATOMIC);
1565                                 if (ret < 0)
1566                                         return ret;
1567                         }
1568                 }
1569         }
1570         return 0;
1571 }
1572
1573 /*
1574  * We need to walk forward in our operation tree and account for any roots that
1575  * were deleted after we made this operation.
1576  */
1577 static int qgroup_account_deleted_refs(struct btrfs_fs_info *fs_info,
1578                                        struct btrfs_qgroup_operation *oper,
1579                                        struct ulist *tmp,
1580                                        struct ulist *qgroups, u64 seq,
1581                                        int *old_roots)
1582 {
1583         struct ulist_node *unode;
1584         struct ulist_iterator uiter;
1585         struct btrfs_qgroup *qg;
1586         struct btrfs_qgroup_operation *tmp_oper;
1587         struct rb_node *n;
1588         int ret;
1589
1590         ulist_reinit(tmp);
1591
1592         /*
1593          * We only walk forward in the tree since we're only interested in
1594          * removals that happened _after_  our operation.
1595          */
1596         spin_lock(&fs_info->qgroup_op_lock);
1597         n = rb_next(&oper->n);
1598         spin_unlock(&fs_info->qgroup_op_lock);
1599         if (!n)
1600                 return 0;
1601         tmp_oper = rb_entry(n, struct btrfs_qgroup_operation, n);
1602         while (tmp_oper->bytenr == oper->bytenr) {
1603                 /*
1604                  * If it's not a removal we don't care, additions work out
1605                  * properly with our refcnt tracking.
1606                  */
1607                 if (tmp_oper->type != BTRFS_QGROUP_OPER_SUB_SHARED &&
1608                     tmp_oper->type != BTRFS_QGROUP_OPER_SUB_EXCL)
1609                         goto next;
1610                 qg = find_qgroup_rb(fs_info, tmp_oper->ref_root);
1611                 if (!qg)
1612                         goto next;
1613                 ret = ulist_add(qgroups, qg->qgroupid, ptr_to_u64(qg),
1614                                 GFP_ATOMIC);
1615                 if (ret) {
1616                         if (ret < 0)
1617                                 return ret;
1618                         /*
1619                          * We only want to increase old_roots if this qgroup is
1620                          * not already in the list of qgroups.  If it is already
1621                          * there then that means it must have been re-added or
1622                          * the delete will be discarded because we had an
1623                          * existing ref that we haven't looked up yet.  In this
1624                          * case we don't want to increase old_roots.  So if ret
1625                          * == 1 then we know that this is the first time we've
1626                          * seen this qgroup and we can bump the old_roots.
1627                          */
1628                         (*old_roots)++;
1629                         ret = ulist_add(tmp, qg->qgroupid, ptr_to_u64(qg),
1630                                         GFP_ATOMIC);
1631                         if (ret < 0)
1632                                 return ret;
1633                 }
1634 next:
1635                 spin_lock(&fs_info->qgroup_op_lock);
1636                 n = rb_next(&tmp_oper->n);
1637                 spin_unlock(&fs_info->qgroup_op_lock);
1638                 if (!n)
1639                         break;
1640                 tmp_oper = rb_entry(n, struct btrfs_qgroup_operation, n);
1641         }
1642
1643         /* Ok now process the qgroups we found */
1644         ULIST_ITER_INIT(&uiter);
1645         while ((unode = ulist_next(tmp, &uiter))) {
1646                 struct btrfs_qgroup_list *glist;
1647
1648                 qg = u64_to_ptr(unode->aux);
1649                 if (qg->old_refcnt < seq)
1650                         qg->old_refcnt = seq + 1;
1651                 else
1652                         qg->old_refcnt++;
1653                 if (qg->new_refcnt < seq)
1654                         qg->new_refcnt = seq + 1;
1655                 else
1656                         qg->new_refcnt++;
1657                 list_for_each_entry(glist, &qg->groups, next_group) {
1658                         ret = ulist_add(qgroups, glist->group->qgroupid,
1659                                         ptr_to_u64(glist->group), GFP_ATOMIC);
1660                         if (ret < 0)
1661                                 return ret;
1662                         ret = ulist_add(tmp, glist->group->qgroupid,
1663                                         ptr_to_u64(glist->group), GFP_ATOMIC);
1664                         if (ret < 0)
1665                                 return ret;
1666                 }
1667         }
1668         return 0;
1669 }
1670
1671 /* Add refcnt for the newly added reference. */
1672 static int qgroup_calc_new_refcnt(struct btrfs_fs_info *fs_info,
1673                                   struct btrfs_qgroup_operation *oper,
1674                                   struct btrfs_qgroup *qgroup,
1675                                   struct ulist *tmp, struct ulist *qgroups,
1676                                   u64 seq)
1677 {
1678         struct ulist_node *unode;
1679         struct ulist_iterator uiter;
1680         struct btrfs_qgroup *qg;
1681         int ret;
1682
1683         ulist_reinit(tmp);
1684         ret = ulist_add(qgroups, qgroup->qgroupid, ptr_to_u64(qgroup),
1685                         GFP_ATOMIC);
1686         if (ret < 0)
1687                 return ret;
1688         ret = ulist_add(tmp, qgroup->qgroupid, ptr_to_u64(qgroup),
1689                         GFP_ATOMIC);
1690         if (ret < 0)
1691                 return ret;
1692         ULIST_ITER_INIT(&uiter);
1693         while ((unode = ulist_next(tmp, &uiter))) {
1694                 struct btrfs_qgroup_list *glist;
1695
1696                 qg = u64_to_ptr(unode->aux);
1697                 if (oper->type == BTRFS_QGROUP_OPER_ADD_SHARED) {
1698                         if (qg->new_refcnt < seq)
1699                                 qg->new_refcnt = seq + 1;
1700                         else
1701                                 qg->new_refcnt++;
1702                 } else {
1703                         if (qg->old_refcnt < seq)
1704                                 qg->old_refcnt = seq + 1;
1705                         else
1706                                 qg->old_refcnt++;
1707                 }
1708                 list_for_each_entry(glist, &qg->groups, next_group) {
1709                         ret = ulist_add(tmp, glist->group->qgroupid,
1710                                         ptr_to_u64(glist->group), GFP_ATOMIC);
1711                         if (ret < 0)
1712                                 return ret;
1713                         ret = ulist_add(qgroups, glist->group->qgroupid,
1714                                         ptr_to_u64(glist->group), GFP_ATOMIC);
1715                         if (ret < 0)
1716                                 return ret;
1717                 }
1718         }
1719         return 0;
1720 }
1721
1722 /*
1723  * This adjusts the counters for all referenced qgroups if need be.
1724  */
1725 static int qgroup_adjust_counters(struct btrfs_fs_info *fs_info,
1726                                   u64 root_to_skip, u64 num_bytes,
1727                                   struct ulist *qgroups, u64 seq,
1728                                   int old_roots, int new_roots, int rescan)
1729 {
1730         struct ulist_node *unode;
1731         struct ulist_iterator uiter;
1732         struct btrfs_qgroup *qg;
1733         u64 cur_new_count, cur_old_count;
1734
1735         ULIST_ITER_INIT(&uiter);
1736         while ((unode = ulist_next(qgroups, &uiter))) {
1737                 bool dirty = false;
1738
1739                 qg = u64_to_ptr(unode->aux);
1740                 /*
1741                  * Wasn't referenced before but is now, add to the reference
1742                  * counters.
1743                  */
1744                 if (qg->old_refcnt <= seq && qg->new_refcnt > seq) {
1745                         qg->rfer += num_bytes;
1746                         qg->rfer_cmpr += num_bytes;
1747                         dirty = true;
1748                 }
1749
1750                 /*
1751                  * Was referenced before but isn't now, subtract from the
1752                  * reference counters.
1753                  */
1754                 if (qg->old_refcnt > seq && qg->new_refcnt <= seq) {
1755                         qg->rfer -= num_bytes;
1756                         qg->rfer_cmpr -= num_bytes;
1757                         dirty = true;
1758                 }
1759
1760                 if (qg->old_refcnt < seq)
1761                         cur_old_count = 0;
1762                 else
1763                         cur_old_count = qg->old_refcnt - seq;
1764                 if (qg->new_refcnt < seq)
1765                         cur_new_count = 0;
1766                 else
1767                         cur_new_count = qg->new_refcnt - seq;
1768
1769                 /*
1770                  * If our refcount was the same as the roots previously but our
1771                  * new count isn't the same as the number of roots now then we
1772                  * went from having a exclusive reference on this range to not.
1773                  */
1774                 if (old_roots && cur_old_count == old_roots &&
1775                     (cur_new_count != new_roots || new_roots == 0)) {
1776                         WARN_ON(cur_new_count != new_roots && new_roots == 0);
1777                         qg->excl -= num_bytes;
1778                         qg->excl_cmpr -= num_bytes;
1779                         dirty = true;
1780                 }
1781
1782                 /*
1783                  * If we didn't reference all the roots before but now we do we
1784                  * have an exclusive reference to this range.
1785                  */
1786                 if ((!old_roots || (old_roots && cur_old_count != old_roots))
1787                     && cur_new_count == new_roots) {
1788                         qg->excl += num_bytes;
1789                         qg->excl_cmpr += num_bytes;
1790                         dirty = true;
1791                 }
1792
1793                 if (dirty)
1794                         qgroup_dirty(fs_info, qg);
1795         }
1796         return 0;
1797 }
1798
1799 /*
1800  * If we removed a data extent and there were other references for that bytenr
1801  * then we need to lookup all referenced roots to make sure we still don't
1802  * reference this bytenr.  If we do then we can just discard this operation.
1803  */
1804 static int check_existing_refs(struct btrfs_trans_handle *trans,
1805                                struct btrfs_fs_info *fs_info,
1806                                struct btrfs_qgroup_operation *oper)
1807 {
1808         struct ulist *roots = NULL;
1809         struct ulist_node *unode;
1810         struct ulist_iterator uiter;
1811         int ret = 0;
1812
1813         ret = btrfs_find_all_roots(trans, fs_info, oper->bytenr,
1814                                    oper->elem.seq, &roots);
1815         if (ret < 0)
1816                 return ret;
1817         ret = 0;
1818
1819         ULIST_ITER_INIT(&uiter);
1820         while ((unode = ulist_next(roots, &uiter))) {
1821                 if (unode->val == oper->ref_root) {
1822                         ret = 1;
1823                         break;
1824                 }
1825         }
1826         ulist_free(roots);
1827         btrfs_put_tree_mod_seq(fs_info, &oper->elem);
1828
1829         return ret;
1830 }
1831
1832 /*
1833  * If we share a reference across multiple roots then we may need to adjust
1834  * various qgroups referenced and exclusive counters.  The basic premise is this
1835  *
1836  * 1) We have seq to represent a 0 count.  Instead of looping through all of the
1837  * qgroups and resetting their refcount to 0 we just constantly bump this
1838  * sequence number to act as the base reference count.  This means that if
1839  * anybody is equal to or below this sequence they were never referenced.  We
1840  * jack this sequence up by the number of roots we found each time in order to
1841  * make sure we don't have any overlap.
1842  *
1843  * 2) We first search all the roots that reference the area _except_ the root
1844  * we're acting on currently.  This makes up the old_refcnt of all the qgroups
1845  * before.
1846  *
1847  * 3) We walk all of the qgroups referenced by the root we are currently acting
1848  * on, and will either adjust old_refcnt in the case of a removal or the
1849  * new_refcnt in the case of an addition.
1850  *
1851  * 4) Finally we walk all the qgroups that are referenced by this range
1852  * including the root we are acting on currently.  We will adjust the counters
1853  * based on the number of roots we had and will have after this operation.
1854  *
1855  * Take this example as an illustration
1856  *
1857  *                      [qgroup 1/0]
1858  *                   /         |          \
1859  *              [qg 0/0]   [qg 0/1]     [qg 0/2]
1860  *                 \          |            /
1861  *                [        extent           ]
1862  *
1863  * Say we are adding a reference that is covered by qg 0/0.  The first step
1864  * would give a refcnt of 1 to qg 0/1 and 0/2 and a refcnt of 2 to qg 1/0 with
1865  * old_roots being 2.  Because it is adding new_roots will be 1.  We then go
1866  * through qg 0/0 which will get the new_refcnt set to 1 and add 1 to qg 1/0's
1867  * new_refcnt, bringing it to 3.  We then walk through all of the qgroups, we
1868  * notice that the old refcnt for qg 0/0 < the new refcnt, so we added a
1869  * reference and thus must add the size to the referenced bytes.  Everything
1870  * else is the same so nothing else changes.
1871  */
1872 static int qgroup_shared_accounting(struct btrfs_trans_handle *trans,
1873                                     struct btrfs_fs_info *fs_info,
1874                                     struct btrfs_qgroup_operation *oper)
1875 {
1876         struct ulist *roots = NULL;
1877         struct ulist *qgroups, *tmp;
1878         struct btrfs_qgroup *qgroup;
1879         struct seq_list elem = SEQ_LIST_INIT(elem);
1880         u64 seq;
1881         int old_roots = 0;
1882         int new_roots = 0;
1883         int ret = 0;
1884
1885         if (oper->elem.seq) {
1886                 ret = check_existing_refs(trans, fs_info, oper);
1887                 if (ret < 0)
1888                         return ret;
1889                 if (ret)
1890                         return 0;
1891         }
1892
1893         qgroups = ulist_alloc(GFP_NOFS);
1894         if (!qgroups)
1895                 return -ENOMEM;
1896
1897         tmp = ulist_alloc(GFP_NOFS);
1898         if (!tmp) {
1899                 ulist_free(qgroups);
1900                 return -ENOMEM;
1901         }
1902
1903         btrfs_get_tree_mod_seq(fs_info, &elem);
1904         ret = btrfs_find_all_roots(trans, fs_info, oper->bytenr, elem.seq,
1905                                    &roots);
1906         btrfs_put_tree_mod_seq(fs_info, &elem);
1907         if (ret < 0) {
1908                 ulist_free(qgroups);
1909                 ulist_free(tmp);
1910                 return ret;
1911         }
1912         spin_lock(&fs_info->qgroup_lock);
1913         qgroup = find_qgroup_rb(fs_info, oper->ref_root);
1914         if (!qgroup)
1915                 goto out;
1916         seq = fs_info->qgroup_seq;
1917
1918         /*
1919          * So roots is the list of all the roots currently pointing at the
1920          * bytenr, including the ref we are adding if we are adding, or not if
1921          * we are removing a ref.  So we pass in the ref_root to skip that root
1922          * in our calculations.  We set old_refnct and new_refcnt cause who the
1923          * hell knows what everything looked like before, and it doesn't matter
1924          * except...
1925          */
1926         ret = qgroup_calc_old_refcnt(fs_info, oper->ref_root, tmp, roots, qgroups,
1927                                      seq, &old_roots, 0);
1928         if (ret < 0)
1929                 goto out;
1930
1931         /*
1932          * Now adjust the refcounts of the qgroups that care about this
1933          * reference, either the old_count in the case of removal or new_count
1934          * in the case of an addition.
1935          */
1936         ret = qgroup_calc_new_refcnt(fs_info, oper, qgroup, tmp, qgroups,
1937                                      seq);
1938         if (ret < 0)
1939                 goto out;
1940
1941         /*
1942          * ...in the case of removals.  If we had a removal before we got around
1943          * to processing this operation then we need to find that guy and count
1944          * his references as if they really existed so we don't end up screwing
1945          * up the exclusive counts.  Then whenever we go to process the delete
1946          * everything will be grand and we can account for whatever exclusive
1947          * changes need to be made there.  We also have to pass in old_roots so
1948          * we have an accurate count of the roots as it pertains to this
1949          * operations view of the world.
1950          */
1951         ret = qgroup_account_deleted_refs(fs_info, oper, tmp, qgroups, seq,
1952                                           &old_roots);
1953         if (ret < 0)
1954                 goto out;
1955
1956         /*
1957          * We are adding our root, need to adjust up the number of roots,
1958          * otherwise old_roots is the number of roots we want.
1959          */
1960         if (oper->type == BTRFS_QGROUP_OPER_ADD_SHARED) {
1961                 new_roots = old_roots + 1;
1962         } else {
1963                 new_roots = old_roots;
1964                 old_roots++;
1965         }
1966         fs_info->qgroup_seq += old_roots + 1;
1967
1968
1969         /*
1970          * And now the magic happens, bless Arne for having a pretty elegant
1971          * solution for this.
1972          */
1973         qgroup_adjust_counters(fs_info, oper->ref_root, oper->num_bytes,
1974                                qgroups, seq, old_roots, new_roots, 0);
1975 out:
1976         spin_unlock(&fs_info->qgroup_lock);
1977         ulist_free(qgroups);
1978         ulist_free(roots);
1979         ulist_free(tmp);
1980         return ret;
1981 }
1982
1983 /*
1984  * Process a reference to a shared subtree. This type of operation is
1985  * queued during snapshot removal when we encounter extents which are
1986  * shared between more than one root.
1987  */
1988 static int qgroup_subtree_accounting(struct btrfs_trans_handle *trans,
1989                                      struct btrfs_fs_info *fs_info,
1990                                      struct btrfs_qgroup_operation *oper)
1991 {
1992         struct ulist *roots = NULL;
1993         struct ulist_node *unode;
1994         struct ulist_iterator uiter;
1995         struct btrfs_qgroup_list *glist;
1996         struct ulist *parents;
1997         int ret = 0;
1998         int err;
1999         struct btrfs_qgroup *qg;
2000         u64 root_obj = 0;
2001         struct seq_list elem = SEQ_LIST_INIT(elem);
2002
2003         parents = ulist_alloc(GFP_NOFS);
2004         if (!parents)
2005                 return -ENOMEM;
2006
2007         btrfs_get_tree_mod_seq(fs_info, &elem);
2008         ret = btrfs_find_all_roots(trans, fs_info, oper->bytenr,
2009                                    elem.seq, &roots);
2010         btrfs_put_tree_mod_seq(fs_info, &elem);
2011         if (ret < 0)
2012                 goto out;
2013
2014         if (roots->nnodes != 1)
2015                 goto out;
2016
2017         ULIST_ITER_INIT(&uiter);
2018         unode = ulist_next(roots, &uiter); /* Only want 1 so no need to loop */
2019         /*
2020          * If we find our ref root then that means all refs
2021          * this extent has to the root have not yet been
2022          * deleted. In that case, we do nothing and let the
2023          * last ref for this bytenr drive our update.
2024          *
2025          * This can happen for example if an extent is
2026          * referenced multiple times in a snapshot (clone,
2027          * etc). If we are in the middle of snapshot removal,
2028          * queued updates for such an extent will find the
2029          * root if we have not yet finished removing the
2030          * snapshot.
2031          */
2032         if (unode->val == oper->ref_root)
2033                 goto out;
2034
2035         root_obj = unode->val;
2036         BUG_ON(!root_obj);
2037
2038         spin_lock(&fs_info->qgroup_lock);
2039         qg = find_qgroup_rb(fs_info, root_obj);
2040         if (!qg)
2041                 goto out_unlock;
2042
2043         qg->excl += oper->num_bytes;
2044         qg->excl_cmpr += oper->num_bytes;
2045         qgroup_dirty(fs_info, qg);
2046
2047         /*
2048          * Adjust counts for parent groups. First we find all
2049          * parents, then in the 2nd loop we do the adjustment
2050          * while adding parents of the parents to our ulist.
2051          */
2052         list_for_each_entry(glist, &qg->groups, next_group) {
2053                 err = ulist_add(parents, glist->group->qgroupid,
2054                                 ptr_to_u64(glist->group), GFP_ATOMIC);
2055                 if (err < 0) {
2056                         ret = err;
2057                         goto out_unlock;
2058                 }
2059         }
2060
2061         ULIST_ITER_INIT(&uiter);
2062         while ((unode = ulist_next(parents, &uiter))) {
2063                 qg = u64_to_ptr(unode->aux);
2064                 qg->excl += oper->num_bytes;
2065                 qg->excl_cmpr += oper->num_bytes;
2066                 qgroup_dirty(fs_info, qg);
2067
2068                 /* Add any parents of the parents */
2069                 list_for_each_entry(glist, &qg->groups, next_group) {
2070                         err = ulist_add(parents, glist->group->qgroupid,
2071                                         ptr_to_u64(glist->group), GFP_ATOMIC);
2072                         if (err < 0) {
2073                                 ret = err;
2074                                 goto out_unlock;
2075                         }
2076                 }
2077         }
2078
2079 out_unlock:
2080         spin_unlock(&fs_info->qgroup_lock);
2081
2082 out:
2083         ulist_free(roots);
2084         ulist_free(parents);
2085         return ret;
2086 }
2087
2088 /*
2089  * btrfs_qgroup_account_ref is called for every ref that is added to or deleted
2090  * from the fs. First, all roots referencing the extent are searched, and
2091  * then the space is accounted accordingly to the different roots. The
2092  * accounting algorithm works in 3 steps documented inline.
2093  */
2094 static int btrfs_qgroup_account(struct btrfs_trans_handle *trans,
2095                                 struct btrfs_fs_info *fs_info,
2096                                 struct btrfs_qgroup_operation *oper)
2097 {
2098         int ret = 0;
2099
2100         if (!fs_info->quota_enabled)
2101                 return 0;
2102
2103         BUG_ON(!fs_info->quota_root);
2104
2105         mutex_lock(&fs_info->qgroup_rescan_lock);
2106         if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN) {
2107                 if (fs_info->qgroup_rescan_progress.objectid <= oper->bytenr) {
2108                         mutex_unlock(&fs_info->qgroup_rescan_lock);
2109                         return 0;
2110                 }
2111         }
2112         mutex_unlock(&fs_info->qgroup_rescan_lock);
2113
2114         ASSERT(is_fstree(oper->ref_root));
2115
2116         trace_btrfs_qgroup_account(oper);
2117
2118         switch (oper->type) {
2119         case BTRFS_QGROUP_OPER_ADD_EXCL:
2120         case BTRFS_QGROUP_OPER_SUB_EXCL:
2121                 ret = qgroup_excl_accounting(fs_info, oper);
2122                 break;
2123         case BTRFS_QGROUP_OPER_ADD_SHARED:
2124         case BTRFS_QGROUP_OPER_SUB_SHARED:
2125                 ret = qgroup_shared_accounting(trans, fs_info, oper);
2126                 break;
2127         case BTRFS_QGROUP_OPER_SUB_SUBTREE:
2128                 ret = qgroup_subtree_accounting(trans, fs_info, oper);
2129                 break;
2130         default:
2131                 ASSERT(0);
2132         }
2133         return ret;
2134 }
2135
2136 /*
2137  * Needs to be called everytime we run delayed refs, even if there is an error
2138  * in order to cleanup outstanding operations.
2139  */
2140 int btrfs_delayed_qgroup_accounting(struct btrfs_trans_handle *trans,
2141                                     struct btrfs_fs_info *fs_info)
2142 {
2143         struct btrfs_qgroup_operation *oper;
2144         int ret = 0;
2145
2146         while (!list_empty(&trans->qgroup_ref_list)) {
2147                 oper = list_first_entry(&trans->qgroup_ref_list,
2148                                         struct btrfs_qgroup_operation, list);
2149                 list_del_init(&oper->list);
2150                 if (!ret || !trans->aborted)
2151                         ret = btrfs_qgroup_account(trans, fs_info, oper);
2152                 spin_lock(&fs_info->qgroup_op_lock);
2153                 rb_erase(&oper->n, &fs_info->qgroup_op_tree);
2154                 spin_unlock(&fs_info->qgroup_op_lock);
2155                 btrfs_put_tree_mod_seq(fs_info, &oper->elem);
2156                 kfree(oper);
2157         }
2158         return ret;
2159 }
2160
2161 /*
2162  * called from commit_transaction. Writes all changed qgroups to disk.
2163  */
2164 int btrfs_run_qgroups(struct btrfs_trans_handle *trans,
2165                       struct btrfs_fs_info *fs_info)
2166 {
2167         struct btrfs_root *quota_root = fs_info->quota_root;
2168         int ret = 0;
2169         int start_rescan_worker = 0;
2170
2171         if (!quota_root)
2172                 goto out;
2173
2174         if (!fs_info->quota_enabled && fs_info->pending_quota_state)
2175                 start_rescan_worker = 1;
2176
2177         fs_info->quota_enabled = fs_info->pending_quota_state;
2178
2179         spin_lock(&fs_info->qgroup_lock);
2180         while (!list_empty(&fs_info->dirty_qgroups)) {
2181                 struct btrfs_qgroup *qgroup;
2182                 qgroup = list_first_entry(&fs_info->dirty_qgroups,
2183                                           struct btrfs_qgroup, dirty);
2184                 list_del_init(&qgroup->dirty);
2185                 spin_unlock(&fs_info->qgroup_lock);
2186                 ret = update_qgroup_info_item(trans, quota_root, qgroup);
2187                 if (ret)
2188                         fs_info->qgroup_flags |=
2189                                         BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
2190                 ret = update_qgroup_limit_item(trans, quota_root, qgroup);
2191                 if (ret)
2192                         fs_info->qgroup_flags |=
2193                                         BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
2194                 spin_lock(&fs_info->qgroup_lock);
2195         }
2196         if (fs_info->quota_enabled)
2197                 fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_ON;
2198         else
2199                 fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_ON;
2200         spin_unlock(&fs_info->qgroup_lock);
2201
2202         ret = update_qgroup_status_item(trans, fs_info, quota_root);
2203         if (ret)
2204                 fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
2205
2206         if (!ret && start_rescan_worker) {
2207                 ret = qgroup_rescan_init(fs_info, 0, 1);
2208                 if (!ret) {
2209                         qgroup_rescan_zero_tracking(fs_info);
2210                         btrfs_queue_work(fs_info->qgroup_rescan_workers,
2211                                          &fs_info->qgroup_rescan_work);
2212                 }
2213                 ret = 0;
2214         }
2215
2216 out:
2217
2218         return ret;
2219 }
2220
2221 /*
2222  * copy the acounting information between qgroups. This is necessary when a
2223  * snapshot or a subvolume is created
2224  */
2225 int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans,
2226                          struct btrfs_fs_info *fs_info, u64 srcid, u64 objectid,
2227                          struct btrfs_qgroup_inherit *inherit)
2228 {
2229         int ret = 0;
2230         int i;
2231         u64 *i_qgroups;
2232         struct btrfs_root *quota_root = fs_info->quota_root;
2233         struct btrfs_qgroup *srcgroup;
2234         struct btrfs_qgroup *dstgroup;
2235         u32 level_size = 0;
2236         u64 nums;
2237
2238         mutex_lock(&fs_info->qgroup_ioctl_lock);
2239         if (!fs_info->quota_enabled)
2240                 goto out;
2241
2242         if (!quota_root) {
2243                 ret = -EINVAL;
2244                 goto out;
2245         }
2246
2247         if (inherit) {
2248                 i_qgroups = (u64 *)(inherit + 1);
2249                 nums = inherit->num_qgroups + 2 * inherit->num_ref_copies +
2250                        2 * inherit->num_excl_copies;
2251                 for (i = 0; i < nums; ++i) {
2252                         srcgroup = find_qgroup_rb(fs_info, *i_qgroups);
2253                         if (!srcgroup) {
2254                                 ret = -EINVAL;
2255                                 goto out;
2256                         }
2257
2258                         if ((srcgroup->qgroupid >> 48) <= (objectid >> 48)) {
2259                                 ret = -EINVAL;
2260                                 goto out;
2261                         }
2262                         ++i_qgroups;
2263                 }
2264         }
2265
2266         /*
2267          * create a tracking group for the subvol itself
2268          */
2269         ret = add_qgroup_item(trans, quota_root, objectid);
2270         if (ret)
2271                 goto out;
2272
2273         if (srcid) {
2274                 struct btrfs_root *srcroot;
2275                 struct btrfs_key srckey;
2276
2277                 srckey.objectid = srcid;
2278                 srckey.type = BTRFS_ROOT_ITEM_KEY;
2279                 srckey.offset = (u64)-1;
2280                 srcroot = btrfs_read_fs_root_no_name(fs_info, &srckey);
2281                 if (IS_ERR(srcroot)) {
2282                         ret = PTR_ERR(srcroot);
2283                         goto out;
2284                 }
2285
2286                 rcu_read_lock();
2287                 level_size = srcroot->nodesize;
2288                 rcu_read_unlock();
2289         }
2290
2291         /*
2292          * add qgroup to all inherited groups
2293          */
2294         if (inherit) {
2295                 i_qgroups = (u64 *)(inherit + 1);
2296                 for (i = 0; i < inherit->num_qgroups; ++i) {
2297                         ret = add_qgroup_relation_item(trans, quota_root,
2298                                                        objectid, *i_qgroups);
2299                         if (ret)
2300                                 goto out;
2301                         ret = add_qgroup_relation_item(trans, quota_root,
2302                                                        *i_qgroups, objectid);
2303                         if (ret)
2304                                 goto out;
2305                         ++i_qgroups;
2306                 }
2307         }
2308
2309
2310         spin_lock(&fs_info->qgroup_lock);
2311
2312         dstgroup = add_qgroup_rb(fs_info, objectid);
2313         if (IS_ERR(dstgroup)) {
2314                 ret = PTR_ERR(dstgroup);
2315                 goto unlock;
2316         }
2317
2318         if (inherit && inherit->flags & BTRFS_QGROUP_INHERIT_SET_LIMITS) {
2319                 dstgroup->lim_flags = inherit->lim.flags;
2320                 dstgroup->max_rfer = inherit->lim.max_rfer;
2321                 dstgroup->max_excl = inherit->lim.max_excl;
2322                 dstgroup->rsv_rfer = inherit->lim.rsv_rfer;
2323                 dstgroup->rsv_excl = inherit->lim.rsv_excl;
2324
2325                 ret = update_qgroup_limit_item(trans, quota_root, dstgroup);
2326                 if (ret) {
2327                         fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
2328                         btrfs_info(fs_info, "unable to update quota limit for %llu",
2329                                dstgroup->qgroupid);
2330                         goto unlock;
2331                 }
2332         }
2333
2334         if (srcid) {
2335                 srcgroup = find_qgroup_rb(fs_info, srcid);
2336                 if (!srcgroup)
2337                         goto unlock;
2338
2339                 /*
2340                  * We call inherit after we clone the root in order to make sure
2341                  * our counts don't go crazy, so at this point the only
2342                  * difference between the two roots should be the root node.
2343                  */
2344                 dstgroup->rfer = srcgroup->rfer;
2345                 dstgroup->rfer_cmpr = srcgroup->rfer_cmpr;
2346                 dstgroup->excl = level_size;
2347                 dstgroup->excl_cmpr = level_size;
2348                 srcgroup->excl = level_size;
2349                 srcgroup->excl_cmpr = level_size;
2350
2351                 /* inherit the limit info */
2352                 dstgroup->lim_flags = srcgroup->lim_flags;
2353                 dstgroup->max_rfer = srcgroup->max_rfer;
2354                 dstgroup->max_excl = srcgroup->max_excl;
2355                 dstgroup->rsv_rfer = srcgroup->rsv_rfer;
2356                 dstgroup->rsv_excl = srcgroup->rsv_excl;
2357
2358                 qgroup_dirty(fs_info, dstgroup);
2359                 qgroup_dirty(fs_info, srcgroup);
2360         }
2361
2362         if (!inherit)
2363                 goto unlock;
2364
2365         i_qgroups = (u64 *)(inherit + 1);
2366         for (i = 0; i < inherit->num_qgroups; ++i) {
2367                 ret = add_relation_rb(quota_root->fs_info, objectid,
2368                                       *i_qgroups);
2369                 if (ret)
2370                         goto unlock;
2371                 ++i_qgroups;
2372         }
2373
2374         for (i = 0; i <  inherit->num_ref_copies; ++i) {
2375                 struct btrfs_qgroup *src;
2376                 struct btrfs_qgroup *dst;
2377
2378                 src = find_qgroup_rb(fs_info, i_qgroups[0]);
2379                 dst = find_qgroup_rb(fs_info, i_qgroups[1]);
2380
2381                 if (!src || !dst) {
2382                         ret = -EINVAL;
2383                         goto unlock;
2384                 }
2385
2386                 dst->rfer = src->rfer - level_size;
2387                 dst->rfer_cmpr = src->rfer_cmpr - level_size;
2388                 i_qgroups += 2;
2389         }
2390         for (i = 0; i <  inherit->num_excl_copies; ++i) {
2391                 struct btrfs_qgroup *src;
2392                 struct btrfs_qgroup *dst;
2393
2394                 src = find_qgroup_rb(fs_info, i_qgroups[0]);
2395                 dst = find_qgroup_rb(fs_info, i_qgroups[1]);
2396
2397                 if (!src || !dst) {
2398                         ret = -EINVAL;
2399                         goto unlock;
2400                 }
2401
2402                 dst->excl = src->excl + level_size;
2403                 dst->excl_cmpr = src->excl_cmpr + level_size;
2404                 i_qgroups += 2;
2405         }
2406
2407 unlock:
2408         spin_unlock(&fs_info->qgroup_lock);
2409 out:
2410         mutex_unlock(&fs_info->qgroup_ioctl_lock);
2411         return ret;
2412 }
2413
2414 int btrfs_qgroup_reserve(struct btrfs_root *root, u64 num_bytes)
2415 {
2416         struct btrfs_root *quota_root;
2417         struct btrfs_qgroup *qgroup;
2418         struct btrfs_fs_info *fs_info = root->fs_info;
2419         u64 ref_root = root->root_key.objectid;
2420         int ret = 0;
2421         struct ulist_node *unode;
2422         struct ulist_iterator uiter;
2423
2424         if (!is_fstree(ref_root))
2425                 return 0;
2426
2427         if (num_bytes == 0)
2428                 return 0;
2429
2430         spin_lock(&fs_info->qgroup_lock);
2431         quota_root = fs_info->quota_root;
2432         if (!quota_root)
2433                 goto out;
2434
2435         qgroup = find_qgroup_rb(fs_info, ref_root);
2436         if (!qgroup)
2437                 goto out;
2438
2439         /*
2440          * in a first step, we check all affected qgroups if any limits would
2441          * be exceeded
2442          */
2443         ulist_reinit(fs_info->qgroup_ulist);
2444         ret = ulist_add(fs_info->qgroup_ulist, qgroup->qgroupid,
2445                         (uintptr_t)qgroup, GFP_ATOMIC);
2446         if (ret < 0)
2447                 goto out;
2448         ULIST_ITER_INIT(&uiter);
2449         while ((unode = ulist_next(fs_info->qgroup_ulist, &uiter))) {
2450                 struct btrfs_qgroup *qg;
2451                 struct btrfs_qgroup_list *glist;
2452
2453                 qg = u64_to_ptr(unode->aux);
2454
2455                 if ((qg->lim_flags & BTRFS_QGROUP_LIMIT_MAX_RFER) &&
2456                     qg->reserved + (s64)qg->rfer + num_bytes >
2457                     qg->max_rfer) {
2458                         ret = -EDQUOT;
2459                         goto out;
2460                 }
2461
2462                 if ((qg->lim_flags & BTRFS_QGROUP_LIMIT_MAX_EXCL) &&
2463                     qg->reserved + (s64)qg->excl + num_bytes >
2464                     qg->max_excl) {
2465                         ret = -EDQUOT;
2466                         goto out;
2467                 }
2468
2469                 list_for_each_entry(glist, &qg->groups, next_group) {
2470                         ret = ulist_add(fs_info->qgroup_ulist,
2471                                         glist->group->qgroupid,
2472                                         (uintptr_t)glist->group, GFP_ATOMIC);
2473                         if (ret < 0)
2474                                 goto out;
2475                 }
2476         }
2477         ret = 0;
2478         /*
2479          * no limits exceeded, now record the reservation into all qgroups
2480          */
2481         ULIST_ITER_INIT(&uiter);
2482         while ((unode = ulist_next(fs_info->qgroup_ulist, &uiter))) {
2483                 struct btrfs_qgroup *qg;
2484
2485                 qg = u64_to_ptr(unode->aux);
2486
2487                 qg->reserved += num_bytes;
2488         }
2489
2490 out:
2491         spin_unlock(&fs_info->qgroup_lock);
2492         return ret;
2493 }
2494
2495 void btrfs_qgroup_free(struct btrfs_root *root, u64 num_bytes)
2496 {
2497         struct btrfs_root *quota_root;
2498         struct btrfs_qgroup *qgroup;
2499         struct btrfs_fs_info *fs_info = root->fs_info;
2500         struct ulist_node *unode;
2501         struct ulist_iterator uiter;
2502         u64 ref_root = root->root_key.objectid;
2503         int ret = 0;
2504
2505         if (!is_fstree(ref_root))
2506                 return;
2507
2508         if (num_bytes == 0)
2509                 return;
2510
2511         spin_lock(&fs_info->qgroup_lock);
2512
2513         quota_root = fs_info->quota_root;
2514         if (!quota_root)
2515                 goto out;
2516
2517         qgroup = find_qgroup_rb(fs_info, ref_root);
2518         if (!qgroup)
2519                 goto out;
2520
2521         ulist_reinit(fs_info->qgroup_ulist);
2522         ret = ulist_add(fs_info->qgroup_ulist, qgroup->qgroupid,
2523                         (uintptr_t)qgroup, GFP_ATOMIC);
2524         if (ret < 0)
2525                 goto out;
2526         ULIST_ITER_INIT(&uiter);
2527         while ((unode = ulist_next(fs_info->qgroup_ulist, &uiter))) {
2528                 struct btrfs_qgroup *qg;
2529                 struct btrfs_qgroup_list *glist;
2530
2531                 qg = u64_to_ptr(unode->aux);
2532
2533                 qg->reserved -= num_bytes;
2534
2535                 list_for_each_entry(glist, &qg->groups, next_group) {
2536                         ret = ulist_add(fs_info->qgroup_ulist,
2537                                         glist->group->qgroupid,
2538                                         (uintptr_t)glist->group, GFP_ATOMIC);
2539                         if (ret < 0)
2540                                 goto out;
2541                 }
2542         }
2543
2544 out:
2545         spin_unlock(&fs_info->qgroup_lock);
2546 }
2547
2548 void assert_qgroups_uptodate(struct btrfs_trans_handle *trans)
2549 {
2550         if (list_empty(&trans->qgroup_ref_list) && !trans->delayed_ref_elem.seq)
2551                 return;
2552         btrfs_err(trans->root->fs_info,
2553                 "qgroups not uptodate in trans handle %p:  list is%s empty, "
2554                 "seq is %#x.%x",
2555                 trans, list_empty(&trans->qgroup_ref_list) ? "" : " not",
2556                 (u32)(trans->delayed_ref_elem.seq >> 32),
2557                 (u32)trans->delayed_ref_elem.seq);
2558         BUG();
2559 }
2560
2561 /*
2562  * returns < 0 on error, 0 when more leafs are to be scanned.
2563  * returns 1 when done.
2564  */
2565 static int
2566 qgroup_rescan_leaf(struct btrfs_fs_info *fs_info, struct btrfs_path *path,
2567                    struct btrfs_trans_handle *trans, struct ulist *qgroups,
2568                    struct ulist *tmp, struct extent_buffer *scratch_leaf)
2569 {
2570         struct btrfs_key found;
2571         struct ulist *roots = NULL;
2572         struct seq_list tree_mod_seq_elem = SEQ_LIST_INIT(tree_mod_seq_elem);
2573         u64 num_bytes;
2574         u64 seq;
2575         int new_roots;
2576         int slot;
2577         int ret;
2578
2579         path->leave_spinning = 1;
2580         mutex_lock(&fs_info->qgroup_rescan_lock);
2581         ret = btrfs_search_slot_for_read(fs_info->extent_root,
2582                                          &fs_info->qgroup_rescan_progress,
2583                                          path, 1, 0);
2584
2585         pr_debug("current progress key (%llu %u %llu), search_slot ret %d\n",
2586                  fs_info->qgroup_rescan_progress.objectid,
2587                  fs_info->qgroup_rescan_progress.type,
2588                  fs_info->qgroup_rescan_progress.offset, ret);
2589
2590         if (ret) {
2591                 /*
2592                  * The rescan is about to end, we will not be scanning any
2593                  * further blocks. We cannot unset the RESCAN flag here, because
2594                  * we want to commit the transaction if everything went well.
2595                  * To make the live accounting work in this phase, we set our
2596                  * scan progress pointer such that every real extent objectid
2597                  * will be smaller.
2598                  */
2599                 fs_info->qgroup_rescan_progress.objectid = (u64)-1;
2600                 btrfs_release_path(path);
2601                 mutex_unlock(&fs_info->qgroup_rescan_lock);
2602                 return ret;
2603         }
2604
2605         btrfs_item_key_to_cpu(path->nodes[0], &found,
2606                               btrfs_header_nritems(path->nodes[0]) - 1);
2607         fs_info->qgroup_rescan_progress.objectid = found.objectid + 1;
2608
2609         btrfs_get_tree_mod_seq(fs_info, &tree_mod_seq_elem);
2610         memcpy(scratch_leaf, path->nodes[0], sizeof(*scratch_leaf));
2611         slot = path->slots[0];
2612         btrfs_release_path(path);
2613         mutex_unlock(&fs_info->qgroup_rescan_lock);
2614
2615         for (; slot < btrfs_header_nritems(scratch_leaf); ++slot) {
2616                 btrfs_item_key_to_cpu(scratch_leaf, &found, slot);
2617                 if (found.type != BTRFS_EXTENT_ITEM_KEY &&
2618                     found.type != BTRFS_METADATA_ITEM_KEY)
2619                         continue;
2620                 if (found.type == BTRFS_METADATA_ITEM_KEY)
2621                         num_bytes = fs_info->extent_root->nodesize;
2622                 else
2623                         num_bytes = found.offset;
2624
2625                 ulist_reinit(qgroups);
2626                 ret = btrfs_find_all_roots(NULL, fs_info, found.objectid, 0,
2627                                            &roots);
2628                 if (ret < 0)
2629                         goto out;
2630                 spin_lock(&fs_info->qgroup_lock);
2631                 seq = fs_info->qgroup_seq;
2632                 fs_info->qgroup_seq += roots->nnodes + 1; /* max refcnt */
2633
2634                 new_roots = 0;
2635                 ret = qgroup_calc_old_refcnt(fs_info, 0, tmp, roots, qgroups,
2636                                              seq, &new_roots, 1);
2637                 if (ret < 0) {
2638                         spin_unlock(&fs_info->qgroup_lock);
2639                         ulist_free(roots);
2640                         goto out;
2641                 }
2642
2643                 ret = qgroup_adjust_counters(fs_info, 0, num_bytes, qgroups,
2644                                              seq, 0, new_roots, 1);
2645                 if (ret < 0) {
2646                         spin_unlock(&fs_info->qgroup_lock);
2647                         ulist_free(roots);
2648                         goto out;
2649                 }
2650                 spin_unlock(&fs_info->qgroup_lock);
2651                 ulist_free(roots);
2652         }
2653 out:
2654         btrfs_put_tree_mod_seq(fs_info, &tree_mod_seq_elem);
2655
2656         return ret;
2657 }
2658
2659 static void btrfs_qgroup_rescan_worker(struct btrfs_work *work)
2660 {
2661         struct btrfs_fs_info *fs_info = container_of(work, struct btrfs_fs_info,
2662                                                      qgroup_rescan_work);
2663         struct btrfs_path *path;
2664         struct btrfs_trans_handle *trans = NULL;
2665         struct ulist *tmp = NULL, *qgroups = NULL;
2666         struct extent_buffer *scratch_leaf = NULL;
2667         int err = -ENOMEM;
2668         int ret = 0;
2669
2670         path = btrfs_alloc_path();
2671         if (!path)
2672                 goto out;
2673         qgroups = ulist_alloc(GFP_NOFS);
2674         if (!qgroups)
2675                 goto out;
2676         tmp = ulist_alloc(GFP_NOFS);
2677         if (!tmp)
2678                 goto out;
2679         scratch_leaf = kmalloc(sizeof(*scratch_leaf), GFP_NOFS);
2680         if (!scratch_leaf)
2681                 goto out;
2682
2683         err = 0;
2684         while (!err) {
2685                 trans = btrfs_start_transaction(fs_info->fs_root, 0);
2686                 if (IS_ERR(trans)) {
2687                         err = PTR_ERR(trans);
2688                         break;
2689                 }
2690                 if (!fs_info->quota_enabled) {
2691                         err = -EINTR;
2692                 } else {
2693                         err = qgroup_rescan_leaf(fs_info, path, trans,
2694                                                  qgroups, tmp, scratch_leaf);
2695                 }
2696                 if (err > 0)
2697                         btrfs_commit_transaction(trans, fs_info->fs_root);
2698                 else
2699                         btrfs_end_transaction(trans, fs_info->fs_root);
2700         }
2701
2702 out:
2703         kfree(scratch_leaf);
2704         ulist_free(qgroups);
2705         ulist_free(tmp);
2706         btrfs_free_path(path);
2707
2708         mutex_lock(&fs_info->qgroup_rescan_lock);
2709         fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN;
2710
2711         if (err > 0 &&
2712             fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT) {
2713                 fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
2714         } else if (err < 0) {
2715                 fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
2716         }
2717         mutex_unlock(&fs_info->qgroup_rescan_lock);
2718
2719         /*
2720          * only update status, since the previous part has alreay updated the
2721          * qgroup info.
2722          */
2723         trans = btrfs_start_transaction(fs_info->quota_root, 1);
2724         if (IS_ERR(trans)) {
2725                 err = PTR_ERR(trans);
2726                 btrfs_err(fs_info,
2727                           "fail to start transaction for status update: %d\n",
2728                           err);
2729                 goto done;
2730         }
2731         ret = update_qgroup_status_item(trans, fs_info, fs_info->quota_root);
2732         if (ret < 0) {
2733                 err = ret;
2734                 btrfs_err(fs_info, "fail to update qgroup status: %d\n",
2735                           err);
2736                 btrfs_abort_transaction(trans, fs_info->quota_root, err);
2737                 goto done;
2738         }
2739         btrfs_end_transaction(trans, fs_info->quota_root);
2740
2741         if (err >= 0) {
2742                 btrfs_info(fs_info, "qgroup scan completed%s",
2743                         err > 0 ? " (inconsistency flag cleared)" : "");
2744         } else {
2745                 btrfs_err(fs_info, "qgroup scan failed with %d", err);
2746         }
2747
2748 done:
2749         complete_all(&fs_info->qgroup_rescan_completion);
2750 }
2751
2752 /*
2753  * Checks that (a) no rescan is running and (b) quota is enabled. Allocates all
2754  * memory required for the rescan context.
2755  */
2756 static int
2757 qgroup_rescan_init(struct btrfs_fs_info *fs_info, u64 progress_objectid,
2758                    int init_flags)
2759 {
2760         int ret = 0;
2761
2762         if (!init_flags &&
2763             (!(fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN) ||
2764              !(fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_ON))) {
2765                 ret = -EINVAL;
2766                 goto err;
2767         }
2768
2769         mutex_lock(&fs_info->qgroup_rescan_lock);
2770         spin_lock(&fs_info->qgroup_lock);
2771
2772         if (init_flags) {
2773                 if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN)
2774                         ret = -EINPROGRESS;
2775                 else if (!(fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_ON))
2776                         ret = -EINVAL;
2777
2778                 if (ret) {
2779                         spin_unlock(&fs_info->qgroup_lock);
2780                         mutex_unlock(&fs_info->qgroup_rescan_lock);
2781                         goto err;
2782                 }
2783                 fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_RESCAN;
2784         }
2785
2786         memset(&fs_info->qgroup_rescan_progress, 0,
2787                 sizeof(fs_info->qgroup_rescan_progress));
2788         fs_info->qgroup_rescan_progress.objectid = progress_objectid;
2789
2790         spin_unlock(&fs_info->qgroup_lock);
2791         mutex_unlock(&fs_info->qgroup_rescan_lock);
2792
2793         init_completion(&fs_info->qgroup_rescan_completion);
2794
2795         memset(&fs_info->qgroup_rescan_work, 0,
2796                sizeof(fs_info->qgroup_rescan_work));
2797         btrfs_init_work(&fs_info->qgroup_rescan_work,
2798                         btrfs_qgroup_rescan_helper,
2799                         btrfs_qgroup_rescan_worker, NULL, NULL);
2800
2801         if (ret) {
2802 err:
2803                 btrfs_info(fs_info, "qgroup_rescan_init failed with %d", ret);
2804                 return ret;
2805         }
2806
2807         return 0;
2808 }
2809
2810 static void
2811 qgroup_rescan_zero_tracking(struct btrfs_fs_info *fs_info)
2812 {
2813         struct rb_node *n;
2814         struct btrfs_qgroup *qgroup;
2815
2816         spin_lock(&fs_info->qgroup_lock);
2817         /* clear all current qgroup tracking information */
2818         for (n = rb_first(&fs_info->qgroup_tree); n; n = rb_next(n)) {
2819                 qgroup = rb_entry(n, struct btrfs_qgroup, node);
2820                 qgroup->rfer = 0;
2821                 qgroup->rfer_cmpr = 0;
2822                 qgroup->excl = 0;
2823                 qgroup->excl_cmpr = 0;
2824         }
2825         spin_unlock(&fs_info->qgroup_lock);
2826 }
2827
2828 int
2829 btrfs_qgroup_rescan(struct btrfs_fs_info *fs_info)
2830 {
2831         int ret = 0;
2832         struct btrfs_trans_handle *trans;
2833
2834         ret = qgroup_rescan_init(fs_info, 0, 1);
2835         if (ret)
2836                 return ret;
2837
2838         /*
2839          * We have set the rescan_progress to 0, which means no more
2840          * delayed refs will be accounted by btrfs_qgroup_account_ref.
2841          * However, btrfs_qgroup_account_ref may be right after its call
2842          * to btrfs_find_all_roots, in which case it would still do the
2843          * accounting.
2844          * To solve this, we're committing the transaction, which will
2845          * ensure we run all delayed refs and only after that, we are
2846          * going to clear all tracking information for a clean start.
2847          */
2848
2849         trans = btrfs_join_transaction(fs_info->fs_root);
2850         if (IS_ERR(trans)) {
2851                 fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN;
2852                 return PTR_ERR(trans);
2853         }
2854         ret = btrfs_commit_transaction(trans, fs_info->fs_root);
2855         if (ret) {
2856                 fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN;
2857                 return ret;
2858         }
2859
2860         qgroup_rescan_zero_tracking(fs_info);
2861
2862         btrfs_queue_work(fs_info->qgroup_rescan_workers,
2863                          &fs_info->qgroup_rescan_work);
2864
2865         return 0;
2866 }
2867
2868 int btrfs_qgroup_wait_for_completion(struct btrfs_fs_info *fs_info)
2869 {
2870         int running;
2871         int ret = 0;
2872
2873         mutex_lock(&fs_info->qgroup_rescan_lock);
2874         spin_lock(&fs_info->qgroup_lock);
2875         running = fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN;
2876         spin_unlock(&fs_info->qgroup_lock);
2877         mutex_unlock(&fs_info->qgroup_rescan_lock);
2878
2879         if (running)
2880                 ret = wait_for_completion_interruptible(
2881                                         &fs_info->qgroup_rescan_completion);
2882
2883         return ret;
2884 }
2885
2886 /*
2887  * this is only called from open_ctree where we're still single threaded, thus
2888  * locking is omitted here.
2889  */
2890 void
2891 btrfs_qgroup_rescan_resume(struct btrfs_fs_info *fs_info)
2892 {
2893         if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN)
2894                 btrfs_queue_work(fs_info->qgroup_rescan_workers,
2895                                  &fs_info->qgroup_rescan_work);
2896 }