]> git.kernelconcepts.de Git - karo-tx-linux.git/blob - fs/btrfs/qgroup.c
81773176d01f6b90db1b8ac1279d764d775b7ee5
[karo-tx-linux.git] / fs / btrfs / qgroup.c
1 /*
2  * Copyright (C) 2011 STRATO.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 021110-1307, USA.
17  */
18
19 #include <linux/sched.h>
20 #include <linux/pagemap.h>
21 #include <linux/writeback.h>
22 #include <linux/blkdev.h>
23 #include <linux/rbtree.h>
24 #include <linux/slab.h>
25 #include <linux/workqueue.h>
26 #include <linux/btrfs.h>
27
28 #include "ctree.h"
29 #include "transaction.h"
30 #include "disk-io.h"
31 #include "locking.h"
32 #include "ulist.h"
33 #include "backref.h"
34 #include "extent_io.h"
35 #include "qgroup.h"
36
37 /* TODO XXX FIXME
38  *  - subvol delete -> delete when ref goes to 0? delete limits also?
39  *  - reorganize keys
40  *  - compressed
41  *  - sync
42  *  - copy also limits on subvol creation
43  *  - limit
44  *  - caches fuer ulists
45  *  - performance benchmarks
46  *  - check all ioctl parameters
47  */
48
49 /*
50  * one struct for each qgroup, organized in fs_info->qgroup_tree.
51  */
52 struct btrfs_qgroup {
53         u64 qgroupid;
54
55         /*
56          * state
57          */
58         u64 rfer;       /* referenced */
59         u64 rfer_cmpr;  /* referenced compressed */
60         u64 excl;       /* exclusive */
61         u64 excl_cmpr;  /* exclusive compressed */
62
63         /*
64          * limits
65          */
66         u64 lim_flags;  /* which limits are set */
67         u64 max_rfer;
68         u64 max_excl;
69         u64 rsv_rfer;
70         u64 rsv_excl;
71
72         /*
73          * reservation tracking
74          */
75         u64 reserved;
76
77         /*
78          * lists
79          */
80         struct list_head groups;  /* groups this group is member of */
81         struct list_head members; /* groups that are members of this group */
82         struct list_head dirty;   /* dirty groups */
83         struct rb_node node;      /* tree of qgroups */
84
85         /*
86          * temp variables for accounting operations
87          * Refer to qgroup_shared_accouting() for details.
88          */
89         u64 old_refcnt;
90         u64 new_refcnt;
91 };
92
93 static void btrfs_qgroup_update_old_refcnt(struct btrfs_qgroup *qg, u64 seq,
94                                            int mod)
95 {
96         if (qg->old_refcnt < seq)
97                 qg->old_refcnt = seq;
98         qg->old_refcnt += mod;
99 }
100
101 static void btrfs_qgroup_update_new_refcnt(struct btrfs_qgroup *qg, u64 seq,
102                                            int mod)
103 {
104         if (qg->new_refcnt < seq)
105                 qg->new_refcnt = seq;
106         qg->new_refcnt += mod;
107 }
108
109 static inline u64 btrfs_qgroup_get_old_refcnt(struct btrfs_qgroup *qg, u64 seq)
110 {
111         if (qg->old_refcnt < seq)
112                 return 0;
113         return qg->old_refcnt - seq;
114 }
115
116 static inline u64 btrfs_qgroup_get_new_refcnt(struct btrfs_qgroup *qg, u64 seq)
117 {
118         if (qg->new_refcnt < seq)
119                 return 0;
120         return qg->new_refcnt - seq;
121 }
122
123 /*
124  * glue structure to represent the relations between qgroups.
125  */
126 struct btrfs_qgroup_list {
127         struct list_head next_group;
128         struct list_head next_member;
129         struct btrfs_qgroup *group;
130         struct btrfs_qgroup *member;
131 };
132
133 #define ptr_to_u64(x) ((u64)(uintptr_t)x)
134 #define u64_to_ptr(x) ((struct btrfs_qgroup *)(uintptr_t)x)
135
136 static int
137 qgroup_rescan_init(struct btrfs_fs_info *fs_info, u64 progress_objectid,
138                    int init_flags);
139 static void qgroup_rescan_zero_tracking(struct btrfs_fs_info *fs_info);
140
141 /* must be called with qgroup_ioctl_lock held */
142 static struct btrfs_qgroup *find_qgroup_rb(struct btrfs_fs_info *fs_info,
143                                            u64 qgroupid)
144 {
145         struct rb_node *n = fs_info->qgroup_tree.rb_node;
146         struct btrfs_qgroup *qgroup;
147
148         while (n) {
149                 qgroup = rb_entry(n, struct btrfs_qgroup, node);
150                 if (qgroup->qgroupid < qgroupid)
151                         n = n->rb_left;
152                 else if (qgroup->qgroupid > qgroupid)
153                         n = n->rb_right;
154                 else
155                         return qgroup;
156         }
157         return NULL;
158 }
159
160 /* must be called with qgroup_lock held */
161 static struct btrfs_qgroup *add_qgroup_rb(struct btrfs_fs_info *fs_info,
162                                           u64 qgroupid)
163 {
164         struct rb_node **p = &fs_info->qgroup_tree.rb_node;
165         struct rb_node *parent = NULL;
166         struct btrfs_qgroup *qgroup;
167
168         while (*p) {
169                 parent = *p;
170                 qgroup = rb_entry(parent, struct btrfs_qgroup, node);
171
172                 if (qgroup->qgroupid < qgroupid)
173                         p = &(*p)->rb_left;
174                 else if (qgroup->qgroupid > qgroupid)
175                         p = &(*p)->rb_right;
176                 else
177                         return qgroup;
178         }
179
180         qgroup = kzalloc(sizeof(*qgroup), GFP_ATOMIC);
181         if (!qgroup)
182                 return ERR_PTR(-ENOMEM);
183
184         qgroup->qgroupid = qgroupid;
185         INIT_LIST_HEAD(&qgroup->groups);
186         INIT_LIST_HEAD(&qgroup->members);
187         INIT_LIST_HEAD(&qgroup->dirty);
188
189         rb_link_node(&qgroup->node, parent, p);
190         rb_insert_color(&qgroup->node, &fs_info->qgroup_tree);
191
192         return qgroup;
193 }
194
195 static void __del_qgroup_rb(struct btrfs_qgroup *qgroup)
196 {
197         struct btrfs_qgroup_list *list;
198
199         list_del(&qgroup->dirty);
200         while (!list_empty(&qgroup->groups)) {
201                 list = list_first_entry(&qgroup->groups,
202                                         struct btrfs_qgroup_list, next_group);
203                 list_del(&list->next_group);
204                 list_del(&list->next_member);
205                 kfree(list);
206         }
207
208         while (!list_empty(&qgroup->members)) {
209                 list = list_first_entry(&qgroup->members,
210                                         struct btrfs_qgroup_list, next_member);
211                 list_del(&list->next_group);
212                 list_del(&list->next_member);
213                 kfree(list);
214         }
215         kfree(qgroup);
216 }
217
218 /* must be called with qgroup_lock held */
219 static int del_qgroup_rb(struct btrfs_fs_info *fs_info, u64 qgroupid)
220 {
221         struct btrfs_qgroup *qgroup = find_qgroup_rb(fs_info, qgroupid);
222
223         if (!qgroup)
224                 return -ENOENT;
225
226         rb_erase(&qgroup->node, &fs_info->qgroup_tree);
227         __del_qgroup_rb(qgroup);
228         return 0;
229 }
230
231 /* must be called with qgroup_lock held */
232 static int add_relation_rb(struct btrfs_fs_info *fs_info,
233                            u64 memberid, u64 parentid)
234 {
235         struct btrfs_qgroup *member;
236         struct btrfs_qgroup *parent;
237         struct btrfs_qgroup_list *list;
238
239         member = find_qgroup_rb(fs_info, memberid);
240         parent = find_qgroup_rb(fs_info, parentid);
241         if (!member || !parent)
242                 return -ENOENT;
243
244         list = kzalloc(sizeof(*list), GFP_ATOMIC);
245         if (!list)
246                 return -ENOMEM;
247
248         list->group = parent;
249         list->member = member;
250         list_add_tail(&list->next_group, &member->groups);
251         list_add_tail(&list->next_member, &parent->members);
252
253         return 0;
254 }
255
256 /* must be called with qgroup_lock held */
257 static int del_relation_rb(struct btrfs_fs_info *fs_info,
258                            u64 memberid, u64 parentid)
259 {
260         struct btrfs_qgroup *member;
261         struct btrfs_qgroup *parent;
262         struct btrfs_qgroup_list *list;
263
264         member = find_qgroup_rb(fs_info, memberid);
265         parent = find_qgroup_rb(fs_info, parentid);
266         if (!member || !parent)
267                 return -ENOENT;
268
269         list_for_each_entry(list, &member->groups, next_group) {
270                 if (list->group == parent) {
271                         list_del(&list->next_group);
272                         list_del(&list->next_member);
273                         kfree(list);
274                         return 0;
275                 }
276         }
277         return -ENOENT;
278 }
279
280 #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
281 int btrfs_verify_qgroup_counts(struct btrfs_fs_info *fs_info, u64 qgroupid,
282                                u64 rfer, u64 excl)
283 {
284         struct btrfs_qgroup *qgroup;
285
286         qgroup = find_qgroup_rb(fs_info, qgroupid);
287         if (!qgroup)
288                 return -EINVAL;
289         if (qgroup->rfer != rfer || qgroup->excl != excl)
290                 return -EINVAL;
291         return 0;
292 }
293 #endif
294
295 /*
296  * The full config is read in one go, only called from open_ctree()
297  * It doesn't use any locking, as at this point we're still single-threaded
298  */
299 int btrfs_read_qgroup_config(struct btrfs_fs_info *fs_info)
300 {
301         struct btrfs_key key;
302         struct btrfs_key found_key;
303         struct btrfs_root *quota_root = fs_info->quota_root;
304         struct btrfs_path *path = NULL;
305         struct extent_buffer *l;
306         int slot;
307         int ret = 0;
308         u64 flags = 0;
309         u64 rescan_progress = 0;
310
311         if (!fs_info->quota_enabled)
312                 return 0;
313
314         fs_info->qgroup_ulist = ulist_alloc(GFP_NOFS);
315         if (!fs_info->qgroup_ulist) {
316                 ret = -ENOMEM;
317                 goto out;
318         }
319
320         path = btrfs_alloc_path();
321         if (!path) {
322                 ret = -ENOMEM;
323                 goto out;
324         }
325
326         /* default this to quota off, in case no status key is found */
327         fs_info->qgroup_flags = 0;
328
329         /*
330          * pass 1: read status, all qgroup infos and limits
331          */
332         key.objectid = 0;
333         key.type = 0;
334         key.offset = 0;
335         ret = btrfs_search_slot_for_read(quota_root, &key, path, 1, 1);
336         if (ret)
337                 goto out;
338
339         while (1) {
340                 struct btrfs_qgroup *qgroup;
341
342                 slot = path->slots[0];
343                 l = path->nodes[0];
344                 btrfs_item_key_to_cpu(l, &found_key, slot);
345
346                 if (found_key.type == BTRFS_QGROUP_STATUS_KEY) {
347                         struct btrfs_qgroup_status_item *ptr;
348
349                         ptr = btrfs_item_ptr(l, slot,
350                                              struct btrfs_qgroup_status_item);
351
352                         if (btrfs_qgroup_status_version(l, ptr) !=
353                             BTRFS_QGROUP_STATUS_VERSION) {
354                                 btrfs_err(fs_info,
355                                  "old qgroup version, quota disabled");
356                                 goto out;
357                         }
358                         if (btrfs_qgroup_status_generation(l, ptr) !=
359                             fs_info->generation) {
360                                 flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
361                                 btrfs_err(fs_info,
362                                         "qgroup generation mismatch, "
363                                         "marked as inconsistent");
364                         }
365                         fs_info->qgroup_flags = btrfs_qgroup_status_flags(l,
366                                                                           ptr);
367                         rescan_progress = btrfs_qgroup_status_rescan(l, ptr);
368                         goto next1;
369                 }
370
371                 if (found_key.type != BTRFS_QGROUP_INFO_KEY &&
372                     found_key.type != BTRFS_QGROUP_LIMIT_KEY)
373                         goto next1;
374
375                 qgroup = find_qgroup_rb(fs_info, found_key.offset);
376                 if ((qgroup && found_key.type == BTRFS_QGROUP_INFO_KEY) ||
377                     (!qgroup && found_key.type == BTRFS_QGROUP_LIMIT_KEY)) {
378                         btrfs_err(fs_info, "inconsitent qgroup config");
379                         flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
380                 }
381                 if (!qgroup) {
382                         qgroup = add_qgroup_rb(fs_info, found_key.offset);
383                         if (IS_ERR(qgroup)) {
384                                 ret = PTR_ERR(qgroup);
385                                 goto out;
386                         }
387                 }
388                 switch (found_key.type) {
389                 case BTRFS_QGROUP_INFO_KEY: {
390                         struct btrfs_qgroup_info_item *ptr;
391
392                         ptr = btrfs_item_ptr(l, slot,
393                                              struct btrfs_qgroup_info_item);
394                         qgroup->rfer = btrfs_qgroup_info_rfer(l, ptr);
395                         qgroup->rfer_cmpr = btrfs_qgroup_info_rfer_cmpr(l, ptr);
396                         qgroup->excl = btrfs_qgroup_info_excl(l, ptr);
397                         qgroup->excl_cmpr = btrfs_qgroup_info_excl_cmpr(l, ptr);
398                         /* generation currently unused */
399                         break;
400                 }
401                 case BTRFS_QGROUP_LIMIT_KEY: {
402                         struct btrfs_qgroup_limit_item *ptr;
403
404                         ptr = btrfs_item_ptr(l, slot,
405                                              struct btrfs_qgroup_limit_item);
406                         qgroup->lim_flags = btrfs_qgroup_limit_flags(l, ptr);
407                         qgroup->max_rfer = btrfs_qgroup_limit_max_rfer(l, ptr);
408                         qgroup->max_excl = btrfs_qgroup_limit_max_excl(l, ptr);
409                         qgroup->rsv_rfer = btrfs_qgroup_limit_rsv_rfer(l, ptr);
410                         qgroup->rsv_excl = btrfs_qgroup_limit_rsv_excl(l, ptr);
411                         break;
412                 }
413                 }
414 next1:
415                 ret = btrfs_next_item(quota_root, path);
416                 if (ret < 0)
417                         goto out;
418                 if (ret)
419                         break;
420         }
421         btrfs_release_path(path);
422
423         /*
424          * pass 2: read all qgroup relations
425          */
426         key.objectid = 0;
427         key.type = BTRFS_QGROUP_RELATION_KEY;
428         key.offset = 0;
429         ret = btrfs_search_slot_for_read(quota_root, &key, path, 1, 0);
430         if (ret)
431                 goto out;
432         while (1) {
433                 slot = path->slots[0];
434                 l = path->nodes[0];
435                 btrfs_item_key_to_cpu(l, &found_key, slot);
436
437                 if (found_key.type != BTRFS_QGROUP_RELATION_KEY)
438                         goto next2;
439
440                 if (found_key.objectid > found_key.offset) {
441                         /* parent <- member, not needed to build config */
442                         /* FIXME should we omit the key completely? */
443                         goto next2;
444                 }
445
446                 ret = add_relation_rb(fs_info, found_key.objectid,
447                                       found_key.offset);
448                 if (ret == -ENOENT) {
449                         btrfs_warn(fs_info,
450                                 "orphan qgroup relation 0x%llx->0x%llx",
451                                 found_key.objectid, found_key.offset);
452                         ret = 0;        /* ignore the error */
453                 }
454                 if (ret)
455                         goto out;
456 next2:
457                 ret = btrfs_next_item(quota_root, path);
458                 if (ret < 0)
459                         goto out;
460                 if (ret)
461                         break;
462         }
463 out:
464         fs_info->qgroup_flags |= flags;
465         if (!(fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_ON)) {
466                 fs_info->quota_enabled = 0;
467                 fs_info->pending_quota_state = 0;
468         } else if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN &&
469                    ret >= 0) {
470                 ret = qgroup_rescan_init(fs_info, rescan_progress, 0);
471         }
472         btrfs_free_path(path);
473
474         if (ret < 0) {
475                 ulist_free(fs_info->qgroup_ulist);
476                 fs_info->qgroup_ulist = NULL;
477                 fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN;
478         }
479
480         return ret < 0 ? ret : 0;
481 }
482
483 /*
484  * This is called from close_ctree() or open_ctree() or btrfs_quota_disable(),
485  * first two are in single-threaded paths.And for the third one, we have set
486  * quota_root to be null with qgroup_lock held before, so it is safe to clean
487  * up the in-memory structures without qgroup_lock held.
488  */
489 void btrfs_free_qgroup_config(struct btrfs_fs_info *fs_info)
490 {
491         struct rb_node *n;
492         struct btrfs_qgroup *qgroup;
493
494         while ((n = rb_first(&fs_info->qgroup_tree))) {
495                 qgroup = rb_entry(n, struct btrfs_qgroup, node);
496                 rb_erase(n, &fs_info->qgroup_tree);
497                 __del_qgroup_rb(qgroup);
498         }
499         /*
500          * we call btrfs_free_qgroup_config() when umounting
501          * filesystem and disabling quota, so we set qgroup_ulit
502          * to be null here to avoid double free.
503          */
504         ulist_free(fs_info->qgroup_ulist);
505         fs_info->qgroup_ulist = NULL;
506 }
507
508 static int add_qgroup_relation_item(struct btrfs_trans_handle *trans,
509                                     struct btrfs_root *quota_root,
510                                     u64 src, u64 dst)
511 {
512         int ret;
513         struct btrfs_path *path;
514         struct btrfs_key key;
515
516         path = btrfs_alloc_path();
517         if (!path)
518                 return -ENOMEM;
519
520         key.objectid = src;
521         key.type = BTRFS_QGROUP_RELATION_KEY;
522         key.offset = dst;
523
524         ret = btrfs_insert_empty_item(trans, quota_root, path, &key, 0);
525
526         btrfs_mark_buffer_dirty(path->nodes[0]);
527
528         btrfs_free_path(path);
529         return ret;
530 }
531
532 static int del_qgroup_relation_item(struct btrfs_trans_handle *trans,
533                                     struct btrfs_root *quota_root,
534                                     u64 src, u64 dst)
535 {
536         int ret;
537         struct btrfs_path *path;
538         struct btrfs_key key;
539
540         path = btrfs_alloc_path();
541         if (!path)
542                 return -ENOMEM;
543
544         key.objectid = src;
545         key.type = BTRFS_QGROUP_RELATION_KEY;
546         key.offset = dst;
547
548         ret = btrfs_search_slot(trans, quota_root, &key, path, -1, 1);
549         if (ret < 0)
550                 goto out;
551
552         if (ret > 0) {
553                 ret = -ENOENT;
554                 goto out;
555         }
556
557         ret = btrfs_del_item(trans, quota_root, path);
558 out:
559         btrfs_free_path(path);
560         return ret;
561 }
562
563 static int add_qgroup_item(struct btrfs_trans_handle *trans,
564                            struct btrfs_root *quota_root, u64 qgroupid)
565 {
566         int ret;
567         struct btrfs_path *path;
568         struct btrfs_qgroup_info_item *qgroup_info;
569         struct btrfs_qgroup_limit_item *qgroup_limit;
570         struct extent_buffer *leaf;
571         struct btrfs_key key;
572
573         if (btrfs_test_is_dummy_root(quota_root))
574                 return 0;
575
576         path = btrfs_alloc_path();
577         if (!path)
578                 return -ENOMEM;
579
580         key.objectid = 0;
581         key.type = BTRFS_QGROUP_INFO_KEY;
582         key.offset = qgroupid;
583
584         /*
585          * Avoid a transaction abort by catching -EEXIST here. In that
586          * case, we proceed by re-initializing the existing structure
587          * on disk.
588          */
589
590         ret = btrfs_insert_empty_item(trans, quota_root, path, &key,
591                                       sizeof(*qgroup_info));
592         if (ret && ret != -EEXIST)
593                 goto out;
594
595         leaf = path->nodes[0];
596         qgroup_info = btrfs_item_ptr(leaf, path->slots[0],
597                                  struct btrfs_qgroup_info_item);
598         btrfs_set_qgroup_info_generation(leaf, qgroup_info, trans->transid);
599         btrfs_set_qgroup_info_rfer(leaf, qgroup_info, 0);
600         btrfs_set_qgroup_info_rfer_cmpr(leaf, qgroup_info, 0);
601         btrfs_set_qgroup_info_excl(leaf, qgroup_info, 0);
602         btrfs_set_qgroup_info_excl_cmpr(leaf, qgroup_info, 0);
603
604         btrfs_mark_buffer_dirty(leaf);
605
606         btrfs_release_path(path);
607
608         key.type = BTRFS_QGROUP_LIMIT_KEY;
609         ret = btrfs_insert_empty_item(trans, quota_root, path, &key,
610                                       sizeof(*qgroup_limit));
611         if (ret && ret != -EEXIST)
612                 goto out;
613
614         leaf = path->nodes[0];
615         qgroup_limit = btrfs_item_ptr(leaf, path->slots[0],
616                                   struct btrfs_qgroup_limit_item);
617         btrfs_set_qgroup_limit_flags(leaf, qgroup_limit, 0);
618         btrfs_set_qgroup_limit_max_rfer(leaf, qgroup_limit, 0);
619         btrfs_set_qgroup_limit_max_excl(leaf, qgroup_limit, 0);
620         btrfs_set_qgroup_limit_rsv_rfer(leaf, qgroup_limit, 0);
621         btrfs_set_qgroup_limit_rsv_excl(leaf, qgroup_limit, 0);
622
623         btrfs_mark_buffer_dirty(leaf);
624
625         ret = 0;
626 out:
627         btrfs_free_path(path);
628         return ret;
629 }
630
631 static int del_qgroup_item(struct btrfs_trans_handle *trans,
632                            struct btrfs_root *quota_root, u64 qgroupid)
633 {
634         int ret;
635         struct btrfs_path *path;
636         struct btrfs_key key;
637
638         path = btrfs_alloc_path();
639         if (!path)
640                 return -ENOMEM;
641
642         key.objectid = 0;
643         key.type = BTRFS_QGROUP_INFO_KEY;
644         key.offset = qgroupid;
645         ret = btrfs_search_slot(trans, quota_root, &key, path, -1, 1);
646         if (ret < 0)
647                 goto out;
648
649         if (ret > 0) {
650                 ret = -ENOENT;
651                 goto out;
652         }
653
654         ret = btrfs_del_item(trans, quota_root, path);
655         if (ret)
656                 goto out;
657
658         btrfs_release_path(path);
659
660         key.type = BTRFS_QGROUP_LIMIT_KEY;
661         ret = btrfs_search_slot(trans, quota_root, &key, path, -1, 1);
662         if (ret < 0)
663                 goto out;
664
665         if (ret > 0) {
666                 ret = -ENOENT;
667                 goto out;
668         }
669
670         ret = btrfs_del_item(trans, quota_root, path);
671
672 out:
673         btrfs_free_path(path);
674         return ret;
675 }
676
677 static int update_qgroup_limit_item(struct btrfs_trans_handle *trans,
678                                     struct btrfs_root *root,
679                                     struct btrfs_qgroup *qgroup)
680 {
681         struct btrfs_path *path;
682         struct btrfs_key key;
683         struct extent_buffer *l;
684         struct btrfs_qgroup_limit_item *qgroup_limit;
685         int ret;
686         int slot;
687
688         key.objectid = 0;
689         key.type = BTRFS_QGROUP_LIMIT_KEY;
690         key.offset = qgroup->qgroupid;
691
692         path = btrfs_alloc_path();
693         if (!path)
694                 return -ENOMEM;
695
696         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
697         if (ret > 0)
698                 ret = -ENOENT;
699
700         if (ret)
701                 goto out;
702
703         l = path->nodes[0];
704         slot = path->slots[0];
705         qgroup_limit = btrfs_item_ptr(l, slot, struct btrfs_qgroup_limit_item);
706         btrfs_set_qgroup_limit_flags(l, qgroup_limit, qgroup->lim_flags);
707         btrfs_set_qgroup_limit_max_rfer(l, qgroup_limit, qgroup->max_rfer);
708         btrfs_set_qgroup_limit_max_excl(l, qgroup_limit, qgroup->max_excl);
709         btrfs_set_qgroup_limit_rsv_rfer(l, qgroup_limit, qgroup->rsv_rfer);
710         btrfs_set_qgroup_limit_rsv_excl(l, qgroup_limit, qgroup->rsv_excl);
711
712         btrfs_mark_buffer_dirty(l);
713
714 out:
715         btrfs_free_path(path);
716         return ret;
717 }
718
719 static int update_qgroup_info_item(struct btrfs_trans_handle *trans,
720                                    struct btrfs_root *root,
721                                    struct btrfs_qgroup *qgroup)
722 {
723         struct btrfs_path *path;
724         struct btrfs_key key;
725         struct extent_buffer *l;
726         struct btrfs_qgroup_info_item *qgroup_info;
727         int ret;
728         int slot;
729
730         if (btrfs_test_is_dummy_root(root))
731                 return 0;
732
733         key.objectid = 0;
734         key.type = BTRFS_QGROUP_INFO_KEY;
735         key.offset = qgroup->qgroupid;
736
737         path = btrfs_alloc_path();
738         if (!path)
739                 return -ENOMEM;
740
741         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
742         if (ret > 0)
743                 ret = -ENOENT;
744
745         if (ret)
746                 goto out;
747
748         l = path->nodes[0];
749         slot = path->slots[0];
750         qgroup_info = btrfs_item_ptr(l, slot, struct btrfs_qgroup_info_item);
751         btrfs_set_qgroup_info_generation(l, qgroup_info, trans->transid);
752         btrfs_set_qgroup_info_rfer(l, qgroup_info, qgroup->rfer);
753         btrfs_set_qgroup_info_rfer_cmpr(l, qgroup_info, qgroup->rfer_cmpr);
754         btrfs_set_qgroup_info_excl(l, qgroup_info, qgroup->excl);
755         btrfs_set_qgroup_info_excl_cmpr(l, qgroup_info, qgroup->excl_cmpr);
756
757         btrfs_mark_buffer_dirty(l);
758
759 out:
760         btrfs_free_path(path);
761         return ret;
762 }
763
764 static int update_qgroup_status_item(struct btrfs_trans_handle *trans,
765                                      struct btrfs_fs_info *fs_info,
766                                     struct btrfs_root *root)
767 {
768         struct btrfs_path *path;
769         struct btrfs_key key;
770         struct extent_buffer *l;
771         struct btrfs_qgroup_status_item *ptr;
772         int ret;
773         int slot;
774
775         key.objectid = 0;
776         key.type = BTRFS_QGROUP_STATUS_KEY;
777         key.offset = 0;
778
779         path = btrfs_alloc_path();
780         if (!path)
781                 return -ENOMEM;
782
783         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
784         if (ret > 0)
785                 ret = -ENOENT;
786
787         if (ret)
788                 goto out;
789
790         l = path->nodes[0];
791         slot = path->slots[0];
792         ptr = btrfs_item_ptr(l, slot, struct btrfs_qgroup_status_item);
793         btrfs_set_qgroup_status_flags(l, ptr, fs_info->qgroup_flags);
794         btrfs_set_qgroup_status_generation(l, ptr, trans->transid);
795         btrfs_set_qgroup_status_rescan(l, ptr,
796                                 fs_info->qgroup_rescan_progress.objectid);
797
798         btrfs_mark_buffer_dirty(l);
799
800 out:
801         btrfs_free_path(path);
802         return ret;
803 }
804
805 /*
806  * called with qgroup_lock held
807  */
808 static int btrfs_clean_quota_tree(struct btrfs_trans_handle *trans,
809                                   struct btrfs_root *root)
810 {
811         struct btrfs_path *path;
812         struct btrfs_key key;
813         struct extent_buffer *leaf = NULL;
814         int ret;
815         int nr = 0;
816
817         path = btrfs_alloc_path();
818         if (!path)
819                 return -ENOMEM;
820
821         path->leave_spinning = 1;
822
823         key.objectid = 0;
824         key.offset = 0;
825         key.type = 0;
826
827         while (1) {
828                 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
829                 if (ret < 0)
830                         goto out;
831                 leaf = path->nodes[0];
832                 nr = btrfs_header_nritems(leaf);
833                 if (!nr)
834                         break;
835                 /*
836                  * delete the leaf one by one
837                  * since the whole tree is going
838                  * to be deleted.
839                  */
840                 path->slots[0] = 0;
841                 ret = btrfs_del_items(trans, root, path, 0, nr);
842                 if (ret)
843                         goto out;
844
845                 btrfs_release_path(path);
846         }
847         ret = 0;
848 out:
849         root->fs_info->pending_quota_state = 0;
850         btrfs_free_path(path);
851         return ret;
852 }
853
854 int btrfs_quota_enable(struct btrfs_trans_handle *trans,
855                        struct btrfs_fs_info *fs_info)
856 {
857         struct btrfs_root *quota_root;
858         struct btrfs_root *tree_root = fs_info->tree_root;
859         struct btrfs_path *path = NULL;
860         struct btrfs_qgroup_status_item *ptr;
861         struct extent_buffer *leaf;
862         struct btrfs_key key;
863         struct btrfs_key found_key;
864         struct btrfs_qgroup *qgroup = NULL;
865         int ret = 0;
866         int slot;
867
868         mutex_lock(&fs_info->qgroup_ioctl_lock);
869         if (fs_info->quota_root) {
870                 fs_info->pending_quota_state = 1;
871                 goto out;
872         }
873
874         fs_info->qgroup_ulist = ulist_alloc(GFP_NOFS);
875         if (!fs_info->qgroup_ulist) {
876                 ret = -ENOMEM;
877                 goto out;
878         }
879
880         /*
881          * initially create the quota tree
882          */
883         quota_root = btrfs_create_tree(trans, fs_info,
884                                        BTRFS_QUOTA_TREE_OBJECTID);
885         if (IS_ERR(quota_root)) {
886                 ret =  PTR_ERR(quota_root);
887                 goto out;
888         }
889
890         path = btrfs_alloc_path();
891         if (!path) {
892                 ret = -ENOMEM;
893                 goto out_free_root;
894         }
895
896         key.objectid = 0;
897         key.type = BTRFS_QGROUP_STATUS_KEY;
898         key.offset = 0;
899
900         ret = btrfs_insert_empty_item(trans, quota_root, path, &key,
901                                       sizeof(*ptr));
902         if (ret)
903                 goto out_free_path;
904
905         leaf = path->nodes[0];
906         ptr = btrfs_item_ptr(leaf, path->slots[0],
907                                  struct btrfs_qgroup_status_item);
908         btrfs_set_qgroup_status_generation(leaf, ptr, trans->transid);
909         btrfs_set_qgroup_status_version(leaf, ptr, BTRFS_QGROUP_STATUS_VERSION);
910         fs_info->qgroup_flags = BTRFS_QGROUP_STATUS_FLAG_ON |
911                                 BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
912         btrfs_set_qgroup_status_flags(leaf, ptr, fs_info->qgroup_flags);
913         btrfs_set_qgroup_status_rescan(leaf, ptr, 0);
914
915         btrfs_mark_buffer_dirty(leaf);
916
917         key.objectid = 0;
918         key.type = BTRFS_ROOT_REF_KEY;
919         key.offset = 0;
920
921         btrfs_release_path(path);
922         ret = btrfs_search_slot_for_read(tree_root, &key, path, 1, 0);
923         if (ret > 0)
924                 goto out_add_root;
925         if (ret < 0)
926                 goto out_free_path;
927
928
929         while (1) {
930                 slot = path->slots[0];
931                 leaf = path->nodes[0];
932                 btrfs_item_key_to_cpu(leaf, &found_key, slot);
933
934                 if (found_key.type == BTRFS_ROOT_REF_KEY) {
935                         ret = add_qgroup_item(trans, quota_root,
936                                               found_key.offset);
937                         if (ret)
938                                 goto out_free_path;
939
940                         qgroup = add_qgroup_rb(fs_info, found_key.offset);
941                         if (IS_ERR(qgroup)) {
942                                 ret = PTR_ERR(qgroup);
943                                 goto out_free_path;
944                         }
945                 }
946                 ret = btrfs_next_item(tree_root, path);
947                 if (ret < 0)
948                         goto out_free_path;
949                 if (ret)
950                         break;
951         }
952
953 out_add_root:
954         btrfs_release_path(path);
955         ret = add_qgroup_item(trans, quota_root, BTRFS_FS_TREE_OBJECTID);
956         if (ret)
957                 goto out_free_path;
958
959         qgroup = add_qgroup_rb(fs_info, BTRFS_FS_TREE_OBJECTID);
960         if (IS_ERR(qgroup)) {
961                 ret = PTR_ERR(qgroup);
962                 goto out_free_path;
963         }
964         spin_lock(&fs_info->qgroup_lock);
965         fs_info->quota_root = quota_root;
966         fs_info->pending_quota_state = 1;
967         spin_unlock(&fs_info->qgroup_lock);
968 out_free_path:
969         btrfs_free_path(path);
970 out_free_root:
971         if (ret) {
972                 free_extent_buffer(quota_root->node);
973                 free_extent_buffer(quota_root->commit_root);
974                 kfree(quota_root);
975         }
976 out:
977         if (ret) {
978                 ulist_free(fs_info->qgroup_ulist);
979                 fs_info->qgroup_ulist = NULL;
980         }
981         mutex_unlock(&fs_info->qgroup_ioctl_lock);
982         return ret;
983 }
984
985 int btrfs_quota_disable(struct btrfs_trans_handle *trans,
986                         struct btrfs_fs_info *fs_info)
987 {
988         struct btrfs_root *tree_root = fs_info->tree_root;
989         struct btrfs_root *quota_root;
990         int ret = 0;
991
992         mutex_lock(&fs_info->qgroup_ioctl_lock);
993         if (!fs_info->quota_root)
994                 goto out;
995         spin_lock(&fs_info->qgroup_lock);
996         fs_info->quota_enabled = 0;
997         fs_info->pending_quota_state = 0;
998         quota_root = fs_info->quota_root;
999         fs_info->quota_root = NULL;
1000         fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_ON;
1001         spin_unlock(&fs_info->qgroup_lock);
1002
1003         btrfs_free_qgroup_config(fs_info);
1004
1005         ret = btrfs_clean_quota_tree(trans, quota_root);
1006         if (ret)
1007                 goto out;
1008
1009         ret = btrfs_del_root(trans, tree_root, &quota_root->root_key);
1010         if (ret)
1011                 goto out;
1012
1013         list_del(&quota_root->dirty_list);
1014
1015         btrfs_tree_lock(quota_root->node);
1016         clean_tree_block(trans, tree_root->fs_info, quota_root->node);
1017         btrfs_tree_unlock(quota_root->node);
1018         btrfs_free_tree_block(trans, quota_root, quota_root->node, 0, 1);
1019
1020         free_extent_buffer(quota_root->node);
1021         free_extent_buffer(quota_root->commit_root);
1022         kfree(quota_root);
1023 out:
1024         mutex_unlock(&fs_info->qgroup_ioctl_lock);
1025         return ret;
1026 }
1027
1028 static void qgroup_dirty(struct btrfs_fs_info *fs_info,
1029                          struct btrfs_qgroup *qgroup)
1030 {
1031         if (list_empty(&qgroup->dirty))
1032                 list_add(&qgroup->dirty, &fs_info->dirty_qgroups);
1033 }
1034
1035 /*
1036  * The easy accounting, if we are adding/removing the only ref for an extent
1037  * then this qgroup and all of the parent qgroups get their refrence and
1038  * exclusive counts adjusted.
1039  *
1040  * Caller should hold fs_info->qgroup_lock.
1041  */
1042 static int __qgroup_excl_accounting(struct btrfs_fs_info *fs_info,
1043                                     struct ulist *tmp, u64 ref_root,
1044                                     u64 num_bytes, int sign)
1045 {
1046         struct btrfs_qgroup *qgroup;
1047         struct btrfs_qgroup_list *glist;
1048         struct ulist_node *unode;
1049         struct ulist_iterator uiter;
1050         int ret = 0;
1051
1052         qgroup = find_qgroup_rb(fs_info, ref_root);
1053         if (!qgroup)
1054                 goto out;
1055
1056         qgroup->rfer += sign * num_bytes;
1057         qgroup->rfer_cmpr += sign * num_bytes;
1058
1059         WARN_ON(sign < 0 && qgroup->excl < num_bytes);
1060         qgroup->excl += sign * num_bytes;
1061         qgroup->excl_cmpr += sign * num_bytes;
1062         if (sign > 0)
1063                 qgroup->reserved -= num_bytes;
1064
1065         qgroup_dirty(fs_info, qgroup);
1066
1067         /* Get all of the parent groups that contain this qgroup */
1068         list_for_each_entry(glist, &qgroup->groups, next_group) {
1069                 ret = ulist_add(tmp, glist->group->qgroupid,
1070                                 ptr_to_u64(glist->group), GFP_ATOMIC);
1071                 if (ret < 0)
1072                         goto out;
1073         }
1074
1075         /* Iterate all of the parents and adjust their reference counts */
1076         ULIST_ITER_INIT(&uiter);
1077         while ((unode = ulist_next(tmp, &uiter))) {
1078                 qgroup = u64_to_ptr(unode->aux);
1079                 qgroup->rfer += sign * num_bytes;
1080                 qgroup->rfer_cmpr += sign * num_bytes;
1081                 WARN_ON(sign < 0 && qgroup->excl < num_bytes);
1082                 qgroup->excl += sign * num_bytes;
1083                 if (sign > 0)
1084                         qgroup->reserved -= num_bytes;
1085                 qgroup->excl_cmpr += sign * num_bytes;
1086                 qgroup_dirty(fs_info, qgroup);
1087
1088                 /* Add any parents of the parents */
1089                 list_for_each_entry(glist, &qgroup->groups, next_group) {
1090                         ret = ulist_add(tmp, glist->group->qgroupid,
1091                                         ptr_to_u64(glist->group), GFP_ATOMIC);
1092                         if (ret < 0)
1093                                 goto out;
1094                 }
1095         }
1096         ret = 0;
1097 out:
1098         return ret;
1099 }
1100
1101
1102 /*
1103  * Quick path for updating qgroup with only excl refs.
1104  *
1105  * In that case, just update all parent will be enough.
1106  * Or we needs to do a full rescan.
1107  * Caller should also hold fs_info->qgroup_lock.
1108  *
1109  * Return 0 for quick update, return >0 for need to full rescan
1110  * and mark INCONSISTENT flag.
1111  * Return < 0 for other error.
1112  */
1113 static int quick_update_accounting(struct btrfs_fs_info *fs_info,
1114                                    struct ulist *tmp, u64 src, u64 dst,
1115                                    int sign)
1116 {
1117         struct btrfs_qgroup *qgroup;
1118         int ret = 1;
1119         int err = 0;
1120
1121         qgroup = find_qgroup_rb(fs_info, src);
1122         if (!qgroup)
1123                 goto out;
1124         if (qgroup->excl == qgroup->rfer) {
1125                 ret = 0;
1126                 err = __qgroup_excl_accounting(fs_info, tmp, dst,
1127                                                qgroup->excl, sign);
1128                 if (err < 0) {
1129                         ret = err;
1130                         goto out;
1131                 }
1132         }
1133 out:
1134         if (ret)
1135                 fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
1136         return ret;
1137 }
1138
1139 int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans,
1140                               struct btrfs_fs_info *fs_info, u64 src, u64 dst)
1141 {
1142         struct btrfs_root *quota_root;
1143         struct btrfs_qgroup *parent;
1144         struct btrfs_qgroup *member;
1145         struct btrfs_qgroup_list *list;
1146         struct ulist *tmp;
1147         int ret = 0;
1148
1149         /* Check the level of src and dst first */
1150         if (btrfs_qgroup_level(src) >= btrfs_qgroup_level(dst))
1151                 return -EINVAL;
1152
1153         tmp = ulist_alloc(GFP_NOFS);
1154         if (!tmp)
1155                 return -ENOMEM;
1156
1157         mutex_lock(&fs_info->qgroup_ioctl_lock);
1158         quota_root = fs_info->quota_root;
1159         if (!quota_root) {
1160                 ret = -EINVAL;
1161                 goto out;
1162         }
1163         member = find_qgroup_rb(fs_info, src);
1164         parent = find_qgroup_rb(fs_info, dst);
1165         if (!member || !parent) {
1166                 ret = -EINVAL;
1167                 goto out;
1168         }
1169
1170         /* check if such qgroup relation exist firstly */
1171         list_for_each_entry(list, &member->groups, next_group) {
1172                 if (list->group == parent) {
1173                         ret = -EEXIST;
1174                         goto out;
1175                 }
1176         }
1177
1178         ret = add_qgroup_relation_item(trans, quota_root, src, dst);
1179         if (ret)
1180                 goto out;
1181
1182         ret = add_qgroup_relation_item(trans, quota_root, dst, src);
1183         if (ret) {
1184                 del_qgroup_relation_item(trans, quota_root, src, dst);
1185                 goto out;
1186         }
1187
1188         spin_lock(&fs_info->qgroup_lock);
1189         ret = add_relation_rb(quota_root->fs_info, src, dst);
1190         if (ret < 0) {
1191                 spin_unlock(&fs_info->qgroup_lock);
1192                 goto out;
1193         }
1194         ret = quick_update_accounting(fs_info, tmp, src, dst, 1);
1195         spin_unlock(&fs_info->qgroup_lock);
1196 out:
1197         mutex_unlock(&fs_info->qgroup_ioctl_lock);
1198         ulist_free(tmp);
1199         return ret;
1200 }
1201
1202 int __del_qgroup_relation(struct btrfs_trans_handle *trans,
1203                               struct btrfs_fs_info *fs_info, u64 src, u64 dst)
1204 {
1205         struct btrfs_root *quota_root;
1206         struct btrfs_qgroup *parent;
1207         struct btrfs_qgroup *member;
1208         struct btrfs_qgroup_list *list;
1209         struct ulist *tmp;
1210         int ret = 0;
1211         int err;
1212
1213         tmp = ulist_alloc(GFP_NOFS);
1214         if (!tmp)
1215                 return -ENOMEM;
1216
1217         quota_root = fs_info->quota_root;
1218         if (!quota_root) {
1219                 ret = -EINVAL;
1220                 goto out;
1221         }
1222
1223         member = find_qgroup_rb(fs_info, src);
1224         parent = find_qgroup_rb(fs_info, dst);
1225         if (!member || !parent) {
1226                 ret = -EINVAL;
1227                 goto out;
1228         }
1229
1230         /* check if such qgroup relation exist firstly */
1231         list_for_each_entry(list, &member->groups, next_group) {
1232                 if (list->group == parent)
1233                         goto exist;
1234         }
1235         ret = -ENOENT;
1236         goto out;
1237 exist:
1238         ret = del_qgroup_relation_item(trans, quota_root, src, dst);
1239         err = del_qgroup_relation_item(trans, quota_root, dst, src);
1240         if (err && !ret)
1241                 ret = err;
1242
1243         spin_lock(&fs_info->qgroup_lock);
1244         del_relation_rb(fs_info, src, dst);
1245         ret = quick_update_accounting(fs_info, tmp, src, dst, -1);
1246         spin_unlock(&fs_info->qgroup_lock);
1247 out:
1248         ulist_free(tmp);
1249         return ret;
1250 }
1251
1252 int btrfs_del_qgroup_relation(struct btrfs_trans_handle *trans,
1253                               struct btrfs_fs_info *fs_info, u64 src, u64 dst)
1254 {
1255         int ret = 0;
1256
1257         mutex_lock(&fs_info->qgroup_ioctl_lock);
1258         ret = __del_qgroup_relation(trans, fs_info, src, dst);
1259         mutex_unlock(&fs_info->qgroup_ioctl_lock);
1260
1261         return ret;
1262 }
1263
1264 int btrfs_create_qgroup(struct btrfs_trans_handle *trans,
1265                         struct btrfs_fs_info *fs_info, u64 qgroupid)
1266 {
1267         struct btrfs_root *quota_root;
1268         struct btrfs_qgroup *qgroup;
1269         int ret = 0;
1270
1271         mutex_lock(&fs_info->qgroup_ioctl_lock);
1272         quota_root = fs_info->quota_root;
1273         if (!quota_root) {
1274                 ret = -EINVAL;
1275                 goto out;
1276         }
1277         qgroup = find_qgroup_rb(fs_info, qgroupid);
1278         if (qgroup) {
1279                 ret = -EEXIST;
1280                 goto out;
1281         }
1282
1283         ret = add_qgroup_item(trans, quota_root, qgroupid);
1284         if (ret)
1285                 goto out;
1286
1287         spin_lock(&fs_info->qgroup_lock);
1288         qgroup = add_qgroup_rb(fs_info, qgroupid);
1289         spin_unlock(&fs_info->qgroup_lock);
1290
1291         if (IS_ERR(qgroup))
1292                 ret = PTR_ERR(qgroup);
1293 out:
1294         mutex_unlock(&fs_info->qgroup_ioctl_lock);
1295         return ret;
1296 }
1297
1298 int btrfs_remove_qgroup(struct btrfs_trans_handle *trans,
1299                         struct btrfs_fs_info *fs_info, u64 qgroupid)
1300 {
1301         struct btrfs_root *quota_root;
1302         struct btrfs_qgroup *qgroup;
1303         struct btrfs_qgroup_list *list;
1304         int ret = 0;
1305
1306         mutex_lock(&fs_info->qgroup_ioctl_lock);
1307         quota_root = fs_info->quota_root;
1308         if (!quota_root) {
1309                 ret = -EINVAL;
1310                 goto out;
1311         }
1312
1313         qgroup = find_qgroup_rb(fs_info, qgroupid);
1314         if (!qgroup) {
1315                 ret = -ENOENT;
1316                 goto out;
1317         } else {
1318                 /* check if there are no children of this qgroup */
1319                 if (!list_empty(&qgroup->members)) {
1320                         ret = -EBUSY;
1321                         goto out;
1322                 }
1323         }
1324         ret = del_qgroup_item(trans, quota_root, qgroupid);
1325
1326         while (!list_empty(&qgroup->groups)) {
1327                 list = list_first_entry(&qgroup->groups,
1328                                         struct btrfs_qgroup_list, next_group);
1329                 ret = __del_qgroup_relation(trans, fs_info,
1330                                            qgroupid,
1331                                            list->group->qgroupid);
1332                 if (ret)
1333                         goto out;
1334         }
1335
1336         spin_lock(&fs_info->qgroup_lock);
1337         del_qgroup_rb(quota_root->fs_info, qgroupid);
1338         spin_unlock(&fs_info->qgroup_lock);
1339 out:
1340         mutex_unlock(&fs_info->qgroup_ioctl_lock);
1341         return ret;
1342 }
1343
1344 int btrfs_limit_qgroup(struct btrfs_trans_handle *trans,
1345                        struct btrfs_fs_info *fs_info, u64 qgroupid,
1346                        struct btrfs_qgroup_limit *limit)
1347 {
1348         struct btrfs_root *quota_root;
1349         struct btrfs_qgroup *qgroup;
1350         int ret = 0;
1351
1352         mutex_lock(&fs_info->qgroup_ioctl_lock);
1353         quota_root = fs_info->quota_root;
1354         if (!quota_root) {
1355                 ret = -EINVAL;
1356                 goto out;
1357         }
1358
1359         qgroup = find_qgroup_rb(fs_info, qgroupid);
1360         if (!qgroup) {
1361                 ret = -ENOENT;
1362                 goto out;
1363         }
1364
1365         spin_lock(&fs_info->qgroup_lock);
1366         if (limit->flags & BTRFS_QGROUP_LIMIT_MAX_RFER)
1367                 qgroup->max_rfer = limit->max_rfer;
1368         if (limit->flags & BTRFS_QGROUP_LIMIT_MAX_EXCL)
1369                 qgroup->max_excl = limit->max_excl;
1370         if (limit->flags & BTRFS_QGROUP_LIMIT_RSV_RFER)
1371                 qgroup->rsv_rfer = limit->rsv_rfer;
1372         if (limit->flags & BTRFS_QGROUP_LIMIT_RSV_EXCL)
1373                 qgroup->rsv_excl = limit->rsv_excl;
1374         qgroup->lim_flags |= limit->flags;
1375
1376         spin_unlock(&fs_info->qgroup_lock);
1377
1378         ret = update_qgroup_limit_item(trans, quota_root, qgroup);
1379         if (ret) {
1380                 fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
1381                 btrfs_info(fs_info, "unable to update quota limit for %llu",
1382                        qgroupid);
1383         }
1384
1385 out:
1386         mutex_unlock(&fs_info->qgroup_ioctl_lock);
1387         return ret;
1388 }
1389
1390 static int comp_oper_exist(struct btrfs_qgroup_operation *oper1,
1391                            struct btrfs_qgroup_operation *oper2)
1392 {
1393         /*
1394          * Ignore seq and type here, we're looking for any operation
1395          * at all related to this extent on that root.
1396          */
1397         if (oper1->bytenr < oper2->bytenr)
1398                 return -1;
1399         if (oper1->bytenr > oper2->bytenr)
1400                 return 1;
1401         if (oper1->ref_root < oper2->ref_root)
1402                 return -1;
1403         if (oper1->ref_root > oper2->ref_root)
1404                 return 1;
1405         return 0;
1406 }
1407
1408 static int qgroup_oper_exists(struct btrfs_fs_info *fs_info,
1409                               struct btrfs_qgroup_operation *oper)
1410 {
1411         struct rb_node *n;
1412         struct btrfs_qgroup_operation *cur;
1413         int cmp;
1414
1415         spin_lock(&fs_info->qgroup_op_lock);
1416         n = fs_info->qgroup_op_tree.rb_node;
1417         while (n) {
1418                 cur = rb_entry(n, struct btrfs_qgroup_operation, n);
1419                 cmp = comp_oper_exist(cur, oper);
1420                 if (cmp < 0) {
1421                         n = n->rb_right;
1422                 } else if (cmp) {
1423                         n = n->rb_left;
1424                 } else {
1425                         spin_unlock(&fs_info->qgroup_op_lock);
1426                         return -EEXIST;
1427                 }
1428         }
1429         spin_unlock(&fs_info->qgroup_op_lock);
1430         return 0;
1431 }
1432
1433 static int comp_oper(struct btrfs_qgroup_operation *oper1,
1434                      struct btrfs_qgroup_operation *oper2)
1435 {
1436         if (oper1->bytenr < oper2->bytenr)
1437                 return -1;
1438         if (oper1->bytenr > oper2->bytenr)
1439                 return 1;
1440         if (oper1->ref_root < oper2->ref_root)
1441                 return -1;
1442         if (oper1->ref_root > oper2->ref_root)
1443                 return 1;
1444         if (oper1->seq < oper2->seq)
1445                 return -1;
1446         if (oper1->seq > oper2->seq)
1447                 return 1;
1448         if (oper1->type < oper2->type)
1449                 return -1;
1450         if (oper1->type > oper2->type)
1451                 return 1;
1452         return 0;
1453 }
1454
1455 static int insert_qgroup_oper(struct btrfs_fs_info *fs_info,
1456                               struct btrfs_qgroup_operation *oper)
1457 {
1458         struct rb_node **p;
1459         struct rb_node *parent = NULL;
1460         struct btrfs_qgroup_operation *cur;
1461         int cmp;
1462
1463         spin_lock(&fs_info->qgroup_op_lock);
1464         p = &fs_info->qgroup_op_tree.rb_node;
1465         while (*p) {
1466                 parent = *p;
1467                 cur = rb_entry(parent, struct btrfs_qgroup_operation, n);
1468                 cmp = comp_oper(cur, oper);
1469                 if (cmp < 0) {
1470                         p = &(*p)->rb_right;
1471                 } else if (cmp) {
1472                         p = &(*p)->rb_left;
1473                 } else {
1474                         spin_unlock(&fs_info->qgroup_op_lock);
1475                         return -EEXIST;
1476                 }
1477         }
1478         rb_link_node(&oper->n, parent, p);
1479         rb_insert_color(&oper->n, &fs_info->qgroup_op_tree);
1480         spin_unlock(&fs_info->qgroup_op_lock);
1481         return 0;
1482 }
1483
1484 /*
1485  * Record a quota operation for processing later on.
1486  * @trans: the transaction we are adding the delayed op to.
1487  * @fs_info: the fs_info for this fs.
1488  * @ref_root: the root of the reference we are acting on,
1489  * @bytenr: the bytenr we are acting on.
1490  * @num_bytes: the number of bytes in the reference.
1491  * @type: the type of operation this is.
1492  * @mod_seq: do we need to get a sequence number for looking up roots.
1493  *
1494  * We just add it to our trans qgroup_ref_list and carry on and process these
1495  * operations in order at some later point.  If the reference root isn't a fs
1496  * root then we don't bother with doing anything.
1497  *
1498  * MUST BE HOLDING THE REF LOCK.
1499  */
1500 int btrfs_qgroup_record_ref(struct btrfs_trans_handle *trans,
1501                             struct btrfs_fs_info *fs_info, u64 ref_root,
1502                             u64 bytenr, u64 num_bytes,
1503                             enum btrfs_qgroup_operation_type type, int mod_seq)
1504 {
1505         struct btrfs_qgroup_operation *oper;
1506         int ret;
1507
1508         if (!is_fstree(ref_root) || !fs_info->quota_enabled)
1509                 return 0;
1510
1511         oper = kmalloc(sizeof(*oper), GFP_NOFS);
1512         if (!oper)
1513                 return -ENOMEM;
1514
1515         oper->ref_root = ref_root;
1516         oper->bytenr = bytenr;
1517         oper->num_bytes = num_bytes;
1518         oper->type = type;
1519         oper->seq = atomic_inc_return(&fs_info->qgroup_op_seq);
1520         INIT_LIST_HEAD(&oper->elem.list);
1521         oper->elem.seq = 0;
1522
1523         trace_btrfs_qgroup_record_ref(oper);
1524
1525         if (type == BTRFS_QGROUP_OPER_SUB_SUBTREE) {
1526                 /*
1527                  * If any operation for this bytenr/ref_root combo
1528                  * exists, then we know it's not exclusively owned and
1529                  * shouldn't be queued up.
1530                  *
1531                  * This also catches the case where we have a cloned
1532                  * extent that gets queued up multiple times during
1533                  * drop snapshot.
1534                  */
1535                 if (qgroup_oper_exists(fs_info, oper)) {
1536                         kfree(oper);
1537                         return 0;
1538                 }
1539         }
1540
1541         ret = insert_qgroup_oper(fs_info, oper);
1542         if (ret) {
1543                 /* Shouldn't happen so have an assert for developers */
1544                 ASSERT(0);
1545                 kfree(oper);
1546                 return ret;
1547         }
1548         list_add_tail(&oper->list, &trans->qgroup_ref_list);
1549
1550         if (mod_seq)
1551                 btrfs_get_tree_mod_seq(fs_info, &oper->elem);
1552
1553         return 0;
1554 }
1555
1556 static int qgroup_excl_accounting(struct btrfs_fs_info *fs_info,
1557                                   struct btrfs_qgroup_operation *oper)
1558 {
1559         struct ulist *tmp;
1560         int sign = 0;
1561         int ret = 0;
1562
1563         tmp = ulist_alloc(GFP_NOFS);
1564         if (!tmp)
1565                 return -ENOMEM;
1566
1567         spin_lock(&fs_info->qgroup_lock);
1568         if (!fs_info->quota_root)
1569                 goto out;
1570
1571         switch (oper->type) {
1572         case BTRFS_QGROUP_OPER_ADD_EXCL:
1573                 sign = 1;
1574                 break;
1575         case BTRFS_QGROUP_OPER_SUB_EXCL:
1576                 sign = -1;
1577                 break;
1578         default:
1579                 ASSERT(0);
1580         }
1581         ret = __qgroup_excl_accounting(fs_info, tmp, oper->ref_root,
1582                                        oper->num_bytes, sign);
1583 out:
1584         spin_unlock(&fs_info->qgroup_lock);
1585         ulist_free(tmp);
1586         return ret;
1587 }
1588
1589 /*
1590  * Walk all of the roots that pointed to our bytenr and adjust their refcnts as
1591  * properly.
1592  */
1593 static int qgroup_calc_old_refcnt(struct btrfs_fs_info *fs_info,
1594                                   u64 root_to_skip, struct ulist *tmp,
1595                                   struct ulist *roots, struct ulist *qgroups,
1596                                   u64 seq, int *old_roots, int rescan)
1597 {
1598         struct ulist_node *unode;
1599         struct ulist_iterator uiter;
1600         struct ulist_node *tmp_unode;
1601         struct ulist_iterator tmp_uiter;
1602         struct btrfs_qgroup *qg;
1603         int ret;
1604
1605         ULIST_ITER_INIT(&uiter);
1606         while ((unode = ulist_next(roots, &uiter))) {
1607                 /* We don't count our current root here */
1608                 if (unode->val == root_to_skip)
1609                         continue;
1610                 qg = find_qgroup_rb(fs_info, unode->val);
1611                 if (!qg)
1612                         continue;
1613                 /*
1614                  * We could have a pending removal of this same ref so we may
1615                  * not have actually found our ref root when doing
1616                  * btrfs_find_all_roots, so we need to keep track of how many
1617                  * old roots we find in case we removed ours and added a
1618                  * different one at the same time.  I don't think this could
1619                  * happen in practice but that sort of thinking leads to pain
1620                  * and suffering and to the dark side.
1621                  */
1622                 (*old_roots)++;
1623
1624                 ulist_reinit(tmp);
1625                 ret = ulist_add(qgroups, qg->qgroupid, ptr_to_u64(qg),
1626                                 GFP_ATOMIC);
1627                 if (ret < 0)
1628                         return ret;
1629                 ret = ulist_add(tmp, qg->qgroupid, ptr_to_u64(qg), GFP_ATOMIC);
1630                 if (ret < 0)
1631                         return ret;
1632                 ULIST_ITER_INIT(&tmp_uiter);
1633                 while ((tmp_unode = ulist_next(tmp, &tmp_uiter))) {
1634                         struct btrfs_qgroup_list *glist;
1635                         int mod;
1636
1637                         qg = u64_to_ptr(tmp_unode->aux);
1638                         /*
1639                          * We use this sequence number to keep from having to
1640                          * run the whole list and 0 out the refcnt every time.
1641                          * We basically use sequnce as the known 0 count and
1642                          * then add 1 everytime we see a qgroup.  This is how we
1643                          * get how many of the roots actually point up to the
1644                          * upper level qgroups in order to determine exclusive
1645                          * counts.
1646                          *
1647                          * For rescan none of the extent is recorded before so
1648                          * we just don't add old_refcnt.
1649                          */
1650                         if (rescan)
1651                                 mod = 0;
1652                         else
1653                                 mod = 1;
1654                         btrfs_qgroup_update_old_refcnt(qg, seq, mod);
1655                         btrfs_qgroup_update_new_refcnt(qg, seq, 1);
1656                         list_for_each_entry(glist, &qg->groups, next_group) {
1657                                 ret = ulist_add(qgroups, glist->group->qgroupid,
1658                                                 ptr_to_u64(glist->group),
1659                                                 GFP_ATOMIC);
1660                                 if (ret < 0)
1661                                         return ret;
1662                                 ret = ulist_add(tmp, glist->group->qgroupid,
1663                                                 ptr_to_u64(glist->group),
1664                                                 GFP_ATOMIC);
1665                                 if (ret < 0)
1666                                         return ret;
1667                         }
1668                 }
1669         }
1670         return 0;
1671 }
1672
1673 /*
1674  * We need to walk forward in our operation tree and account for any roots that
1675  * were deleted after we made this operation.
1676  */
1677 static int qgroup_account_deleted_refs(struct btrfs_fs_info *fs_info,
1678                                        struct btrfs_qgroup_operation *oper,
1679                                        struct ulist *tmp,
1680                                        struct ulist *qgroups, u64 seq,
1681                                        int *old_roots)
1682 {
1683         struct ulist_node *unode;
1684         struct ulist_iterator uiter;
1685         struct btrfs_qgroup *qg;
1686         struct btrfs_qgroup_operation *tmp_oper;
1687         struct rb_node *n;
1688         int ret;
1689
1690         ulist_reinit(tmp);
1691
1692         /*
1693          * We only walk forward in the tree since we're only interested in
1694          * removals that happened _after_  our operation.
1695          */
1696         spin_lock(&fs_info->qgroup_op_lock);
1697         n = rb_next(&oper->n);
1698         spin_unlock(&fs_info->qgroup_op_lock);
1699         if (!n)
1700                 return 0;
1701         tmp_oper = rb_entry(n, struct btrfs_qgroup_operation, n);
1702         while (tmp_oper->bytenr == oper->bytenr) {
1703                 /*
1704                  * If it's not a removal we don't care, additions work out
1705                  * properly with our refcnt tracking.
1706                  */
1707                 if (tmp_oper->type != BTRFS_QGROUP_OPER_SUB_SHARED &&
1708                     tmp_oper->type != BTRFS_QGROUP_OPER_SUB_EXCL)
1709                         goto next;
1710                 qg = find_qgroup_rb(fs_info, tmp_oper->ref_root);
1711                 if (!qg)
1712                         goto next;
1713                 ret = ulist_add(qgroups, qg->qgroupid, ptr_to_u64(qg),
1714                                 GFP_ATOMIC);
1715                 if (ret) {
1716                         if (ret < 0)
1717                                 return ret;
1718                         /*
1719                          * We only want to increase old_roots if this qgroup is
1720                          * not already in the list of qgroups.  If it is already
1721                          * there then that means it must have been re-added or
1722                          * the delete will be discarded because we had an
1723                          * existing ref that we haven't looked up yet.  In this
1724                          * case we don't want to increase old_roots.  So if ret
1725                          * == 1 then we know that this is the first time we've
1726                          * seen this qgroup and we can bump the old_roots.
1727                          */
1728                         (*old_roots)++;
1729                         ret = ulist_add(tmp, qg->qgroupid, ptr_to_u64(qg),
1730                                         GFP_ATOMIC);
1731                         if (ret < 0)
1732                                 return ret;
1733                 }
1734 next:
1735                 spin_lock(&fs_info->qgroup_op_lock);
1736                 n = rb_next(&tmp_oper->n);
1737                 spin_unlock(&fs_info->qgroup_op_lock);
1738                 if (!n)
1739                         break;
1740                 tmp_oper = rb_entry(n, struct btrfs_qgroup_operation, n);
1741         }
1742
1743         /* Ok now process the qgroups we found */
1744         ULIST_ITER_INIT(&uiter);
1745         while ((unode = ulist_next(tmp, &uiter))) {
1746                 struct btrfs_qgroup_list *glist;
1747
1748                 qg = u64_to_ptr(unode->aux);
1749                 btrfs_qgroup_update_old_refcnt(qg, seq, 1);
1750                 btrfs_qgroup_update_new_refcnt(qg, seq, 1);
1751                 list_for_each_entry(glist, &qg->groups, next_group) {
1752                         ret = ulist_add(qgroups, glist->group->qgroupid,
1753                                         ptr_to_u64(glist->group), GFP_ATOMIC);
1754                         if (ret < 0)
1755                                 return ret;
1756                         ret = ulist_add(tmp, glist->group->qgroupid,
1757                                         ptr_to_u64(glist->group), GFP_ATOMIC);
1758                         if (ret < 0)
1759                                 return ret;
1760                 }
1761         }
1762         return 0;
1763 }
1764
1765 /* Add refcnt for the newly added reference. */
1766 static int qgroup_calc_new_refcnt(struct btrfs_fs_info *fs_info,
1767                                   struct btrfs_qgroup_operation *oper,
1768                                   struct btrfs_qgroup *qgroup,
1769                                   struct ulist *tmp, struct ulist *qgroups,
1770                                   u64 seq)
1771 {
1772         struct ulist_node *unode;
1773         struct ulist_iterator uiter;
1774         struct btrfs_qgroup *qg;
1775         int ret;
1776
1777         ulist_reinit(tmp);
1778         ret = ulist_add(qgroups, qgroup->qgroupid, ptr_to_u64(qgroup),
1779                         GFP_ATOMIC);
1780         if (ret < 0)
1781                 return ret;
1782         ret = ulist_add(tmp, qgroup->qgroupid, ptr_to_u64(qgroup),
1783                         GFP_ATOMIC);
1784         if (ret < 0)
1785                 return ret;
1786         ULIST_ITER_INIT(&uiter);
1787         while ((unode = ulist_next(tmp, &uiter))) {
1788                 struct btrfs_qgroup_list *glist;
1789
1790                 qg = u64_to_ptr(unode->aux);
1791                 if (oper->type == BTRFS_QGROUP_OPER_ADD_SHARED)
1792                         btrfs_qgroup_update_new_refcnt(qg, seq, 1);
1793                 else
1794                         btrfs_qgroup_update_old_refcnt(qg, seq, 1);
1795                 list_for_each_entry(glist, &qg->groups, next_group) {
1796                         ret = ulist_add(tmp, glist->group->qgroupid,
1797                                         ptr_to_u64(glist->group), GFP_ATOMIC);
1798                         if (ret < 0)
1799                                 return ret;
1800                         ret = ulist_add(qgroups, glist->group->qgroupid,
1801                                         ptr_to_u64(glist->group), GFP_ATOMIC);
1802                         if (ret < 0)
1803                                 return ret;
1804                 }
1805         }
1806         return 0;
1807 }
1808
1809 #define UPDATE_NEW      0
1810 #define UPDATE_OLD      1
1811 /*
1812  * Walk all of the roots that points to the bytenr and adjust their refcnts.
1813  */
1814 static int qgroup_update_refcnt(struct btrfs_fs_info *fs_info,
1815                                 struct ulist *roots, struct ulist *tmp,
1816                                 struct ulist *qgroups, u64 seq, int update_old)
1817 {
1818         struct ulist_node *unode;
1819         struct ulist_iterator uiter;
1820         struct ulist_node *tmp_unode;
1821         struct ulist_iterator tmp_uiter;
1822         struct btrfs_qgroup *qg;
1823         int ret = 0;
1824
1825         if (!roots)
1826                 return 0;
1827         ULIST_ITER_INIT(&uiter);
1828         while ((unode = ulist_next(roots, &uiter))) {
1829                 qg = find_qgroup_rb(fs_info, unode->val);
1830                 if (!qg)
1831                         continue;
1832
1833                 ulist_reinit(tmp);
1834                 ret = ulist_add(qgroups, qg->qgroupid, ptr_to_u64(qg),
1835                                 GFP_ATOMIC);
1836                 if (ret < 0)
1837                         return ret;
1838                 ret = ulist_add(tmp, qg->qgroupid, ptr_to_u64(qg), GFP_ATOMIC);
1839                 if (ret < 0)
1840                         return ret;
1841                 ULIST_ITER_INIT(&tmp_uiter);
1842                 while ((tmp_unode = ulist_next(tmp, &tmp_uiter))) {
1843                         struct btrfs_qgroup_list *glist;
1844
1845                         qg = u64_to_ptr(tmp_unode->aux);
1846                         if (update_old)
1847                                 btrfs_qgroup_update_old_refcnt(qg, seq, 1);
1848                         else
1849                                 btrfs_qgroup_update_new_refcnt(qg, seq, 1);
1850                         list_for_each_entry(glist, &qg->groups, next_group) {
1851                                 ret = ulist_add(qgroups, glist->group->qgroupid,
1852                                                 ptr_to_u64(glist->group),
1853                                                 GFP_ATOMIC);
1854                                 if (ret < 0)
1855                                         return ret;
1856                                 ret = ulist_add(tmp, glist->group->qgroupid,
1857                                                 ptr_to_u64(glist->group),
1858                                                 GFP_ATOMIC);
1859                                 if (ret < 0)
1860                                         return ret;
1861                         }
1862                 }
1863         }
1864         return 0;
1865 }
1866
1867 /*
1868  * This adjusts the counters for all referenced qgroups if need be.
1869  */
1870 static int qgroup_adjust_counters(struct btrfs_fs_info *fs_info,
1871                                   u64 root_to_skip, u64 num_bytes,
1872                                   struct ulist *qgroups, u64 seq,
1873                                   int old_roots, int new_roots, int rescan)
1874 {
1875         struct ulist_node *unode;
1876         struct ulist_iterator uiter;
1877         struct btrfs_qgroup *qg;
1878         u64 cur_new_count, cur_old_count;
1879
1880         ULIST_ITER_INIT(&uiter);
1881         while ((unode = ulist_next(qgroups, &uiter))) {
1882                 bool dirty = false;
1883
1884                 qg = u64_to_ptr(unode->aux);
1885                 cur_old_count = btrfs_qgroup_get_old_refcnt(qg, seq);
1886                 cur_new_count = btrfs_qgroup_get_new_refcnt(qg, seq);
1887
1888                 /*
1889                  * Wasn't referenced before but is now, add to the reference
1890                  * counters.
1891                  */
1892                 if (cur_old_count == 0 && cur_new_count > 0) {
1893                         qg->rfer += num_bytes;
1894                         qg->rfer_cmpr += num_bytes;
1895                         dirty = true;
1896                 }
1897
1898                 /*
1899                  * Was referenced before but isn't now, subtract from the
1900                  * reference counters.
1901                  */
1902                 if (cur_old_count > 0 && cur_new_count == 0) {
1903                         qg->rfer -= num_bytes;
1904                         qg->rfer_cmpr -= num_bytes;
1905                         dirty = true;
1906                 }
1907
1908                 /*
1909                  * If our refcount was the same as the roots previously but our
1910                  * new count isn't the same as the number of roots now then we
1911                  * went from having a exclusive reference on this range to not.
1912                  */
1913                 if (old_roots && cur_old_count == old_roots &&
1914                     (cur_new_count != new_roots || new_roots == 0)) {
1915                         WARN_ON(cur_new_count != new_roots && new_roots == 0);
1916                         qg->excl -= num_bytes;
1917                         qg->excl_cmpr -= num_bytes;
1918                         dirty = true;
1919                 }
1920
1921                 /*
1922                  * If we didn't reference all the roots before but now we do we
1923                  * have an exclusive reference to this range.
1924                  */
1925                 if ((!old_roots || (old_roots && cur_old_count != old_roots))
1926                     && cur_new_count == new_roots) {
1927                         qg->excl += num_bytes;
1928                         qg->excl_cmpr += num_bytes;
1929                         dirty = true;
1930                 }
1931
1932                 if (dirty)
1933                         qgroup_dirty(fs_info, qg);
1934         }
1935         return 0;
1936 }
1937
1938 /*
1939  * If we removed a data extent and there were other references for that bytenr
1940  * then we need to lookup all referenced roots to make sure we still don't
1941  * reference this bytenr.  If we do then we can just discard this operation.
1942  */
1943 static int check_existing_refs(struct btrfs_trans_handle *trans,
1944                                struct btrfs_fs_info *fs_info,
1945                                struct btrfs_qgroup_operation *oper)
1946 {
1947         struct ulist *roots = NULL;
1948         struct ulist_node *unode;
1949         struct ulist_iterator uiter;
1950         int ret = 0;
1951
1952         ret = btrfs_find_all_roots(trans, fs_info, oper->bytenr,
1953                                    oper->elem.seq, &roots);
1954         if (ret < 0)
1955                 return ret;
1956         ret = 0;
1957
1958         ULIST_ITER_INIT(&uiter);
1959         while ((unode = ulist_next(roots, &uiter))) {
1960                 if (unode->val == oper->ref_root) {
1961                         ret = 1;
1962                         break;
1963                 }
1964         }
1965         ulist_free(roots);
1966         btrfs_put_tree_mod_seq(fs_info, &oper->elem);
1967
1968         return ret;
1969 }
1970
1971 /*
1972  * If we share a reference across multiple roots then we may need to adjust
1973  * various qgroups referenced and exclusive counters.  The basic premise is this
1974  *
1975  * 1) We have seq to represent a 0 count.  Instead of looping through all of the
1976  * qgroups and resetting their refcount to 0 we just constantly bump this
1977  * sequence number to act as the base reference count.  This means that if
1978  * anybody is equal to or below this sequence they were never referenced.  We
1979  * jack this sequence up by the number of roots we found each time in order to
1980  * make sure we don't have any overlap.
1981  *
1982  * 2) We first search all the roots that reference the area _except_ the root
1983  * we're acting on currently.  This makes up the old_refcnt of all the qgroups
1984  * before.
1985  *
1986  * 3) We walk all of the qgroups referenced by the root we are currently acting
1987  * on, and will either adjust old_refcnt in the case of a removal or the
1988  * new_refcnt in the case of an addition.
1989  *
1990  * 4) Finally we walk all the qgroups that are referenced by this range
1991  * including the root we are acting on currently.  We will adjust the counters
1992  * based on the number of roots we had and will have after this operation.
1993  *
1994  * Take this example as an illustration
1995  *
1996  *                      [qgroup 1/0]
1997  *                   /         |          \
1998  *              [qg 0/0]   [qg 0/1]     [qg 0/2]
1999  *                 \          |            /
2000  *                [        extent           ]
2001  *
2002  * Say we are adding a reference that is covered by qg 0/0.  The first step
2003  * would give a refcnt of 1 to qg 0/1 and 0/2 and a refcnt of 2 to qg 1/0 with
2004  * old_roots being 2.  Because it is adding new_roots will be 1.  We then go
2005  * through qg 0/0 which will get the new_refcnt set to 1 and add 1 to qg 1/0's
2006  * new_refcnt, bringing it to 3.  We then walk through all of the qgroups, we
2007  * notice that the old refcnt for qg 0/0 < the new refcnt, so we added a
2008  * reference and thus must add the size to the referenced bytes.  Everything
2009  * else is the same so nothing else changes.
2010  */
2011 static int qgroup_shared_accounting(struct btrfs_trans_handle *trans,
2012                                     struct btrfs_fs_info *fs_info,
2013                                     struct btrfs_qgroup_operation *oper)
2014 {
2015         struct ulist *roots = NULL;
2016         struct ulist *qgroups, *tmp;
2017         struct btrfs_qgroup *qgroup;
2018         struct seq_list elem = SEQ_LIST_INIT(elem);
2019         u64 seq;
2020         int old_roots = 0;
2021         int new_roots = 0;
2022         int ret = 0;
2023
2024         if (oper->elem.seq) {
2025                 ret = check_existing_refs(trans, fs_info, oper);
2026                 if (ret < 0)
2027                         return ret;
2028                 if (ret)
2029                         return 0;
2030         }
2031
2032         qgroups = ulist_alloc(GFP_NOFS);
2033         if (!qgroups)
2034                 return -ENOMEM;
2035
2036         tmp = ulist_alloc(GFP_NOFS);
2037         if (!tmp) {
2038                 ulist_free(qgroups);
2039                 return -ENOMEM;
2040         }
2041
2042         btrfs_get_tree_mod_seq(fs_info, &elem);
2043         ret = btrfs_find_all_roots(trans, fs_info, oper->bytenr, elem.seq,
2044                                    &roots);
2045         btrfs_put_tree_mod_seq(fs_info, &elem);
2046         if (ret < 0) {
2047                 ulist_free(qgroups);
2048                 ulist_free(tmp);
2049                 return ret;
2050         }
2051         spin_lock(&fs_info->qgroup_lock);
2052         qgroup = find_qgroup_rb(fs_info, oper->ref_root);
2053         if (!qgroup)
2054                 goto out;
2055         seq = fs_info->qgroup_seq;
2056
2057         /*
2058          * So roots is the list of all the roots currently pointing at the
2059          * bytenr, including the ref we are adding if we are adding, or not if
2060          * we are removing a ref.  So we pass in the ref_root to skip that root
2061          * in our calculations.  We set old_refnct and new_refcnt cause who the
2062          * hell knows what everything looked like before, and it doesn't matter
2063          * except...
2064          */
2065         ret = qgroup_calc_old_refcnt(fs_info, oper->ref_root, tmp, roots, qgroups,
2066                                      seq, &old_roots, 0);
2067         if (ret < 0)
2068                 goto out;
2069
2070         /*
2071          * Now adjust the refcounts of the qgroups that care about this
2072          * reference, either the old_count in the case of removal or new_count
2073          * in the case of an addition.
2074          */
2075         ret = qgroup_calc_new_refcnt(fs_info, oper, qgroup, tmp, qgroups,
2076                                      seq);
2077         if (ret < 0)
2078                 goto out;
2079
2080         /*
2081          * ...in the case of removals.  If we had a removal before we got around
2082          * to processing this operation then we need to find that guy and count
2083          * his references as if they really existed so we don't end up screwing
2084          * up the exclusive counts.  Then whenever we go to process the delete
2085          * everything will be grand and we can account for whatever exclusive
2086          * changes need to be made there.  We also have to pass in old_roots so
2087          * we have an accurate count of the roots as it pertains to this
2088          * operations view of the world.
2089          */
2090         ret = qgroup_account_deleted_refs(fs_info, oper, tmp, qgroups, seq,
2091                                           &old_roots);
2092         if (ret < 0)
2093                 goto out;
2094
2095         /*
2096          * We are adding our root, need to adjust up the number of roots,
2097          * otherwise old_roots is the number of roots we want.
2098          */
2099         if (oper->type == BTRFS_QGROUP_OPER_ADD_SHARED) {
2100                 new_roots = old_roots + 1;
2101         } else {
2102                 new_roots = old_roots;
2103                 old_roots++;
2104         }
2105
2106         /*
2107          * Bump qgroup_seq to avoid seq overlap
2108          * XXX: This makes qgroup_seq mismatch with oper->seq.
2109          */
2110         fs_info->qgroup_seq += old_roots + 1;
2111
2112
2113         /*
2114          * And now the magic happens, bless Arne for having a pretty elegant
2115          * solution for this.
2116          */
2117         qgroup_adjust_counters(fs_info, oper->ref_root, oper->num_bytes,
2118                                qgroups, seq, old_roots, new_roots, 0);
2119 out:
2120         spin_unlock(&fs_info->qgroup_lock);
2121         ulist_free(qgroups);
2122         ulist_free(roots);
2123         ulist_free(tmp);
2124         return ret;
2125 }
2126
2127 /*
2128  * Process a reference to a shared subtree. This type of operation is
2129  * queued during snapshot removal when we encounter extents which are
2130  * shared between more than one root.
2131  */
2132 static int qgroup_subtree_accounting(struct btrfs_trans_handle *trans,
2133                                      struct btrfs_fs_info *fs_info,
2134                                      struct btrfs_qgroup_operation *oper)
2135 {
2136         struct ulist *roots = NULL;
2137         struct ulist_node *unode;
2138         struct ulist_iterator uiter;
2139         struct btrfs_qgroup_list *glist;
2140         struct ulist *parents;
2141         int ret = 0;
2142         int err;
2143         struct btrfs_qgroup *qg;
2144         u64 root_obj = 0;
2145         struct seq_list elem = SEQ_LIST_INIT(elem);
2146
2147         parents = ulist_alloc(GFP_NOFS);
2148         if (!parents)
2149                 return -ENOMEM;
2150
2151         btrfs_get_tree_mod_seq(fs_info, &elem);
2152         ret = btrfs_find_all_roots(trans, fs_info, oper->bytenr,
2153                                    elem.seq, &roots);
2154         btrfs_put_tree_mod_seq(fs_info, &elem);
2155         if (ret < 0)
2156                 goto out;
2157
2158         if (roots->nnodes != 1)
2159                 goto out;
2160
2161         ULIST_ITER_INIT(&uiter);
2162         unode = ulist_next(roots, &uiter); /* Only want 1 so no need to loop */
2163         /*
2164          * If we find our ref root then that means all refs
2165          * this extent has to the root have not yet been
2166          * deleted. In that case, we do nothing and let the
2167          * last ref for this bytenr drive our update.
2168          *
2169          * This can happen for example if an extent is
2170          * referenced multiple times in a snapshot (clone,
2171          * etc). If we are in the middle of snapshot removal,
2172          * queued updates for such an extent will find the
2173          * root if we have not yet finished removing the
2174          * snapshot.
2175          */
2176         if (unode->val == oper->ref_root)
2177                 goto out;
2178
2179         root_obj = unode->val;
2180         BUG_ON(!root_obj);
2181
2182         spin_lock(&fs_info->qgroup_lock);
2183         qg = find_qgroup_rb(fs_info, root_obj);
2184         if (!qg)
2185                 goto out_unlock;
2186
2187         qg->excl += oper->num_bytes;
2188         qg->excl_cmpr += oper->num_bytes;
2189         qgroup_dirty(fs_info, qg);
2190
2191         /*
2192          * Adjust counts for parent groups. First we find all
2193          * parents, then in the 2nd loop we do the adjustment
2194          * while adding parents of the parents to our ulist.
2195          */
2196         list_for_each_entry(glist, &qg->groups, next_group) {
2197                 err = ulist_add(parents, glist->group->qgroupid,
2198                                 ptr_to_u64(glist->group), GFP_ATOMIC);
2199                 if (err < 0) {
2200                         ret = err;
2201                         goto out_unlock;
2202                 }
2203         }
2204
2205         ULIST_ITER_INIT(&uiter);
2206         while ((unode = ulist_next(parents, &uiter))) {
2207                 qg = u64_to_ptr(unode->aux);
2208                 qg->excl += oper->num_bytes;
2209                 qg->excl_cmpr += oper->num_bytes;
2210                 qgroup_dirty(fs_info, qg);
2211
2212                 /* Add any parents of the parents */
2213                 list_for_each_entry(glist, &qg->groups, next_group) {
2214                         err = ulist_add(parents, glist->group->qgroupid,
2215                                         ptr_to_u64(glist->group), GFP_ATOMIC);
2216                         if (err < 0) {
2217                                 ret = err;
2218                                 goto out_unlock;
2219                         }
2220                 }
2221         }
2222
2223 out_unlock:
2224         spin_unlock(&fs_info->qgroup_lock);
2225
2226 out:
2227         ulist_free(roots);
2228         ulist_free(parents);
2229         return ret;
2230 }
2231
2232 /*
2233  * btrfs_qgroup_account_ref is called for every ref that is added to or deleted
2234  * from the fs. First, all roots referencing the extent are searched, and
2235  * then the space is accounted accordingly to the different roots. The
2236  * accounting algorithm works in 3 steps documented inline.
2237  */
2238 static int btrfs_qgroup_account(struct btrfs_trans_handle *trans,
2239                                 struct btrfs_fs_info *fs_info,
2240                                 struct btrfs_qgroup_operation *oper)
2241 {
2242         int ret = 0;
2243
2244         if (!fs_info->quota_enabled)
2245                 return 0;
2246
2247         BUG_ON(!fs_info->quota_root);
2248
2249         mutex_lock(&fs_info->qgroup_rescan_lock);
2250         if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN) {
2251                 if (fs_info->qgroup_rescan_progress.objectid <= oper->bytenr) {
2252                         mutex_unlock(&fs_info->qgroup_rescan_lock);
2253                         return 0;
2254                 }
2255         }
2256         mutex_unlock(&fs_info->qgroup_rescan_lock);
2257
2258         ASSERT(is_fstree(oper->ref_root));
2259
2260         trace_btrfs_qgroup_account(oper);
2261
2262         switch (oper->type) {
2263         case BTRFS_QGROUP_OPER_ADD_EXCL:
2264         case BTRFS_QGROUP_OPER_SUB_EXCL:
2265                 ret = qgroup_excl_accounting(fs_info, oper);
2266                 break;
2267         case BTRFS_QGROUP_OPER_ADD_SHARED:
2268         case BTRFS_QGROUP_OPER_SUB_SHARED:
2269                 ret = qgroup_shared_accounting(trans, fs_info, oper);
2270                 break;
2271         case BTRFS_QGROUP_OPER_SUB_SUBTREE:
2272                 ret = qgroup_subtree_accounting(trans, fs_info, oper);
2273                 break;
2274         default:
2275                 ASSERT(0);
2276         }
2277         return ret;
2278 }
2279
2280 /*
2281  * Needs to be called everytime we run delayed refs, even if there is an error
2282  * in order to cleanup outstanding operations.
2283  */
2284 int btrfs_delayed_qgroup_accounting(struct btrfs_trans_handle *trans,
2285                                     struct btrfs_fs_info *fs_info)
2286 {
2287         struct btrfs_qgroup_operation *oper;
2288         int ret = 0;
2289
2290         while (!list_empty(&trans->qgroup_ref_list)) {
2291                 oper = list_first_entry(&trans->qgroup_ref_list,
2292                                         struct btrfs_qgroup_operation, list);
2293                 list_del_init(&oper->list);
2294                 if (!ret || !trans->aborted)
2295                         ret = btrfs_qgroup_account(trans, fs_info, oper);
2296                 spin_lock(&fs_info->qgroup_op_lock);
2297                 rb_erase(&oper->n, &fs_info->qgroup_op_tree);
2298                 spin_unlock(&fs_info->qgroup_op_lock);
2299                 btrfs_put_tree_mod_seq(fs_info, &oper->elem);
2300                 kfree(oper);
2301         }
2302         return ret;
2303 }
2304
2305 /*
2306  * called from commit_transaction. Writes all changed qgroups to disk.
2307  */
2308 int btrfs_run_qgroups(struct btrfs_trans_handle *trans,
2309                       struct btrfs_fs_info *fs_info)
2310 {
2311         struct btrfs_root *quota_root = fs_info->quota_root;
2312         int ret = 0;
2313         int start_rescan_worker = 0;
2314
2315         if (!quota_root)
2316                 goto out;
2317
2318         if (!fs_info->quota_enabled && fs_info->pending_quota_state)
2319                 start_rescan_worker = 1;
2320
2321         fs_info->quota_enabled = fs_info->pending_quota_state;
2322
2323         spin_lock(&fs_info->qgroup_lock);
2324         while (!list_empty(&fs_info->dirty_qgroups)) {
2325                 struct btrfs_qgroup *qgroup;
2326                 qgroup = list_first_entry(&fs_info->dirty_qgroups,
2327                                           struct btrfs_qgroup, dirty);
2328                 list_del_init(&qgroup->dirty);
2329                 spin_unlock(&fs_info->qgroup_lock);
2330                 ret = update_qgroup_info_item(trans, quota_root, qgroup);
2331                 if (ret)
2332                         fs_info->qgroup_flags |=
2333                                         BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
2334                 ret = update_qgroup_limit_item(trans, quota_root, qgroup);
2335                 if (ret)
2336                         fs_info->qgroup_flags |=
2337                                         BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
2338                 spin_lock(&fs_info->qgroup_lock);
2339         }
2340         if (fs_info->quota_enabled)
2341                 fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_ON;
2342         else
2343                 fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_ON;
2344         spin_unlock(&fs_info->qgroup_lock);
2345
2346         ret = update_qgroup_status_item(trans, fs_info, quota_root);
2347         if (ret)
2348                 fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
2349
2350         if (!ret && start_rescan_worker) {
2351                 ret = qgroup_rescan_init(fs_info, 0, 1);
2352                 if (!ret) {
2353                         qgroup_rescan_zero_tracking(fs_info);
2354                         btrfs_queue_work(fs_info->qgroup_rescan_workers,
2355                                          &fs_info->qgroup_rescan_work);
2356                 }
2357                 ret = 0;
2358         }
2359
2360 out:
2361
2362         return ret;
2363 }
2364
2365 /*
2366  * copy the acounting information between qgroups. This is necessary when a
2367  * snapshot or a subvolume is created
2368  */
2369 int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans,
2370                          struct btrfs_fs_info *fs_info, u64 srcid, u64 objectid,
2371                          struct btrfs_qgroup_inherit *inherit)
2372 {
2373         int ret = 0;
2374         int i;
2375         u64 *i_qgroups;
2376         struct btrfs_root *quota_root = fs_info->quota_root;
2377         struct btrfs_qgroup *srcgroup;
2378         struct btrfs_qgroup *dstgroup;
2379         u32 level_size = 0;
2380         u64 nums;
2381
2382         mutex_lock(&fs_info->qgroup_ioctl_lock);
2383         if (!fs_info->quota_enabled)
2384                 goto out;
2385
2386         if (!quota_root) {
2387                 ret = -EINVAL;
2388                 goto out;
2389         }
2390
2391         if (inherit) {
2392                 i_qgroups = (u64 *)(inherit + 1);
2393                 nums = inherit->num_qgroups + 2 * inherit->num_ref_copies +
2394                        2 * inherit->num_excl_copies;
2395                 for (i = 0; i < nums; ++i) {
2396                         srcgroup = find_qgroup_rb(fs_info, *i_qgroups);
2397                         if (!srcgroup) {
2398                                 ret = -EINVAL;
2399                                 goto out;
2400                         }
2401
2402                         if ((srcgroup->qgroupid >> 48) <= (objectid >> 48)) {
2403                                 ret = -EINVAL;
2404                                 goto out;
2405                         }
2406                         ++i_qgroups;
2407                 }
2408         }
2409
2410         /*
2411          * create a tracking group for the subvol itself
2412          */
2413         ret = add_qgroup_item(trans, quota_root, objectid);
2414         if (ret)
2415                 goto out;
2416
2417         if (srcid) {
2418                 struct btrfs_root *srcroot;
2419                 struct btrfs_key srckey;
2420
2421                 srckey.objectid = srcid;
2422                 srckey.type = BTRFS_ROOT_ITEM_KEY;
2423                 srckey.offset = (u64)-1;
2424                 srcroot = btrfs_read_fs_root_no_name(fs_info, &srckey);
2425                 if (IS_ERR(srcroot)) {
2426                         ret = PTR_ERR(srcroot);
2427                         goto out;
2428                 }
2429
2430                 rcu_read_lock();
2431                 level_size = srcroot->nodesize;
2432                 rcu_read_unlock();
2433         }
2434
2435         /*
2436          * add qgroup to all inherited groups
2437          */
2438         if (inherit) {
2439                 i_qgroups = (u64 *)(inherit + 1);
2440                 for (i = 0; i < inherit->num_qgroups; ++i) {
2441                         ret = add_qgroup_relation_item(trans, quota_root,
2442                                                        objectid, *i_qgroups);
2443                         if (ret)
2444                                 goto out;
2445                         ret = add_qgroup_relation_item(trans, quota_root,
2446                                                        *i_qgroups, objectid);
2447                         if (ret)
2448                                 goto out;
2449                         ++i_qgroups;
2450                 }
2451         }
2452
2453
2454         spin_lock(&fs_info->qgroup_lock);
2455
2456         dstgroup = add_qgroup_rb(fs_info, objectid);
2457         if (IS_ERR(dstgroup)) {
2458                 ret = PTR_ERR(dstgroup);
2459                 goto unlock;
2460         }
2461
2462         if (inherit && inherit->flags & BTRFS_QGROUP_INHERIT_SET_LIMITS) {
2463                 dstgroup->lim_flags = inherit->lim.flags;
2464                 dstgroup->max_rfer = inherit->lim.max_rfer;
2465                 dstgroup->max_excl = inherit->lim.max_excl;
2466                 dstgroup->rsv_rfer = inherit->lim.rsv_rfer;
2467                 dstgroup->rsv_excl = inherit->lim.rsv_excl;
2468
2469                 ret = update_qgroup_limit_item(trans, quota_root, dstgroup);
2470                 if (ret) {
2471                         fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
2472                         btrfs_info(fs_info, "unable to update quota limit for %llu",
2473                                dstgroup->qgroupid);
2474                         goto unlock;
2475                 }
2476         }
2477
2478         if (srcid) {
2479                 srcgroup = find_qgroup_rb(fs_info, srcid);
2480                 if (!srcgroup)
2481                         goto unlock;
2482
2483                 /*
2484                  * We call inherit after we clone the root in order to make sure
2485                  * our counts don't go crazy, so at this point the only
2486                  * difference between the two roots should be the root node.
2487                  */
2488                 dstgroup->rfer = srcgroup->rfer;
2489                 dstgroup->rfer_cmpr = srcgroup->rfer_cmpr;
2490                 dstgroup->excl = level_size;
2491                 dstgroup->excl_cmpr = level_size;
2492                 srcgroup->excl = level_size;
2493                 srcgroup->excl_cmpr = level_size;
2494
2495                 /* inherit the limit info */
2496                 dstgroup->lim_flags = srcgroup->lim_flags;
2497                 dstgroup->max_rfer = srcgroup->max_rfer;
2498                 dstgroup->max_excl = srcgroup->max_excl;
2499                 dstgroup->rsv_rfer = srcgroup->rsv_rfer;
2500                 dstgroup->rsv_excl = srcgroup->rsv_excl;
2501
2502                 qgroup_dirty(fs_info, dstgroup);
2503                 qgroup_dirty(fs_info, srcgroup);
2504         }
2505
2506         if (!inherit)
2507                 goto unlock;
2508
2509         i_qgroups = (u64 *)(inherit + 1);
2510         for (i = 0; i < inherit->num_qgroups; ++i) {
2511                 ret = add_relation_rb(quota_root->fs_info, objectid,
2512                                       *i_qgroups);
2513                 if (ret)
2514                         goto unlock;
2515                 ++i_qgroups;
2516         }
2517
2518         for (i = 0; i <  inherit->num_ref_copies; ++i) {
2519                 struct btrfs_qgroup *src;
2520                 struct btrfs_qgroup *dst;
2521
2522                 src = find_qgroup_rb(fs_info, i_qgroups[0]);
2523                 dst = find_qgroup_rb(fs_info, i_qgroups[1]);
2524
2525                 if (!src || !dst) {
2526                         ret = -EINVAL;
2527                         goto unlock;
2528                 }
2529
2530                 dst->rfer = src->rfer - level_size;
2531                 dst->rfer_cmpr = src->rfer_cmpr - level_size;
2532                 i_qgroups += 2;
2533         }
2534         for (i = 0; i <  inherit->num_excl_copies; ++i) {
2535                 struct btrfs_qgroup *src;
2536                 struct btrfs_qgroup *dst;
2537
2538                 src = find_qgroup_rb(fs_info, i_qgroups[0]);
2539                 dst = find_qgroup_rb(fs_info, i_qgroups[1]);
2540
2541                 if (!src || !dst) {
2542                         ret = -EINVAL;
2543                         goto unlock;
2544                 }
2545
2546                 dst->excl = src->excl + level_size;
2547                 dst->excl_cmpr = src->excl_cmpr + level_size;
2548                 i_qgroups += 2;
2549         }
2550
2551 unlock:
2552         spin_unlock(&fs_info->qgroup_lock);
2553 out:
2554         mutex_unlock(&fs_info->qgroup_ioctl_lock);
2555         return ret;
2556 }
2557
2558 int btrfs_qgroup_reserve(struct btrfs_root *root, u64 num_bytes)
2559 {
2560         struct btrfs_root *quota_root;
2561         struct btrfs_qgroup *qgroup;
2562         struct btrfs_fs_info *fs_info = root->fs_info;
2563         u64 ref_root = root->root_key.objectid;
2564         int ret = 0;
2565         struct ulist_node *unode;
2566         struct ulist_iterator uiter;
2567
2568         if (!is_fstree(ref_root))
2569                 return 0;
2570
2571         if (num_bytes == 0)
2572                 return 0;
2573
2574         spin_lock(&fs_info->qgroup_lock);
2575         quota_root = fs_info->quota_root;
2576         if (!quota_root)
2577                 goto out;
2578
2579         qgroup = find_qgroup_rb(fs_info, ref_root);
2580         if (!qgroup)
2581                 goto out;
2582
2583         /*
2584          * in a first step, we check all affected qgroups if any limits would
2585          * be exceeded
2586          */
2587         ulist_reinit(fs_info->qgroup_ulist);
2588         ret = ulist_add(fs_info->qgroup_ulist, qgroup->qgroupid,
2589                         (uintptr_t)qgroup, GFP_ATOMIC);
2590         if (ret < 0)
2591                 goto out;
2592         ULIST_ITER_INIT(&uiter);
2593         while ((unode = ulist_next(fs_info->qgroup_ulist, &uiter))) {
2594                 struct btrfs_qgroup *qg;
2595                 struct btrfs_qgroup_list *glist;
2596
2597                 qg = u64_to_ptr(unode->aux);
2598
2599                 if ((qg->lim_flags & BTRFS_QGROUP_LIMIT_MAX_RFER) &&
2600                     qg->reserved + (s64)qg->rfer + num_bytes >
2601                     qg->max_rfer) {
2602                         ret = -EDQUOT;
2603                         goto out;
2604                 }
2605
2606                 if ((qg->lim_flags & BTRFS_QGROUP_LIMIT_MAX_EXCL) &&
2607                     qg->reserved + (s64)qg->excl + num_bytes >
2608                     qg->max_excl) {
2609                         ret = -EDQUOT;
2610                         goto out;
2611                 }
2612
2613                 list_for_each_entry(glist, &qg->groups, next_group) {
2614                         ret = ulist_add(fs_info->qgroup_ulist,
2615                                         glist->group->qgroupid,
2616                                         (uintptr_t)glist->group, GFP_ATOMIC);
2617                         if (ret < 0)
2618                                 goto out;
2619                 }
2620         }
2621         ret = 0;
2622         /*
2623          * no limits exceeded, now record the reservation into all qgroups
2624          */
2625         ULIST_ITER_INIT(&uiter);
2626         while ((unode = ulist_next(fs_info->qgroup_ulist, &uiter))) {
2627                 struct btrfs_qgroup *qg;
2628
2629                 qg = u64_to_ptr(unode->aux);
2630
2631                 qg->reserved += num_bytes;
2632         }
2633
2634 out:
2635         spin_unlock(&fs_info->qgroup_lock);
2636         return ret;
2637 }
2638
2639 void btrfs_qgroup_free(struct btrfs_root *root, u64 num_bytes)
2640 {
2641         struct btrfs_root *quota_root;
2642         struct btrfs_qgroup *qgroup;
2643         struct btrfs_fs_info *fs_info = root->fs_info;
2644         struct ulist_node *unode;
2645         struct ulist_iterator uiter;
2646         u64 ref_root = root->root_key.objectid;
2647         int ret = 0;
2648
2649         if (!is_fstree(ref_root))
2650                 return;
2651
2652         if (num_bytes == 0)
2653                 return;
2654
2655         spin_lock(&fs_info->qgroup_lock);
2656
2657         quota_root = fs_info->quota_root;
2658         if (!quota_root)
2659                 goto out;
2660
2661         qgroup = find_qgroup_rb(fs_info, ref_root);
2662         if (!qgroup)
2663                 goto out;
2664
2665         ulist_reinit(fs_info->qgroup_ulist);
2666         ret = ulist_add(fs_info->qgroup_ulist, qgroup->qgroupid,
2667                         (uintptr_t)qgroup, GFP_ATOMIC);
2668         if (ret < 0)
2669                 goto out;
2670         ULIST_ITER_INIT(&uiter);
2671         while ((unode = ulist_next(fs_info->qgroup_ulist, &uiter))) {
2672                 struct btrfs_qgroup *qg;
2673                 struct btrfs_qgroup_list *glist;
2674
2675                 qg = u64_to_ptr(unode->aux);
2676
2677                 qg->reserved -= num_bytes;
2678
2679                 list_for_each_entry(glist, &qg->groups, next_group) {
2680                         ret = ulist_add(fs_info->qgroup_ulist,
2681                                         glist->group->qgroupid,
2682                                         (uintptr_t)glist->group, GFP_ATOMIC);
2683                         if (ret < 0)
2684                                 goto out;
2685                 }
2686         }
2687
2688 out:
2689         spin_unlock(&fs_info->qgroup_lock);
2690 }
2691
2692 void assert_qgroups_uptodate(struct btrfs_trans_handle *trans)
2693 {
2694         if (list_empty(&trans->qgroup_ref_list) && !trans->delayed_ref_elem.seq)
2695                 return;
2696         btrfs_err(trans->root->fs_info,
2697                 "qgroups not uptodate in trans handle %p:  list is%s empty, "
2698                 "seq is %#x.%x",
2699                 trans, list_empty(&trans->qgroup_ref_list) ? "" : " not",
2700                 (u32)(trans->delayed_ref_elem.seq >> 32),
2701                 (u32)trans->delayed_ref_elem.seq);
2702         BUG();
2703 }
2704
2705 /*
2706  * returns < 0 on error, 0 when more leafs are to be scanned.
2707  * returns 1 when done.
2708  */
2709 static int
2710 qgroup_rescan_leaf(struct btrfs_fs_info *fs_info, struct btrfs_path *path,
2711                    struct btrfs_trans_handle *trans, struct ulist *qgroups,
2712                    struct ulist *tmp, struct extent_buffer *scratch_leaf)
2713 {
2714         struct btrfs_key found;
2715         struct ulist *roots = NULL;
2716         struct seq_list tree_mod_seq_elem = SEQ_LIST_INIT(tree_mod_seq_elem);
2717         u64 num_bytes;
2718         u64 seq;
2719         int new_roots;
2720         int slot;
2721         int ret;
2722
2723         path->leave_spinning = 1;
2724         mutex_lock(&fs_info->qgroup_rescan_lock);
2725         ret = btrfs_search_slot_for_read(fs_info->extent_root,
2726                                          &fs_info->qgroup_rescan_progress,
2727                                          path, 1, 0);
2728
2729         pr_debug("current progress key (%llu %u %llu), search_slot ret %d\n",
2730                  fs_info->qgroup_rescan_progress.objectid,
2731                  fs_info->qgroup_rescan_progress.type,
2732                  fs_info->qgroup_rescan_progress.offset, ret);
2733
2734         if (ret) {
2735                 /*
2736                  * The rescan is about to end, we will not be scanning any
2737                  * further blocks. We cannot unset the RESCAN flag here, because
2738                  * we want to commit the transaction if everything went well.
2739                  * To make the live accounting work in this phase, we set our
2740                  * scan progress pointer such that every real extent objectid
2741                  * will be smaller.
2742                  */
2743                 fs_info->qgroup_rescan_progress.objectid = (u64)-1;
2744                 btrfs_release_path(path);
2745                 mutex_unlock(&fs_info->qgroup_rescan_lock);
2746                 return ret;
2747         }
2748
2749         btrfs_item_key_to_cpu(path->nodes[0], &found,
2750                               btrfs_header_nritems(path->nodes[0]) - 1);
2751         fs_info->qgroup_rescan_progress.objectid = found.objectid + 1;
2752
2753         btrfs_get_tree_mod_seq(fs_info, &tree_mod_seq_elem);
2754         memcpy(scratch_leaf, path->nodes[0], sizeof(*scratch_leaf));
2755         slot = path->slots[0];
2756         btrfs_release_path(path);
2757         mutex_unlock(&fs_info->qgroup_rescan_lock);
2758
2759         for (; slot < btrfs_header_nritems(scratch_leaf); ++slot) {
2760                 btrfs_item_key_to_cpu(scratch_leaf, &found, slot);
2761                 if (found.type != BTRFS_EXTENT_ITEM_KEY &&
2762                     found.type != BTRFS_METADATA_ITEM_KEY)
2763                         continue;
2764                 if (found.type == BTRFS_METADATA_ITEM_KEY)
2765                         num_bytes = fs_info->extent_root->nodesize;
2766                 else
2767                         num_bytes = found.offset;
2768
2769                 ulist_reinit(qgroups);
2770                 ret = btrfs_find_all_roots(NULL, fs_info, found.objectid, 0,
2771                                            &roots);
2772                 if (ret < 0)
2773                         goto out;
2774                 spin_lock(&fs_info->qgroup_lock);
2775                 seq = fs_info->qgroup_seq;
2776                 fs_info->qgroup_seq += roots->nnodes + 1; /* max refcnt */
2777
2778                 new_roots = 0;
2779                 ret = qgroup_calc_old_refcnt(fs_info, 0, tmp, roots, qgroups,
2780                                              seq, &new_roots, 1);
2781                 if (ret < 0) {
2782                         spin_unlock(&fs_info->qgroup_lock);
2783                         ulist_free(roots);
2784                         goto out;
2785                 }
2786
2787                 ret = qgroup_adjust_counters(fs_info, 0, num_bytes, qgroups,
2788                                              seq, 0, new_roots, 1);
2789                 if (ret < 0) {
2790                         spin_unlock(&fs_info->qgroup_lock);
2791                         ulist_free(roots);
2792                         goto out;
2793                 }
2794                 spin_unlock(&fs_info->qgroup_lock);
2795                 ulist_free(roots);
2796         }
2797 out:
2798         btrfs_put_tree_mod_seq(fs_info, &tree_mod_seq_elem);
2799
2800         return ret;
2801 }
2802
2803 static void btrfs_qgroup_rescan_worker(struct btrfs_work *work)
2804 {
2805         struct btrfs_fs_info *fs_info = container_of(work, struct btrfs_fs_info,
2806                                                      qgroup_rescan_work);
2807         struct btrfs_path *path;
2808         struct btrfs_trans_handle *trans = NULL;
2809         struct ulist *tmp = NULL, *qgroups = NULL;
2810         struct extent_buffer *scratch_leaf = NULL;
2811         int err = -ENOMEM;
2812         int ret = 0;
2813
2814         path = btrfs_alloc_path();
2815         if (!path)
2816                 goto out;
2817         qgroups = ulist_alloc(GFP_NOFS);
2818         if (!qgroups)
2819                 goto out;
2820         tmp = ulist_alloc(GFP_NOFS);
2821         if (!tmp)
2822                 goto out;
2823         scratch_leaf = kmalloc(sizeof(*scratch_leaf), GFP_NOFS);
2824         if (!scratch_leaf)
2825                 goto out;
2826
2827         err = 0;
2828         while (!err) {
2829                 trans = btrfs_start_transaction(fs_info->fs_root, 0);
2830                 if (IS_ERR(trans)) {
2831                         err = PTR_ERR(trans);
2832                         break;
2833                 }
2834                 if (!fs_info->quota_enabled) {
2835                         err = -EINTR;
2836                 } else {
2837                         err = qgroup_rescan_leaf(fs_info, path, trans,
2838                                                  qgroups, tmp, scratch_leaf);
2839                 }
2840                 if (err > 0)
2841                         btrfs_commit_transaction(trans, fs_info->fs_root);
2842                 else
2843                         btrfs_end_transaction(trans, fs_info->fs_root);
2844         }
2845
2846 out:
2847         kfree(scratch_leaf);
2848         ulist_free(qgroups);
2849         ulist_free(tmp);
2850         btrfs_free_path(path);
2851
2852         mutex_lock(&fs_info->qgroup_rescan_lock);
2853         fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN;
2854
2855         if (err > 0 &&
2856             fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT) {
2857                 fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
2858         } else if (err < 0) {
2859                 fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
2860         }
2861         mutex_unlock(&fs_info->qgroup_rescan_lock);
2862
2863         /*
2864          * only update status, since the previous part has alreay updated the
2865          * qgroup info.
2866          */
2867         trans = btrfs_start_transaction(fs_info->quota_root, 1);
2868         if (IS_ERR(trans)) {
2869                 err = PTR_ERR(trans);
2870                 btrfs_err(fs_info,
2871                           "fail to start transaction for status update: %d\n",
2872                           err);
2873                 goto done;
2874         }
2875         ret = update_qgroup_status_item(trans, fs_info, fs_info->quota_root);
2876         if (ret < 0) {
2877                 err = ret;
2878                 btrfs_err(fs_info, "fail to update qgroup status: %d\n", err);
2879         }
2880         btrfs_end_transaction(trans, fs_info->quota_root);
2881
2882         if (err >= 0) {
2883                 btrfs_info(fs_info, "qgroup scan completed%s",
2884                         err > 0 ? " (inconsistency flag cleared)" : "");
2885         } else {
2886                 btrfs_err(fs_info, "qgroup scan failed with %d", err);
2887         }
2888
2889 done:
2890         complete_all(&fs_info->qgroup_rescan_completion);
2891 }
2892
2893 /*
2894  * Checks that (a) no rescan is running and (b) quota is enabled. Allocates all
2895  * memory required for the rescan context.
2896  */
2897 static int
2898 qgroup_rescan_init(struct btrfs_fs_info *fs_info, u64 progress_objectid,
2899                    int init_flags)
2900 {
2901         int ret = 0;
2902
2903         if (!init_flags &&
2904             (!(fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN) ||
2905              !(fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_ON))) {
2906                 ret = -EINVAL;
2907                 goto err;
2908         }
2909
2910         mutex_lock(&fs_info->qgroup_rescan_lock);
2911         spin_lock(&fs_info->qgroup_lock);
2912
2913         if (init_flags) {
2914                 if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN)
2915                         ret = -EINPROGRESS;
2916                 else if (!(fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_ON))
2917                         ret = -EINVAL;
2918
2919                 if (ret) {
2920                         spin_unlock(&fs_info->qgroup_lock);
2921                         mutex_unlock(&fs_info->qgroup_rescan_lock);
2922                         goto err;
2923                 }
2924                 fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_RESCAN;
2925         }
2926
2927         memset(&fs_info->qgroup_rescan_progress, 0,
2928                 sizeof(fs_info->qgroup_rescan_progress));
2929         fs_info->qgroup_rescan_progress.objectid = progress_objectid;
2930
2931         spin_unlock(&fs_info->qgroup_lock);
2932         mutex_unlock(&fs_info->qgroup_rescan_lock);
2933
2934         init_completion(&fs_info->qgroup_rescan_completion);
2935
2936         memset(&fs_info->qgroup_rescan_work, 0,
2937                sizeof(fs_info->qgroup_rescan_work));
2938         btrfs_init_work(&fs_info->qgroup_rescan_work,
2939                         btrfs_qgroup_rescan_helper,
2940                         btrfs_qgroup_rescan_worker, NULL, NULL);
2941
2942         if (ret) {
2943 err:
2944                 btrfs_info(fs_info, "qgroup_rescan_init failed with %d", ret);
2945                 return ret;
2946         }
2947
2948         return 0;
2949 }
2950
2951 static void
2952 qgroup_rescan_zero_tracking(struct btrfs_fs_info *fs_info)
2953 {
2954         struct rb_node *n;
2955         struct btrfs_qgroup *qgroup;
2956
2957         spin_lock(&fs_info->qgroup_lock);
2958         /* clear all current qgroup tracking information */
2959         for (n = rb_first(&fs_info->qgroup_tree); n; n = rb_next(n)) {
2960                 qgroup = rb_entry(n, struct btrfs_qgroup, node);
2961                 qgroup->rfer = 0;
2962                 qgroup->rfer_cmpr = 0;
2963                 qgroup->excl = 0;
2964                 qgroup->excl_cmpr = 0;
2965         }
2966         spin_unlock(&fs_info->qgroup_lock);
2967 }
2968
2969 int
2970 btrfs_qgroup_rescan(struct btrfs_fs_info *fs_info)
2971 {
2972         int ret = 0;
2973         struct btrfs_trans_handle *trans;
2974
2975         ret = qgroup_rescan_init(fs_info, 0, 1);
2976         if (ret)
2977                 return ret;
2978
2979         /*
2980          * We have set the rescan_progress to 0, which means no more
2981          * delayed refs will be accounted by btrfs_qgroup_account_ref.
2982          * However, btrfs_qgroup_account_ref may be right after its call
2983          * to btrfs_find_all_roots, in which case it would still do the
2984          * accounting.
2985          * To solve this, we're committing the transaction, which will
2986          * ensure we run all delayed refs and only after that, we are
2987          * going to clear all tracking information for a clean start.
2988          */
2989
2990         trans = btrfs_join_transaction(fs_info->fs_root);
2991         if (IS_ERR(trans)) {
2992                 fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN;
2993                 return PTR_ERR(trans);
2994         }
2995         ret = btrfs_commit_transaction(trans, fs_info->fs_root);
2996         if (ret) {
2997                 fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN;
2998                 return ret;
2999         }
3000
3001         qgroup_rescan_zero_tracking(fs_info);
3002
3003         btrfs_queue_work(fs_info->qgroup_rescan_workers,
3004                          &fs_info->qgroup_rescan_work);
3005
3006         return 0;
3007 }
3008
3009 int btrfs_qgroup_wait_for_completion(struct btrfs_fs_info *fs_info)
3010 {
3011         int running;
3012         int ret = 0;
3013
3014         mutex_lock(&fs_info->qgroup_rescan_lock);
3015         spin_lock(&fs_info->qgroup_lock);
3016         running = fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN;
3017         spin_unlock(&fs_info->qgroup_lock);
3018         mutex_unlock(&fs_info->qgroup_rescan_lock);
3019
3020         if (running)
3021                 ret = wait_for_completion_interruptible(
3022                                         &fs_info->qgroup_rescan_completion);
3023
3024         return ret;
3025 }
3026
3027 /*
3028  * this is only called from open_ctree where we're still single threaded, thus
3029  * locking is omitted here.
3030  */
3031 void
3032 btrfs_qgroup_rescan_resume(struct btrfs_fs_info *fs_info)
3033 {
3034         if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN)
3035                 btrfs_queue_work(fs_info->qgroup_rescan_workers,
3036                                  &fs_info->qgroup_rescan_work);
3037 }