]> git.kernelconcepts.de Git - karo-tx-linux.git/blob - fs/nilfs2/btree.c
Merge tag 'sound-4.7-rc1-2' of git://git.kernel.org/pub/scm/linux/kernel/git/tiwai...
[karo-tx-linux.git] / fs / nilfs2 / btree.c
1 /*
2  * btree.c - NILFS B-tree.
3  *
4  * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation.
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation; either version 2 of the License, or
9  * (at your option) any later version.
10  *
11  * This program is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14  * GNU General Public License for more details.
15  *
16  * Written by Koji Sato.
17  */
18
19 #include <linux/slab.h>
20 #include <linux/string.h>
21 #include <linux/errno.h>
22 #include <linux/pagevec.h>
23 #include "nilfs.h"
24 #include "page.h"
25 #include "btnode.h"
26 #include "btree.h"
27 #include "alloc.h"
28 #include "dat.h"
29
30 static void __nilfs_btree_init(struct nilfs_bmap *bmap);
31
32 static struct nilfs_btree_path *nilfs_btree_alloc_path(void)
33 {
34         struct nilfs_btree_path *path;
35         int level = NILFS_BTREE_LEVEL_DATA;
36
37         path = kmem_cache_alloc(nilfs_btree_path_cache, GFP_NOFS);
38         if (path == NULL)
39                 goto out;
40
41         for (; level < NILFS_BTREE_LEVEL_MAX; level++) {
42                 path[level].bp_bh = NULL;
43                 path[level].bp_sib_bh = NULL;
44                 path[level].bp_index = 0;
45                 path[level].bp_oldreq.bpr_ptr = NILFS_BMAP_INVALID_PTR;
46                 path[level].bp_newreq.bpr_ptr = NILFS_BMAP_INVALID_PTR;
47                 path[level].bp_op = NULL;
48         }
49
50 out:
51         return path;
52 }
53
54 static void nilfs_btree_free_path(struct nilfs_btree_path *path)
55 {
56         int level = NILFS_BTREE_LEVEL_DATA;
57
58         for (; level < NILFS_BTREE_LEVEL_MAX; level++)
59                 brelse(path[level].bp_bh);
60
61         kmem_cache_free(nilfs_btree_path_cache, path);
62 }
63
64 /*
65  * B-tree node operations
66  */
67 static int nilfs_btree_get_new_block(const struct nilfs_bmap *btree,
68                                      __u64 ptr, struct buffer_head **bhp)
69 {
70         struct address_space *btnc = &NILFS_BMAP_I(btree)->i_btnode_cache;
71         struct buffer_head *bh;
72
73         bh = nilfs_btnode_create_block(btnc, ptr);
74         if (!bh)
75                 return -ENOMEM;
76
77         set_buffer_nilfs_volatile(bh);
78         *bhp = bh;
79         return 0;
80 }
81
82 static int nilfs_btree_node_get_flags(const struct nilfs_btree_node *node)
83 {
84         return node->bn_flags;
85 }
86
87 static void
88 nilfs_btree_node_set_flags(struct nilfs_btree_node *node, int flags)
89 {
90         node->bn_flags = flags;
91 }
92
93 static int nilfs_btree_node_root(const struct nilfs_btree_node *node)
94 {
95         return nilfs_btree_node_get_flags(node) & NILFS_BTREE_NODE_ROOT;
96 }
97
98 static int nilfs_btree_node_get_level(const struct nilfs_btree_node *node)
99 {
100         return node->bn_level;
101 }
102
103 static void
104 nilfs_btree_node_set_level(struct nilfs_btree_node *node, int level)
105 {
106         node->bn_level = level;
107 }
108
109 static int nilfs_btree_node_get_nchildren(const struct nilfs_btree_node *node)
110 {
111         return le16_to_cpu(node->bn_nchildren);
112 }
113
114 static void
115 nilfs_btree_node_set_nchildren(struct nilfs_btree_node *node, int nchildren)
116 {
117         node->bn_nchildren = cpu_to_le16(nchildren);
118 }
119
120 static int nilfs_btree_node_size(const struct nilfs_bmap *btree)
121 {
122         return 1 << btree->b_inode->i_blkbits;
123 }
124
125 static int nilfs_btree_nchildren_per_block(const struct nilfs_bmap *btree)
126 {
127         return btree->b_nchildren_per_block;
128 }
129
130 static __le64 *
131 nilfs_btree_node_dkeys(const struct nilfs_btree_node *node)
132 {
133         return (__le64 *)((char *)(node + 1) +
134                           (nilfs_btree_node_root(node) ?
135                            0 : NILFS_BTREE_NODE_EXTRA_PAD_SIZE));
136 }
137
138 static __le64 *
139 nilfs_btree_node_dptrs(const struct nilfs_btree_node *node, int ncmax)
140 {
141         return (__le64 *)(nilfs_btree_node_dkeys(node) + ncmax);
142 }
143
144 static __u64
145 nilfs_btree_node_get_key(const struct nilfs_btree_node *node, int index)
146 {
147         return le64_to_cpu(*(nilfs_btree_node_dkeys(node) + index));
148 }
149
150 static void
151 nilfs_btree_node_set_key(struct nilfs_btree_node *node, int index, __u64 key)
152 {
153         *(nilfs_btree_node_dkeys(node) + index) = cpu_to_le64(key);
154 }
155
156 static __u64
157 nilfs_btree_node_get_ptr(const struct nilfs_btree_node *node, int index,
158                          int ncmax)
159 {
160         return le64_to_cpu(*(nilfs_btree_node_dptrs(node, ncmax) + index));
161 }
162
163 static void
164 nilfs_btree_node_set_ptr(struct nilfs_btree_node *node, int index, __u64 ptr,
165                          int ncmax)
166 {
167         *(nilfs_btree_node_dptrs(node, ncmax) + index) = cpu_to_le64(ptr);
168 }
169
170 static void nilfs_btree_node_init(struct nilfs_btree_node *node, int flags,
171                                   int level, int nchildren, int ncmax,
172                                   const __u64 *keys, const __u64 *ptrs)
173 {
174         __le64 *dkeys;
175         __le64 *dptrs;
176         int i;
177
178         nilfs_btree_node_set_flags(node, flags);
179         nilfs_btree_node_set_level(node, level);
180         nilfs_btree_node_set_nchildren(node, nchildren);
181
182         dkeys = nilfs_btree_node_dkeys(node);
183         dptrs = nilfs_btree_node_dptrs(node, ncmax);
184         for (i = 0; i < nchildren; i++) {
185                 dkeys[i] = cpu_to_le64(keys[i]);
186                 dptrs[i] = cpu_to_le64(ptrs[i]);
187         }
188 }
189
190 /* Assume the buffer heads corresponding to left and right are locked. */
191 static void nilfs_btree_node_move_left(struct nilfs_btree_node *left,
192                                        struct nilfs_btree_node *right,
193                                        int n, int lncmax, int rncmax)
194 {
195         __le64 *ldkeys, *rdkeys;
196         __le64 *ldptrs, *rdptrs;
197         int lnchildren, rnchildren;
198
199         ldkeys = nilfs_btree_node_dkeys(left);
200         ldptrs = nilfs_btree_node_dptrs(left, lncmax);
201         lnchildren = nilfs_btree_node_get_nchildren(left);
202
203         rdkeys = nilfs_btree_node_dkeys(right);
204         rdptrs = nilfs_btree_node_dptrs(right, rncmax);
205         rnchildren = nilfs_btree_node_get_nchildren(right);
206
207         memcpy(ldkeys + lnchildren, rdkeys, n * sizeof(*rdkeys));
208         memcpy(ldptrs + lnchildren, rdptrs, n * sizeof(*rdptrs));
209         memmove(rdkeys, rdkeys + n, (rnchildren - n) * sizeof(*rdkeys));
210         memmove(rdptrs, rdptrs + n, (rnchildren - n) * sizeof(*rdptrs));
211
212         lnchildren += n;
213         rnchildren -= n;
214         nilfs_btree_node_set_nchildren(left, lnchildren);
215         nilfs_btree_node_set_nchildren(right, rnchildren);
216 }
217
218 /* Assume that the buffer heads corresponding to left and right are locked. */
219 static void nilfs_btree_node_move_right(struct nilfs_btree_node *left,
220                                         struct nilfs_btree_node *right,
221                                         int n, int lncmax, int rncmax)
222 {
223         __le64 *ldkeys, *rdkeys;
224         __le64 *ldptrs, *rdptrs;
225         int lnchildren, rnchildren;
226
227         ldkeys = nilfs_btree_node_dkeys(left);
228         ldptrs = nilfs_btree_node_dptrs(left, lncmax);
229         lnchildren = nilfs_btree_node_get_nchildren(left);
230
231         rdkeys = nilfs_btree_node_dkeys(right);
232         rdptrs = nilfs_btree_node_dptrs(right, rncmax);
233         rnchildren = nilfs_btree_node_get_nchildren(right);
234
235         memmove(rdkeys + n, rdkeys, rnchildren * sizeof(*rdkeys));
236         memmove(rdptrs + n, rdptrs, rnchildren * sizeof(*rdptrs));
237         memcpy(rdkeys, ldkeys + lnchildren - n, n * sizeof(*rdkeys));
238         memcpy(rdptrs, ldptrs + lnchildren - n, n * sizeof(*rdptrs));
239
240         lnchildren -= n;
241         rnchildren += n;
242         nilfs_btree_node_set_nchildren(left, lnchildren);
243         nilfs_btree_node_set_nchildren(right, rnchildren);
244 }
245
246 /* Assume that the buffer head corresponding to node is locked. */
247 static void nilfs_btree_node_insert(struct nilfs_btree_node *node, int index,
248                                     __u64 key, __u64 ptr, int ncmax)
249 {
250         __le64 *dkeys;
251         __le64 *dptrs;
252         int nchildren;
253
254         dkeys = nilfs_btree_node_dkeys(node);
255         dptrs = nilfs_btree_node_dptrs(node, ncmax);
256         nchildren = nilfs_btree_node_get_nchildren(node);
257         if (index < nchildren) {
258                 memmove(dkeys + index + 1, dkeys + index,
259                         (nchildren - index) * sizeof(*dkeys));
260                 memmove(dptrs + index + 1, dptrs + index,
261                         (nchildren - index) * sizeof(*dptrs));
262         }
263         dkeys[index] = cpu_to_le64(key);
264         dptrs[index] = cpu_to_le64(ptr);
265         nchildren++;
266         nilfs_btree_node_set_nchildren(node, nchildren);
267 }
268
269 /* Assume that the buffer head corresponding to node is locked. */
270 static void nilfs_btree_node_delete(struct nilfs_btree_node *node, int index,
271                                     __u64 *keyp, __u64 *ptrp, int ncmax)
272 {
273         __u64 key;
274         __u64 ptr;
275         __le64 *dkeys;
276         __le64 *dptrs;
277         int nchildren;
278
279         dkeys = nilfs_btree_node_dkeys(node);
280         dptrs = nilfs_btree_node_dptrs(node, ncmax);
281         key = le64_to_cpu(dkeys[index]);
282         ptr = le64_to_cpu(dptrs[index]);
283         nchildren = nilfs_btree_node_get_nchildren(node);
284         if (keyp != NULL)
285                 *keyp = key;
286         if (ptrp != NULL)
287                 *ptrp = ptr;
288
289         if (index < nchildren - 1) {
290                 memmove(dkeys + index, dkeys + index + 1,
291                         (nchildren - index - 1) * sizeof(*dkeys));
292                 memmove(dptrs + index, dptrs + index + 1,
293                         (nchildren - index - 1) * sizeof(*dptrs));
294         }
295         nchildren--;
296         nilfs_btree_node_set_nchildren(node, nchildren);
297 }
298
299 static int nilfs_btree_node_lookup(const struct nilfs_btree_node *node,
300                                    __u64 key, int *indexp)
301 {
302         __u64 nkey;
303         int index, low, high, s;
304
305         /* binary search */
306         low = 0;
307         high = nilfs_btree_node_get_nchildren(node) - 1;
308         index = 0;
309         s = 0;
310         while (low <= high) {
311                 index = (low + high) / 2;
312                 nkey = nilfs_btree_node_get_key(node, index);
313                 if (nkey == key) {
314                         s = 0;
315                         goto out;
316                 } else if (nkey < key) {
317                         low = index + 1;
318                         s = -1;
319                 } else {
320                         high = index - 1;
321                         s = 1;
322                 }
323         }
324
325         /* adjust index */
326         if (nilfs_btree_node_get_level(node) > NILFS_BTREE_LEVEL_NODE_MIN) {
327                 if (s > 0 && index > 0)
328                         index--;
329         } else if (s < 0)
330                 index++;
331
332  out:
333         *indexp = index;
334
335         return s == 0;
336 }
337
338 /**
339  * nilfs_btree_node_broken - verify consistency of btree node
340  * @node: btree node block to be examined
341  * @size: node size (in bytes)
342  * @blocknr: block number
343  *
344  * Return Value: If node is broken, 1 is returned. Otherwise, 0 is returned.
345  */
346 static int nilfs_btree_node_broken(const struct nilfs_btree_node *node,
347                                    size_t size, sector_t blocknr)
348 {
349         int level, flags, nchildren;
350         int ret = 0;
351
352         level = nilfs_btree_node_get_level(node);
353         flags = nilfs_btree_node_get_flags(node);
354         nchildren = nilfs_btree_node_get_nchildren(node);
355
356         if (unlikely(level < NILFS_BTREE_LEVEL_NODE_MIN ||
357                      level >= NILFS_BTREE_LEVEL_MAX ||
358                      (flags & NILFS_BTREE_NODE_ROOT) ||
359                      nchildren < 0 ||
360                      nchildren > NILFS_BTREE_NODE_NCHILDREN_MAX(size))) {
361                 printk(KERN_CRIT "NILFS: bad btree node (blocknr=%llu): "
362                        "level = %d, flags = 0x%x, nchildren = %d\n",
363                        (unsigned long long)blocknr, level, flags, nchildren);
364                 ret = 1;
365         }
366         return ret;
367 }
368
369 /**
370  * nilfs_btree_root_broken - verify consistency of btree root node
371  * @node: btree root node to be examined
372  * @ino: inode number
373  *
374  * Return Value: If node is broken, 1 is returned. Otherwise, 0 is returned.
375  */
376 static int nilfs_btree_root_broken(const struct nilfs_btree_node *node,
377                                    unsigned long ino)
378 {
379         int level, flags, nchildren;
380         int ret = 0;
381
382         level = nilfs_btree_node_get_level(node);
383         flags = nilfs_btree_node_get_flags(node);
384         nchildren = nilfs_btree_node_get_nchildren(node);
385
386         if (unlikely(level < NILFS_BTREE_LEVEL_NODE_MIN ||
387                      level >= NILFS_BTREE_LEVEL_MAX ||
388                      nchildren < 0 ||
389                      nchildren > NILFS_BTREE_ROOT_NCHILDREN_MAX)) {
390                 pr_crit("NILFS: bad btree root (inode number=%lu): level = %d, flags = 0x%x, nchildren = %d\n",
391                         ino, level, flags, nchildren);
392                 ret = 1;
393         }
394         return ret;
395 }
396
397 int nilfs_btree_broken_node_block(struct buffer_head *bh)
398 {
399         int ret;
400
401         if (buffer_nilfs_checked(bh))
402                 return 0;
403
404         ret = nilfs_btree_node_broken((struct nilfs_btree_node *)bh->b_data,
405                                        bh->b_size, bh->b_blocknr);
406         if (likely(!ret))
407                 set_buffer_nilfs_checked(bh);
408         return ret;
409 }
410
411 static struct nilfs_btree_node *
412 nilfs_btree_get_root(const struct nilfs_bmap *btree)
413 {
414         return (struct nilfs_btree_node *)btree->b_u.u_data;
415 }
416
417 static struct nilfs_btree_node *
418 nilfs_btree_get_nonroot_node(const struct nilfs_btree_path *path, int level)
419 {
420         return (struct nilfs_btree_node *)path[level].bp_bh->b_data;
421 }
422
423 static struct nilfs_btree_node *
424 nilfs_btree_get_sib_node(const struct nilfs_btree_path *path, int level)
425 {
426         return (struct nilfs_btree_node *)path[level].bp_sib_bh->b_data;
427 }
428
429 static int nilfs_btree_height(const struct nilfs_bmap *btree)
430 {
431         return nilfs_btree_node_get_level(nilfs_btree_get_root(btree)) + 1;
432 }
433
434 static struct nilfs_btree_node *
435 nilfs_btree_get_node(const struct nilfs_bmap *btree,
436                      const struct nilfs_btree_path *path,
437                      int level, int *ncmaxp)
438 {
439         struct nilfs_btree_node *node;
440
441         if (level == nilfs_btree_height(btree) - 1) {
442                 node = nilfs_btree_get_root(btree);
443                 *ncmaxp = NILFS_BTREE_ROOT_NCHILDREN_MAX;
444         } else {
445                 node = nilfs_btree_get_nonroot_node(path, level);
446                 *ncmaxp = nilfs_btree_nchildren_per_block(btree);
447         }
448         return node;
449 }
450
451 static int
452 nilfs_btree_bad_node(struct nilfs_btree_node *node, int level)
453 {
454         if (unlikely(nilfs_btree_node_get_level(node) != level)) {
455                 dump_stack();
456                 printk(KERN_CRIT "NILFS: btree level mismatch: %d != %d\n",
457                        nilfs_btree_node_get_level(node), level);
458                 return 1;
459         }
460         return 0;
461 }
462
463 struct nilfs_btree_readahead_info {
464         struct nilfs_btree_node *node;  /* parent node */
465         int max_ra_blocks;              /* max nof blocks to read ahead */
466         int index;                      /* current index on the parent node */
467         int ncmax;                      /* nof children in the parent node */
468 };
469
470 static int __nilfs_btree_get_block(const struct nilfs_bmap *btree, __u64 ptr,
471                                    struct buffer_head **bhp,
472                                    const struct nilfs_btree_readahead_info *ra)
473 {
474         struct address_space *btnc = &NILFS_BMAP_I(btree)->i_btnode_cache;
475         struct buffer_head *bh, *ra_bh;
476         sector_t submit_ptr = 0;
477         int ret;
478
479         ret = nilfs_btnode_submit_block(btnc, ptr, 0, READ, &bh, &submit_ptr);
480         if (ret) {
481                 if (ret != -EEXIST)
482                         return ret;
483                 goto out_check;
484         }
485
486         if (ra) {
487                 int i, n;
488                 __u64 ptr2;
489
490                 /* read ahead sibling nodes */
491                 for (n = ra->max_ra_blocks, i = ra->index + 1;
492                      n > 0 && i < ra->ncmax; n--, i++) {
493                         ptr2 = nilfs_btree_node_get_ptr(ra->node, i, ra->ncmax);
494
495                         ret = nilfs_btnode_submit_block(btnc, ptr2, 0, READA,
496                                                         &ra_bh, &submit_ptr);
497                         if (likely(!ret || ret == -EEXIST))
498                                 brelse(ra_bh);
499                         else if (ret != -EBUSY)
500                                 break;
501                         if (!buffer_locked(bh))
502                                 goto out_no_wait;
503                 }
504         }
505
506         wait_on_buffer(bh);
507
508  out_no_wait:
509         if (!buffer_uptodate(bh)) {
510                 brelse(bh);
511                 return -EIO;
512         }
513
514  out_check:
515         if (nilfs_btree_broken_node_block(bh)) {
516                 clear_buffer_uptodate(bh);
517                 brelse(bh);
518                 return -EINVAL;
519         }
520
521         *bhp = bh;
522         return 0;
523 }
524
525 static int nilfs_btree_get_block(const struct nilfs_bmap *btree, __u64 ptr,
526                                    struct buffer_head **bhp)
527 {
528         return __nilfs_btree_get_block(btree, ptr, bhp, NULL);
529 }
530
531 static int nilfs_btree_do_lookup(const struct nilfs_bmap *btree,
532                                  struct nilfs_btree_path *path,
533                                  __u64 key, __u64 *ptrp, int minlevel,
534                                  int readahead)
535 {
536         struct nilfs_btree_node *node;
537         struct nilfs_btree_readahead_info p, *ra;
538         __u64 ptr;
539         int level, index, found, ncmax, ret;
540
541         node = nilfs_btree_get_root(btree);
542         level = nilfs_btree_node_get_level(node);
543         if (level < minlevel || nilfs_btree_node_get_nchildren(node) <= 0)
544                 return -ENOENT;
545
546         found = nilfs_btree_node_lookup(node, key, &index);
547         ptr = nilfs_btree_node_get_ptr(node, index,
548                                        NILFS_BTREE_ROOT_NCHILDREN_MAX);
549         path[level].bp_bh = NULL;
550         path[level].bp_index = index;
551
552         ncmax = nilfs_btree_nchildren_per_block(btree);
553
554         while (--level >= minlevel) {
555                 ra = NULL;
556                 if (level == NILFS_BTREE_LEVEL_NODE_MIN && readahead) {
557                         p.node = nilfs_btree_get_node(btree, path, level + 1,
558                                                       &p.ncmax);
559                         p.index = index;
560                         p.max_ra_blocks = 7;
561                         ra = &p;
562                 }
563                 ret = __nilfs_btree_get_block(btree, ptr, &path[level].bp_bh,
564                                               ra);
565                 if (ret < 0)
566                         return ret;
567
568                 node = nilfs_btree_get_nonroot_node(path, level);
569                 if (nilfs_btree_bad_node(node, level))
570                         return -EINVAL;
571                 if (!found)
572                         found = nilfs_btree_node_lookup(node, key, &index);
573                 else
574                         index = 0;
575                 if (index < ncmax) {
576                         ptr = nilfs_btree_node_get_ptr(node, index, ncmax);
577                 } else {
578                         WARN_ON(found || level != NILFS_BTREE_LEVEL_NODE_MIN);
579                         /* insert */
580                         ptr = NILFS_BMAP_INVALID_PTR;
581                 }
582                 path[level].bp_index = index;
583         }
584         if (!found)
585                 return -ENOENT;
586
587         if (ptrp != NULL)
588                 *ptrp = ptr;
589
590         return 0;
591 }
592
593 static int nilfs_btree_do_lookup_last(const struct nilfs_bmap *btree,
594                                       struct nilfs_btree_path *path,
595                                       __u64 *keyp, __u64 *ptrp)
596 {
597         struct nilfs_btree_node *node;
598         __u64 ptr;
599         int index, level, ncmax, ret;
600
601         node = nilfs_btree_get_root(btree);
602         index = nilfs_btree_node_get_nchildren(node) - 1;
603         if (index < 0)
604                 return -ENOENT;
605         level = nilfs_btree_node_get_level(node);
606         ptr = nilfs_btree_node_get_ptr(node, index,
607                                        NILFS_BTREE_ROOT_NCHILDREN_MAX);
608         path[level].bp_bh = NULL;
609         path[level].bp_index = index;
610         ncmax = nilfs_btree_nchildren_per_block(btree);
611
612         for (level--; level > 0; level--) {
613                 ret = nilfs_btree_get_block(btree, ptr, &path[level].bp_bh);
614                 if (ret < 0)
615                         return ret;
616                 node = nilfs_btree_get_nonroot_node(path, level);
617                 if (nilfs_btree_bad_node(node, level))
618                         return -EINVAL;
619                 index = nilfs_btree_node_get_nchildren(node) - 1;
620                 ptr = nilfs_btree_node_get_ptr(node, index, ncmax);
621                 path[level].bp_index = index;
622         }
623
624         if (keyp != NULL)
625                 *keyp = nilfs_btree_node_get_key(node, index);
626         if (ptrp != NULL)
627                 *ptrp = ptr;
628
629         return 0;
630 }
631
632 /**
633  * nilfs_btree_get_next_key - get next valid key from btree path array
634  * @btree: bmap struct of btree
635  * @path: array of nilfs_btree_path struct
636  * @minlevel: start level
637  * @nextkey: place to store the next valid key
638  *
639  * Return Value: If a next key was found, 0 is returned. Otherwise,
640  * -ENOENT is returned.
641  */
642 static int nilfs_btree_get_next_key(const struct nilfs_bmap *btree,
643                                     const struct nilfs_btree_path *path,
644                                     int minlevel, __u64 *nextkey)
645 {
646         struct nilfs_btree_node *node;
647         int maxlevel = nilfs_btree_height(btree) - 1;
648         int index, next_adj, level;
649
650         /* Next index is already set to bp_index for leaf nodes. */
651         next_adj = 0;
652         for (level = minlevel; level <= maxlevel; level++) {
653                 if (level == maxlevel)
654                         node = nilfs_btree_get_root(btree);
655                 else
656                         node = nilfs_btree_get_nonroot_node(path, level);
657
658                 index = path[level].bp_index + next_adj;
659                 if (index < nilfs_btree_node_get_nchildren(node)) {
660                         /* Next key is in this node */
661                         *nextkey = nilfs_btree_node_get_key(node, index);
662                         return 0;
663                 }
664                 /* For non-leaf nodes, next index is stored at bp_index + 1. */
665                 next_adj = 1;
666         }
667         return -ENOENT;
668 }
669
670 static int nilfs_btree_lookup(const struct nilfs_bmap *btree,
671                               __u64 key, int level, __u64 *ptrp)
672 {
673         struct nilfs_btree_path *path;
674         int ret;
675
676         path = nilfs_btree_alloc_path();
677         if (path == NULL)
678                 return -ENOMEM;
679
680         ret = nilfs_btree_do_lookup(btree, path, key, ptrp, level, 0);
681
682         nilfs_btree_free_path(path);
683
684         return ret;
685 }
686
687 static int nilfs_btree_lookup_contig(const struct nilfs_bmap *btree,
688                                      __u64 key, __u64 *ptrp,
689                                      unsigned int maxblocks)
690 {
691         struct nilfs_btree_path *path;
692         struct nilfs_btree_node *node;
693         struct inode *dat = NULL;
694         __u64 ptr, ptr2;
695         sector_t blocknr;
696         int level = NILFS_BTREE_LEVEL_NODE_MIN;
697         int ret, cnt, index, maxlevel, ncmax;
698         struct nilfs_btree_readahead_info p;
699
700         path = nilfs_btree_alloc_path();
701         if (path == NULL)
702                 return -ENOMEM;
703
704         ret = nilfs_btree_do_lookup(btree, path, key, &ptr, level, 1);
705         if (ret < 0)
706                 goto out;
707
708         if (NILFS_BMAP_USE_VBN(btree)) {
709                 dat = nilfs_bmap_get_dat(btree);
710                 ret = nilfs_dat_translate(dat, ptr, &blocknr);
711                 if (ret < 0)
712                         goto out;
713                 ptr = blocknr;
714         }
715         cnt = 1;
716         if (cnt == maxblocks)
717                 goto end;
718
719         maxlevel = nilfs_btree_height(btree) - 1;
720         node = nilfs_btree_get_node(btree, path, level, &ncmax);
721         index = path[level].bp_index + 1;
722         for (;;) {
723                 while (index < nilfs_btree_node_get_nchildren(node)) {
724                         if (nilfs_btree_node_get_key(node, index) !=
725                             key + cnt)
726                                 goto end;
727                         ptr2 = nilfs_btree_node_get_ptr(node, index, ncmax);
728                         if (dat) {
729                                 ret = nilfs_dat_translate(dat, ptr2, &blocknr);
730                                 if (ret < 0)
731                                         goto out;
732                                 ptr2 = blocknr;
733                         }
734                         if (ptr2 != ptr + cnt || ++cnt == maxblocks)
735                                 goto end;
736                         index++;
737                         continue;
738                 }
739                 if (level == maxlevel)
740                         break;
741
742                 /* look-up right sibling node */
743                 p.node = nilfs_btree_get_node(btree, path, level + 1, &p.ncmax);
744                 p.index = path[level + 1].bp_index + 1;
745                 p.max_ra_blocks = 7;
746                 if (p.index >= nilfs_btree_node_get_nchildren(p.node) ||
747                     nilfs_btree_node_get_key(p.node, p.index) != key + cnt)
748                         break;
749                 ptr2 = nilfs_btree_node_get_ptr(p.node, p.index, p.ncmax);
750                 path[level + 1].bp_index = p.index;
751
752                 brelse(path[level].bp_bh);
753                 path[level].bp_bh = NULL;
754
755                 ret = __nilfs_btree_get_block(btree, ptr2, &path[level].bp_bh,
756                                               &p);
757                 if (ret < 0)
758                         goto out;
759                 node = nilfs_btree_get_nonroot_node(path, level);
760                 ncmax = nilfs_btree_nchildren_per_block(btree);
761                 index = 0;
762                 path[level].bp_index = index;
763         }
764  end:
765         *ptrp = ptr;
766         ret = cnt;
767  out:
768         nilfs_btree_free_path(path);
769         return ret;
770 }
771
772 static void nilfs_btree_promote_key(struct nilfs_bmap *btree,
773                                     struct nilfs_btree_path *path,
774                                     int level, __u64 key)
775 {
776         if (level < nilfs_btree_height(btree) - 1) {
777                 do {
778                         nilfs_btree_node_set_key(
779                                 nilfs_btree_get_nonroot_node(path, level),
780                                 path[level].bp_index, key);
781                         if (!buffer_dirty(path[level].bp_bh))
782                                 mark_buffer_dirty(path[level].bp_bh);
783                 } while ((path[level].bp_index == 0) &&
784                          (++level < nilfs_btree_height(btree) - 1));
785         }
786
787         /* root */
788         if (level == nilfs_btree_height(btree) - 1) {
789                 nilfs_btree_node_set_key(nilfs_btree_get_root(btree),
790                                          path[level].bp_index, key);
791         }
792 }
793
794 static void nilfs_btree_do_insert(struct nilfs_bmap *btree,
795                                   struct nilfs_btree_path *path,
796                                   int level, __u64 *keyp, __u64 *ptrp)
797 {
798         struct nilfs_btree_node *node;
799         int ncblk;
800
801         if (level < nilfs_btree_height(btree) - 1) {
802                 node = nilfs_btree_get_nonroot_node(path, level);
803                 ncblk = nilfs_btree_nchildren_per_block(btree);
804                 nilfs_btree_node_insert(node, path[level].bp_index,
805                                         *keyp, *ptrp, ncblk);
806                 if (!buffer_dirty(path[level].bp_bh))
807                         mark_buffer_dirty(path[level].bp_bh);
808
809                 if (path[level].bp_index == 0)
810                         nilfs_btree_promote_key(btree, path, level + 1,
811                                                 nilfs_btree_node_get_key(node,
812                                                                          0));
813         } else {
814                 node = nilfs_btree_get_root(btree);
815                 nilfs_btree_node_insert(node, path[level].bp_index,
816                                         *keyp, *ptrp,
817                                         NILFS_BTREE_ROOT_NCHILDREN_MAX);
818         }
819 }
820
821 static void nilfs_btree_carry_left(struct nilfs_bmap *btree,
822                                    struct nilfs_btree_path *path,
823                                    int level, __u64 *keyp, __u64 *ptrp)
824 {
825         struct nilfs_btree_node *node, *left;
826         int nchildren, lnchildren, n, move, ncblk;
827
828         node = nilfs_btree_get_nonroot_node(path, level);
829         left = nilfs_btree_get_sib_node(path, level);
830         nchildren = nilfs_btree_node_get_nchildren(node);
831         lnchildren = nilfs_btree_node_get_nchildren(left);
832         ncblk = nilfs_btree_nchildren_per_block(btree);
833         move = 0;
834
835         n = (nchildren + lnchildren + 1) / 2 - lnchildren;
836         if (n > path[level].bp_index) {
837                 /* move insert point */
838                 n--;
839                 move = 1;
840         }
841
842         nilfs_btree_node_move_left(left, node, n, ncblk, ncblk);
843
844         if (!buffer_dirty(path[level].bp_bh))
845                 mark_buffer_dirty(path[level].bp_bh);
846         if (!buffer_dirty(path[level].bp_sib_bh))
847                 mark_buffer_dirty(path[level].bp_sib_bh);
848
849         nilfs_btree_promote_key(btree, path, level + 1,
850                                 nilfs_btree_node_get_key(node, 0));
851
852         if (move) {
853                 brelse(path[level].bp_bh);
854                 path[level].bp_bh = path[level].bp_sib_bh;
855                 path[level].bp_sib_bh = NULL;
856                 path[level].bp_index += lnchildren;
857                 path[level + 1].bp_index--;
858         } else {
859                 brelse(path[level].bp_sib_bh);
860                 path[level].bp_sib_bh = NULL;
861                 path[level].bp_index -= n;
862         }
863
864         nilfs_btree_do_insert(btree, path, level, keyp, ptrp);
865 }
866
867 static void nilfs_btree_carry_right(struct nilfs_bmap *btree,
868                                     struct nilfs_btree_path *path,
869                                     int level, __u64 *keyp, __u64 *ptrp)
870 {
871         struct nilfs_btree_node *node, *right;
872         int nchildren, rnchildren, n, move, ncblk;
873
874         node = nilfs_btree_get_nonroot_node(path, level);
875         right = nilfs_btree_get_sib_node(path, level);
876         nchildren = nilfs_btree_node_get_nchildren(node);
877         rnchildren = nilfs_btree_node_get_nchildren(right);
878         ncblk = nilfs_btree_nchildren_per_block(btree);
879         move = 0;
880
881         n = (nchildren + rnchildren + 1) / 2 - rnchildren;
882         if (n > nchildren - path[level].bp_index) {
883                 /* move insert point */
884                 n--;
885                 move = 1;
886         }
887
888         nilfs_btree_node_move_right(node, right, n, ncblk, ncblk);
889
890         if (!buffer_dirty(path[level].bp_bh))
891                 mark_buffer_dirty(path[level].bp_bh);
892         if (!buffer_dirty(path[level].bp_sib_bh))
893                 mark_buffer_dirty(path[level].bp_sib_bh);
894
895         path[level + 1].bp_index++;
896         nilfs_btree_promote_key(btree, path, level + 1,
897                                 nilfs_btree_node_get_key(right, 0));
898         path[level + 1].bp_index--;
899
900         if (move) {
901                 brelse(path[level].bp_bh);
902                 path[level].bp_bh = path[level].bp_sib_bh;
903                 path[level].bp_sib_bh = NULL;
904                 path[level].bp_index -= nilfs_btree_node_get_nchildren(node);
905                 path[level + 1].bp_index++;
906         } else {
907                 brelse(path[level].bp_sib_bh);
908                 path[level].bp_sib_bh = NULL;
909         }
910
911         nilfs_btree_do_insert(btree, path, level, keyp, ptrp);
912 }
913
914 static void nilfs_btree_split(struct nilfs_bmap *btree,
915                               struct nilfs_btree_path *path,
916                               int level, __u64 *keyp, __u64 *ptrp)
917 {
918         struct nilfs_btree_node *node, *right;
919         int nchildren, n, move, ncblk;
920
921         node = nilfs_btree_get_nonroot_node(path, level);
922         right = nilfs_btree_get_sib_node(path, level);
923         nchildren = nilfs_btree_node_get_nchildren(node);
924         ncblk = nilfs_btree_nchildren_per_block(btree);
925         move = 0;
926
927         n = (nchildren + 1) / 2;
928         if (n > nchildren - path[level].bp_index) {
929                 n--;
930                 move = 1;
931         }
932
933         nilfs_btree_node_move_right(node, right, n, ncblk, ncblk);
934
935         if (!buffer_dirty(path[level].bp_bh))
936                 mark_buffer_dirty(path[level].bp_bh);
937         if (!buffer_dirty(path[level].bp_sib_bh))
938                 mark_buffer_dirty(path[level].bp_sib_bh);
939
940         if (move) {
941                 path[level].bp_index -= nilfs_btree_node_get_nchildren(node);
942                 nilfs_btree_node_insert(right, path[level].bp_index,
943                                         *keyp, *ptrp, ncblk);
944
945                 *keyp = nilfs_btree_node_get_key(right, 0);
946                 *ptrp = path[level].bp_newreq.bpr_ptr;
947
948                 brelse(path[level].bp_bh);
949                 path[level].bp_bh = path[level].bp_sib_bh;
950                 path[level].bp_sib_bh = NULL;
951         } else {
952                 nilfs_btree_do_insert(btree, path, level, keyp, ptrp);
953
954                 *keyp = nilfs_btree_node_get_key(right, 0);
955                 *ptrp = path[level].bp_newreq.bpr_ptr;
956
957                 brelse(path[level].bp_sib_bh);
958                 path[level].bp_sib_bh = NULL;
959         }
960
961         path[level + 1].bp_index++;
962 }
963
964 static void nilfs_btree_grow(struct nilfs_bmap *btree,
965                              struct nilfs_btree_path *path,
966                              int level, __u64 *keyp, __u64 *ptrp)
967 {
968         struct nilfs_btree_node *root, *child;
969         int n, ncblk;
970
971         root = nilfs_btree_get_root(btree);
972         child = nilfs_btree_get_sib_node(path, level);
973         ncblk = nilfs_btree_nchildren_per_block(btree);
974
975         n = nilfs_btree_node_get_nchildren(root);
976
977         nilfs_btree_node_move_right(root, child, n,
978                                     NILFS_BTREE_ROOT_NCHILDREN_MAX, ncblk);
979         nilfs_btree_node_set_level(root, level + 1);
980
981         if (!buffer_dirty(path[level].bp_sib_bh))
982                 mark_buffer_dirty(path[level].bp_sib_bh);
983
984         path[level].bp_bh = path[level].bp_sib_bh;
985         path[level].bp_sib_bh = NULL;
986
987         nilfs_btree_do_insert(btree, path, level, keyp, ptrp);
988
989         *keyp = nilfs_btree_node_get_key(child, 0);
990         *ptrp = path[level].bp_newreq.bpr_ptr;
991 }
992
993 static __u64 nilfs_btree_find_near(const struct nilfs_bmap *btree,
994                                    const struct nilfs_btree_path *path)
995 {
996         struct nilfs_btree_node *node;
997         int level, ncmax;
998
999         if (path == NULL)
1000                 return NILFS_BMAP_INVALID_PTR;
1001
1002         /* left sibling */
1003         level = NILFS_BTREE_LEVEL_NODE_MIN;
1004         if (path[level].bp_index > 0) {
1005                 node = nilfs_btree_get_node(btree, path, level, &ncmax);
1006                 return nilfs_btree_node_get_ptr(node,
1007                                                 path[level].bp_index - 1,
1008                                                 ncmax);
1009         }
1010
1011         /* parent */
1012         level = NILFS_BTREE_LEVEL_NODE_MIN + 1;
1013         if (level <= nilfs_btree_height(btree) - 1) {
1014                 node = nilfs_btree_get_node(btree, path, level, &ncmax);
1015                 return nilfs_btree_node_get_ptr(node, path[level].bp_index,
1016                                                 ncmax);
1017         }
1018
1019         return NILFS_BMAP_INVALID_PTR;
1020 }
1021
1022 static __u64 nilfs_btree_find_target_v(const struct nilfs_bmap *btree,
1023                                        const struct nilfs_btree_path *path,
1024                                        __u64 key)
1025 {
1026         __u64 ptr;
1027
1028         ptr = nilfs_bmap_find_target_seq(btree, key);
1029         if (ptr != NILFS_BMAP_INVALID_PTR)
1030                 /* sequential access */
1031                 return ptr;
1032
1033         ptr = nilfs_btree_find_near(btree, path);
1034         if (ptr != NILFS_BMAP_INVALID_PTR)
1035                 /* near */
1036                 return ptr;
1037
1038         /* block group */
1039         return nilfs_bmap_find_target_in_group(btree);
1040 }
1041
1042 static int nilfs_btree_prepare_insert(struct nilfs_bmap *btree,
1043                                       struct nilfs_btree_path *path,
1044                                       int *levelp, __u64 key, __u64 ptr,
1045                                       struct nilfs_bmap_stats *stats)
1046 {
1047         struct buffer_head *bh;
1048         struct nilfs_btree_node *node, *parent, *sib;
1049         __u64 sibptr;
1050         int pindex, level, ncmax, ncblk, ret;
1051         struct inode *dat = NULL;
1052
1053         stats->bs_nblocks = 0;
1054         level = NILFS_BTREE_LEVEL_DATA;
1055
1056         /* allocate a new ptr for data block */
1057         if (NILFS_BMAP_USE_VBN(btree)) {
1058                 path[level].bp_newreq.bpr_ptr =
1059                         nilfs_btree_find_target_v(btree, path, key);
1060                 dat = nilfs_bmap_get_dat(btree);
1061         }
1062
1063         ret = nilfs_bmap_prepare_alloc_ptr(btree, &path[level].bp_newreq, dat);
1064         if (ret < 0)
1065                 goto err_out_data;
1066
1067         ncblk = nilfs_btree_nchildren_per_block(btree);
1068
1069         for (level = NILFS_BTREE_LEVEL_NODE_MIN;
1070              level < nilfs_btree_height(btree) - 1;
1071              level++) {
1072                 node = nilfs_btree_get_nonroot_node(path, level);
1073                 if (nilfs_btree_node_get_nchildren(node) < ncblk) {
1074                         path[level].bp_op = nilfs_btree_do_insert;
1075                         stats->bs_nblocks++;
1076                         goto out;
1077                 }
1078
1079                 parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax);
1080                 pindex = path[level + 1].bp_index;
1081
1082                 /* left sibling */
1083                 if (pindex > 0) {
1084                         sibptr = nilfs_btree_node_get_ptr(parent, pindex - 1,
1085                                                           ncmax);
1086                         ret = nilfs_btree_get_block(btree, sibptr, &bh);
1087                         if (ret < 0)
1088                                 goto err_out_child_node;
1089                         sib = (struct nilfs_btree_node *)bh->b_data;
1090                         if (nilfs_btree_node_get_nchildren(sib) < ncblk) {
1091                                 path[level].bp_sib_bh = bh;
1092                                 path[level].bp_op = nilfs_btree_carry_left;
1093                                 stats->bs_nblocks++;
1094                                 goto out;
1095                         } else {
1096                                 brelse(bh);
1097                         }
1098                 }
1099
1100                 /* right sibling */
1101                 if (pindex < nilfs_btree_node_get_nchildren(parent) - 1) {
1102                         sibptr = nilfs_btree_node_get_ptr(parent, pindex + 1,
1103                                                           ncmax);
1104                         ret = nilfs_btree_get_block(btree, sibptr, &bh);
1105                         if (ret < 0)
1106                                 goto err_out_child_node;
1107                         sib = (struct nilfs_btree_node *)bh->b_data;
1108                         if (nilfs_btree_node_get_nchildren(sib) < ncblk) {
1109                                 path[level].bp_sib_bh = bh;
1110                                 path[level].bp_op = nilfs_btree_carry_right;
1111                                 stats->bs_nblocks++;
1112                                 goto out;
1113                         } else {
1114                                 brelse(bh);
1115                         }
1116                 }
1117
1118                 /* split */
1119                 path[level].bp_newreq.bpr_ptr =
1120                         path[level - 1].bp_newreq.bpr_ptr + 1;
1121                 ret = nilfs_bmap_prepare_alloc_ptr(btree,
1122                                                    &path[level].bp_newreq, dat);
1123                 if (ret < 0)
1124                         goto err_out_child_node;
1125                 ret = nilfs_btree_get_new_block(btree,
1126                                                 path[level].bp_newreq.bpr_ptr,
1127                                                 &bh);
1128                 if (ret < 0)
1129                         goto err_out_curr_node;
1130
1131                 stats->bs_nblocks++;
1132
1133                 sib = (struct nilfs_btree_node *)bh->b_data;
1134                 nilfs_btree_node_init(sib, 0, level, 0, ncblk, NULL, NULL);
1135                 path[level].bp_sib_bh = bh;
1136                 path[level].bp_op = nilfs_btree_split;
1137         }
1138
1139         /* root */
1140         node = nilfs_btree_get_root(btree);
1141         if (nilfs_btree_node_get_nchildren(node) <
1142             NILFS_BTREE_ROOT_NCHILDREN_MAX) {
1143                 path[level].bp_op = nilfs_btree_do_insert;
1144                 stats->bs_nblocks++;
1145                 goto out;
1146         }
1147
1148         /* grow */
1149         path[level].bp_newreq.bpr_ptr = path[level - 1].bp_newreq.bpr_ptr + 1;
1150         ret = nilfs_bmap_prepare_alloc_ptr(btree, &path[level].bp_newreq, dat);
1151         if (ret < 0)
1152                 goto err_out_child_node;
1153         ret = nilfs_btree_get_new_block(btree, path[level].bp_newreq.bpr_ptr,
1154                                         &bh);
1155         if (ret < 0)
1156                 goto err_out_curr_node;
1157
1158         nilfs_btree_node_init((struct nilfs_btree_node *)bh->b_data,
1159                               0, level, 0, ncblk, NULL, NULL);
1160         path[level].bp_sib_bh = bh;
1161         path[level].bp_op = nilfs_btree_grow;
1162
1163         level++;
1164         path[level].bp_op = nilfs_btree_do_insert;
1165
1166         /* a newly-created node block and a data block are added */
1167         stats->bs_nblocks += 2;
1168
1169         /* success */
1170  out:
1171         *levelp = level;
1172         return ret;
1173
1174         /* error */
1175  err_out_curr_node:
1176         nilfs_bmap_abort_alloc_ptr(btree, &path[level].bp_newreq, dat);
1177  err_out_child_node:
1178         for (level--; level > NILFS_BTREE_LEVEL_DATA; level--) {
1179                 nilfs_btnode_delete(path[level].bp_sib_bh);
1180                 nilfs_bmap_abort_alloc_ptr(btree, &path[level].bp_newreq, dat);
1181
1182         }
1183
1184         nilfs_bmap_abort_alloc_ptr(btree, &path[level].bp_newreq, dat);
1185  err_out_data:
1186         *levelp = level;
1187         stats->bs_nblocks = 0;
1188         return ret;
1189 }
1190
1191 static void nilfs_btree_commit_insert(struct nilfs_bmap *btree,
1192                                       struct nilfs_btree_path *path,
1193                                       int maxlevel, __u64 key, __u64 ptr)
1194 {
1195         struct inode *dat = NULL;
1196         int level;
1197
1198         set_buffer_nilfs_volatile((struct buffer_head *)((unsigned long)ptr));
1199         ptr = path[NILFS_BTREE_LEVEL_DATA].bp_newreq.bpr_ptr;
1200         if (NILFS_BMAP_USE_VBN(btree)) {
1201                 nilfs_bmap_set_target_v(btree, key, ptr);
1202                 dat = nilfs_bmap_get_dat(btree);
1203         }
1204
1205         for (level = NILFS_BTREE_LEVEL_NODE_MIN; level <= maxlevel; level++) {
1206                 nilfs_bmap_commit_alloc_ptr(btree,
1207                                             &path[level - 1].bp_newreq, dat);
1208                 path[level].bp_op(btree, path, level, &key, &ptr);
1209         }
1210
1211         if (!nilfs_bmap_dirty(btree))
1212                 nilfs_bmap_set_dirty(btree);
1213 }
1214
1215 static int nilfs_btree_insert(struct nilfs_bmap *btree, __u64 key, __u64 ptr)
1216 {
1217         struct nilfs_btree_path *path;
1218         struct nilfs_bmap_stats stats;
1219         int level, ret;
1220
1221         path = nilfs_btree_alloc_path();
1222         if (path == NULL)
1223                 return -ENOMEM;
1224
1225         ret = nilfs_btree_do_lookup(btree, path, key, NULL,
1226                                     NILFS_BTREE_LEVEL_NODE_MIN, 0);
1227         if (ret != -ENOENT) {
1228                 if (ret == 0)
1229                         ret = -EEXIST;
1230                 goto out;
1231         }
1232
1233         ret = nilfs_btree_prepare_insert(btree, path, &level, key, ptr, &stats);
1234         if (ret < 0)
1235                 goto out;
1236         nilfs_btree_commit_insert(btree, path, level, key, ptr);
1237         nilfs_inode_add_blocks(btree->b_inode, stats.bs_nblocks);
1238
1239  out:
1240         nilfs_btree_free_path(path);
1241         return ret;
1242 }
1243
1244 static void nilfs_btree_do_delete(struct nilfs_bmap *btree,
1245                                   struct nilfs_btree_path *path,
1246                                   int level, __u64 *keyp, __u64 *ptrp)
1247 {
1248         struct nilfs_btree_node *node;
1249         int ncblk;
1250
1251         if (level < nilfs_btree_height(btree) - 1) {
1252                 node = nilfs_btree_get_nonroot_node(path, level);
1253                 ncblk = nilfs_btree_nchildren_per_block(btree);
1254                 nilfs_btree_node_delete(node, path[level].bp_index,
1255                                         keyp, ptrp, ncblk);
1256                 if (!buffer_dirty(path[level].bp_bh))
1257                         mark_buffer_dirty(path[level].bp_bh);
1258                 if (path[level].bp_index == 0)
1259                         nilfs_btree_promote_key(btree, path, level + 1,
1260                                 nilfs_btree_node_get_key(node, 0));
1261         } else {
1262                 node = nilfs_btree_get_root(btree);
1263                 nilfs_btree_node_delete(node, path[level].bp_index,
1264                                         keyp, ptrp,
1265                                         NILFS_BTREE_ROOT_NCHILDREN_MAX);
1266         }
1267 }
1268
1269 static void nilfs_btree_borrow_left(struct nilfs_bmap *btree,
1270                                     struct nilfs_btree_path *path,
1271                                     int level, __u64 *keyp, __u64 *ptrp)
1272 {
1273         struct nilfs_btree_node *node, *left;
1274         int nchildren, lnchildren, n, ncblk;
1275
1276         nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1277
1278         node = nilfs_btree_get_nonroot_node(path, level);
1279         left = nilfs_btree_get_sib_node(path, level);
1280         nchildren = nilfs_btree_node_get_nchildren(node);
1281         lnchildren = nilfs_btree_node_get_nchildren(left);
1282         ncblk = nilfs_btree_nchildren_per_block(btree);
1283
1284         n = (nchildren + lnchildren) / 2 - nchildren;
1285
1286         nilfs_btree_node_move_right(left, node, n, ncblk, ncblk);
1287
1288         if (!buffer_dirty(path[level].bp_bh))
1289                 mark_buffer_dirty(path[level].bp_bh);
1290         if (!buffer_dirty(path[level].bp_sib_bh))
1291                 mark_buffer_dirty(path[level].bp_sib_bh);
1292
1293         nilfs_btree_promote_key(btree, path, level + 1,
1294                                 nilfs_btree_node_get_key(node, 0));
1295
1296         brelse(path[level].bp_sib_bh);
1297         path[level].bp_sib_bh = NULL;
1298         path[level].bp_index += n;
1299 }
1300
1301 static void nilfs_btree_borrow_right(struct nilfs_bmap *btree,
1302                                      struct nilfs_btree_path *path,
1303                                      int level, __u64 *keyp, __u64 *ptrp)
1304 {
1305         struct nilfs_btree_node *node, *right;
1306         int nchildren, rnchildren, n, ncblk;
1307
1308         nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1309
1310         node = nilfs_btree_get_nonroot_node(path, level);
1311         right = nilfs_btree_get_sib_node(path, level);
1312         nchildren = nilfs_btree_node_get_nchildren(node);
1313         rnchildren = nilfs_btree_node_get_nchildren(right);
1314         ncblk = nilfs_btree_nchildren_per_block(btree);
1315
1316         n = (nchildren + rnchildren) / 2 - nchildren;
1317
1318         nilfs_btree_node_move_left(node, right, n, ncblk, ncblk);
1319
1320         if (!buffer_dirty(path[level].bp_bh))
1321                 mark_buffer_dirty(path[level].bp_bh);
1322         if (!buffer_dirty(path[level].bp_sib_bh))
1323                 mark_buffer_dirty(path[level].bp_sib_bh);
1324
1325         path[level + 1].bp_index++;
1326         nilfs_btree_promote_key(btree, path, level + 1,
1327                                 nilfs_btree_node_get_key(right, 0));
1328         path[level + 1].bp_index--;
1329
1330         brelse(path[level].bp_sib_bh);
1331         path[level].bp_sib_bh = NULL;
1332 }
1333
1334 static void nilfs_btree_concat_left(struct nilfs_bmap *btree,
1335                                     struct nilfs_btree_path *path,
1336                                     int level, __u64 *keyp, __u64 *ptrp)
1337 {
1338         struct nilfs_btree_node *node, *left;
1339         int n, ncblk;
1340
1341         nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1342
1343         node = nilfs_btree_get_nonroot_node(path, level);
1344         left = nilfs_btree_get_sib_node(path, level);
1345         ncblk = nilfs_btree_nchildren_per_block(btree);
1346
1347         n = nilfs_btree_node_get_nchildren(node);
1348
1349         nilfs_btree_node_move_left(left, node, n, ncblk, ncblk);
1350
1351         if (!buffer_dirty(path[level].bp_sib_bh))
1352                 mark_buffer_dirty(path[level].bp_sib_bh);
1353
1354         nilfs_btnode_delete(path[level].bp_bh);
1355         path[level].bp_bh = path[level].bp_sib_bh;
1356         path[level].bp_sib_bh = NULL;
1357         path[level].bp_index += nilfs_btree_node_get_nchildren(left);
1358 }
1359
1360 static void nilfs_btree_concat_right(struct nilfs_bmap *btree,
1361                                      struct nilfs_btree_path *path,
1362                                      int level, __u64 *keyp, __u64 *ptrp)
1363 {
1364         struct nilfs_btree_node *node, *right;
1365         int n, ncblk;
1366
1367         nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1368
1369         node = nilfs_btree_get_nonroot_node(path, level);
1370         right = nilfs_btree_get_sib_node(path, level);
1371         ncblk = nilfs_btree_nchildren_per_block(btree);
1372
1373         n = nilfs_btree_node_get_nchildren(right);
1374
1375         nilfs_btree_node_move_left(node, right, n, ncblk, ncblk);
1376
1377         if (!buffer_dirty(path[level].bp_bh))
1378                 mark_buffer_dirty(path[level].bp_bh);
1379
1380         nilfs_btnode_delete(path[level].bp_sib_bh);
1381         path[level].bp_sib_bh = NULL;
1382         path[level + 1].bp_index++;
1383 }
1384
1385 static void nilfs_btree_shrink(struct nilfs_bmap *btree,
1386                                struct nilfs_btree_path *path,
1387                                int level, __u64 *keyp, __u64 *ptrp)
1388 {
1389         struct nilfs_btree_node *root, *child;
1390         int n, ncblk;
1391
1392         nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1393
1394         root = nilfs_btree_get_root(btree);
1395         child = nilfs_btree_get_nonroot_node(path, level);
1396         ncblk = nilfs_btree_nchildren_per_block(btree);
1397
1398         nilfs_btree_node_delete(root, 0, NULL, NULL,
1399                                 NILFS_BTREE_ROOT_NCHILDREN_MAX);
1400         nilfs_btree_node_set_level(root, level);
1401         n = nilfs_btree_node_get_nchildren(child);
1402         nilfs_btree_node_move_left(root, child, n,
1403                                    NILFS_BTREE_ROOT_NCHILDREN_MAX, ncblk);
1404
1405         nilfs_btnode_delete(path[level].bp_bh);
1406         path[level].bp_bh = NULL;
1407 }
1408
1409 static void nilfs_btree_nop(struct nilfs_bmap *btree,
1410                             struct nilfs_btree_path *path,
1411                             int level, __u64 *keyp, __u64 *ptrp)
1412 {
1413 }
1414
1415 static int nilfs_btree_prepare_delete(struct nilfs_bmap *btree,
1416                                       struct nilfs_btree_path *path,
1417                                       int *levelp,
1418                                       struct nilfs_bmap_stats *stats,
1419                                       struct inode *dat)
1420 {
1421         struct buffer_head *bh;
1422         struct nilfs_btree_node *node, *parent, *sib;
1423         __u64 sibptr;
1424         int pindex, dindex, level, ncmin, ncmax, ncblk, ret;
1425
1426         ret = 0;
1427         stats->bs_nblocks = 0;
1428         ncmin = NILFS_BTREE_NODE_NCHILDREN_MIN(nilfs_btree_node_size(btree));
1429         ncblk = nilfs_btree_nchildren_per_block(btree);
1430
1431         for (level = NILFS_BTREE_LEVEL_NODE_MIN, dindex = path[level].bp_index;
1432              level < nilfs_btree_height(btree) - 1;
1433              level++) {
1434                 node = nilfs_btree_get_nonroot_node(path, level);
1435                 path[level].bp_oldreq.bpr_ptr =
1436                         nilfs_btree_node_get_ptr(node, dindex, ncblk);
1437                 ret = nilfs_bmap_prepare_end_ptr(btree,
1438                                                  &path[level].bp_oldreq, dat);
1439                 if (ret < 0)
1440                         goto err_out_child_node;
1441
1442                 if (nilfs_btree_node_get_nchildren(node) > ncmin) {
1443                         path[level].bp_op = nilfs_btree_do_delete;
1444                         stats->bs_nblocks++;
1445                         goto out;
1446                 }
1447
1448                 parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax);
1449                 pindex = path[level + 1].bp_index;
1450                 dindex = pindex;
1451
1452                 if (pindex > 0) {
1453                         /* left sibling */
1454                         sibptr = nilfs_btree_node_get_ptr(parent, pindex - 1,
1455                                                           ncmax);
1456                         ret = nilfs_btree_get_block(btree, sibptr, &bh);
1457                         if (ret < 0)
1458                                 goto err_out_curr_node;
1459                         sib = (struct nilfs_btree_node *)bh->b_data;
1460                         if (nilfs_btree_node_get_nchildren(sib) > ncmin) {
1461                                 path[level].bp_sib_bh = bh;
1462                                 path[level].bp_op = nilfs_btree_borrow_left;
1463                                 stats->bs_nblocks++;
1464                                 goto out;
1465                         } else {
1466                                 path[level].bp_sib_bh = bh;
1467                                 path[level].bp_op = nilfs_btree_concat_left;
1468                                 stats->bs_nblocks++;
1469                                 /* continue; */
1470                         }
1471                 } else if (pindex <
1472                            nilfs_btree_node_get_nchildren(parent) - 1) {
1473                         /* right sibling */
1474                         sibptr = nilfs_btree_node_get_ptr(parent, pindex + 1,
1475                                                           ncmax);
1476                         ret = nilfs_btree_get_block(btree, sibptr, &bh);
1477                         if (ret < 0)
1478                                 goto err_out_curr_node;
1479                         sib = (struct nilfs_btree_node *)bh->b_data;
1480                         if (nilfs_btree_node_get_nchildren(sib) > ncmin) {
1481                                 path[level].bp_sib_bh = bh;
1482                                 path[level].bp_op = nilfs_btree_borrow_right;
1483                                 stats->bs_nblocks++;
1484                                 goto out;
1485                         } else {
1486                                 path[level].bp_sib_bh = bh;
1487                                 path[level].bp_op = nilfs_btree_concat_right;
1488                                 stats->bs_nblocks++;
1489                                 /*
1490                                  * When merging right sibling node
1491                                  * into the current node, pointer to
1492                                  * the right sibling node must be
1493                                  * terminated instead.  The adjustment
1494                                  * below is required for that.
1495                                  */
1496                                 dindex = pindex + 1;
1497                                 /* continue; */
1498                         }
1499                 } else {
1500                         /* no siblings */
1501                         /* the only child of the root node */
1502                         WARN_ON(level != nilfs_btree_height(btree) - 2);
1503                         if (nilfs_btree_node_get_nchildren(node) - 1 <=
1504                             NILFS_BTREE_ROOT_NCHILDREN_MAX) {
1505                                 path[level].bp_op = nilfs_btree_shrink;
1506                                 stats->bs_nblocks += 2;
1507                                 level++;
1508                                 path[level].bp_op = nilfs_btree_nop;
1509                                 goto shrink_root_child;
1510                         } else {
1511                                 path[level].bp_op = nilfs_btree_do_delete;
1512                                 stats->bs_nblocks++;
1513                                 goto out;
1514                         }
1515                 }
1516         }
1517
1518         /* child of the root node is deleted */
1519         path[level].bp_op = nilfs_btree_do_delete;
1520         stats->bs_nblocks++;
1521
1522 shrink_root_child:
1523         node = nilfs_btree_get_root(btree);
1524         path[level].bp_oldreq.bpr_ptr =
1525                 nilfs_btree_node_get_ptr(node, dindex,
1526                                          NILFS_BTREE_ROOT_NCHILDREN_MAX);
1527
1528         ret = nilfs_bmap_prepare_end_ptr(btree, &path[level].bp_oldreq, dat);
1529         if (ret < 0)
1530                 goto err_out_child_node;
1531
1532         /* success */
1533  out:
1534         *levelp = level;
1535         return ret;
1536
1537         /* error */
1538  err_out_curr_node:
1539         nilfs_bmap_abort_end_ptr(btree, &path[level].bp_oldreq, dat);
1540  err_out_child_node:
1541         for (level--; level >= NILFS_BTREE_LEVEL_NODE_MIN; level--) {
1542                 brelse(path[level].bp_sib_bh);
1543                 nilfs_bmap_abort_end_ptr(btree, &path[level].bp_oldreq, dat);
1544         }
1545         *levelp = level;
1546         stats->bs_nblocks = 0;
1547         return ret;
1548 }
1549
1550 static void nilfs_btree_commit_delete(struct nilfs_bmap *btree,
1551                                       struct nilfs_btree_path *path,
1552                                       int maxlevel, struct inode *dat)
1553 {
1554         int level;
1555
1556         for (level = NILFS_BTREE_LEVEL_NODE_MIN; level <= maxlevel; level++) {
1557                 nilfs_bmap_commit_end_ptr(btree, &path[level].bp_oldreq, dat);
1558                 path[level].bp_op(btree, path, level, NULL, NULL);
1559         }
1560
1561         if (!nilfs_bmap_dirty(btree))
1562                 nilfs_bmap_set_dirty(btree);
1563 }
1564
1565 static int nilfs_btree_delete(struct nilfs_bmap *btree, __u64 key)
1566
1567 {
1568         struct nilfs_btree_path *path;
1569         struct nilfs_bmap_stats stats;
1570         struct inode *dat;
1571         int level, ret;
1572
1573         path = nilfs_btree_alloc_path();
1574         if (path == NULL)
1575                 return -ENOMEM;
1576
1577         ret = nilfs_btree_do_lookup(btree, path, key, NULL,
1578                                     NILFS_BTREE_LEVEL_NODE_MIN, 0);
1579         if (ret < 0)
1580                 goto out;
1581
1582
1583         dat = NILFS_BMAP_USE_VBN(btree) ? nilfs_bmap_get_dat(btree) : NULL;
1584
1585         ret = nilfs_btree_prepare_delete(btree, path, &level, &stats, dat);
1586         if (ret < 0)
1587                 goto out;
1588         nilfs_btree_commit_delete(btree, path, level, dat);
1589         nilfs_inode_sub_blocks(btree->b_inode, stats.bs_nblocks);
1590
1591 out:
1592         nilfs_btree_free_path(path);
1593         return ret;
1594 }
1595
1596 static int nilfs_btree_seek_key(const struct nilfs_bmap *btree, __u64 start,
1597                                 __u64 *keyp)
1598 {
1599         struct nilfs_btree_path *path;
1600         const int minlevel = NILFS_BTREE_LEVEL_NODE_MIN;
1601         int ret;
1602
1603         path = nilfs_btree_alloc_path();
1604         if (!path)
1605                 return -ENOMEM;
1606
1607         ret = nilfs_btree_do_lookup(btree, path, start, NULL, minlevel, 0);
1608         if (!ret)
1609                 *keyp = start;
1610         else if (ret == -ENOENT)
1611                 ret = nilfs_btree_get_next_key(btree, path, minlevel, keyp);
1612
1613         nilfs_btree_free_path(path);
1614         return ret;
1615 }
1616
1617 static int nilfs_btree_last_key(const struct nilfs_bmap *btree, __u64 *keyp)
1618 {
1619         struct nilfs_btree_path *path;
1620         int ret;
1621
1622         path = nilfs_btree_alloc_path();
1623         if (path == NULL)
1624                 return -ENOMEM;
1625
1626         ret = nilfs_btree_do_lookup_last(btree, path, keyp, NULL);
1627
1628         nilfs_btree_free_path(path);
1629
1630         return ret;
1631 }
1632
1633 static int nilfs_btree_check_delete(struct nilfs_bmap *btree, __u64 key)
1634 {
1635         struct buffer_head *bh;
1636         struct nilfs_btree_node *root, *node;
1637         __u64 maxkey, nextmaxkey;
1638         __u64 ptr;
1639         int nchildren, ret;
1640
1641         root = nilfs_btree_get_root(btree);
1642         switch (nilfs_btree_height(btree)) {
1643         case 2:
1644                 bh = NULL;
1645                 node = root;
1646                 break;
1647         case 3:
1648                 nchildren = nilfs_btree_node_get_nchildren(root);
1649                 if (nchildren > 1)
1650                         return 0;
1651                 ptr = nilfs_btree_node_get_ptr(root, nchildren - 1,
1652                                                NILFS_BTREE_ROOT_NCHILDREN_MAX);
1653                 ret = nilfs_btree_get_block(btree, ptr, &bh);
1654                 if (ret < 0)
1655                         return ret;
1656                 node = (struct nilfs_btree_node *)bh->b_data;
1657                 break;
1658         default:
1659                 return 0;
1660         }
1661
1662         nchildren = nilfs_btree_node_get_nchildren(node);
1663         maxkey = nilfs_btree_node_get_key(node, nchildren - 1);
1664         nextmaxkey = (nchildren > 1) ?
1665                 nilfs_btree_node_get_key(node, nchildren - 2) : 0;
1666         if (bh != NULL)
1667                 brelse(bh);
1668
1669         return (maxkey == key) && (nextmaxkey < NILFS_BMAP_LARGE_LOW);
1670 }
1671
1672 static int nilfs_btree_gather_data(struct nilfs_bmap *btree,
1673                                    __u64 *keys, __u64 *ptrs, int nitems)
1674 {
1675         struct buffer_head *bh;
1676         struct nilfs_btree_node *node, *root;
1677         __le64 *dkeys;
1678         __le64 *dptrs;
1679         __u64 ptr;
1680         int nchildren, ncmax, i, ret;
1681
1682         root = nilfs_btree_get_root(btree);
1683         switch (nilfs_btree_height(btree)) {
1684         case 2:
1685                 bh = NULL;
1686                 node = root;
1687                 ncmax = NILFS_BTREE_ROOT_NCHILDREN_MAX;
1688                 break;
1689         case 3:
1690                 nchildren = nilfs_btree_node_get_nchildren(root);
1691                 WARN_ON(nchildren > 1);
1692                 ptr = nilfs_btree_node_get_ptr(root, nchildren - 1,
1693                                                NILFS_BTREE_ROOT_NCHILDREN_MAX);
1694                 ret = nilfs_btree_get_block(btree, ptr, &bh);
1695                 if (ret < 0)
1696                         return ret;
1697                 node = (struct nilfs_btree_node *)bh->b_data;
1698                 ncmax = nilfs_btree_nchildren_per_block(btree);
1699                 break;
1700         default:
1701                 node = NULL;
1702                 return -EINVAL;
1703         }
1704
1705         nchildren = nilfs_btree_node_get_nchildren(node);
1706         if (nchildren < nitems)
1707                 nitems = nchildren;
1708         dkeys = nilfs_btree_node_dkeys(node);
1709         dptrs = nilfs_btree_node_dptrs(node, ncmax);
1710         for (i = 0; i < nitems; i++) {
1711                 keys[i] = le64_to_cpu(dkeys[i]);
1712                 ptrs[i] = le64_to_cpu(dptrs[i]);
1713         }
1714
1715         if (bh != NULL)
1716                 brelse(bh);
1717
1718         return nitems;
1719 }
1720
1721 static int
1722 nilfs_btree_prepare_convert_and_insert(struct nilfs_bmap *btree, __u64 key,
1723                                        union nilfs_bmap_ptr_req *dreq,
1724                                        union nilfs_bmap_ptr_req *nreq,
1725                                        struct buffer_head **bhp,
1726                                        struct nilfs_bmap_stats *stats)
1727 {
1728         struct buffer_head *bh;
1729         struct inode *dat = NULL;
1730         int ret;
1731
1732         stats->bs_nblocks = 0;
1733
1734         /* for data */
1735         /* cannot find near ptr */
1736         if (NILFS_BMAP_USE_VBN(btree)) {
1737                 dreq->bpr_ptr = nilfs_btree_find_target_v(btree, NULL, key);
1738                 dat = nilfs_bmap_get_dat(btree);
1739         }
1740
1741         ret = nilfs_bmap_prepare_alloc_ptr(btree, dreq, dat);
1742         if (ret < 0)
1743                 return ret;
1744
1745         *bhp = NULL;
1746         stats->bs_nblocks++;
1747         if (nreq != NULL) {
1748                 nreq->bpr_ptr = dreq->bpr_ptr + 1;
1749                 ret = nilfs_bmap_prepare_alloc_ptr(btree, nreq, dat);
1750                 if (ret < 0)
1751                         goto err_out_dreq;
1752
1753                 ret = nilfs_btree_get_new_block(btree, nreq->bpr_ptr, &bh);
1754                 if (ret < 0)
1755                         goto err_out_nreq;
1756
1757                 *bhp = bh;
1758                 stats->bs_nblocks++;
1759         }
1760
1761         /* success */
1762         return 0;
1763
1764         /* error */
1765  err_out_nreq:
1766         nilfs_bmap_abort_alloc_ptr(btree, nreq, dat);
1767  err_out_dreq:
1768         nilfs_bmap_abort_alloc_ptr(btree, dreq, dat);
1769         stats->bs_nblocks = 0;
1770         return ret;
1771
1772 }
1773
1774 static void
1775 nilfs_btree_commit_convert_and_insert(struct nilfs_bmap *btree,
1776                                       __u64 key, __u64 ptr,
1777                                       const __u64 *keys, const __u64 *ptrs,
1778                                       int n,
1779                                       union nilfs_bmap_ptr_req *dreq,
1780                                       union nilfs_bmap_ptr_req *nreq,
1781                                       struct buffer_head *bh)
1782 {
1783         struct nilfs_btree_node *node;
1784         struct inode *dat;
1785         __u64 tmpptr;
1786         int ncblk;
1787
1788         /* free resources */
1789         if (btree->b_ops->bop_clear != NULL)
1790                 btree->b_ops->bop_clear(btree);
1791
1792         /* ptr must be a pointer to a buffer head. */
1793         set_buffer_nilfs_volatile((struct buffer_head *)((unsigned long)ptr));
1794
1795         /* convert and insert */
1796         dat = NILFS_BMAP_USE_VBN(btree) ? nilfs_bmap_get_dat(btree) : NULL;
1797         __nilfs_btree_init(btree);
1798         if (nreq != NULL) {
1799                 nilfs_bmap_commit_alloc_ptr(btree, dreq, dat);
1800                 nilfs_bmap_commit_alloc_ptr(btree, nreq, dat);
1801
1802                 /* create child node at level 1 */
1803                 node = (struct nilfs_btree_node *)bh->b_data;
1804                 ncblk = nilfs_btree_nchildren_per_block(btree);
1805                 nilfs_btree_node_init(node, 0, 1, n, ncblk, keys, ptrs);
1806                 nilfs_btree_node_insert(node, n, key, dreq->bpr_ptr, ncblk);
1807                 if (!buffer_dirty(bh))
1808                         mark_buffer_dirty(bh);
1809                 if (!nilfs_bmap_dirty(btree))
1810                         nilfs_bmap_set_dirty(btree);
1811
1812                 brelse(bh);
1813
1814                 /* create root node at level 2 */
1815                 node = nilfs_btree_get_root(btree);
1816                 tmpptr = nreq->bpr_ptr;
1817                 nilfs_btree_node_init(node, NILFS_BTREE_NODE_ROOT, 2, 1,
1818                                       NILFS_BTREE_ROOT_NCHILDREN_MAX,
1819                                       &keys[0], &tmpptr);
1820         } else {
1821                 nilfs_bmap_commit_alloc_ptr(btree, dreq, dat);
1822
1823                 /* create root node at level 1 */
1824                 node = nilfs_btree_get_root(btree);
1825                 nilfs_btree_node_init(node, NILFS_BTREE_NODE_ROOT, 1, n,
1826                                       NILFS_BTREE_ROOT_NCHILDREN_MAX,
1827                                       keys, ptrs);
1828                 nilfs_btree_node_insert(node, n, key, dreq->bpr_ptr,
1829                                         NILFS_BTREE_ROOT_NCHILDREN_MAX);
1830                 if (!nilfs_bmap_dirty(btree))
1831                         nilfs_bmap_set_dirty(btree);
1832         }
1833
1834         if (NILFS_BMAP_USE_VBN(btree))
1835                 nilfs_bmap_set_target_v(btree, key, dreq->bpr_ptr);
1836 }
1837
1838 /**
1839  * nilfs_btree_convert_and_insert -
1840  * @bmap:
1841  * @key:
1842  * @ptr:
1843  * @keys:
1844  * @ptrs:
1845  * @n:
1846  */
1847 int nilfs_btree_convert_and_insert(struct nilfs_bmap *btree,
1848                                    __u64 key, __u64 ptr,
1849                                    const __u64 *keys, const __u64 *ptrs, int n)
1850 {
1851         struct buffer_head *bh = NULL;
1852         union nilfs_bmap_ptr_req dreq, nreq, *di, *ni;
1853         struct nilfs_bmap_stats stats;
1854         int ret;
1855
1856         if (n + 1 <= NILFS_BTREE_ROOT_NCHILDREN_MAX) {
1857                 di = &dreq;
1858                 ni = NULL;
1859         } else if ((n + 1) <= NILFS_BTREE_NODE_NCHILDREN_MAX(
1860                            1 << btree->b_inode->i_blkbits)) {
1861                 di = &dreq;
1862                 ni = &nreq;
1863         } else {
1864                 di = NULL;
1865                 ni = NULL;
1866                 BUG();
1867         }
1868
1869         ret = nilfs_btree_prepare_convert_and_insert(btree, key, di, ni, &bh,
1870                                                      &stats);
1871         if (ret < 0)
1872                 return ret;
1873         nilfs_btree_commit_convert_and_insert(btree, key, ptr, keys, ptrs, n,
1874                                               di, ni, bh);
1875         nilfs_inode_add_blocks(btree->b_inode, stats.bs_nblocks);
1876         return 0;
1877 }
1878
1879 static int nilfs_btree_propagate_p(struct nilfs_bmap *btree,
1880                                    struct nilfs_btree_path *path,
1881                                    int level,
1882                                    struct buffer_head *bh)
1883 {
1884         while ((++level < nilfs_btree_height(btree) - 1) &&
1885                !buffer_dirty(path[level].bp_bh))
1886                 mark_buffer_dirty(path[level].bp_bh);
1887
1888         return 0;
1889 }
1890
1891 static int nilfs_btree_prepare_update_v(struct nilfs_bmap *btree,
1892                                         struct nilfs_btree_path *path,
1893                                         int level, struct inode *dat)
1894 {
1895         struct nilfs_btree_node *parent;
1896         int ncmax, ret;
1897
1898         parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax);
1899         path[level].bp_oldreq.bpr_ptr =
1900                 nilfs_btree_node_get_ptr(parent, path[level + 1].bp_index,
1901                                          ncmax);
1902         path[level].bp_newreq.bpr_ptr = path[level].bp_oldreq.bpr_ptr + 1;
1903         ret = nilfs_dat_prepare_update(dat, &path[level].bp_oldreq.bpr_req,
1904                                        &path[level].bp_newreq.bpr_req);
1905         if (ret < 0)
1906                 return ret;
1907
1908         if (buffer_nilfs_node(path[level].bp_bh)) {
1909                 path[level].bp_ctxt.oldkey = path[level].bp_oldreq.bpr_ptr;
1910                 path[level].bp_ctxt.newkey = path[level].bp_newreq.bpr_ptr;
1911                 path[level].bp_ctxt.bh = path[level].bp_bh;
1912                 ret = nilfs_btnode_prepare_change_key(
1913                         &NILFS_BMAP_I(btree)->i_btnode_cache,
1914                         &path[level].bp_ctxt);
1915                 if (ret < 0) {
1916                         nilfs_dat_abort_update(dat,
1917                                                &path[level].bp_oldreq.bpr_req,
1918                                                &path[level].bp_newreq.bpr_req);
1919                         return ret;
1920                 }
1921         }
1922
1923         return 0;
1924 }
1925
1926 static void nilfs_btree_commit_update_v(struct nilfs_bmap *btree,
1927                                         struct nilfs_btree_path *path,
1928                                         int level, struct inode *dat)
1929 {
1930         struct nilfs_btree_node *parent;
1931         int ncmax;
1932
1933         nilfs_dat_commit_update(dat, &path[level].bp_oldreq.bpr_req,
1934                                 &path[level].bp_newreq.bpr_req,
1935                                 btree->b_ptr_type == NILFS_BMAP_PTR_VS);
1936
1937         if (buffer_nilfs_node(path[level].bp_bh)) {
1938                 nilfs_btnode_commit_change_key(
1939                         &NILFS_BMAP_I(btree)->i_btnode_cache,
1940                         &path[level].bp_ctxt);
1941                 path[level].bp_bh = path[level].bp_ctxt.bh;
1942         }
1943         set_buffer_nilfs_volatile(path[level].bp_bh);
1944
1945         parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax);
1946         nilfs_btree_node_set_ptr(parent, path[level + 1].bp_index,
1947                                  path[level].bp_newreq.bpr_ptr, ncmax);
1948 }
1949
1950 static void nilfs_btree_abort_update_v(struct nilfs_bmap *btree,
1951                                        struct nilfs_btree_path *path,
1952                                        int level, struct inode *dat)
1953 {
1954         nilfs_dat_abort_update(dat, &path[level].bp_oldreq.bpr_req,
1955                                &path[level].bp_newreq.bpr_req);
1956         if (buffer_nilfs_node(path[level].bp_bh))
1957                 nilfs_btnode_abort_change_key(
1958                         &NILFS_BMAP_I(btree)->i_btnode_cache,
1959                         &path[level].bp_ctxt);
1960 }
1961
1962 static int nilfs_btree_prepare_propagate_v(struct nilfs_bmap *btree,
1963                                            struct nilfs_btree_path *path,
1964                                            int minlevel, int *maxlevelp,
1965                                            struct inode *dat)
1966 {
1967         int level, ret;
1968
1969         level = minlevel;
1970         if (!buffer_nilfs_volatile(path[level].bp_bh)) {
1971                 ret = nilfs_btree_prepare_update_v(btree, path, level, dat);
1972                 if (ret < 0)
1973                         return ret;
1974         }
1975         while ((++level < nilfs_btree_height(btree) - 1) &&
1976                !buffer_dirty(path[level].bp_bh)) {
1977
1978                 WARN_ON(buffer_nilfs_volatile(path[level].bp_bh));
1979                 ret = nilfs_btree_prepare_update_v(btree, path, level, dat);
1980                 if (ret < 0)
1981                         goto out;
1982         }
1983
1984         /* success */
1985         *maxlevelp = level - 1;
1986         return 0;
1987
1988         /* error */
1989  out:
1990         while (--level > minlevel)
1991                 nilfs_btree_abort_update_v(btree, path, level, dat);
1992         if (!buffer_nilfs_volatile(path[level].bp_bh))
1993                 nilfs_btree_abort_update_v(btree, path, level, dat);
1994         return ret;
1995 }
1996
1997 static void nilfs_btree_commit_propagate_v(struct nilfs_bmap *btree,
1998                                            struct nilfs_btree_path *path,
1999                                            int minlevel, int maxlevel,
2000                                            struct buffer_head *bh,
2001                                            struct inode *dat)
2002 {
2003         int level;
2004
2005         if (!buffer_nilfs_volatile(path[minlevel].bp_bh))
2006                 nilfs_btree_commit_update_v(btree, path, minlevel, dat);
2007
2008         for (level = minlevel + 1; level <= maxlevel; level++)
2009                 nilfs_btree_commit_update_v(btree, path, level, dat);
2010 }
2011
2012 static int nilfs_btree_propagate_v(struct nilfs_bmap *btree,
2013                                    struct nilfs_btree_path *path,
2014                                    int level, struct buffer_head *bh)
2015 {
2016         int maxlevel = 0, ret;
2017         struct nilfs_btree_node *parent;
2018         struct inode *dat = nilfs_bmap_get_dat(btree);
2019         __u64 ptr;
2020         int ncmax;
2021
2022         get_bh(bh);
2023         path[level].bp_bh = bh;
2024         ret = nilfs_btree_prepare_propagate_v(btree, path, level, &maxlevel,
2025                                               dat);
2026         if (ret < 0)
2027                 goto out;
2028
2029         if (buffer_nilfs_volatile(path[level].bp_bh)) {
2030                 parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax);
2031                 ptr = nilfs_btree_node_get_ptr(parent,
2032                                                path[level + 1].bp_index,
2033                                                ncmax);
2034                 ret = nilfs_dat_mark_dirty(dat, ptr);
2035                 if (ret < 0)
2036                         goto out;
2037         }
2038
2039         nilfs_btree_commit_propagate_v(btree, path, level, maxlevel, bh, dat);
2040
2041  out:
2042         brelse(path[level].bp_bh);
2043         path[level].bp_bh = NULL;
2044         return ret;
2045 }
2046
2047 static int nilfs_btree_propagate(struct nilfs_bmap *btree,
2048                                  struct buffer_head *bh)
2049 {
2050         struct nilfs_btree_path *path;
2051         struct nilfs_btree_node *node;
2052         __u64 key;
2053         int level, ret;
2054
2055         WARN_ON(!buffer_dirty(bh));
2056
2057         path = nilfs_btree_alloc_path();
2058         if (path == NULL)
2059                 return -ENOMEM;
2060
2061         if (buffer_nilfs_node(bh)) {
2062                 node = (struct nilfs_btree_node *)bh->b_data;
2063                 key = nilfs_btree_node_get_key(node, 0);
2064                 level = nilfs_btree_node_get_level(node);
2065         } else {
2066                 key = nilfs_bmap_data_get_key(btree, bh);
2067                 level = NILFS_BTREE_LEVEL_DATA;
2068         }
2069
2070         ret = nilfs_btree_do_lookup(btree, path, key, NULL, level + 1, 0);
2071         if (ret < 0) {
2072                 if (unlikely(ret == -ENOENT))
2073                         printk(KERN_CRIT "%s: key = %llu, level == %d\n",
2074                                __func__, (unsigned long long)key, level);
2075                 goto out;
2076         }
2077
2078         ret = NILFS_BMAP_USE_VBN(btree) ?
2079                 nilfs_btree_propagate_v(btree, path, level, bh) :
2080                 nilfs_btree_propagate_p(btree, path, level, bh);
2081
2082  out:
2083         nilfs_btree_free_path(path);
2084
2085         return ret;
2086 }
2087
2088 static int nilfs_btree_propagate_gc(struct nilfs_bmap *btree,
2089                                     struct buffer_head *bh)
2090 {
2091         return nilfs_dat_mark_dirty(nilfs_bmap_get_dat(btree), bh->b_blocknr);
2092 }
2093
2094 static void nilfs_btree_add_dirty_buffer(struct nilfs_bmap *btree,
2095                                          struct list_head *lists,
2096                                          struct buffer_head *bh)
2097 {
2098         struct list_head *head;
2099         struct buffer_head *cbh;
2100         struct nilfs_btree_node *node, *cnode;
2101         __u64 key, ckey;
2102         int level;
2103
2104         get_bh(bh);
2105         node = (struct nilfs_btree_node *)bh->b_data;
2106         key = nilfs_btree_node_get_key(node, 0);
2107         level = nilfs_btree_node_get_level(node);
2108         if (level < NILFS_BTREE_LEVEL_NODE_MIN ||
2109             level >= NILFS_BTREE_LEVEL_MAX) {
2110                 dump_stack();
2111                 printk(KERN_WARNING
2112                        "%s: invalid btree level: %d (key=%llu, ino=%lu, "
2113                        "blocknr=%llu)\n",
2114                        __func__, level, (unsigned long long)key,
2115                        NILFS_BMAP_I(btree)->vfs_inode.i_ino,
2116                        (unsigned long long)bh->b_blocknr);
2117                 return;
2118         }
2119
2120         list_for_each(head, &lists[level]) {
2121                 cbh = list_entry(head, struct buffer_head, b_assoc_buffers);
2122                 cnode = (struct nilfs_btree_node *)cbh->b_data;
2123                 ckey = nilfs_btree_node_get_key(cnode, 0);
2124                 if (key < ckey)
2125                         break;
2126         }
2127         list_add_tail(&bh->b_assoc_buffers, head);
2128 }
2129
2130 static void nilfs_btree_lookup_dirty_buffers(struct nilfs_bmap *btree,
2131                                              struct list_head *listp)
2132 {
2133         struct address_space *btcache = &NILFS_BMAP_I(btree)->i_btnode_cache;
2134         struct list_head lists[NILFS_BTREE_LEVEL_MAX];
2135         struct pagevec pvec;
2136         struct buffer_head *bh, *head;
2137         pgoff_t index = 0;
2138         int level, i;
2139
2140         for (level = NILFS_BTREE_LEVEL_NODE_MIN;
2141              level < NILFS_BTREE_LEVEL_MAX;
2142              level++)
2143                 INIT_LIST_HEAD(&lists[level]);
2144
2145         pagevec_init(&pvec, 0);
2146
2147         while (pagevec_lookup_tag(&pvec, btcache, &index, PAGECACHE_TAG_DIRTY,
2148                                   PAGEVEC_SIZE)) {
2149                 for (i = 0; i < pagevec_count(&pvec); i++) {
2150                         bh = head = page_buffers(pvec.pages[i]);
2151                         do {
2152                                 if (buffer_dirty(bh))
2153                                         nilfs_btree_add_dirty_buffer(btree,
2154                                                                      lists, bh);
2155                         } while ((bh = bh->b_this_page) != head);
2156                 }
2157                 pagevec_release(&pvec);
2158                 cond_resched();
2159         }
2160
2161         for (level = NILFS_BTREE_LEVEL_NODE_MIN;
2162              level < NILFS_BTREE_LEVEL_MAX;
2163              level++)
2164                 list_splice_tail(&lists[level], listp);
2165 }
2166
2167 static int nilfs_btree_assign_p(struct nilfs_bmap *btree,
2168                                 struct nilfs_btree_path *path,
2169                                 int level,
2170                                 struct buffer_head **bh,
2171                                 sector_t blocknr,
2172                                 union nilfs_binfo *binfo)
2173 {
2174         struct nilfs_btree_node *parent;
2175         __u64 key;
2176         __u64 ptr;
2177         int ncmax, ret;
2178
2179         parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax);
2180         ptr = nilfs_btree_node_get_ptr(parent, path[level + 1].bp_index,
2181                                        ncmax);
2182         if (buffer_nilfs_node(*bh)) {
2183                 path[level].bp_ctxt.oldkey = ptr;
2184                 path[level].bp_ctxt.newkey = blocknr;
2185                 path[level].bp_ctxt.bh = *bh;
2186                 ret = nilfs_btnode_prepare_change_key(
2187                         &NILFS_BMAP_I(btree)->i_btnode_cache,
2188                         &path[level].bp_ctxt);
2189                 if (ret < 0)
2190                         return ret;
2191                 nilfs_btnode_commit_change_key(
2192                         &NILFS_BMAP_I(btree)->i_btnode_cache,
2193                         &path[level].bp_ctxt);
2194                 *bh = path[level].bp_ctxt.bh;
2195         }
2196
2197         nilfs_btree_node_set_ptr(parent, path[level + 1].bp_index, blocknr,
2198                                  ncmax);
2199
2200         key = nilfs_btree_node_get_key(parent, path[level + 1].bp_index);
2201         /* on-disk format */
2202         binfo->bi_dat.bi_blkoff = cpu_to_le64(key);
2203         binfo->bi_dat.bi_level = level;
2204
2205         return 0;
2206 }
2207
2208 static int nilfs_btree_assign_v(struct nilfs_bmap *btree,
2209                                 struct nilfs_btree_path *path,
2210                                 int level,
2211                                 struct buffer_head **bh,
2212                                 sector_t blocknr,
2213                                 union nilfs_binfo *binfo)
2214 {
2215         struct nilfs_btree_node *parent;
2216         struct inode *dat = nilfs_bmap_get_dat(btree);
2217         __u64 key;
2218         __u64 ptr;
2219         union nilfs_bmap_ptr_req req;
2220         int ncmax, ret;
2221
2222         parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax);
2223         ptr = nilfs_btree_node_get_ptr(parent, path[level + 1].bp_index,
2224                                        ncmax);
2225         req.bpr_ptr = ptr;
2226         ret = nilfs_dat_prepare_start(dat, &req.bpr_req);
2227         if (ret < 0)
2228                 return ret;
2229         nilfs_dat_commit_start(dat, &req.bpr_req, blocknr);
2230
2231         key = nilfs_btree_node_get_key(parent, path[level + 1].bp_index);
2232         /* on-disk format */
2233         binfo->bi_v.bi_vblocknr = cpu_to_le64(ptr);
2234         binfo->bi_v.bi_blkoff = cpu_to_le64(key);
2235
2236         return 0;
2237 }
2238
2239 static int nilfs_btree_assign(struct nilfs_bmap *btree,
2240                               struct buffer_head **bh,
2241                               sector_t blocknr,
2242                               union nilfs_binfo *binfo)
2243 {
2244         struct nilfs_btree_path *path;
2245         struct nilfs_btree_node *node;
2246         __u64 key;
2247         int level, ret;
2248
2249         path = nilfs_btree_alloc_path();
2250         if (path == NULL)
2251                 return -ENOMEM;
2252
2253         if (buffer_nilfs_node(*bh)) {
2254                 node = (struct nilfs_btree_node *)(*bh)->b_data;
2255                 key = nilfs_btree_node_get_key(node, 0);
2256                 level = nilfs_btree_node_get_level(node);
2257         } else {
2258                 key = nilfs_bmap_data_get_key(btree, *bh);
2259                 level = NILFS_BTREE_LEVEL_DATA;
2260         }
2261
2262         ret = nilfs_btree_do_lookup(btree, path, key, NULL, level + 1, 0);
2263         if (ret < 0) {
2264                 WARN_ON(ret == -ENOENT);
2265                 goto out;
2266         }
2267
2268         ret = NILFS_BMAP_USE_VBN(btree) ?
2269                 nilfs_btree_assign_v(btree, path, level, bh, blocknr, binfo) :
2270                 nilfs_btree_assign_p(btree, path, level, bh, blocknr, binfo);
2271
2272  out:
2273         nilfs_btree_free_path(path);
2274
2275         return ret;
2276 }
2277
2278 static int nilfs_btree_assign_gc(struct nilfs_bmap *btree,
2279                                  struct buffer_head **bh,
2280                                  sector_t blocknr,
2281                                  union nilfs_binfo *binfo)
2282 {
2283         struct nilfs_btree_node *node;
2284         __u64 key;
2285         int ret;
2286
2287         ret = nilfs_dat_move(nilfs_bmap_get_dat(btree), (*bh)->b_blocknr,
2288                              blocknr);
2289         if (ret < 0)
2290                 return ret;
2291
2292         if (buffer_nilfs_node(*bh)) {
2293                 node = (struct nilfs_btree_node *)(*bh)->b_data;
2294                 key = nilfs_btree_node_get_key(node, 0);
2295         } else
2296                 key = nilfs_bmap_data_get_key(btree, *bh);
2297
2298         /* on-disk format */
2299         binfo->bi_v.bi_vblocknr = cpu_to_le64((*bh)->b_blocknr);
2300         binfo->bi_v.bi_blkoff = cpu_to_le64(key);
2301
2302         return 0;
2303 }
2304
2305 static int nilfs_btree_mark(struct nilfs_bmap *btree, __u64 key, int level)
2306 {
2307         struct buffer_head *bh;
2308         struct nilfs_btree_path *path;
2309         __u64 ptr;
2310         int ret;
2311
2312         path = nilfs_btree_alloc_path();
2313         if (path == NULL)
2314                 return -ENOMEM;
2315
2316         ret = nilfs_btree_do_lookup(btree, path, key, &ptr, level + 1, 0);
2317         if (ret < 0) {
2318                 WARN_ON(ret == -ENOENT);
2319                 goto out;
2320         }
2321         ret = nilfs_btree_get_block(btree, ptr, &bh);
2322         if (ret < 0) {
2323                 WARN_ON(ret == -ENOENT);
2324                 goto out;
2325         }
2326
2327         if (!buffer_dirty(bh))
2328                 mark_buffer_dirty(bh);
2329         brelse(bh);
2330         if (!nilfs_bmap_dirty(btree))
2331                 nilfs_bmap_set_dirty(btree);
2332
2333  out:
2334         nilfs_btree_free_path(path);
2335         return ret;
2336 }
2337
2338 static const struct nilfs_bmap_operations nilfs_btree_ops = {
2339         .bop_lookup             =       nilfs_btree_lookup,
2340         .bop_lookup_contig      =       nilfs_btree_lookup_contig,
2341         .bop_insert             =       nilfs_btree_insert,
2342         .bop_delete             =       nilfs_btree_delete,
2343         .bop_clear              =       NULL,
2344
2345         .bop_propagate          =       nilfs_btree_propagate,
2346
2347         .bop_lookup_dirty_buffers =     nilfs_btree_lookup_dirty_buffers,
2348
2349         .bop_assign             =       nilfs_btree_assign,
2350         .bop_mark               =       nilfs_btree_mark,
2351
2352         .bop_seek_key           =       nilfs_btree_seek_key,
2353         .bop_last_key           =       nilfs_btree_last_key,
2354
2355         .bop_check_insert       =       NULL,
2356         .bop_check_delete       =       nilfs_btree_check_delete,
2357         .bop_gather_data        =       nilfs_btree_gather_data,
2358 };
2359
2360 static const struct nilfs_bmap_operations nilfs_btree_ops_gc = {
2361         .bop_lookup             =       NULL,
2362         .bop_lookup_contig      =       NULL,
2363         .bop_insert             =       NULL,
2364         .bop_delete             =       NULL,
2365         .bop_clear              =       NULL,
2366
2367         .bop_propagate          =       nilfs_btree_propagate_gc,
2368
2369         .bop_lookup_dirty_buffers =     nilfs_btree_lookup_dirty_buffers,
2370
2371         .bop_assign             =       nilfs_btree_assign_gc,
2372         .bop_mark               =       NULL,
2373
2374         .bop_seek_key           =       NULL,
2375         .bop_last_key           =       NULL,
2376
2377         .bop_check_insert       =       NULL,
2378         .bop_check_delete       =       NULL,
2379         .bop_gather_data        =       NULL,
2380 };
2381
2382 static void __nilfs_btree_init(struct nilfs_bmap *bmap)
2383 {
2384         bmap->b_ops = &nilfs_btree_ops;
2385         bmap->b_nchildren_per_block =
2386                 NILFS_BTREE_NODE_NCHILDREN_MAX(nilfs_btree_node_size(bmap));
2387 }
2388
2389 int nilfs_btree_init(struct nilfs_bmap *bmap)
2390 {
2391         int ret = 0;
2392
2393         __nilfs_btree_init(bmap);
2394
2395         if (nilfs_btree_root_broken(nilfs_btree_get_root(bmap),
2396                                     bmap->b_inode->i_ino))
2397                 ret = -EIO;
2398         return ret;
2399 }
2400
2401 void nilfs_btree_init_gc(struct nilfs_bmap *bmap)
2402 {
2403         bmap->b_ops = &nilfs_btree_ops_gc;
2404         bmap->b_nchildren_per_block =
2405                 NILFS_BTREE_NODE_NCHILDREN_MAX(nilfs_btree_node_size(bmap));
2406 }